diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -13379,3 +13379,1009 @@ Use FP16 precision: False 02/24/2022 18:23:13 - INFO - codeparrot_training - Step 12998: {'lr': 0.0004379815273007039, 'samples': 6655488, 'steps': 12998, 'loss/train': 2.557718276977539} 02/24/2022 18:23:19 - INFO - codeparrot_training - Step 12999: {'lr': 0.0004379707399878276, 'samples': 6656000, 'steps': 12999, 'loss/train': 1.6013245582580566} 02/24/2022 18:23:19 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 18:23:36 - WARNING - huggingface_hub.repository - Several commits (13) will be pushed upstream. +02/24/2022 18:23:36 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 18:24:23 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + e455b12..7d01f0b floral-grass-11 -> floral-grass-11 + +02/24/2022 18:24:28 - INFO - codeparrot_training - Step 13000: {'lr': 0.00043795995186974435, 'samples': 6656512, 'steps': 13000, 'loss/train': 2.1346521377563477} +02/24/2022 18:24:33 - INFO - codeparrot_training - Step 13001: {'lr': 0.0004379491629465004, 'samples': 6657024, 'steps': 13001, 'loss/train': 1.4408684968948364} +02/24/2022 18:24:37 - INFO - codeparrot_training - Step 13002: {'lr': 0.00043793837321814185, 'samples': 6657536, 'steps': 13002, 'loss/train': 1.5857553482055664} +02/24/2022 18:24:42 - INFO - codeparrot_training - Step 13003: {'lr': 0.000437927582684715, 'samples': 6658048, 'steps': 13003, 'loss/train': 1.689317226409912} +02/24/2022 18:24:46 - INFO - codeparrot_training - Step 13004: {'lr': 0.0004379167913462661, 'samples': 6658560, 'steps': 13004, 'loss/train': 1.8870562314987183} +02/24/2022 18:24:51 - INFO - codeparrot_training - Step 13005: {'lr': 0.0004379059992028412, 'samples': 6659072, 'steps': 13005, 'loss/train': 2.039428234100342} +02/24/2022 18:24:55 - INFO - codeparrot_training - Step 13006: {'lr': 0.00043789520625448685, 'samples': 6659584, 'steps': 13006, 'loss/train': 2.1801905632019043} +02/24/2022 18:25:01 - INFO - codeparrot_training - Step 13007: {'lr': 0.000437884412501249, 'samples': 6660096, 'steps': 13007, 'loss/train': 2.1378047466278076} +02/24/2022 18:25:04 - INFO - codeparrot_training - Step 13008: {'lr': 0.00043787361794317403, 'samples': 6660608, 'steps': 13008, 'loss/train': 2.4507038593292236} +02/24/2022 18:25:10 - INFO - codeparrot_training - Step 13009: {'lr': 0.0004378628225803081, 'samples': 6661120, 'steps': 13009, 'loss/train': 1.9727485179901123} +02/24/2022 18:25:13 - INFO - codeparrot_training - Step 13010: {'lr': 0.0004378520264126975, 'samples': 6661632, 'steps': 13010, 'loss/train': 1.06065833568573} +02/24/2022 18:25:19 - INFO - codeparrot_training - Step 13011: {'lr': 0.0004378412294403885, 'samples': 6662144, 'steps': 13011, 'loss/train': 1.6586016416549683} +02/24/2022 18:25:22 - INFO - codeparrot_training - Step 13012: {'lr': 0.0004378304316634273, 'samples': 6662656, 'steps': 13012, 'loss/train': 2.9551174640655518} +02/24/2022 18:25:29 - INFO - codeparrot_training - Step 13013: {'lr': 0.0004378196330818602, 'samples': 6663168, 'steps': 13013, 'loss/train': 2.4696385860443115} +02/24/2022 18:25:32 - INFO - codeparrot_training - Step 13014: {'lr': 0.00043780883369573336, 'samples': 6663680, 'steps': 13014, 'loss/train': 2.9421322345733643} +02/24/2022 18:25:38 - INFO - codeparrot_training - Step 13015: {'lr': 0.00043779803350509316, 'samples': 6664192, 'steps': 13015, 'loss/train': 1.4251253604888916} +02/24/2022 18:25:41 - INFO - codeparrot_training - Step 13016: {'lr': 0.0004377872325099858, 'samples': 6664704, 'steps': 13016, 'loss/train': 0.14430415630340576} +02/24/2022 18:25:47 - INFO - codeparrot_training - Step 13017: {'lr': 0.0004377764307104576, 'samples': 6665216, 'steps': 13017, 'loss/train': 3.7814066410064697} +02/24/2022 18:25:50 - INFO - codeparrot_training - Step 13018: {'lr': 0.00043776562810655473, 'samples': 6665728, 'steps': 13018, 'loss/train': 1.9222338199615479} +02/24/2022 18:25:56 - INFO - codeparrot_training - Step 13019: {'lr': 0.0004377548246983236, 'samples': 6666240, 'steps': 13019, 'loss/train': 1.3617608547210693} +02/24/2022 18:26:00 - INFO - codeparrot_training - Step 13020: {'lr': 0.0004377440204858104, 'samples': 6666752, 'steps': 13020, 'loss/train': 1.7688344717025757} +02/24/2022 18:26:05 - INFO - codeparrot_training - Step 13021: {'lr': 0.0004377332154690614, 'samples': 6667264, 'steps': 13021, 'loss/train': 1.9365235567092896} +02/24/2022 18:26:09 - INFO - codeparrot_training - Step 13022: {'lr': 0.0004377224096481229, 'samples': 6667776, 'steps': 13022, 'loss/train': 2.4146294593811035} +02/24/2022 18:26:14 - INFO - codeparrot_training - Step 13023: {'lr': 0.0004377116030230413, 'samples': 6668288, 'steps': 13023, 'loss/train': 2.081615686416626} +02/24/2022 18:26:18 - INFO - codeparrot_training - Step 13024: {'lr': 0.0004377007955938628, 'samples': 6668800, 'steps': 13024, 'loss/train': 1.7210839986801147} +02/24/2022 18:26:24 - INFO - codeparrot_training - Step 13025: {'lr': 0.0004376899873606336, 'samples': 6669312, 'steps': 13025, 'loss/train': 1.253495693206787} +02/24/2022 18:26:27 - INFO - codeparrot_training - Step 13026: {'lr': 0.0004376791783234001, 'samples': 6669824, 'steps': 13026, 'loss/train': 2.019789934158325} +02/24/2022 18:26:33 - INFO - codeparrot_training - Step 13027: {'lr': 0.0004376683684822086, 'samples': 6670336, 'steps': 13027, 'loss/train': 0.6084689497947693} +02/24/2022 18:26:36 - INFO - codeparrot_training - Step 13028: {'lr': 0.0004376575578371055, 'samples': 6670848, 'steps': 13028, 'loss/train': 1.7511945962905884} +02/24/2022 18:26:42 - INFO - codeparrot_training - Step 13029: {'lr': 0.0004376467463881369, 'samples': 6671360, 'steps': 13029, 'loss/train': 1.479759931564331} +02/24/2022 18:26:45 - INFO - codeparrot_training - Step 13030: {'lr': 0.0004376359341353492, 'samples': 6671872, 'steps': 13030, 'loss/train': 3.672703266143799} +02/24/2022 18:26:51 - INFO - codeparrot_training - Step 13031: {'lr': 0.00043762512107878884, 'samples': 6672384, 'steps': 13031, 'loss/train': 2.759281873703003} +02/24/2022 18:26:54 - INFO - codeparrot_training - Step 13032: {'lr': 0.00043761430721850206, 'samples': 6672896, 'steps': 13032, 'loss/train': 1.9744319915771484} +02/24/2022 18:27:00 - INFO - codeparrot_training - Step 13033: {'lr': 0.0004376034925545351, 'samples': 6673408, 'steps': 13033, 'loss/train': 2.404545783996582} +02/24/2022 18:27:03 - INFO - codeparrot_training - Step 13034: {'lr': 0.0004375926770869343, 'samples': 6673920, 'steps': 13034, 'loss/train': 2.4459218978881836} +02/24/2022 18:27:09 - INFO - codeparrot_training - Step 13035: {'lr': 0.00043758186081574614, 'samples': 6674432, 'steps': 13035, 'loss/train': 2.392439126968384} +02/24/2022 18:27:13 - INFO - codeparrot_training - Step 13036: {'lr': 0.00043757104374101677, 'samples': 6674944, 'steps': 13036, 'loss/train': 2.167207956314087} +02/24/2022 18:27:18 - INFO - codeparrot_training - Step 13037: {'lr': 0.00043756022586279264, 'samples': 6675456, 'steps': 13037, 'loss/train': 1.6302781105041504} +02/24/2022 18:27:22 - INFO - codeparrot_training - Step 13038: {'lr': 0.00043754940718112, 'samples': 6675968, 'steps': 13038, 'loss/train': 2.507948637008667} +02/24/2022 18:27:28 - INFO - codeparrot_training - Step 13039: {'lr': 0.0004375385876960454, 'samples': 6676480, 'steps': 13039, 'loss/train': 0.4598539173603058} +02/24/2022 18:27:31 - INFO - codeparrot_training - Step 13040: {'lr': 0.0004375277674076149, 'samples': 6676992, 'steps': 13040, 'loss/train': 2.299009084701538} +02/24/2022 18:27:37 - INFO - codeparrot_training - Step 13041: {'lr': 0.00043751694631587504, 'samples': 6677504, 'steps': 13041, 'loss/train': 1.8661608695983887} +02/24/2022 18:27:40 - INFO - codeparrot_training - Step 13042: {'lr': 0.00043750612442087215, 'samples': 6678016, 'steps': 13042, 'loss/train': 1.9540120363235474} +02/24/2022 18:27:46 - INFO - codeparrot_training - Step 13043: {'lr': 0.0004374953017226525, 'samples': 6678528, 'steps': 13043, 'loss/train': 2.130275249481201} +02/24/2022 18:27:49 - INFO - codeparrot_training - Step 13044: {'lr': 0.0004374844782212626, 'samples': 6679040, 'steps': 13044, 'loss/train': 2.635179042816162} +02/24/2022 18:27:55 - INFO - codeparrot_training - Step 13045: {'lr': 0.0004374736539167487, 'samples': 6679552, 'steps': 13045, 'loss/train': 2.7220656871795654} +02/24/2022 18:27:58 - INFO - codeparrot_training - Step 13046: {'lr': 0.0004374628288091571, 'samples': 6680064, 'steps': 13046, 'loss/train': 2.7231671810150146} +02/24/2022 18:28:04 - INFO - codeparrot_training - Step 13047: {'lr': 0.0004374520028985344, 'samples': 6680576, 'steps': 13047, 'loss/train': 0.4303608536720276} +02/24/2022 18:28:07 - INFO - codeparrot_training - Step 13048: {'lr': 0.0004374411761849268, 'samples': 6681088, 'steps': 13048, 'loss/train': 1.8444992303848267} +02/24/2022 18:28:13 - INFO - codeparrot_training - Step 13049: {'lr': 0.0004374303486683807, 'samples': 6681600, 'steps': 13049, 'loss/train': 0.9657476544380188} +02/24/2022 18:28:17 - INFO - codeparrot_training - Step 13050: {'lr': 0.0004374195203489425, 'samples': 6682112, 'steps': 13050, 'loss/train': 2.81044864654541} +02/24/2022 18:28:23 - INFO - codeparrot_training - Step 13051: {'lr': 0.0004374086912266586, 'samples': 6682624, 'steps': 13051, 'loss/train': 3.1938674449920654} +02/24/2022 18:28:26 - INFO - codeparrot_training - Step 13052: {'lr': 0.0004373978613015753, 'samples': 6683136, 'steps': 13052, 'loss/train': 2.0069711208343506} +02/24/2022 18:28:32 - INFO - codeparrot_training - Step 13053: {'lr': 0.0004373870305737392, 'samples': 6683648, 'steps': 13053, 'loss/train': 2.423370122909546} +02/24/2022 18:28:35 - INFO - codeparrot_training - Step 13054: {'lr': 0.00043737619904319654, 'samples': 6684160, 'steps': 13054, 'loss/train': 1.4854636192321777} +02/24/2022 18:28:41 - INFO - codeparrot_training - Step 13055: {'lr': 0.0004373653667099937, 'samples': 6684672, 'steps': 13055, 'loss/train': 1.288827896118164} +02/24/2022 18:28:44 - INFO - codeparrot_training - Step 13056: {'lr': 0.00043735453357417707, 'samples': 6685184, 'steps': 13056, 'loss/train': 2.0451629161834717} +02/24/2022 18:28:50 - INFO - codeparrot_training - Step 13057: {'lr': 0.00043734369963579323, 'samples': 6685696, 'steps': 13057, 'loss/train': 1.8356995582580566} +02/24/2022 18:28:53 - INFO - codeparrot_training - Step 13058: {'lr': 0.0004373328648948884, 'samples': 6686208, 'steps': 13058, 'loss/train': 1.7394015789031982} +02/24/2022 18:28:59 - INFO - codeparrot_training - Step 13059: {'lr': 0.0004373220293515091, 'samples': 6686720, 'steps': 13059, 'loss/train': 1.4280154705047607} +02/24/2022 18:29:02 - INFO - codeparrot_training - Step 13060: {'lr': 0.00043731119300570166, 'samples': 6687232, 'steps': 13060, 'loss/train': 1.967885971069336} +02/24/2022 18:29:08 - INFO - codeparrot_training - Step 13061: {'lr': 0.0004373003558575126, 'samples': 6687744, 'steps': 13061, 'loss/train': 2.5358312129974365} +02/24/2022 18:29:12 - INFO - codeparrot_training - Step 13062: {'lr': 0.00043728951790698823, 'samples': 6688256, 'steps': 13062, 'loss/train': 2.411454439163208} +02/24/2022 18:29:17 - INFO - codeparrot_training - Step 13063: {'lr': 0.00043727867915417505, 'samples': 6688768, 'steps': 13063, 'loss/train': 1.7838704586029053} +02/24/2022 18:29:21 - INFO - codeparrot_training - Step 13064: {'lr': 0.00043726783959911953, 'samples': 6689280, 'steps': 13064, 'loss/train': 1.6130849123001099} +02/24/2022 18:29:27 - INFO - codeparrot_training - Step 13065: {'lr': 0.00043725699924186803, 'samples': 6689792, 'steps': 13065, 'loss/train': 0.8311630487442017} +02/24/2022 18:29:30 - INFO - codeparrot_training - Step 13066: {'lr': 0.00043724615808246695, 'samples': 6690304, 'steps': 13066, 'loss/train': 2.0785346031188965} +02/24/2022 18:29:35 - INFO - codeparrot_training - Step 13067: {'lr': 0.0004372353161209628, 'samples': 6690816, 'steps': 13067, 'loss/train': 0.2719285190105438} +02/24/2022 18:29:39 - INFO - codeparrot_training - Step 13068: {'lr': 0.000437224473357402, 'samples': 6691328, 'steps': 13068, 'loss/train': 1.5799168348312378} +02/24/2022 18:29:44 - INFO - codeparrot_training - Step 13069: {'lr': 0.0004372136297918311, 'samples': 6691840, 'steps': 13069, 'loss/train': 1.7601932287216187} +02/24/2022 18:29:50 - INFO - codeparrot_training - Step 13070: {'lr': 0.0004372027854242964, 'samples': 6692352, 'steps': 13070, 'loss/train': 1.5112253427505493} +02/24/2022 18:29:54 - INFO - codeparrot_training - Step 13071: {'lr': 0.0004371919402548444, 'samples': 6692864, 'steps': 13071, 'loss/train': 0.9533903002738953} +02/24/2022 18:30:00 - INFO - codeparrot_training - Step 13072: {'lr': 0.00043718109428352156, 'samples': 6693376, 'steps': 13072, 'loss/train': 2.0507044792175293} +02/24/2022 18:30:03 - INFO - codeparrot_training - Step 13073: {'lr': 0.00043717024751037436, 'samples': 6693888, 'steps': 13073, 'loss/train': 1.9658995866775513} +02/24/2022 18:30:09 - INFO - codeparrot_training - Step 13074: {'lr': 0.0004371593999354493, 'samples': 6694400, 'steps': 13074, 'loss/train': 1.318845272064209} +02/24/2022 18:30:12 - INFO - codeparrot_training - Step 13075: {'lr': 0.0004371485515587927, 'samples': 6694912, 'steps': 13075, 'loss/train': 1.7926405668258667} +02/24/2022 18:30:18 - INFO - codeparrot_training - Step 13076: {'lr': 0.0004371377023804512, 'samples': 6695424, 'steps': 13076, 'loss/train': 2.1227822303771973} +02/24/2022 18:30:21 - INFO - codeparrot_training - Step 13077: {'lr': 0.00043712685240047125, 'samples': 6695936, 'steps': 13077, 'loss/train': 2.1113078594207764} +02/24/2022 18:30:27 - INFO - codeparrot_training - Step 13078: {'lr': 0.00043711600161889917, 'samples': 6696448, 'steps': 13078, 'loss/train': 2.54327654838562} +02/24/2022 18:30:30 - INFO - codeparrot_training - Step 13079: {'lr': 0.0004371051500357816, 'samples': 6696960, 'steps': 13079, 'loss/train': 3.1392409801483154} +02/24/2022 18:30:36 - INFO - codeparrot_training - Step 13080: {'lr': 0.000437094297651165, 'samples': 6697472, 'steps': 13080, 'loss/train': 0.3492695689201355} +02/24/2022 18:30:40 - INFO - codeparrot_training - Step 13081: {'lr': 0.00043708344446509586, 'samples': 6697984, 'steps': 13081, 'loss/train': 2.4953410625457764} +02/24/2022 18:30:45 - INFO - codeparrot_training - Step 13082: {'lr': 0.0004370725904776206, 'samples': 6698496, 'steps': 13082, 'loss/train': 2.3644306659698486} +02/24/2022 18:30:49 - INFO - codeparrot_training - Step 13083: {'lr': 0.0004370617356887858, 'samples': 6699008, 'steps': 13083, 'loss/train': 2.571532726287842} +02/24/2022 18:30:54 - INFO - codeparrot_training - Step 13084: {'lr': 0.00043705088009863793, 'samples': 6699520, 'steps': 13084, 'loss/train': 2.2510273456573486} +02/24/2022 18:30:58 - INFO - codeparrot_training - Step 13085: {'lr': 0.0004370400237072234, 'samples': 6700032, 'steps': 13085, 'loss/train': 2.9907498359680176} +02/24/2022 18:31:03 - INFO - codeparrot_training - Step 13086: {'lr': 0.0004370291665145889, 'samples': 6700544, 'steps': 13086, 'loss/train': 1.3774452209472656} +02/24/2022 18:31:07 - INFO - codeparrot_training - Step 13087: {'lr': 0.00043701830852078076, 'samples': 6701056, 'steps': 13087, 'loss/train': 2.6954946517944336} +02/24/2022 18:31:12 - INFO - codeparrot_training - Step 13088: {'lr': 0.0004370074497258456, 'samples': 6701568, 'steps': 13088, 'loss/train': 1.8511327505111694} +02/24/2022 18:31:16 - INFO - codeparrot_training - Step 13089: {'lr': 0.00043699659012983, 'samples': 6702080, 'steps': 13089, 'loss/train': 2.4330554008483887} +02/24/2022 18:31:21 - INFO - codeparrot_training - Step 13090: {'lr': 0.00043698572973278026, 'samples': 6702592, 'steps': 13090, 'loss/train': 0.9355196952819824} +02/24/2022 18:31:25 - INFO - codeparrot_training - Step 13091: {'lr': 0.0004369748685347431, 'samples': 6703104, 'steps': 13091, 'loss/train': 2.0947518348693848} +02/24/2022 18:31:30 - INFO - codeparrot_training - Step 13092: {'lr': 0.00043696400653576496, 'samples': 6703616, 'steps': 13092, 'loss/train': 2.21494722366333} +02/24/2022 18:31:34 - INFO - codeparrot_training - Step 13093: {'lr': 0.00043695314373589234, 'samples': 6704128, 'steps': 13093, 'loss/train': 1.9236592054367065} +02/24/2022 18:31:39 - INFO - codeparrot_training - Step 13094: {'lr': 0.00043694228013517185, 'samples': 6704640, 'steps': 13094, 'loss/train': 1.3976666927337646} +02/24/2022 18:31:43 - INFO - codeparrot_training - Step 13095: {'lr': 0.00043693141573365003, 'samples': 6705152, 'steps': 13095, 'loss/train': 2.096853494644165} +02/24/2022 18:31:49 - INFO - codeparrot_training - Step 13096: {'lr': 0.0004369205505313733, 'samples': 6705664, 'steps': 13096, 'loss/train': 2.0255119800567627} +02/24/2022 18:31:52 - INFO - codeparrot_training - Step 13097: {'lr': 0.0004369096845283883, 'samples': 6706176, 'steps': 13097, 'loss/train': 1.203255534172058} +02/24/2022 18:31:58 - INFO - codeparrot_training - Step 13098: {'lr': 0.0004368988177247416, 'samples': 6706688, 'steps': 13098, 'loss/train': 1.4004253149032593} +02/24/2022 18:32:01 - INFO - codeparrot_training - Step 13099: {'lr': 0.00043688795012047975, 'samples': 6707200, 'steps': 13099, 'loss/train': 3.4179847240448} +02/24/2022 18:32:07 - INFO - codeparrot_training - Step 13100: {'lr': 0.00043687708171564923, 'samples': 6707712, 'steps': 13100, 'loss/train': 1.6716430187225342} +02/24/2022 18:32:11 - INFO - codeparrot_training - Step 13101: {'lr': 0.0004368662125102966, 'samples': 6708224, 'steps': 13101, 'loss/train': 0.7230225205421448} +02/24/2022 18:32:16 - INFO - codeparrot_training - Step 13102: {'lr': 0.00043685534250446846, 'samples': 6708736, 'steps': 13102, 'loss/train': 2.184340238571167} +02/24/2022 18:32:20 - INFO - codeparrot_training - Step 13103: {'lr': 0.0004368444716982114, 'samples': 6709248, 'steps': 13103, 'loss/train': 1.14542818069458} +02/24/2022 18:32:25 - INFO - codeparrot_training - Step 13104: {'lr': 0.0004368336000915719, 'samples': 6709760, 'steps': 13104, 'loss/train': 2.194945812225342} +02/24/2022 18:32:29 - INFO - codeparrot_training - Step 13105: {'lr': 0.0004368227276845966, 'samples': 6710272, 'steps': 13105, 'loss/train': 1.4404478073120117} +02/24/2022 18:32:35 - INFO - codeparrot_training - Step 13106: {'lr': 0.0004368118544773321, 'samples': 6710784, 'steps': 13106, 'loss/train': 2.1111202239990234} +02/24/2022 18:32:39 - INFO - codeparrot_training - Step 13107: {'lr': 0.00043680098046982495, 'samples': 6711296, 'steps': 13107, 'loss/train': 3.037461042404175} +02/24/2022 18:32:44 - INFO - codeparrot_training - Step 13108: {'lr': 0.00043679010566212163, 'samples': 6711808, 'steps': 13108, 'loss/train': 1.7464441061019897} +02/24/2022 18:32:48 - INFO - codeparrot_training - Step 13109: {'lr': 0.0004367792300542689, 'samples': 6712320, 'steps': 13109, 'loss/train': 1.7309730052947998} +02/24/2022 18:32:53 - INFO - codeparrot_training - Step 13110: {'lr': 0.00043676835364631316, 'samples': 6712832, 'steps': 13110, 'loss/train': 1.6087268590927124} +02/24/2022 18:32:57 - INFO - codeparrot_training - Step 13111: {'lr': 0.00043675747643830116, 'samples': 6713344, 'steps': 13111, 'loss/train': 3.231724977493286} +02/24/2022 18:33:02 - INFO - codeparrot_training - Step 13112: {'lr': 0.0004367465984302794, 'samples': 6713856, 'steps': 13112, 'loss/train': 1.6795626878738403} +02/24/2022 18:33:06 - INFO - codeparrot_training - Step 13113: {'lr': 0.0004367357196222946, 'samples': 6714368, 'steps': 13113, 'loss/train': 1.807559847831726} +02/24/2022 18:33:12 - INFO - codeparrot_training - Step 13114: {'lr': 0.00043672484001439316, 'samples': 6714880, 'steps': 13114, 'loss/train': 2.354203462600708} +02/24/2022 18:33:15 - INFO - codeparrot_training - Step 13115: {'lr': 0.00043671395960662184, 'samples': 6715392, 'steps': 13115, 'loss/train': 2.1525044441223145} +02/24/2022 18:33:21 - INFO - codeparrot_training - Step 13116: {'lr': 0.0004367030783990272, 'samples': 6715904, 'steps': 13116, 'loss/train': 1.8892061710357666} +02/24/2022 18:33:25 - INFO - codeparrot_training - Step 13117: {'lr': 0.0004366921963916559, 'samples': 6716416, 'steps': 13117, 'loss/train': 1.7278432846069336} +02/24/2022 18:33:30 - INFO - codeparrot_training - Step 13118: {'lr': 0.0004366813135845545, 'samples': 6716928, 'steps': 13118, 'loss/train': 3.2987256050109863} +02/24/2022 18:33:34 - INFO - codeparrot_training - Step 13119: {'lr': 0.00043667042997776965, 'samples': 6717440, 'steps': 13119, 'loss/train': 2.8281095027923584} +02/24/2022 18:33:39 - INFO - codeparrot_training - Step 13120: {'lr': 0.00043665954557134786, 'samples': 6717952, 'steps': 13120, 'loss/train': 2.366917133331299} +02/24/2022 18:33:43 - INFO - codeparrot_training - Step 13121: {'lr': 0.0004366486603653359, 'samples': 6718464, 'steps': 13121, 'loss/train': 1.9473650455474854} +02/24/2022 18:33:48 - INFO - codeparrot_training - Step 13122: {'lr': 0.00043663777435978037, 'samples': 6718976, 'steps': 13122, 'loss/train': 2.808049440383911} +02/24/2022 18:33:52 - INFO - codeparrot_training - Step 13123: {'lr': 0.0004366268875547278, 'samples': 6719488, 'steps': 13123, 'loss/train': 2.011204957962036} +02/24/2022 18:33:57 - INFO - codeparrot_training - Step 13124: {'lr': 0.000436615999950225, 'samples': 6720000, 'steps': 13124, 'loss/train': 2.8301994800567627} +02/24/2022 18:34:01 - INFO - codeparrot_training - Step 13125: {'lr': 0.0004366051115463184, 'samples': 6720512, 'steps': 13125, 'loss/train': 2.068774700164795} +02/24/2022 18:34:07 - INFO - codeparrot_training - Step 13126: {'lr': 0.0004365942223430549, 'samples': 6721024, 'steps': 13126, 'loss/train': 1.955896258354187} +02/24/2022 18:34:10 - INFO - codeparrot_training - Step 13127: {'lr': 0.0004365833323404809, 'samples': 6721536, 'steps': 13127, 'loss/train': 2.6178290843963623} +02/24/2022 18:34:16 - INFO - codeparrot_training - Step 13128: {'lr': 0.0004365724415386432, 'samples': 6722048, 'steps': 13128, 'loss/train': 1.4386388063430786} +02/24/2022 18:34:19 - INFO - codeparrot_training - Step 13129: {'lr': 0.0004365615499375884, 'samples': 6722560, 'steps': 13129, 'loss/train': 2.6756575107574463} +02/24/2022 18:34:25 - INFO - codeparrot_training - Step 13130: {'lr': 0.0004365506575373631, 'samples': 6723072, 'steps': 13130, 'loss/train': 1.0711939334869385} +02/24/2022 18:34:28 - INFO - codeparrot_training - Step 13131: {'lr': 0.0004365397643380141, 'samples': 6723584, 'steps': 13131, 'loss/train': 2.709085464477539} +02/24/2022 18:34:34 - INFO - codeparrot_training - Step 13132: {'lr': 0.000436528870339588, 'samples': 6724096, 'steps': 13132, 'loss/train': 1.7947182655334473} +02/24/2022 18:34:37 - INFO - codeparrot_training - Step 13133: {'lr': 0.0004365179755421314, 'samples': 6724608, 'steps': 13133, 'loss/train': 2.4967987537384033} +02/24/2022 18:34:43 - INFO - codeparrot_training - Step 13134: {'lr': 0.00043650707994569095, 'samples': 6725120, 'steps': 13134, 'loss/train': 1.885949730873108} +02/24/2022 18:34:46 - INFO - codeparrot_training - Step 13135: {'lr': 0.0004364961835503135, 'samples': 6725632, 'steps': 13135, 'loss/train': 2.3373868465423584} +02/24/2022 18:34:52 - INFO - codeparrot_training - Step 13136: {'lr': 0.00043648528635604556, 'samples': 6726144, 'steps': 13136, 'loss/train': 3.3740181922912598} +02/24/2022 18:34:55 - INFO - codeparrot_training - Step 13137: {'lr': 0.00043647438836293383, 'samples': 6726656, 'steps': 13137, 'loss/train': 1.6684520244598389} +02/24/2022 18:35:01 - INFO - codeparrot_training - Step 13138: {'lr': 0.0004364634895710251, 'samples': 6727168, 'steps': 13138, 'loss/train': 2.4966161251068115} +02/24/2022 18:35:05 - INFO - codeparrot_training - Step 13139: {'lr': 0.000436452589980366, 'samples': 6727680, 'steps': 13139, 'loss/train': 2.312422037124634} +02/24/2022 18:35:10 - INFO - codeparrot_training - Step 13140: {'lr': 0.00043644168959100315, 'samples': 6728192, 'steps': 13140, 'loss/train': 1.892502784729004} +02/24/2022 18:35:13 - INFO - codeparrot_training - Step 13141: {'lr': 0.0004364307884029834, 'samples': 6728704, 'steps': 13141, 'loss/train': 2.480815887451172} +02/24/2022 18:35:20 - INFO - codeparrot_training - Step 13142: {'lr': 0.0004364198864163533, 'samples': 6729216, 'steps': 13142, 'loss/train': 2.2424378395080566} +02/24/2022 18:35:23 - INFO - codeparrot_training - Step 13143: {'lr': 0.00043640898363115954, 'samples': 6729728, 'steps': 13143, 'loss/train': 2.198420286178589} +02/24/2022 18:35:29 - INFO - codeparrot_training - Step 13144: {'lr': 0.000436398080047449, 'samples': 6730240, 'steps': 13144, 'loss/train': 0.4985601305961609} +02/24/2022 18:35:32 - INFO - codeparrot_training - Step 13145: {'lr': 0.0004363871756652682, 'samples': 6730752, 'steps': 13145, 'loss/train': 2.4132590293884277} +02/24/2022 18:35:38 - INFO - codeparrot_training - Step 13146: {'lr': 0.00043637627048466395, 'samples': 6731264, 'steps': 13146, 'loss/train': 1.7860790491104126} +02/24/2022 18:35:41 - INFO - codeparrot_training - Step 13147: {'lr': 0.00043636536450568293, 'samples': 6731776, 'steps': 13147, 'loss/train': 1.7798588275909424} +02/24/2022 18:35:47 - INFO - codeparrot_training - Step 13148: {'lr': 0.0004363544577283718, 'samples': 6732288, 'steps': 13148, 'loss/train': 2.481811285018921} +02/24/2022 18:35:50 - INFO - codeparrot_training - Step 13149: {'lr': 0.00043634355015277745, 'samples': 6732800, 'steps': 13149, 'loss/train': 2.4156012535095215} +02/24/2022 18:35:56 - INFO - codeparrot_training - Step 13150: {'lr': 0.0004363326417789465, 'samples': 6733312, 'steps': 13150, 'loss/train': 2.1469268798828125} +02/24/2022 18:35:59 - INFO - codeparrot_training - Step 13151: {'lr': 0.0004363217326069256, 'samples': 6733824, 'steps': 13151, 'loss/train': 2.174891948699951} +02/24/2022 18:36:06 - INFO - codeparrot_training - Step 13152: {'lr': 0.0004363108226367616, 'samples': 6734336, 'steps': 13152, 'loss/train': 1.8401378393173218} +02/24/2022 18:36:09 - INFO - codeparrot_training - Step 13153: {'lr': 0.0004362999118685012, 'samples': 6734848, 'steps': 13153, 'loss/train': 2.1276886463165283} +02/24/2022 18:36:15 - INFO - codeparrot_training - Step 13154: {'lr': 0.0004362890003021911, 'samples': 6735360, 'steps': 13154, 'loss/train': 3.3623147010803223} +02/24/2022 18:36:18 - INFO - codeparrot_training - Step 13155: {'lr': 0.00043627808793787813, 'samples': 6735872, 'steps': 13155, 'loss/train': 1.6755790710449219} +02/24/2022 18:36:24 - INFO - codeparrot_training - Step 13156: {'lr': 0.00043626717477560897, 'samples': 6736384, 'steps': 13156, 'loss/train': 2.3421478271484375} +02/24/2022 18:36:27 - INFO - codeparrot_training - Step 13157: {'lr': 0.00043625626081543033, 'samples': 6736896, 'steps': 13157, 'loss/train': 0.820044219493866} +02/24/2022 18:36:33 - INFO - codeparrot_training - Step 13158: {'lr': 0.0004362453460573891, 'samples': 6737408, 'steps': 13158, 'loss/train': 1.2742438316345215} +02/24/2022 18:36:36 - INFO - codeparrot_training - Step 13159: {'lr': 0.0004362344305015319, 'samples': 6737920, 'steps': 13159, 'loss/train': 2.3827643394470215} +02/24/2022 18:36:42 - INFO - codeparrot_training - Step 13160: {'lr': 0.0004362235141479055, 'samples': 6738432, 'steps': 13160, 'loss/train': 1.021011471748352} +02/24/2022 18:36:45 - INFO - codeparrot_training - Step 13161: {'lr': 0.00043621259699655674, 'samples': 6738944, 'steps': 13161, 'loss/train': 1.639274001121521} +02/24/2022 18:36:52 - INFO - codeparrot_training - Step 13162: {'lr': 0.0004362016790475324, 'samples': 6739456, 'steps': 13162, 'loss/train': 1.8226795196533203} +02/24/2022 18:36:55 - INFO - codeparrot_training - Step 13163: {'lr': 0.0004361907603008791, 'samples': 6739968, 'steps': 13163, 'loss/train': 1.8377959728240967} +02/24/2022 18:37:01 - INFO - codeparrot_training - Step 13164: {'lr': 0.00043617984075664375, 'samples': 6740480, 'steps': 13164, 'loss/train': 2.4114608764648438} +02/24/2022 18:37:04 - INFO - codeparrot_training - Step 13165: {'lr': 0.000436168920414873, 'samples': 6740992, 'steps': 13165, 'loss/train': 5.044597625732422} +02/24/2022 18:37:10 - INFO - codeparrot_training - Step 13166: {'lr': 0.0004361579992756138, 'samples': 6741504, 'steps': 13166, 'loss/train': 2.1394155025482178} +02/24/2022 18:37:13 - INFO - codeparrot_training - Step 13167: {'lr': 0.00043614707733891285, 'samples': 6742016, 'steps': 13167, 'loss/train': 2.0595955848693848} +02/24/2022 18:37:19 - INFO - codeparrot_training - Step 13168: {'lr': 0.00043613615460481685, 'samples': 6742528, 'steps': 13168, 'loss/train': 1.9713562726974487} +02/24/2022 18:37:22 - INFO - codeparrot_training - Step 13169: {'lr': 0.0004361252310733728, 'samples': 6743040, 'steps': 13169, 'loss/train': 2.32989764213562} +02/24/2022 18:37:28 - INFO - codeparrot_training - Step 13170: {'lr': 0.0004361143067446273, 'samples': 6743552, 'steps': 13170, 'loss/train': 2.230102777481079} +02/24/2022 18:37:31 - INFO - codeparrot_training - Step 13171: {'lr': 0.00043610338161862713, 'samples': 6744064, 'steps': 13171, 'loss/train': 2.675751209259033} +02/24/2022 18:37:38 - INFO - codeparrot_training - Step 13172: {'lr': 0.00043609245569541924, 'samples': 6744576, 'steps': 13172, 'loss/train': 2.435805082321167} +02/24/2022 18:37:41 - INFO - codeparrot_training - Step 13173: {'lr': 0.0004360815289750503, 'samples': 6745088, 'steps': 13173, 'loss/train': 1.8633240461349487} +02/24/2022 18:37:47 - INFO - codeparrot_training - Step 13174: {'lr': 0.0004360706014575672, 'samples': 6745600, 'steps': 13174, 'loss/train': 2.5471673011779785} +02/24/2022 18:37:50 - INFO - codeparrot_training - Step 13175: {'lr': 0.00043605967314301673, 'samples': 6746112, 'steps': 13175, 'loss/train': 0.826678454875946} +02/24/2022 18:37:56 - INFO - codeparrot_training - Step 13176: {'lr': 0.0004360487440314458, 'samples': 6746624, 'steps': 13176, 'loss/train': 2.2414722442626953} +02/24/2022 18:38:00 - INFO - codeparrot_training - Step 13177: {'lr': 0.000436037814122901, 'samples': 6747136, 'steps': 13177, 'loss/train': 4.344521522521973} +02/24/2022 18:38:05 - INFO - codeparrot_training - Step 13178: {'lr': 0.0004360268834174294, 'samples': 6747648, 'steps': 13178, 'loss/train': 0.8745490312576294} +02/24/2022 18:38:09 - INFO - codeparrot_training - Step 13179: {'lr': 0.00043601595191507757, 'samples': 6748160, 'steps': 13179, 'loss/train': 1.3443816900253296} +02/24/2022 18:38:15 - INFO - codeparrot_training - Step 13180: {'lr': 0.0004360050196158925, 'samples': 6748672, 'steps': 13180, 'loss/train': 1.440228819847107} +02/24/2022 18:38:19 - INFO - codeparrot_training - Step 13181: {'lr': 0.000435994086519921, 'samples': 6749184, 'steps': 13181, 'loss/train': 2.425391435623169} +02/24/2022 18:38:22 - INFO - codeparrot_training - Step 13182: {'lr': 0.00043598315262720995, 'samples': 6749696, 'steps': 13182, 'loss/train': 3.68023681640625} +02/24/2022 18:38:28 - INFO - codeparrot_training - Step 13183: {'lr': 0.00043597221793780606, 'samples': 6750208, 'steps': 13183, 'loss/train': 1.579888939857483} +02/24/2022 18:38:31 - INFO - codeparrot_training - Step 13184: {'lr': 0.0004359612824517563, 'samples': 6750720, 'steps': 13184, 'loss/train': 1.7140872478485107} +02/24/2022 18:38:37 - INFO - codeparrot_training - Step 13185: {'lr': 0.0004359503461691074, 'samples': 6751232, 'steps': 13185, 'loss/train': 2.5259652137756348} +02/24/2022 18:38:40 - INFO - codeparrot_training - Step 13186: {'lr': 0.00043593940908990625, 'samples': 6751744, 'steps': 13186, 'loss/train': 1.8807111978530884} +02/24/2022 18:38:46 - INFO - codeparrot_training - Step 13187: {'lr': 0.00043592847121419974, 'samples': 6752256, 'steps': 13187, 'loss/train': 2.509537696838379} +02/24/2022 18:38:49 - INFO - codeparrot_training - Step 13188: {'lr': 0.00043591753254203474, 'samples': 6752768, 'steps': 13188, 'loss/train': 2.5340421199798584} +02/24/2022 18:38:56 - INFO - codeparrot_training - Step 13189: {'lr': 0.00043590659307345803, 'samples': 6753280, 'steps': 13189, 'loss/train': 2.152726888656616} +02/24/2022 18:39:00 - INFO - codeparrot_training - Step 13190: {'lr': 0.0004358956528085165, 'samples': 6753792, 'steps': 13190, 'loss/train': 2.254812479019165} +02/24/2022 18:39:05 - INFO - codeparrot_training - Step 13191: {'lr': 0.0004358847117472571, 'samples': 6754304, 'steps': 13191, 'loss/train': 2.7281293869018555} +02/24/2022 18:39:09 - INFO - codeparrot_training - Step 13192: {'lr': 0.00043587376988972655, 'samples': 6754816, 'steps': 13192, 'loss/train': 2.2432076930999756} +02/24/2022 18:39:14 - INFO - codeparrot_training - Step 13193: {'lr': 0.0004358628272359718, 'samples': 6755328, 'steps': 13193, 'loss/train': 4.2456955909729} +02/24/2022 18:39:18 - INFO - codeparrot_training - Step 13194: {'lr': 0.0004358518837860397, 'samples': 6755840, 'steps': 13194, 'loss/train': 2.813513994216919} +02/24/2022 18:39:23 - INFO - codeparrot_training - Step 13195: {'lr': 0.0004358409395399772, 'samples': 6756352, 'steps': 13195, 'loss/train': 1.5284693241119385} +02/24/2022 18:39:27 - INFO - codeparrot_training - Step 13196: {'lr': 0.00043582999449783103, 'samples': 6756864, 'steps': 13196, 'loss/train': 2.2048540115356445} +02/24/2022 18:39:32 - INFO - codeparrot_training - Step 13197: {'lr': 0.00043581904865964825, 'samples': 6757376, 'steps': 13197, 'loss/train': 2.481902837753296} +02/24/2022 18:39:36 - INFO - codeparrot_training - Step 13198: {'lr': 0.0004358081020254756, 'samples': 6757888, 'steps': 13198, 'loss/train': 2.173698902130127} +02/24/2022 18:39:42 - INFO - codeparrot_training - Step 13199: {'lr': 0.0004357971545953601, 'samples': 6758400, 'steps': 13199, 'loss/train': 2.6389975547790527} +02/24/2022 18:39:46 - INFO - codeparrot_training - Step 13200: {'lr': 0.00043578620636934855, 'samples': 6758912, 'steps': 13200, 'loss/train': 2.111168384552002} +02/24/2022 18:39:51 - INFO - codeparrot_training - Step 13201: {'lr': 0.0004357752573474879, 'samples': 6759424, 'steps': 13201, 'loss/train': 2.589674234390259} +02/24/2022 18:39:55 - INFO - codeparrot_training - Step 13202: {'lr': 0.0004357643075298251, 'samples': 6759936, 'steps': 13202, 'loss/train': 1.3990951776504517} +02/24/2022 18:40:00 - INFO - codeparrot_training - Step 13203: {'lr': 0.00043575335691640695, 'samples': 6760448, 'steps': 13203, 'loss/train': 1.8983817100524902} +02/24/2022 18:40:04 - INFO - codeparrot_training - Step 13204: {'lr': 0.0004357424055072804, 'samples': 6760960, 'steps': 13204, 'loss/train': 1.6345925331115723} +02/24/2022 18:40:09 - INFO - codeparrot_training - Step 13205: {'lr': 0.0004357314533024923, 'samples': 6761472, 'steps': 13205, 'loss/train': 2.351658582687378} +02/24/2022 18:40:13 - INFO - codeparrot_training - Step 13206: {'lr': 0.0004357205003020897, 'samples': 6761984, 'steps': 13206, 'loss/train': 1.238976001739502} +02/24/2022 18:40:18 - INFO - codeparrot_training - Step 13207: {'lr': 0.00043570954650611944, 'samples': 6762496, 'steps': 13207, 'loss/train': 2.736942768096924} +02/24/2022 18:40:22 - INFO - codeparrot_training - Step 13208: {'lr': 0.00043569859191462847, 'samples': 6763008, 'steps': 13208, 'loss/train': 1.632069706916809} +02/24/2022 18:40:28 - INFO - codeparrot_training - Step 13209: {'lr': 0.0004356876365276636, 'samples': 6763520, 'steps': 13209, 'loss/train': 0.4474453926086426} +02/24/2022 18:40:31 - INFO - codeparrot_training - Step 13210: {'lr': 0.00043567668034527195, 'samples': 6764032, 'steps': 13210, 'loss/train': 1.9706964492797852} +02/24/2022 18:40:37 - INFO - codeparrot_training - Step 13211: {'lr': 0.0004356657233675004, 'samples': 6764544, 'steps': 13211, 'loss/train': 1.9530082941055298} +02/24/2022 18:40:40 - INFO - codeparrot_training - Step 13212: {'lr': 0.00043565476559439577, 'samples': 6765056, 'steps': 13212, 'loss/train': 2.1657114028930664} +02/24/2022 18:40:46 - INFO - codeparrot_training - Step 13213: {'lr': 0.0004356438070260051, 'samples': 6765568, 'steps': 13213, 'loss/train': 2.159106492996216} +02/24/2022 18:40:49 - INFO - codeparrot_training - Step 13214: {'lr': 0.00043563284766237533, 'samples': 6766080, 'steps': 13214, 'loss/train': 4.27814245223999} +02/24/2022 18:40:55 - INFO - codeparrot_training - Step 13215: {'lr': 0.00043562188750355336, 'samples': 6766592, 'steps': 13215, 'loss/train': 2.5424985885620117} +02/24/2022 18:40:58 - INFO - codeparrot_training - Step 13216: {'lr': 0.0004356109265495861, 'samples': 6767104, 'steps': 13216, 'loss/train': 2.1559038162231445} +02/24/2022 18:41:04 - INFO - codeparrot_training - Step 13217: {'lr': 0.00043559996480052067, 'samples': 6767616, 'steps': 13217, 'loss/train': 1.9655873775482178} +02/24/2022 18:41:07 - INFO - codeparrot_training - Step 13218: {'lr': 0.0004355890022564039, 'samples': 6768128, 'steps': 13218, 'loss/train': 2.3823423385620117} +02/24/2022 18:41:13 - INFO - codeparrot_training - Step 13219: {'lr': 0.00043557803891728275, 'samples': 6768640, 'steps': 13219, 'loss/train': 1.8842852115631104} +02/24/2022 18:41:17 - INFO - codeparrot_training - Step 13220: {'lr': 0.00043556707478320425, 'samples': 6769152, 'steps': 13220, 'loss/train': 1.9322139024734497} +02/24/2022 18:41:22 - INFO - codeparrot_training - Step 13221: {'lr': 0.00043555610985421527, 'samples': 6769664, 'steps': 13221, 'loss/train': 2.273167610168457} +02/24/2022 18:41:26 - INFO - codeparrot_training - Step 13222: {'lr': 0.0004355451441303629, 'samples': 6770176, 'steps': 13222, 'loss/train': 2.4323787689208984} +02/24/2022 18:41:31 - INFO - codeparrot_training - Step 13223: {'lr': 0.000435534177611694, 'samples': 6770688, 'steps': 13223, 'loss/train': 1.3483549356460571} +02/24/2022 18:41:35 - INFO - codeparrot_training - Step 13224: {'lr': 0.0004355232102982556, 'samples': 6771200, 'steps': 13224, 'loss/train': 2.819002389907837} +02/24/2022 18:41:40 - INFO - codeparrot_training - Step 13225: {'lr': 0.00043551224219009473, 'samples': 6771712, 'steps': 13225, 'loss/train': 1.9850126504898071} +02/24/2022 18:41:44 - INFO - codeparrot_training - Step 13226: {'lr': 0.0004355012732872583, 'samples': 6772224, 'steps': 13226, 'loss/train': 1.8604122400283813} +02/24/2022 18:41:50 - INFO - codeparrot_training - Step 13227: {'lr': 0.00043549030358979324, 'samples': 6772736, 'steps': 13227, 'loss/train': 1.710444688796997} +02/24/2022 18:41:53 - INFO - codeparrot_training - Step 13228: {'lr': 0.0004354793330977467, 'samples': 6773248, 'steps': 13228, 'loss/train': 2.4048092365264893} +02/24/2022 18:41:59 - INFO - codeparrot_training - Step 13229: {'lr': 0.00043546836181116555, 'samples': 6773760, 'steps': 13229, 'loss/train': 2.7660040855407715} +02/24/2022 18:42:03 - INFO - codeparrot_training - Step 13230: {'lr': 0.0004354573897300969, 'samples': 6774272, 'steps': 13230, 'loss/train': 1.83676278591156} +02/24/2022 18:42:08 - INFO - codeparrot_training - Step 13231: {'lr': 0.0004354464168545876, 'samples': 6774784, 'steps': 13231, 'loss/train': 2.3647661209106445} +02/24/2022 18:42:12 - INFO - codeparrot_training - Step 13232: {'lr': 0.0004354354431846848, 'samples': 6775296, 'steps': 13232, 'loss/train': 0.3141121566295624} +02/24/2022 18:42:17 - INFO - codeparrot_training - Step 13233: {'lr': 0.0004354244687204354, 'samples': 6775808, 'steps': 13233, 'loss/train': 1.9678032398223877} +02/24/2022 18:42:21 - INFO - codeparrot_training - Step 13234: {'lr': 0.00043541349346188653, 'samples': 6776320, 'steps': 13234, 'loss/train': 1.8628443479537964} +02/24/2022 18:42:26 - INFO - codeparrot_training - Step 13235: {'lr': 0.000435402517409085, 'samples': 6776832, 'steps': 13235, 'loss/train': 1.5543805360794067} +02/24/2022 18:42:32 - INFO - codeparrot_training - Step 13236: {'lr': 0.0004353915405620781, 'samples': 6777344, 'steps': 13236, 'loss/train': 3.8039443492889404} +02/24/2022 18:42:35 - INFO - codeparrot_training - Step 13237: {'lr': 0.0004353805629209126, 'samples': 6777856, 'steps': 13237, 'loss/train': 2.3002946376800537} +02/24/2022 18:42:39 - INFO - codeparrot_training - Step 13238: {'lr': 0.0004353695844856357, 'samples': 6778368, 'steps': 13238, 'loss/train': 1.9879086017608643} +02/24/2022 18:42:44 - INFO - codeparrot_training - Step 13239: {'lr': 0.00043535860525629436, 'samples': 6778880, 'steps': 13239, 'loss/train': 1.1805089712142944} +02/24/2022 18:42:50 - INFO - codeparrot_training - Step 13240: {'lr': 0.00043534762523293557, 'samples': 6779392, 'steps': 13240, 'loss/train': 2.4244401454925537} +02/24/2022 18:42:53 - INFO - codeparrot_training - Step 13241: {'lr': 0.00043533664441560636, 'samples': 6779904, 'steps': 13241, 'loss/train': 1.9856505393981934} +02/24/2022 18:42:59 - INFO - codeparrot_training - Step 13242: {'lr': 0.0004353256628043539, 'samples': 6780416, 'steps': 13242, 'loss/train': 2.9495370388031006} +02/24/2022 18:43:02 - INFO - codeparrot_training - Step 13243: {'lr': 0.00043531468039922515, 'samples': 6780928, 'steps': 13243, 'loss/train': 2.4557976722717285} +02/24/2022 18:43:09 - INFO - codeparrot_training - Step 13244: {'lr': 0.0004353036972002671, 'samples': 6781440, 'steps': 13244, 'loss/train': 1.9739067554473877} +02/24/2022 18:43:12 - INFO - codeparrot_training - Step 13245: {'lr': 0.0004352927132075269, 'samples': 6781952, 'steps': 13245, 'loss/train': 0.5142982602119446} +02/24/2022 18:43:18 - INFO - codeparrot_training - Step 13246: {'lr': 0.00043528172842105154, 'samples': 6782464, 'steps': 13246, 'loss/train': 2.5212056636810303} +02/24/2022 18:43:21 - INFO - codeparrot_training - Step 13247: {'lr': 0.00043527074284088806, 'samples': 6782976, 'steps': 13247, 'loss/train': 1.782245397567749} +02/24/2022 18:43:27 - INFO - codeparrot_training - Step 13248: {'lr': 0.0004352597564670836, 'samples': 6783488, 'steps': 13248, 'loss/train': 1.6626681089401245} +02/24/2022 18:43:30 - INFO - codeparrot_training - Step 13249: {'lr': 0.00043524876929968516, 'samples': 6784000, 'steps': 13249, 'loss/train': 1.9425214529037476} +02/24/2022 18:43:36 - INFO - codeparrot_training - Step 13250: {'lr': 0.0004352377813387398, 'samples': 6784512, 'steps': 13250, 'loss/train': 0.9346581697463989} +02/24/2022 18:43:39 - INFO - codeparrot_training - Step 13251: {'lr': 0.0004352267925842946, 'samples': 6785024, 'steps': 13251, 'loss/train': 2.778203248977661} +02/24/2022 18:43:45 - INFO - codeparrot_training - Step 13252: {'lr': 0.00043521580303639663, 'samples': 6785536, 'steps': 13252, 'loss/train': 1.0520458221435547} +02/24/2022 18:43:48 - INFO - codeparrot_training - Step 13253: {'lr': 0.000435204812695093, 'samples': 6786048, 'steps': 13253, 'loss/train': 3.075870990753174} +02/24/2022 18:43:54 - INFO - codeparrot_training - Step 13254: {'lr': 0.00043519382156043075, 'samples': 6786560, 'steps': 13254, 'loss/train': 2.560676097869873} +02/24/2022 18:43:58 - INFO - codeparrot_training - Step 13255: {'lr': 0.0004351828296324569, 'samples': 6787072, 'steps': 13255, 'loss/train': 2.248198986053467} +02/24/2022 18:44:03 - INFO - codeparrot_training - Step 13256: {'lr': 0.00043517183691121875, 'samples': 6787584, 'steps': 13256, 'loss/train': 2.514848470687866} +02/24/2022 18:44:07 - INFO - codeparrot_training - Step 13257: {'lr': 0.00043516084339676316, 'samples': 6788096, 'steps': 13257, 'loss/train': 2.6968235969543457} +02/24/2022 18:44:12 - INFO - codeparrot_training - Step 13258: {'lr': 0.00043514984908913734, 'samples': 6788608, 'steps': 13258, 'loss/train': 2.6198337078094482} +02/24/2022 18:44:16 - INFO - codeparrot_training - Step 13259: {'lr': 0.0004351388539883883, 'samples': 6789120, 'steps': 13259, 'loss/train': 1.7377967834472656} +02/24/2022 18:44:21 - INFO - codeparrot_training - Step 13260: {'lr': 0.00043512785809456323, 'samples': 6789632, 'steps': 13260, 'loss/train': 1.7979042530059814} +02/24/2022 18:44:25 - INFO - codeparrot_training - Step 13261: {'lr': 0.00043511686140770925, 'samples': 6790144, 'steps': 13261, 'loss/train': 3.0527443885803223} +02/24/2022 18:44:30 - INFO - codeparrot_training - Step 13262: {'lr': 0.0004351058639278734, 'samples': 6790656, 'steps': 13262, 'loss/train': 0.6716596484184265} +02/24/2022 18:44:34 - INFO - codeparrot_training - Step 13263: {'lr': 0.0004350948656551028, 'samples': 6791168, 'steps': 13263, 'loss/train': 3.097402334213257} +02/24/2022 18:44:39 - INFO - codeparrot_training - Step 13264: {'lr': 0.0004350838665894445, 'samples': 6791680, 'steps': 13264, 'loss/train': 1.6201318502426147} +02/24/2022 18:44:43 - INFO - codeparrot_training - Step 13265: {'lr': 0.0004350728667309458, 'samples': 6792192, 'steps': 13265, 'loss/train': 2.168053388595581} +02/24/2022 18:44:49 - INFO - codeparrot_training - Step 13266: {'lr': 0.0004350618660796536, 'samples': 6792704, 'steps': 13266, 'loss/train': 1.9852432012557983} +02/24/2022 18:44:52 - INFO - codeparrot_training - Step 13267: {'lr': 0.0004350508646356152, 'samples': 6793216, 'steps': 13267, 'loss/train': 2.677222967147827} +02/24/2022 18:44:58 - INFO - codeparrot_training - Step 13268: {'lr': 0.00043503986239887765, 'samples': 6793728, 'steps': 13268, 'loss/train': 1.8020939826965332} +02/24/2022 18:45:01 - INFO - codeparrot_training - Step 13269: {'lr': 0.0004350288593694881, 'samples': 6794240, 'steps': 13269, 'loss/train': 2.4397799968719482} +02/24/2022 18:45:07 - INFO - codeparrot_training - Step 13270: {'lr': 0.00043501785554749363, 'samples': 6794752, 'steps': 13270, 'loss/train': 2.659473180770874} +02/24/2022 18:45:10 - INFO - codeparrot_training - Step 13271: {'lr': 0.00043500685093294145, 'samples': 6795264, 'steps': 13271, 'loss/train': 2.226707935333252} +02/24/2022 18:45:16 - INFO - codeparrot_training - Step 13272: {'lr': 0.0004349958455258786, 'samples': 6795776, 'steps': 13272, 'loss/train': 1.7915911674499512} +02/24/2022 18:45:19 - INFO - codeparrot_training - Step 13273: {'lr': 0.00043498483932635237, 'samples': 6796288, 'steps': 13273, 'loss/train': 1.2942596673965454} +02/24/2022 18:45:25 - INFO - codeparrot_training - Step 13274: {'lr': 0.0004349738323344098, 'samples': 6796800, 'steps': 13274, 'loss/train': 2.5414822101593018} +02/24/2022 18:45:28 - INFO - codeparrot_training - Step 13275: {'lr': 0.00043496282455009807, 'samples': 6797312, 'steps': 13275, 'loss/train': 2.5828800201416016} +02/24/2022 18:45:34 - INFO - codeparrot_training - Step 13276: {'lr': 0.00043495181597346435, 'samples': 6797824, 'steps': 13276, 'loss/train': 2.121838092803955} +02/24/2022 18:45:37 - INFO - codeparrot_training - Step 13277: {'lr': 0.0004349408066045557, 'samples': 6798336, 'steps': 13277, 'loss/train': 1.9473315477371216} +02/24/2022 18:45:43 - INFO - codeparrot_training - Step 13278: {'lr': 0.00043492979644341943, 'samples': 6798848, 'steps': 13278, 'loss/train': 2.4124510288238525} +02/24/2022 18:45:46 - INFO - codeparrot_training - Step 13279: {'lr': 0.0004349187854901026, 'samples': 6799360, 'steps': 13279, 'loss/train': 2.3382465839385986} +02/24/2022 18:45:53 - INFO - codeparrot_training - Step 13280: {'lr': 0.00043490777374465244, 'samples': 6799872, 'steps': 13280, 'loss/train': 1.5391281843185425} +02/24/2022 18:45:57 - INFO - codeparrot_training - Step 13281: {'lr': 0.0004348967612071161, 'samples': 6800384, 'steps': 13281, 'loss/train': 2.163550615310669} +02/24/2022 18:46:02 - INFO - codeparrot_training - Step 13282: {'lr': 0.0004348857478775407, 'samples': 6800896, 'steps': 13282, 'loss/train': 2.435809850692749} +02/24/2022 18:46:06 - INFO - codeparrot_training - Step 13283: {'lr': 0.00043487473375597354, 'samples': 6801408, 'steps': 13283, 'loss/train': 1.8940573930740356} +02/24/2022 18:46:11 - INFO - codeparrot_training - Step 13284: {'lr': 0.00043486371884246164, 'samples': 6801920, 'steps': 13284, 'loss/train': 1.351772665977478} +02/24/2022 18:46:15 - INFO - codeparrot_training - Step 13285: {'lr': 0.0004348527031370523, 'samples': 6802432, 'steps': 13285, 'loss/train': 2.4928598403930664} +02/24/2022 18:46:20 - INFO - codeparrot_training - Step 13286: {'lr': 0.00043484168663979265, 'samples': 6802944, 'steps': 13286, 'loss/train': 2.2198801040649414} +02/24/2022 18:46:24 - INFO - codeparrot_training - Step 13287: {'lr': 0.00043483066935073, 'samples': 6803456, 'steps': 13287, 'loss/train': 2.9466030597686768} +02/24/2022 18:46:29 - INFO - codeparrot_training - Step 13288: {'lr': 0.0004348196512699114, 'samples': 6803968, 'steps': 13288, 'loss/train': 2.033850908279419} +02/24/2022 18:46:33 - INFO - codeparrot_training - Step 13289: {'lr': 0.00043480863239738404, 'samples': 6804480, 'steps': 13289, 'loss/train': 3.5231668949127197} +02/24/2022 18:46:39 - INFO - codeparrot_training - Step 13290: {'lr': 0.0004347976127331953, 'samples': 6804992, 'steps': 13290, 'loss/train': 0.2825157344341278} +02/24/2022 18:46:43 - INFO - codeparrot_training - Step 13291: {'lr': 0.00043478659227739216, 'samples': 6805504, 'steps': 13291, 'loss/train': 2.493319034576416} +02/24/2022 18:46:48 - INFO - codeparrot_training - Step 13292: {'lr': 0.00043477557103002197, 'samples': 6806016, 'steps': 13292, 'loss/train': 2.368492603302002} +02/24/2022 18:46:52 - INFO - codeparrot_training - Step 13293: {'lr': 0.00043476454899113193, 'samples': 6806528, 'steps': 13293, 'loss/train': 2.5041770935058594} +02/24/2022 18:46:57 - INFO - codeparrot_training - Step 13294: {'lr': 0.00043475352616076927, 'samples': 6807040, 'steps': 13294, 'loss/train': 1.8983755111694336} +02/24/2022 18:47:01 - INFO - codeparrot_training - Step 13295: {'lr': 0.0004347425025389811, 'samples': 6807552, 'steps': 13295, 'loss/train': 2.371962308883667} +02/24/2022 18:47:07 - INFO - codeparrot_training - Step 13296: {'lr': 0.0004347314781258147, 'samples': 6808064, 'steps': 13296, 'loss/train': 1.1902852058410645} +02/24/2022 18:47:10 - INFO - codeparrot_training - Step 13297: {'lr': 0.00043472045292131735, 'samples': 6808576, 'steps': 13297, 'loss/train': 1.7911893129348755} +02/24/2022 18:47:16 - INFO - codeparrot_training - Step 13298: {'lr': 0.0004347094269255362, 'samples': 6809088, 'steps': 13298, 'loss/train': 2.4793660640716553} +02/24/2022 18:47:19 - INFO - codeparrot_training - Step 13299: {'lr': 0.0004346984001385186, 'samples': 6809600, 'steps': 13299, 'loss/train': 1.0510674715042114} +02/24/2022 18:47:25 - INFO - codeparrot_training - Step 13300: {'lr': 0.00043468737256031155, 'samples': 6810112, 'steps': 13300, 'loss/train': 1.7443426847457886} +02/24/2022 18:47:29 - INFO - codeparrot_training - Step 13301: {'lr': 0.00043467634419096257, 'samples': 6810624, 'steps': 13301, 'loss/train': 2.1588470935821533} +02/24/2022 18:47:34 - INFO - codeparrot_training - Step 13302: {'lr': 0.00043466531503051875, 'samples': 6811136, 'steps': 13302, 'loss/train': 2.269651174545288} +02/24/2022 18:47:38 - INFO - codeparrot_training - Step 13303: {'lr': 0.0004346542850790273, 'samples': 6811648, 'steps': 13303, 'loss/train': 0.8095269799232483} +02/24/2022 18:47:44 - INFO - codeparrot_training - Step 13304: {'lr': 0.00043464325433653563, 'samples': 6812160, 'steps': 13304, 'loss/train': 2.1540334224700928} +02/24/2022 18:47:47 - INFO - codeparrot_training - Step 13305: {'lr': 0.00043463222280309076, 'samples': 6812672, 'steps': 13305, 'loss/train': 2.9058022499084473} +02/24/2022 18:47:51 - INFO - codeparrot_training - Step 13306: {'lr': 0.00043462119047874015, 'samples': 6813184, 'steps': 13306, 'loss/train': 2.6206648349761963} +02/24/2022 18:47:56 - INFO - codeparrot_training - Step 13307: {'lr': 0.000434610157363531, 'samples': 6813696, 'steps': 13307, 'loss/train': 2.822443962097168} +02/24/2022 18:48:00 - INFO - codeparrot_training - Step 13308: {'lr': 0.0004345991234575105, 'samples': 6814208, 'steps': 13308, 'loss/train': 2.512821912765503} +02/24/2022 18:48:05 - INFO - codeparrot_training - Step 13309: {'lr': 0.00043458808876072595, 'samples': 6814720, 'steps': 13309, 'loss/train': 2.2585558891296387} +02/24/2022 18:48:09 - INFO - codeparrot_training - Step 13310: {'lr': 0.0004345770532732247, 'samples': 6815232, 'steps': 13310, 'loss/train': 2.420112133026123} +02/24/2022 18:48:15 - INFO - codeparrot_training - Step 13311: {'lr': 0.00043456601699505407, 'samples': 6815744, 'steps': 13311, 'loss/train': 0.8712084293365479} +02/24/2022 18:48:18 - INFO - codeparrot_training - Step 13312: {'lr': 0.00043455497992626104, 'samples': 6816256, 'steps': 13312, 'loss/train': 1.9645135402679443} +02/24/2022 18:48:24 - INFO - codeparrot_training - Step 13313: {'lr': 0.0004345439420668932, 'samples': 6816768, 'steps': 13313, 'loss/train': 1.6409363746643066} +02/24/2022 18:48:28 - INFO - codeparrot_training - Step 13314: {'lr': 0.0004345329034169977, 'samples': 6817280, 'steps': 13314, 'loss/train': 2.681473731994629} +02/24/2022 18:48:33 - INFO - codeparrot_training - Step 13315: {'lr': 0.00043452186397662174, 'samples': 6817792, 'steps': 13315, 'loss/train': 2.7246925830841064} +02/24/2022 18:48:37 - INFO - codeparrot_training - Step 13316: {'lr': 0.0004345108237458128, 'samples': 6818304, 'steps': 13316, 'loss/train': 1.6731756925582886} +02/24/2022 18:48:42 - INFO - codeparrot_training - Step 13317: {'lr': 0.00043449978272461806, 'samples': 6818816, 'steps': 13317, 'loss/train': 2.075159788131714} +02/24/2022 18:48:46 - INFO - codeparrot_training - Step 13318: {'lr': 0.0004344887409130848, 'samples': 6819328, 'steps': 13318, 'loss/train': 2.192598342895508} +02/24/2022 18:48:51 - INFO - codeparrot_training - Step 13319: {'lr': 0.0004344776983112604, 'samples': 6819840, 'steps': 13319, 'loss/train': 0.6538500189781189} +02/24/2022 18:48:55 - INFO - codeparrot_training - Step 13320: {'lr': 0.0004344666549191921, 'samples': 6820352, 'steps': 13320, 'loss/train': 2.219744920730591} +02/24/2022 18:49:00 - INFO - codeparrot_training - Step 13321: {'lr': 0.0004344556107369272, 'samples': 6820864, 'steps': 13321, 'loss/train': 2.6723968982696533} +02/24/2022 18:49:04 - INFO - codeparrot_training - Step 13322: {'lr': 0.00043444456576451307, 'samples': 6821376, 'steps': 13322, 'loss/train': 1.642527461051941} +02/24/2022 18:49:09 - INFO - codeparrot_training - Step 13323: {'lr': 0.000434433520001997, 'samples': 6821888, 'steps': 13323, 'loss/train': 2.5387818813323975} +02/24/2022 18:49:13 - INFO - codeparrot_training - Step 13324: {'lr': 0.0004344224734494263, 'samples': 6822400, 'steps': 13324, 'loss/train': 1.1320644617080688} +02/24/2022 18:49:18 - INFO - codeparrot_training - Step 13325: {'lr': 0.00043441142610684826, 'samples': 6822912, 'steps': 13325, 'loss/train': 0.46869421005249023} +02/24/2022 18:49:22 - INFO - codeparrot_training - Step 13326: {'lr': 0.0004344003779743102, 'samples': 6823424, 'steps': 13326, 'loss/train': 3.0613038539886475} +02/24/2022 18:49:27 - INFO - codeparrot_training - Step 13327: {'lr': 0.0004343893290518595, 'samples': 6823936, 'steps': 13327, 'loss/train': 2.912856340408325} +02/24/2022 18:49:31 - INFO - codeparrot_training - Step 13328: {'lr': 0.0004343782793395435, 'samples': 6824448, 'steps': 13328, 'loss/train': 1.0939773321151733} +02/24/2022 18:49:38 - INFO - codeparrot_training - Step 13329: {'lr': 0.00043436722883740943, 'samples': 6824960, 'steps': 13329, 'loss/train': 2.795685291290283} +02/24/2022 18:49:41 - INFO - codeparrot_training - Step 13330: {'lr': 0.0004343561775455047, 'samples': 6825472, 'steps': 13330, 'loss/train': 2.1160271167755127} +02/24/2022 18:49:47 - INFO - codeparrot_training - Step 13331: {'lr': 0.00043434512546387674, 'samples': 6825984, 'steps': 13331, 'loss/train': 2.545982599258423} +02/24/2022 18:49:50 - INFO - codeparrot_training - Step 13332: {'lr': 0.0004343340725925727, 'samples': 6826496, 'steps': 13332, 'loss/train': 2.631455898284912} +02/24/2022 18:49:56 - INFO - codeparrot_training - Step 13333: {'lr': 0.0004343230189316401, 'samples': 6827008, 'steps': 13333, 'loss/train': 1.516426682472229} +02/24/2022 18:49:59 - INFO - codeparrot_training - Step 13334: {'lr': 0.00043431196448112615, 'samples': 6827520, 'steps': 13334, 'loss/train': 2.9467992782592773} +02/24/2022 18:50:05 - INFO - codeparrot_training - Step 13335: {'lr': 0.0004343009092410783, 'samples': 6828032, 'steps': 13335, 'loss/train': 2.1306159496307373} +02/24/2022 18:50:10 - INFO - codeparrot_training - Step 13336: {'lr': 0.0004342898532115439, 'samples': 6828544, 'steps': 13336, 'loss/train': 1.820534348487854} +02/24/2022 18:50:14 - INFO - codeparrot_training - Step 13337: {'lr': 0.00043427879639257024, 'samples': 6829056, 'steps': 13337, 'loss/train': 1.6871719360351562} +02/24/2022 18:50:20 - INFO - codeparrot_training - Step 13338: {'lr': 0.0004342677387842048, 'samples': 6829568, 'steps': 13338, 'loss/train': 2.1549885272979736} +02/24/2022 18:50:24 - INFO - codeparrot_training - Step 13339: {'lr': 0.0004342566803864948, 'samples': 6830080, 'steps': 13339, 'loss/train': 1.6332987546920776} +02/24/2022 18:50:29 - INFO - codeparrot_training - Step 13340: {'lr': 0.0004342456211994877, 'samples': 6830592, 'steps': 13340, 'loss/train': 0.8337832689285278} +02/24/2022 18:50:33 - INFO - codeparrot_training - Step 13341: {'lr': 0.0004342345612232309, 'samples': 6831104, 'steps': 13341, 'loss/train': 1.2280763387680054} +02/24/2022 18:50:39 - INFO - codeparrot_training - Step 13342: {'lr': 0.0004342235004577717, 'samples': 6831616, 'steps': 13342, 'loss/train': 2.5863442420959473} +02/24/2022 18:50:42 - INFO - codeparrot_training - Step 13343: {'lr': 0.00043421243890315753, 'samples': 6832128, 'steps': 13343, 'loss/train': 0.7359707951545715} +02/24/2022 18:50:45 - INFO - codeparrot_training - Step 13344: {'lr': 0.0004342013765594358, 'samples': 6832640, 'steps': 13344, 'loss/train': 3.0660793781280518} +02/24/2022 18:50:52 - INFO - codeparrot_training - Step 13345: {'lr': 0.0004341903134266538, 'samples': 6833152, 'steps': 13345, 'loss/train': 2.3280622959136963} +02/24/2022 18:50:55 - INFO - codeparrot_training - Step 13346: {'lr': 0.0004341792495048591, 'samples': 6833664, 'steps': 13346, 'loss/train': 2.480565071105957} +02/24/2022 18:51:01 - INFO - codeparrot_training - Step 13347: {'lr': 0.00043416818479409894, 'samples': 6834176, 'steps': 13347, 'loss/train': 2.3103058338165283} +02/24/2022 18:51:04 - INFO - codeparrot_training - Step 13348: {'lr': 0.0004341571192944207, 'samples': 6834688, 'steps': 13348, 'loss/train': 0.11379817873239517} +02/24/2022 18:51:08 - INFO - codeparrot_training - Step 13349: {'lr': 0.00043414605300587183, 'samples': 6835200, 'steps': 13349, 'loss/train': 2.5075957775115967} +02/24/2022 18:51:14 - INFO - codeparrot_training - Step 13350: {'lr': 0.0004341349859284998, 'samples': 6835712, 'steps': 13350, 'loss/train': 2.269487142562866} +02/24/2022 18:51:17 - INFO - codeparrot_training - Step 13351: {'lr': 0.0004341239180623519, 'samples': 6836224, 'steps': 13351, 'loss/train': 1.6705011129379272} +02/24/2022 18:51:23 - INFO - codeparrot_training - Step 13352: {'lr': 0.0004341128494074756, 'samples': 6836736, 'steps': 13352, 'loss/train': 1.96543288230896} +02/24/2022 18:51:28 - INFO - codeparrot_training - Step 13353: {'lr': 0.00043410177996391837, 'samples': 6837248, 'steps': 13353, 'loss/train': 2.173870086669922} +02/24/2022 18:51:32 - INFO - codeparrot_training - Step 13354: {'lr': 0.00043409070973172753, 'samples': 6837760, 'steps': 13354, 'loss/train': 1.942000389099121} +02/24/2022 18:51:35 - INFO - codeparrot_training - Step 13355: {'lr': 0.0004340796387109506, 'samples': 6838272, 'steps': 13355, 'loss/train': 2.818225383758545} +02/24/2022 18:51:41 - INFO - codeparrot_training - Step 13356: {'lr': 0.00043406856690163487, 'samples': 6838784, 'steps': 13356, 'loss/train': 1.9128395318984985} +02/24/2022 18:51:48 - INFO - codeparrot_training - Step 13357: {'lr': 0.0004340574943038279, 'samples': 6839296, 'steps': 13357, 'loss/train': 1.6974326372146606} +02/24/2022 18:51:52 - INFO - codeparrot_training - Step 13358: {'lr': 0.00043404642091757705, 'samples': 6839808, 'steps': 13358, 'loss/train': 0.8889933228492737} +02/24/2022 18:51:57 - INFO - codeparrot_training - Step 13359: {'lr': 0.0004340353467429299, 'samples': 6840320, 'steps': 13359, 'loss/train': 2.474644899368286} +02/24/2022 18:52:01 - INFO - codeparrot_training - Step 13360: {'lr': 0.00043402427177993366, 'samples': 6840832, 'steps': 13360, 'loss/train': 2.1150295734405518} +02/24/2022 18:52:06 - INFO - codeparrot_training - Step 13361: {'lr': 0.00043401319602863584, 'samples': 6841344, 'steps': 13361, 'loss/train': 1.3615708351135254} +02/24/2022 18:52:10 - INFO - codeparrot_training - Step 13362: {'lr': 0.0004340021194890839, 'samples': 6841856, 'steps': 13362, 'loss/train': 1.4819518327713013} +02/24/2022 18:52:15 - INFO - codeparrot_training - Step 13363: {'lr': 0.0004339910421613253, 'samples': 6842368, 'steps': 13363, 'loss/train': 2.0121209621429443} +02/24/2022 18:52:19 - INFO - codeparrot_training - Step 13364: {'lr': 0.0004339799640454076, 'samples': 6842880, 'steps': 13364, 'loss/train': 1.5883493423461914} +02/24/2022 18:52:24 - INFO - codeparrot_training - Step 13365: {'lr': 0.0004339688851413781, 'samples': 6843392, 'steps': 13365, 'loss/train': 1.5042500495910645} +02/24/2022 18:52:28 - INFO - codeparrot_training - Step 13366: {'lr': 0.0004339578054492843, 'samples': 6843904, 'steps': 13366, 'loss/train': 2.0957536697387695} +02/24/2022 18:52:34 - INFO - codeparrot_training - Step 13367: {'lr': 0.0004339467249691737, 'samples': 6844416, 'steps': 13367, 'loss/train': 2.4097070693969727} +02/24/2022 18:52:37 - INFO - codeparrot_training - Step 13368: {'lr': 0.0004339356437010937, 'samples': 6844928, 'steps': 13368, 'loss/train': 2.522966146469116} +02/24/2022 18:52:43 - INFO - codeparrot_training - Step 13369: {'lr': 0.00043392456164509185, 'samples': 6845440, 'steps': 13369, 'loss/train': 2.0167455673217773} +02/24/2022 18:52:46 - INFO - codeparrot_training - Step 13370: {'lr': 0.00043391347880121554, 'samples': 6845952, 'steps': 13370, 'loss/train': 2.096789598464966} +02/24/2022 18:52:52 - INFO - codeparrot_training - Step 13371: {'lr': 0.00043390239516951235, 'samples': 6846464, 'steps': 13371, 'loss/train': 2.0977468490600586} +02/24/2022 18:52:56 - INFO - codeparrot_training - Step 13372: {'lr': 0.0004338913107500297, 'samples': 6846976, 'steps': 13372, 'loss/train': 3.343346357345581} +02/24/2022 18:53:01 - INFO - codeparrot_training - Step 13373: {'lr': 0.00043388022554281504, 'samples': 6847488, 'steps': 13373, 'loss/train': 2.03631854057312} +02/24/2022 18:53:05 - INFO - codeparrot_training - Step 13374: {'lr': 0.00043386913954791584, 'samples': 6848000, 'steps': 13374, 'loss/train': 2.520256519317627} +02/24/2022 18:53:10 - INFO - codeparrot_training - Step 13375: {'lr': 0.0004338580527653797, 'samples': 6848512, 'steps': 13375, 'loss/train': 2.7449727058410645} +02/24/2022 18:53:14 - INFO - codeparrot_training - Step 13376: {'lr': 0.000433846965195254, 'samples': 6849024, 'steps': 13376, 'loss/train': 1.5822027921676636} +02/24/2022 18:53:20 - INFO - codeparrot_training - Step 13377: {'lr': 0.0004338358768375863, 'samples': 6849536, 'steps': 13377, 'loss/train': 1.7537716627120972} +02/24/2022 18:53:23 - INFO - codeparrot_training - Step 13378: {'lr': 0.000433824787692424, 'samples': 6850048, 'steps': 13378, 'loss/train': 2.6050710678100586} +02/24/2022 18:53:29 - INFO - codeparrot_training - Step 13379: {'lr': 0.0004338136977598148, 'samples': 6850560, 'steps': 13379, 'loss/train': 2.313286542892456} +02/24/2022 18:53:32 - INFO - codeparrot_training - Step 13380: {'lr': 0.000433802607039806, 'samples': 6851072, 'steps': 13380, 'loss/train': 1.917705774307251} +02/24/2022 18:53:38 - INFO - codeparrot_training - Step 13381: {'lr': 0.00043379151553244523, 'samples': 6851584, 'steps': 13381, 'loss/train': 2.555523157119751} +02/24/2022 18:53:41 - INFO - codeparrot_training - Step 13382: {'lr': 0.00043378042323778, 'samples': 6852096, 'steps': 13382, 'loss/train': 1.6866884231567383} +02/24/2022 18:53:47 - INFO - codeparrot_training - Step 13383: {'lr': 0.00043376933015585776, 'samples': 6852608, 'steps': 13383, 'loss/train': 0.8979901671409607} +02/24/2022 18:53:50 - INFO - codeparrot_training - Step 13384: {'lr': 0.000433758236286726, 'samples': 6853120, 'steps': 13384, 'loss/train': 2.1274333000183105} +02/24/2022 18:53:56 - INFO - codeparrot_training - Step 13385: {'lr': 0.0004337471416304324, 'samples': 6853632, 'steps': 13385, 'loss/train': 2.098496913909912} +02/24/2022 18:53:59 - INFO - codeparrot_training - Step 13386: {'lr': 0.00043373604618702436, 'samples': 6854144, 'steps': 13386, 'loss/train': 1.6126383543014526} +02/24/2022 18:54:06 - INFO - codeparrot_training - Step 13387: {'lr': 0.00043372494995654943, 'samples': 6854656, 'steps': 13387, 'loss/train': 1.942667007446289} +02/24/2022 18:54:09 - INFO - codeparrot_training - Step 13388: {'lr': 0.00043371385293905517, 'samples': 6855168, 'steps': 13388, 'loss/train': 2.040477752685547} +02/24/2022 18:54:15 - INFO - codeparrot_training - Step 13389: {'lr': 0.0004337027551345891, 'samples': 6855680, 'steps': 13389, 'loss/train': 0.6316954493522644} +02/24/2022 18:54:18 - INFO - codeparrot_training - Step 13390: {'lr': 0.0004336916565431987, 'samples': 6856192, 'steps': 13390, 'loss/train': 1.7285830974578857} +02/24/2022 18:54:24 - INFO - codeparrot_training - Step 13391: {'lr': 0.0004336805571649316, 'samples': 6856704, 'steps': 13391, 'loss/train': 0.21150599420070648} +02/24/2022 18:54:27 - INFO - codeparrot_training - Step 13392: {'lr': 0.0004336694569998354, 'samples': 6857216, 'steps': 13392, 'loss/train': 1.7394059896469116} +02/24/2022 18:54:33 - INFO - codeparrot_training - Step 13393: {'lr': 0.00043365835604795746, 'samples': 6857728, 'steps': 13393, 'loss/train': 2.860260248184204} +02/24/2022 18:54:36 - INFO - codeparrot_training - Step 13394: {'lr': 0.0004336472543093455, 'samples': 6858240, 'steps': 13394, 'loss/train': 2.5539393424987793} +02/24/2022 18:54:42 - INFO - codeparrot_training - Step 13395: {'lr': 0.000433636151784047, 'samples': 6858752, 'steps': 13395, 'loss/train': 1.6724191904067993} +02/24/2022 18:54:45 - INFO - codeparrot_training - Step 13396: {'lr': 0.00043362504847210956, 'samples': 6859264, 'steps': 13396, 'loss/train': 1.963356614112854} +02/24/2022 18:54:51 - INFO - codeparrot_training - Step 13397: {'lr': 0.0004336139443735807, 'samples': 6859776, 'steps': 13397, 'loss/train': 2.2137465476989746} +02/24/2022 18:54:54 - INFO - codeparrot_training - Step 13398: {'lr': 0.000433602839488508, 'samples': 6860288, 'steps': 13398, 'loss/train': 2.2427098751068115} +02/24/2022 18:55:00 - INFO - codeparrot_training - Step 13399: {'lr': 0.00043359173381693906, 'samples': 6860800, 'steps': 13399, 'loss/train': 1.8561426401138306} +02/24/2022 18:55:03 - INFO - codeparrot_training - Step 13400: {'lr': 0.0004335806273589214, 'samples': 6861312, 'steps': 13400, 'loss/train': 0.9804264903068542} +02/24/2022 18:55:09 - INFO - codeparrot_training - Step 13401: {'lr': 0.00043356952011450265, 'samples': 6861824, 'steps': 13401, 'loss/train': 2.3413875102996826} +02/24/2022 18:55:13 - INFO - codeparrot_training - Step 13402: {'lr': 0.0004335584120837304, 'samples': 6862336, 'steps': 13402, 'loss/train': 1.9771844148635864} +02/24/2022 18:55:19 - INFO - codeparrot_training - Step 13403: {'lr': 0.0004335473032666521, 'samples': 6862848, 'steps': 13403, 'loss/train': 2.8203139305114746} +02/24/2022 18:55:22 - INFO - codeparrot_training - Step 13404: {'lr': 0.00043353619366331546, 'samples': 6863360, 'steps': 13404, 'loss/train': 1.2091772556304932} +02/24/2022 18:55:28 - INFO - codeparrot_training - Step 13405: {'lr': 0.0004335250832737681, 'samples': 6863872, 'steps': 13405, 'loss/train': 1.1968530416488647} +02/24/2022 18:55:31 - INFO - codeparrot_training - Step 13406: {'lr': 0.00043351397209805755, 'samples': 6864384, 'steps': 13406, 'loss/train': 1.3702644109725952} +02/24/2022 18:55:37 - INFO - codeparrot_training - Step 13407: {'lr': 0.0004335028601362314, 'samples': 6864896, 'steps': 13407, 'loss/train': 1.7492936849594116} +02/24/2022 18:55:40 - INFO - codeparrot_training - Step 13408: {'lr': 0.0004334917473883373, 'samples': 6865408, 'steps': 13408, 'loss/train': 2.7847790718078613} +02/24/2022 18:55:46 - INFO - codeparrot_training - Step 13409: {'lr': 0.0004334806338544227, 'samples': 6865920, 'steps': 13409, 'loss/train': 2.966379165649414} +02/24/2022 18:55:49 - INFO - codeparrot_training - Step 13410: {'lr': 0.0004334695195345355, 'samples': 6866432, 'steps': 13410, 'loss/train': 1.479927659034729} +02/24/2022 18:55:55 - INFO - codeparrot_training - Step 13411: {'lr': 0.000433458404428723, 'samples': 6866944, 'steps': 13411, 'loss/train': 2.035550355911255} +02/24/2022 18:55:58 - INFO - codeparrot_training - Step 13412: {'lr': 0.00043344728853703297, 'samples': 6867456, 'steps': 13412, 'loss/train': 2.2750933170318604} +02/24/2022 18:56:04 - INFO - codeparrot_training - Step 13413: {'lr': 0.00043343617185951305, 'samples': 6867968, 'steps': 13413, 'loss/train': 2.1236705780029297} +02/24/2022 18:56:08 - INFO - codeparrot_training - Step 13414: {'lr': 0.0004334250543962108, 'samples': 6868480, 'steps': 13414, 'loss/train': 2.128988027572632} +02/24/2022 18:56:13 - INFO - codeparrot_training - Step 13415: {'lr': 0.00043341393614717384, 'samples': 6868992, 'steps': 13415, 'loss/train': 1.898743987083435} +02/24/2022 18:56:17 - INFO - codeparrot_training - Step 13416: {'lr': 0.0004334028171124499, 'samples': 6869504, 'steps': 13416, 'loss/train': 2.1568007469177246} +02/24/2022 18:56:22 - INFO - codeparrot_training - Step 13417: {'lr': 0.0004333916972920864, 'samples': 6870016, 'steps': 13417, 'loss/train': 2.27823543548584} +02/24/2022 18:56:26 - INFO - codeparrot_training - Step 13418: {'lr': 0.00043338057668613117, 'samples': 6870528, 'steps': 13418, 'loss/train': 1.9554824829101562} +02/24/2022 18:56:31 - INFO - codeparrot_training - Step 13419: {'lr': 0.00043336945529463177, 'samples': 6871040, 'steps': 13419, 'loss/train': 1.2947747707366943} +02/24/2022 18:56:35 - INFO - codeparrot_training - Step 13420: {'lr': 0.00043335833311763597, 'samples': 6871552, 'steps': 13420, 'loss/train': 1.5561705827713013} +02/24/2022 18:56:40 - INFO - codeparrot_training - Step 13421: {'lr': 0.00043334721015519115, 'samples': 6872064, 'steps': 13421, 'loss/train': 2.5657753944396973} +02/24/2022 18:56:44 - INFO - codeparrot_training - Step 13422: {'lr': 0.00043333608640734513, 'samples': 6872576, 'steps': 13422, 'loss/train': 2.1511964797973633} +02/24/2022 18:56:51 - INFO - codeparrot_training - Step 13423: {'lr': 0.0004333249618741455, 'samples': 6873088, 'steps': 13423, 'loss/train': 1.474923014640808} +02/24/2022 18:56:54 - INFO - codeparrot_training - Step 13424: {'lr': 0.00043331383655564003, 'samples': 6873600, 'steps': 13424, 'loss/train': 1.8163540363311768} +02/24/2022 18:56:59 - INFO - codeparrot_training - Step 13425: {'lr': 0.0004333027104518762, 'samples': 6874112, 'steps': 13425, 'loss/train': 2.0045571327209473} +02/24/2022 18:57:03 - INFO - codeparrot_training - Step 13426: {'lr': 0.00043329158356290187, 'samples': 6874624, 'steps': 13426, 'loss/train': 1.8382295370101929} +02/24/2022 18:57:08 - INFO - codeparrot_training - Step 13427: {'lr': 0.00043328045588876454, 'samples': 6875136, 'steps': 13427, 'loss/train': 1.6040394306182861} +02/24/2022 18:57:12 - INFO - codeparrot_training - Step 13428: {'lr': 0.0004332693274295119, 'samples': 6875648, 'steps': 13428, 'loss/train': 1.1823590993881226} +02/24/2022 18:57:17 - INFO - codeparrot_training - Step 13429: {'lr': 0.0004332581981851917, 'samples': 6876160, 'steps': 13429, 'loss/train': 1.4518547058105469} +02/24/2022 18:57:21 - INFO - codeparrot_training - Step 13430: {'lr': 0.00043324706815585156, 'samples': 6876672, 'steps': 13430, 'loss/train': 1.9056583642959595} +02/24/2022 18:57:26 - INFO - codeparrot_training - Step 13431: {'lr': 0.00043323593734153915, 'samples': 6877184, 'steps': 13431, 'loss/train': 1.6445872783660889} +02/24/2022 18:57:30 - INFO - codeparrot_training - Step 13432: {'lr': 0.00043322480574230215, 'samples': 6877696, 'steps': 13432, 'loss/train': 1.6481211185455322} +02/24/2022 18:57:35 - INFO - codeparrot_training - Step 13433: {'lr': 0.00043321367335818833, 'samples': 6878208, 'steps': 13433, 'loss/train': 2.1485066413879395} +02/24/2022 18:57:39 - INFO - codeparrot_training - Step 13434: {'lr': 0.0004332025401892453, 'samples': 6878720, 'steps': 13434, 'loss/train': 2.0059142112731934} +02/24/2022 18:57:45 - INFO - codeparrot_training - Step 13435: {'lr': 0.00043319140623552073, 'samples': 6879232, 'steps': 13435, 'loss/train': 2.371858835220337} +02/24/2022 18:57:49 - INFO - codeparrot_training - Step 13436: {'lr': 0.0004331802714970624, 'samples': 6879744, 'steps': 13436, 'loss/train': 3.014699697494507} +02/24/2022 18:57:54 - INFO - codeparrot_training - Step 13437: {'lr': 0.00043316913597391785, 'samples': 6880256, 'steps': 13437, 'loss/train': 2.0820155143737793} +02/24/2022 18:57:58 - INFO - codeparrot_training - Step 13438: {'lr': 0.00043315799966613496, 'samples': 6880768, 'steps': 13438, 'loss/train': 1.8914895057678223} +02/24/2022 18:58:03 - INFO - codeparrot_training - Step 13439: {'lr': 0.00043314686257376136, 'samples': 6881280, 'steps': 13439, 'loss/train': 1.5246726274490356} +02/24/2022 18:58:07 - INFO - codeparrot_training - Step 13440: {'lr': 0.0004331357246968447, 'samples': 6881792, 'steps': 13440, 'loss/train': 0.6227343082427979} +02/24/2022 18:58:12 - INFO - codeparrot_training - Step 13441: {'lr': 0.0004331245860354328, 'samples': 6882304, 'steps': 13441, 'loss/train': 2.1620190143585205} +02/24/2022 18:58:16 - INFO - codeparrot_training - Step 13442: {'lr': 0.0004331134465895733, 'samples': 6882816, 'steps': 13442, 'loss/train': 0.47370097041130066} +02/24/2022 18:58:21 - INFO - codeparrot_training - Step 13443: {'lr': 0.00043310230635931394, 'samples': 6883328, 'steps': 13443, 'loss/train': 1.625957727432251} +02/24/2022 18:58:25 - INFO - codeparrot_training - Step 13444: {'lr': 0.0004330911653447024, 'samples': 6883840, 'steps': 13444, 'loss/train': 1.1147783994674683} +02/24/2022 18:58:30 - INFO - codeparrot_training - Step 13445: {'lr': 0.0004330800235457866, 'samples': 6884352, 'steps': 13445, 'loss/train': 2.186103582382202} +02/24/2022 18:58:34 - INFO - codeparrot_training - Step 13446: {'lr': 0.00043306888096261394, 'samples': 6884864, 'steps': 13446, 'loss/train': 3.074098825454712} +02/24/2022 18:58:39 - INFO - codeparrot_training - Step 13447: {'lr': 0.0004330577375952324, 'samples': 6885376, 'steps': 13447, 'loss/train': 1.6368879079818726} +02/24/2022 18:58:43 - INFO - codeparrot_training - Step 13448: {'lr': 0.0004330465934436896, 'samples': 6885888, 'steps': 13448, 'loss/train': 0.1662389039993286} +02/24/2022 18:58:50 - INFO - codeparrot_training - Step 13449: {'lr': 0.0004330354485080334, 'samples': 6886400, 'steps': 13449, 'loss/train': 0.9146532416343689} +02/24/2022 18:58:53 - INFO - codeparrot_training - Step 13450: {'lr': 0.0004330243027883114, 'samples': 6886912, 'steps': 13450, 'loss/train': 2.3914132118225098} +02/24/2022 18:58:58 - INFO - codeparrot_training - Step 13451: {'lr': 0.0004330131562845714, 'samples': 6887424, 'steps': 13451, 'loss/train': 2.218575954437256} +02/24/2022 18:59:02 - INFO - codeparrot_training - Step 13452: {'lr': 0.00043300200899686113, 'samples': 6887936, 'steps': 13452, 'loss/train': 1.9424468278884888} +02/24/2022 18:59:07 - INFO - codeparrot_training - Step 13453: {'lr': 0.0004329908609252284, 'samples': 6888448, 'steps': 13453, 'loss/train': 2.5715761184692383} +02/24/2022 18:59:11 - INFO - codeparrot_training - Step 13454: {'lr': 0.00043297971206972095, 'samples': 6888960, 'steps': 13454, 'loss/train': 1.081486463546753} +02/24/2022 18:59:17 - INFO - codeparrot_training - Step 13455: {'lr': 0.0004329685624303865, 'samples': 6889472, 'steps': 13455, 'loss/train': 2.1514930725097656} +02/24/2022 18:59:20 - INFO - codeparrot_training - Step 13456: {'lr': 0.0004329574120072728, 'samples': 6889984, 'steps': 13456, 'loss/train': 2.117000102996826} +02/24/2022 18:59:26 - INFO - codeparrot_training - Step 13457: {'lr': 0.00043294626080042767, 'samples': 6890496, 'steps': 13457, 'loss/train': 2.0635335445404053} +02/24/2022 18:59:29 - INFO - codeparrot_training - Step 13458: {'lr': 0.0004329351088098988, 'samples': 6891008, 'steps': 13458, 'loss/train': 1.7340364456176758} +02/24/2022 18:59:36 - INFO - codeparrot_training - Step 13459: {'lr': 0.0004329239560357341, 'samples': 6891520, 'steps': 13459, 'loss/train': 1.3921512365341187} +02/24/2022 18:59:39 - INFO - codeparrot_training - Step 13460: {'lr': 0.0004329128024779812, 'samples': 6892032, 'steps': 13460, 'loss/train': 1.8784161806106567} +02/24/2022 18:59:45 - INFO - codeparrot_training - Step 13461: {'lr': 0.00043290164813668795, 'samples': 6892544, 'steps': 13461, 'loss/train': 2.0356743335723877} +02/24/2022 18:59:48 - INFO - codeparrot_training - Step 13462: {'lr': 0.0004328904930119021, 'samples': 6893056, 'steps': 13462, 'loss/train': 2.024158477783203} +02/24/2022 18:59:54 - INFO - codeparrot_training - Step 13463: {'lr': 0.0004328793371036714, 'samples': 6893568, 'steps': 13463, 'loss/train': 1.0979338884353638} +02/24/2022 18:59:57 - INFO - codeparrot_training - Step 13464: {'lr': 0.0004328681804120438, 'samples': 6894080, 'steps': 13464, 'loss/train': 1.8582379817962646} +02/24/2022 19:00:03 - INFO - codeparrot_training - Step 13465: {'lr': 0.000432857022937067, 'samples': 6894592, 'steps': 13465, 'loss/train': 1.650192379951477} +02/24/2022 19:00:06 - INFO - codeparrot_training - Step 13466: {'lr': 0.00043284586467878865, 'samples': 6895104, 'steps': 13466, 'loss/train': 2.0719406604766846} +02/24/2022 19:00:12 - INFO - codeparrot_training - Step 13467: {'lr': 0.0004328347056372568, 'samples': 6895616, 'steps': 13467, 'loss/train': 2.1869418621063232} +02/24/2022 19:00:15 - INFO - codeparrot_training - Step 13468: {'lr': 0.00043282354581251903, 'samples': 6896128, 'steps': 13468, 'loss/train': 1.8174875974655151} +02/24/2022 19:00:22 - INFO - codeparrot_training - Step 13469: {'lr': 0.0004328123852046233, 'samples': 6896640, 'steps': 13469, 'loss/train': 1.8327851295471191} +02/24/2022 19:00:25 - INFO - codeparrot_training - Step 13470: {'lr': 0.0004328012238136173, 'samples': 6897152, 'steps': 13470, 'loss/train': 2.7620625495910645} +02/24/2022 19:00:31 - INFO - codeparrot_training - Step 13471: {'lr': 0.000432790061639549, 'samples': 6897664, 'steps': 13471, 'loss/train': 1.7467918395996094} +02/24/2022 19:00:34 - INFO - codeparrot_training - Step 13472: {'lr': 0.00043277889868246605, 'samples': 6898176, 'steps': 13472, 'loss/train': 3.432149887084961} +02/24/2022 19:00:40 - INFO - codeparrot_training - Step 13473: {'lr': 0.0004327677349424164, 'samples': 6898688, 'steps': 13473, 'loss/train': 1.9549671411514282} +02/24/2022 19:00:43 - INFO - codeparrot_training - Step 13474: {'lr': 0.0004327565704194477, 'samples': 6899200, 'steps': 13474, 'loss/train': 1.76982843875885} +02/24/2022 19:00:49 - INFO - codeparrot_training - Step 13475: {'lr': 0.0004327454051136079, 'samples': 6899712, 'steps': 13475, 'loss/train': 2.368537425994873} +02/24/2022 19:00:53 - INFO - codeparrot_training - Step 13476: {'lr': 0.0004327342390249449, 'samples': 6900224, 'steps': 13476, 'loss/train': 2.650076389312744} +02/24/2022 19:00:58 - INFO - codeparrot_training - Step 13477: {'lr': 0.00043272307215350635, 'samples': 6900736, 'steps': 13477, 'loss/train': 0.5039814114570618} +02/24/2022 19:01:02 - INFO - codeparrot_training - Step 13478: {'lr': 0.0004327119044993403, 'samples': 6901248, 'steps': 13478, 'loss/train': 2.7400102615356445} +02/24/2022 19:01:07 - INFO - codeparrot_training - Step 13479: {'lr': 0.0004327007360624944, 'samples': 6901760, 'steps': 13479, 'loss/train': 2.3195197582244873} +02/24/2022 19:01:10 - INFO - codeparrot_training - Step 13480: {'lr': 0.0004326895668430165, 'samples': 6902272, 'steps': 13480, 'loss/train': 3.7669460773468018} +02/24/2022 19:01:17 - INFO - codeparrot_training - Step 13481: {'lr': 0.0004326783968409546, 'samples': 6902784, 'steps': 13481, 'loss/train': 2.371272087097168} +02/24/2022 19:01:20 - INFO - codeparrot_training - Step 13482: {'lr': 0.00043266722605635644, 'samples': 6903296, 'steps': 13482, 'loss/train': 2.170497417449951} +02/24/2022 19:01:26 - INFO - codeparrot_training - Step 13483: {'lr': 0.0004326560544892699, 'samples': 6903808, 'steps': 13483, 'loss/train': 1.9850348234176636} +02/24/2022 19:01:29 - INFO - codeparrot_training - Step 13484: {'lr': 0.00043264488213974275, 'samples': 6904320, 'steps': 13484, 'loss/train': 1.9562952518463135} +02/24/2022 19:01:35 - INFO - codeparrot_training - Step 13485: {'lr': 0.00043263370900782297, 'samples': 6904832, 'steps': 13485, 'loss/train': 2.2027151584625244} +02/24/2022 19:01:38 - INFO - codeparrot_training - Step 13486: {'lr': 0.0004326225350935583, 'samples': 6905344, 'steps': 13486, 'loss/train': 1.579418659210205} +02/24/2022 19:01:44 - INFO - codeparrot_training - Step 13487: {'lr': 0.00043261136039699676, 'samples': 6905856, 'steps': 13487, 'loss/train': 1.213860273361206} +02/24/2022 19:01:47 - INFO - codeparrot_training - Step 13488: {'lr': 0.0004326001849181862, 'samples': 6906368, 'steps': 13488, 'loss/train': 2.603863000869751} +02/24/2022 19:01:53 - INFO - codeparrot_training - Step 13489: {'lr': 0.0004325890086571743, 'samples': 6906880, 'steps': 13489, 'loss/train': 1.5113205909729004} +02/24/2022 19:01:56 - INFO - codeparrot_training - Step 13490: {'lr': 0.00043257783161400917, 'samples': 6907392, 'steps': 13490, 'loss/train': 2.793163776397705} +02/24/2022 19:02:02 - INFO - codeparrot_training - Step 13491: {'lr': 0.0004325666537887385, 'samples': 6907904, 'steps': 13491, 'loss/train': 0.5322578549385071} +02/24/2022 19:02:05 - INFO - codeparrot_training - Step 13492: {'lr': 0.00043255547518141033, 'samples': 6908416, 'steps': 13492, 'loss/train': 2.004286050796509} +02/24/2022 19:02:11 - INFO - codeparrot_training - Step 13493: {'lr': 0.0004325442957920724, 'samples': 6908928, 'steps': 13493, 'loss/train': 0.12340640276670456} +02/24/2022 19:02:14 - INFO - codeparrot_training - Step 13494: {'lr': 0.0004325331156207727, 'samples': 6909440, 'steps': 13494, 'loss/train': 0.9747650623321533} +02/24/2022 19:02:21 - INFO - codeparrot_training - Step 13495: {'lr': 0.00043252193466755906, 'samples': 6909952, 'steps': 13495, 'loss/train': 1.4896043539047241} +02/24/2022 19:02:24 - INFO - codeparrot_training - Step 13496: {'lr': 0.0004325107529324795, 'samples': 6910464, 'steps': 13496, 'loss/train': 2.659832715988159} +02/24/2022 19:02:30 - INFO - codeparrot_training - Step 13497: {'lr': 0.0004324995704155817, 'samples': 6910976, 'steps': 13497, 'loss/train': 1.9089082479476929} +02/24/2022 19:02:33 - INFO - codeparrot_training - Step 13498: {'lr': 0.0004324883871169138, 'samples': 6911488, 'steps': 13498, 'loss/train': 2.02074933052063} +02/24/2022 19:02:39 - INFO - codeparrot_training - Step 13499: {'lr': 0.00043247720303652353, 'samples': 6912000, 'steps': 13499, 'loss/train': 0.494108110666275} +02/24/2022 19:02:42 - INFO - codeparrot_training - Step 13500: {'lr': 0.0004324660181744589, 'samples': 6912512, 'steps': 13500, 'loss/train': 2.467935085296631} +02/24/2022 19:02:48 - INFO - codeparrot_training - Step 13501: {'lr': 0.00043245483253076777, 'samples': 6913024, 'steps': 13501, 'loss/train': 1.9143275022506714} +02/24/2022 19:02:51 - INFO - codeparrot_training - Step 13502: {'lr': 0.0004324436461054981, 'samples': 6913536, 'steps': 13502, 'loss/train': 1.2303646802902222} +02/24/2022 19:02:57 - INFO - codeparrot_training - Step 13503: {'lr': 0.00043243245889869775, 'samples': 6914048, 'steps': 13503, 'loss/train': 3.229280948638916} +02/24/2022 19:03:00 - INFO - codeparrot_training - Step 13504: {'lr': 0.0004324212709104147, 'samples': 6914560, 'steps': 13504, 'loss/train': 1.7756668329238892} +02/24/2022 19:03:06 - INFO - codeparrot_training - Step 13505: {'lr': 0.0004324100821406969, 'samples': 6915072, 'steps': 13505, 'loss/train': 0.8699193596839905} +02/24/2022 19:03:09 - INFO - codeparrot_training - Step 13506: {'lr': 0.00043239889258959215, 'samples': 6915584, 'steps': 13506, 'loss/train': 1.8626275062561035} +02/24/2022 19:03:16 - INFO - codeparrot_training - Step 13507: {'lr': 0.00043238770225714854, 'samples': 6916096, 'steps': 13507, 'loss/train': 0.3798440992832184} +02/24/2022 19:03:20 - INFO - codeparrot_training - Step 13508: {'lr': 0.00043237651114341383, 'samples': 6916608, 'steps': 13508, 'loss/train': 2.3320095539093018} +02/24/2022 19:03:25 - INFO - codeparrot_training - Step 13509: {'lr': 0.0004323653192484361, 'samples': 6917120, 'steps': 13509, 'loss/train': 2.872229814529419} +02/24/2022 19:03:29 - INFO - codeparrot_training - Step 13510: {'lr': 0.0004323541265722633, 'samples': 6917632, 'steps': 13510, 'loss/train': 1.3713935613632202} +02/24/2022 19:03:34 - INFO - codeparrot_training - Step 13511: {'lr': 0.0004323429331149432, 'samples': 6918144, 'steps': 13511, 'loss/train': 1.5069878101348877} +02/24/2022 19:03:38 - INFO - codeparrot_training - Step 13512: {'lr': 0.000432331738876524, 'samples': 6918656, 'steps': 13512, 'loss/train': 3.0355465412139893} +02/24/2022 19:03:43 - INFO - codeparrot_training - Step 13513: {'lr': 0.00043232054385705345, 'samples': 6919168, 'steps': 13513, 'loss/train': 1.9760501384735107} +02/24/2022 19:03:47 - INFO - codeparrot_training - Step 13514: {'lr': 0.0004323093480565796, 'samples': 6919680, 'steps': 13514, 'loss/train': 1.7026857137680054} +02/24/2022 19:03:52 - INFO - codeparrot_training - Step 13515: {'lr': 0.0004322981514751504, 'samples': 6920192, 'steps': 13515, 'loss/train': 1.505321741104126} +02/24/2022 19:03:56 - INFO - codeparrot_training - Step 13516: {'lr': 0.0004322869541128138, 'samples': 6920704, 'steps': 13516, 'loss/train': 1.5018874406814575} +02/24/2022 19:04:02 - INFO - codeparrot_training - Step 13517: {'lr': 0.00043227575596961783, 'samples': 6921216, 'steps': 13517, 'loss/train': 2.7122464179992676} +02/24/2022 19:04:06 - INFO - codeparrot_training - Step 13518: {'lr': 0.00043226455704561034, 'samples': 6921728, 'steps': 13518, 'loss/train': 2.9626035690307617} +02/24/2022 19:04:11 - INFO - codeparrot_training - Step 13519: {'lr': 0.0004322533573408394, 'samples': 6922240, 'steps': 13519, 'loss/train': 1.6918985843658447} +02/24/2022 19:04:15 - INFO - codeparrot_training - Step 13520: {'lr': 0.00043224215685535287, 'samples': 6922752, 'steps': 13520, 'loss/train': 2.0553183555603027} +02/24/2022 19:04:20 - INFO - codeparrot_training - Step 13521: {'lr': 0.0004322309555891989, 'samples': 6923264, 'steps': 13521, 'loss/train': 2.2857887744903564} +02/24/2022 19:04:24 - INFO - codeparrot_training - Step 13522: {'lr': 0.00043221975354242536, 'samples': 6923776, 'steps': 13522, 'loss/train': 2.6552069187164307} +02/24/2022 19:04:29 - INFO - codeparrot_training - Step 13523: {'lr': 0.0004322085507150802, 'samples': 6924288, 'steps': 13523, 'loss/train': 0.5546449422836304} +02/24/2022 19:04:33 - INFO - codeparrot_training - Step 13524: {'lr': 0.00043219734710721146, 'samples': 6924800, 'steps': 13524, 'loss/train': 1.7232391834259033} +02/24/2022 19:04:38 - INFO - codeparrot_training - Step 13525: {'lr': 0.00043218614271886725, 'samples': 6925312, 'steps': 13525, 'loss/train': 1.8191306591033936} +02/24/2022 19:04:42 - INFO - codeparrot_training - Step 13526: {'lr': 0.0004321749375500954, 'samples': 6925824, 'steps': 13526, 'loss/train': 2.7485036849975586} +02/24/2022 19:04:48 - INFO - codeparrot_training - Step 13527: {'lr': 0.0004321637316009439, 'samples': 6926336, 'steps': 13527, 'loss/train': 2.03973650932312} +02/24/2022 19:04:51 - INFO - codeparrot_training - Step 13528: {'lr': 0.00043215252487146096, 'samples': 6926848, 'steps': 13528, 'loss/train': 1.5198118686676025} +02/24/2022 19:04:57 - INFO - codeparrot_training - Step 13529: {'lr': 0.0004321413173616943, 'samples': 6927360, 'steps': 13529, 'loss/train': 1.5036728382110596} +02/24/2022 19:05:00 - INFO - codeparrot_training - Step 13530: {'lr': 0.00043213010907169213, 'samples': 6927872, 'steps': 13530, 'loss/train': 2.8774921894073486} +02/24/2022 19:05:06 - INFO - codeparrot_training - Step 13531: {'lr': 0.00043211890000150247, 'samples': 6928384, 'steps': 13531, 'loss/train': 1.2315572500228882} +02/24/2022 19:05:09 - INFO - codeparrot_training - Step 13532: {'lr': 0.0004321076901511731, 'samples': 6928896, 'steps': 13532, 'loss/train': 1.9138476848602295} +02/24/2022 19:05:15 - INFO - codeparrot_training - Step 13533: {'lr': 0.00043209647952075235, 'samples': 6929408, 'steps': 13533, 'loss/train': 1.8021141290664673} +02/24/2022 19:05:18 - INFO - codeparrot_training - Step 13534: {'lr': 0.00043208526811028806, 'samples': 6929920, 'steps': 13534, 'loss/train': 1.293871283531189} +02/24/2022 19:05:24 - INFO - codeparrot_training - Step 13535: {'lr': 0.00043207405591982835, 'samples': 6930432, 'steps': 13535, 'loss/train': 1.8187755346298218} +02/24/2022 19:05:27 - INFO - codeparrot_training - Step 13536: {'lr': 0.0004320628429494212, 'samples': 6930944, 'steps': 13536, 'loss/train': 2.0494000911712646} +02/24/2022 19:05:33 - INFO - codeparrot_training - Step 13537: {'lr': 0.00043205162919911455, 'samples': 6931456, 'steps': 13537, 'loss/train': 2.9351913928985596} +02/24/2022 19:05:36 - INFO - codeparrot_training - Step 13538: {'lr': 0.0004320404146689566, 'samples': 6931968, 'steps': 13538, 'loss/train': 1.784485936164856} +02/24/2022 19:05:42 - INFO - codeparrot_training - Step 13539: {'lr': 0.0004320291993589953, 'samples': 6932480, 'steps': 13539, 'loss/train': 1.4332714080810547} +02/24/2022 19:05:45 - INFO - codeparrot_training - Step 13540: {'lr': 0.0004320179832692787, 'samples': 6932992, 'steps': 13540, 'loss/train': 2.744737148284912} +02/24/2022 19:05:51 - INFO - codeparrot_training - Step 13541: {'lr': 0.0004320067663998549, 'samples': 6933504, 'steps': 13541, 'loss/train': 2.7020082473754883} +02/24/2022 19:05:57 - INFO - codeparrot_training - Step 13542: {'lr': 0.00043199554875077183, 'samples': 6934016, 'steps': 13542, 'loss/train': 2.465879201889038} +02/24/2022 19:06:01 - INFO - codeparrot_training - Step 13543: {'lr': 0.00043198433032207774, 'samples': 6934528, 'steps': 13543, 'loss/train': 1.4109089374542236} +02/24/2022 19:06:06 - INFO - codeparrot_training - Step 13544: {'lr': 0.00043197311111382045, 'samples': 6935040, 'steps': 13544, 'loss/train': 1.873335599899292} +02/24/2022 19:06:10 - INFO - codeparrot_training - Step 13545: {'lr': 0.0004319618911260482, 'samples': 6935552, 'steps': 13545, 'loss/train': 2.226513147354126} +02/24/2022 19:06:13 - INFO - codeparrot_training - Step 13546: {'lr': 0.0004319506703588089, 'samples': 6936064, 'steps': 13546, 'loss/train': 1.7113468647003174} +02/24/2022 19:06:19 - INFO - codeparrot_training - Step 13547: {'lr': 0.00043193944881215075, 'samples': 6936576, 'steps': 13547, 'loss/train': 2.309114456176758} +02/24/2022 19:06:22 - INFO - codeparrot_training - Step 13548: {'lr': 0.00043192822648612184, 'samples': 6937088, 'steps': 13548, 'loss/train': 0.13835661113262177} +02/24/2022 19:06:28 - INFO - codeparrot_training - Step 13549: {'lr': 0.0004319170033807701, 'samples': 6937600, 'steps': 13549, 'loss/train': 1.448536992073059} +02/24/2022 19:06:34 - INFO - codeparrot_training - Step 13550: {'lr': 0.00043190577949614375, 'samples': 6938112, 'steps': 13550, 'loss/train': 1.6425881385803223} +02/24/2022 19:06:38 - INFO - codeparrot_training - Step 13551: {'lr': 0.00043189455483229073, 'samples': 6938624, 'steps': 13551, 'loss/train': 2.04683518409729} +02/24/2022 19:06:41 - INFO - codeparrot_training - Step 13552: {'lr': 0.00043188332938925923, 'samples': 6939136, 'steps': 13552, 'loss/train': 2.404266357421875} +02/24/2022 19:06:45 - INFO - codeparrot_training - Step 13553: {'lr': 0.0004318721031670973, 'samples': 6939648, 'steps': 13553, 'loss/train': 2.4058384895324707} +02/24/2022 19:06:51 - INFO - codeparrot_training - Step 13554: {'lr': 0.00043186087616585303, 'samples': 6940160, 'steps': 13554, 'loss/train': 2.257333755493164} +02/24/2022 19:06:54 - INFO - codeparrot_training - Step 13555: {'lr': 0.0004318496483855745, 'samples': 6940672, 'steps': 13555, 'loss/train': 2.4622716903686523} +02/24/2022 19:07:00 - INFO - codeparrot_training - Step 13556: {'lr': 0.0004318384198263099, 'samples': 6941184, 'steps': 13556, 'loss/train': 1.5970168113708496} +02/24/2022 19:07:03 - INFO - codeparrot_training - Step 13557: {'lr': 0.00043182719048810714, 'samples': 6941696, 'steps': 13557, 'loss/train': 2.133849620819092} +02/24/2022 19:07:09 - INFO - codeparrot_training - Step 13558: {'lr': 0.00043181596037101443, 'samples': 6942208, 'steps': 13558, 'loss/train': 2.686640501022339} +02/24/2022 19:07:12 - INFO - codeparrot_training - Step 13559: {'lr': 0.00043180472947508, 'samples': 6942720, 'steps': 13559, 'loss/train': 1.2532762289047241} +02/24/2022 19:07:18 - INFO - codeparrot_training - Step 13560: {'lr': 0.0004317934978003517, 'samples': 6943232, 'steps': 13560, 'loss/train': 3.6173346042633057} +02/24/2022 19:07:21 - INFO - codeparrot_training - Step 13561: {'lr': 0.0004317822653468778, 'samples': 6943744, 'steps': 13561, 'loss/train': 2.8468985557556152} +02/24/2022 19:07:27 - INFO - codeparrot_training - Step 13562: {'lr': 0.00043177103211470647, 'samples': 6944256, 'steps': 13562, 'loss/train': 1.8747682571411133} +02/24/2022 19:07:30 - INFO - codeparrot_training - Step 13563: {'lr': 0.00043175979810388575, 'samples': 6944768, 'steps': 13563, 'loss/train': 2.063985824584961} +02/24/2022 19:07:37 - INFO - codeparrot_training - Step 13564: {'lr': 0.0004317485633144638, 'samples': 6945280, 'steps': 13564, 'loss/train': 3.4184486865997314} +02/24/2022 19:07:41 - INFO - codeparrot_training - Step 13565: {'lr': 0.0004317373277464886, 'samples': 6945792, 'steps': 13565, 'loss/train': 1.3864854574203491} +02/24/2022 19:07:46 - INFO - codeparrot_training - Step 13566: {'lr': 0.0004317260914000085, 'samples': 6946304, 'steps': 13566, 'loss/train': 1.22243332862854} +02/24/2022 19:07:50 - INFO - codeparrot_training - Step 13567: {'lr': 0.00043171485427507145, 'samples': 6946816, 'steps': 13567, 'loss/train': 1.9268128871917725} +02/24/2022 19:07:55 - INFO - codeparrot_training - Step 13568: {'lr': 0.0004317036163717257, 'samples': 6947328, 'steps': 13568, 'loss/train': 4.796426773071289} +02/24/2022 19:07:59 - INFO - codeparrot_training - Step 13569: {'lr': 0.00043169237769001936, 'samples': 6947840, 'steps': 13569, 'loss/train': 2.196671724319458} +02/24/2022 19:08:04 - INFO - codeparrot_training - Step 13570: {'lr': 0.0004316811382300006, 'samples': 6948352, 'steps': 13570, 'loss/train': 2.194404125213623} +02/24/2022 19:08:08 - INFO - codeparrot_training - Step 13571: {'lr': 0.0004316698979917175, 'samples': 6948864, 'steps': 13571, 'loss/train': 1.0558006763458252} +02/24/2022 19:08:13 - INFO - codeparrot_training - Step 13572: {'lr': 0.0004316586569752182, 'samples': 6949376, 'steps': 13572, 'loss/train': 2.7078187465667725} +02/24/2022 19:08:17 - INFO - codeparrot_training - Step 13573: {'lr': 0.00043164741518055097, 'samples': 6949888, 'steps': 13573, 'loss/train': 2.9460179805755615} +02/24/2022 19:08:23 - INFO - codeparrot_training - Step 13574: {'lr': 0.0004316361726077639, 'samples': 6950400, 'steps': 13574, 'loss/train': 1.184583067893982} +02/24/2022 19:08:26 - INFO - codeparrot_training - Step 13575: {'lr': 0.0004316249292569051, 'samples': 6950912, 'steps': 13575, 'loss/train': 1.6814242601394653} +02/24/2022 19:08:32 - INFO - codeparrot_training - Step 13576: {'lr': 0.0004316136851280228, 'samples': 6951424, 'steps': 13576, 'loss/train': 0.5795541405677795} +02/24/2022 19:08:36 - INFO - codeparrot_training - Step 13577: {'lr': 0.00043160244022116514, 'samples': 6951936, 'steps': 13577, 'loss/train': 1.4604476690292358} +02/24/2022 19:08:41 - INFO - codeparrot_training - Step 13578: {'lr': 0.0004315911945363802, 'samples': 6952448, 'steps': 13578, 'loss/train': 1.8934495449066162} +02/24/2022 19:08:45 - INFO - codeparrot_training - Step 13579: {'lr': 0.00043157994807371634, 'samples': 6952960, 'steps': 13579, 'loss/train': 1.6934781074523926} +02/24/2022 19:08:50 - INFO - codeparrot_training - Step 13580: {'lr': 0.00043156870083322166, 'samples': 6953472, 'steps': 13580, 'loss/train': 1.825491189956665} +02/24/2022 19:08:54 - INFO - codeparrot_training - Step 13581: {'lr': 0.0004315574528149443, 'samples': 6953984, 'steps': 13581, 'loss/train': 1.064252495765686} +02/24/2022 19:08:59 - INFO - codeparrot_training - Step 13582: {'lr': 0.00043154620401893244, 'samples': 6954496, 'steps': 13582, 'loss/train': 2.3033533096313477} +02/24/2022 19:09:03 - INFO - codeparrot_training - Step 13583: {'lr': 0.0004315349544452343, 'samples': 6955008, 'steps': 13583, 'loss/train': 1.6521732807159424} +02/24/2022 19:09:08 - INFO - codeparrot_training - Step 13584: {'lr': 0.00043152370409389794, 'samples': 6955520, 'steps': 13584, 'loss/train': 3.037415027618408} +02/24/2022 19:09:12 - INFO - codeparrot_training - Step 13585: {'lr': 0.00043151245296497184, 'samples': 6956032, 'steps': 13585, 'loss/train': 1.4827784299850464} +02/24/2022 19:09:17 - INFO - codeparrot_training - Step 13586: {'lr': 0.000431501201058504, 'samples': 6956544, 'steps': 13586, 'loss/train': 2.637298583984375} +02/24/2022 19:09:21 - INFO - codeparrot_training - Step 13587: {'lr': 0.0004314899483745426, 'samples': 6957056, 'steps': 13587, 'loss/train': 2.552401065826416} +02/24/2022 19:09:26 - INFO - codeparrot_training - Step 13588: {'lr': 0.0004314786949131359, 'samples': 6957568, 'steps': 13588, 'loss/train': 2.3268959522247314} +02/24/2022 19:09:30 - INFO - codeparrot_training - Step 13589: {'lr': 0.0004314674406743321, 'samples': 6958080, 'steps': 13589, 'loss/train': 2.118502378463745} +02/24/2022 19:09:36 - INFO - codeparrot_training - Step 13590: {'lr': 0.00043145618565817946, 'samples': 6958592, 'steps': 13590, 'loss/train': 2.1962571144104004} +02/24/2022 19:09:41 - INFO - codeparrot_training - Step 13591: {'lr': 0.00043144492986472603, 'samples': 6959104, 'steps': 13591, 'loss/train': 1.1924455165863037} +02/24/2022 19:09:45 - INFO - codeparrot_training - Step 13592: {'lr': 0.0004314336732940202, 'samples': 6959616, 'steps': 13592, 'loss/train': 2.9314956665039062} +02/24/2022 19:09:50 - INFO - codeparrot_training - Step 13593: {'lr': 0.0004314224159461102, 'samples': 6960128, 'steps': 13593, 'loss/train': 2.478891134262085} +02/24/2022 19:09:54 - INFO - codeparrot_training - Step 13594: {'lr': 0.0004314111578210441, 'samples': 6960640, 'steps': 13594, 'loss/train': 1.8915724754333496} +02/24/2022 19:09:59 - INFO - codeparrot_training - Step 13595: {'lr': 0.0004313998989188702, 'samples': 6961152, 'steps': 13595, 'loss/train': 1.4155244827270508} +02/24/2022 19:10:03 - INFO - codeparrot_training - Step 13596: {'lr': 0.00043138863923963664, 'samples': 6961664, 'steps': 13596, 'loss/train': 2.005995750427246} +02/24/2022 19:10:08 - INFO - codeparrot_training - Step 13597: {'lr': 0.0004313773787833919, 'samples': 6962176, 'steps': 13597, 'loss/train': 1.8183064460754395} +02/24/2022 19:10:12 - INFO - codeparrot_training - Step 13598: {'lr': 0.0004313661175501841, 'samples': 6962688, 'steps': 13598, 'loss/train': 2.2222249507904053} +02/24/2022 19:10:18 - INFO - codeparrot_training - Step 13599: {'lr': 0.00043135485554006127, 'samples': 6963200, 'steps': 13599, 'loss/train': 1.8670982122421265} +02/24/2022 19:10:21 - INFO - codeparrot_training - Step 13600: {'lr': 0.0004313435927530719, 'samples': 6963712, 'steps': 13600, 'loss/train': 2.011847496032715} +02/24/2022 19:10:27 - INFO - codeparrot_training - Step 13601: {'lr': 0.00043133232918926426, 'samples': 6964224, 'steps': 13601, 'loss/train': 2.1976442337036133} +02/24/2022 19:10:30 - INFO - codeparrot_training - Step 13602: {'lr': 0.0004313210648486864, 'samples': 6964736, 'steps': 13602, 'loss/train': 1.497009515762329} +02/24/2022 19:10:36 - INFO - codeparrot_training - Step 13603: {'lr': 0.00043130979973138664, 'samples': 6965248, 'steps': 13603, 'loss/train': 1.391070008277893} +02/24/2022 19:10:39 - INFO - codeparrot_training - Step 13604: {'lr': 0.00043129853383741334, 'samples': 6965760, 'steps': 13604, 'loss/train': 3.091587781906128} +02/24/2022 19:10:45 - INFO - codeparrot_training - Step 13605: {'lr': 0.00043128726716681464, 'samples': 6966272, 'steps': 13605, 'loss/train': 1.5055516958236694} +02/24/2022 19:10:48 - INFO - codeparrot_training - Step 13606: {'lr': 0.0004312759997196389, 'samples': 6966784, 'steps': 13606, 'loss/train': 1.4908604621887207} +02/24/2022 19:10:54 - INFO - codeparrot_training - Step 13607: {'lr': 0.00043126473149593424, 'samples': 6967296, 'steps': 13607, 'loss/train': 1.7723292112350464} +02/24/2022 19:10:57 - INFO - codeparrot_training - Step 13608: {'lr': 0.00043125346249574915, 'samples': 6967808, 'steps': 13608, 'loss/train': 2.4015259742736816} +02/24/2022 19:11:03 - INFO - codeparrot_training - Step 13609: {'lr': 0.0004312421927191318, 'samples': 6968320, 'steps': 13609, 'loss/train': 2.184799909591675} +02/24/2022 19:11:07 - INFO - codeparrot_training - Step 13610: {'lr': 0.00043123092216613035, 'samples': 6968832, 'steps': 13610, 'loss/train': 2.1721858978271484} +02/24/2022 19:11:12 - INFO - codeparrot_training - Step 13611: {'lr': 0.0004312196508367932, 'samples': 6969344, 'steps': 13611, 'loss/train': 2.9096438884735107} +02/24/2022 19:11:16 - INFO - codeparrot_training - Step 13612: {'lr': 0.0004312083787311686, 'samples': 6969856, 'steps': 13612, 'loss/train': 2.17563533782959} +02/24/2022 19:11:21 - INFO - codeparrot_training - Step 13613: {'lr': 0.0004311971058493049, 'samples': 6970368, 'steps': 13613, 'loss/train': 1.903287649154663} +02/24/2022 19:11:25 - INFO - codeparrot_training - Step 13614: {'lr': 0.0004311858321912503, 'samples': 6970880, 'steps': 13614, 'loss/train': 1.232810616493225} +02/24/2022 19:11:31 - INFO - codeparrot_training - Step 13615: {'lr': 0.0004311745577570531, 'samples': 6971392, 'steps': 13615, 'loss/train': 0.4791630208492279} +02/24/2022 19:11:34 - INFO - codeparrot_training - Step 13616: {'lr': 0.0004311632825467617, 'samples': 6971904, 'steps': 13616, 'loss/train': 2.1557745933532715} +02/24/2022 19:11:39 - INFO - codeparrot_training - Step 13617: {'lr': 0.00043115200656042426, 'samples': 6972416, 'steps': 13617, 'loss/train': 1.3522979021072388} +02/24/2022 19:11:43 - INFO - codeparrot_training - Step 13618: {'lr': 0.00043114072979808914, 'samples': 6972928, 'steps': 13618, 'loss/train': 1.9285956621170044} +02/24/2022 19:11:48 - INFO - codeparrot_training - Step 13619: {'lr': 0.00043112945225980473, 'samples': 6973440, 'steps': 13619, 'loss/train': 1.6338764429092407} +02/24/2022 19:11:52 - INFO - codeparrot_training - Step 13620: {'lr': 0.00043111817394561917, 'samples': 6973952, 'steps': 13620, 'loss/train': 2.6576662063598633} +02/24/2022 19:11:58 - INFO - codeparrot_training - Step 13621: {'lr': 0.0004311068948555809, 'samples': 6974464, 'steps': 13621, 'loss/train': 1.338390827178955} +02/24/2022 19:12:01 - INFO - codeparrot_training - Step 13622: {'lr': 0.0004310956149897382, 'samples': 6974976, 'steps': 13622, 'loss/train': 1.3402029275894165} +02/24/2022 19:12:06 - INFO - codeparrot_training - Step 13623: {'lr': 0.00043108433434813943, 'samples': 6975488, 'steps': 13623, 'loss/train': 1.94597327709198} +02/24/2022 19:12:10 - INFO - codeparrot_training - Step 13624: {'lr': 0.00043107305293083276, 'samples': 6976000, 'steps': 13624, 'loss/train': 2.299471855163574} +02/24/2022 19:12:16 - INFO - codeparrot_training - Step 13625: {'lr': 0.0004310617707378668, 'samples': 6976512, 'steps': 13625, 'loss/train': 2.528211832046509} +02/24/2022 19:12:20 - INFO - codeparrot_training - Step 13626: {'lr': 0.0004310504877692896, 'samples': 6977024, 'steps': 13626, 'loss/train': 3.587096929550171} +02/24/2022 19:12:25 - INFO - codeparrot_training - Step 13627: {'lr': 0.00043103920402514956, 'samples': 6977536, 'steps': 13627, 'loss/train': 1.9988280534744263} +02/24/2022 19:12:29 - INFO - codeparrot_training - Step 13628: {'lr': 0.00043102791950549513, 'samples': 6978048, 'steps': 13628, 'loss/train': 2.581573963165283} +02/24/2022 19:12:34 - INFO - codeparrot_training - Step 13629: {'lr': 0.00043101663421037453, 'samples': 6978560, 'steps': 13629, 'loss/train': 2.308349132537842} +02/24/2022 19:12:38 - INFO - codeparrot_training - Step 13630: {'lr': 0.00043100534813983617, 'samples': 6979072, 'steps': 13630, 'loss/train': 1.44800865650177} +02/24/2022 19:12:43 - INFO - codeparrot_training - Step 13631: {'lr': 0.00043099406129392835, 'samples': 6979584, 'steps': 13631, 'loss/train': 2.951619863510132} +02/24/2022 19:12:47 - INFO - codeparrot_training - Step 13632: {'lr': 0.00043098277367269953, 'samples': 6980096, 'steps': 13632, 'loss/train': 2.598477363586426} +02/24/2022 19:12:52 - INFO - codeparrot_training - Step 13633: {'lr': 0.0004309714852761979, 'samples': 6980608, 'steps': 13633, 'loss/train': 1.5477696657180786} +02/24/2022 19:12:56 - INFO - codeparrot_training - Step 13634: {'lr': 0.0004309601961044719, 'samples': 6981120, 'steps': 13634, 'loss/train': 2.181486129760742} +02/24/2022 19:13:02 - INFO - codeparrot_training - Step 13635: {'lr': 0.0004309489061575699, 'samples': 6981632, 'steps': 13635, 'loss/train': 1.551332712173462} +02/24/2022 19:13:05 - INFO - codeparrot_training - Step 13636: {'lr': 0.0004309376154355402, 'samples': 6982144, 'steps': 13636, 'loss/train': 2.204392910003662} +02/24/2022 19:13:11 - INFO - codeparrot_training - Step 13637: {'lr': 0.00043092632393843124, 'samples': 6982656, 'steps': 13637, 'loss/train': 1.9445143938064575} +02/24/2022 19:13:14 - INFO - codeparrot_training - Step 13638: {'lr': 0.00043091503166629136, 'samples': 6983168, 'steps': 13638, 'loss/train': 2.391655921936035} +02/24/2022 19:13:20 - INFO - codeparrot_training - Step 13639: {'lr': 0.000430903738619169, 'samples': 6983680, 'steps': 13639, 'loss/train': 1.3919063806533813} +02/24/2022 19:13:23 - INFO - codeparrot_training - Step 13640: {'lr': 0.00043089244479711233, 'samples': 6984192, 'steps': 13640, 'loss/train': 2.4541327953338623} +02/24/2022 19:13:29 - INFO - codeparrot_training - Step 13641: {'lr': 0.00043088115020016994, 'samples': 6984704, 'steps': 13641, 'loss/train': 1.7544317245483398} +02/24/2022 19:13:34 - INFO - codeparrot_training - Step 13642: {'lr': 0.00043086985482839016, 'samples': 6985216, 'steps': 13642, 'loss/train': 1.6198045015335083} +02/24/2022 19:13:38 - INFO - codeparrot_training - Step 13643: {'lr': 0.00043085855868182135, 'samples': 6985728, 'steps': 13643, 'loss/train': 2.3710861206054688} +02/24/2022 19:13:45 - INFO - codeparrot_training - Step 13644: {'lr': 0.0004308472617605118, 'samples': 6986240, 'steps': 13644, 'loss/train': 1.6309189796447754} +02/24/2022 19:13:48 - INFO - codeparrot_training - Step 13645: {'lr': 0.00043083596406451015, 'samples': 6986752, 'steps': 13645, 'loss/train': 0.1144043579697609} +02/24/2022 19:13:54 - INFO - codeparrot_training - Step 13646: {'lr': 0.0004308246655938646, 'samples': 6987264, 'steps': 13646, 'loss/train': 1.7623697519302368} +02/24/2022 19:13:57 - INFO - codeparrot_training - Step 13647: {'lr': 0.0004308133663486236, 'samples': 6987776, 'steps': 13647, 'loss/train': 2.623563289642334} +02/24/2022 19:14:03 - INFO - codeparrot_training - Step 13648: {'lr': 0.00043080206632883553, 'samples': 6988288, 'steps': 13648, 'loss/train': 1.596634864807129} +02/24/2022 19:14:06 - INFO - codeparrot_training - Step 13649: {'lr': 0.0004307907655345488, 'samples': 6988800, 'steps': 13649, 'loss/train': 2.036360502243042} +02/24/2022 19:14:12 - INFO - codeparrot_training - Step 13650: {'lr': 0.0004307794639658119, 'samples': 6989312, 'steps': 13650, 'loss/train': 1.4130501747131348} +02/24/2022 19:14:15 - INFO - codeparrot_training - Step 13651: {'lr': 0.0004307681616226732, 'samples': 6989824, 'steps': 13651, 'loss/train': 1.5663180351257324} +02/24/2022 19:14:21 - INFO - codeparrot_training - Step 13652: {'lr': 0.000430756858505181, 'samples': 6990336, 'steps': 13652, 'loss/train': 2.8578941822052} +02/24/2022 19:14:24 - INFO - codeparrot_training - Step 13653: {'lr': 0.0004307455546133838, 'samples': 6990848, 'steps': 13653, 'loss/train': 0.7819316387176514} +02/24/2022 19:14:30 - INFO - codeparrot_training - Step 13654: {'lr': 0.00043073424994733014, 'samples': 6991360, 'steps': 13654, 'loss/train': 1.5253733396530151} +02/24/2022 19:14:33 - INFO - codeparrot_training - Step 13655: {'lr': 0.0004307229445070683, 'samples': 6991872, 'steps': 13655, 'loss/train': 2.6275267601013184} +02/24/2022 19:14:40 - INFO - codeparrot_training - Step 13656: {'lr': 0.0004307116382926468, 'samples': 6992384, 'steps': 13656, 'loss/train': 2.8324756622314453} +02/24/2022 19:14:44 - INFO - codeparrot_training - Step 13657: {'lr': 0.0004307003313041139, 'samples': 6992896, 'steps': 13657, 'loss/train': 1.418109655380249} +02/24/2022 19:14:49 - INFO - codeparrot_training - Step 13658: {'lr': 0.0004306890235415183, 'samples': 6993408, 'steps': 13658, 'loss/train': 2.501063346862793} +02/24/2022 19:14:53 - INFO - codeparrot_training - Step 13659: {'lr': 0.0004306777150049082, 'samples': 6993920, 'steps': 13659, 'loss/train': 1.6798590421676636} +02/24/2022 19:14:58 - INFO - codeparrot_training - Step 13660: {'lr': 0.0004306664056943322, 'samples': 6994432, 'steps': 13660, 'loss/train': 8.496380805969238} +02/24/2022 19:15:02 - INFO - codeparrot_training - Step 13661: {'lr': 0.0004306550956098386, 'samples': 6994944, 'steps': 13661, 'loss/train': 1.7745139598846436} +02/24/2022 19:15:07 - INFO - codeparrot_training - Step 13662: {'lr': 0.000430643784751476, 'samples': 6995456, 'steps': 13662, 'loss/train': 2.3376801013946533} +02/24/2022 19:15:11 - INFO - codeparrot_training - Step 13663: {'lr': 0.0004306324731192929, 'samples': 6995968, 'steps': 13663, 'loss/train': 1.2211185693740845} +02/24/2022 19:15:16 - INFO - codeparrot_training - Step 13664: {'lr': 0.00043062116071333745, 'samples': 6996480, 'steps': 13664, 'loss/train': 3.09124755859375} +02/24/2022 19:15:20 - INFO - codeparrot_training - Step 13665: {'lr': 0.0004306098475336584, 'samples': 6996992, 'steps': 13665, 'loss/train': 2.5851945877075195} +02/24/2022 19:15:26 - INFO - codeparrot_training - Step 13666: {'lr': 0.0004305985335803041, 'samples': 6997504, 'steps': 13666, 'loss/train': 1.8962674140930176} +02/24/2022 19:15:29 - INFO - codeparrot_training - Step 13667: {'lr': 0.000430587218853323, 'samples': 6998016, 'steps': 13667, 'loss/train': 2.4098546504974365} +02/24/2022 19:15:35 - INFO - codeparrot_training - Step 13668: {'lr': 0.0004305759033527636, 'samples': 6998528, 'steps': 13668, 'loss/train': 1.8245320320129395} +02/24/2022 19:15:38 - INFO - codeparrot_training - Step 13669: {'lr': 0.0004305645870786744, 'samples': 6999040, 'steps': 13669, 'loss/train': 2.325986862182617} +02/24/2022 19:15:44 - INFO - codeparrot_training - Step 13670: {'lr': 0.00043055327003110384, 'samples': 6999552, 'steps': 13670, 'loss/train': 1.0751984119415283} +02/24/2022 19:15:47 - INFO - codeparrot_training - Step 13671: {'lr': 0.00043054195221010037, 'samples': 7000064, 'steps': 13671, 'loss/train': 2.629345655441284} +02/24/2022 19:15:53 - INFO - codeparrot_training - Step 13672: {'lr': 0.00043053063361571256, 'samples': 7000576, 'steps': 13672, 'loss/train': 2.271709680557251} +02/24/2022 19:15:56 - INFO - codeparrot_training - Step 13673: {'lr': 0.0004305193142479888, 'samples': 7001088, 'steps': 13673, 'loss/train': 1.5242174863815308} +02/24/2022 19:16:02 - INFO - codeparrot_training - Step 13674: {'lr': 0.0004305079941069776, 'samples': 7001600, 'steps': 13674, 'loss/train': 2.424975633621216} +02/24/2022 19:16:05 - INFO - codeparrot_training - Step 13675: {'lr': 0.0004304966731927276, 'samples': 7002112, 'steps': 13675, 'loss/train': 2.5726494789123535} +02/24/2022 19:16:11 - INFO - codeparrot_training - Step 13676: {'lr': 0.000430485351505287, 'samples': 7002624, 'steps': 13676, 'loss/train': 1.4505350589752197} +02/24/2022 19:16:14 - INFO - codeparrot_training - Step 13677: {'lr': 0.00043047402904470455, 'samples': 7003136, 'steps': 13677, 'loss/train': 1.4280500411987305} +02/24/2022 19:16:19 - INFO - codeparrot_training - Step 13678: {'lr': 0.00043046270581102865, 'samples': 7003648, 'steps': 13678, 'loss/train': 1.0448979139328003} +02/24/2022 19:16:23 - INFO - codeparrot_training - Step 13679: {'lr': 0.00043045138180430783, 'samples': 7004160, 'steps': 13679, 'loss/train': 0.19420510530471802} +02/24/2022 19:16:28 - INFO - codeparrot_training - Step 13680: {'lr': 0.00043044005702459054, 'samples': 7004672, 'steps': 13680, 'loss/train': 2.1825525760650635} +02/24/2022 19:16:32 - INFO - codeparrot_training - Step 13681: {'lr': 0.0004304287314719254, 'samples': 7005184, 'steps': 13681, 'loss/train': 1.7918729782104492} +02/24/2022 19:16:38 - INFO - codeparrot_training - Step 13682: {'lr': 0.00043041740514636085, 'samples': 7005696, 'steps': 13682, 'loss/train': 3.2685329914093018} +02/24/2022 19:16:42 - INFO - codeparrot_training - Step 13683: {'lr': 0.0004304060780479454, 'samples': 7006208, 'steps': 13683, 'loss/train': 2.373638153076172} +02/24/2022 19:16:47 - INFO - codeparrot_training - Step 13684: {'lr': 0.0004303947501767276, 'samples': 7006720, 'steps': 13684, 'loss/train': 1.5358431339263916} +02/24/2022 19:16:51 - INFO - codeparrot_training - Step 13685: {'lr': 0.0004303834215327561, 'samples': 7007232, 'steps': 13685, 'loss/train': 1.933910608291626} +02/24/2022 19:16:56 - INFO - codeparrot_training - Step 13686: {'lr': 0.00043037209211607913, 'samples': 7007744, 'steps': 13686, 'loss/train': 1.9050073623657227} +02/24/2022 19:17:00 - INFO - codeparrot_training - Step 13687: {'lr': 0.00043036076192674546, 'samples': 7008256, 'steps': 13687, 'loss/train': 2.118858575820923} +02/24/2022 19:17:06 - INFO - codeparrot_training - Step 13688: {'lr': 0.00043034943096480357, 'samples': 7008768, 'steps': 13688, 'loss/train': 1.9457725286483765} +02/24/2022 19:17:09 - INFO - codeparrot_training - Step 13689: {'lr': 0.000430338099230302, 'samples': 7009280, 'steps': 13689, 'loss/train': 2.4389522075653076} +02/24/2022 19:17:15 - INFO - codeparrot_training - Step 13690: {'lr': 0.00043032676672328916, 'samples': 7009792, 'steps': 13690, 'loss/train': 1.885743260383606} +02/24/2022 19:17:18 - INFO - codeparrot_training - Step 13691: {'lr': 0.00043031543344381384, 'samples': 7010304, 'steps': 13691, 'loss/train': 1.9437928199768066} +02/24/2022 19:17:25 - INFO - codeparrot_training - Step 13692: {'lr': 0.0004303040993919244, 'samples': 7010816, 'steps': 13692, 'loss/train': 2.450385808944702} +02/24/2022 19:17:28 - INFO - codeparrot_training - Step 13693: {'lr': 0.00043029276456766946, 'samples': 7011328, 'steps': 13693, 'loss/train': 1.8689649105072021} +02/24/2022 19:17:34 - INFO - codeparrot_training - Step 13694: {'lr': 0.00043028142897109754, 'samples': 7011840, 'steps': 13694, 'loss/train': 1.6598165035247803} +02/24/2022 19:17:37 - INFO - codeparrot_training - Step 13695: {'lr': 0.0004302700926022573, 'samples': 7012352, 'steps': 13695, 'loss/train': 1.3444586992263794} +02/24/2022 19:17:43 - INFO - codeparrot_training - Step 13696: {'lr': 0.0004302587554611972, 'samples': 7012864, 'steps': 13696, 'loss/train': 2.2317521572113037} +02/24/2022 19:17:46 - INFO - codeparrot_training - Step 13697: {'lr': 0.0004302474175479658, 'samples': 7013376, 'steps': 13697, 'loss/train': 2.0625648498535156} +02/24/2022 19:17:52 - INFO - codeparrot_training - Step 13698: {'lr': 0.0004302360788626117, 'samples': 7013888, 'steps': 13698, 'loss/train': 1.4547176361083984} +02/24/2022 19:17:57 - INFO - codeparrot_training - Step 13699: {'lr': 0.00043022473940518345, 'samples': 7014400, 'steps': 13699, 'loss/train': 1.1368025541305542} +02/24/2022 19:18:01 - INFO - codeparrot_training - Step 13700: {'lr': 0.0004302133991757297, 'samples': 7014912, 'steps': 13700, 'loss/train': 2.170872449874878} +02/24/2022 19:18:07 - INFO - codeparrot_training - Step 13701: {'lr': 0.00043020205817429895, 'samples': 7015424, 'steps': 13701, 'loss/train': 1.2136664390563965} +02/24/2022 19:18:10 - INFO - codeparrot_training - Step 13702: {'lr': 0.0004301907164009398, 'samples': 7015936, 'steps': 13702, 'loss/train': 1.1966272592544556} +02/24/2022 19:18:14 - INFO - codeparrot_training - Step 13703: {'lr': 0.00043017937385570083, 'samples': 7016448, 'steps': 13703, 'loss/train': 2.951408624649048} +02/24/2022 19:18:20 - INFO - codeparrot_training - Step 13704: {'lr': 0.00043016803053863063, 'samples': 7016960, 'steps': 13704, 'loss/train': 1.6956672668457031} +02/24/2022 19:18:23 - INFO - codeparrot_training - Step 13705: {'lr': 0.00043015668644977783, 'samples': 7017472, 'steps': 13705, 'loss/train': 2.078306198120117} +02/24/2022 19:18:29 - INFO - codeparrot_training - Step 13706: {'lr': 0.000430145341589191, 'samples': 7017984, 'steps': 13706, 'loss/train': 1.5965362787246704} +02/24/2022 19:18:32 - INFO - codeparrot_training - Step 13707: {'lr': 0.0004301339959569187, 'samples': 7018496, 'steps': 13707, 'loss/train': 1.8603307008743286} +02/24/2022 19:18:38 - INFO - codeparrot_training - Step 13708: {'lr': 0.00043012264955300954, 'samples': 7019008, 'steps': 13708, 'loss/train': 1.7873365879058838} +02/24/2022 19:18:41 - INFO - codeparrot_training - Step 13709: {'lr': 0.0004301113023775122, 'samples': 7019520, 'steps': 13709, 'loss/train': 1.5965356826782227} +02/24/2022 19:18:47 - INFO - codeparrot_training - Step 13710: {'lr': 0.00043009995443047517, 'samples': 7020032, 'steps': 13710, 'loss/train': 1.4226073026657104} +02/24/2022 19:18:51 - INFO - codeparrot_training - Step 13711: {'lr': 0.0004300886057119472, 'samples': 7020544, 'steps': 13711, 'loss/train': 2.959944725036621} +02/24/2022 19:18:56 - INFO - codeparrot_training - Step 13712: {'lr': 0.00043007725622197675, 'samples': 7021056, 'steps': 13712, 'loss/train': 1.8905389308929443} +02/24/2022 19:19:00 - INFO - codeparrot_training - Step 13713: {'lr': 0.00043006590596061256, 'samples': 7021568, 'steps': 13713, 'loss/train': 1.7996097803115845} +02/24/2022 19:19:05 - INFO - codeparrot_training - Step 13714: {'lr': 0.0004300545549279032, 'samples': 7022080, 'steps': 13714, 'loss/train': 2.627183437347412} +02/24/2022 19:19:09 - INFO - codeparrot_training - Step 13715: {'lr': 0.0004300432031238973, 'samples': 7022592, 'steps': 13715, 'loss/train': 1.592428207397461} +02/24/2022 19:19:14 - INFO - codeparrot_training - Step 13716: {'lr': 0.00043003185054864344, 'samples': 7023104, 'steps': 13716, 'loss/train': 1.9637314081192017} +02/24/2022 19:19:18 - INFO - codeparrot_training - Step 13717: {'lr': 0.0004300204972021903, 'samples': 7023616, 'steps': 13717, 'loss/train': 2.055371046066284} +02/24/2022 19:19:24 - INFO - codeparrot_training - Step 13718: {'lr': 0.00043000914308458663, 'samples': 7024128, 'steps': 13718, 'loss/train': 2.8856101036071777} +02/24/2022 19:19:27 - INFO - codeparrot_training - Step 13719: {'lr': 0.0004299977881958808, 'samples': 7024640, 'steps': 13719, 'loss/train': 2.062641143798828} +02/24/2022 19:19:33 - INFO - codeparrot_training - Step 13720: {'lr': 0.0004299864325361217, 'samples': 7025152, 'steps': 13720, 'loss/train': 2.971893548965454} +02/24/2022 19:19:36 - INFO - codeparrot_training - Step 13721: {'lr': 0.00042997507610535783, 'samples': 7025664, 'steps': 13721, 'loss/train': 2.027390480041504} +02/24/2022 19:19:42 - INFO - codeparrot_training - Step 13722: {'lr': 0.00042996371890363796, 'samples': 7026176, 'steps': 13722, 'loss/train': 0.9466265439987183} +02/24/2022 19:19:46 - INFO - codeparrot_training - Step 13723: {'lr': 0.00042995236093101055, 'samples': 7026688, 'steps': 13723, 'loss/train': 2.238375663757324} +02/24/2022 19:19:51 - INFO - codeparrot_training - Step 13724: {'lr': 0.0004299410021875244, 'samples': 7027200, 'steps': 13724, 'loss/train': 1.4154530763626099} +02/24/2022 19:19:55 - INFO - codeparrot_training - Step 13725: {'lr': 0.00042992964267322823, 'samples': 7027712, 'steps': 13725, 'loss/train': 1.372147560119629} +02/24/2022 19:20:01 - INFO - codeparrot_training - Step 13726: {'lr': 0.00042991828238817046, 'samples': 7028224, 'steps': 13726, 'loss/train': 2.961617946624756} +02/24/2022 19:20:04 - INFO - codeparrot_training - Step 13727: {'lr': 0.0004299069213324, 'samples': 7028736, 'steps': 13727, 'loss/train': 1.9149107933044434} +02/24/2022 19:20:08 - INFO - codeparrot_training - Step 13728: {'lr': 0.0004298955595059654, 'samples': 7029248, 'steps': 13728, 'loss/train': 1.019468903541565} +02/24/2022 19:20:15 - INFO - codeparrot_training - Step 13729: {'lr': 0.00042988419690891534, 'samples': 7029760, 'steps': 13729, 'loss/train': 2.0500237941741943} +02/24/2022 19:20:18 - INFO - codeparrot_training - Step 13730: {'lr': 0.00042987283354129846, 'samples': 7030272, 'steps': 13730, 'loss/train': 1.713131070137024} +02/24/2022 19:20:24 - INFO - codeparrot_training - Step 13731: {'lr': 0.0004298614694031635, 'samples': 7030784, 'steps': 13731, 'loss/train': 3.7322075366973877} +02/24/2022 19:20:29 - INFO - codeparrot_training - Step 13732: {'lr': 0.0004298501044945591, 'samples': 7031296, 'steps': 13732, 'loss/train': 1.6995245218276978} +02/24/2022 19:20:33 - INFO - codeparrot_training - Step 13733: {'lr': 0.000429838738815534, 'samples': 7031808, 'steps': 13733, 'loss/train': 2.088724374771118} +02/24/2022 19:20:38 - INFO - codeparrot_training - Step 13734: {'lr': 0.00042982737236613687, 'samples': 7032320, 'steps': 13734, 'loss/train': 2.341543674468994} +02/24/2022 19:20:42 - INFO - codeparrot_training - Step 13735: {'lr': 0.00042981600514641635, 'samples': 7032832, 'steps': 13735, 'loss/train': 2.0815110206604004} +02/24/2022 19:20:45 - INFO - codeparrot_training - Step 13736: {'lr': 0.00042980463715642115, 'samples': 7033344, 'steps': 13736, 'loss/train': 2.4765756130218506} +02/24/2022 19:20:51 - INFO - codeparrot_training - Step 13737: {'lr': 0.0004297932683962, 'samples': 7033856, 'steps': 13737, 'loss/train': 2.6112093925476074} +02/24/2022 19:20:54 - INFO - codeparrot_training - Step 13738: {'lr': 0.00042978189886580157, 'samples': 7034368, 'steps': 13738, 'loss/train': 1.3575396537780762} +02/24/2022 19:21:00 - INFO - codeparrot_training - Step 13739: {'lr': 0.00042977052856527456, 'samples': 7034880, 'steps': 13739, 'loss/train': 1.7285126447677612} +02/24/2022 19:21:04 - INFO - codeparrot_training - Step 13740: {'lr': 0.00042975915749466763, 'samples': 7035392, 'steps': 13740, 'loss/train': 2.420454740524292} +02/24/2022 19:21:09 - INFO - codeparrot_training - Step 13741: {'lr': 0.0004297477856540296, 'samples': 7035904, 'steps': 13741, 'loss/train': 2.9618031978607178} +02/24/2022 19:21:13 - INFO - codeparrot_training - Step 13742: {'lr': 0.00042973641304340916, 'samples': 7036416, 'steps': 13742, 'loss/train': 2.3210268020629883} +02/24/2022 19:21:18 - INFO - codeparrot_training - Step 13743: {'lr': 0.00042972503966285503, 'samples': 7036928, 'steps': 13743, 'loss/train': 2.0137109756469727} +02/24/2022 19:21:24 - INFO - codeparrot_training - Step 13744: {'lr': 0.00042971366551241587, 'samples': 7037440, 'steps': 13744, 'loss/train': 1.216625452041626} +02/24/2022 19:21:27 - INFO - codeparrot_training - Step 13745: {'lr': 0.00042970229059214037, 'samples': 7037952, 'steps': 13745, 'loss/train': 1.7579693794250488} +02/24/2022 19:21:33 - INFO - codeparrot_training - Step 13746: {'lr': 0.0004296909149020774, 'samples': 7038464, 'steps': 13746, 'loss/train': 2.274695634841919} +02/24/2022 19:21:36 - INFO - codeparrot_training - Step 13747: {'lr': 0.0004296795384422756, 'samples': 7038976, 'steps': 13747, 'loss/train': 2.0781469345092773} +02/24/2022 19:21:42 - INFO - codeparrot_training - Step 13748: {'lr': 0.00042966816121278365, 'samples': 7039488, 'steps': 13748, 'loss/train': 2.7990827560424805} +02/24/2022 19:21:46 - INFO - codeparrot_training - Step 13749: {'lr': 0.00042965678321365045, 'samples': 7040000, 'steps': 13749, 'loss/train': 2.105743885040283} +02/24/2022 19:21:51 - INFO - codeparrot_training - Step 13750: {'lr': 0.00042964540444492453, 'samples': 7040512, 'steps': 13750, 'loss/train': 1.5459169149398804} +02/24/2022 19:21:55 - INFO - codeparrot_training - Step 13751: {'lr': 0.00042963402490665484, 'samples': 7041024, 'steps': 13751, 'loss/train': 2.268190383911133} +02/24/2022 19:22:01 - INFO - codeparrot_training - Step 13752: {'lr': 0.0004296226445988899, 'samples': 7041536, 'steps': 13752, 'loss/train': 2.209700345993042} +02/24/2022 19:22:04 - INFO - codeparrot_training - Step 13753: {'lr': 0.0004296112635216787, 'samples': 7042048, 'steps': 13753, 'loss/train': 1.8361694812774658} +02/24/2022 19:22:10 - INFO - codeparrot_training - Step 13754: {'lr': 0.00042959988167506983, 'samples': 7042560, 'steps': 13754, 'loss/train': 1.5439350605010986} +02/24/2022 19:22:13 - INFO - codeparrot_training - Step 13755: {'lr': 0.00042958849905911213, 'samples': 7043072, 'steps': 13755, 'loss/train': 1.547868013381958} +02/24/2022 19:22:19 - INFO - codeparrot_training - Step 13756: {'lr': 0.0004295771156738543, 'samples': 7043584, 'steps': 13756, 'loss/train': 0.36390218138694763} +02/24/2022 19:22:22 - INFO - codeparrot_training - Step 13757: {'lr': 0.00042956573151934507, 'samples': 7044096, 'steps': 13757, 'loss/train': 0.7468468546867371} +02/24/2022 19:22:28 - INFO - codeparrot_training - Step 13758: {'lr': 0.00042955434659563334, 'samples': 7044608, 'steps': 13758, 'loss/train': 1.6567022800445557} +02/24/2022 19:22:31 - INFO - codeparrot_training - Step 13759: {'lr': 0.00042954296090276777, 'samples': 7045120, 'steps': 13759, 'loss/train': 0.9269330501556396} +02/24/2022 19:22:36 - INFO - codeparrot_training - Step 13760: {'lr': 0.0004295315744407972, 'samples': 7045632, 'steps': 13760, 'loss/train': 1.100716471672058} +02/24/2022 19:22:40 - INFO - codeparrot_training - Step 13761: {'lr': 0.0004295201872097704, 'samples': 7046144, 'steps': 13761, 'loss/train': 1.6133983135223389} +02/24/2022 19:22:45 - INFO - codeparrot_training - Step 13762: {'lr': 0.0004295087992097361, 'samples': 7046656, 'steps': 13762, 'loss/train': 2.823902130126953} +02/24/2022 19:22:49 - INFO - codeparrot_training - Step 13763: {'lr': 0.00042949741044074306, 'samples': 7047168, 'steps': 13763, 'loss/train': 1.0988500118255615} +02/24/2022 19:22:55 - INFO - codeparrot_training - Step 13764: {'lr': 0.00042948602090284014, 'samples': 7047680, 'steps': 13764, 'loss/train': 1.7745718955993652} +02/24/2022 19:22:59 - INFO - codeparrot_training - Step 13765: {'lr': 0.00042947463059607606, 'samples': 7048192, 'steps': 13765, 'loss/train': 1.2690480947494507} +02/24/2022 19:23:04 - INFO - codeparrot_training - Step 13766: {'lr': 0.0004294632395204997, 'samples': 7048704, 'steps': 13766, 'loss/train': 2.875951051712036} +02/24/2022 19:23:08 - INFO - codeparrot_training - Step 13767: {'lr': 0.0004294518476761598, 'samples': 7049216, 'steps': 13767, 'loss/train': 0.4162357747554779} +02/24/2022 19:23:13 - INFO - codeparrot_training - Step 13768: {'lr': 0.00042944045506310515, 'samples': 7049728, 'steps': 13768, 'loss/train': 2.7400615215301514} +02/24/2022 19:23:17 - INFO - codeparrot_training - Step 13769: {'lr': 0.0004294290616813846, 'samples': 7050240, 'steps': 13769, 'loss/train': 1.1161319017410278} +02/24/2022 19:23:22 - INFO - codeparrot_training - Step 13770: {'lr': 0.00042941766753104696, 'samples': 7050752, 'steps': 13770, 'loss/train': 2.0202841758728027} +02/24/2022 19:23:26 - INFO - codeparrot_training - Step 13771: {'lr': 0.00042940627261214094, 'samples': 7051264, 'steps': 13771, 'loss/train': 2.79789137840271} +02/24/2022 19:23:31 - INFO - codeparrot_training - Step 13772: {'lr': 0.00042939487692471534, 'samples': 7051776, 'steps': 13772, 'loss/train': 2.5752716064453125} +02/24/2022 19:23:35 - INFO - codeparrot_training - Step 13773: {'lr': 0.0004293834804688192, 'samples': 7052288, 'steps': 13773, 'loss/train': 2.0483415126800537} +02/24/2022 19:23:41 - INFO - codeparrot_training - Step 13774: {'lr': 0.00042937208324450116, 'samples': 7052800, 'steps': 13774, 'loss/train': 9.001176834106445} +02/24/2022 19:23:44 - INFO - codeparrot_training - Step 13775: {'lr': 0.00042936068525181004, 'samples': 7053312, 'steps': 13775, 'loss/train': 1.8988256454467773} +02/24/2022 19:23:50 - INFO - codeparrot_training - Step 13776: {'lr': 0.00042934928649079467, 'samples': 7053824, 'steps': 13776, 'loss/train': 0.8681538105010986} +02/24/2022 19:23:53 - INFO - codeparrot_training - Step 13777: {'lr': 0.0004293378869615039, 'samples': 7054336, 'steps': 13777, 'loss/train': 1.0394240617752075} +02/24/2022 19:23:59 - INFO - codeparrot_training - Step 13778: {'lr': 0.00042932648666398667, 'samples': 7054848, 'steps': 13778, 'loss/train': 0.8579873442649841} +02/24/2022 19:24:02 - INFO - codeparrot_training - Step 13779: {'lr': 0.0004293150855982916, 'samples': 7055360, 'steps': 13779, 'loss/train': 1.411608099937439} +02/24/2022 19:24:08 - INFO - codeparrot_training - Step 13780: {'lr': 0.0004293036837644677, 'samples': 7055872, 'steps': 13780, 'loss/train': 1.1304799318313599} +02/24/2022 19:24:11 - INFO - codeparrot_training - Step 13781: {'lr': 0.0004292922811625637, 'samples': 7056384, 'steps': 13781, 'loss/train': 1.9228967428207397} +02/24/2022 19:24:17 - INFO - codeparrot_training - Step 13782: {'lr': 0.0004292808777926286, 'samples': 7056896, 'steps': 13782, 'loss/train': 1.3900909423828125} +02/24/2022 19:24:20 - INFO - codeparrot_training - Step 13783: {'lr': 0.0004292694736547111, 'samples': 7057408, 'steps': 13783, 'loss/train': 2.519258737564087} +02/24/2022 19:24:26 - INFO - codeparrot_training - Step 13784: {'lr': 0.0004292580687488601, 'samples': 7057920, 'steps': 13784, 'loss/train': 2.419724225997925} +02/24/2022 19:24:30 - INFO - codeparrot_training - Step 13785: {'lr': 0.00042924666307512437, 'samples': 7058432, 'steps': 13785, 'loss/train': 2.781606674194336} +02/24/2022 19:24:36 - INFO - codeparrot_training - Step 13786: {'lr': 0.000429235256633553, 'samples': 7058944, 'steps': 13786, 'loss/train': 2.4013800621032715} +02/24/2022 19:24:39 - INFO - codeparrot_training - Step 13787: {'lr': 0.0004292238494241946, 'samples': 7059456, 'steps': 13787, 'loss/train': 2.3291451930999756} +02/24/2022 19:24:45 - INFO - codeparrot_training - Step 13788: {'lr': 0.00042921244144709817, 'samples': 7059968, 'steps': 13788, 'loss/train': 2.263153314590454} +02/24/2022 19:24:48 - INFO - codeparrot_training - Step 13789: {'lr': 0.0004292010327023125, 'samples': 7060480, 'steps': 13789, 'loss/train': 1.587754487991333} +02/24/2022 19:24:54 - INFO - codeparrot_training - Step 13790: {'lr': 0.00042918962318988664, 'samples': 7060992, 'steps': 13790, 'loss/train': 1.3965115547180176} +02/24/2022 19:24:57 - INFO - codeparrot_training - Step 13791: {'lr': 0.00042917821290986926, 'samples': 7061504, 'steps': 13791, 'loss/train': 2.5567522048950195} +02/24/2022 19:25:03 - INFO - codeparrot_training - Step 13792: {'lr': 0.0004291668018623093, 'samples': 7062016, 'steps': 13792, 'loss/train': 2.453965902328491} +02/24/2022 19:25:06 - INFO - codeparrot_training - Step 13793: {'lr': 0.00042915539004725564, 'samples': 7062528, 'steps': 13793, 'loss/train': 1.6210194826126099} +02/24/2022 19:25:12 - INFO - codeparrot_training - Step 13794: {'lr': 0.0004291439774647572, 'samples': 7063040, 'steps': 13794, 'loss/train': 2.0600123405456543} +02/24/2022 19:25:16 - INFO - codeparrot_training - Step 13795: {'lr': 0.00042913256411486277, 'samples': 7063552, 'steps': 13795, 'loss/train': 2.1193196773529053} +02/24/2022 19:25:21 - INFO - codeparrot_training - Step 13796: {'lr': 0.0004291211499976214, 'samples': 7064064, 'steps': 13796, 'loss/train': 2.3493311405181885} +02/24/2022 19:25:25 - INFO - codeparrot_training - Step 13797: {'lr': 0.00042910973511308195, 'samples': 7064576, 'steps': 13797, 'loss/train': 1.6887013912200928} +02/24/2022 19:25:30 - INFO - codeparrot_training - Step 13798: {'lr': 0.0004290983194612932, 'samples': 7065088, 'steps': 13798, 'loss/train': 1.3335912227630615} +02/24/2022 19:25:34 - INFO - codeparrot_training - Step 13799: {'lr': 0.00042908690304230415, 'samples': 7065600, 'steps': 13799, 'loss/train': 1.6617976427078247} +02/24/2022 19:25:40 - INFO - codeparrot_training - Step 13800: {'lr': 0.00042907548585616363, 'samples': 7066112, 'steps': 13800, 'loss/train': 2.6258771419525146} +02/24/2022 19:25:43 - INFO - codeparrot_training - Step 13801: {'lr': 0.00042906406790292053, 'samples': 7066624, 'steps': 13801, 'loss/train': 2.3570327758789062} +02/24/2022 19:25:49 - INFO - codeparrot_training - Step 13802: {'lr': 0.00042905264918262386, 'samples': 7067136, 'steps': 13802, 'loss/train': 1.6215236186981201} +02/24/2022 19:25:52 - INFO - codeparrot_training - Step 13803: {'lr': 0.00042904122969532256, 'samples': 7067648, 'steps': 13803, 'loss/train': 1.531136393547058} +02/24/2022 19:25:58 - INFO - codeparrot_training - Step 13804: {'lr': 0.0004290298094410655, 'samples': 7068160, 'steps': 13804, 'loss/train': 1.1906249523162842} +02/24/2022 19:26:01 - INFO - codeparrot_training - Step 13805: {'lr': 0.0004290183884199015, 'samples': 7068672, 'steps': 13805, 'loss/train': 1.763102412223816} +02/24/2022 19:26:07 - INFO - codeparrot_training - Step 13806: {'lr': 0.00042900696663187963, 'samples': 7069184, 'steps': 13806, 'loss/train': 1.5061942338943481} +02/24/2022 19:26:10 - INFO - codeparrot_training - Step 13807: {'lr': 0.00042899554407704876, 'samples': 7069696, 'steps': 13807, 'loss/train': 2.170198440551758} +02/24/2022 19:26:16 - INFO - codeparrot_training - Step 13808: {'lr': 0.0004289841207554578, 'samples': 7070208, 'steps': 13808, 'loss/train': 2.469888210296631} +02/24/2022 19:26:19 - INFO - codeparrot_training - Step 13809: {'lr': 0.0004289726966671557, 'samples': 7070720, 'steps': 13809, 'loss/train': 2.30928897857666} +02/24/2022 19:26:26 - INFO - codeparrot_training - Step 13810: {'lr': 0.00042896127181219135, 'samples': 7071232, 'steps': 13810, 'loss/train': 2.186579704284668} +02/24/2022 19:26:29 - INFO - codeparrot_training - Step 13811: {'lr': 0.0004289498461906138, 'samples': 7071744, 'steps': 13811, 'loss/train': 1.1831737756729126} +02/24/2022 19:26:34 - INFO - codeparrot_training - Step 13812: {'lr': 0.00042893841980247194, 'samples': 7072256, 'steps': 13812, 'loss/train': 1.0793462991714478} +02/24/2022 19:26:38 - INFO - codeparrot_training - Step 13813: {'lr': 0.00042892699264781463, 'samples': 7072768, 'steps': 13813, 'loss/train': 2.049481153488159} +02/24/2022 19:26:43 - INFO - codeparrot_training - Step 13814: {'lr': 0.000428915564726691, 'samples': 7073280, 'steps': 13814, 'loss/train': 2.2335526943206787} +02/24/2022 19:26:47 - INFO - codeparrot_training - Step 13815: {'lr': 0.0004289041360391499, 'samples': 7073792, 'steps': 13815, 'loss/train': 2.0923101902008057} +02/24/2022 19:26:52 - INFO - codeparrot_training - Step 13816: {'lr': 0.0004288927065852402, 'samples': 7074304, 'steps': 13816, 'loss/train': 2.344900369644165} +02/24/2022 19:26:56 - INFO - codeparrot_training - Step 13817: {'lr': 0.000428881276365011, 'samples': 7074816, 'steps': 13817, 'loss/train': 0.7411916255950928} +02/24/2022 19:27:02 - INFO - codeparrot_training - Step 13818: {'lr': 0.00042886984537851124, 'samples': 7075328, 'steps': 13818, 'loss/train': 0.9422500133514404} +02/24/2022 19:27:05 - INFO - codeparrot_training - Step 13819: {'lr': 0.0004288584136257898, 'samples': 7075840, 'steps': 13819, 'loss/train': 2.4227230548858643} +02/24/2022 19:27:12 - INFO - codeparrot_training - Step 13820: {'lr': 0.00042884698110689574, 'samples': 7076352, 'steps': 13820, 'loss/train': 2.399198293685913} +02/24/2022 19:27:15 - INFO - codeparrot_training - Step 13821: {'lr': 0.000428835547821878, 'samples': 7076864, 'steps': 13821, 'loss/train': 2.7497496604919434} +02/24/2022 19:27:21 - INFO - codeparrot_training - Step 13822: {'lr': 0.00042882411377078556, 'samples': 7077376, 'steps': 13822, 'loss/train': 2.3676512241363525} +02/24/2022 19:27:24 - INFO - codeparrot_training - Step 13823: {'lr': 0.00042881267895366736, 'samples': 7077888, 'steps': 13823, 'loss/train': 1.8745065927505493} +02/24/2022 19:27:30 - INFO - codeparrot_training - Step 13824: {'lr': 0.00042880124337057253, 'samples': 7078400, 'steps': 13824, 'loss/train': 3.031111240386963} +02/24/2022 19:27:33 - INFO - codeparrot_training - Step 13825: {'lr': 0.00042878980702154985, 'samples': 7078912, 'steps': 13825, 'loss/train': 2.54958438873291} +02/24/2022 19:27:39 - INFO - codeparrot_training - Step 13826: {'lr': 0.00042877836990664844, 'samples': 7079424, 'steps': 13826, 'loss/train': 2.217719554901123} +02/24/2022 19:27:42 - INFO - codeparrot_training - Step 13827: {'lr': 0.00042876693202591724, 'samples': 7079936, 'steps': 13827, 'loss/train': 2.8435380458831787} +02/24/2022 19:27:48 - INFO - codeparrot_training - Step 13828: {'lr': 0.0004287554933794053, 'samples': 7080448, 'steps': 13828, 'loss/train': 1.2885924577713013} +02/24/2022 19:27:54 - INFO - codeparrot_training - Step 13829: {'lr': 0.0004287440539671616, 'samples': 7080960, 'steps': 13829, 'loss/train': 2.5434088706970215} +02/24/2022 19:27:58 - INFO - codeparrot_training - Step 13830: {'lr': 0.0004287326137892351, 'samples': 7081472, 'steps': 13830, 'loss/train': 1.6488548517227173} +02/24/2022 19:28:03 - INFO - codeparrot_training - Step 13831: {'lr': 0.00042872117284567486, 'samples': 7081984, 'steps': 13831, 'loss/train': 1.8399345874786377} +02/24/2022 19:28:07 - INFO - codeparrot_training - Step 13832: {'lr': 0.0004287097311365299, 'samples': 7082496, 'steps': 13832, 'loss/train': 1.0356048345565796} +02/24/2022 19:28:12 - INFO - codeparrot_training - Step 13833: {'lr': 0.0004286982886618491, 'samples': 7083008, 'steps': 13833, 'loss/train': 1.7882654666900635} +02/24/2022 19:28:16 - INFO - codeparrot_training - Step 13834: {'lr': 0.0004286868454216816, 'samples': 7083520, 'steps': 13834, 'loss/train': 1.4949910640716553} +02/24/2022 19:28:21 - INFO - codeparrot_training - Step 13835: {'lr': 0.00042867540141607643, 'samples': 7084032, 'steps': 13835, 'loss/train': 0.19167253375053406} +02/24/2022 19:28:25 - INFO - codeparrot_training - Step 13836: {'lr': 0.0004286639566450826, 'samples': 7084544, 'steps': 13836, 'loss/train': 1.452298641204834} +02/24/2022 19:28:31 - INFO - codeparrot_training - Step 13837: {'lr': 0.00042865251110874903, 'samples': 7085056, 'steps': 13837, 'loss/train': 0.1963367760181427} +02/24/2022 19:28:34 - INFO - codeparrot_training - Step 13838: {'lr': 0.00042864106480712495, 'samples': 7085568, 'steps': 13838, 'loss/train': 2.1012227535247803} +02/24/2022 19:28:40 - INFO - codeparrot_training - Step 13839: {'lr': 0.00042862961774025915, 'samples': 7086080, 'steps': 13839, 'loss/train': 1.7118444442749023} +02/24/2022 19:28:43 - INFO - codeparrot_training - Step 13840: {'lr': 0.00042861816990820087, 'samples': 7086592, 'steps': 13840, 'loss/train': 1.9184969663619995} +02/24/2022 19:28:49 - INFO - codeparrot_training - Step 13841: {'lr': 0.00042860672131099904, 'samples': 7087104, 'steps': 13841, 'loss/train': 2.407456874847412} +02/24/2022 19:28:53 - INFO - codeparrot_training - Step 13842: {'lr': 0.00042859527194870275, 'samples': 7087616, 'steps': 13842, 'loss/train': 1.7508593797683716} +02/24/2022 19:28:58 - INFO - codeparrot_training - Step 13843: {'lr': 0.000428583821821361, 'samples': 7088128, 'steps': 13843, 'loss/train': 2.053947925567627} +02/24/2022 19:29:02 - INFO - codeparrot_training - Step 13844: {'lr': 0.00042857237092902285, 'samples': 7088640, 'steps': 13844, 'loss/train': 2.5115182399749756} +02/24/2022 19:29:05 - INFO - codeparrot_training - Step 13845: {'lr': 0.0004285609192717374, 'samples': 7089152, 'steps': 13845, 'loss/train': 2.3884294033050537} +02/24/2022 19:29:11 - INFO - codeparrot_training - Step 13846: {'lr': 0.00042854946684955366, 'samples': 7089664, 'steps': 13846, 'loss/train': 2.2477335929870605} +02/24/2022 19:29:14 - INFO - codeparrot_training - Step 13847: {'lr': 0.00042853801366252067, 'samples': 7090176, 'steps': 13847, 'loss/train': 2.7596707344055176} +02/24/2022 19:29:20 - INFO - codeparrot_training - Step 13848: {'lr': 0.00042852655971068756, 'samples': 7090688, 'steps': 13848, 'loss/train': 0.15768425166606903} +02/24/2022 19:29:24 - INFO - codeparrot_training - Step 13849: {'lr': 0.0004285151049941033, 'samples': 7091200, 'steps': 13849, 'loss/train': 2.1418614387512207} +02/24/2022 19:29:29 - INFO - codeparrot_training - Step 13850: {'lr': 0.00042850364951281707, 'samples': 7091712, 'steps': 13850, 'loss/train': 1.3765848875045776} +02/24/2022 19:29:33 - INFO - codeparrot_training - Step 13851: {'lr': 0.00042849219326687786, 'samples': 7092224, 'steps': 13851, 'loss/train': 1.527712106704712} +02/24/2022 19:29:38 - INFO - codeparrot_training - Step 13852: {'lr': 0.0004284807362563348, 'samples': 7092736, 'steps': 13852, 'loss/train': 2.095750570297241} +02/24/2022 19:29:42 - INFO - codeparrot_training - Step 13853: {'lr': 0.00042846927848123694, 'samples': 7093248, 'steps': 13853, 'loss/train': 2.2116305828094482} +02/24/2022 19:29:47 - INFO - codeparrot_training - Step 13854: {'lr': 0.00042845781994163334, 'samples': 7093760, 'steps': 13854, 'loss/train': 1.7456697225570679} +02/24/2022 19:29:51 - INFO - codeparrot_training - Step 13855: {'lr': 0.00042844636063757316, 'samples': 7094272, 'steps': 13855, 'loss/train': 2.178934335708618} +02/24/2022 19:29:57 - INFO - codeparrot_training - Step 13856: {'lr': 0.00042843490056910534, 'samples': 7094784, 'steps': 13856, 'loss/train': 1.930143117904663} +02/24/2022 19:30:00 - INFO - codeparrot_training - Step 13857: {'lr': 0.0004284234397362791, 'samples': 7095296, 'steps': 13857, 'loss/train': 2.493631362915039} +02/24/2022 19:30:06 - INFO - codeparrot_training - Step 13858: {'lr': 0.0004284119781391436, 'samples': 7095808, 'steps': 13858, 'loss/train': 2.002756357192993} +02/24/2022 19:30:09 - INFO - codeparrot_training - Step 13859: {'lr': 0.00042840051577774766, 'samples': 7096320, 'steps': 13859, 'loss/train': 1.677224040031433} +02/24/2022 19:30:15 - INFO - codeparrot_training - Step 13860: {'lr': 0.00042838905265214067, 'samples': 7096832, 'steps': 13860, 'loss/train': 2.9802050590515137} +02/24/2022 19:30:18 - INFO - codeparrot_training - Step 13861: {'lr': 0.0004283775887623716, 'samples': 7097344, 'steps': 13861, 'loss/train': 2.2648096084594727} +02/24/2022 19:30:24 - INFO - codeparrot_training - Step 13862: {'lr': 0.0004283661241084896, 'samples': 7097856, 'steps': 13862, 'loss/train': 2.162268877029419} +02/24/2022 19:30:27 - INFO - codeparrot_training - Step 13863: {'lr': 0.0004283546586905437, 'samples': 7098368, 'steps': 13863, 'loss/train': 1.8374004364013672} +02/24/2022 19:30:33 - INFO - codeparrot_training - Step 13864: {'lr': 0.00042834319250858316, 'samples': 7098880, 'steps': 13864, 'loss/train': 1.72138512134552} +02/24/2022 19:30:36 - INFO - codeparrot_training - Step 13865: {'lr': 0.000428331725562657, 'samples': 7099392, 'steps': 13865, 'loss/train': 1.6780811548233032} +02/24/2022 19:30:42 - INFO - codeparrot_training - Step 13866: {'lr': 0.0004283202578528143, 'samples': 7099904, 'steps': 13866, 'loss/train': 2.028473138809204} +02/24/2022 19:30:45 - INFO - codeparrot_training - Step 13867: {'lr': 0.00042830878937910426, 'samples': 7100416, 'steps': 13867, 'loss/train': 2.8656997680664062} +02/24/2022 19:30:51 - INFO - codeparrot_training - Step 13868: {'lr': 0.000428297320141576, 'samples': 7100928, 'steps': 13868, 'loss/train': 0.9876556992530823} +02/24/2022 19:30:57 - INFO - codeparrot_training - Step 13869: {'lr': 0.00042828585014027863, 'samples': 7101440, 'steps': 13869, 'loss/train': 2.00734281539917} +02/24/2022 19:31:00 - INFO - codeparrot_training - Step 13870: {'lr': 0.0004282743793752613, 'samples': 7101952, 'steps': 13870, 'loss/train': 1.3925220966339111} +02/24/2022 19:31:06 - INFO - codeparrot_training - Step 13871: {'lr': 0.0004282629078465732, 'samples': 7102464, 'steps': 13871, 'loss/train': 1.8243123292922974} +02/24/2022 19:31:10 - INFO - codeparrot_training - Step 13872: {'lr': 0.0004282514355542633, 'samples': 7102976, 'steps': 13872, 'loss/train': 2.651815414428711} +02/24/2022 19:31:15 - INFO - codeparrot_training - Step 13873: {'lr': 0.0004282399624983808, 'samples': 7103488, 'steps': 13873, 'loss/train': 2.9097788333892822} +02/24/2022 19:31:19 - INFO - codeparrot_training - Step 13874: {'lr': 0.000428228488678975, 'samples': 7104000, 'steps': 13874, 'loss/train': 0.12431791424751282} +02/24/2022 19:31:24 - INFO - codeparrot_training - Step 13875: {'lr': 0.000428217014096095, 'samples': 7104512, 'steps': 13875, 'loss/train': 3.4056780338287354} +02/24/2022 19:31:27 - INFO - codeparrot_training - Step 13876: {'lr': 0.00042820553874978987, 'samples': 7105024, 'steps': 13876, 'loss/train': 1.8900150060653687} +02/24/2022 19:31:33 - INFO - codeparrot_training - Step 13877: {'lr': 0.0004281940626401087, 'samples': 7105536, 'steps': 13877, 'loss/train': 2.707639217376709} +02/24/2022 19:31:37 - INFO - codeparrot_training - Step 13878: {'lr': 0.0004281825857671008, 'samples': 7106048, 'steps': 13878, 'loss/train': 1.4920686483383179} +02/24/2022 19:31:42 - INFO - codeparrot_training - Step 13879: {'lr': 0.00042817110813081526, 'samples': 7106560, 'steps': 13879, 'loss/train': 2.484635829925537} +02/24/2022 19:31:46 - INFO - codeparrot_training - Step 13880: {'lr': 0.00042815962973130134, 'samples': 7107072, 'steps': 13880, 'loss/train': 2.5091655254364014} +02/24/2022 19:31:51 - INFO - codeparrot_training - Step 13881: {'lr': 0.00042814815056860814, 'samples': 7107584, 'steps': 13881, 'loss/train': 2.1803884506225586} +02/24/2022 19:31:55 - INFO - codeparrot_training - Step 13882: {'lr': 0.0004281366706427848, 'samples': 7108096, 'steps': 13882, 'loss/train': 1.9432035684585571} +02/24/2022 19:32:03 - INFO - codeparrot_training - Step 13883: {'lr': 0.0004281251899538805, 'samples': 7108608, 'steps': 13883, 'loss/train': 2.4190433025360107} +02/24/2022 19:32:06 - INFO - codeparrot_training - Step 13884: {'lr': 0.0004281137085019445, 'samples': 7109120, 'steps': 13884, 'loss/train': 2.6437957286834717} +02/24/2022 19:32:12 - INFO - codeparrot_training - Step 13885: {'lr': 0.0004281022262870259, 'samples': 7109632, 'steps': 13885, 'loss/train': 2.5035533905029297} +02/24/2022 19:32:15 - INFO - codeparrot_training - Step 13886: {'lr': 0.00042809074330917387, 'samples': 7110144, 'steps': 13886, 'loss/train': 2.959852695465088} +02/24/2022 19:32:21 - INFO - codeparrot_training - Step 13887: {'lr': 0.00042807925956843775, 'samples': 7110656, 'steps': 13887, 'loss/train': 2.476499319076538} +02/24/2022 19:32:24 - INFO - codeparrot_training - Step 13888: {'lr': 0.0004280677750648665, 'samples': 7111168, 'steps': 13888, 'loss/train': 2.1213388442993164} +02/24/2022 19:32:30 - INFO - codeparrot_training - Step 13889: {'lr': 0.0004280562897985095, 'samples': 7111680, 'steps': 13889, 'loss/train': 2.1540985107421875} +02/24/2022 19:32:33 - INFO - codeparrot_training - Step 13890: {'lr': 0.00042804480376941597, 'samples': 7112192, 'steps': 13890, 'loss/train': 2.0236587524414062} +02/24/2022 19:32:39 - INFO - codeparrot_training - Step 13891: {'lr': 0.0004280333169776349, 'samples': 7112704, 'steps': 13891, 'loss/train': 2.021449089050293} +02/24/2022 19:32:42 - INFO - codeparrot_training - Step 13892: {'lr': 0.00042802182942321576, 'samples': 7113216, 'steps': 13892, 'loss/train': 1.9408913850784302} +02/24/2022 19:32:50 - INFO - codeparrot_training - Step 13893: {'lr': 0.00042801034110620756, 'samples': 7113728, 'steps': 13893, 'loss/train': 2.097508668899536} +02/24/2022 19:32:54 - INFO - codeparrot_training - Step 13894: {'lr': 0.00042799885202665964, 'samples': 7114240, 'steps': 13894, 'loss/train': 1.7103838920593262} +02/24/2022 19:32:59 - INFO - codeparrot_training - Step 13895: {'lr': 0.0004279873621846211, 'samples': 7114752, 'steps': 13895, 'loss/train': 1.814162254333496} +02/24/2022 19:33:03 - INFO - codeparrot_training - Step 13896: {'lr': 0.0004279758715801412, 'samples': 7115264, 'steps': 13896, 'loss/train': 3.1128132343292236} +02/24/2022 19:33:08 - INFO - codeparrot_training - Step 13897: {'lr': 0.0004279643802132692, 'samples': 7115776, 'steps': 13897, 'loss/train': 2.096381664276123} +02/24/2022 19:33:12 - INFO - codeparrot_training - Step 13898: {'lr': 0.0004279528880840544, 'samples': 7116288, 'steps': 13898, 'loss/train': 1.4319543838500977} +02/24/2022 19:33:18 - INFO - codeparrot_training - Step 13899: {'lr': 0.00042794139519254583, 'samples': 7116800, 'steps': 13899, 'loss/train': 1.8660544157028198} +02/24/2022 19:33:21 - INFO - codeparrot_training - Step 13900: {'lr': 0.00042792990153879285, 'samples': 7117312, 'steps': 13900, 'loss/train': 2.2695634365081787} +02/24/2022 19:33:27 - INFO - codeparrot_training - Step 13901: {'lr': 0.00042791840712284466, 'samples': 7117824, 'steps': 13901, 'loss/train': 2.7423362731933594} +02/24/2022 19:33:30 - INFO - codeparrot_training - Step 13902: {'lr': 0.0004279069119447505, 'samples': 7118336, 'steps': 13902, 'loss/train': 0.5829432010650635} +02/24/2022 19:33:37 - INFO - codeparrot_training - Step 13903: {'lr': 0.0004278954160045597, 'samples': 7118848, 'steps': 13903, 'loss/train': 1.1926600933074951} +02/24/2022 19:33:41 - INFO - codeparrot_training - Step 13904: {'lr': 0.0004278839193023214, 'samples': 7119360, 'steps': 13904, 'loss/train': 2.1163101196289062} +02/24/2022 19:33:47 - INFO - codeparrot_training - Step 13905: {'lr': 0.00042787242183808485, 'samples': 7119872, 'steps': 13905, 'loss/train': 2.2231385707855225} +02/24/2022 19:33:50 - INFO - codeparrot_training - Step 13906: {'lr': 0.00042786092361189927, 'samples': 7120384, 'steps': 13906, 'loss/train': 2.0194473266601562} +02/24/2022 19:33:56 - INFO - codeparrot_training - Step 13907: {'lr': 0.00042784942462381403, 'samples': 7120896, 'steps': 13907, 'loss/train': 1.9014095067977905} +02/24/2022 19:33:59 - INFO - codeparrot_training - Step 13908: {'lr': 0.0004278379248738783, 'samples': 7121408, 'steps': 13908, 'loss/train': 2.3189709186553955} +02/24/2022 19:34:03 - INFO - codeparrot_training - Step 13909: {'lr': 0.00042782642436214137, 'samples': 7121920, 'steps': 13909, 'loss/train': 2.4209084510803223} +02/24/2022 19:34:08 - INFO - codeparrot_training - Step 13910: {'lr': 0.00042781492308865255, 'samples': 7122432, 'steps': 13910, 'loss/train': 0.5683369040489197} +02/24/2022 19:34:12 - INFO - codeparrot_training - Step 13911: {'lr': 0.000427803421053461, 'samples': 7122944, 'steps': 13911, 'loss/train': 2.0672590732574463} +02/24/2022 19:34:17 - INFO - codeparrot_training - Step 13912: {'lr': 0.0004277919182566161, 'samples': 7123456, 'steps': 13912, 'loss/train': 1.339026927947998} +02/24/2022 19:34:20 - INFO - codeparrot_training - Step 13913: {'lr': 0.0004277804146981671, 'samples': 7123968, 'steps': 13913, 'loss/train': 1.1579540967941284} +02/24/2022 19:34:28 - INFO - codeparrot_training - Step 13914: {'lr': 0.00042776891037816324, 'samples': 7124480, 'steps': 13914, 'loss/train': 2.402513265609741} +02/24/2022 19:34:34 - INFO - codeparrot_training - Step 13915: {'lr': 0.00042775740529665373, 'samples': 7124992, 'steps': 13915, 'loss/train': 1.5199159383773804} +02/24/2022 19:34:37 - INFO - codeparrot_training - Step 13916: {'lr': 0.000427745899453688, 'samples': 7125504, 'steps': 13916, 'loss/train': 2.0406506061553955} +02/24/2022 19:34:43 - INFO - codeparrot_training - Step 13917: {'lr': 0.0004277343928493153, 'samples': 7126016, 'steps': 13917, 'loss/train': 2.941728353500366} +02/24/2022 19:34:46 - INFO - codeparrot_training - Step 13918: {'lr': 0.0004277228854835849, 'samples': 7126528, 'steps': 13918, 'loss/train': 2.309279441833496} +02/24/2022 19:34:52 - INFO - codeparrot_training - Step 13919: {'lr': 0.0004277113773565461, 'samples': 7127040, 'steps': 13919, 'loss/train': 2.1819634437561035} +02/24/2022 19:34:55 - INFO - codeparrot_training - Step 13920: {'lr': 0.00042769986846824813, 'samples': 7127552, 'steps': 13920, 'loss/train': 2.8887381553649902} +02/24/2022 19:35:01 - INFO - codeparrot_training - Step 13921: {'lr': 0.00042768835881874036, 'samples': 7128064, 'steps': 13921, 'loss/train': 1.737459421157837} +02/24/2022 19:35:04 - INFO - codeparrot_training - Step 13922: {'lr': 0.00042767684840807214, 'samples': 7128576, 'steps': 13922, 'loss/train': 1.5378804206848145} +02/24/2022 19:35:11 - INFO - codeparrot_training - Step 13923: {'lr': 0.00042766533723629264, 'samples': 7129088, 'steps': 13923, 'loss/train': 2.1596288681030273} +02/24/2022 19:35:15 - INFO - codeparrot_training - Step 13924: {'lr': 0.0004276538253034513, 'samples': 7129600, 'steps': 13924, 'loss/train': 2.5023815631866455} +02/24/2022 19:35:20 - INFO - codeparrot_training - Step 13925: {'lr': 0.0004276423126095974, 'samples': 7130112, 'steps': 13925, 'loss/train': 2.2584335803985596} +02/24/2022 19:35:24 - INFO - codeparrot_training - Step 13926: {'lr': 0.0004276307991547802, 'samples': 7130624, 'steps': 13926, 'loss/train': 2.548325777053833} +02/24/2022 19:35:29 - INFO - codeparrot_training - Step 13927: {'lr': 0.0004276192849390491, 'samples': 7131136, 'steps': 13927, 'loss/train': 3.0007622241973877} +02/24/2022 19:35:33 - INFO - codeparrot_training - Step 13928: {'lr': 0.0004276077699624534, 'samples': 7131648, 'steps': 13928, 'loss/train': 1.0408521890640259} +02/24/2022 19:35:36 - INFO - codeparrot_training - Step 13929: {'lr': 0.00042759625422504236, 'samples': 7132160, 'steps': 13929, 'loss/train': 1.2168833017349243} +02/24/2022 19:35:43 - INFO - codeparrot_training - Step 13930: {'lr': 0.00042758473772686533, 'samples': 7132672, 'steps': 13930, 'loss/train': 1.8865442276000977} +02/24/2022 19:35:46 - INFO - codeparrot_training - Step 13931: {'lr': 0.0004275732204679718, 'samples': 7133184, 'steps': 13931, 'loss/train': 2.5318686962127686} +02/24/2022 19:35:51 - INFO - codeparrot_training - Step 13932: {'lr': 0.0004275617024484109, 'samples': 7133696, 'steps': 13932, 'loss/train': 1.77742338180542} +02/24/2022 19:35:55 - INFO - codeparrot_training - Step 13933: {'lr': 0.000427550183668232, 'samples': 7134208, 'steps': 13933, 'loss/train': 3.4306695461273193} +02/24/2022 19:36:00 - INFO - codeparrot_training - Step 13934: {'lr': 0.00042753866412748455, 'samples': 7134720, 'steps': 13934, 'loss/train': 0.13265100121498108} +02/24/2022 19:36:04 - INFO - codeparrot_training - Step 13935: {'lr': 0.00042752714382621784, 'samples': 7135232, 'steps': 13935, 'loss/train': 2.451072931289673} +02/24/2022 19:36:09 - INFO - codeparrot_training - Step 13936: {'lr': 0.0004275156227644812, 'samples': 7135744, 'steps': 13936, 'loss/train': 1.64609694480896} +02/24/2022 19:36:13 - INFO - codeparrot_training - Step 13937: {'lr': 0.00042750410094232394, 'samples': 7136256, 'steps': 13937, 'loss/train': 1.148285150527954} +02/24/2022 19:36:18 - INFO - codeparrot_training - Step 13938: {'lr': 0.0004274925783597956, 'samples': 7136768, 'steps': 13938, 'loss/train': 2.6608564853668213} +02/24/2022 19:36:22 - INFO - codeparrot_training - Step 13939: {'lr': 0.0004274810550169453, 'samples': 7137280, 'steps': 13939, 'loss/train': 1.4991357326507568} +02/24/2022 19:36:29 - INFO - codeparrot_training - Step 13940: {'lr': 0.00042746953091382254, 'samples': 7137792, 'steps': 13940, 'loss/train': 1.3732562065124512} +02/24/2022 19:36:33 - INFO - codeparrot_training - Step 13941: {'lr': 0.00042745800605047677, 'samples': 7138304, 'steps': 13941, 'loss/train': 2.1584925651550293} +02/24/2022 19:36:38 - INFO - codeparrot_training - Step 13942: {'lr': 0.00042744648042695717, 'samples': 7138816, 'steps': 13942, 'loss/train': 1.8618122339248657} +02/24/2022 19:36:42 - INFO - codeparrot_training - Step 13943: {'lr': 0.0004274349540433132, 'samples': 7139328, 'steps': 13943, 'loss/train': 1.9625802040100098} +02/24/2022 19:36:47 - INFO - codeparrot_training - Step 13944: {'lr': 0.00042742342689959425, 'samples': 7139840, 'steps': 13944, 'loss/train': 2.0464932918548584} +02/24/2022 19:36:51 - INFO - codeparrot_training - Step 13945: {'lr': 0.00042741189899584965, 'samples': 7140352, 'steps': 13945, 'loss/train': 1.3461633920669556} +02/24/2022 19:36:56 - INFO - codeparrot_training - Step 13946: {'lr': 0.00042740037033212877, 'samples': 7140864, 'steps': 13946, 'loss/train': 2.097648859024048} +02/24/2022 19:37:00 - INFO - codeparrot_training - Step 13947: {'lr': 0.0004273888409084811, 'samples': 7141376, 'steps': 13947, 'loss/train': 2.3671865463256836} +02/24/2022 19:37:05 - INFO - codeparrot_training - Step 13948: {'lr': 0.0004273773107249559, 'samples': 7141888, 'steps': 13948, 'loss/train': 2.186274766921997} +02/24/2022 19:37:09 - INFO - codeparrot_training - Step 13949: {'lr': 0.0004273657797816027, 'samples': 7142400, 'steps': 13949, 'loss/train': 2.4363222122192383} +02/24/2022 19:37:14 - INFO - codeparrot_training - Step 13950: {'lr': 0.0004273542480784708, 'samples': 7142912, 'steps': 13950, 'loss/train': 1.1649454832077026} +02/24/2022 19:37:18 - INFO - codeparrot_training - Step 13951: {'lr': 0.00042734271561560956, 'samples': 7143424, 'steps': 13951, 'loss/train': 1.4500069618225098} +02/24/2022 19:37:25 - INFO - codeparrot_training - Step 13952: {'lr': 0.00042733118239306845, 'samples': 7143936, 'steps': 13952, 'loss/train': 1.8847614526748657} +02/24/2022 19:37:28 - INFO - codeparrot_training - Step 13953: {'lr': 0.0004273196484108969, 'samples': 7144448, 'steps': 13953, 'loss/train': 1.9720548391342163} +02/24/2022 19:37:34 - INFO - codeparrot_training - Step 13954: {'lr': 0.00042730811366914435, 'samples': 7144960, 'steps': 13954, 'loss/train': 2.0264317989349365} +02/24/2022 19:37:37 - INFO - codeparrot_training - Step 13955: {'lr': 0.0004272965781678601, 'samples': 7145472, 'steps': 13955, 'loss/train': 2.0273795127868652} +02/24/2022 19:37:43 - INFO - codeparrot_training - Step 13956: {'lr': 0.0004272850419070935, 'samples': 7145984, 'steps': 13956, 'loss/train': 2.9655637741088867} +02/24/2022 19:37:46 - INFO - codeparrot_training - Step 13957: {'lr': 0.00042727350488689416, 'samples': 7146496, 'steps': 13957, 'loss/train': 3.0586349964141846} +02/24/2022 19:37:52 - INFO - codeparrot_training - Step 13958: {'lr': 0.00042726196710731135, 'samples': 7147008, 'steps': 13958, 'loss/train': 2.34102463722229} +02/24/2022 19:37:55 - INFO - codeparrot_training - Step 13959: {'lr': 0.0004272504285683947, 'samples': 7147520, 'steps': 13959, 'loss/train': 1.2469335794448853} +02/24/2022 19:38:03 - INFO - codeparrot_training - Step 13960: {'lr': 0.0004272388892701934, 'samples': 7148032, 'steps': 13960, 'loss/train': 2.715869903564453} +02/24/2022 19:38:06 - INFO - codeparrot_training - Step 13961: {'lr': 0.000427227349212757, 'samples': 7148544, 'steps': 13961, 'loss/train': 2.4502198696136475} +02/24/2022 19:38:11 - INFO - codeparrot_training - Step 13962: {'lr': 0.0004272158083961348, 'samples': 7149056, 'steps': 13962, 'loss/train': 2.77317214012146} +02/24/2022 19:38:15 - INFO - codeparrot_training - Step 13963: {'lr': 0.0004272042668203765, 'samples': 7149568, 'steps': 13963, 'loss/train': 2.952699899673462} +02/24/2022 19:38:20 - INFO - codeparrot_training - Step 13964: {'lr': 0.00042719272448553137, 'samples': 7150080, 'steps': 13964, 'loss/train': 0.8886147737503052} +02/24/2022 19:38:24 - INFO - codeparrot_training - Step 13965: {'lr': 0.00042718118139164883, 'samples': 7150592, 'steps': 13965, 'loss/train': 1.395202398300171} +02/24/2022 19:38:30 - INFO - codeparrot_training - Step 13966: {'lr': 0.00042716963753877836, 'samples': 7151104, 'steps': 13966, 'loss/train': 2.372715711593628} +02/24/2022 19:38:33 - INFO - codeparrot_training - Step 13967: {'lr': 0.0004271580929269695, 'samples': 7151616, 'steps': 13967, 'loss/train': 1.9061944484710693} +02/24/2022 19:38:39 - INFO - codeparrot_training - Step 13968: {'lr': 0.0004271465475562716, 'samples': 7152128, 'steps': 13968, 'loss/train': 1.1169462203979492} +02/24/2022 19:38:42 - INFO - codeparrot_training - Step 13969: {'lr': 0.00042713500142673404, 'samples': 7152640, 'steps': 13969, 'loss/train': 1.716810703277588} +02/24/2022 19:38:48 - INFO - codeparrot_training - Step 13970: {'lr': 0.00042712345453840644, 'samples': 7153152, 'steps': 13970, 'loss/train': 1.4124257564544678} +02/24/2022 19:38:51 - INFO - codeparrot_training - Step 13971: {'lr': 0.00042711190689133827, 'samples': 7153664, 'steps': 13971, 'loss/train': 2.0908589363098145} +02/24/2022 19:38:58 - INFO - codeparrot_training - Step 13972: {'lr': 0.0004271003584855788, 'samples': 7154176, 'steps': 13972, 'loss/train': 2.243739604949951} +02/24/2022 19:39:02 - INFO - codeparrot_training - Step 13973: {'lr': 0.0004270888093211778, 'samples': 7154688, 'steps': 13973, 'loss/train': 2.46085786819458} +02/24/2022 19:39:07 - INFO - codeparrot_training - Step 13974: {'lr': 0.0004270772593981844, 'samples': 7155200, 'steps': 13974, 'loss/train': 3.0705995559692383} +02/24/2022 19:39:11 - INFO - codeparrot_training - Step 13975: {'lr': 0.0004270657087166484, 'samples': 7155712, 'steps': 13975, 'loss/train': 2.056504011154175} +02/24/2022 19:39:16 - INFO - codeparrot_training - Step 13976: {'lr': 0.000427054157276619, 'samples': 7156224, 'steps': 13976, 'loss/train': 1.7271323204040527} +02/24/2022 19:39:20 - INFO - codeparrot_training - Step 13977: {'lr': 0.0004270426050781458, 'samples': 7156736, 'steps': 13977, 'loss/train': 2.601325511932373} +02/24/2022 19:39:25 - INFO - codeparrot_training - Step 13978: {'lr': 0.00042703105212127846, 'samples': 7157248, 'steps': 13978, 'loss/train': 1.536615014076233} +02/24/2022 19:39:29 - INFO - codeparrot_training - Step 13979: {'lr': 0.0004270194984060662, 'samples': 7157760, 'steps': 13979, 'loss/train': 2.1201298236846924} +02/24/2022 19:39:35 - INFO - codeparrot_training - Step 13980: {'lr': 0.0004270079439325586, 'samples': 7158272, 'steps': 13980, 'loss/train': 2.6276681423187256} +02/24/2022 19:39:38 - INFO - codeparrot_training - Step 13981: {'lr': 0.0004269963887008053, 'samples': 7158784, 'steps': 13981, 'loss/train': 1.9070311784744263} +02/24/2022 19:39:44 - INFO - codeparrot_training - Step 13982: {'lr': 0.00042698483271085555, 'samples': 7159296, 'steps': 13982, 'loss/train': 1.881844401359558} +02/24/2022 19:39:47 - INFO - codeparrot_training - Step 13983: {'lr': 0.0004269732759627589, 'samples': 7159808, 'steps': 13983, 'loss/train': 1.634737491607666} +02/24/2022 19:39:53 - INFO - codeparrot_training - Step 13984: {'lr': 0.0004269617184565651, 'samples': 7160320, 'steps': 13984, 'loss/train': 1.7941128015518188} +02/24/2022 19:39:56 - INFO - codeparrot_training - Step 13985: {'lr': 0.00042695016019232343, 'samples': 7160832, 'steps': 13985, 'loss/train': 1.1788517236709595} +02/24/2022 19:40:03 - INFO - codeparrot_training - Step 13986: {'lr': 0.0004269386011700834, 'samples': 7161344, 'steps': 13986, 'loss/train': 1.9005893468856812} +02/24/2022 19:40:07 - INFO - codeparrot_training - Step 13987: {'lr': 0.00042692704138989467, 'samples': 7161856, 'steps': 13987, 'loss/train': 1.90274178981781} +02/24/2022 19:40:12 - INFO - codeparrot_training - Step 13988: {'lr': 0.00042691548085180666, 'samples': 7162368, 'steps': 13988, 'loss/train': 1.7172231674194336} +02/24/2022 19:40:16 - INFO - codeparrot_training - Step 13989: {'lr': 0.00042690391955586886, 'samples': 7162880, 'steps': 13989, 'loss/train': 1.4420868158340454} +02/24/2022 19:40:21 - INFO - codeparrot_training - Step 13990: {'lr': 0.00042689235750213093, 'samples': 7163392, 'steps': 13990, 'loss/train': 2.505735397338867} +02/24/2022 19:40:25 - INFO - codeparrot_training - Step 13991: {'lr': 0.0004268807946906422, 'samples': 7163904, 'steps': 13991, 'loss/train': 2.922675848007202} +02/24/2022 19:40:31 - INFO - codeparrot_training - Step 13992: {'lr': 0.0004268692311214524, 'samples': 7164416, 'steps': 13992, 'loss/train': 2.881211757659912} +02/24/2022 19:40:34 - INFO - codeparrot_training - Step 13993: {'lr': 0.00042685766679461095, 'samples': 7164928, 'steps': 13993, 'loss/train': 2.036750078201294} +02/24/2022 19:40:39 - INFO - codeparrot_training - Step 13994: {'lr': 0.0004268461017101674, 'samples': 7165440, 'steps': 13994, 'loss/train': 1.924837350845337} +02/24/2022 19:40:43 - INFO - codeparrot_training - Step 13995: {'lr': 0.00042683453586817136, 'samples': 7165952, 'steps': 13995, 'loss/train': 1.8309330940246582} +02/24/2022 19:40:50 - INFO - codeparrot_training - Step 13996: {'lr': 0.00042682296926867226, 'samples': 7166464, 'steps': 13996, 'loss/train': 2.459993839263916} +02/24/2022 19:40:54 - INFO - codeparrot_training - Step 13997: {'lr': 0.0004268114019117197, 'samples': 7166976, 'steps': 13997, 'loss/train': 1.8924742937088013} +02/24/2022 19:40:59 - INFO - codeparrot_training - Step 13998: {'lr': 0.00042679983379736324, 'samples': 7167488, 'steps': 13998, 'loss/train': 1.7144615650177002} +02/24/2022 19:41:03 - INFO - codeparrot_training - Step 13999: {'lr': 0.0004267882649256525, 'samples': 7168000, 'steps': 13999, 'loss/train': 1.6556921005249023} +02/24/2022 19:41:03 - INFO - codeparrot_training - Evaluating and saving model checkpoint