diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -42554,3 +42554,1009 @@ Use FP16 precision: False 02/26/2022 07:56:41 - INFO - codeparrot_training - Step 41998: {'lr': 3.351001337047e-05, 'samples': 21503488, 'steps': 41998, 'loss/train': 1.3362501859664917} 02/26/2022 07:56:45 - INFO - codeparrot_training - Step 41999: {'lr': 3.350183074847549e-05, 'samples': 21504000, 'steps': 41999, 'loss/train': 1.3207478523254395} 02/26/2022 07:56:45 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/26/2022 07:57:03 - WARNING - huggingface_hub.repository - Several commits (42) will be pushed upstream. +02/26/2022 07:57:03 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/26/2022 07:57:36 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 2427a89..2c9f21a floral-grass-11 -> floral-grass-11 + +02/26/2022 07:57:44 - INFO - codeparrot_training - Step 42000: {'lr': 3.3493649053890325e-05, 'samples': 21504512, 'steps': 42000, 'loss/train': 1.5494685173034668} +02/26/2022 07:57:48 - INFO - codeparrot_training - Step 42001: {'lr': 3.34854682867497e-05, 'samples': 21505024, 'steps': 42001, 'loss/train': 1.836612582206726} +02/26/2022 07:57:53 - INFO - codeparrot_training - Step 42002: {'lr': 3.347728844708855e-05, 'samples': 21505536, 'steps': 42002, 'loss/train': 1.8202868700027466} +02/26/2022 07:57:57 - INFO - codeparrot_training - Step 42003: {'lr': 3.3469109534941976e-05, 'samples': 21506048, 'steps': 42003, 'loss/train': 0.8949858546257019} +02/26/2022 07:58:02 - INFO - codeparrot_training - Step 42004: {'lr': 3.346093155034491e-05, 'samples': 21506560, 'steps': 42004, 'loss/train': 1.3837007284164429} +02/26/2022 07:58:06 - INFO - codeparrot_training - Step 42005: {'lr': 3.345275449333249e-05, 'samples': 21507072, 'steps': 42005, 'loss/train': 1.9728405475616455} +02/26/2022 07:58:11 - INFO - codeparrot_training - Step 42006: {'lr': 3.3444578363939715e-05, 'samples': 21507584, 'steps': 42006, 'loss/train': 1.2473104000091553} +02/26/2022 07:58:15 - INFO - codeparrot_training - Step 42007: {'lr': 3.343640316220162e-05, 'samples': 21508096, 'steps': 42007, 'loss/train': 1.5608479976654053} +02/26/2022 07:58:20 - INFO - codeparrot_training - Step 42008: {'lr': 3.342822888815314e-05, 'samples': 21508608, 'steps': 42008, 'loss/train': 0.278462678194046} +02/26/2022 07:58:24 - INFO - codeparrot_training - Step 42009: {'lr': 3.342005554182942e-05, 'samples': 21509120, 'steps': 42009, 'loss/train': 1.3350027799606323} +02/26/2022 07:58:30 - INFO - codeparrot_training - Step 42010: {'lr': 3.34118831232654e-05, 'samples': 21509632, 'steps': 42010, 'loss/train': 2.1366682052612305} +02/26/2022 07:58:33 - INFO - codeparrot_training - Step 42011: {'lr': 3.3403711632496114e-05, 'samples': 21510144, 'steps': 42011, 'loss/train': 1.8913192749023438} +02/26/2022 07:58:39 - INFO - codeparrot_training - Step 42012: {'lr': 3.3395541069556477e-05, 'samples': 21510656, 'steps': 42012, 'loss/train': 1.1241506338119507} +02/26/2022 07:58:42 - INFO - codeparrot_training - Step 42013: {'lr': 3.3387371434481646e-05, 'samples': 21511168, 'steps': 42013, 'loss/train': 1.008219599723816} +02/26/2022 07:58:48 - INFO - codeparrot_training - Step 42014: {'lr': 3.337920272730649e-05, 'samples': 21511680, 'steps': 42014, 'loss/train': 2.3886592388153076} +02/26/2022 07:58:51 - INFO - codeparrot_training - Step 42015: {'lr': 3.3371034948066064e-05, 'samples': 21512192, 'steps': 42015, 'loss/train': 2.2312192916870117} +02/26/2022 07:58:58 - INFO - codeparrot_training - Step 42016: {'lr': 3.336286809679531e-05, 'samples': 21512704, 'steps': 42016, 'loss/train': 1.4221895933151245} +02/26/2022 07:59:01 - INFO - codeparrot_training - Step 42017: {'lr': 3.335470217352926e-05, 'samples': 21513216, 'steps': 42017, 'loss/train': 1.9809162616729736} +02/26/2022 07:59:07 - INFO - codeparrot_training - Step 42018: {'lr': 3.3346537178302825e-05, 'samples': 21513728, 'steps': 42018, 'loss/train': 1.392099380493164} +02/26/2022 07:59:10 - INFO - codeparrot_training - Step 42019: {'lr': 3.333837311115115e-05, 'samples': 21514240, 'steps': 42019, 'loss/train': 2.8892767429351807} +02/26/2022 07:59:16 - INFO - codeparrot_training - Step 42020: {'lr': 3.3330209972108976e-05, 'samples': 21514752, 'steps': 42020, 'loss/train': 0.7633283138275146} +02/26/2022 07:59:19 - INFO - codeparrot_training - Step 42021: {'lr': 3.332204776121142e-05, 'samples': 21515264, 'steps': 42021, 'loss/train': 2.427926778793335} +02/26/2022 07:59:25 - INFO - codeparrot_training - Step 42022: {'lr': 3.331388647849337e-05, 'samples': 21515776, 'steps': 42022, 'loss/train': 0.5591949820518494} +02/26/2022 07:59:29 - INFO - codeparrot_training - Step 42023: {'lr': 3.3305726123989936e-05, 'samples': 21516288, 'steps': 42023, 'loss/train': 2.251450538635254} +02/26/2022 07:59:32 - INFO - codeparrot_training - Step 42024: {'lr': 3.329756669773584e-05, 'samples': 21516800, 'steps': 42024, 'loss/train': 2.8341686725616455} +02/26/2022 07:59:38 - INFO - codeparrot_training - Step 42025: {'lr': 3.3289408199766224e-05, 'samples': 21517312, 'steps': 42025, 'loss/train': 1.8091281652450562} +02/26/2022 07:59:41 - INFO - codeparrot_training - Step 42026: {'lr': 3.3281250630115886e-05, 'samples': 21517824, 'steps': 42026, 'loss/train': 2.607653856277466} +02/26/2022 07:59:48 - INFO - codeparrot_training - Step 42027: {'lr': 3.327309398882e-05, 'samples': 21518336, 'steps': 42027, 'loss/train': 1.6428308486938477} +02/26/2022 07:59:51 - INFO - codeparrot_training - Step 42028: {'lr': 3.326493827591323e-05, 'samples': 21518848, 'steps': 42028, 'loss/train': 0.2699722647666931} +02/26/2022 07:59:57 - INFO - codeparrot_training - Step 42029: {'lr': 3.325678349143071e-05, 'samples': 21519360, 'steps': 42029, 'loss/train': 1.4577146768569946} +02/26/2022 08:00:00 - INFO - codeparrot_training - Step 42030: {'lr': 3.324862963540726e-05, 'samples': 21519872, 'steps': 42030, 'loss/train': 1.332014560699463} +02/26/2022 08:00:06 - INFO - codeparrot_training - Step 42031: {'lr': 3.3240476707877884e-05, 'samples': 21520384, 'steps': 42031, 'loss/train': 1.5099050998687744} +02/26/2022 08:00:09 - INFO - codeparrot_training - Step 42032: {'lr': 3.323232470887749e-05, 'samples': 21520896, 'steps': 42032, 'loss/train': 1.7806147336959839} +02/26/2022 08:00:15 - INFO - codeparrot_training - Step 42033: {'lr': 3.322417363844099e-05, 'samples': 21521408, 'steps': 42033, 'loss/train': 1.884886384010315} +02/26/2022 08:00:18 - INFO - codeparrot_training - Step 42034: {'lr': 3.3216023496603245e-05, 'samples': 21521920, 'steps': 42034, 'loss/train': 0.9722924828529358} +02/26/2022 08:00:24 - INFO - codeparrot_training - Step 42035: {'lr': 3.3207874283399247e-05, 'samples': 21522432, 'steps': 42035, 'loss/train': 2.0101940631866455} +02/26/2022 08:00:27 - INFO - codeparrot_training - Step 42036: {'lr': 3.3199725998863896e-05, 'samples': 21522944, 'steps': 42036, 'loss/train': 1.547472596168518} +02/26/2022 08:00:34 - INFO - codeparrot_training - Step 42037: {'lr': 3.319157864303207e-05, 'samples': 21523456, 'steps': 42037, 'loss/train': 2.094116449356079} +02/26/2022 08:00:38 - INFO - codeparrot_training - Step 42038: {'lr': 3.318343221593864e-05, 'samples': 21523968, 'steps': 42038, 'loss/train': 2.825582981109619} +02/26/2022 08:00:43 - INFO - codeparrot_training - Step 42039: {'lr': 3.3175286717618574e-05, 'samples': 21524480, 'steps': 42039, 'loss/train': 2.123781442642212} +02/26/2022 08:00:47 - INFO - codeparrot_training - Step 42040: {'lr': 3.316714214810676e-05, 'samples': 21524992, 'steps': 42040, 'loss/train': 2.219564437866211} +02/26/2022 08:00:52 - INFO - codeparrot_training - Step 42041: {'lr': 3.315899850743803e-05, 'samples': 21525504, 'steps': 42041, 'loss/train': 1.5891379117965698} +02/26/2022 08:00:56 - INFO - codeparrot_training - Step 42042: {'lr': 3.315085579564725e-05, 'samples': 21526016, 'steps': 42042, 'loss/train': 1.7330372333526611} +02/26/2022 08:01:01 - INFO - codeparrot_training - Step 42043: {'lr': 3.314271401276939e-05, 'samples': 21526528, 'steps': 42043, 'loss/train': 2.413616180419922} +02/26/2022 08:01:05 - INFO - codeparrot_training - Step 42044: {'lr': 3.3134573158839276e-05, 'samples': 21527040, 'steps': 42044, 'loss/train': 2.170334577560425} +02/26/2022 08:01:10 - INFO - codeparrot_training - Step 42045: {'lr': 3.312643323389181e-05, 'samples': 21527552, 'steps': 42045, 'loss/train': 1.7984298467636108} +02/26/2022 08:01:13 - INFO - codeparrot_training - Step 42046: {'lr': 3.311829423796184e-05, 'samples': 21528064, 'steps': 42046, 'loss/train': 1.1373330354690552} +02/26/2022 08:01:19 - INFO - codeparrot_training - Step 42047: {'lr': 3.311015617108415e-05, 'samples': 21528576, 'steps': 42047, 'loss/train': 2.523714780807495} +02/26/2022 08:01:23 - INFO - codeparrot_training - Step 42048: {'lr': 3.310201903329377e-05, 'samples': 21529088, 'steps': 42048, 'loss/train': 1.9073712825775146} +02/26/2022 08:01:30 - INFO - codeparrot_training - Step 42049: {'lr': 3.3093882824625445e-05, 'samples': 21529600, 'steps': 42049, 'loss/train': 1.5528634786605835} +02/26/2022 08:01:33 - INFO - codeparrot_training - Step 42050: {'lr': 3.308574754511404e-05, 'samples': 21530112, 'steps': 42050, 'loss/train': 2.2407617568969727} +02/26/2022 08:01:39 - INFO - codeparrot_training - Step 42051: {'lr': 3.307761319479438e-05, 'samples': 21530624, 'steps': 42051, 'loss/train': 2.026862382888794} +02/26/2022 08:01:42 - INFO - codeparrot_training - Step 42052: {'lr': 3.30694797737014e-05, 'samples': 21531136, 'steps': 42052, 'loss/train': 0.8512178659439087} +02/26/2022 08:01:48 - INFO - codeparrot_training - Step 42053: {'lr': 3.306134728186983e-05, 'samples': 21531648, 'steps': 42053, 'loss/train': 0.7336413264274597} +02/26/2022 08:01:51 - INFO - codeparrot_training - Step 42054: {'lr': 3.3053215719334645e-05, 'samples': 21532160, 'steps': 42054, 'loss/train': 2.0124905109405518} +02/26/2022 08:01:57 - INFO - codeparrot_training - Step 42055: {'lr': 3.3045085086130524e-05, 'samples': 21532672, 'steps': 42055, 'loss/train': 1.8189895153045654} +02/26/2022 08:02:00 - INFO - codeparrot_training - Step 42056: {'lr': 3.30369553822924e-05, 'samples': 21533184, 'steps': 42056, 'loss/train': 1.744791865348816} +02/26/2022 08:02:06 - INFO - codeparrot_training - Step 42057: {'lr': 3.3028826607855e-05, 'samples': 21533696, 'steps': 42057, 'loss/train': 1.1750150918960571} +02/26/2022 08:02:09 - INFO - codeparrot_training - Step 42058: {'lr': 3.302069876285335e-05, 'samples': 21534208, 'steps': 42058, 'loss/train': 1.453096866607666} +02/26/2022 08:02:15 - INFO - codeparrot_training - Step 42059: {'lr': 3.3012571847322006e-05, 'samples': 21534720, 'steps': 42059, 'loss/train': 2.6410202980041504} +02/26/2022 08:02:18 - INFO - codeparrot_training - Step 42060: {'lr': 3.300444586129595e-05, 'samples': 21535232, 'steps': 42060, 'loss/train': 1.747113585472107} +02/26/2022 08:02:24 - INFO - codeparrot_training - Step 42061: {'lr': 3.299632080480988e-05, 'samples': 21535744, 'steps': 42061, 'loss/train': 1.5606273412704468} +02/26/2022 08:02:27 - INFO - codeparrot_training - Step 42062: {'lr': 3.298819667789882e-05, 'samples': 21536256, 'steps': 42062, 'loss/train': 1.8926509618759155} +02/26/2022 08:02:34 - INFO - codeparrot_training - Step 42063: {'lr': 3.298007348059726e-05, 'samples': 21536768, 'steps': 42063, 'loss/train': 2.08646559715271} +02/26/2022 08:02:37 - INFO - codeparrot_training - Step 42064: {'lr': 3.297195121294022e-05, 'samples': 21537280, 'steps': 42064, 'loss/train': 2.0213544368743896} +02/26/2022 08:02:43 - INFO - codeparrot_training - Step 42065: {'lr': 3.296382987496238e-05, 'samples': 21537792, 'steps': 42065, 'loss/train': 1.816352367401123} +02/26/2022 08:02:46 - INFO - codeparrot_training - Step 42066: {'lr': 3.2955709466698625e-05, 'samples': 21538304, 'steps': 42066, 'loss/train': 0.8643584847450256} +02/26/2022 08:02:52 - INFO - codeparrot_training - Step 42067: {'lr': 3.29475899881837e-05, 'samples': 21538816, 'steps': 42067, 'loss/train': 0.5341842174530029} +02/26/2022 08:02:55 - INFO - codeparrot_training - Step 42068: {'lr': 3.293947143945236e-05, 'samples': 21539328, 'steps': 42068, 'loss/train': 1.8572545051574707} +02/26/2022 08:03:01 - INFO - codeparrot_training - Step 42069: {'lr': 3.293135382053938e-05, 'samples': 21539840, 'steps': 42069, 'loss/train': 2.3421874046325684} +02/26/2022 08:03:06 - INFO - codeparrot_training - Step 42070: {'lr': 3.292323713147957e-05, 'samples': 21540352, 'steps': 42070, 'loss/train': 1.0097898244857788} +02/26/2022 08:03:10 - INFO - codeparrot_training - Step 42071: {'lr': 3.2915121372307704e-05, 'samples': 21540864, 'steps': 42071, 'loss/train': 2.0727267265319824} +02/26/2022 08:03:16 - INFO - codeparrot_training - Step 42072: {'lr': 3.290700654305853e-05, 'samples': 21541376, 'steps': 42072, 'loss/train': 0.837982714176178} +02/26/2022 08:03:20 - INFO - codeparrot_training - Step 42073: {'lr': 3.289889264376672e-05, 'samples': 21541888, 'steps': 42073, 'loss/train': 1.8117860555648804} +02/26/2022 08:03:25 - INFO - codeparrot_training - Step 42074: {'lr': 3.289077967446721e-05, 'samples': 21542400, 'steps': 42074, 'loss/train': 4.799282073974609} +02/26/2022 08:03:29 - INFO - codeparrot_training - Step 42075: {'lr': 3.288266763519462e-05, 'samples': 21542912, 'steps': 42075, 'loss/train': 2.1497035026550293} +02/26/2022 08:03:34 - INFO - codeparrot_training - Step 42076: {'lr': 3.287455652598378e-05, 'samples': 21543424, 'steps': 42076, 'loss/train': 1.7228047847747803} +02/26/2022 08:03:38 - INFO - codeparrot_training - Step 42077: {'lr': 3.28664463468693e-05, 'samples': 21543936, 'steps': 42077, 'loss/train': 2.276219367980957} +02/26/2022 08:03:43 - INFO - codeparrot_training - Step 42078: {'lr': 3.285833709788611e-05, 'samples': 21544448, 'steps': 42078, 'loss/train': 1.7032498121261597} +02/26/2022 08:03:47 - INFO - codeparrot_training - Step 42079: {'lr': 3.285022877906882e-05, 'samples': 21544960, 'steps': 42079, 'loss/train': 1.9602081775665283} +02/26/2022 08:03:52 - INFO - codeparrot_training - Step 42080: {'lr': 3.284212139045223e-05, 'samples': 21545472, 'steps': 42080, 'loss/train': 1.1926156282424927} +02/26/2022 08:03:56 - INFO - codeparrot_training - Step 42081: {'lr': 3.2834014932070945e-05, 'samples': 21545984, 'steps': 42081, 'loss/train': 0.9904447793960571} +02/26/2022 08:04:02 - INFO - codeparrot_training - Step 42082: {'lr': 3.2825909403959884e-05, 'samples': 21546496, 'steps': 42082, 'loss/train': 4.8680739402771} +02/26/2022 08:04:05 - INFO - codeparrot_training - Step 42083: {'lr': 3.281780480615362e-05, 'samples': 21547008, 'steps': 42083, 'loss/train': 1.8000751733779907} +02/26/2022 08:04:12 - INFO - codeparrot_training - Step 42084: {'lr': 3.280970113868695e-05, 'samples': 21547520, 'steps': 42084, 'loss/train': 1.3541169166564941} +02/26/2022 08:04:15 - INFO - codeparrot_training - Step 42085: {'lr': 3.280159840159447e-05, 'samples': 21548032, 'steps': 42085, 'loss/train': 3.1170871257781982} +02/26/2022 08:04:20 - INFO - codeparrot_training - Step 42086: {'lr': 3.279349659491104e-05, 'samples': 21548544, 'steps': 42086, 'loss/train': 1.6846998929977417} +02/26/2022 08:04:24 - INFO - codeparrot_training - Step 42087: {'lr': 3.278539571867131e-05, 'samples': 21549056, 'steps': 42087, 'loss/train': 0.8489043116569519} +02/26/2022 08:04:30 - INFO - codeparrot_training - Step 42088: {'lr': 3.277729577290997e-05, 'samples': 21549568, 'steps': 42088, 'loss/train': 2.935265064239502} +02/26/2022 08:04:33 - INFO - codeparrot_training - Step 42089: {'lr': 3.2769196757661654e-05, 'samples': 21550080, 'steps': 42089, 'loss/train': 1.5126219987869263} +02/26/2022 08:04:39 - INFO - codeparrot_training - Step 42090: {'lr': 3.276109867296117e-05, 'samples': 21550592, 'steps': 42090, 'loss/train': 2.5620248317718506} +02/26/2022 08:04:42 - INFO - codeparrot_training - Step 42091: {'lr': 3.2753001518843194e-05, 'samples': 21551104, 'steps': 42091, 'loss/train': 1.7641351222991943} +02/26/2022 08:04:48 - INFO - codeparrot_training - Step 42092: {'lr': 3.2744905295342295e-05, 'samples': 21551616, 'steps': 42092, 'loss/train': 2.3234059810638428} +02/26/2022 08:04:51 - INFO - codeparrot_training - Step 42093: {'lr': 3.273681000249329e-05, 'samples': 21552128, 'steps': 42093, 'loss/train': 1.7815600633621216} +02/26/2022 08:04:58 - INFO - codeparrot_training - Step 42094: {'lr': 3.2728715640330784e-05, 'samples': 21552640, 'steps': 42094, 'loss/train': 1.3718794584274292} +02/26/2022 08:05:01 - INFO - codeparrot_training - Step 42095: {'lr': 3.272062220888949e-05, 'samples': 21553152, 'steps': 42095, 'loss/train': 1.898031234741211} +02/26/2022 08:05:07 - INFO - codeparrot_training - Step 42096: {'lr': 3.271252970820401e-05, 'samples': 21553664, 'steps': 42096, 'loss/train': 1.3841553926467896} +02/26/2022 08:05:10 - INFO - codeparrot_training - Step 42097: {'lr': 3.2704438138309146e-05, 'samples': 21554176, 'steps': 42097, 'loss/train': 1.3685778379440308} +02/26/2022 08:05:16 - INFO - codeparrot_training - Step 42098: {'lr': 3.2696347499239385e-05, 'samples': 21554688, 'steps': 42098, 'loss/train': 2.179177761077881} +02/26/2022 08:05:20 - INFO - codeparrot_training - Step 42099: {'lr': 3.268825779102949e-05, 'samples': 21555200, 'steps': 42099, 'loss/train': 2.3120627403259277} +02/26/2022 08:05:23 - INFO - codeparrot_training - Step 42100: {'lr': 3.268016901371407e-05, 'samples': 21555712, 'steps': 42100, 'loss/train': 1.750062346458435} +02/26/2022 08:05:29 - INFO - codeparrot_training - Step 42101: {'lr': 3.267208116732784e-05, 'samples': 21556224, 'steps': 42101, 'loss/train': 2.200613021850586} +02/26/2022 08:05:32 - INFO - codeparrot_training - Step 42102: {'lr': 3.266399425190542e-05, 'samples': 21556736, 'steps': 42102, 'loss/train': 0.6815493106842041} +02/26/2022 08:05:38 - INFO - codeparrot_training - Step 42103: {'lr': 3.265590826748141e-05, 'samples': 21557248, 'steps': 42103, 'loss/train': 2.0527563095092773} +02/26/2022 08:05:41 - INFO - codeparrot_training - Step 42104: {'lr': 3.2647823214090436e-05, 'samples': 21557760, 'steps': 42104, 'loss/train': 2.652242660522461} +02/26/2022 08:05:47 - INFO - codeparrot_training - Step 42105: {'lr': 3.2639739091767236e-05, 'samples': 21558272, 'steps': 42105, 'loss/train': 2.2487101554870605} +02/26/2022 08:05:50 - INFO - codeparrot_training - Step 42106: {'lr': 3.263165590054634e-05, 'samples': 21558784, 'steps': 42106, 'loss/train': 1.612600564956665} +02/26/2022 08:05:56 - INFO - codeparrot_training - Step 42107: {'lr': 3.262357364046245e-05, 'samples': 21559296, 'steps': 42107, 'loss/train': 1.869500756263733} +02/26/2022 08:05:59 - INFO - codeparrot_training - Step 42108: {'lr': 3.261549231155006e-05, 'samples': 21559808, 'steps': 42108, 'loss/train': 0.8940420150756836} +02/26/2022 08:06:06 - INFO - codeparrot_training - Step 42109: {'lr': 3.260741191384395e-05, 'samples': 21560320, 'steps': 42109, 'loss/train': 1.62667977809906} +02/26/2022 08:06:09 - INFO - codeparrot_training - Step 42110: {'lr': 3.2599332447378644e-05, 'samples': 21560832, 'steps': 42110, 'loss/train': 1.635771632194519} +02/26/2022 08:06:15 - INFO - codeparrot_training - Step 42111: {'lr': 3.259125391218875e-05, 'samples': 21561344, 'steps': 42111, 'loss/train': 1.9833099842071533} +02/26/2022 08:06:18 - INFO - codeparrot_training - Step 42112: {'lr': 3.258317630830887e-05, 'samples': 21561856, 'steps': 42112, 'loss/train': 0.9248422384262085} +02/26/2022 08:06:24 - INFO - codeparrot_training - Step 42113: {'lr': 3.257509963577365e-05, 'samples': 21562368, 'steps': 42113, 'loss/train': 1.0906859636306763} +02/26/2022 08:06:27 - INFO - codeparrot_training - Step 42114: {'lr': 3.256702389461763e-05, 'samples': 21562880, 'steps': 42114, 'loss/train': 2.0703229904174805} +02/26/2022 08:06:33 - INFO - codeparrot_training - Step 42115: {'lr': 3.2558949084875525e-05, 'samples': 21563392, 'steps': 42115, 'loss/train': 2.565927267074585} +02/26/2022 08:06:36 - INFO - codeparrot_training - Step 42116: {'lr': 3.255087520658173e-05, 'samples': 21563904, 'steps': 42116, 'loss/train': 2.0509915351867676} +02/26/2022 08:06:42 - INFO - codeparrot_training - Step 42117: {'lr': 3.254280225977102e-05, 'samples': 21564416, 'steps': 42117, 'loss/train': 1.783235788345337} +02/26/2022 08:06:45 - INFO - codeparrot_training - Step 42118: {'lr': 3.253473024447781e-05, 'samples': 21564928, 'steps': 42118, 'loss/train': 0.44805601239204407} +02/26/2022 08:06:51 - INFO - codeparrot_training - Step 42119: {'lr': 3.2526659160736885e-05, 'samples': 21565440, 'steps': 42119, 'loss/train': 1.477900505065918} +02/26/2022 08:06:54 - INFO - codeparrot_training - Step 42120: {'lr': 3.251858900858259e-05, 'samples': 21565952, 'steps': 42120, 'loss/train': 1.5423990488052368} +02/26/2022 08:07:01 - INFO - codeparrot_training - Step 42121: {'lr': 3.2510519788049644e-05, 'samples': 21566464, 'steps': 42121, 'loss/train': 1.6041160821914673} +02/26/2022 08:07:04 - INFO - codeparrot_training - Step 42122: {'lr': 3.250245149917252e-05, 'samples': 21566976, 'steps': 42122, 'loss/train': 0.27017030119895935} +02/26/2022 08:07:10 - INFO - codeparrot_training - Step 42123: {'lr': 3.249438414198594e-05, 'samples': 21567488, 'steps': 42123, 'loss/train': 0.8072147965431213} +02/26/2022 08:07:13 - INFO - codeparrot_training - Step 42124: {'lr': 3.2486317716524246e-05, 'samples': 21568000, 'steps': 42124, 'loss/train': 2.299391031265259} +02/26/2022 08:07:19 - INFO - codeparrot_training - Step 42125: {'lr': 3.247825222282216e-05, 'samples': 21568512, 'steps': 42125, 'loss/train': 0.8135859370231628} +02/26/2022 08:07:22 - INFO - codeparrot_training - Step 42126: {'lr': 3.247018766091411e-05, 'samples': 21569024, 'steps': 42126, 'loss/train': 2.7928144931793213} +02/26/2022 08:07:28 - INFO - codeparrot_training - Step 42127: {'lr': 3.246212403083476e-05, 'samples': 21569536, 'steps': 42127, 'loss/train': 2.626312255859375} +02/26/2022 08:07:31 - INFO - codeparrot_training - Step 42128: {'lr': 3.245406133261858e-05, 'samples': 21570048, 'steps': 42128, 'loss/train': 1.3988430500030518} +02/26/2022 08:07:37 - INFO - codeparrot_training - Step 42129: {'lr': 3.244599956630015e-05, 'samples': 21570560, 'steps': 42129, 'loss/train': 1.2855547666549683} +02/26/2022 08:07:40 - INFO - codeparrot_training - Step 42130: {'lr': 3.243793873191395e-05, 'samples': 21571072, 'steps': 42130, 'loss/train': 1.954200029373169} +02/26/2022 08:07:47 - INFO - codeparrot_training - Step 42131: {'lr': 3.24298788294945e-05, 'samples': 21571584, 'steps': 42131, 'loss/train': 2.026719570159912} +02/26/2022 08:07:53 - INFO - codeparrot_training - Step 42132: {'lr': 3.2421819859076415e-05, 'samples': 21572096, 'steps': 42132, 'loss/train': 2.2114007472991943} +02/26/2022 08:07:56 - INFO - codeparrot_training - Step 42133: {'lr': 3.241376182069414e-05, 'samples': 21572608, 'steps': 42133, 'loss/train': 1.7515803575515747} +02/26/2022 08:08:02 - INFO - codeparrot_training - Step 42134: {'lr': 3.240570471438225e-05, 'samples': 21573120, 'steps': 42134, 'loss/train': 0.6081182360649109} +02/26/2022 08:08:05 - INFO - codeparrot_training - Step 42135: {'lr': 3.239764854017516e-05, 'samples': 21573632, 'steps': 42135, 'loss/train': 2.7466824054718018} +02/26/2022 08:08:11 - INFO - codeparrot_training - Step 42136: {'lr': 3.2389593298107506e-05, 'samples': 21574144, 'steps': 42136, 'loss/train': 0.5307530760765076} +02/26/2022 08:08:14 - INFO - codeparrot_training - Step 42137: {'lr': 3.238153898821372e-05, 'samples': 21574656, 'steps': 42137, 'loss/train': 2.0336854457855225} +02/26/2022 08:08:20 - INFO - codeparrot_training - Step 42138: {'lr': 3.237348561052833e-05, 'samples': 21575168, 'steps': 42138, 'loss/train': 1.5019077062606812} +02/26/2022 08:08:23 - INFO - codeparrot_training - Step 42139: {'lr': 3.2365433165085777e-05, 'samples': 21575680, 'steps': 42139, 'loss/train': 2.0933120250701904} +02/26/2022 08:08:30 - INFO - codeparrot_training - Step 42140: {'lr': 3.235738165192065e-05, 'samples': 21576192, 'steps': 42140, 'loss/train': 1.8026868104934692} +02/26/2022 08:08:33 - INFO - codeparrot_training - Step 42141: {'lr': 3.234933107106738e-05, 'samples': 21576704, 'steps': 42141, 'loss/train': 1.2884650230407715} +02/26/2022 08:08:39 - INFO - codeparrot_training - Step 42142: {'lr': 3.234128142256049e-05, 'samples': 21577216, 'steps': 42142, 'loss/train': 2.894014358520508} +02/26/2022 08:08:42 - INFO - codeparrot_training - Step 42143: {'lr': 3.2333232706434346e-05, 'samples': 21577728, 'steps': 42143, 'loss/train': 1.3358802795410156} +02/26/2022 08:08:48 - INFO - codeparrot_training - Step 42144: {'lr': 3.23251849227236e-05, 'samples': 21578240, 'steps': 42144, 'loss/train': 2.144378662109375} +02/26/2022 08:08:51 - INFO - codeparrot_training - Step 42145: {'lr': 3.231713807146264e-05, 'samples': 21578752, 'steps': 42145, 'loss/train': 2.0812461376190186} +02/26/2022 08:08:57 - INFO - codeparrot_training - Step 42146: {'lr': 3.2309092152685905e-05, 'samples': 21579264, 'steps': 42146, 'loss/train': 2.204704999923706} +02/26/2022 08:09:00 - INFO - codeparrot_training - Step 42147: {'lr': 3.230104716642787e-05, 'samples': 21579776, 'steps': 42147, 'loss/train': 1.767375111579895} +02/26/2022 08:09:06 - INFO - codeparrot_training - Step 42148: {'lr': 3.229300311272307e-05, 'samples': 21580288, 'steps': 42148, 'loss/train': 0.8443734645843506} +02/26/2022 08:09:09 - INFO - codeparrot_training - Step 42149: {'lr': 3.2284959991605914e-05, 'samples': 21580800, 'steps': 42149, 'loss/train': 0.5046215057373047} +02/26/2022 08:09:15 - INFO - codeparrot_training - Step 42150: {'lr': 3.227691780311087e-05, 'samples': 21581312, 'steps': 42150, 'loss/train': 0.4444860517978668} +02/26/2022 08:09:18 - INFO - codeparrot_training - Step 42151: {'lr': 3.226887654727231e-05, 'samples': 21581824, 'steps': 42151, 'loss/train': 1.469581127166748} +02/26/2022 08:09:24 - INFO - codeparrot_training - Step 42152: {'lr': 3.226083622412479e-05, 'samples': 21582336, 'steps': 42152, 'loss/train': 1.8099740743637085} +02/26/2022 08:09:27 - INFO - codeparrot_training - Step 42153: {'lr': 3.225279683370266e-05, 'samples': 21582848, 'steps': 42153, 'loss/train': 0.1667536199092865} +02/26/2022 08:09:33 - INFO - codeparrot_training - Step 42154: {'lr': 3.224475837604052e-05, 'samples': 21583360, 'steps': 42154, 'loss/train': 0.7521966099739075} +02/26/2022 08:09:36 - INFO - codeparrot_training - Step 42155: {'lr': 3.223672085117257e-05, 'samples': 21583872, 'steps': 42155, 'loss/train': 1.3273061513900757} +02/26/2022 08:09:43 - INFO - codeparrot_training - Step 42156: {'lr': 3.222868425913345e-05, 'samples': 21584384, 'steps': 42156, 'loss/train': 1.8184926509857178} +02/26/2022 08:09:46 - INFO - codeparrot_training - Step 42157: {'lr': 3.2220648599957407e-05, 'samples': 21584896, 'steps': 42157, 'loss/train': 1.391922116279602} +02/26/2022 08:09:52 - INFO - codeparrot_training - Step 42158: {'lr': 3.221261387367908e-05, 'samples': 21585408, 'steps': 42158, 'loss/train': 1.7123470306396484} +02/26/2022 08:09:55 - INFO - codeparrot_training - Step 42159: {'lr': 3.220458008033261e-05, 'samples': 21585920, 'steps': 42159, 'loss/train': 1.8236193656921387} +02/26/2022 08:10:01 - INFO - codeparrot_training - Step 42160: {'lr': 3.219654721995266e-05, 'samples': 21586432, 'steps': 42160, 'loss/train': 1.917496681213379} +02/26/2022 08:10:04 - INFO - codeparrot_training - Step 42161: {'lr': 3.218851529257347e-05, 'samples': 21586944, 'steps': 42161, 'loss/train': 1.730018973350525} +02/26/2022 08:10:10 - INFO - codeparrot_training - Step 42162: {'lr': 3.218048429822959e-05, 'samples': 21587456, 'steps': 42162, 'loss/train': 0.8438036441802979} +02/26/2022 08:10:13 - INFO - codeparrot_training - Step 42163: {'lr': 3.217245423695534e-05, 'samples': 21587968, 'steps': 42163, 'loss/train': 1.4133445024490356} +02/26/2022 08:10:19 - INFO - codeparrot_training - Step 42164: {'lr': 3.2164425108785114e-05, 'samples': 21588480, 'steps': 42164, 'loss/train': 2.0032968521118164} +02/26/2022 08:10:22 - INFO - codeparrot_training - Step 42165: {'lr': 3.215639691375327e-05, 'samples': 21588992, 'steps': 42165, 'loss/train': 1.2030644416809082} +02/26/2022 08:10:29 - INFO - codeparrot_training - Step 42166: {'lr': 3.21483696518943e-05, 'samples': 21589504, 'steps': 42166, 'loss/train': 1.6856492757797241} +02/26/2022 08:10:32 - INFO - codeparrot_training - Step 42167: {'lr': 3.214034332324256e-05, 'samples': 21590016, 'steps': 42167, 'loss/train': 0.993902862071991} +02/26/2022 08:10:38 - INFO - codeparrot_training - Step 42168: {'lr': 3.2132317927832386e-05, 'samples': 21590528, 'steps': 42168, 'loss/train': 1.6544512510299683} +02/26/2022 08:10:41 - INFO - codeparrot_training - Step 42169: {'lr': 3.2124293465698146e-05, 'samples': 21591040, 'steps': 42169, 'loss/train': 1.4478480815887451} +02/26/2022 08:10:47 - INFO - codeparrot_training - Step 42170: {'lr': 3.21162699368743e-05, 'samples': 21591552, 'steps': 42170, 'loss/train': 0.8067625761032104} +02/26/2022 08:10:50 - INFO - codeparrot_training - Step 42171: {'lr': 3.2108247341395166e-05, 'samples': 21592064, 'steps': 42171, 'loss/train': 1.3318606615066528} +02/26/2022 08:10:56 - INFO - codeparrot_training - Step 42172: {'lr': 3.2100225679295104e-05, 'samples': 21592576, 'steps': 42172, 'loss/train': 0.8684771656990051} +02/26/2022 08:10:59 - INFO - codeparrot_training - Step 42173: {'lr': 3.2092204950608436e-05, 'samples': 21593088, 'steps': 42173, 'loss/train': 1.346718192100525} +02/26/2022 08:11:05 - INFO - codeparrot_training - Step 42174: {'lr': 3.208418515536962e-05, 'samples': 21593600, 'steps': 42174, 'loss/train': 2.430318832397461} +02/26/2022 08:11:08 - INFO - codeparrot_training - Step 42175: {'lr': 3.2076166293612956e-05, 'samples': 21594112, 'steps': 42175, 'loss/train': 0.9546864628791809} +02/26/2022 08:11:15 - INFO - codeparrot_training - Step 42176: {'lr': 3.206814836537281e-05, 'samples': 21594624, 'steps': 42176, 'loss/train': 1.54837965965271} +02/26/2022 08:11:19 - INFO - codeparrot_training - Step 42177: {'lr': 3.2060131370683505e-05, 'samples': 21595136, 'steps': 42177, 'loss/train': 1.2214683294296265} +02/26/2022 08:11:24 - INFO - codeparrot_training - Step 42178: {'lr': 3.205211530957935e-05, 'samples': 21595648, 'steps': 42178, 'loss/train': 1.5218291282653809} +02/26/2022 08:11:28 - INFO - codeparrot_training - Step 42179: {'lr': 3.204410018209478e-05, 'samples': 21596160, 'steps': 42179, 'loss/train': 0.08363626897335052} +02/26/2022 08:11:33 - INFO - codeparrot_training - Step 42180: {'lr': 3.2036085988264045e-05, 'samples': 21596672, 'steps': 42180, 'loss/train': 1.9375556707382202} +02/26/2022 08:11:37 - INFO - codeparrot_training - Step 42181: {'lr': 3.2028072728121525e-05, 'samples': 21597184, 'steps': 42181, 'loss/train': 1.794095754623413} +02/26/2022 08:11:43 - INFO - codeparrot_training - Step 42182: {'lr': 3.20200604017015e-05, 'samples': 21597696, 'steps': 42182, 'loss/train': 1.295919418334961} +02/26/2022 08:11:46 - INFO - codeparrot_training - Step 42183: {'lr': 3.201204900903834e-05, 'samples': 21598208, 'steps': 42183, 'loss/train': 2.2694764137268066} +02/26/2022 08:11:50 - INFO - codeparrot_training - Step 42184: {'lr': 3.200403855016637e-05, 'samples': 21598720, 'steps': 42184, 'loss/train': 1.5445663928985596} +02/26/2022 08:11:55 - INFO - codeparrot_training - Step 42185: {'lr': 3.199602902511986e-05, 'samples': 21599232, 'steps': 42185, 'loss/train': 1.3078842163085938} +02/26/2022 08:11:59 - INFO - codeparrot_training - Step 42186: {'lr': 3.198802043393309e-05, 'samples': 21599744, 'steps': 42186, 'loss/train': 1.4613194465637207} +02/26/2022 08:12:05 - INFO - codeparrot_training - Step 42187: {'lr': 3.198001277664045e-05, 'samples': 21600256, 'steps': 42187, 'loss/train': 1.7340518236160278} +02/26/2022 08:12:09 - INFO - codeparrot_training - Step 42188: {'lr': 3.197200605327616e-05, 'samples': 21600768, 'steps': 42188, 'loss/train': 1.4510672092437744} +02/26/2022 08:12:14 - INFO - codeparrot_training - Step 42189: {'lr': 3.196400026387469e-05, 'samples': 21601280, 'steps': 42189, 'loss/train': 1.1401158571243286} +02/26/2022 08:12:18 - INFO - codeparrot_training - Step 42190: {'lr': 3.195599540847008e-05, 'samples': 21601792, 'steps': 42190, 'loss/train': 2.254307508468628} +02/26/2022 08:12:24 - INFO - codeparrot_training - Step 42191: {'lr': 3.1947991487096815e-05, 'samples': 21602304, 'steps': 42191, 'loss/train': 1.657935380935669} +02/26/2022 08:12:27 - INFO - codeparrot_training - Step 42192: {'lr': 3.1939988499789076e-05, 'samples': 21602816, 'steps': 42192, 'loss/train': 2.2772719860076904} +02/26/2022 08:12:33 - INFO - codeparrot_training - Step 42193: {'lr': 3.193198644658127e-05, 'samples': 21603328, 'steps': 42193, 'loss/train': 2.2732653617858887} +02/26/2022 08:12:36 - INFO - codeparrot_training - Step 42194: {'lr': 3.1923985327507485e-05, 'samples': 21603840, 'steps': 42194, 'loss/train': 2.5038034915924072} +02/26/2022 08:12:42 - INFO - codeparrot_training - Step 42195: {'lr': 3.191598514260213e-05, 'samples': 21604352, 'steps': 42195, 'loss/train': 2.12198805809021} +02/26/2022 08:12:45 - INFO - codeparrot_training - Step 42196: {'lr': 3.190798589189944e-05, 'samples': 21604864, 'steps': 42196, 'loss/train': 1.015647292137146} +02/26/2022 08:12:51 - INFO - codeparrot_training - Step 42197: {'lr': 3.189998757543378e-05, 'samples': 21605376, 'steps': 42197, 'loss/train': 1.8399434089660645} +02/26/2022 08:12:54 - INFO - codeparrot_training - Step 42198: {'lr': 3.189199019323918e-05, 'samples': 21605888, 'steps': 42198, 'loss/train': 1.6753349304199219} +02/26/2022 08:13:00 - INFO - codeparrot_training - Step 42199: {'lr': 3.1883993745350114e-05, 'samples': 21606400, 'steps': 42199, 'loss/train': 1.3343709707260132} +02/26/2022 08:13:03 - INFO - codeparrot_training - Step 42200: {'lr': 3.187599823180071e-05, 'samples': 21606912, 'steps': 42200, 'loss/train': 1.2284804582595825} +02/26/2022 08:13:08 - INFO - codeparrot_training - Step 42201: {'lr': 3.186800365262532e-05, 'samples': 21607424, 'steps': 42201, 'loss/train': 1.6069765090942383} +02/26/2022 08:13:12 - INFO - codeparrot_training - Step 42202: {'lr': 3.1860010007858125e-05, 'samples': 21607936, 'steps': 42202, 'loss/train': 1.5057530403137207} +02/26/2022 08:13:19 - INFO - codeparrot_training - Step 42203: {'lr': 3.18520172975334e-05, 'samples': 21608448, 'steps': 42203, 'loss/train': 2.3396847248077393} +02/26/2022 08:13:22 - INFO - codeparrot_training - Step 42204: {'lr': 3.184402552168528e-05, 'samples': 21608960, 'steps': 42204, 'loss/train': 1.5844547748565674} +02/26/2022 08:13:28 - INFO - codeparrot_training - Step 42205: {'lr': 3.183603468034815e-05, 'samples': 21609472, 'steps': 42205, 'loss/train': 0.2942953109741211} +02/26/2022 08:13:31 - INFO - codeparrot_training - Step 42206: {'lr': 3.1828044773556195e-05, 'samples': 21609984, 'steps': 42206, 'loss/train': 1.871066927909851} +02/26/2022 08:13:37 - INFO - codeparrot_training - Step 42207: {'lr': 3.182005580134359e-05, 'samples': 21610496, 'steps': 42207, 'loss/train': 1.0271105766296387} +02/26/2022 08:13:40 - INFO - codeparrot_training - Step 42208: {'lr': 3.181206776374454e-05, 'samples': 21611008, 'steps': 42208, 'loss/train': 1.138145923614502} +02/26/2022 08:13:46 - INFO - codeparrot_training - Step 42209: {'lr': 3.180408066079335e-05, 'samples': 21611520, 'steps': 42209, 'loss/train': 2.3307571411132812} +02/26/2022 08:13:49 - INFO - codeparrot_training - Step 42210: {'lr': 3.1796094492524216e-05, 'samples': 21612032, 'steps': 42210, 'loss/train': 1.8100149631500244} +02/26/2022 08:13:55 - INFO - codeparrot_training - Step 42211: {'lr': 3.1788109258971324e-05, 'samples': 21612544, 'steps': 42211, 'loss/train': 0.5624557137489319} +02/26/2022 08:13:58 - INFO - codeparrot_training - Step 42212: {'lr': 3.1780124960168824e-05, 'samples': 21613056, 'steps': 42212, 'loss/train': 1.4848436117172241} +02/26/2022 08:14:05 - INFO - codeparrot_training - Step 42213: {'lr': 3.1772141596151023e-05, 'samples': 21613568, 'steps': 42213, 'loss/train': 2.242154598236084} +02/26/2022 08:14:08 - INFO - codeparrot_training - Step 42214: {'lr': 3.1764159166952075e-05, 'samples': 21614080, 'steps': 42214, 'loss/train': 1.3728671073913574} +02/26/2022 08:14:14 - INFO - codeparrot_training - Step 42215: {'lr': 3.175617767260616e-05, 'samples': 21614592, 'steps': 42215, 'loss/train': 1.8052622079849243} +02/26/2022 08:14:17 - INFO - codeparrot_training - Step 42216: {'lr': 3.174819711314744e-05, 'samples': 21615104, 'steps': 42216, 'loss/train': 1.9396475553512573} +02/26/2022 08:14:23 - INFO - codeparrot_training - Step 42217: {'lr': 3.1740217488610195e-05, 'samples': 21615616, 'steps': 42217, 'loss/train': 1.9824506044387817} +02/26/2022 08:14:26 - INFO - codeparrot_training - Step 42218: {'lr': 3.173223879902856e-05, 'samples': 21616128, 'steps': 42218, 'loss/train': 1.5740658044815063} +02/26/2022 08:14:32 - INFO - codeparrot_training - Step 42219: {'lr': 3.172426104443671e-05, 'samples': 21616640, 'steps': 42219, 'loss/train': 1.2283740043640137} +02/26/2022 08:14:35 - INFO - codeparrot_training - Step 42220: {'lr': 3.1716284224868744e-05, 'samples': 21617152, 'steps': 42220, 'loss/train': 1.7913917303085327} +02/26/2022 08:14:41 - INFO - codeparrot_training - Step 42221: {'lr': 3.170830834035896e-05, 'samples': 21617664, 'steps': 42221, 'loss/train': 1.6830540895462036} +02/26/2022 08:14:44 - INFO - codeparrot_training - Step 42222: {'lr': 3.170033339094147e-05, 'samples': 21618176, 'steps': 42222, 'loss/train': 1.0630455017089844} +02/26/2022 08:14:51 - INFO - codeparrot_training - Step 42223: {'lr': 3.169235937665038e-05, 'samples': 21618688, 'steps': 42223, 'loss/train': 1.574013590812683} +02/26/2022 08:14:55 - INFO - codeparrot_training - Step 42224: {'lr': 3.168438629752002e-05, 'samples': 21619200, 'steps': 42224, 'loss/train': 2.3928468227386475} +02/26/2022 08:15:00 - INFO - codeparrot_training - Step 42225: {'lr': 3.16764141535843e-05, 'samples': 21619712, 'steps': 42225, 'loss/train': 1.7158324718475342} +02/26/2022 08:15:04 - INFO - codeparrot_training - Step 42226: {'lr': 3.1668442944877556e-05, 'samples': 21620224, 'steps': 42226, 'loss/train': 1.8108742237091064} +02/26/2022 08:15:09 - INFO - codeparrot_training - Step 42227: {'lr': 3.166047267143382e-05, 'samples': 21620736, 'steps': 42227, 'loss/train': 2.149655342102051} +02/26/2022 08:15:13 - INFO - codeparrot_training - Step 42228: {'lr': 3.16525033332874e-05, 'samples': 21621248, 'steps': 42228, 'loss/train': 1.1872278451919556} +02/26/2022 08:15:18 - INFO - codeparrot_training - Step 42229: {'lr': 3.164453493047223e-05, 'samples': 21621760, 'steps': 42229, 'loss/train': 1.3453013896942139} +02/26/2022 08:15:22 - INFO - codeparrot_training - Step 42230: {'lr': 3.163656746302257e-05, 'samples': 21622272, 'steps': 42230, 'loss/train': 2.3913726806640625} +02/26/2022 08:15:27 - INFO - codeparrot_training - Step 42231: {'lr': 3.162860093097247e-05, 'samples': 21622784, 'steps': 42231, 'loss/train': 2.323552131652832} +02/26/2022 08:15:31 - INFO - codeparrot_training - Step 42232: {'lr': 3.162063533435619e-05, 'samples': 21623296, 'steps': 42232, 'loss/train': 2.199755907058716} +02/26/2022 08:15:38 - INFO - codeparrot_training - Step 42233: {'lr': 3.161267067320769e-05, 'samples': 21623808, 'steps': 42233, 'loss/train': 1.5659087896347046} +02/26/2022 08:15:41 - INFO - codeparrot_training - Step 42234: {'lr': 3.1604706947561205e-05, 'samples': 21624320, 'steps': 42234, 'loss/train': 2.040925979614258} +02/26/2022 08:15:47 - INFO - codeparrot_training - Step 42235: {'lr': 3.159674415745073e-05, 'samples': 21624832, 'steps': 42235, 'loss/train': 3.170947551727295} +02/26/2022 08:15:50 - INFO - codeparrot_training - Step 42236: {'lr': 3.158878230291054e-05, 'samples': 21625344, 'steps': 42236, 'loss/train': 0.8010342121124268} +02/26/2022 08:15:56 - INFO - codeparrot_training - Step 42237: {'lr': 3.1580821383974636e-05, 'samples': 21625856, 'steps': 42237, 'loss/train': 2.8901357650756836} +02/26/2022 08:15:59 - INFO - codeparrot_training - Step 42238: {'lr': 3.1572861400677135e-05, 'samples': 21626368, 'steps': 42238, 'loss/train': 1.9376899003982544} +02/26/2022 08:16:05 - INFO - codeparrot_training - Step 42239: {'lr': 3.156490235305209e-05, 'samples': 21626880, 'steps': 42239, 'loss/train': 1.1256794929504395} +02/26/2022 08:16:08 - INFO - codeparrot_training - Step 42240: {'lr': 3.15569442411337e-05, 'samples': 21627392, 'steps': 42240, 'loss/train': 1.6314609050750732} +02/26/2022 08:16:14 - INFO - codeparrot_training - Step 42241: {'lr': 3.1548987064955985e-05, 'samples': 21627904, 'steps': 42241, 'loss/train': 0.919796884059906} +02/26/2022 08:16:17 - INFO - codeparrot_training - Step 42242: {'lr': 3.154103082455306e-05, 'samples': 21628416, 'steps': 42242, 'loss/train': 2.8674371242523193} +02/26/2022 08:16:23 - INFO - codeparrot_training - Step 42243: {'lr': 3.153307551995893e-05, 'samples': 21628928, 'steps': 42243, 'loss/train': 2.042776107788086} +02/26/2022 08:16:28 - INFO - codeparrot_training - Step 42244: {'lr': 3.1525121151207787e-05, 'samples': 21629440, 'steps': 42244, 'loss/train': 1.126320719718933} +02/26/2022 08:16:32 - INFO - codeparrot_training - Step 42245: {'lr': 3.151716771833366e-05, 'samples': 21629952, 'steps': 42245, 'loss/train': 1.825308918952942} +02/26/2022 08:16:37 - INFO - codeparrot_training - Step 42246: {'lr': 3.15092152213706e-05, 'samples': 21630464, 'steps': 42246, 'loss/train': 1.1401276588439941} +02/26/2022 08:16:41 - INFO - codeparrot_training - Step 42247: {'lr': 3.1501263660352606e-05, 'samples': 21630976, 'steps': 42247, 'loss/train': 0.5610069036483765} +02/26/2022 08:16:48 - INFO - codeparrot_training - Step 42248: {'lr': 3.1493313035313916e-05, 'samples': 21631488, 'steps': 42248, 'loss/train': 1.1515543460845947} +02/26/2022 08:16:51 - INFO - codeparrot_training - Step 42249: {'lr': 3.14853633462884e-05, 'samples': 21632000, 'steps': 42249, 'loss/train': 1.8861671686172485} +02/26/2022 08:16:57 - INFO - codeparrot_training - Step 42250: {'lr': 3.147741459331033e-05, 'samples': 21632512, 'steps': 42250, 'loss/train': 1.9794964790344238} +02/26/2022 08:17:00 - INFO - codeparrot_training - Step 42251: {'lr': 3.146946677641352e-05, 'samples': 21633024, 'steps': 42251, 'loss/train': 1.2931498289108276} +02/26/2022 08:17:06 - INFO - codeparrot_training - Step 42252: {'lr': 3.146151989563214e-05, 'samples': 21633536, 'steps': 42252, 'loss/train': 1.604335904121399} +02/26/2022 08:17:09 - INFO - codeparrot_training - Step 42253: {'lr': 3.145357395100021e-05, 'samples': 21634048, 'steps': 42253, 'loss/train': 1.6945403814315796} +02/26/2022 08:17:15 - INFO - codeparrot_training - Step 42254: {'lr': 3.144562894255185e-05, 'samples': 21634560, 'steps': 42254, 'loss/train': 1.5309677124023438} +02/26/2022 08:17:18 - INFO - codeparrot_training - Step 42255: {'lr': 3.143768487032092e-05, 'samples': 21635072, 'steps': 42255, 'loss/train': 2.0649807453155518} +02/26/2022 08:17:24 - INFO - codeparrot_training - Step 42256: {'lr': 3.14297417343416e-05, 'samples': 21635584, 'steps': 42256, 'loss/train': 2.199768304824829} +02/26/2022 08:17:27 - INFO - codeparrot_training - Step 42257: {'lr': 3.142179953464788e-05, 'samples': 21636096, 'steps': 42257, 'loss/train': 2.436216115951538} +02/26/2022 08:17:34 - INFO - codeparrot_training - Step 42258: {'lr': 3.1413858271273746e-05, 'samples': 21636608, 'steps': 42258, 'loss/train': 1.1742217540740967} +02/26/2022 08:17:38 - INFO - codeparrot_training - Step 42259: {'lr': 3.1405917944253183e-05, 'samples': 21637120, 'steps': 42259, 'loss/train': 2.3030474185943604} +02/26/2022 08:17:41 - INFO - codeparrot_training - Step 42260: {'lr': 3.139797855362031e-05, 'samples': 21637632, 'steps': 42260, 'loss/train': 2.2223446369171143} +02/26/2022 08:17:47 - INFO - codeparrot_training - Step 42261: {'lr': 3.139004009940907e-05, 'samples': 21638144, 'steps': 42261, 'loss/train': 1.2616757154464722} +02/26/2022 08:17:50 - INFO - codeparrot_training - Step 42262: {'lr': 3.138210258165344e-05, 'samples': 21638656, 'steps': 42262, 'loss/train': 0.5822426676750183} +02/26/2022 08:17:56 - INFO - codeparrot_training - Step 42263: {'lr': 3.137416600038753e-05, 'samples': 21639168, 'steps': 42263, 'loss/train': 2.199300527572632} +02/26/2022 08:17:59 - INFO - codeparrot_training - Step 42264: {'lr': 3.136623035564526e-05, 'samples': 21639680, 'steps': 42264, 'loss/train': 1.511427879333496} +02/26/2022 08:18:05 - INFO - codeparrot_training - Step 42265: {'lr': 3.135829564746062e-05, 'samples': 21640192, 'steps': 42265, 'loss/train': 1.935234546661377} +02/26/2022 08:18:08 - INFO - codeparrot_training - Step 42266: {'lr': 3.1350361875867565e-05, 'samples': 21640704, 'steps': 42266, 'loss/train': 1.5492324829101562} +02/26/2022 08:18:14 - INFO - codeparrot_training - Step 42267: {'lr': 3.1342429040900205e-05, 'samples': 21641216, 'steps': 42267, 'loss/train': 1.8779046535491943} +02/26/2022 08:18:17 - INFO - codeparrot_training - Step 42268: {'lr': 3.133449714259243e-05, 'samples': 21641728, 'steps': 42268, 'loss/train': 3.9783482551574707} +02/26/2022 08:18:24 - INFO - codeparrot_training - Step 42269: {'lr': 3.132656618097826e-05, 'samples': 21642240, 'steps': 42269, 'loss/train': 2.906923532485962} +02/26/2022 08:18:27 - INFO - codeparrot_training - Step 42270: {'lr': 3.131863615609157e-05, 'samples': 21642752, 'steps': 42270, 'loss/train': 1.0831663608551025} +02/26/2022 08:18:33 - INFO - codeparrot_training - Step 42271: {'lr': 3.1310707067966456e-05, 'samples': 21643264, 'steps': 42271, 'loss/train': 1.9712858200073242} +02/26/2022 08:18:36 - INFO - codeparrot_training - Step 42272: {'lr': 3.1302778916636824e-05, 'samples': 21643776, 'steps': 42272, 'loss/train': 2.589186906814575} +02/26/2022 08:18:42 - INFO - codeparrot_training - Step 42273: {'lr': 3.129485170213667e-05, 'samples': 21644288, 'steps': 42273, 'loss/train': 2.5730698108673096} +02/26/2022 08:18:45 - INFO - codeparrot_training - Step 42274: {'lr': 3.128692542449985e-05, 'samples': 21644800, 'steps': 42274, 'loss/train': 2.575571298599243} +02/26/2022 08:18:51 - INFO - codeparrot_training - Step 42275: {'lr': 3.127900008376044e-05, 'samples': 21645312, 'steps': 42275, 'loss/train': 2.1145694255828857} +02/26/2022 08:18:54 - INFO - codeparrot_training - Step 42276: {'lr': 3.1271075679952355e-05, 'samples': 21645824, 'steps': 42276, 'loss/train': 0.9241102337837219} +02/26/2022 08:19:00 - INFO - codeparrot_training - Step 42277: {'lr': 3.1263152213109516e-05, 'samples': 21646336, 'steps': 42277, 'loss/train': 0.840644896030426} +02/26/2022 08:19:03 - INFO - codeparrot_training - Step 42278: {'lr': 3.1255229683265826e-05, 'samples': 21646848, 'steps': 42278, 'loss/train': 0.11633437126874924} +02/26/2022 08:19:10 - INFO - codeparrot_training - Step 42279: {'lr': 3.124730809045534e-05, 'samples': 21647360, 'steps': 42279, 'loss/train': 1.53107750415802} +02/26/2022 08:19:13 - INFO - codeparrot_training - Step 42280: {'lr': 3.1239387434711914e-05, 'samples': 21647872, 'steps': 42280, 'loss/train': 1.4984701871871948} +02/26/2022 08:19:19 - INFO - codeparrot_training - Step 42281: {'lr': 3.1231467716069494e-05, 'samples': 21648384, 'steps': 42281, 'loss/train': 0.7035585641860962} +02/26/2022 08:19:22 - INFO - codeparrot_training - Step 42282: {'lr': 3.1223548934561945e-05, 'samples': 21648896, 'steps': 42282, 'loss/train': 2.0375263690948486} +02/26/2022 08:19:28 - INFO - codeparrot_training - Step 42283: {'lr': 3.12156310902233e-05, 'samples': 21649408, 'steps': 42283, 'loss/train': 4.35195779800415} +02/26/2022 08:19:31 - INFO - codeparrot_training - Step 42284: {'lr': 3.120771418308735e-05, 'samples': 21649920, 'steps': 42284, 'loss/train': 1.8416963815689087} +02/26/2022 08:19:37 - INFO - codeparrot_training - Step 42285: {'lr': 3.119979821318819e-05, 'samples': 21650432, 'steps': 42285, 'loss/train': 1.4881956577301025} +02/26/2022 08:19:40 - INFO - codeparrot_training - Step 42286: {'lr': 3.1191883180559515e-05, 'samples': 21650944, 'steps': 42286, 'loss/train': 1.1917601823806763} +02/26/2022 08:19:46 - INFO - codeparrot_training - Step 42287: {'lr': 3.118396908523538e-05, 'samples': 21651456, 'steps': 42287, 'loss/train': 2.698991060256958} +02/26/2022 08:19:49 - INFO - codeparrot_training - Step 42288: {'lr': 3.1176055927249595e-05, 'samples': 21651968, 'steps': 42288, 'loss/train': 2.216356039047241} +02/26/2022 08:19:55 - INFO - codeparrot_training - Step 42289: {'lr': 3.11681437066362e-05, 'samples': 21652480, 'steps': 42289, 'loss/train': 0.3686668574810028} +02/26/2022 08:19:58 - INFO - codeparrot_training - Step 42290: {'lr': 3.1160232423428885e-05, 'samples': 21652992, 'steps': 42290, 'loss/train': 2.4461677074432373} +02/26/2022 08:20:04 - INFO - codeparrot_training - Step 42291: {'lr': 3.1152322077661724e-05, 'samples': 21653504, 'steps': 42291, 'loss/train': 2.5734941959381104} +02/26/2022 08:20:07 - INFO - codeparrot_training - Step 42292: {'lr': 3.114441266936843e-05, 'samples': 21654016, 'steps': 42292, 'loss/train': 1.983603596687317} +02/26/2022 08:20:13 - INFO - codeparrot_training - Step 42293: {'lr': 3.113650419858311e-05, 'samples': 21654528, 'steps': 42293, 'loss/train': 2.3230643272399902} +02/26/2022 08:20:19 - INFO - codeparrot_training - Step 42294: {'lr': 3.11285966653394e-05, 'samples': 21655040, 'steps': 42294, 'loss/train': 1.3155059814453125} +02/26/2022 08:20:23 - INFO - codeparrot_training - Step 42295: {'lr': 3.1120690069671334e-05, 'samples': 21655552, 'steps': 42295, 'loss/train': 0.4166004955768585} +02/26/2022 08:20:27 - INFO - codeparrot_training - Step 42296: {'lr': 3.1112784411612667e-05, 'samples': 21656064, 'steps': 42296, 'loss/train': 7.708520412445068} +02/26/2022 08:20:32 - INFO - codeparrot_training - Step 42297: {'lr': 3.110487969119738e-05, 'samples': 21656576, 'steps': 42297, 'loss/train': 1.7013347148895264} +02/26/2022 08:20:36 - INFO - codeparrot_training - Step 42298: {'lr': 3.1096975908459304e-05, 'samples': 21657088, 'steps': 42298, 'loss/train': 2.0086581707000732} +02/26/2022 08:20:41 - INFO - codeparrot_training - Step 42299: {'lr': 3.108907306343226e-05, 'samples': 21657600, 'steps': 42299, 'loss/train': 2.2423789501190186} +02/26/2022 08:20:45 - INFO - codeparrot_training - Step 42300: {'lr': 3.108117115615005e-05, 'samples': 21658112, 'steps': 42300, 'loss/train': 0.8812868595123291} +02/26/2022 08:20:50 - INFO - codeparrot_training - Step 42301: {'lr': 3.1073270186646655e-05, 'samples': 21658624, 'steps': 42301, 'loss/train': 1.3461356163024902} +02/26/2022 08:20:54 - INFO - codeparrot_training - Step 42302: {'lr': 3.1065370154955835e-05, 'samples': 21659136, 'steps': 42302, 'loss/train': 1.2405699491500854} +02/26/2022 08:20:59 - INFO - codeparrot_training - Step 42303: {'lr': 3.105747106111148e-05, 'samples': 21659648, 'steps': 42303, 'loss/train': 1.9656100273132324} +02/26/2022 08:21:03 - INFO - codeparrot_training - Step 42304: {'lr': 3.1049572905147375e-05, 'samples': 21660160, 'steps': 42304, 'loss/train': 1.7684097290039062} +02/26/2022 08:21:08 - INFO - codeparrot_training - Step 42305: {'lr': 3.104167568709734e-05, 'samples': 21660672, 'steps': 42305, 'loss/train': 0.8035821318626404} +02/26/2022 08:21:11 - INFO - codeparrot_training - Step 42306: {'lr': 3.103377940699528e-05, 'samples': 21661184, 'steps': 42306, 'loss/train': 2.4385085105895996} +02/26/2022 08:21:18 - INFO - codeparrot_training - Step 42307: {'lr': 3.102588406487497e-05, 'samples': 21661696, 'steps': 42307, 'loss/train': 0.8234643340110779} +02/26/2022 08:21:22 - INFO - codeparrot_training - Step 42308: {'lr': 3.101798966077024e-05, 'samples': 21662208, 'steps': 42308, 'loss/train': 1.9941766262054443} +02/26/2022 08:21:27 - INFO - codeparrot_training - Step 42309: {'lr': 3.101009619471487e-05, 'samples': 21662720, 'steps': 42309, 'loss/train': 0.6015418767929077} +02/26/2022 08:21:33 - INFO - codeparrot_training - Step 42310: {'lr': 3.100220366674275e-05, 'samples': 21663232, 'steps': 42310, 'loss/train': 1.7579880952835083} +02/26/2022 08:21:36 - INFO - codeparrot_training - Step 42311: {'lr': 3.099431207688769e-05, 'samples': 21663744, 'steps': 42311, 'loss/train': 1.1578298807144165} +02/26/2022 08:21:42 - INFO - codeparrot_training - Step 42312: {'lr': 3.0986421425183403e-05, 'samples': 21664256, 'steps': 42312, 'loss/train': 1.647736668586731} +02/26/2022 08:21:45 - INFO - codeparrot_training - Step 42313: {'lr': 3.0978531711663706e-05, 'samples': 21664768, 'steps': 42313, 'loss/train': 2.0021708011627197} +02/26/2022 08:21:52 - INFO - codeparrot_training - Step 42314: {'lr': 3.097064293636251e-05, 'samples': 21665280, 'steps': 42314, 'loss/train': 1.823815941810608} +02/26/2022 08:21:55 - INFO - codeparrot_training - Step 42315: {'lr': 3.0962755099313486e-05, 'samples': 21665792, 'steps': 42315, 'loss/train': 2.05018949508667} +02/26/2022 08:22:01 - INFO - codeparrot_training - Step 42316: {'lr': 3.095486820055049e-05, 'samples': 21666304, 'steps': 42316, 'loss/train': 1.6738413572311401} +02/26/2022 08:22:05 - INFO - codeparrot_training - Step 42317: {'lr': 3.0946982240107255e-05, 'samples': 21666816, 'steps': 42317, 'loss/train': 1.5243638753890991} +02/26/2022 08:22:08 - INFO - codeparrot_training - Step 42318: {'lr': 3.09390972180176e-05, 'samples': 21667328, 'steps': 42318, 'loss/train': 0.03138976916670799} +02/26/2022 08:22:14 - INFO - codeparrot_training - Step 42319: {'lr': 3.093121313431527e-05, 'samples': 21667840, 'steps': 42319, 'loss/train': 0.18562248349189758} +02/26/2022 08:22:17 - INFO - codeparrot_training - Step 42320: {'lr': 3.092332998903416e-05, 'samples': 21668352, 'steps': 42320, 'loss/train': 1.573781132698059} +02/26/2022 08:22:23 - INFO - codeparrot_training - Step 42321: {'lr': 3.091544778220781e-05, 'samples': 21668864, 'steps': 42321, 'loss/train': 2.235450029373169} +02/26/2022 08:22:26 - INFO - codeparrot_training - Step 42322: {'lr': 3.090756651387019e-05, 'samples': 21669376, 'steps': 42322, 'loss/train': 1.8926372528076172} +02/26/2022 08:22:32 - INFO - codeparrot_training - Step 42323: {'lr': 3.089968618405492e-05, 'samples': 21669888, 'steps': 42323, 'loss/train': 1.7123184204101562} +02/26/2022 08:22:35 - INFO - codeparrot_training - Step 42324: {'lr': 3.089180679279596e-05, 'samples': 21670400, 'steps': 42324, 'loss/train': 1.0564671754837036} +02/26/2022 08:22:41 - INFO - codeparrot_training - Step 42325: {'lr': 3.088392834012679e-05, 'samples': 21670912, 'steps': 42325, 'loss/train': 1.7197434902191162} +02/26/2022 08:22:44 - INFO - codeparrot_training - Step 42326: {'lr': 3.087605082608136e-05, 'samples': 21671424, 'steps': 42326, 'loss/train': 1.489651083946228} +02/26/2022 08:22:50 - INFO - codeparrot_training - Step 42327: {'lr': 3.086817425069327e-05, 'samples': 21671936, 'steps': 42327, 'loss/train': 1.2963221073150635} +02/26/2022 08:22:55 - INFO - codeparrot_training - Step 42328: {'lr': 3.086029861399647e-05, 'samples': 21672448, 'steps': 42328, 'loss/train': 2.574021100997925} +02/26/2022 08:22:59 - INFO - codeparrot_training - Step 42329: {'lr': 3.085242391602447e-05, 'samples': 21672960, 'steps': 42329, 'loss/train': 1.0760599374771118} +02/26/2022 08:23:05 - INFO - codeparrot_training - Step 42330: {'lr': 3.084455015681112e-05, 'samples': 21673472, 'steps': 42330, 'loss/train': 1.3865455389022827} +02/26/2022 08:23:08 - INFO - codeparrot_training - Step 42331: {'lr': 3.083667733639009e-05, 'samples': 21673984, 'steps': 42331, 'loss/train': 1.760359287261963} +02/26/2022 08:23:14 - INFO - codeparrot_training - Step 42332: {'lr': 3.082880545479519e-05, 'samples': 21674496, 'steps': 42332, 'loss/train': 1.8878101110458374} +02/26/2022 08:23:17 - INFO - codeparrot_training - Step 42333: {'lr': 3.082093451206011e-05, 'samples': 21675008, 'steps': 42333, 'loss/train': 2.0384135246276855} +02/26/2022 08:23:25 - INFO - codeparrot_training - Step 42334: {'lr': 3.0813064508218555e-05, 'samples': 21675520, 'steps': 42334, 'loss/train': 0.2993811070919037} +02/26/2022 08:23:28 - INFO - codeparrot_training - Step 42335: {'lr': 3.080519544330415e-05, 'samples': 21676032, 'steps': 42335, 'loss/train': 2.2004995346069336} +02/26/2022 08:23:34 - INFO - codeparrot_training - Step 42336: {'lr': 3.0797327317350744e-05, 'samples': 21676544, 'steps': 42336, 'loss/train': 1.6180719137191772} +02/26/2022 08:23:38 - INFO - codeparrot_training - Step 42337: {'lr': 3.078946013039199e-05, 'samples': 21677056, 'steps': 42337, 'loss/train': 1.9453177452087402} +02/26/2022 08:23:43 - INFO - codeparrot_training - Step 42338: {'lr': 3.0781593882461575e-05, 'samples': 21677568, 'steps': 42338, 'loss/train': 1.849726915359497} +02/26/2022 08:23:46 - INFO - codeparrot_training - Step 42339: {'lr': 3.077372857359317e-05, 'samples': 21678080, 'steps': 42339, 'loss/train': 0.7992461919784546} +02/26/2022 08:23:52 - INFO - codeparrot_training - Step 42340: {'lr': 3.076586420382052e-05, 'samples': 21678592, 'steps': 42340, 'loss/train': 2.3807413578033447} +02/26/2022 08:23:55 - INFO - codeparrot_training - Step 42341: {'lr': 3.075800077317731e-05, 'samples': 21679104, 'steps': 42341, 'loss/train': 1.4038259983062744} +02/26/2022 08:24:01 - INFO - codeparrot_training - Step 42342: {'lr': 3.075013828169723e-05, 'samples': 21679616, 'steps': 42342, 'loss/train': 0.455833375453949} +02/26/2022 08:24:05 - INFO - codeparrot_training - Step 42343: {'lr': 3.074227672941385e-05, 'samples': 21680128, 'steps': 42343, 'loss/train': 2.2301933765411377} +02/26/2022 08:24:08 - INFO - codeparrot_training - Step 42344: {'lr': 3.0734416116360994e-05, 'samples': 21680640, 'steps': 42344, 'loss/train': 2.6922214031219482} +02/26/2022 08:24:16 - INFO - codeparrot_training - Step 42345: {'lr': 3.07265564425723e-05, 'samples': 21681152, 'steps': 42345, 'loss/train': 2.2362728118896484} +02/26/2022 08:24:20 - INFO - codeparrot_training - Step 42346: {'lr': 3.0718697708081386e-05, 'samples': 21681664, 'steps': 42346, 'loss/train': 1.0965604782104492} +02/26/2022 08:24:25 - INFO - codeparrot_training - Step 42347: {'lr': 3.0710839912921896e-05, 'samples': 21682176, 'steps': 42347, 'loss/train': 2.840318202972412} +02/26/2022 08:24:29 - INFO - codeparrot_training - Step 42348: {'lr': 3.070298305712757e-05, 'samples': 21682688, 'steps': 42348, 'loss/train': 0.7933400869369507} +02/26/2022 08:24:34 - INFO - codeparrot_training - Step 42349: {'lr': 3.0695127140732055e-05, 'samples': 21683200, 'steps': 42349, 'loss/train': 2.0706417560577393} +02/26/2022 08:24:37 - INFO - codeparrot_training - Step 42350: {'lr': 3.0687272163768986e-05, 'samples': 21683712, 'steps': 42350, 'loss/train': 2.36657452583313} +02/26/2022 08:24:43 - INFO - codeparrot_training - Step 42351: {'lr': 3.0679418126271923e-05, 'samples': 21684224, 'steps': 42351, 'loss/train': 1.8002938032150269} +02/26/2022 08:24:47 - INFO - codeparrot_training - Step 42352: {'lr': 3.067156502827465e-05, 'samples': 21684736, 'steps': 42352, 'loss/train': 1.7515136003494263} +02/26/2022 08:24:52 - INFO - codeparrot_training - Step 42353: {'lr': 3.066371286981076e-05, 'samples': 21685248, 'steps': 42353, 'loss/train': 1.3350982666015625} +02/26/2022 08:24:56 - INFO - codeparrot_training - Step 42354: {'lr': 3.065586165091386e-05, 'samples': 21685760, 'steps': 42354, 'loss/train': 1.6943905353546143} +02/26/2022 08:25:03 - INFO - codeparrot_training - Step 42355: {'lr': 3.064801137161763e-05, 'samples': 21686272, 'steps': 42355, 'loss/train': 1.2283161878585815} +02/26/2022 08:25:06 - INFO - codeparrot_training - Step 42356: {'lr': 3.064016203195558e-05, 'samples': 21686784, 'steps': 42356, 'loss/train': 2.336106777191162} +02/26/2022 08:25:12 - INFO - codeparrot_training - Step 42357: {'lr': 3.063231363196148e-05, 'samples': 21687296, 'steps': 42357, 'loss/train': 0.7059616446495056} +02/26/2022 08:25:15 - INFO - codeparrot_training - Step 42358: {'lr': 3.0624466171668864e-05, 'samples': 21687808, 'steps': 42358, 'loss/train': 1.272324800491333} +02/26/2022 08:25:21 - INFO - codeparrot_training - Step 42359: {'lr': 3.0616619651111455e-05, 'samples': 21688320, 'steps': 42359, 'loss/train': 1.4218025207519531} +02/26/2022 08:25:24 - INFO - codeparrot_training - Step 42360: {'lr': 3.06087740703227e-05, 'samples': 21688832, 'steps': 42360, 'loss/train': 1.0767157077789307} +02/26/2022 08:25:28 - INFO - codeparrot_training - Step 42361: {'lr': 3.0600929429336315e-05, 'samples': 21689344, 'steps': 42361, 'loss/train': 1.4596195220947266} +02/26/2022 08:25:33 - INFO - codeparrot_training - Step 42362: {'lr': 3.0593085728185865e-05, 'samples': 21689856, 'steps': 42362, 'loss/train': 1.6313985586166382} +02/26/2022 08:25:39 - INFO - codeparrot_training - Step 42363: {'lr': 3.0585242966905045e-05, 'samples': 21690368, 'steps': 42363, 'loss/train': 1.8777227401733398} +02/26/2022 08:25:42 - INFO - codeparrot_training - Step 42364: {'lr': 3.057740114552726e-05, 'samples': 21690880, 'steps': 42364, 'loss/train': 2.151515007019043} +02/26/2022 08:25:49 - INFO - codeparrot_training - Step 42365: {'lr': 3.05695602640863e-05, 'samples': 21691392, 'steps': 42365, 'loss/train': 1.2571848630905151} +02/26/2022 08:25:53 - INFO - codeparrot_training - Step 42366: {'lr': 3.056172032261559e-05, 'samples': 21691904, 'steps': 42366, 'loss/train': 1.419875979423523} +02/26/2022 08:25:58 - INFO - codeparrot_training - Step 42367: {'lr': 3.0553881321148836e-05, 'samples': 21692416, 'steps': 42367, 'loss/train': 1.7723785638809204} +02/26/2022 08:26:02 - INFO - codeparrot_training - Step 42368: {'lr': 3.05460432597196e-05, 'samples': 21692928, 'steps': 42368, 'loss/train': 1.3066699504852295} +02/26/2022 08:26:07 - INFO - codeparrot_training - Step 42369: {'lr': 3.053820613836142e-05, 'samples': 21693440, 'steps': 42369, 'loss/train': 0.8458231091499329} +02/26/2022 08:26:11 - INFO - codeparrot_training - Step 42370: {'lr': 3.053036995710781e-05, 'samples': 21693952, 'steps': 42370, 'loss/train': 1.9319634437561035} +02/26/2022 08:26:16 - INFO - codeparrot_training - Step 42371: {'lr': 3.0522534715992476e-05, 'samples': 21694464, 'steps': 42371, 'loss/train': 2.048814058303833} +02/26/2022 08:26:20 - INFO - codeparrot_training - Step 42372: {'lr': 3.051470041504889e-05, 'samples': 21694976, 'steps': 42372, 'loss/train': 1.852872371673584} +02/26/2022 08:26:25 - INFO - codeparrot_training - Step 42373: {'lr': 3.0506867054310662e-05, 'samples': 21695488, 'steps': 42373, 'loss/train': 1.510689377784729} +02/26/2022 08:26:29 - INFO - codeparrot_training - Step 42374: {'lr': 3.0499034633811234e-05, 'samples': 21696000, 'steps': 42374, 'loss/train': 2.0618083477020264} +02/26/2022 08:26:36 - INFO - codeparrot_training - Step 42375: {'lr': 3.0491203153584334e-05, 'samples': 21696512, 'steps': 42375, 'loss/train': 1.0150772333145142} +02/26/2022 08:26:39 - INFO - codeparrot_training - Step 42376: {'lr': 3.048337261366338e-05, 'samples': 21697024, 'steps': 42376, 'loss/train': 2.3928732872009277} +02/26/2022 08:26:45 - INFO - codeparrot_training - Step 42377: {'lr': 3.047554301408198e-05, 'samples': 21697536, 'steps': 42377, 'loss/train': 1.7462522983551025} +02/26/2022 08:26:49 - INFO - codeparrot_training - Step 42378: {'lr': 3.0467714354873582e-05, 'samples': 21698048, 'steps': 42378, 'loss/train': 2.236633777618408} +02/26/2022 08:26:52 - INFO - codeparrot_training - Step 42379: {'lr': 3.0459886636071827e-05, 'samples': 21698560, 'steps': 42379, 'loss/train': 0.3334674537181854} +02/26/2022 08:26:58 - INFO - codeparrot_training - Step 42380: {'lr': 3.0452059857710184e-05, 'samples': 21699072, 'steps': 42380, 'loss/train': 1.3294657468795776} +02/26/2022 08:27:01 - INFO - codeparrot_training - Step 42381: {'lr': 3.044423401982227e-05, 'samples': 21699584, 'steps': 42381, 'loss/train': 0.7406435012817383} +02/26/2022 08:27:07 - INFO - codeparrot_training - Step 42382: {'lr': 3.0436409122441472e-05, 'samples': 21700096, 'steps': 42382, 'loss/train': 1.9715319871902466} +02/26/2022 08:27:10 - INFO - codeparrot_training - Step 42383: {'lr': 3.0428585165601403e-05, 'samples': 21700608, 'steps': 42383, 'loss/train': 1.4250870943069458} +02/26/2022 08:27:16 - INFO - codeparrot_training - Step 42384: {'lr': 3.0420762149335562e-05, 'samples': 21701120, 'steps': 42384, 'loss/train': 0.9443029165267944} +02/26/2022 08:27:19 - INFO - codeparrot_training - Step 42385: {'lr': 3.0412940073677454e-05, 'samples': 21701632, 'steps': 42385, 'loss/train': 1.3233338594436646} +02/26/2022 08:27:26 - INFO - codeparrot_training - Step 42386: {'lr': 3.0405118938660547e-05, 'samples': 21702144, 'steps': 42386, 'loss/train': 2.2087249755859375} +02/26/2022 08:27:30 - INFO - codeparrot_training - Step 42387: {'lr': 3.0397298744318404e-05, 'samples': 21702656, 'steps': 42387, 'loss/train': 1.8795509338378906} +02/26/2022 08:27:35 - INFO - codeparrot_training - Step 42388: {'lr': 3.038947949068452e-05, 'samples': 21703168, 'steps': 42388, 'loss/train': 2.835408926010132} +02/26/2022 08:27:39 - INFO - codeparrot_training - Step 42389: {'lr': 3.038166117779237e-05, 'samples': 21703680, 'steps': 42389, 'loss/train': 1.5740962028503418} +02/26/2022 08:27:44 - INFO - codeparrot_training - Step 42390: {'lr': 3.03738438056754e-05, 'samples': 21704192, 'steps': 42390, 'loss/train': 1.1738770008087158} +02/26/2022 08:27:48 - INFO - codeparrot_training - Step 42391: {'lr': 3.0366027374367195e-05, 'samples': 21704704, 'steps': 42391, 'loss/train': 1.4663310050964355} +02/26/2022 08:27:53 - INFO - codeparrot_training - Step 42392: {'lr': 3.035821188390117e-05, 'samples': 21705216, 'steps': 42392, 'loss/train': 1.115710973739624} +02/26/2022 08:27:57 - INFO - codeparrot_training - Step 42393: {'lr': 3.0350397334310774e-05, 'samples': 21705728, 'steps': 42393, 'loss/train': 2.5159871578216553} +02/26/2022 08:28:02 - INFO - codeparrot_training - Step 42394: {'lr': 3.0342583725629592e-05, 'samples': 21706240, 'steps': 42394, 'loss/train': 1.4024797677993774} +02/26/2022 08:28:06 - INFO - codeparrot_training - Step 42395: {'lr': 3.0334771057891036e-05, 'samples': 21706752, 'steps': 42395, 'loss/train': 2.4171509742736816} +02/26/2022 08:28:11 - INFO - codeparrot_training - Step 42396: {'lr': 3.0326959331128558e-05, 'samples': 21707264, 'steps': 42396, 'loss/train': 1.453932285308838} +02/26/2022 08:28:17 - INFO - codeparrot_training - Step 42397: {'lr': 3.031914854537557e-05, 'samples': 21707776, 'steps': 42397, 'loss/train': 1.4566223621368408} +02/26/2022 08:28:20 - INFO - codeparrot_training - Step 42398: {'lr': 3.0311338700665657e-05, 'samples': 21708288, 'steps': 42398, 'loss/train': 1.837753176689148} +02/26/2022 08:28:26 - INFO - codeparrot_training - Step 42399: {'lr': 3.0303529797032214e-05, 'samples': 21708800, 'steps': 42399, 'loss/train': 2.5541043281555176} +02/26/2022 08:28:29 - INFO - codeparrot_training - Step 42400: {'lr': 3.0295721834508684e-05, 'samples': 21709312, 'steps': 42400, 'loss/train': 1.472905158996582} +02/26/2022 08:28:37 - INFO - codeparrot_training - Step 42401: {'lr': 3.0287914813128482e-05, 'samples': 21709824, 'steps': 42401, 'loss/train': 1.9961215257644653} +02/26/2022 08:28:40 - INFO - codeparrot_training - Step 42402: {'lr': 3.0280108732925165e-05, 'samples': 21710336, 'steps': 42402, 'loss/train': 1.735489010810852} +02/26/2022 08:28:46 - INFO - codeparrot_training - Step 42403: {'lr': 3.0272303593931987e-05, 'samples': 21710848, 'steps': 42403, 'loss/train': 0.4769146144390106} +02/26/2022 08:28:50 - INFO - codeparrot_training - Step 42404: {'lr': 3.026449939618256e-05, 'samples': 21711360, 'steps': 42404, 'loss/train': 2.2279601097106934} +02/26/2022 08:28:53 - INFO - codeparrot_training - Step 42405: {'lr': 3.0256696139710187e-05, 'samples': 21711872, 'steps': 42405, 'loss/train': 2.2474365234375} +02/26/2022 08:28:59 - INFO - codeparrot_training - Step 42406: {'lr': 3.0248893824548402e-05, 'samples': 21712384, 'steps': 42406, 'loss/train': 2.2075304985046387} +02/26/2022 08:29:02 - INFO - codeparrot_training - Step 42407: {'lr': 3.024109245073056e-05, 'samples': 21712896, 'steps': 42407, 'loss/train': 2.376878499984741} +02/26/2022 08:29:08 - INFO - codeparrot_training - Step 42408: {'lr': 3.0233292018290113e-05, 'samples': 21713408, 'steps': 42408, 'loss/train': 2.2249040603637695} +02/26/2022 08:29:11 - INFO - codeparrot_training - Step 42409: {'lr': 3.0225492527260424e-05, 'samples': 21713920, 'steps': 42409, 'loss/train': 2.0526816844940186} +02/26/2022 08:29:17 - INFO - codeparrot_training - Step 42410: {'lr': 3.021769397767496e-05, 'samples': 21714432, 'steps': 42410, 'loss/train': 8.584919929504395} +02/26/2022 08:29:20 - INFO - codeparrot_training - Step 42411: {'lr': 3.0209896369567142e-05, 'samples': 21714944, 'steps': 42411, 'loss/train': 1.173508644104004} +02/26/2022 08:29:26 - INFO - codeparrot_training - Step 42412: {'lr': 3.0202099702970303e-05, 'samples': 21715456, 'steps': 42412, 'loss/train': 0.9102339148521423} +02/26/2022 08:29:29 - INFO - codeparrot_training - Step 42413: {'lr': 3.0194303977917835e-05, 'samples': 21715968, 'steps': 42413, 'loss/train': 1.3327618837356567} +02/26/2022 08:29:37 - INFO - codeparrot_training - Step 42414: {'lr': 3.0186509194443214e-05, 'samples': 21716480, 'steps': 42414, 'loss/train': 2.7832436561584473} +02/26/2022 08:29:40 - INFO - codeparrot_training - Step 42415: {'lr': 3.0178715352579793e-05, 'samples': 21716992, 'steps': 42415, 'loss/train': 1.7575819492340088} +02/26/2022 08:29:46 - INFO - codeparrot_training - Step 42416: {'lr': 3.017092245236097e-05, 'samples': 21717504, 'steps': 42416, 'loss/train': 1.674803614616394} +02/26/2022 08:29:49 - INFO - codeparrot_training - Step 42417: {'lr': 3.016313049382005e-05, 'samples': 21718016, 'steps': 42417, 'loss/train': 1.9499880075454712} +02/26/2022 08:29:55 - INFO - codeparrot_training - Step 42418: {'lr': 3.0155339476990533e-05, 'samples': 21718528, 'steps': 42418, 'loss/train': 0.9251204133033752} +02/26/2022 08:29:58 - INFO - codeparrot_training - Step 42419: {'lr': 3.014754940190567e-05, 'samples': 21719040, 'steps': 42419, 'loss/train': 1.6484394073486328} +02/26/2022 08:30:04 - INFO - codeparrot_training - Step 42420: {'lr': 3.0139760268598992e-05, 'samples': 21719552, 'steps': 42420, 'loss/train': 2.097637176513672} +02/26/2022 08:30:07 - INFO - codeparrot_training - Step 42421: {'lr': 3.0131972077103694e-05, 'samples': 21720064, 'steps': 42421, 'loss/train': 1.2741641998291016} +02/26/2022 08:30:12 - INFO - codeparrot_training - Step 42422: {'lr': 3.0124184827453244e-05, 'samples': 21720576, 'steps': 42422, 'loss/train': 2.612875461578369} +02/26/2022 08:30:16 - INFO - codeparrot_training - Step 42423: {'lr': 3.01163985196809e-05, 'samples': 21721088, 'steps': 42423, 'loss/train': 1.790881872177124} +02/26/2022 08:30:23 - INFO - codeparrot_training - Step 42424: {'lr': 3.0108613153820215e-05, 'samples': 21721600, 'steps': 42424, 'loss/train': 2.145505428314209} +02/26/2022 08:30:27 - INFO - codeparrot_training - Step 42425: {'lr': 3.0100828729904274e-05, 'samples': 21722112, 'steps': 42425, 'loss/train': 2.0099036693573} +02/26/2022 08:30:32 - INFO - codeparrot_training - Step 42426: {'lr': 3.0093045247966633e-05, 'samples': 21722624, 'steps': 42426, 'loss/train': 1.2753727436065674} +02/26/2022 08:30:35 - INFO - codeparrot_training - Step 42427: {'lr': 3.008526270804049e-05, 'samples': 21723136, 'steps': 42427, 'loss/train': 1.5990394353866577} +02/26/2022 08:30:41 - INFO - codeparrot_training - Step 42428: {'lr': 3.0077481110159317e-05, 'samples': 21723648, 'steps': 42428, 'loss/train': 1.1991422176361084} +02/26/2022 08:30:45 - INFO - codeparrot_training - Step 42429: {'lr': 3.0069700454356392e-05, 'samples': 21724160, 'steps': 42429, 'loss/train': 1.2713897228240967} +02/26/2022 08:30:50 - INFO - codeparrot_training - Step 42430: {'lr': 3.0061920740664995e-05, 'samples': 21724672, 'steps': 42430, 'loss/train': 2.3636350631713867} +02/26/2022 08:30:53 - INFO - codeparrot_training - Step 42431: {'lr': 3.005414196911846e-05, 'samples': 21725184, 'steps': 42431, 'loss/train': 1.3054356575012207} +02/26/2022 08:30:59 - INFO - codeparrot_training - Step 42432: {'lr': 3.0046364139750203e-05, 'samples': 21725696, 'steps': 42432, 'loss/train': 1.9482861757278442} +02/26/2022 08:31:02 - INFO - codeparrot_training - Step 42433: {'lr': 3.003858725259348e-05, 'samples': 21726208, 'steps': 42433, 'loss/train': 1.681530475616455} +02/26/2022 08:31:10 - INFO - codeparrot_training - Step 42434: {'lr': 3.0030811307681595e-05, 'samples': 21726720, 'steps': 42434, 'loss/train': 0.826411783695221} +02/26/2022 08:31:13 - INFO - codeparrot_training - Step 42435: {'lr': 3.0023036305047853e-05, 'samples': 21727232, 'steps': 42435, 'loss/train': 1.230531096458435} +02/26/2022 08:31:19 - INFO - codeparrot_training - Step 42436: {'lr': 3.001526224472553e-05, 'samples': 21727744, 'steps': 42436, 'loss/train': 1.088001012802124} +02/26/2022 08:31:22 - INFO - codeparrot_training - Step 42437: {'lr': 3.000748912674803e-05, 'samples': 21728256, 'steps': 42437, 'loss/train': 1.0397390127182007} +02/26/2022 08:31:28 - INFO - codeparrot_training - Step 42438: {'lr': 2.9999716951148588e-05, 'samples': 21728768, 'steps': 42438, 'loss/train': 1.8594697713851929} +02/26/2022 08:31:31 - INFO - codeparrot_training - Step 42439: {'lr': 2.9991945717960518e-05, 'samples': 21729280, 'steps': 42439, 'loss/train': 1.3108189105987549} +02/26/2022 08:31:37 - INFO - codeparrot_training - Step 42440: {'lr': 2.9984175427217013e-05, 'samples': 21729792, 'steps': 42440, 'loss/train': 1.2480056285858154} +02/26/2022 08:31:40 - INFO - codeparrot_training - Step 42441: {'lr': 2.9976406078951497e-05, 'samples': 21730304, 'steps': 42441, 'loss/train': 1.4303146600723267} +02/26/2022 08:31:46 - INFO - codeparrot_training - Step 42442: {'lr': 2.9968637673197186e-05, 'samples': 21730816, 'steps': 42442, 'loss/train': 2.3805794715881348} +02/26/2022 08:31:51 - INFO - codeparrot_training - Step 42443: {'lr': 2.996087020998739e-05, 'samples': 21731328, 'steps': 42443, 'loss/train': 0.4328295886516571} +02/26/2022 08:31:55 - INFO - codeparrot_training - Step 42444: {'lr': 2.9953103689355275e-05, 'samples': 21731840, 'steps': 42444, 'loss/train': 1.1838759183883667} +02/26/2022 08:32:00 - INFO - codeparrot_training - Step 42445: {'lr': 2.9945338111334265e-05, 'samples': 21732352, 'steps': 42445, 'loss/train': 1.1324834823608398} +02/26/2022 08:32:03 - INFO - codeparrot_training - Step 42446: {'lr': 2.993757347595752e-05, 'samples': 21732864, 'steps': 42446, 'loss/train': 0.7343243956565857} +02/26/2022 08:32:09 - INFO - codeparrot_training - Step 42447: {'lr': 2.992980978325835e-05, 'samples': 21733376, 'steps': 42447, 'loss/train': 1.7889782190322876} +02/26/2022 08:32:12 - INFO - codeparrot_training - Step 42448: {'lr': 2.992204703326995e-05, 'samples': 21733888, 'steps': 42448, 'loss/train': 0.775888204574585} +02/26/2022 08:32:20 - INFO - codeparrot_training - Step 42449: {'lr': 2.9914285226025655e-05, 'samples': 21734400, 'steps': 42449, 'loss/train': 0.9927094578742981} +02/26/2022 08:32:23 - INFO - codeparrot_training - Step 42450: {'lr': 2.990652436155869e-05, 'samples': 21734912, 'steps': 42450, 'loss/train': 0.948738694190979} +02/26/2022 08:32:29 - INFO - codeparrot_training - Step 42451: {'lr': 2.9898764439902303e-05, 'samples': 21735424, 'steps': 42451, 'loss/train': 1.452772855758667} +02/26/2022 08:32:32 - INFO - codeparrot_training - Step 42452: {'lr': 2.9891005461089638e-05, 'samples': 21735936, 'steps': 42452, 'loss/train': 0.7828855514526367} +02/26/2022 08:32:36 - INFO - codeparrot_training - Step 42453: {'lr': 2.988324742515408e-05, 'samples': 21736448, 'steps': 42453, 'loss/train': 2.955240249633789} +02/26/2022 08:32:41 - INFO - codeparrot_training - Step 42454: {'lr': 2.9875490332128718e-05, 'samples': 21736960, 'steps': 42454, 'loss/train': 2.041384696960449} +02/26/2022 08:32:45 - INFO - codeparrot_training - Step 42455: {'lr': 2.9867734182046996e-05, 'samples': 21737472, 'steps': 42455, 'loss/train': 2.191662073135376} +02/26/2022 08:32:50 - INFO - codeparrot_training - Step 42456: {'lr': 2.9859978974941887e-05, 'samples': 21737984, 'steps': 42456, 'loss/train': 0.8403798937797546} +02/26/2022 08:32:56 - INFO - codeparrot_training - Step 42457: {'lr': 2.985222471084678e-05, 'samples': 21738496, 'steps': 42457, 'loss/train': 1.6946067810058594} +02/26/2022 08:32:59 - INFO - codeparrot_training - Step 42458: {'lr': 2.9844471389794763e-05, 'samples': 21739008, 'steps': 42458, 'loss/train': 1.6430662870407104} +02/26/2022 08:33:07 - INFO - codeparrot_training - Step 42459: {'lr': 2.983671901181925e-05, 'samples': 21739520, 'steps': 42459, 'loss/train': 2.090486764907837} +02/26/2022 08:33:10 - INFO - codeparrot_training - Step 42460: {'lr': 2.982896757695322e-05, 'samples': 21740032, 'steps': 42460, 'loss/train': 3.746598482131958} +02/26/2022 08:33:16 - INFO - codeparrot_training - Step 42461: {'lr': 2.9821217085230002e-05, 'samples': 21740544, 'steps': 42461, 'loss/train': 1.7544538974761963} +02/26/2022 08:33:19 - INFO - codeparrot_training - Step 42462: {'lr': 2.9813467536682764e-05, 'samples': 21741056, 'steps': 42462, 'loss/train': 1.0944174528121948} +02/26/2022 08:33:25 - INFO - codeparrot_training - Step 42463: {'lr': 2.9805718931344788e-05, 'samples': 21741568, 'steps': 42463, 'loss/train': 1.6205421686172485} +02/26/2022 08:33:28 - INFO - codeparrot_training - Step 42464: {'lr': 2.9797971269249103e-05, 'samples': 21742080, 'steps': 42464, 'loss/train': 0.6108075380325317} +02/26/2022 08:33:33 - INFO - codeparrot_training - Step 42465: {'lr': 2.979022455042904e-05, 'samples': 21742592, 'steps': 42465, 'loss/train': 1.625720739364624} +02/26/2022 08:33:37 - INFO - codeparrot_training - Step 42466: {'lr': 2.978247877491769e-05, 'samples': 21743104, 'steps': 42466, 'loss/train': 1.8870893716812134} +02/26/2022 08:33:42 - INFO - codeparrot_training - Step 42467: {'lr': 2.9774733942748295e-05, 'samples': 21743616, 'steps': 42467, 'loss/train': 1.022445559501648} +02/26/2022 08:33:46 - INFO - codeparrot_training - Step 42468: {'lr': 2.976699005395403e-05, 'samples': 21744128, 'steps': 42468, 'loss/train': 1.6900724172592163} +02/26/2022 08:33:53 - INFO - codeparrot_training - Step 42469: {'lr': 2.975924710856803e-05, 'samples': 21744640, 'steps': 42469, 'loss/train': 1.018027424812317} +02/26/2022 08:33:56 - INFO - codeparrot_training - Step 42470: {'lr': 2.9751505106623472e-05, 'samples': 21745152, 'steps': 42470, 'loss/train': 0.714512825012207} +02/26/2022 08:34:02 - INFO - codeparrot_training - Step 42471: {'lr': 2.9743764048153545e-05, 'samples': 21745664, 'steps': 42471, 'loss/train': 2.2454349994659424} +02/26/2022 08:34:06 - INFO - codeparrot_training - Step 42472: {'lr': 2.9736023933191388e-05, 'samples': 21746176, 'steps': 42472, 'loss/train': 1.4878127574920654} +02/26/2022 08:34:11 - INFO - codeparrot_training - Step 42473: {'lr': 2.9728284761770198e-05, 'samples': 21746688, 'steps': 42473, 'loss/train': 0.6094620227813721} +02/26/2022 08:34:15 - INFO - codeparrot_training - Step 42474: {'lr': 2.9720546533923004e-05, 'samples': 21747200, 'steps': 42474, 'loss/train': 1.5329066514968872} +02/26/2022 08:34:20 - INFO - codeparrot_training - Step 42475: {'lr': 2.9712809249683116e-05, 'samples': 21747712, 'steps': 42475, 'loss/train': 1.4630742073059082} +02/26/2022 08:34:24 - INFO - codeparrot_training - Step 42476: {'lr': 2.9705072909083587e-05, 'samples': 21748224, 'steps': 42476, 'loss/train': 1.295642614364624} +02/26/2022 08:34:29 - INFO - codeparrot_training - Step 42477: {'lr': 2.9697337512157586e-05, 'samples': 21748736, 'steps': 42477, 'loss/train': 2.0840306282043457} +02/26/2022 08:34:33 - INFO - codeparrot_training - Step 42478: {'lr': 2.96896030589382e-05, 'samples': 21749248, 'steps': 42478, 'loss/train': 2.4006764888763428} +02/26/2022 08:34:40 - INFO - codeparrot_training - Step 42479: {'lr': 2.9681869549458617e-05, 'samples': 21749760, 'steps': 42479, 'loss/train': 1.8804248571395874} +02/26/2022 08:34:43 - INFO - codeparrot_training - Step 42480: {'lr': 2.967413698375196e-05, 'samples': 21750272, 'steps': 42480, 'loss/train': 2.096904993057251} +02/26/2022 08:34:49 - INFO - codeparrot_training - Step 42481: {'lr': 2.9666405361851335e-05, 'samples': 21750784, 'steps': 42481, 'loss/train': 0.3582897484302521} +02/26/2022 08:34:52 - INFO - codeparrot_training - Step 42482: {'lr': 2.965867468378988e-05, 'samples': 21751296, 'steps': 42482, 'loss/train': 1.8962023258209229} +02/26/2022 08:34:58 - INFO - codeparrot_training - Step 42483: {'lr': 2.9650944949600632e-05, 'samples': 21751808, 'steps': 42483, 'loss/train': 1.4463261365890503} +02/26/2022 08:35:01 - INFO - codeparrot_training - Step 42484: {'lr': 2.964321615931684e-05, 'samples': 21752320, 'steps': 42484, 'loss/train': 3.009766101837158} +02/26/2022 08:35:07 - INFO - codeparrot_training - Step 42485: {'lr': 2.96354883129715e-05, 'samples': 21752832, 'steps': 42485, 'loss/train': 1.0305395126342773} +02/26/2022 08:35:10 - INFO - codeparrot_training - Step 42486: {'lr': 2.9627761410597782e-05, 'samples': 21753344, 'steps': 42486, 'loss/train': 1.8539543151855469} +02/26/2022 08:35:16 - INFO - codeparrot_training - Step 42487: {'lr': 2.962003545222869e-05, 'samples': 21753856, 'steps': 42487, 'loss/train': 2.012070655822754} +02/26/2022 08:35:19 - INFO - codeparrot_training - Step 42488: {'lr': 2.9612310437897472e-05, 'samples': 21754368, 'steps': 42488, 'loss/train': 0.9607176780700684} +02/26/2022 08:35:25 - INFO - codeparrot_training - Step 42489: {'lr': 2.960458636763705e-05, 'samples': 21754880, 'steps': 42489, 'loss/train': 1.3465746641159058} +02/26/2022 08:35:28 - INFO - codeparrot_training - Step 42490: {'lr': 2.9596863241480697e-05, 'samples': 21755392, 'steps': 42490, 'loss/train': 0.32310932874679565} +02/26/2022 08:35:34 - INFO - codeparrot_training - Step 42491: {'lr': 2.958914105946131e-05, 'samples': 21755904, 'steps': 42491, 'loss/train': 1.3257124423980713} +02/26/2022 08:35:38 - INFO - codeparrot_training - Step 42492: {'lr': 2.958141982161211e-05, 'samples': 21756416, 'steps': 42492, 'loss/train': 0.9360162019729614} +02/26/2022 08:35:43 - INFO - codeparrot_training - Step 42493: {'lr': 2.957369952796607e-05, 'samples': 21756928, 'steps': 42493, 'loss/train': 2.098499298095703} +02/26/2022 08:35:47 - INFO - codeparrot_training - Step 42494: {'lr': 2.9565980178556385e-05, 'samples': 21757440, 'steps': 42494, 'loss/train': 2.0931572914123535} +02/26/2022 08:35:54 - INFO - codeparrot_training - Step 42495: {'lr': 2.9558261773415946e-05, 'samples': 21757952, 'steps': 42495, 'loss/train': 1.463809847831726} +02/26/2022 08:35:57 - INFO - codeparrot_training - Step 42496: {'lr': 2.9550544312577975e-05, 'samples': 21758464, 'steps': 42496, 'loss/train': 1.48870849609375} +02/26/2022 08:36:03 - INFO - codeparrot_training - Step 42497: {'lr': 2.954282779607545e-05, 'samples': 21758976, 'steps': 42497, 'loss/train': 1.8429895639419556} +02/26/2022 08:36:06 - INFO - codeparrot_training - Step 42498: {'lr': 2.953511222394151e-05, 'samples': 21759488, 'steps': 42498, 'loss/train': 1.6548742055892944} +02/26/2022 08:36:12 - INFO - codeparrot_training - Step 42499: {'lr': 2.9527397596209066e-05, 'samples': 21760000, 'steps': 42499, 'loss/train': 2.055388927459717} +02/26/2022 08:36:15 - INFO - codeparrot_training - Step 42500: {'lr': 2.9519683912911265e-05, 'samples': 21760512, 'steps': 42500, 'loss/train': 1.2720177173614502} +02/26/2022 08:36:21 - INFO - codeparrot_training - Step 42501: {'lr': 2.951197117408111e-05, 'samples': 21761024, 'steps': 42501, 'loss/train': 0.6935843229293823} +02/26/2022 08:36:24 - INFO - codeparrot_training - Step 42502: {'lr': 2.950425937975168e-05, 'samples': 21761536, 'steps': 42502, 'loss/train': 0.7947617173194885} +02/26/2022 08:36:31 - INFO - codeparrot_training - Step 42503: {'lr': 2.949654852995601e-05, 'samples': 21762048, 'steps': 42503, 'loss/train': 2.2537436485290527} +02/26/2022 08:36:34 - INFO - codeparrot_training - Step 42504: {'lr': 2.9488838624727092e-05, 'samples': 21762560, 'steps': 42504, 'loss/train': 0.44738835096359253} +02/26/2022 08:36:37 - INFO - codeparrot_training - Step 42505: {'lr': 2.948112966409791e-05, 'samples': 21763072, 'steps': 42505, 'loss/train': 2.3323655128479004} +02/26/2022 08:36:45 - INFO - codeparrot_training - Step 42506: {'lr': 2.9473421648101627e-05, 'samples': 21763584, 'steps': 42506, 'loss/train': 0.7735550999641418} +02/26/2022 08:36:48 - INFO - codeparrot_training - Step 42507: {'lr': 2.9465714576771164e-05, 'samples': 21764096, 'steps': 42507, 'loss/train': 1.2459367513656616} +02/26/2022 08:36:54 - INFO - codeparrot_training - Step 42508: {'lr': 2.9458008450139546e-05, 'samples': 21764608, 'steps': 42508, 'loss/train': 2.6039538383483887} +02/26/2022 08:36:57 - INFO - codeparrot_training - Step 42509: {'lr': 2.945030326823975e-05, 'samples': 21765120, 'steps': 42509, 'loss/train': 1.5605965852737427} +02/26/2022 08:37:03 - INFO - codeparrot_training - Step 42510: {'lr': 2.9442599031104855e-05, 'samples': 21765632, 'steps': 42510, 'loss/train': 1.2071714401245117} +02/26/2022 08:37:06 - INFO - codeparrot_training - Step 42511: {'lr': 2.9434895738767843e-05, 'samples': 21766144, 'steps': 42511, 'loss/train': 1.2468230724334717} +02/26/2022 08:37:12 - INFO - codeparrot_training - Step 42512: {'lr': 2.942719339126171e-05, 'samples': 21766656, 'steps': 42512, 'loss/train': 1.1582939624786377} +02/26/2022 08:37:15 - INFO - codeparrot_training - Step 42513: {'lr': 2.9419491988619374e-05, 'samples': 21767168, 'steps': 42513, 'loss/train': 2.4870450496673584} +02/26/2022 08:37:21 - INFO - codeparrot_training - Step 42514: {'lr': 2.941179153087395e-05, 'samples': 21767680, 'steps': 42514, 'loss/train': 1.1085726022720337} +02/26/2022 08:37:24 - INFO - codeparrot_training - Step 42515: {'lr': 2.940409201805838e-05, 'samples': 21768192, 'steps': 42515, 'loss/train': 2.046430826187134} +02/26/2022 08:37:31 - INFO - codeparrot_training - Step 42516: {'lr': 2.9396393450205617e-05, 'samples': 21768704, 'steps': 42516, 'loss/train': 2.6757395267486572} +02/26/2022 08:37:37 - INFO - codeparrot_training - Step 42517: {'lr': 2.9388695827348598e-05, 'samples': 21769216, 'steps': 42517, 'loss/train': 1.5505820512771606} +02/26/2022 08:37:40 - INFO - codeparrot_training - Step 42518: {'lr': 2.9380999149520417e-05, 'samples': 21769728, 'steps': 42518, 'loss/train': 1.9852464199066162} +02/26/2022 08:37:44 - INFO - codeparrot_training - Step 42519: {'lr': 2.9373303416753983e-05, 'samples': 21770240, 'steps': 42519, 'loss/train': 1.8253002166748047} +02/26/2022 08:37:49 - INFO - codeparrot_training - Step 42520: {'lr': 2.9365608629082246e-05, 'samples': 21770752, 'steps': 42520, 'loss/train': 1.0893845558166504} +02/26/2022 08:37:55 - INFO - codeparrot_training - Step 42521: {'lr': 2.9357914786538153e-05, 'samples': 21771264, 'steps': 42521, 'loss/train': 2.7885308265686035} +02/26/2022 08:37:58 - INFO - codeparrot_training - Step 42522: {'lr': 2.9350221889154733e-05, 'samples': 21771776, 'steps': 42522, 'loss/train': 2.164459228515625} +02/26/2022 08:38:04 - INFO - codeparrot_training - Step 42523: {'lr': 2.93425299369649e-05, 'samples': 21772288, 'steps': 42523, 'loss/train': 1.3384541273117065} +02/26/2022 08:38:07 - INFO - codeparrot_training - Step 42524: {'lr': 2.933483893000158e-05, 'samples': 21772800, 'steps': 42524, 'loss/train': 1.1142393350601196} +02/26/2022 08:38:14 - INFO - codeparrot_training - Step 42525: {'lr': 2.932714886829771e-05, 'samples': 21773312, 'steps': 42525, 'loss/train': 1.8720353841781616} +02/26/2022 08:38:18 - INFO - codeparrot_training - Step 42526: {'lr': 2.9319459751886323e-05, 'samples': 21773824, 'steps': 42526, 'loss/train': 0.49738532304763794} +02/26/2022 08:38:23 - INFO - codeparrot_training - Step 42527: {'lr': 2.9311771580800284e-05, 'samples': 21774336, 'steps': 42527, 'loss/train': 1.9495829343795776} +02/26/2022 08:38:27 - INFO - codeparrot_training - Step 42528: {'lr': 2.930408435507248e-05, 'samples': 21774848, 'steps': 42528, 'loss/train': 1.5275200605392456} +02/26/2022 08:38:32 - INFO - codeparrot_training - Step 42529: {'lr': 2.9296398074736025e-05, 'samples': 21775360, 'steps': 42529, 'loss/train': 2.031266689300537} +02/26/2022 08:38:36 - INFO - codeparrot_training - Step 42530: {'lr': 2.9288712739823586e-05, 'samples': 21775872, 'steps': 42530, 'loss/train': 2.146622896194458} +02/26/2022 08:38:41 - INFO - codeparrot_training - Step 42531: {'lr': 2.9281028350368304e-05, 'samples': 21776384, 'steps': 42531, 'loss/train': 1.7802083492279053} +02/26/2022 08:38:45 - INFO - codeparrot_training - Step 42532: {'lr': 2.9273344906402933e-05, 'samples': 21776896, 'steps': 42532, 'loss/train': 0.5005853772163391} +02/26/2022 08:38:50 - INFO - codeparrot_training - Step 42533: {'lr': 2.926566240796058e-05, 'samples': 21777408, 'steps': 42533, 'loss/train': 2.0109593868255615} +02/26/2022 08:38:54 - INFO - codeparrot_training - Step 42534: {'lr': 2.9257980855073918e-05, 'samples': 21777920, 'steps': 42534, 'loss/train': 1.67629873752594} +02/26/2022 08:38:59 - INFO - codeparrot_training - Step 42535: {'lr': 2.925030024777603e-05, 'samples': 21778432, 'steps': 42535, 'loss/train': 0.08264874666929245} +02/26/2022 08:39:03 - INFO - codeparrot_training - Step 42536: {'lr': 2.9242620586099723e-05, 'samples': 21778944, 'steps': 42536, 'loss/train': 2.4621009826660156} +02/26/2022 08:39:08 - INFO - codeparrot_training - Step 42537: {'lr': 2.923494187007797e-05, 'samples': 21779456, 'steps': 42537, 'loss/train': 1.8159173727035522} +02/26/2022 08:39:12 - INFO - codeparrot_training - Step 42538: {'lr': 2.9227264099743634e-05, 'samples': 21779968, 'steps': 42538, 'loss/train': 1.6453264951705933} +02/26/2022 08:39:17 - INFO - codeparrot_training - Step 42539: {'lr': 2.9219587275129578e-05, 'samples': 21780480, 'steps': 42539, 'loss/train': 1.9699792861938477} +02/26/2022 08:39:21 - INFO - codeparrot_training - Step 42540: {'lr': 2.9211911396268664e-05, 'samples': 21780992, 'steps': 42540, 'loss/train': 2.738023281097412} +02/26/2022 08:39:28 - INFO - codeparrot_training - Step 42541: {'lr': 2.9204236463193868e-05, 'samples': 21781504, 'steps': 42541, 'loss/train': 2.03415584564209} +02/26/2022 08:39:31 - INFO - codeparrot_training - Step 42542: {'lr': 2.9196562475938022e-05, 'samples': 21782016, 'steps': 42542, 'loss/train': 1.325456976890564} +02/26/2022 08:39:37 - INFO - codeparrot_training - Step 42543: {'lr': 2.9188889434533988e-05, 'samples': 21782528, 'steps': 42543, 'loss/train': 1.7245045900344849} +02/26/2022 08:39:40 - INFO - codeparrot_training - Step 42544: {'lr': 2.9181217339014577e-05, 'samples': 21783040, 'steps': 42544, 'loss/train': 1.2913734912872314} +02/26/2022 08:39:46 - INFO - codeparrot_training - Step 42545: {'lr': 2.917354618941276e-05, 'samples': 21783552, 'steps': 42545, 'loss/train': 1.4345474243164062} +02/26/2022 08:39:49 - INFO - codeparrot_training - Step 42546: {'lr': 2.916587598576134e-05, 'samples': 21784064, 'steps': 42546, 'loss/train': 2.3989639282226562} +02/26/2022 08:39:55 - INFO - codeparrot_training - Step 42547: {'lr': 2.915820672809319e-05, 'samples': 21784576, 'steps': 42547, 'loss/train': 1.8241488933563232} +02/26/2022 08:39:58 - INFO - codeparrot_training - Step 42548: {'lr': 2.9150538416441135e-05, 'samples': 21785088, 'steps': 42548, 'loss/train': 1.5071810483932495} +02/26/2022 08:40:04 - INFO - codeparrot_training - Step 42549: {'lr': 2.914287105083807e-05, 'samples': 21785600, 'steps': 42549, 'loss/train': 1.6656205654144287} +02/26/2022 08:40:07 - INFO - codeparrot_training - Step 42550: {'lr': 2.9135204631316776e-05, 'samples': 21786112, 'steps': 42550, 'loss/train': 2.246217727661133} +02/26/2022 08:40:15 - INFO - codeparrot_training - Step 42551: {'lr': 2.912753915791022e-05, 'samples': 21786624, 'steps': 42551, 'loss/train': 1.5617811679840088} +02/26/2022 08:40:18 - INFO - codeparrot_training - Step 42552: {'lr': 2.911987463065105e-05, 'samples': 21787136, 'steps': 42552, 'loss/train': 1.8169063329696655} +02/26/2022 08:40:24 - INFO - codeparrot_training - Step 42553: {'lr': 2.9112211049572263e-05, 'samples': 21787648, 'steps': 42553, 'loss/train': 0.3276555836200714} +02/26/2022 08:40:27 - INFO - codeparrot_training - Step 42554: {'lr': 2.9104548414706555e-05, 'samples': 21788160, 'steps': 42554, 'loss/train': 1.6070188283920288} +02/26/2022 08:40:33 - INFO - codeparrot_training - Step 42555: {'lr': 2.9096886726086957e-05, 'samples': 21788672, 'steps': 42555, 'loss/train': 0.46988704800605774} +02/26/2022 08:40:36 - INFO - codeparrot_training - Step 42556: {'lr': 2.9089225983746028e-05, 'samples': 21789184, 'steps': 42556, 'loss/train': 0.986107349395752} +02/26/2022 08:40:42 - INFO - codeparrot_training - Step 42557: {'lr': 2.908156618771676e-05, 'samples': 21789696, 'steps': 42557, 'loss/train': 1.9635928869247437} +02/26/2022 08:40:45 - INFO - codeparrot_training - Step 42558: {'lr': 2.9073907338031864e-05, 'samples': 21790208, 'steps': 42558, 'loss/train': 2.283731698989868} +02/26/2022 08:40:51 - INFO - codeparrot_training - Step 42559: {'lr': 2.9066249434724275e-05, 'samples': 21790720, 'steps': 42559, 'loss/train': 1.7113900184631348} +02/26/2022 08:40:54 - INFO - codeparrot_training - Step 42560: {'lr': 2.9058592477826635e-05, 'samples': 21791232, 'steps': 42560, 'loss/train': 0.26361116766929626} +02/26/2022 08:41:02 - INFO - codeparrot_training - Step 42561: {'lr': 2.905093646737189e-05, 'samples': 21791744, 'steps': 42561, 'loss/train': 1.330338716506958} +02/26/2022 08:41:05 - INFO - codeparrot_training - Step 42562: {'lr': 2.9043281403392768e-05, 'samples': 21792256, 'steps': 42562, 'loss/train': 1.6504161357879639} +02/26/2022 08:41:11 - INFO - codeparrot_training - Step 42563: {'lr': 2.9035627285922017e-05, 'samples': 21792768, 'steps': 42563, 'loss/train': 1.2182122468948364} +02/26/2022 08:41:14 - INFO - codeparrot_training - Step 42564: {'lr': 2.9027974114992528e-05, 'samples': 21793280, 'steps': 42564, 'loss/train': 2.004009246826172} +02/26/2022 08:41:19 - INFO - codeparrot_training - Step 42565: {'lr': 2.9020321890637026e-05, 'samples': 21793792, 'steps': 42565, 'loss/train': 2.345698356628418} +02/26/2022 08:41:23 - INFO - codeparrot_training - Step 42566: {'lr': 2.9012670612888286e-05, 'samples': 21794304, 'steps': 42566, 'loss/train': 1.1215437650680542} +02/26/2022 08:41:29 - INFO - codeparrot_training - Step 42567: {'lr': 2.9005020281779065e-05, 'samples': 21794816, 'steps': 42567, 'loss/train': 0.8166053295135498} +02/26/2022 08:41:32 - INFO - codeparrot_training - Step 42568: {'lr': 2.8997370897342197e-05, 'samples': 21795328, 'steps': 42568, 'loss/train': 1.7318686246871948} +02/26/2022 08:41:37 - INFO - codeparrot_training - Step 42569: {'lr': 2.8989722459610402e-05, 'samples': 21795840, 'steps': 42569, 'loss/train': 1.941198468208313} +02/26/2022 08:41:41 - INFO - codeparrot_training - Step 42570: {'lr': 2.898207496861649e-05, 'samples': 21796352, 'steps': 42570, 'loss/train': 1.4996109008789062} +02/26/2022 08:41:46 - INFO - codeparrot_training - Step 42571: {'lr': 2.8974428424393127e-05, 'samples': 21796864, 'steps': 42571, 'loss/train': 2.7091126441955566} +02/26/2022 08:41:50 - INFO - codeparrot_training - Step 42572: {'lr': 2.896678282697318e-05, 'samples': 21797376, 'steps': 42572, 'loss/train': 2.3050708770751953} +02/26/2022 08:41:55 - INFO - codeparrot_training - Step 42573: {'lr': 2.8959138176389342e-05, 'samples': 21797888, 'steps': 42573, 'loss/train': 2.0634372234344482} +02/26/2022 08:41:59 - INFO - codeparrot_training - Step 42574: {'lr': 2.8951494472674365e-05, 'samples': 21798400, 'steps': 42574, 'loss/train': 1.5202707052230835} +02/26/2022 08:42:05 - INFO - codeparrot_training - Step 42575: {'lr': 2.8943851715860946e-05, 'samples': 21798912, 'steps': 42575, 'loss/train': 0.830349862575531} +02/26/2022 08:42:08 - INFO - codeparrot_training - Step 42576: {'lr': 2.893620990598192e-05, 'samples': 21799424, 'steps': 42576, 'loss/train': 1.5002254247665405} +02/26/2022 08:42:12 - INFO - codeparrot_training - Step 42577: {'lr': 2.892856904306998e-05, 'samples': 21799936, 'steps': 42577, 'loss/train': 0.6080021262168884} +02/26/2022 08:42:19 - INFO - codeparrot_training - Step 42578: {'lr': 2.8920929127157852e-05, 'samples': 21800448, 'steps': 42578, 'loss/train': 0.7348437309265137} +02/26/2022 08:42:23 - INFO - codeparrot_training - Step 42579: {'lr': 2.8913290158278232e-05, 'samples': 21800960, 'steps': 42579, 'loss/train': 1.9319316148757935} +02/26/2022 08:42:28 - INFO - codeparrot_training - Step 42580: {'lr': 2.89056521364639e-05, 'samples': 21801472, 'steps': 42580, 'loss/train': 2.409445285797119} +02/26/2022 08:42:32 - INFO - codeparrot_training - Step 42581: {'lr': 2.889801506174755e-05, 'samples': 21801984, 'steps': 42581, 'loss/train': 0.8004657030105591} +02/26/2022 08:42:37 - INFO - codeparrot_training - Step 42582: {'lr': 2.889037893416191e-05, 'samples': 21802496, 'steps': 42582, 'loss/train': 1.27082359790802} +02/26/2022 08:42:41 - INFO - codeparrot_training - Step 42583: {'lr': 2.8882743753739615e-05, 'samples': 21803008, 'steps': 42583, 'loss/train': 0.9892042279243469} +02/26/2022 08:42:46 - INFO - codeparrot_training - Step 42584: {'lr': 2.8875109520513505e-05, 'samples': 21803520, 'steps': 42584, 'loss/train': 0.99040687084198} +02/26/2022 08:42:50 - INFO - codeparrot_training - Step 42585: {'lr': 2.8867476234516134e-05, 'samples': 21804032, 'steps': 42585, 'loss/train': 2.163007974624634} +02/26/2022 08:42:55 - INFO - codeparrot_training - Step 42586: {'lr': 2.885984389578039e-05, 'samples': 21804544, 'steps': 42586, 'loss/train': 0.9899985194206238} +02/26/2022 08:42:59 - INFO - codeparrot_training - Step 42587: {'lr': 2.8852212504338752e-05, 'samples': 21805056, 'steps': 42587, 'loss/train': 1.6692215204238892} +02/26/2022 08:43:06 - INFO - codeparrot_training - Step 42588: {'lr': 2.8844582060224055e-05, 'samples': 21805568, 'steps': 42588, 'loss/train': 0.6595707535743713} +02/26/2022 08:43:09 - INFO - codeparrot_training - Step 42589: {'lr': 2.883695256346891e-05, 'samples': 21806080, 'steps': 42589, 'loss/train': 1.915860652923584} +02/26/2022 08:43:15 - INFO - codeparrot_training - Step 42590: {'lr': 2.8829324014106122e-05, 'samples': 21806592, 'steps': 42590, 'loss/train': 0.5317558646202087} +02/26/2022 08:43:19 - INFO - codeparrot_training - Step 42591: {'lr': 2.8821696412168168e-05, 'samples': 21807104, 'steps': 42591, 'loss/train': 1.5545148849487305} +02/26/2022 08:43:24 - INFO - codeparrot_training - Step 42592: {'lr': 2.881406975768791e-05, 'samples': 21807616, 'steps': 42592, 'loss/train': 2.173269033432007} +02/26/2022 08:43:28 - INFO - codeparrot_training - Step 42593: {'lr': 2.8806444050697876e-05, 'samples': 21808128, 'steps': 42593, 'loss/train': 2.067986488342285} +02/26/2022 08:43:34 - INFO - codeparrot_training - Step 42594: {'lr': 2.8798819291230905e-05, 'samples': 21808640, 'steps': 42594, 'loss/train': 0.9304419755935669} +02/26/2022 08:43:37 - INFO - codeparrot_training - Step 42595: {'lr': 2.8791195479319437e-05, 'samples': 21809152, 'steps': 42595, 'loss/train': 2.0435805320739746} +02/26/2022 08:43:43 - INFO - codeparrot_training - Step 42596: {'lr': 2.878357261499631e-05, 'samples': 21809664, 'steps': 42596, 'loss/train': 1.7889502048492432} +02/26/2022 08:43:46 - INFO - codeparrot_training - Step 42597: {'lr': 2.8775950698294084e-05, 'samples': 21810176, 'steps': 42597, 'loss/train': 1.5202713012695312} +02/26/2022 08:43:52 - INFO - codeparrot_training - Step 42598: {'lr': 2.876832972924545e-05, 'samples': 21810688, 'steps': 42598, 'loss/train': 0.14154919981956482} +02/26/2022 08:43:55 - INFO - codeparrot_training - Step 42599: {'lr': 2.876070970788308e-05, 'samples': 21811200, 'steps': 42599, 'loss/train': 1.1133460998535156} +02/26/2022 08:44:02 - INFO - codeparrot_training - Step 42600: {'lr': 2.875309063423956e-05, 'samples': 21811712, 'steps': 42600, 'loss/train': 1.5177854299545288} +02/26/2022 08:44:06 - INFO - codeparrot_training - Step 42601: {'lr': 2.87454725083475e-05, 'samples': 21812224, 'steps': 42601, 'loss/train': 1.5799976587295532} +02/26/2022 08:44:11 - INFO - codeparrot_training - Step 42602: {'lr': 2.873785533023962e-05, 'samples': 21812736, 'steps': 42602, 'loss/train': 8.020909309387207} +02/26/2022 08:44:15 - INFO - codeparrot_training - Step 42603: {'lr': 2.8730239099948514e-05, 'samples': 21813248, 'steps': 42603, 'loss/train': 1.5111581087112427} +02/26/2022 08:44:20 - INFO - codeparrot_training - Step 42604: {'lr': 2.8722623817506786e-05, 'samples': 21813760, 'steps': 42604, 'loss/train': 2.1292598247528076} +02/26/2022 08:44:24 - INFO - codeparrot_training - Step 42605: {'lr': 2.8715009482947056e-05, 'samples': 21814272, 'steps': 42605, 'loss/train': 1.3609613180160522} +02/26/2022 08:44:29 - INFO - codeparrot_training - Step 42606: {'lr': 2.870739609630199e-05, 'samples': 21814784, 'steps': 42606, 'loss/train': 1.2966855764389038} +02/26/2022 08:44:33 - INFO - codeparrot_training - Step 42607: {'lr': 2.8699783657604172e-05, 'samples': 21815296, 'steps': 42607, 'loss/train': 1.4713834524154663} +02/26/2022 08:44:38 - INFO - codeparrot_training - Step 42608: {'lr': 2.8692172166886215e-05, 'samples': 21815808, 'steps': 42608, 'loss/train': 0.7541775703430176} +02/26/2022 08:44:42 - INFO - codeparrot_training - Step 42609: {'lr': 2.8684561624180733e-05, 'samples': 21816320, 'steps': 42609, 'loss/train': 1.8889870643615723} +02/26/2022 08:44:49 - INFO - codeparrot_training - Step 42610: {'lr': 2.8676952029520225e-05, 'samples': 21816832, 'steps': 42610, 'loss/train': 1.6704007387161255} +02/26/2022 08:44:54 - INFO - codeparrot_training - Step 42611: {'lr': 2.8669343382937474e-05, 'samples': 21817344, 'steps': 42611, 'loss/train': 1.8519840240478516} +02/26/2022 08:44:58 - INFO - codeparrot_training - Step 42612: {'lr': 2.8661735684464952e-05, 'samples': 21817856, 'steps': 42612, 'loss/train': 2.4465653896331787} +02/26/2022 08:45:04 - INFO - codeparrot_training - Step 42613: {'lr': 2.865412893413527e-05, 'samples': 21818368, 'steps': 42613, 'loss/train': 1.7758008241653442} +02/26/2022 08:45:07 - INFO - codeparrot_training - Step 42614: {'lr': 2.864652313198096e-05, 'samples': 21818880, 'steps': 42614, 'loss/train': 2.6221158504486084} +02/26/2022 08:45:13 - INFO - codeparrot_training - Step 42615: {'lr': 2.863891827803472e-05, 'samples': 21819392, 'steps': 42615, 'loss/train': 1.1948521137237549} +02/26/2022 08:45:16 - INFO - codeparrot_training - Step 42616: {'lr': 2.863131437232905e-05, 'samples': 21819904, 'steps': 42616, 'loss/train': 1.8522579669952393} +02/26/2022 08:45:22 - INFO - codeparrot_training - Step 42617: {'lr': 2.862371141489653e-05, 'samples': 21820416, 'steps': 42617, 'loss/train': 1.9316136837005615} +02/26/2022 08:45:25 - INFO - codeparrot_training - Step 42618: {'lr': 2.8616109405769697e-05, 'samples': 21820928, 'steps': 42618, 'loss/train': 2.8018152713775635} +02/26/2022 08:45:29 - INFO - codeparrot_training - Step 42619: {'lr': 2.8608508344981216e-05, 'samples': 21821440, 'steps': 42619, 'loss/train': 0.1489679366350174} +02/26/2022 08:45:34 - INFO - codeparrot_training - Step 42620: {'lr': 2.860090823256359e-05, 'samples': 21821952, 'steps': 42620, 'loss/train': 1.258629322052002} +02/26/2022 08:45:38 - INFO - codeparrot_training - Step 42621: {'lr': 2.8593309068549344e-05, 'samples': 21822464, 'steps': 42621, 'loss/train': 1.5470376014709473} +02/26/2022 08:45:43 - INFO - codeparrot_training - Step 42622: {'lr': 2.8585710852971015e-05, 'samples': 21822976, 'steps': 42622, 'loss/train': 2.7234253883361816} +02/26/2022 08:45:47 - INFO - codeparrot_training - Step 42623: {'lr': 2.8578113585861264e-05, 'samples': 21823488, 'steps': 42623, 'loss/train': 1.8056012392044067} +02/26/2022 08:45:54 - INFO - codeparrot_training - Step 42624: {'lr': 2.8570517267252488e-05, 'samples': 21824000, 'steps': 42624, 'loss/train': 2.4880359172821045} +02/26/2022 08:45:58 - INFO - codeparrot_training - Step 42625: {'lr': 2.8562921897177408e-05, 'samples': 21824512, 'steps': 42625, 'loss/train': 1.9545180797576904} +02/26/2022 08:46:03 - INFO - codeparrot_training - Step 42626: {'lr': 2.8555327475668358e-05, 'samples': 21825024, 'steps': 42626, 'loss/train': 1.5446813106536865} +02/26/2022 08:46:06 - INFO - codeparrot_training - Step 42627: {'lr': 2.8547734002758035e-05, 'samples': 21825536, 'steps': 42627, 'loss/train': 1.217718482017517} +02/26/2022 08:46:12 - INFO - codeparrot_training - Step 42628: {'lr': 2.8540141478478832e-05, 'samples': 21826048, 'steps': 42628, 'loss/train': 1.8193233013153076} +02/26/2022 08:46:15 - INFO - codeparrot_training - Step 42629: {'lr': 2.853254990286347e-05, 'samples': 21826560, 'steps': 42629, 'loss/train': 1.8696283102035522} +02/26/2022 08:46:21 - INFO - codeparrot_training - Step 42630: {'lr': 2.85249592759442e-05, 'samples': 21827072, 'steps': 42630, 'loss/train': 2.6454615592956543} +02/26/2022 08:46:26 - INFO - codeparrot_training - Step 42631: {'lr': 2.851736959775375e-05, 'samples': 21827584, 'steps': 42631, 'loss/train': 1.6382313966751099} +02/26/2022 08:46:30 - INFO - codeparrot_training - Step 42632: {'lr': 2.8509780868324507e-05, 'samples': 21828096, 'steps': 42632, 'loss/train': 0.9552024006843567} +02/26/2022 08:46:37 - INFO - codeparrot_training - Step 42633: {'lr': 2.8502193087689144e-05, 'samples': 21828608, 'steps': 42633, 'loss/train': 2.2013673782348633} +02/26/2022 08:46:41 - INFO - codeparrot_training - Step 42634: {'lr': 2.8494606255879935e-05, 'samples': 21829120, 'steps': 42634, 'loss/train': 1.4669660329818726} +02/26/2022 08:46:46 - INFO - codeparrot_training - Step 42635: {'lr': 2.848702037292955e-05, 'samples': 21829632, 'steps': 42635, 'loss/train': 1.0452195405960083} +02/26/2022 08:46:49 - INFO - codeparrot_training - Step 42636: {'lr': 2.8479435438870382e-05, 'samples': 21830144, 'steps': 42636, 'loss/train': 1.7292696237564087} +02/26/2022 08:46:55 - INFO - codeparrot_training - Step 42637: {'lr': 2.8471851453735042e-05, 'samples': 21830656, 'steps': 42637, 'loss/train': 1.498539924621582} +02/26/2022 08:46:58 - INFO - codeparrot_training - Step 42638: {'lr': 2.8464268417555923e-05, 'samples': 21831168, 'steps': 42638, 'loss/train': 2.4578187465667725} +02/26/2022 08:47:04 - INFO - codeparrot_training - Step 42639: {'lr': 2.845668633036555e-05, 'samples': 21831680, 'steps': 42639, 'loss/train': 1.7947949171066284} +02/26/2022 08:47:08 - INFO - codeparrot_training - Step 42640: {'lr': 2.8449105192196318e-05, 'samples': 21832192, 'steps': 42640, 'loss/train': 2.132197380065918} +02/26/2022 08:47:13 - INFO - codeparrot_training - Step 42641: {'lr': 2.844152500308081e-05, 'samples': 21832704, 'steps': 42641, 'loss/train': 2.067821502685547} +02/26/2022 08:47:16 - INFO - codeparrot_training - Step 42642: {'lr': 2.8433945763051472e-05, 'samples': 21833216, 'steps': 42642, 'loss/train': 2.615954637527466} +02/26/2022 08:47:24 - INFO - codeparrot_training - Step 42643: {'lr': 2.842636747214075e-05, 'samples': 21833728, 'steps': 42643, 'loss/train': 0.8795211911201477} +02/26/2022 08:47:27 - INFO - codeparrot_training - Step 42644: {'lr': 2.8418790130381067e-05, 'samples': 21834240, 'steps': 42644, 'loss/train': 2.1950387954711914} +02/26/2022 08:47:33 - INFO - codeparrot_training - Step 42645: {'lr': 2.8411213737805e-05, 'samples': 21834752, 'steps': 42645, 'loss/train': 1.9486632347106934} +02/26/2022 08:47:36 - INFO - codeparrot_training - Step 42646: {'lr': 2.8403638294444896e-05, 'samples': 21835264, 'steps': 42646, 'loss/train': 2.1200315952301025} +02/26/2022 08:47:42 - INFO - codeparrot_training - Step 42647: {'lr': 2.8396063800333246e-05, 'samples': 21835776, 'steps': 42647, 'loss/train': 2.3895275592803955} +02/26/2022 08:47:45 - INFO - codeparrot_training - Step 42648: {'lr': 2.838849025550244e-05, 'samples': 21836288, 'steps': 42648, 'loss/train': 0.36600908637046814} +02/26/2022 08:47:51 - INFO - codeparrot_training - Step 42649: {'lr': 2.8380917659985045e-05, 'samples': 21836800, 'steps': 42649, 'loss/train': 1.7769306898117065} +02/26/2022 08:47:54 - INFO - codeparrot_training - Step 42650: {'lr': 2.8373346013813417e-05, 'samples': 21837312, 'steps': 42650, 'loss/train': 1.5565471649169922} +02/26/2022 08:48:00 - INFO - codeparrot_training - Step 42651: {'lr': 2.8365775317020004e-05, 'samples': 21837824, 'steps': 42651, 'loss/train': 1.6738334894180298} +02/26/2022 08:48:03 - INFO - codeparrot_training - Step 42652: {'lr': 2.8358205569637168e-05, 'samples': 21838336, 'steps': 42652, 'loss/train': 0.7688226699829102} +02/26/2022 08:48:11 - INFO - codeparrot_training - Step 42653: {'lr': 2.835063677169744e-05, 'samples': 21838848, 'steps': 42653, 'loss/train': 0.896828293800354} +02/26/2022 08:48:14 - INFO - codeparrot_training - Step 42654: {'lr': 2.834306892323324e-05, 'samples': 21839360, 'steps': 42654, 'loss/train': 1.6306073665618896} +02/26/2022 08:48:20 - INFO - codeparrot_training - Step 42655: {'lr': 2.8335502024276925e-05, 'samples': 21839872, 'steps': 42655, 'loss/train': 1.7563389539718628} +02/26/2022 08:48:23 - INFO - codeparrot_training - Step 42656: {'lr': 2.8327936074860865e-05, 'samples': 21840384, 'steps': 42656, 'loss/train': 1.9090511798858643} +02/26/2022 08:48:29 - INFO - codeparrot_training - Step 42657: {'lr': 2.8320371075017613e-05, 'samples': 21840896, 'steps': 42657, 'loss/train': 1.8975558280944824} +02/26/2022 08:48:32 - INFO - codeparrot_training - Step 42658: {'lr': 2.831280702477951e-05, 'samples': 21841408, 'steps': 42658, 'loss/train': 2.1529974937438965} +02/26/2022 08:48:38 - INFO - codeparrot_training - Step 42659: {'lr': 2.830524392417888e-05, 'samples': 21841920, 'steps': 42659, 'loss/train': 1.7760286331176758} +02/26/2022 08:48:41 - INFO - codeparrot_training - Step 42660: {'lr': 2.829768177324829e-05, 'samples': 21842432, 'steps': 42660, 'loss/train': 1.5817909240722656} +02/26/2022 08:48:46 - INFO - codeparrot_training - Step 42661: {'lr': 2.8290120572019933e-05, 'samples': 21842944, 'steps': 42661, 'loss/train': 1.4045246839523315} +02/26/2022 08:48:50 - INFO - codeparrot_training - Step 42662: {'lr': 2.828256032052634e-05, 'samples': 21843456, 'steps': 42662, 'loss/train': 1.3400585651397705} +02/26/2022 08:48:55 - INFO - codeparrot_training - Step 42663: {'lr': 2.8275001018799817e-05, 'samples': 21843968, 'steps': 42663, 'loss/train': 2.821943521499634} +02/26/2022 08:48:59 - INFO - codeparrot_training - Step 42664: {'lr': 2.8267442666872893e-05, 'samples': 21844480, 'steps': 42664, 'loss/train': 2.115737199783325} +02/26/2022 08:49:04 - INFO - codeparrot_training - Step 42665: {'lr': 2.825988526477771e-05, 'samples': 21844992, 'steps': 42665, 'loss/train': 0.9830888509750366} +02/26/2022 08:49:08 - INFO - codeparrot_training - Step 42666: {'lr': 2.825232881254686e-05, 'samples': 21845504, 'steps': 42666, 'loss/train': 0.5918101668357849} +02/26/2022 08:49:13 - INFO - codeparrot_training - Step 42667: {'lr': 2.8244773310212522e-05, 'samples': 21846016, 'steps': 42667, 'loss/train': 0.917984127998352} +02/26/2022 08:49:17 - INFO - codeparrot_training - Step 42668: {'lr': 2.8237218757807297e-05, 'samples': 21846528, 'steps': 42668, 'loss/train': 1.7263184785842896} +02/26/2022 08:49:22 - INFO - codeparrot_training - Step 42669: {'lr': 2.8229665155363294e-05, 'samples': 21847040, 'steps': 42669, 'loss/train': 1.574794888496399} +02/26/2022 08:49:26 - INFO - codeparrot_training - Step 42670: {'lr': 2.8222112502913037e-05, 'samples': 21847552, 'steps': 42670, 'loss/train': 2.032003879547119} +02/26/2022 08:49:33 - INFO - codeparrot_training - Step 42671: {'lr': 2.8214560800488788e-05, 'samples': 21848064, 'steps': 42671, 'loss/train': 2.283432960510254} +02/26/2022 08:49:36 - INFO - codeparrot_training - Step 42672: {'lr': 2.8207010048122954e-05, 'samples': 21848576, 'steps': 42672, 'loss/train': 1.1152524948120117} +02/26/2022 08:49:42 - INFO - codeparrot_training - Step 42673: {'lr': 2.819946024584791e-05, 'samples': 21849088, 'steps': 42673, 'loss/train': 1.4223766326904297} +02/26/2022 08:49:46 - INFO - codeparrot_training - Step 42674: {'lr': 2.8191911393695923e-05, 'samples': 21849600, 'steps': 42674, 'loss/train': 2.281524896621704} +02/26/2022 08:49:51 - INFO - codeparrot_training - Step 42675: {'lr': 2.8184363491699285e-05, 'samples': 21850112, 'steps': 42675, 'loss/train': 1.5728832483291626} +02/26/2022 08:49:55 - INFO - codeparrot_training - Step 42676: {'lr': 2.8176816539890488e-05, 'samples': 21850624, 'steps': 42676, 'loss/train': 0.5886314511299133} +02/26/2022 08:50:00 - INFO - codeparrot_training - Step 42677: {'lr': 2.8169270538301733e-05, 'samples': 21851136, 'steps': 42677, 'loss/train': 1.7444263696670532} +02/26/2022 08:50:04 - INFO - codeparrot_training - Step 42678: {'lr': 2.816172548696541e-05, 'samples': 21851648, 'steps': 42678, 'loss/train': 2.145519495010376} +02/26/2022 08:50:09 - INFO - codeparrot_training - Step 42679: {'lr': 2.8154181385913747e-05, 'samples': 21852160, 'steps': 42679, 'loss/train': 2.729649782180786} +02/26/2022 08:50:13 - INFO - codeparrot_training - Step 42680: {'lr': 2.8146638235179213e-05, 'samples': 21852672, 'steps': 42680, 'loss/train': 2.2197585105895996} +02/26/2022 08:50:20 - INFO - codeparrot_training - Step 42681: {'lr': 2.8139096034794005e-05, 'samples': 21853184, 'steps': 42681, 'loss/train': 2.3298866748809814} +02/26/2022 08:50:23 - INFO - codeparrot_training - Step 42682: {'lr': 2.813155478479046e-05, 'samples': 21853696, 'steps': 42682, 'loss/train': 0.1754862368106842} +02/26/2022 08:50:29 - INFO - codeparrot_training - Step 42683: {'lr': 2.8124014485200827e-05, 'samples': 21854208, 'steps': 42683, 'loss/train': 1.8918489217758179} +02/26/2022 08:50:32 - INFO - codeparrot_training - Step 42684: {'lr': 2.811647513605753e-05, 'samples': 21854720, 'steps': 42684, 'loss/train': 1.54283607006073} +02/26/2022 08:50:38 - INFO - codeparrot_training - Step 42685: {'lr': 2.810893673739273e-05, 'samples': 21855232, 'steps': 42685, 'loss/train': 1.3746970891952515} +02/26/2022 08:50:41 - INFO - codeparrot_training - Step 42686: {'lr': 2.810139928923891e-05, 'samples': 21855744, 'steps': 42686, 'loss/train': 1.8520458936691284} +02/26/2022 08:50:47 - INFO - codeparrot_training - Step 42687: {'lr': 2.809386279162812e-05, 'samples': 21856256, 'steps': 42687, 'loss/train': 1.4545599222183228} +02/26/2022 08:50:50 - INFO - codeparrot_training - Step 42688: {'lr': 2.8086327244592815e-05, 'samples': 21856768, 'steps': 42688, 'loss/train': 1.3848674297332764} +02/26/2022 08:50:56 - INFO - codeparrot_training - Step 42689: {'lr': 2.8078792648165154e-05, 'samples': 21857280, 'steps': 42689, 'loss/train': 1.182730793952942} +02/26/2022 08:51:00 - INFO - codeparrot_training - Step 42690: {'lr': 2.8071259002377585e-05, 'samples': 21857792, 'steps': 42690, 'loss/train': 1.8035038709640503} +02/26/2022 08:51:07 - INFO - codeparrot_training - Step 42691: {'lr': 2.8063726307262172e-05, 'samples': 21858304, 'steps': 42691, 'loss/train': 7.538046836853027} +02/26/2022 08:51:11 - INFO - codeparrot_training - Step 42692: {'lr': 2.8056194562851355e-05, 'samples': 21858816, 'steps': 42692, 'loss/train': 0.4667845368385315} +02/26/2022 08:51:14 - INFO - codeparrot_training - Step 42693: {'lr': 2.8048663769177308e-05, 'samples': 21859328, 'steps': 42693, 'loss/train': 1.344470739364624} +02/26/2022 08:51:19 - INFO - codeparrot_training - Step 42694: {'lr': 2.804113392627225e-05, 'samples': 21859840, 'steps': 42694, 'loss/train': 2.0444798469543457} +02/26/2022 08:51:23 - INFO - codeparrot_training - Step 42695: {'lr': 2.8033605034168546e-05, 'samples': 21860352, 'steps': 42695, 'loss/train': 2.03246808052063} +02/26/2022 08:51:29 - INFO - codeparrot_training - Step 42696: {'lr': 2.8026077092898396e-05, 'samples': 21860864, 'steps': 42696, 'loss/train': 1.15717613697052} +02/26/2022 08:51:32 - INFO - codeparrot_training - Step 42697: {'lr': 2.8018550102494046e-05, 'samples': 21861376, 'steps': 42697, 'loss/train': 5.0372796058654785} +02/26/2022 08:51:38 - INFO - codeparrot_training - Step 42698: {'lr': 2.8011024062987693e-05, 'samples': 21861888, 'steps': 42698, 'loss/train': 1.5328001976013184} +02/26/2022 08:51:41 - INFO - codeparrot_training - Step 42699: {'lr': 2.8003498974411678e-05, 'samples': 21862400, 'steps': 42699, 'loss/train': 1.55319082736969} +02/26/2022 08:51:46 - INFO - codeparrot_training - Step 42700: {'lr': 2.7995974836798194e-05, 'samples': 21862912, 'steps': 42700, 'loss/train': 1.9834460020065308} +02/26/2022 08:51:50 - INFO - codeparrot_training - Step 42701: {'lr': 2.7988451650179435e-05, 'samples': 21863424, 'steps': 42701, 'loss/train': 1.7467377185821533} +02/26/2022 08:51:57 - INFO - codeparrot_training - Step 42702: {'lr': 2.7980929414587602e-05, 'samples': 21863936, 'steps': 42702, 'loss/train': 1.1268304586410522} +02/26/2022 08:52:01 - INFO - codeparrot_training - Step 42703: {'lr': 2.797340813005503e-05, 'samples': 21864448, 'steps': 42703, 'loss/train': 1.7960563898086548} +02/26/2022 08:52:06 - INFO - codeparrot_training - Step 42704: {'lr': 2.796588779661388e-05, 'samples': 21864960, 'steps': 42704, 'loss/train': 2.186213970184326} +02/26/2022 08:52:10 - INFO - codeparrot_training - Step 42705: {'lr': 2.795836841429636e-05, 'samples': 21865472, 'steps': 42705, 'loss/train': 1.7804704904556274} +02/26/2022 08:52:15 - INFO - codeparrot_training - Step 42706: {'lr': 2.795084998313463e-05, 'samples': 21865984, 'steps': 42706, 'loss/train': 1.4118626117706299} +02/26/2022 08:52:19 - INFO - codeparrot_training - Step 42707: {'lr': 2.7943332503161e-05, 'samples': 21866496, 'steps': 42707, 'loss/train': 1.3047691583633423} +02/26/2022 08:52:24 - INFO - codeparrot_training - Step 42708: {'lr': 2.793581597440764e-05, 'samples': 21867008, 'steps': 42708, 'loss/train': 1.8832931518554688} +02/26/2022 08:52:28 - INFO - codeparrot_training - Step 42709: {'lr': 2.792830039690672e-05, 'samples': 21867520, 'steps': 42709, 'loss/train': 2.0773541927337646} +02/26/2022 08:52:33 - INFO - codeparrot_training - Step 42710: {'lr': 2.7920785770690377e-05, 'samples': 21868032, 'steps': 42710, 'loss/train': 2.4615700244903564} +02/26/2022 08:52:36 - INFO - codeparrot_training - Step 42711: {'lr': 2.791327209579095e-05, 'samples': 21868544, 'steps': 42711, 'loss/train': 1.280531406402588} +02/26/2022 08:52:42 - INFO - codeparrot_training - Step 42712: {'lr': 2.790575937224052e-05, 'samples': 21869056, 'steps': 42712, 'loss/train': 1.0784783363342285} +02/26/2022 08:52:45 - INFO - codeparrot_training - Step 42713: {'lr': 2.7898247600071284e-05, 'samples': 21869568, 'steps': 42713, 'loss/train': 2.4117431640625} +02/26/2022 08:52:51 - INFO - codeparrot_training - Step 42714: {'lr': 2.7890736779315417e-05, 'samples': 21870080, 'steps': 42714, 'loss/train': 2.470004081726074} +02/26/2022 08:52:54 - INFO - codeparrot_training - Step 42715: {'lr': 2.788322691000514e-05, 'samples': 21870592, 'steps': 42715, 'loss/train': 1.7888002395629883} +02/26/2022 08:53:02 - INFO - codeparrot_training - Step 42716: {'lr': 2.787571799217259e-05, 'samples': 21871104, 'steps': 42716, 'loss/train': 1.6819690465927124} +02/26/2022 08:53:05 - INFO - codeparrot_training - Step 42717: {'lr': 2.786821002584991e-05, 'samples': 21871616, 'steps': 42717, 'loss/train': 2.0524652004241943} +02/26/2022 08:53:11 - INFO - codeparrot_training - Step 42718: {'lr': 2.7860703011069244e-05, 'samples': 21872128, 'steps': 42718, 'loss/train': 1.5147196054458618} +02/26/2022 08:53:14 - INFO - codeparrot_training - Step 42719: {'lr': 2.785319694786287e-05, 'samples': 21872640, 'steps': 42719, 'loss/train': 2.29386568069458} +02/26/2022 08:53:20 - INFO - codeparrot_training - Step 42720: {'lr': 2.784569183626276e-05, 'samples': 21873152, 'steps': 42720, 'loss/train': 0.7137149572372437} +02/26/2022 08:53:23 - INFO - codeparrot_training - Step 42721: {'lr': 2.783818767630131e-05, 'samples': 21873664, 'steps': 42721, 'loss/train': 2.816556692123413} +02/26/2022 08:53:29 - INFO - codeparrot_training - Step 42722: {'lr': 2.7830684468010403e-05, 'samples': 21874176, 'steps': 42722, 'loss/train': 0.13773326575756073} +02/26/2022 08:53:33 - INFO - codeparrot_training - Step 42723: {'lr': 2.7823182211422326e-05, 'samples': 21874688, 'steps': 42723, 'loss/train': 1.8043315410614014} +02/26/2022 08:53:38 - INFO - codeparrot_training - Step 42724: {'lr': 2.7815680906569162e-05, 'samples': 21875200, 'steps': 42724, 'loss/train': 2.102477550506592} +02/26/2022 08:53:42 - INFO - codeparrot_training - Step 42725: {'lr': 2.7808180553483164e-05, 'samples': 21875712, 'steps': 42725, 'loss/train': 1.5142573118209839} +02/26/2022 08:53:47 - INFO - codeparrot_training - Step 42726: {'lr': 2.7800681152196277e-05, 'samples': 21876224, 'steps': 42726, 'loss/train': 1.1307779550552368} +02/26/2022 08:53:50 - INFO - codeparrot_training - Step 42727: {'lr': 2.7793182702740753e-05, 'samples': 21876736, 'steps': 42727, 'loss/train': 0.04525814577937126} +02/26/2022 08:53:58 - INFO - codeparrot_training - Step 42728: {'lr': 2.7785685205148625e-05, 'samples': 21877248, 'steps': 42728, 'loss/train': 1.863305926322937} +02/26/2022 08:54:01 - INFO - codeparrot_training - Step 42729: {'lr': 2.777818865945217e-05, 'samples': 21877760, 'steps': 42729, 'loss/train': 1.04934823513031} +02/26/2022 08:54:07 - INFO - codeparrot_training - Step 42730: {'lr': 2.7770693065683277e-05, 'samples': 21878272, 'steps': 42730, 'loss/train': 1.5525736808776855} +02/26/2022 08:54:10 - INFO - codeparrot_training - Step 42731: {'lr': 2.776319842387423e-05, 'samples': 21878784, 'steps': 42731, 'loss/train': 2.3730316162109375} +02/26/2022 08:54:16 - INFO - codeparrot_training - Step 42732: {'lr': 2.775570473405703e-05, 'samples': 21879296, 'steps': 42732, 'loss/train': 1.4984279870986938} +02/26/2022 08:54:19 - INFO - codeparrot_training - Step 42733: {'lr': 2.7748211996263845e-05, 'samples': 21879808, 'steps': 42733, 'loss/train': 0.18546174466609955} +02/26/2022 08:54:25 - INFO - codeparrot_training - Step 42734: {'lr': 2.774072021052676e-05, 'samples': 21880320, 'steps': 42734, 'loss/train': 1.2692204713821411} +02/26/2022 08:54:28 - INFO - codeparrot_training - Step 42735: {'lr': 2.773322937687786e-05, 'samples': 21880832, 'steps': 42735, 'loss/train': 1.0544992685317993} +02/26/2022 08:54:34 - INFO - codeparrot_training - Step 42736: {'lr': 2.772573949534918e-05, 'samples': 21881344, 'steps': 42736, 'loss/train': 2.1529440879821777} +02/26/2022 08:54:37 - INFO - codeparrot_training - Step 42737: {'lr': 2.771825056597291e-05, 'samples': 21881856, 'steps': 42737, 'loss/train': 1.9256763458251953} +02/26/2022 08:54:45 - INFO - codeparrot_training - Step 42738: {'lr': 2.7710762588781053e-05, 'samples': 21882368, 'steps': 42738, 'loss/train': 2.49788761138916} +02/26/2022 08:54:50 - INFO - codeparrot_training - Step 42739: {'lr': 2.7703275563805697e-05, 'samples': 21882880, 'steps': 42739, 'loss/train': 1.7890676259994507} +02/26/2022 08:54:54 - INFO - codeparrot_training - Step 42740: {'lr': 2.7695789491078925e-05, 'samples': 21883392, 'steps': 42740, 'loss/train': 3.541902542114258} +02/26/2022 08:54:57 - INFO - codeparrot_training - Step 42741: {'lr': 2.7688304370632773e-05, 'samples': 21883904, 'steps': 42741, 'loss/train': 0.9804078340530396} +02/26/2022 08:55:03 - INFO - codeparrot_training - Step 42742: {'lr': 2.7680820202499373e-05, 'samples': 21884416, 'steps': 42742, 'loss/train': 2.0154688358306885} +02/26/2022 08:55:08 - INFO - codeparrot_training - Step 42743: {'lr': 2.7673336986710733e-05, 'samples': 21884928, 'steps': 42743, 'loss/train': 1.085662841796875} +02/26/2022 08:55:12 - INFO - codeparrot_training - Step 42744: {'lr': 2.766585472329894e-05, 'samples': 21885440, 'steps': 42744, 'loss/train': 2.0492982864379883} +02/26/2022 08:55:17 - INFO - codeparrot_training - Step 42745: {'lr': 2.7658373412295962e-05, 'samples': 21885952, 'steps': 42745, 'loss/train': 1.88246750831604} +02/26/2022 08:55:21 - INFO - codeparrot_training - Step 42746: {'lr': 2.7650893053733972e-05, 'samples': 21886464, 'steps': 42746, 'loss/train': 2.7981507778167725} +02/26/2022 08:55:28 - INFO - codeparrot_training - Step 42747: {'lr': 2.7643413647644945e-05, 'samples': 21886976, 'steps': 42747, 'loss/train': 1.7878522872924805} +02/26/2022 08:55:31 - INFO - codeparrot_training - Step 42748: {'lr': 2.7635935194060933e-05, 'samples': 21887488, 'steps': 42748, 'loss/train': 1.369206428527832} +02/26/2022 08:55:35 - INFO - codeparrot_training - Step 42749: {'lr': 2.762845769301389e-05, 'samples': 21888000, 'steps': 42749, 'loss/train': 1.4847517013549805} +02/26/2022 08:55:40 - INFO - codeparrot_training - Step 42750: {'lr': 2.762098114453601e-05, 'samples': 21888512, 'steps': 42750, 'loss/train': 1.7825376987457275} +02/26/2022 08:55:46 - INFO - codeparrot_training - Step 42751: {'lr': 2.761350554865921e-05, 'samples': 21889024, 'steps': 42751, 'loss/train': 2.1239709854125977} +02/26/2022 08:55:49 - INFO - codeparrot_training - Step 42752: {'lr': 2.7606030905415552e-05, 'samples': 21889536, 'steps': 42752, 'loss/train': 1.5714926719665527} +02/26/2022 08:55:55 - INFO - codeparrot_training - Step 42753: {'lr': 2.7598557214836977e-05, 'samples': 21890048, 'steps': 42753, 'loss/train': 1.631894588470459} +02/26/2022 08:55:58 - INFO - codeparrot_training - Step 42754: {'lr': 2.759108447695563e-05, 'samples': 21890560, 'steps': 42754, 'loss/train': 1.4789563417434692} +02/26/2022 08:56:04 - INFO - codeparrot_training - Step 42755: {'lr': 2.7583612691803373e-05, 'samples': 21891072, 'steps': 42755, 'loss/train': 1.7448941469192505} +02/26/2022 08:56:07 - INFO - codeparrot_training - Step 42756: {'lr': 2.757614185941243e-05, 'samples': 21891584, 'steps': 42756, 'loss/train': 0.7392439842224121} +02/26/2022 08:56:13 - INFO - codeparrot_training - Step 42757: {'lr': 2.7568671979814554e-05, 'samples': 21892096, 'steps': 42757, 'loss/train': 1.2305270433425903} +02/26/2022 08:56:16 - INFO - codeparrot_training - Step 42758: {'lr': 2.7561203053041882e-05, 'samples': 21892608, 'steps': 42758, 'loss/train': 2.3158226013183594} +02/26/2022 08:56:22 - INFO - codeparrot_training - Step 42759: {'lr': 2.7553735079126368e-05, 'samples': 21893120, 'steps': 42759, 'loss/train': 0.6426398158073425} +02/26/2022 08:56:25 - INFO - codeparrot_training - Step 42760: {'lr': 2.7546268058100094e-05, 'samples': 21893632, 'steps': 42760, 'loss/train': 2.1408002376556396} +02/26/2022 08:56:31 - INFO - codeparrot_training - Step 42761: {'lr': 2.7538801989994894e-05, 'samples': 21894144, 'steps': 42761, 'loss/train': 0.8857948184013367} +02/26/2022 08:56:34 - INFO - codeparrot_training - Step 42762: {'lr': 2.7531336874842855e-05, 'samples': 21894656, 'steps': 42762, 'loss/train': 2.3472254276275635} +02/26/2022 08:56:41 - INFO - codeparrot_training - Step 42763: {'lr': 2.7523872712675896e-05, 'samples': 21895168, 'steps': 42763, 'loss/train': 1.4410910606384277} +02/26/2022 08:56:45 - INFO - codeparrot_training - Step 42764: {'lr': 2.751640950352613e-05, 'samples': 21895680, 'steps': 42764, 'loss/train': 0.9286631941795349} +02/26/2022 08:56:50 - INFO - codeparrot_training - Step 42765: {'lr': 2.7508947247425282e-05, 'samples': 21896192, 'steps': 42765, 'loss/train': 1.7183406352996826} +02/26/2022 08:56:54 - INFO - codeparrot_training - Step 42766: {'lr': 2.7501485944405547e-05, 'samples': 21896704, 'steps': 42766, 'loss/train': 2.250690460205078} +02/26/2022 08:56:59 - INFO - codeparrot_training - Step 42767: {'lr': 2.7494025594498707e-05, 'samples': 21897216, 'steps': 42767, 'loss/train': 2.3027243614196777} +02/26/2022 08:57:03 - INFO - codeparrot_training - Step 42768: {'lr': 2.7486566197736873e-05, 'samples': 21897728, 'steps': 42768, 'loss/train': 1.9800598621368408} +02/26/2022 08:57:08 - INFO - codeparrot_training - Step 42769: {'lr': 2.7479107754151937e-05, 'samples': 21898240, 'steps': 42769, 'loss/train': 1.6510326862335205} +02/26/2022 08:57:12 - INFO - codeparrot_training - Step 42770: {'lr': 2.7471650263775848e-05, 'samples': 21898752, 'steps': 42770, 'loss/train': 1.366326928138733} +02/26/2022 08:57:17 - INFO - codeparrot_training - Step 42771: {'lr': 2.7464193726640497e-05, 'samples': 21899264, 'steps': 42771, 'loss/train': 1.0782345533370972} +02/26/2022 08:57:21 - INFO - codeparrot_training - Step 42772: {'lr': 2.745673814277794e-05, 'samples': 21899776, 'steps': 42772, 'loss/train': 1.430353045463562} +02/26/2022 08:57:28 - INFO - codeparrot_training - Step 42773: {'lr': 2.744928351222001e-05, 'samples': 21900288, 'steps': 42773, 'loss/train': 1.9887620210647583} +02/26/2022 08:57:31 - INFO - codeparrot_training - Step 42774: {'lr': 2.744182983499871e-05, 'samples': 21900800, 'steps': 42774, 'loss/train': 1.608001947402954} +02/26/2022 08:57:37 - INFO - codeparrot_training - Step 42775: {'lr': 2.7434377111145908e-05, 'samples': 21901312, 'steps': 42775, 'loss/train': 1.6254725456237793} +02/26/2022 08:57:40 - INFO - codeparrot_training - Step 42776: {'lr': 2.7426925340693577e-05, 'samples': 21901824, 'steps': 42776, 'loss/train': 1.550337791442871} +02/26/2022 08:57:46 - INFO - codeparrot_training - Step 42777: {'lr': 2.7419474523673633e-05, 'samples': 21902336, 'steps': 42777, 'loss/train': 1.4018499851226807} +02/26/2022 08:57:49 - INFO - codeparrot_training - Step 42778: {'lr': 2.7412024660117997e-05, 'samples': 21902848, 'steps': 42778, 'loss/train': 1.440045952796936} +02/26/2022 08:57:55 - INFO - codeparrot_training - Step 42779: {'lr': 2.7404575750058503e-05, 'samples': 21903360, 'steps': 42779, 'loss/train': 1.0258502960205078} +02/26/2022 08:57:58 - INFO - codeparrot_training - Step 42780: {'lr': 2.7397127793527183e-05, 'samples': 21903872, 'steps': 42780, 'loss/train': 1.6109308004379272} +02/26/2022 08:58:04 - INFO - codeparrot_training - Step 42781: {'lr': 2.7389680790555872e-05, 'samples': 21904384, 'steps': 42781, 'loss/train': 2.0095551013946533} +02/26/2022 08:58:07 - INFO - codeparrot_training - Step 42782: {'lr': 2.738223474117649e-05, 'samples': 21904896, 'steps': 42782, 'loss/train': 2.636659622192383} +02/26/2022 08:58:15 - INFO - codeparrot_training - Step 42783: {'lr': 2.7374789645420895e-05, 'samples': 21905408, 'steps': 42783, 'loss/train': 0.7914487719535828} +02/26/2022 08:58:18 - INFO - codeparrot_training - Step 42784: {'lr': 2.736734550332104e-05, 'samples': 21905920, 'steps': 42784, 'loss/train': 2.005279064178467} +02/26/2022 08:58:24 - INFO - codeparrot_training - Step 42785: {'lr': 2.7359902314908758e-05, 'samples': 21906432, 'steps': 42785, 'loss/train': 2.5495150089263916} +02/26/2022 08:58:27 - INFO - codeparrot_training - Step 42786: {'lr': 2.7352460080215995e-05, 'samples': 21906944, 'steps': 42786, 'loss/train': 1.6117534637451172} +02/26/2022 08:58:33 - INFO - codeparrot_training - Step 42787: {'lr': 2.734501879927459e-05, 'samples': 21907456, 'steps': 42787, 'loss/train': 1.3697905540466309} +02/26/2022 08:58:36 - INFO - codeparrot_training - Step 42788: {'lr': 2.7337578472116348e-05, 'samples': 21907968, 'steps': 42788, 'loss/train': 1.1379101276397705} +02/26/2022 08:58:42 - INFO - codeparrot_training - Step 42789: {'lr': 2.73301390987733e-05, 'samples': 21908480, 'steps': 42789, 'loss/train': 2.1402475833892822} +02/26/2022 08:58:45 - INFO - codeparrot_training - Step 42790: {'lr': 2.7322700679277223e-05, 'samples': 21908992, 'steps': 42790, 'loss/train': 1.7058736085891724} +02/26/2022 08:58:51 - INFO - codeparrot_training - Step 42791: {'lr': 2.731526321365996e-05, 'samples': 21909504, 'steps': 42791, 'loss/train': 2.434065580368042} +02/26/2022 08:58:54 - INFO - codeparrot_training - Step 42792: {'lr': 2.7307826701953392e-05, 'samples': 21910016, 'steps': 42792, 'loss/train': 1.7840499877929688} +02/26/2022 08:59:00 - INFO - codeparrot_training - Step 42793: {'lr': 2.730039114418939e-05, 'samples': 21910528, 'steps': 42793, 'loss/train': 2.0112526416778564} +02/26/2022 08:59:03 - INFO - codeparrot_training - Step 42794: {'lr': 2.729295654039976e-05, 'samples': 21911040, 'steps': 42794, 'loss/train': 1.910519003868103} +02/26/2022 08:59:11 - INFO - codeparrot_training - Step 42795: {'lr': 2.7285522890616504e-05, 'samples': 21911552, 'steps': 42795, 'loss/train': 1.799712061882019} +02/26/2022 08:59:14 - INFO - codeparrot_training - Step 42796: {'lr': 2.7278090194871237e-05, 'samples': 21912064, 'steps': 42796, 'loss/train': 2.5501792430877686} +02/26/2022 08:59:19 - INFO - codeparrot_training - Step 42797: {'lr': 2.7270658453195957e-05, 'samples': 21912576, 'steps': 42797, 'loss/train': 2.504366636276245} +02/26/2022 08:59:23 - INFO - codeparrot_training - Step 42798: {'lr': 2.726322766562242e-05, 'samples': 21913088, 'steps': 42798, 'loss/train': 1.3743587732315063} +02/26/2022 08:59:28 - INFO - codeparrot_training - Step 42799: {'lr': 2.7255797832182572e-05, 'samples': 21913600, 'steps': 42799, 'loss/train': 1.4520469903945923} +02/26/2022 08:59:32 - INFO - codeparrot_training - Step 42800: {'lr': 2.7248368952908055e-05, 'samples': 21914112, 'steps': 42800, 'loss/train': 2.395886182785034} +02/26/2022 08:59:37 - INFO - codeparrot_training - Step 42801: {'lr': 2.724094102783084e-05, 'samples': 21914624, 'steps': 42801, 'loss/train': 1.3224307298660278} +02/26/2022 08:59:41 - INFO - codeparrot_training - Step 42802: {'lr': 2.7233514056982655e-05, 'samples': 21915136, 'steps': 42802, 'loss/train': 2.0678722858428955} +02/26/2022 08:59:46 - INFO - codeparrot_training - Step 42803: {'lr': 2.722608804039542e-05, 'samples': 21915648, 'steps': 42803, 'loss/train': 0.7412338256835938} +02/26/2022 08:59:52 - INFO - codeparrot_training - Step 42804: {'lr': 2.7218662978100854e-05, 'samples': 21916160, 'steps': 42804, 'loss/train': 1.9862208366394043} +02/26/2022 08:59:55 - INFO - codeparrot_training - Step 42805: {'lr': 2.7211238870130826e-05, 'samples': 21916672, 'steps': 42805, 'loss/train': 1.9224451780319214} +02/26/2022 08:59:59 - INFO - codeparrot_training - Step 42806: {'lr': 2.7203815716517062e-05, 'samples': 21917184, 'steps': 42806, 'loss/train': 1.2230496406555176} +02/26/2022 09:00:04 - INFO - codeparrot_training - Step 42807: {'lr': 2.7196393517291417e-05, 'samples': 21917696, 'steps': 42807, 'loss/train': 1.4456367492675781} +02/26/2022 09:00:12 - INFO - codeparrot_training - Step 42808: {'lr': 2.718897227248571e-05, 'samples': 21918208, 'steps': 42808, 'loss/train': 0.760595440864563} +02/26/2022 09:00:15 - INFO - codeparrot_training - Step 42809: {'lr': 2.718155198213168e-05, 'samples': 21918720, 'steps': 42809, 'loss/train': 1.357527256011963} +02/26/2022 09:00:19 - INFO - codeparrot_training - Step 42810: {'lr': 2.717413264626109e-05, 'samples': 21919232, 'steps': 42810, 'loss/train': 1.1176189184188843} +02/26/2022 09:00:24 - INFO - codeparrot_training - Step 42811: {'lr': 2.71667142649058e-05, 'samples': 21919744, 'steps': 42811, 'loss/train': 0.6460078954696655} +02/26/2022 09:00:28 - INFO - codeparrot_training - Step 42812: {'lr': 2.7159296838097565e-05, 'samples': 21920256, 'steps': 42812, 'loss/train': 3.394054889678955} +02/26/2022 09:00:33 - INFO - codeparrot_training - Step 42813: {'lr': 2.715188036586813e-05, 'samples': 21920768, 'steps': 42813, 'loss/train': 2.3883965015411377} +02/26/2022 09:00:37 - INFO - codeparrot_training - Step 42814: {'lr': 2.71444648482492e-05, 'samples': 21921280, 'steps': 42814, 'loss/train': 2.1934337615966797} +02/26/2022 09:00:42 - INFO - codeparrot_training - Step 42815: {'lr': 2.713705028527272e-05, 'samples': 21921792, 'steps': 42815, 'loss/train': 1.7336368560791016} +02/26/2022 09:00:46 - INFO - codeparrot_training - Step 42816: {'lr': 2.712963667697027e-05, 'samples': 21922304, 'steps': 42816, 'loss/train': 2.1103127002716064} +02/26/2022 09:00:51 - INFO - codeparrot_training - Step 42817: {'lr': 2.7122224023373778e-05, 'samples': 21922816, 'steps': 42817, 'loss/train': 1.9909189939498901} +02/26/2022 09:00:55 - INFO - codeparrot_training - Step 42818: {'lr': 2.7114812324514822e-05, 'samples': 21923328, 'steps': 42818, 'loss/train': 0.8142868876457214} +02/26/2022 09:01:02 - INFO - codeparrot_training - Step 42819: {'lr': 2.7107401580425295e-05, 'samples': 21923840, 'steps': 42819, 'loss/train': 1.324966311454773} +02/26/2022 09:01:06 - INFO - codeparrot_training - Step 42820: {'lr': 2.7099991791136867e-05, 'samples': 21924352, 'steps': 42820, 'loss/train': 1.7044597864151} +02/26/2022 09:01:11 - INFO - codeparrot_training - Step 42821: {'lr': 2.7092582956681294e-05, 'samples': 21924864, 'steps': 42821, 'loss/train': 1.8956921100616455} +02/26/2022 09:01:15 - INFO - codeparrot_training - Step 42822: {'lr': 2.708517507709027e-05, 'samples': 21925376, 'steps': 42822, 'loss/train': 0.36719071865081787} +02/26/2022 09:01:20 - INFO - codeparrot_training - Step 42823: {'lr': 2.7077768152395626e-05, 'samples': 21925888, 'steps': 42823, 'loss/train': 0.9787956476211548} +02/26/2022 09:01:23 - INFO - codeparrot_training - Step 42824: {'lr': 2.7070362182629038e-05, 'samples': 21926400, 'steps': 42824, 'loss/train': 1.921280026435852} +02/26/2022 09:01:29 - INFO - codeparrot_training - Step 42825: {'lr': 2.706295716782223e-05, 'samples': 21926912, 'steps': 42825, 'loss/train': 1.9951118230819702} +02/26/2022 09:01:33 - INFO - codeparrot_training - Step 42826: {'lr': 2.7055553108006866e-05, 'samples': 21927424, 'steps': 42826, 'loss/train': 1.4546281099319458} +02/26/2022 09:01:38 - INFO - codeparrot_training - Step 42827: {'lr': 2.7048150003214784e-05, 'samples': 21927936, 'steps': 42827, 'loss/train': 0.6301075220108032} +02/26/2022 09:01:42 - INFO - codeparrot_training - Step 42828: {'lr': 2.70407478534776e-05, 'samples': 21928448, 'steps': 42828, 'loss/train': 1.3864514827728271} +02/26/2022 09:01:47 - INFO - codeparrot_training - Step 42829: {'lr': 2.7033346658827034e-05, 'samples': 21928960, 'steps': 42829, 'loss/train': 0.6658378839492798} +02/26/2022 09:01:51 - INFO - codeparrot_training - Step 42830: {'lr': 2.7025946419294845e-05, 'samples': 21929472, 'steps': 42830, 'loss/train': 5.909066200256348} +02/26/2022 09:01:58 - INFO - codeparrot_training - Step 42831: {'lr': 2.7018547134912725e-05, 'samples': 21929984, 'steps': 42831, 'loss/train': 0.8054854273796082} +02/26/2022 09:02:01 - INFO - codeparrot_training - Step 42832: {'lr': 2.7011148805712316e-05, 'samples': 21930496, 'steps': 42832, 'loss/train': 1.828218936920166} +02/26/2022 09:02:07 - INFO - codeparrot_training - Step 42833: {'lr': 2.7003751431725314e-05, 'samples': 21931008, 'steps': 42833, 'loss/train': 0.6490828990936279} +02/26/2022 09:02:10 - INFO - codeparrot_training - Step 42834: {'lr': 2.6996355012983502e-05, 'samples': 21931520, 'steps': 42834, 'loss/train': 2.3491430282592773} +02/26/2022 09:02:16 - INFO - codeparrot_training - Step 42835: {'lr': 2.698895954951841e-05, 'samples': 21932032, 'steps': 42835, 'loss/train': 2.0695760250091553} +02/26/2022 09:02:20 - INFO - codeparrot_training - Step 42836: {'lr': 2.6981565041361873e-05, 'samples': 21932544, 'steps': 42836, 'loss/train': 1.6444177627563477} +02/26/2022 09:02:25 - INFO - codeparrot_training - Step 42837: {'lr': 2.697417148854542e-05, 'samples': 21933056, 'steps': 42837, 'loss/train': 1.3091180324554443} +02/26/2022 09:02:29 - INFO - codeparrot_training - Step 42838: {'lr': 2.6966778891100884e-05, 'samples': 21933568, 'steps': 42838, 'loss/train': 2.7320001125335693} +02/26/2022 09:02:34 - INFO - codeparrot_training - Step 42839: {'lr': 2.6959387249059775e-05, 'samples': 21934080, 'steps': 42839, 'loss/train': 1.8345000743865967} +02/26/2022 09:02:41 - INFO - codeparrot_training - Step 42840: {'lr': 2.6951996562453866e-05, 'samples': 21934592, 'steps': 42840, 'loss/train': 1.17380952835083} +02/26/2022 09:02:45 - INFO - codeparrot_training - Step 42841: {'lr': 2.6944606831314722e-05, 'samples': 21935104, 'steps': 42841, 'loss/train': 1.2819693088531494} +02/26/2022 09:02:50 - INFO - codeparrot_training - Step 42842: {'lr': 2.6937218055674116e-05, 'samples': 21935616, 'steps': 42842, 'loss/train': 0.9826082587242126} +02/26/2022 09:02:54 - INFO - codeparrot_training - Step 42843: {'lr': 2.6929830235563613e-05, 'samples': 21936128, 'steps': 42843, 'loss/train': 0.06444505602121353} +02/26/2022 09:02:59 - INFO - codeparrot_training - Step 42844: {'lr': 2.6922443371014904e-05, 'samples': 21936640, 'steps': 42844, 'loss/train': 2.2556002140045166} +02/26/2022 09:03:03 - INFO - codeparrot_training - Step 42845: {'lr': 2.6915057462059578e-05, 'samples': 21937152, 'steps': 42845, 'loss/train': 1.4239226579666138} +02/26/2022 09:03:08 - INFO - codeparrot_training - Step 42846: {'lr': 2.690767250872933e-05, 'samples': 21937664, 'steps': 42846, 'loss/train': 1.681216835975647} +02/26/2022 09:03:12 - INFO - codeparrot_training - Step 42847: {'lr': 2.6900288511055775e-05, 'samples': 21938176, 'steps': 42847, 'loss/train': 8.6160249710083} +02/26/2022 09:03:17 - INFO - codeparrot_training - Step 42848: {'lr': 2.6892905469070554e-05, 'samples': 21938688, 'steps': 42848, 'loss/train': 2.25268292427063} +02/26/2022 09:03:21 - INFO - codeparrot_training - Step 42849: {'lr': 2.6885523382805226e-05, 'samples': 21939200, 'steps': 42849, 'loss/train': 0.38531386852264404} +02/26/2022 09:03:27 - INFO - codeparrot_training - Step 42850: {'lr': 2.6878142252291515e-05, 'samples': 21939712, 'steps': 42850, 'loss/train': 0.6773548126220703} +02/26/2022 09:03:30 - INFO - codeparrot_training - Step 42851: {'lr': 2.687076207756095e-05, 'samples': 21940224, 'steps': 42851, 'loss/train': 1.1235246658325195} +02/26/2022 09:03:34 - INFO - codeparrot_training - Step 42852: {'lr': 2.6863382858645313e-05, 'samples': 21940736, 'steps': 42852, 'loss/train': 2.1405186653137207} +02/26/2022 09:03:39 - INFO - codeparrot_training - Step 42853: {'lr': 2.6856004595575966e-05, 'samples': 21941248, 'steps': 42853, 'loss/train': 2.213709831237793} +02/26/2022 09:03:43 - INFO - codeparrot_training - Step 42854: {'lr': 2.6848627288384665e-05, 'samples': 21941760, 'steps': 42854, 'loss/train': 1.7483052015304565} +02/26/2022 09:03:48 - INFO - codeparrot_training - Step 42855: {'lr': 2.684125093710296e-05, 'samples': 21942272, 'steps': 42855, 'loss/train': 1.7833229303359985} +02/26/2022 09:03:52 - INFO - codeparrot_training - Step 42856: {'lr': 2.6833875541762587e-05, 'samples': 21942784, 'steps': 42856, 'loss/train': 1.3186804056167603} +02/26/2022 09:03:59 - INFO - codeparrot_training - Step 42857: {'lr': 2.682650110239493e-05, 'samples': 21943296, 'steps': 42857, 'loss/train': 1.5045169591903687} +02/26/2022 09:04:02 - INFO - codeparrot_training - Step 42858: {'lr': 2.6819127619031687e-05, 'samples': 21943808, 'steps': 42858, 'loss/train': 1.8732916116714478} +02/26/2022 09:04:08 - INFO - codeparrot_training - Step 42859: {'lr': 2.6811755091704415e-05, 'samples': 21944320, 'steps': 42859, 'loss/train': 1.7660460472106934} +02/26/2022 09:04:11 - INFO - codeparrot_training - Step 42860: {'lr': 2.6804383520444812e-05, 'samples': 21944832, 'steps': 42860, 'loss/train': 1.4170451164245605} +02/26/2022 09:04:19 - INFO - codeparrot_training - Step 42861: {'lr': 2.6797012905284247e-05, 'samples': 21945344, 'steps': 42861, 'loss/train': 1.2842391729354858} +02/26/2022 09:04:22 - INFO - codeparrot_training - Step 42862: {'lr': 2.6789643246254463e-05, 'samples': 21945856, 'steps': 42862, 'loss/train': 2.146132707595825} +02/26/2022 09:04:28 - INFO - codeparrot_training - Step 42863: {'lr': 2.6782274543386914e-05, 'samples': 21946368, 'steps': 42863, 'loss/train': 2.29295015335083} +02/26/2022 09:04:31 - INFO - codeparrot_training - Step 42864: {'lr': 2.6774906796713295e-05, 'samples': 21946880, 'steps': 42864, 'loss/train': 0.8187737464904785} +02/26/2022 09:04:37 - INFO - codeparrot_training - Step 42865: {'lr': 2.6767540006265052e-05, 'samples': 21947392, 'steps': 42865, 'loss/train': 1.1048065423965454} +02/26/2022 09:04:40 - INFO - codeparrot_training - Step 42866: {'lr': 2.6760174172073826e-05, 'samples': 21947904, 'steps': 42866, 'loss/train': 3.348705530166626} +02/26/2022 09:04:46 - INFO - codeparrot_training - Step 42867: {'lr': 2.6752809294171094e-05, 'samples': 21948416, 'steps': 42867, 'loss/train': 1.3998491764068604} +02/26/2022 09:04:49 - INFO - codeparrot_training - Step 42868: {'lr': 2.674544537258841e-05, 'samples': 21948928, 'steps': 42868, 'loss/train': 2.9467873573303223} +02/26/2022 09:04:55 - INFO - codeparrot_training - Step 42869: {'lr': 2.6738082407357367e-05, 'samples': 21949440, 'steps': 42869, 'loss/train': 1.3641581535339355} +02/26/2022 09:04:58 - INFO - codeparrot_training - Step 42870: {'lr': 2.6730720398509516e-05, 'samples': 21949952, 'steps': 42870, 'loss/train': 1.601888656616211} +02/26/2022 09:05:04 - INFO - codeparrot_training - Step 42871: {'lr': 2.6723359346076338e-05, 'samples': 21950464, 'steps': 42871, 'loss/train': 1.9685685634613037} +02/26/2022 09:05:07 - INFO - codeparrot_training - Step 42872: {'lr': 2.6715999250089358e-05, 'samples': 21950976, 'steps': 42872, 'loss/train': 2.945636749267578} +02/26/2022 09:05:15 - INFO - codeparrot_training - Step 42873: {'lr': 2.6708640110580192e-05, 'samples': 21951488, 'steps': 42873, 'loss/train': 2.2861685752868652} +02/26/2022 09:05:19 - INFO - codeparrot_training - Step 42874: {'lr': 2.6701281927580284e-05, 'samples': 21952000, 'steps': 42874, 'loss/train': 0.08399217575788498} +02/26/2022 09:05:24 - INFO - codeparrot_training - Step 42875: {'lr': 2.6693924701121196e-05, 'samples': 21952512, 'steps': 42875, 'loss/train': 1.6448696851730347} +02/26/2022 09:05:27 - INFO - codeparrot_training - Step 42876: {'lr': 2.6686568431234376e-05, 'samples': 21953024, 'steps': 42876, 'loss/train': 1.7844494581222534} +02/26/2022 09:05:33 - INFO - codeparrot_training - Step 42877: {'lr': 2.6679213117951434e-05, 'samples': 21953536, 'steps': 42877, 'loss/train': 2.702091693878174} +02/26/2022 09:05:38 - INFO - codeparrot_training - Step 42878: {'lr': 2.6671858761303848e-05, 'samples': 21954048, 'steps': 42878, 'loss/train': 1.927246332168579} +02/26/2022 09:05:42 - INFO - codeparrot_training - Step 42879: {'lr': 2.666450536132309e-05, 'samples': 21954560, 'steps': 42879, 'loss/train': 1.785115361213684} +02/26/2022 09:05:47 - INFO - codeparrot_training - Step 42880: {'lr': 2.6657152918040605e-05, 'samples': 21955072, 'steps': 42880, 'loss/train': 2.2215871810913086} +02/26/2022 09:05:51 - INFO - codeparrot_training - Step 42881: {'lr': 2.6649801431488043e-05, 'samples': 21955584, 'steps': 42881, 'loss/train': 2.1950230598449707} +02/26/2022 09:05:58 - INFO - codeparrot_training - Step 42882: {'lr': 2.6642450901696785e-05, 'samples': 21956096, 'steps': 42882, 'loss/train': 1.02681303024292} +02/26/2022 09:06:02 - INFO - codeparrot_training - Step 42883: {'lr': 2.663510132869834e-05, 'samples': 21956608, 'steps': 42883, 'loss/train': 2.010436534881592} +02/26/2022 09:06:07 - INFO - codeparrot_training - Step 42884: {'lr': 2.6627752712524157e-05, 'samples': 21957120, 'steps': 42884, 'loss/train': 1.6045893430709839} +02/26/2022 09:06:11 - INFO - codeparrot_training - Step 42885: {'lr': 2.6620405053205816e-05, 'samples': 21957632, 'steps': 42885, 'loss/train': 0.755906343460083} +02/26/2022 09:06:16 - INFO - codeparrot_training - Step 42886: {'lr': 2.661305835077471e-05, 'samples': 21958144, 'steps': 42886, 'loss/train': 1.079380750656128} +02/26/2022 09:06:20 - INFO - codeparrot_training - Step 42887: {'lr': 2.6605712605262315e-05, 'samples': 21958656, 'steps': 42887, 'loss/train': 0.6011679768562317} +02/26/2022 09:06:25 - INFO - codeparrot_training - Step 42888: {'lr': 2.659836781670008e-05, 'samples': 21959168, 'steps': 42888, 'loss/train': 0.6199708580970764} +02/26/2022 09:06:29 - INFO - codeparrot_training - Step 42889: {'lr': 2.659102398511956e-05, 'samples': 21959680, 'steps': 42889, 'loss/train': 1.0654090642929077} +02/26/2022 09:06:34 - INFO - codeparrot_training - Step 42890: {'lr': 2.6583681110552093e-05, 'samples': 21960192, 'steps': 42890, 'loss/train': 0.5442640781402588} +02/26/2022 09:06:38 - INFO - codeparrot_training - Step 42891: {'lr': 2.6576339193029293e-05, 'samples': 21960704, 'steps': 42891, 'loss/train': 1.217951774597168} +02/26/2022 09:06:45 - INFO - codeparrot_training - Step 42892: {'lr': 2.656899823258241e-05, 'samples': 21961216, 'steps': 42892, 'loss/train': 1.5691418647766113} +02/26/2022 09:06:48 - INFO - codeparrot_training - Step 42893: {'lr': 2.6561658229243028e-05, 'samples': 21961728, 'steps': 42893, 'loss/train': 2.1433398723602295} +02/26/2022 09:06:54 - INFO - codeparrot_training - Step 42894: {'lr': 2.655431918304252e-05, 'samples': 21962240, 'steps': 42894, 'loss/train': 2.3932013511657715} +02/26/2022 09:06:57 - INFO - codeparrot_training - Step 42895: {'lr': 2.654698109401246e-05, 'samples': 21962752, 'steps': 42895, 'loss/train': 1.721458077430725} +02/26/2022 09:07:03 - INFO - codeparrot_training - Step 42896: {'lr': 2.6539643962184058e-05, 'samples': 21963264, 'steps': 42896, 'loss/train': 1.9035429954528809} +02/26/2022 09:07:06 - INFO - codeparrot_training - Step 42897: {'lr': 2.6532307787588943e-05, 'samples': 21963776, 'steps': 42897, 'loss/train': 2.4287753105163574} +02/26/2022 09:07:12 - INFO - codeparrot_training - Step 42898: {'lr': 2.6524972570258377e-05, 'samples': 21964288, 'steps': 42898, 'loss/train': 1.8739185333251953} +02/26/2022 09:07:15 - INFO - codeparrot_training - Step 42899: {'lr': 2.6517638310223996e-05, 'samples': 21964800, 'steps': 42899, 'loss/train': 1.8823384046554565} +02/26/2022 09:07:21 - INFO - codeparrot_training - Step 42900: {'lr': 2.6510305007516973e-05, 'samples': 21965312, 'steps': 42900, 'loss/train': 1.3579708337783813} +02/26/2022 09:07:24 - INFO - codeparrot_training - Step 42901: {'lr': 2.6502972662168888e-05, 'samples': 21965824, 'steps': 42901, 'loss/train': 2.5500195026397705} +02/26/2022 09:07:31 - INFO - codeparrot_training - Step 42902: {'lr': 2.6495641274211057e-05, 'samples': 21966336, 'steps': 42902, 'loss/train': 1.8145298957824707} +02/26/2022 09:07:35 - INFO - codeparrot_training - Step 42903: {'lr': 2.648831084367498e-05, 'samples': 21966848, 'steps': 42903, 'loss/train': 1.9289880990982056} +02/26/2022 09:07:40 - INFO - codeparrot_training - Step 42904: {'lr': 2.6480981370592016e-05, 'samples': 21967360, 'steps': 42904, 'loss/train': 2.3520002365112305} +02/26/2022 09:07:44 - INFO - codeparrot_training - Step 42905: {'lr': 2.6473652854993535e-05, 'samples': 21967872, 'steps': 42905, 'loss/train': 1.8150992393493652} +02/26/2022 09:07:49 - INFO - codeparrot_training - Step 42906: {'lr': 2.64663252969109e-05, 'samples': 21968384, 'steps': 42906, 'loss/train': 2.84921932220459} +02/26/2022 09:07:53 - INFO - codeparrot_training - Step 42907: {'lr': 2.6458998696375608e-05, 'samples': 21968896, 'steps': 42907, 'loss/train': 1.309278964996338} +02/26/2022 09:07:58 - INFO - codeparrot_training - Step 42908: {'lr': 2.6451673053418972e-05, 'samples': 21969408, 'steps': 42908, 'loss/train': 3.8604185581207275} +02/26/2022 09:08:02 - INFO - codeparrot_training - Step 42909: {'lr': 2.6444348368072385e-05, 'samples': 21969920, 'steps': 42909, 'loss/train': 1.825760841369629} +02/26/2022 09:08:07 - INFO - codeparrot_training - Step 42910: {'lr': 2.643702464036718e-05, 'samples': 21970432, 'steps': 42910, 'loss/train': 2.179658889770508} +02/26/2022 09:08:11 - INFO - codeparrot_training - Step 42911: {'lr': 2.6429701870334805e-05, 'samples': 21970944, 'steps': 42911, 'loss/train': 0.33878087997436523} +02/26/2022 09:08:16 - INFO - codeparrot_training - Step 42912: {'lr': 2.6422380058006597e-05, 'samples': 21971456, 'steps': 42912, 'loss/train': 1.795589804649353} +02/26/2022 09:08:20 - INFO - codeparrot_training - Step 42913: {'lr': 2.6415059203413944e-05, 'samples': 21971968, 'steps': 42913, 'loss/train': 0.8963598608970642} +02/26/2022 09:08:25 - INFO - codeparrot_training - Step 42914: {'lr': 2.6407739306588103e-05, 'samples': 21972480, 'steps': 42914, 'loss/train': 1.4238911867141724} +02/26/2022 09:08:29 - INFO - codeparrot_training - Step 42915: {'lr': 2.6400420367560573e-05, 'samples': 21972992, 'steps': 42915, 'loss/train': 1.0536459684371948} +02/26/2022 09:08:35 - INFO - codeparrot_training - Step 42916: {'lr': 2.639310238636264e-05, 'samples': 21973504, 'steps': 42916, 'loss/train': 1.2638682126998901} +02/26/2022 09:08:38 - INFO - codeparrot_training - Step 42917: {'lr': 2.638578536302566e-05, 'samples': 21974016, 'steps': 42917, 'loss/train': 0.14891071617603302} +02/26/2022 09:08:45 - INFO - codeparrot_training - Step 42918: {'lr': 2.6378469297580977e-05, 'samples': 21974528, 'steps': 42918, 'loss/train': 2.00984787940979} +02/26/2022 09:08:49 - INFO - codeparrot_training - Step 42919: {'lr': 2.6371154190059838e-05, 'samples': 21975040, 'steps': 42919, 'loss/train': 1.3349881172180176} +02/26/2022 09:08:54 - INFO - codeparrot_training - Step 42920: {'lr': 2.6363840040493748e-05, 'samples': 21975552, 'steps': 42920, 'loss/train': 2.069300889968872} +02/26/2022 09:08:58 - INFO - codeparrot_training - Step 42921: {'lr': 2.635652684891393e-05, 'samples': 21976064, 'steps': 42921, 'loss/train': 1.7318470478057861} +02/26/2022 09:09:03 - INFO - codeparrot_training - Step 42922: {'lr': 2.6349214615351754e-05, 'samples': 21976576, 'steps': 42922, 'loss/train': 2.430473804473877} +02/26/2022 09:09:07 - INFO - codeparrot_training - Step 42923: {'lr': 2.6341903339838462e-05, 'samples': 21977088, 'steps': 42923, 'loss/train': 6.047491073608398} +02/26/2022 09:09:12 - INFO - codeparrot_training - Step 42924: {'lr': 2.6334593022405513e-05, 'samples': 21977600, 'steps': 42924, 'loss/train': 0.8418083786964417} +02/26/2022 09:09:16 - INFO - codeparrot_training - Step 42925: {'lr': 2.6327283663084067e-05, 'samples': 21978112, 'steps': 42925, 'loss/train': 3.577162504196167} +02/26/2022 09:09:21 - INFO - codeparrot_training - Step 42926: {'lr': 2.6319975261905634e-05, 'samples': 21978624, 'steps': 42926, 'loss/train': 1.8548102378845215} +02/26/2022 09:09:25 - INFO - codeparrot_training - Step 42927: {'lr': 2.6312667818901292e-05, 'samples': 21979136, 'steps': 42927, 'loss/train': 2.1302738189697266} +02/26/2022 09:09:32 - INFO - codeparrot_training - Step 42928: {'lr': 2.6305361334102497e-05, 'samples': 21979648, 'steps': 42928, 'loss/train': 1.8433518409729004} +02/26/2022 09:09:36 - INFO - codeparrot_training - Step 42929: {'lr': 2.6298055807540465e-05, 'samples': 21980160, 'steps': 42929, 'loss/train': 1.9911000728607178} +02/26/2022 09:09:41 - INFO - codeparrot_training - Step 42930: {'lr': 2.6290751239246623e-05, 'samples': 21980672, 'steps': 42930, 'loss/train': 1.6666487455368042} +02/26/2022 09:09:45 - INFO - codeparrot_training - Step 42931: {'lr': 2.6283447629252054e-05, 'samples': 21981184, 'steps': 42931, 'loss/train': 1.787108063697815} +02/26/2022 09:09:50 - INFO - codeparrot_training - Step 42932: {'lr': 2.6276144977588234e-05, 'samples': 21981696, 'steps': 42932, 'loss/train': 2.0184366703033447} +02/26/2022 09:09:54 - INFO - codeparrot_training - Step 42933: {'lr': 2.6268843284286303e-05, 'samples': 21982208, 'steps': 42933, 'loss/train': 1.377815842628479} +02/26/2022 09:09:59 - INFO - codeparrot_training - Step 42934: {'lr': 2.6261542549377683e-05, 'samples': 21982720, 'steps': 42934, 'loss/train': 0.7126639485359192} +02/26/2022 09:10:03 - INFO - codeparrot_training - Step 42935: {'lr': 2.625424277289351e-05, 'samples': 21983232, 'steps': 42935, 'loss/train': 1.7294493913650513} +02/26/2022 09:10:08 - INFO - codeparrot_training - Step 42936: {'lr': 2.6246943954865126e-05, 'samples': 21983744, 'steps': 42936, 'loss/train': 0.9614233374595642} +02/26/2022 09:10:12 - INFO - codeparrot_training - Step 42937: {'lr': 2.6239646095323754e-05, 'samples': 21984256, 'steps': 42937, 'loss/train': 1.8178631067276} +02/26/2022 09:10:17 - INFO - codeparrot_training - Step 42938: {'lr': 2.623234919430076e-05, 'samples': 21984768, 'steps': 42938, 'loss/train': 1.7760825157165527} +02/26/2022 09:10:21 - INFO - codeparrot_training - Step 42939: {'lr': 2.622505325182728e-05, 'samples': 21985280, 'steps': 42939, 'loss/train': 1.433170199394226} +02/26/2022 09:10:28 - INFO - codeparrot_training - Step 42940: {'lr': 2.6217758267934655e-05, 'samples': 21985792, 'steps': 42940, 'loss/train': 3.0513675212860107} +02/26/2022 09:10:31 - INFO - codeparrot_training - Step 42941: {'lr': 2.6210464242654025e-05, 'samples': 21986304, 'steps': 42941, 'loss/train': 1.539406657218933} +02/26/2022 09:10:37 - INFO - codeparrot_training - Step 42942: {'lr': 2.6203171176016754e-05, 'samples': 21986816, 'steps': 42942, 'loss/train': 2.441770553588867} +02/26/2022 09:10:40 - INFO - codeparrot_training - Step 42943: {'lr': 2.619587906805404e-05, 'samples': 21987328, 'steps': 42943, 'loss/train': 2.55214262008667} +02/26/2022 09:10:46 - INFO - codeparrot_training - Step 42944: {'lr': 2.618858791879711e-05, 'samples': 21987840, 'steps': 42944, 'loss/train': 1.7590361833572388} +02/26/2022 09:10:49 - INFO - codeparrot_training - Step 42945: {'lr': 2.6181297728277154e-05, 'samples': 21988352, 'steps': 42945, 'loss/train': 2.0431292057037354} +02/26/2022 09:10:55 - INFO - codeparrot_training - Step 42946: {'lr': 2.6174008496525515e-05, 'samples': 21988864, 'steps': 42946, 'loss/train': 1.5398809909820557} +02/26/2022 09:10:58 - INFO - codeparrot_training - Step 42947: {'lr': 2.6166720223573333e-05, 'samples': 21989376, 'steps': 42947, 'loss/train': 1.4127390384674072} +02/26/2022 09:11:04 - INFO - codeparrot_training - Step 42948: {'lr': 2.615943290945183e-05, 'samples': 21989888, 'steps': 42948, 'loss/train': 2.0602078437805176} +02/26/2022 09:11:07 - INFO - codeparrot_training - Step 42949: {'lr': 2.6152146554192206e-05, 'samples': 21990400, 'steps': 42949, 'loss/train': 0.34139397740364075} +02/26/2022 09:11:15 - INFO - codeparrot_training - Step 42950: {'lr': 2.6144861157825773e-05, 'samples': 21990912, 'steps': 42950, 'loss/train': 2.181121349334717} +02/26/2022 09:11:18 - INFO - codeparrot_training - Step 42951: {'lr': 2.613757672038364e-05, 'samples': 21991424, 'steps': 42951, 'loss/train': 1.5091323852539062} +02/26/2022 09:11:24 - INFO - codeparrot_training - Step 42952: {'lr': 2.613029324189706e-05, 'samples': 21991936, 'steps': 42952, 'loss/train': 1.7841856479644775} +02/26/2022 09:11:27 - INFO - codeparrot_training - Step 42953: {'lr': 2.6123010722397173e-05, 'samples': 21992448, 'steps': 42953, 'loss/train': 2.0733344554901123} +02/26/2022 09:11:32 - INFO - codeparrot_training - Step 42954: {'lr': 2.6115729161915265e-05, 'samples': 21992960, 'steps': 42954, 'loss/train': 1.8553426265716553} +02/26/2022 09:11:36 - INFO - codeparrot_training - Step 42955: {'lr': 2.6108448560482474e-05, 'samples': 21993472, 'steps': 42955, 'loss/train': 1.8669122457504272} +02/26/2022 09:11:41 - INFO - codeparrot_training - Step 42956: {'lr': 2.6101168918130026e-05, 'samples': 21993984, 'steps': 42956, 'loss/train': 2.373201608657837} +02/26/2022 09:11:45 - INFO - codeparrot_training - Step 42957: {'lr': 2.609389023488898e-05, 'samples': 21994496, 'steps': 42957, 'loss/train': 1.4350141286849976} +02/26/2022 09:11:50 - INFO - codeparrot_training - Step 42958: {'lr': 2.6086612510790698e-05, 'samples': 21995008, 'steps': 42958, 'loss/train': 1.8349874019622803} +02/26/2022 09:11:54 - INFO - codeparrot_training - Step 42959: {'lr': 2.607933574586624e-05, 'samples': 21995520, 'steps': 42959, 'loss/train': 1.8287482261657715} +02/26/2022 09:11:59 - INFO - codeparrot_training - Step 42960: {'lr': 2.6072059940146776e-05, 'samples': 21996032, 'steps': 42960, 'loss/train': 2.688157320022583} +02/26/2022 09:12:03 - INFO - codeparrot_training - Step 42961: {'lr': 2.6064785093663528e-05, 'samples': 21996544, 'steps': 42961, 'loss/train': 1.2066864967346191} +02/26/2022 09:12:08 - INFO - codeparrot_training - Step 42962: {'lr': 2.6057511206447644e-05, 'samples': 21997056, 'steps': 42962, 'loss/train': 1.4768345355987549} +02/26/2022 09:12:14 - INFO - codeparrot_training - Step 42963: {'lr': 2.6050238278530285e-05, 'samples': 21997568, 'steps': 42963, 'loss/train': 0.8944421410560608} +02/26/2022 09:12:17 - INFO - codeparrot_training - Step 42964: {'lr': 2.6042966309942544e-05, 'samples': 21998080, 'steps': 42964, 'loss/train': 2.481175661087036} +02/26/2022 09:12:25 - INFO - codeparrot_training - Step 42965: {'lr': 2.6035695300715702e-05, 'samples': 21998592, 'steps': 42965, 'loss/train': 2.2580296993255615} +02/26/2022 09:12:28 - INFO - codeparrot_training - Step 42966: {'lr': 2.602842525088073e-05, 'samples': 21999104, 'steps': 42966, 'loss/train': 1.8494553565979004} +02/26/2022 09:12:34 - INFO - codeparrot_training - Step 42967: {'lr': 2.6021156160468935e-05, 'samples': 21999616, 'steps': 42967, 'loss/train': 0.03226064145565033} +02/26/2022 09:12:37 - INFO - codeparrot_training - Step 42968: {'lr': 2.6013888029511294e-05, 'samples': 22000128, 'steps': 42968, 'loss/train': 2.2895877361297607} +02/26/2022 09:12:43 - INFO - codeparrot_training - Step 42969: {'lr': 2.6006620858039176e-05, 'samples': 22000640, 'steps': 42969, 'loss/train': 1.4299345016479492} +02/26/2022 09:12:46 - INFO - codeparrot_training - Step 42970: {'lr': 2.599935464608344e-05, 'samples': 22001152, 'steps': 42970, 'loss/train': 1.790269374847412} +02/26/2022 09:12:50 - INFO - codeparrot_training - Step 42971: {'lr': 2.5992089393675396e-05, 'samples': 22001664, 'steps': 42971, 'loss/train': 1.8767997026443481} +02/26/2022 09:12:55 - INFO - codeparrot_training - Step 42972: {'lr': 2.598482510084607e-05, 'samples': 22002176, 'steps': 42972, 'loss/train': 1.2279059886932373} +02/26/2022 09:12:59 - INFO - codeparrot_training - Step 42973: {'lr': 2.5977561767626668e-05, 'samples': 22002688, 'steps': 42973, 'loss/train': 1.668615698814392} +02/26/2022 09:13:04 - INFO - codeparrot_training - Step 42974: {'lr': 2.597029939404827e-05, 'samples': 22003200, 'steps': 42974, 'loss/train': 2.2787258625030518} +02/26/2022 09:13:08 - INFO - codeparrot_training - Step 42975: {'lr': 2.5963037980141935e-05, 'samples': 22003712, 'steps': 42975, 'loss/train': 1.1480618715286255} +02/26/2022 09:13:15 - INFO - codeparrot_training - Step 42976: {'lr': 2.595577752593878e-05, 'samples': 22004224, 'steps': 42976, 'loss/train': 1.9394205808639526} +02/26/2022 09:13:18 - INFO - codeparrot_training - Step 42977: {'lr': 2.5948518031469998e-05, 'samples': 22004736, 'steps': 42977, 'loss/train': 1.4486584663391113} +02/26/2022 09:13:24 - INFO - codeparrot_training - Step 42978: {'lr': 2.5941259496766595e-05, 'samples': 22005248, 'steps': 42978, 'loss/train': 2.7966549396514893} +02/26/2022 09:13:28 - INFO - codeparrot_training - Step 42979: {'lr': 2.5934001921859684e-05, 'samples': 22005760, 'steps': 42979, 'loss/train': 1.8758902549743652} +02/26/2022 09:13:33 - INFO - codeparrot_training - Step 42980: {'lr': 2.5926745306780324e-05, 'samples': 22006272, 'steps': 42980, 'loss/train': 1.7142419815063477} +02/26/2022 09:13:37 - INFO - codeparrot_training - Step 42981: {'lr': 2.5919489651559686e-05, 'samples': 22006784, 'steps': 42981, 'loss/train': 1.8362321853637695} +02/26/2022 09:13:42 - INFO - codeparrot_training - Step 42982: {'lr': 2.5912234956228798e-05, 'samples': 22007296, 'steps': 42982, 'loss/train': 1.8216270208358765} +02/26/2022 09:13:46 - INFO - codeparrot_training - Step 42983: {'lr': 2.5904981220818747e-05, 'samples': 22007808, 'steps': 42983, 'loss/train': 1.2957565784454346} +02/26/2022 09:13:53 - INFO - codeparrot_training - Step 42984: {'lr': 2.5897728445360536e-05, 'samples': 22008320, 'steps': 42984, 'loss/train': 1.6118088960647583} +02/26/2022 09:13:56 - INFO - codeparrot_training - Step 42985: {'lr': 2.5890476629885334e-05, 'samples': 22008832, 'steps': 42985, 'loss/train': 2.1677441596984863} +02/26/2022 09:14:02 - INFO - codeparrot_training - Step 42986: {'lr': 2.5883225774424146e-05, 'samples': 22009344, 'steps': 42986, 'loss/train': 0.9376107454299927} +02/26/2022 09:14:05 - INFO - codeparrot_training - Step 42987: {'lr': 2.587597587900814e-05, 'samples': 22009856, 'steps': 42987, 'loss/train': 2.973043441772461} +02/26/2022 09:14:11 - INFO - codeparrot_training - Step 42988: {'lr': 2.586872694366818e-05, 'samples': 22010368, 'steps': 42988, 'loss/train': 1.6875289678573608} +02/26/2022 09:14:16 - INFO - codeparrot_training - Step 42989: {'lr': 2.5861478968435492e-05, 'samples': 22010880, 'steps': 42989, 'loss/train': 1.72090482711792} +02/26/2022 09:14:20 - INFO - codeparrot_training - Step 42990: {'lr': 2.5854231953340995e-05, 'samples': 22011392, 'steps': 42990, 'loss/train': 3.1360926628112793} +02/26/2022 09:14:26 - INFO - codeparrot_training - Step 42991: {'lr': 2.5846985898415888e-05, 'samples': 22011904, 'steps': 42991, 'loss/train': 2.655022382736206} +02/26/2022 09:14:29 - INFO - codeparrot_training - Step 42992: {'lr': 2.5839740803691032e-05, 'samples': 22012416, 'steps': 42992, 'loss/train': 0.8258548378944397} +02/26/2022 09:14:33 - INFO - codeparrot_training - Step 42993: {'lr': 2.5832496669197597e-05, 'samples': 22012928, 'steps': 42993, 'loss/train': 3.383549451828003} +02/26/2022 09:14:38 - INFO - codeparrot_training - Step 42994: {'lr': 2.5825253494966504e-05, 'samples': 22013440, 'steps': 42994, 'loss/train': 1.366349220275879} +02/26/2022 09:14:42 - INFO - codeparrot_training - Step 42995: {'lr': 2.5818011281028953e-05, 'samples': 22013952, 'steps': 42995, 'loss/train': 2.713393211364746} +02/26/2022 09:14:49 - INFO - codeparrot_training - Step 42996: {'lr': 2.5810770027415748e-05, 'samples': 22014464, 'steps': 42996, 'loss/train': 0.46251311898231506} +02/26/2022 09:14:52 - INFO - codeparrot_training - Step 42997: {'lr': 2.580352973415806e-05, 'samples': 22014976, 'steps': 42997, 'loss/train': 1.973315715789795} +02/26/2022 09:14:58 - INFO - codeparrot_training - Step 42998: {'lr': 2.5796290401286865e-05, 'samples': 22015488, 'steps': 42998, 'loss/train': 1.8694614171981812} +02/26/2022 09:15:01 - INFO - codeparrot_training - Step 42999: {'lr': 2.578905202883311e-05, 'samples': 22016000, 'steps': 42999, 'loss/train': 1.5767271518707275} +02/26/2022 09:15:01 - INFO - codeparrot_training - Evaluating and saving model checkpoint