diff --git "a/train_cosine_uncond.out" "b/train_cosine_uncond.out" new file mode 100644--- /dev/null +++ "b/train_cosine_uncond.out" @@ -0,0 +1,11398 @@ +/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( +Schedule: cosine +Cfg: False +Output path: /scratch/shared/beegfs/gabrijel/m2l/mini +Patch Size: 4 +Device: cuda:3 +===================================================================================== +Layer (type:depth-idx) Param # +===================================================================================== +DiT 18,816 +├─PatchEmbed: 1-1 -- +│ └─Conv2d: 2-1 6,528 +├─TimestepEmbedder: 1-2 -- +│ └─Mlp: 2-2 -- +│ │ └─Linear: 3-1 98,688 +│ │ └─SiLU: 3-2 -- +│ │ └─Linear: 3-3 147,840 +├─ModuleList: 1-3 -- +│ └─DiTBlock: 2-3 -- +│ │ └─LayerNorm: 3-4 -- +│ │ └─MultiheadAttention: 3-5 591,360 +│ │ └─LayerNorm: 3-6 -- +│ │ └─Mlp: 3-7 1,181,568 +│ │ └─Sequential: 3-8 887,040 +│ └─DiTBlock: 2-4 -- +│ │ └─LayerNorm: 3-9 -- +│ │ └─MultiheadAttention: 3-10 591,360 +│ │ └─LayerNorm: 3-11 -- +│ │ └─Mlp: 3-12 1,181,568 +│ │ └─Sequential: 3-13 887,040 +│ └─DiTBlock: 2-5 -- +│ │ └─LayerNorm: 3-14 -- +│ │ └─MultiheadAttention: 3-15 591,360 +│ │ └─LayerNorm: 3-16 -- +│ │ └─Mlp: 3-17 1,181,568 +│ │ └─Sequential: 3-18 887,040 +│ └─DiTBlock: 2-6 -- +│ │ └─LayerNorm: 3-19 -- +│ │ └─MultiheadAttention: 3-20 591,360 +│ │ └─LayerNorm: 3-21 -- +│ │ └─Mlp: 3-22 1,181,568 +│ │ └─Sequential: 3-23 887,040 +│ └─DiTBlock: 2-7 -- +│ │ └─LayerNorm: 3-24 -- +│ │ └─MultiheadAttention: 3-25 591,360 +│ │ └─LayerNorm: 3-26 -- +│ │ └─Mlp: 3-27 1,181,568 +│ │ └─Sequential: 3-28 887,040 +│ └─DiTBlock: 2-8 -- +│ │ └─LayerNorm: 3-29 -- +│ │ └─MultiheadAttention: 3-30 591,360 +│ │ └─LayerNorm: 3-31 -- +│ │ └─Mlp: 3-32 1,181,568 +│ │ └─Sequential: 3-33 887,040 +├─FinalLayer: 1-4 -- +│ └─LayerNorm: 2-9 -- +│ └─Linear: 2-10 6,160 +│ └─Sequential: 2-11 -- +│ │ └─SiLU: 3-34 -- +│ │ └─Linear: 3-35 295,680 +├─Unpatchify: 1-5 -- +===================================================================================== +Total params: 16,533,520 +Trainable params: 16,514,704 +Non-trainable params: 18,816 +===================================================================================== + +EPOCH: 1 +Loss at step 0: 1.003037691116333 +Loss at step 50: 0.27661845088005066 +Loss at step 100: 0.1424816995859146 +Loss at step 150: 0.149506077170372 +Loss at step 200: 0.12479458749294281 +Loss at step 250: 0.11426719278097153 +Loss at step 300: 0.1121714636683464 +Loss at step 350: 0.1525452435016632 +Loss at step 400: 0.13195544481277466 +Loss at step 450: 0.13427531719207764 +Loss at step 500: 0.11618398129940033 +Loss at step 550: 0.10502675175666809 +Loss at step 600: 0.0915534719824791 +Loss at step 650: 0.09573911130428314 +Loss at step 700: 0.1133173331618309 +Loss at step 750: 0.1329326033592224 +Loss at step 800: 0.10761799663305283 +Loss at step 850: 0.11455381661653519 +Loss at step 900: 0.12381163984537125 +Mean training loss after epoch 1: 0.1539469359716627 + +EPOCH: 2 +Loss at step 0: 0.10565946251153946 +Loss at step 50: 0.10705439746379852 +Loss at step 100: 0.10383749008178711 +Loss at step 150: 0.06799700856208801 +Loss at step 200: 0.10335274040699005 +Loss at step 250: 0.06992191821336746 +Loss at step 300: 0.08639557659626007 +Loss at step 350: 0.075557179749012 +Loss at step 400: 0.08633825182914734 +Loss at step 450: 0.07788940519094467 +Loss at step 500: 0.07782382518053055 +Loss at step 550: 0.08034727722406387 +Loss at step 600: 0.08342839032411575 +Loss at step 650: 0.07229728251695633 +Loss at step 700: 0.1125161275267601 +Loss at step 750: 0.07861600071191788 +Loss at step 800: 0.07409120351076126 +Loss at step 850: 0.078849658370018 +Loss at step 900: 0.094577357172966 +Mean training loss after epoch 2: 0.0890754868369748 + +EPOCH: 3 +Loss at step 0: 0.09418325871229172 +Loss at step 50: 0.07725059241056442 +Loss at step 100: 0.07888046652078629 +Loss at step 150: 0.07631003856658936 +Loss at step 200: 0.08261995762586594 +Loss at step 250: 0.08296551555395126 +Loss at step 300: 0.06751654297113419 +Loss at step 350: 0.056437570601701736 +Loss at step 400: 0.08921732008457184 +Loss at step 450: 0.07375114411115646 +Loss at step 500: 0.0711858719587326 +Loss at step 550: 0.07505157589912415 +Loss at step 600: 0.08232348412275314 +Loss at step 650: 0.06679479032754898 +Loss at step 700: 0.08148904889822006 +Loss at step 750: 0.08080136030912399 +Loss at step 800: 0.08312830328941345 +Loss at step 850: 0.07133378833532333 +Loss at step 900: 0.09072605520486832 +Mean training loss after epoch 3: 0.0814713569544653 + +EPOCH: 4 +Loss at step 0: 0.07232233136892319 +Loss at step 50: 0.07750030606985092 +Loss at step 100: 0.07092481851577759 +Loss at step 150: 0.07385428249835968 +Loss at step 200: 0.0765489712357521 +Loss at step 250: 0.08501236885786057 +Loss at step 300: 0.0753922238945961 +Loss at step 350: 0.09025567770004272 +Loss at step 400: 0.07097692042589188 +Loss at step 450: 0.07708340883255005 +Loss at step 500: 0.09128562361001968 +Loss at step 550: 0.06018142029643059 +Loss at step 600: 0.09149488806724548 +Loss at step 650: 0.06561129540205002 +Loss at step 700: 0.06980118900537491 +Loss at step 750: 0.08550712466239929 +Loss at step 800: 0.09240806102752686 +Loss at step 850: 0.07110103964805603 +Loss at step 900: 0.07148086279630661 +Mean training loss after epoch 4: 0.0787551273812236 + +EPOCH: 5 +Loss at step 0: 0.06837005913257599 +Loss at step 50: 0.08118490874767303 +Loss at step 100: 0.07443185895681381 +Loss at step 150: 0.07052610069513321 +Loss at step 200: 0.06819907575845718 +Loss at step 250: 0.07496345043182373 +Loss at step 300: 0.06800892949104309 +Loss at step 350: 0.06302531063556671 +Loss at step 400: 0.08613581210374832 +Loss at step 450: 0.08716695010662079 +Loss at step 500: 0.06726646423339844 +Loss at step 550: 0.08373793959617615 +Loss at step 600: 0.06744256615638733 +Loss at step 650: 0.06358864903450012 +Loss at step 700: 0.07609168440103531 +Loss at step 750: 0.08561772853136063 +Loss at step 800: 0.08098775148391724 +Loss at step 850: 0.06596729904413223 +Loss at step 900: 0.09818398952484131 +Mean training loss after epoch 5: 0.07621333939132532 + +EPOCH: 6 +Loss at step 0: 0.07262420654296875 +Loss at step 50: 0.08250504732131958 +Loss at step 100: 0.0771770104765892 +Loss at step 150: 0.10342646390199661 +Loss at step 200: 0.058930426836013794 +Loss at step 250: 0.07842208445072174 +Loss at step 300: 0.07260201871395111 +Loss at step 350: 0.08667268604040146 +Loss at step 400: 0.07582581788301468 +Loss at step 450: 0.06609180569648743 +Loss at step 500: 0.06838462501764297 +Loss at step 550: 0.0686202421784401 +Loss at step 600: 0.07450604438781738 +Loss at step 650: 0.06995474547147751 +Loss at step 700: 0.09052643924951553 +Loss at step 750: 0.06410960853099823 +Loss at step 800: 0.06000955030322075 +Loss at step 850: 0.058537937700748444 +Loss at step 900: 0.05873030051589012 +Mean training loss after epoch 6: 0.07148564022296527 + +EPOCH: 7 +Loss at step 0: 0.08014527708292007 +Loss at step 50: 0.06883670389652252 +Loss at step 100: 0.05498388782143593 +Loss at step 150: 0.049575336277484894 +Loss at step 200: 0.07284802198410034 +Loss at step 250: 0.05627802759408951 +Loss at step 300: 0.08170035481452942 +Loss at step 350: 0.07256447523832321 +Loss at step 400: 0.0588550940155983 +Loss at step 450: 0.07103971391916275 +Loss at step 500: 0.07324987649917603 +Loss at step 550: 0.06593427062034607 +Loss at step 600: 0.0544898696243763 +Loss at step 650: 0.07234711199998856 +Loss at step 700: 0.0544438362121582 +Loss at step 750: 0.057596754282712936 +Loss at step 800: 0.05363701656460762 +Loss at step 850: 0.06061040610074997 +Loss at step 900: 0.050478413701057434 +Mean training loss after epoch 7: 0.061715417988360055 + +EPOCH: 8 +Loss at step 0: 0.06242280825972557 +Loss at step 50: 0.06322493404150009 +Loss at step 100: 0.06131700053811073 +Loss at step 150: 0.0666111558675766 +Loss at step 200: 0.05284688621759415 +Loss at step 250: 0.05521265044808388 +Loss at step 300: 0.0472453273832798 +Loss at step 350: 0.05958862975239754 +Loss at step 400: 0.05755053088068962 +Loss at step 450: 0.05801082402467728 +Loss at step 500: 0.05060916021466255 +Loss at step 550: 0.07028011977672577 +Loss at step 600: 0.06796812266111374 +Loss at step 650: 0.0757439136505127 +Loss at step 700: 0.07297901809215546 +Loss at step 750: 0.04786565154790878 +Loss at step 800: 0.04735714942216873 +Loss at step 850: 0.050904281437397 +Loss at step 900: 0.06405418366193771 +Mean training loss after epoch 8: 0.05874664374569586 + +EPOCH: 9 +Loss at step 0: 0.05896533280611038 +Loss at step 50: 0.045270130038261414 +Loss at step 100: 0.06076472997665405 +Loss at step 150: 0.06235165521502495 +Loss at step 200: 0.05219578742980957 +Loss at step 250: 0.08207492530345917 +Loss at step 300: 0.0685364380478859 +Loss at step 350: 0.05931883305311203 +Loss at step 400: 0.08336658030748367 +Loss at step 450: 0.0543605200946331 +Loss at step 500: 0.06624364852905273 +Loss at step 550: 0.05275069177150726 +Loss at step 600: 0.08901050686836243 +Loss at step 650: 0.059423163533210754 +Loss at step 700: 0.05075065791606903 +Loss at step 750: 0.06457599997520447 +Loss at step 800: 0.07323766499757767 +Loss at step 850: 0.05342152714729309 +Loss at step 900: 0.048495594412088394 +Mean training loss after epoch 9: 0.057180338576912626 + +EPOCH: 10 +Loss at step 0: 0.05593691021203995 +Loss at step 50: 0.0404839850962162 +Loss at step 100: 0.05115879699587822 +Loss at step 150: 0.06267639249563217 +Loss at step 200: 0.05636795982718468 +Loss at step 250: 0.04891287907958031 +Loss at step 300: 0.05107974261045456 +Loss at step 350: 0.04856284707784653 +Loss at step 400: 0.04412970319390297 +Loss at step 450: 0.060254551470279694 +Loss at step 500: 0.051702968776226044 +Loss at step 550: 0.05409117415547371 +Loss at step 600: 0.051070645451545715 +Loss at step 650: 0.052753277122974396 +Loss at step 700: 0.047309085726737976 +Loss at step 750: 0.06630449742078781 +Loss at step 800: 0.05757919326424599 +Loss at step 850: 0.06830720603466034 +Loss at step 900: 0.06392861902713776 +Mean training loss after epoch 10: 0.055850355824364276 + +EPOCH: 11 +Loss at step 0: 0.05673862248659134 +Loss at step 50: 0.07288111001253128 +Loss at step 100: 0.055279891937971115 +Loss at step 150: 0.0829085037112236 +Loss at step 200: 0.05173470079898834 +Loss at step 250: 0.07002110034227371 +Loss at step 300: 0.041786838322877884 +Loss at step 350: 0.05969792976975441 +Loss at step 400: 0.05089668184518814 +Loss at step 450: 0.05513685569167137 +Loss at step 500: 0.06826495379209518 +Loss at step 550: 0.050237782299518585 +Loss at step 600: 0.045402273535728455 +Loss at step 650: 0.037458840757608414 +Loss at step 700: 0.047057949006557465 +Loss at step 750: 0.04279854893684387 +Loss at step 800: 0.05576750636100769 +Loss at step 850: 0.06578527390956879 +Loss at step 900: 0.053406111896038055 +Mean training loss after epoch 11: 0.05481477373682741 + +EPOCH: 12 +Loss at step 0: 0.07697451114654541 +Loss at step 50: 0.07025545835494995 +Loss at step 100: 0.04223531857132912 +Loss at step 150: 0.042284440249204636 +Loss at step 200: 0.05219224467873573 +Loss at step 250: 0.04662042483687401 +Loss at step 300: 0.04564218968153 +Loss at step 350: 0.05778402090072632 +Loss at step 400: 0.04940832406282425 +Loss at step 450: 0.06825084984302521 +Loss at step 500: 0.047397445887327194 +Loss at step 550: 0.05655599385499954 +Loss at step 600: 0.038294047117233276 +Loss at step 650: 0.05758621171116829 +Loss at step 700: 0.05851387232542038 +Loss at step 750: 0.04812590777873993 +Loss at step 800: 0.05467564985156059 +Loss at step 850: 0.03674978390336037 +Loss at step 900: 0.048017699271440506 +Mean training loss after epoch 12: 0.05442187320719014 + +EPOCH: 13 +Loss at step 0: 0.05527647212147713 +Loss at step 50: 0.05649548023939133 +Loss at step 100: 0.043758414685726166 +Loss at step 150: 0.03759016841650009 +Loss at step 200: 0.04890500754117966 +Loss at step 250: 0.05938975140452385 +Loss at step 300: 0.06087874621152878 +Loss at step 350: 0.05134967714548111 +Loss at step 400: 0.05763311684131622 +Loss at step 450: 0.055892378091812134 +Loss at step 500: 0.0512239933013916 +Loss at step 550: 0.04450003057718277 +Loss at step 600: 0.04901410639286041 +Loss at step 650: 0.05313221737742424 +Loss at step 700: 0.05558817461133003 +Loss at step 750: 0.0535665825009346 +Loss at step 800: 0.08542494475841522 +Loss at step 850: 0.05859728157520294 +Loss at step 900: 0.06013469025492668 +Mean training loss after epoch 13: 0.05281311084530247 + +EPOCH: 14 +Loss at step 0: 0.0647217184305191 +Loss at step 50: 0.05076614022254944 +Loss at step 100: 0.03850740194320679 +Loss at step 150: 0.055288080126047134 +Loss at step 200: 0.040912121534347534 +Loss at step 250: 0.04788177087903023 +Loss at step 300: 0.06873010843992233 +Loss at step 350: 0.05255446210503578 +Loss at step 400: 0.0451323427259922 +Loss at step 450: 0.04819340631365776 +Loss at step 500: 0.059297483414411545 +Loss at step 550: 0.045939523726701736 +Loss at step 600: 0.06561025977134705 +Loss at step 650: 0.04447270184755325 +Loss at step 700: 0.06097499653697014 +Loss at step 750: 0.059666190296411514 +Loss at step 800: 0.04177537560462952 +Loss at step 850: 0.0449681282043457 +Loss at step 900: 0.05002983286976814 +Mean training loss after epoch 14: 0.052944248951256656 + +EPOCH: 15 +Loss at step 0: 0.037823475897312164 +Loss at step 50: 0.07952471077442169 +Loss at step 100: 0.04094187170267105 +Loss at step 150: 0.060179922729730606 +Loss at step 200: 0.04950674995779991 +Loss at step 250: 0.06051012873649597 +Loss at step 300: 0.051027752459049225 +Loss at step 350: 0.08003382384777069 +Loss at step 400: 0.054900161921978 +Loss at step 450: 0.03677508607506752 +Loss at step 500: 0.07743576914072037 +Loss at step 550: 0.05444803088903427 +Loss at step 600: 0.05422133579850197 +Loss at step 650: 0.04789598658680916 +Loss at step 700: 0.043794967234134674 +Loss at step 750: 0.04442184790968895 +Loss at step 800: 0.04888957738876343 +Loss at step 850: 0.03554798662662506 +Loss at step 900: 0.054901354014873505 +Mean training loss after epoch 15: 0.05222853389916135 + +EPOCH: 16 +Loss at step 0: 0.04499037191271782 +Loss at step 50: 0.04059568792581558 +Loss at step 100: 0.03880501538515091 +Loss at step 150: 0.04736030101776123 +Loss at step 200: 0.054259732365608215 +Loss at step 250: 0.056340526789426804 +Loss at step 300: 0.04246404767036438 +Loss at step 350: 0.05467449873685837 +Loss at step 400: 0.038072679191827774 +Loss at step 450: 0.05155707895755768 +Loss at step 500: 0.04925785958766937 +Loss at step 550: 0.04731006547808647 +Loss at step 600: 0.05020074173808098 +Loss at step 650: 0.04935452342033386 +Loss at step 700: 0.05027550831437111 +Loss at step 750: 0.04625604674220085 +Loss at step 800: 0.07112246006727219 +Loss at step 850: 0.04084265977144241 +Loss at step 900: 0.05504539981484413 +Mean training loss after epoch 16: 0.05157376733670103 + +EPOCH: 17 +Loss at step 0: 0.07536608725786209 +Loss at step 50: 0.042563579976558685 +Loss at step 100: 0.035157352685928345 +Loss at step 150: 0.05038196220993996 +Loss at step 200: 0.049875665456056595 +Loss at step 250: 0.03781870752573013 +Loss at step 300: 0.056721512228250504 +Loss at step 350: 0.04967416077852249 +Loss at step 400: 0.03947357088327408 +Loss at step 450: 0.053190600126981735 +Loss at step 500: 0.0426432341337204 +Loss at step 550: 0.05922308564186096 +Loss at step 600: 0.054390799254179 +Loss at step 650: 0.08069872856140137 +Loss at step 700: 0.04753753915429115 +Loss at step 750: 0.036334265023469925 +Loss at step 800: 0.04727853834629059 +Loss at step 850: 0.0459805391728878 +Loss at step 900: 0.0718865841627121 +Mean training loss after epoch 17: 0.05112017348392813 + +EPOCH: 18 +Loss at step 0: 0.04541482776403427 +Loss at step 50: 0.06357073783874512 +Loss at step 100: 0.05133732780814171 +Loss at step 150: 0.048947181552648544 +Loss at step 200: 0.04920993372797966 +Loss at step 250: 0.07129635661840439 +Loss at step 300: 0.041890259832143784 +Loss at step 350: 0.04461813345551491 +Loss at step 400: 0.06786632537841797 +Loss at step 450: 0.03864361345767975 +Loss at step 500: 0.05946999043226242 +Loss at step 550: 0.047408659011125565 +Loss at step 600: 0.04371762275695801 +Loss at step 650: 0.05137581750750542 +Loss at step 700: 0.039694201201200485 +Loss at step 750: 0.0421057753264904 +Loss at step 800: 0.056266531348228455 +Loss at step 850: 0.04970719292759895 +Loss at step 900: 0.04250839352607727 +Mean training loss after epoch 18: 0.05031145928002624 + +EPOCH: 19 +Loss at step 0: 0.04314997047185898 +Loss at step 50: 0.041050687432289124 +Loss at step 100: 0.04350142553448677 +Loss at step 150: 0.04735090211033821 +Loss at step 200: 0.042304959148168564 +Loss at step 250: 0.0446118600666523 +Loss at step 300: 0.057678062468767166 +Loss at step 350: 0.05146310478448868 +Loss at step 400: 0.04377828165888786 +Loss at step 450: 0.03493072837591171 +Loss at step 500: 0.06859597563743591 +Loss at step 550: 0.046260152012109756 +Loss at step 600: 0.043077658861875534 +Loss at step 650: 0.055410489439964294 +Loss at step 700: 0.05613521486520767 +Loss at step 750: 0.051349785178899765 +Loss at step 800: 0.04863078147172928 +Loss at step 850: 0.03905788064002991 +Loss at step 900: 0.050505876541137695 +Mean training loss after epoch 19: 0.04981041669662891 + +EPOCH: 20 +Loss at step 0: 0.05344609543681145 +Loss at step 50: 0.04180081561207771 +Loss at step 100: 0.04135894775390625 +Loss at step 150: 0.04881883040070534 +Loss at step 200: 0.05157637596130371 +Loss at step 250: 0.06887549161911011 +Loss at step 300: 0.04278310760855675 +Loss at step 350: 0.05660872906446457 +Loss at step 400: 0.04872731491923332 +Loss at step 450: 0.04364878311753273 +Loss at step 500: 0.06668026745319366 +Loss at step 550: 0.046359237283468246 +Loss at step 600: 0.03858613967895508 +Loss at step 650: 0.0463811419904232 +Loss at step 700: 0.0656844824552536 +Loss at step 750: 0.04059052839875221 +Loss at step 800: 0.04606105759739876 +Loss at step 850: 0.04518028721213341 +Loss at step 900: 0.0448128879070282 +Mean training loss after epoch 20: 0.04971912971088119 + +EPOCH: 21 +Loss at step 0: 0.04147127643227577 +Loss at step 50: 0.06674125045537949 +Loss at step 100: 0.046588215976953506 +Loss at step 150: 0.04992952197790146 +Loss at step 200: 0.05683210864663124 +Loss at step 250: 0.06446004658937454 +Loss at step 300: 0.07351561635732651 +Loss at step 350: 0.048821087926626205 +Loss at step 400: 0.06500155478715897 +Loss at step 450: 0.045991450548172 +Loss at step 500: 0.04785887897014618 +Loss at step 550: 0.048360276967287064 +Loss at step 600: 0.06245497241616249 +Loss at step 650: 0.04141472652554512 +Loss at step 700: 0.04987093433737755 +Loss at step 750: 0.0448041595518589 +Loss at step 800: 0.039200447499752045 +Loss at step 850: 0.061862435191869736 +Loss at step 900: 0.0374799408018589 +Mean training loss after epoch 21: 0.04948336795083622 + +EPOCH: 22 +Loss at step 0: 0.04231114313006401 +Loss at step 50: 0.051757242530584335 +Loss at step 100: 0.052523884922266006 +Loss at step 150: 0.04511893540620804 +Loss at step 200: 0.047847066074609756 +Loss at step 250: 0.03973916918039322 +Loss at step 300: 0.04602658003568649 +Loss at step 350: 0.04559744521975517 +Loss at step 400: 0.038105208426713943 +Loss at step 450: 0.04447225108742714 +Loss at step 500: 0.052992742508649826 +Loss at step 550: 0.04886539652943611 +Loss at step 600: 0.04174038767814636 +Loss at step 650: 0.0403764434158802 +Loss at step 700: 0.037883441895246506 +Loss at step 750: 0.04011991620063782 +Loss at step 800: 0.04163298383355141 +Loss at step 850: 0.0440143346786499 +Loss at step 900: 0.04741881787776947 +Mean training loss after epoch 22: 0.04886792954812045 + +EPOCH: 23 +Loss at step 0: 0.043948426842689514 +Loss at step 50: 0.04617948457598686 +Loss at step 100: 0.041180990636348724 +Loss at step 150: 0.07108350843191147 +Loss at step 200: 0.05402369797229767 +Loss at step 250: 0.04757066071033478 +Loss at step 300: 0.04691469669342041 +Loss at step 350: 0.056709371507167816 +Loss at step 400: 0.08028485625982285 +Loss at step 450: 0.03732737526297569 +Loss at step 500: 0.0625799298286438 +Loss at step 550: 0.06315683573484421 +Loss at step 600: 0.06575877964496613 +Loss at step 650: 0.03729895502328873 +Loss at step 700: 0.04726678505539894 +Loss at step 750: 0.052553772926330566 +Loss at step 800: 0.04982905834913254 +Loss at step 850: 0.04016149044036865 +Loss at step 900: 0.0371665395796299 +Mean training loss after epoch 23: 0.04921432044793929 + +EPOCH: 24 +Loss at step 0: 0.06877944618463516 +Loss at step 50: 0.04670701548457146 +Loss at step 100: 0.05103413760662079 +Loss at step 150: 0.04245966300368309 +Loss at step 200: 0.0422213077545166 +Loss at step 250: 0.05819988250732422 +Loss at step 300: 0.03808581456542015 +Loss at step 350: 0.044201094657182693 +Loss at step 400: 0.04023940488696098 +Loss at step 450: 0.03730885311961174 +Loss at step 500: 0.0679292157292366 +Loss at step 550: 0.04315111041069031 +Loss at step 600: 0.040241699665784836 +Loss at step 650: 0.048743754625320435 +Loss at step 700: 0.06133408471941948 +Loss at step 750: 0.035759568214416504 +Loss at step 800: 0.07543523609638214 +Loss at step 850: 0.041768599301576614 +Loss at step 900: 0.0580260194838047 +Mean training loss after epoch 24: 0.04907009113572045 + +EPOCH: 25 +Loss at step 0: 0.04890033230185509 +Loss at step 50: 0.04610893502831459 +Loss at step 100: 0.062320057302713394 +Loss at step 150: 0.04533512517809868 +Loss at step 200: 0.04547395929694176 +Loss at step 250: 0.05030956491827965 +Loss at step 300: 0.06314963847398758 +Loss at step 350: 0.04957621917128563 +Loss at step 400: 0.04225068539381027 +Loss at step 450: 0.060565605759620667 +Loss at step 500: 0.04107939824461937 +Loss at step 550: 0.04405638575553894 +Loss at step 600: 0.035555195063352585 +Loss at step 650: 0.04223763942718506 +Loss at step 700: 0.05385289713740349 +Loss at step 750: 0.04406140744686127 +Loss at step 800: 0.04753494635224342 +Loss at step 850: 0.03908270597457886 +Loss at step 900: 0.056173697113990784 +Mean training loss after epoch 25: 0.04854819401185205 + +EPOCH: 26 +Loss at step 0: 0.04513881728053093 +Loss at step 50: 0.04251381754875183 +Loss at step 100: 0.056283511221408844 +Loss at step 150: 0.03885107487440109 +Loss at step 200: 0.04387012869119644 +Loss at step 250: 0.08682840317487717 +Loss at step 300: 0.042384058237075806 +Loss at step 350: 0.04147539287805557 +Loss at step 400: 0.037699781358242035 +Loss at step 450: 0.08088959008455276 +Loss at step 500: 0.045566800981760025 +Loss at step 550: 0.04850791022181511 +Loss at step 600: 0.03581390157341957 +Loss at step 650: 0.036787454038858414 +Loss at step 700: 0.04770814999938011 +Loss at step 750: 0.044783566147089005 +Loss at step 800: 0.031522780656814575 +Loss at step 850: 0.04192390292882919 +Loss at step 900: 0.053765568882226944 +Mean training loss after epoch 26: 0.048748386362547684 + +EPOCH: 27 +Loss at step 0: 0.062231797724962234 +Loss at step 50: 0.05955164507031441 +Loss at step 100: 0.039682600647211075 +Loss at step 150: 0.06752344220876694 +Loss at step 200: 0.061354734003543854 +Loss at step 250: 0.037967052310705185 +Loss at step 300: 0.04776729643344879 +Loss at step 350: 0.0504353865981102 +Loss at step 400: 0.04052473604679108 +Loss at step 450: 0.05573800951242447 +Loss at step 500: 0.04068378359079361 +Loss at step 550: 0.03938471898436546 +Loss at step 600: 0.04409739375114441 +Loss at step 650: 0.06390330195426941 +Loss at step 700: 0.04190912842750549 +Loss at step 750: 0.04705492779612541 +Loss at step 800: 0.05942341312766075 +Loss at step 850: 0.054906755685806274 +Loss at step 900: 0.04091959819197655 +Mean training loss after epoch 27: 0.04785118063590102 + +EPOCH: 28 +Loss at step 0: 0.040065884590148926 +Loss at step 50: 0.041816335171461105 +Loss at step 100: 0.04768802598118782 +Loss at step 150: 0.04224841296672821 +Loss at step 200: 0.0600610077381134 +Loss at step 250: 0.04674086347222328 +Loss at step 300: 0.03749881684780121 +Loss at step 350: 0.05725927650928497 +Loss at step 400: 0.03463057056069374 +Loss at step 450: 0.04139943793416023 +Loss at step 500: 0.036785632371902466 +Loss at step 550: 0.037280261516571045 +Loss at step 600: 0.042486049234867096 +Loss at step 650: 0.039891812950372696 +Loss at step 700: 0.045055538415908813 +Loss at step 750: 0.044806648045778275 +Loss at step 800: 0.035296935588121414 +Loss at step 850: 0.03997200354933739 +Loss at step 900: 0.037372902035713196 +Mean training loss after epoch 28: 0.04802111568433771 + +EPOCH: 29 +Loss at step 0: 0.04029547795653343 +Loss at step 50: 0.0373159721493721 +Loss at step 100: 0.03195224702358246 +Loss at step 150: 0.03967324644327164 +Loss at step 200: 0.043493278324604034 +Loss at step 250: 0.0421735905110836 +Loss at step 300: 0.05834754556417465 +Loss at step 350: 0.04977051913738251 +Loss at step 400: 0.04890020564198494 +Loss at step 450: 0.06144270300865173 +Loss at step 500: 0.031416650861501694 +Loss at step 550: 0.05089873448014259 +Loss at step 600: 0.0559379979968071 +Loss at step 650: 0.0431060828268528 +Loss at step 700: 0.07970152795314789 +Loss at step 750: 0.056120067834854126 +Loss at step 800: 0.03743378818035126 +Loss at step 850: 0.04540432617068291 +Loss at step 900: 0.03750884160399437 +Mean training loss after epoch 29: 0.0479990868275163 + +EPOCH: 30 +Loss at step 0: 0.06987270712852478 +Loss at step 50: 0.04634237289428711 +Loss at step 100: 0.04522722586989403 +Loss at step 150: 0.037521373480558395 +Loss at step 200: 0.04052366316318512 +Loss at step 250: 0.047215189784765244 +Loss at step 300: 0.03429022431373596 +Loss at step 350: 0.042049288749694824 +Loss at step 400: 0.04067468270659447 +Loss at step 450: 0.04121248796582222 +Loss at step 500: 0.041117824614048004 +Loss at step 550: 0.04192543774843216 +Loss at step 600: 0.0396721288561821 +Loss at step 650: 0.0341549776494503 +Loss at step 700: 0.0468466654419899 +Loss at step 750: 0.047854892909526825 +Loss at step 800: 0.04301496222615242 +Loss at step 850: 0.06360778212547302 +Loss at step 900: 0.04396238178014755 +Mean training loss after epoch 30: 0.047711722947943055 + +EPOCH: 31 +Loss at step 0: 0.046192727982997894 +Loss at step 50: 0.056457825005054474 +Loss at step 100: 0.04437893256545067 +Loss at step 150: 0.03571672737598419 +Loss at step 200: 0.041461531072854996 +Loss at step 250: 0.038135726004838943 +Loss at step 300: 0.04517757520079613 +Loss at step 350: 0.03788148984313011 +Loss at step 400: 0.04530879482626915 +Loss at step 450: 0.04953639209270477 +Loss at step 500: 0.04382310435175896 +Loss at step 550: 0.04811768978834152 +Loss at step 600: 0.05481934919953346 +Loss at step 650: 0.04873424395918846 +Loss at step 700: 0.03968692570924759 +Loss at step 750: 0.0569743812084198 +Loss at step 800: 0.034903742372989655 +Loss at step 850: 0.041342843323946 +Loss at step 900: 0.04470614343881607 +Mean training loss after epoch 31: 0.04718852380135738 + +EPOCH: 32 +Loss at step 0: 0.04153997823596001 +Loss at step 50: 0.05805188789963722 +Loss at step 100: 0.041979819536209106 +Loss at step 150: 0.056203462183475494 +Loss at step 200: 0.0658581554889679 +Loss at step 250: 0.047670237720012665 +Loss at step 300: 0.04966076835989952 +Loss at step 350: 0.03970366343855858 +Loss at step 400: 0.04131742939352989 +Loss at step 450: 0.0559941828250885 +Loss at step 500: 0.0515303798019886 +Loss at step 550: 0.03879852220416069 +Loss at step 600: 0.04554179310798645 +Loss at step 650: 0.041446492075920105 +Loss at step 700: 0.045973826199769974 +Loss at step 750: 0.05146137624979019 +Loss at step 800: 0.050301820039749146 +Loss at step 850: 0.05406748503446579 +Loss at step 900: 0.047541599720716476 +Mean training loss after epoch 32: 0.04734661062555844 + +EPOCH: 33 +Loss at step 0: 0.04255426675081253 +Loss at step 50: 0.045197855681180954 +Loss at step 100: 0.04791636765003204 +Loss at step 150: 0.053649891167879105 +Loss at step 200: 0.04649053514003754 +Loss at step 250: 0.06254279613494873 +Loss at step 300: 0.04070403426885605 +Loss at step 350: 0.06130688264966011 +Loss at step 400: 0.033172477036714554 +Loss at step 450: 0.0585973784327507 +Loss at step 500: 0.03862085938453674 +Loss at step 550: 0.05066192150115967 +Loss at step 600: 0.04806030914187431 +Loss at step 650: 0.040029771625995636 +Loss at step 700: 0.05965670570731163 +Loss at step 750: 0.04943963512778282 +Loss at step 800: 0.04053017124533653 +Loss at step 850: 0.0498194620013237 +Loss at step 900: 0.0408606119453907 +Mean training loss after epoch 33: 0.0466234264577598 + +EPOCH: 34 +Loss at step 0: 0.03834282234311104 +Loss at step 50: 0.04476098716259003 +Loss at step 100: 0.03126645088195801 +Loss at step 150: 0.04436739161610603 +Loss at step 200: 0.035322658717632294 +Loss at step 250: 0.04000015929341316 +Loss at step 300: 0.03503677248954773 +Loss at step 350: 0.036471109837293625 +Loss at step 400: 0.041077565401792526 +Loss at step 450: 0.05009006708860397 +Loss at step 500: 0.03923311457037926 +Loss at step 550: 0.027845796197652817 +Loss at step 600: 0.044510193169116974 +Loss at step 650: 0.04472494497895241 +Loss at step 700: 0.036923471838235855 +Loss at step 750: 0.03947778046131134 +Loss at step 800: 0.04731610417366028 +Loss at step 850: 0.0408109687268734 +Loss at step 900: 0.04986851289868355 +Mean training loss after epoch 34: 0.046696712082224104 + +EPOCH: 35 +Loss at step 0: 0.05347377434372902 +Loss at step 50: 0.04718519747257233 +Loss at step 100: 0.03765302151441574 +Loss at step 150: 0.04094993695616722 +Loss at step 200: 0.03967397287487984 +Loss at step 250: 0.04097428172826767 +Loss at step 300: 0.044033098965883255 +Loss at step 350: 0.04170210286974907 +Loss at step 400: 0.0495026521384716 +Loss at step 450: 0.04580632597208023 +Loss at step 500: 0.03558526188135147 +Loss at step 550: 0.03231578320264816 +Loss at step 600: 0.03808858245611191 +Loss at step 650: 0.07824570685625076 +Loss at step 700: 0.054205626249313354 +Loss at step 750: 0.04045689105987549 +Loss at step 800: 0.05172043293714523 +Loss at step 850: 0.04387115314602852 +Loss at step 900: 0.041183892637491226 +Mean training loss after epoch 35: 0.046853596549123716 + +EPOCH: 36 +Loss at step 0: 0.033817023038864136 +Loss at step 50: 0.044775836169719696 +Loss at step 100: 0.04400419443845749 +Loss at step 150: 0.03964809328317642 +Loss at step 200: 0.0711473822593689 +Loss at step 250: 0.05198086053133011 +Loss at step 300: 0.04352946951985359 +Loss at step 350: 0.04083481431007385 +Loss at step 400: 0.05258826166391373 +Loss at step 450: 0.05412430688738823 +Loss at step 500: 0.036205705255270004 +Loss at step 550: 0.051293227821588516 +Loss at step 600: 0.039234597235918045 +Loss at step 650: 0.04175081476569176 +Loss at step 700: 0.044233884662389755 +Loss at step 750: 0.047170866280794144 +Loss at step 800: 0.06287672370672226 +Loss at step 850: 0.045048587024211884 +Loss at step 900: 0.047904081642627716 +Mean training loss after epoch 36: 0.04643794561007511 + +EPOCH: 37 +Loss at step 0: 0.04380636289715767 +Loss at step 50: 0.05075736716389656 +Loss at step 100: 0.03939420357346535 +Loss at step 150: 0.041874587535858154 +Loss at step 200: 0.03474506363272667 +Loss at step 250: 0.04335370287299156 +Loss at step 300: 0.03736516833305359 +Loss at step 350: 0.04953424260020256 +Loss at step 400: 0.034158531576395035 +Loss at step 450: 0.09222576022148132 +Loss at step 500: 0.03409889340400696 +Loss at step 550: 0.05518524721264839 +Loss at step 600: 0.04299849644303322 +Loss at step 650: 0.03976324200630188 +Loss at step 700: 0.04704044759273529 +Loss at step 750: 0.05765819549560547 +Loss at step 800: 0.06325271725654602 +Loss at step 850: 0.041912227869033813 +Loss at step 900: 0.03947821259498596 +Mean training loss after epoch 37: 0.046509189958146006 + +EPOCH: 38 +Loss at step 0: 0.04105685278773308 +Loss at step 50: 0.03853827714920044 +Loss at step 100: 0.0615091510117054 +Loss at step 150: 0.037095218896865845 +Loss at step 200: 0.07169077545404434 +Loss at step 250: 0.04093547165393829 +Loss at step 300: 0.03914790228009224 +Loss at step 350: 0.041914209723472595 +Loss at step 400: 0.04600575566291809 +Loss at step 450: 0.04167331010103226 +Loss at step 500: 0.05215302109718323 +Loss at step 550: 0.057997964322566986 +Loss at step 600: 0.04561407491564751 +Loss at step 650: 0.035688042640686035 +Loss at step 700: 0.041937340050935745 +Loss at step 750: 0.042008861899375916 +Loss at step 800: 0.043819013983011246 +Loss at step 850: 0.06488332897424698 +Loss at step 900: 0.04783814772963524 +Mean training loss after epoch 38: 0.04656100857343628 + +EPOCH: 39 +Loss at step 0: 0.046811822801828384 +Loss at step 50: 0.03458044305443764 +Loss at step 100: 0.03519456088542938 +Loss at step 150: 0.04023684188723564 +Loss at step 200: 0.0593542717397213 +Loss at step 250: 0.04351789876818657 +Loss at step 300: 0.03666255623102188 +Loss at step 350: 0.042385295033454895 +Loss at step 400: 0.04535805433988571 +Loss at step 450: 0.040133584290742874 +Loss at step 500: 0.04300827905535698 +Loss at step 550: 0.03428584709763527 +Loss at step 600: 0.043937813490629196 +Loss at step 650: 0.04107518866658211 +Loss at step 700: 0.0656203031539917 +Loss at step 750: 0.04333982616662979 +Loss at step 800: 0.04911601543426514 +Loss at step 850: 0.04030092433094978 +Loss at step 900: 0.037120744585990906 +Mean training loss after epoch 39: 0.045584303543352876 + +EPOCH: 40 +Loss at step 0: 0.043902307748794556 +Loss at step 50: 0.04687623307108879 +Loss at step 100: 0.02807818166911602 +Loss at step 150: 0.0314083956182003 +Loss at step 200: 0.03794383257627487 +Loss at step 250: 0.06447502225637436 +Loss at step 300: 0.03832196444272995 +Loss at step 350: 0.04955507069826126 +Loss at step 400: 0.04458783194422722 +Loss at step 450: 0.06182854250073433 +Loss at step 500: 0.034158360213041306 +Loss at step 550: 0.05126744136214256 +Loss at step 600: 0.04173079878091812 +Loss at step 650: 0.047507818788290024 +Loss at step 700: 0.039553675800561905 +Loss at step 750: 0.036442361772060394 +Loss at step 800: 0.03931163251399994 +Loss at step 850: 0.03706500306725502 +Loss at step 900: 0.041083257645368576 +Mean training loss after epoch 40: 0.04583525314712639 + +EPOCH: 41 +Loss at step 0: 0.04119272530078888 +Loss at step 50: 0.03651197627186775 +Loss at step 100: 0.04471709579229355 +Loss at step 150: 0.047409866005182266 +Loss at step 200: 0.057044386863708496 +Loss at step 250: 0.03891262039542198 +Loss at step 300: 0.058914415538311005 +Loss at step 350: 0.04932182654738426 +Loss at step 400: 0.03527601808309555 +Loss at step 450: 0.05954860895872116 +Loss at step 500: 0.05854950472712517 +Loss at step 550: 0.04339790716767311 +Loss at step 600: 0.04158823564648628 +Loss at step 650: 0.07653175294399261 +Loss at step 700: 0.06495469808578491 +Loss at step 750: 0.043791264295578 +Loss at step 800: 0.036664631217718124 +Loss at step 850: 0.04060354083776474 +Loss at step 900: 0.05105862393975258 +Mean training loss after epoch 41: 0.04578510606919588 + +EPOCH: 42 +Loss at step 0: 0.036811619997024536 +Loss at step 50: 0.03967383876442909 +Loss at step 100: 0.046618491411209106 +Loss at step 150: 0.04137549176812172 +Loss at step 200: 0.04536155238747597 +Loss at step 250: 0.0344829335808754 +Loss at step 300: 0.04808695986866951 +Loss at step 350: 0.034518640488386154 +Loss at step 400: 0.040801674127578735 +Loss at step 450: 0.04482355713844299 +Loss at step 500: 0.05345488339662552 +Loss at step 550: 0.042088404297828674 +Loss at step 600: 0.03969200327992439 +Loss at step 650: 0.03745196387171745 +Loss at step 700: 0.05043499544262886 +Loss at step 750: 0.03882772848010063 +Loss at step 800: 0.040638964623212814 +Loss at step 850: 0.059426199644804 +Loss at step 900: 0.055314578115940094 +Mean training loss after epoch 42: 0.0457447647595647 + +EPOCH: 43 +Loss at step 0: 0.039296090602874756 +Loss at step 50: 0.03757215291261673 +Loss at step 100: 0.045208077877759933 +Loss at step 150: 0.05504849553108215 +Loss at step 200: 0.03858547285199165 +Loss at step 250: 0.052830155938863754 +Loss at step 300: 0.03214762732386589 +Loss at step 350: 0.054610736668109894 +Loss at step 400: 0.072967529296875 +Loss at step 450: 0.045324429869651794 +Loss at step 500: 0.05956553295254707 +Loss at step 550: 0.040348950773477554 +Loss at step 600: 0.038659922778606415 +Loss at step 650: 0.04288933053612709 +Loss at step 700: 0.05102803558111191 +Loss at step 750: 0.04348177835345268 +Loss at step 800: 0.03679901733994484 +Loss at step 850: 0.04044809564948082 +Loss at step 900: 0.05503632500767708 +Mean training loss after epoch 43: 0.04555305257550816 + +EPOCH: 44 +Loss at step 0: 0.03301229327917099 +Loss at step 50: 0.03727089241147041 +Loss at step 100: 0.042083825916051865 +Loss at step 150: 0.03446154296398163 +Loss at step 200: 0.0325627475976944 +Loss at step 250: 0.03536694869399071 +Loss at step 300: 0.03226042538881302 +Loss at step 350: 0.029795240610837936 +Loss at step 400: 0.06270813196897507 +Loss at step 450: 0.052609071135520935 +Loss at step 500: 0.04123945161700249 +Loss at step 550: 0.04485645145177841 +Loss at step 600: 0.03443359211087227 +Loss at step 650: 0.05156237632036209 +Loss at step 700: 0.05249813199043274 +Loss at step 750: 0.04118892922997475 +Loss at step 800: 0.042128149420022964 +Loss at step 850: 0.05222824215888977 +Loss at step 900: 0.04898892343044281 +Mean training loss after epoch 44: 0.045909754842567416 + +EPOCH: 45 +Loss at step 0: 0.03726803511381149 +Loss at step 50: 0.041821129620075226 +Loss at step 100: 0.04788229241967201 +Loss at step 150: 0.07195736467838287 +Loss at step 200: 0.04190758243203163 +Loss at step 250: 0.04406964033842087 +Loss at step 300: 0.045066285878419876 +Loss at step 350: 0.08542950451374054 +Loss at step 400: 0.036356471478939056 +Loss at step 450: 0.05726455897092819 +Loss at step 500: 0.04086785390973091 +Loss at step 550: 0.04460451379418373 +Loss at step 600: 0.04124069958925247 +Loss at step 650: 0.03595242276787758 +Loss at step 700: 0.05711403861641884 +Loss at step 750: 0.04426044225692749 +Loss at step 800: 0.03155608847737312 +Loss at step 850: 0.04087338596582413 +Loss at step 900: 0.03957609087228775 +Mean training loss after epoch 45: 0.04559232201625798 + +EPOCH: 46 +Loss at step 0: 0.03411095589399338 +Loss at step 50: 0.04670671746134758 +Loss at step 100: 0.05237060412764549 +Loss at step 150: 0.036262497305870056 +Loss at step 200: 0.057636260986328125 +Loss at step 250: 0.04624015465378761 +Loss at step 300: 0.0441092886030674 +Loss at step 350: 0.053948499262332916 +Loss at step 400: 0.0504290908575058 +Loss at step 450: 0.058462247252464294 +Loss at step 500: 0.03294298052787781 +Loss at step 550: 0.03885740414261818 +Loss at step 600: 0.03582470864057541 +Loss at step 650: 0.03290253505110741 +Loss at step 700: 0.034320082515478134 +Loss at step 750: 0.04563366621732712 +Loss at step 800: 0.037790633738040924 +Loss at step 850: 0.04004897549748421 +Loss at step 900: 0.05734499916434288 +Mean training loss after epoch 46: 0.045401750487892995 + +EPOCH: 47 +Loss at step 0: 0.057515017688274384 +Loss at step 50: 0.05918058380484581 +Loss at step 100: 0.06794729083776474 +Loss at step 150: 0.04365697130560875 +Loss at step 200: 0.03533811867237091 +Loss at step 250: 0.0432097464799881 +Loss at step 300: 0.04337077960371971 +Loss at step 350: 0.05455371364951134 +Loss at step 400: 0.0480773001909256 +Loss at step 450: 0.03549404814839363 +Loss at step 500: 0.04109460487961769 +Loss at step 550: 0.05278811976313591 +Loss at step 600: 0.044051751494407654 +Loss at step 650: 0.03891625255346298 +Loss at step 700: 0.033852893859148026 +Loss at step 750: 0.05946379527449608 +Loss at step 800: 0.03839148208498955 +Loss at step 850: 0.05594377964735031 +Loss at step 900: 0.04837467148900032 +Mean training loss after epoch 47: 0.04540569746672218 + +EPOCH: 48 +Loss at step 0: 0.03606181964278221 +Loss at step 50: 0.0393998920917511 +Loss at step 100: 0.0373966284096241 +Loss at step 150: 0.03726385161280632 +Loss at step 200: 0.053862374275922775 +Loss at step 250: 0.039281781762838364 +Loss at step 300: 0.052047569304704666 +Loss at step 350: 0.04218338802456856 +Loss at step 400: 0.054648831486701965 +Loss at step 450: 0.03355717286467552 +Loss at step 500: 0.04461154714226723 +Loss at step 550: 0.05380718782544136 +Loss at step 600: 0.03750714287161827 +Loss at step 650: 0.04356677457690239 +Loss at step 700: 0.056141939014196396 +Loss at step 750: 0.05722740665078163 +Loss at step 800: 0.05378589779138565 +Loss at step 850: 0.04318094253540039 +Loss at step 900: 0.03716384992003441 +Mean training loss after epoch 48: 0.04575384488261775 + +EPOCH: 49 +Loss at step 0: 0.03405068814754486 +Loss at step 50: 0.0324208065867424 +Loss at step 100: 0.058766111731529236 +Loss at step 150: 0.050890326499938965 +Loss at step 200: 0.07835625112056732 +Loss at step 250: 0.06103396788239479 +Loss at step 300: 0.045262910425662994 +Loss at step 350: 0.039874423295259476 +Loss at step 400: 0.04232581704854965 +Loss at step 450: 0.0406830869615078 +Loss at step 500: 0.034022778272628784 +Loss at step 550: 0.042880669236183167 +Loss at step 600: 0.04260297492146492 +Loss at step 650: 0.03611469268798828 +Loss at step 700: 0.04044531658291817 +Loss at step 750: 0.04906051978468895 +Loss at step 800: 0.039296798408031464 +Loss at step 850: 0.07345636188983917 +Loss at step 900: 0.03570806607604027 +Mean training loss after epoch 49: 0.04555136628591938 + +EPOCH: 50 +Loss at step 0: 0.03376990929245949 +Loss at step 50: 0.03915917128324509 +Loss at step 100: 0.045188069343566895 +Loss at step 150: 0.03396744653582573 +Loss at step 200: 0.037946101278066635 +Loss at step 250: 0.048230335116386414 +Loss at step 300: 0.03957950696349144 +Loss at step 350: 0.06345433741807938 +Loss at step 400: 0.05446555092930794 +Loss at step 450: 0.041967470198869705 +Loss at step 500: 0.06003414839506149 +Loss at step 550: 0.04475271329283714 +Loss at step 600: 0.0462694950401783 +Loss at step 650: 0.03884352743625641 +Loss at step 700: 0.03437794744968414 +Loss at step 750: 0.03485594317317009 +Loss at step 800: 0.03861059248447418 +Loss at step 850: 0.04278019815683365 +Loss at step 900: 0.036629848182201385 +Mean training loss after epoch 50: 0.04574327027675376 + +EPOCH: 51 +Loss at step 0: 0.04773973673582077 +Loss at step 50: 0.04592078924179077 +Loss at step 100: 0.04153735190629959 +Loss at step 150: 0.04289252310991287 +Loss at step 200: 0.05378266051411629 +Loss at step 250: 0.05587004870176315 +Loss at step 300: 0.04163934290409088 +Loss at step 350: 0.0363738052546978 +Loss at step 400: 0.037308674305677414 +Loss at step 450: 0.03993014246225357 +Loss at step 500: 0.038397662341594696 +Loss at step 550: 0.03834199905395508 +Loss at step 600: 0.04541454836726189 +Loss at step 650: 0.03435764089226723 +Loss at step 700: 0.049492184072732925 +Loss at step 750: 0.06151849403977394 +Loss at step 800: 0.04823997616767883 +Loss at step 850: 0.05454571172595024 +Loss at step 900: 0.05095576122403145 +Mean training loss after epoch 51: 0.04479894658792883 + +EPOCH: 52 +Loss at step 0: 0.05793466791510582 +Loss at step 50: 0.0341513492166996 +Loss at step 100: 0.031205492094159126 +Loss at step 150: 0.04038567468523979 +Loss at step 200: 0.04924270883202553 +Loss at step 250: 0.03751857206225395 +Loss at step 300: 0.03824424743652344 +Loss at step 350: 0.03565317019820213 +Loss at step 400: 0.03829163685441017 +Loss at step 450: 0.04630826786160469 +Loss at step 500: 0.036059457808732986 +Loss at step 550: 0.0380818210542202 +Loss at step 600: 0.05621505156159401 +Loss at step 650: 0.04004345089197159 +Loss at step 700: 0.04257235303521156 +Loss at step 750: 0.050609856843948364 +Loss at step 800: 0.03490433469414711 +Loss at step 850: 0.06108823046088219 +Loss at step 900: 0.03759215399622917 +Mean training loss after epoch 52: 0.04466682242781623 + +EPOCH: 53 +Loss at step 0: 0.07153818756341934 +Loss at step 50: 0.037624310702085495 +Loss at step 100: 0.04795879125595093 +Loss at step 150: 0.042059168219566345 +Loss at step 200: 0.04592517390847206 +Loss at step 250: 0.047940898686647415 +Loss at step 300: 0.03760400414466858 +Loss at step 350: 0.058457329869270325 +Loss at step 400: 0.04059361293911934 +Loss at step 450: 0.03498782962560654 +Loss at step 500: 0.03895006701350212 +Loss at step 550: 0.03143053501844406 +Loss at step 600: 0.04286786541342735 +Loss at step 650: 0.02890370972454548 +Loss at step 700: 0.03054078109562397 +Loss at step 750: 0.0536319725215435 +Loss at step 800: 0.03379025310277939 +Loss at step 850: 0.041429825127124786 +Loss at step 900: 0.036586616188287735 +Mean training loss after epoch 53: 0.045109023038210516 + +EPOCH: 54 +Loss at step 0: 0.034632205963134766 +Loss at step 50: 0.050900544971227646 +Loss at step 100: 0.055058639496564865 +Loss at step 150: 0.03736027702689171 +Loss at step 200: 0.03886985033750534 +Loss at step 250: 0.04547104611992836 +Loss at step 300: 0.03748707473278046 +Loss at step 350: 0.04005163908004761 +Loss at step 400: 0.04667902737855911 +Loss at step 450: 0.058137696236371994 +Loss at step 500: 0.036225397139787674 +Loss at step 550: 0.04980180785059929 +Loss at step 600: 0.0319833941757679 +Loss at step 650: 0.04132211208343506 +Loss at step 700: 0.031180264428257942 +Loss at step 750: 0.055976614356040955 +Loss at step 800: 0.040415141731500626 +Loss at step 850: 0.04537496343255043 +Loss at step 900: 0.06559032946825027 +Mean training loss after epoch 54: 0.0447263324688842 + +EPOCH: 55 +Loss at step 0: 0.03894679620862007 +Loss at step 50: 0.055382825434207916 +Loss at step 100: 0.05500679835677147 +Loss at step 150: 0.03706794232130051 +Loss at step 200: 0.03788752481341362 +Loss at step 250: 0.044823382049798965 +Loss at step 300: 0.04081863537430763 +Loss at step 350: 0.051558658480644226 +Loss at step 400: 0.04245118051767349 +Loss at step 450: 0.032435957342386246 +Loss at step 500: 0.043013881891965866 +Loss at step 550: 0.06566750258207321 +Loss at step 600: 0.03832860663533211 +Loss at step 650: 0.04228802025318146 +Loss at step 700: 0.06671218574047089 +Loss at step 750: 0.03785625100135803 +Loss at step 800: 0.036910444498062134 +Loss at step 850: 0.0428953543305397 +Loss at step 900: 0.04201735183596611 +Mean training loss after epoch 55: 0.0454020186234067 + +EPOCH: 56 +Loss at step 0: 0.04074004292488098 +Loss at step 50: 0.037089236080646515 +Loss at step 100: 0.038650039583444595 +Loss at step 150: 0.0470503568649292 +Loss at step 200: 0.0335656963288784 +Loss at step 250: 0.05130336433649063 +Loss at step 300: 0.041321609169244766 +Loss at step 350: 0.05360228195786476 +Loss at step 400: 0.054425470530986786 +Loss at step 450: 0.034286580979824066 +Loss at step 500: 0.05873318016529083 +Loss at step 550: 0.04183107987046242 +Loss at step 600: 0.04275026172399521 +Loss at step 650: 0.036619555205106735 +Loss at step 700: 0.04658738896250725 +Loss at step 750: 0.044796522706747055 +Loss at step 800: 0.06018178537487984 +Loss at step 850: 0.06020748242735863 +Loss at step 900: 0.04875624179840088 +Mean training loss after epoch 56: 0.04514047994947573 + +EPOCH: 57 +Loss at step 0: 0.037959903478622437 +Loss at step 50: 0.045734651386737823 +Loss at step 100: 0.03870757669210434 +Loss at step 150: 0.053085923194885254 +Loss at step 200: 0.043345555663108826 +Loss at step 250: 0.04010351002216339 +Loss at step 300: 0.036371856927871704 +Loss at step 350: 0.037596240639686584 +Loss at step 400: 0.03472841903567314 +Loss at step 450: 0.043242860585451126 +Loss at step 500: 0.04697151854634285 +Loss at step 550: 0.03386232256889343 +Loss at step 600: 0.05848577991127968 +Loss at step 650: 0.037965212017297745 +Loss at step 700: 0.0595490001142025 +Loss at step 750: 0.07245443761348724 +Loss at step 800: 0.04417521506547928 +Loss at step 850: 0.03808566927909851 +Loss at step 900: 0.040217895060777664 +Mean training loss after epoch 57: 0.04452560655772686 + +EPOCH: 58 +Loss at step 0: 0.04217497631907463 +Loss at step 50: 0.04118611663579941 +Loss at step 100: 0.03760745748877525 +Loss at step 150: 0.03788227215409279 +Loss at step 200: 0.036340679973363876 +Loss at step 250: 0.044277723878622055 +Loss at step 300: 0.07555166631937027 +Loss at step 350: 0.039096567779779434 +Loss at step 400: 0.0501922108232975 +Loss at step 450: 0.04151548445224762 +Loss at step 500: 0.04863559827208519 +Loss at step 550: 0.04006786644458771 +Loss at step 600: 0.03473867475986481 +Loss at step 650: 0.03917982801795006 +Loss at step 700: 0.038434259593486786 +Loss at step 750: 0.04143731668591499 +Loss at step 800: 0.04229950159788132 +Loss at step 850: 0.04580509290099144 +Loss at step 900: 0.04491458460688591 +Mean training loss after epoch 58: 0.044204890307015195 + +EPOCH: 59 +Loss at step 0: 0.04443920776247978 +Loss at step 50: 0.038615334779024124 +Loss at step 100: 0.044301483780145645 +Loss at step 150: 0.04094228893518448 +Loss at step 200: 0.039516597986221313 +Loss at step 250: 0.04826976731419563 +Loss at step 300: 0.06281120330095291 +Loss at step 350: 0.04962306469678879 +Loss at step 400: 0.0349610410630703 +Loss at step 450: 0.04815901815891266 +Loss at step 500: 0.04071445018053055 +Loss at step 550: 0.03885610029101372 +Loss at step 600: 0.04688568040728569 +Loss at step 650: 0.03309250622987747 +Loss at step 700: 0.0372539721429348 +Loss at step 750: 0.06692992895841599 +Loss at step 800: 0.05041303485631943 +Loss at step 850: 0.049872804433107376 +Loss at step 900: 0.036385852843523026 +Mean training loss after epoch 59: 0.044467081796369955 + +EPOCH: 60 +Loss at step 0: 0.04014415666460991 +Loss at step 50: 0.08164425194263458 +Loss at step 100: 0.039430782198905945 +Loss at step 150: 0.03921978920698166 +Loss at step 200: 0.04293350130319595 +Loss at step 250: 0.04878218099474907 +Loss at step 300: 0.039910636842250824 +Loss at step 350: 0.042387500405311584 +Loss at step 400: 0.056811921298503876 +Loss at step 450: 0.03985873982310295 +Loss at step 500: 0.046190857887268066 +Loss at step 550: 0.04720006883144379 +Loss at step 600: 0.04626326635479927 +Loss at step 650: 0.05943738669157028 +Loss at step 700: 0.07274050265550613 +Loss at step 750: 0.03711295872926712 +Loss at step 800: 0.04901483654975891 +Loss at step 850: 0.05160197988152504 +Loss at step 900: 0.04616200923919678 +Mean training loss after epoch 60: 0.04484942184487131 + +EPOCH: 61 +Loss at step 0: 0.034708376973867416 +Loss at step 50: 0.04810373857617378 +Loss at step 100: 0.031009318307042122 +Loss at step 150: 0.04073278605937958 +Loss at step 200: 0.037685640156269073 +Loss at step 250: 0.03710269555449486 +Loss at step 300: 0.03902563825249672 +Loss at step 350: 0.03862561285495758 +Loss at step 400: 0.04300009831786156 +Loss at step 450: 0.059608832001686096 +Loss at step 500: 0.041195113211870193 +Loss at step 550: 0.05418688431382179 +Loss at step 600: 0.054096776992082596 +Loss at step 650: 0.040527116507291794 +Loss at step 700: 0.05888514593243599 +Loss at step 750: 0.053690355271101 +Loss at step 800: 0.04354054108262062 +Loss at step 850: 0.03404700756072998 +Loss at step 900: 0.06319855898618698 +Mean training loss after epoch 61: 0.04449913012924225 + +EPOCH: 62 +Loss at step 0: 0.038908716291189194 +Loss at step 50: 0.034311648458242416 +Loss at step 100: 0.05498092994093895 +Loss at step 150: 0.0483626127243042 +Loss at step 200: 0.053626175969839096 +Loss at step 250: 0.07578056305646896 +Loss at step 300: 0.03906629979610443 +Loss at step 350: 0.04471297562122345 +Loss at step 400: 0.05710723251104355 +Loss at step 450: 0.06131591647863388 +Loss at step 500: 0.03654533997178078 +Loss at step 550: 0.03932672366499901 +Loss at step 600: 0.03606953099370003 +Loss at step 650: 0.03935694694519043 +Loss at step 700: 0.05097009614109993 +Loss at step 750: 0.05328104645013809 +Loss at step 800: 0.04115912318229675 +Loss at step 850: 0.05281233787536621 +Loss at step 900: 0.041887253522872925 +Mean training loss after epoch 62: 0.043596246160630354 + +EPOCH: 63 +Loss at step 0: 0.029015716165304184 +Loss at step 50: 0.04414261877536774 +Loss at step 100: 0.03649734705686569 +Loss at step 150: 0.03882032260298729 +Loss at step 200: 0.04342050477862358 +Loss at step 250: 0.04559728503227234 +Loss at step 300: 0.05425933748483658 +Loss at step 350: 0.04868466034531593 +Loss at step 400: 0.03153557702898979 +Loss at step 450: 0.0450434684753418 +Loss at step 500: 0.04320262745022774 +Loss at step 550: 0.03585299476981163 +Loss at step 600: 0.06846386194229126 +Loss at step 650: 0.047943733632564545 +Loss at step 700: 0.03418514505028725 +Loss at step 750: 0.03347398713231087 +Loss at step 800: 0.05686632916331291 +Loss at step 850: 0.06825472414493561 +Loss at step 900: 0.04557757452130318 +Mean training loss after epoch 63: 0.04486468083052429 + +EPOCH: 64 +Loss at step 0: 0.038345180451869965 +Loss at step 50: 0.04878535494208336 +Loss at step 100: 0.03994043916463852 +Loss at step 150: 0.03193584829568863 +Loss at step 200: 0.04250764101743698 +Loss at step 250: 0.042187292128801346 +Loss at step 300: 0.03934232145547867 +Loss at step 350: 0.0445597879588604 +Loss at step 400: 0.05384596064686775 +Loss at step 450: 0.05301209166646004 +Loss at step 500: 0.05442678555846214 +Loss at step 550: 0.04261470213532448 +Loss at step 600: 0.07980488240718842 +Loss at step 650: 0.04727376252412796 +Loss at step 700: 0.05658571049571037 +Loss at step 750: 0.033086925745010376 +Loss at step 800: 0.03404126316308975 +Loss at step 850: 0.03186289966106415 +Loss at step 900: 0.05820830911397934 +Mean training loss after epoch 64: 0.04441411661973068 + +EPOCH: 65 +Loss at step 0: 0.06885399669408798 +Loss at step 50: 0.05599125474691391 +Loss at step 100: 0.04768454283475876 +Loss at step 150: 0.053254634141922 +Loss at step 200: 0.03931909427046776 +Loss at step 250: 0.036698125302791595 +Loss at step 300: 0.05797281861305237 +Loss at step 350: 0.04019729793071747 +Loss at step 400: 0.04408475384116173 +Loss at step 450: 0.04160203039646149 +Loss at step 500: 0.04312726855278015 +Loss at step 550: 0.03871629014611244 +Loss at step 600: 0.03298605978488922 +Loss at step 650: 0.036178793758153915 +Loss at step 700: 0.05048339441418648 +Loss at step 750: 0.05091601237654686 +Loss at step 800: 0.04056558758020401 +Loss at step 850: 0.03996409475803375 +Loss at step 900: 0.05364915356040001 +Mean training loss after epoch 65: 0.044274648685238636 + +EPOCH: 66 +Loss at step 0: 0.04305730387568474 +Loss at step 50: 0.03969784080982208 +Loss at step 100: 0.035962287336587906 +Loss at step 150: 0.041350722312927246 +Loss at step 200: 0.04764682427048683 +Loss at step 250: 0.052914198487997055 +Loss at step 300: 0.052789609879255295 +Loss at step 350: 0.04125749692320824 +Loss at step 400: 0.05972205847501755 +Loss at step 450: 0.03646932169795036 +Loss at step 500: 0.04054495692253113 +Loss at step 550: 0.057925526052713394 +Loss at step 600: 0.05502486601471901 +Loss at step 650: 0.06423813849687576 +Loss at step 700: 0.041651081293821335 +Loss at step 750: 0.03151623159646988 +Loss at step 800: 0.037293124943971634 +Loss at step 850: 0.05984169617295265 +Loss at step 900: 0.036552462726831436 +Mean training loss after epoch 66: 0.04449663154963555 + +EPOCH: 67 +Loss at step 0: 0.03824847564101219 +Loss at step 50: 0.03565288707613945 +Loss at step 100: 0.039270512759685516 +Loss at step 150: 0.03623337671160698 +Loss at step 200: 0.050690602511167526 +Loss at step 250: 0.03742752596735954 +Loss at step 300: 0.04467052221298218 +Loss at step 350: 0.03912796080112457 +Loss at step 400: 0.04786604270339012 +Loss at step 450: 0.03586800396442413 +Loss at step 500: 0.05089021101593971 +Loss at step 550: 0.045541878789663315 +Loss at step 600: 0.03662842512130737 +Loss at step 650: 0.03804018348455429 +Loss at step 700: 0.06523645669221878 +Loss at step 750: 0.043222736567258835 +Loss at step 800: 0.05228416249155998 +Loss at step 850: 0.044387586414813995 +Loss at step 900: 0.03197185695171356 +Mean training loss after epoch 67: 0.044540597860619965 + +EPOCH: 68 +Loss at step 0: 0.03530889376997948 +Loss at step 50: 0.05193200334906578 +Loss at step 100: 0.04188147187232971 +Loss at step 150: 0.044340841472148895 +Loss at step 200: 0.039617713540792465 +Loss at step 250: 0.029141953215003014 +Loss at step 300: 0.059967041015625 +Loss at step 350: 0.0396573543548584 +Loss at step 400: 0.05593840777873993 +Loss at step 450: 0.0386933758854866 +Loss at step 500: 0.034154921770095825 +Loss at step 550: 0.03156885504722595 +Loss at step 600: 0.036909282207489014 +Loss at step 650: 0.04749900475144386 +Loss at step 700: 0.05062949284911156 +Loss at step 750: 0.04943801835179329 +Loss at step 800: 0.04978271201252937 +Loss at step 850: 0.04305383935570717 +Loss at step 900: 0.04517525061964989 +Mean training loss after epoch 68: 0.04403649984416105 + +EPOCH: 69 +Loss at step 0: 0.04713786393404007 +Loss at step 50: 0.03688153252005577 +Loss at step 100: 0.04463440924882889 +Loss at step 150: 0.05203818157315254 +Loss at step 200: 0.03601740673184395 +Loss at step 250: 0.05964969843626022 +Loss at step 300: 0.04440165311098099 +Loss at step 350: 0.03362405300140381 +Loss at step 400: 0.03362003713846207 +Loss at step 450: 0.05673094093799591 +Loss at step 500: 0.05858657509088516 +Loss at step 550: 0.0479864664375782 +Loss at step 600: 0.04205408692359924 +Loss at step 650: 0.04245060309767723 +Loss at step 700: 0.047696929425001144 +Loss at step 750: 0.054844941943883896 +Loss at step 800: 0.0449918769299984 +Loss at step 850: 0.038500912487506866 +Loss at step 900: 0.036807581782341 +Mean training loss after epoch 69: 0.044288720746935684 + +EPOCH: 70 +Loss at step 0: 0.033517539501190186 +Loss at step 50: 0.03352891281247139 +Loss at step 100: 0.03971768915653229 +Loss at step 150: 0.037568893283605576 +Loss at step 200: 0.066883884370327 +Loss at step 250: 0.04035981744527817 +Loss at step 300: 0.04199424386024475 +Loss at step 350: 0.04756559059023857 +Loss at step 400: 0.03550230711698532 +Loss at step 450: 0.05051594227552414 +Loss at step 500: 0.03901894763112068 +Loss at step 550: 0.04193321615457535 +Loss at step 600: 0.039895620197057724 +Loss at step 650: 0.03211652860045433 +Loss at step 700: 0.040562212467193604 +Loss at step 750: 0.0446825809776783 +Loss at step 800: 0.0380869135260582 +Loss at step 850: 0.04290211945772171 +Loss at step 900: 0.0694216936826706 +Mean training loss after epoch 70: 0.0440076610275995 + +EPOCH: 71 +Loss at step 0: 0.062086571007966995 +Loss at step 50: 0.03848670795559883 +Loss at step 100: 0.04382505640387535 +Loss at step 150: 0.05180523172020912 +Loss at step 200: 0.057804908603429794 +Loss at step 250: 0.0323488749563694 +Loss at step 300: 0.034228235483169556 +Loss at step 350: 0.03648495674133301 +Loss at step 400: 0.06315921247005463 +Loss at step 450: 0.04083339869976044 +Loss at step 500: 0.07379228621721268 +Loss at step 550: 0.03787954896688461 +Loss at step 600: 0.05260041356086731 +Loss at step 650: 0.059406060725450516 +Loss at step 700: 0.037485867738723755 +Loss at step 750: 0.05460409075021744 +Loss at step 800: 0.034081242978572845 +Loss at step 850: 0.04085755720734596 +Loss at step 900: 0.040349770337343216 +Mean training loss after epoch 71: 0.04361870941092401 + +EPOCH: 72 +Loss at step 0: 0.029254566878080368 +Loss at step 50: 0.04164859652519226 +Loss at step 100: 0.055361270904541016 +Loss at step 150: 0.034679099917411804 +Loss at step 200: 0.057330336421728134 +Loss at step 250: 0.03006073087453842 +Loss at step 300: 0.04034830629825592 +Loss at step 350: 0.0592540018260479 +Loss at step 400: 0.050872188061475754 +Loss at step 450: 0.04640054702758789 +Loss at step 500: 0.04122794792056084 +Loss at step 550: 0.03518728166818619 +Loss at step 600: 0.03226521611213684 +Loss at step 650: 0.05286054685711861 +Loss at step 700: 0.056503985077142715 +Loss at step 750: 0.056091148406267166 +Loss at step 800: 0.0377902016043663 +Loss at step 850: 0.03336455300450325 +Loss at step 900: 0.053792499005794525 +Mean training loss after epoch 72: 0.04410509114215242 + +EPOCH: 73 +Loss at step 0: 0.038552019745111465 +Loss at step 50: 0.05634484440088272 +Loss at step 100: 0.04676920548081398 +Loss at step 150: 0.03462210297584534 +Loss at step 200: 0.03854707255959511 +Loss at step 250: 0.03935917094349861 +Loss at step 300: 0.03562043607234955 +Loss at step 350: 0.053162477910518646 +Loss at step 400: 0.037162214517593384 +Loss at step 450: 0.04485498368740082 +Loss at step 500: 0.03977523371577263 +Loss at step 550: 0.04587831348180771 +Loss at step 600: 0.03899795562028885 +Loss at step 650: 0.04515599459409714 +Loss at step 700: 0.04103762283921242 +Loss at step 750: 0.039450667798519135 +Loss at step 800: 0.03686264902353287 +Loss at step 850: 0.04491258040070534 +Loss at step 900: 0.0488051176071167 +Mean training loss after epoch 73: 0.04355169675632644 + +EPOCH: 74 +Loss at step 0: 0.04988710954785347 +Loss at step 50: 0.027748363092541695 +Loss at step 100: 0.04155075177550316 +Loss at step 150: 0.03735537454485893 +Loss at step 200: 0.032628364861011505 +Loss at step 250: 0.03547734394669533 +Loss at step 300: 0.037861354649066925 +Loss at step 350: 0.04635513946413994 +Loss at step 400: 0.048436835408210754 +Loss at step 450: 0.032090190798044205 +Loss at step 500: 0.03548084571957588 +Loss at step 550: 0.03390015661716461 +Loss at step 600: 0.047148313373327255 +Loss at step 650: 0.05798763036727905 +Loss at step 700: 0.04385918006300926 +Loss at step 750: 0.03126253932714462 +Loss at step 800: 0.036953262984752655 +Loss at step 850: 0.04472682997584343 +Loss at step 900: 0.04753943160176277 +Mean training loss after epoch 74: 0.0431889588994258 + +EPOCH: 75 +Loss at step 0: 0.04028462618589401 +Loss at step 50: 0.0357113741338253 +Loss at step 100: 0.036158740520477295 +Loss at step 150: 0.03210321068763733 +Loss at step 200: 0.05170506611466408 +Loss at step 250: 0.03830462694168091 +Loss at step 300: 0.03860298916697502 +Loss at step 350: 0.04723210632801056 +Loss at step 400: 0.03571812063455582 +Loss at step 450: 0.039360903203487396 +Loss at step 500: 0.05320487916469574 +Loss at step 550: 0.04517296701669693 +Loss at step 600: 0.06515920162200928 +Loss at step 650: 0.04200473055243492 +Loss at step 700: 0.04345053806900978 +Loss at step 750: 0.0318668894469738 +Loss at step 800: 0.03969453275203705 +Loss at step 850: 0.03803049027919769 +Loss at step 900: 0.05230528861284256 +Mean training loss after epoch 75: 0.04395143901194527 + +EPOCH: 76 +Loss at step 0: 0.05301972106099129 +Loss at step 50: 0.03598293662071228 +Loss at step 100: 0.034400515258312225 +Loss at step 150: 0.056835055351257324 +Loss at step 200: 0.034850649535655975 +Loss at step 250: 0.05500826612114906 +Loss at step 300: 0.034377194941043854 +Loss at step 350: 0.04097644239664078 +Loss at step 400: 0.040672894567251205 +Loss at step 450: 0.04145702347159386 +Loss at step 500: 0.04885398969054222 +Loss at step 550: 0.048190776258707047 +Loss at step 600: 0.05448161065578461 +Loss at step 650: 0.03598455339670181 +Loss at step 700: 0.039347682148218155 +Loss at step 750: 0.04168093949556351 +Loss at step 800: 0.05553968623280525 +Loss at step 850: 0.041357167065143585 +Loss at step 900: 0.05268019810318947 +Mean training loss after epoch 76: 0.04369923243247497 + +EPOCH: 77 +Loss at step 0: 0.05658970773220062 +Loss at step 50: 0.04428544640541077 +Loss at step 100: 0.043661028146743774 +Loss at step 150: 0.0697946846485138 +Loss at step 200: 0.03846244886517525 +Loss at step 250: 0.03738200664520264 +Loss at step 300: 0.0409412682056427 +Loss at step 350: 0.03415956720709801 +Loss at step 400: 0.04453108832240105 +Loss at step 450: 0.07758844643831253 +Loss at step 500: 0.04702244699001312 +Loss at step 550: 0.048941150307655334 +Loss at step 600: 0.033970486372709274 +Loss at step 650: 0.042242471128702164 +Loss at step 700: 0.035817310214042664 +Loss at step 750: 0.039164334535598755 +Loss at step 800: 0.049258679151535034 +Loss at step 850: 0.03764650970697403 +Loss at step 900: 0.037753425538539886 +Mean training loss after epoch 77: 0.04369013253122822 + +EPOCH: 78 +Loss at step 0: 0.03975120559334755 +Loss at step 50: 0.054156817495822906 +Loss at step 100: 0.037778813391923904 +Loss at step 150: 0.04597756266593933 +Loss at step 200: 0.05560242384672165 +Loss at step 250: 0.038838405162096024 +Loss at step 300: 0.04676831513643265 +Loss at step 350: 0.0348576083779335 +Loss at step 400: 0.0406932570040226 +Loss at step 450: 0.03799765929579735 +Loss at step 500: 0.056081756949424744 +Loss at step 550: 0.036105211824178696 +Loss at step 600: 0.041250597685575485 +Loss at step 650: 0.05187376216053963 +Loss at step 700: 0.039942413568496704 +Loss at step 750: 0.03421250730752945 +Loss at step 800: 0.04465855285525322 +Loss at step 850: 0.04682165011763573 +Loss at step 900: 0.035167813301086426 +Mean training loss after epoch 78: 0.043499736446958745 + +EPOCH: 79 +Loss at step 0: 0.04953160509467125 +Loss at step 50: 0.032434191554784775 +Loss at step 100: 0.060896556824445724 +Loss at step 150: 0.04916340485215187 +Loss at step 200: 0.04294075071811676 +Loss at step 250: 0.03761627897620201 +Loss at step 300: 0.05153829976916313 +Loss at step 350: 0.04079978168010712 +Loss at step 400: 0.03249897435307503 +Loss at step 450: 0.04026184603571892 +Loss at step 500: 0.04575842246413231 +Loss at step 550: 0.04249049723148346 +Loss at step 600: 0.043605584651231766 +Loss at step 650: 0.036947764456272125 +Loss at step 700: 0.04640074074268341 +Loss at step 750: 0.05125853419303894 +Loss at step 800: 0.053849928081035614 +Loss at step 850: 0.05566077306866646 +Loss at step 900: 0.045117028057575226 +Mean training loss after epoch 79: 0.04346864549383553 + +EPOCH: 80 +Loss at step 0: 0.06714697927236557 +Loss at step 50: 0.04404205456376076 +Loss at step 100: 0.04033733159303665 +Loss at step 150: 0.05670079216361046 +Loss at step 200: 0.043069370090961456 +Loss at step 250: 0.06097843125462532 +Loss at step 300: 0.028987368568778038 +Loss at step 350: 0.039095696061849594 +Loss at step 400: 0.05482931435108185 +Loss at step 450: 0.03586386889219284 +Loss at step 500: 0.041629496961832047 +Loss at step 550: 0.03990510106086731 +Loss at step 600: 0.054544877260923386 +Loss at step 650: 0.03798103332519531 +Loss at step 700: 0.04047699645161629 +Loss at step 750: 0.037949416786432266 +Loss at step 800: 0.0359608419239521 +Loss at step 850: 0.054671015590429306 +Loss at step 900: 0.038619618862867355 +Mean training loss after epoch 80: 0.043645817013993574 + +EPOCH: 81 +Loss at step 0: 0.041961781680583954 +Loss at step 50: 0.040087319910526276 +Loss at step 100: 0.05675582215189934 +Loss at step 150: 0.05277867987751961 +Loss at step 200: 0.03047415055334568 +Loss at step 250: 0.03613193705677986 +Loss at step 300: 0.043731123208999634 +Loss at step 350: 0.04356745257973671 +Loss at step 400: 0.03996061906218529 +Loss at step 450: 0.03311565890908241 +Loss at step 500: 0.04055457562208176 +Loss at step 550: 0.05245433375239372 +Loss at step 600: 0.038351498544216156 +Loss at step 650: 0.05595117434859276 +Loss at step 700: 0.03396110609173775 +Loss at step 750: 0.0379931665956974 +Loss at step 800: 0.03337615355849266 +Loss at step 850: 0.0378812812268734 +Loss at step 900: 0.03898458927869797 +Mean training loss after epoch 81: 0.0442062398152692 + +EPOCH: 82 +Loss at step 0: 0.06833680719137192 +Loss at step 50: 0.04500220715999603 +Loss at step 100: 0.03772377222776413 +Loss at step 150: 0.053365763276815414 +Loss at step 200: 0.03693930804729462 +Loss at step 250: 0.04482037574052811 +Loss at step 300: 0.049105748534202576 +Loss at step 350: 0.04576117917895317 +Loss at step 400: 0.03354388102889061 +Loss at step 450: 0.04362288862466812 +Loss at step 500: 0.037595588713884354 +Loss at step 550: 0.06382860988378525 +Loss at step 600: 0.03677208349108696 +Loss at step 650: 0.05604931339621544 +Loss at step 700: 0.05555162578821182 +Loss at step 750: 0.027041040360927582 +Loss at step 800: 0.04180339723825455 +Loss at step 850: 0.07140879333019257 +Loss at step 900: 0.04118446260690689 +Mean training loss after epoch 82: 0.04348311074443463 + +EPOCH: 83 +Loss at step 0: 0.03259948268532753 +Loss at step 50: 0.05375359207391739 +Loss at step 100: 0.03813811019062996 +Loss at step 150: 0.04393210634589195 +Loss at step 200: 0.05306578427553177 +Loss at step 250: 0.05070703849196434 +Loss at step 300: 0.042646415531635284 +Loss at step 350: 0.052681658416986465 +Loss at step 400: 0.039171285927295685 +Loss at step 450: 0.050351619720458984 +Loss at step 500: 0.036338597536087036 +Loss at step 550: 0.04029726982116699 +Loss at step 600: 0.04205435514450073 +Loss at step 650: 0.0543961375951767 +Loss at step 700: 0.027284376323223114 +Loss at step 750: 0.036568958312273026 +Loss at step 800: 0.04350341111421585 +Loss at step 850: 0.05256534367799759 +Loss at step 900: 0.03388596326112747 +Mean training loss after epoch 83: 0.04297601644084779 + +EPOCH: 84 +Loss at step 0: 0.04750841483473778 +Loss at step 50: 0.04352778568863869 +Loss at step 100: 0.03605922311544418 +Loss at step 150: 0.040445245802402496 +Loss at step 200: 0.044017449021339417 +Loss at step 250: 0.036079443991184235 +Loss at step 300: 0.03524768352508545 +Loss at step 350: 0.058157481253147125 +Loss at step 400: 0.03466608375310898 +Loss at step 450: 0.04185834154486656 +Loss at step 500: 0.039541035890579224 +Loss at step 550: 0.03365383669734001 +Loss at step 600: 0.032277606427669525 +Loss at step 650: 0.038126878440380096 +Loss at step 700: 0.06580884009599686 +Loss at step 750: 0.029152007773518562 +Loss at step 800: 0.05536936596035957 +Loss at step 850: 0.03807036951184273 +Loss at step 900: 0.03619582951068878 +Mean training loss after epoch 84: 0.043629243781269866 + +EPOCH: 85 +Loss at step 0: 0.049788717180490494 +Loss at step 50: 0.038022421300411224 +Loss at step 100: 0.032273098826408386 +Loss at step 150: 0.03269397094845772 +Loss at step 200: 0.035025231540203094 +Loss at step 250: 0.05248252674937248 +Loss at step 300: 0.03256775811314583 +Loss at step 350: 0.03763484209775925 +Loss at step 400: 0.04265531152486801 +Loss at step 450: 0.03436531126499176 +Loss at step 500: 0.03978480398654938 +Loss at step 550: 0.03378177434206009 +Loss at step 600: 0.06930387765169144 +Loss at step 650: 0.03307141363620758 +Loss at step 700: 0.053521107882261276 +Loss at step 750: 0.053345534950494766 +Loss at step 800: 0.037429336458444595 +Loss at step 850: 0.043063025921583176 +Loss at step 900: 0.03620496019721031 +Mean training loss after epoch 85: 0.04344467626118075 + +EPOCH: 86 +Loss at step 0: 0.07604561746120453 +Loss at step 50: 0.03270219638943672 +Loss at step 100: 0.048404015600681305 +Loss at step 150: 0.03520134463906288 +Loss at step 200: 0.04617032781243324 +Loss at step 250: 0.04424238204956055 +Loss at step 300: 0.05500755086541176 +Loss at step 350: 0.03861868381500244 +Loss at step 400: 0.041497714817523956 +Loss at step 450: 0.040724895894527435 +Loss at step 500: 0.05506446212530136 +Loss at step 550: 0.050323523581027985 +Loss at step 600: 0.04069007933139801 +Loss at step 650: 0.04165627062320709 +Loss at step 700: 0.03901602327823639 +Loss at step 750: 0.05073634907603264 +Loss at step 800: 0.057049982249736786 +Loss at step 850: 0.041814662516117096 +Loss at step 900: 0.050912611186504364 +Mean training loss after epoch 86: 0.04339223529603372 + +EPOCH: 87 +Loss at step 0: 0.03832190856337547 +Loss at step 50: 0.03612116724252701 +Loss at step 100: 0.046430062502622604 +Loss at step 150: 0.0354364775121212 +Loss at step 200: 0.04589875787496567 +Loss at step 250: 0.046340517699718475 +Loss at step 300: 0.03358977660536766 +Loss at step 350: 0.05608464032411575 +Loss at step 400: 0.03659365326166153 +Loss at step 450: 0.031970780342817307 +Loss at step 500: 0.05655407905578613 +Loss at step 550: 0.035570062696933746 +Loss at step 600: 0.050886280834674835 +Loss at step 650: 0.035846032202243805 +Loss at step 700: 0.033220771700143814 +Loss at step 750: 0.05016414076089859 +Loss at step 800: 0.03249731287360191 +Loss at step 850: 0.051579348742961884 +Loss at step 900: 0.04383271932601929 +Mean training loss after epoch 87: 0.043103055565802656 + +EPOCH: 88 +Loss at step 0: 0.059718307107686996 +Loss at step 50: 0.03696296736598015 +Loss at step 100: 0.05220537632703781 +Loss at step 150: 0.059678904712200165 +Loss at step 200: 0.036867622286081314 +Loss at step 250: 0.03667591139674187 +Loss at step 300: 0.034308452159166336 +Loss at step 350: 0.04058242589235306 +Loss at step 400: 0.03458724543452263 +Loss at step 450: 0.03771441802382469 +Loss at step 500: 0.03839738294482231 +Loss at step 550: 0.04058973491191864 +Loss at step 600: 0.039897527545690536 +Loss at step 650: 0.04819701239466667 +Loss at step 700: 0.039945267140865326 +Loss at step 750: 0.039837781339883804 +Loss at step 800: 0.03418014571070671 +Loss at step 850: 0.0561252199113369 +Loss at step 900: 0.03720290958881378 +Mean training loss after epoch 88: 0.043711327423434906 + +EPOCH: 89 +Loss at step 0: 0.0622403658926487 +Loss at step 50: 0.03501512110233307 +Loss at step 100: 0.03378031775355339 +Loss at step 150: 0.03363748639822006 +Loss at step 200: 0.04308519512414932 +Loss at step 250: 0.04319875314831734 +Loss at step 300: 0.06976722180843353 +Loss at step 350: 0.03761878237128258 +Loss at step 400: 0.03650447353720665 +Loss at step 450: 0.03223029896616936 +Loss at step 500: 0.0345304012298584 +Loss at step 550: 0.04749958589673042 +Loss at step 600: 0.04270829260349274 +Loss at step 650: 0.05911377817392349 +Loss at step 700: 0.03142249584197998 +Loss at step 750: 0.04553729668259621 +Loss at step 800: 0.039227329194545746 +Loss at step 850: 0.0500837042927742 +Loss at step 900: 0.038529522716999054 +Mean training loss after epoch 89: 0.043296830720707044 + +EPOCH: 90 +Loss at step 0: 0.03306594118475914 +Loss at step 50: 0.04650263860821724 +Loss at step 100: 0.04312523081898689 +Loss at step 150: 0.034545619040727615 +Loss at step 200: 0.04598233103752136 +Loss at step 250: 0.03660457953810692 +Loss at step 300: 0.04116194322705269 +Loss at step 350: 0.05145394057035446 +Loss at step 400: 0.05626528337597847 +Loss at step 450: 0.042840324342250824 +Loss at step 500: 0.03150201216340065 +Loss at step 550: 0.03885718807578087 +Loss at step 600: 0.04799865931272507 +Loss at step 650: 0.03733006864786148 +Loss at step 700: 0.03675772249698639 +Loss at step 750: 0.0403936468064785 +Loss at step 800: 0.060140881687402725 +Loss at step 850: 0.056603480130434036 +Loss at step 900: 0.03747309371829033 +Mean training loss after epoch 90: 0.04427998538202505 + +EPOCH: 91 +Loss at step 0: 0.04910198971629143 +Loss at step 50: 0.04037807509303093 +Loss at step 100: 0.03771810233592987 +Loss at step 150: 0.034219253808259964 +Loss at step 200: 0.04347480833530426 +Loss at step 250: 0.047169867902994156 +Loss at step 300: 0.041771892458200455 +Loss at step 350: 0.038890402764081955 +Loss at step 400: 0.03771951049566269 +Loss at step 450: 0.027332905679941177 +Loss at step 500: 0.0472487211227417 +Loss at step 550: 0.03344062715768814 +Loss at step 600: 0.039007991552352905 +Loss at step 650: 0.03601592034101486 +Loss at step 700: 0.03445816412568092 +Loss at step 750: 0.040622398257255554 +Loss at step 800: 0.03840038552880287 +Loss at step 850: 0.05284346267580986 +Loss at step 900: 0.046714216470718384 +Mean training loss after epoch 91: 0.04356519312166901 + +EPOCH: 92 +Loss at step 0: 0.07394585013389587 +Loss at step 50: 0.03972001373767853 +Loss at step 100: 0.03388422727584839 +Loss at step 150: 0.043385058641433716 +Loss at step 200: 0.0548548698425293 +Loss at step 250: 0.03832437843084335 +Loss at step 300: 0.05707750469446182 +Loss at step 350: 0.041515324264764786 +Loss at step 400: 0.031445201486349106 +Loss at step 450: 0.06388327479362488 +Loss at step 500: 0.037730999290943146 +Loss at step 550: 0.05023184418678284 +Loss at step 600: 0.0382477305829525 +Loss at step 650: 0.043892908841371536 +Loss at step 700: 0.03901918977499008 +Loss at step 750: 0.04354777932167053 +Loss at step 800: 0.040302760899066925 +Loss at step 850: 0.04244803264737129 +Loss at step 900: 0.05645808205008507 +Mean training loss after epoch 92: 0.042986952499158855 + +EPOCH: 93 +Loss at step 0: 0.03539813309907913 +Loss at step 50: 0.048581965267658234 +Loss at step 100: 0.04163219407200813 +Loss at step 150: 0.03968052938580513 +Loss at step 200: 0.03193528577685356 +Loss at step 250: 0.0411832369863987 +Loss at step 300: 0.03856983408331871 +Loss at step 350: 0.038298722356557846 +Loss at step 400: 0.04309234768152237 +Loss at step 450: 0.040360040962696075 +Loss at step 500: 0.04634615406394005 +Loss at step 550: 0.06071379780769348 +Loss at step 600: 0.04664158821105957 +Loss at step 650: 0.03374982625246048 +Loss at step 700: 0.052322570234537125 +Loss at step 750: 0.04002566263079643 +Loss at step 800: 0.03766759857535362 +Loss at step 850: 0.05379931628704071 +Loss at step 900: 0.037691425532102585 +Mean training loss after epoch 93: 0.043927824297455204 + +EPOCH: 94 +Loss at step 0: 0.06536293029785156 +Loss at step 50: 0.030876314267516136 +Loss at step 100: 0.03512885048985481 +Loss at step 150: 0.05157601460814476 +Loss at step 200: 0.07189564406871796 +Loss at step 250: 0.038722675293684006 +Loss at step 300: 0.03872859477996826 +Loss at step 350: 0.03599252551794052 +Loss at step 400: 0.042109329253435135 +Loss at step 450: 0.04384028911590576 +Loss at step 500: 0.036185961216688156 +Loss at step 550: 0.062151797115802765 +Loss at step 600: 0.03850793465971947 +Loss at step 650: 0.034010306000709534 +Loss at step 700: 0.04265720024704933 +Loss at step 750: 0.038343705236911774 +Loss at step 800: 0.0559229701757431 +Loss at step 850: 0.037223100662231445 +Loss at step 900: 0.03685881197452545 +Mean training loss after epoch 94: 0.04346638769983673 + +EPOCH: 95 +Loss at step 0: 0.07410895079374313 +Loss at step 50: 0.030644426122307777 +Loss at step 100: 0.03632679581642151 +Loss at step 150: 0.04430529475212097 +Loss at step 200: 0.03993538022041321 +Loss at step 250: 0.033615030348300934 +Loss at step 300: 0.03780102729797363 +Loss at step 350: 0.03041430376470089 +Loss at step 400: 0.04900200292468071 +Loss at step 450: 0.04082078859210014 +Loss at step 500: 0.04287412762641907 +Loss at step 550: 0.04590524733066559 +Loss at step 600: 0.03649120777845383 +Loss at step 650: 0.032129764556884766 +Loss at step 700: 0.07543421536684036 +Loss at step 750: 0.037203382700681686 +Loss at step 800: 0.048887595534324646 +Loss at step 850: 0.036267053335905075 +Loss at step 900: 0.036513570696115494 +Mean training loss after epoch 95: 0.04287138573928619 + +EPOCH: 96 +Loss at step 0: 0.03439642861485481 +Loss at step 50: 0.032979678362607956 +Loss at step 100: 0.03874444216489792 +Loss at step 150: 0.03373781591653824 +Loss at step 200: 0.03582676500082016 +Loss at step 250: 0.03851651772856712 +Loss at step 300: 0.03581823408603668 +Loss at step 350: 0.036770474165678024 +Loss at step 400: 0.03284694626927376 +Loss at step 450: 0.04079399257898331 +Loss at step 500: 0.04206319525837898 +Loss at step 550: 0.03856229409575462 +Loss at step 600: 0.04491313174366951 +Loss at step 650: 0.03701549395918846 +Loss at step 700: 0.053752969950437546 +Loss at step 750: 0.05399594083428383 +Loss at step 800: 0.0453498549759388 +Loss at step 850: 0.038302745670080185 +Loss at step 900: 0.04211289435625076 +Mean training loss after epoch 96: 0.0431959849816046 + +EPOCH: 97 +Loss at step 0: 0.03641681745648384 +Loss at step 50: 0.03312516584992409 +Loss at step 100: 0.05817564204335213 +Loss at step 150: 0.03705442696809769 +Loss at step 200: 0.0319865457713604 +Loss at step 250: 0.040375035256147385 +Loss at step 300: 0.059476666152477264 +Loss at step 350: 0.048445623368024826 +Loss at step 400: 0.03463175892829895 +Loss at step 450: 0.037774257361888885 +Loss at step 500: 0.0523386187851429 +Loss at step 550: 0.0657649040222168 +Loss at step 600: 0.04200076311826706 +Loss at step 650: 0.0315200611948967 +Loss at step 700: 0.04242639243602753 +Loss at step 750: 0.03878338262438774 +Loss at step 800: 0.05410474166274071 +Loss at step 850: 0.03390758857131004 +Loss at step 900: 0.051311835646629333 +Mean training loss after epoch 97: 0.04318891692040826 + +EPOCH: 98 +Loss at step 0: 0.046574775129556656 +Loss at step 50: 0.05641099810600281 +Loss at step 100: 0.03675806149840355 +Loss at step 150: 0.04064656049013138 +Loss at step 200: 0.041404858231544495 +Loss at step 250: 0.046953871846199036 +Loss at step 300: 0.03851976990699768 +Loss at step 350: 0.058378178626298904 +Loss at step 400: 0.04649720713496208 +Loss at step 450: 0.03760731220245361 +Loss at step 500: 0.03642148897051811 +Loss at step 550: 0.050206299871206284 +Loss at step 600: 0.04067458212375641 +Loss at step 650: 0.04598100110888481 +Loss at step 700: 0.06879585981369019 +Loss at step 750: 0.039883676916360855 +Loss at step 800: 0.05262867361307144 +Loss at step 850: 0.03709295392036438 +Loss at step 900: 0.03144507482647896 +Mean training loss after epoch 98: 0.043273623160986124 + +EPOCH: 99 +Loss at step 0: 0.05169651284813881 +Loss at step 50: 0.04774666577577591 +Loss at step 100: 0.04283786192536354 +Loss at step 150: 0.05346093699336052 +Loss at step 200: 0.06267943233251572 +Loss at step 250: 0.029984652996063232 +Loss at step 300: 0.04070792719721794 +Loss at step 350: 0.042360469698905945 +Loss at step 400: 0.04965902864933014 +Loss at step 450: 0.034532371908426285 +Loss at step 500: 0.031091198325157166 +Loss at step 550: 0.0353475958108902 +Loss at step 600: 0.03562759980559349 +Loss at step 650: 0.05511445179581642 +Loss at step 700: 0.04308035969734192 +Loss at step 750: 0.039943136274814606 +Loss at step 800: 0.03640330582857132 +Loss at step 850: 0.052483201026916504 +Loss at step 900: 0.038298215717077255 +Mean training loss after epoch 99: 0.04306189255761122 + +EPOCH: 100 +Loss at step 0: 0.03868278115987778 +Loss at step 50: 0.033273063600063324 +Loss at step 100: 0.06904282420873642 +Loss at step 150: 0.04511026293039322 +Loss at step 200: 0.03361647576093674 +Loss at step 250: 0.04274805635213852 +Loss at step 300: 0.04436466470360756 +Loss at step 350: 0.06252449005842209 +Loss at step 400: 0.04299504682421684 +Loss at step 450: 0.0542532280087471 +Loss at step 500: 0.0670432448387146 +Loss at step 550: 0.0696505531668663 +Loss at step 600: 0.059786610305309296 +Loss at step 650: 0.041346460580825806 +Loss at step 700: 0.05354555323719978 +Loss at step 750: 0.048287492245435715 +Loss at step 800: 0.03784614056348801 +Loss at step 850: 0.03387393057346344 +Loss at step 900: 0.03863274306058884 +Mean training loss after epoch 100: 0.04288483024842894 + +EPOCH: 101 +Loss at step 0: 0.0320252887904644 +Loss at step 50: 0.03704806789755821 +Loss at step 100: 0.04036383330821991 +Loss at step 150: 0.04344441741704941 +Loss at step 200: 0.036835040897130966 +Loss at step 250: 0.03519628942012787 +Loss at step 300: 0.038933347910642624 +Loss at step 350: 0.040308840572834015 +Loss at step 400: 0.06893625110387802 +Loss at step 450: 0.03442879393696785 +Loss at step 500: 0.06981664896011353 +Loss at step 550: 0.05139239877462387 +Loss at step 600: 0.045361023396253586 +Loss at step 650: 0.03686761483550072 +Loss at step 700: 0.03900550678372383 +Loss at step 750: 0.033268269151449203 +Loss at step 800: 0.036813121289014816 +Loss at step 850: 0.03814901039004326 +Loss at step 900: 0.05475571006536484 +Mean training loss after epoch 101: 0.04321747353828665 + +EPOCH: 102 +Loss at step 0: 0.05565916746854782 +Loss at step 50: 0.0469290092587471 +Loss at step 100: 0.03592206537723541 +Loss at step 150: 0.03643639758229256 +Loss at step 200: 0.06259587407112122 +Loss at step 250: 0.0463993102312088 +Loss at step 300: 0.03069981187582016 +Loss at step 350: 0.06601865589618683 +Loss at step 400: 0.03276149928569794 +Loss at step 450: 0.02996491827070713 +Loss at step 500: 0.0397891029715538 +Loss at step 550: 0.04958747699856758 +Loss at step 600: 0.05864010751247406 +Loss at step 650: 0.04672401770949364 +Loss at step 700: 0.04763220250606537 +Loss at step 750: 0.03473920747637749 +Loss at step 800: 0.037696756422519684 +Loss at step 850: 0.0555654838681221 +Loss at step 900: 0.05053749680519104 +Mean training loss after epoch 102: 0.043392531085274876 + +EPOCH: 103 +Loss at step 0: 0.03874469920992851 +Loss at step 50: 0.05485078692436218 +Loss at step 100: 0.03334563225507736 +Loss at step 150: 0.03602603077888489 +Loss at step 200: 0.05366092175245285 +Loss at step 250: 0.05623263493180275 +Loss at step 300: 0.04819313436746597 +Loss at step 350: 0.05220837891101837 +Loss at step 400: 0.03978671878576279 +Loss at step 450: 0.05293075740337372 +Loss at step 500: 0.03283224254846573 +Loss at step 550: 0.03934606537222862 +Loss at step 600: 0.039841100573539734 +Loss at step 650: 0.038126807659864426 +Loss at step 700: 0.04651801660656929 +Loss at step 750: 0.03205832839012146 +Loss at step 800: 0.03260406479239464 +Loss at step 850: 0.04802095517516136 +Loss at step 900: 0.07081270962953568 +Mean training loss after epoch 103: 0.04249289364758522 + +EPOCH: 104 +Loss at step 0: 0.03115161508321762 +Loss at step 50: 0.03379862755537033 +Loss at step 100: 0.041275929659605026 +Loss at step 150: 0.039807382971048355 +Loss at step 200: 0.04076811298727989 +Loss at step 250: 0.03608894720673561 +Loss at step 300: 0.03752580285072327 +Loss at step 350: 0.046833109110593796 +Loss at step 400: 0.051159657537937164 +Loss at step 450: 0.03741542249917984 +Loss at step 500: 0.038290105760097504 +Loss at step 550: 0.041247326880693436 +Loss at step 600: 0.04058607295155525 +Loss at step 650: 0.03616589680314064 +Loss at step 700: 0.04535121098160744 +Loss at step 750: 0.03494952619075775 +Loss at step 800: 0.03949456289410591 +Loss at step 850: 0.040163662284612656 +Loss at step 900: 0.03252166882157326 +Mean training loss after epoch 104: 0.043015945414855664 + +EPOCH: 105 +Loss at step 0: 0.036242932081222534 +Loss at step 50: 0.03272474184632301 +Loss at step 100: 0.04492475464940071 +Loss at step 150: 0.045704569667577744 +Loss at step 200: 0.036552704870700836 +Loss at step 250: 0.041963860392570496 +Loss at step 300: 0.03921288624405861 +Loss at step 350: 0.04874899238348007 +Loss at step 400: 0.04045712947845459 +Loss at step 450: 0.030772091820836067 +Loss at step 500: 0.05106096714735031 +Loss at step 550: 0.027127612382173538 +Loss at step 600: 0.05295705795288086 +Loss at step 650: 0.04706268757581711 +Loss at step 700: 0.03845628723502159 +Loss at step 750: 0.03371085971593857 +Loss at step 800: 0.04142371937632561 +Loss at step 850: 0.052407145500183105 +Loss at step 900: 0.037026021629571915 +Mean training loss after epoch 105: 0.04317484780558264 + +EPOCH: 106 +Loss at step 0: 0.029048895463347435 +Loss at step 50: 0.04750058054924011 +Loss at step 100: 0.03844717517495155 +Loss at step 150: 0.032337259501218796 +Loss at step 200: 0.03150492161512375 +Loss at step 250: 0.05508747696876526 +Loss at step 300: 0.04623647406697273 +Loss at step 350: 0.03420146927237511 +Loss at step 400: 0.054901327937841415 +Loss at step 450: 0.044081784784793854 +Loss at step 500: 0.05747245252132416 +Loss at step 550: 0.04383234679698944 +Loss at step 600: 0.03540794178843498 +Loss at step 650: 0.049649763852357864 +Loss at step 700: 0.03573313355445862 +Loss at step 750: 0.044091228395700455 +Loss at step 800: 0.03443525359034538 +Loss at step 850: 0.04229153320193291 +Loss at step 900: 0.058335255831480026 +Mean training loss after epoch 106: 0.04258429711219916 + +EPOCH: 107 +Loss at step 0: 0.0381803885102272 +Loss at step 50: 0.03685297816991806 +Loss at step 100: 0.034790556877851486 +Loss at step 150: 0.05032115802168846 +Loss at step 200: 0.033282600343227386 +Loss at step 250: 0.04043008014559746 +Loss at step 300: 0.03712046146392822 +Loss at step 350: 0.047968342900276184 +Loss at step 400: 0.043439656496047974 +Loss at step 450: 0.043730784207582474 +Loss at step 500: 0.03656981140375137 +Loss at step 550: 0.042178064584732056 +Loss at step 600: 0.04220736026763916 +Loss at step 650: 0.03219039365649223 +Loss at step 700: 0.03509557992219925 +Loss at step 750: 0.04011481627821922 +Loss at step 800: 0.029533641412854195 +Loss at step 850: 0.0380370207130909 +Loss at step 900: 0.03835464268922806 +Mean training loss after epoch 107: 0.042455973341933954 + +EPOCH: 108 +Loss at step 0: 0.053215205669403076 +Loss at step 50: 0.03992394357919693 +Loss at step 100: 0.03500846028327942 +Loss at step 150: 0.05199325084686279 +Loss at step 200: 0.03509242460131645 +Loss at step 250: 0.033662501722574234 +Loss at step 300: 0.0520416721701622 +Loss at step 350: 0.03401225432753563 +Loss at step 400: 0.04268940910696983 +Loss at step 450: 0.036937568336725235 +Loss at step 500: 0.053192269057035446 +Loss at step 550: 0.029121991246938705 +Loss at step 600: 0.035446252673864365 +Loss at step 650: 0.05657226964831352 +Loss at step 700: 0.040138814598321915 +Loss at step 750: 0.042686160653829575 +Loss at step 800: 0.03484735265374184 +Loss at step 850: 0.03614305332303047 +Loss at step 900: 0.04524796083569527 +Mean training loss after epoch 108: 0.04264132607021311 + +EPOCH: 109 +Loss at step 0: 0.03732350468635559 +Loss at step 50: 0.03490496426820755 +Loss at step 100: 0.039503492414951324 +Loss at step 150: 0.03732747957110405 +Loss at step 200: 0.036649059504270554 +Loss at step 250: 0.04840293154120445 +Loss at step 300: 0.04136611893773079 +Loss at step 350: 0.03097102977335453 +Loss at step 400: 0.03997035697102547 +Loss at step 450: 0.040247365832328796 +Loss at step 500: 0.033872444182634354 +Loss at step 550: 0.043919648975133896 +Loss at step 600: 0.03842050954699516 +Loss at step 650: 0.03633081167936325 +Loss at step 700: 0.03185936436057091 +Loss at step 750: 0.031556472182273865 +Loss at step 800: 0.04246347025036812 +Loss at step 850: 0.03699137642979622 +Loss at step 900: 0.038073956966400146 +Mean training loss after epoch 109: 0.042871232231908134 + +EPOCH: 110 +Loss at step 0: 0.044787343591451645 +Loss at step 50: 0.04396039992570877 +Loss at step 100: 0.03304486349225044 +Loss at step 150: 0.0329318642616272 +Loss at step 200: 0.03712211921811104 +Loss at step 250: 0.03624889254570007 +Loss at step 300: 0.039950963109731674 +Loss at step 350: 0.05183451250195503 +Loss at step 400: 0.033772554248571396 +Loss at step 450: 0.03334091603755951 +Loss at step 500: 0.04075763002038002 +Loss at step 550: 0.06279843300580978 +Loss at step 600: 0.05321997031569481 +Loss at step 650: 0.037117939442396164 +Loss at step 700: 0.03801078721880913 +Loss at step 750: 0.032297197729349136 +Loss at step 800: 0.049598902463912964 +Loss at step 850: 0.06499156355857849 +Loss at step 900: 0.034701526165008545 +Mean training loss after epoch 110: 0.04253625459095308 + +EPOCH: 111 +Loss at step 0: 0.04842279478907585 +Loss at step 50: 0.030813124030828476 +Loss at step 100: 0.03582942485809326 +Loss at step 150: 0.04401121661067009 +Loss at step 200: 0.03099776618182659 +Loss at step 250: 0.039740726351737976 +Loss at step 300: 0.049139585345983505 +Loss at step 350: 0.032573338598012924 +Loss at step 400: 0.0320868156850338 +Loss at step 450: 0.03244072571396828 +Loss at step 500: 0.036660704761743546 +Loss at step 550: 0.05817306786775589 +Loss at step 600: 0.046441372483968735 +Loss at step 650: 0.04924897104501724 +Loss at step 700: 0.032720550894737244 +Loss at step 750: 0.03674580901861191 +Loss at step 800: 0.0348411463201046 +Loss at step 850: 0.0344211645424366 +Loss at step 900: 0.03544338792562485 +Mean training loss after epoch 111: 0.04321895086808182 + +EPOCH: 112 +Loss at step 0: 0.02806468866765499 +Loss at step 50: 0.05090704187750816 +Loss at step 100: 0.04020220413804054 +Loss at step 150: 0.04250871017575264 +Loss at step 200: 0.041078586131334305 +Loss at step 250: 0.049902867525815964 +Loss at step 300: 0.03141561150550842 +Loss at step 350: 0.04201183468103409 +Loss at step 400: 0.055819183588027954 +Loss at step 450: 0.03885535150766373 +Loss at step 500: 0.06769788265228271 +Loss at step 550: 0.039124418050050735 +Loss at step 600: 0.0550897940993309 +Loss at step 650: 0.05477220192551613 +Loss at step 700: 0.03775335103273392 +Loss at step 750: 0.03932926058769226 +Loss at step 800: 0.03385027498006821 +Loss at step 850: 0.03438795357942581 +Loss at step 900: 0.04016561433672905 +Mean training loss after epoch 112: 0.042442821055603056 + +EPOCH: 113 +Loss at step 0: 0.03439236432313919 +Loss at step 50: 0.04079783707857132 +Loss at step 100: 0.04035534709692001 +Loss at step 150: 0.02992972545325756 +Loss at step 200: 0.03763053938746452 +Loss at step 250: 0.03948152810335159 +Loss at step 300: 0.05204397067427635 +Loss at step 350: 0.03488073870539665 +Loss at step 400: 0.039366453886032104 +Loss at step 450: 0.07258787006139755 +Loss at step 500: 0.03774897754192352 +Loss at step 550: 0.03692486137151718 +Loss at step 600: 0.03373141214251518 +Loss at step 650: 0.038297783583402634 +Loss at step 700: 0.03423190861940384 +Loss at step 750: 0.036938928067684174 +Loss at step 800: 0.02934008091688156 +Loss at step 850: 0.03978004679083824 +Loss at step 900: 0.05455329269170761 +Mean training loss after epoch 113: 0.04289359403357132 + +EPOCH: 114 +Loss at step 0: 0.04929478093981743 +Loss at step 50: 0.036822691559791565 +Loss at step 100: 0.033721890300512314 +Loss at step 150: 0.034397274255752563 +Loss at step 200: 0.044635675847530365 +Loss at step 250: 0.04207928106188774 +Loss at step 300: 0.039686571806669235 +Loss at step 350: 0.05589856952428818 +Loss at step 400: 0.03962375968694687 +Loss at step 450: 0.02821052446961403 +Loss at step 500: 0.059994060546159744 +Loss at step 550: 0.035869430750608444 +Loss at step 600: 0.035884320735931396 +Loss at step 650: 0.03565568849444389 +Loss at step 700: 0.03159397095441818 +Loss at step 750: 0.038982052356004715 +Loss at step 800: 0.03397154062986374 +Loss at step 850: 0.03521180897951126 +Loss at step 900: 0.036220405250787735 +Mean training loss after epoch 114: 0.042887798532335236 + +EPOCH: 115 +Loss at step 0: 0.03161928057670593 +Loss at step 50: 0.03745435178279877 +Loss at step 100: 0.03620803356170654 +Loss at step 150: 0.061530373990535736 +Loss at step 200: 0.03712744638323784 +Loss at step 250: 0.053335703909397125 +Loss at step 300: 0.03177780285477638 +Loss at step 350: 0.04157339781522751 +Loss at step 400: 0.04165922477841377 +Loss at step 450: 0.035079650580883026 +Loss at step 500: 0.048865292221307755 +Loss at step 550: 0.03243790939450264 +Loss at step 600: 0.04753963276743889 +Loss at step 650: 0.03399026021361351 +Loss at step 700: 0.04165761172771454 +Loss at step 750: 0.03889331966638565 +Loss at step 800: 0.0683445930480957 +Loss at step 850: 0.029029441997408867 +Loss at step 900: 0.0362982414662838 +Mean training loss after epoch 115: 0.04242513539480058 + +EPOCH: 116 +Loss at step 0: 0.04920484870672226 +Loss at step 50: 0.05231902003288269 +Loss at step 100: 0.05306987091898918 +Loss at step 150: 0.05240674689412117 +Loss at step 200: 0.030451014637947083 +Loss at step 250: 0.03890807926654816 +Loss at step 300: 0.05659289285540581 +Loss at step 350: 0.036650046706199646 +Loss at step 400: 0.0346701517701149 +Loss at step 450: 0.03688763082027435 +Loss at step 500: 0.036767538636922836 +Loss at step 550: 0.032335709780454636 +Loss at step 600: 0.05310001224279404 +Loss at step 650: 0.05066174641251564 +Loss at step 700: 0.05107351019978523 +Loss at step 750: 0.03427013009786606 +Loss at step 800: 0.04448677599430084 +Loss at step 850: 0.034152865409851074 +Loss at step 900: 0.03578594699501991 +Mean training loss after epoch 116: 0.04271252417583456 + +EPOCH: 117 +Loss at step 0: 0.041914403438568115 +Loss at step 50: 0.03944316506385803 +Loss at step 100: 0.036425188183784485 +Loss at step 150: 0.052166588604450226 +Loss at step 200: 0.03263081610202789 +Loss at step 250: 0.05043771117925644 +Loss at step 300: 0.042433347553014755 +Loss at step 350: 0.04409367963671684 +Loss at step 400: 0.05648966133594513 +Loss at step 450: 0.03183215856552124 +Loss at step 500: 0.0522194467484951 +Loss at step 550: 0.03536583483219147 +Loss at step 600: 0.037089429795742035 +Loss at step 650: 0.03506674990057945 +Loss at step 700: 0.07340752333402634 +Loss at step 750: 0.03434182330965996 +Loss at step 800: 0.05077645555138588 +Loss at step 850: 0.03686201199889183 +Loss at step 900: 0.04532172158360481 +Mean training loss after epoch 117: 0.04204082982872785 + +EPOCH: 118 +Loss at step 0: 0.0334637425839901 +Loss at step 50: 0.04286329075694084 +Loss at step 100: 0.039089079946279526 +Loss at step 150: 0.034637052565813065 +Loss at step 200: 0.03227698802947998 +Loss at step 250: 0.042662475258111954 +Loss at step 300: 0.04539114609360695 +Loss at step 350: 0.046212777495384216 +Loss at step 400: 0.04341699928045273 +Loss at step 450: 0.03234335035085678 +Loss at step 500: 0.03549254313111305 +Loss at step 550: 0.035518109798431396 +Loss at step 600: 0.05670395493507385 +Loss at step 650: 0.05329463258385658 +Loss at step 700: 0.05610251426696777 +Loss at step 750: 0.03034931793808937 +Loss at step 800: 0.05315934866666794 +Loss at step 850: 0.03849770873785019 +Loss at step 900: 0.043866049498319626 +Mean training loss after epoch 118: 0.04233125694540899 + +EPOCH: 119 +Loss at step 0: 0.041059985756874084 +Loss at step 50: 0.03781072795391083 +Loss at step 100: 0.058324579149484634 +Loss at step 150: 0.05159909278154373 +Loss at step 200: 0.03973580151796341 +Loss at step 250: 0.034565798938274384 +Loss at step 300: 0.03539511188864708 +Loss at step 350: 0.04175569489598274 +Loss at step 400: 0.033448122441768646 +Loss at step 450: 0.04465514048933983 +Loss at step 500: 0.035944875329732895 +Loss at step 550: 0.03353596478700638 +Loss at step 600: 0.03798174858093262 +Loss at step 650: 0.03126048669219017 +Loss at step 700: 0.035585448145866394 +Loss at step 750: 0.04394625499844551 +Loss at step 800: 0.03175988048315048 +Loss at step 850: 0.048157915472984314 +Loss at step 900: 0.05564304068684578 +Mean training loss after epoch 119: 0.04225547817438396 + +EPOCH: 120 +Loss at step 0: 0.03654998168349266 +Loss at step 50: 0.04451896995306015 +Loss at step 100: 0.03440498933196068 +Loss at step 150: 0.045890774577856064 +Loss at step 200: 0.06556855142116547 +Loss at step 250: 0.04978479444980621 +Loss at step 300: 0.05234746262431145 +Loss at step 350: 0.048763785511255264 +Loss at step 400: 0.03686203435063362 +Loss at step 450: 0.0392894446849823 +Loss at step 500: 0.02784913033246994 +Loss at step 550: 0.060626059770584106 +Loss at step 600: 0.028441544622182846 +Loss at step 650: 0.03267563879489899 +Loss at step 700: 0.07281859219074249 +Loss at step 750: 0.03018386848270893 +Loss at step 800: 0.05207233875989914 +Loss at step 850: 0.03853049501776695 +Loss at step 900: 0.03876703232526779 +Mean training loss after epoch 120: 0.04244164083558105 + +EPOCH: 121 +Loss at step 0: 0.032931819558143616 +Loss at step 50: 0.03577417507767677 +Loss at step 100: 0.05528498440980911 +Loss at step 150: 0.043831828981637955 +Loss at step 200: 0.040254004299640656 +Loss at step 250: 0.03872199356555939 +Loss at step 300: 0.05530036985874176 +Loss at step 350: 0.047019802033901215 +Loss at step 400: 0.05679032951593399 +Loss at step 450: 0.051629483699798584 +Loss at step 500: 0.04071091488003731 +Loss at step 550: 0.032426148653030396 +Loss at step 600: 0.03054644539952278 +Loss at step 650: 0.035769470036029816 +Loss at step 700: 0.04064173623919487 +Loss at step 750: 0.03792702406644821 +Loss at step 800: 0.0382523275911808 +Loss at step 850: 0.04808889329433441 +Loss at step 900: 0.03021102026104927 +Mean training loss after epoch 121: 0.042562772758177984 + +EPOCH: 122 +Loss at step 0: 0.03836292773485184 +Loss at step 50: 0.037795569747686386 +Loss at step 100: 0.037729810923337936 +Loss at step 150: 0.05760970711708069 +Loss at step 200: 0.03326259180903435 +Loss at step 250: 0.03908127546310425 +Loss at step 300: 0.05211610719561577 +Loss at step 350: 0.03992561250925064 +Loss at step 400: 0.03279055282473564 +Loss at step 450: 0.054718513041734695 +Loss at step 500: 0.028107738122344017 +Loss at step 550: 0.037835340946912766 +Loss at step 600: 0.0400877371430397 +Loss at step 650: 0.040323756635189056 +Loss at step 700: 0.050479333847761154 +Loss at step 750: 0.03604499623179436 +Loss at step 800: 0.061530329287052155 +Loss at step 850: 0.0346403643488884 +Loss at step 900: 0.042586684226989746 +Mean training loss after epoch 122: 0.042738208541277245 + +EPOCH: 123 +Loss at step 0: 0.037863537669181824 +Loss at step 50: 0.04166312888264656 +Loss at step 100: 0.0541631244122982 +Loss at step 150: 0.03443480655550957 +Loss at step 200: 0.03567218780517578 +Loss at step 250: 0.07678977400064468 +Loss at step 300: 0.035183656960725784 +Loss at step 350: 0.04932482913136482 +Loss at step 400: 0.03603409230709076 +Loss at step 450: 0.045590754598379135 +Loss at step 500: 0.050587721168994904 +Loss at step 550: 0.03978006914258003 +Loss at step 600: 0.03327057883143425 +Loss at step 650: 0.032649945467710495 +Loss at step 700: 0.040164392441511154 +Loss at step 750: 0.054399918764829636 +Loss at step 800: 0.03373962640762329 +Loss at step 850: 0.037361081689596176 +Loss at step 900: 0.040843669325113297 +Mean training loss after epoch 123: 0.04323216850188241 + +EPOCH: 124 +Loss at step 0: 0.054330259561538696 +Loss at step 50: 0.034485798329114914 +Loss at step 100: 0.03967555612325668 +Loss at step 150: 0.033644016832113266 +Loss at step 200: 0.08355532586574554 +Loss at step 250: 0.040038276463747025 +Loss at step 300: 0.059040505439043045 +Loss at step 350: 0.036361414939165115 +Loss at step 400: 0.032022830098867416 +Loss at step 450: 0.06843051314353943 +Loss at step 500: 0.03649810701608658 +Loss at step 550: 0.03632408380508423 +Loss at step 600: 0.04380091652274132 +Loss at step 650: 0.049652453511953354 +Loss at step 700: 0.031190911307930946 +Loss at step 750: 0.03930390998721123 +Loss at step 800: 0.03655650466680527 +Loss at step 850: 0.0601528100669384 +Loss at step 900: 0.07293304800987244 +Mean training loss after epoch 124: 0.043018826279542975 + +EPOCH: 125 +Loss at step 0: 0.03883155435323715 +Loss at step 50: 0.06450485438108444 +Loss at step 100: 0.046584781259298325 +Loss at step 150: 0.050592802464962006 +Loss at step 200: 0.033204685896635056 +Loss at step 250: 0.03923625871539116 +Loss at step 300: 0.03889136016368866 +Loss at step 350: 0.03396601602435112 +Loss at step 400: 0.036183927208185196 +Loss at step 450: 0.03738848865032196 +Loss at step 500: 0.050185903906822205 +Loss at step 550: 0.05953901633620262 +Loss at step 600: 0.05247611179947853 +Loss at step 650: 0.04176788777112961 +Loss at step 700: 0.03954014182090759 +Loss at step 750: 0.051179856061935425 +Loss at step 800: 0.029204268008470535 +Loss at step 850: 0.03406751900911331 +Loss at step 900: 0.04037805274128914 +Mean training loss after epoch 125: 0.042219243697456714 + +EPOCH: 126 +Loss at step 0: 0.05898353084921837 +Loss at step 50: 0.040512777864933014 +Loss at step 100: 0.0387885719537735 +Loss at step 150: 0.05783606320619583 +Loss at step 200: 0.0370011106133461 +Loss at step 250: 0.034667015075683594 +Loss at step 300: 0.07945484668016434 +Loss at step 350: 0.05463087558746338 +Loss at step 400: 0.05935561656951904 +Loss at step 450: 0.03419971093535423 +Loss at step 500: 0.0355793833732605 +Loss at step 550: 0.05645984038710594 +Loss at step 600: 0.049390655010938644 +Loss at step 650: 0.05601374804973602 +Loss at step 700: 0.03460899740457535 +Loss at step 750: 0.047203876078128815 +Loss at step 800: 0.043314892798662186 +Loss at step 850: 0.03529394418001175 +Loss at step 900: 0.03737284615635872 +Mean training loss after epoch 126: 0.0425018967548286 + +EPOCH: 127 +Loss at step 0: 0.03094486892223358 +Loss at step 50: 0.033535122871398926 +Loss at step 100: 0.03193407505750656 +Loss at step 150: 0.0369366817176342 +Loss at step 200: 0.02800428494811058 +Loss at step 250: 0.061115555465221405 +Loss at step 300: 0.03858532756567001 +Loss at step 350: 0.053538210690021515 +Loss at step 400: 0.04869364574551582 +Loss at step 450: 0.055324409157037735 +Loss at step 500: 0.03902880474925041 +Loss at step 550: 0.030761603266000748 +Loss at step 600: 0.04287640005350113 +Loss at step 650: 0.03342721611261368 +Loss at step 700: 0.053739070892333984 +Loss at step 750: 0.03543848916888237 +Loss at step 800: 0.04534701257944107 +Loss at step 850: 0.04458793252706528 +Loss at step 900: 0.05748144909739494 +Mean training loss after epoch 127: 0.042397225750232935 + +EPOCH: 128 +Loss at step 0: 0.04223093017935753 +Loss at step 50: 0.060702186077833176 +Loss at step 100: 0.05094020068645477 +Loss at step 150: 0.05862560123205185 +Loss at step 200: 0.05206675827503204 +Loss at step 250: 0.03660311549901962 +Loss at step 300: 0.033535201102495193 +Loss at step 350: 0.037460967898368835 +Loss at step 400: 0.053232479840517044 +Loss at step 450: 0.05418254807591438 +Loss at step 500: 0.03766205161809921 +Loss at step 550: 0.039794545620679855 +Loss at step 600: 0.04074326157569885 +Loss at step 650: 0.03707185387611389 +Loss at step 700: 0.04918089509010315 +Loss at step 750: 0.035559263080358505 +Loss at step 800: 0.038658492267131805 +Loss at step 850: 0.03465277701616287 +Loss at step 900: 0.055036671459674835 +Mean training loss after epoch 128: 0.04215331066812851 + +EPOCH: 129 +Loss at step 0: 0.05584968626499176 +Loss at step 50: 0.044562358409166336 +Loss at step 100: 0.04858553409576416 +Loss at step 150: 0.035875383764505386 +Loss at step 200: 0.03444959968328476 +Loss at step 250: 0.049522798508405685 +Loss at step 300: 0.038845498114824295 +Loss at step 350: 0.0346490778028965 +Loss at step 400: 0.0353156253695488 +Loss at step 450: 0.04465166851878166 +Loss at step 500: 0.051781926304101944 +Loss at step 550: 0.04664802551269531 +Loss at step 600: 0.04371103271842003 +Loss at step 650: 0.03156234323978424 +Loss at step 700: 0.03441200777888298 +Loss at step 750: 0.04080637916922569 +Loss at step 800: 0.04525822773575783 +Loss at step 850: 0.038173094391822815 +Loss at step 900: 0.03883713483810425 +Mean training loss after epoch 129: 0.042144837785265975 + +EPOCH: 130 +Loss at step 0: 0.03796138986945152 +Loss at step 50: 0.02935968153178692 +Loss at step 100: 0.0350450836122036 +Loss at step 150: 0.03702370822429657 +Loss at step 200: 0.04717714712023735 +Loss at step 250: 0.037151504307985306 +Loss at step 300: 0.030538195744156837 +Loss at step 350: 0.035160936415195465 +Loss at step 400: 0.056185707449913025 +Loss at step 450: 0.038169149309396744 +Loss at step 500: 0.03497578948736191 +Loss at step 550: 0.033725254237651825 +Loss at step 600: 0.03654317557811737 +Loss at step 650: 0.05485529825091362 +Loss at step 700: 0.035201311111450195 +Loss at step 750: 0.05112731456756592 +Loss at step 800: 0.0363975428044796 +Loss at step 850: 0.05157846212387085 +Loss at step 900: 0.053440894931554794 +Mean training loss after epoch 130: 0.04201112081135895 + +EPOCH: 131 +Loss at step 0: 0.05593106895685196 +Loss at step 50: 0.04118822142481804 +Loss at step 100: 0.05003751814365387 +Loss at step 150: 0.0459161177277565 +Loss at step 200: 0.03542783111333847 +Loss at step 250: 0.03617458790540695 +Loss at step 300: 0.046611104160547256 +Loss at step 350: 0.05079322308301926 +Loss at step 400: 0.05026945844292641 +Loss at step 450: 0.03160965070128441 +Loss at step 500: 0.03939694166183472 +Loss at step 550: 0.042725078761577606 +Loss at step 600: 0.039472032338380814 +Loss at step 650: 0.043966952711343765 +Loss at step 700: 0.05131252482533455 +Loss at step 750: 0.05381901189684868 +Loss at step 800: 0.04540075361728668 +Loss at step 850: 0.03819124028086662 +Loss at step 900: 0.03499366715550423 +Mean training loss after epoch 131: 0.04209081147477698 + +EPOCH: 132 +Loss at step 0: 0.03549477830529213 +Loss at step 50: 0.04868175461888313 +Loss at step 100: 0.06251970678567886 +Loss at step 150: 0.03984671086072922 +Loss at step 200: 0.03522738069295883 +Loss at step 250: 0.041131217032670975 +Loss at step 300: 0.03893343731760979 +Loss at step 350: 0.029867224395275116 +Loss at step 400: 0.03499307483434677 +Loss at step 450: 0.04396430775523186 +Loss at step 500: 0.039177216589450836 +Loss at step 550: 0.034836992621421814 +Loss at step 600: 0.0452614389359951 +Loss at step 650: 0.03610033541917801 +Loss at step 700: 0.03932995721697807 +Loss at step 750: 0.03963834419846535 +Loss at step 800: 0.03673923760652542 +Loss at step 850: 0.03914159908890724 +Loss at step 900: 0.0560661219060421 +Mean training loss after epoch 132: 0.042048414718351766 + +EPOCH: 133 +Loss at step 0: 0.037345174700021744 +Loss at step 50: 0.035683225840330124 +Loss at step 100: 0.042083919048309326 +Loss at step 150: 0.036224111914634705 +Loss at step 200: 0.03736821562051773 +Loss at step 250: 0.04696378856897354 +Loss at step 300: 0.030614478513598442 +Loss at step 350: 0.04149696230888367 +Loss at step 400: 0.03438796103000641 +Loss at step 450: 0.029761843383312225 +Loss at step 500: 0.04799075052142143 +Loss at step 550: 0.03374233841896057 +Loss at step 600: 0.03222092241048813 +Loss at step 650: 0.03481902927160263 +Loss at step 700: 0.032734550535678864 +Loss at step 750: 0.03693768009543419 +Loss at step 800: 0.057545632123947144 +Loss at step 850: 0.03527749702334404 +Loss at step 900: 0.08574411273002625 +Mean training loss after epoch 133: 0.0421965813387368 + +EPOCH: 134 +Loss at step 0: 0.033097926527261734 +Loss at step 50: 0.031262170523405075 +Loss at step 100: 0.03769355267286301 +Loss at step 150: 0.042281635105609894 +Loss at step 200: 0.04366563260555267 +Loss at step 250: 0.056263700127601624 +Loss at step 300: 0.0445694737136364 +Loss at step 350: 0.060232821851968765 +Loss at step 400: 0.043241508305072784 +Loss at step 450: 0.054095227271318436 +Loss at step 500: 0.03962637484073639 +Loss at step 550: 0.03208627551794052 +Loss at step 600: 0.036742426455020905 +Loss at step 650: 0.07225906848907471 +Loss at step 700: 0.031165501102805138 +Loss at step 750: 0.040376145392656326 +Loss at step 800: 0.031242134049534798 +Loss at step 850: 0.07587699592113495 +Loss at step 900: 0.03190525993704796 +Mean training loss after epoch 134: 0.04234294356829894 + +EPOCH: 135 +Loss at step 0: 0.04674546420574188 +Loss at step 50: 0.03515247255563736 +Loss at step 100: 0.030674539506435394 +Loss at step 150: 0.03536396101117134 +Loss at step 200: 0.03699110075831413 +Loss at step 250: 0.05407152697443962 +Loss at step 300: 0.03668119013309479 +Loss at step 350: 0.05391205474734306 +Loss at step 400: 0.035997532308101654 +Loss at step 450: 0.0386599525809288 +Loss at step 500: 0.05625467747449875 +Loss at step 550: 0.040797557681798935 +Loss at step 600: 0.03620133921504021 +Loss at step 650: 0.041961271315813065 +Loss at step 700: 0.04411289095878601 +Loss at step 750: 0.04163749888539314 +Loss at step 800: 0.03395351395010948 +Loss at step 850: 0.035000745207071304 +Loss at step 900: 0.049789972603321075 +Mean training loss after epoch 135: 0.04251719271339206 + +EPOCH: 136 +Loss at step 0: 0.03721001744270325 +Loss at step 50: 0.0380161888897419 +Loss at step 100: 0.03318065032362938 +Loss at step 150: 0.05025745928287506 +Loss at step 200: 0.02983713522553444 +Loss at step 250: 0.041543181985616684 +Loss at step 300: 0.03690667450428009 +Loss at step 350: 0.03452766686677933 +Loss at step 400: 0.06734222173690796 +Loss at step 450: 0.03532548248767853 +Loss at step 500: 0.0402277372777462 +Loss at step 550: 0.02923762984573841 +Loss at step 600: 0.03848644345998764 +Loss at step 650: 0.029214609414339066 +Loss at step 700: 0.03156350180506706 +Loss at step 750: 0.03646155819296837 +Loss at step 800: 0.030136052519083023 +Loss at step 850: 0.04044833034276962 +Loss at step 900: 0.03588998317718506 +Mean training loss after epoch 136: 0.042193242827299304 + +EPOCH: 137 +Loss at step 0: 0.04032415151596069 +Loss at step 50: 0.04570217803120613 +Loss at step 100: 0.041807111352682114 +Loss at step 150: 0.03328615799546242 +Loss at step 200: 0.03072769194841385 +Loss at step 250: 0.033465202897787094 +Loss at step 300: 0.033506300300359726 +Loss at step 350: 0.03391355648636818 +Loss at step 400: 0.04394785314798355 +Loss at step 450: 0.05369193106889725 +Loss at step 500: 0.038760554045438766 +Loss at step 550: 0.052724115550518036 +Loss at step 600: 0.048280686140060425 +Loss at step 650: 0.05860856920480728 +Loss at step 700: 0.0367179699242115 +Loss at step 750: 0.037339795380830765 +Loss at step 800: 0.042971864342689514 +Loss at step 850: 0.03581308200955391 +Loss at step 900: 0.03864269703626633 +Mean training loss after epoch 137: 0.04252129275280275 + +EPOCH: 138 +Loss at step 0: 0.03007676638662815 +Loss at step 50: 0.03923787921667099 +Loss at step 100: 0.03948834165930748 +Loss at step 150: 0.03789016231894493 +Loss at step 200: 0.046873949468135834 +Loss at step 250: 0.056466713547706604 +Loss at step 300: 0.03907657042145729 +Loss at step 350: 0.03223132714629173 +Loss at step 400: 0.04866752400994301 +Loss at step 450: 0.03630037605762482 +Loss at step 500: 0.029274195432662964 +Loss at step 550: 0.047705937176942825 +Loss at step 600: 0.05141275003552437 +Loss at step 650: 0.0336599238216877 +Loss at step 700: 0.03687726706266403 +Loss at step 750: 0.03609692305326462 +Loss at step 800: 0.03537045046687126 +Loss at step 850: 0.030984627082943916 +Loss at step 900: 0.032818686217069626 +Mean training loss after epoch 138: 0.04185507736051642 + +EPOCH: 139 +Loss at step 0: 0.039597246795892715 +Loss at step 50: 0.03734810650348663 +Loss at step 100: 0.03580016270279884 +Loss at step 150: 0.03648393228650093 +Loss at step 200: 0.03841892629861832 +Loss at step 250: 0.03909728676080704 +Loss at step 300: 0.04612003266811371 +Loss at step 350: 0.0468924380838871 +Loss at step 400: 0.05363857373595238 +Loss at step 450: 0.05658219754695892 +Loss at step 500: 0.03320447728037834 +Loss at step 550: 0.054929088801145554 +Loss at step 600: 0.07501211762428284 +Loss at step 650: 0.041361112147569656 +Loss at step 700: 0.034978266805410385 +Loss at step 750: 0.0490257628262043 +Loss at step 800: 0.04843713343143463 +Loss at step 850: 0.052945345640182495 +Loss at step 900: 0.031348828226327896 +Mean training loss after epoch 139: 0.04195261146547571 + +EPOCH: 140 +Loss at step 0: 0.038359805941581726 +Loss at step 50: 0.043496184051036835 +Loss at step 100: 0.03575534746050835 +Loss at step 150: 0.03715290129184723 +Loss at step 200: 0.05395453795790672 +Loss at step 250: 0.04312689229846001 +Loss at step 300: 0.039455071091651917 +Loss at step 350: 0.05315985530614853 +Loss at step 400: 0.04129849001765251 +Loss at step 450: 0.04127022251486778 +Loss at step 500: 0.04420750215649605 +Loss at step 550: 0.03587833046913147 +Loss at step 600: 0.038123976439237595 +Loss at step 650: 0.03822450339794159 +Loss at step 700: 0.040262363851070404 +Loss at step 750: 0.04293157905340195 +Loss at step 800: 0.03924191743135452 +Loss at step 850: 0.051242876797914505 +Loss at step 900: 0.030860869213938713 +Mean training loss after epoch 140: 0.041628152157849214 + +EPOCH: 141 +Loss at step 0: 0.0357309952378273 +Loss at step 50: 0.0405811183154583 +Loss at step 100: 0.03416278213262558 +Loss at step 150: 0.03351762890815735 +Loss at step 200: 0.030533114448189735 +Loss at step 250: 0.03451740741729736 +Loss at step 300: 0.03665946051478386 +Loss at step 350: 0.05478139594197273 +Loss at step 400: 0.05036437511444092 +Loss at step 450: 0.05603531748056412 +Loss at step 500: 0.03402724489569664 +Loss at step 550: 0.03750381991267204 +Loss at step 600: 0.040972087532281876 +Loss at step 650: 0.042260900139808655 +Loss at step 700: 0.03811081871390343 +Loss at step 750: 0.036845553666353226 +Loss at step 800: 0.0415690578520298 +Loss at step 850: 0.03965042531490326 +Loss at step 900: 0.03870100900530815 +Mean training loss after epoch 141: 0.04202673006763082 + +EPOCH: 142 +Loss at step 0: 0.03593452647328377 +Loss at step 50: 0.0351838618516922 +Loss at step 100: 0.06876881420612335 +Loss at step 150: 0.038944195955991745 +Loss at step 200: 0.040944647043943405 +Loss at step 250: 0.039801549166440964 +Loss at step 300: 0.03724919632077217 +Loss at step 350: 0.03012264519929886 +Loss at step 400: 0.028339020907878876 +Loss at step 450: 0.043429140001535416 +Loss at step 500: 0.039767246693372726 +Loss at step 550: 0.034304797649383545 +Loss at step 600: 0.0415533110499382 +Loss at step 650: 0.04240945726633072 +Loss at step 700: 0.03782442957162857 +Loss at step 750: 0.038175780326128006 +Loss at step 800: 0.0390368290245533 +Loss at step 850: 0.04891939461231232 +Loss at step 900: 0.04027196764945984 +Mean training loss after epoch 142: 0.042164087712542334 + +EPOCH: 143 +Loss at step 0: 0.05464017391204834 +Loss at step 50: 0.04778584465384483 +Loss at step 100: 0.043277475982904434 +Loss at step 150: 0.030136309564113617 +Loss at step 200: 0.038401342928409576 +Loss at step 250: 0.03655426949262619 +Loss at step 300: 0.03619815781712532 +Loss at step 350: 0.03555699810385704 +Loss at step 400: 0.06355958431959152 +Loss at step 450: 0.03683250769972801 +Loss at step 500: 0.031722113490104675 +Loss at step 550: 0.051293518394231796 +Loss at step 600: 0.04472873732447624 +Loss at step 650: 0.04285021498799324 +Loss at step 700: 0.03676331043243408 +Loss at step 750: 0.053878091275691986 +Loss at step 800: 0.039428550750017166 +Loss at step 850: 0.029044492170214653 +Loss at step 900: 0.046056412160396576 +Mean training loss after epoch 143: 0.04196451393875486 + +EPOCH: 144 +Loss at step 0: 0.03647203743457794 +Loss at step 50: 0.03546447679400444 +Loss at step 100: 0.028027458116412163 +Loss at step 150: 0.03706343099474907 +Loss at step 200: 0.02946298196911812 +Loss at step 250: 0.04093487188220024 +Loss at step 300: 0.03480026498436928 +Loss at step 350: 0.0481966957449913 +Loss at step 400: 0.04102826490998268 +Loss at step 450: 0.035431042313575745 +Loss at step 500: 0.05099305883049965 +Loss at step 550: 0.042340170592069626 +Loss at step 600: 0.05557699128985405 +Loss at step 650: 0.03197412192821503 +Loss at step 700: 0.035774651914834976 +Loss at step 750: 0.03557714819908142 +Loss at step 800: 0.042168546468019485 +Loss at step 850: 0.02944686822593212 +Loss at step 900: 0.034017935395240784 +Mean training loss after epoch 144: 0.04199131450323916 + +EPOCH: 145 +Loss at step 0: 0.0357891246676445 +Loss at step 50: 0.05408082529902458 +Loss at step 100: 0.03127046301960945 +Loss at step 150: 0.035092175006866455 +Loss at step 200: 0.04589945450425148 +Loss at step 250: 0.03216953203082085 +Loss at step 300: 0.04434205964207649 +Loss at step 350: 0.06629910320043564 +Loss at step 400: 0.04920700937509537 +Loss at step 450: 0.042137082666158676 +Loss at step 500: 0.031183796003460884 +Loss at step 550: 0.03542826697230339 +Loss at step 600: 0.04166002199053764 +Loss at step 650: 0.037325046956539154 +Loss at step 700: 0.04343381151556969 +Loss at step 750: 0.03837285563349724 +Loss at step 800: 0.0291228536516428 +Loss at step 850: 0.03319094330072403 +Loss at step 900: 0.036391112953424454 +Mean training loss after epoch 145: 0.04157962997569077 + +EPOCH: 146 +Loss at step 0: 0.03486700356006622 +Loss at step 50: 0.04799339547753334 +Loss at step 100: 0.053472988307476044 +Loss at step 150: 0.049768686294555664 +Loss at step 200: 0.04211004450917244 +Loss at step 250: 0.08333371579647064 +Loss at step 300: 0.03735140338540077 +Loss at step 350: 0.0437965951859951 +Loss at step 400: 0.04522951692342758 +Loss at step 450: 0.050311364233493805 +Loss at step 500: 0.05300811678171158 +Loss at step 550: 0.037035051733255386 +Loss at step 600: 0.03540425002574921 +Loss at step 650: 0.03272141516208649 +Loss at step 700: 0.04344155266880989 +Loss at step 750: 0.04171430692076683 +Loss at step 800: 0.03223220631480217 +Loss at step 850: 0.04631401598453522 +Loss at step 900: 0.034416262060403824 +Mean training loss after epoch 146: 0.04228796660383818 + +EPOCH: 147 +Loss at step 0: 0.04061895236372948 +Loss at step 50: 0.06510394811630249 +Loss at step 100: 0.034833405166864395 +Loss at step 150: 0.04406037554144859 +Loss at step 200: 0.045396748930215836 +Loss at step 250: 0.04945330694317818 +Loss at step 300: 0.0697096735239029 +Loss at step 350: 0.040425848215818405 +Loss at step 400: 0.03286696970462799 +Loss at step 450: 0.030878452584147453 +Loss at step 500: 0.052155978977680206 +Loss at step 550: 0.050235211849212646 +Loss at step 600: 0.03293605148792267 +Loss at step 650: 0.05016927048563957 +Loss at step 700: 0.03877097740769386 +Loss at step 750: 0.051481712609529495 +Loss at step 800: 0.0331539511680603 +Loss at step 850: 0.0463896170258522 +Loss at step 900: 0.050032272934913635 +Mean training loss after epoch 147: 0.041787002301181174 + +EPOCH: 148 +Loss at step 0: 0.0357489250600338 +Loss at step 50: 0.05856280401349068 +Loss at step 100: 0.0320919044315815 +Loss at step 150: 0.0821550190448761 +Loss at step 200: 0.04068780690431595 +Loss at step 250: 0.03332633152604103 +Loss at step 300: 0.0528477281332016 +Loss at step 350: 0.042828939855098724 +Loss at step 400: 0.04491664841771126 +Loss at step 450: 0.04121486842632294 +Loss at step 500: 0.03646986186504364 +Loss at step 550: 0.040154021233320236 +Loss at step 600: 0.03764384612441063 +Loss at step 650: 0.05622980371117592 +Loss at step 700: 0.0694388672709465 +Loss at step 750: 0.026673797518014908 +Loss at step 800: 0.033385686576366425 +Loss at step 850: 0.051487911492586136 +Loss at step 900: 0.03869228437542915 +Mean training loss after epoch 148: 0.0420387861078609 + +EPOCH: 149 +Loss at step 0: 0.032291069626808167 +Loss at step 50: 0.04381278157234192 +Loss at step 100: 0.05175172910094261 +Loss at step 150: 0.039617761969566345 +Loss at step 200: 0.04136640951037407 +Loss at step 250: 0.06429476290941238 +Loss at step 300: 0.06880918890237808 +Loss at step 350: 0.0365937314927578 +Loss at step 400: 0.04158145561814308 +Loss at step 450: 0.04357963427901268 +Loss at step 500: 0.03778946399688721 +Loss at step 550: 0.02897937409579754 +Loss at step 600: 0.03430233523249626 +Loss at step 650: 0.04783837869763374 +Loss at step 700: 0.033608194440603256 +Loss at step 750: 0.04544506222009659 +Loss at step 800: 0.048203710466623306 +Loss at step 850: 0.04275382310152054 +Loss at step 900: 0.041548918932676315 +Mean training loss after epoch 149: 0.042700570968311355 + +EPOCH: 150 +Loss at step 0: 0.049964435398578644 +Loss at step 50: 0.048390697687864304 +Loss at step 100: 0.05685277283191681 +Loss at step 150: 0.036489762365818024 +Loss at step 200: 0.05473584309220314 +Loss at step 250: 0.03579915314912796 +Loss at step 300: 0.03483206406235695 +Loss at step 350: 0.049429502338171005 +Loss at step 400: 0.050645049661397934 +Loss at step 450: 0.035665128380060196 +Loss at step 500: 0.055477336049079895 +Loss at step 550: 0.036538902670145035 +Loss at step 600: 0.034477416425943375 +Loss at step 650: 0.029415998607873917 +Loss at step 700: 0.036369092762470245 +Loss at step 750: 0.03708411753177643 +Loss at step 800: 0.04495839774608612 +Loss at step 850: 0.03702973574399948 +Loss at step 900: 0.03885873034596443 +Mean training loss after epoch 150: 0.041629522208815446 + +EPOCH: 151 +Loss at step 0: 0.03872460499405861 +Loss at step 50: 0.035674791783094406 +Loss at step 100: 0.03539711609482765 +Loss at step 150: 0.035271402448415756 +Loss at step 200: 0.038769178092479706 +Loss at step 250: 0.05824952945113182 +Loss at step 300: 0.035957783460617065 +Loss at step 350: 0.03432445973157883 +Loss at step 400: 0.05303642526268959 +Loss at step 450: 0.030788464471697807 +Loss at step 500: 0.03501858562231064 +Loss at step 550: 0.04111636430025101 +Loss at step 600: 0.032229967415332794 +Loss at step 650: 0.03571541979908943 +Loss at step 700: 0.03328194469213486 +Loss at step 750: 0.03248866647481918 +Loss at step 800: 0.03424318879842758 +Loss at step 850: 0.039510175585746765 +Loss at step 900: 0.05264480784535408 +Mean training loss after epoch 151: 0.041698548992448396 + +EPOCH: 152 +Loss at step 0: 0.043211840093135834 +Loss at step 50: 0.03340320661664009 +Loss at step 100: 0.048411767929792404 +Loss at step 150: 0.056272272020578384 +Loss at step 200: 0.051562827080488205 +Loss at step 250: 0.034891560673713684 +Loss at step 300: 0.05148777738213539 +Loss at step 350: 0.03595224395394325 +Loss at step 400: 0.028903206810355186 +Loss at step 450: 0.038183365017175674 +Loss at step 500: 0.03731566295027733 +Loss at step 550: 0.0356873981654644 +Loss at step 600: 0.0313677042722702 +Loss at step 650: 0.03932942450046539 +Loss at step 700: 0.0665639266371727 +Loss at step 750: 0.03317286819219589 +Loss at step 800: 0.03564203903079033 +Loss at step 850: 0.03477342799305916 +Loss at step 900: 0.04680752009153366 +Mean training loss after epoch 152: 0.041610624573664115 + +EPOCH: 153 +Loss at step 0: 0.030752496793866158 +Loss at step 50: 0.03984297439455986 +Loss at step 100: 0.031056780368089676 +Loss at step 150: 0.03939105570316315 +Loss at step 200: 0.06974080204963684 +Loss at step 250: 0.03848470747470856 +Loss at step 300: 0.03297055512666702 +Loss at step 350: 0.04547300189733505 +Loss at step 400: 0.03682214766740799 +Loss at step 450: 0.03131253272294998 +Loss at step 500: 0.04345107078552246 +Loss at step 550: 0.04177214205265045 +Loss at step 600: 0.030688272789120674 +Loss at step 650: 0.048584792762994766 +Loss at step 700: 0.038229744881391525 +Loss at step 750: 0.031089046970009804 +Loss at step 800: 0.03985687717795372 +Loss at step 850: 0.041947998106479645 +Loss at step 900: 0.044597942382097244 +Mean training loss after epoch 153: 0.042380063657535674 + +EPOCH: 154 +Loss at step 0: 0.05345641076564789 +Loss at step 50: 0.042899783700704575 +Loss at step 100: 0.03357723355293274 +Loss at step 150: 0.05140424147248268 +Loss at step 200: 0.02819659747183323 +Loss at step 250: 0.04493157938122749 +Loss at step 300: 0.03656427562236786 +Loss at step 350: 0.030955562368035316 +Loss at step 400: 0.0369301363825798 +Loss at step 450: 0.040071986615657806 +Loss at step 500: 0.05197841301560402 +Loss at step 550: 0.03778393194079399 +Loss at step 600: 0.03276905044913292 +Loss at step 650: 0.03182387724518776 +Loss at step 700: 0.03351379930973053 +Loss at step 750: 0.033173974603414536 +Loss at step 800: 0.0386836901307106 +Loss at step 850: 0.03227386251091957 +Loss at step 900: 0.035299576818943024 +Mean training loss after epoch 154: 0.04212207293539032 + +EPOCH: 155 +Loss at step 0: 0.05649520829319954 +Loss at step 50: 0.03102577105164528 +Loss at step 100: 0.032175131142139435 +Loss at step 150: 0.0337371900677681 +Loss at step 200: 0.03588712960481644 +Loss at step 250: 0.047558512538671494 +Loss at step 300: 0.05539235100150108 +Loss at step 350: 0.0677049309015274 +Loss at step 400: 0.04687006399035454 +Loss at step 450: 0.042771343141794205 +Loss at step 500: 0.03482664376497269 +Loss at step 550: 0.044766876846551895 +Loss at step 600: 0.045993443578481674 +Loss at step 650: 0.041110921651124954 +Loss at step 700: 0.03490821272134781 +Loss at step 750: 0.04456814005970955 +Loss at step 800: 0.05052883177995682 +Loss at step 850: 0.06528449058532715 +Loss at step 900: 0.036868300288915634 +Mean training loss after epoch 155: 0.04227992473269449 + +EPOCH: 156 +Loss at step 0: 0.05328263342380524 +Loss at step 50: 0.05558167025446892 +Loss at step 100: 0.035837870091199875 +Loss at step 150: 0.03872068226337433 +Loss at step 200: 0.062104836106300354 +Loss at step 250: 0.038871198892593384 +Loss at step 300: 0.035116370767354965 +Loss at step 350: 0.0337044931948185 +Loss at step 400: 0.035277001559734344 +Loss at step 450: 0.05462329462170601 +Loss at step 500: 0.03315630555152893 +Loss at step 550: 0.03951284661889076 +Loss at step 600: 0.04857729747891426 +Loss at step 650: 0.042670510709285736 +Loss at step 700: 0.030172094702720642 +Loss at step 750: 0.035838935524225235 +Loss at step 800: 0.05214604362845421 +Loss at step 850: 0.041880179196596146 +Loss at step 900: 0.03618021681904793 +Mean training loss after epoch 156: 0.04151801192469752 + +EPOCH: 157 +Loss at step 0: 0.053754597902297974 +Loss at step 50: 0.04583209753036499 +Loss at step 100: 0.048923052847385406 +Loss at step 150: 0.037170544266700745 +Loss at step 200: 0.05567541718482971 +Loss at step 250: 0.04202282056212425 +Loss at step 300: 0.0530812032520771 +Loss at step 350: 0.036701470613479614 +Loss at step 400: 0.04105161875486374 +Loss at step 450: 0.035315290093421936 +Loss at step 500: 0.03592460975050926 +Loss at step 550: 0.05100354924798012 +Loss at step 600: 0.03835726156830788 +Loss at step 650: 0.040117476135492325 +Loss at step 700: 0.03319944441318512 +Loss at step 750: 0.04874331131577492 +Loss at step 800: 0.03211164474487305 +Loss at step 850: 0.03677653148770332 +Loss at step 900: 0.03921843320131302 +Mean training loss after epoch 157: 0.04208396263778019 + +EPOCH: 158 +Loss at step 0: 0.053142696619033813 +Loss at step 50: 0.039625342935323715 +Loss at step 100: 0.041874464601278305 +Loss at step 150: 0.04172403737902641 +Loss at step 200: 0.03345603868365288 +Loss at step 250: 0.03492490202188492 +Loss at step 300: 0.038030609488487244 +Loss at step 350: 0.028495457023382187 +Loss at step 400: 0.04284537583589554 +Loss at step 450: 0.034388937056064606 +Loss at step 500: 0.055605459958314896 +Loss at step 550: 0.04129524528980255 +Loss at step 600: 0.03616258502006531 +Loss at step 650: 0.0285470113158226 +Loss at step 700: 0.0391588918864727 +Loss at step 750: 0.0373045951128006 +Loss at step 800: 0.036178652197122574 +Loss at step 850: 0.04029553756117821 +Loss at step 900: 0.030573036521673203 +Mean training loss after epoch 158: 0.041759737425728014 + +EPOCH: 159 +Loss at step 0: 0.03646078705787659 +Loss at step 50: 0.03891812264919281 +Loss at step 100: 0.04230709373950958 +Loss at step 150: 0.038509808480739594 +Loss at step 200: 0.053831759840250015 +Loss at step 250: 0.045457784086465836 +Loss at step 300: 0.035048820078372955 +Loss at step 350: 0.0402105338871479 +Loss at step 400: 0.040246471762657166 +Loss at step 450: 0.03213101997971535 +Loss at step 500: 0.039840925484895706 +Loss at step 550: 0.034487396478652954 +Loss at step 600: 0.05263527110219002 +Loss at step 650: 0.03569900244474411 +Loss at step 700: 0.03732169046998024 +Loss at step 750: 0.03432917222380638 +Loss at step 800: 0.029399283230304718 +Loss at step 850: 0.03557441011071205 +Loss at step 900: 0.05276878550648689 +Mean training loss after epoch 159: 0.04198013888056408 + +EPOCH: 160 +Loss at step 0: 0.03272593766450882 +Loss at step 50: 0.03808004409074783 +Loss at step 100: 0.04068906977772713 +Loss at step 150: 0.03765731677412987 +Loss at step 200: 0.044079214334487915 +Loss at step 250: 0.03938356041908264 +Loss at step 300: 0.05161362513899803 +Loss at step 350: 0.04128038510680199 +Loss at step 400: 0.05006210878491402 +Loss at step 450: 0.03323276713490486 +Loss at step 500: 0.04292380064725876 +Loss at step 550: 0.03615903854370117 +Loss at step 600: 0.04243318736553192 +Loss at step 650: 0.03709695115685463 +Loss at step 700: 0.05068572610616684 +Loss at step 750: 0.03722544386982918 +Loss at step 800: 0.056304723024368286 +Loss at step 850: 0.05459215119481087 +Loss at step 900: 0.035317618399858475 +Mean training loss after epoch 160: 0.04215756298970185 + +EPOCH: 161 +Loss at step 0: 0.042033180594444275 +Loss at step 50: 0.04185538738965988 +Loss at step 100: 0.03522934764623642 +Loss at step 150: 0.05067601427435875 +Loss at step 200: 0.03864051774144173 +Loss at step 250: 0.04868760704994202 +Loss at step 300: 0.03932592645287514 +Loss at step 350: 0.04293591156601906 +Loss at step 400: 0.0446302592754364 +Loss at step 450: 0.03664654493331909 +Loss at step 500: 0.05163183808326721 +Loss at step 550: 0.044752635061740875 +Loss at step 600: 0.03847457468509674 +Loss at step 650: 0.05418030917644501 +Loss at step 700: 0.04585876315832138 +Loss at step 750: 0.03625411167740822 +Loss at step 800: 0.035573311150074005 +Loss at step 850: 0.04612034559249878 +Loss at step 900: 0.03266897052526474 +Mean training loss after epoch 161: 0.041682401608461254 + +EPOCH: 162 +Loss at step 0: 0.03443225100636482 +Loss at step 50: 0.05295828357338905 +Loss at step 100: 0.044075947254896164 +Loss at step 150: 0.039766378700733185 +Loss at step 200: 0.03175616264343262 +Loss at step 250: 0.03700171783566475 +Loss at step 300: 0.06151432916522026 +Loss at step 350: 0.03539503365755081 +Loss at step 400: 0.035971418023109436 +Loss at step 450: 0.06043902039527893 +Loss at step 500: 0.026486312970519066 +Loss at step 550: 0.03172525390982628 +Loss at step 600: 0.05635107308626175 +Loss at step 650: 0.049782294780015945 +Loss at step 700: 0.03821375221014023 +Loss at step 750: 0.03226267918944359 +Loss at step 800: 0.04801420122385025 +Loss at step 850: 0.037035200744867325 +Loss at step 900: 0.0541158989071846 +Mean training loss after epoch 162: 0.041881437564312396 + +EPOCH: 163 +Loss at step 0: 0.046113207936286926 +Loss at step 50: 0.0443924181163311 +Loss at step 100: 0.03236755356192589 +Loss at step 150: 0.03812683746218681 +Loss at step 200: 0.03635641187429428 +Loss at step 250: 0.062125999480485916 +Loss at step 300: 0.05177285149693489 +Loss at step 350: 0.04134644195437431 +Loss at step 400: 0.051762331277132034 +Loss at step 450: 0.038076095283031464 +Loss at step 500: 0.048686202615499496 +Loss at step 550: 0.04135816916823387 +Loss at step 600: 0.04114864766597748 +Loss at step 650: 0.036704305559396744 +Loss at step 700: 0.049851950258016586 +Loss at step 750: 0.03393217548727989 +Loss at step 800: 0.03302428126335144 +Loss at step 850: 0.036545637995004654 +Loss at step 900: 0.042946405708789825 +Mean training loss after epoch 163: 0.04166837653188881 + +EPOCH: 164 +Loss at step 0: 0.05492968484759331 +Loss at step 50: 0.03143102303147316 +Loss at step 100: 0.03686508163809776 +Loss at step 150: 0.03780407831072807 +Loss at step 200: 0.03192293271422386 +Loss at step 250: 0.06838148087263107 +Loss at step 300: 0.05755019187927246 +Loss at step 350: 0.050360266119241714 +Loss at step 400: 0.036883942782878876 +Loss at step 450: 0.05594587326049805 +Loss at step 500: 0.0486307218670845 +Loss at step 550: 0.04816119745373726 +Loss at step 600: 0.05513868108391762 +Loss at step 650: 0.040407393127679825 +Loss at step 700: 0.035788264125585556 +Loss at step 750: 0.029059693217277527 +Loss at step 800: 0.03355870768427849 +Loss at step 850: 0.06275414675474167 +Loss at step 900: 0.04140639677643776 +Mean training loss after epoch 164: 0.0419985556732744 + +EPOCH: 165 +Loss at step 0: 0.05084880441427231 +Loss at step 50: 0.0543023943901062 +Loss at step 100: 0.056270111352205276 +Loss at step 150: 0.034627776592969894 +Loss at step 200: 0.029606739059090614 +Loss at step 250: 0.04984544217586517 +Loss at step 300: 0.033280953764915466 +Loss at step 350: 0.04561571031808853 +Loss at step 400: 0.034940462559461594 +Loss at step 450: 0.04139413684606552 +Loss at step 500: 0.040452610701322556 +Loss at step 550: 0.04350801184773445 +Loss at step 600: 0.03656912222504616 +Loss at step 650: 0.04006418213248253 +Loss at step 700: 0.04635033383965492 +Loss at step 750: 0.03196395933628082 +Loss at step 800: 0.03807682916522026 +Loss at step 850: 0.030541956424713135 +Loss at step 900: 0.05490187183022499 +Mean training loss after epoch 165: 0.04234211670477062 + +EPOCH: 166 +Loss at step 0: 0.0435408279299736 +Loss at step 50: 0.04618099704384804 +Loss at step 100: 0.05463716760277748 +Loss at step 150: 0.05916711688041687 +Loss at step 200: 0.05074099823832512 +Loss at step 250: 0.03297647461295128 +Loss at step 300: 0.03944069892168045 +Loss at step 350: 0.06627862900495529 +Loss at step 400: 0.03429859131574631 +Loss at step 450: 0.039032090455293655 +Loss at step 500: 0.0400698184967041 +Loss at step 550: 0.0376395508646965 +Loss at step 600: 0.03616385906934738 +Loss at step 650: 0.05470064654946327 +Loss at step 700: 0.04045157879590988 +Loss at step 750: 0.07541951537132263 +Loss at step 800: 0.05270006135106087 +Loss at step 850: 0.041193705052137375 +Loss at step 900: 0.03363768011331558 +Mean training loss after epoch 166: 0.041681498600872974 + +EPOCH: 167 +Loss at step 0: 0.03260944038629532 +Loss at step 50: 0.031192278489470482 +Loss at step 100: 0.030275052413344383 +Loss at step 150: 0.0319354273378849 +Loss at step 200: 0.038708776235580444 +Loss at step 250: 0.05172756686806679 +Loss at step 300: 0.0345488116145134 +Loss at step 350: 0.04113924130797386 +Loss at step 400: 0.035771701484918594 +Loss at step 450: 0.03828283026814461 +Loss at step 500: 0.0383487194776535 +Loss at step 550: 0.06837842613458633 +Loss at step 600: 0.03420782834291458 +Loss at step 650: 0.03368475288152695 +Loss at step 700: 0.036766745150089264 +Loss at step 750: 0.03165431320667267 +Loss at step 800: 0.04517103731632233 +Loss at step 850: 0.051145922392606735 +Loss at step 900: 0.03939900919795036 +Mean training loss after epoch 167: 0.04159986691227727 + +EPOCH: 168 +Loss at step 0: 0.04851570725440979 +Loss at step 50: 0.06281198561191559 +Loss at step 100: 0.04415833204984665 +Loss at step 150: 0.03673063963651657 +Loss at step 200: 0.03636414185166359 +Loss at step 250: 0.03222019225358963 +Loss at step 300: 0.0397556908428669 +Loss at step 350: 0.03364252671599388 +Loss at step 400: 0.03710572049021721 +Loss at step 450: 0.02969525195658207 +Loss at step 500: 0.08423370122909546 +Loss at step 550: 0.0415620356798172 +Loss at step 600: 0.027658242732286453 +Loss at step 650: 0.042391855269670486 +Loss at step 700: 0.03817259520292282 +Loss at step 750: 0.03259177878499031 +Loss at step 800: 0.03972531110048294 +Loss at step 850: 0.0303537305444479 +Loss at step 900: 0.03652387112379074 +Mean training loss after epoch 168: 0.04140357141579583 + +EPOCH: 169 +Loss at step 0: 0.05354360118508339 +Loss at step 50: 0.03195581212639809 +Loss at step 100: 0.06500847637653351 +Loss at step 150: 0.06610570102930069 +Loss at step 200: 0.04113024100661278 +Loss at step 250: 0.027583038434386253 +Loss at step 300: 0.03656454011797905 +Loss at step 350: 0.052170515060424805 +Loss at step 400: 0.03852315619587898 +Loss at step 450: 0.03867664188146591 +Loss at step 500: 0.04367469251155853 +Loss at step 550: 0.03310701623558998 +Loss at step 600: 0.06611268222332001 +Loss at step 650: 0.03098241798579693 +Loss at step 700: 0.04891456663608551 +Loss at step 750: 0.03942761942744255 +Loss at step 800: 0.041297849267721176 +Loss at step 850: 0.052537478506565094 +Loss at step 900: 0.0340694934129715 +Mean training loss after epoch 169: 0.04182671864967801 + +EPOCH: 170 +Loss at step 0: 0.0305585078895092 +Loss at step 50: 0.044676389545202255 +Loss at step 100: 0.03262167051434517 +Loss at step 150: 0.03465871140360832 +Loss at step 200: 0.053631119430065155 +Loss at step 250: 0.04847809672355652 +Loss at step 300: 0.03438197821378708 +Loss at step 350: 0.03563510254025459 +Loss at step 400: 0.041701238602399826 +Loss at step 450: 0.03720324486494064 +Loss at step 500: 0.03245695307850838 +Loss at step 550: 0.050643812865018845 +Loss at step 600: 0.0421716682612896 +Loss at step 650: 0.031076569110155106 +Loss at step 700: 0.03496023640036583 +Loss at step 750: 0.041589297354221344 +Loss at step 800: 0.0360647588968277 +Loss at step 850: 0.03295659273862839 +Loss at step 900: 0.03453189507126808 +Mean training loss after epoch 170: 0.04185354243169652 + +EPOCH: 171 +Loss at step 0: 0.053923431783914566 +Loss at step 50: 0.05165528133511543 +Loss at step 100: 0.05359581485390663 +Loss at step 150: 0.03814880549907684 +Loss at step 200: 0.057905055582523346 +Loss at step 250: 0.05054771900177002 +Loss at step 300: 0.029955018311738968 +Loss at step 350: 0.04426371306180954 +Loss at step 400: 0.037701405584812164 +Loss at step 450: 0.04641666263341904 +Loss at step 500: 0.05583236366510391 +Loss at step 550: 0.03619731217622757 +Loss at step 600: 0.03259444236755371 +Loss at step 650: 0.060233090072870255 +Loss at step 700: 0.04138468950986862 +Loss at step 750: 0.03163965418934822 +Loss at step 800: 0.032825879752635956 +Loss at step 850: 0.05752290412783623 +Loss at step 900: 0.03443465754389763 +Mean training loss after epoch 171: 0.04144824135389282 + +EPOCH: 172 +Loss at step 0: 0.03209567442536354 +Loss at step 50: 0.05818139761686325 +Loss at step 100: 0.05136827379465103 +Loss at step 150: 0.027739521116018295 +Loss at step 200: 0.042954180389642715 +Loss at step 250: 0.030690737068653107 +Loss at step 300: 0.03409494832158089 +Loss at step 350: 0.027100542560219765 +Loss at step 400: 0.03840995579957962 +Loss at step 450: 0.03903039172291756 +Loss at step 500: 0.04468401148915291 +Loss at step 550: 0.03910224139690399 +Loss at step 600: 0.0376056544482708 +Loss at step 650: 0.035212963819503784 +Loss at step 700: 0.03200206905603409 +Loss at step 750: 0.035134561359882355 +Loss at step 800: 0.03554246947169304 +Loss at step 850: 0.05527525022625923 +Loss at step 900: 0.03256995603442192 +Mean training loss after epoch 172: 0.04208877019441204 + +EPOCH: 173 +Loss at step 0: 0.035287100821733475 +Loss at step 50: 0.039894670248031616 +Loss at step 100: 0.03790206089615822 +Loss at step 150: 0.03075706586241722 +Loss at step 200: 0.05236460268497467 +Loss at step 250: 0.04161772504448891 +Loss at step 300: 0.033940721303224564 +Loss at step 350: 0.06850095838308334 +Loss at step 400: 0.03070612996816635 +Loss at step 450: 0.045446161180734634 +Loss at step 500: 0.032258786261081696 +Loss at step 550: 0.03494522348046303 +Loss at step 600: 0.045330144464969635 +Loss at step 650: 0.036904022097587585 +Loss at step 700: 0.032146237790584564 +Loss at step 750: 0.034715086221694946 +Loss at step 800: 0.03468701243400574 +Loss at step 850: 0.033052023500204086 +Loss at step 900: 0.034720417112112045 +Mean training loss after epoch 173: 0.042048089476282406 + +EPOCH: 174 +Loss at step 0: 0.03656395152211189 +Loss at step 50: 0.04568571224808693 +Loss at step 100: 0.03481351211667061 +Loss at step 150: 0.038224395364522934 +Loss at step 200: 0.058359503746032715 +Loss at step 250: 0.03833071514964104 +Loss at step 300: 0.043960943818092346 +Loss at step 350: 0.04375429451465607 +Loss at step 400: 0.03465664014220238 +Loss at step 450: 0.029857201501727104 +Loss at step 500: 0.044843051582574844 +Loss at step 550: 0.05128093063831329 +Loss at step 600: 0.06645429879426956 +Loss at step 650: 0.03523625433444977 +Loss at step 700: 0.03666084632277489 +Loss at step 750: 0.04340723156929016 +Loss at step 800: 0.03913688659667969 +Loss at step 850: 0.055996235460042953 +Loss at step 900: 0.032421406358480453 +Mean training loss after epoch 174: 0.04112288686655351 + +EPOCH: 175 +Loss at step 0: 0.028204411268234253 +Loss at step 50: 0.030527090653777122 +Loss at step 100: 0.039564263075590134 +Loss at step 150: 0.03648807108402252 +Loss at step 200: 0.042801812291145325 +Loss at step 250: 0.07026997953653336 +Loss at step 300: 0.053135115653276443 +Loss at step 350: 0.0397661030292511 +Loss at step 400: 0.047537870705127716 +Loss at step 450: 0.03553953766822815 +Loss at step 500: 0.0405605211853981 +Loss at step 550: 0.04098905622959137 +Loss at step 600: 0.05999923497438431 +Loss at step 650: 0.05660012736916542 +Loss at step 700: 0.03235200420022011 +Loss at step 750: 0.03290760517120361 +Loss at step 800: 0.034900546073913574 +Loss at step 850: 0.05151025578379631 +Loss at step 900: 0.041141875088214874 +Mean training loss after epoch 175: 0.04161210253096021 + +EPOCH: 176 +Loss at step 0: 0.03776821494102478 +Loss at step 50: 0.04021279513835907 +Loss at step 100: 0.041916023939847946 +Loss at step 150: 0.03895966708660126 +Loss at step 200: 0.032551463693380356 +Loss at step 250: 0.05833392217755318 +Loss at step 300: 0.048504963517189026 +Loss at step 350: 0.05007625371217728 +Loss at step 400: 0.03597918152809143 +Loss at step 450: 0.030169488862156868 +Loss at step 500: 0.03819960728287697 +Loss at step 550: 0.037356436252593994 +Loss at step 600: 0.03851252421736717 +Loss at step 650: 0.06501714140176773 +Loss at step 700: 0.05189223960042 +Loss at step 750: 0.03168368339538574 +Loss at step 800: 0.03951912373304367 +Loss at step 850: 0.03564509004354477 +Loss at step 900: 0.03276278078556061 +Mean training loss after epoch 176: 0.04125692002546749 + +EPOCH: 177 +Loss at step 0: 0.03515172004699707 +Loss at step 50: 0.03367337957024574 +Loss at step 100: 0.0318077951669693 +Loss at step 150: 0.03582939878106117 +Loss at step 200: 0.05992460623383522 +Loss at step 250: 0.0508149079978466 +Loss at step 300: 0.041243456304073334 +Loss at step 350: 0.05429394170641899 +Loss at step 400: 0.048130929470062256 +Loss at step 450: 0.035247184336185455 +Loss at step 500: 0.03542075678706169 +Loss at step 550: 0.055204953998327255 +Loss at step 600: 0.052897412329912186 +Loss at step 650: 0.06927145272493362 +Loss at step 700: 0.04794507846236229 +Loss at step 750: 0.03416883945465088 +Loss at step 800: 0.04960576817393303 +Loss at step 850: 0.03425171971321106 +Loss at step 900: 0.03997402638196945 +Mean training loss after epoch 177: 0.04163434571707681 + +EPOCH: 178 +Loss at step 0: 0.04990256577730179 +Loss at step 50: 0.0538577102124691 +Loss at step 100: 0.03529036417603493 +Loss at step 150: 0.03500866889953613 +Loss at step 200: 0.03199452906847 +Loss at step 250: 0.036851897835731506 +Loss at step 300: 0.029136331751942635 +Loss at step 350: 0.030358897522091866 +Loss at step 400: 0.051947467029094696 +Loss at step 450: 0.037148743867874146 +Loss at step 500: 0.03686618059873581 +Loss at step 550: 0.036732930690050125 +Loss at step 600: 0.04222632199525833 +Loss at step 650: 0.03982901945710182 +Loss at step 700: 0.03642851859331131 +Loss at step 750: 0.04014425352215767 +Loss at step 800: 0.0999036505818367 +Loss at step 850: 0.038242340087890625 +Loss at step 900: 0.035852327942848206 +Mean training loss after epoch 178: 0.04207657766875936 + +EPOCH: 179 +Loss at step 0: 0.04087178036570549 +Loss at step 50: 0.04753424972295761 +Loss at step 100: 0.03290014714002609 +Loss at step 150: 0.03695341572165489 +Loss at step 200: 0.03138614073395729 +Loss at step 250: 0.040319520980119705 +Loss at step 300: 0.049994390457868576 +Loss at step 350: 0.05205334722995758 +Loss at step 400: 0.06807678192853928 +Loss at step 450: 0.051330793648958206 +Loss at step 500: 0.03462173044681549 +Loss at step 550: 0.03375495225191116 +Loss at step 600: 0.036820266395807266 +Loss at step 650: 0.03800901770591736 +Loss at step 700: 0.030175233259797096 +Loss at step 750: 0.05230529233813286 +Loss at step 800: 0.03545716404914856 +Loss at step 850: 0.052242521196603775 +Loss at step 900: 0.030589766800403595 +Mean training loss after epoch 179: 0.04223245648003972 + +EPOCH: 180 +Loss at step 0: 0.03807156905531883 +Loss at step 50: 0.05775832384824753 +Loss at step 100: 0.03275652229785919 +Loss at step 150: 0.03259390965104103 +Loss at step 200: 0.06442693620920181 +Loss at step 250: 0.04907452315092087 +Loss at step 300: 0.033002205193042755 +Loss at step 350: 0.05629289895296097 +Loss at step 400: 0.039508040994405746 +Loss at step 450: 0.03669936582446098 +Loss at step 500: 0.0339655764400959 +Loss at step 550: 0.035679228603839874 +Loss at step 600: 0.039923977106809616 +Loss at step 650: 0.05427751690149307 +Loss at step 700: 0.037826668471097946 +Loss at step 750: 0.035374172031879425 +Loss at step 800: 0.06559602171182632 +Loss at step 850: 0.03814015910029411 +Loss at step 900: 0.03909355774521828 +Mean training loss after epoch 180: 0.04139347126218937 + +EPOCH: 181 +Loss at step 0: 0.03391748294234276 +Loss at step 50: 0.04056994244456291 +Loss at step 100: 0.049423567950725555 +Loss at step 150: 0.046806685626506805 +Loss at step 200: 0.03752606734633446 +Loss at step 250: 0.03340506553649902 +Loss at step 300: 0.04189173877239227 +Loss at step 350: 0.031038936227560043 +Loss at step 400: 0.0705329179763794 +Loss at step 450: 0.03440277650952339 +Loss at step 500: 0.0336567685008049 +Loss at step 550: 0.03429238125681877 +Loss at step 600: 0.031661275774240494 +Loss at step 650: 0.05041368678212166 +Loss at step 700: 0.030251847580075264 +Loss at step 750: 0.042865362018346786 +Loss at step 800: 0.033039726316928864 +Loss at step 850: 0.058725807815790176 +Loss at step 900: 0.03608013689517975 +Mean training loss after epoch 181: 0.04149253477157751 + +EPOCH: 182 +Loss at step 0: 0.03710400313138962 +Loss at step 50: 0.03371499851346016 +Loss at step 100: 0.0341544970870018 +Loss at step 150: 0.04096578434109688 +Loss at step 200: 0.05157192423939705 +Loss at step 250: 0.03875488415360451 +Loss at step 300: 0.03593416139483452 +Loss at step 350: 0.03340543434023857 +Loss at step 400: 0.05967196449637413 +Loss at step 450: 0.03931530937552452 +Loss at step 500: 0.05755196511745453 +Loss at step 550: 0.03740415349602699 +Loss at step 600: 0.03923661261796951 +Loss at step 650: 0.040235571563243866 +Loss at step 700: 0.04200943186879158 +Loss at step 750: 0.03610384464263916 +Loss at step 800: 0.031481098383665085 +Loss at step 850: 0.0516573041677475 +Loss at step 900: 0.047245364636182785 +Mean training loss after epoch 182: 0.041903669381939145 + +EPOCH: 183 +Loss at step 0: 0.04019775986671448 +Loss at step 50: 0.04790247604250908 +Loss at step 100: 0.04489818215370178 +Loss at step 150: 0.03816897049546242 +Loss at step 200: 0.033736344426870346 +Loss at step 250: 0.03466064855456352 +Loss at step 300: 0.040574636310338974 +Loss at step 350: 0.034679412841796875 +Loss at step 400: 0.034291334450244904 +Loss at step 450: 0.05978235602378845 +Loss at step 500: 0.04911112040281296 +Loss at step 550: 0.03342486545443535 +Loss at step 600: 0.052635472267866135 +Loss at step 650: 0.03890468180179596 +Loss at step 700: 0.03596392273902893 +Loss at step 750: 0.03404191508889198 +Loss at step 800: 0.05040137469768524 +Loss at step 850: 0.034710999578237534 +Loss at step 900: 0.038780633360147476 +Mean training loss after epoch 183: 0.041807248219371096 + +EPOCH: 184 +Loss at step 0: 0.05211412534117699 +Loss at step 50: 0.04960509017109871 +Loss at step 100: 0.03784497454762459 +Loss at step 150: 0.03517859801650047 +Loss at step 200: 0.03709854185581207 +Loss at step 250: 0.03380673751235008 +Loss at step 300: 0.03791601583361626 +Loss at step 350: 0.04009474813938141 +Loss at step 400: 0.03905599191784859 +Loss at step 450: 0.03254836052656174 +Loss at step 500: 0.05125289410352707 +Loss at step 550: 0.038431569933891296 +Loss at step 600: 0.03682177513837814 +Loss at step 650: 0.03914055600762367 +Loss at step 700: 0.03562482073903084 +Loss at step 750: 0.04414800554513931 +Loss at step 800: 0.03894803300499916 +Loss at step 850: 0.03770125284790993 +Loss at step 900: 0.030288131907582283 +Mean training loss after epoch 184: 0.04138687876527752 + +EPOCH: 185 +Loss at step 0: 0.03609955310821533 +Loss at step 50: 0.05682943016290665 +Loss at step 100: 0.038018904626369476 +Loss at step 150: 0.030884547159075737 +Loss at step 200: 0.041269998997449875 +Loss at step 250: 0.04373003542423248 +Loss at step 300: 0.035877473652362823 +Loss at step 350: 0.037849120795726776 +Loss at step 400: 0.043793316930532455 +Loss at step 450: 0.03450731933116913 +Loss at step 500: 0.04273483157157898 +Loss at step 550: 0.03578299283981323 +Loss at step 600: 0.032646261155605316 +Loss at step 650: 0.0462360754609108 +Loss at step 700: 0.054228439927101135 +Loss at step 750: 0.05155330151319504 +Loss at step 800: 0.03744790330529213 +Loss at step 850: 0.06680920720100403 +Loss at step 900: 0.03223875164985657 +Mean training loss after epoch 185: 0.041660312590981595 + +EPOCH: 186 +Loss at step 0: 0.033333949744701385 +Loss at step 50: 0.05429576709866524 +Loss at step 100: 0.03890854865312576 +Loss at step 150: 0.03497329354286194 +Loss at step 200: 0.034743137657642365 +Loss at step 250: 0.0356815941631794 +Loss at step 300: 0.05399752035737038 +Loss at step 350: 0.03176051005721092 +Loss at step 400: 0.03740697726607323 +Loss at step 450: 0.04082540050148964 +Loss at step 500: 0.05412736162543297 +Loss at step 550: 0.03972582891583443 +Loss at step 600: 0.038021307438611984 +Loss at step 650: 0.04753376543521881 +Loss at step 700: 0.05196103826165199 +Loss at step 750: 0.04077177867293358 +Loss at step 800: 0.03566981106996536 +Loss at step 850: 0.03721268102526665 +Loss at step 900: 0.05761560797691345 +Mean training loss after epoch 186: 0.04170741890269175 + +EPOCH: 187 +Loss at step 0: 0.03566679731011391 +Loss at step 50: 0.11628812551498413 +Loss at step 100: 0.03040335141122341 +Loss at step 150: 0.03489764779806137 +Loss at step 200: 0.05575336515903473 +Loss at step 250: 0.046886786818504333 +Loss at step 300: 0.035252176225185394 +Loss at step 350: 0.043795567005872726 +Loss at step 400: 0.028728723526000977 +Loss at step 450: 0.036280758678913116 +Loss at step 500: 0.04253661632537842 +Loss at step 550: 0.05303025618195534 +Loss at step 600: 0.03752442076802254 +Loss at step 650: 0.031123116612434387 +Loss at step 700: 0.03173130005598068 +Loss at step 750: 0.04688844829797745 +Loss at step 800: 0.036983974277973175 +Loss at step 850: 0.04824747145175934 +Loss at step 900: 0.039069145917892456 +Mean training loss after epoch 187: 0.04183094795626491 + +EPOCH: 188 +Loss at step 0: 0.031547438353300095 +Loss at step 50: 0.04061029478907585 +Loss at step 100: 0.04006023705005646 +Loss at step 150: 0.029827365651726723 +Loss at step 200: 0.040378764271736145 +Loss at step 250: 0.03698677942156792 +Loss at step 300: 0.03617756441235542 +Loss at step 350: 0.03405392915010452 +Loss at step 400: 0.048338402062654495 +Loss at step 450: 0.030941160395741463 +Loss at step 500: 0.04532836750149727 +Loss at step 550: 0.041155822575092316 +Loss at step 600: 0.045638855546712875 +Loss at step 650: 0.029842229560017586 +Loss at step 700: 0.03459912911057472 +Loss at step 750: 0.043139681220054626 +Loss at step 800: 0.033857397735118866 +Loss at step 850: 0.037075892090797424 +Loss at step 900: 0.0419020876288414 +Mean training loss after epoch 188: 0.04069976957201132 + +EPOCH: 189 +Loss at step 0: 0.033498603850603104 +Loss at step 50: 0.029143836349248886 +Loss at step 100: 0.05419186130166054 +Loss at step 150: 0.04173846170306206 +Loss at step 200: 0.037460848689079285 +Loss at step 250: 0.03811207413673401 +Loss at step 300: 0.07409770786762238 +Loss at step 350: 0.05379877984523773 +Loss at step 400: 0.0445668064057827 +Loss at step 450: 0.04734111204743385 +Loss at step 500: 0.03445335105061531 +Loss at step 550: 0.07331730425357819 +Loss at step 600: 0.03149990737438202 +Loss at step 650: 0.04199940711259842 +Loss at step 700: 0.05004751309752464 +Loss at step 750: 0.04749492183327675 +Loss at step 800: 0.03820522874593735 +Loss at step 850: 0.03572584688663483 +Loss at step 900: 0.04117851331830025 +Mean training loss after epoch 189: 0.041296352620429196 + +EPOCH: 190 +Loss at step 0: 0.03617745265364647 +Loss at step 50: 0.03318284451961517 +Loss at step 100: 0.03383420780301094 +Loss at step 150: 0.048314373940229416 +Loss at step 200: 0.03600124642252922 +Loss at step 250: 0.07072673738002777 +Loss at step 300: 0.04310063272714615 +Loss at step 350: 0.03930129110813141 +Loss at step 400: 0.042464740574359894 +Loss at step 450: 0.03321487456560135 +Loss at step 500: 0.0326659195125103 +Loss at step 550: 0.03674949333071709 +Loss at step 600: 0.04988384619355202 +Loss at step 650: 0.03373335674405098 +Loss at step 700: 0.04392938315868378 +Loss at step 750: 0.0330236591398716 +Loss at step 800: 0.041451502591371536 +Loss at step 850: 0.03775708004832268 +Loss at step 900: 0.027917273342609406 +Mean training loss after epoch 190: 0.04126379593039182 + +EPOCH: 191 +Loss at step 0: 0.03522908687591553 +Loss at step 50: 0.056289736181497574 +Loss at step 100: 0.04285778850317001 +Loss at step 150: 0.036079779267311096 +Loss at step 200: 0.030695710331201553 +Loss at step 250: 0.03508748486638069 +Loss at step 300: 0.06639096885919571 +Loss at step 350: 0.050350844860076904 +Loss at step 400: 0.035781338810920715 +Loss at step 450: 0.045949894934892654 +Loss at step 500: 0.02796305902302265 +Loss at step 550: 0.03321106359362602 +Loss at step 600: 0.0372857004404068 +Loss at step 650: 0.06294380873441696 +Loss at step 700: 0.05089360848069191 +Loss at step 750: 0.038762085139751434 +Loss at step 800: 0.039276279509067535 +Loss at step 850: 0.060114022344350815 +Loss at step 900: 0.04756484180688858 +Mean training loss after epoch 191: 0.041512265670369426 + +EPOCH: 192 +Loss at step 0: 0.034350521862506866 +Loss at step 50: 0.04084480553865433 +Loss at step 100: 0.0338444747030735 +Loss at step 150: 0.054738275706768036 +Loss at step 200: 0.03252284228801727 +Loss at step 250: 0.05226755887269974 +Loss at step 300: 0.03735841438174248 +Loss at step 350: 0.03251798450946808 +Loss at step 400: 0.051244113594293594 +Loss at step 450: 0.03884818032383919 +Loss at step 500: 0.04193374887108803 +Loss at step 550: 0.033448901027441025 +Loss at step 600: 0.033464811742305756 +Loss at step 650: 0.039219241589307785 +Loss at step 700: 0.03877369686961174 +Loss at step 750: 0.031385522335767746 +Loss at step 800: 0.03320163115859032 +Loss at step 850: 0.05189124494791031 +Loss at step 900: 0.04130826145410538 +Mean training loss after epoch 192: 0.041748421510923776 + +EPOCH: 193 +Loss at step 0: 0.04113989695906639 +Loss at step 50: 0.03168807551264763 +Loss at step 100: 0.035098232328891754 +Loss at step 150: 0.034963954240083694 +Loss at step 200: 0.05798143520951271 +Loss at step 250: 0.03390754386782646 +Loss at step 300: 0.04076037555932999 +Loss at step 350: 0.0436348132789135 +Loss at step 400: 0.03736273944377899 +Loss at step 450: 0.038291774690151215 +Loss at step 500: 0.040943972766399384 +Loss at step 550: 0.04079652577638626 +Loss at step 600: 0.04327332228422165 +Loss at step 650: 0.03669068217277527 +Loss at step 700: 0.037559207528829575 +Loss at step 750: 0.032399732619524 +Loss at step 800: 0.034300658851861954 +Loss at step 850: 0.03065013885498047 +Loss at step 900: 0.040528304874897 +Mean training loss after epoch 193: 0.04117203234776314 + +EPOCH: 194 +Loss at step 0: 0.06797874718904495 +Loss at step 50: 0.052924975752830505 +Loss at step 100: 0.05212656781077385 +Loss at step 150: 0.03927493467926979 +Loss at step 200: 0.043678004294633865 +Loss at step 250: 0.036535218358039856 +Loss at step 300: 0.03414019197225571 +Loss at step 350: 0.03480542451143265 +Loss at step 400: 0.04807673394680023 +Loss at step 450: 0.03477676957845688 +Loss at step 500: 0.05026653781533241 +Loss at step 550: 0.049459606409072876 +Loss at step 600: 0.05336325243115425 +Loss at step 650: 0.05143073946237564 +Loss at step 700: 0.029971925541758537 +Loss at step 750: 0.040802404284477234 +Loss at step 800: 0.037547655403614044 +Loss at step 850: 0.03940505161881447 +Loss at step 900: 0.029753219336271286 +Mean training loss after epoch 194: 0.04150237108090285 + +EPOCH: 195 +Loss at step 0: 0.03520885854959488 +Loss at step 50: 0.03639410436153412 +Loss at step 100: 0.034854043275117874 +Loss at step 150: 0.03551192209124565 +Loss at step 200: 0.025638144463300705 +Loss at step 250: 0.07378534972667694 +Loss at step 300: 0.058501437306404114 +Loss at step 350: 0.06586998701095581 +Loss at step 400: 0.0365779846906662 +Loss at step 450: 0.036176346242427826 +Loss at step 500: 0.04393264651298523 +Loss at step 550: 0.03411954268813133 +Loss at step 600: 0.03807177394628525 +Loss at step 650: 0.04445924982428551 +Loss at step 700: 0.06824284046888351 +Loss at step 750: 0.039268508553504944 +Loss at step 800: 0.02649448812007904 +Loss at step 850: 0.05371391400694847 +Loss at step 900: 0.06416977196931839 +Mean training loss after epoch 195: 0.04103545524449999 + +EPOCH: 196 +Loss at step 0: 0.038097675889730453 +Loss at step 50: 0.044847916811704636 +Loss at step 100: 0.03804577887058258 +Loss at step 150: 0.03618156909942627 +Loss at step 200: 0.04062632471323013 +Loss at step 250: 0.04594080522656441 +Loss at step 300: 0.03366316854953766 +Loss at step 350: 0.032435376197099686 +Loss at step 400: 0.0293735284358263 +Loss at step 450: 0.034268464893102646 +Loss at step 500: 0.03657921031117439 +Loss at step 550: 0.040299192070961 +Loss at step 600: 0.03601702302694321 +Loss at step 650: 0.04306451603770256 +Loss at step 700: 0.03470020368695259 +Loss at step 750: 0.0481594018638134 +Loss at step 800: 0.04332111403346062 +Loss at step 850: 0.048539843410253525 +Loss at step 900: 0.040765244513750076 +Mean training loss after epoch 196: 0.04173837025671689 + +EPOCH: 197 +Loss at step 0: 0.05736205354332924 +Loss at step 50: 0.027131736278533936 +Loss at step 100: 0.03562883287668228 +Loss at step 150: 0.03762797266244888 +Loss at step 200: 0.07137754559516907 +Loss at step 250: 0.06394952535629272 +Loss at step 300: 0.04078604280948639 +Loss at step 350: 0.04103813320398331 +Loss at step 400: 0.040748171508312225 +Loss at step 450: 0.049523528665304184 +Loss at step 500: 0.04065272584557533 +Loss at step 550: 0.05001842975616455 +Loss at step 600: 0.05851582810282707 +Loss at step 650: 0.0581602081656456 +Loss at step 700: 0.02705790475010872 +Loss at step 750: 0.040607865899801254 +Loss at step 800: 0.03715815767645836 +Loss at step 850: 0.03969978168606758 +Loss at step 900: 0.043329522013664246 +Mean training loss after epoch 197: 0.04137408531614458 + +EPOCH: 198 +Loss at step 0: 0.03971967101097107 +Loss at step 50: 0.03631794452667236 +Loss at step 100: 0.0338369719684124 +Loss at step 150: 0.04944315552711487 +Loss at step 200: 0.04070665314793587 +Loss at step 250: 0.051509302109479904 +Loss at step 300: 0.05552169308066368 +Loss at step 350: 0.03374495357275009 +Loss at step 400: 0.03621584549546242 +Loss at step 450: 0.03770939260721207 +Loss at step 500: 0.03463056683540344 +Loss at step 550: 0.02788911759853363 +Loss at step 600: 0.04447398707270622 +Loss at step 650: 0.05413061007857323 +Loss at step 700: 0.04044802114367485 +Loss at step 750: 0.07355301827192307 +Loss at step 800: 0.03673677518963814 +Loss at step 850: 0.037305835634469986 +Loss at step 900: 0.0418182909488678 +Mean training loss after epoch 198: 0.041858961478050453 + +EPOCH: 199 +Loss at step 0: 0.04305747523903847 +Loss at step 50: 0.03307000920176506 +Loss at step 100: 0.033796682953834534 +Loss at step 150: 0.0348166823387146 +Loss at step 200: 0.045635029673576355 +Loss at step 250: 0.03464725613594055 +Loss at step 300: 0.07720781862735748 +Loss at step 350: 0.024645846337080002 +Loss at step 400: 0.03345625847578049 +Loss at step 450: 0.03154450282454491 +Loss at step 500: 0.045259904116392136 +Loss at step 550: 0.03695496916770935 +Loss at step 600: 0.0499909333884716 +Loss at step 650: 0.04106828570365906 +Loss at step 700: 0.032556742429733276 +Loss at step 750: 0.03082137182354927 +Loss at step 800: 0.03785950317978859 +Loss at step 850: 0.05331188812851906 +Loss at step 900: 0.027672991156578064 +Mean training loss after epoch 199: 0.041491717618427425 + +EPOCH: 200 +Loss at step 0: 0.035790350288152695 +Loss at step 50: 0.041217826306819916 +Loss at step 100: 0.034450091421604156 +Loss at step 150: 0.03561009466648102 +Loss at step 200: 0.03485352545976639 +Loss at step 250: 0.029213659465312958 +Loss at step 300: 0.04888638108968735 +Loss at step 350: 0.049630194902420044 +Loss at step 400: 0.040246348828077316 +Loss at step 450: 0.051989808678627014 +Loss at step 500: 0.03532884642481804 +Loss at step 550: 0.05131925269961357 +Loss at step 600: 0.07050848752260208 +Loss at step 650: 0.051529932767152786 +Loss at step 700: 0.031611260026693344 +Loss at step 750: 0.05834163725376129 +Loss at step 800: 0.03199863061308861 +Loss at step 850: 0.050334375351667404 +Loss at step 900: 0.048048146069049835 +Mean training loss after epoch 200: 0.04144771575633842 + +EPOCH: 201 +Loss at step 0: 0.049343012273311615 +Loss at step 50: 0.03183600679039955 +Loss at step 100: 0.048347923904657364 +Loss at step 150: 0.05308234319090843 +Loss at step 200: 0.05167638882994652 +Loss at step 250: 0.04280967637896538 +Loss at step 300: 0.0367121621966362 +Loss at step 350: 0.05468958988785744 +Loss at step 400: 0.03841932862997055 +Loss at step 450: 0.034620095044374466 +Loss at step 500: 0.057135775685310364 +Loss at step 550: 0.041183676570653915 +Loss at step 600: 0.040930718183517456 +Loss at step 650: 0.03261233866214752 +Loss at step 700: 0.03489133343100548 +Loss at step 750: 0.02763674221932888 +Loss at step 800: 0.03225076198577881 +Loss at step 850: 0.05398840829730034 +Loss at step 900: 0.049016717821359634 +Mean training loss after epoch 201: 0.041507416116848175 + +EPOCH: 202 +Loss at step 0: 0.03353920951485634 +Loss at step 50: 0.03718918189406395 +Loss at step 100: 0.03364242613315582 +Loss at step 150: 0.03372986242175102 +Loss at step 200: 0.056242551654577255 +Loss at step 250: 0.04020671918988228 +Loss at step 300: 0.03130819648504257 +Loss at step 350: 0.03483444079756737 +Loss at step 400: 0.0482604056596756 +Loss at step 450: 0.03938969224691391 +Loss at step 500: 0.05265633016824722 +Loss at step 550: 0.03554932773113251 +Loss at step 600: 0.03541647270321846 +Loss at step 650: 0.029953882098197937 +Loss at step 700: 0.03256510570645332 +Loss at step 750: 0.04815101623535156 +Loss at step 800: 0.038196250796318054 +Loss at step 850: 0.03245275840163231 +Loss at step 900: 0.03632979467511177 +Mean training loss after epoch 202: 0.0410298561172953 + +EPOCH: 203 +Loss at step 0: 0.030459241941571236 +Loss at step 50: 0.043799057602882385 +Loss at step 100: 0.047694768756628036 +Loss at step 150: 0.03889485448598862 +Loss at step 200: 0.06752520054578781 +Loss at step 250: 0.02685130387544632 +Loss at step 300: 0.036014627665281296 +Loss at step 350: 0.037845950573682785 +Loss at step 400: 0.033889371901750565 +Loss at step 450: 0.031847402453422546 +Loss at step 500: 0.038399528712034225 +Loss at step 550: 0.03463640436530113 +Loss at step 600: 0.039087750017642975 +Loss at step 650: 0.03970421105623245 +Loss at step 700: 0.03944718837738037 +Loss at step 750: 0.03693550452589989 +Loss at step 800: 0.037040844559669495 +Loss at step 850: 0.0331515371799469 +Loss at step 900: 0.04205691069364548 +Mean training loss after epoch 203: 0.0412048772275289 + +EPOCH: 204 +Loss at step 0: 0.056759487837553024 +Loss at step 50: 0.0413278266787529 +Loss at step 100: 0.040270932018756866 +Loss at step 150: 0.0361940898001194 +Loss at step 200: 0.07127005606889725 +Loss at step 250: 0.05027209222316742 +Loss at step 300: 0.04283789172768593 +Loss at step 350: 0.05113120749592781 +Loss at step 400: 0.04532406106591225 +Loss at step 450: 0.04405355453491211 +Loss at step 500: 0.057780180126428604 +Loss at step 550: 0.037918925285339355 +Loss at step 600: 0.03008476458489895 +Loss at step 650: 0.04085720703005791 +Loss at step 700: 0.04501824453473091 +Loss at step 750: 0.03670662268996239 +Loss at step 800: 0.034831445664167404 +Loss at step 850: 0.035773713141679764 +Loss at step 900: 0.03600168228149414 +Mean training loss after epoch 204: 0.04139673354616488 + +EPOCH: 205 +Loss at step 0: 0.040792644023895264 +Loss at step 50: 0.039271481335163116 +Loss at step 100: 0.04059242457151413 +Loss at step 150: 0.03818690404295921 +Loss at step 200: 0.04294337332248688 +Loss at step 250: 0.057097338140010834 +Loss at step 300: 0.03699008747935295 +Loss at step 350: 0.05602728947997093 +Loss at step 400: 0.036347582936286926 +Loss at step 450: 0.03644917160272598 +Loss at step 500: 0.03737808018922806 +Loss at step 550: 0.03688392788171768 +Loss at step 600: 0.03842123597860336 +Loss at step 650: 0.031956832855939865 +Loss at step 700: 0.0357840359210968 +Loss at step 750: 0.037642884999513626 +Loss at step 800: 0.031830888241529465 +Loss at step 850: 0.03746681660413742 +Loss at step 900: 0.04127761349081993 +Mean training loss after epoch 205: 0.04121694114329273 + +EPOCH: 206 +Loss at step 0: 0.03936978057026863 +Loss at step 50: 0.03137960657477379 +Loss at step 100: 0.028490858152508736 +Loss at step 150: 0.038255635648965836 +Loss at step 200: 0.035686857998371124 +Loss at step 250: 0.04872118681669235 +Loss at step 300: 0.05316340923309326 +Loss at step 350: 0.037066906690597534 +Loss at step 400: 0.034964218735694885 +Loss at step 450: 0.034100595861673355 +Loss at step 500: 0.030275076627731323 +Loss at step 550: 0.05340017378330231 +Loss at step 600: 0.03532257676124573 +Loss at step 650: 0.039942771196365356 +Loss at step 700: 0.04474440962076187 +Loss at step 750: 0.0567655973136425 +Loss at step 800: 0.038071874529123306 +Loss at step 850: 0.03440165892243385 +Loss at step 900: 0.03704487904906273 +Mean training loss after epoch 206: 0.04125204876557723 + +EPOCH: 207 +Loss at step 0: 0.038039278239011765 +Loss at step 50: 0.04785553738474846 +Loss at step 100: 0.042042892426252365 +Loss at step 150: 0.038614820688962936 +Loss at step 200: 0.030518142506480217 +Loss at step 250: 0.03909027948975563 +Loss at step 300: 0.03875470906496048 +Loss at step 350: 0.03967742994427681 +Loss at step 400: 0.03191816061735153 +Loss at step 450: 0.046760812401771545 +Loss at step 500: 0.048585861921310425 +Loss at step 550: 0.0373445563018322 +Loss at step 600: 0.04253246262669563 +Loss at step 650: 0.043145034462213516 +Loss at step 700: 0.03523382544517517 +Loss at step 750: 0.05494747310876846 +Loss at step 800: 0.030589915812015533 +Loss at step 850: 0.03915799781680107 +Loss at step 900: 0.04673401266336441 +Mean training loss after epoch 207: 0.04131905022841781 + +EPOCH: 208 +Loss at step 0: 0.04305552691221237 +Loss at step 50: 0.034908805042505264 +Loss at step 100: 0.03438510000705719 +Loss at step 150: 0.05595008656382561 +Loss at step 200: 0.036673370748758316 +Loss at step 250: 0.061115823686122894 +Loss at step 300: 0.03532424941658974 +Loss at step 350: 0.03127359226346016 +Loss at step 400: 0.036795247346162796 +Loss at step 450: 0.03030930832028389 +Loss at step 500: 0.047763705253601074 +Loss at step 550: 0.03661363571882248 +Loss at step 600: 0.05410737171769142 +Loss at step 650: 0.06448393315076828 +Loss at step 700: 0.04093127325177193 +Loss at step 750: 0.05752434581518173 +Loss at step 800: 0.03656463697552681 +Loss at step 850: 0.03313460201025009 +Loss at step 900: 0.038126688450574875 +Mean training loss after epoch 208: 0.04117663974351466 + +EPOCH: 209 +Loss at step 0: 0.03493553772568703 +Loss at step 50: 0.03722948580980301 +Loss at step 100: 0.05398183688521385 +Loss at step 150: 0.0524870790541172 +Loss at step 200: 0.037257131189107895 +Loss at step 250: 0.04640546068549156 +Loss at step 300: 0.056868430227041245 +Loss at step 350: 0.04795893654227257 +Loss at step 400: 0.03179486468434334 +Loss at step 450: 0.03765290603041649 +Loss at step 500: 0.03589199110865593 +Loss at step 550: 0.0380818247795105 +Loss at step 600: 0.04972294718027115 +Loss at step 650: 0.029078220948576927 +Loss at step 700: 0.029039815068244934 +Loss at step 750: 0.03712788224220276 +Loss at step 800: 0.059975765645504 +Loss at step 850: 0.046831708401441574 +Loss at step 900: 0.062213387340307236 +Mean training loss after epoch 209: 0.04130176584254196 + +EPOCH: 210 +Loss at step 0: 0.03962980955839157 +Loss at step 50: 0.03791427984833717 +Loss at step 100: 0.03497166559100151 +Loss at step 150: 0.031263984739780426 +Loss at step 200: 0.051545023918151855 +Loss at step 250: 0.03977767378091812 +Loss at step 300: 0.04709793999791145 +Loss at step 350: 0.05070801451802254 +Loss at step 400: 0.05128360167145729 +Loss at step 450: 0.02927989326417446 +Loss at step 500: 0.04444617033004761 +Loss at step 550: 0.03739051893353462 +Loss at step 600: 0.05879747122526169 +Loss at step 650: 0.053738340735435486 +Loss at step 700: 0.033911995589733124 +Loss at step 750: 0.04100339114665985 +Loss at step 800: 0.04177099093794823 +Loss at step 850: 0.05663182586431503 +Loss at step 900: 0.06499599665403366 +Mean training loss after epoch 210: 0.04155924991527791 + +EPOCH: 211 +Loss at step 0: 0.030548926442861557 +Loss at step 50: 0.03719782829284668 +Loss at step 100: 0.02642173133790493 +Loss at step 150: 0.035055577754974365 +Loss at step 200: 0.047963909804821014 +Loss at step 250: 0.030586957931518555 +Loss at step 300: 0.03406506031751633 +Loss at step 350: 0.034562744200229645 +Loss at step 400: 0.05038195103406906 +Loss at step 450: 0.03493376076221466 +Loss at step 500: 0.040189724415540695 +Loss at step 550: 0.03963726758956909 +Loss at step 600: 0.05514547601342201 +Loss at step 650: 0.03667889162898064 +Loss at step 700: 0.03222879767417908 +Loss at step 750: 0.035416096448898315 +Loss at step 800: 0.041518017649650574 +Loss at step 850: 0.038832809776067734 +Loss at step 900: 0.04651348665356636 +Mean training loss after epoch 211: 0.040945966876963814 + +EPOCH: 212 +Loss at step 0: 0.03327246010303497 +Loss at step 50: 0.05043778568506241 +Loss at step 100: 0.04172353446483612 +Loss at step 150: 0.03507307171821594 +Loss at step 200: 0.029459193348884583 +Loss at step 250: 0.04598444327712059 +Loss at step 300: 0.03865790367126465 +Loss at step 350: 0.033216699957847595 +Loss at step 400: 0.03927202522754669 +Loss at step 450: 0.053678590804338455 +Loss at step 500: 0.0357988066971302 +Loss at step 550: 0.03844377398490906 +Loss at step 600: 0.040219828486442566 +Loss at step 650: 0.053833648562431335 +Loss at step 700: 0.05301403999328613 +Loss at step 750: 0.03721046820282936 +Loss at step 800: 0.030222177505493164 +Loss at step 850: 0.03824635595083237 +Loss at step 900: 0.03393498808145523 +Mean training loss after epoch 212: 0.04123707259816529 + +EPOCH: 213 +Loss at step 0: 0.05674567446112633 +Loss at step 50: 0.03383493423461914 +Loss at step 100: 0.034547653049230576 +Loss at step 150: 0.042903732508420944 +Loss at step 200: 0.03742729872465134 +Loss at step 250: 0.04211706295609474 +Loss at step 300: 0.03560095280408859 +Loss at step 350: 0.03154923394322395 +Loss at step 400: 0.03430347889661789 +Loss at step 450: 0.035916153341531754 +Loss at step 500: 0.04978935420513153 +Loss at step 550: 0.02993885800242424 +Loss at step 600: 0.0533515140414238 +Loss at step 650: 0.04010763391852379 +Loss at step 700: 0.03576485812664032 +Loss at step 750: 0.03653561323881149 +Loss at step 800: 0.034997548907995224 +Loss at step 850: 0.056449100375175476 +Loss at step 900: 0.0658298134803772 +Mean training loss after epoch 213: 0.0411296719149041 + +EPOCH: 214 +Loss at step 0: 0.03312665596604347 +Loss at step 50: 0.03246608376502991 +Loss at step 100: 0.051583290100097656 +Loss at step 150: 0.04974968358874321 +Loss at step 200: 0.05574967712163925 +Loss at step 250: 0.04247640445828438 +Loss at step 300: 0.0370447002351284 +Loss at step 350: 0.03886210918426514 +Loss at step 400: 0.05328872799873352 +Loss at step 450: 0.03179633617401123 +Loss at step 500: 0.05542878434062004 +Loss at step 550: 0.044399116188287735 +Loss at step 600: 0.034311443567276 +Loss at step 650: 0.03347017616033554 +Loss at step 700: 0.033897753804922104 +Loss at step 750: 0.04861360043287277 +Loss at step 800: 0.03839123249053955 +Loss at step 850: 0.06305564939975739 +Loss at step 900: 0.03083120472729206 +Mean training loss after epoch 214: 0.041287495432965664 + +EPOCH: 215 +Loss at step 0: 0.03692357987165451 +Loss at step 50: 0.03876027837395668 +Loss at step 100: 0.038466617465019226 +Loss at step 150: 0.03568306565284729 +Loss at step 200: 0.032258838415145874 +Loss at step 250: 0.033888742327690125 +Loss at step 300: 0.031359050422906876 +Loss at step 350: 0.0355716198682785 +Loss at step 400: 0.036228060722351074 +Loss at step 450: 0.034554749727249146 +Loss at step 500: 0.052061960101127625 +Loss at step 550: 0.03502297401428223 +Loss at step 600: 0.03877376392483711 +Loss at step 650: 0.07062792778015137 +Loss at step 700: 0.03748231753706932 +Loss at step 750: 0.048514317721128464 +Loss at step 800: 0.03904194012284279 +Loss at step 850: 0.042537979781627655 +Loss at step 900: 0.03580579534173012 +Mean training loss after epoch 215: 0.04120782274665482 + +EPOCH: 216 +Loss at step 0: 0.03698866814374924 +Loss at step 50: 0.04032306373119354 +Loss at step 100: 0.04292820021510124 +Loss at step 150: 0.043672092258930206 +Loss at step 200: 0.04129884019494057 +Loss at step 250: 0.06413669139146805 +Loss at step 300: 0.03333058953285217 +Loss at step 350: 0.036104969680309296 +Loss at step 400: 0.04874488711357117 +Loss at step 450: 0.039387352764606476 +Loss at step 500: 0.043956201523542404 +Loss at step 550: 0.02769467793405056 +Loss at step 600: 0.04720773547887802 +Loss at step 650: 0.05882936343550682 +Loss at step 700: 0.04028856381773949 +Loss at step 750: 0.06633619964122772 +Loss at step 800: 0.04738272726535797 +Loss at step 850: 0.03966781124472618 +Loss at step 900: 0.05900513380765915 +Mean training loss after epoch 216: 0.04053880623988569 + +EPOCH: 217 +Loss at step 0: 0.03476103022694588 +Loss at step 50: 0.04007076099514961 +Loss at step 100: 0.03548238426446915 +Loss at step 150: 0.0420597568154335 +Loss at step 200: 0.05475492775440216 +Loss at step 250: 0.03957177326083183 +Loss at step 300: 0.046720318496227264 +Loss at step 350: 0.037851348519325256 +Loss at step 400: 0.035817135125398636 +Loss at step 450: 0.03365528956055641 +Loss at step 500: 0.041856925934553146 +Loss at step 550: 0.03881705924868584 +Loss at step 600: 0.04341663047671318 +Loss at step 650: 0.0302233025431633 +Loss at step 700: 0.04495518282055855 +Loss at step 750: 0.07632829248905182 +Loss at step 800: 0.033838625997304916 +Loss at step 850: 0.058509018272161484 +Loss at step 900: 0.0263651292771101 +Mean training loss after epoch 217: 0.04165038813961976 + +EPOCH: 218 +Loss at step 0: 0.03161349147558212 +Loss at step 50: 0.03128485754132271 +Loss at step 100: 0.04100488871335983 +Loss at step 150: 0.029969152063131332 +Loss at step 200: 0.04331596568226814 +Loss at step 250: 0.03943278640508652 +Loss at step 300: 0.033166710287332535 +Loss at step 350: 0.029378816485404968 +Loss at step 400: 0.0342346727848053 +Loss at step 450: 0.036455534398555756 +Loss at step 500: 0.05356838181614876 +Loss at step 550: 0.03679025173187256 +Loss at step 600: 0.03293558210134506 +Loss at step 650: 0.03905247524380684 +Loss at step 700: 0.049808014184236526 +Loss at step 750: 0.05253978446125984 +Loss at step 800: 0.04768317937850952 +Loss at step 850: 0.03520479053258896 +Loss at step 900: 0.05699870362877846 +Mean training loss after epoch 218: 0.04153027399770741 + +EPOCH: 219 +Loss at step 0: 0.033535972237586975 +Loss at step 50: 0.05234470218420029 +Loss at step 100: 0.02671230025589466 +Loss at step 150: 0.03986935690045357 +Loss at step 200: 0.04880509898066521 +Loss at step 250: 0.04045942798256874 +Loss at step 300: 0.029259618371725082 +Loss at step 350: 0.04278722032904625 +Loss at step 400: 0.04514151066541672 +Loss at step 450: 0.04280414804816246 +Loss at step 500: 0.03450876101851463 +Loss at step 550: 0.04778549447655678 +Loss at step 600: 0.051221564412117004 +Loss at step 650: 0.03971226140856743 +Loss at step 700: 0.02804519422352314 +Loss at step 750: 0.03532065451145172 +Loss at step 800: 0.05204001069068909 +Loss at step 850: 0.0395912267267704 +Loss at step 900: 0.033726051449775696 +Mean training loss after epoch 219: 0.041007235062433714 + +EPOCH: 220 +Loss at step 0: 0.0667114332318306 +Loss at step 50: 0.047925014048814774 +Loss at step 100: 0.03692636638879776 +Loss at step 150: 0.030362550169229507 +Loss at step 200: 0.035843972116708755 +Loss at step 250: 0.0372978039085865 +Loss at step 300: 0.03436199575662613 +Loss at step 350: 0.04721400514245033 +Loss at step 400: 0.034291476011276245 +Loss at step 450: 0.03911425918340683 +Loss at step 500: 0.03957908973097801 +Loss at step 550: 0.03273768350481987 +Loss at step 600: 0.033866237848997116 +Loss at step 650: 0.035394810140132904 +Loss at step 700: 0.05452428758144379 +Loss at step 750: 0.04825645312666893 +Loss at step 800: 0.04440128430724144 +Loss at step 850: 0.04680107533931732 +Loss at step 900: 0.03324844688177109 +Mean training loss after epoch 220: 0.04129367575311521 + +EPOCH: 221 +Loss at step 0: 0.032262157648801804 +Loss at step 50: 0.038194503635168076 +Loss at step 100: 0.07236742973327637 +Loss at step 150: 0.06887383759021759 +Loss at step 200: 0.04868302121758461 +Loss at step 250: 0.03233625367283821 +Loss at step 300: 0.04105150327086449 +Loss at step 350: 0.029542885720729828 +Loss at step 400: 0.03776249662041664 +Loss at step 450: 0.03501143306493759 +Loss at step 500: 0.03716370835900307 +Loss at step 550: 0.03228867053985596 +Loss at step 600: 0.034387730062007904 +Loss at step 650: 0.05523118004202843 +Loss at step 700: 0.034401074051856995 +Loss at step 750: 0.04115273803472519 +Loss at step 800: 0.03586626797914505 +Loss at step 850: 0.04088988155126572 +Loss at step 900: 0.03924999013543129 +Mean training loss after epoch 221: 0.04116944233174009 + +EPOCH: 222 +Loss at step 0: 0.04622489586472511 +Loss at step 50: 0.034178245812654495 +Loss at step 100: 0.03427834063768387 +Loss at step 150: 0.038221172988414764 +Loss at step 200: 0.04681423306465149 +Loss at step 250: 0.038031622767448425 +Loss at step 300: 0.039281293749809265 +Loss at step 350: 0.034416165202856064 +Loss at step 400: 0.07188202440738678 +Loss at step 450: 0.038608696311712265 +Loss at step 500: 0.03562668710947037 +Loss at step 550: 0.04744146391749382 +Loss at step 600: 0.03219371289014816 +Loss at step 650: 0.029797524213790894 +Loss at step 700: 0.036422885954380035 +Loss at step 750: 0.038234759122133255 +Loss at step 800: 0.04142580181360245 +Loss at step 850: 0.03657116740942001 +Loss at step 900: 0.02977190725505352 +Mean training loss after epoch 222: 0.041052178505545996 + +EPOCH: 223 +Loss at step 0: 0.034760016947984695 +Loss at step 50: 0.03509802743792534 +Loss at step 100: 0.04074249789118767 +Loss at step 150: 0.03870717063546181 +Loss at step 200: 0.04823608696460724 +Loss at step 250: 0.02629156783223152 +Loss at step 300: 0.034971971064805984 +Loss at step 350: 0.03723495826125145 +Loss at step 400: 0.03717128559947014 +Loss at step 450: 0.039042744785547256 +Loss at step 500: 0.04182994365692139 +Loss at step 550: 0.03563202545046806 +Loss at step 600: 0.0682126060128212 +Loss at step 650: 0.04124102368950844 +Loss at step 700: 0.033520348370075226 +Loss at step 750: 0.0550982840359211 +Loss at step 800: 0.05311400443315506 +Loss at step 850: 0.041202362626791 +Loss at step 900: 0.07201080769300461 +Mean training loss after epoch 223: 0.04102069054291383 + +EPOCH: 224 +Loss at step 0: 0.037563711404800415 +Loss at step 50: 0.05556620657444 +Loss at step 100: 0.04725373536348343 +Loss at step 150: 0.029180817306041718 +Loss at step 200: 0.06255805492401123 +Loss at step 250: 0.05550690367817879 +Loss at step 300: 0.032185718417167664 +Loss at step 350: 0.03732905164361 +Loss at step 400: 0.03720862418413162 +Loss at step 450: 0.05634133890271187 +Loss at step 500: 0.037644654512405396 +Loss at step 550: 0.041919078677892685 +Loss at step 600: 0.04104840010404587 +Loss at step 650: 0.04214031249284744 +Loss at step 700: 0.03333045169711113 +Loss at step 750: 0.035872429609298706 +Loss at step 800: 0.055177945643663406 +Loss at step 850: 0.06531449407339096 +Loss at step 900: 0.04556897282600403 +Mean training loss after epoch 224: 0.041014108520898734 + +EPOCH: 225 +Loss at step 0: 0.03936973586678505 +Loss at step 50: 0.03350040689110756 +Loss at step 100: 0.037846505641937256 +Loss at step 150: 0.03458798676729202 +Loss at step 200: 0.0477675199508667 +Loss at step 250: 0.03837289661169052 +Loss at step 300: 0.03477032855153084 +Loss at step 350: 0.03714022412896156 +Loss at step 400: 0.03415047377347946 +Loss at step 450: 0.03395478427410126 +Loss at step 500: 0.0354052372276783 +Loss at step 550: 0.030104510486125946 +Loss at step 600: 0.043730005621910095 +Loss at step 650: 0.05497589707374573 +Loss at step 700: 0.05407802760601044 +Loss at step 750: 0.03253479674458504 +Loss at step 800: 0.03486725315451622 +Loss at step 850: 0.03352895379066467 +Loss at step 900: 0.03306787833571434 +Mean training loss after epoch 225: 0.0410375071050071 + +EPOCH: 226 +Loss at step 0: 0.03648871183395386 +Loss at step 50: 0.0366818942129612 +Loss at step 100: 0.037782009690999985 +Loss at step 150: 0.047991879284381866 +Loss at step 200: 0.04437065124511719 +Loss at step 250: 0.05026853084564209 +Loss at step 300: 0.048000041395425797 +Loss at step 350: 0.031056242063641548 +Loss at step 400: 0.035417258739471436 +Loss at step 450: 0.04013531655073166 +Loss at step 500: 0.05145081505179405 +Loss at step 550: 0.04228727146983147 +Loss at step 600: 0.05478085204958916 +Loss at step 650: 0.04012715443968773 +Loss at step 700: 0.05043533816933632 +Loss at step 750: 0.038202911615371704 +Loss at step 800: 0.036201126873493195 +Loss at step 850: 0.03652939572930336 +Loss at step 900: 0.026116954162716866 +Mean training loss after epoch 226: 0.04136124605547263 + +EPOCH: 227 +Loss at step 0: 0.04205761104822159 +Loss at step 50: 0.04090822488069534 +Loss at step 100: 0.043332166969776154 +Loss at step 150: 0.05705716460943222 +Loss at step 200: 0.03839666768908501 +Loss at step 250: 0.029485201463103294 +Loss at step 300: 0.056229788810014725 +Loss at step 350: 0.03689929470419884 +Loss at step 400: 0.030563808977603912 +Loss at step 450: 0.04460404813289642 +Loss at step 500: 0.03188980743288994 +Loss at step 550: 0.029740963131189346 +Loss at step 600: 0.035941313952207565 +Loss at step 650: 0.05650283768773079 +Loss at step 700: 0.04136836528778076 +Loss at step 750: 0.05103546008467674 +Loss at step 800: 0.049101848155260086 +Loss at step 850: 0.04119361191987991 +Loss at step 900: 0.034654609858989716 +Mean training loss after epoch 227: 0.04193418883263811 + +EPOCH: 228 +Loss at step 0: 0.038860369473695755 +Loss at step 50: 0.03692442551255226 +Loss at step 100: 0.030994482338428497 +Loss at step 150: 0.037316933274269104 +Loss at step 200: 0.03066915273666382 +Loss at step 250: 0.03554951027035713 +Loss at step 300: 0.05594007298350334 +Loss at step 350: 0.05369291454553604 +Loss at step 400: 0.05064915493130684 +Loss at step 450: 0.04152677580714226 +Loss at step 500: 0.056227780878543854 +Loss at step 550: 0.04867443069815636 +Loss at step 600: 0.03939269483089447 +Loss at step 650: 0.0381941944360733 +Loss at step 700: 0.046426430344581604 +Loss at step 750: 0.03176429122686386 +Loss at step 800: 0.041528403759002686 +Loss at step 850: 0.039497584104537964 +Loss at step 900: 0.037410248070955276 +Mean training loss after epoch 228: 0.04146860102648293 + +EPOCH: 229 +Loss at step 0: 0.03637472540140152 +Loss at step 50: 0.034893423318862915 +Loss at step 100: 0.04737719148397446 +Loss at step 150: 0.033185504376888275 +Loss at step 200: 0.0315871462225914 +Loss at step 250: 0.03322930634021759 +Loss at step 300: 0.03275240957736969 +Loss at step 350: 0.034512899816036224 +Loss at step 400: 0.041698455810546875 +Loss at step 450: 0.03479326516389847 +Loss at step 500: 0.04883550852537155 +Loss at step 550: 0.0731632336974144 +Loss at step 600: 0.06841642409563065 +Loss at step 650: 0.03672165796160698 +Loss at step 700: 0.042949128895998 +Loss at step 750: 0.03123210184276104 +Loss at step 800: 0.03435734286904335 +Loss at step 850: 0.029435977339744568 +Loss at step 900: 0.04522766172885895 +Mean training loss after epoch 229: 0.04051139461063246 + +EPOCH: 230 +Loss at step 0: 0.03913334757089615 +Loss at step 50: 0.055999431759119034 +Loss at step 100: 0.05603145435452461 +Loss at step 150: 0.03465709835290909 +Loss at step 200: 0.03714108467102051 +Loss at step 250: 0.049302857369184494 +Loss at step 300: 0.051496539264917374 +Loss at step 350: 0.03816322982311249 +Loss at step 400: 0.04686564579606056 +Loss at step 450: 0.05239836499094963 +Loss at step 500: 0.04643171653151512 +Loss at step 550: 0.03184344619512558 +Loss at step 600: 0.06275387108325958 +Loss at step 650: 0.0721859559416771 +Loss at step 700: 0.036288753151893616 +Loss at step 750: 0.029158808290958405 +Loss at step 800: 0.041492756456136703 +Loss at step 850: 0.039844442158937454 +Loss at step 900: 0.03264705464243889 +Mean training loss after epoch 230: 0.04095347651413509 + +EPOCH: 231 +Loss at step 0: 0.03575587272644043 +Loss at step 50: 0.03215237334370613 +Loss at step 100: 0.05879320204257965 +Loss at step 150: 0.03561503812670708 +Loss at step 200: 0.04932316765189171 +Loss at step 250: 0.031335704028606415 +Loss at step 300: 0.03901561349630356 +Loss at step 350: 0.038512203842401505 +Loss at step 400: 0.03318621963262558 +Loss at step 450: 0.04640994220972061 +Loss at step 500: 0.032737333327531815 +Loss at step 550: 0.048904407769441605 +Loss at step 600: 0.03512667119503021 +Loss at step 650: 0.03650322183966637 +Loss at step 700: 0.038705259561538696 +Loss at step 750: 0.04180947691202164 +Loss at step 800: 0.04233972355723381 +Loss at step 850: 0.04108935967087746 +Loss at step 900: 0.03689875453710556 +Mean training loss after epoch 231: 0.04083216685388706 + +EPOCH: 232 +Loss at step 0: 0.04886999726295471 +Loss at step 50: 0.05551153048872948 +Loss at step 100: 0.03532155975699425 +Loss at step 150: 0.038711488246917725 +Loss at step 200: 0.03422563523054123 +Loss at step 250: 0.0415818989276886 +Loss at step 300: 0.052041277289390564 +Loss at step 350: 0.031493090093135834 +Loss at step 400: 0.041716963052749634 +Loss at step 450: 0.045627281069755554 +Loss at step 500: 0.03781002387404442 +Loss at step 550: 0.04126285761594772 +Loss at step 600: 0.055617816746234894 +Loss at step 650: 0.030421976000070572 +Loss at step 700: 0.05503896623849869 +Loss at step 750: 0.04677780345082283 +Loss at step 800: 0.05122390016913414 +Loss at step 850: 0.05126957967877388 +Loss at step 900: 0.038854584097862244 +Mean training loss after epoch 232: 0.04083698726237329 + +EPOCH: 233 +Loss at step 0: 0.03805645555257797 +Loss at step 50: 0.05763966590166092 +Loss at step 100: 0.04127464070916176 +Loss at step 150: 0.03656071051955223 +Loss at step 200: 0.04233010485768318 +Loss at step 250: 0.06715351343154907 +Loss at step 300: 0.056858159601688385 +Loss at step 350: 0.038870055228471756 +Loss at step 400: 0.04549760743975639 +Loss at step 450: 0.03370843455195427 +Loss at step 500: 0.036580368876457214 +Loss at step 550: 0.030094511806964874 +Loss at step 600: 0.027368923649191856 +Loss at step 650: 0.036164265125989914 +Loss at step 700: 0.03536178916692734 +Loss at step 750: 0.03123387135565281 +Loss at step 800: 0.03417902812361717 +Loss at step 850: 0.06495968252420425 +Loss at step 900: 0.033987708389759064 +Mean training loss after epoch 233: 0.04114598854343647 + +EPOCH: 234 +Loss at step 0: 0.034131698310375214 +Loss at step 50: 0.03304879739880562 +Loss at step 100: 0.0375090166926384 +Loss at step 150: 0.04361898824572563 +Loss at step 200: 0.03519846498966217 +Loss at step 250: 0.03362388163805008 +Loss at step 300: 0.04630090668797493 +Loss at step 350: 0.03330394998192787 +Loss at step 400: 0.0410035215318203 +Loss at step 450: 0.03329356387257576 +Loss at step 500: 0.0334615595638752 +Loss at step 550: 0.0367317870259285 +Loss at step 600: 0.031206030398607254 +Loss at step 650: 0.0366726778447628 +Loss at step 700: 0.041628316044807434 +Loss at step 750: 0.03506317362189293 +Loss at step 800: 0.0323176383972168 +Loss at step 850: 0.03223805129528046 +Loss at step 900: 0.04771678149700165 +Mean training loss after epoch 234: 0.0408894696604532 + +EPOCH: 235 +Loss at step 0: 0.036682069301605225 +Loss at step 50: 0.03544003888964653 +Loss at step 100: 0.03550875186920166 +Loss at step 150: 0.038712434470653534 +Loss at step 200: 0.0328836590051651 +Loss at step 250: 0.03970502316951752 +Loss at step 300: 0.033910173922777176 +Loss at step 350: 0.037062786519527435 +Loss at step 400: 0.07173572480678558 +Loss at step 450: 0.059028297662734985 +Loss at step 500: 0.033802054822444916 +Loss at step 550: 0.03551178053021431 +Loss at step 600: 0.050311341881752014 +Loss at step 650: 0.02960454858839512 +Loss at step 700: 0.044369637966156006 +Loss at step 750: 0.03446542099118233 +Loss at step 800: 0.026896147057414055 +Loss at step 850: 0.0351390466094017 +Loss at step 900: 0.03585878759622574 +Mean training loss after epoch 235: 0.0412483900519354 + +EPOCH: 236 +Loss at step 0: 0.03225017711520195 +Loss at step 50: 0.033339180052280426 +Loss at step 100: 0.035093218088150024 +Loss at step 150: 0.041725076735019684 +Loss at step 200: 0.039611514657735825 +Loss at step 250: 0.03811977803707123 +Loss at step 300: 0.04116872325539589 +Loss at step 350: 0.04522959887981415 +Loss at step 400: 0.034504808485507965 +Loss at step 450: 0.03280076012015343 +Loss at step 500: 0.052272897213697433 +Loss at step 550: 0.035520099103450775 +Loss at step 600: 0.029213305562734604 +Loss at step 650: 0.03851242735981941 +Loss at step 700: 0.041083406656980515 +Loss at step 750: 0.047109778970479965 +Loss at step 800: 0.046369973570108414 +Loss at step 850: 0.03564689680933952 +Loss at step 900: 0.03542470932006836 +Mean training loss after epoch 236: 0.04041676042970818 + +EPOCH: 237 +Loss at step 0: 0.03667047992348671 +Loss at step 50: 0.05486058071255684 +Loss at step 100: 0.03827936202287674 +Loss at step 150: 0.0426441989839077 +Loss at step 200: 0.03697700798511505 +Loss at step 250: 0.034738317131996155 +Loss at step 300: 0.03901257738471031 +Loss at step 350: 0.04829375445842743 +Loss at step 400: 0.0641462430357933 +Loss at step 450: 0.055503036826848984 +Loss at step 500: 0.0356808640062809 +Loss at step 550: 0.0351221039891243 +Loss at step 600: 0.04857305809855461 +Loss at step 650: 0.035922374576330185 +Loss at step 700: 0.0438312292098999 +Loss at step 750: 0.028933558613061905 +Loss at step 800: 0.049483731389045715 +Loss at step 850: 0.0362832210958004 +Loss at step 900: 0.03917879983782768 +Mean training loss after epoch 237: 0.04058157685580157 + +EPOCH: 238 +Loss at step 0: 0.05662114545702934 +Loss at step 50: 0.07133477181196213 +Loss at step 100: 0.05366216599941254 +Loss at step 150: 0.05503072589635849 +Loss at step 200: 0.0364266000688076 +Loss at step 250: 0.03105452097952366 +Loss at step 300: 0.0429583378136158 +Loss at step 350: 0.03313998878002167 +Loss at step 400: 0.0366363525390625 +Loss at step 450: 0.02768549509346485 +Loss at step 500: 0.03483203426003456 +Loss at step 550: 0.04857843741774559 +Loss at step 600: 0.03954724594950676 +Loss at step 650: 0.035047147423028946 +Loss at step 700: 0.0341925173997879 +Loss at step 750: 0.03897126764059067 +Loss at step 800: 0.041156038641929626 +Loss at step 850: 0.042894527316093445 +Loss at step 900: 0.040707413107156754 +Mean training loss after epoch 238: 0.04040791804808925 + +EPOCH: 239 +Loss at step 0: 0.032015059143304825 +Loss at step 50: 0.049798812717199326 +Loss at step 100: 0.029131358489394188 +Loss at step 150: 0.03209542855620384 +Loss at step 200: 0.039810456335544586 +Loss at step 250: 0.04038550332188606 +Loss at step 300: 0.040614526718854904 +Loss at step 350: 0.03396230190992355 +Loss at step 400: 0.05594230443239212 +Loss at step 450: 0.05109965801239014 +Loss at step 500: 0.03540639579296112 +Loss at step 550: 0.03735024482011795 +Loss at step 600: 0.03928496688604355 +Loss at step 650: 0.03327350690960884 +Loss at step 700: 0.04737665131688118 +Loss at step 750: 0.030339328572154045 +Loss at step 800: 0.03868836164474487 +Loss at step 850: 0.04888433218002319 +Loss at step 900: 0.054284047335386276 +Mean training loss after epoch 239: 0.040756947924889354 + +EPOCH: 240 +Loss at step 0: 0.03621514514088631 +Loss at step 50: 0.037960972636938095 +Loss at step 100: 0.04036441445350647 +Loss at step 150: 0.04665497690439224 +Loss at step 200: 0.04599844664335251 +Loss at step 250: 0.03350859135389328 +Loss at step 300: 0.03978145867586136 +Loss at step 350: 0.04724019765853882 +Loss at step 400: 0.046361032873392105 +Loss at step 450: 0.04888570308685303 +Loss at step 500: 0.03416692838072777 +Loss at step 550: 0.0357830747961998 +Loss at step 600: 0.032979223877191544 +Loss at step 650: 0.04058676213026047 +Loss at step 700: 0.04173377901315689 +Loss at step 750: 0.0481884628534317 +Loss at step 800: 0.049226146191358566 +Loss at step 850: 0.03608490899205208 +Loss at step 900: 0.04782750457525253 +Mean training loss after epoch 240: 0.04125069013274491 + +EPOCH: 241 +Loss at step 0: 0.02865777723491192 +Loss at step 50: 0.030689535662531853 +Loss at step 100: 0.03503900393843651 +Loss at step 150: 0.043451108038425446 +Loss at step 200: 0.03308131545782089 +Loss at step 250: 0.03468373790383339 +Loss at step 300: 0.04644292965531349 +Loss at step 350: 0.043308407068252563 +Loss at step 400: 0.05058228597044945 +Loss at step 450: 0.0519171804189682 +Loss at step 500: 0.04207949712872505 +Loss at step 550: 0.04765189439058304 +Loss at step 600: 0.0473044328391552 +Loss at step 650: 0.042676154524087906 +Loss at step 700: 0.032991547137498856 +Loss at step 750: 0.03649139404296875 +Loss at step 800: 0.06195422634482384 +Loss at step 850: 0.03607760742306709 +Loss at step 900: 0.03868535906076431 +Mean training loss after epoch 241: 0.04128943655345994 + +EPOCH: 242 +Loss at step 0: 0.06794758886098862 +Loss at step 50: 0.03913401439785957 +Loss at step 100: 0.03793284669518471 +Loss at step 150: 0.04773392528295517 +Loss at step 200: 0.050653185695409775 +Loss at step 250: 0.025860438123345375 +Loss at step 300: 0.031997546553611755 +Loss at step 350: 0.03752926364541054 +Loss at step 400: 0.03538594767451286 +Loss at step 450: 0.029974274337291718 +Loss at step 500: 0.06059102341532707 +Loss at step 550: 0.039873912930488586 +Loss at step 600: 0.03409953415393829 +Loss at step 650: 0.03079247660934925 +Loss at step 700: 0.04495513439178467 +Loss at step 750: 0.03563304618000984 +Loss at step 800: 0.06306657195091248 +Loss at step 850: 0.03369851037859917 +Loss at step 900: 0.03801652416586876 +Mean training loss after epoch 242: 0.04038986093652591 + +EPOCH: 243 +Loss at step 0: 0.04689265042543411 +Loss at step 50: 0.03451554477214813 +Loss at step 100: 0.03268691524863243 +Loss at step 150: 0.03130830079317093 +Loss at step 200: 0.03496095910668373 +Loss at step 250: 0.04636874422430992 +Loss at step 300: 0.037325453013181686 +Loss at step 350: 0.03737280145287514 +Loss at step 400: 0.0473397895693779 +Loss at step 450: 0.05233558267354965 +Loss at step 500: 0.03210145980119705 +Loss at step 550: 0.03945494815707207 +Loss at step 600: 0.03543688729405403 +Loss at step 650: 0.03888096660375595 +Loss at step 700: 0.04111270606517792 +Loss at step 750: 0.029099611565470695 +Loss at step 800: 0.03430042788386345 +Loss at step 850: 0.03829982504248619 +Loss at step 900: 0.039772406220436096 +Mean training loss after epoch 243: 0.040650974677156795 + +EPOCH: 244 +Loss at step 0: 0.03601839020848274 +Loss at step 50: 0.03880251199007034 +Loss at step 100: 0.03493691235780716 +Loss at step 150: 0.02994871512055397 +Loss at step 200: 0.032999180257320404 +Loss at step 250: 0.03631100431084633 +Loss at step 300: 0.033189717680215836 +Loss at step 350: 0.03836946189403534 +Loss at step 400: 0.0370786115527153 +Loss at step 450: 0.033676642924547195 +Loss at step 500: 0.0403299480676651 +Loss at step 550: 0.03196725621819496 +Loss at step 600: 0.04806798696517944 +Loss at step 650: 0.04054030403494835 +Loss at step 700: 0.03655192628502846 +Loss at step 750: 0.042547885328531265 +Loss at step 800: 0.03825114667415619 +Loss at step 850: 0.03418620303273201 +Loss at step 900: 0.031493254005908966 +Mean training loss after epoch 244: 0.0404629212424858 + +EPOCH: 245 +Loss at step 0: 0.037501245737075806 +Loss at step 50: 0.036022938787937164 +Loss at step 100: 0.06744180619716644 +Loss at step 150: 0.04214358702301979 +Loss at step 200: 0.034945204854011536 +Loss at step 250: 0.029832666739821434 +Loss at step 300: 0.054127588868141174 +Loss at step 350: 0.027778994292020798 +Loss at step 400: 0.03888265788555145 +Loss at step 450: 0.03413667902350426 +Loss at step 500: 0.039878036826848984 +Loss at step 550: 0.0383932963013649 +Loss at step 600: 0.03586817532777786 +Loss at step 650: 0.04729319363832474 +Loss at step 700: 0.060726214200258255 +Loss at step 750: 0.032855063676834106 +Loss at step 800: 0.03362588211894035 +Loss at step 850: 0.0399589017033577 +Loss at step 900: 0.03648059815168381 +Mean training loss after epoch 245: 0.040341950649185096 + +EPOCH: 246 +Loss at step 0: 0.031979892402887344 +Loss at step 50: 0.06765910238027573 +Loss at step 100: 0.0613054521381855 +Loss at step 150: 0.03503995016217232 +Loss at step 200: 0.03813648223876953 +Loss at step 250: 0.03782317787408829 +Loss at step 300: 0.03368839621543884 +Loss at step 350: 0.03500468283891678 +Loss at step 400: 0.04959193989634514 +Loss at step 450: 0.0331171415746212 +Loss at step 500: 0.03166046738624573 +Loss at step 550: 0.037088554352521896 +Loss at step 600: 0.03945896402001381 +Loss at step 650: 0.03642991930246353 +Loss at step 700: 0.03628809377551079 +Loss at step 750: 0.05210085213184357 +Loss at step 800: 0.03868551924824715 +Loss at step 850: 0.041673678904771805 +Loss at step 900: 0.046727459877729416 +Mean training loss after epoch 246: 0.04055890298164539 + +EPOCH: 247 +Loss at step 0: 0.050526224076747894 +Loss at step 50: 0.03471115231513977 +Loss at step 100: 0.02761066146194935 +Loss at step 150: 0.03715616837143898 +Loss at step 200: 0.06270107626914978 +Loss at step 250: 0.036140527576208115 +Loss at step 300: 0.03348053991794586 +Loss at step 350: 0.04763561859726906 +Loss at step 400: 0.04335961118340492 +Loss at step 450: 0.03691563382744789 +Loss at step 500: 0.03904525935649872 +Loss at step 550: 0.04260946810245514 +Loss at step 600: 0.049258239567279816 +Loss at step 650: 0.03189285472035408 +Loss at step 700: 0.0375247485935688 +Loss at step 750: 0.03473655879497528 +Loss at step 800: 0.03262423351407051 +Loss at step 850: 0.046003468334674835 +Loss at step 900: 0.0298653244972229 +Mean training loss after epoch 247: 0.04082909842401044 + +EPOCH: 248 +Loss at step 0: 0.037177521735429764 +Loss at step 50: 0.03851611167192459 +Loss at step 100: 0.02362552471458912 +Loss at step 150: 0.04929950833320618 +Loss at step 200: 0.03481481969356537 +Loss at step 250: 0.030810419470071793 +Loss at step 300: 0.060464564710855484 +Loss at step 350: 0.03210290148854256 +Loss at step 400: 0.03180817514657974 +Loss at step 450: 0.03282691538333893 +Loss at step 500: 0.04492408409714699 +Loss at step 550: 0.04005492478609085 +Loss at step 600: 0.03255147859454155 +Loss at step 650: 0.039729081094264984 +Loss at step 700: 0.0332181416451931 +Loss at step 750: 0.03630407899618149 +Loss at step 800: 0.025422068312764168 +Loss at step 850: 0.028221523389220238 +Loss at step 900: 0.03473244234919548 +Mean training loss after epoch 248: 0.040842697498942614 + +EPOCH: 249 +Loss at step 0: 0.04472397640347481 +Loss at step 50: 0.04787271097302437 +Loss at step 100: 0.04805320128798485 +Loss at step 150: 0.034147415310144424 +Loss at step 200: 0.03937199339270592 +Loss at step 250: 0.032839335501194 +Loss at step 300: 0.030625566840171814 +Loss at step 350: 0.06075826659798622 +Loss at step 400: 0.030696650967001915 +Loss at step 450: 0.043999478220939636 +Loss at step 500: 0.05251993238925934 +Loss at step 550: 0.03596007078886032 +Loss at step 600: 0.03571072220802307 +Loss at step 650: 0.03692999854683876 +Loss at step 700: 0.05128655582666397 +Loss at step 750: 0.03889640048146248 +Loss at step 800: 0.0332813523709774 +Loss at step 850: 0.05229688435792923 +Loss at step 900: 0.03806335851550102 +Mean training loss after epoch 249: 0.04073733212088725 + +EPOCH: 250 +Loss at step 0: 0.03640764579176903 +Loss at step 50: 0.029529381543397903 +Loss at step 100: 0.03215811029076576 +Loss at step 150: 0.03508197143673897 +Loss at step 200: 0.06036895886063576 +Loss at step 250: 0.05026475340127945 +Loss at step 300: 0.03762553259730339 +Loss at step 350: 0.039728518575429916 +Loss at step 400: 0.051400668919086456 +Loss at step 450: 0.03622720018029213 +Loss at step 500: 0.04798509180545807 +Loss at step 550: 0.036011118441820145 +Loss at step 600: 0.03086777590215206 +Loss at step 650: 0.02947012148797512 +Loss at step 700: 0.033220358192920685 +Loss at step 750: 0.04179084300994873 +Loss at step 800: 0.03159976005554199 +Loss at step 850: 0.0499589778482914 +Loss at step 900: 0.05065244436264038 +Mean training loss after epoch 250: 0.04118522900396954 + +EPOCH: 251 +Loss at step 0: 0.02755182981491089 +Loss at step 50: 0.049285098910331726 +Loss at step 100: 0.03654266893863678 +Loss at step 150: 0.04497296363115311 +Loss at step 200: 0.03556593507528305 +Loss at step 250: 0.03584232181310654 +Loss at step 300: 0.032394710928201675 +Loss at step 350: 0.03097252920269966 +Loss at step 400: 0.03149956464767456 +Loss at step 450: 0.034771617501974106 +Loss at step 500: 0.03608296439051628 +Loss at step 550: 0.04529433697462082 +Loss at step 600: 0.033899255096912384 +Loss at step 650: 0.041714031249284744 +Loss at step 700: 0.03436104208230972 +Loss at step 750: 0.03629880025982857 +Loss at step 800: 0.038352809846401215 +Loss at step 850: 0.048439860343933105 +Loss at step 900: 0.040245577692985535 +Mean training loss after epoch 251: 0.04107680348063837 + +EPOCH: 252 +Loss at step 0: 0.04162813723087311 +Loss at step 50: 0.03678770735859871 +Loss at step 100: 0.036010533571243286 +Loss at step 150: 0.05083076283335686 +Loss at step 200: 0.04758510738611221 +Loss at step 250: 0.04673706740140915 +Loss at step 300: 0.03488384932279587 +Loss at step 350: 0.04749500751495361 +Loss at step 400: 0.03849106281995773 +Loss at step 450: 0.0354052409529686 +Loss at step 500: 0.032203562557697296 +Loss at step 550: 0.046269387006759644 +Loss at step 600: 0.03831486403942108 +Loss at step 650: 0.04683487117290497 +Loss at step 700: 0.03641248121857643 +Loss at step 750: 0.03526393696665764 +Loss at step 800: 0.039593227207660675 +Loss at step 850: 0.037872251123189926 +Loss at step 900: 0.03107110597193241 +Mean training loss after epoch 252: 0.040780991514417914 + +EPOCH: 253 +Loss at step 0: 0.04510458558797836 +Loss at step 50: 0.053014062345027924 +Loss at step 100: 0.032169047743082047 +Loss at step 150: 0.040137842297554016 +Loss at step 200: 0.03011404722929001 +Loss at step 250: 0.049014877527952194 +Loss at step 300: 0.04769476503133774 +Loss at step 350: 0.06785585731267929 +Loss at step 400: 0.05150467902421951 +Loss at step 450: 0.037310872226953506 +Loss at step 500: 0.03545456752181053 +Loss at step 550: 0.0527246855199337 +Loss at step 600: 0.034180376678705215 +Loss at step 650: 0.03249421715736389 +Loss at step 700: 0.03964534029364586 +Loss at step 750: 0.04362183064222336 +Loss at step 800: 0.033075109124183655 +Loss at step 850: 0.037886276841163635 +Loss at step 900: 0.04572896659374237 +Mean training loss after epoch 253: 0.040567862515304004 + +EPOCH: 254 +Loss at step 0: 0.03452959656715393 +Loss at step 50: 0.03886321932077408 +Loss at step 100: 0.04838445782661438 +Loss at step 150: 0.03837645798921585 +Loss at step 200: 0.03629623353481293 +Loss at step 250: 0.030103985220193863 +Loss at step 300: 0.03791821002960205 +Loss at step 350: 0.03373400866985321 +Loss at step 400: 0.0403708778321743 +Loss at step 450: 0.034411486238241196 +Loss at step 500: 0.03582341596484184 +Loss at step 550: 0.03185276687145233 +Loss at step 600: 0.05909956619143486 +Loss at step 650: 0.03586168587207794 +Loss at step 700: 0.03290408104658127 +Loss at step 750: 0.035262443125247955 +Loss at step 800: 0.038654476404190063 +Loss at step 850: 0.03391736373305321 +Loss at step 900: 0.03784594684839249 +Mean training loss after epoch 254: 0.04157071075698079 + +EPOCH: 255 +Loss at step 0: 0.036882199347019196 +Loss at step 50: 0.03993494436144829 +Loss at step 100: 0.04192814603447914 +Loss at step 150: 0.05400025099515915 +Loss at step 200: 0.036772482097148895 +Loss at step 250: 0.03589187189936638 +Loss at step 300: 0.041940346360206604 +Loss at step 350: 0.03448191285133362 +Loss at step 400: 0.029735693708062172 +Loss at step 450: 0.033656343817710876 +Loss at step 500: 0.055729810148477554 +Loss at step 550: 0.03558502718806267 +Loss at step 600: 0.03889984264969826 +Loss at step 650: 0.049456886947155 +Loss at step 700: 0.036641478538513184 +Loss at step 750: 0.048241570591926575 +Loss at step 800: 0.0453965850174427 +Loss at step 850: 0.03907885402441025 +Loss at step 900: 0.06074392423033714 +Mean training loss after epoch 255: 0.041012863235385305 + +EPOCH: 256 +Loss at step 0: 0.03528229892253876 +Loss at step 50: 0.03847229480743408 +Loss at step 100: 0.04104534536600113 +Loss at step 150: 0.03357851132750511 +Loss at step 200: 0.04962961748242378 +Loss at step 250: 0.054125647991895676 +Loss at step 300: 0.0447879284620285 +Loss at step 350: 0.04007836803793907 +Loss at step 400: 0.04048490524291992 +Loss at step 450: 0.03496857360005379 +Loss at step 500: 0.05139027535915375 +Loss at step 550: 0.044430509209632874 +Loss at step 600: 0.049299247562885284 +Loss at step 650: 0.04760231077671051 +Loss at step 700: 0.05970483645796776 +Loss at step 750: 0.030023494735360146 +Loss at step 800: 0.051494043320417404 +Loss at step 850: 0.047272972762584686 +Loss at step 900: 0.0323207825422287 +Mean training loss after epoch 256: 0.041227073287531765 +/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( +Schedule: cosine +Cfg: False +Output path: /scratch/shared/beegfs/gabrijel/m2l/mini +Patch Size: 2 +Device: cuda:3 +===================================================================================== +Layer (type:depth-idx) Param # +===================================================================================== +DiT 75,264 +├─PatchEmbed: 1-1 -- +│ └─Conv2d: 2-1 1,920 +├─TimestepEmbedder: 1-2 -- +│ └─Mlp: 2-2 -- +│ │ └─Linear: 3-1 98,688 +│ │ └─SiLU: 3-2 -- +│ │ └─Linear: 3-3 147,840 +├─ModuleList: 1-3 -- +│ └─DiTBlock: 2-3 -- +│ │ └─LayerNorm: 3-4 -- +│ │ └─MultiheadAttention: 3-5 591,360 +│ │ └─LayerNorm: 3-6 -- +│ │ └─Mlp: 3-7 1,181,568 +│ │ └─Sequential: 3-8 887,040 +│ └─DiTBlock: 2-4 -- +│ │ └─LayerNorm: 3-9 -- +│ │ └─MultiheadAttention: 3-10 591,360 +│ │ └─LayerNorm: 3-11 -- +│ │ └─Mlp: 3-12 1,181,568 +│ │ └─Sequential: 3-13 887,040 +│ └─DiTBlock: 2-5 -- +│ │ └─LayerNorm: 3-14 -- +│ │ └─MultiheadAttention: 3-15 591,360 +│ │ └─LayerNorm: 3-16 -- +│ │ └─Mlp: 3-17 1,181,568 +│ │ └─Sequential: 3-18 887,040 +│ └─DiTBlock: 2-6 -- +│ │ └─LayerNorm: 3-19 -- +│ │ └─MultiheadAttention: 3-20 591,360 +│ │ └─LayerNorm: 3-21 -- +│ │ └─Mlp: 3-22 1,181,568 +│ │ └─Sequential: 3-23 887,040 +│ └─DiTBlock: 2-7 -- +│ │ └─LayerNorm: 3-24 -- +│ │ └─MultiheadAttention: 3-25 591,360 +│ │ └─LayerNorm: 3-26 -- +│ │ └─Mlp: 3-27 1,181,568 +│ │ └─Sequential: 3-28 887,040 +│ └─DiTBlock: 2-8 -- +│ │ └─LayerNorm: 3-29 -- +│ │ └─MultiheadAttention: 3-30 591,360 +│ │ └─LayerNorm: 3-31 -- +│ │ └─Mlp: 3-32 1,181,568 +│ │ └─Sequential: 3-33 887,040 +├─FinalLayer: 1-4 -- +│ └─LayerNorm: 2-9 -- +│ └─Linear: 2-10 1,540 +│ └─Sequential: 2-11 -- +│ │ └─SiLU: 3-34 -- +│ │ └─Linear: 3-35 295,680 +├─Unpatchify: 1-5 -- +===================================================================================== +Total params: 16,580,740 +Trainable params: 16,505,476 +Non-trainable params: 75,264 +===================================================================================== + +EPOCH: 1 +Loss at step 0: 0.9927976131439209 +Loss at step 50: 0.2647687792778015 +Loss at step 100: 0.1879013627767563 +Loss at step 150: 0.12602929770946503 +Loss at step 200: 0.11745952069759369 +Loss at step 250: 0.10707128793001175 +Loss at step 300: 0.1432727724313736 +Loss at step 350: 0.11516840010881424 +Loss at step 400: 0.10792354494333267 +Loss at step 450: 0.09377487003803253 +Loss at step 500: 0.10157567262649536 +Loss at step 550: 0.10117540508508682 +Loss at step 600: 0.11454605311155319 +Loss at step 650: 0.09202694892883301 +Loss at step 700: 0.08804260939359665 +Loss at step 750: 0.08932791650295258 +Loss at step 800: 0.08639156073331833 +Loss at step 850: 0.09178578108549118 +Loss at step 900: 0.09026865661144257 +Mean training loss after epoch 1: 0.14250233578783617 + +EPOCH: 2 +Loss at step 0: 0.08553174138069153 +Loss at step 50: 0.0918794572353363 +Loss at step 100: 0.07219964265823364 +Loss at step 150: 0.07668644934892654 +Loss at step 200: 0.08180388808250427 +Loss at step 250: 0.07124952226877213 +Loss at step 300: 0.08224574476480484 +Loss at step 350: 0.08902806788682938 +Loss at step 400: 0.0827031284570694 +Loss at step 450: 0.07146904617547989 +Loss at step 500: 0.07812539488077164 +Loss at step 550: 0.06646260619163513 +Loss at step 600: 0.06209966167807579 +Loss at step 650: 0.07814020663499832 +Loss at step 700: 0.058781880885362625 +Loss at step 750: 0.051821518689394 +Loss at step 800: 0.07093804329633713 +Loss at step 850: 0.0550219789147377 +Loss at step 900: 0.058788903057575226 +Mean training loss after epoch 2: 0.07401431160472603 + +EPOCH: 3 +Loss at step 0: 0.06717473268508911 +Loss at step 50: 0.07093988358974457 +Loss at step 100: 0.06632817536592484 +Loss at step 150: 0.07580439746379852 +Loss at step 200: 0.06400690972805023 +Loss at step 250: 0.07392463833093643 +Loss at step 300: 0.048493415117263794 +Loss at step 350: 0.0506267324090004 +Loss at step 400: 0.07013332098722458 +Loss at step 450: 0.05782090872526169 +Loss at step 500: 0.07724333554506302 +Loss at step 550: 0.05080138146877289 +Loss at step 600: 0.06723678857088089 +Loss at step 650: 0.057337675243616104 +Loss at step 700: 0.05216258764266968 +Loss at step 750: 0.05217692628502846 +Loss at step 800: 0.05593561381101608 +Loss at step 850: 0.07336627691984177 +Loss at step 900: 0.05595633387565613 +Mean training loss after epoch 3: 0.06080897097219663 + +EPOCH: 4 +Loss at step 0: 0.04946884885430336 +Loss at step 50: 0.054812829941511154 +Loss at step 100: 0.06162804737687111 +Loss at step 150: 0.05907006561756134 +Loss at step 200: 0.04258119687438011 +Loss at step 250: 0.06596523523330688 +Loss at step 300: 0.049682050943374634 +Loss at step 350: 0.05187582969665527 +Loss at step 400: 0.045258037745952606 +Loss at step 450: 0.050041262060403824 +Loss at step 500: 0.06738606095314026 +Loss at step 550: 0.057629767805337906 +Loss at step 600: 0.05861373245716095 +Loss at step 650: 0.054706621915102005 +Loss at step 700: 0.046074602752923965 +Loss at step 750: 0.04815784841775894 +Loss at step 800: 0.054592981934547424 +Loss at step 850: 0.06745585799217224 +Loss at step 900: 0.04226946830749512 +Mean training loss after epoch 4: 0.05705311590198006 + +EPOCH: 5 +Loss at step 0: 0.04679422453045845 +Loss at step 50: 0.05274326354265213 +Loss at step 100: 0.060322683304548264 +Loss at step 150: 0.047847017645835876 +Loss at step 200: 0.050057556480169296 +Loss at step 250: 0.04592166468501091 +Loss at step 300: 0.06508708745241165 +Loss at step 350: 0.06251583248376846 +Loss at step 400: 0.06060163676738739 +Loss at step 450: 0.04539049416780472 +Loss at step 500: 0.04933956637978554 +Loss at step 550: 0.044366367161273956 +Loss at step 600: 0.05056780204176903 +Loss at step 650: 0.05007366091012955 +Loss at step 700: 0.04736810550093651 +Loss at step 750: 0.0458783358335495 +Loss at step 800: 0.061146657913923264 +Loss at step 850: 0.04192593693733215 +Loss at step 900: 0.05574965849518776 +Mean training loss after epoch 5: 0.054514974121377666 + +EPOCH: 6 +Loss at step 0: 0.05552354082465172 +Loss at step 50: 0.06919590383768082 +Loss at step 100: 0.051523979753255844 +Loss at step 150: 0.07512925565242767 +Loss at step 200: 0.0469990149140358 +Loss at step 250: 0.04151985049247742 +Loss at step 300: 0.054267555475234985 +Loss at step 350: 0.046799663454294205 +Loss at step 400: 0.043437469750642776 +Loss at step 450: 0.0556630864739418 +Loss at step 500: 0.07886315882205963 +Loss at step 550: 0.07932259142398834 +Loss at step 600: 0.06433942168951035 +Loss at step 650: 0.05824227258563042 +Loss at step 700: 0.04445641115307808 +Loss at step 750: 0.05480702966451645 +Loss at step 800: 0.05086228623986244 +Loss at step 850: 0.050280384719371796 +Loss at step 900: 0.048203449696302414 +Mean training loss after epoch 6: 0.05360761121995668 + +EPOCH: 7 +Loss at step 0: 0.05120495334267616 +Loss at step 50: 0.041711125522851944 +Loss at step 100: 0.03893940523266792 +Loss at step 150: 0.043815772980451584 +Loss at step 200: 0.04661528393626213 +Loss at step 250: 0.04535030573606491 +Loss at step 300: 0.052326902747154236 +Loss at step 350: 0.04767979308962822 +Loss at step 400: 0.05384628102183342 +Loss at step 450: 0.03882434964179993 +Loss at step 500: 0.05370200797915459 +Loss at step 550: 0.05347057804465294 +Loss at step 600: 0.04494878277182579 +Loss at step 650: 0.04447983577847481 +Loss at step 700: 0.04920825734734535 +Loss at step 750: 0.057055290788412094 +Loss at step 800: 0.10477184504270554 +Loss at step 850: 0.043160244822502136 +Loss at step 900: 0.04887056723237038 +Mean training loss after epoch 7: 0.05228752429599066 + +EPOCH: 8 +Loss at step 0: 0.05011899024248123 +Loss at step 50: 0.06245705857872963 +Loss at step 100: 0.06631136685609818 +Loss at step 150: 0.04895845800638199 +Loss at step 200: 0.050268225371837616 +Loss at step 250: 0.04723553732037544 +Loss at step 300: 0.07536876201629639 +Loss at step 350: 0.06294748932123184 +Loss at step 400: 0.048326920717954636 +Loss at step 450: 0.04759557917714119 +Loss at step 500: 0.05410340428352356 +Loss at step 550: 0.05210983008146286 +Loss at step 600: 0.04399741441011429 +Loss at step 650: 0.041283536702394485 +Loss at step 700: 0.04357845336198807 +Loss at step 750: 0.060434866696596146 +Loss at step 800: 0.04585731029510498 +Loss at step 850: 0.04889417067170143 +Loss at step 900: 0.04579473286867142 +Mean training loss after epoch 8: 0.05117087174993334 + +EPOCH: 9 +Loss at step 0: 0.047287292778491974 +Loss at step 50: 0.04318024218082428 +Loss at step 100: 0.046293068677186966 +Loss at step 150: 0.03605443984270096 +Loss at step 200: 0.045547377318143845 +Loss at step 250: 0.0459781251847744 +Loss at step 300: 0.04060017317533493 +Loss at step 350: 0.045362140983343124 +Loss at step 400: 0.04601544514298439 +Loss at step 450: 0.05350184813141823 +Loss at step 500: 0.04330907389521599 +Loss at step 550: 0.043803706765174866 +Loss at step 600: 0.049809280782938004 +Loss at step 650: 0.04990199953317642 +Loss at step 700: 0.05597102642059326 +Loss at step 750: 0.04491954296827316 +Loss at step 800: 0.03813697397708893 +Loss at step 850: 0.043534938246011734 +Loss at step 900: 0.052817512303590775 +Mean training loss after epoch 9: 0.050545577774010995 + +EPOCH: 10 +Loss at step 0: 0.06345701217651367 +Loss at step 50: 0.04983758181333542 +Loss at step 100: 0.058084748685359955 +Loss at step 150: 0.057511843740940094 +Loss at step 200: 0.04950873926281929 +Loss at step 250: 0.044199153780937195 +Loss at step 300: 0.049990322440862656 +Loss at step 350: 0.042405661195516586 +Loss at step 400: 0.0410495363175869 +Loss at step 450: 0.050903402268886566 +Loss at step 500: 0.03998948261141777 +Loss at step 550: 0.04161235690116882 +Loss at step 600: 0.04452834278345108 +Loss at step 650: 0.04524111747741699 +Loss at step 700: 0.05646144971251488 +Loss at step 750: 0.039474230259656906 +Loss at step 800: 0.042933832854032516 +Loss at step 850: 0.04933589696884155 +Loss at step 900: 0.036208804696798325 +Mean training loss after epoch 10: 0.049290040768047515 + +EPOCH: 11 +Loss at step 0: 0.03956316038966179 +Loss at step 50: 0.047969572246074677 +Loss at step 100: 0.06317726522684097 +Loss at step 150: 0.04703586921095848 +Loss at step 200: 0.04931914806365967 +Loss at step 250: 0.047175485640764236 +Loss at step 300: 0.04173384979367256 +Loss at step 350: 0.04106055572628975 +Loss at step 400: 0.04758727177977562 +Loss at step 450: 0.04685395956039429 +Loss at step 500: 0.06105189397931099 +Loss at step 550: 0.06041964516043663 +Loss at step 600: 0.055400021374225616 +Loss at step 650: 0.04655126482248306 +Loss at step 700: 0.06059831380844116 +Loss at step 750: 0.06167222931981087 +Loss at step 800: 0.042501598596572876 +Loss at step 850: 0.042257603257894516 +Loss at step 900: 0.045965805649757385 +Mean training loss after epoch 11: 0.04964323368654259 + +EPOCH: 12 +Loss at step 0: 0.034203123301267624 +Loss at step 50: 0.05352773517370224 +Loss at step 100: 0.043338775634765625 +Loss at step 150: 0.04150288924574852 +Loss at step 200: 0.05577273294329643 +Loss at step 250: 0.055770426988601685 +Loss at step 300: 0.045309942215681076 +Loss at step 350: 0.04029889404773712 +Loss at step 400: 0.03722796589136124 +Loss at step 450: 0.04935304448008537 +Loss at step 500: 0.05218170955777168 +Loss at step 550: 0.061936669051647186 +Loss at step 600: 0.07341843843460083 +Loss at step 650: 0.0434122271835804 +Loss at step 700: 0.050429198890924454 +Loss at step 750: 0.054148439317941666 +Loss at step 800: 0.04618339240550995 +Loss at step 850: 0.039194535464048386 +Loss at step 900: 0.05453008413314819 +Mean training loss after epoch 12: 0.04853169766586345 + +EPOCH: 13 +Loss at step 0: 0.05767318978905678 +Loss at step 50: 0.04307478666305542 +Loss at step 100: 0.039039019495248795 +Loss at step 150: 0.050793424248695374 +Loss at step 200: 0.05458158627152443 +Loss at step 250: 0.0410580076277256 +Loss at step 300: 0.03630630671977997 +Loss at step 350: 0.04801056906580925 +Loss at step 400: 0.038546375930309296 +Loss at step 450: 0.04437912628054619 +Loss at step 500: 0.04366833716630936 +Loss at step 550: 0.07518304139375687 +Loss at step 600: 0.04526819288730621 +Loss at step 650: 0.07137490063905716 +Loss at step 700: 0.056305669248104095 +Loss at step 750: 0.05747681111097336 +Loss at step 800: 0.05244934558868408 +Loss at step 850: 0.04294973239302635 +Loss at step 900: 0.03596752509474754 +Mean training loss after epoch 13: 0.048368280024718505 + +EPOCH: 14 +Loss at step 0: 0.059222716838121414 +Loss at step 50: 0.04363219439983368 +Loss at step 100: 0.04555397853255272 +Loss at step 150: 0.03750060126185417 +Loss at step 200: 0.044108372181653976 +Loss at step 250: 0.05813005939126015 +Loss at step 300: 0.03639712557196617 +Loss at step 350: 0.07586781680583954 +Loss at step 400: 0.050042495131492615 +Loss at step 450: 0.054030727595090866 +Loss at step 500: 0.08736606687307358 +Loss at step 550: 0.0347326397895813 +Loss at step 600: 0.03835294768214226 +Loss at step 650: 0.04757542535662651 +Loss at step 700: 0.044310227036476135 +Loss at step 750: 0.03899923712015152 +Loss at step 800: 0.0389823392033577 +Loss at step 850: 0.046352993696928024 +Loss at step 900: 0.04369539022445679 +Mean training loss after epoch 14: 0.047218455241989095 + +EPOCH: 15 +Loss at step 0: 0.061945103108882904 +Loss at step 50: 0.040272314101457596 +Loss at step 100: 0.05934322997927666 +Loss at step 150: 0.061598729342222214 +Loss at step 200: 0.03942662104964256 +Loss at step 250: 0.07209756970405579 +Loss at step 300: 0.056602660566568375 +Loss at step 350: 0.04495527222752571 +Loss at step 400: 0.054804615676403046 +Loss at step 450: 0.060904551297426224 +Loss at step 500: 0.052522361278533936 +Loss at step 550: 0.04878659546375275 +Loss at step 600: 0.04031701013445854 +Loss at step 650: 0.04928752779960632 +Loss at step 700: 0.04112618789076805 +Loss at step 750: 0.05811849236488342 +Loss at step 800: 0.05275645852088928 +Loss at step 850: 0.05110972002148628 +Loss at step 900: 0.05189656466245651 +Mean training loss after epoch 15: 0.04713583987420683 + +EPOCH: 16 +Loss at step 0: 0.05751290172338486 +Loss at step 50: 0.05349951237440109 +Loss at step 100: 0.061340536922216415 +Loss at step 150: 0.04630905017256737 +Loss at step 200: 0.040518227964639664 +Loss at step 250: 0.04509662091732025 +Loss at step 300: 0.054649755358695984 +Loss at step 350: 0.04015738144516945 +Loss at step 400: 0.05614438280463219 +Loss at step 450: 0.04284175485372543 +Loss at step 500: 0.0447845458984375 +Loss at step 550: 0.04490595683455467 +Loss at step 600: 0.045310527086257935 +Loss at step 650: 0.04410380497574806 +Loss at step 700: 0.038343433290719986 +Loss at step 750: 0.05487282946705818 +Loss at step 800: 0.050881609320640564 +Loss at step 850: 0.05533342808485031 +Loss at step 900: 0.07102000713348389 +Mean training loss after epoch 16: 0.04699640631524802 + +EPOCH: 17 +Loss at step 0: 0.0580948069691658 +Loss at step 50: 0.03580525517463684 +Loss at step 100: 0.03325394168496132 +Loss at step 150: 0.0388118177652359 +Loss at step 200: 0.039655230939388275 +Loss at step 250: 0.04357254132628441 +Loss at step 300: 0.0388103649020195 +Loss at step 350: 0.062123239040374756 +Loss at step 400: 0.0393681563436985 +Loss at step 450: 0.045537207275629044 +Loss at step 500: 0.04149201512336731 +Loss at step 550: 0.056296877562999725 +Loss at step 600: 0.041269417852163315 +Loss at step 650: 0.047324035316705704 +Loss at step 700: 0.04247691109776497 +Loss at step 750: 0.033218417316675186 +Loss at step 800: 0.03631206229329109 +Loss at step 850: 0.03608255833387375 +Loss at step 900: 0.04036899283528328 +Mean training loss after epoch 17: 0.047017275433201014 + +EPOCH: 18 +Loss at step 0: 0.037257082760334015 +Loss at step 50: 0.03367447480559349 +Loss at step 100: 0.042098648846149445 +Loss at step 150: 0.039509519934654236 +Loss at step 200: 0.05941949412226677 +Loss at step 250: 0.038624610751867294 +Loss at step 300: 0.06093718111515045 +Loss at step 350: 0.044842202216386795 +Loss at step 400: 0.04626288637518883 +Loss at step 450: 0.05908516049385071 +Loss at step 500: 0.04012873023748398 +Loss at step 550: 0.038510892540216446 +Loss at step 600: 0.03817109763622284 +Loss at step 650: 0.04379972070455551 +Loss at step 700: 0.04390494152903557 +Loss at step 750: 0.03912144526839256 +Loss at step 800: 0.037638112902641296 +Loss at step 850: 0.04086960479617119 +Loss at step 900: 0.045219577848911285 +Mean training loss after epoch 18: 0.04582779359485485 + +EPOCH: 19 +Loss at step 0: 0.041718464344739914 +Loss at step 50: 0.041183002293109894 +Loss at step 100: 0.04069666936993599 +Loss at step 150: 0.037516038864851 +Loss at step 200: 0.047058697789907455 +Loss at step 250: 0.04376176744699478 +Loss at step 300: 0.042468659579753876 +Loss at step 350: 0.04458988457918167 +Loss at step 400: 0.04017651826143265 +Loss at step 450: 0.07378578186035156 +Loss at step 500: 0.0400574654340744 +Loss at step 550: 0.039062704890966415 +Loss at step 600: 0.038819320499897 +Loss at step 650: 0.033067211508750916 +Loss at step 700: 0.042712289839982986 +Loss at step 750: 0.03872883319854736 +Loss at step 800: 0.05610249936580658 +Loss at step 850: 0.03668671473860741 +Loss at step 900: 0.043801337480545044 +Mean training loss after epoch 19: 0.04611589505410652 + +EPOCH: 20 +Loss at step 0: 0.04827193170785904 +Loss at step 50: 0.05359834060072899 +Loss at step 100: 0.04653705283999443 +Loss at step 150: 0.05003025382757187 +Loss at step 200: 0.04673366993665695 +Loss at step 250: 0.040243230760097504 +Loss at step 300: 0.03828419744968414 +Loss at step 350: 0.04028535634279251 +Loss at step 400: 0.04643638804554939 +Loss at step 450: 0.03311799094080925 +Loss at step 500: 0.04406367987394333 +Loss at step 550: 0.03698456659913063 +Loss at step 600: 0.058640990406274796 +Loss at step 650: 0.040676768869161606 +Loss at step 700: 0.0424061119556427 +Loss at step 750: 0.05697501823306084 +Loss at step 800: 0.03824044018983841 +Loss at step 850: 0.066072478890419 +Loss at step 900: 0.041484538465738297 +Mean training loss after epoch 20: 0.04569254003004479 + +EPOCH: 21 +Loss at step 0: 0.052745524793863297 +Loss at step 50: 0.04679976403713226 +Loss at step 100: 0.06415614485740662 +Loss at step 150: 0.03858758881688118 +Loss at step 200: 0.03995379060506821 +Loss at step 250: 0.04070864990353584 +Loss at step 300: 0.047886040061712265 +Loss at step 350: 0.04501399025321007 +Loss at step 400: 0.03356078267097473 +Loss at step 450: 0.03801573067903519 +Loss at step 500: 0.037307173013687134 +Loss at step 550: 0.04218755289912224 +Loss at step 600: 0.05996202677488327 +Loss at step 650: 0.04204526171088219 +Loss at step 700: 0.07033933699131012 +Loss at step 750: 0.03862342983484268 +Loss at step 800: 0.038705822080373764 +Loss at step 850: 0.03514530509710312 +Loss at step 900: 0.03651607781648636 +Mean training loss after epoch 21: 0.045421224570016995 + +EPOCH: 22 +Loss at step 0: 0.05221446231007576 +Loss at step 50: 0.04250719025731087 +Loss at step 100: 0.05661725997924805 +Loss at step 150: 0.03511292487382889 +Loss at step 200: 0.040635790675878525 +Loss at step 250: 0.04838637635111809 +Loss at step 300: 0.0390804298222065 +Loss at step 350: 0.0534442774951458 +Loss at step 400: 0.038561638444662094 +Loss at step 450: 0.06379273533821106 +Loss at step 500: 0.035927664488554 +Loss at step 550: 0.06136680021882057 +Loss at step 600: 0.031272273510694504 +Loss at step 650: 0.045360028743743896 +Loss at step 700: 0.04633624106645584 +Loss at step 750: 0.04046519100666046 +Loss at step 800: 0.04161432385444641 +Loss at step 850: 0.042055003345012665 +Loss at step 900: 0.05448092147707939 +Mean training loss after epoch 22: 0.04483226262358651 + +EPOCH: 23 +Loss at step 0: 0.04911420866847038 +Loss at step 50: 0.0415814071893692 +Loss at step 100: 0.04444144666194916 +Loss at step 150: 0.03530682623386383 +Loss at step 200: 0.0463300421833992 +Loss at step 250: 0.038280509412288666 +Loss at step 300: 0.05271584540605545 +Loss at step 350: 0.05282475799322128 +Loss at step 400: 0.04728353023529053 +Loss at step 450: 0.03833460435271263 +Loss at step 500: 0.057132475078105927 +Loss at step 550: 0.0409892238676548 +Loss at step 600: 0.040609076619148254 +Loss at step 650: 0.040882013738155365 +Loss at step 700: 0.04064009711146355 +Loss at step 750: 0.04380909353494644 +Loss at step 800: 0.03657805919647217 +Loss at step 850: 0.03418336808681488 +Loss at step 900: 0.034976694732904434 +Mean training loss after epoch 23: 0.04488808057630367 + +EPOCH: 24 +Loss at step 0: 0.048248641192913055 +Loss at step 50: 0.05135813355445862 +Loss at step 100: 0.050695184618234634 +Loss at step 150: 0.05454166978597641 +Loss at step 200: 0.040228258818387985 +Loss at step 250: 0.045158132910728455 +Loss at step 300: 0.03014983795583248 +Loss at step 350: 0.030783720314502716 +Loss at step 400: 0.05381038039922714 +Loss at step 450: 0.03955896943807602 +Loss at step 500: 0.057066842913627625 +Loss at step 550: 0.036507830023765564 +Loss at step 600: 0.07247436791658401 +Loss at step 650: 0.0378834530711174 +Loss at step 700: 0.0714469701051712 +Loss at step 750: 0.040530942380428314 +Loss at step 800: 0.04141752049326897 +Loss at step 850: 0.034526366740465164 +Loss at step 900: 0.03569827228784561 +Mean training loss after epoch 24: 0.04518484828600497 + +EPOCH: 25 +Loss at step 0: 0.03326995298266411 +Loss at step 50: 0.033842310309410095 +Loss at step 100: 0.03736729547381401 +Loss at step 150: 0.0668163076043129 +Loss at step 200: 0.03536819666624069 +Loss at step 250: 0.03721887618303299 +Loss at step 300: 0.0470084510743618 +Loss at step 350: 0.04588761925697327 +Loss at step 400: 0.0489131323993206 +Loss at step 450: 0.04456213489174843 +Loss at step 500: 0.03591672703623772 +Loss at step 550: 0.05648083612322807 +Loss at step 600: 0.04321033135056496 +Loss at step 650: 0.04505500942468643 +Loss at step 700: 0.03846661001443863 +Loss at step 750: 0.03818375989794731 +Loss at step 800: 0.036762554198503494 +Loss at step 850: 0.05255861207842827 +Loss at step 900: 0.04515066742897034 +Mean training loss after epoch 25: 0.044807881285656874 + +EPOCH: 26 +Loss at step 0: 0.0373607873916626 +Loss at step 50: 0.043030742555856705 +Loss at step 100: 0.045759208500385284 +Loss at step 150: 0.04089202731847763 +Loss at step 200: 0.036131978034973145 +Loss at step 250: 0.05533413961529732 +Loss at step 300: 0.03699621185660362 +Loss at step 350: 0.03552429378032684 +Loss at step 400: 0.042007893323898315 +Loss at step 450: 0.04304102063179016 +Loss at step 500: 0.05209362506866455 +Loss at step 550: 0.06375249475240707 +Loss at step 600: 0.038298461586236954 +Loss at step 650: 0.04088416323065758 +Loss at step 700: 0.05234269052743912 +Loss at step 750: 0.053741395473480225 +Loss at step 800: 0.04723315313458443 +Loss at step 850: 0.057126112282276154 +Loss at step 900: 0.042302802205085754 +Mean training loss after epoch 26: 0.04456539857171492 + +EPOCH: 27 +Loss at step 0: 0.04674964025616646 +Loss at step 50: 0.04263751581311226 +Loss at step 100: 0.03667943552136421 +Loss at step 150: 0.040494099259376526 +Loss at step 200: 0.0386606901884079 +Loss at step 250: 0.040493160486221313 +Loss at step 300: 0.03662142530083656 +Loss at step 350: 0.03603795915842056 +Loss at step 400: 0.037907931953668594 +Loss at step 450: 0.05081484094262123 +Loss at step 500: 0.0563945472240448 +Loss at step 550: 0.0526045560836792 +Loss at step 600: 0.06008012220263481 +Loss at step 650: 0.051519934087991714 +Loss at step 700: 0.05095212161540985 +Loss at step 750: 0.038178347051143646 +Loss at step 800: 0.04066329076886177 +Loss at step 850: 0.038567233830690384 +Loss at step 900: 0.052257291972637177 +Mean training loss after epoch 27: 0.04509363583584965 + +EPOCH: 28 +Loss at step 0: 0.029913390055298805 +Loss at step 50: 0.03645753860473633 +Loss at step 100: 0.034360963851213455 +Loss at step 150: 0.03566978871822357 +Loss at step 200: 0.03904988244175911 +Loss at step 250: 0.03659699112176895 +Loss at step 300: 0.038922570645809174 +Loss at step 350: 0.033944662660360336 +Loss at step 400: 0.0383438803255558 +Loss at step 450: 0.05149287357926369 +Loss at step 500: 0.03719630837440491 +Loss at step 550: 0.03484205901622772 +Loss at step 600: 0.03765057399868965 +Loss at step 650: 0.06854096800088882 +Loss at step 700: 0.0372895784676075 +Loss at step 750: 0.044852498918771744 +Loss at step 800: 0.033244166523218155 +Loss at step 850: 0.04573490098118782 +Loss at step 900: 0.03735152631998062 +Mean training loss after epoch 28: 0.044933726687405286 + +EPOCH: 29 +Loss at step 0: 0.029447682201862335 +Loss at step 50: 0.0398627370595932 +Loss at step 100: 0.053118184208869934 +Loss at step 150: 0.03270377963781357 +Loss at step 200: 0.04028083756566048 +Loss at step 250: 0.057774025946855545 +Loss at step 300: 0.04933173581957817 +Loss at step 350: 0.054515670984983444 +Loss at step 400: 0.05123170465230942 +Loss at step 450: 0.042456742376089096 +Loss at step 500: 0.040116313844919205 +Loss at step 550: 0.04087410494685173 +Loss at step 600: 0.03468229994177818 +Loss at step 650: 0.039522793143987656 +Loss at step 700: 0.03545898199081421 +Loss at step 750: 0.04619523510336876 +Loss at step 800: 0.05729640647768974 +Loss at step 850: 0.051194336265325546 +Loss at step 900: 0.038833290338516235 +Mean training loss after epoch 29: 0.04402439670760367 + +EPOCH: 30 +Loss at step 0: 0.04906666278839111 +Loss at step 50: 0.04361962527036667 +Loss at step 100: 0.04044247418642044 +Loss at step 150: 0.05557110905647278 +Loss at step 200: 0.03803514316678047 +Loss at step 250: 0.04414329677820206 +Loss at step 300: 0.04065090790390968 +Loss at step 350: 0.03852323070168495 +Loss at step 400: 0.03416415676474571 +Loss at step 450: 0.05052315816283226 +Loss at step 500: 0.056460876017808914 +Loss at step 550: 0.041875824332237244 +Loss at step 600: 0.039723336696624756 +Loss at step 650: 0.03889099508523941 +Loss at step 700: 0.0717279314994812 +Loss at step 750: 0.05256667360663414 +Loss at step 800: 0.04998709261417389 +Loss at step 850: 0.03653858229517937 +Loss at step 900: 0.06387999653816223 +Mean training loss after epoch 30: 0.0444086283079978 + +EPOCH: 31 +Loss at step 0: 0.06570300459861755 +Loss at step 50: 0.03289508447051048 +Loss at step 100: 0.033931732177734375 +Loss at step 150: 0.05701424181461334 +Loss at step 200: 0.0524381622672081 +Loss at step 250: 0.04885663837194443 +Loss at step 300: 0.04105885699391365 +Loss at step 350: 0.03617656230926514 +Loss at step 400: 0.07422742247581482 +Loss at step 450: 0.04127587378025055 +Loss at step 500: 0.04956771805882454 +Loss at step 550: 0.032986026257276535 +Loss at step 600: 0.039893150329589844 +Loss at step 650: 0.03447552025318146 +Loss at step 700: 0.039409857243299484 +Loss at step 750: 0.051296427845954895 +Loss at step 800: 0.043854985386133194 +Loss at step 850: 0.058668699115514755 +Loss at step 900: 0.037739936262369156 +Mean training loss after epoch 31: 0.04468885555998412 + +EPOCH: 32 +Loss at step 0: 0.04160969331860542 +Loss at step 50: 0.03463306650519371 +Loss at step 100: 0.035780616104602814 +Loss at step 150: 0.05247391015291214 +Loss at step 200: 0.043705083429813385 +Loss at step 250: 0.0600837767124176 +Loss at step 300: 0.037433646619319916 +Loss at step 350: 0.05155890807509422 +Loss at step 400: 0.07468236237764359 +Loss at step 450: 0.06288442015647888 +Loss at step 500: 0.04347878322005272 +Loss at step 550: 0.04345081374049187 +Loss at step 600: 0.04002153500914574 +Loss at step 650: 0.030901411548256874 +Loss at step 700: 0.03812825679779053 +Loss at step 750: 0.039223719388246536 +Loss at step 800: 0.03850940614938736 +Loss at step 850: 0.04800470918416977 +Loss at step 900: 0.03695946931838989 +Mean training loss after epoch 32: 0.04362768248152504 + +EPOCH: 33 +Loss at step 0: 0.07225683331489563 +Loss at step 50: 0.039506975561380386 +Loss at step 100: 0.04103633388876915 +Loss at step 150: 0.036195915192365646 +Loss at step 200: 0.034182168543338776 +Loss at step 250: 0.052266839891672134 +Loss at step 300: 0.0427260585129261 +Loss at step 350: 0.039806365966796875 +Loss at step 400: 0.046187058091163635 +Loss at step 450: 0.035503558814525604 +Loss at step 500: 0.0356760174036026 +Loss at step 550: 0.03603162616491318 +Loss at step 600: 0.0453050471842289 +Loss at step 650: 0.04466850683093071 +Loss at step 700: 0.03671862930059433 +Loss at step 750: 0.032369568943977356 +Loss at step 800: 0.039522137492895126 +Loss at step 850: 0.05315655469894409 +Loss at step 900: 0.034537188708782196 +Mean training loss after epoch 33: 0.04295502561551612 + +EPOCH: 34 +Loss at step 0: 0.029321929439902306 +Loss at step 50: 0.034134674817323685 +Loss at step 100: 0.04984445869922638 +Loss at step 150: 0.035927992314100266 +Loss at step 200: 0.04887424409389496 +Loss at step 250: 0.037316031754016876 +Loss at step 300: 0.04465991631150246 +Loss at step 350: 0.05477762967348099 +Loss at step 400: 0.053357016295194626 +Loss at step 450: 0.050971269607543945 +Loss at step 500: 0.0499236024916172 +Loss at step 550: 0.04749893397092819 +Loss at step 600: 0.04637804254889488 +Loss at step 650: 0.04684502258896828 +Loss at step 700: 0.06652657687664032 +Loss at step 750: 0.043038755655288696 +Loss at step 800: 0.03603661060333252 +Loss at step 850: 0.04426427185535431 +Loss at step 900: 0.05061193183064461 +Mean training loss after epoch 34: 0.04389402393037195 + +EPOCH: 35 +Loss at step 0: 0.052734531462192535 +Loss at step 50: 0.07013734430074692 +Loss at step 100: 0.055571962147951126 +Loss at step 150: 0.03865116834640503 +Loss at step 200: 0.0372077152132988 +Loss at step 250: 0.04977886378765106 +Loss at step 300: 0.04531254991889 +Loss at step 350: 0.05584704130887985 +Loss at step 400: 0.0377432182431221 +Loss at step 450: 0.04084886610507965 +Loss at step 500: 0.07135910540819168 +Loss at step 550: 0.03363371267914772 +Loss at step 600: 0.03212135657668114 +Loss at step 650: 0.03952178731560707 +Loss at step 700: 0.041552383452653885 +Loss at step 750: 0.03404490277171135 +Loss at step 800: 0.03914336860179901 +Loss at step 850: 0.043152667582035065 +Loss at step 900: 0.03232409805059433 +Mean training loss after epoch 35: 0.04302508351819983 + +EPOCH: 36 +Loss at step 0: 0.04851457104086876 +Loss at step 50: 0.04562768712639809 +Loss at step 100: 0.03531981259584427 +Loss at step 150: 0.043214645236730576 +Loss at step 200: 0.03744692727923393 +Loss at step 250: 0.042286891490221024 +Loss at step 300: 0.049430038779973984 +Loss at step 350: 0.036436405032873154 +Loss at step 400: 0.034777041524648666 +Loss at step 450: 0.04153888300061226 +Loss at step 500: 0.03553176298737526 +Loss at step 550: 0.031961582601070404 +Loss at step 600: 0.05224454030394554 +Loss at step 650: 0.04388105496764183 +Loss at step 700: 0.040361806750297546 +Loss at step 750: 0.040422216057777405 +Loss at step 800: 0.03775598108768463 +Loss at step 850: 0.05734693259000778 +Loss at step 900: 0.04077355936169624 +Mean training loss after epoch 36: 0.04312853543743142 + +EPOCH: 37 +Loss at step 0: 0.03714748099446297 +Loss at step 50: 0.050660282373428345 +Loss at step 100: 0.03697645664215088 +Loss at step 150: 0.0827973410487175 +Loss at step 200: 0.04317590221762657 +Loss at step 250: 0.03618314117193222 +Loss at step 300: 0.037134528160095215 +Loss at step 350: 0.03909546509385109 +Loss at step 400: 0.038076251745224 +Loss at step 450: 0.04257143288850784 +Loss at step 500: 0.03321288153529167 +Loss at step 550: 0.03748180717229843 +Loss at step 600: 0.06636057794094086 +Loss at step 650: 0.03770602494478226 +Loss at step 700: 0.08009560406208038 +Loss at step 750: 0.03693768009543419 +Loss at step 800: 0.03650718182325363 +Loss at step 850: 0.04664753004908562 +Loss at step 900: 0.03936724364757538 +Mean training loss after epoch 37: 0.04336765655941928 + +EPOCH: 38 +Loss at step 0: 0.03465455397963524 +Loss at step 50: 0.030911577865481377 +Loss at step 100: 0.05364152789115906 +Loss at step 150: 0.03758525103330612 +Loss at step 200: 0.03997034952044487 +Loss at step 250: 0.0362134613096714 +Loss at step 300: 0.03510035201907158 +Loss at step 350: 0.03568132966756821 +Loss at step 400: 0.03927675634622574 +Loss at step 450: 0.043984606862068176 +Loss at step 500: 0.044523004442453384 +Loss at step 550: 0.036992426961660385 +Loss at step 600: 0.0432291254401207 +Loss at step 650: 0.04972879961133003 +Loss at step 700: 0.040408678352832794 +Loss at step 750: 0.03769862279295921 +Loss at step 800: 0.03676699846982956 +Loss at step 850: 0.03363250568509102 +Loss at step 900: 0.050196655094623566 +Mean training loss after epoch 38: 0.043665299007395054 + +EPOCH: 39 +Loss at step 0: 0.04297603666782379 +Loss at step 50: 0.03462286666035652 +Loss at step 100: 0.03346511349081993 +Loss at step 150: 0.04194847494363785 +Loss at step 200: 0.036040548235177994 +Loss at step 250: 0.03550048545002937 +Loss at step 300: 0.04542850703001022 +Loss at step 350: 0.05183808133006096 +Loss at step 400: 0.03676900267601013 +Loss at step 450: 0.049038663506507874 +Loss at step 500: 0.03395945951342583 +Loss at step 550: 0.0338163748383522 +Loss at step 600: 0.033632803708314896 +Loss at step 650: 0.03675498068332672 +Loss at step 700: 0.041949450969696045 +Loss at step 750: 0.04227084293961525 +Loss at step 800: 0.04024386778473854 +Loss at step 850: 0.03923187032341957 +Loss at step 900: 0.034244369715452194 +Mean training loss after epoch 39: 0.04297231583754772 + +EPOCH: 40 +Loss at step 0: 0.030180353671312332 +Loss at step 50: 0.049267880618572235 +Loss at step 100: 0.03916803002357483 +Loss at step 150: 0.056771960109472275 +Loss at step 200: 0.039305031299591064 +Loss at step 250: 0.039471086114645004 +Loss at step 300: 0.03026297129690647 +Loss at step 350: 0.0378076434135437 +Loss at step 400: 0.055273085832595825 +Loss at step 450: 0.04198889806866646 +Loss at step 500: 0.05805264040827751 +Loss at step 550: 0.04977709427475929 +Loss at step 600: 0.0806681215763092 +Loss at step 650: 0.08839074522256851 +Loss at step 700: 0.041415825486183167 +Loss at step 750: 0.0352671816945076 +Loss at step 800: 0.03809533268213272 +Loss at step 850: 0.03589823096990585 +Loss at step 900: 0.03772462159395218 +Mean training loss after epoch 40: 0.04321250221781385 + +EPOCH: 41 +Loss at step 0: 0.05648473650217056 +Loss at step 50: 0.03883674740791321 +Loss at step 100: 0.03668802231550217 +Loss at step 150: 0.03581535816192627 +Loss at step 200: 0.05020207539200783 +Loss at step 250: 0.03667880967259407 +Loss at step 300: 0.042587947100400925 +Loss at step 350: 0.03846283629536629 +Loss at step 400: 0.036278266459703445 +Loss at step 450: 0.039367277175188065 +Loss at step 500: 0.03347210958600044 +Loss at step 550: 0.042285192757844925 +Loss at step 600: 0.03368893265724182 +Loss at step 650: 0.03954387828707695 +Loss at step 700: 0.04458700865507126 +Loss at step 750: 0.04066156595945358 +Loss at step 800: 0.036489538848400116 +Loss at step 850: 0.03715793415904045 +Loss at step 900: 0.06632266938686371 +Mean training loss after epoch 41: 0.042843369641966784 + +EPOCH: 42 +Loss at step 0: 0.049074843525886536 +Loss at step 50: 0.031039627268910408 +Loss at step 100: 0.0409608855843544 +Loss at step 150: 0.0379287414252758 +Loss at step 200: 0.03141274303197861 +Loss at step 250: 0.03718740865588188 +Loss at step 300: 0.04370876029133797 +Loss at step 350: 0.02973066456615925 +Loss at step 400: 0.03484489768743515 +Loss at step 450: 0.05998479947447777 +Loss at step 500: 0.04298393055796623 +Loss at step 550: 0.0414208248257637 +Loss at step 600: 0.05147707462310791 +Loss at step 650: 0.06301922351121902 +Loss at step 700: 0.04647999256849289 +Loss at step 750: 0.0380011722445488 +Loss at step 800: 0.03523740544915199 +Loss at step 850: 0.07007824629545212 +Loss at step 900: 0.04118829593062401 +Mean training loss after epoch 42: 0.042861232980847486 + +EPOCH: 43 +Loss at step 0: 0.028600996360182762 +Loss at step 50: 0.03958134725689888 +Loss at step 100: 0.0379943922162056 +Loss at step 150: 0.0403524711728096 +Loss at step 200: 0.038039691746234894 +Loss at step 250: 0.05107349529862404 +Loss at step 300: 0.04073113203048706 +Loss at step 350: 0.043610718101263046 +Loss at step 400: 0.07114291191101074 +Loss at step 450: 0.04253702610731125 +Loss at step 500: 0.05893601104617119 +Loss at step 550: 0.0509442500770092 +Loss at step 600: 0.044168759137392044 +Loss at step 650: 0.054990626871585846 +Loss at step 700: 0.042283836752176285 +Loss at step 750: 0.03912976384162903 +Loss at step 800: 0.051697466522455215 +Loss at step 850: 0.05673801526427269 +Loss at step 900: 0.035775937139987946 +Mean training loss after epoch 43: 0.04222319512836524 + +EPOCH: 44 +Loss at step 0: 0.037387020885944366 +Loss at step 50: 0.03670916706323624 +Loss at step 100: 0.03850468993186951 +Loss at step 150: 0.04527543485164642 +Loss at step 200: 0.04270525649189949 +Loss at step 250: 0.0406525693833828 +Loss at step 300: 0.0392395555973053 +Loss at step 350: 0.03703998774290085 +Loss at step 400: 0.04435901716351509 +Loss at step 450: 0.030216777697205544 +Loss at step 500: 0.03501291945576668 +Loss at step 550: 0.05483980476856232 +Loss at step 600: 0.03645576909184456 +Loss at step 650: 0.03959187492728233 +Loss at step 700: 0.035912465304136276 +Loss at step 750: 0.033680666238069534 +Loss at step 800: 0.032813236117362976 +Loss at step 850: 0.03100864589214325 +Loss at step 900: 0.038795165717601776 +Mean training loss after epoch 44: 0.04228815817431029 + +EPOCH: 45 +Loss at step 0: 0.05061938613653183 +Loss at step 50: 0.06159723922610283 +Loss at step 100: 0.04928106442093849 +Loss at step 150: 0.04391185939311981 +Loss at step 200: 0.03841090202331543 +Loss at step 250: 0.03663986548781395 +Loss at step 300: 0.033025361597537994 +Loss at step 350: 0.05331796035170555 +Loss at step 400: 0.03757380694150925 +Loss at step 450: 0.0336463637650013 +Loss at step 500: 0.03897778317332268 +Loss at step 550: 0.03989659994840622 +Loss at step 600: 0.038776081055402756 +Loss at step 650: 0.05733334645628929 +Loss at step 700: 0.03490510582923889 +Loss at step 750: 0.039473410695791245 +Loss at step 800: 0.05830595642328262 +Loss at step 850: 0.05670051649212837 +Loss at step 900: 0.040327806025743484 +Mean training loss after epoch 45: 0.042787098425077094 + +EPOCH: 46 +Loss at step 0: 0.036934878677129745 +Loss at step 50: 0.033364858478307724 +Loss at step 100: 0.03347126394510269 +Loss at step 150: 0.041521187871694565 +Loss at step 200: 0.0378275141119957 +Loss at step 250: 0.03971440717577934 +Loss at step 300: 0.03849451243877411 +Loss at step 350: 0.030927833169698715 +Loss at step 400: 0.03408275172114372 +Loss at step 450: 0.04135040193796158 +Loss at step 500: 0.03571862354874611 +Loss at step 550: 0.039906665682792664 +Loss at step 600: 0.03439576178789139 +Loss at step 650: 0.052114088088274 +Loss at step 700: 0.04131833836436272 +Loss at step 750: 0.03150162473320961 +Loss at step 800: 0.053609561175107956 +Loss at step 850: 0.07118377834558487 +Loss at step 900: 0.038397371768951416 +Mean training loss after epoch 46: 0.0424582164174617 + +EPOCH: 47 +Loss at step 0: 0.0376482829451561 +Loss at step 50: 0.033311713486909866 +Loss at step 100: 0.046062029898166656 +Loss at step 150: 0.07413475960493088 +Loss at step 200: 0.06404237449169159 +Loss at step 250: 0.04188799113035202 +Loss at step 300: 0.044554203748703 +Loss at step 350: 0.03766021877527237 +Loss at step 400: 0.05552181974053383 +Loss at step 450: 0.037229984998703 +Loss at step 500: 0.03611749783158302 +Loss at step 550: 0.03990129381418228 +Loss at step 600: 0.04216839373111725 +Loss at step 650: 0.0565626285970211 +Loss at step 700: 0.05230112373828888 +Loss at step 750: 0.03784063085913658 +Loss at step 800: 0.03894263133406639 +Loss at step 850: 0.033128369599580765 +Loss at step 900: 0.030786553397774696 +Mean training loss after epoch 47: 0.042213420318102025 + +EPOCH: 48 +Loss at step 0: 0.06113245338201523 +Loss at step 50: 0.038732144981622696 +Loss at step 100: 0.04146707057952881 +Loss at step 150: 0.05363892391324043 +Loss at step 200: 0.04317339509725571 +Loss at step 250: 0.04006329923868179 +Loss at step 300: 0.038389842957258224 +Loss at step 350: 0.04166923090815544 +Loss at step 400: 0.050251998007297516 +Loss at step 450: 0.03234206885099411 +Loss at step 500: 0.05430510640144348 +Loss at step 550: 0.05468246340751648 +Loss at step 600: 0.03641614317893982 +Loss at step 650: 0.0358758419752121 +Loss at step 700: 0.04045901075005531 +Loss at step 750: 0.02902292087674141 +Loss at step 800: 0.037241075187921524 +Loss at step 850: 0.0750131905078888 +Loss at step 900: 0.0332866832613945 +Mean training loss after epoch 48: 0.04252630450538417 + +EPOCH: 49 +Loss at step 0: 0.04021422937512398 +Loss at step 50: 0.03641494736075401 +Loss at step 100: 0.038768261671066284 +Loss at step 150: 0.04296419024467468 +Loss at step 200: 0.036955609917640686 +Loss at step 250: 0.040370337665081024 +Loss at step 300: 0.03228134289383888 +Loss at step 350: 0.048863768577575684 +Loss at step 400: 0.03823704645037651 +Loss at step 450: 0.05663494020700455 +Loss at step 500: 0.03468574583530426 +Loss at step 550: 0.04946057125926018 +Loss at step 600: 0.04328424111008644 +Loss at step 650: 0.0349988117814064 +Loss at step 700: 0.04406754672527313 +Loss at step 750: 0.037463266402482986 +Loss at step 800: 0.04004407674074173 +Loss at step 850: 0.03687194362282753 +Loss at step 900: 0.03602185472846031 +Mean training loss after epoch 49: 0.042539564672230024 + +EPOCH: 50 +Loss at step 0: 0.03601893410086632 +Loss at step 50: 0.03488951548933983 +Loss at step 100: 0.04303381219506264 +Loss at step 150: 0.03584350645542145 +Loss at step 200: 0.03999720513820648 +Loss at step 250: 0.037075553089380264 +Loss at step 300: 0.04014355316758156 +Loss at step 350: 0.03799928352236748 +Loss at step 400: 0.03307428956031799 +Loss at step 450: 0.03780652582645416 +Loss at step 500: 0.04645475372672081 +Loss at step 550: 0.035657599568367004 +Loss at step 600: 0.0703330934047699 +Loss at step 650: 0.0441756397485733 +Loss at step 700: 0.03868475556373596 +Loss at step 750: 0.036150313913822174 +Loss at step 800: 0.05835910141468048 +Loss at step 850: 0.035950593650341034 +Loss at step 900: 0.033544495701789856 +Mean training loss after epoch 50: 0.04233735294412893 + +EPOCH: 51 +Loss at step 0: 0.03536150977015495 +Loss at step 50: 0.05265863612294197 +Loss at step 100: 0.053060974925756454 +Loss at step 150: 0.03569833189249039 +Loss at step 200: 0.03769461810588837 +Loss at step 250: 0.03149081766605377 +Loss at step 300: 0.03774293139576912 +Loss at step 350: 0.03972206637263298 +Loss at step 400: 0.03773874789476395 +Loss at step 450: 0.040503572672605515 +Loss at step 500: 0.04576042294502258 +Loss at step 550: 0.0350419245660305 +Loss at step 600: 0.04863131046295166 +Loss at step 650: 0.05377759039402008 +Loss at step 700: 0.03654267638921738 +Loss at step 750: 0.032249435782432556 +Loss at step 800: 0.04548071324825287 +Loss at step 850: 0.05506904795765877 +Loss at step 900: 0.03483176603913307 +Mean training loss after epoch 51: 0.04182767730031504 + +EPOCH: 52 +Loss at step 0: 0.035643722862005234 +Loss at step 50: 0.035993482917547226 +Loss at step 100: 0.039494823664426804 +Loss at step 150: 0.03567099943757057 +Loss at step 200: 0.0390964038670063 +Loss at step 250: 0.035363804548978806 +Loss at step 300: 0.04085114598274231 +Loss at step 350: 0.03391866385936737 +Loss at step 400: 0.048653654754161835 +Loss at step 450: 0.03442519158124924 +Loss at step 500: 0.035034891217947006 +Loss at step 550: 0.05182870849967003 +Loss at step 600: 0.038956109434366226 +Loss at step 650: 0.04642537236213684 +Loss at step 700: 0.03629075363278389 +Loss at step 750: 0.03320210427045822 +Loss at step 800: 0.03982256352901459 +Loss at step 850: 0.05380026251077652 +Loss at step 900: 0.04151134565472603 +Mean training loss after epoch 52: 0.041800402694626024 + +EPOCH: 53 +Loss at step 0: 0.05759120360016823 +Loss at step 50: 0.0414610281586647 +Loss at step 100: 0.03528021648526192 +Loss at step 150: 0.058663200587034225 +Loss at step 200: 0.03417353704571724 +Loss at step 250: 0.05214337259531021 +Loss at step 300: 0.030978785827755928 +Loss at step 350: 0.03184428811073303 +Loss at step 400: 0.04949423298239708 +Loss at step 450: 0.046923208981752396 +Loss at step 500: 0.04200230911374092 +Loss at step 550: 0.05963723734021187 +Loss at step 600: 0.048732444643974304 +Loss at step 650: 0.03525372967123985 +Loss at step 700: 0.042062558233737946 +Loss at step 750: 0.0699642226099968 +Loss at step 800: 0.03157135844230652 +Loss at step 850: 0.03461334854364395 +Loss at step 900: 0.03986768424510956 +Mean training loss after epoch 53: 0.04206207239710446 + +EPOCH: 54 +Loss at step 0: 0.043047960847616196 +Loss at step 50: 0.050478626042604446 +Loss at step 100: 0.03833431750535965 +Loss at step 150: 0.037333905696868896 +Loss at step 200: 0.07576791942119598 +Loss at step 250: 0.041819144040346146 +Loss at step 300: 0.05899069085717201 +Loss at step 350: 0.038387831300497055 +Loss at step 400: 0.04553873464465141 +Loss at step 450: 0.03330902010202408 +Loss at step 500: 0.03797665983438492 +Loss at step 550: 0.05493336543440819 +Loss at step 600: 0.038320817053318024 +Loss at step 650: 0.03387630730867386 +Loss at step 700: 0.03648149594664574 +Loss at step 750: 0.05657254159450531 +Loss at step 800: 0.0527743361890316 +Loss at step 850: 0.031990937888622284 +Loss at step 900: 0.05744639039039612 +Mean training loss after epoch 54: 0.042270664306385305 + +EPOCH: 55 +Loss at step 0: 0.04634654521942139 +Loss at step 50: 0.038860127329826355 +Loss at step 100: 0.0369553305208683 +Loss at step 150: 0.04601672664284706 +Loss at step 200: 0.026475802063941956 +Loss at step 250: 0.040818262845277786 +Loss at step 300: 0.050957221537828445 +Loss at step 350: 0.05213375389575958 +Loss at step 400: 0.03895549848675728 +Loss at step 450: 0.039853379130363464 +Loss at step 500: 0.0456225723028183 +Loss at step 550: 0.03201737627387047 +Loss at step 600: 0.04900722950696945 +Loss at step 650: 0.033515963703393936 +Loss at step 700: 0.045205309987068176 +Loss at step 750: 0.05664574354887009 +Loss at step 800: 0.05195131152868271 +Loss at step 850: 0.03943021968007088 +Loss at step 900: 0.051934707909822464 +Mean training loss after epoch 55: 0.041760075203915524 + +EPOCH: 56 +Loss at step 0: 0.035669680684804916 +Loss at step 50: 0.03731066361069679 +Loss at step 100: 0.03839431330561638 +Loss at step 150: 0.04437437653541565 +Loss at step 200: 0.03604341298341751 +Loss at step 250: 0.03787315636873245 +Loss at step 300: 0.04129105433821678 +Loss at step 350: 0.0681724026799202 +Loss at step 400: 0.037422459572553635 +Loss at step 450: 0.04147394374012947 +Loss at step 500: 0.030557721853256226 +Loss at step 550: 0.05729537457227707 +Loss at step 600: 0.044497594237327576 +Loss at step 650: 0.04281434789299965 +Loss at step 700: 0.03463006392121315 +Loss at step 750: 0.03795117139816284 +Loss at step 800: 0.03903064504265785 +Loss at step 850: 0.05226927250623703 +Loss at step 900: 0.044425107538700104 +Mean training loss after epoch 56: 0.04198346188518284 + +EPOCH: 57 +Loss at step 0: 0.040313441306352615 +Loss at step 50: 0.04098132625222206 +Loss at step 100: 0.037053175270557404 +Loss at step 150: 0.03720230981707573 +Loss at step 200: 0.03237070143222809 +Loss at step 250: 0.042227283120155334 +Loss at step 300: 0.05192343145608902 +Loss at step 350: 0.03451365604996681 +Loss at step 400: 0.03476078435778618 +Loss at step 450: 0.03759030997753143 +Loss at step 500: 0.03982206806540489 +Loss at step 550: 0.04919624701142311 +Loss at step 600: 0.028733564540743828 +Loss at step 650: 0.036303646862506866 +Loss at step 700: 0.040104515850543976 +Loss at step 750: 0.040087904781103134 +Loss at step 800: 0.0365641824901104 +Loss at step 850: 0.03147405758500099 +Loss at step 900: 0.04074550420045853 +Mean training loss after epoch 57: 0.0418718279198384 + +EPOCH: 58 +Loss at step 0: 0.027126234024763107 +Loss at step 50: 0.03955811262130737 +Loss at step 100: 0.03852409869432449 +Loss at step 150: 0.04588547348976135 +Loss at step 200: 0.0327930673956871 +Loss at step 250: 0.03407374769449234 +Loss at step 300: 0.0346791073679924 +Loss at step 350: 0.03419278562068939 +Loss at step 400: 0.04033670201897621 +Loss at step 450: 0.037295371294021606 +Loss at step 500: 0.036423176527023315 +Loss at step 550: 0.03983192518353462 +Loss at step 600: 0.03562218323349953 +Loss at step 650: 0.057547878473997116 +Loss at step 700: 0.04399918019771576 +Loss at step 750: 0.06756962835788727 +Loss at step 800: 0.0535319484770298 +Loss at step 850: 0.040021199733018875 +Loss at step 900: 0.05363428220152855 +Mean training loss after epoch 58: 0.04227230368035117 + +EPOCH: 59 +Loss at step 0: 0.054008278995752335 +Loss at step 50: 0.029929330572485924 +Loss at step 100: 0.043072547763586044 +Loss at step 150: 0.04712403565645218 +Loss at step 200: 0.04172234237194061 +Loss at step 250: 0.0380144938826561 +Loss at step 300: 0.06645963340997696 +Loss at step 350: 0.03476366773247719 +Loss at step 400: 0.044849783182144165 +Loss at step 450: 0.03969757631421089 +Loss at step 500: 0.041757795959711075 +Loss at step 550: 0.03557545319199562 +Loss at step 600: 0.03668488934636116 +Loss at step 650: 0.034634947776794434 +Loss at step 700: 0.036369070410728455 +Loss at step 750: 0.027953891083598137 +Loss at step 800: 0.05794013664126396 +Loss at step 850: 0.0434873141348362 +Loss at step 900: 0.04062079265713692 +Mean training loss after epoch 59: 0.04175872626557533 + +EPOCH: 60 +Loss at step 0: 0.05230647698044777 +Loss at step 50: 0.034911252558231354 +Loss at step 100: 0.038966234773397446 +Loss at step 150: 0.04930701106786728 +Loss at step 200: 0.039878372102975845 +Loss at step 250: 0.032641004770994186 +Loss at step 300: 0.05420990288257599 +Loss at step 350: 0.035432182252407074 +Loss at step 400: 0.047906797379255295 +Loss at step 450: 0.03237815201282501 +Loss at step 500: 0.05809643119573593 +Loss at step 550: 0.03982365503907204 +Loss at step 600: 0.03338718041777611 +Loss at step 650: 0.06101324036717415 +Loss at step 700: 0.05832071974873543 +Loss at step 750: 0.05672917887568474 +Loss at step 800: 0.035312261432409286 +Loss at step 850: 0.03269599750638008 +Loss at step 900: 0.03432564437389374 +Mean training loss after epoch 60: 0.04145304879336469 + +EPOCH: 61 +Loss at step 0: 0.05218600109219551 +Loss at step 50: 0.028676575049757957 +Loss at step 100: 0.05183856189250946 +Loss at step 150: 0.04828957840800285 +Loss at step 200: 0.03797846660017967 +Loss at step 250: 0.04018789157271385 +Loss at step 300: 0.029013371095061302 +Loss at step 350: 0.046787358820438385 +Loss at step 400: 0.03374910354614258 +Loss at step 450: 0.04779788479208946 +Loss at step 500: 0.03813735023140907 +Loss at step 550: 0.06082974001765251 +Loss at step 600: 0.031995780766010284 +Loss at step 650: 0.036440473049879074 +Loss at step 700: 0.03242507576942444 +Loss at step 750: 0.047881174832582474 +Loss at step 800: 0.03955503925681114 +Loss at step 850: 0.039465587586164474 +Loss at step 900: 0.032889991998672485 +Mean training loss after epoch 61: 0.04225412909505464 + +EPOCH: 62 +Loss at step 0: 0.07850071787834167 +Loss at step 50: 0.03590470924973488 +Loss at step 100: 0.03401835262775421 +Loss at step 150: 0.03575047105550766 +Loss at step 200: 0.03225778415799141 +Loss at step 250: 0.03880147635936737 +Loss at step 300: 0.03870739787817001 +Loss at step 350: 0.042368412017822266 +Loss at step 400: 0.03864947706460953 +Loss at step 450: 0.03193790838122368 +Loss at step 500: 0.043149128556251526 +Loss at step 550: 0.03357163816690445 +Loss at step 600: 0.043366439640522 +Loss at step 650: 0.04025929793715477 +Loss at step 700: 0.035577934235334396 +Loss at step 750: 0.03176751732826233 +Loss at step 800: 0.030962109565734863 +Loss at step 850: 0.03757867589592934 +Loss at step 900: 0.03229587897658348 +Mean training loss after epoch 62: 0.04198516900144787 + +EPOCH: 63 +Loss at step 0: 0.057671867311000824 +Loss at step 50: 0.04842713847756386 +Loss at step 100: 0.030468273907899857 +Loss at step 150: 0.05220678821206093 +Loss at step 200: 0.04170433431863785 +Loss at step 250: 0.05136094242334366 +Loss at step 300: 0.04169003665447235 +Loss at step 350: 0.031596481800079346 +Loss at step 400: 0.04273003712296486 +Loss at step 450: 0.061871133744716644 +Loss at step 500: 0.04741637408733368 +Loss at step 550: 0.03889886289834976 +Loss at step 600: 0.0323534719645977 +Loss at step 650: 0.03431367874145508 +Loss at step 700: 0.03811480104923248 +Loss at step 750: 0.03434113413095474 +Loss at step 800: 0.049015481024980545 +Loss at step 850: 0.04040595889091492 +Loss at step 900: 0.03862114995718002 +Mean training loss after epoch 63: 0.04131053804334547 + +EPOCH: 64 +Loss at step 0: 0.030019661411643028 +Loss at step 50: 0.03455144912004471 +Loss at step 100: 0.04964865371584892 +Loss at step 150: 0.03217058628797531 +Loss at step 200: 0.03193427622318268 +Loss at step 250: 0.03496089577674866 +Loss at step 300: 0.036626748740673065 +Loss at step 350: 0.0424736887216568 +Loss at step 400: 0.03943726047873497 +Loss at step 450: 0.040541987866163254 +Loss at step 500: 0.03838001936674118 +Loss at step 550: 0.03629530221223831 +Loss at step 600: 0.033054422587156296 +Loss at step 650: 0.0398416668176651 +Loss at step 700: 0.050053808838129044 +Loss at step 750: 0.035665612667798996 +Loss at step 800: 0.03748640790581703 +Loss at step 850: 0.04931091517210007 +Loss at step 900: 0.035654835402965546 +Mean training loss after epoch 64: 0.041265397145947035 + +EPOCH: 65 +Loss at step 0: 0.03760325163602829 +Loss at step 50: 0.03528781980276108 +Loss at step 100: 0.0328512042760849 +Loss at step 150: 0.03513569384813309 +Loss at step 200: 0.032193057239055634 +Loss at step 250: 0.038936797529459 +Loss at step 300: 0.03411776199936867 +Loss at step 350: 0.050534818321466446 +Loss at step 400: 0.05266318842768669 +Loss at step 450: 0.03857654333114624 +Loss at step 500: 0.03783871978521347 +Loss at step 550: 0.03197762742638588 +Loss at step 600: 0.03530753776431084 +Loss at step 650: 0.038086168467998505 +Loss at step 700: 0.03254755958914757 +Loss at step 750: 0.033772069960832596 +Loss at step 800: 0.03325922414660454 +Loss at step 850: 0.037996433675289154 +Loss at step 900: 0.044859953224658966 +Mean training loss after epoch 65: 0.041470230835031215 + +EPOCH: 66 +Loss at step 0: 0.039558980613946915 +Loss at step 50: 0.03798419609665871 +Loss at step 100: 0.0470040962100029 +Loss at step 150: 0.03549093380570412 +Loss at step 200: 0.06594263017177582 +Loss at step 250: 0.04572553560137749 +Loss at step 300: 0.05195481330156326 +Loss at step 350: 0.03912719711661339 +Loss at step 400: 0.033564552664756775 +Loss at step 450: 0.06788386404514313 +Loss at step 500: 0.04538029804825783 +Loss at step 550: 0.0429832860827446 +Loss at step 600: 0.044070933014154434 +Loss at step 650: 0.03674892336130142 +Loss at step 700: 0.04859331250190735 +Loss at step 750: 0.04238688573241234 +Loss at step 800: 0.05128232762217522 +Loss at step 850: 0.026888985186815262 +Loss at step 900: 0.054580822587013245 +Mean training loss after epoch 66: 0.04168821637159281 + +EPOCH: 67 +Loss at step 0: 0.048112139105796814 +Loss at step 50: 0.03597401827573776 +Loss at step 100: 0.040139153599739075 +Loss at step 150: 0.03657975420355797 +Loss at step 200: 0.03313577175140381 +Loss at step 250: 0.03261137753725052 +Loss at step 300: 0.032174672931432724 +Loss at step 350: 0.03411630168557167 +Loss at step 400: 0.05067075043916702 +Loss at step 450: 0.03755392134189606 +Loss at step 500: 0.049025941640138626 +Loss at step 550: 0.053921084851026535 +Loss at step 600: 0.03517812862992287 +Loss at step 650: 0.03399652615189552 +Loss at step 700: 0.04195648804306984 +Loss at step 750: 0.05345770716667175 +Loss at step 800: 0.03349097818136215 +Loss at step 850: 0.0440969280898571 +Loss at step 900: 0.03051835112273693 +Mean training loss after epoch 67: 0.04172010423698977 + +EPOCH: 68 +Loss at step 0: 0.029953550547361374 +Loss at step 50: 0.04670592024922371 +Loss at step 100: 0.035323210060596466 +Loss at step 150: 0.046807896345853806 +Loss at step 200: 0.062253061681985855 +Loss at step 250: 0.03276786580681801 +Loss at step 300: 0.033763352781534195 +Loss at step 350: 0.02836601622402668 +Loss at step 400: 0.03322148323059082 +Loss at step 450: 0.02729177102446556 +Loss at step 500: 0.04363562911748886 +Loss at step 550: 0.04207039624452591 +Loss at step 600: 0.04303210973739624 +Loss at step 650: 0.04108305275440216 +Loss at step 700: 0.033733610063791275 +Loss at step 750: 0.03041158616542816 +Loss at step 800: 0.03126723691821098 +Loss at step 850: 0.049652907997369766 +Loss at step 900: 0.04426473379135132 +Mean training loss after epoch 68: 0.04149038618855448 + +EPOCH: 69 +Loss at step 0: 0.05051280930638313 +Loss at step 50: 0.032365623861551285 +Loss at step 100: 0.035237330943346024 +Loss at step 150: 0.03612693399190903 +Loss at step 200: 0.03822920098900795 +Loss at step 250: 0.029838262125849724 +Loss at step 300: 0.038925401866436005 +Loss at step 350: 0.03142131119966507 +Loss at step 400: 0.0644800141453743 +Loss at step 450: 0.03479154035449028 +Loss at step 500: 0.05621056631207466 +Loss at step 550: 0.035777267068624496 +Loss at step 600: 0.03171798586845398 +Loss at step 650: 0.03767917677760124 +Loss at step 700: 0.03505399078130722 +Loss at step 750: 0.035311244428157806 +Loss at step 800: 0.03133586049079895 +Loss at step 850: 0.03778800740838051 +Loss at step 900: 0.03941011801362038 +Mean training loss after epoch 69: 0.04125585565879655 + +EPOCH: 70 +Loss at step 0: 0.033629123121500015 +Loss at step 50: 0.047444071620702744 +Loss at step 100: 0.05690629407763481 +Loss at step 150: 0.036379892379045486 +Loss at step 200: 0.03223971277475357 +Loss at step 250: 0.03425416722893715 +Loss at step 300: 0.05382617935538292 +Loss at step 350: 0.03412552550435066 +Loss at step 400: 0.03555070981383324 +Loss at step 450: 0.03165304288268089 +Loss at step 500: 0.056805454194545746 +Loss at step 550: 0.04862965643405914 +Loss at step 600: 0.045455142855644226 +Loss at step 650: 0.04065563902258873 +Loss at step 700: 0.034020159393548965 +Loss at step 750: 0.03897823393344879 +Loss at step 800: 0.04187474772334099 +Loss at step 850: 0.03641607239842415 +Loss at step 900: 0.04018361493945122 +Mean training loss after epoch 70: 0.04117715101379321 + +EPOCH: 71 +Loss at step 0: 0.031739939004182816 +Loss at step 50: 0.03575426712632179 +Loss at step 100: 0.041028402745723724 +Loss at step 150: 0.03342348709702492 +Loss at step 200: 0.03431496396660805 +Loss at step 250: 0.03221401944756508 +Loss at step 300: 0.048587020486593246 +Loss at step 350: 0.03845015913248062 +Loss at step 400: 0.04822957515716553 +Loss at step 450: 0.031151946634054184 +Loss at step 500: 0.033823247998952866 +Loss at step 550: 0.032001834362745285 +Loss at step 600: 0.04520208016037941 +Loss at step 650: 0.03050629422068596 +Loss at step 700: 0.05655837431550026 +Loss at step 750: 0.04253324121236801 +Loss at step 800: 0.05259395018219948 +Loss at step 850: 0.039520565420389175 +Loss at step 900: 0.052242618054151535 +Mean training loss after epoch 71: 0.04180061946243747 + +EPOCH: 72 +Loss at step 0: 0.03948841243982315 +Loss at step 50: 0.053918808698654175 +Loss at step 100: 0.03918600454926491 +Loss at step 150: 0.05136851593852043 +Loss at step 200: 0.03424490615725517 +Loss at step 250: 0.05601086467504501 +Loss at step 300: 0.03227918967604637 +Loss at step 350: 0.05336814001202583 +Loss at step 400: 0.050874777138233185 +Loss at step 450: 0.037637416273355484 +Loss at step 500: 0.02950294502079487 +Loss at step 550: 0.04061894118785858 +Loss at step 600: 0.030604930594563484 +Loss at step 650: 0.051947418600320816 +Loss at step 700: 0.03727438300848007 +Loss at step 750: 0.03555244579911232 +Loss at step 800: 0.043268777430057526 +Loss at step 850: 0.03692298382520676 +Loss at step 900: 0.048946987837553024 +Mean training loss after epoch 72: 0.04081341141521105 + +EPOCH: 73 +Loss at step 0: 0.02806745283305645 +Loss at step 50: 0.029986565932631493 +Loss at step 100: 0.030133534222841263 +Loss at step 150: 0.04718175157904625 +Loss at step 200: 0.07402471452951431 +Loss at step 250: 0.038135360926389694 +Loss at step 300: 0.037517957389354706 +Loss at step 350: 0.039786506444215775 +Loss at step 400: 0.04700249806046486 +Loss at step 450: 0.03678031638264656 +Loss at step 500: 0.03160851448774338 +Loss at step 550: 0.033725056797266006 +Loss at step 600: 0.0418279692530632 +Loss at step 650: 0.040318895131349564 +Loss at step 700: 0.03881711885333061 +Loss at step 750: 0.035916246473789215 +Loss at step 800: 0.04123677313327789 +Loss at step 850: 0.04058227315545082 +Loss at step 900: 0.03485511615872383 +Mean training loss after epoch 73: 0.04174466865824293 + +EPOCH: 74 +Loss at step 0: 0.02899175137281418 +Loss at step 50: 0.03824091702699661 +Loss at step 100: 0.038165558129549026 +Loss at step 150: 0.03265064209699631 +Loss at step 200: 0.032838623970746994 +Loss at step 250: 0.03530019149184227 +Loss at step 300: 0.03534851595759392 +Loss at step 350: 0.04351358860731125 +Loss at step 400: 0.04078420624136925 +Loss at step 450: 0.0450337752699852 +Loss at step 500: 0.03650393709540367 +Loss at step 550: 0.05287676304578781 +Loss at step 600: 0.0422467403113842 +Loss at step 650: 0.037526313215494156 +Loss at step 700: 0.03549263998866081 +Loss at step 750: 0.032682713121175766 +Loss at step 800: 0.03373374044895172 +Loss at step 850: 0.03157420828938484 +Loss at step 900: 0.03660622984170914 +Mean training loss after epoch 74: 0.04113919925548311 + +EPOCH: 75 +Loss at step 0: 0.030831068754196167 +Loss at step 50: 0.03200463950634003 +Loss at step 100: 0.03574752062559128 +Loss at step 150: 0.051985789090394974 +Loss at step 200: 0.02577095478773117 +Loss at step 250: 0.03551045060157776 +Loss at step 300: 0.036199092864990234 +Loss at step 350: 0.031102705746889114 +Loss at step 400: 0.03810339793562889 +Loss at step 450: 0.05419360101222992 +Loss at step 500: 0.05693572759628296 +Loss at step 550: 0.03602773696184158 +Loss at step 600: 0.03415317088365555 +Loss at step 650: 0.04872572049498558 +Loss at step 700: 0.039838407188653946 +Loss at step 750: 0.03620055317878723 +Loss at step 800: 0.04124224931001663 +Loss at step 850: 0.03605523332953453 +Loss at step 900: 0.05318422615528107 +Mean training loss after epoch 75: 0.04112100785872194 + +EPOCH: 76 +Loss at step 0: 0.05268241837620735 +Loss at step 50: 0.038748666644096375 +Loss at step 100: 0.05225672572851181 +Loss at step 150: 0.02982509881258011 +Loss at step 200: 0.037055786699056625 +Loss at step 250: 0.036166444420814514 +Loss at step 300: 0.042469609528779984 +Loss at step 350: 0.03820810094475746 +Loss at step 400: 0.036943428218364716 +Loss at step 450: 0.04263583943247795 +Loss at step 500: 0.042788028717041016 +Loss at step 550: 0.052858270704746246 +Loss at step 600: 0.030450038611888885 +Loss at step 650: 0.04485831782221794 +Loss at step 700: 0.06034138798713684 +Loss at step 750: 0.032158125191926956 +Loss at step 800: 0.03894932195544243 +Loss at step 850: 0.05779428780078888 +Loss at step 900: 0.0441526398062706 +Mean training loss after epoch 76: 0.04154504369944334 + +EPOCH: 77 +Loss at step 0: 0.039356961846351624 +Loss at step 50: 0.028416398912668228 +Loss at step 100: 0.03396177664399147 +Loss at step 150: 0.04099806770682335 +Loss at step 200: 0.06283032149076462 +Loss at step 250: 0.04138881340622902 +Loss at step 300: 0.061035893857479095 +Loss at step 350: 0.04593011736869812 +Loss at step 400: 0.05210472643375397 +Loss at step 450: 0.03505970537662506 +Loss at step 500: 0.032422494143247604 +Loss at step 550: 0.05127855762839317 +Loss at step 600: 0.036935120820999146 +Loss at step 650: 0.03441636264324188 +Loss at step 700: 0.03484315797686577 +Loss at step 750: 0.03388001397252083 +Loss at step 800: 0.037996839731931686 +Loss at step 850: 0.03551885113120079 +Loss at step 900: 0.033726323395967484 +Mean training loss after epoch 77: 0.04113060351747122 + +EPOCH: 78 +Loss at step 0: 0.032344914972782135 +Loss at step 50: 0.035145945847034454 +Loss at step 100: 0.03198443725705147 +Loss at step 150: 0.05172690376639366 +Loss at step 200: 0.04912606626749039 +Loss at step 250: 0.03330213204026222 +Loss at step 300: 0.04923785477876663 +Loss at step 350: 0.04205572232604027 +Loss at step 400: 0.03545050695538521 +Loss at step 450: 0.04081381857395172 +Loss at step 500: 0.04776203632354736 +Loss at step 550: 0.03815259784460068 +Loss at step 600: 0.05272816866636276 +Loss at step 650: 0.032447852194309235 +Loss at step 700: 0.0346088781952858 +Loss at step 750: 0.03316102921962738 +Loss at step 800: 0.03658919408917427 +Loss at step 850: 0.03636911138892174 +Loss at step 900: 0.04945993423461914 +Mean training loss after epoch 78: 0.041068576987205285 + +EPOCH: 79 +Loss at step 0: 0.03452589362859726 +Loss at step 50: 0.034600645303726196 +Loss at step 100: 0.06494107842445374 +Loss at step 150: 0.036988429725170135 +Loss at step 200: 0.027170222252607346 +Loss at step 250: 0.052600547671318054 +Loss at step 300: 0.0349196158349514 +Loss at step 350: 0.031728651374578476 +Loss at step 400: 0.032518353313207626 +Loss at step 450: 0.03552037850022316 +Loss at step 500: 0.03851528465747833 +Loss at step 550: 0.03267049416899681 +Loss at step 600: 0.02810988575220108 +Loss at step 650: 0.0534193180501461 +Loss at step 700: 0.02989153005182743 +Loss at step 750: 0.04602940008044243 +Loss at step 800: 0.03656817600131035 +Loss at step 850: 0.045206040143966675 +Loss at step 900: 0.03326031565666199 +Mean training loss after epoch 79: 0.0412763856264002 + +EPOCH: 80 +Loss at step 0: 0.03640799969434738 +Loss at step 50: 0.03698626905679703 +Loss at step 100: 0.036901723593473434 +Loss at step 150: 0.02941797859966755 +Loss at step 200: 0.05399346724152565 +Loss at step 250: 0.038527294993400574 +Loss at step 300: 0.03790619969367981 +Loss at step 350: 0.03662220761179924 +Loss at step 400: 0.034307632595300674 +Loss at step 450: 0.04109351709485054 +Loss at step 500: 0.03656264394521713 +Loss at step 550: 0.033020373433828354 +Loss at step 600: 0.031626660376787186 +Loss at step 650: 0.037163589149713516 +Loss at step 700: 0.03343282267451286 +Loss at step 750: 0.03530639782547951 +Loss at step 800: 0.07811196148395538 +Loss at step 850: 0.037117309868335724 +Loss at step 900: 0.036749448627233505 +Mean training loss after epoch 80: 0.04114491054252076 + +EPOCH: 81 +Loss at step 0: 0.05479753762483597 +Loss at step 50: 0.03132958337664604 +Loss at step 100: 0.037393227219581604 +Loss at step 150: 0.03205867111682892 +Loss at step 200: 0.07083668559789658 +Loss at step 250: 0.04586269333958626 +Loss at step 300: 0.060223255306482315 +Loss at step 350: 0.03582262247800827 +Loss at step 400: 0.033127203583717346 +Loss at step 450: 0.02675713784992695 +Loss at step 500: 0.035183269530534744 +Loss at step 550: 0.034161537885665894 +Loss at step 600: 0.03505062684416771 +Loss at step 650: 0.05540237948298454 +Loss at step 700: 0.03116615302860737 +Loss at step 750: 0.028590409085154533 +Loss at step 800: 0.03489331901073456 +Loss at step 850: 0.03400343284010887 +Loss at step 900: 0.06202857568860054 +Mean training loss after epoch 81: 0.04102237092684517 + +EPOCH: 82 +Loss at step 0: 0.03725624457001686 +Loss at step 50: 0.03783930093050003 +Loss at step 100: 0.03537292778491974 +Loss at step 150: 0.04294422268867493 +Loss at step 200: 0.03996248543262482 +Loss at step 250: 0.039540305733680725 +Loss at step 300: 0.033119600266218185 +Loss at step 350: 0.048681121319532394 +Loss at step 400: 0.05702071264386177 +Loss at step 450: 0.0402117483317852 +Loss at step 500: 0.040380872786045074 +Loss at step 550: 0.05610261484980583 +Loss at step 600: 0.035563670098781586 +Loss at step 650: 0.03173322603106499 +Loss at step 700: 0.03283904865384102 +Loss at step 750: 0.04451049864292145 +Loss at step 800: 0.03426874428987503 +Loss at step 850: 0.04587911069393158 +Loss at step 900: 0.039216335862874985 +Mean training loss after epoch 82: 0.041730244616185554 + +EPOCH: 83 +Loss at step 0: 0.04744863510131836 +Loss at step 50: 0.03610475733876228 +Loss at step 100: 0.035557087510824203 +Loss at step 150: 0.03861255571246147 +Loss at step 200: 0.030603276565670967 +Loss at step 250: 0.04871303215622902 +Loss at step 300: 0.07224379479885101 +Loss at step 350: 0.03971085324883461 +Loss at step 400: 0.03391997143626213 +Loss at step 450: 0.044155556708574295 +Loss at step 500: 0.034668054431676865 +Loss at step 550: 0.05175238102674484 +Loss at step 600: 0.049534719437360764 +Loss at step 650: 0.03303040564060211 +Loss at step 700: 0.029844136908650398 +Loss at step 750: 0.04341820254921913 +Loss at step 800: 0.03215205669403076 +Loss at step 850: 0.03988838940858841 +Loss at step 900: 0.05346183106303215 +Mean training loss after epoch 83: 0.041038075031073235 + +EPOCH: 84 +Loss at step 0: 0.0528486967086792 +Loss at step 50: 0.051950205117464066 +Loss at step 100: 0.040812257677316666 +Loss at step 150: 0.033961568027734756 +Loss at step 200: 0.048738449811935425 +Loss at step 250: 0.0713721364736557 +Loss at step 300: 0.031749922782182693 +Loss at step 350: 0.0486324168741703 +Loss at step 400: 0.04092682898044586 +Loss at step 450: 0.045225370675325394 +Loss at step 500: 0.03676753491163254 +Loss at step 550: 0.033339500427246094 +Loss at step 600: 0.04189130291342735 +Loss at step 650: 0.045919470489025116 +Loss at step 700: 0.03852545842528343 +Loss at step 750: 0.05126319080591202 +Loss at step 800: 0.052123866975307465 +Loss at step 850: 0.03563039004802704 +Loss at step 900: 0.032997358590364456 +Mean training loss after epoch 84: 0.041086105345837724 + +EPOCH: 85 +Loss at step 0: 0.058152444660663605 +Loss at step 50: 0.038068030029535294 +Loss at step 100: 0.03797232359647751 +Loss at step 150: 0.04052959382534027 +Loss at step 200: 0.050743792206048965 +Loss at step 250: 0.03145845606923103 +Loss at step 300: 0.03338661044836044 +Loss at step 350: 0.03677058219909668 +Loss at step 400: 0.03495321795344353 +Loss at step 450: 0.039931200444698334 +Loss at step 500: 0.03238952159881592 +Loss at step 550: 0.02847830392420292 +Loss at step 600: 0.05000064894556999 +Loss at step 650: 0.0393374003469944 +Loss at step 700: 0.03686242923140526 +Loss at step 750: 0.04062030091881752 +Loss at step 800: 0.0347592793405056 +Loss at step 850: 0.031245408579707146 +Loss at step 900: 0.038717370480298996 +Mean training loss after epoch 85: 0.04107632083909662 + +EPOCH: 86 +Loss at step 0: 0.03973394259810448 +Loss at step 50: 0.030584797263145447 +Loss at step 100: 0.06259417533874512 +Loss at step 150: 0.03153851255774498 +Loss at step 200: 0.055176712572574615 +Loss at step 250: 0.038454148918390274 +Loss at step 300: 0.04394076392054558 +Loss at step 350: 0.03507055342197418 +Loss at step 400: 0.03341430053114891 +Loss at step 450: 0.04914618283510208 +Loss at step 500: 0.04736623913049698 +Loss at step 550: 0.03985471650958061 +Loss at step 600: 0.03954179957509041 +Loss at step 650: 0.03194885700941086 +Loss at step 700: 0.035082027316093445 +Loss at step 750: 0.0482264868915081 +Loss at step 800: 0.03496174141764641 +Loss at step 850: 0.03850436955690384 +Loss at step 900: 0.057177163660526276 +Mean training loss after epoch 86: 0.04068803318194362 + +EPOCH: 87 +Loss at step 0: 0.04399581253528595 +Loss at step 50: 0.03488341346383095 +Loss at step 100: 0.03428840637207031 +Loss at step 150: 0.04180179163813591 +Loss at step 200: 0.03338738903403282 +Loss at step 250: 0.03731721267104149 +Loss at step 300: 0.03757598251104355 +Loss at step 350: 0.034594327211380005 +Loss at step 400: 0.036474548280239105 +Loss at step 450: 0.04442448914051056 +Loss at step 500: 0.027858780696988106 +Loss at step 550: 0.05683402344584465 +Loss at step 600: 0.03499235212802887 +Loss at step 650: 0.05560460314154625 +Loss at step 700: 0.036094002425670624 +Loss at step 750: 0.046759847551584244 +Loss at step 800: 0.04085097834467888 +Loss at step 850: 0.027367902919650078 +Loss at step 900: 0.034118689596652985 +Mean training loss after epoch 87: 0.04076247038975009 + +EPOCH: 88 +Loss at step 0: 0.05531320720911026 +Loss at step 50: 0.03014991246163845 +Loss at step 100: 0.05170894414186478 +Loss at step 150: 0.04032305255532265 +Loss at step 200: 0.03658030927181244 +Loss at step 250: 0.030780110508203506 +Loss at step 300: 0.034393101930618286 +Loss at step 350: 0.03057672642171383 +Loss at step 400: 0.04050949215888977 +Loss at step 450: 0.0360785648226738 +Loss at step 500: 0.03719230368733406 +Loss at step 550: 0.03799004480242729 +Loss at step 600: 0.03628097102046013 +Loss at step 650: 0.03763250261545181 +Loss at step 700: 0.036424919962882996 +Loss at step 750: 0.04473429173231125 +Loss at step 800: 0.04693128913640976 +Loss at step 850: 0.04265372082591057 +Loss at step 900: 0.031387049704790115 +Mean training loss after epoch 88: 0.04099947016146074 + +EPOCH: 89 +Loss at step 0: 0.034374941140413284 +Loss at step 50: 0.0402178056538105 +Loss at step 100: 0.03843294084072113 +Loss at step 150: 0.036413002759218216 +Loss at step 200: 0.039501357823610306 +Loss at step 250: 0.029510509222745895 +Loss at step 300: 0.036525655537843704 +Loss at step 350: 0.04213511198759079 +Loss at step 400: 0.037092261016368866 +Loss at step 450: 0.03379799425601959 +Loss at step 500: 0.03295033797621727 +Loss at step 550: 0.031381383538246155 +Loss at step 600: 0.0407857745885849 +Loss at step 650: 0.03752834349870682 +Loss at step 700: 0.03810695558786392 +Loss at step 750: 0.03579120337963104 +Loss at step 800: 0.0520140565931797 +Loss at step 850: 0.038909491151571274 +Loss at step 900: 0.03678249195218086 +Mean training loss after epoch 89: 0.040811247470329944 + +EPOCH: 90 +Loss at step 0: 0.04095301404595375 +Loss at step 50: 0.05082303658127785 +Loss at step 100: 0.04138334468007088 +Loss at step 150: 0.03449038416147232 +Loss at step 200: 0.038363419473171234 +Loss at step 250: 0.04477785900235176 +Loss at step 300: 0.03956672176718712 +Loss at step 350: 0.04098595306277275 +Loss at step 400: 0.03981344401836395 +Loss at step 450: 0.06300228089094162 +Loss at step 500: 0.051460955291986465 +Loss at step 550: 0.03568987548351288 +Loss at step 600: 0.039645079523324966 +Loss at step 650: 0.037908464670181274 +Loss at step 700: 0.03197165206074715 +Loss at step 750: 0.031707290560007095 +Loss at step 800: 0.049262162297964096 +Loss at step 850: 0.0344785712659359 +Loss at step 900: 0.034114595502614975 +Mean training loss after epoch 90: 0.040405405840195065 + +EPOCH: 91 +Loss at step 0: 0.034299690276384354 +Loss at step 50: 0.049692295491695404 +Loss at step 100: 0.04133080318570137 +Loss at step 150: 0.03282630443572998 +Loss at step 200: 0.03252938389778137 +Loss at step 250: 0.05053752288222313 +Loss at step 300: 0.0312495119869709 +Loss at step 350: 0.03907566890120506 +Loss at step 400: 0.0547638013958931 +Loss at step 450: 0.036120086908340454 +Loss at step 500: 0.04640704020857811 +Loss at step 550: 0.038280077278614044 +Loss at step 600: 0.031406719237565994 +Loss at step 650: 0.034109827131032944 +Loss at step 700: 0.037395015358924866 +Loss at step 750: 0.03358292579650879 +Loss at step 800: 0.04851951450109482 +Loss at step 850: 0.03339794650673866 +Loss at step 900: 0.035246942192316055 +Mean training loss after epoch 91: 0.04033133036085665 + +EPOCH: 92 +Loss at step 0: 0.032269254326820374 +Loss at step 50: 0.031161149963736534 +Loss at step 100: 0.060024768114089966 +Loss at step 150: 0.05082898959517479 +Loss at step 200: 0.050425995141267776 +Loss at step 250: 0.04740491509437561 +Loss at step 300: 0.054153922945261 +Loss at step 350: 0.03820345178246498 +Loss at step 400: 0.03505898267030716 +Loss at step 450: 0.03790952265262604 +Loss at step 500: 0.033213060349226 +Loss at step 550: 0.03766785189509392 +Loss at step 600: 0.03966356813907623 +Loss at step 650: 0.03682432696223259 +Loss at step 700: 0.03281429782509804 +Loss at step 750: 0.04580431431531906 +Loss at step 800: 0.04174647107720375 +Loss at step 850: 0.052905965596437454 +Loss at step 900: 0.03535788506269455 +Mean training loss after epoch 92: 0.04086645406438534 + +EPOCH: 93 +Loss at step 0: 0.031763963401317596 +Loss at step 50: 0.04610699787735939 +Loss at step 100: 0.03144437074661255 +Loss at step 150: 0.036727018654346466 +Loss at step 200: 0.029585840180516243 +Loss at step 250: 0.042516931891441345 +Loss at step 300: 0.04100366309285164 +Loss at step 350: 0.0635860487818718 +Loss at step 400: 0.03528447076678276 +Loss at step 450: 0.04262498766183853 +Loss at step 500: 0.04326464980840683 +Loss at step 550: 0.06723412871360779 +Loss at step 600: 0.03085038810968399 +Loss at step 650: 0.03192776069045067 +Loss at step 700: 0.03921186178922653 +Loss at step 750: 0.03242463245987892 +Loss at step 800: 0.054762352257966995 +Loss at step 850: 0.035244882106781006 +Loss at step 900: 0.03946463763713837 +Mean training loss after epoch 93: 0.04065965882329735 + +EPOCH: 94 +Loss at step 0: 0.04889944940805435 +Loss at step 50: 0.03690377622842789 +Loss at step 100: 0.03591502830386162 +Loss at step 150: 0.052436813712120056 +Loss at step 200: 0.034925758838653564 +Loss at step 250: 0.0353705957531929 +Loss at step 300: 0.061986085027456284 +Loss at step 350: 0.04875928536057472 +Loss at step 400: 0.040921784937381744 +Loss at step 450: 0.03871002793312073 +Loss at step 500: 0.03833552077412605 +Loss at step 550: 0.036446262151002884 +Loss at step 600: 0.039174746721982956 +Loss at step 650: 0.03723873943090439 +Loss at step 700: 0.03249559924006462 +Loss at step 750: 0.033896006643772125 +Loss at step 800: 0.03666757792234421 +Loss at step 850: 0.03137712925672531 +Loss at step 900: 0.051495201885700226 +Mean training loss after epoch 94: 0.04074181405418336 + +EPOCH: 95 +Loss at step 0: 0.0352119505405426 +Loss at step 50: 0.03640684485435486 +Loss at step 100: 0.032651618123054504 +Loss at step 150: 0.04401347041130066 +Loss at step 200: 0.040212228894233704 +Loss at step 250: 0.07343073934316635 +Loss at step 300: 0.03445030748844147 +Loss at step 350: 0.05565089359879494 +Loss at step 400: 0.03517284616827965 +Loss at step 450: 0.03394188731908798 +Loss at step 500: 0.032530587166547775 +Loss at step 550: 0.05683201923966408 +Loss at step 600: 0.03782414272427559 +Loss at step 650: 0.03312551975250244 +Loss at step 700: 0.03550421819090843 +Loss at step 750: 0.051708586513996124 +Loss at step 800: 0.04553850367665291 +Loss at step 850: 0.05390772596001625 +Loss at step 900: 0.029858194291591644 +Mean training loss after epoch 95: 0.040436128432403746 + +EPOCH: 96 +Loss at step 0: 0.03860674425959587 +Loss at step 50: 0.04928284138441086 +Loss at step 100: 0.030112681910395622 +Loss at step 150: 0.030009303241968155 +Loss at step 200: 0.03227289393544197 +Loss at step 250: 0.035471826791763306 +Loss at step 300: 0.045503050088882446 +Loss at step 350: 0.03439325466752052 +Loss at step 400: 0.03252046927809715 +Loss at step 450: 0.041864316910505295 +Loss at step 500: 0.06249577924609184 +Loss at step 550: 0.032730989158153534 +Loss at step 600: 0.050127070397138596 +Loss at step 650: 0.05265422537922859 +Loss at step 700: 0.033094629645347595 +Loss at step 750: 0.03569718822836876 +Loss at step 800: 0.03472054749727249 +Loss at step 850: 0.048938579857349396 +Loss at step 900: 0.04076386243104935 +Mean training loss after epoch 96: 0.0409784188537773 + +EPOCH: 97 +Loss at step 0: 0.039326950907707214 +Loss at step 50: 0.032597895711660385 +Loss at step 100: 0.06816505640745163 +Loss at step 150: 0.03475397452712059 +Loss at step 200: 0.05419183522462845 +Loss at step 250: 0.0331057570874691 +Loss at step 300: 0.04599350318312645 +Loss at step 350: 0.031561993062496185 +Loss at step 400: 0.03588414564728737 +Loss at step 450: 0.044904645532369614 +Loss at step 500: 0.03464886173605919 +Loss at step 550: 0.041914213448762894 +Loss at step 600: 0.03834737092256546 +Loss at step 650: 0.051248062402009964 +Loss at step 700: 0.03469805046916008 +Loss at step 750: 0.03618532046675682 +Loss at step 800: 0.0414600595831871 +Loss at step 850: 0.03126781806349754 +Loss at step 900: 0.04621324688196182 +Mean training loss after epoch 97: 0.04076281934778001 + +EPOCH: 98 +Loss at step 0: 0.03838682174682617 +Loss at step 50: 0.05533002316951752 +Loss at step 100: 0.030290279537439346 +Loss at step 150: 0.05067134276032448 +Loss at step 200: 0.04985364153981209 +Loss at step 250: 0.06334389001131058 +Loss at step 300: 0.05746433138847351 +Loss at step 350: 0.031397782266139984 +Loss at step 400: 0.03370814770460129 +Loss at step 450: 0.034410182386636734 +Loss at step 500: 0.05077733099460602 +Loss at step 550: 0.03374994918704033 +Loss at step 600: 0.042185988277196884 +Loss at step 650: 0.03396277129650116 +Loss at step 700: 0.036716412752866745 +Loss at step 750: 0.03274477645754814 +Loss at step 800: 0.033424075692892075 +Loss at step 850: 0.04032216593623161 +Loss at step 900: 0.03501669317483902 +Mean training loss after epoch 98: 0.040670416941409555 + +EPOCH: 99 +Loss at step 0: 0.03827624395489693 +Loss at step 50: 0.048487599939107895 +Loss at step 100: 0.049376241862773895 +Loss at step 150: 0.0495765246450901 +Loss at step 200: 0.031661536544561386 +Loss at step 250: 0.03776783123612404 +Loss at step 300: 0.0347079262137413 +Loss at step 350: 0.05254741013050079 +Loss at step 400: 0.05152810364961624 +Loss at step 450: 0.035800751298666 +Loss at step 500: 0.06741048395633698 +Loss at step 550: 0.03415104001760483 +Loss at step 600: 0.03682698681950569 +Loss at step 650: 0.03967992216348648 +Loss at step 700: 0.027164705097675323 +Loss at step 750: 0.050535522401332855 +Loss at step 800: 0.02985253557562828 +Loss at step 850: 0.038912225514650345 +Loss at step 900: 0.035812925547361374 +Mean training loss after epoch 99: 0.04073020558891647 + +EPOCH: 100 +Loss at step 0: 0.032607365399599075 +Loss at step 50: 0.06099787354469299 +Loss at step 100: 0.05005550757050514 +Loss at step 150: 0.03425469622015953 +Loss at step 200: 0.04151573032140732 +Loss at step 250: 0.048218220472335815 +Loss at step 300: 0.03169530630111694 +Loss at step 350: 0.028787700459361076 +Loss at step 400: 0.029403982684016228 +Loss at step 450: 0.03593473508954048 +Loss at step 500: 0.046153709292411804 +Loss at step 550: 0.03828473761677742 +Loss at step 600: 0.03990305960178375 +Loss at step 650: 0.062365055084228516 +Loss at step 700: 0.038013067096471786 +Loss at step 750: 0.033276695758104324 +Loss at step 800: 0.03590114414691925 +Loss at step 850: 0.040111057460308075 +Loss at step 900: 0.04882941395044327 +Mean training loss after epoch 100: 0.04036760972792914 + +EPOCH: 101 +Loss at step 0: 0.03448909521102905 +Loss at step 50: 0.03724827989935875 +Loss at step 100: 0.06898139417171478 +Loss at step 150: 0.02924741432070732 +Loss at step 200: 0.03393682464957237 +Loss at step 250: 0.03785952553153038 +Loss at step 300: 0.03147255256772041 +Loss at step 350: 0.0666293352842331 +Loss at step 400: 0.05227957293391228 +Loss at step 450: 0.032412849366664886 +Loss at step 500: 0.028628254309296608 +Loss at step 550: 0.03578814119100571 +Loss at step 600: 0.052394889295101166 +Loss at step 650: 0.0314219631254673 +Loss at step 700: 0.03494124859571457 +Loss at step 750: 0.03220677748322487 +Loss at step 800: 0.0704156830906868 +Loss at step 850: 0.038153473287820816 +Loss at step 900: 0.04836264252662659 +Mean training loss after epoch 101: 0.04046514103494918 + +EPOCH: 102 +Loss at step 0: 0.05540713295340538 +Loss at step 50: 0.06708371639251709 +Loss at step 100: 0.038608841598033905 +Loss at step 150: 0.03573644906282425 +Loss at step 200: 0.039247605949640274 +Loss at step 250: 0.032899416983127594 +Loss at step 300: 0.03535171225667 +Loss at step 350: 0.03774742782115936 +Loss at step 400: 0.03619568049907684 +Loss at step 450: 0.032232049852609634 +Loss at step 500: 0.05181793496012688 +Loss at step 550: 0.03434407338500023 +Loss at step 600: 0.03519069403409958 +Loss at step 650: 0.036724913865327835 +Loss at step 700: 0.038438115268945694 +Loss at step 750: 0.044145528227090836 +Loss at step 800: 0.046652257442474365 +Loss at step 850: 0.0551580972969532 +Loss at step 900: 0.03391639143228531 +Mean training loss after epoch 102: 0.04041277822742521 + +EPOCH: 103 +Loss at step 0: 0.03112451732158661 +Loss at step 50: 0.04157055541872978 +Loss at step 100: 0.047702889889478683 +Loss at step 150: 0.03303459286689758 +Loss at step 200: 0.03123403526842594 +Loss at step 250: 0.046796660870313644 +Loss at step 300: 0.06682164967060089 +Loss at step 350: 0.030688513070344925 +Loss at step 400: 0.04190076142549515 +Loss at step 450: 0.05001092329621315 +Loss at step 500: 0.03804301470518112 +Loss at step 550: 0.058865346014499664 +Loss at step 600: 0.05053896829485893 +Loss at step 650: 0.03335892781615257 +Loss at step 700: 0.05287832021713257 +Loss at step 750: 0.05739011988043785 +Loss at step 800: 0.03445238992571831 +Loss at step 850: 0.061511073261499405 +Loss at step 900: 0.029992185533046722 +Mean training loss after epoch 103: 0.04018082712759087 + +EPOCH: 104 +Loss at step 0: 0.05760432034730911 +Loss at step 50: 0.05041252821683884 +Loss at step 100: 0.027177143841981888 +Loss at step 150: 0.039961639791727066 +Loss at step 200: 0.0325581319630146 +Loss at step 250: 0.05665874481201172 +Loss at step 300: 0.03727357089519501 +Loss at step 350: 0.028202036395668983 +Loss at step 400: 0.03974505886435509 +Loss at step 450: 0.03842462599277496 +Loss at step 500: 0.03134937211871147 +Loss at step 550: 0.04020616412162781 +Loss at step 600: 0.04058777168393135 +Loss at step 650: 0.03416196629405022 +Loss at step 700: 0.06070096045732498 +Loss at step 750: 0.03729524463415146 +Loss at step 800: 0.04380563274025917 +Loss at step 850: 0.03157120570540428 +Loss at step 900: 0.03815029188990593 +Mean training loss after epoch 104: 0.040242427686002974 + +EPOCH: 105 +Loss at step 0: 0.0319080725312233 +Loss at step 50: 0.03261766955256462 +Loss at step 100: 0.04951752722263336 +Loss at step 150: 0.031716614961624146 +Loss at step 200: 0.046612419188022614 +Loss at step 250: 0.0327620655298233 +Loss at step 300: 0.042399682104587555 +Loss at step 350: 0.04419102892279625 +Loss at step 400: 0.032193947583436966 +Loss at step 450: 0.03805965930223465 +Loss at step 500: 0.049167387187480927 +Loss at step 550: 0.03526442497968674 +Loss at step 600: 0.034376244992017746 +Loss at step 650: 0.0354459322988987 +Loss at step 700: 0.038397882133722305 +Loss at step 750: 0.05540142208337784 +Loss at step 800: 0.030147867277264595 +Loss at step 850: 0.060497745871543884 +Loss at step 900: 0.041193459182977676 +Mean training loss after epoch 105: 0.04030951934614416 + +EPOCH: 106 +Loss at step 0: 0.03829112648963928 +Loss at step 50: 0.026048794388771057 +Loss at step 100: 0.0357942208647728 +Loss at step 150: 0.05229229852557182 +Loss at step 200: 0.04837630316615105 +Loss at step 250: 0.051231976598501205 +Loss at step 300: 0.04065598174929619 +Loss at step 350: 0.03193920850753784 +Loss at step 400: 0.0320788212120533 +Loss at step 450: 0.05464372783899307 +Loss at step 500: 0.026041459292173386 +Loss at step 550: 0.035844311118125916 +Loss at step 600: 0.029609395191073418 +Loss at step 650: 0.03459584712982178 +Loss at step 700: 0.036759935319423676 +Loss at step 750: 0.0362151637673378 +Loss at step 800: 0.0336429588496685 +Loss at step 850: 0.046769093722105026 +Loss at step 900: 0.03425263985991478 +Mean training loss after epoch 106: 0.04064369485822759 + +EPOCH: 107 +Loss at step 0: 0.07431833446025848 +Loss at step 50: 0.0602848082780838 +Loss at step 100: 0.029872529208660126 +Loss at step 150: 0.035330332815647125 +Loss at step 200: 0.045894455164670944 +Loss at step 250: 0.030223330482840538 +Loss at step 300: 0.036641091108322144 +Loss at step 350: 0.0410873144865036 +Loss at step 400: 0.044177498668432236 +Loss at step 450: 0.03326188400387764 +Loss at step 500: 0.040481384843587875 +Loss at step 550: 0.04043734073638916 +Loss at step 600: 0.03910863399505615 +Loss at step 650: 0.04387463256716728 +Loss at step 700: 0.03435670956969261 +Loss at step 750: 0.0295183677226305 +Loss at step 800: 0.03602305427193642 +Loss at step 850: 0.02967149391770363 +Loss at step 900: 0.03301510587334633 +Mean training loss after epoch 107: 0.040982513295323736 + +EPOCH: 108 +Loss at step 0: 0.05611558258533478 +Loss at step 50: 0.035307254642248154 +Loss at step 100: 0.02783018909394741 +Loss at step 150: 0.039593588560819626 +Loss at step 200: 0.06363610923290253 +Loss at step 250: 0.03429553285241127 +Loss at step 300: 0.03413379192352295 +Loss at step 350: 0.028662709519267082 +Loss at step 400: 0.039821330457925797 +Loss at step 450: 0.03959920257329941 +Loss at step 500: 0.036259181797504425 +Loss at step 550: 0.05385353043675423 +Loss at step 600: 0.026031946763396263 +Loss at step 650: 0.03385019302368164 +Loss at step 700: 0.028812086209654808 +Loss at step 750: 0.03397990018129349 +Loss at step 800: 0.03166824206709862 +Loss at step 850: 0.0317801870405674 +Loss at step 900: 0.03626967966556549 +Mean training loss after epoch 108: 0.040677893166142357 + +EPOCH: 109 +Loss at step 0: 0.04203055799007416 +Loss at step 50: 0.028228793293237686 +Loss at step 100: 0.04802173748612404 +Loss at step 150: 0.0384133942425251 +Loss at step 200: 0.0397668182849884 +Loss at step 250: 0.03989081457257271 +Loss at step 300: 0.03930295258760452 +Loss at step 350: 0.03303258866071701 +Loss at step 400: 0.05263761058449745 +Loss at step 450: 0.03365996852517128 +Loss at step 500: 0.05358096584677696 +Loss at step 550: 0.04638715088367462 +Loss at step 600: 0.03853611648082733 +Loss at step 650: 0.03495076298713684 +Loss at step 700: 0.06538189202547073 +Loss at step 750: 0.028637005016207695 +Loss at step 800: 0.035671524703502655 +Loss at step 850: 0.038928259164094925 +Loss at step 900: 0.030003154650330544 +Mean training loss after epoch 109: 0.040187274618173584 + +EPOCH: 110 +Loss at step 0: 0.03870801627635956 +Loss at step 50: 0.03549281880259514 +Loss at step 100: 0.05035026744008064 +Loss at step 150: 0.04582242667675018 +Loss at step 200: 0.03825569152832031 +Loss at step 250: 0.030541272833943367 +Loss at step 300: 0.039547670632600784 +Loss at step 350: 0.033150963485240936 +Loss at step 400: 0.033019401133060455 +Loss at step 450: 0.03229507431387901 +Loss at step 500: 0.03577958792448044 +Loss at step 550: 0.03508676588535309 +Loss at step 600: 0.032196156680583954 +Loss at step 650: 0.039787814021110535 +Loss at step 700: 0.05168083682656288 +Loss at step 750: 0.04113023355603218 +Loss at step 800: 0.03392178192734718 +Loss at step 850: 0.029219821095466614 +Loss at step 900: 0.04385785758495331 +Mean training loss after epoch 110: 0.04062215005283925 + +EPOCH: 111 +Loss at step 0: 0.0526747927069664 +Loss at step 50: 0.038879457861185074 +Loss at step 100: 0.03004498779773712 +Loss at step 150: 0.03682366758584976 +Loss at step 200: 0.03794126212596893 +Loss at step 250: 0.0487305149435997 +Loss at step 300: 0.04967457801103592 +Loss at step 350: 0.035163119435310364 +Loss at step 400: 0.03427344933152199 +Loss at step 450: 0.03088877536356449 +Loss at step 500: 0.03169834613800049 +Loss at step 550: 0.0374300479888916 +Loss at step 600: 0.033706407994031906 +Loss at step 650: 0.035374246537685394 +Loss at step 700: 0.039848484098911285 +Loss at step 750: 0.06113433837890625 +Loss at step 800: 0.04747779294848442 +Loss at step 850: 0.03599917143583298 +Loss at step 900: 0.03615652397274971 +Mean training loss after epoch 111: 0.039936521320915554 + +EPOCH: 112 +Loss at step 0: 0.0373336486518383 +Loss at step 50: 0.05114150419831276 +Loss at step 100: 0.05242919921875 +Loss at step 150: 0.03499115630984306 +Loss at step 200: 0.04813041538000107 +Loss at step 250: 0.03306647390127182 +Loss at step 300: 0.0505673922598362 +Loss at step 350: 0.03386365622282028 +Loss at step 400: 0.03137662261724472 +Loss at step 450: 0.03400233015418053 +Loss at step 500: 0.03519631177186966 +Loss at step 550: 0.04086479917168617 +Loss at step 600: 0.03908710181713104 +Loss at step 650: 0.05335430055856705 +Loss at step 700: 0.036864008754491806 +Loss at step 750: 0.051715828478336334 +Loss at step 800: 0.051827117800712585 +Loss at step 850: 0.03942806273698807 +Loss at step 900: 0.04879195615649223 +Mean training loss after epoch 112: 0.04063963858104909 + +EPOCH: 113 +Loss at step 0: 0.03795037046074867 +Loss at step 50: 0.0358436293900013 +Loss at step 100: 0.04843265190720558 +Loss at step 150: 0.05632951855659485 +Loss at step 200: 0.03601030632853508 +Loss at step 250: 0.03268331661820412 +Loss at step 300: 0.038877252489328384 +Loss at step 350: 0.045339521020650864 +Loss at step 400: 0.032848652452230453 +Loss at step 450: 0.04735710099339485 +Loss at step 500: 0.04660831019282341 +Loss at step 550: 0.05198223516345024 +Loss at step 600: 0.04441724345088005 +Loss at step 650: 0.05480624735355377 +Loss at step 700: 0.06743644922971725 +Loss at step 750: 0.03541141003370285 +Loss at step 800: 0.03227515518665314 +Loss at step 850: 0.03646775335073471 +Loss at step 900: 0.03438189998269081 +Mean training loss after epoch 113: 0.04005539193471421 + +EPOCH: 114 +Loss at step 0: 0.036579687148332596 +Loss at step 50: 0.03507820516824722 +Loss at step 100: 0.029598532244563103 +Loss at step 150: 0.03884422406554222 +Loss at step 200: 0.05779637023806572 +Loss at step 250: 0.04017007723450661 +Loss at step 300: 0.04302921146154404 +Loss at step 350: 0.033810097724199295 +Loss at step 400: 0.03218913450837135 +Loss at step 450: 0.03484483063220978 +Loss at step 500: 0.05256728455424309 +Loss at step 550: 0.032125137746334076 +Loss at step 600: 0.034768857061862946 +Loss at step 650: 0.036406390368938446 +Loss at step 700: 0.05086382478475571 +Loss at step 750: 0.03705102577805519 +Loss at step 800: 0.03796803578734398 +Loss at step 850: 0.037255316972732544 +Loss at step 900: 0.03930399939417839 +Mean training loss after epoch 114: 0.03997340493960612 + +EPOCH: 115 +Loss at step 0: 0.03761971741914749 +Loss at step 50: 0.02730981633067131 +Loss at step 100: 0.029183940961956978 +Loss at step 150: 0.029592841863632202 +Loss at step 200: 0.06739065796136856 +Loss at step 250: 0.038498494774103165 +Loss at step 300: 0.0405089445412159 +Loss at step 350: 0.036972276866436005 +Loss at step 400: 0.034451935440301895 +Loss at step 450: 0.045539602637290955 +Loss at step 500: 0.03423967957496643 +Loss at step 550: 0.03492945432662964 +Loss at step 600: 0.04334418848156929 +Loss at step 650: 0.03467882424592972 +Loss at step 700: 0.03075747936964035 +Loss at step 750: 0.038405630737543106 +Loss at step 800: 0.05285104736685753 +Loss at step 850: 0.03415987268090248 +Loss at step 900: 0.03387216106057167 +Mean training loss after epoch 115: 0.04025659561435233 + +EPOCH: 116 +Loss at step 0: 0.036216430366039276 +Loss at step 50: 0.05867926776409149 +Loss at step 100: 0.04880305752158165 +Loss at step 150: 0.024054808542132378 +Loss at step 200: 0.03405758738517761 +Loss at step 250: 0.0369703583419323 +Loss at step 300: 0.043173935264348984 +Loss at step 350: 0.03779338672757149 +Loss at step 400: 0.0379754900932312 +Loss at step 450: 0.04397716745734215 +Loss at step 500: 0.02539883553981781 +Loss at step 550: 0.03261855989694595 +Loss at step 600: 0.036792535334825516 +Loss at step 650: 0.03261512145400047 +Loss at step 700: 0.048828668892383575 +Loss at step 750: 0.03268284723162651 +Loss at step 800: 0.05022961646318436 +Loss at step 850: 0.04113951325416565 +Loss at step 900: 0.03498257324099541 +Mean training loss after epoch 116: 0.04056224513894269 + +EPOCH: 117 +Loss at step 0: 0.0348486453294754 +Loss at step 50: 0.04878906533122063 +Loss at step 100: 0.044236086308956146 +Loss at step 150: 0.03162962570786476 +Loss at step 200: 0.03578827157616615 +Loss at step 250: 0.030955882743000984 +Loss at step 300: 0.03401421010494232 +Loss at step 350: 0.03497142344713211 +Loss at step 400: 0.035778697580099106 +Loss at step 450: 0.03437361121177673 +Loss at step 500: 0.04439618065953255 +Loss at step 550: 0.03746693581342697 +Loss at step 600: 0.034327432513237 +Loss at step 650: 0.033807624131441116 +Loss at step 700: 0.03443426638841629 +Loss at step 750: 0.031515758484601974 +Loss at step 800: 0.03196234628558159 +Loss at step 850: 0.037271417677402496 +Loss at step 900: 0.030908746644854546 +Mean training loss after epoch 117: 0.03997806832194328 + +EPOCH: 118 +Loss at step 0: 0.06313231587409973 +Loss at step 50: 0.06051144748926163 +Loss at step 100: 0.04856443777680397 +Loss at step 150: 0.03401831164956093 +Loss at step 200: 0.03721427172422409 +Loss at step 250: 0.03343229740858078 +Loss at step 300: 0.03522949293255806 +Loss at step 350: 0.04258454963564873 +Loss at step 400: 0.06014687940478325 +Loss at step 450: 0.03544767573475838 +Loss at step 500: 0.039289381355047226 +Loss at step 550: 0.0608576163649559 +Loss at step 600: 0.03271527960896492 +Loss at step 650: 0.04725121706724167 +Loss at step 700: 0.04453769326210022 +Loss at step 750: 0.02915090322494507 +Loss at step 800: 0.05556664988398552 +Loss at step 850: 0.02730882354080677 +Loss at step 900: 0.030028440058231354 +Mean training loss after epoch 118: 0.04058234326279303 + +EPOCH: 119 +Loss at step 0: 0.037382058799266815 +Loss at step 50: 0.05149533599615097 +Loss at step 100: 0.03135047107934952 +Loss at step 150: 0.030940087512135506 +Loss at step 200: 0.03823164850473404 +Loss at step 250: 0.03651123121380806 +Loss at step 300: 0.03233299031853676 +Loss at step 350: 0.036667048931121826 +Loss at step 400: 0.03249666467308998 +Loss at step 450: 0.03559266775846481 +Loss at step 500: 0.0458882637321949 +Loss at step 550: 0.03821825981140137 +Loss at step 600: 0.033053841441869736 +Loss at step 650: 0.03599870204925537 +Loss at step 700: 0.029937120154500008 +Loss at step 750: 0.03885503485798836 +Loss at step 800: 0.03379277139902115 +Loss at step 850: 0.04492877423763275 +Loss at step 900: 0.04187827929854393 +Mean training loss after epoch 119: 0.04040607897195417 + +EPOCH: 120 +Loss at step 0: 0.028167858719825745 +Loss at step 50: 0.04726026579737663 +Loss at step 100: 0.0361696258187294 +Loss at step 150: 0.030510244891047478 +Loss at step 200: 0.0330553762614727 +Loss at step 250: 0.03535635769367218 +Loss at step 300: 0.05735327675938606 +Loss at step 350: 0.03452423959970474 +Loss at step 400: 0.04450173303484917 +Loss at step 450: 0.03576704487204552 +Loss at step 500: 0.046488843858242035 +Loss at step 550: 0.04781826585531235 +Loss at step 600: 0.04467238113284111 +Loss at step 650: 0.04754528030753136 +Loss at step 700: 0.043728072196245193 +Loss at step 750: 0.04102247580885887 +Loss at step 800: 0.054334353655576706 +Loss at step 850: 0.04936356842517853 +Loss at step 900: 0.044593654572963715 +Mean training loss after epoch 120: 0.03998779853198256 + +EPOCH: 121 +Loss at step 0: 0.05285077169537544 +Loss at step 50: 0.028145067393779755 +Loss at step 100: 0.029211612418293953 +Loss at step 150: 0.04488343000411987 +Loss at step 200: 0.029937224462628365 +Loss at step 250: 0.05586611479520798 +Loss at step 300: 0.047047797590494156 +Loss at step 350: 0.052246637642383575 +Loss at step 400: 0.05250212177634239 +Loss at step 450: 0.046561263501644135 +Loss at step 500: 0.029796786606311798 +Loss at step 550: 0.03729530796408653 +Loss at step 600: 0.035116907209157944 +Loss at step 650: 0.038523416966199875 +Loss at step 700: 0.055753014981746674 +Loss at step 750: 0.030627282336354256 +Loss at step 800: 0.036290332674980164 +Loss at step 850: 0.03514028340578079 +Loss at step 900: 0.0453634150326252 +Mean training loss after epoch 121: 0.040457542389948996 + +EPOCH: 122 +Loss at step 0: 0.031591176986694336 +Loss at step 50: 0.03447073698043823 +Loss at step 100: 0.041707564145326614 +Loss at step 150: 0.06623993813991547 +Loss at step 200: 0.03063139319419861 +Loss at step 250: 0.033697038888931274 +Loss at step 300: 0.05644533410668373 +Loss at step 350: 0.06907740980386734 +Loss at step 400: 0.036720581352710724 +Loss at step 450: 0.0389428436756134 +Loss at step 500: 0.02795400097966194 +Loss at step 550: 0.03139829635620117 +Loss at step 600: 0.03670325130224228 +Loss at step 650: 0.04355520382523537 +Loss at step 700: 0.03025864250957966 +Loss at step 750: 0.0512089803814888 +Loss at step 800: 0.0327790305018425 +Loss at step 850: 0.03410333767533302 +Loss at step 900: 0.034768156707286835 +Mean training loss after epoch 122: 0.04006110403790022 + +EPOCH: 123 +Loss at step 0: 0.037590596824884415 +Loss at step 50: 0.05157771334052086 +Loss at step 100: 0.05391733720898628 +Loss at step 150: 0.05016305297613144 +Loss at step 200: 0.04134446755051613 +Loss at step 250: 0.037453681230545044 +Loss at step 300: 0.05308147147297859 +Loss at step 350: 0.028448354452848434 +Loss at step 400: 0.03311985731124878 +Loss at step 450: 0.047355495393276215 +Loss at step 500: 0.03463435545563698 +Loss at step 550: 0.034671586006879807 +Loss at step 600: 0.03247098624706268 +Loss at step 650: 0.048032741993665695 +Loss at step 700: 0.03710617870092392 +Loss at step 750: 0.031204132363200188 +Loss at step 800: 0.038457345217466354 +Loss at step 850: 0.043605733662843704 +Loss at step 900: 0.03489955887198448 +Mean training loss after epoch 123: 0.04032010728044551 + +EPOCH: 124 +Loss at step 0: 0.033983953297138214 +Loss at step 50: 0.04215710237622261 +Loss at step 100: 0.033741917461156845 +Loss at step 150: 0.037536218762397766 +Loss at step 200: 0.035390716046094894 +Loss at step 250: 0.033167146146297455 +Loss at step 300: 0.03190934285521507 +Loss at step 350: 0.03862353414297104 +Loss at step 400: 0.03355590999126434 +Loss at step 450: 0.03378209099173546 +Loss at step 500: 0.03174929693341255 +Loss at step 550: 0.031022535637021065 +Loss at step 600: 0.04976525530219078 +Loss at step 650: 0.04322446137666702 +Loss at step 700: 0.03885587304830551 +Loss at step 750: 0.037186697125434875 +Loss at step 800: 0.04601681977510452 +Loss at step 850: 0.03291945904493332 +Loss at step 900: 0.042094048112630844 +Mean training loss after epoch 124: 0.040002900197593644 + +EPOCH: 125 +Loss at step 0: 0.0500323623418808 +Loss at step 50: 0.05065305903553963 +Loss at step 100: 0.033764030784368515 +Loss at step 150: 0.050167616456747055 +Loss at step 200: 0.030769914388656616 +Loss at step 250: 0.04813048616051674 +Loss at step 300: 0.03515201061964035 +Loss at step 350: 0.03375561162829399 +Loss at step 400: 0.050982020795345306 +Loss at step 450: 0.035507962107658386 +Loss at step 500: 0.03745051100850105 +Loss at step 550: 0.03788281977176666 +Loss at step 600: 0.03262507915496826 +Loss at step 650: 0.032084278762340546 +Loss at step 700: 0.040814898908138275 +Loss at step 750: 0.03551555424928665 +Loss at step 800: 0.03950602188706398 +Loss at step 850: 0.03731091693043709 +Loss at step 900: 0.034966662526130676 +Mean training loss after epoch 125: 0.040024525072894244 + +EPOCH: 126 +Loss at step 0: 0.034171294420957565 +Loss at step 50: 0.033561307936906815 +Loss at step 100: 0.0351739265024662 +Loss at step 150: 0.0548534132540226 +Loss at step 200: 0.03751838952302933 +Loss at step 250: 0.045688968151807785 +Loss at step 300: 0.04876801744103432 +Loss at step 350: 0.052135396748781204 +Loss at step 400: 0.034007418900728226 +Loss at step 450: 0.03813653811812401 +Loss at step 500: 0.06047852709889412 +Loss at step 550: 0.06819915026426315 +Loss at step 600: 0.03307413309812546 +Loss at step 650: 0.035687174648046494 +Loss at step 700: 0.03782489895820618 +Loss at step 750: 0.03208120912313461 +Loss at step 800: 0.062991663813591 +Loss at step 850: 0.04625096544623375 +Loss at step 900: 0.0389222614467144 +Mean training loss after epoch 126: 0.03970333728303851 + +EPOCH: 127 +Loss at step 0: 0.03681530803442001 +Loss at step 50: 0.04381631314754486 +Loss at step 100: 0.05013523995876312 +Loss at step 150: 0.06218918785452843 +Loss at step 200: 0.03095322847366333 +Loss at step 250: 0.03979748860001564 +Loss at step 300: 0.06336919218301773 +Loss at step 350: 0.03274674341082573 +Loss at step 400: 0.04084060713648796 +Loss at step 450: 0.03177779167890549 +Loss at step 500: 0.03313849866390228 +Loss at step 550: 0.038118936121463776 +Loss at step 600: 0.03555034101009369 +Loss at step 650: 0.0577574260532856 +Loss at step 700: 0.03207573667168617 +Loss at step 750: 0.020524565130472183 +Loss at step 800: 0.03095831535756588 +Loss at step 850: 0.026542063802480698 +Loss at step 900: 0.051946502178907394 +Mean training loss after epoch 127: 0.039454418913657856 + +EPOCH: 128 +Loss at step 0: 0.03673482686281204 +Loss at step 50: 0.03247862681746483 +Loss at step 100: 0.031764429062604904 +Loss at step 150: 0.02867070399224758 +Loss at step 200: 0.039199747145175934 +Loss at step 250: 0.05398840084671974 +Loss at step 300: 0.06494774669408798 +Loss at step 350: 0.05154864490032196 +Loss at step 400: 0.0476166307926178 +Loss at step 450: 0.06569583714008331 +Loss at step 500: 0.03495299443602562 +Loss at step 550: 0.03356633707880974 +Loss at step 600: 0.046172380447387695 +Loss at step 650: 0.047938257455825806 +Loss at step 700: 0.036930497735738754 +Loss at step 750: 0.038262851536273956 +Loss at step 800: 0.03517230600118637 +Loss at step 850: 0.034791771322488785 +Loss at step 900: 0.04119005799293518 +Mean training loss after epoch 128: 0.04036835033788101 + +EPOCH: 129 +Loss at step 0: 0.03706024959683418 +Loss at step 50: 0.03425043448805809 +Loss at step 100: 0.02833772636950016 +Loss at step 150: 0.03949480876326561 +Loss at step 200: 0.03533528372645378 +Loss at step 250: 0.03487789258360863 +Loss at step 300: 0.048765428364276886 +Loss at step 350: 0.044877734035253525 +Loss at step 400: 0.037002693861722946 +Loss at step 450: 0.04018561169505119 +Loss at step 500: 0.03295544534921646 +Loss at step 550: 0.049722786992788315 +Loss at step 600: 0.055026598274707794 +Loss at step 650: 0.03380793333053589 +Loss at step 700: 0.05094536021351814 +Loss at step 750: 0.03653072938323021 +Loss at step 800: 0.03610605001449585 +Loss at step 850: 0.040753696113824844 +Loss at step 900: 0.03856388479471207 +Mean training loss after epoch 129: 0.039871999711942066 + +EPOCH: 130 +Loss at step 0: 0.048286810517311096 +Loss at step 50: 0.047162044793367386 +Loss at step 100: 0.038353268057107925 +Loss at step 150: 0.0325942263007164 +Loss at step 200: 0.03514799103140831 +Loss at step 250: 0.03141734004020691 +Loss at step 300: 0.04474622756242752 +Loss at step 350: 0.03544167801737785 +Loss at step 400: 0.038670141249895096 +Loss at step 450: 0.046402618288993835 +Loss at step 500: 0.03563407063484192 +Loss at step 550: 0.04327638819813728 +Loss at step 600: 0.040320947766304016 +Loss at step 650: 0.040106456726789474 +Loss at step 700: 0.03996186703443527 +Loss at step 750: 0.037617284804582596 +Loss at step 800: 0.039552800357341766 +Loss at step 850: 0.06186339259147644 +Loss at step 900: 0.03492647036910057 +Mean training loss after epoch 130: 0.03991808345926596 + +EPOCH: 131 +Loss at step 0: 0.036616213619709015 +Loss at step 50: 0.04928402975201607 +Loss at step 100: 0.04600382223725319 +Loss at step 150: 0.05377383902668953 +Loss at step 200: 0.04120911657810211 +Loss at step 250: 0.0662044882774353 +Loss at step 300: 0.04024979844689369 +Loss at step 350: 0.030393140390515327 +Loss at step 400: 0.042372897267341614 +Loss at step 450: 0.037753261625766754 +Loss at step 500: 0.031086349859833717 +Loss at step 550: 0.03298509865999222 +Loss at step 600: 0.03297559171915054 +Loss at step 650: 0.030049888417124748 +Loss at step 700: 0.035726603120565414 +Loss at step 750: 0.04066278785467148 +Loss at step 800: 0.03545417636632919 +Loss at step 850: 0.03513539209961891 +Loss at step 900: 0.0556241013109684 +Mean training loss after epoch 131: 0.0402852121804124 + +EPOCH: 132 +Loss at step 0: 0.039677299559116364 +Loss at step 50: 0.04367973282933235 +Loss at step 100: 0.05246155336499214 +Loss at step 150: 0.034604262560606 +Loss at step 200: 0.049703508615493774 +Loss at step 250: 0.03727739304304123 +Loss at step 300: 0.049680761992931366 +Loss at step 350: 0.0293569453060627 +Loss at step 400: 0.0443883016705513 +Loss at step 450: 0.0507032610476017 +Loss at step 500: 0.03775497153401375 +Loss at step 550: 0.0356106273829937 +Loss at step 600: 0.06872064620256424 +Loss at step 650: 0.03747250884771347 +Loss at step 700: 0.03364582359790802 +Loss at step 750: 0.05006100982427597 +Loss at step 800: 0.049780216068029404 +Loss at step 850: 0.035216640681028366 +Loss at step 900: 0.03414424508810043 +Mean training loss after epoch 132: 0.03970895380750775 + +EPOCH: 133 +Loss at step 0: 0.036541033536195755 +Loss at step 50: 0.03570520505309105 +Loss at step 100: 0.03586961328983307 +Loss at step 150: 0.033113136887550354 +Loss at step 200: 0.031563837081193924 +Loss at step 250: 0.03401624783873558 +Loss at step 300: 0.0378023199737072 +Loss at step 350: 0.03832368552684784 +Loss at step 400: 0.02344367839396 +Loss at step 450: 0.04743019491434097 +Loss at step 500: 0.038056880235672 +Loss at step 550: 0.04119366407394409 +Loss at step 600: 0.03679410368204117 +Loss at step 650: 0.04440126568078995 +Loss at step 700: 0.0363314226269722 +Loss at step 750: 0.03746131435036659 +Loss at step 800: 0.05385933071374893 +Loss at step 850: 0.031379908323287964 +Loss at step 900: 0.042474813759326935 +Mean training loss after epoch 133: 0.039679838541045245 + +EPOCH: 134 +Loss at step 0: 0.027622492983937263 +Loss at step 50: 0.04098658636212349 +Loss at step 100: 0.03279804810881615 +Loss at step 150: 0.04508473724126816 +Loss at step 200: 0.0342780165374279 +Loss at step 250: 0.030243201181292534 +Loss at step 300: 0.028790751472115517 +Loss at step 350: 0.03409457579255104 +Loss at step 400: 0.03327581286430359 +Loss at step 450: 0.037851233035326004 +Loss at step 500: 0.031168678775429726 +Loss at step 550: 0.0319376140832901 +Loss at step 600: 0.04381627216935158 +Loss at step 650: 0.04569874703884125 +Loss at step 700: 0.033138565719127655 +Loss at step 750: 0.03507937490940094 +Loss at step 800: 0.04184393957257271 +Loss at step 850: 0.03176693618297577 +Loss at step 900: 0.030614210292696953 +Mean training loss after epoch 134: 0.03981186858594799 + +EPOCH: 135 +Loss at step 0: 0.03553043678402901 +Loss at step 50: 0.03979399800300598 +Loss at step 100: 0.043166384100914 +Loss at step 150: 0.04016077518463135 +Loss at step 200: 0.04015089198946953 +Loss at step 250: 0.038076434284448624 +Loss at step 300: 0.032090965658426285 +Loss at step 350: 0.05516954883933067 +Loss at step 400: 0.0338265635073185 +Loss at step 450: 0.05055184289813042 +Loss at step 500: 0.0306999534368515 +Loss at step 550: 0.046033430844545364 +Loss at step 600: 0.03614010661840439 +Loss at step 650: 0.03513707220554352 +Loss at step 700: 0.055435363203287125 +Loss at step 750: 0.033791683614254 +Loss at step 800: 0.040268998593091965 +Loss at step 850: 0.046138305217027664 +Loss at step 900: 0.036269623786211014 +Mean training loss after epoch 135: 0.040519412690356596 + +EPOCH: 136 +Loss at step 0: 0.031145719811320305 +Loss at step 50: 0.034706104546785355 +Loss at step 100: 0.04843144491314888 +Loss at step 150: 0.06583622097969055 +Loss at step 200: 0.05224967747926712 +Loss at step 250: 0.04415404796600342 +Loss at step 300: 0.03567051142454147 +Loss at step 350: 0.027567612007260323 +Loss at step 400: 0.05148737505078316 +Loss at step 450: 0.04981483146548271 +Loss at step 500: 0.04140967130661011 +Loss at step 550: 0.028734488412737846 +Loss at step 600: 0.03323344141244888 +Loss at step 650: 0.05476273223757744 +Loss at step 700: 0.04226115345954895 +Loss at step 750: 0.03669494390487671 +Loss at step 800: 0.03222007676959038 +Loss at step 850: 0.025839785113930702 +Loss at step 900: 0.03102259710431099 +Mean training loss after epoch 136: 0.040111369352096686 + +EPOCH: 137 +Loss at step 0: 0.035307832062244415 +Loss at step 50: 0.035859256982803345 +Loss at step 100: 0.04564115032553673 +Loss at step 150: 0.08037211000919342 +Loss at step 200: 0.044677089899778366 +Loss at step 250: 0.04060159623622894 +Loss at step 300: 0.05918145924806595 +Loss at step 350: 0.033060211688280106 +Loss at step 400: 0.038067109882831573 +Loss at step 450: 0.03555935248732567 +Loss at step 500: 0.04896773397922516 +Loss at step 550: 0.03716621175408363 +Loss at step 600: 0.03253568708896637 +Loss at step 650: 0.0386538952589035 +Loss at step 700: 0.03709159418940544 +Loss at step 750: 0.047217704355716705 +Loss at step 800: 0.03762747347354889 +Loss at step 850: 0.033402878791093826 +Loss at step 900: 0.03984730318188667 +Mean training loss after epoch 137: 0.0398748926123354 + +EPOCH: 138 +Loss at step 0: 0.0351131334900856 +Loss at step 50: 0.05153409764170647 +Loss at step 100: 0.05654660984873772 +Loss at step 150: 0.03171384707093239 +Loss at step 200: 0.044744957238435745 +Loss at step 250: 0.04206939414143562 +Loss at step 300: 0.05348724126815796 +Loss at step 350: 0.05097969248890877 +Loss at step 400: 0.0321083664894104 +Loss at step 450: 0.04224591329693794 +Loss at step 500: 0.03608119115233421 +Loss at step 550: 0.054212745279073715 +Loss at step 600: 0.03262381628155708 +Loss at step 650: 0.048176102340221405 +Loss at step 700: 0.03508109226822853 +Loss at step 750: 0.03597771376371384 +Loss at step 800: 0.04042951017618179 +Loss at step 850: 0.03498770296573639 +Loss at step 900: 0.03275318816304207 +Mean training loss after epoch 138: 0.04044409448316674 + +EPOCH: 139 +Loss at step 0: 0.05238460749387741 +Loss at step 50: 0.029966481029987335 +Loss at step 100: 0.031206198036670685 +Loss at step 150: 0.03607112914323807 +Loss at step 200: 0.0320424921810627 +Loss at step 250: 0.03686009347438812 +Loss at step 300: 0.04129014536738396 +Loss at step 350: 0.05626775696873665 +Loss at step 400: 0.03750087693333626 +Loss at step 450: 0.03866837918758392 +Loss at step 500: 0.04019865393638611 +Loss at step 550: 0.030603976920247078 +Loss at step 600: 0.035492926836013794 +Loss at step 650: 0.031438861042261124 +Loss at step 700: 0.044327959418296814 +Loss at step 750: 0.029902664944529533 +Loss at step 800: 0.03317349776625633 +Loss at step 850: 0.040507715195417404 +Loss at step 900: 0.03306630626320839 +Mean training loss after epoch 139: 0.03977981074524523 + +EPOCH: 140 +Loss at step 0: 0.06723352521657944 +Loss at step 50: 0.036315590143203735 +Loss at step 100: 0.048940982669591904 +Loss at step 150: 0.030496828258037567 +Loss at step 200: 0.03590953350067139 +Loss at step 250: 0.04100378230214119 +Loss at step 300: 0.035402316600084305 +Loss at step 350: 0.05464482679963112 +Loss at step 400: 0.05612416937947273 +Loss at step 450: 0.04811203107237816 +Loss at step 500: 0.03293948248028755 +Loss at step 550: 0.029463013634085655 +Loss at step 600: 0.0330667719244957 +Loss at step 650: 0.07245888561010361 +Loss at step 700: 0.03625422343611717 +Loss at step 750: 0.029948584735393524 +Loss at step 800: 0.03603658825159073 +Loss at step 850: 0.038006313145160675 +Loss at step 900: 0.04900289326906204 +Mean training loss after epoch 140: 0.03966290574433453 + +EPOCH: 141 +Loss at step 0: 0.05389322340488434 +Loss at step 50: 0.03978116065263748 +Loss at step 100: 0.028913181275129318 +Loss at step 150: 0.03143258020281792 +Loss at step 200: 0.05115267261862755 +Loss at step 250: 0.04871513321995735 +Loss at step 300: 0.04140704870223999 +Loss at step 350: 0.029615212231874466 +Loss at step 400: 0.04913806915283203 +Loss at step 450: 0.05245078727602959 +Loss at step 500: 0.03655010834336281 +Loss at step 550: 0.03750515356659889 +Loss at step 600: 0.03648988902568817 +Loss at step 650: 0.04609394073486328 +Loss at step 700: 0.04012591764330864 +Loss at step 750: 0.031112534925341606 +Loss at step 800: 0.04239273443818092 +Loss at step 850: 0.0286885853856802 +Loss at step 900: 0.04067554697394371 +Mean training loss after epoch 141: 0.04047492779950216 + +EPOCH: 142 +Loss at step 0: 0.0341968834400177 +Loss at step 50: 0.027987387031316757 +Loss at step 100: 0.04370037466287613 +Loss at step 150: 0.03250158205628395 +Loss at step 200: 0.03320758789777756 +Loss at step 250: 0.04050271958112717 +Loss at step 300: 0.05204188823699951 +Loss at step 350: 0.03396312892436981 +Loss at step 400: 0.03425634652376175 +Loss at step 450: 0.050483450293540955 +Loss at step 500: 0.06107752397656441 +Loss at step 550: 0.03602050989866257 +Loss at step 600: 0.030771542340517044 +Loss at step 650: 0.03399882838129997 +Loss at step 700: 0.04370621219277382 +Loss at step 750: 0.06062234938144684 +Loss at step 800: 0.030899103730916977 +Loss at step 850: 0.035059645771980286 +Loss at step 900: 0.05461152642965317 +Mean training loss after epoch 142: 0.03912595202769044 + +EPOCH: 143 +Loss at step 0: 0.03069254197180271 +Loss at step 50: 0.028873268514871597 +Loss at step 100: 0.028288697823882103 +Loss at step 150: 0.044573914259672165 +Loss at step 200: 0.035409681499004364 +Loss at step 250: 0.045168306678533554 +Loss at step 300: 0.03995560109615326 +Loss at step 350: 0.0302441269159317 +Loss at step 400: 0.03496329486370087 +Loss at step 450: 0.03343826159834862 +Loss at step 500: 0.034524980932474136 +Loss at step 550: 0.03748397156596184 +Loss at step 600: 0.07241212576627731 +Loss at step 650: 0.040051836520433426 +Loss at step 700: 0.06440601497888565 +Loss at step 750: 0.032848332077264786 +Loss at step 800: 0.043311264365911484 +Loss at step 850: 0.031081559136509895 +Loss at step 900: 0.038504716008901596 +Mean training loss after epoch 143: 0.039704615202571535 + +EPOCH: 144 +Loss at step 0: 0.034515753388404846 +Loss at step 50: 0.038643963634967804 +Loss at step 100: 0.04919477179646492 +Loss at step 150: 0.06632418185472488 +Loss at step 200: 0.03776159882545471 +Loss at step 250: 0.03524239733815193 +Loss at step 300: 0.03229808807373047 +Loss at step 350: 0.03772842139005661 +Loss at step 400: 0.07176003605127335 +Loss at step 450: 0.0438353456556797 +Loss at step 500: 0.03729798272252083 +Loss at step 550: 0.034596312791109085 +Loss at step 600: 0.05316857621073723 +Loss at step 650: 0.04838914796710014 +Loss at step 700: 0.03394513204693794 +Loss at step 750: 0.05002584308385849 +Loss at step 800: 0.04865674301981926 +Loss at step 850: 0.030374780297279358 +Loss at step 900: 0.03489556908607483 +Mean training loss after epoch 144: 0.040014465794221424 + +EPOCH: 145 +Loss at step 0: 0.05304267257452011 +Loss at step 50: 0.03572936728596687 +Loss at step 100: 0.03844626620411873 +Loss at step 150: 0.051656000316143036 +Loss at step 200: 0.0419786162674427 +Loss at step 250: 0.03228844702243805 +Loss at step 300: 0.03717874363064766 +Loss at step 350: 0.03276830166578293 +Loss at step 400: 0.05304082855582237 +Loss at step 450: 0.0370134636759758 +Loss at step 500: 0.03658261150121689 +Loss at step 550: 0.05247374251484871 +Loss at step 600: 0.05243967846035957 +Loss at step 650: 0.035890765488147736 +Loss at step 700: 0.029134448617696762 +Loss at step 750: 0.04950794205069542 +Loss at step 800: 0.031324274837970734 +Loss at step 850: 0.04002757743000984 +Loss at step 900: 0.05876762419939041 +Mean training loss after epoch 145: 0.03966649990282588 + +EPOCH: 146 +Loss at step 0: 0.03870001062750816 +Loss at step 50: 0.0340142585337162 +Loss at step 100: 0.032529689371585846 +Loss at step 150: 0.04962790012359619 +Loss at step 200: 0.06994400918483734 +Loss at step 250: 0.04276866093277931 +Loss at step 300: 0.054688457399606705 +Loss at step 350: 0.051464974880218506 +Loss at step 400: 0.0338805727660656 +Loss at step 450: 0.04141691327095032 +Loss at step 500: 0.04455658793449402 +Loss at step 550: 0.04114391282200813 +Loss at step 600: 0.03700781613588333 +Loss at step 650: 0.03334641084074974 +Loss at step 700: 0.037004128098487854 +Loss at step 750: 0.045164212584495544 +Loss at step 800: 0.04131518676877022 +Loss at step 850: 0.04450450465083122 +Loss at step 900: 0.05313121899962425 +Mean training loss after epoch 146: 0.040262286505624176 + +EPOCH: 147 +Loss at step 0: 0.03320532664656639 +Loss at step 50: 0.0375794917345047 +Loss at step 100: 0.03766798973083496 +Loss at step 150: 0.051330532878637314 +Loss at step 200: 0.03195059671998024 +Loss at step 250: 0.04082850366830826 +Loss at step 300: 0.0481402613222599 +Loss at step 350: 0.030774451792240143 +Loss at step 400: 0.03871845453977585 +Loss at step 450: 0.05088238790631294 +Loss at step 500: 0.03520108014345169 +Loss at step 550: 0.04160549119114876 +Loss at step 600: 0.038120336830616 +Loss at step 650: 0.040947865694761276 +Loss at step 700: 0.06268515437841415 +Loss at step 750: 0.02965262345969677 +Loss at step 800: 0.0288058090955019 +Loss at step 850: 0.03575523570179939 +Loss at step 900: 0.041679564863443375 +Mean training loss after epoch 147: 0.0400226015303689 + +EPOCH: 148 +Loss at step 0: 0.03675468638539314 +Loss at step 50: 0.03356386721134186 +Loss at step 100: 0.05130539834499359 +Loss at step 150: 0.058995168656110764 +Loss at step 200: 0.06680206954479218 +Loss at step 250: 0.029005855321884155 +Loss at step 300: 0.037632159888744354 +Loss at step 350: 0.032792042940855026 +Loss at step 400: 0.03614620864391327 +Loss at step 450: 0.03251120075583458 +Loss at step 500: 0.04506837949156761 +Loss at step 550: 0.034124311059713364 +Loss at step 600: 0.0380428172647953 +Loss at step 650: 0.04019821807742119 +Loss at step 700: 0.033369410783052444 +Loss at step 750: 0.05180910602211952 +Loss at step 800: 0.03275255858898163 +Loss at step 850: 0.035289227962493896 +Loss at step 900: 0.03412739187479019 +Mean training loss after epoch 148: 0.03986330867123439 + +EPOCH: 149 +Loss at step 0: 0.03967485949397087 +Loss at step 50: 0.0405656173825264 +Loss at step 100: 0.049536626785993576 +Loss at step 150: 0.0467953085899353 +Loss at step 200: 0.038718242198228836 +Loss at step 250: 0.03391097113490105 +Loss at step 300: 0.027988167479634285 +Loss at step 350: 0.030590718612074852 +Loss at step 400: 0.037134964019060135 +Loss at step 450: 0.04939684644341469 +Loss at step 500: 0.028641855344176292 +Loss at step 550: 0.049792949110269547 +Loss at step 600: 0.06914780288934708 +Loss at step 650: 0.028473980724811554 +Loss at step 700: 0.04541952535510063 +Loss at step 750: 0.056351106613874435 +Loss at step 800: 0.03810013830661774 +Loss at step 850: 0.031036950647830963 +Loss at step 900: 0.02989376336336136 +Mean training loss after epoch 149: 0.03950454026405046 + +EPOCH: 150 +Loss at step 0: 0.0497492291033268 +Loss at step 50: 0.04664904624223709 +Loss at step 100: 0.030672777444124222 +Loss at step 150: 0.037179891020059586 +Loss at step 200: 0.037141673266887665 +Loss at step 250: 0.033430758863687515 +Loss at step 300: 0.030099069699645042 +Loss at step 350: 0.04658585414290428 +Loss at step 400: 0.03212271258234978 +Loss at step 450: 0.037536367774009705 +Loss at step 500: 0.03360248729586601 +Loss at step 550: 0.029031341895461082 +Loss at step 600: 0.04348360747098923 +Loss at step 650: 0.032128266990184784 +Loss at step 700: 0.04996559023857117 +Loss at step 750: 0.03784072399139404 +Loss at step 800: 0.034901782870292664 +Loss at step 850: 0.036399587988853455 +Loss at step 900: 0.032460201531648636 +Mean training loss after epoch 150: 0.03932684691531508 + +EPOCH: 151 +Loss at step 0: 0.038559745997190475 +Loss at step 50: 0.034698486328125 +Loss at step 100: 0.034396491944789886 +Loss at step 150: 0.03460189327597618 +Loss at step 200: 0.03432363644242287 +Loss at step 250: 0.044413454830646515 +Loss at step 300: 0.0633033886551857 +Loss at step 350: 0.028063831850886345 +Loss at step 400: 0.03233586624264717 +Loss at step 450: 0.050064317882061005 +Loss at step 500: 0.04057862237095833 +Loss at step 550: 0.03515806421637535 +Loss at step 600: 0.03948560357093811 +Loss at step 650: 0.03225551173090935 +Loss at step 700: 0.0390482172369957 +Loss at step 750: 0.03579146787524223 +Loss at step 800: 0.03390445560216904 +Loss at step 850: 0.05053498223423958 +Loss at step 900: 0.04960924759507179 +Mean training loss after epoch 151: 0.0394894066931946 + +EPOCH: 152 +Loss at step 0: 0.04069296270608902 +Loss at step 50: 0.0362466461956501 +Loss at step 100: 0.03903277963399887 +Loss at step 150: 0.03854478523135185 +Loss at step 200: 0.030529094859957695 +Loss at step 250: 0.03584692254662514 +Loss at step 300: 0.03763008117675781 +Loss at step 350: 0.03907984122633934 +Loss at step 400: 0.032714176923036575 +Loss at step 450: 0.04580497741699219 +Loss at step 500: 0.030848801136016846 +Loss at step 550: 0.03909658268094063 +Loss at step 600: 0.0393528938293457 +Loss at step 650: 0.034250885248184204 +Loss at step 700: 0.039407070726156235 +Loss at step 750: 0.06522706151008606 +Loss at step 800: 0.02698972448706627 +Loss at step 850: 0.033097878098487854 +Loss at step 900: 0.039493318647146225 +Mean training loss after epoch 152: 0.039979385349081395 + +EPOCH: 153 +Loss at step 0: 0.03830374777317047 +Loss at step 50: 0.03738465532660484 +Loss at step 100: 0.03408047929406166 +Loss at step 150: 0.08162634074687958 +Loss at step 200: 0.047909438610076904 +Loss at step 250: 0.04590803012251854 +Loss at step 300: 0.03924799710512161 +Loss at step 350: 0.049378473311662674 +Loss at step 400: 0.03161664679646492 +Loss at step 450: 0.060903649777173996 +Loss at step 500: 0.0318431593477726 +Loss at step 550: 0.030885569751262665 +Loss at step 600: 0.04783306270837784 +Loss at step 650: 0.03933698311448097 +Loss at step 700: 0.03513640910387039 +Loss at step 750: 0.03802374005317688 +Loss at step 800: 0.03579352796077728 +Loss at step 850: 0.039611611515283585 +Loss at step 900: 0.032021258026361465 +Mean training loss after epoch 153: 0.03981151960806043 + +EPOCH: 154 +Loss at step 0: 0.03201507031917572 +Loss at step 50: 0.03376217558979988 +Loss at step 100: 0.030808014795184135 +Loss at step 150: 0.05287822335958481 +Loss at step 200: 0.035732246935367584 +Loss at step 250: 0.03311564028263092 +Loss at step 300: 0.033874545246362686 +Loss at step 350: 0.0348997600376606 +Loss at step 400: 0.03685653209686279 +Loss at step 450: 0.034120168536901474 +Loss at step 500: 0.027468038722872734 +Loss at step 550: 0.03726698458194733 +Loss at step 600: 0.03796930983662605 +Loss at step 650: 0.052408505231142044 +Loss at step 700: 0.03583231568336487 +Loss at step 750: 0.038083259016275406 +Loss at step 800: 0.03880752623081207 +Loss at step 850: 0.03209945186972618 +Loss at step 900: 0.04039257392287254 +Mean training loss after epoch 154: 0.03945487745201537 + +EPOCH: 155 +Loss at step 0: 0.03866823762655258 +Loss at step 50: 0.031757935881614685 +Loss at step 100: 0.03913172706961632 +Loss at step 150: 0.04874129965901375 +Loss at step 200: 0.05382993072271347 +Loss at step 250: 0.041536904871463776 +Loss at step 300: 0.030705511569976807 +Loss at step 350: 0.03425120934844017 +Loss at step 400: 0.03446534648537636 +Loss at step 450: 0.04940277710556984 +Loss at step 500: 0.03989547863602638 +Loss at step 550: 0.03524135425686836 +Loss at step 600: 0.045029882341623306 +Loss at step 650: 0.0432940311729908 +Loss at step 700: 0.039333123713731766 +Loss at step 750: 0.04918549954891205 +Loss at step 800: 0.052956581115722656 +Loss at step 850: 0.03641032800078392 +Loss at step 900: 0.03382905200123787 +Mean training loss after epoch 155: 0.039653815565380585 + +EPOCH: 156 +Loss at step 0: 0.052970923483371735 +Loss at step 50: 0.05297328904271126 +Loss at step 100: 0.04523034766316414 +Loss at step 150: 0.052893735468387604 +Loss at step 200: 0.06359665840864182 +Loss at step 250: 0.03374951332807541 +Loss at step 300: 0.033798668533563614 +Loss at step 350: 0.03770531713962555 +Loss at step 400: 0.06374736875295639 +Loss at step 450: 0.04122309386730194 +Loss at step 500: 0.03582649677991867 +Loss at step 550: 0.03951054438948631 +Loss at step 600: 0.03408455848693848 +Loss at step 650: 0.03591909632086754 +Loss at step 700: 0.028890229761600494 +Loss at step 750: 0.050827328115701675 +Loss at step 800: 0.03168226033449173 +Loss at step 850: 0.06453513354063034 +Loss at step 900: 0.05081882327795029 +Mean training loss after epoch 156: 0.03980678563981232 + +EPOCH: 157 +Loss at step 0: 0.03517033904790878 +Loss at step 50: 0.03029661625623703 +Loss at step 100: 0.04061397910118103 +Loss at step 150: 0.04112451896071434 +Loss at step 200: 0.05511137843132019 +Loss at step 250: 0.034754179418087006 +Loss at step 300: 0.03483182564377785 +Loss at step 350: 0.02881348878145218 +Loss at step 400: 0.03589953854680061 +Loss at step 450: 0.040302760899066925 +Loss at step 500: 0.02812015265226364 +Loss at step 550: 0.03429786115884781 +Loss at step 600: 0.042274147272109985 +Loss at step 650: 0.03722405433654785 +Loss at step 700: 0.06251759082078934 +Loss at step 750: 0.03280168026685715 +Loss at step 800: 0.033976029604673386 +Loss at step 850: 0.03572479635477066 +Loss at step 900: 0.03401172161102295 +Mean training loss after epoch 157: 0.03961787642494066 + +EPOCH: 158 +Loss at step 0: 0.038926295936107635 +Loss at step 50: 0.06311167031526566 +Loss at step 100: 0.039787448942661285 +Loss at step 150: 0.03861629217863083 +Loss at step 200: 0.04008471593260765 +Loss at step 250: 0.03857661783695221 +Loss at step 300: 0.03470795974135399 +Loss at step 350: 0.05390108749270439 +Loss at step 400: 0.047356195747852325 +Loss at step 450: 0.04332069307565689 +Loss at step 500: 0.03188915550708771 +Loss at step 550: 0.06753725558519363 +Loss at step 600: 0.05447079613804817 +Loss at step 650: 0.039038512855768204 +Loss at step 700: 0.034410372376441956 +Loss at step 750: 0.0353931188583374 +Loss at step 800: 0.040720392018556595 +Loss at step 850: 0.030527109280228615 +Loss at step 900: 0.03377766162157059 +Mean training loss after epoch 158: 0.03973492824121007 + +EPOCH: 159 +Loss at step 0: 0.04395056143403053 +Loss at step 50: 0.03635787218809128 +Loss at step 100: 0.052479084581136703 +Loss at step 150: 0.0331464521586895 +Loss at step 200: 0.03089253231883049 +Loss at step 250: 0.038187939673662186 +Loss at step 300: 0.030138317495584488 +Loss at step 350: 0.03846345096826553 +Loss at step 400: 0.049408961087465286 +Loss at step 450: 0.05199102312326431 +Loss at step 500: 0.03939513862133026 +Loss at step 550: 0.025730779394507408 +Loss at step 600: 0.040308210998773575 +Loss at step 650: 0.04811836779117584 +Loss at step 700: 0.03713226318359375 +Loss at step 750: 0.03334973752498627 +Loss at step 800: 0.054803516715765 +Loss at step 850: 0.038321588188409805 +Loss at step 900: 0.04296765848994255 +Mean training loss after epoch 159: 0.039721751105048254 + +EPOCH: 160 +Loss at step 0: 0.042155567556619644 +Loss at step 50: 0.03570934012532234 +Loss at step 100: 0.03494516387581825 +Loss at step 150: 0.03171156346797943 +Loss at step 200: 0.03144378587603569 +Loss at step 250: 0.04876674711704254 +Loss at step 300: 0.04549702629446983 +Loss at step 350: 0.036487117409706116 +Loss at step 400: 0.04203188419342041 +Loss at step 450: 0.038777709007263184 +Loss at step 500: 0.05074477568268776 +Loss at step 550: 0.037049807608127594 +Loss at step 600: 0.03319176658987999 +Loss at step 650: 0.04365323483943939 +Loss at step 700: 0.0491318553686142 +Loss at step 750: 0.04497558996081352 +Loss at step 800: 0.050616584718227386 +Loss at step 850: 0.03367560729384422 +Loss at step 900: 0.031395260244607925 +Mean training loss after epoch 160: 0.03994718715071932 + +EPOCH: 161 +Loss at step 0: 0.03787631168961525 +Loss at step 50: 0.03632188215851784 +Loss at step 100: 0.031822752207517624 +Loss at step 150: 0.03723493218421936 +Loss at step 200: 0.03751960024237633 +Loss at step 250: 0.03722744435071945 +Loss at step 300: 0.03279047831892967 +Loss at step 350: 0.038861632347106934 +Loss at step 400: 0.03202925994992256 +Loss at step 450: 0.03544725105166435 +Loss at step 500: 0.030634867027401924 +Loss at step 550: 0.034504830837249756 +Loss at step 600: 0.03699138015508652 +Loss at step 650: 0.03438907489180565 +Loss at step 700: 0.032897673547267914 +Loss at step 750: 0.039568059146404266 +Loss at step 800: 0.02716151438653469 +Loss at step 850: 0.04132404923439026 +Loss at step 900: 0.02859344705939293 +Mean training loss after epoch 161: 0.03965161397838707 + +EPOCH: 162 +Loss at step 0: 0.044058606028556824 +Loss at step 50: 0.03533129766583443 +Loss at step 100: 0.03633712977170944 +Loss at step 150: 0.03775249794125557 +Loss at step 200: 0.0342252217233181 +Loss at step 250: 0.05188572406768799 +Loss at step 300: 0.0261610709130764 +Loss at step 350: 0.04466400295495987 +Loss at step 400: 0.030652252957224846 +Loss at step 450: 0.06206464022397995 +Loss at step 500: 0.041849203407764435 +Loss at step 550: 0.04211672022938728 +Loss at step 600: 0.04404137656092644 +Loss at step 650: 0.037888288497924805 +Loss at step 700: 0.03815712034702301 +Loss at step 750: 0.0420524999499321 +Loss at step 800: 0.0497308187186718 +Loss at step 850: 0.03643658757209778 +Loss at step 900: 0.045854780822992325 +Mean training loss after epoch 162: 0.03955368834065158 + +EPOCH: 163 +Loss at step 0: 0.037637751549482346 +Loss at step 50: 0.03754173591732979 +Loss at step 100: 0.030969874933362007 +Loss at step 150: 0.04777289181947708 +Loss at step 200: 0.034243032336235046 +Loss at step 250: 0.030788376927375793 +Loss at step 300: 0.032340116798877716 +Loss at step 350: 0.06834275275468826 +Loss at step 400: 0.03906245157122612 +Loss at step 450: 0.04159681126475334 +Loss at step 500: 0.04897071421146393 +Loss at step 550: 0.032994478940963745 +Loss at step 600: 0.05192587152123451 +Loss at step 650: 0.05311141908168793 +Loss at step 700: 0.05736926198005676 +Loss at step 750: 0.03613514453172684 +Loss at step 800: 0.05063817650079727 +Loss at step 850: 0.04703342542052269 +Loss at step 900: 0.03148123249411583 +Mean training loss after epoch 163: 0.03973168544153542 + +EPOCH: 164 +Loss at step 0: 0.03522387892007828 +Loss at step 50: 0.0354304239153862 +Loss at step 100: 0.034576959908008575 +Loss at step 150: 0.04489249363541603 +Loss at step 200: 0.04102586582303047 +Loss at step 250: 0.04082818701863289 +Loss at step 300: 0.048556774854660034 +Loss at step 350: 0.028994642198085785 +Loss at step 400: 0.03966813161969185 +Loss at step 450: 0.05221426114439964 +Loss at step 500: 0.04641151428222656 +Loss at step 550: 0.051005858927965164 +Loss at step 600: 0.04715345799922943 +Loss at step 650: 0.06296427547931671 +Loss at step 700: 0.036891594529151917 +Loss at step 750: 0.030493998900055885 +Loss at step 800: 0.037916600704193115 +Loss at step 850: 0.04149070009589195 +Loss at step 900: 0.03550049662590027 +Mean training loss after epoch 164: 0.0396854249514274 + +EPOCH: 165 +Loss at step 0: 0.042220719158649445 +Loss at step 50: 0.03374775871634483 +Loss at step 100: 0.037937846034765244 +Loss at step 150: 0.07947231829166412 +Loss at step 200: 0.061438754200935364 +Loss at step 250: 0.03606860339641571 +Loss at step 300: 0.041866056621074677 +Loss at step 350: 0.04028409719467163 +Loss at step 400: 0.03585618734359741 +Loss at step 450: 0.05177586153149605 +Loss at step 500: 0.03241375833749771 +Loss at step 550: 0.040101245045661926 +Loss at step 600: 0.03462972491979599 +Loss at step 650: 0.03558705002069473 +Loss at step 700: 0.05475455895066261 +Loss at step 750: 0.04315561428666115 +Loss at step 800: 0.034266598522663116 +Loss at step 850: 0.03024034947156906 +Loss at step 900: 0.050545837730169296 +Mean training loss after epoch 165: 0.03933941557217064 + +EPOCH: 166 +Loss at step 0: 0.03422791138291359 +Loss at step 50: 0.0342918299138546 +Loss at step 100: 0.040826451033353806 +Loss at step 150: 0.04773464798927307 +Loss at step 200: 0.051334526389837265 +Loss at step 250: 0.030052201822400093 +Loss at step 300: 0.034113090485334396 +Loss at step 350: 0.030852695927023888 +Loss at step 400: 0.03177307918667793 +Loss at step 450: 0.02808055840432644 +Loss at step 500: 0.04130341112613678 +Loss at step 550: 0.05406184867024422 +Loss at step 600: 0.03329135850071907 +Loss at step 650: 0.039528608322143555 +Loss at step 700: 0.027340373024344444 +Loss at step 750: 0.05258629098534584 +Loss at step 800: 0.03488999977707863 +Loss at step 850: 0.03175424784421921 +Loss at step 900: 0.029860569164156914 +Mean training loss after epoch 166: 0.0395889686686652 + +EPOCH: 167 +Loss at step 0: 0.038849908858537674 +Loss at step 50: 0.04868149012327194 +Loss at step 100: 0.03719170764088631 +Loss at step 150: 0.050611406564712524 +Loss at step 200: 0.0537850558757782 +Loss at step 250: 0.03979859873652458 +Loss at step 300: 0.049695856869220734 +Loss at step 350: 0.030528413131833076 +Loss at step 400: 0.026854444295167923 +Loss at step 450: 0.0315445214509964 +Loss at step 500: 0.03543714061379433 +Loss at step 550: 0.03773332014679909 +Loss at step 600: 0.03539112210273743 +Loss at step 650: 0.05348960682749748 +Loss at step 700: 0.0349346399307251 +Loss at step 750: 0.05097145959734917 +Loss at step 800: 0.03509731963276863 +Loss at step 850: 0.06194102019071579 +Loss at step 900: 0.03483329713344574 +Mean training loss after epoch 167: 0.03931984007914564 + +EPOCH: 168 +Loss at step 0: 0.0442483015358448 +Loss at step 50: 0.03344307094812393 +Loss at step 100: 0.04714402183890343 +Loss at step 150: 0.036182697862386703 +Loss at step 200: 0.03450503572821617 +Loss at step 250: 0.04949743673205376 +Loss at step 300: 0.04699171707034111 +Loss at step 350: 0.03356409817934036 +Loss at step 400: 0.04914213716983795 +Loss at step 450: 0.03713100776076317 +Loss at step 500: 0.034673936665058136 +Loss at step 550: 0.039094239473342896 +Loss at step 600: 0.039589714258909225 +Loss at step 650: 0.055458154529333115 +Loss at step 700: 0.025569973513484 +Loss at step 750: 0.035783909261226654 +Loss at step 800: 0.0652955025434494 +Loss at step 850: 0.03070709854364395 +Loss at step 900: 0.030941976234316826 +Mean training loss after epoch 168: 0.03940760542644557 + +EPOCH: 169 +Loss at step 0: 0.03163331374526024 +Loss at step 50: 0.03836061432957649 +Loss at step 100: 0.04068070277571678 +Loss at step 150: 0.030763309448957443 +Loss at step 200: 0.036739546805620193 +Loss at step 250: 0.029106061905622482 +Loss at step 300: 0.032068174332380295 +Loss at step 350: 0.032730091363191605 +Loss at step 400: 0.05216984823346138 +Loss at step 450: 0.04650463908910751 +Loss at step 500: 0.040319133549928665 +Loss at step 550: 0.030558280646800995 +Loss at step 600: 0.041635192930698395 +Loss at step 650: 0.03210076317191124 +Loss at step 700: 0.025039872154593468 +Loss at step 750: 0.04510484263300896 +Loss at step 800: 0.04591650143265724 +Loss at step 850: 0.03117387555539608 +Loss at step 900: 0.03997860476374626 +Mean training loss after epoch 169: 0.039096843947702124 + +EPOCH: 170 +Loss at step 0: 0.04807845503091812 +Loss at step 50: 0.03216809406876564 +Loss at step 100: 0.052466776221990585 +Loss at step 150: 0.05527348443865776 +Loss at step 200: 0.04938875138759613 +Loss at step 250: 0.029139377176761627 +Loss at step 300: 0.03292334824800491 +Loss at step 350: 0.031563788652420044 +Loss at step 400: 0.030839821323752403 +Loss at step 450: 0.04812578111886978 +Loss at step 500: 0.03261122480034828 +Loss at step 550: 0.03332837298512459 +Loss at step 600: 0.029706457629799843 +Loss at step 650: 0.05334772169589996 +Loss at step 700: 0.038234107196331024 +Loss at step 750: 0.03391353040933609 +Loss at step 800: 0.03238190710544586 +Loss at step 850: 0.045458365231752396 +Loss at step 900: 0.031078660860657692 +Mean training loss after epoch 170: 0.03927428957058995 + +EPOCH: 171 +Loss at step 0: 0.03604511916637421 +Loss at step 50: 0.02860851213335991 +Loss at step 100: 0.030669499188661575 +Loss at step 150: 0.03728640079498291 +Loss at step 200: 0.03493388369679451 +Loss at step 250: 0.03077596053481102 +Loss at step 300: 0.02700110711157322 +Loss at step 350: 0.04018703103065491 +Loss at step 400: 0.029676662757992744 +Loss at step 450: 0.031749628484249115 +Loss at step 500: 0.04076163098216057 +Loss at step 550: 0.0323580838739872 +Loss at step 600: 0.05565223842859268 +Loss at step 650: 0.036014098674058914 +Loss at step 700: 0.03889094665646553 +Loss at step 750: 0.04819978028535843 +Loss at step 800: 0.035391367971897125 +Loss at step 850: 0.04018506780266762 +Loss at step 900: 0.03758300840854645 +Mean training loss after epoch 171: 0.03903876623309561 + +EPOCH: 172 +Loss at step 0: 0.040134530514478683 +Loss at step 50: 0.030954569578170776 +Loss at step 100: 0.032502155750989914 +Loss at step 150: 0.047871530055999756 +Loss at step 200: 0.03520025312900543 +Loss at step 250: 0.033117249608039856 +Loss at step 300: 0.04297640919685364 +Loss at step 350: 0.03170880302786827 +Loss at step 400: 0.03357839956879616 +Loss at step 450: 0.03541633114218712 +Loss at step 500: 0.10076101869344711 +Loss at step 550: 0.04049905017018318 +Loss at step 600: 0.03794636204838753 +Loss at step 650: 0.05078333243727684 +Loss at step 700: 0.032922934740781784 +Loss at step 750: 0.03392742574214935 +Loss at step 800: 0.034097425639629364 +Loss at step 850: 0.0374067984521389 +Loss at step 900: 0.03601730614900589 +Mean training loss after epoch 172: 0.039571806728871645 + +EPOCH: 173 +Loss at step 0: 0.051600147038698196 +Loss at step 50: 0.03650055453181267 +Loss at step 100: 0.03364503011107445 +Loss at step 150: 0.06648823618888855 +Loss at step 200: 0.032441750168800354 +Loss at step 250: 0.03592945635318756 +Loss at step 300: 0.03565963730216026 +Loss at step 350: 0.03935347869992256 +Loss at step 400: 0.05051932856440544 +Loss at step 450: 0.04962983727455139 +Loss at step 500: 0.051245179027318954 +Loss at step 550: 0.03936230018734932 +Loss at step 600: 0.04920635372400284 +Loss at step 650: 0.03439559042453766 +Loss at step 700: 0.044927652925252914 +Loss at step 750: 0.05031128227710724 +Loss at step 800: 0.03805975615978241 +Loss at step 850: 0.03496336564421654 +Loss at step 900: 0.04957449063658714 +Mean training loss after epoch 173: 0.03956725788928235 + +EPOCH: 174 +Loss at step 0: 0.027480291202664375 +Loss at step 50: 0.03697388619184494 +Loss at step 100: 0.03777739405632019 +Loss at step 150: 0.03281679376959801 +Loss at step 200: 0.03794809803366661 +Loss at step 250: 0.0320734940469265 +Loss at step 300: 0.0341799259185791 +Loss at step 350: 0.051777079701423645 +Loss at step 400: 0.0558072105050087 +Loss at step 450: 0.033157266676425934 +Loss at step 500: 0.04589252918958664 +Loss at step 550: 0.030640888959169388 +Loss at step 600: 0.032301343977451324 +Loss at step 650: 0.03360531106591225 +Loss at step 700: 0.03500358387827873 +Loss at step 750: 0.0347106046974659 +Loss at step 800: 0.028394857421517372 +Loss at step 850: 0.03694157674908638 +Loss at step 900: 0.04794781282544136 +Mean training loss after epoch 174: 0.03932879738279307 + +EPOCH: 175 +Loss at step 0: 0.03555794060230255 +Loss at step 50: 0.02648364193737507 +Loss at step 100: 0.03459541127085686 +Loss at step 150: 0.03992311656475067 +Loss at step 200: 0.024893423542380333 +Loss at step 250: 0.047270677983760834 +Loss at step 300: 0.03519446775317192 +Loss at step 350: 0.09180052578449249 +Loss at step 400: 0.02917756326496601 +Loss at step 450: 0.034309279173612595 +Loss at step 500: 0.03961813449859619 +Loss at step 550: 0.04994247853755951 +Loss at step 600: 0.04075918719172478 +Loss at step 650: 0.031336862593889236 +Loss at step 700: 0.03452292084693909 +Loss at step 750: 0.05241398140788078 +Loss at step 800: 0.027623998001217842 +Loss at step 850: 0.02912183292210102 +Loss at step 900: 0.03710698336362839 +Mean training loss after epoch 175: 0.039300479576674735 + +EPOCH: 176 +Loss at step 0: 0.036183230578899384 +Loss at step 50: 0.0666927918791771 +Loss at step 100: 0.0493197925388813 +Loss at step 150: 0.03803710639476776 +Loss at step 200: 0.03835935518145561 +Loss at step 250: 0.031230589374899864 +Loss at step 300: 0.03340180218219757 +Loss at step 350: 0.040427062660455704 +Loss at step 400: 0.03160826861858368 +Loss at step 450: 0.044037673622369766 +Loss at step 500: 0.03656746819615364 +Loss at step 550: 0.036755748093128204 +Loss at step 600: 0.059527214616537094 +Loss at step 650: 0.040880002081394196 +Loss at step 700: 0.051435764878988266 +Loss at step 750: 0.059903018176555634 +Loss at step 800: 0.03034154139459133 +Loss at step 850: 0.037557195872068405 +Loss at step 900: 0.034874048084020615 +Mean training loss after epoch 176: 0.040028610705598586 + +EPOCH: 177 +Loss at step 0: 0.03473574295639992 +Loss at step 50: 0.05093587562441826 +Loss at step 100: 0.02982262894511223 +Loss at step 150: 0.030545443296432495 +Loss at step 200: 0.03678588569164276 +Loss at step 250: 0.036256153136491776 +Loss at step 300: 0.04075793921947479 +Loss at step 350: 0.03677338734269142 +Loss at step 400: 0.03143291920423508 +Loss at step 450: 0.04609094187617302 +Loss at step 500: 0.04759456589818001 +Loss at step 550: 0.03229653090238571 +Loss at step 600: 0.037571556866168976 +Loss at step 650: 0.03082422725856304 +Loss at step 700: 0.06481219828128815 +Loss at step 750: 0.06181209906935692 +Loss at step 800: 0.036778099834918976 +Loss at step 850: 0.05306657403707504 +Loss at step 900: 0.027700865641236305 +Mean training loss after epoch 177: 0.03933695467042008 + +EPOCH: 178 +Loss at step 0: 0.03726204112172127 +Loss at step 50: 0.03066483698785305 +Loss at step 100: 0.03189198300242424 +Loss at step 150: 0.03496934846043587 +Loss at step 200: 0.03675440698862076 +Loss at step 250: 0.05385400727391243 +Loss at step 300: 0.06269004940986633 +Loss at step 350: 0.0388777069747448 +Loss at step 400: 0.040307316929101944 +Loss at step 450: 0.04561549797654152 +Loss at step 500: 0.032421860843896866 +Loss at step 550: 0.043146658688783646 +Loss at step 600: 0.04828475043177605 +Loss at step 650: 0.03869273141026497 +Loss at step 700: 0.027358677238225937 +Loss at step 750: 0.05097721889615059 +Loss at step 800: 0.03179521858692169 +Loss at step 850: 0.029768288135528564 +Loss at step 900: 0.03665889427065849 +Mean training loss after epoch 178: 0.03920851013402758 + +EPOCH: 179 +Loss at step 0: 0.0432792492210865 +Loss at step 50: 0.03617312014102936 +Loss at step 100: 0.06370623409748077 +Loss at step 150: 0.03260226547718048 +Loss at step 200: 0.0277892854064703 +Loss at step 250: 0.032151203602552414 +Loss at step 300: 0.03364957869052887 +Loss at step 350: 0.036430612206459045 +Loss at step 400: 0.02578619495034218 +Loss at step 450: 0.03799568489193916 +Loss at step 500: 0.030682628974318504 +Loss at step 550: 0.030547630041837692 +Loss at step 600: 0.032164983451366425 +Loss at step 650: 0.048926692456007004 +Loss at step 700: 0.03040003776550293 +Loss at step 750: 0.04074382036924362 +Loss at step 800: 0.03304985165596008 +Loss at step 850: 0.046545110642910004 +Loss at step 900: 0.035846710205078125 +Mean training loss after epoch 179: 0.03970178106088819 + +EPOCH: 180 +Loss at step 0: 0.041110847145318985 +Loss at step 50: 0.03983008489012718 +Loss at step 100: 0.038944534957408905 +Loss at step 150: 0.03114279918372631 +Loss at step 200: 0.035370562225580215 +Loss at step 250: 0.05519220605492592 +Loss at step 300: 0.049281731247901917 +Loss at step 350: 0.039435915648937225 +Loss at step 400: 0.046951428055763245 +Loss at step 450: 0.03373648226261139 +Loss at step 500: 0.034319255501031876 +Loss at step 550: 0.0509195514023304 +Loss at step 600: 0.0303181242197752 +Loss at step 650: 0.055917058140039444 +Loss at step 700: 0.03461319953203201 +Loss at step 750: 0.07615417242050171 +Loss at step 800: 0.08306694775819778 +Loss at step 850: 0.039755940437316895 +Loss at step 900: 0.05004990100860596 +Mean training loss after epoch 180: 0.04022076416776569 + +EPOCH: 181 +Loss at step 0: 0.029874665662646294 +Loss at step 50: 0.029927344992756844 +Loss at step 100: 0.03604910150170326 +Loss at step 150: 0.03975125402212143 +Loss at step 200: 0.03120443783700466 +Loss at step 250: 0.04868396371603012 +Loss at step 300: 0.049526773393154144 +Loss at step 350: 0.03383695334196091 +Loss at step 400: 0.034840282052755356 +Loss at step 450: 0.05584263056516647 +Loss at step 500: 0.0310691948980093 +Loss at step 550: 0.04415060952305794 +Loss at step 600: 0.032994214445352554 +Loss at step 650: 0.03248259797692299 +Loss at step 700: 0.03337256610393524 +Loss at step 750: 0.035838667303323746 +Loss at step 800: 0.050576042383909225 +Loss at step 850: 0.06149858236312866 +Loss at step 900: 0.035496558994054794 +Mean training loss after epoch 181: 0.03953436933267218 + +EPOCH: 182 +Loss at step 0: 0.03431776538491249 +Loss at step 50: 0.04044733941555023 +Loss at step 100: 0.049678489565849304 +Loss at step 150: 0.052443064749240875 +Loss at step 200: 0.03474302589893341 +Loss at step 250: 0.03703489899635315 +Loss at step 300: 0.04352117329835892 +Loss at step 350: 0.03962820768356323 +Loss at step 400: 0.037225913256406784 +Loss at step 450: 0.05509385094046593 +Loss at step 500: 0.03221511468291283 +Loss at step 550: 0.04929589107632637 +Loss at step 600: 0.0626567006111145 +Loss at step 650: 0.02498018741607666 +Loss at step 700: 0.03269823268055916 +Loss at step 750: 0.04034951329231262 +Loss at step 800: 0.038959357887506485 +Loss at step 850: 0.045052941888570786 +Loss at step 900: 0.030315130949020386 +Mean training loss after epoch 182: 0.03948396699292573 + +EPOCH: 183 +Loss at step 0: 0.05397326871752739 +Loss at step 50: 0.03509332984685898 +Loss at step 100: 0.04604775458574295 +Loss at step 150: 0.03923487663269043 +Loss at step 200: 0.04648161679506302 +Loss at step 250: 0.07260828465223312 +Loss at step 300: 0.04022159054875374 +Loss at step 350: 0.04950942099094391 +Loss at step 400: 0.05045710504055023 +Loss at step 450: 0.0487130731344223 +Loss at step 500: 0.03563311696052551 +Loss at step 550: 0.06734095513820648 +Loss at step 600: 0.031074846163392067 +Loss at step 650: 0.05064196512103081 +Loss at step 700: 0.06535758823156357 +Loss at step 750: 0.04022606834769249 +Loss at step 800: 0.0402403250336647 +Loss at step 850: 0.035112425684928894 +Loss at step 900: 0.048231590539216995 +Mean training loss after epoch 183: 0.03999095734979298 + +EPOCH: 184 +Loss at step 0: 0.06572314351797104 +Loss at step 50: 0.040734417736530304 +Loss at step 100: 0.033031612634658813 +Loss at step 150: 0.04663529992103577 +Loss at step 200: 0.028599590063095093 +Loss at step 250: 0.03933103755116463 +Loss at step 300: 0.029691778123378754 +Loss at step 350: 0.034657254815101624 +Loss at step 400: 0.0519569106400013 +Loss at step 450: 0.04233112558722496 +Loss at step 500: 0.03395779803395271 +Loss at step 550: 0.036949314177036285 +Loss at step 600: 0.036554135382175446 +Loss at step 650: 0.030880143865942955 +Loss at step 700: 0.044782429933547974 +Loss at step 750: 0.04911411926150322 +Loss at step 800: 0.037866562604904175 +Loss at step 850: 0.06262623518705368 +Loss at step 900: 0.03039371594786644 +Mean training loss after epoch 184: 0.039548526968814925 + +EPOCH: 185 +Loss at step 0: 0.0805806964635849 +Loss at step 50: 0.027917854487895966 +Loss at step 100: 0.03495066240429878 +Loss at step 150: 0.027538307011127472 +Loss at step 200: 0.03303145989775658 +Loss at step 250: 0.031799983233213425 +Loss at step 300: 0.06004394590854645 +Loss at step 350: 0.030346397310495377 +Loss at step 400: 0.031851932406425476 +Loss at step 450: 0.05824219435453415 +Loss at step 500: 0.028569376096129417 +Loss at step 550: 0.027987563982605934 +Loss at step 600: 0.042291879653930664 +Loss at step 650: 0.03323879837989807 +Loss at step 700: 0.059478759765625 +Loss at step 750: 0.03162425383925438 +Loss at step 800: 0.033148087561130524 +Loss at step 850: 0.03013218380510807 +Loss at step 900: 0.0303540900349617 +Mean training loss after epoch 185: 0.03991742543518734 + +EPOCH: 186 +Loss at step 0: 0.03823243826627731 +Loss at step 50: 0.04821484163403511 +Loss at step 100: 0.0335773304104805 +Loss at step 150: 0.03713402897119522 +Loss at step 200: 0.059686921536922455 +Loss at step 250: 0.030698001384735107 +Loss at step 300: 0.05005711689591408 +Loss at step 350: 0.03683238849043846 +Loss at step 400: 0.031736306846141815 +Loss at step 450: 0.02648918889462948 +Loss at step 500: 0.03295756131410599 +Loss at step 550: 0.055000800639390945 +Loss at step 600: 0.03838678076863289 +Loss at step 650: 0.03734767809510231 +Loss at step 700: 0.034189265221357346 +Loss at step 750: 0.031962718814611435 +Loss at step 800: 0.04328812658786774 +Loss at step 850: 0.04030853882431984 +Loss at step 900: 0.030331796035170555 +Mean training loss after epoch 186: 0.04037082401602698 + +EPOCH: 187 +Loss at step 0: 0.03479158505797386 +Loss at step 50: 0.027288708835840225 +Loss at step 100: 0.03628791123628616 +Loss at step 150: 0.04474860802292824 +Loss at step 200: 0.046368908137083054 +Loss at step 250: 0.03773191571235657 +Loss at step 300: 0.03852430358529091 +Loss at step 350: 0.043728701770305634 +Loss at step 400: 0.03673646226525307 +Loss at step 450: 0.04937160760164261 +Loss at step 500: 0.03505728766322136 +Loss at step 550: 0.03285621479153633 +Loss at step 600: 0.03919614851474762 +Loss at step 650: 0.03945038095116615 +Loss at step 700: 0.03538677096366882 +Loss at step 750: 0.044934406876564026 +Loss at step 800: 0.03320298716425896 +Loss at step 850: 0.04768715798854828 +Loss at step 900: 0.03354670852422714 +Mean training loss after epoch 187: 0.039411708287624664 + +EPOCH: 188 +Loss at step 0: 0.03753814473748207 +Loss at step 50: 0.04097013920545578 +Loss at step 100: 0.030687859281897545 +Loss at step 150: 0.0322105698287487 +Loss at step 200: 0.03676756098866463 +Loss at step 250: 0.038250818848609924 +Loss at step 300: 0.03754312917590141 +Loss at step 350: 0.03725780174136162 +Loss at step 400: 0.047191228717565536 +Loss at step 450: 0.06354070454835892 +Loss at step 500: 0.032046183943748474 +Loss at step 550: 0.04977399855852127 +Loss at step 600: 0.036721959710121155 +Loss at step 650: 0.05434718355536461 +Loss at step 700: 0.038213517516851425 +Loss at step 750: 0.024722011759877205 +Loss at step 800: 0.02971751242876053 +Loss at step 850: 0.0317879281938076 +Loss at step 900: 0.04744962230324745 +Mean training loss after epoch 188: 0.04035358981235322 + +EPOCH: 189 +Loss at step 0: 0.040479883551597595 +Loss at step 50: 0.036622148007154465 +Loss at step 100: 0.034205641597509384 +Loss at step 150: 0.04935614764690399 +Loss at step 200: 0.029448114335536957 +Loss at step 250: 0.03502771258354187 +Loss at step 300: 0.05513657256960869 +Loss at step 350: 0.04774866998195648 +Loss at step 400: 0.0404169000685215 +Loss at step 450: 0.06562421470880508 +Loss at step 500: 0.035340260714292526 +Loss at step 550: 0.0556841604411602 +Loss at step 600: 0.030720310285687447 +Loss at step 650: 0.036314480006694794 +Loss at step 700: 0.03438333421945572 +Loss at step 750: 0.033112671226263046 +Loss at step 800: 0.03592928498983383 +Loss at step 850: 0.03132610768079758 +Loss at step 900: 0.03413686528801918 +Mean training loss after epoch 189: 0.03904189117379916 + +EPOCH: 190 +Loss at step 0: 0.04827755317091942 +Loss at step 50: 0.029142968356609344 +Loss at step 100: 0.036558929830789566 +Loss at step 150: 0.0487666018307209 +Loss at step 200: 0.052352845668792725 +Loss at step 250: 0.05458885058760643 +Loss at step 300: 0.03482016548514366 +Loss at step 350: 0.029485084116458893 +Loss at step 400: 0.039250362664461136 +Loss at step 450: 0.051643602550029755 +Loss at step 500: 0.03167468681931496 +Loss at step 550: 0.04167293757200241 +Loss at step 600: 0.03159556910395622 +Loss at step 650: 0.04445411637425423 +Loss at step 700: 0.04958534613251686 +Loss at step 750: 0.03268647566437721 +Loss at step 800: 0.06649065017700195 +Loss at step 850: 0.06429065763950348 +Loss at step 900: 0.04084879532456398 +Mean training loss after epoch 190: 0.03958941305250819 + +EPOCH: 191 +Loss at step 0: 0.035234544426202774 +Loss at step 50: 0.037936512380838394 +Loss at step 100: 0.043479036539793015 +Loss at step 150: 0.03322908282279968 +Loss at step 200: 0.049596164375543594 +Loss at step 250: 0.04965569078922272 +Loss at step 300: 0.0495617538690567 +Loss at step 350: 0.03661232441663742 +Loss at step 400: 0.03508910536766052 +Loss at step 450: 0.036798376590013504 +Loss at step 500: 0.054994553327560425 +Loss at step 550: 0.03120352514088154 +Loss at step 600: 0.048640795052051544 +Loss at step 650: 0.0348392091691494 +Loss at step 700: 0.055915024131536484 +Loss at step 750: 0.05253498628735542 +Loss at step 800: 0.031080756336450577 +Loss at step 850: 0.04723038524389267 +Loss at step 900: 0.03465425223112106 +Mean training loss after epoch 191: 0.039508505227929876 + +EPOCH: 192 +Loss at step 0: 0.028439735993742943 +Loss at step 50: 0.04162871092557907 +Loss at step 100: 0.031034814193844795 +Loss at step 150: 0.05161105841398239 +Loss at step 200: 0.03534523397684097 +Loss at step 250: 0.03421344980597496 +Loss at step 300: 0.03441846743226051 +Loss at step 350: 0.05163728818297386 +Loss at step 400: 0.038883548229932785 +Loss at step 450: 0.031683698296546936 +Loss at step 500: 0.044283077120780945 +Loss at step 550: 0.0333872064948082 +Loss at step 600: 0.03577182814478874 +Loss at step 650: 0.035094004124403 +Loss at step 700: 0.052114710211753845 +Loss at step 750: 0.028549348935484886 +Loss at step 800: 0.031279418617486954 +Loss at step 850: 0.04974686726927757 +Loss at step 900: 0.06214940547943115 +Mean training loss after epoch 192: 0.03946408471747884 + +EPOCH: 193 +Loss at step 0: 0.02789539098739624 +Loss at step 50: 0.04128626734018326 +Loss at step 100: 0.031125077977776527 +Loss at step 150: 0.054317064583301544 +Loss at step 200: 0.035284653306007385 +Loss at step 250: 0.030020158737897873 +Loss at step 300: 0.0426630899310112 +Loss at step 350: 0.02814929373562336 +Loss at step 400: 0.03576153889298439 +Loss at step 450: 0.030557071790099144 +Loss at step 500: 0.07169098407030106 +Loss at step 550: 0.046338628977537155 +Loss at step 600: 0.07676282525062561 +Loss at step 650: 0.027175115421414375 +Loss at step 700: 0.03226439654827118 +Loss at step 750: 0.05648811534047127 +Loss at step 800: 0.05417701229453087 +Loss at step 850: 0.034749649465084076 +Loss at step 900: 0.05131750926375389 +Mean training loss after epoch 193: 0.0390426356639308 + +EPOCH: 194 +Loss at step 0: 0.031341664493083954 +Loss at step 50: 0.034224752336740494 +Loss at step 100: 0.030920159071683884 +Loss at step 150: 0.03192133828997612 +Loss at step 200: 0.05085880681872368 +Loss at step 250: 0.030830208212137222 +Loss at step 300: 0.032128382474184036 +Loss at step 350: 0.03746500611305237 +Loss at step 400: 0.03606203570961952 +Loss at step 450: 0.034037210047245026 +Loss at step 500: 0.032156843692064285 +Loss at step 550: 0.05800786241889 +Loss at step 600: 0.03398191183805466 +Loss at step 650: 0.034924451261758804 +Loss at step 700: 0.03250403329730034 +Loss at step 750: 0.02987152710556984 +Loss at step 800: 0.03198182210326195 +Loss at step 850: 0.03203301131725311 +Loss at step 900: 0.027829548344016075 +Mean training loss after epoch 194: 0.03956125608123124 + +EPOCH: 195 +Loss at step 0: 0.03269430249929428 +Loss at step 50: 0.040413130074739456 +Loss at step 100: 0.037335216999053955 +Loss at step 150: 0.037506330758333206 +Loss at step 200: 0.04935978725552559 +Loss at step 250: 0.04005909338593483 +Loss at step 300: 0.051902711391448975 +Loss at step 350: 0.04994789883494377 +Loss at step 400: 0.06731879711151123 +Loss at step 450: 0.041173528879880905 +Loss at step 500: 0.03356955945491791 +Loss at step 550: 0.03218766674399376 +Loss at step 600: 0.03400886803865433 +Loss at step 650: 0.046790432184934616 +Loss at step 700: 0.0261378213763237 +Loss at step 750: 0.04084371030330658 +Loss at step 800: 0.03878629207611084 +Loss at step 850: 0.031192457303404808 +Loss at step 900: 0.04036341607570648 +Mean training loss after epoch 195: 0.03942633575714156 + +EPOCH: 196 +Loss at step 0: 0.05267094075679779 +Loss at step 50: 0.051652032881975174 +Loss at step 100: 0.030373184010386467 +Loss at step 150: 0.04002588614821434 +Loss at step 200: 0.033278606832027435 +Loss at step 250: 0.03507424145936966 +Loss at step 300: 0.037266410887241364 +Loss at step 350: 0.039247218519449234 +Loss at step 400: 0.029236620292067528 +Loss at step 450: 0.03595879673957825 +Loss at step 500: 0.042438700795173645 +Loss at step 550: 0.04551468789577484 +Loss at step 600: 0.04240725561976433 +Loss at step 650: 0.03342801705002785 +Loss at step 700: 0.0306172464042902 +Loss at step 750: 0.03251747041940689 +Loss at step 800: 0.037676624953746796 +Loss at step 850: 0.03884509578347206 +Loss at step 900: 0.05178022384643555 +Mean training loss after epoch 196: 0.03961125413166371 + +EPOCH: 197 +Loss at step 0: 0.03382141515612602 +Loss at step 50: 0.036528680473566055 +Loss at step 100: 0.03256684169173241 +Loss at step 150: 0.04024352878332138 +Loss at step 200: 0.03290350362658501 +Loss at step 250: 0.05601493641734123 +Loss at step 300: 0.03842312470078468 +Loss at step 350: 0.03384149447083473 +Loss at step 400: 0.03558668866753578 +Loss at step 450: 0.03347795829176903 +Loss at step 500: 0.03716802969574928 +Loss at step 550: 0.03866909071803093 +Loss at step 600: 0.04315279796719551 +Loss at step 650: 0.030964933335781097 +Loss at step 700: 0.031178439036011696 +Loss at step 750: 0.030622651800513268 +Loss at step 800: 0.03555544838309288 +Loss at step 850: 0.04361952841281891 +Loss at step 900: 0.029841801151633263 +Mean training loss after epoch 197: 0.03926740740058519 + +EPOCH: 198 +Loss at step 0: 0.03077380359172821 +Loss at step 50: 0.0331268385052681 +Loss at step 100: 0.028607361018657684 +Loss at step 150: 0.027219180017709732 +Loss at step 200: 0.03859898820519447 +Loss at step 250: 0.043153245002031326 +Loss at step 300: 0.037560559809207916 +Loss at step 350: 0.03113088570535183 +Loss at step 400: 0.05178512632846832 +Loss at step 450: 0.03197965398430824 +Loss at step 500: 0.036399465054273605 +Loss at step 550: 0.039263416081666946 +Loss at step 600: 0.03154432773590088 +Loss at step 650: 0.033167921006679535 +Loss at step 700: 0.03971502184867859 +Loss at step 750: 0.029375536367297173 +Loss at step 800: 0.08838345855474472 +Loss at step 850: 0.054228276014328 +Loss at step 900: 0.034299567341804504 +Mean training loss after epoch 198: 0.039270908790213596 + +EPOCH: 199 +Loss at step 0: 0.032876938581466675 +Loss at step 50: 0.04957316815853119 +Loss at step 100: 0.04838715121150017 +Loss at step 150: 0.06222357973456383 +Loss at step 200: 0.03767170011997223 +Loss at step 250: 0.03373800590634346 +Loss at step 300: 0.03463589772582054 +Loss at step 350: 0.036685265600681305 +Loss at step 400: 0.037816766649484634 +Loss at step 450: 0.04994241148233414 +Loss at step 500: 0.05309994891285896 +Loss at step 550: 0.050136446952819824 +Loss at step 600: 0.034272655844688416 +Loss at step 650: 0.04841490089893341 +Loss at step 700: 0.032551348209381104 +Loss at step 750: 0.04803447797894478 +Loss at step 800: 0.027424441650509834 +Loss at step 850: 0.03509342297911644 +Loss at step 900: 0.04698752239346504 +Mean training loss after epoch 199: 0.039368446850811624 + +EPOCH: 200 +Loss at step 0: 0.033516522496938705 +Loss at step 50: 0.033314839005470276 +Loss at step 100: 0.028926312923431396 +Loss at step 150: 0.045188531279563904 +Loss at step 200: 0.03707105666399002 +Loss at step 250: 0.04511125758290291 +Loss at step 300: 0.041642624884843826 +Loss at step 350: 0.03727297484874725 +Loss at step 400: 0.04820782691240311 +Loss at step 450: 0.05873133987188339 +Loss at step 500: 0.03943607211112976 +Loss at step 550: 0.03903554007411003 +Loss at step 600: 0.03445577993988991 +Loss at step 650: 0.03482406958937645 +Loss at step 700: 0.03471191227436066 +Loss at step 750: 0.0357685312628746 +Loss at step 800: 0.046298760920763016 +Loss at step 850: 0.03532588854432106 +Loss at step 900: 0.04996367171406746 +Mean training loss after epoch 200: 0.03945373677646618 + +EPOCH: 201 +Loss at step 0: 0.035214416682720184 +Loss at step 50: 0.035346150398254395 +Loss at step 100: 0.034103572368621826 +Loss at step 150: 0.038458410650491714 +Loss at step 200: 0.038615066558122635 +Loss at step 250: 0.03179283067584038 +Loss at step 300: 0.035539235919713974 +Loss at step 350: 0.03436723351478577 +Loss at step 400: 0.05592014268040657 +Loss at step 450: 0.02996472455561161 +Loss at step 500: 0.03577533736824989 +Loss at step 550: 0.025224490091204643 +Loss at step 600: 0.03710426017642021 +Loss at step 650: 0.04781566187739372 +Loss at step 700: 0.03749268129467964 +Loss at step 750: 0.03389711305499077 +Loss at step 800: 0.03144014626741409 +Loss at step 850: 0.038093969225883484 +Loss at step 900: 0.04576469212770462 +Mean training loss after epoch 201: 0.03969399575620636 + +EPOCH: 202 +Loss at step 0: 0.0476631335914135 +Loss at step 50: 0.031282223761081696 +Loss at step 100: 0.044516369700431824 +Loss at step 150: 0.06186264380812645 +Loss at step 200: 0.0393553301692009 +Loss at step 250: 0.03279627859592438 +Loss at step 300: 0.0629049614071846 +Loss at step 350: 0.030579999089241028 +Loss at step 400: 0.031155675649642944 +Loss at step 450: 0.06583017855882645 +Loss at step 500: 0.05303068459033966 +Loss at step 550: 0.036140043288469315 +Loss at step 600: 0.03231840208172798 +Loss at step 650: 0.05249636247754097 +Loss at step 700: 0.051017142832279205 +Loss at step 750: 0.04280295595526695 +Loss at step 800: 0.03303103521466255 +Loss at step 850: 0.03351632133126259 +Loss at step 900: 0.038658663630485535 +Mean training loss after epoch 202: 0.03924750817268451 + +EPOCH: 203 +Loss at step 0: 0.04230416938662529 +Loss at step 50: 0.03197874128818512 +Loss at step 100: 0.034516096115112305 +Loss at step 150: 0.0965784564614296 +Loss at step 200: 0.036621060222387314 +Loss at step 250: 0.033483728766441345 +Loss at step 300: 0.05883985385298729 +Loss at step 350: 0.03732655197381973 +Loss at step 400: 0.03222108259797096 +Loss at step 450: 0.0372038297355175 +Loss at step 500: 0.031740378588438034 +Loss at step 550: 0.032624147832393646 +Loss at step 600: 0.035175345838069916 +Loss at step 650: 0.0395626574754715 +Loss at step 700: 0.04586372524499893 +Loss at step 750: 0.032561711966991425 +Loss at step 800: 0.06468833237886429 +Loss at step 850: 0.03629869595170021 +Loss at step 900: 0.030181996524333954 +Mean training loss after epoch 203: 0.03954556779161509 + +EPOCH: 204 +Loss at step 0: 0.029719999060034752 +Loss at step 50: 0.03435388207435608 +Loss at step 100: 0.035306770354509354 +Loss at step 150: 0.04551118612289429 +Loss at step 200: 0.03435719758272171 +Loss at step 250: 0.034615252166986465 +Loss at step 300: 0.030884413048624992 +Loss at step 350: 0.050709690898656845 +Loss at step 400: 0.038177862763404846 +Loss at step 450: 0.031270384788513184 +Loss at step 500: 0.035266853868961334 +Loss at step 550: 0.07006863504648209 +Loss at step 600: 0.03182707354426384 +Loss at step 650: 0.05542439594864845 +Loss at step 700: 0.04853573441505432 +Loss at step 750: 0.027383018285036087 +Loss at step 800: 0.03046734444797039 +Loss at step 850: 0.04207098111510277 +Loss at step 900: 0.03459654748439789 +Mean training loss after epoch 204: 0.03929966339456247 + +EPOCH: 205 +Loss at step 0: 0.0312785767018795 +Loss at step 50: 0.036261215806007385 +Loss at step 100: 0.04925072193145752 +Loss at step 150: 0.04340329393744469 +Loss at step 200: 0.039639346301555634 +Loss at step 250: 0.03776752948760986 +Loss at step 300: 0.042972952127456665 +Loss at step 350: 0.030083203688263893 +Loss at step 400: 0.04385426640510559 +Loss at step 450: 0.0570792555809021 +Loss at step 500: 0.05148709565401077 +Loss at step 550: 0.0342877060174942 +Loss at step 600: 0.037329480051994324 +Loss at step 650: 0.03023405931890011 +Loss at step 700: 0.027055177837610245 +Loss at step 750: 0.026842977851629257 +Loss at step 800: 0.050824299454689026 +Loss at step 850: 0.032287370413541794 +Loss at step 900: 0.02937036007642746 +Mean training loss after epoch 205: 0.03916449878197997 + +EPOCH: 206 +Loss at step 0: 0.03259656950831413 +Loss at step 50: 0.04985092580318451 +Loss at step 100: 0.02949802204966545 +Loss at step 150: 0.036611951887607574 +Loss at step 200: 0.039992108941078186 +Loss at step 250: 0.0345536433160305 +Loss at step 300: 0.032152608036994934 +Loss at step 350: 0.04637967050075531 +Loss at step 400: 0.02919534221291542 +Loss at step 450: 0.031057648360729218 +Loss at step 500: 0.03315690532326698 +Loss at step 550: 0.05003923177719116 +Loss at step 600: 0.045779258012771606 +Loss at step 650: 0.02811567671597004 +Loss at step 700: 0.038607172667980194 +Loss at step 750: 0.04060652107000351 +Loss at step 800: 0.0453539676964283 +Loss at step 850: 0.035857975482940674 +Loss at step 900: 0.031072260811924934 +Mean training loss after epoch 206: 0.038921635300079895 + +EPOCH: 207 +Loss at step 0: 0.03965787962079048 +Loss at step 50: 0.0446898527443409 +Loss at step 100: 0.028009677305817604 +Loss at step 150: 0.038405463099479675 +Loss at step 200: 0.06563003361225128 +Loss at step 250: 0.03391384705901146 +Loss at step 300: 0.04727596417069435 +Loss at step 350: 0.03483771160244942 +Loss at step 400: 0.036268409341573715 +Loss at step 450: 0.030389249324798584 +Loss at step 500: 0.0310251172631979 +Loss at step 550: 0.0379030704498291 +Loss at step 600: 0.040424078702926636 +Loss at step 650: 0.0521022230386734 +Loss at step 700: 0.033831410109996796 +Loss at step 750: 0.033530574291944504 +Loss at step 800: 0.036986444145441055 +Loss at step 850: 0.032420702278614044 +Loss at step 900: 0.03470515087246895 +Mean training loss after epoch 207: 0.03863498132858576 + +EPOCH: 208 +Loss at step 0: 0.033557191491127014 +Loss at step 50: 0.03412749990820885 +Loss at step 100: 0.048487868160009384 +Loss at step 150: 0.05572287738323212 +Loss at step 200: 0.04506176337599754 +Loss at step 250: 0.06600682437419891 +Loss at step 300: 0.05004393309354782 +Loss at step 350: 0.05113781616091728 +Loss at step 400: 0.03928277641534805 +Loss at step 450: 0.056767385452985764 +Loss at step 500: 0.04085297882556915 +Loss at step 550: 0.0346493199467659 +Loss at step 600: 0.031214237213134766 +Loss at step 650: 0.04923402890563011 +Loss at step 700: 0.06293520331382751 +Loss at step 750: 0.03206819295883179 +Loss at step 800: 0.037167709320783615 +Loss at step 850: 0.03659123182296753 +Loss at step 900: 0.06300326436758041 +Mean training loss after epoch 208: 0.03929860922279579 + +EPOCH: 209 +Loss at step 0: 0.03361137583851814 +Loss at step 50: 0.026428446173667908 +Loss at step 100: 0.034164272248744965 +Loss at step 150: 0.0364573635160923 +Loss at step 200: 0.041490085422992706 +Loss at step 250: 0.05014254152774811 +Loss at step 300: 0.04537137597799301 +Loss at step 350: 0.03958693891763687 +Loss at step 400: 0.03602161258459091 +Loss at step 450: 0.028751106932759285 +Loss at step 500: 0.04826076701283455 +Loss at step 550: 0.02965088002383709 +Loss at step 600: 0.03778247535228729 +Loss at step 650: 0.05167883262038231 +Loss at step 700: 0.029958970844745636 +Loss at step 750: 0.028692517429590225 +Loss at step 800: 0.029456930235028267 +Loss at step 850: 0.04437252879142761 +Loss at step 900: 0.03329407423734665 +Mean training loss after epoch 209: 0.03891603271367707 + +EPOCH: 210 +Loss at step 0: 0.034966133534908295 +Loss at step 50: 0.03536030650138855 +Loss at step 100: 0.07317616790533066 +Loss at step 150: 0.0346195213496685 +Loss at step 200: 0.027271442115306854 +Loss at step 250: 0.024927347898483276 +Loss at step 300: 0.031957872211933136 +Loss at step 350: 0.033988021314144135 +Loss at step 400: 0.0324539951980114 +Loss at step 450: 0.027275538071990013 +Loss at step 500: 0.034581758081912994 +Loss at step 550: 0.05167417973279953 +Loss at step 600: 0.03554034233093262 +Loss at step 650: 0.03346965089440346 +Loss at step 700: 0.03680897504091263 +Loss at step 750: 0.03390275314450264 +Loss at step 800: 0.03475474938750267 +Loss at step 850: 0.03505280613899231 +Loss at step 900: 0.030334360897541046 +Mean training loss after epoch 210: 0.03880712026352885 + +EPOCH: 211 +Loss at step 0: 0.036252912133932114 +Loss at step 50: 0.039842527359724045 +Loss at step 100: 0.05772003158926964 +Loss at step 150: 0.03693200275301933 +Loss at step 200: 0.04155736789107323 +Loss at step 250: 0.07334703207015991 +Loss at step 300: 0.03494369238615036 +Loss at step 350: 0.03264397382736206 +Loss at step 400: 0.029918232932686806 +Loss at step 450: 0.02940177358686924 +Loss at step 500: 0.03305615857243538 +Loss at step 550: 0.038510505110025406 +Loss at step 600: 0.03817708045244217 +Loss at step 650: 0.06494858860969543 +Loss at step 700: 0.030305013060569763 +Loss at step 750: 0.031748753041028976 +Loss at step 800: 0.03335447981953621 +Loss at step 850: 0.029417529702186584 +Loss at step 900: 0.03409936651587486 +Mean training loss after epoch 211: 0.03944876101582861 + +EPOCH: 212 +Loss at step 0: 0.03463529050350189 +Loss at step 50: 0.04601624608039856 +Loss at step 100: 0.03157833218574524 +Loss at step 150: 0.036684878170490265 +Loss at step 200: 0.02828829362988472 +Loss at step 250: 0.032570887356996536 +Loss at step 300: 0.03535538911819458 +Loss at step 350: 0.03101705014705658 +Loss at step 400: 0.04566393792629242 +Loss at step 450: 0.03168395534157753 +Loss at step 500: 0.0468168742954731 +Loss at step 550: 0.03592550754547119 +Loss at step 600: 0.030794337391853333 +Loss at step 650: 0.039325911551713943 +Loss at step 700: 0.03656806796789169 +Loss at step 750: 0.03873779997229576 +Loss at step 800: 0.03337247669696808 +Loss at step 850: 0.05102049559354782 +Loss at step 900: 0.03313032537698746 +Mean training loss after epoch 212: 0.038925153630247504 + +EPOCH: 213 +Loss at step 0: 0.03312905132770538 +Loss at step 50: 0.034856270998716354 +Loss at step 100: 0.03638290613889694 +Loss at step 150: 0.030590932816267014 +Loss at step 200: 0.03178534656763077 +Loss at step 250: 0.03628033027052879 +Loss at step 300: 0.03542640805244446 +Loss at step 350: 0.03868675231933594 +Loss at step 400: 0.03446945920586586 +Loss at step 450: 0.031632207334041595 +Loss at step 500: 0.03109968639910221 +Loss at step 550: 0.03217053413391113 +Loss at step 600: 0.031699612736701965 +Loss at step 650: 0.031050097197294235 +Loss at step 700: 0.03135169669985771 +Loss at step 750: 0.05426996573805809 +Loss at step 800: 0.031804606318473816 +Loss at step 850: 0.037588391453027725 +Loss at step 900: 0.058043185621500015 +Mean training loss after epoch 213: 0.038873043518934426 + +EPOCH: 214 +Loss at step 0: 0.028386587277054787 +Loss at step 50: 0.050295133143663406 +Loss at step 100: 0.046243395656347275 +Loss at step 150: 0.03635299205780029 +Loss at step 200: 0.032567527145147324 +Loss at step 250: 0.026220694184303284 +Loss at step 300: 0.029877593740820885 +Loss at step 350: 0.036172349005937576 +Loss at step 400: 0.028854016214609146 +Loss at step 450: 0.033716216683387756 +Loss at step 500: 0.022720973938703537 +Loss at step 550: 0.03392207995057106 +Loss at step 600: 0.03281420096755028 +Loss at step 650: 0.06516284495592117 +Loss at step 700: 0.038289912045001984 +Loss at step 750: 0.03463584557175636 +Loss at step 800: 0.03736652433872223 +Loss at step 850: 0.049522798508405685 +Loss at step 900: 0.03491751104593277 +Mean training loss after epoch 214: 0.0391494517685222 + +EPOCH: 215 +Loss at step 0: 0.028759721666574478 +Loss at step 50: 0.0389941930770874 +Loss at step 100: 0.030850136652588844 +Loss at step 150: 0.0318099707365036 +Loss at step 200: 0.034005057066679 +Loss at step 250: 0.052801866084337234 +Loss at step 300: 0.035044100135564804 +Loss at step 350: 0.052397601306438446 +Loss at step 400: 0.04918781667947769 +Loss at step 450: 0.0334693007171154 +Loss at step 500: 0.0463688038289547 +Loss at step 550: 0.033775392919778824 +Loss at step 600: 0.030683718621730804 +Loss at step 650: 0.03664135932922363 +Loss at step 700: 0.03418098762631416 +Loss at step 750: 0.03744911029934883 +Loss at step 800: 0.029192639514803886 +Loss at step 850: 0.05244458466768265 +Loss at step 900: 0.03115343302488327 +Mean training loss after epoch 215: 0.039243683344059026 + +EPOCH: 216 +Loss at step 0: 0.0490545816719532 +Loss at step 50: 0.04258950799703598 +Loss at step 100: 0.03089594468474388 +Loss at step 150: 0.051975492388010025 +Loss at step 200: 0.051118168979883194 +Loss at step 250: 0.03654303401708603 +Loss at step 300: 0.05530314892530441 +Loss at step 350: 0.048319995403289795 +Loss at step 400: 0.03275396302342415 +Loss at step 450: 0.03487536683678627 +Loss at step 500: 0.03248892351984978 +Loss at step 550: 0.03192398324608803 +Loss at step 600: 0.0315568670630455 +Loss at step 650: 0.033777620643377304 +Loss at step 700: 0.04205819591879845 +Loss at step 750: 0.05035315454006195 +Loss at step 800: 0.032998502254486084 +Loss at step 850: 0.03232625871896744 +Loss at step 900: 0.04717819020152092 +Mean training loss after epoch 216: 0.039436278183624815 + +EPOCH: 217 +Loss at step 0: 0.03221813589334488 +Loss at step 50: 0.03685177117586136 +Loss at step 100: 0.03466470539569855 +Loss at step 150: 0.031917549669742584 +Loss at step 200: 0.03797820210456848 +Loss at step 250: 0.04220956936478615 +Loss at step 300: 0.02914607711136341 +Loss at step 350: 0.05011358857154846 +Loss at step 400: 0.03618037700653076 +Loss at step 450: 0.03652055934071541 +Loss at step 500: 0.029459647834300995 +Loss at step 550: 0.031061850488185883 +Loss at step 600: 0.03643409535288811 +Loss at step 650: 0.04160930588841438 +Loss at step 700: 0.03050972893834114 +Loss at step 750: 0.03366422280669212 +Loss at step 800: 0.04819341376423836 +Loss at step 850: 0.04103286191821098 +Loss at step 900: 0.03597785905003548 +Mean training loss after epoch 217: 0.03884100024379901 + +EPOCH: 218 +Loss at step 0: 0.034691356122493744 +Loss at step 50: 0.0512816347181797 +Loss at step 100: 0.0498422309756279 +Loss at step 150: 0.034464772790670395 +Loss at step 200: 0.03690893575549126 +Loss at step 250: 0.027062783017754555 +Loss at step 300: 0.037605658173561096 +Loss at step 350: 0.034629516303539276 +Loss at step 400: 0.033738873898983 +Loss at step 450: 0.05193618685007095 +Loss at step 500: 0.0349096916615963 +Loss at step 550: 0.03417463228106499 +Loss at step 600: 0.03664156794548035 +Loss at step 650: 0.050338178873062134 +Loss at step 700: 0.04119217023253441 +Loss at step 750: 0.04932699352502823 +Loss at step 800: 0.032882045954465866 +Loss at step 850: 0.05195748060941696 +Loss at step 900: 0.056095194071531296 +Mean training loss after epoch 218: 0.039159961821221466 + +EPOCH: 219 +Loss at step 0: 0.03401932865381241 +Loss at step 50: 0.033970147371292114 +Loss at step 100: 0.02930084988474846 +Loss at step 150: 0.038355208933353424 +Loss at step 200: 0.02973788045346737 +Loss at step 250: 0.031476765871047974 +Loss at step 300: 0.03217240050435066 +Loss at step 350: 0.03983880579471588 +Loss at step 400: 0.035470906645059586 +Loss at step 450: 0.06286390125751495 +Loss at step 500: 0.03506041690707207 +Loss at step 550: 0.033520910888910294 +Loss at step 600: 0.03043210878968239 +Loss at step 650: 0.029626408591866493 +Loss at step 700: 0.03785299137234688 +Loss at step 750: 0.029762817546725273 +Loss at step 800: 0.03253250569105148 +Loss at step 850: 0.03220551460981369 +Loss at step 900: 0.03191516175866127 +Mean training loss after epoch 219: 0.03961604257533227 + +EPOCH: 220 +Loss at step 0: 0.035007499158382416 +Loss at step 50: 0.03676850348711014 +Loss at step 100: 0.04776357114315033 +Loss at step 150: 0.05322731286287308 +Loss at step 200: 0.0326763354241848 +Loss at step 250: 0.02881242334842682 +Loss at step 300: 0.06452496349811554 +Loss at step 350: 0.04654304310679436 +Loss at step 400: 0.033597029745578766 +Loss at step 450: 0.06089720129966736 +Loss at step 500: 0.05325775966048241 +Loss at step 550: 0.034849777817726135 +Loss at step 600: 0.03910364955663681 +Loss at step 650: 0.04495980218052864 +Loss at step 700: 0.03320116922259331 +Loss at step 750: 0.031099630519747734 +Loss at step 800: 0.02967253513634205 +Loss at step 850: 0.05618506297469139 +Loss at step 900: 0.023359069600701332 +Mean training loss after epoch 220: 0.03917579456711057 + +EPOCH: 221 +Loss at step 0: 0.033655982464551926 +Loss at step 50: 0.03283160179853439 +Loss at step 100: 0.06974532455205917 +Loss at step 150: 0.03525441884994507 +Loss at step 200: 0.033504560589790344 +Loss at step 250: 0.05242004245519638 +Loss at step 300: 0.049112677574157715 +Loss at step 350: 0.03913629800081253 +Loss at step 400: 0.030687332153320312 +Loss at step 450: 0.038405779749155045 +Loss at step 500: 0.055703528225421906 +Loss at step 550: 0.030176103115081787 +Loss at step 600: 0.0338451974093914 +Loss at step 650: 0.03483753651380539 +Loss at step 700: 0.046717628836631775 +Loss at step 750: 0.030152037739753723 +Loss at step 800: 0.051735568791627884 +Loss at step 850: 0.037322036921978 +Loss at step 900: 0.03671986982226372 +Mean training loss after epoch 221: 0.039529672089114246 + +EPOCH: 222 +Loss at step 0: 0.040746379643678665 +Loss at step 50: 0.03421521559357643 +Loss at step 100: 0.03173847496509552 +Loss at step 150: 0.027398396283388138 +Loss at step 200: 0.06909073889255524 +Loss at step 250: 0.02943158522248268 +Loss at step 300: 0.04270821437239647 +Loss at step 350: 0.036493346095085144 +Loss at step 400: 0.03201095387339592 +Loss at step 450: 0.03346439078450203 +Loss at step 500: 0.03585893660783768 +Loss at step 550: 0.03641887009143829 +Loss at step 600: 0.029378972947597504 +Loss at step 650: 0.03823823109269142 +Loss at step 700: 0.03458854556083679 +Loss at step 750: 0.03295740485191345 +Loss at step 800: 0.03287142887711525 +Loss at step 850: 0.027882471680641174 +Loss at step 900: 0.037205036729574203 +Mean training loss after epoch 222: 0.038900254218817265 + +EPOCH: 223 +Loss at step 0: 0.0465879812836647 +Loss at step 50: 0.04563435539603233 +Loss at step 100: 0.04606280103325844 +Loss at step 150: 0.03840605169534683 +Loss at step 200: 0.05436091497540474 +Loss at step 250: 0.03147437050938606 +Loss at step 300: 0.053010765463113785 +Loss at step 350: 0.048824213445186615 +Loss at step 400: 0.03178839758038521 +Loss at step 450: 0.05775563046336174 +Loss at step 500: 0.0351368747651577 +Loss at step 550: 0.03580415993928909 +Loss at step 600: 0.03660053759813309 +Loss at step 650: 0.04339035227894783 +Loss at step 700: 0.029888346791267395 +Loss at step 750: 0.06932703405618668 +Loss at step 800: 0.06762956827878952 +Loss at step 850: 0.047050874680280685 +Loss at step 900: 0.0409468412399292 +Mean training loss after epoch 223: 0.03930669576962238 + +EPOCH: 224 +Loss at step 0: 0.03919639065861702 +Loss at step 50: 0.03524477407336235 +Loss at step 100: 0.0306867565959692 +Loss at step 150: 0.036834586411714554 +Loss at step 200: 0.039752665907144547 +Loss at step 250: 0.03726755827665329 +Loss at step 300: 0.035303812474012375 +Loss at step 350: 0.052437279373407364 +Loss at step 400: 0.04689083993434906 +Loss at step 450: 0.05424738675355911 +Loss at step 500: 0.03356577455997467 +Loss at step 550: 0.033016446977853775 +Loss at step 600: 0.04336623102426529 +Loss at step 650: 0.03267418593168259 +Loss at step 700: 0.0531155988574028 +Loss at step 750: 0.03437129035592079 +Loss at step 800: 0.05175966024398804 +Loss at step 850: 0.028837081044912338 +Loss at step 900: 0.05922437086701393 +Mean training loss after epoch 224: 0.0386647883195009 + +EPOCH: 225 +Loss at step 0: 0.0487726666033268 +Loss at step 50: 0.03738081455230713 +Loss at step 100: 0.03074018657207489 +Loss at step 150: 0.03496900945901871 +Loss at step 200: 0.04817022383213043 +Loss at step 250: 0.04730185121297836 +Loss at step 300: 0.034258127212524414 +Loss at step 350: 0.034974534064531326 +Loss at step 400: 0.04086778685450554 +Loss at step 450: 0.03467993810772896 +Loss at step 500: 0.033085793256759644 +Loss at step 550: 0.09303003549575806 +Loss at step 600: 0.035393036901950836 +Loss at step 650: 0.035260267555713654 +Loss at step 700: 0.02982688695192337 +Loss at step 750: 0.032229792326688766 +Loss at step 800: 0.0330289825797081 +Loss at step 850: 0.03003479540348053 +Loss at step 900: 0.029361525550484657 +Mean training loss after epoch 225: 0.03890162737551592 + +EPOCH: 226 +Loss at step 0: 0.032596834003925323 +Loss at step 50: 0.03327403590083122 +Loss at step 100: 0.05183485522866249 +Loss at step 150: 0.035328250378370285 +Loss at step 200: 0.03449290245771408 +Loss at step 250: 0.03304436802864075 +Loss at step 300: 0.0349038764834404 +Loss at step 350: 0.034197621047496796 +Loss at step 400: 0.03215165063738823 +Loss at step 450: 0.05494612455368042 +Loss at step 500: 0.03127453476190567 +Loss at step 550: 0.039228979498147964 +Loss at step 600: 0.03493810445070267 +Loss at step 650: 0.031990375369787216 +Loss at step 700: 0.05123075097799301 +Loss at step 750: 0.02953650988638401 +Loss at step 800: 0.06226984038949013 +Loss at step 850: 0.0387241505086422 +Loss at step 900: 0.04094972461462021 +Mean training loss after epoch 226: 0.03866499135576522 + +EPOCH: 227 +Loss at step 0: 0.036986760795116425 +Loss at step 50: 0.03572157025337219 +Loss at step 100: 0.03125032037496567 +Loss at step 150: 0.03796594962477684 +Loss at step 200: 0.03207015246152878 +Loss at step 250: 0.04670996591448784 +Loss at step 300: 0.04626971855759621 +Loss at step 350: 0.032793719321489334 +Loss at step 400: 0.05023602768778801 +Loss at step 450: 0.04747440665960312 +Loss at step 500: 0.029485540464520454 +Loss at step 550: 0.032925404608249664 +Loss at step 600: 0.04897836595773697 +Loss at step 650: 0.03635069355368614 +Loss at step 700: 0.031118150800466537 +Loss at step 750: 0.04991304501891136 +Loss at step 800: 0.03546535223722458 +Loss at step 850: 0.04668305441737175 +Loss at step 900: 0.03488590568304062 +Mean training loss after epoch 227: 0.03901965910398058 + +EPOCH: 228 +Loss at step 0: 0.04974370822310448 +Loss at step 50: 0.03373951464891434 +Loss at step 100: 0.033895570784807205 +Loss at step 150: 0.03443300724029541 +Loss at step 200: 0.03293636441230774 +Loss at step 250: 0.04655670002102852 +Loss at step 300: 0.02601798251271248 +Loss at step 350: 0.03469035029411316 +Loss at step 400: 0.02628219872713089 +Loss at step 450: 0.034008219838142395 +Loss at step 500: 0.037605393677949905 +Loss at step 550: 0.04931284487247467 +Loss at step 600: 0.047764576971530914 +Loss at step 650: 0.03390823304653168 +Loss at step 700: 0.036051489412784576 +Loss at step 750: 0.03190223500132561 +Loss at step 800: 0.027517585083842278 +Loss at step 850: 0.026676075533032417 +Loss at step 900: 0.035612836480140686 +Mean training loss after epoch 228: 0.039303831541652616 + +EPOCH: 229 +Loss at step 0: 0.0288917887955904 +Loss at step 50: 0.06252633035182953 +Loss at step 100: 0.03233402222394943 +Loss at step 150: 0.03316564857959747 +Loss at step 200: 0.034948062151670456 +Loss at step 250: 0.02844369038939476 +Loss at step 300: 0.031051771715283394 +Loss at step 350: 0.04054409638047218 +Loss at step 400: 0.03256020322442055 +Loss at step 450: 0.03426603227853775 +Loss at step 500: 0.033789653331041336 +Loss at step 550: 0.027340136468410492 +Loss at step 600: 0.03498152643442154 +Loss at step 650: 0.033199094235897064 +Loss at step 700: 0.04883769899606705 +Loss at step 750: 0.041650619357824326 +Loss at step 800: 0.04393523558974266 +Loss at step 850: 0.03471892699599266 +Loss at step 900: 0.04146659001708031 +Mean training loss after epoch 229: 0.03924599493831905 + +EPOCH: 230 +Loss at step 0: 0.03501119837164879 +Loss at step 50: 0.031957730650901794 +Loss at step 100: 0.06566370278596878 +Loss at step 150: 0.03400876373052597 +Loss at step 200: 0.03895947337150574 +Loss at step 250: 0.03220095485448837 +Loss at step 300: 0.03845254331827164 +Loss at step 350: 0.03425909951329231 +Loss at step 400: 0.03328506276011467 +Loss at step 450: 0.03391508758068085 +Loss at step 500: 0.0358009859919548 +Loss at step 550: 0.03223380073904991 +Loss at step 600: 0.028488216921687126 +Loss at step 650: 0.03499921038746834 +Loss at step 700: 0.028844734653830528 +Loss at step 750: 0.04680425301194191 +Loss at step 800: 0.029129402711987495 +Loss at step 850: 0.05072740092873573 +Loss at step 900: 0.03400781750679016 +Mean training loss after epoch 230: 0.03910631895152681 + +EPOCH: 231 +Loss at step 0: 0.029594002291560173 +Loss at step 50: 0.033388398587703705 +Loss at step 100: 0.030222948640584946 +Loss at step 150: 0.03679189831018448 +Loss at step 200: 0.034237805753946304 +Loss at step 250: 0.03863455355167389 +Loss at step 300: 0.03193581849336624 +Loss at step 350: 0.04040062427520752 +Loss at step 400: 0.042988717555999756 +Loss at step 450: 0.048941951245069504 +Loss at step 500: 0.036740753799676895 +Loss at step 550: 0.04671098291873932 +Loss at step 600: 0.02967640571296215 +Loss at step 650: 0.03679099678993225 +Loss at step 700: 0.03040301240980625 +Loss at step 750: 0.048260949552059174 +Loss at step 800: 0.04979100823402405 +Loss at step 850: 0.03750582039356232 +Loss at step 900: 0.0348072350025177 +Mean training loss after epoch 231: 0.03864938818784093 + +EPOCH: 232 +Loss at step 0: 0.04080262780189514 +Loss at step 50: 0.0347711406648159 +Loss at step 100: 0.04852057993412018 +Loss at step 150: 0.035218965262174606 +Loss at step 200: 0.03393496945500374 +Loss at step 250: 0.047152820974588394 +Loss at step 300: 0.03276395425200462 +Loss at step 350: 0.03969300538301468 +Loss at step 400: 0.031070873141288757 +Loss at step 450: 0.0518072172999382 +Loss at step 500: 0.03238266333937645 +Loss at step 550: 0.03421289101243019 +Loss at step 600: 0.04663264751434326 +Loss at step 650: 0.033952854573726654 +Loss at step 700: 0.050659772008657455 +Loss at step 750: 0.02905084565281868 +Loss at step 800: 0.0381062813103199 +Loss at step 850: 0.03878077492117882 +Loss at step 900: 0.033104512840509415 +Mean training loss after epoch 232: 0.03919413653033565 + +EPOCH: 233 +Loss at step 0: 0.030844537541270256 +Loss at step 50: 0.05130849406123161 +Loss at step 100: 0.06363049894571304 +Loss at step 150: 0.032386112958192825 +Loss at step 200: 0.03350917622447014 +Loss at step 250: 0.04133734479546547 +Loss at step 300: 0.03743404150009155 +Loss at step 350: 0.030576646327972412 +Loss at step 400: 0.03490522503852844 +Loss at step 450: 0.046362556517124176 +Loss at step 500: 0.03323175013065338 +Loss at step 550: 0.042132873088121414 +Loss at step 600: 0.05004531517624855 +Loss at step 650: 0.036296043545007706 +Loss at step 700: 0.03690037876367569 +Loss at step 750: 0.041600458323955536 +Loss at step 800: 0.047059088945388794 +Loss at step 850: 0.03368044272065163 +Loss at step 900: 0.04677114263176918 +Mean training loss after epoch 233: 0.039063130267091524 + +EPOCH: 234 +Loss at step 0: 0.03360550105571747 +Loss at step 50: 0.03290083259344101 +Loss at step 100: 0.034932930022478104 +Loss at step 150: 0.03593425080180168 +Loss at step 200: 0.0440426729619503 +Loss at step 250: 0.05007448047399521 +Loss at step 300: 0.036359675228595734 +Loss at step 350: 0.031192978844046593 +Loss at step 400: 0.02669164352118969 +Loss at step 450: 0.0325494147837162 +Loss at step 500: 0.02928798645734787 +Loss at step 550: 0.037536706775426865 +Loss at step 600: 0.04039769247174263 +Loss at step 650: 0.03147507831454277 +Loss at step 700: 0.05367570370435715 +Loss at step 750: 0.044787436723709106 +Loss at step 800: 0.028223801404237747 +Loss at step 850: 0.05118735879659653 +Loss at step 900: 0.03116217814385891 +Mean training loss after epoch 234: 0.03894189336120701 + +EPOCH: 235 +Loss at step 0: 0.03430939093232155 +Loss at step 50: 0.03122139908373356 +Loss at step 100: 0.03365548327565193 +Loss at step 150: 0.029249092563986778 +Loss at step 200: 0.055865611881017685 +Loss at step 250: 0.04746852442622185 +Loss at step 300: 0.0332975871860981 +Loss at step 350: 0.04182635620236397 +Loss at step 400: 0.0324370302259922 +Loss at step 450: 0.03309211879968643 +Loss at step 500: 0.03604499250650406 +Loss at step 550: 0.02579680271446705 +Loss at step 600: 0.0383726991713047 +Loss at step 650: 0.03601831570267677 +Loss at step 700: 0.03624669089913368 +Loss at step 750: 0.03240237012505531 +Loss at step 800: 0.037177249789237976 +Loss at step 850: 0.033456508070230484 +Loss at step 900: 0.031031902879476547 +Mean training loss after epoch 235: 0.03901019287921155 + +EPOCH: 236 +Loss at step 0: 0.04972589761018753 +Loss at step 50: 0.044508837163448334 +Loss at step 100: 0.035206809639930725 +Loss at step 150: 0.03639085963368416 +Loss at step 200: 0.04725930094718933 +Loss at step 250: 0.05124089494347572 +Loss at step 300: 0.05921793729066849 +Loss at step 350: 0.0671318843960762 +Loss at step 400: 0.03563682362437248 +Loss at step 450: 0.03774426132440567 +Loss at step 500: 0.05267181992530823 +Loss at step 550: 0.03393133357167244 +Loss at step 600: 0.03175343573093414 +Loss at step 650: 0.05360681936144829 +Loss at step 700: 0.05347166955471039 +Loss at step 750: 0.035956475883722305 +Loss at step 800: 0.03781764209270477 +Loss at step 850: 0.031263165175914764 +Loss at step 900: 0.03862721472978592 +Mean training loss after epoch 236: 0.03922938913512014 + +EPOCH: 237 +Loss at step 0: 0.06566672772169113 +Loss at step 50: 0.031792644411325455 +Loss at step 100: 0.03735481947660446 +Loss at step 150: 0.034779392182826996 +Loss at step 200: 0.06874541938304901 +Loss at step 250: 0.05580836907029152 +Loss at step 300: 0.04530530422925949 +Loss at step 350: 0.03510119020938873 +Loss at step 400: 0.03770944103598595 +Loss at step 450: 0.06360543519258499 +Loss at step 500: 0.05799640715122223 +Loss at step 550: 0.03166048228740692 +Loss at step 600: 0.04853644594550133 +Loss at step 650: 0.03736702352762222 +Loss at step 700: 0.03405768796801567 +Loss at step 750: 0.03659890219569206 +Loss at step 800: 0.025363899767398834 +Loss at step 850: 0.03285384550690651 +Loss at step 900: 0.027758479118347168 +Mean training loss after epoch 237: 0.03887855290953539 + +EPOCH: 238 +Loss at step 0: 0.03370613232254982 +Loss at step 50: 0.04008691385388374 +Loss at step 100: 0.03578205406665802 +Loss at step 150: 0.05115322396159172 +Loss at step 200: 0.05261386185884476 +Loss at step 250: 0.03729558736085892 +Loss at step 300: 0.03199843317270279 +Loss at step 350: 0.05246369168162346 +Loss at step 400: 0.0468270368874073 +Loss at step 450: 0.03319402039051056 +Loss at step 500: 0.03305567055940628 +Loss at step 550: 0.05719682201743126 +Loss at step 600: 0.051412034779787064 +Loss at step 650: 0.03428887948393822 +Loss at step 700: 0.039233673363924026 +Loss at step 750: 0.028749994933605194 +Loss at step 800: 0.035130735486745834 +Loss at step 850: 0.03354177996516228 +Loss at step 900: 0.035492267459630966 +Mean training loss after epoch 238: 0.03845155325287314 + +EPOCH: 239 +Loss at step 0: 0.03153827786445618 +Loss at step 50: 0.028729312121868134 +Loss at step 100: 0.06446194648742676 +Loss at step 150: 0.05377689749002457 +Loss at step 200: 0.08363740891218185 +Loss at step 250: 0.028110457584261894 +Loss at step 300: 0.03177076205611229 +Loss at step 350: 0.02922712452709675 +Loss at step 400: 0.03213167190551758 +Loss at step 450: 0.03212027996778488 +Loss at step 500: 0.03912307322025299 +Loss at step 550: 0.032081134617328644 +Loss at step 600: 0.04772243648767471 +Loss at step 650: 0.029768118634819984 +Loss at step 700: 0.05186186730861664 +Loss at step 750: 0.0529065765440464 +Loss at step 800: 0.0341126024723053 +Loss at step 850: 0.051606934517621994 +Loss at step 900: 0.06367186456918716 +Mean training loss after epoch 239: 0.03928863341961779 + +EPOCH: 240 +Loss at step 0: 0.03887162730097771 +Loss at step 50: 0.050880786031484604 +Loss at step 100: 0.039547499269247055 +Loss at step 150: 0.028909575194120407 +Loss at step 200: 0.03536584973335266 +Loss at step 250: 0.03659897297620773 +Loss at step 300: 0.033108536154031754 +Loss at step 350: 0.02675941213965416 +Loss at step 400: 0.03612198680639267 +Loss at step 450: 0.05426546931266785 +Loss at step 500: 0.0330321379005909 +Loss at step 550: 0.05149666219949722 +Loss at step 600: 0.03463984280824661 +Loss at step 650: 0.028366481885313988 +Loss at step 700: 0.044816501438617706 +Loss at step 750: 0.04147651046514511 +Loss at step 800: 0.033853594213724136 +Loss at step 850: 0.03742264583706856 +Loss at step 900: 0.03427162766456604 +Mean training loss after epoch 240: 0.03903400417028079 + +EPOCH: 241 +Loss at step 0: 0.03618745505809784 +Loss at step 50: 0.03541552275419235 +Loss at step 100: 0.03832630813121796 +Loss at step 150: 0.031097810715436935 +Loss at step 200: 0.05100856348872185 +Loss at step 250: 0.05207272991538048 +Loss at step 300: 0.03732670843601227 +Loss at step 350: 0.03722361475229263 +Loss at step 400: 0.035946499556303024 +Loss at step 450: 0.04949504882097244 +Loss at step 500: 0.04036898910999298 +Loss at step 550: 0.03357747942209244 +Loss at step 600: 0.03259134665131569 +Loss at step 650: 0.037669405341148376 +Loss at step 700: 0.03358788415789604 +Loss at step 750: 0.05227615311741829 +Loss at step 800: 0.03881647065281868 +Loss at step 850: 0.033830676227808 +Loss at step 900: 0.04052143916487694 +Mean training loss after epoch 241: 0.03923677153257864 + +EPOCH: 242 +Loss at step 0: 0.031095046550035477 +Loss at step 50: 0.036116208881139755 +Loss at step 100: 0.029616231098771095 +Loss at step 150: 0.04071303829550743 +Loss at step 200: 0.05126992613077164 +Loss at step 250: 0.02957015484571457 +Loss at step 300: 0.04773343354463577 +Loss at step 350: 0.045865584164857864 +Loss at step 400: 0.03263732045888901 +Loss at step 450: 0.032689858227968216 +Loss at step 500: 0.04566469416022301 +Loss at step 550: 0.0382232740521431 +Loss at step 600: 0.0350029319524765 +Loss at step 650: 0.032325662672519684 +Loss at step 700: 0.029781801626086235 +Loss at step 750: 0.049002304673194885 +Loss at step 800: 0.05032792314887047 +Loss at step 850: 0.04957038536667824 +Loss at step 900: 0.03759152814745903 +Mean training loss after epoch 242: 0.03931932861029085 + +EPOCH: 243 +Loss at step 0: 0.038250263780355453 +Loss at step 50: 0.03833189606666565 +Loss at step 100: 0.03076758421957493 +Loss at step 150: 0.037498589605093 +Loss at step 200: 0.03554268926382065 +Loss at step 250: 0.029882796108722687 +Loss at step 300: 0.035731032490730286 +Loss at step 350: 0.030183272436261177 +Loss at step 400: 0.049779996275901794 +Loss at step 450: 0.040445808321237564 +Loss at step 500: 0.047706544399261475 +Loss at step 550: 0.047556981444358826 +Loss at step 600: 0.03681233897805214 +Loss at step 650: 0.02908683754503727 +Loss at step 700: 0.02920670434832573 +Loss at step 750: 0.030341751873493195 +Loss at step 800: 0.030939605087041855 +Loss at step 850: 0.03304276987910271 +Loss at step 900: 0.03191913291811943 +Mean training loss after epoch 243: 0.03905273881206698 + +EPOCH: 244 +Loss at step 0: 0.06598794460296631 +Loss at step 50: 0.02893797680735588 +Loss at step 100: 0.03549230843782425 +Loss at step 150: 0.03681396320462227 +Loss at step 200: 0.03673600032925606 +Loss at step 250: 0.04525090008974075 +Loss at step 300: 0.03606670722365379 +Loss at step 350: 0.024963827803730965 +Loss at step 400: 0.05136997625231743 +Loss at step 450: 0.06354724615812302 +Loss at step 500: 0.03802937641739845 +Loss at step 550: 0.04526302590966225 +Loss at step 600: 0.03441905602812767 +Loss at step 650: 0.04524055868387222 +Loss at step 700: 0.02656574547290802 +Loss at step 750: 0.07117734104394913 +Loss at step 800: 0.036995913833379745 +Loss at step 850: 0.03178925812244415 +Loss at step 900: 0.037087518721818924 +Mean training loss after epoch 244: 0.039370466573739736 + +EPOCH: 245 +Loss at step 0: 0.048373542726039886 +Loss at step 50: 0.033451057970523834 +Loss at step 100: 0.028194300830364227 +Loss at step 150: 0.047540027648210526 +Loss at step 200: 0.035591401159763336 +Loss at step 250: 0.047648560255765915 +Loss at step 300: 0.02961515262722969 +Loss at step 350: 0.036826133728027344 +Loss at step 400: 0.036649350076913834 +Loss at step 450: 0.046033795922994614 +Loss at step 500: 0.038278304040431976 +Loss at step 550: 0.05737805739045143 +Loss at step 600: 0.034337978810071945 +Loss at step 650: 0.047907229512929916 +Loss at step 700: 0.029077300801873207 +Loss at step 750: 0.026141535490751266 +Loss at step 800: 0.040920063853263855 +Loss at step 850: 0.03886491060256958 +Loss at step 900: 0.03947722166776657 +Mean training loss after epoch 245: 0.0392171511811806 + +EPOCH: 246 +Loss at step 0: 0.03227032348513603 +Loss at step 50: 0.03560794144868851 +Loss at step 100: 0.03743302822113037 +Loss at step 150: 0.030393380671739578 +Loss at step 200: 0.05443951487541199 +Loss at step 250: 0.050018344074487686 +Loss at step 300: 0.0319996252655983 +Loss at step 350: 0.03510141000151634 +Loss at step 400: 0.05677584186196327 +Loss at step 450: 0.05292211100459099 +Loss at step 500: 0.04954027011990547 +Loss at step 550: 0.036719441413879395 +Loss at step 600: 0.037654343992471695 +Loss at step 650: 0.045069120824337006 +Loss at step 700: 0.04070558398962021 +Loss at step 750: 0.03152972832322121 +Loss at step 800: 0.032653797417879105 +Loss at step 850: 0.032190416008234024 +Loss at step 900: 0.03875408694148064 +Mean training loss after epoch 246: 0.03970623201827632 + +EPOCH: 247 +Loss at step 0: 0.03510020300745964 +Loss at step 50: 0.028460653498768806 +Loss at step 100: 0.027597110718488693 +Loss at step 150: 0.0368226058781147 +Loss at step 200: 0.03382598236203194 +Loss at step 250: 0.0317443311214447 +Loss at step 300: 0.04087996855378151 +Loss at step 350: 0.029916564002633095 +Loss at step 400: 0.03555725887417793 +Loss at step 450: 0.047546543180942535 +Loss at step 500: 0.03285599499940872 +Loss at step 550: 0.03534551337361336 +Loss at step 600: 0.06219349056482315 +Loss at step 650: 0.0399542972445488 +Loss at step 700: 0.028029421344399452 +Loss at step 750: 0.0445200577378273 +Loss at step 800: 0.03424978256225586 +Loss at step 850: 0.025731278583407402 +Loss at step 900: 0.038073569536209106 +Mean training loss after epoch 247: 0.039303670599580065 + +EPOCH: 248 +Loss at step 0: 0.030523089691996574 +Loss at step 50: 0.03858974575996399 +Loss at step 100: 0.03324702009558678 +Loss at step 150: 0.033646613359451294 +Loss at step 200: 0.028181303292512894 +Loss at step 250: 0.03686067834496498 +Loss at step 300: 0.02987145632505417 +Loss at step 350: 0.02894950844347477 +Loss at step 400: 0.06689367443323135 +Loss at step 450: 0.029212500900030136 +Loss at step 500: 0.03293319419026375 +Loss at step 550: 0.032734330743551254 +Loss at step 600: 0.028700964525341988 +Loss at step 650: 0.0392322838306427 +Loss at step 700: 0.048850029706954956 +Loss at step 750: 0.05006673187017441 +Loss at step 800: 0.03270864486694336 +Loss at step 850: 0.03546704351902008 +Loss at step 900: 0.03319983929395676 +Mean training loss after epoch 248: 0.03888359318163667 + +EPOCH: 249 +Loss at step 0: 0.036848295480012894 +Loss at step 50: 0.03344952315092087 +Loss at step 100: 0.030375657603144646 +Loss at step 150: 0.04395264759659767 +Loss at step 200: 0.032356906682252884 +Loss at step 250: 0.03715647757053375 +Loss at step 300: 0.047168467193841934 +Loss at step 350: 0.028079552575945854 +Loss at step 400: 0.06304143369197845 +Loss at step 450: 0.03134738653898239 +Loss at step 500: 0.03943207114934921 +Loss at step 550: 0.02837996557354927 +Loss at step 600: 0.043990012258291245 +Loss at step 650: 0.05276433750987053 +Loss at step 700: 0.03396638482809067 +Loss at step 750: 0.02971942536532879 +Loss at step 800: 0.034827522933483124 +Loss at step 850: 0.05165525898337364 +Loss at step 900: 0.04828884080052376 +Mean training loss after epoch 249: 0.03892020832127663 + +EPOCH: 250 +Loss at step 0: 0.03388892486691475 +Loss at step 50: 0.03131557255983353 +Loss at step 100: 0.05106263980269432 +Loss at step 150: 0.034251436591148376 +Loss at step 200: 0.05417611449956894 +Loss at step 250: 0.035362519323825836 +Loss at step 300: 0.03308453783392906 +Loss at step 350: 0.03788948431611061 +Loss at step 400: 0.03544377163052559 +Loss at step 450: 0.036667026579380035 +Loss at step 500: 0.05221394822001457 +Loss at step 550: 0.04554745554924011 +Loss at step 600: 0.030167769640684128 +Loss at step 650: 0.0340573750436306 +Loss at step 700: 0.04917588457465172 +Loss at step 750: 0.03632596880197525 +Loss at step 800: 0.03073965013027191 +Loss at step 850: 0.027795109897851944 +Loss at step 900: 0.03306938707828522 +Mean training loss after epoch 250: 0.03891344393081248 + +EPOCH: 251 +Loss at step 0: 0.039425771683454514 +Loss at step 50: 0.030994482338428497 +Loss at step 100: 0.05115703493356705 +Loss at step 150: 0.03247106820344925 +Loss at step 200: 0.036715537309646606 +Loss at step 250: 0.04827232286334038 +Loss at step 300: 0.04005703702569008 +Loss at step 350: 0.03334115818142891 +Loss at step 400: 0.03590434789657593 +Loss at step 450: 0.035763200372457504 +Loss at step 500: 0.04415174201130867 +Loss at step 550: 0.040398284792900085 +Loss at step 600: 0.0336516872048378 +Loss at step 650: 0.06026959791779518 +Loss at step 700: 0.05111640691757202 +Loss at step 750: 0.03100166656076908 +Loss at step 800: 0.030193939805030823 +Loss at step 850: 0.03528062254190445 +Loss at step 900: 0.0353427454829216 +Mean training loss after epoch 251: 0.03915926600014096 + +EPOCH: 252 +Loss at step 0: 0.047453004866838455 +Loss at step 50: 0.03374917432665825 +Loss at step 100: 0.028373297303915024 +Loss at step 150: 0.06039762496948242 +Loss at step 200: 0.030490513890981674 +Loss at step 250: 0.026922443881630898 +Loss at step 300: 0.023987047374248505 +Loss at step 350: 0.03156132623553276 +Loss at step 400: 0.0331733338534832 +Loss at step 450: 0.028657851740717888 +Loss at step 500: 0.03813005983829498 +Loss at step 550: 0.028067875653505325 +Loss at step 600: 0.038628771901130676 +Loss at step 650: 0.033749524503946304 +Loss at step 700: 0.035677965730428696 +Loss at step 750: 0.028826430439949036 +Loss at step 800: 0.035348471254110336 +Loss at step 850: 0.03522353619337082 +Loss at step 900: 0.03503499925136566 +Mean training loss after epoch 252: 0.03881135373028802 + +EPOCH: 253 +Loss at step 0: 0.05614131689071655 +Loss at step 50: 0.02495838701725006 +Loss at step 100: 0.047003425657749176 +Loss at step 150: 0.029494477435946465 +Loss at step 200: 0.030162688344717026 +Loss at step 250: 0.06391389667987823 +Loss at step 300: 0.02871713973581791 +Loss at step 350: 0.03513010963797569 +Loss at step 400: 0.06640415638685226 +Loss at step 450: 0.03930817171931267 +Loss at step 500: 0.029120804741978645 +Loss at step 550: 0.02939414419233799 +Loss at step 600: 0.0732555091381073 +Loss at step 650: 0.030200641602277756 +Loss at step 700: 0.03482096642255783 +Loss at step 750: 0.030384305864572525 +Loss at step 800: 0.034591879695653915 +Loss at step 850: 0.03804127871990204 +Loss at step 900: 0.04978170618414879 +Mean training loss after epoch 253: 0.038376694193669854 + +EPOCH: 254 +Loss at step 0: 0.03105386532843113 +Loss at step 50: 0.04386408254504204 +Loss at step 100: 0.0490780733525753 +Loss at step 150: 0.035760875791311264 +Loss at step 200: 0.03169526532292366 +Loss at step 250: 0.033175766468048096 +Loss at step 300: 0.04693169519305229 +Loss at step 350: 0.033079493790864944 +Loss at step 400: 0.0317985899746418 +Loss at step 450: 0.03733246773481369 +Loss at step 500: 0.07064293324947357 +Loss at step 550: 0.03856551647186279 +Loss at step 600: 0.04498601704835892 +Loss at step 650: 0.050545915961265564 +Loss at step 700: 0.032745424658060074 +Loss at step 750: 0.03153373301029205 +Loss at step 800: 0.031938981264829636 +Loss at step 850: 0.06265679746866226 +Loss at step 900: 0.06484922766685486 +Mean training loss after epoch 254: 0.038683842222240056 + +EPOCH: 255 +Loss at step 0: 0.06329744309186935 +Loss at step 50: 0.04864448308944702 +Loss at step 100: 0.04800715669989586 +Loss at step 150: 0.05042517930269241 +Loss at step 200: 0.03493422269821167 +Loss at step 250: 0.033742208033800125 +Loss at step 300: 0.04552479088306427 +Loss at step 350: 0.03966813534498215 +Loss at step 400: 0.035213813185691833 +Loss at step 450: 0.037367723882198334 +Loss at step 500: 0.03437703102827072 +Loss at step 550: 0.036713361740112305 +Loss at step 600: 0.04268074035644531 +Loss at step 650: 0.03121114708483219 +Loss at step 700: 0.04252978786826134 +Loss at step 750: 0.028949955478310585 +Loss at step 800: 0.03465810418128967 +Loss at step 850: 0.030163034796714783 +Loss at step 900: 0.03492124006152153 +Mean training loss after epoch 255: 0.03891279746387114 + +EPOCH: 256 +Loss at step 0: 0.034355781972408295 +Loss at step 50: 0.051526568830013275 +Loss at step 100: 0.05003800615668297 +Loss at step 150: 0.03800094127655029 +Loss at step 200: 0.06754325330257416 +Loss at step 250: 0.03478672355413437 +Loss at step 300: 0.051567237824201584 +Loss at step 350: 0.0353977233171463 +Loss at step 400: 0.050872646272182465 +Loss at step 450: 0.04051677882671356 +Loss at step 500: 0.03876348212361336 +Loss at step 550: 0.04580125957727432 +Loss at step 600: 0.03618134185671806 +Loss at step 650: 0.025788437575101852 +Loss at step 700: 0.03462769463658333 +Loss at step 750: 0.0296726506203413 +Loss at step 800: 0.035372376441955566 +Loss at step 850: 0.06778371334075928 +Loss at step 900: 0.04379701986908913 +Mean training loss after epoch 256: 0.039542128389546356