diff --git "a/train_linear_uncond.out" "b/train_linear_uncond.out" new file mode 100644--- /dev/null +++ "b/train_linear_uncond.out" @@ -0,0 +1,11398 @@ +/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( +Schedule: linear +Cfg: False +Output path: /scratch/shared/beegfs/gabrijel/m2l/mini +Patch Size: 4 +Device: cuda:7 +===================================================================================== +Layer (type:depth-idx) Param # +===================================================================================== +DiT 18,816 +├─PatchEmbed: 1-1 -- +│ └─Conv2d: 2-1 6,528 +├─TimestepEmbedder: 1-2 -- +│ └─Mlp: 2-2 -- +│ │ └─Linear: 3-1 98,688 +│ │ └─SiLU: 3-2 -- +│ │ └─Linear: 3-3 147,840 +├─ModuleList: 1-3 -- +│ └─DiTBlock: 2-3 -- +│ │ └─LayerNorm: 3-4 -- +│ │ └─MultiheadAttention: 3-5 591,360 +│ │ └─LayerNorm: 3-6 -- +│ │ └─Mlp: 3-7 1,181,568 +│ │ └─Sequential: 3-8 887,040 +│ └─DiTBlock: 2-4 -- +│ │ └─LayerNorm: 3-9 -- +│ │ └─MultiheadAttention: 3-10 591,360 +│ │ └─LayerNorm: 3-11 -- +│ │ └─Mlp: 3-12 1,181,568 +│ │ └─Sequential: 3-13 887,040 +│ └─DiTBlock: 2-5 -- +│ │ └─LayerNorm: 3-14 -- +│ │ └─MultiheadAttention: 3-15 591,360 +│ │ └─LayerNorm: 3-16 -- +│ │ └─Mlp: 3-17 1,181,568 +│ │ └─Sequential: 3-18 887,040 +│ └─DiTBlock: 2-6 -- +│ │ └─LayerNorm: 3-19 -- +│ │ └─MultiheadAttention: 3-20 591,360 +│ │ └─LayerNorm: 3-21 -- +│ │ └─Mlp: 3-22 1,181,568 +│ │ └─Sequential: 3-23 887,040 +│ └─DiTBlock: 2-7 -- +│ │ └─LayerNorm: 3-24 -- +│ │ └─MultiheadAttention: 3-25 591,360 +│ │ └─LayerNorm: 3-26 -- +│ │ └─Mlp: 3-27 1,181,568 +│ │ └─Sequential: 3-28 887,040 +│ └─DiTBlock: 2-8 -- +│ │ └─LayerNorm: 3-29 -- +│ │ └─MultiheadAttention: 3-30 591,360 +│ │ └─LayerNorm: 3-31 -- +│ │ └─Mlp: 3-32 1,181,568 +│ │ └─Sequential: 3-33 887,040 +├─FinalLayer: 1-4 -- +│ └─LayerNorm: 2-9 -- +│ └─Linear: 2-10 6,160 +│ └─Sequential: 2-11 -- +│ │ └─SiLU: 3-34 -- +│ │ └─Linear: 3-35 295,680 +├─Unpatchify: 1-5 -- +===================================================================================== +Total params: 16,533,520 +Trainable params: 16,514,704 +Non-trainable params: 18,816 +===================================================================================== + +EPOCH: 1 +Loss at step 0: 0.9983355402946472 +Loss at step 50: 0.2903079092502594 +Loss at step 100: 0.18228086829185486 +Loss at step 150: 0.16647522151470184 +Loss at step 200: 0.14757655560970306 +Loss at step 250: 0.16753649711608887 +Loss at step 300: 0.15121056139469147 +Loss at step 350: 0.14301012456417084 +Loss at step 400: 0.14755477011203766 +Loss at step 450: 0.17956554889678955 +Loss at step 500: 0.13031719624996185 +Loss at step 550: 0.1285550445318222 +Loss at step 600: 0.12730535864830017 +Loss at step 650: 0.11963178217411041 +Loss at step 700: 0.1506616473197937 +Loss at step 750: 0.1204003319144249 +Loss at step 800: 0.14057542383670807 +Loss at step 850: 0.1300417184829712 +Loss at step 900: 0.1165168285369873 +Mean training loss after epoch 1: 0.17563413155835067 + +EPOCH: 2 +Loss at step 0: 0.09970653802156448 +Loss at step 50: 0.1185920462012291 +Loss at step 100: 0.17068375647068024 +Loss at step 150: 0.10113532841205597 +Loss at step 200: 0.09918314218521118 +Loss at step 250: 0.11327861994504929 +Loss at step 300: 0.09577237069606781 +Loss at step 350: 0.12124773859977722 +Loss at step 400: 0.10005252063274384 +Loss at step 450: 0.09838350862264633 +Loss at step 500: 0.11507082730531693 +Loss at step 550: 0.08560161292552948 +Loss at step 600: 0.09647355228662491 +Loss at step 650: 0.10567637532949448 +Loss at step 700: 0.10416877269744873 +Loss at step 750: 0.09836909174919128 +Loss at step 800: 0.0903918668627739 +Loss at step 850: 0.113694928586483 +Loss at step 900: 0.08678935468196869 +Mean training loss after epoch 2: 0.10604571053849608 + +EPOCH: 3 +Loss at step 0: 0.09160861372947693 +Loss at step 50: 0.13036148250102997 +Loss at step 100: 0.08778151869773865 +Loss at step 150: 0.10061898082494736 +Loss at step 200: 0.09973453730344772 +Loss at step 250: 0.09909766167402267 +Loss at step 300: 0.08312250673770905 +Loss at step 350: 0.09266644716262817 +Loss at step 400: 0.09767643362283707 +Loss at step 450: 0.08973315358161926 +Loss at step 500: 0.09321015328168869 +Loss at step 550: 0.09130364656448364 +Loss at step 600: 0.10766744613647461 +Loss at step 650: 0.09763919562101364 +Loss at step 700: 0.09630802273750305 +Loss at step 750: 0.09602616727352142 +Loss at step 800: 0.09875944256782532 +Loss at step 850: 0.09144000709056854 +Loss at step 900: 0.10816992074251175 +Mean training loss after epoch 3: 0.09615795769448728 + +EPOCH: 4 +Loss at step 0: 0.10575225949287415 +Loss at step 50: 0.09224344789981842 +Loss at step 100: 0.12793342769145966 +Loss at step 150: 0.09402000159025192 +Loss at step 200: 0.09129811823368073 +Loss at step 250: 0.12204641848802567 +Loss at step 300: 0.12323334813117981 +Loss at step 350: 0.09363935887813568 +Loss at step 400: 0.08846374601125717 +Loss at step 450: 0.08909369260072708 +Loss at step 500: 0.08233509957790375 +Loss at step 550: 0.07722485810518265 +Loss at step 600: 0.0780453309416771 +Loss at step 650: 0.06701749563217163 +Loss at step 700: 0.09400634467601776 +Loss at step 750: 0.07676397264003754 +Loss at step 800: 0.10502510517835617 +Loss at step 850: 0.11276748776435852 +Loss at step 900: 0.080634206533432 +Mean training loss after epoch 4: 0.09358636362116728 + +EPOCH: 5 +Loss at step 0: 0.08062907308340073 +Loss at step 50: 0.10158559679985046 +Loss at step 100: 0.1101926639676094 +Loss at step 150: 0.0826915055513382 +Loss at step 200: 0.11665242165327072 +Loss at step 250: 0.08714853972196579 +Loss at step 300: 0.09433847665786743 +Loss at step 350: 0.08811353147029877 +Loss at step 400: 0.09194111824035645 +Loss at step 450: 0.07689813524484634 +Loss at step 500: 0.09142694622278214 +Loss at step 550: 0.0790068656206131 +Loss at step 600: 0.06898579001426697 +Loss at step 650: 0.0760478749871254 +Loss at step 700: 0.07217445969581604 +Loss at step 750: 0.06753315776586533 +Loss at step 800: 0.0963238775730133 +Loss at step 850: 0.08646237105131149 +Loss at step 900: 0.06669653207063675 +Mean training loss after epoch 5: 0.08425523685033261 + +EPOCH: 6 +Loss at step 0: 0.06345484405755997 +Loss at step 50: 0.07751647382974625 +Loss at step 100: 0.05976444110274315 +Loss at step 150: 0.06740166991949081 +Loss at step 200: 0.08459659665822983 +Loss at step 250: 0.06102846935391426 +Loss at step 300: 0.07602357864379883 +Loss at step 350: 0.08334130793809891 +Loss at step 400: 0.08608637005090714 +Loss at step 450: 0.09524637460708618 +Loss at step 500: 0.07069969177246094 +Loss at step 550: 0.06725702434778214 +Loss at step 600: 0.061389561742544174 +Loss at step 650: 0.05431130528450012 +Loss at step 700: 0.07779577374458313 +Loss at step 750: 0.07553412765264511 +Loss at step 800: 0.06130794435739517 +Loss at step 850: 0.05557093396782875 +Loss at step 900: 0.06525351852178574 +Mean training loss after epoch 6: 0.0735207463243305 + +EPOCH: 7 +Loss at step 0: 0.08350516110658646 +Loss at step 50: 0.06710024923086166 +Loss at step 100: 0.08563098311424255 +Loss at step 150: 0.08081067353487015 +Loss at step 200: 0.08755811303853989 +Loss at step 250: 0.06885039061307907 +Loss at step 300: 0.05837796628475189 +Loss at step 350: 0.07269607484340668 +Loss at step 400: 0.06415263563394547 +Loss at step 450: 0.06556472927331924 +Loss at step 500: 0.06317287683486938 +Loss at step 550: 0.06656274944543839 +Loss at step 600: 0.05699075758457184 +Loss at step 650: 0.074561707675457 +Loss at step 700: 0.05932226777076721 +Loss at step 750: 0.057862941175699234 +Loss at step 800: 0.07067865133285522 +Loss at step 850: 0.05109178647398949 +Loss at step 900: 0.054282329976558685 +Mean training loss after epoch 7: 0.0700209329623594 + +EPOCH: 8 +Loss at step 0: 0.07905767112970352 +Loss at step 50: 0.0569082647562027 +Loss at step 100: 0.06671469658613205 +Loss at step 150: 0.05578415468335152 +Loss at step 200: 0.06267664581537247 +Loss at step 250: 0.07516257464885712 +Loss at step 300: 0.06780201196670532 +Loss at step 350: 0.06226211413741112 +Loss at step 400: 0.06963491439819336 +Loss at step 450: 0.07530141621828079 +Loss at step 500: 0.08229654282331467 +Loss at step 550: 0.0780356302857399 +Loss at step 600: 0.09733521193265915 +Loss at step 650: 0.06161467358469963 +Loss at step 700: 0.057157281786203384 +Loss at step 750: 0.06048029288649559 +Loss at step 800: 0.05342398211359978 +Loss at step 850: 0.06593238562345505 +Loss at step 900: 0.07183299958705902 +Mean training loss after epoch 8: 0.06768541646076799 + +EPOCH: 9 +Loss at step 0: 0.06287264823913574 +Loss at step 50: 0.056453216820955276 +Loss at step 100: 0.07968240976333618 +Loss at step 150: 0.07410801202058792 +Loss at step 200: 0.06614744663238525 +Loss at step 250: 0.06051461398601532 +Loss at step 300: 0.07186289876699448 +Loss at step 350: 0.05872350558638573 +Loss at step 400: 0.07972300797700882 +Loss at step 450: 0.07875444740056992 +Loss at step 500: 0.06810487806797028 +Loss at step 550: 0.05482642725110054 +Loss at step 600: 0.04889826849102974 +Loss at step 650: 0.07333357632160187 +Loss at step 700: 0.08024629950523376 +Loss at step 750: 0.05990247800946236 +Loss at step 800: 0.052494991570711136 +Loss at step 850: 0.047434426844120026 +Loss at step 900: 0.057309895753860474 +Mean training loss after epoch 9: 0.06518641077299743 + +EPOCH: 10 +Loss at step 0: 0.06373000144958496 +Loss at step 50: 0.055598627775907516 +Loss at step 100: 0.05055440962314606 +Loss at step 150: 0.07378523796796799 +Loss at step 200: 0.058023326098918915 +Loss at step 250: 0.0513606071472168 +Loss at step 300: 0.06811454892158508 +Loss at step 350: 0.06071093678474426 +Loss at step 400: 0.05394907295703888 +Loss at step 450: 0.04418645426630974 +Loss at step 500: 0.04900849610567093 +Loss at step 550: 0.06269282847642899 +Loss at step 600: 0.060749154537916183 +Loss at step 650: 0.07935914397239685 +Loss at step 700: 0.061867643147706985 +Loss at step 750: 0.06523965299129486 +Loss at step 800: 0.05672140419483185 +Loss at step 850: 0.06867726147174835 +Loss at step 900: 0.058768752962350845 +Mean training loss after epoch 10: 0.06339541932683128 + +EPOCH: 11 +Loss at step 0: 0.04564620926976204 +Loss at step 50: 0.07839568704366684 +Loss at step 100: 0.059821296483278275 +Loss at step 150: 0.06548560410737991 +Loss at step 200: 0.058042000979185104 +Loss at step 250: 0.05941858887672424 +Loss at step 300: 0.053579967468976974 +Loss at step 350: 0.051413312554359436 +Loss at step 400: 0.05536244064569473 +Loss at step 450: 0.048355117440223694 +Loss at step 500: 0.058389175683259964 +Loss at step 550: 0.05399579927325249 +Loss at step 600: 0.06611743569374084 +Loss at step 650: 0.055846620351076126 +Loss at step 700: 0.07106217741966248 +Loss at step 750: 0.059953007847070694 +Loss at step 800: 0.049926940351724625 +Loss at step 850: 0.09752129018306732 +Loss at step 900: 0.06295330822467804 +Mean training loss after epoch 11: 0.06256261436558609 + +EPOCH: 12 +Loss at step 0: 0.052633389830589294 +Loss at step 50: 0.06669321656227112 +Loss at step 100: 0.07102053612470627 +Loss at step 150: 0.05241028591990471 +Loss at step 200: 0.06535645574331284 +Loss at step 250: 0.05791535973548889 +Loss at step 300: 0.06131890043616295 +Loss at step 350: 0.05711239203810692 +Loss at step 400: 0.05817507207393646 +Loss at step 450: 0.07284418493509293 +Loss at step 500: 0.06170254200696945 +Loss at step 550: 0.0500304289162159 +Loss at step 600: 0.05348748341202736 +Loss at step 650: 0.061170149594545364 +Loss at step 700: 0.047910425812006 +Loss at step 750: 0.05857230722904205 +Loss at step 800: 0.05939800292253494 +Loss at step 850: 0.06371153146028519 +Loss at step 900: 0.05496177822351456 +Mean training loss after epoch 12: 0.06171424858478595 + +EPOCH: 13 +Loss at step 0: 0.06578821688890457 +Loss at step 50: 0.0659237876534462 +Loss at step 100: 0.06951215118169785 +Loss at step 150: 0.05437469482421875 +Loss at step 200: 0.054824620485305786 +Loss at step 250: 0.05196427181363106 +Loss at step 300: 0.05771702155470848 +Loss at step 350: 0.06764619797468185 +Loss at step 400: 0.05289178714156151 +Loss at step 450: 0.07843384891748428 +Loss at step 500: 0.0617065504193306 +Loss at step 550: 0.0580575205385685 +Loss at step 600: 0.05890209600329399 +Loss at step 650: 0.05977654829621315 +Loss at step 700: 0.05137667432427406 +Loss at step 750: 0.07429292798042297 +Loss at step 800: 0.07163972407579422 +Loss at step 850: 0.0521073192358017 +Loss at step 900: 0.0468786284327507 +Mean training loss after epoch 13: 0.06090371139935339 + +EPOCH: 14 +Loss at step 0: 0.06719788908958435 +Loss at step 50: 0.05598434433341026 +Loss at step 100: 0.05002114176750183 +Loss at step 150: 0.07383722811937332 +Loss at step 200: 0.07450052350759506 +Loss at step 250: 0.050906889140605927 +Loss at step 300: 0.0649464875459671 +Loss at step 350: 0.052742283791303635 +Loss at step 400: 0.05077645927667618 +Loss at step 450: 0.05291593819856644 +Loss at step 500: 0.06566813588142395 +Loss at step 550: 0.0796283707022667 +Loss at step 600: 0.05047386884689331 +Loss at step 650: 0.056442808359861374 +Loss at step 700: 0.05101129040122032 +Loss at step 750: 0.06580564379692078 +Loss at step 800: 0.05336259305477142 +Loss at step 850: 0.0647038072347641 +Loss at step 900: 0.0711858794093132 +Mean training loss after epoch 14: 0.06023188904007234 + +EPOCH: 15 +Loss at step 0: 0.051209595054388046 +Loss at step 50: 0.07037373632192612 +Loss at step 100: 0.05406580865383148 +Loss at step 150: 0.05419589951634407 +Loss at step 200: 0.07283905148506165 +Loss at step 250: 0.07526110857725143 +Loss at step 300: 0.06381464004516602 +Loss at step 350: 0.05956785008311272 +Loss at step 400: 0.0589938722550869 +Loss at step 450: 0.054017942398786545 +Loss at step 500: 0.04625575244426727 +Loss at step 550: 0.05990011617541313 +Loss at step 600: 0.04846929758787155 +Loss at step 650: 0.06021719425916672 +Loss at step 700: 0.05522119626402855 +Loss at step 750: 0.06695924699306488 +Loss at step 800: 0.05300808325409889 +Loss at step 850: 0.06322570145130157 +Loss at step 900: 0.06867984682321548 +Mean training loss after epoch 15: 0.059584184515990936 + +EPOCH: 16 +Loss at step 0: 0.06001434847712517 +Loss at step 50: 0.05556982383131981 +Loss at step 100: 0.0510258749127388 +Loss at step 150: 0.046986714005470276 +Loss at step 200: 0.07314082980155945 +Loss at step 250: 0.08063054084777832 +Loss at step 300: 0.052996404469013214 +Loss at step 350: 0.0537990927696228 +Loss at step 400: 0.054799389094114304 +Loss at step 450: 0.06207343563437462 +Loss at step 500: 0.04643355309963226 +Loss at step 550: 0.06084449961781502 +Loss at step 600: 0.07005240023136139 +Loss at step 650: 0.053219374269247055 +Loss at step 700: 0.07254889607429504 +Loss at step 750: 0.05616176128387451 +Loss at step 800: 0.06277403235435486 +Loss at step 850: 0.05936311185359955 +Loss at step 900: 0.05711854249238968 +Mean training loss after epoch 16: 0.05832975195335554 + +EPOCH: 17 +Loss at step 0: 0.058853935450315475 +Loss at step 50: 0.06889771670103073 +Loss at step 100: 0.05524708330631256 +Loss at step 150: 0.04575890675187111 +Loss at step 200: 0.07842770963907242 +Loss at step 250: 0.06022914499044418 +Loss at step 300: 0.04581920802593231 +Loss at step 350: 0.05120668560266495 +Loss at step 400: 0.05855054035782814 +Loss at step 450: 0.05131610482931137 +Loss at step 500: 0.05962131917476654 +Loss at step 550: 0.04929868504405022 +Loss at step 600: 0.07280829548835754 +Loss at step 650: 0.054516226053237915 +Loss at step 700: 0.057747211307287216 +Loss at step 750: 0.05581604689359665 +Loss at step 800: 0.05908640846610069 +Loss at step 850: 0.05070893466472626 +Loss at step 900: 0.043494001030921936 +Mean training loss after epoch 17: 0.058903095008594905 + +EPOCH: 18 +Loss at step 0: 0.06538410484790802 +Loss at step 50: 0.051274463534355164 +Loss at step 100: 0.04736437648534775 +Loss at step 150: 0.07051735371351242 +Loss at step 200: 0.0554698221385479 +Loss at step 250: 0.06378737837076187 +Loss at step 300: 0.05453440174460411 +Loss at step 350: 0.05707269534468651 +Loss at step 400: 0.05320645868778229 +Loss at step 450: 0.06928934156894684 +Loss at step 500: 0.04702098295092583 +Loss at step 550: 0.050672747194767 +Loss at step 600: 0.05967143923044205 +Loss at step 650: 0.06356772035360336 +Loss at step 700: 0.055718742311000824 +Loss at step 750: 0.057205889374017715 +Loss at step 800: 0.047695912420749664 +Loss at step 850: 0.05081622302532196 +Loss at step 900: 0.055412884801626205 +Mean training loss after epoch 18: 0.05785927040649375 + +EPOCH: 19 +Loss at step 0: 0.054076991975307465 +Loss at step 50: 0.06943845003843307 +Loss at step 100: 0.06442653387784958 +Loss at step 150: 0.052176982164382935 +Loss at step 200: 0.052588578313589096 +Loss at step 250: 0.05831068754196167 +Loss at step 300: 0.06005692481994629 +Loss at step 350: 0.0878036618232727 +Loss at step 400: 0.048771556466817856 +Loss at step 450: 0.0574478879570961 +Loss at step 500: 0.0661652535200119 +Loss at step 550: 0.054166946560144424 +Loss at step 600: 0.05260612070560455 +Loss at step 650: 0.06961595267057419 +Loss at step 700: 0.05419684201478958 +Loss at step 750: 0.05358076095581055 +Loss at step 800: 0.06662043929100037 +Loss at step 850: 0.04488072916865349 +Loss at step 900: 0.05998760089278221 +Mean training loss after epoch 19: 0.05815208868097776 + +EPOCH: 20 +Loss at step 0: 0.05099622532725334 +Loss at step 50: 0.06339594721794128 +Loss at step 100: 0.046588506549596786 +Loss at step 150: 0.07224276661872864 +Loss at step 200: 0.04482608661055565 +Loss at step 250: 0.05454130843281746 +Loss at step 300: 0.056804075837135315 +Loss at step 350: 0.07370352745056152 +Loss at step 400: 0.06112191081047058 +Loss at step 450: 0.05823417380452156 +Loss at step 500: 0.051358625292778015 +Loss at step 550: 0.04314117133617401 +Loss at step 600: 0.05443539470434189 +Loss at step 650: 0.07500668615102768 +Loss at step 700: 0.054563477635383606 +Loss at step 750: 0.06464920938014984 +Loss at step 800: 0.05668611824512482 +Loss at step 850: 0.04960291460156441 +Loss at step 900: 0.0451640747487545 +Mean training loss after epoch 20: 0.05707174998277159 + +EPOCH: 21 +Loss at step 0: 0.07718510180711746 +Loss at step 50: 0.04663774371147156 +Loss at step 100: 0.04668251797556877 +Loss at step 150: 0.04964904114603996 +Loss at step 200: 0.05001554265618324 +Loss at step 250: 0.08538968116044998 +Loss at step 300: 0.07177683711051941 +Loss at step 350: 0.06762474030256271 +Loss at step 400: 0.042368996888399124 +Loss at step 450: 0.05503622069954872 +Loss at step 500: 0.06707964837551117 +Loss at step 550: 0.06183305010199547 +Loss at step 600: 0.08403396606445312 +Loss at step 650: 0.04448738321661949 +Loss at step 700: 0.06849902868270874 +Loss at step 750: 0.05558907985687256 +Loss at step 800: 0.0561121329665184 +Loss at step 850: 0.06678000837564468 +Loss at step 900: 0.04941483214497566 +Mean training loss after epoch 21: 0.05673469266673522 + +EPOCH: 22 +Loss at step 0: 0.05634870380163193 +Loss at step 50: 0.06990368664264679 +Loss at step 100: 0.05607541650533676 +Loss at step 150: 0.07026470452547073 +Loss at step 200: 0.04599803313612938 +Loss at step 250: 0.06881234049797058 +Loss at step 300: 0.07719657570123672 +Loss at step 350: 0.051717568188905716 +Loss at step 400: 0.05527161434292793 +Loss at step 450: 0.05421420559287071 +Loss at step 500: 0.08476296067237854 +Loss at step 550: 0.07108505815267563 +Loss at step 600: 0.04690362885594368 +Loss at step 650: 0.05130293220281601 +Loss at step 700: 0.08341491222381592 +Loss at step 750: 0.04663535952568054 +Loss at step 800: 0.0468871034681797 +Loss at step 850: 0.066974937915802 +Loss at step 900: 0.06168410927057266 +Mean training loss after epoch 22: 0.05633798754339152 + +EPOCH: 23 +Loss at step 0: 0.055603016167879105 +Loss at step 50: 0.06739943474531174 +Loss at step 100: 0.06736724078655243 +Loss at step 150: 0.06426652520895004 +Loss at step 200: 0.048184823244810104 +Loss at step 250: 0.043103743344545364 +Loss at step 300: 0.06536700576543808 +Loss at step 350: 0.07435965538024902 +Loss at step 400: 0.0550750307738781 +Loss at step 450: 0.04587224870920181 +Loss at step 500: 0.047334957867860794 +Loss at step 550: 0.06349505484104156 +Loss at step 600: 0.06470964103937149 +Loss at step 650: 0.05199477821588516 +Loss at step 700: 0.05377091094851494 +Loss at step 750: 0.05131344869732857 +Loss at step 800: 0.06293124705553055 +Loss at step 850: 0.04941703751683235 +Loss at step 900: 0.07516985386610031 +Mean training loss after epoch 23: 0.05627640763293706 + +EPOCH: 24 +Loss at step 0: 0.04173830524086952 +Loss at step 50: 0.07472342997789383 +Loss at step 100: 0.061910394579172134 +Loss at step 150: 0.05912601575255394 +Loss at step 200: 0.05331674963235855 +Loss at step 250: 0.06256377696990967 +Loss at step 300: 0.050509173423051834 +Loss at step 350: 0.049964651465415955 +Loss at step 400: 0.04792248457670212 +Loss at step 450: 0.06528089195489883 +Loss at step 500: 0.0651390329003334 +Loss at step 550: 0.053137555718421936 +Loss at step 600: 0.04568968340754509 +Loss at step 650: 0.07677997648715973 +Loss at step 700: 0.048330362886190414 +Loss at step 750: 0.04938879609107971 +Loss at step 800: 0.057953160256147385 +Loss at step 850: 0.05150565132498741 +Loss at step 900: 0.0656861737370491 +Mean training loss after epoch 24: 0.055865966311808846 + +EPOCH: 25 +Loss at step 0: 0.05240952968597412 +Loss at step 50: 0.05742964893579483 +Loss at step 100: 0.0495821014046669 +Loss at step 150: 0.061766646802425385 +Loss at step 200: 0.049928683787584305 +Loss at step 250: 0.04931206628680229 +Loss at step 300: 0.05201227590441704 +Loss at step 350: 0.045436810702085495 +Loss at step 400: 0.0685374066233635 +Loss at step 450: 0.04369892179965973 +Loss at step 500: 0.04743170738220215 +Loss at step 550: 0.04734017699956894 +Loss at step 600: 0.07181960344314575 +Loss at step 650: 0.06123042479157448 +Loss at step 700: 0.06197654828429222 +Loss at step 750: 0.04835381358861923 +Loss at step 800: 0.04684865474700928 +Loss at step 850: 0.043482501059770584 +Loss at step 900: 0.05451043322682381 +Mean training loss after epoch 25: 0.05553666281023387 + +EPOCH: 26 +Loss at step 0: 0.04902949929237366 +Loss at step 50: 0.047454584389925 +Loss at step 100: 0.05288848653435707 +Loss at step 150: 0.06165778636932373 +Loss at step 200: 0.04231761395931244 +Loss at step 250: 0.047398891299963 +Loss at step 300: 0.05936333164572716 +Loss at step 350: 0.04636383056640625 +Loss at step 400: 0.05997397005558014 +Loss at step 450: 0.05327463522553444 +Loss at step 500: 0.049193691462278366 +Loss at step 550: 0.06166054308414459 +Loss at step 600: 0.048282913863658905 +Loss at step 650: 0.06263080984354019 +Loss at step 700: 0.051939111202955246 +Loss at step 750: 0.054134104400873184 +Loss at step 800: 0.04906224086880684 +Loss at step 850: 0.056777238845825195 +Loss at step 900: 0.04580234736204147 +Mean training loss after epoch 26: 0.05597451611209526 + +EPOCH: 27 +Loss at step 0: 0.05022750049829483 +Loss at step 50: 0.04589780792593956 +Loss at step 100: 0.06941407173871994 +Loss at step 150: 0.06381004303693771 +Loss at step 200: 0.05559215694665909 +Loss at step 250: 0.04773980751633644 +Loss at step 300: 0.056919630616903305 +Loss at step 350: 0.046822451055049896 +Loss at step 400: 0.05615323781967163 +Loss at step 450: 0.05349696800112724 +Loss at step 500: 0.046199917793273926 +Loss at step 550: 0.06860701739788055 +Loss at step 600: 0.058246761560440063 +Loss at step 650: 0.05177094787359238 +Loss at step 700: 0.05336657166481018 +Loss at step 750: 0.04641367495059967 +Loss at step 800: 0.05980854481458664 +Loss at step 850: 0.053325604647397995 +Loss at step 900: 0.04588623344898224 +Mean training loss after epoch 27: 0.05520913504493008 + +EPOCH: 28 +Loss at step 0: 0.04828135669231415 +Loss at step 50: 0.06155947968363762 +Loss at step 100: 0.04859397932887077 +Loss at step 150: 0.0652473121881485 +Loss at step 200: 0.04381202161312103 +Loss at step 250: 0.05521320551633835 +Loss at step 300: 0.054711487144231796 +Loss at step 350: 0.05465519055724144 +Loss at step 400: 0.051808763295412064 +Loss at step 450: 0.055641040205955505 +Loss at step 500: 0.0528603121638298 +Loss at step 550: 0.04882833734154701 +Loss at step 600: 0.05004681274294853 +Loss at step 650: 0.05289579927921295 +Loss at step 700: 0.04791521281003952 +Loss at step 750: 0.04925371706485748 +Loss at step 800: 0.062198929488658905 +Loss at step 850: 0.0545211136341095 +Loss at step 900: 0.04870881140232086 +Mean training loss after epoch 28: 0.05540319499589487 + +EPOCH: 29 +Loss at step 0: 0.07517839968204498 +Loss at step 50: 0.053784165531396866 +Loss at step 100: 0.04891663044691086 +Loss at step 150: 0.04563986882567406 +Loss at step 200: 0.04488968104124069 +Loss at step 250: 0.04799796640872955 +Loss at step 300: 0.043860044330358505 +Loss at step 350: 0.05952528491616249 +Loss at step 400: 0.046813756227493286 +Loss at step 450: 0.0429517887532711 +Loss at step 500: 0.06442321091890335 +Loss at step 550: 0.05994388088583946 +Loss at step 600: 0.06675710529088974 +Loss at step 650: 0.06444482505321503 +Loss at step 700: 0.04667859151959419 +Loss at step 750: 0.05296240746974945 +Loss at step 800: 0.04430631548166275 +Loss at step 850: 0.05376960337162018 +Loss at step 900: 0.048872265964746475 +Mean training loss after epoch 29: 0.05483736815467191 + +EPOCH: 30 +Loss at step 0: 0.05368848145008087 +Loss at step 50: 0.04030803591012955 +Loss at step 100: 0.04363354295492172 +Loss at step 150: 0.042191192507743835 +Loss at step 200: 0.05077720060944557 +Loss at step 250: 0.04715349152684212 +Loss at step 300: 0.06778068095445633 +Loss at step 350: 0.06741156429052353 +Loss at step 400: 0.052945297211408615 +Loss at step 450: 0.05811982974410057 +Loss at step 500: 0.0684160515666008 +Loss at step 550: 0.06103063002228737 +Loss at step 600: 0.05403907969594002 +Loss at step 650: 0.0707012265920639 +Loss at step 700: 0.043527133762836456 +Loss at step 750: 0.05779201164841652 +Loss at step 800: 0.0498744361102581 +Loss at step 850: 0.04957523196935654 +Loss at step 900: 0.058999303728342056 +Mean training loss after epoch 30: 0.054860019119087056 + +EPOCH: 31 +Loss at step 0: 0.049310024827718735 +Loss at step 50: 0.07176560163497925 +Loss at step 100: 0.06974601745605469 +Loss at step 150: 0.05386912450194359 +Loss at step 200: 0.05039747431874275 +Loss at step 250: 0.07695865631103516 +Loss at step 300: 0.06572337448596954 +Loss at step 350: 0.055669013410806656 +Loss at step 400: 0.06238775700330734 +Loss at step 450: 0.05986689403653145 +Loss at step 500: 0.07013839483261108 +Loss at step 550: 0.05601054057478905 +Loss at step 600: 0.045355163514614105 +Loss at step 650: 0.0437605045735836 +Loss at step 700: 0.06718980520963669 +Loss at step 750: 0.07880247384309769 +Loss at step 800: 0.04746502265334129 +Loss at step 850: 0.045701764523983 +Loss at step 900: 0.05296020954847336 +Mean training loss after epoch 31: 0.054413466378728716 + +EPOCH: 32 +Loss at step 0: 0.05617668107151985 +Loss at step 50: 0.06208961829543114 +Loss at step 100: 0.053446657955646515 +Loss at step 150: 0.06886325776576996 +Loss at step 200: 0.06299979239702225 +Loss at step 250: 0.05151471868157387 +Loss at step 300: 0.044561661779880524 +Loss at step 350: 0.04918292164802551 +Loss at step 400: 0.05904281884431839 +Loss at step 450: 0.06192123144865036 +Loss at step 500: 0.052302587777376175 +Loss at step 550: 0.05089617520570755 +Loss at step 600: 0.04949166625738144 +Loss at step 650: 0.0480043925344944 +Loss at step 700: 0.04619540274143219 +Loss at step 750: 0.054513826966285706 +Loss at step 800: 0.04930388182401657 +Loss at step 850: 0.06360268592834473 +Loss at step 900: 0.06642002612352371 +Mean training loss after epoch 32: 0.054555596295259655 + +EPOCH: 33 +Loss at step 0: 0.044782672077417374 +Loss at step 50: 0.047284357249736786 +Loss at step 100: 0.04925493523478508 +Loss at step 150: 0.04576935991644859 +Loss at step 200: 0.053064171224832535 +Loss at step 250: 0.06933432817459106 +Loss at step 300: 0.0492779016494751 +Loss at step 350: 0.06055759638547897 +Loss at step 400: 0.05520835518836975 +Loss at step 450: 0.05451856181025505 +Loss at step 500: 0.08360431343317032 +Loss at step 550: 0.04742000997066498 +Loss at step 600: 0.08052371442317963 +Loss at step 650: 0.056882817298173904 +Loss at step 700: 0.04971032217144966 +Loss at step 750: 0.048635419458150864 +Loss at step 800: 0.04947608709335327 +Loss at step 850: 0.05255472660064697 +Loss at step 900: 0.045274876058101654 +Mean training loss after epoch 33: 0.05415046823097826 + +EPOCH: 34 +Loss at step 0: 0.045806266367435455 +Loss at step 50: 0.04681321606040001 +Loss at step 100: 0.05726482346653938 +Loss at step 150: 0.04425106942653656 +Loss at step 200: 0.05739280581474304 +Loss at step 250: 0.05311074107885361 +Loss at step 300: 0.05525273084640503 +Loss at step 350: 0.06916007399559021 +Loss at step 400: 0.04250306263566017 +Loss at step 450: 0.04752336069941521 +Loss at step 500: 0.06781148910522461 +Loss at step 550: 0.047149695456027985 +Loss at step 600: 0.06493551284074783 +Loss at step 650: 0.07514088600873947 +Loss at step 700: 0.045343514531850815 +Loss at step 750: 0.05078522488474846 +Loss at step 800: 0.047780513763427734 +Loss at step 850: 0.054518286138772964 +Loss at step 900: 0.04313283786177635 +Mean training loss after epoch 34: 0.05403605787786466 + +EPOCH: 35 +Loss at step 0: 0.04942372441291809 +Loss at step 50: 0.04298358038067818 +Loss at step 100: 0.04731900617480278 +Loss at step 150: 0.05448926240205765 +Loss at step 200: 0.049438413232564926 +Loss at step 250: 0.0424935482442379 +Loss at step 300: 0.04982948303222656 +Loss at step 350: 0.049933772534132004 +Loss at step 400: 0.04660452529788017 +Loss at step 450: 0.052708424627780914 +Loss at step 500: 0.059705957770347595 +Loss at step 550: 0.04691097512841225 +Loss at step 600: 0.05173955112695694 +Loss at step 650: 0.05308176577091217 +Loss at step 700: 0.06367108970880508 +Loss at step 750: 0.043883707374334335 +Loss at step 800: 0.04438396170735359 +Loss at step 850: 0.057260822504758835 +Loss at step 900: 0.07005802541971207 +Mean training loss after epoch 35: 0.05400091113804627 + +EPOCH: 36 +Loss at step 0: 0.048135872930288315 +Loss at step 50: 0.04429541528224945 +Loss at step 100: 0.04816179722547531 +Loss at step 150: 0.050422072410583496 +Loss at step 200: 0.04276927188038826 +Loss at step 250: 0.04845673590898514 +Loss at step 300: 0.05383414402604103 +Loss at step 350: 0.044670142233371735 +Loss at step 400: 0.04708537086844444 +Loss at step 450: 0.0464540496468544 +Loss at step 500: 0.09475626796483994 +Loss at step 550: 0.043900322169065475 +Loss at step 600: 0.057160843163728714 +Loss at step 650: 0.0667324811220169 +Loss at step 700: 0.049069277942180634 +Loss at step 750: 0.05670153349637985 +Loss at step 800: 0.052660055458545685 +Loss at step 850: 0.04810985550284386 +Loss at step 900: 0.04657828062772751 +Mean training loss after epoch 36: 0.0533432104130353 + +EPOCH: 37 +Loss at step 0: 0.049028180539608 +Loss at step 50: 0.05117243900895119 +Loss at step 100: 0.05490840971469879 +Loss at step 150: 0.052274204790592194 +Loss at step 200: 0.058677662163972855 +Loss at step 250: 0.04750730097293854 +Loss at step 300: 0.049710631370544434 +Loss at step 350: 0.05399344116449356 +Loss at step 400: 0.05363984778523445 +Loss at step 450: 0.04734344780445099 +Loss at step 500: 0.0479048416018486 +Loss at step 550: 0.06255520135164261 +Loss at step 600: 0.06729632616043091 +Loss at step 650: 0.04429512843489647 +Loss at step 700: 0.06309160590171814 +Loss at step 750: 0.060823433101177216 +Loss at step 800: 0.04790882021188736 +Loss at step 850: 0.07554106414318085 +Loss at step 900: 0.04480592533946037 +Mean training loss after epoch 37: 0.05358854416352726 + +EPOCH: 38 +Loss at step 0: 0.04920826852321625 +Loss at step 50: 0.04302031546831131 +Loss at step 100: 0.05267776548862457 +Loss at step 150: 0.04506663978099823 +Loss at step 200: 0.06167095899581909 +Loss at step 250: 0.06222051382064819 +Loss at step 300: 0.07280968874692917 +Loss at step 350: 0.043978333473205566 +Loss at step 400: 0.053931284695863724 +Loss at step 450: 0.04726745933294296 +Loss at step 500: 0.04504808411002159 +Loss at step 550: 0.05862213671207428 +Loss at step 600: 0.04817679896950722 +Loss at step 650: 0.061317045241594315 +Loss at step 700: 0.040229760110378265 +Loss at step 750: 0.04928894340991974 +Loss at step 800: 0.06195059418678284 +Loss at step 850: 0.04965474084019661 +Loss at step 900: 0.056939881294965744 +Mean training loss after epoch 38: 0.05318885975713923 + +EPOCH: 39 +Loss at step 0: 0.036743342876434326 +Loss at step 50: 0.05339318886399269 +Loss at step 100: 0.04880494251847267 +Loss at step 150: 0.04200832173228264 +Loss at step 200: 0.07544858008623123 +Loss at step 250: 0.06991397589445114 +Loss at step 300: 0.044047802686691284 +Loss at step 350: 0.05753207579255104 +Loss at step 400: 0.057998549193143845 +Loss at step 450: 0.04294561594724655 +Loss at step 500: 0.055925071239471436 +Loss at step 550: 0.05336412414908409 +Loss at step 600: 0.0410737544298172 +Loss at step 650: 0.0614604651927948 +Loss at step 700: 0.04496048390865326 +Loss at step 750: 0.04443386569619179 +Loss at step 800: 0.04651591181755066 +Loss at step 850: 0.059542082250118256 +Loss at step 900: 0.042888931930065155 +Mean training loss after epoch 39: 0.05249884058195137 + +EPOCH: 40 +Loss at step 0: 0.04826343059539795 +Loss at step 50: 0.040495824068784714 +Loss at step 100: 0.051535263657569885 +Loss at step 150: 0.055648550391197205 +Loss at step 200: 0.05435577407479286 +Loss at step 250: 0.04643140360713005 +Loss at step 300: 0.06293434649705887 +Loss at step 350: 0.04663405567407608 +Loss at step 400: 0.048650555312633514 +Loss at step 450: 0.06192856281995773 +Loss at step 500: 0.048388052731752396 +Loss at step 550: 0.06402363628149033 +Loss at step 600: 0.06136562302708626 +Loss at step 650: 0.05427950248122215 +Loss at step 700: 0.04497317969799042 +Loss at step 750: 0.0739879459142685 +Loss at step 800: 0.050202686339616776 +Loss at step 850: 0.0465034656226635 +Loss at step 900: 0.04377468675374985 +Mean training loss after epoch 40: 0.053237553006772804 + +EPOCH: 41 +Loss at step 0: 0.048492979258298874 +Loss at step 50: 0.06710416823625565 +Loss at step 100: 0.043159645050764084 +Loss at step 150: 0.04971924424171448 +Loss at step 200: 0.05384298041462898 +Loss at step 250: 0.06480372697114944 +Loss at step 300: 0.04880737140774727 +Loss at step 350: 0.046118125319480896 +Loss at step 400: 0.04685906693339348 +Loss at step 450: 0.06743953377008438 +Loss at step 500: 0.05346196889877319 +Loss at step 550: 0.04346584156155586 +Loss at step 600: 0.04821471497416496 +Loss at step 650: 0.049697145819664 +Loss at step 700: 0.046393875032663345 +Loss at step 750: 0.08475202322006226 +Loss at step 800: 0.0552048422396183 +Loss at step 850: 0.04914353787899017 +Loss at step 900: 0.06159406155347824 +Mean training loss after epoch 41: 0.05221217456879392 + +EPOCH: 42 +Loss at step 0: 0.05505258962512016 +Loss at step 50: 0.0581231415271759 +Loss at step 100: 0.05356051027774811 +Loss at step 150: 0.03949100151658058 +Loss at step 200: 0.07585103064775467 +Loss at step 250: 0.0604354627430439 +Loss at step 300: 0.04425599053502083 +Loss at step 350: 0.04763419181108475 +Loss at step 400: 0.03915484622120857 +Loss at step 450: 0.07893966138362885 +Loss at step 500: 0.05233079940080643 +Loss at step 550: 0.04180252179503441 +Loss at step 600: 0.05490505322813988 +Loss at step 650: 0.04374103248119354 +Loss at step 700: 0.054531000554561615 +Loss at step 750: 0.049043282866477966 +Loss at step 800: 0.04987652972340584 +Loss at step 850: 0.042168211191892624 +Loss at step 900: 0.04810643568634987 +Mean training loss after epoch 42: 0.05287218012058659 + +EPOCH: 43 +Loss at step 0: 0.049878060817718506 +Loss at step 50: 0.044121284037828445 +Loss at step 100: 0.054123129695653915 +Loss at step 150: 0.045792996883392334 +Loss at step 200: 0.04043523222208023 +Loss at step 250: 0.052563607692718506 +Loss at step 300: 0.045408330857753754 +Loss at step 350: 0.048832591623067856 +Loss at step 400: 0.06319952011108398 +Loss at step 450: 0.04602224752306938 +Loss at step 500: 0.046763792634010315 +Loss at step 550: 0.04179953411221504 +Loss at step 600: 0.05411501228809357 +Loss at step 650: 0.0647578313946724 +Loss at step 700: 0.046431947499513626 +Loss at step 750: 0.0521182045340538 +Loss at step 800: 0.04799725115299225 +Loss at step 850: 0.044459160417318344 +Loss at step 900: 0.04997462034225464 +Mean training loss after epoch 43: 0.05321052268361931 + +EPOCH: 44 +Loss at step 0: 0.04813164100050926 +Loss at step 50: 0.07186423987150192 +Loss at step 100: 0.06321091204881668 +Loss at step 150: 0.044569287449121475 +Loss at step 200: 0.05702079087495804 +Loss at step 250: 0.07186122983694077 +Loss at step 300: 0.054812051355838776 +Loss at step 350: 0.04854024201631546 +Loss at step 400: 0.05047724395990372 +Loss at step 450: 0.044880982488393784 +Loss at step 500: 0.04093560203909874 +Loss at step 550: 0.06272460520267487 +Loss at step 600: 0.05477312207221985 +Loss at step 650: 0.06216924265027046 +Loss at step 700: 0.05296294391155243 +Loss at step 750: 0.07925018668174744 +Loss at step 800: 0.047127027064561844 +Loss at step 850: 0.049582138657569885 +Loss at step 900: 0.04926576837897301 +Mean training loss after epoch 44: 0.05313253677936632 + +EPOCH: 45 +Loss at step 0: 0.05009717121720314 +Loss at step 50: 0.04993962123990059 +Loss at step 100: 0.05195142701268196 +Loss at step 150: 0.06182171776890755 +Loss at step 200: 0.05586962029337883 +Loss at step 250: 0.04891344904899597 +Loss at step 300: 0.04907814785838127 +Loss at step 350: 0.04345877096056938 +Loss at step 400: 0.04359842836856842 +Loss at step 450: 0.05410851538181305 +Loss at step 500: 0.06188173219561577 +Loss at step 550: 0.06476299464702606 +Loss at step 600: 0.04802829772233963 +Loss at step 650: 0.0468716137111187 +Loss at step 700: 0.05188152939081192 +Loss at step 750: 0.0496804378926754 +Loss at step 800: 0.043597668409347534 +Loss at step 850: 0.05091344937682152 +Loss at step 900: 0.0488690510392189 +Mean training loss after epoch 45: 0.0525502272124992 + +EPOCH: 46 +Loss at step 0: 0.0397719070315361 +Loss at step 50: 0.041939012706279755 +Loss at step 100: 0.051259931176900864 +Loss at step 150: 0.07020445913076401 +Loss at step 200: 0.05168981850147247 +Loss at step 250: 0.05698699876666069 +Loss at step 300: 0.04702237620949745 +Loss at step 350: 0.04895839840173721 +Loss at step 400: 0.04782966896891594 +Loss at step 450: 0.045815467834472656 +Loss at step 500: 0.058022595942020416 +Loss at step 550: 0.041971467435359955 +Loss at step 600: 0.05226023122668266 +Loss at step 650: 0.08180022239685059 +Loss at step 700: 0.0505705401301384 +Loss at step 750: 0.04178490862250328 +Loss at step 800: 0.04926880821585655 +Loss at step 850: 0.06063561514019966 +Loss at step 900: 0.04142581671476364 +Mean training loss after epoch 46: 0.052521981795364096 + +EPOCH: 47 +Loss at step 0: 0.04740827903151512 +Loss at step 50: 0.048029154539108276 +Loss at step 100: 0.04536948725581169 +Loss at step 150: 0.06387459486722946 +Loss at step 200: 0.043577875941991806 +Loss at step 250: 0.053286317735910416 +Loss at step 300: 0.04355001449584961 +Loss at step 350: 0.04840279370546341 +Loss at step 400: 0.049341876059770584 +Loss at step 450: 0.058885592967271805 +Loss at step 500: 0.044950518757104874 +Loss at step 550: 0.04819031432271004 +Loss at step 600: 0.04252804443240166 +Loss at step 650: 0.049527183175086975 +Loss at step 700: 0.051508281379938126 +Loss at step 750: 0.05461161956191063 +Loss at step 800: 0.043660979717969894 +Loss at step 850: 0.04972599819302559 +Loss at step 900: 0.06055428087711334 +Mean training loss after epoch 47: 0.05266621692983835 + +EPOCH: 48 +Loss at step 0: 0.05066758021712303 +Loss at step 50: 0.06456887722015381 +Loss at step 100: 0.05360349267721176 +Loss at step 150: 0.06359003484249115 +Loss at step 200: 0.059380218386650085 +Loss at step 250: 0.05050073191523552 +Loss at step 300: 0.045959558337926865 +Loss at step 350: 0.05051184073090553 +Loss at step 400: 0.06030847132205963 +Loss at step 450: 0.08371499180793762 +Loss at step 500: 0.05186876282095909 +Loss at step 550: 0.047674741595983505 +Loss at step 600: 0.059994522482156754 +Loss at step 650: 0.04567626118659973 +Loss at step 700: 0.04180555418133736 +Loss at step 750: 0.058448683470487595 +Loss at step 800: 0.07519315183162689 +Loss at step 850: 0.04043939709663391 +Loss at step 900: 0.04979979246854782 +Mean training loss after epoch 48: 0.052430077398350754 + +EPOCH: 49 +Loss at step 0: 0.045753031969070435 +Loss at step 50: 0.04825032874941826 +Loss at step 100: 0.06268720328807831 +Loss at step 150: 0.03970901295542717 +Loss at step 200: 0.06400401890277863 +Loss at step 250: 0.04407063126564026 +Loss at step 300: 0.04915120452642441 +Loss at step 350: 0.05953553318977356 +Loss at step 400: 0.06962674111127853 +Loss at step 450: 0.04344185069203377 +Loss at step 500: 0.07823880016803741 +Loss at step 550: 0.08006315678358078 +Loss at step 600: 0.04089845344424248 +Loss at step 650: 0.04615790396928787 +Loss at step 700: 0.040230028331279755 +Loss at step 750: 0.06509250402450562 +Loss at step 800: 0.045775242149829865 +Loss at step 850: 0.06718691438436508 +Loss at step 900: 0.05391642451286316 +Mean training loss after epoch 49: 0.052541146495703184 + +EPOCH: 50 +Loss at step 0: 0.04362127557396889 +Loss at step 50: 0.041194621473550797 +Loss at step 100: 0.0422651506960392 +Loss at step 150: 0.06311669945716858 +Loss at step 200: 0.07398732751607895 +Loss at step 250: 0.05276181176304817 +Loss at step 300: 0.04806424677371979 +Loss at step 350: 0.06842450797557831 +Loss at step 400: 0.03838564455509186 +Loss at step 450: 0.03971916064620018 +Loss at step 500: 0.04507923126220703 +Loss at step 550: 0.041310377418994904 +Loss at step 600: 0.042919863015413284 +Loss at step 650: 0.04912989214062691 +Loss at step 700: 0.05486125126481056 +Loss at step 750: 0.06316930055618286 +Loss at step 800: 0.039251018315553665 +Loss at step 850: 0.0745597556233406 +Loss at step 900: 0.05484545975923538 +Mean training loss after epoch 50: 0.05229271163961399 + +EPOCH: 51 +Loss at step 0: 0.04454149305820465 +Loss at step 50: 0.038626059889793396 +Loss at step 100: 0.045790646225214005 +Loss at step 150: 0.052484456449747086 +Loss at step 200: 0.048697587102651596 +Loss at step 250: 0.055726081132888794 +Loss at step 300: 0.04525963217020035 +Loss at step 350: 0.041517049074172974 +Loss at step 400: 0.04858122766017914 +Loss at step 450: 0.06374815851449966 +Loss at step 500: 0.047224387526512146 +Loss at step 550: 0.0765778198838234 +Loss at step 600: 0.042055536061525345 +Loss at step 650: 0.0453353114426136 +Loss at step 700: 0.04511193931102753 +Loss at step 750: 0.05036647990345955 +Loss at step 800: 0.04617953673005104 +Loss at step 850: 0.046869706362485886 +Loss at step 900: 0.042309194803237915 +Mean training loss after epoch 51: 0.052001943529795994 + +EPOCH: 52 +Loss at step 0: 0.05415983498096466 +Loss at step 50: 0.04441351816058159 +Loss at step 100: 0.03835921734571457 +Loss at step 150: 0.05179912969470024 +Loss at step 200: 0.04769515246152878 +Loss at step 250: 0.06047235056757927 +Loss at step 300: 0.06351741403341293 +Loss at step 350: 0.07091790437698364 +Loss at step 400: 0.03563545644283295 +Loss at step 450: 0.04722283408045769 +Loss at step 500: 0.04395308718085289 +Loss at step 550: 0.04455622658133507 +Loss at step 600: 0.04446759819984436 +Loss at step 650: 0.060026250779628754 +Loss at step 700: 0.05045117065310478 +Loss at step 750: 0.03576822206377983 +Loss at step 800: 0.04025300219655037 +Loss at step 850: 0.04234004393219948 +Loss at step 900: 0.04205656051635742 +Mean training loss after epoch 52: 0.05203618080234095 + +EPOCH: 53 +Loss at step 0: 0.045803770422935486 +Loss at step 50: 0.05061497539281845 +Loss at step 100: 0.044035013765096664 +Loss at step 150: 0.04356559365987778 +Loss at step 200: 0.04283801466226578 +Loss at step 250: 0.04917405545711517 +Loss at step 300: 0.041770972311496735 +Loss at step 350: 0.04369813948869705 +Loss at step 400: 0.05442122742533684 +Loss at step 450: 0.0687759518623352 +Loss at step 500: 0.04686306044459343 +Loss at step 550: 0.04113399609923363 +Loss at step 600: 0.04691078141331673 +Loss at step 650: 0.07274896651506424 +Loss at step 700: 0.07397522777318954 +Loss at step 750: 0.06044146046042442 +Loss at step 800: 0.04066580906510353 +Loss at step 850: 0.04666329175233841 +Loss at step 900: 0.052705973386764526 +Mean training loss after epoch 53: 0.05238387568085306 + +EPOCH: 54 +Loss at step 0: 0.041573990136384964 +Loss at step 50: 0.06842967122793198 +Loss at step 100: 0.06014475226402283 +Loss at step 150: 0.044582054018974304 +Loss at step 200: 0.05355361849069595 +Loss at step 250: 0.05124492943286896 +Loss at step 300: 0.046497806906700134 +Loss at step 350: 0.053566839545965195 +Loss at step 400: 0.057152897119522095 +Loss at step 450: 0.04383227229118347 +Loss at step 500: 0.05796577408909798 +Loss at step 550: 0.052839502692222595 +Loss at step 600: 0.057708125561475754 +Loss at step 650: 0.07206779718399048 +Loss at step 700: 0.03874930366873741 +Loss at step 750: 0.0680498257279396 +Loss at step 800: 0.05470266193151474 +Loss at step 850: 0.03999590128660202 +Loss at step 900: 0.04179631546139717 +Mean training loss after epoch 54: 0.05207229190384909 + +EPOCH: 55 +Loss at step 0: 0.0377667173743248 +Loss at step 50: 0.052575405687093735 +Loss at step 100: 0.053772225975990295 +Loss at step 150: 0.04463893547654152 +Loss at step 200: 0.06127486750483513 +Loss at step 250: 0.04966483637690544 +Loss at step 300: 0.04667363315820694 +Loss at step 350: 0.05984867736697197 +Loss at step 400: 0.047269247472286224 +Loss at step 450: 0.04527159035205841 +Loss at step 500: 0.04170394688844681 +Loss at step 550: 0.05216154083609581 +Loss at step 600: 0.04971633478999138 +Loss at step 650: 0.045844290405511856 +Loss at step 700: 0.04495132714509964 +Loss at step 750: 0.07438484579324722 +Loss at step 800: 0.04290897026658058 +Loss at step 850: 0.06724435091018677 +Loss at step 900: 0.051532480865716934 +Mean training loss after epoch 55: 0.05150807814906909 + +EPOCH: 56 +Loss at step 0: 0.053031373769044876 +Loss at step 50: 0.04470488056540489 +Loss at step 100: 0.04107522964477539 +Loss at step 150: 0.049091774970293045 +Loss at step 200: 0.04882743954658508 +Loss at step 250: 0.05533779785037041 +Loss at step 300: 0.04599086567759514 +Loss at step 350: 0.04507995396852493 +Loss at step 400: 0.06217412278056145 +Loss at step 450: 0.08096732944250107 +Loss at step 500: 0.05908403918147087 +Loss at step 550: 0.04016939550638199 +Loss at step 600: 0.050974152982234955 +Loss at step 650: 0.04722588136792183 +Loss at step 700: 0.054369643330574036 +Loss at step 750: 0.0384032167494297 +Loss at step 800: 0.04521019384264946 +Loss at step 850: 0.04233548045158386 +Loss at step 900: 0.08362801373004913 +Mean training loss after epoch 56: 0.05211158758446352 + +EPOCH: 57 +Loss at step 0: 0.06317093968391418 +Loss at step 50: 0.04137517511844635 +Loss at step 100: 0.047858383506536484 +Loss at step 150: 0.033742863684892654 +Loss at step 200: 0.047814786434173584 +Loss at step 250: 0.04853028431534767 +Loss at step 300: 0.04676063731312752 +Loss at step 350: 0.043547630310058594 +Loss at step 400: 0.05538675934076309 +Loss at step 450: 0.05031970143318176 +Loss at step 500: 0.05007030814886093 +Loss at step 550: 0.03759891167283058 +Loss at step 600: 0.04197157174348831 +Loss at step 650: 0.05117228627204895 +Loss at step 700: 0.0656769797205925 +Loss at step 750: 0.053210314363241196 +Loss at step 800: 0.04274747893214226 +Loss at step 850: 0.04270635545253754 +Loss at step 900: 0.058782123029232025 +Mean training loss after epoch 57: 0.05131748943393037 + +EPOCH: 58 +Loss at step 0: 0.08087599277496338 +Loss at step 50: 0.04812050610780716 +Loss at step 100: 0.07059059292078018 +Loss at step 150: 0.05367376282811165 +Loss at step 200: 0.04877069592475891 +Loss at step 250: 0.053296081721782684 +Loss at step 300: 0.0462053157389164 +Loss at step 350: 0.06931379437446594 +Loss at step 400: 0.05078483372926712 +Loss at step 450: 0.051207542419433594 +Loss at step 500: 0.04072359949350357 +Loss at step 550: 0.039386190474033356 +Loss at step 600: 0.04435833916068077 +Loss at step 650: 0.04245788976550102 +Loss at step 700: 0.04916996881365776 +Loss at step 750: 0.05382833629846573 +Loss at step 800: 0.050218310207128525 +Loss at step 850: 0.047946810722351074 +Loss at step 900: 0.06746986508369446 +Mean training loss after epoch 58: 0.05104054565003305 + +EPOCH: 59 +Loss at step 0: 0.05999535694718361 +Loss at step 50: 0.04584382474422455 +Loss at step 100: 0.04974939674139023 +Loss at step 150: 0.05097542703151703 +Loss at step 200: 0.04138175770640373 +Loss at step 250: 0.05049022287130356 +Loss at step 300: 0.03946281597018242 +Loss at step 350: 0.08311013877391815 +Loss at step 400: 0.06084642559289932 +Loss at step 450: 0.04867410659790039 +Loss at step 500: 0.04012276604771614 +Loss at step 550: 0.05408205837011337 +Loss at step 600: 0.0586974062025547 +Loss at step 650: 0.04502899572253227 +Loss at step 700: 0.05178259685635567 +Loss at step 750: 0.064545638859272 +Loss at step 800: 0.06685365736484528 +Loss at step 850: 0.04701273515820503 +Loss at step 900: 0.06203797832131386 +Mean training loss after epoch 59: 0.05155511892466212 + +EPOCH: 60 +Loss at step 0: 0.058535631746053696 +Loss at step 50: 0.05170891061425209 +Loss at step 100: 0.048429232090711594 +Loss at step 150: 0.05549125745892525 +Loss at step 200: 0.05299908667802811 +Loss at step 250: 0.05365258827805519 +Loss at step 300: 0.04311291500926018 +Loss at step 350: 0.07034912705421448 +Loss at step 400: 0.047563884407281876 +Loss at step 450: 0.04342697188258171 +Loss at step 500: 0.061160627752542496 +Loss at step 550: 0.0486142560839653 +Loss at step 600: 0.04625854641199112 +Loss at step 650: 0.042920589447021484 +Loss at step 700: 0.060848187655210495 +Loss at step 750: 0.07132137566804886 +Loss at step 800: 0.044205449521541595 +Loss at step 850: 0.05284859985113144 +Loss at step 900: 0.04726368933916092 +Mean training loss after epoch 60: 0.051569287790291345 + +EPOCH: 61 +Loss at step 0: 0.04738976061344147 +Loss at step 50: 0.052817586809396744 +Loss at step 100: 0.05003659054636955 +Loss at step 150: 0.06801318377256393 +Loss at step 200: 0.04250960424542427 +Loss at step 250: 0.049939434975385666 +Loss at step 300: 0.06283506006002426 +Loss at step 350: 0.04970017075538635 +Loss at step 400: 0.03650680556893349 +Loss at step 450: 0.04220809042453766 +Loss at step 500: 0.05645519122481346 +Loss at step 550: 0.060918036848306656 +Loss at step 600: 0.04384808987379074 +Loss at step 650: 0.037678662687540054 +Loss at step 700: 0.0465891994535923 +Loss at step 750: 0.04755179584026337 +Loss at step 800: 0.044103607535362244 +Loss at step 850: 0.042533889412879944 +Loss at step 900: 0.04972691461443901 +Mean training loss after epoch 61: 0.05127930554992227 + +EPOCH: 62 +Loss at step 0: 0.05528667941689491 +Loss at step 50: 0.04755740985274315 +Loss at step 100: 0.0373450443148613 +Loss at step 150: 0.05713503062725067 +Loss at step 200: 0.05165950581431389 +Loss at step 250: 0.07551972568035126 +Loss at step 300: 0.04046683758497238 +Loss at step 350: 0.055742040276527405 +Loss at step 400: 0.04514850303530693 +Loss at step 450: 0.048485610634088516 +Loss at step 500: 0.044758979231119156 +Loss at step 550: 0.049884356558322906 +Loss at step 600: 0.04770389944314957 +Loss at step 650: 0.05163015425205231 +Loss at step 700: 0.056672826409339905 +Loss at step 750: 0.06794029474258423 +Loss at step 800: 0.042503807693719864 +Loss at step 850: 0.06137290969491005 +Loss at step 900: 0.06044364720582962 +Mean training loss after epoch 62: 0.051982933329716166 + +EPOCH: 63 +Loss at step 0: 0.04336988553404808 +Loss at step 50: 0.043766994029283524 +Loss at step 100: 0.04508104920387268 +Loss at step 150: 0.04441763460636139 +Loss at step 200: 0.04586563631892204 +Loss at step 250: 0.06062905490398407 +Loss at step 300: 0.04486079141497612 +Loss at step 350: 0.08710489422082901 +Loss at step 400: 0.04010272026062012 +Loss at step 450: 0.04638424515724182 +Loss at step 500: 0.056498244404792786 +Loss at step 550: 0.042970821261405945 +Loss at step 600: 0.05591898784041405 +Loss at step 650: 0.044500160962343216 +Loss at step 700: 0.04285691678524017 +Loss at step 750: 0.04552895575761795 +Loss at step 800: 0.04361153393983841 +Loss at step 850: 0.055818941444158554 +Loss at step 900: 0.049143653362989426 +Mean training loss after epoch 63: 0.05110511967121983 + +EPOCH: 64 +Loss at step 0: 0.05364586412906647 +Loss at step 50: 0.06424316763877869 +Loss at step 100: 0.0522276908159256 +Loss at step 150: 0.042973924428224564 +Loss at step 200: 0.04133244976401329 +Loss at step 250: 0.041361186653375626 +Loss at step 300: 0.060344304889440536 +Loss at step 350: 0.05783580616116524 +Loss at step 400: 0.039369162172079086 +Loss at step 450: 0.04506192356348038 +Loss at step 500: 0.04335858300328255 +Loss at step 550: 0.03608706220984459 +Loss at step 600: 0.04972478747367859 +Loss at step 650: 0.07046394795179367 +Loss at step 700: 0.05100264772772789 +Loss at step 750: 0.05745388939976692 +Loss at step 800: 0.04488983377814293 +Loss at step 850: 0.0571855753660202 +Loss at step 900: 0.05422484874725342 +Mean training loss after epoch 64: 0.051848013196259674 + +EPOCH: 65 +Loss at step 0: 0.04308832436800003 +Loss at step 50: 0.04793333262205124 +Loss at step 100: 0.05103219300508499 +Loss at step 150: 0.04699505865573883 +Loss at step 200: 0.05162741243839264 +Loss at step 250: 0.07764565944671631 +Loss at step 300: 0.04883769154548645 +Loss at step 350: 0.049897223711013794 +Loss at step 400: 0.0422045923769474 +Loss at step 450: 0.041999612003564835 +Loss at step 500: 0.05178384855389595 +Loss at step 550: 0.04668722674250603 +Loss at step 600: 0.0390990749001503 +Loss at step 650: 0.04738851264119148 +Loss at step 700: 0.04255633056163788 +Loss at step 750: 0.04879869520664215 +Loss at step 800: 0.049041133373975754 +Loss at step 850: 0.04227061942219734 +Loss at step 900: 0.05342070013284683 +Mean training loss after epoch 65: 0.05144866913763572 + +EPOCH: 66 +Loss at step 0: 0.06849034875631332 +Loss at step 50: 0.06466875970363617 +Loss at step 100: 0.05799201875925064 +Loss at step 150: 0.04567936807870865 +Loss at step 200: 0.04877908155322075 +Loss at step 250: 0.04811312258243561 +Loss at step 300: 0.08053313940763474 +Loss at step 350: 0.05537424236536026 +Loss at step 400: 0.044538527727127075 +Loss at step 450: 0.03976328670978546 +Loss at step 500: 0.055103544145822525 +Loss at step 550: 0.04782833158969879 +Loss at step 600: 0.04748750105500221 +Loss at step 650: 0.05221512168645859 +Loss at step 700: 0.058313485234975815 +Loss at step 750: 0.03828319534659386 +Loss at step 800: 0.05259169265627861 +Loss at step 850: 0.047793835401535034 +Loss at step 900: 0.0616377554833889 +Mean training loss after epoch 66: 0.050753900316605434 + +EPOCH: 67 +Loss at step 0: 0.0573640838265419 +Loss at step 50: 0.04512825608253479 +Loss at step 100: 0.04667126387357712 +Loss at step 150: 0.04672510549426079 +Loss at step 200: 0.04571056365966797 +Loss at step 250: 0.05993441864848137 +Loss at step 300: 0.04445397108793259 +Loss at step 350: 0.045120734721422195 +Loss at step 400: 0.04431392624974251 +Loss at step 450: 0.04557759314775467 +Loss at step 500: 0.05933799967169762 +Loss at step 550: 0.04717828333377838 +Loss at step 600: 0.04832756146788597 +Loss at step 650: 0.04953905940055847 +Loss at step 700: 0.041904594749212265 +Loss at step 750: 0.04194236174225807 +Loss at step 800: 0.05983283370733261 +Loss at step 850: 0.04606281965970993 +Loss at step 900: 0.053905948996543884 +Mean training loss after epoch 67: 0.05064001257247381 + +EPOCH: 68 +Loss at step 0: 0.05109892040491104 +Loss at step 50: 0.04497474059462547 +Loss at step 100: 0.04142839089035988 +Loss at step 150: 0.04223794862627983 +Loss at step 200: 0.04317435249686241 +Loss at step 250: 0.062465324997901917 +Loss at step 300: 0.04388558864593506 +Loss at step 350: 0.05621608346700668 +Loss at step 400: 0.04806473106145859 +Loss at step 450: 0.058821868151426315 +Loss at step 500: 0.04536747187376022 +Loss at step 550: 0.06085909157991409 +Loss at step 600: 0.04545456916093826 +Loss at step 650: 0.045010678470134735 +Loss at step 700: 0.04773971810936928 +Loss at step 750: 0.07405710965394974 +Loss at step 800: 0.04988197237253189 +Loss at step 850: 0.041021715849637985 +Loss at step 900: 0.04839077591896057 +Mean training loss after epoch 68: 0.051044248127892836 + +EPOCH: 69 +Loss at step 0: 0.04385637491941452 +Loss at step 50: 0.038993023335933685 +Loss at step 100: 0.05328406020998955 +Loss at step 150: 0.05639820918440819 +Loss at step 200: 0.05191503092646599 +Loss at step 250: 0.056312691420316696 +Loss at step 300: 0.055389370769262314 +Loss at step 350: 0.06495311111211777 +Loss at step 400: 0.04791368544101715 +Loss at step 450: 0.04152796044945717 +Loss at step 500: 0.03938838839530945 +Loss at step 550: 0.03803623467683792 +Loss at step 600: 0.04181329533457756 +Loss at step 650: 0.07720479369163513 +Loss at step 700: 0.04015904292464256 +Loss at step 750: 0.042780909687280655 +Loss at step 800: 0.046566564589738846 +Loss at step 850: 0.0494224913418293 +Loss at step 900: 0.04329321160912514 +Mean training loss after epoch 69: 0.05078200433951324 + +EPOCH: 70 +Loss at step 0: 0.056876666843891144 +Loss at step 50: 0.051962271332740784 +Loss at step 100: 0.04754125699400902 +Loss at step 150: 0.047346558421850204 +Loss at step 200: 0.04560653492808342 +Loss at step 250: 0.041812729090452194 +Loss at step 300: 0.041336268186569214 +Loss at step 350: 0.05108466371893883 +Loss at step 400: 0.05767818167805672 +Loss at step 450: 0.05100925639271736 +Loss at step 500: 0.04376669228076935 +Loss at step 550: 0.04816117510199547 +Loss at step 600: 0.03772101178765297 +Loss at step 650: 0.04485584795475006 +Loss at step 700: 0.044409025460481644 +Loss at step 750: 0.04541383683681488 +Loss at step 800: 0.04853028431534767 +Loss at step 850: 0.06401737034320831 +Loss at step 900: 0.08462882041931152 +Mean training loss after epoch 70: 0.05083621570121632 + +EPOCH: 71 +Loss at step 0: 0.04284537583589554 +Loss at step 50: 0.04159299284219742 +Loss at step 100: 0.06057169288396835 +Loss at step 150: 0.04077513888478279 +Loss at step 200: 0.06341886520385742 +Loss at step 250: 0.04820159450173378 +Loss at step 300: 0.04560007154941559 +Loss at step 350: 0.050114408135414124 +Loss at step 400: 0.0713987722992897 +Loss at step 450: 0.04859645292162895 +Loss at step 500: 0.042600397020578384 +Loss at step 550: 0.06530575454235077 +Loss at step 600: 0.050033506006002426 +Loss at step 650: 0.061648137867450714 +Loss at step 700: 0.06506557017564774 +Loss at step 750: 0.041063062846660614 +Loss at step 800: 0.049412503838539124 +Loss at step 850: 0.05959435924887657 +Loss at step 900: 0.048059143126010895 +Mean training loss after epoch 71: 0.05068792418431816 + +EPOCH: 72 +Loss at step 0: 0.049439556896686554 +Loss at step 50: 0.044245924800634384 +Loss at step 100: 0.04976751655340195 +Loss at step 150: 0.04735398665070534 +Loss at step 200: 0.04139484092593193 +Loss at step 250: 0.045761097222566605 +Loss at step 300: 0.05132567510008812 +Loss at step 350: 0.04483842849731445 +Loss at step 400: 0.046243395656347275 +Loss at step 450: 0.06019704043865204 +Loss at step 500: 0.03278471529483795 +Loss at step 550: 0.04224205017089844 +Loss at step 600: 0.05009873956441879 +Loss at step 650: 0.07804665714502335 +Loss at step 700: 0.04602454602718353 +Loss at step 750: 0.059691738337278366 +Loss at step 800: 0.06150290369987488 +Loss at step 850: 0.062211230397224426 +Loss at step 900: 0.03884781524538994 +Mean training loss after epoch 72: 0.051169464146214 + +EPOCH: 73 +Loss at step 0: 0.0594070665538311 +Loss at step 50: 0.05606803670525551 +Loss at step 100: 0.06303609907627106 +Loss at step 150: 0.04441472515463829 +Loss at step 200: 0.06135348603129387 +Loss at step 250: 0.06138978153467178 +Loss at step 300: 0.05169282853603363 +Loss at step 350: 0.04429536312818527 +Loss at step 400: 0.0859065130352974 +Loss at step 450: 0.04651627317070961 +Loss at step 500: 0.03940609470009804 +Loss at step 550: 0.050857897847890854 +Loss at step 600: 0.04482409730553627 +Loss at step 650: 0.04593918099999428 +Loss at step 700: 0.04536588862538338 +Loss at step 750: 0.05844881385564804 +Loss at step 800: 0.04812794551253319 +Loss at step 850: 0.04788389801979065 +Loss at step 900: 0.05554303899407387 +Mean training loss after epoch 73: 0.05071380187961847 + +EPOCH: 74 +Loss at step 0: 0.05896979197859764 +Loss at step 50: 0.041102759540081024 +Loss at step 100: 0.0544707328081131 +Loss at step 150: 0.046593014150857925 +Loss at step 200: 0.043392036110162735 +Loss at step 250: 0.0437026172876358 +Loss at step 300: 0.0558653362095356 +Loss at step 350: 0.06265842914581299 +Loss at step 400: 0.060463447123765945 +Loss at step 450: 0.047898709774017334 +Loss at step 500: 0.0714346393942833 +Loss at step 550: 0.04333339259028435 +Loss at step 600: 0.046093448996543884 +Loss at step 650: 0.06117042526602745 +Loss at step 700: 0.04578028619289398 +Loss at step 750: 0.04158996045589447 +Loss at step 800: 0.051580436527729034 +Loss at step 850: 0.0455697737634182 +Loss at step 900: 0.04313727840781212 +Mean training loss after epoch 74: 0.05040215402206124 + +EPOCH: 75 +Loss at step 0: 0.036150012165308 +Loss at step 50: 0.06707154959440231 +Loss at step 100: 0.08219407498836517 +Loss at step 150: 0.04792626574635506 +Loss at step 200: 0.05358075350522995 +Loss at step 250: 0.05565543845295906 +Loss at step 300: 0.04264166206121445 +Loss at step 350: 0.04805701971054077 +Loss at step 400: 0.04911353439092636 +Loss at step 450: 0.05508382245898247 +Loss at step 500: 0.03717143461108208 +Loss at step 550: 0.04317256435751915 +Loss at step 600: 0.04827655479311943 +Loss at step 650: 0.0534575991332531 +Loss at step 700: 0.05309074744582176 +Loss at step 750: 0.0749034434556961 +Loss at step 800: 0.04915199428796768 +Loss at step 850: 0.06150740757584572 +Loss at step 900: 0.04647724702954292 +Mean training loss after epoch 75: 0.05028174744088894 + +EPOCH: 76 +Loss at step 0: 0.04095843434333801 +Loss at step 50: 0.03990088775753975 +Loss at step 100: 0.05097648873925209 +Loss at step 150: 0.059667352586984634 +Loss at step 200: 0.04494746774435043 +Loss at step 250: 0.05807385966181755 +Loss at step 300: 0.039101194590330124 +Loss at step 350: 0.03942447155714035 +Loss at step 400: 0.03974013403058052 +Loss at step 450: 0.04322149232029915 +Loss at step 500: 0.041964974254369736 +Loss at step 550: 0.05160591006278992 +Loss at step 600: 0.04972744360566139 +Loss at step 650: 0.06586267799139023 +Loss at step 700: 0.04740482196211815 +Loss at step 750: 0.041358787566423416 +Loss at step 800: 0.04616885259747505 +Loss at step 850: 0.046862635761499405 +Loss at step 900: 0.0371193066239357 +Mean training loss after epoch 76: 0.05065003309899302 + +EPOCH: 77 +Loss at step 0: 0.03876151144504547 +Loss at step 50: 0.041604675352573395 +Loss at step 100: 0.040909599512815475 +Loss at step 150: 0.051800619810819626 +Loss at step 200: 0.03688057139515877 +Loss at step 250: 0.05366109311580658 +Loss at step 300: 0.051146719604730606 +Loss at step 350: 0.046151358634233475 +Loss at step 400: 0.04119784012436867 +Loss at step 450: 0.04879605397582054 +Loss at step 500: 0.03607388213276863 +Loss at step 550: 0.04680034890770912 +Loss at step 600: 0.03730824589729309 +Loss at step 650: 0.04544806852936745 +Loss at step 700: 0.0559263601899147 +Loss at step 750: 0.047746121883392334 +Loss at step 800: 0.04314638674259186 +Loss at step 850: 0.05282014608383179 +Loss at step 900: 0.04249249026179314 +Mean training loss after epoch 77: 0.05017089983547674 + +EPOCH: 78 +Loss at step 0: 0.050493162125349045 +Loss at step 50: 0.043876782059669495 +Loss at step 100: 0.04767073318362236 +Loss at step 150: 0.041124023497104645 +Loss at step 200: 0.04166269302368164 +Loss at step 250: 0.06193937361240387 +Loss at step 300: 0.0626758486032486 +Loss at step 350: 0.042888909578323364 +Loss at step 400: 0.060628559440374374 +Loss at step 450: 0.03915600851178169 +Loss at step 500: 0.0409984290599823 +Loss at step 550: 0.06091492250561714 +Loss at step 600: 0.06195938214659691 +Loss at step 650: 0.0474432148039341 +Loss at step 700: 0.04040948301553726 +Loss at step 750: 0.053341757506132126 +Loss at step 800: 0.04398433491587639 +Loss at step 850: 0.04341616481542587 +Loss at step 900: 0.06400007754564285 +Mean training loss after epoch 78: 0.0501008252424599 + +EPOCH: 79 +Loss at step 0: 0.04047247767448425 +Loss at step 50: 0.038122545927762985 +Loss at step 100: 0.05078230798244476 +Loss at step 150: 0.045978739857673645 +Loss at step 200: 0.04876568913459778 +Loss at step 250: 0.03813665732741356 +Loss at step 300: 0.06317652016878128 +Loss at step 350: 0.047005295753479004 +Loss at step 400: 0.05567540228366852 +Loss at step 450: 0.06306200474500656 +Loss at step 500: 0.0423128642141819 +Loss at step 550: 0.048326484858989716 +Loss at step 600: 0.055216796696186066 +Loss at step 650: 0.03841697797179222 +Loss at step 700: 0.05925244092941284 +Loss at step 750: 0.05109957233071327 +Loss at step 800: 0.040870875120162964 +Loss at step 850: 0.043398622423410416 +Loss at step 900: 0.04509499669075012 +Mean training loss after epoch 79: 0.05064751731672648 + +EPOCH: 80 +Loss at step 0: 0.04361267387866974 +Loss at step 50: 0.06350944191217422 +Loss at step 100: 0.04381895810365677 +Loss at step 150: 0.064673513174057 +Loss at step 200: 0.07732006907463074 +Loss at step 250: 0.06438292562961578 +Loss at step 300: 0.04323553293943405 +Loss at step 350: 0.04201902449131012 +Loss at step 400: 0.0412403829395771 +Loss at step 450: 0.07155206054449081 +Loss at step 500: 0.041891247034072876 +Loss at step 550: 0.04960064962506294 +Loss at step 600: 0.043957602232694626 +Loss at step 650: 0.049328770488500595 +Loss at step 700: 0.048465318977832794 +Loss at step 750: 0.06472156196832657 +Loss at step 800: 0.045555371791124344 +Loss at step 850: 0.05001545697450638 +Loss at step 900: 0.046885859221220016 +Mean training loss after epoch 80: 0.05028015977419071 + +EPOCH: 81 +Loss at step 0: 0.0491047203540802 +Loss at step 50: 0.06322059780359268 +Loss at step 100: 0.05141130089759827 +Loss at step 150: 0.05059296265244484 +Loss at step 200: 0.07662580162286758 +Loss at step 250: 0.0399882011115551 +Loss at step 300: 0.05067141726613045 +Loss at step 350: 0.05648646876215935 +Loss at step 400: 0.04622841626405716 +Loss at step 450: 0.06971102952957153 +Loss at step 500: 0.0463925376534462 +Loss at step 550: 0.04714679718017578 +Loss at step 600: 0.04719393327832222 +Loss at step 650: 0.04403239116072655 +Loss at step 700: 0.0401025153696537 +Loss at step 750: 0.043403323739767075 +Loss at step 800: 0.04973575100302696 +Loss at step 850: 0.04222571477293968 +Loss at step 900: 0.05466935411095619 +Mean training loss after epoch 81: 0.05012804751536612 + +EPOCH: 82 +Loss at step 0: 0.06599084287881851 +Loss at step 50: 0.04906318709254265 +Loss at step 100: 0.0606396347284317 +Loss at step 150: 0.04976354166865349 +Loss at step 200: 0.04637707769870758 +Loss at step 250: 0.0642530769109726 +Loss at step 300: 0.043574195355176926 +Loss at step 350: 0.04294143244624138 +Loss at step 400: 0.05652209743857384 +Loss at step 450: 0.03822587430477142 +Loss at step 500: 0.05175904557108879 +Loss at step 550: 0.035549454391002655 +Loss at step 600: 0.0410168319940567 +Loss at step 650: 0.05308538302779198 +Loss at step 700: 0.04550240933895111 +Loss at step 750: 0.04537738487124443 +Loss at step 800: 0.04948045685887337 +Loss at step 850: 0.042508162558078766 +Loss at step 900: 0.04231604188680649 +Mean training loss after epoch 82: 0.05044001965984098 + +EPOCH: 83 +Loss at step 0: 0.04684387892484665 +Loss at step 50: 0.048755135387182236 +Loss at step 100: 0.041401688009500504 +Loss at step 150: 0.0571637861430645 +Loss at step 200: 0.04524967446923256 +Loss at step 250: 0.04957064241170883 +Loss at step 300: 0.04169003665447235 +Loss at step 350: 0.053255610167980194 +Loss at step 400: 0.05588313564658165 +Loss at step 450: 0.0644545704126358 +Loss at step 500: 0.04179318994283676 +Loss at step 550: 0.061642616987228394 +Loss at step 600: 0.053229063749313354 +Loss at step 650: 0.046040017157793045 +Loss at step 700: 0.04496080055832863 +Loss at step 750: 0.03747273609042168 +Loss at step 800: 0.06245652958750725 +Loss at step 850: 0.05753291770815849 +Loss at step 900: 0.0637684166431427 +Mean training loss after epoch 83: 0.050293109147374566 + +EPOCH: 84 +Loss at step 0: 0.04468037560582161 +Loss at step 50: 0.0407944954931736 +Loss at step 100: 0.04365966096520424 +Loss at step 150: 0.047214604914188385 +Loss at step 200: 0.05007154867053032 +Loss at step 250: 0.04624723643064499 +Loss at step 300: 0.04511556401848793 +Loss at step 350: 0.05350862070918083 +Loss at step 400: 0.050428394228219986 +Loss at step 450: 0.044789157807826996 +Loss at step 500: 0.06237851828336716 +Loss at step 550: 0.05221202224493027 +Loss at step 600: 0.051561061292886734 +Loss at step 650: 0.056768082082271576 +Loss at step 700: 0.053329553455114365 +Loss at step 750: 0.0466947928071022 +Loss at step 800: 0.05319839343428612 +Loss at step 850: 0.05585790053009987 +Loss at step 900: 0.03981849551200867 +Mean training loss after epoch 84: 0.049864841609208316 + +EPOCH: 85 +Loss at step 0: 0.05181165412068367 +Loss at step 50: 0.03820275515317917 +Loss at step 100: 0.04444373771548271 +Loss at step 150: 0.04108769819140434 +Loss at step 200: 0.04246417433023453 +Loss at step 250: 0.039016030728816986 +Loss at step 300: 0.04603908583521843 +Loss at step 350: 0.040993593633174896 +Loss at step 400: 0.05349775031208992 +Loss at step 450: 0.05070693418383598 +Loss at step 500: 0.047059349715709686 +Loss at step 550: 0.05132042616605759 +Loss at step 600: 0.05048171803355217 +Loss at step 650: 0.04761815443634987 +Loss at step 700: 0.046665433794260025 +Loss at step 750: 0.052451737225055695 +Loss at step 800: 0.045714475214481354 +Loss at step 850: 0.05897098407149315 +Loss at step 900: 0.04949139058589935 +Mean training loss after epoch 85: 0.05029823541490317 + +EPOCH: 86 +Loss at step 0: 0.04369106516242027 +Loss at step 50: 0.042588986456394196 +Loss at step 100: 0.04777897521853447 +Loss at step 150: 0.044493578374385834 +Loss at step 200: 0.050188470631837845 +Loss at step 250: 0.04831194132566452 +Loss at step 300: 0.04908660054206848 +Loss at step 350: 0.04946219176054001 +Loss at step 400: 0.05184337869286537 +Loss at step 450: 0.04250304773449898 +Loss at step 500: 0.051955729722976685 +Loss at step 550: 0.061035457998514175 +Loss at step 600: 0.047487273812294006 +Loss at step 650: 0.04472692683339119 +Loss at step 700: 0.04448065906763077 +Loss at step 750: 0.04124612733721733 +Loss at step 800: 0.043513063341379166 +Loss at step 850: 0.03915812447667122 +Loss at step 900: 0.0382428802549839 +Mean training loss after epoch 86: 0.05009119467599305 + +EPOCH: 87 +Loss at step 0: 0.05095880851149559 +Loss at step 50: 0.045517683029174805 +Loss at step 100: 0.04677272588014603 +Loss at step 150: 0.06373700499534607 +Loss at step 200: 0.04365698620676994 +Loss at step 250: 0.04854476451873779 +Loss at step 300: 0.04194008558988571 +Loss at step 350: 0.03907415643334389 +Loss at step 400: 0.04553912580013275 +Loss at step 450: 0.04613690450787544 +Loss at step 500: 0.07278534024953842 +Loss at step 550: 0.048487041145563126 +Loss at step 600: 0.047068387269973755 +Loss at step 650: 0.04968239367008209 +Loss at step 700: 0.058298733085393906 +Loss at step 750: 0.0643329992890358 +Loss at step 800: 0.0516803152859211 +Loss at step 850: 0.04430660977959633 +Loss at step 900: 0.04280978813767433 +Mean training loss after epoch 87: 0.05002625877542028 + +EPOCH: 88 +Loss at step 0: 0.04393602907657623 +Loss at step 50: 0.04834788665175438 +Loss at step 100: 0.0434185191988945 +Loss at step 150: 0.05852837488055229 +Loss at step 200: 0.0483776293694973 +Loss at step 250: 0.05162229388952255 +Loss at step 300: 0.04313952475786209 +Loss at step 350: 0.07865279912948608 +Loss at step 400: 0.04052116349339485 +Loss at step 450: 0.049096766859292984 +Loss at step 500: 0.04366188496351242 +Loss at step 550: 0.039974093437194824 +Loss at step 600: 0.044570375233888626 +Loss at step 650: 0.041044965386390686 +Loss at step 700: 0.051192838698625565 +Loss at step 750: 0.039279479533433914 +Loss at step 800: 0.04494643583893776 +Loss at step 850: 0.044698745012283325 +Loss at step 900: 0.03779034689068794 +Mean training loss after epoch 88: 0.0498642759965554 + +EPOCH: 89 +Loss at step 0: 0.0585855096578598 +Loss at step 50: 0.05932657793164253 +Loss at step 100: 0.044373150914907455 +Loss at step 150: 0.08216756582260132 +Loss at step 200: 0.05879248306155205 +Loss at step 250: 0.05768197774887085 +Loss at step 300: 0.0490029975771904 +Loss at step 350: 0.0601043738424778 +Loss at step 400: 0.04236773028969765 +Loss at step 450: 0.03872307017445564 +Loss at step 500: 0.041831620037555695 +Loss at step 550: 0.041933972388505936 +Loss at step 600: 0.052849650382995605 +Loss at step 650: 0.0436653308570385 +Loss at step 700: 0.06039625033736229 +Loss at step 750: 0.05589767172932625 +Loss at step 800: 0.044709354639053345 +Loss at step 850: 0.04682032763957977 +Loss at step 900: 0.04470133036375046 +Mean training loss after epoch 89: 0.050291009811259536 + +EPOCH: 90 +Loss at step 0: 0.039277415722608566 +Loss at step 50: 0.03933994472026825 +Loss at step 100: 0.042282912880182266 +Loss at step 150: 0.06838640570640564 +Loss at step 200: 0.044560160487890244 +Loss at step 250: 0.04813050106167793 +Loss at step 300: 0.05455067753791809 +Loss at step 350: 0.06280061602592468 +Loss at step 400: 0.05438091605901718 +Loss at step 450: 0.038389481604099274 +Loss at step 500: 0.06412532925605774 +Loss at step 550: 0.039286572486162186 +Loss at step 600: 0.04716777801513672 +Loss at step 650: 0.06910375505685806 +Loss at step 700: 0.05583615228533745 +Loss at step 750: 0.07748617976903915 +Loss at step 800: 0.04131267964839935 +Loss at step 850: 0.07676172256469727 +Loss at step 900: 0.04817894473671913 +Mean training loss after epoch 90: 0.04983033083363383 + +EPOCH: 91 +Loss at step 0: 0.049061257392168045 +Loss at step 50: 0.037872083485126495 +Loss at step 100: 0.04391055926680565 +Loss at step 150: 0.053276658058166504 +Loss at step 200: 0.049994125962257385 +Loss at step 250: 0.04931704327464104 +Loss at step 300: 0.05432236194610596 +Loss at step 350: 0.04680928960442543 +Loss at step 400: 0.05403105914592743 +Loss at step 450: 0.049496982246637344 +Loss at step 500: 0.0427384190261364 +Loss at step 550: 0.05488106608390808 +Loss at step 600: 0.058135177940130234 +Loss at step 650: 0.04383372142910957 +Loss at step 700: 0.04125663638114929 +Loss at step 750: 0.06725571304559708 +Loss at step 800: 0.07571817934513092 +Loss at step 850: 0.07384885847568512 +Loss at step 900: 0.037387069314718246 +Mean training loss after epoch 91: 0.05019385402184178 + +EPOCH: 92 +Loss at step 0: 0.04014800861477852 +Loss at step 50: 0.05836324021220207 +Loss at step 100: 0.045416850596666336 +Loss at step 150: 0.04712790623307228 +Loss at step 200: 0.043005961924791336 +Loss at step 250: 0.060662891715765 +Loss at step 300: 0.06501764059066772 +Loss at step 350: 0.04174492135643959 +Loss at step 400: 0.04959685727953911 +Loss at step 450: 0.04646313190460205 +Loss at step 500: 0.038406651467084885 +Loss at step 550: 0.05364498868584633 +Loss at step 600: 0.04004823789000511 +Loss at step 650: 0.04323982074856758 +Loss at step 700: 0.05084436014294624 +Loss at step 750: 0.06959839910268784 +Loss at step 800: 0.04866340756416321 +Loss at step 850: 0.04788866639137268 +Loss at step 900: 0.051007479429244995 +Mean training loss after epoch 92: 0.049720132929969955 + +EPOCH: 93 +Loss at step 0: 0.05557652935385704 +Loss at step 50: 0.04277807101607323 +Loss at step 100: 0.05255037173628807 +Loss at step 150: 0.04120642691850662 +Loss at step 200: 0.04100532829761505 +Loss at step 250: 0.03594323620200157 +Loss at step 300: 0.08923674374818802 +Loss at step 350: 0.0498499870300293 +Loss at step 400: 0.048554111272096634 +Loss at step 450: 0.04192011430859566 +Loss at step 500: 0.07473090291023254 +Loss at step 550: 0.03929669409990311 +Loss at step 600: 0.043463919311761856 +Loss at step 650: 0.03491586819291115 +Loss at step 700: 0.04260878264904022 +Loss at step 750: 0.049960214644670486 +Loss at step 800: 0.06402973085641861 +Loss at step 850: 0.045460302382707596 +Loss at step 900: 0.04794945567846298 +Mean training loss after epoch 93: 0.049451748782923736 + +EPOCH: 94 +Loss at step 0: 0.04499164968729019 +Loss at step 50: 0.04590319097042084 +Loss at step 100: 0.049210406839847565 +Loss at step 150: 0.04769102483987808 +Loss at step 200: 0.040975701063871384 +Loss at step 250: 0.046544015407562256 +Loss at step 300: 0.04071179777383804 +Loss at step 350: 0.06244956701993942 +Loss at step 400: 0.06274320185184479 +Loss at step 450: 0.055728282779455185 +Loss at step 500: 0.042496416717767715 +Loss at step 550: 0.046795833855867386 +Loss at step 600: 0.06083468720316887 +Loss at step 650: 0.04817851632833481 +Loss at step 700: 0.05039082467556 +Loss at step 750: 0.044243067502975464 +Loss at step 800: 0.05883360281586647 +Loss at step 850: 0.0478750616312027 +Loss at step 900: 0.08606721460819244 +Mean training loss after epoch 94: 0.04974391502040282 + +EPOCH: 95 +Loss at step 0: 0.057691607624292374 +Loss at step 50: 0.042584680020809174 +Loss at step 100: 0.04423626512289047 +Loss at step 150: 0.06067153811454773 +Loss at step 200: 0.04689512401819229 +Loss at step 250: 0.04467698931694031 +Loss at step 300: 0.04655725136399269 +Loss at step 350: 0.04027937725186348 +Loss at step 400: 0.044612202793359756 +Loss at step 450: 0.04771237447857857 +Loss at step 500: 0.06385154277086258 +Loss at step 550: 0.062465835362672806 +Loss at step 600: 0.04614602029323578 +Loss at step 650: 0.037040915340185165 +Loss at step 700: 0.06572984158992767 +Loss at step 750: 0.04687514528632164 +Loss at step 800: 0.042898356914520264 +Loss at step 850: 0.046665970236063004 +Loss at step 900: 0.05340198054909706 +Mean training loss after epoch 95: 0.04937299289333541 + +EPOCH: 96 +Loss at step 0: 0.05962271988391876 +Loss at step 50: 0.07211997359991074 +Loss at step 100: 0.04459046572446823 +Loss at step 150: 0.03993728756904602 +Loss at step 200: 0.04579629749059677 +Loss at step 250: 0.06677412986755371 +Loss at step 300: 0.06456674635410309 +Loss at step 350: 0.04230700805783272 +Loss at step 400: 0.04002726823091507 +Loss at step 450: 0.042940907180309296 +Loss at step 500: 0.03914265334606171 +Loss at step 550: 0.041893184185028076 +Loss at step 600: 0.06043830141425133 +Loss at step 650: 0.04915029928088188 +Loss at step 700: 0.04539262875914574 +Loss at step 750: 0.04084897041320801 +Loss at step 800: 0.047899503260850906 +Loss at step 850: 0.06316399574279785 +Loss at step 900: 0.04435691982507706 +Mean training loss after epoch 96: 0.050040775389750124 + +EPOCH: 97 +Loss at step 0: 0.04789218679070473 +Loss at step 50: 0.04089616239070892 +Loss at step 100: 0.039492350071668625 +Loss at step 150: 0.040240563452243805 +Loss at step 200: 0.05491875484585762 +Loss at step 250: 0.04898751527070999 +Loss at step 300: 0.06473278999328613 +Loss at step 350: 0.04388058930635452 +Loss at step 400: 0.050799865275621414 +Loss at step 450: 0.04026194289326668 +Loss at step 500: 0.04950572922825813 +Loss at step 550: 0.03989250585436821 +Loss at step 600: 0.036694977432489395 +Loss at step 650: 0.08643174171447754 +Loss at step 700: 0.04590022563934326 +Loss at step 750: 0.0429609939455986 +Loss at step 800: 0.06255394220352173 +Loss at step 850: 0.04290178418159485 +Loss at step 900: 0.041528016328811646 +Mean training loss after epoch 97: 0.049179471060157076 + +EPOCH: 98 +Loss at step 0: 0.03783627599477768 +Loss at step 50: 0.05607382580637932 +Loss at step 100: 0.06049241125583649 +Loss at step 150: 0.04907791689038277 +Loss at step 200: 0.06436482071876526 +Loss at step 250: 0.056805189698934555 +Loss at step 300: 0.040144648402929306 +Loss at step 350: 0.04463665559887886 +Loss at step 400: 0.0641000047326088 +Loss at step 450: 0.05795852839946747 +Loss at step 500: 0.04304036125540733 +Loss at step 550: 0.05028947815299034 +Loss at step 600: 0.05049578845500946 +Loss at step 650: 0.06089025363326073 +Loss at step 700: 0.04726036265492439 +Loss at step 750: 0.04342418164014816 +Loss at step 800: 0.057857219129800797 +Loss at step 850: 0.04461564123630524 +Loss at step 900: 0.06939269602298737 +Mean training loss after epoch 98: 0.050154118683498936 + +EPOCH: 99 +Loss at step 0: 0.04825258627533913 +Loss at step 50: 0.04299546778202057 +Loss at step 100: 0.04459988698363304 +Loss at step 150: 0.05117509141564369 +Loss at step 200: 0.07741483300924301 +Loss at step 250: 0.05179974436759949 +Loss at step 300: 0.06307202577590942 +Loss at step 350: 0.061997365206480026 +Loss at step 400: 0.04400479048490524 +Loss at step 450: 0.045625850558280945 +Loss at step 500: 0.04375815764069557 +Loss at step 550: 0.047082919627428055 +Loss at step 600: 0.04621750861406326 +Loss at step 650: 0.09841378033161163 +Loss at step 700: 0.03337441012263298 +Loss at step 750: 0.056146372109651566 +Loss at step 800: 0.04033663123846054 +Loss at step 850: 0.047317780554294586 +Loss at step 900: 0.057236455380916595 +Mean training loss after epoch 99: 0.04982604287755388 + +EPOCH: 100 +Loss at step 0: 0.052107181400060654 +Loss at step 50: 0.039571814239025116 +Loss at step 100: 0.045148201286792755 +Loss at step 150: 0.06296626478433609 +Loss at step 200: 0.05753709748387337 +Loss at step 250: 0.04446747899055481 +Loss at step 300: 0.07383007556200027 +Loss at step 350: 0.05172615125775337 +Loss at step 400: 0.03983968123793602 +Loss at step 450: 0.05956204608082771 +Loss at step 500: 0.07800130546092987 +Loss at step 550: 0.048807211220264435 +Loss at step 600: 0.077269047498703 +Loss at step 650: 0.04149217903614044 +Loss at step 700: 0.05162139609456062 +Loss at step 750: 0.04396390914916992 +Loss at step 800: 0.04087481275200844 +Loss at step 850: 0.045810144394636154 +Loss at step 900: 0.039682719856500626 +Mean training loss after epoch 100: 0.04999654611417734 + +EPOCH: 101 +Loss at step 0: 0.045760709792375565 +Loss at step 50: 0.04717067629098892 +Loss at step 100: 0.06235860288143158 +Loss at step 150: 0.05869724228978157 +Loss at step 200: 0.07086438685655594 +Loss at step 250: 0.0466604046523571 +Loss at step 300: 0.04460503160953522 +Loss at step 350: 0.0443180613219738 +Loss at step 400: 0.048107296228408813 +Loss at step 450: 0.049075305461883545 +Loss at step 500: 0.04535678029060364 +Loss at step 550: 0.04355894774198532 +Loss at step 600: 0.03775535151362419 +Loss at step 650: 0.04465343430638313 +Loss at step 700: 0.0478530116379261 +Loss at step 750: 0.04624643176794052 +Loss at step 800: 0.04836367443203926 +Loss at step 850: 0.05689162015914917 +Loss at step 900: 0.05115973949432373 +Mean training loss after epoch 101: 0.04886056547130603 + +EPOCH: 102 +Loss at step 0: 0.04280661419034004 +Loss at step 50: 0.046223532408475876 +Loss at step 100: 0.06344103068113327 +Loss at step 150: 0.06735927611589432 +Loss at step 200: 0.0450076162815094 +Loss at step 250: 0.043714605271816254 +Loss at step 300: 0.05964679643511772 +Loss at step 350: 0.04852592572569847 +Loss at step 400: 0.06650976091623306 +Loss at step 450: 0.06737393885850906 +Loss at step 500: 0.04146235063672066 +Loss at step 550: 0.05304909497499466 +Loss at step 600: 0.04457699507474899 +Loss at step 650: 0.04462988302111626 +Loss at step 700: 0.042461052536964417 +Loss at step 750: 0.0389283262193203 +Loss at step 800: 0.039654649794101715 +Loss at step 850: 0.04782576858997345 +Loss at step 900: 0.045491255819797516 +Mean training loss after epoch 102: 0.04913707144621974 + +EPOCH: 103 +Loss at step 0: 0.042935654520988464 +Loss at step 50: 0.0867469534277916 +Loss at step 100: 0.05027663707733154 +Loss at step 150: 0.05164818838238716 +Loss at step 200: 0.06284451484680176 +Loss at step 250: 0.038571760058403015 +Loss at step 300: 0.06248420104384422 +Loss at step 350: 0.03964437544345856 +Loss at step 400: 0.04561103880405426 +Loss at step 450: 0.06050344184041023 +Loss at step 500: 0.04475904256105423 +Loss at step 550: 0.04132961481809616 +Loss at step 600: 0.04759273678064346 +Loss at step 650: 0.03824814409017563 +Loss at step 700: 0.06081436574459076 +Loss at step 750: 0.04030332341790199 +Loss at step 800: 0.04460236057639122 +Loss at step 850: 0.04680190607905388 +Loss at step 900: 0.04777403175830841 +Mean training loss after epoch 103: 0.04938587918480449 + +EPOCH: 104 +Loss at step 0: 0.04400665685534477 +Loss at step 50: 0.05959223583340645 +Loss at step 100: 0.03760972246527672 +Loss at step 150: 0.039439551532268524 +Loss at step 200: 0.09007610380649567 +Loss at step 250: 0.042512208223342896 +Loss at step 300: 0.034985098987817764 +Loss at step 350: 0.049520496279001236 +Loss at step 400: 0.04796219617128372 +Loss at step 450: 0.04228133335709572 +Loss at step 500: 0.04360059276223183 +Loss at step 550: 0.04456563666462898 +Loss at step 600: 0.04457034170627594 +Loss at step 650: 0.049908578395843506 +Loss at step 700: 0.040692396461963654 +Loss at step 750: 0.043096184730529785 +Loss at step 800: 0.053272027522325516 +Loss at step 850: 0.042506732046604156 +Loss at step 900: 0.04804360866546631 +Mean training loss after epoch 104: 0.04929994916054867 + +EPOCH: 105 +Loss at step 0: 0.04899117723107338 +Loss at step 50: 0.04950609803199768 +Loss at step 100: 0.052086710929870605 +Loss at step 150: 0.06391295790672302 +Loss at step 200: 0.053744953125715256 +Loss at step 250: 0.08033426851034164 +Loss at step 300: 0.04080202430486679 +Loss at step 350: 0.060095902532339096 +Loss at step 400: 0.0598762221634388 +Loss at step 450: 0.06966009736061096 +Loss at step 500: 0.04339897260069847 +Loss at step 550: 0.04572024941444397 +Loss at step 600: 0.04672747105360031 +Loss at step 650: 0.04225257411599159 +Loss at step 700: 0.047590579837560654 +Loss at step 750: 0.03972988575696945 +Loss at step 800: 0.044095516204833984 +Loss at step 850: 0.04969096556305885 +Loss at step 900: 0.047418151050806046 +Mean training loss after epoch 105: 0.04937732300516575 + +EPOCH: 106 +Loss at step 0: 0.05497300252318382 +Loss at step 50: 0.046334460377693176 +Loss at step 100: 0.065945565700531 +Loss at step 150: 0.058181412518024445 +Loss at step 200: 0.06826873868703842 +Loss at step 250: 0.04634973034262657 +Loss at step 300: 0.04741493612527847 +Loss at step 350: 0.05877118185162544 +Loss at step 400: 0.04178042337298393 +Loss at step 450: 0.03650778904557228 +Loss at step 500: 0.053654737770557404 +Loss at step 550: 0.05204720422625542 +Loss at step 600: 0.057626157999038696 +Loss at step 650: 0.04381169006228447 +Loss at step 700: 0.06072195991873741 +Loss at step 750: 0.046658679842948914 +Loss at step 800: 0.04075635224580765 +Loss at step 850: 0.06091855838894844 +Loss at step 900: 0.05518174543976784 +Mean training loss after epoch 106: 0.04960385300139628 + +EPOCH: 107 +Loss at step 0: 0.07947132736444473 +Loss at step 50: 0.04343802109360695 +Loss at step 100: 0.04565172269940376 +Loss at step 150: 0.05918736755847931 +Loss at step 200: 0.04629148542881012 +Loss at step 250: 0.049539919942617416 +Loss at step 300: 0.04714967682957649 +Loss at step 350: 0.042181696742773056 +Loss at step 400: 0.09630771726369858 +Loss at step 450: 0.045540228486061096 +Loss at step 500: 0.037972185760736465 +Loss at step 550: 0.04795186221599579 +Loss at step 600: 0.06482075899839401 +Loss at step 650: 0.04020782187581062 +Loss at step 700: 0.03658795356750488 +Loss at step 750: 0.047075994312763214 +Loss at step 800: 0.06613016873598099 +Loss at step 850: 0.04847263544797897 +Loss at step 900: 0.053228069096803665 +Mean training loss after epoch 107: 0.04912408655370349 + +EPOCH: 108 +Loss at step 0: 0.04614869877696037 +Loss at step 50: 0.05540541931986809 +Loss at step 100: 0.04560448229312897 +Loss at step 150: 0.04742952063679695 +Loss at step 200: 0.043483782559633255 +Loss at step 250: 0.045661479234695435 +Loss at step 300: 0.04707382246851921 +Loss at step 350: 0.04687026888132095 +Loss at step 400: 0.045411381870508194 +Loss at step 450: 0.04991434887051582 +Loss at step 500: 0.04509153217077255 +Loss at step 550: 0.04734543338418007 +Loss at step 600: 0.06072453781962395 +Loss at step 650: 0.04440120607614517 +Loss at step 700: 0.03767363354563713 +Loss at step 750: 0.043010029941797256 +Loss at step 800: 0.04071033000946045 +Loss at step 850: 0.051883891224861145 +Loss at step 900: 0.04133019223809242 +Mean training loss after epoch 108: 0.04879119812743242 + +EPOCH: 109 +Loss at step 0: 0.046464625746011734 +Loss at step 50: 0.04093238711357117 +Loss at step 100: 0.04891631379723549 +Loss at step 150: 0.04365249350667 +Loss at step 200: 0.05766184255480766 +Loss at step 250: 0.04144643247127533 +Loss at step 300: 0.03720324486494064 +Loss at step 350: 0.03984092175960541 +Loss at step 400: 0.04034002497792244 +Loss at step 450: 0.05746486037969589 +Loss at step 500: 0.05810108035802841 +Loss at step 550: 0.050075363367795944 +Loss at step 600: 0.048313792794942856 +Loss at step 650: 0.05712393298745155 +Loss at step 700: 0.04662285000085831 +Loss at step 750: 0.04030412435531616 +Loss at step 800: 0.0432622954249382 +Loss at step 850: 0.04647242650389671 +Loss at step 900: 0.05425579473376274 +Mean training loss after epoch 109: 0.04953729929620904 + +EPOCH: 110 +Loss at step 0: 0.0411602258682251 +Loss at step 50: 0.05827384814620018 +Loss at step 100: 0.043220408260822296 +Loss at step 150: 0.04430871456861496 +Loss at step 200: 0.045169781893491745 +Loss at step 250: 0.05283591151237488 +Loss at step 300: 0.05933456867933273 +Loss at step 350: 0.046529658138751984 +Loss at step 400: 0.040965303778648376 +Loss at step 450: 0.04413319379091263 +Loss at step 500: 0.04077326878905296 +Loss at step 550: 0.043787796050310135 +Loss at step 600: 0.04563599079847336 +Loss at step 650: 0.044102415442466736 +Loss at step 700: 0.045195579528808594 +Loss at step 750: 0.042094580829143524 +Loss at step 800: 0.035660937428474426 +Loss at step 850: 0.057654477655887604 +Loss at step 900: 0.048552632331848145 +Mean training loss after epoch 110: 0.048957486833527145 + +EPOCH: 111 +Loss at step 0: 0.03841492533683777 +Loss at step 50: 0.059388112276792526 +Loss at step 100: 0.04795737564563751 +Loss at step 150: 0.0561683215200901 +Loss at step 200: 0.04971979185938835 +Loss at step 250: 0.044348783791065216 +Loss at step 300: 0.06231353059411049 +Loss at step 350: 0.0432267040014267 +Loss at step 400: 0.05721968039870262 +Loss at step 450: 0.04102534428238869 +Loss at step 500: 0.05491535738110542 +Loss at step 550: 0.06234057992696762 +Loss at step 600: 0.04239436611533165 +Loss at step 650: 0.04116687923669815 +Loss at step 700: 0.059775322675704956 +Loss at step 750: 0.04518639296293259 +Loss at step 800: 0.05157170444726944 +Loss at step 850: 0.04857839271426201 +Loss at step 900: 0.047513917088508606 +Mean training loss after epoch 111: 0.0491226587309512 + +EPOCH: 112 +Loss at step 0: 0.039920929819345474 +Loss at step 50: 0.06145167723298073 +Loss at step 100: 0.0493244044482708 +Loss at step 150: 0.07455165684223175 +Loss at step 200: 0.03904502093791962 +Loss at step 250: 0.04348757863044739 +Loss at step 300: 0.040329862385988235 +Loss at step 350: 0.05998861789703369 +Loss at step 400: 0.057073384523391724 +Loss at step 450: 0.059601519256830215 +Loss at step 500: 0.05865882709622383 +Loss at step 550: 0.04348515346646309 +Loss at step 600: 0.04377220943570137 +Loss at step 650: 0.04647468402981758 +Loss at step 700: 0.05077949911355972 +Loss at step 750: 0.055902641266584396 +Loss at step 800: 0.04917966574430466 +Loss at step 850: 0.0431235209107399 +Loss at step 900: 0.042045775800943375 +Mean training loss after epoch 112: 0.04909551963766119 + +EPOCH: 113 +Loss at step 0: 0.04534011334180832 +Loss at step 50: 0.05043407157063484 +Loss at step 100: 0.05908971652388573 +Loss at step 150: 0.04556915536522865 +Loss at step 200: 0.04136772081255913 +Loss at step 250: 0.04921168461441994 +Loss at step 300: 0.04228943958878517 +Loss at step 350: 0.056910835206508636 +Loss at step 400: 0.06554708629846573 +Loss at step 450: 0.04124125465750694 +Loss at step 500: 0.06157703697681427 +Loss at step 550: 0.035458244383335114 +Loss at step 600: 0.03967063128948212 +Loss at step 650: 0.04669573903083801 +Loss at step 700: 0.05790575221180916 +Loss at step 750: 0.043564386665821075 +Loss at step 800: 0.04100175201892853 +Loss at step 850: 0.0428055040538311 +Loss at step 900: 0.040880363434553146 +Mean training loss after epoch 113: 0.0488038733820004 + +EPOCH: 114 +Loss at step 0: 0.04454513266682625 +Loss at step 50: 0.03937241807579994 +Loss at step 100: 0.04196732118725777 +Loss at step 150: 0.04437490180134773 +Loss at step 200: 0.04796938970685005 +Loss at step 250: 0.04044102877378464 +Loss at step 300: 0.04208871349692345 +Loss at step 350: 0.05569366365671158 +Loss at step 400: 0.043097980320453644 +Loss at step 450: 0.0441015288233757 +Loss at step 500: 0.058906905353069305 +Loss at step 550: 0.04146979749202728 +Loss at step 600: 0.040441833436489105 +Loss at step 650: 0.056940022855997086 +Loss at step 700: 0.07504553347826004 +Loss at step 750: 0.05497382581233978 +Loss at step 800: 0.0657416507601738 +Loss at step 850: 0.07697372138500214 +Loss at step 900: 0.05003587156534195 +Mean training loss after epoch 114: 0.04886269948677595 + +EPOCH: 115 +Loss at step 0: 0.049239370971918106 +Loss at step 50: 0.04651816561818123 +Loss at step 100: 0.05062873288989067 +Loss at step 150: 0.041380420327186584 +Loss at step 200: 0.032675668597221375 +Loss at step 250: 0.06499211490154266 +Loss at step 300: 0.04590611532330513 +Loss at step 350: 0.05127798765897751 +Loss at step 400: 0.04326001554727554 +Loss at step 450: 0.050357040017843246 +Loss at step 500: 0.042039621621370316 +Loss at step 550: 0.05137178674340248 +Loss at step 600: 0.05794161930680275 +Loss at step 650: 0.04365627095103264 +Loss at step 700: 0.04435086250305176 +Loss at step 750: 0.04284870624542236 +Loss at step 800: 0.04623869061470032 +Loss at step 850: 0.04527898132801056 +Loss at step 900: 0.07595506310462952 +Mean training loss after epoch 115: 0.04851217690640802 + +EPOCH: 116 +Loss at step 0: 0.07263600826263428 +Loss at step 50: 0.047122083604335785 +Loss at step 100: 0.04262327402830124 +Loss at step 150: 0.060882121324539185 +Loss at step 200: 0.042846664786338806 +Loss at step 250: 0.049980469048023224 +Loss at step 300: 0.045155204832553864 +Loss at step 350: 0.05639110133051872 +Loss at step 400: 0.0591181181371212 +Loss at step 450: 0.054432857781648636 +Loss at step 500: 0.04256144165992737 +Loss at step 550: 0.041754625737667084 +Loss at step 600: 0.04379655793309212 +Loss at step 650: 0.047621600329875946 +Loss at step 700: 0.066741943359375 +Loss at step 750: 0.03842903673648834 +Loss at step 800: 0.05322851985692978 +Loss at step 850: 0.04346533119678497 +Loss at step 900: 0.0467534102499485 +Mean training loss after epoch 116: 0.04943336310909628 + +EPOCH: 117 +Loss at step 0: 0.06075050309300423 +Loss at step 50: 0.04224651679396629 +Loss at step 100: 0.062219925224781036 +Loss at step 150: 0.054078053683042526 +Loss at step 200: 0.047531113028526306 +Loss at step 250: 0.04119610786437988 +Loss at step 300: 0.04106178879737854 +Loss at step 350: 0.050513286143541336 +Loss at step 400: 0.0394163616001606 +Loss at step 450: 0.04288054257631302 +Loss at step 500: 0.04593044891953468 +Loss at step 550: 0.05849483609199524 +Loss at step 600: 0.04663632810115814 +Loss at step 650: 0.03900349140167236 +Loss at step 700: 0.04097558930516243 +Loss at step 750: 0.05106016620993614 +Loss at step 800: 0.06703641265630722 +Loss at step 850: 0.043362122029066086 +Loss at step 900: 0.0641951635479927 +Mean training loss after epoch 117: 0.04911646797363438 + +EPOCH: 118 +Loss at step 0: 0.04563150927424431 +Loss at step 50: 0.06086203455924988 +Loss at step 100: 0.038985393941402435 +Loss at step 150: 0.055265191942453384 +Loss at step 200: 0.04292469471693039 +Loss at step 250: 0.08031522482633591 +Loss at step 300: 0.04275266453623772 +Loss at step 350: 0.04127759113907814 +Loss at step 400: 0.06426413357257843 +Loss at step 450: 0.05787650868296623 +Loss at step 500: 0.04377782344818115 +Loss at step 550: 0.054199256002902985 +Loss at step 600: 0.07862336188554764 +Loss at step 650: 0.03703734278678894 +Loss at step 700: 0.040442951023578644 +Loss at step 750: 0.04093562439084053 +Loss at step 800: 0.044174786657094955 +Loss at step 850: 0.04770414158701897 +Loss at step 900: 0.043135613203048706 +Mean training loss after epoch 118: 0.04894365319835225 + +EPOCH: 119 +Loss at step 0: 0.04171827808022499 +Loss at step 50: 0.049404025077819824 +Loss at step 100: 0.04278334602713585 +Loss at step 150: 0.04215197637677193 +Loss at step 200: 0.04485711827874184 +Loss at step 250: 0.05562586709856987 +Loss at step 300: 0.04929919168353081 +Loss at step 350: 0.04804707318544388 +Loss at step 400: 0.04283322021365166 +Loss at step 450: 0.04240407794713974 +Loss at step 500: 0.06615712493658066 +Loss at step 550: 0.04115685448050499 +Loss at step 600: 0.04170466214418411 +Loss at step 650: 0.04672640562057495 +Loss at step 700: 0.04450565204024315 +Loss at step 750: 0.06255766749382019 +Loss at step 800: 0.041007835417985916 +Loss at step 850: 0.08071881532669067 +Loss at step 900: 0.0449286550283432 +Mean training loss after epoch 119: 0.0485914434904038 + +EPOCH: 120 +Loss at step 0: 0.057035062462091446 +Loss at step 50: 0.03592396527528763 +Loss at step 100: 0.041817255318164825 +Loss at step 150: 0.04161430895328522 +Loss at step 200: 0.05746040493249893 +Loss at step 250: 0.060640834271907806 +Loss at step 300: 0.04397527500987053 +Loss at step 350: 0.04138850048184395 +Loss at step 400: 0.05005555972456932 +Loss at step 450: 0.051147542893886566 +Loss at step 500: 0.04160153120756149 +Loss at step 550: 0.044997841119766235 +Loss at step 600: 0.043310947716236115 +Loss at step 650: 0.05811003968119621 +Loss at step 700: 0.045911144465208054 +Loss at step 750: 0.042939648032188416 +Loss at step 800: 0.04511018469929695 +Loss at step 850: 0.04143624007701874 +Loss at step 900: 0.04032384231686592 +Mean training loss after epoch 120: 0.048467967957099366 + +EPOCH: 121 +Loss at step 0: 0.03983534500002861 +Loss at step 50: 0.05939049646258354 +Loss at step 100: 0.04120434448122978 +Loss at step 150: 0.05125848576426506 +Loss at step 200: 0.048965394496917725 +Loss at step 250: 0.05017925053834915 +Loss at step 300: 0.043667227029800415 +Loss at step 350: 0.05484000965952873 +Loss at step 400: 0.035525258630514145 +Loss at step 450: 0.04117269441485405 +Loss at step 500: 0.04747036471962929 +Loss at step 550: 0.042418528348207474 +Loss at step 600: 0.044149115681648254 +Loss at step 650: 0.05649358406662941 +Loss at step 700: 0.04157646372914314 +Loss at step 750: 0.04470214620232582 +Loss at step 800: 0.04793870449066162 +Loss at step 850: 0.04266034811735153 +Loss at step 900: 0.04572739824652672 +Mean training loss after epoch 121: 0.04890743888882813 + +EPOCH: 122 +Loss at step 0: 0.06103586032986641 +Loss at step 50: 0.048349134624004364 +Loss at step 100: 0.03630019724369049 +Loss at step 150: 0.04603029787540436 +Loss at step 200: 0.061829712241888046 +Loss at step 250: 0.07509749382734299 +Loss at step 300: 0.03790666535496712 +Loss at step 350: 0.042576082050800323 +Loss at step 400: 0.05572664737701416 +Loss at step 450: 0.04324556887149811 +Loss at step 500: 0.044810645282268524 +Loss at step 550: 0.04270986467599869 +Loss at step 600: 0.03889309614896774 +Loss at step 650: 0.04540348798036575 +Loss at step 700: 0.06196150928735733 +Loss at step 750: 0.0464889295399189 +Loss at step 800: 0.041600387543439865 +Loss at step 850: 0.04614332690834999 +Loss at step 900: 0.038348231464624405 +Mean training loss after epoch 122: 0.04855712304618567 + +EPOCH: 123 +Loss at step 0: 0.05810731649398804 +Loss at step 50: 0.04313647001981735 +Loss at step 100: 0.05441255867481232 +Loss at step 150: 0.04376358911395073 +Loss at step 200: 0.06490452587604523 +Loss at step 250: 0.04314413666725159 +Loss at step 300: 0.04717148467898369 +Loss at step 350: 0.041424088180065155 +Loss at step 400: 0.0443015918135643 +Loss at step 450: 0.03849348425865173 +Loss at step 500: 0.05773429945111275 +Loss at step 550: 0.044420529156923294 +Loss at step 600: 0.05497591197490692 +Loss at step 650: 0.039886225014925 +Loss at step 700: 0.055062104016542435 +Loss at step 750: 0.04063385725021362 +Loss at step 800: 0.03802849352359772 +Loss at step 850: 0.039669085294008255 +Loss at step 900: 0.04170946776866913 +Mean training loss after epoch 123: 0.048422273705159426 + +EPOCH: 124 +Loss at step 0: 0.042183905839920044 +Loss at step 50: 0.0603187270462513 +Loss at step 100: 0.0740886777639389 +Loss at step 150: 0.07530931383371353 +Loss at step 200: 0.05191066116094589 +Loss at step 250: 0.060493383556604385 +Loss at step 300: 0.04185192659497261 +Loss at step 350: 0.044185735285282135 +Loss at step 400: 0.04517350345849991 +Loss at step 450: 0.03962967172265053 +Loss at step 500: 0.0435171015560627 +Loss at step 550: 0.07818641513586044 +Loss at step 600: 0.040757354348897934 +Loss at step 650: 0.03997200354933739 +Loss at step 700: 0.0485215038061142 +Loss at step 750: 0.048821400851011276 +Loss at step 800: 0.06954380124807358 +Loss at step 850: 0.04831724613904953 +Loss at step 900: 0.0486610010266304 +Mean training loss after epoch 124: 0.048536703780865366 + +EPOCH: 125 +Loss at step 0: 0.04163498803973198 +Loss at step 50: 0.042778369039297104 +Loss at step 100: 0.04387155920267105 +Loss at step 150: 0.06343777477741241 +Loss at step 200: 0.04969377815723419 +Loss at step 250: 0.041626885533332825 +Loss at step 300: 0.05148531496524811 +Loss at step 350: 0.057024456560611725 +Loss at step 400: 0.04719799384474754 +Loss at step 450: 0.04529045894742012 +Loss at step 500: 0.05086985230445862 +Loss at step 550: 0.05117557942867279 +Loss at step 600: 0.03715503215789795 +Loss at step 650: 0.07316252589225769 +Loss at step 700: 0.03857869654893875 +Loss at step 750: 0.06300487369298935 +Loss at step 800: 0.0399857833981514 +Loss at step 850: 0.07803910970687866 +Loss at step 900: 0.036358244717121124 +Mean training loss after epoch 125: 0.04916067674819594 + +EPOCH: 126 +Loss at step 0: 0.04838414117693901 +Loss at step 50: 0.04294362664222717 +Loss at step 100: 0.04159873351454735 +Loss at step 150: 0.042511116713285446 +Loss at step 200: 0.04957247152924538 +Loss at step 250: 0.04917381703853607 +Loss at step 300: 0.04365123063325882 +Loss at step 350: 0.07293613255023956 +Loss at step 400: 0.03935328498482704 +Loss at step 450: 0.06260377913713455 +Loss at step 500: 0.04235241189599037 +Loss at step 550: 0.05745767802000046 +Loss at step 600: 0.06603095680475235 +Loss at step 650: 0.042347900569438934 +Loss at step 700: 0.057476382702589035 +Loss at step 750: 0.035526663064956665 +Loss at step 800: 0.04379204660654068 +Loss at step 850: 0.0628025159239769 +Loss at step 900: 0.05237077549099922 +Mean training loss after epoch 126: 0.048432261037674035 + +EPOCH: 127 +Loss at step 0: 0.06268385052680969 +Loss at step 50: 0.051177430897951126 +Loss at step 100: 0.040111787617206573 +Loss at step 150: 0.044988639652729034 +Loss at step 200: 0.05918775126338005 +Loss at step 250: 0.06395591795444489 +Loss at step 300: 0.043191928416490555 +Loss at step 350: 0.04045666754245758 +Loss at step 400: 0.07624790817499161 +Loss at step 450: 0.04203274846076965 +Loss at step 500: 0.04372192919254303 +Loss at step 550: 0.059020206332206726 +Loss at step 600: 0.0441201776266098 +Loss at step 650: 0.07067705690860748 +Loss at step 700: 0.038178447633981705 +Loss at step 750: 0.039162084460258484 +Loss at step 800: 0.03905875235795975 +Loss at step 850: 0.032145753502845764 +Loss at step 900: 0.05132643133401871 +Mean training loss after epoch 127: 0.04913109017492357 + +EPOCH: 128 +Loss at step 0: 0.04397174343466759 +Loss at step 50: 0.04540161043405533 +Loss at step 100: 0.038996513932943344 +Loss at step 150: 0.042848970741033554 +Loss at step 200: 0.07096147537231445 +Loss at step 250: 0.06006637215614319 +Loss at step 300: 0.039105046540498734 +Loss at step 350: 0.06571116298437119 +Loss at step 400: 0.044456325471401215 +Loss at step 450: 0.04355058819055557 +Loss at step 500: 0.043969616293907166 +Loss at step 550: 0.04763530194759369 +Loss at step 600: 0.057992640882730484 +Loss at step 650: 0.03909863531589508 +Loss at step 700: 0.0343610905110836 +Loss at step 750: 0.046472325921058655 +Loss at step 800: 0.04603516682982445 +Loss at step 850: 0.04348667711019516 +Loss at step 900: 0.040033984929323196 +Mean training loss after epoch 128: 0.04950167793137178 + +EPOCH: 129 +Loss at step 0: 0.06148488074541092 +Loss at step 50: 0.04208707809448242 +Loss at step 100: 0.043088123202323914 +Loss at step 150: 0.05930984020233154 +Loss at step 200: 0.03653711453080177 +Loss at step 250: 0.04291157424449921 +Loss at step 300: 0.046600330621004105 +Loss at step 350: 0.04494825750589371 +Loss at step 400: 0.04764273762702942 +Loss at step 450: 0.056651342660188675 +Loss at step 500: 0.06256579607725143 +Loss at step 550: 0.043976861983537674 +Loss at step 600: 0.04726428911089897 +Loss at step 650: 0.05647360906004906 +Loss at step 700: 0.05426730215549469 +Loss at step 750: 0.04720715805888176 +Loss at step 800: 0.041113127022981644 +Loss at step 850: 0.055044785141944885 +Loss at step 900: 0.047878120094537735 +Mean training loss after epoch 129: 0.04866698666263237 + +EPOCH: 130 +Loss at step 0: 0.054736845195293427 +Loss at step 50: 0.052255257964134216 +Loss at step 100: 0.0421096496284008 +Loss at step 150: 0.04380587115883827 +Loss at step 200: 0.0501069538295269 +Loss at step 250: 0.04811951518058777 +Loss at step 300: 0.03899279236793518 +Loss at step 350: 0.0629047229886055 +Loss at step 400: 0.07400815933942795 +Loss at step 450: 0.05996164679527283 +Loss at step 500: 0.04069483280181885 +Loss at step 550: 0.03507518395781517 +Loss at step 600: 0.04574788361787796 +Loss at step 650: 0.035077378153800964 +Loss at step 700: 0.03787882626056671 +Loss at step 750: 0.0495583675801754 +Loss at step 800: 0.0378677174448967 +Loss at step 850: 0.042402032762765884 +Loss at step 900: 0.059458594769239426 +Mean training loss after epoch 130: 0.04842078967143033 + +EPOCH: 131 +Loss at step 0: 0.06482542306184769 +Loss at step 50: 0.04043654352426529 +Loss at step 100: 0.042813777923583984 +Loss at step 150: 0.036659494042396545 +Loss at step 200: 0.04225246235728264 +Loss at step 250: 0.04724736139178276 +Loss at step 300: 0.06054338067770004 +Loss at step 350: 0.050970472395420074 +Loss at step 400: 0.06093669310212135 +Loss at step 450: 0.03843550756573677 +Loss at step 500: 0.04226549342274666 +Loss at step 550: 0.05632136017084122 +Loss at step 600: 0.04163910821080208 +Loss at step 650: 0.05894172936677933 +Loss at step 700: 0.04215703159570694 +Loss at step 750: 0.07663297653198242 +Loss at step 800: 0.04128982499241829 +Loss at step 850: 0.05727154761552811 +Loss at step 900: 0.04268449544906616 +Mean training loss after epoch 131: 0.04867420719265302 + +EPOCH: 132 +Loss at step 0: 0.044410500675439835 +Loss at step 50: 0.04667460918426514 +Loss at step 100: 0.05588072910904884 +Loss at step 150: 0.04419002681970596 +Loss at step 200: 0.057145826518535614 +Loss at step 250: 0.04840205982327461 +Loss at step 300: 0.04183244705200195 +Loss at step 350: 0.048349685966968536 +Loss at step 400: 0.04426371306180954 +Loss at step 450: 0.03677039593458176 +Loss at step 500: 0.053969789296388626 +Loss at step 550: 0.03847557306289673 +Loss at step 600: 0.07313726097345352 +Loss at step 650: 0.059583332389593124 +Loss at step 700: 0.05662522464990616 +Loss at step 750: 0.04177241399884224 +Loss at step 800: 0.04153234511613846 +Loss at step 850: 0.03694000840187073 +Loss at step 900: 0.039544060826301575 +Mean training loss after epoch 132: 0.04867334462511641 + +EPOCH: 133 +Loss at step 0: 0.040018945932388306 +Loss at step 50: 0.04584896191954613 +Loss at step 100: 0.040729980915784836 +Loss at step 150: 0.04127957299351692 +Loss at step 200: 0.04251483082771301 +Loss at step 250: 0.04427496716380119 +Loss at step 300: 0.04527648538351059 +Loss at step 350: 0.03747234493494034 +Loss at step 400: 0.07233289629220963 +Loss at step 450: 0.05398392677307129 +Loss at step 500: 0.06243046745657921 +Loss at step 550: 0.07688362896442413 +Loss at step 600: 0.042867373675107956 +Loss at step 650: 0.051807958632707596 +Loss at step 700: 0.04724586009979248 +Loss at step 750: 0.03599642589688301 +Loss at step 800: 0.041538383811712265 +Loss at step 850: 0.03811323642730713 +Loss at step 900: 0.04759371280670166 +Mean training loss after epoch 133: 0.04907246646501108 + +EPOCH: 134 +Loss at step 0: 0.05114765837788582 +Loss at step 50: 0.058783549815416336 +Loss at step 100: 0.037937093526124954 +Loss at step 150: 0.041301969438791275 +Loss at step 200: 0.05513118952512741 +Loss at step 250: 0.06060388684272766 +Loss at step 300: 0.062013376504182816 +Loss at step 350: 0.04496423527598381 +Loss at step 400: 0.04880368337035179 +Loss at step 450: 0.05628389120101929 +Loss at step 500: 0.04492185637354851 +Loss at step 550: 0.05619790032505989 +Loss at step 600: 0.045054689049720764 +Loss at step 650: 0.03817691653966904 +Loss at step 700: 0.03742721676826477 +Loss at step 750: 0.05202801153063774 +Loss at step 800: 0.06104091927409172 +Loss at step 850: 0.03983212634921074 +Loss at step 900: 0.050141334533691406 +Mean training loss after epoch 134: 0.04853650034347704 + +EPOCH: 135 +Loss at step 0: 0.040278926491737366 +Loss at step 50: 0.061523158103227615 +Loss at step 100: 0.04186854138970375 +Loss at step 150: 0.05238495022058487 +Loss at step 200: 0.055366151034832 +Loss at step 250: 0.04503040388226509 +Loss at step 300: 0.04272735118865967 +Loss at step 350: 0.047476626932621 +Loss at step 400: 0.045122481882572174 +Loss at step 450: 0.053545620292425156 +Loss at step 500: 0.05657735839486122 +Loss at step 550: 0.04603251442313194 +Loss at step 600: 0.047145113348960876 +Loss at step 650: 0.041638556867837906 +Loss at step 700: 0.07475831359624863 +Loss at step 750: 0.04856273531913757 +Loss at step 800: 0.04979180172085762 +Loss at step 850: 0.04520677402615547 +Loss at step 900: 0.0413791686296463 +Mean training loss after epoch 135: 0.04856533988087035 + +EPOCH: 136 +Loss at step 0: 0.05234678462147713 +Loss at step 50: 0.04555804282426834 +Loss at step 100: 0.057365790009498596 +Loss at step 150: 0.0460912249982357 +Loss at step 200: 0.05540347844362259 +Loss at step 250: 0.037525150924921036 +Loss at step 300: 0.03999201953411102 +Loss at step 350: 0.04410259798169136 +Loss at step 400: 0.04462926462292671 +Loss at step 450: 0.038584351539611816 +Loss at step 500: 0.06189253181219101 +Loss at step 550: 0.045476678758859634 +Loss at step 600: 0.06370377540588379 +Loss at step 650: 0.04767335578799248 +Loss at step 700: 0.049350298941135406 +Loss at step 750: 0.06470299512147903 +Loss at step 800: 0.048080988228321075 +Loss at step 850: 0.04377365857362747 +Loss at step 900: 0.03533263877034187 +Mean training loss after epoch 136: 0.04819402361967798 + +EPOCH: 137 +Loss at step 0: 0.0481446273624897 +Loss at step 50: 0.041267331689596176 +Loss at step 100: 0.049897972494363785 +Loss at step 150: 0.039551299065351486 +Loss at step 200: 0.03836191073060036 +Loss at step 250: 0.0524468719959259 +Loss at step 300: 0.05562204122543335 +Loss at step 350: 0.043910734355449677 +Loss at step 400: 0.03637081757187843 +Loss at step 450: 0.044259343296289444 +Loss at step 500: 0.04341624677181244 +Loss at step 550: 0.05463051050901413 +Loss at step 600: 0.04135718196630478 +Loss at step 650: 0.048858027905225754 +Loss at step 700: 0.07133223116397858 +Loss at step 750: 0.04914814978837967 +Loss at step 800: 0.046384260058403015 +Loss at step 850: 0.050610706210136414 +Loss at step 900: 0.03980546444654465 +Mean training loss after epoch 137: 0.04831259969725157 + +EPOCH: 138 +Loss at step 0: 0.04712590202689171 +Loss at step 50: 0.038392726331949234 +Loss at step 100: 0.045814406126737595 +Loss at step 150: 0.04660135135054588 +Loss at step 200: 0.058653946965932846 +Loss at step 250: 0.05153726786375046 +Loss at step 300: 0.05915222689509392 +Loss at step 350: 0.051903653889894485 +Loss at step 400: 0.06838671863079071 +Loss at step 450: 0.04489339143037796 +Loss at step 500: 0.050316039472818375 +Loss at step 550: 0.039450667798519135 +Loss at step 600: 0.0504353865981102 +Loss at step 650: 0.036666203290224075 +Loss at step 700: 0.03875046223402023 +Loss at step 750: 0.052333228290081024 +Loss at step 800: 0.04133755713701248 +Loss at step 850: 0.04131413251161575 +Loss at step 900: 0.03761408105492592 +Mean training loss after epoch 138: 0.048635963978035365 + +EPOCH: 139 +Loss at step 0: 0.0435025580227375 +Loss at step 50: 0.0523710623383522 +Loss at step 100: 0.044839054346084595 +Loss at step 150: 0.056432344019412994 +Loss at step 200: 0.042951688170433044 +Loss at step 250: 0.04507075250148773 +Loss at step 300: 0.04655547812581062 +Loss at step 350: 0.034055162221193314 +Loss at step 400: 0.04011010378599167 +Loss at step 450: 0.04736752435564995 +Loss at step 500: 0.05656151846051216 +Loss at step 550: 0.0535053089261055 +Loss at step 600: 0.061301134526729584 +Loss at step 650: 0.057354364544153214 +Loss at step 700: 0.057053837925195694 +Loss at step 750: 0.06484664976596832 +Loss at step 800: 0.03953123837709427 +Loss at step 850: 0.042488183826208115 +Loss at step 900: 0.03870140761137009 +Mean training loss after epoch 139: 0.04885044406805593 + +EPOCH: 140 +Loss at step 0: 0.07074099779129028 +Loss at step 50: 0.045731350779533386 +Loss at step 100: 0.0471193790435791 +Loss at step 150: 0.041951894760131836 +Loss at step 200: 0.04359697550535202 +Loss at step 250: 0.05909973010420799 +Loss at step 300: 0.04221344739198685 +Loss at step 350: 0.04282532259821892 +Loss at step 400: 0.04231211543083191 +Loss at step 450: 0.04453182965517044 +Loss at step 500: 0.048076462000608444 +Loss at step 550: 0.07255866378545761 +Loss at step 600: 0.043639201670885086 +Loss at step 650: 0.039162542670965195 +Loss at step 700: 0.05361824482679367 +Loss at step 750: 0.04256186634302139 +Loss at step 800: 0.044742170721292496 +Loss at step 850: 0.0435599721968174 +Loss at step 900: 0.05830463767051697 +Mean training loss after epoch 140: 0.04836525745999648 + +EPOCH: 141 +Loss at step 0: 0.04624384269118309 +Loss at step 50: 0.04682048037648201 +Loss at step 100: 0.06397434324026108 +Loss at step 150: 0.05555067956447601 +Loss at step 200: 0.039103370159864426 +Loss at step 250: 0.043093107640743256 +Loss at step 300: 0.0423198901116848 +Loss at step 350: 0.04129696637392044 +Loss at step 400: 0.038106512278318405 +Loss at step 450: 0.041128188371658325 +Loss at step 500: 0.03907640650868416 +Loss at step 550: 0.04152397811412811 +Loss at step 600: 0.03696560114622116 +Loss at step 650: 0.046987976878881454 +Loss at step 700: 0.05122114717960358 +Loss at step 750: 0.059763383120298386 +Loss at step 800: 0.04435301572084427 +Loss at step 850: 0.037067193537950516 +Loss at step 900: 0.05867924913764 +Mean training loss after epoch 141: 0.0480450714416087 + +EPOCH: 142 +Loss at step 0: 0.06373215466737747 +Loss at step 50: 0.049846481531858444 +Loss at step 100: 0.04507457837462425 +Loss at step 150: 0.04652165248990059 +Loss at step 200: 0.04113485664129257 +Loss at step 250: 0.03837631270289421 +Loss at step 300: 0.05318281427025795 +Loss at step 350: 0.04007640853524208 +Loss at step 400: 0.061074357479810715 +Loss at step 450: 0.03983987122774124 +Loss at step 500: 0.043495167046785355 +Loss at step 550: 0.04439198970794678 +Loss at step 600: 0.035443373024463654 +Loss at step 650: 0.047762032598257065 +Loss at step 700: 0.04749166592955589 +Loss at step 750: 0.059363022446632385 +Loss at step 800: 0.04428018629550934 +Loss at step 850: 0.047673244029283524 +Loss at step 900: 0.04275432974100113 +Mean training loss after epoch 142: 0.048657710987653556 + +EPOCH: 143 +Loss at step 0: 0.042693670839071274 +Loss at step 50: 0.042551349848508835 +Loss at step 100: 0.04730642959475517 +Loss at step 150: 0.04919397458434105 +Loss at step 200: 0.05563896894454956 +Loss at step 250: 0.04050802066922188 +Loss at step 300: 0.05546027049422264 +Loss at step 350: 0.036101412028074265 +Loss at step 400: 0.052813995629549026 +Loss at step 450: 0.05089308321475983 +Loss at step 500: 0.045163534581661224 +Loss at step 550: 0.05942877382040024 +Loss at step 600: 0.05770060792565346 +Loss at step 650: 0.039511777460575104 +Loss at step 700: 0.049696601927280426 +Loss at step 750: 0.05028544366359711 +Loss at step 800: 0.04040723666548729 +Loss at step 850: 0.04718749225139618 +Loss at step 900: 0.07806974649429321 +Mean training loss after epoch 143: 0.04902585167318646 + +EPOCH: 144 +Loss at step 0: 0.05804404243826866 +Loss at step 50: 0.04718763381242752 +Loss at step 100: 0.0356881357729435 +Loss at step 150: 0.05017884820699692 +Loss at step 200: 0.04575234279036522 +Loss at step 250: 0.0503799170255661 +Loss at step 300: 0.04296661913394928 +Loss at step 350: 0.052042461931705475 +Loss at step 400: 0.04375484958291054 +Loss at step 450: 0.056774821132421494 +Loss at step 500: 0.03729504346847534 +Loss at step 550: 0.04469287395477295 +Loss at step 600: 0.051766760647296906 +Loss at step 650: 0.04263311251997948 +Loss at step 700: 0.04621722549200058 +Loss at step 750: 0.0660976842045784 +Loss at step 800: 0.04683491587638855 +Loss at step 850: 0.046580541878938675 +Loss at step 900: 0.05977582186460495 +Mean training loss after epoch 144: 0.04839002868649103 + +EPOCH: 145 +Loss at step 0: 0.05558139458298683 +Loss at step 50: 0.045531827956438065 +Loss at step 100: 0.059311628341674805 +Loss at step 150: 0.0732608363032341 +Loss at step 200: 0.04518021270632744 +Loss at step 250: 0.04026031494140625 +Loss at step 300: 0.040452100336551666 +Loss at step 350: 0.043984875082969666 +Loss at step 400: 0.047714486718177795 +Loss at step 450: 0.0592566654086113 +Loss at step 500: 0.039471060037612915 +Loss at step 550: 0.04915758967399597 +Loss at step 600: 0.03705943748354912 +Loss at step 650: 0.04733803868293762 +Loss at step 700: 0.04039984568953514 +Loss at step 750: 0.03842097893357277 +Loss at step 800: 0.04031631723046303 +Loss at step 850: 0.04361629858613014 +Loss at step 900: 0.048709671944379807 +Mean training loss after epoch 145: 0.04862735733620203 + +EPOCH: 146 +Loss at step 0: 0.057448770850896835 +Loss at step 50: 0.0607534721493721 +Loss at step 100: 0.0423283651471138 +Loss at step 150: 0.04173853248357773 +Loss at step 200: 0.035910993814468384 +Loss at step 250: 0.04674556106328964 +Loss at step 300: 0.04162338376045227 +Loss at step 350: 0.04964924603700638 +Loss at step 400: 0.04337562248110771 +Loss at step 450: 0.04616432636976242 +Loss at step 500: 0.05841411277651787 +Loss at step 550: 0.043881699442863464 +Loss at step 600: 0.04897647723555565 +Loss at step 650: 0.04756643623113632 +Loss at step 700: 0.05724957585334778 +Loss at step 750: 0.0396745428442955 +Loss at step 800: 0.04640636593103409 +Loss at step 850: 0.0590142123401165 +Loss at step 900: 0.043545953929424286 +Mean training loss after epoch 146: 0.04824300293030261 + +EPOCH: 147 +Loss at step 0: 0.04215994477272034 +Loss at step 50: 0.0643826350569725 +Loss at step 100: 0.03632713854312897 +Loss at step 150: 0.05572391301393509 +Loss at step 200: 0.05293171480298042 +Loss at step 250: 0.058714382350444794 +Loss at step 300: 0.04719340056180954 +Loss at step 350: 0.0753440335392952 +Loss at step 400: 0.038738399744033813 +Loss at step 450: 0.05015306547284126 +Loss at step 500: 0.03244847059249878 +Loss at step 550: 0.04754006117582321 +Loss at step 600: 0.0509917289018631 +Loss at step 650: 0.0427037738263607 +Loss at step 700: 0.04843328893184662 +Loss at step 750: 0.058805886656045914 +Loss at step 800: 0.04237232357263565 +Loss at step 850: 0.05297659710049629 +Loss at step 900: 0.05693311616778374 +Mean training loss after epoch 147: 0.048540185342656014 + +EPOCH: 148 +Loss at step 0: 0.04593652859330177 +Loss at step 50: 0.04117123782634735 +Loss at step 100: 0.04190152883529663 +Loss at step 150: 0.04190709441900253 +Loss at step 200: 0.04059859365224838 +Loss at step 250: 0.05513789504766464 +Loss at step 300: 0.04363248869776726 +Loss at step 350: 0.0445505827665329 +Loss at step 400: 0.05362852290272713 +Loss at step 450: 0.045699458569288254 +Loss at step 500: 0.05891362950205803 +Loss at step 550: 0.042759135365486145 +Loss at step 600: 0.038186006247997284 +Loss at step 650: 0.054739780724048615 +Loss at step 700: 0.060079678893089294 +Loss at step 750: 0.05571344494819641 +Loss at step 800: 0.04230241850018501 +Loss at step 850: 0.038030944764614105 +Loss at step 900: 0.04926208034157753 +Mean training loss after epoch 148: 0.04860585665445465 + +EPOCH: 149 +Loss at step 0: 0.03481580317020416 +Loss at step 50: 0.03888147324323654 +Loss at step 100: 0.0642576664686203 +Loss at step 150: 0.04319475218653679 +Loss at step 200: 0.038781557232141495 +Loss at step 250: 0.04241320490837097 +Loss at step 300: 0.04154130816459656 +Loss at step 350: 0.038142889738082886 +Loss at step 400: 0.039367374032735825 +Loss at step 450: 0.03835767135024071 +Loss at step 500: 0.044024087488651276 +Loss at step 550: 0.04194725304841995 +Loss at step 600: 0.07128673791885376 +Loss at step 650: 0.052909236401319504 +Loss at step 700: 0.03328966721892357 +Loss at step 750: 0.05034976080060005 +Loss at step 800: 0.046249933540821075 +Loss at step 850: 0.05342822149395943 +Loss at step 900: 0.048845551908016205 +Mean training loss after epoch 149: 0.04791478424676573 + +EPOCH: 150 +Loss at step 0: 0.053384244441986084 +Loss at step 50: 0.05841400474309921 +Loss at step 100: 0.05480840057134628 +Loss at step 150: 0.043820276856422424 +Loss at step 200: 0.050548210740089417 +Loss at step 250: 0.047486647963523865 +Loss at step 300: 0.045761529356241226 +Loss at step 350: 0.04864931106567383 +Loss at step 400: 0.050155892968177795 +Loss at step 450: 0.05292592570185661 +Loss at step 500: 0.05828842520713806 +Loss at step 550: 0.05316944047808647 +Loss at step 600: 0.05375320091843605 +Loss at step 650: 0.07094620168209076 +Loss at step 700: 0.05142736807465553 +Loss at step 750: 0.052177708595991135 +Loss at step 800: 0.05282355099916458 +Loss at step 850: 0.038416244089603424 +Loss at step 900: 0.04303006827831268 +Mean training loss after epoch 150: 0.0484924851092639 + +EPOCH: 151 +Loss at step 0: 0.04487397149205208 +Loss at step 50: 0.049064040184020996 +Loss at step 100: 0.037728872150182724 +Loss at step 150: 0.035597823560237885 +Loss at step 200: 0.03782735392451286 +Loss at step 250: 0.06127091124653816 +Loss at step 300: 0.04580070078372955 +Loss at step 350: 0.04515635967254639 +Loss at step 400: 0.057037487626075745 +Loss at step 450: 0.03509005531668663 +Loss at step 500: 0.0589592382311821 +Loss at step 550: 0.045534294098615646 +Loss at step 600: 0.05385633185505867 +Loss at step 650: 0.06598689407110214 +Loss at step 700: 0.0454428493976593 +Loss at step 750: 0.056535813957452774 +Loss at step 800: 0.03904716670513153 +Loss at step 850: 0.041778501123189926 +Loss at step 900: 0.07030586898326874 +Mean training loss after epoch 151: 0.04798035930468838 + +EPOCH: 152 +Loss at step 0: 0.07071492075920105 +Loss at step 50: 0.04405219107866287 +Loss at step 100: 0.05220147594809532 +Loss at step 150: 0.05572608485817909 +Loss at step 200: 0.0366838276386261 +Loss at step 250: 0.055251043289899826 +Loss at step 300: 0.04539750516414642 +Loss at step 350: 0.046769242733716965 +Loss at step 400: 0.053091391921043396 +Loss at step 450: 0.04220592603087425 +Loss at step 500: 0.06756363064050674 +Loss at step 550: 0.04876582324504852 +Loss at step 600: 0.04254632815718651 +Loss at step 650: 0.046705301851034164 +Loss at step 700: 0.0380624383687973 +Loss at step 750: 0.06718366593122482 +Loss at step 800: 0.04155055433511734 +Loss at step 850: 0.04162800312042236 +Loss at step 900: 0.048360876739025116 +Mean training loss after epoch 152: 0.04849436458176387 + +EPOCH: 153 +Loss at step 0: 0.04329727962613106 +Loss at step 50: 0.060314297676086426 +Loss at step 100: 0.05559522286057472 +Loss at step 150: 0.040955375880002975 +Loss at step 200: 0.05377096310257912 +Loss at step 250: 0.048019684851169586 +Loss at step 300: 0.04331258684396744 +Loss at step 350: 0.04413050785660744 +Loss at step 400: 0.06414957344532013 +Loss at step 450: 0.03696039691567421 +Loss at step 500: 0.05207742750644684 +Loss at step 550: 0.03713711351156235 +Loss at step 600: 0.04187649488449097 +Loss at step 650: 0.044978220015764236 +Loss at step 700: 0.042159419506788254 +Loss at step 750: 0.058531858026981354 +Loss at step 800: 0.04438680037856102 +Loss at step 850: 0.03364855423569679 +Loss at step 900: 0.05622141435742378 +Mean training loss after epoch 153: 0.04835928466592008 + +EPOCH: 154 +Loss at step 0: 0.0459480807185173 +Loss at step 50: 0.05812818929553032 +Loss at step 100: 0.03274017199873924 +Loss at step 150: 0.042274110019207 +Loss at step 200: 0.04648832231760025 +Loss at step 250: 0.04001867026090622 +Loss at step 300: 0.051973555237054825 +Loss at step 350: 0.07372510433197021 +Loss at step 400: 0.04973408952355385 +Loss at step 450: 0.0410282164812088 +Loss at step 500: 0.035443082451820374 +Loss at step 550: 0.0500008724629879 +Loss at step 600: 0.04820454120635986 +Loss at step 650: 0.04027475416660309 +Loss at step 700: 0.03530288115143776 +Loss at step 750: 0.05977945402264595 +Loss at step 800: 0.04663990065455437 +Loss at step 850: 0.04106836020946503 +Loss at step 900: 0.036848925054073334 +Mean training loss after epoch 154: 0.04780262200308761 + +EPOCH: 155 +Loss at step 0: 0.04625759273767471 +Loss at step 50: 0.04308951646089554 +Loss at step 100: 0.038227614015340805 +Loss at step 150: 0.03923320770263672 +Loss at step 200: 0.04197337478399277 +Loss at step 250: 0.05629514157772064 +Loss at step 300: 0.06302011758089066 +Loss at step 350: 0.044735606759786606 +Loss at step 400: 0.0724698156118393 +Loss at step 450: 0.057902056723833084 +Loss at step 500: 0.0458591990172863 +Loss at step 550: 0.046093542128801346 +Loss at step 600: 0.037538472563028336 +Loss at step 650: 0.04100722074508667 +Loss at step 700: 0.04673120006918907 +Loss at step 750: 0.04096222668886185 +Loss at step 800: 0.06320616602897644 +Loss at step 850: 0.056759562343358994 +Loss at step 900: 0.041417088359594345 +Mean training loss after epoch 155: 0.04816004396803471 + +EPOCH: 156 +Loss at step 0: 0.039857249706983566 +Loss at step 50: 0.06291884928941727 +Loss at step 100: 0.059106066823005676 +Loss at step 150: 0.043470703065395355 +Loss at step 200: 0.05661414563655853 +Loss at step 250: 0.04727950319647789 +Loss at step 300: 0.04499393329024315 +Loss at step 350: 0.07303772121667862 +Loss at step 400: 0.04668113589286804 +Loss at step 450: 0.05506623536348343 +Loss at step 500: 0.056904714554548264 +Loss at step 550: 0.048274409025907516 +Loss at step 600: 0.04217054322361946 +Loss at step 650: 0.04032851755619049 +Loss at step 700: 0.04587779566645622 +Loss at step 750: 0.03720478713512421 +Loss at step 800: 0.05316713824868202 +Loss at step 850: 0.0485495924949646 +Loss at step 900: 0.04272492974996567 +Mean training loss after epoch 156: 0.048234062801514356 + +EPOCH: 157 +Loss at step 0: 0.05053047090768814 +Loss at step 50: 0.03939590975642204 +Loss at step 100: 0.041260235011577606 +Loss at step 150: 0.04548722878098488 +Loss at step 200: 0.04173439368605614 +Loss at step 250: 0.038228798657655716 +Loss at step 300: 0.04170013964176178 +Loss at step 350: 0.044444892555475235 +Loss at step 400: 0.04408299922943115 +Loss at step 450: 0.06677277386188507 +Loss at step 500: 0.05013129860162735 +Loss at step 550: 0.0505794994533062 +Loss at step 600: 0.04230627045035362 +Loss at step 650: 0.041304729878902435 +Loss at step 700: 0.04125434532761574 +Loss at step 750: 0.044668931514024734 +Loss at step 800: 0.03910967335104942 +Loss at step 850: 0.04597482830286026 +Loss at step 900: 0.057160988450050354 +Mean training loss after epoch 157: 0.047883411388852194 + +EPOCH: 158 +Loss at step 0: 0.04547041282057762 +Loss at step 50: 0.040588174015283585 +Loss at step 100: 0.041275907307863235 +Loss at step 150: 0.04536469653248787 +Loss at step 200: 0.04868501424789429 +Loss at step 250: 0.047116998583078384 +Loss at step 300: 0.05178843438625336 +Loss at step 350: 0.06012246385216713 +Loss at step 400: 0.04221094772219658 +Loss at step 450: 0.06199638545513153 +Loss at step 500: 0.04952610284090042 +Loss at step 550: 0.04230426624417305 +Loss at step 600: 0.04388788715004921 +Loss at step 650: 0.07366587966680527 +Loss at step 700: 0.041067831218242645 +Loss at step 750: 0.05204010754823685 +Loss at step 800: 0.04052109271287918 +Loss at step 850: 0.06078173220157623 +Loss at step 900: 0.03880725055932999 +Mean training loss after epoch 158: 0.04835029504001776 + +EPOCH: 159 +Loss at step 0: 0.03724703937768936 +Loss at step 50: 0.0333692692220211 +Loss at step 100: 0.06205986440181732 +Loss at step 150: 0.04299628362059593 +Loss at step 200: 0.0597137026488781 +Loss at step 250: 0.05111514776945114 +Loss at step 300: 0.07086595147848129 +Loss at step 350: 0.055920567363500595 +Loss at step 400: 0.06450559943914413 +Loss at step 450: 0.04608069360256195 +Loss at step 500: 0.040959298610687256 +Loss at step 550: 0.041273053735494614 +Loss at step 600: 0.03992389515042305 +Loss at step 650: 0.048654794692993164 +Loss at step 700: 0.0550660640001297 +Loss at step 750: 0.0409843735396862 +Loss at step 800: 0.04160774499177933 +Loss at step 850: 0.04375200718641281 +Loss at step 900: 0.04071790352463722 +Mean training loss after epoch 159: 0.04827048637068221 + +EPOCH: 160 +Loss at step 0: 0.0453384630382061 +Loss at step 50: 0.05181225761771202 +Loss at step 100: 0.05148591846227646 +Loss at step 150: 0.04713484272360802 +Loss at step 200: 0.045340463519096375 +Loss at step 250: 0.03916546702384949 +Loss at step 300: 0.05083591118454933 +Loss at step 350: 0.04356459528207779 +Loss at step 400: 0.03757152706384659 +Loss at step 450: 0.055116716772317886 +Loss at step 500: 0.03918007016181946 +Loss at step 550: 0.03780669346451759 +Loss at step 600: 0.05379398912191391 +Loss at step 650: 0.06382010132074356 +Loss at step 700: 0.0644092708826065 +Loss at step 750: 0.039012741297483444 +Loss at step 800: 0.03531036153435707 +Loss at step 850: 0.045648787170648575 +Loss at step 900: 0.04045699164271355 +Mean training loss after epoch 160: 0.048193234910588786 + +EPOCH: 161 +Loss at step 0: 0.039211153984069824 +Loss at step 50: 0.03740132972598076 +Loss at step 100: 0.042130906134843826 +Loss at step 150: 0.040710531175136566 +Loss at step 200: 0.05023274943232536 +Loss at step 250: 0.04341929033398628 +Loss at step 300: 0.04198530688881874 +Loss at step 350: 0.046126071363687515 +Loss at step 400: 0.04655809327960014 +Loss at step 450: 0.05000368878245354 +Loss at step 500: 0.046613000333309174 +Loss at step 550: 0.037483301013708115 +Loss at step 600: 0.0432882234454155 +Loss at step 650: 0.03723549097776413 +Loss at step 700: 0.05256292596459389 +Loss at step 750: 0.04325420781970024 +Loss at step 800: 0.06717808544635773 +Loss at step 850: 0.039090413600206375 +Loss at step 900: 0.0538630336523056 +Mean training loss after epoch 161: 0.048082554859838 + +EPOCH: 162 +Loss at step 0: 0.044330205768346786 +Loss at step 50: 0.04258774593472481 +Loss at step 100: 0.03910255432128906 +Loss at step 150: 0.036397743970155716 +Loss at step 200: 0.07563651353120804 +Loss at step 250: 0.04596198350191116 +Loss at step 300: 0.04345595836639404 +Loss at step 350: 0.03504598140716553 +Loss at step 400: 0.048572197556495667 +Loss at step 450: 0.04974628984928131 +Loss at step 500: 0.06307299435138702 +Loss at step 550: 0.041871778666973114 +Loss at step 600: 0.03992389515042305 +Loss at step 650: 0.04740828648209572 +Loss at step 700: 0.0470350906252861 +Loss at step 750: 0.04374994337558746 +Loss at step 800: 0.06015469878911972 +Loss at step 850: 0.04350601136684418 +Loss at step 900: 0.047128826379776 +Mean training loss after epoch 162: 0.047747951721760634 + +EPOCH: 163 +Loss at step 0: 0.04692613705992699 +Loss at step 50: 0.06888081878423691 +Loss at step 100: 0.036830708384513855 +Loss at step 150: 0.052634693682193756 +Loss at step 200: 0.04034806787967682 +Loss at step 250: 0.04440351203083992 +Loss at step 300: 0.06110226362943649 +Loss at step 350: 0.040500156581401825 +Loss at step 400: 0.044761404395103455 +Loss at step 450: 0.04339404031634331 +Loss at step 500: 0.04216424748301506 +Loss at step 550: 0.059924717992544174 +Loss at step 600: 0.045603103935718536 +Loss at step 650: 0.04283668473362923 +Loss at step 700: 0.05030033364892006 +Loss at step 750: 0.039133220911026 +Loss at step 800: 0.057411544024944305 +Loss at step 850: 0.05786745250225067 +Loss at step 900: 0.04501548036932945 +Mean training loss after epoch 163: 0.04824849020348175 + +EPOCH: 164 +Loss at step 0: 0.03550463169813156 +Loss at step 50: 0.0633787214756012 +Loss at step 100: 0.04124382883310318 +Loss at step 150: 0.05888846516609192 +Loss at step 200: 0.04371283948421478 +Loss at step 250: 0.03670411929488182 +Loss at step 300: 0.039638716727495193 +Loss at step 350: 0.049977418035268784 +Loss at step 400: 0.04291170462965965 +Loss at step 450: 0.03694941848516464 +Loss at step 500: 0.049233220517635345 +Loss at step 550: 0.043179940432310104 +Loss at step 600: 0.05921640619635582 +Loss at step 650: 0.04280020296573639 +Loss at step 700: 0.04227759689092636 +Loss at step 750: 0.04361196234822273 +Loss at step 800: 0.054291170090436935 +Loss at step 850: 0.03758884221315384 +Loss at step 900: 0.05697787553071976 +Mean training loss after epoch 164: 0.04804269300062837 + +EPOCH: 165 +Loss at step 0: 0.05557151511311531 +Loss at step 50: 0.0587000772356987 +Loss at step 100: 0.0501614585518837 +Loss at step 150: 0.038764290511608124 +Loss at step 200: 0.044976379722356796 +Loss at step 250: 0.055857643485069275 +Loss at step 300: 0.06069709733128548 +Loss at step 350: 0.04490916430950165 +Loss at step 400: 0.03925700485706329 +Loss at step 450: 0.06412060558795929 +Loss at step 500: 0.053303152322769165 +Loss at step 550: 0.04243078827857971 +Loss at step 600: 0.08040867000818253 +Loss at step 650: 0.03901030868291855 +Loss at step 700: 0.04218718409538269 +Loss at step 750: 0.047164350748062134 +Loss at step 800: 0.04435146600008011 +Loss at step 850: 0.035926155745983124 +Loss at step 900: 0.05201363563537598 +Mean training loss after epoch 165: 0.04783858970474841 + +EPOCH: 166 +Loss at step 0: 0.03938109427690506 +Loss at step 50: 0.042888738214969635 +Loss at step 100: 0.04646110534667969 +Loss at step 150: 0.0484512560069561 +Loss at step 200: 0.060359735041856766 +Loss at step 250: 0.0390639528632164 +Loss at step 300: 0.03708013519644737 +Loss at step 350: 0.045403484255075455 +Loss at step 400: 0.03366703540086746 +Loss at step 450: 0.04615085944533348 +Loss at step 500: 0.042963236570358276 +Loss at step 550: 0.04573823884129524 +Loss at step 600: 0.047747932374477386 +Loss at step 650: 0.04017927125096321 +Loss at step 700: 0.0455608069896698 +Loss at step 750: 0.04481720179319382 +Loss at step 800: 0.06634066998958588 +Loss at step 850: 0.04572658985853195 +Loss at step 900: 0.039339661598205566 +Mean training loss after epoch 166: 0.04834944995513349 + +EPOCH: 167 +Loss at step 0: 0.04016329348087311 +Loss at step 50: 0.04596622660756111 +Loss at step 100: 0.05588633567094803 +Loss at step 150: 0.040766872465610504 +Loss at step 200: 0.04208094999194145 +Loss at step 250: 0.046603426337242126 +Loss at step 300: 0.044336747378110886 +Loss at step 350: 0.04083812236785889 +Loss at step 400: 0.06697167456150055 +Loss at step 450: 0.04010671004652977 +Loss at step 500: 0.03462489694356918 +Loss at step 550: 0.05797656625509262 +Loss at step 600: 0.057218655943870544 +Loss at step 650: 0.04548359662294388 +Loss at step 700: 0.05653776228427887 +Loss at step 750: 0.042586132884025574 +Loss at step 800: 0.04476004093885422 +Loss at step 850: 0.04645125940442085 +Loss at step 900: 0.048836737871170044 +Mean training loss after epoch 167: 0.04786918218583186 + +EPOCH: 168 +Loss at step 0: 0.03471286594867706 +Loss at step 50: 0.03670928254723549 +Loss at step 100: 0.043461233377456665 +Loss at step 150: 0.04437993839383125 +Loss at step 200: 0.041008226573467255 +Loss at step 250: 0.04751768708229065 +Loss at step 300: 0.0536065474152565 +Loss at step 350: 0.0469471700489521 +Loss at step 400: 0.03854810446500778 +Loss at step 450: 0.07615312933921814 +Loss at step 500: 0.04116733744740486 +Loss at step 550: 0.04944491386413574 +Loss at step 600: 0.044501736760139465 +Loss at step 650: 0.04537106305360794 +Loss at step 700: 0.04387466982007027 +Loss at step 750: 0.06568220257759094 +Loss at step 800: 0.03639619052410126 +Loss at step 850: 0.06143248826265335 +Loss at step 900: 0.042865101248025894 +Mean training loss after epoch 168: 0.04830502153340497 + +EPOCH: 169 +Loss at step 0: 0.05912965536117554 +Loss at step 50: 0.04840218275785446 +Loss at step 100: 0.03871849179267883 +Loss at step 150: 0.04544514790177345 +Loss at step 200: 0.046935684978961945 +Loss at step 250: 0.05107824131846428 +Loss at step 300: 0.04316607117652893 +Loss at step 350: 0.03962690383195877 +Loss at step 400: 0.03878362104296684 +Loss at step 450: 0.055034033954143524 +Loss at step 500: 0.04237452149391174 +Loss at step 550: 0.04672616720199585 +Loss at step 600: 0.05990850552916527 +Loss at step 650: 0.05064459145069122 +Loss at step 700: 0.04280518367886543 +Loss at step 750: 0.04032537341117859 +Loss at step 800: 0.03860695660114288 +Loss at step 850: 0.046331822872161865 +Loss at step 900: 0.03128514811396599 +Mean training loss after epoch 169: 0.048152569200851515 + +EPOCH: 170 +Loss at step 0: 0.041450854390859604 +Loss at step 50: 0.03990516439080238 +Loss at step 100: 0.0387093685567379 +Loss at step 150: 0.04420987516641617 +Loss at step 200: 0.03706914186477661 +Loss at step 250: 0.08555349707603455 +Loss at step 300: 0.06981302797794342 +Loss at step 350: 0.03682062029838562 +Loss at step 400: 0.0639551505446434 +Loss at step 450: 0.049523431807756424 +Loss at step 500: 0.04642587527632713 +Loss at step 550: 0.0436428040266037 +Loss at step 600: 0.0385555662214756 +Loss at step 650: 0.050943773239851 +Loss at step 700: 0.04858536645770073 +Loss at step 750: 0.044519104063510895 +Loss at step 800: 0.05575723573565483 +Loss at step 850: 0.05204610899090767 +Loss at step 900: 0.03793632984161377 +Mean training loss after epoch 170: 0.04791234011080728 + +EPOCH: 171 +Loss at step 0: 0.044062405824661255 +Loss at step 50: 0.045016683638095856 +Loss at step 100: 0.04862944036722183 +Loss at step 150: 0.06489747017621994 +Loss at step 200: 0.06100083142518997 +Loss at step 250: 0.0405079647898674 +Loss at step 300: 0.04127231985330582 +Loss at step 350: 0.04358292371034622 +Loss at step 400: 0.04021648317575455 +Loss at step 450: 0.04850099980831146 +Loss at step 500: 0.041261885315179825 +Loss at step 550: 0.043275266885757446 +Loss at step 600: 0.04252476617693901 +Loss at step 650: 0.044980164617300034 +Loss at step 700: 0.06977424770593643 +Loss at step 750: 0.07127927243709564 +Loss at step 800: 0.05320040509104729 +Loss at step 850: 0.04173273965716362 +Loss at step 900: 0.04646040499210358 +Mean training loss after epoch 171: 0.04785673142369114 + +EPOCH: 172 +Loss at step 0: 0.05874127522110939 +Loss at step 50: 0.04307828098535538 +Loss at step 100: 0.043825771659612656 +Loss at step 150: 0.03771326318383217 +Loss at step 200: 0.0571480430662632 +Loss at step 250: 0.043937478214502335 +Loss at step 300: 0.045638833194971085 +Loss at step 350: 0.03914119303226471 +Loss at step 400: 0.0508493110537529 +Loss at step 450: 0.049065884202718735 +Loss at step 500: 0.059703923761844635 +Loss at step 550: 0.04213320091366768 +Loss at step 600: 0.04521491378545761 +Loss at step 650: 0.04572264105081558 +Loss at step 700: 0.05035974830389023 +Loss at step 750: 0.04215908795595169 +Loss at step 800: 0.07618053257465363 +Loss at step 850: 0.05508400499820709 +Loss at step 900: 0.04323957487940788 +Mean training loss after epoch 172: 0.048219702399170984 + +EPOCH: 173 +Loss at step 0: 0.0630272924900055 +Loss at step 50: 0.04246233031153679 +Loss at step 100: 0.04157164692878723 +Loss at step 150: 0.04426368325948715 +Loss at step 200: 0.050010617822408676 +Loss at step 250: 0.054920848459005356 +Loss at step 300: 0.04029375687241554 +Loss at step 350: 0.05328219011425972 +Loss at step 400: 0.05327213555574417 +Loss at step 450: 0.04361400753259659 +Loss at step 500: 0.05305149406194687 +Loss at step 550: 0.03830067440867424 +Loss at step 600: 0.037342775613069534 +Loss at step 650: 0.04863709583878517 +Loss at step 700: 0.046254005283117294 +Loss at step 750: 0.04329884797334671 +Loss at step 800: 0.04585893824696541 +Loss at step 850: 0.05183754116296768 +Loss at step 900: 0.041897207498550415 +Mean training loss after epoch 173: 0.04741202490026954 + +EPOCH: 174 +Loss at step 0: 0.04717153310775757 +Loss at step 50: 0.04508895054459572 +Loss at step 100: 0.06018811836838722 +Loss at step 150: 0.05807791277766228 +Loss at step 200: 0.05724905803799629 +Loss at step 250: 0.054653048515319824 +Loss at step 300: 0.0381939634680748 +Loss at step 350: 0.043500013649463654 +Loss at step 400: 0.06323181092739105 +Loss at step 450: 0.044171128422021866 +Loss at step 500: 0.04500236362218857 +Loss at step 550: 0.05303451046347618 +Loss at step 600: 0.043790094554424286 +Loss at step 650: 0.040429770946502686 +Loss at step 700: 0.042834337800741196 +Loss at step 750: 0.03391848877072334 +Loss at step 800: 0.038800373673439026 +Loss at step 850: 0.07094977796077728 +Loss at step 900: 0.05766303092241287 +Mean training loss after epoch 174: 0.048058488712048354 + +EPOCH: 175 +Loss at step 0: 0.039672765880823135 +Loss at step 50: 0.047277409583330154 +Loss at step 100: 0.03393502160906792 +Loss at step 150: 0.07484497874975204 +Loss at step 200: 0.04364631697535515 +Loss at step 250: 0.05306398496031761 +Loss at step 300: 0.04497294872999191 +Loss at step 350: 0.04128796234726906 +Loss at step 400: 0.05583059415221214 +Loss at step 450: 0.052477236837148666 +Loss at step 500: 0.039360690861940384 +Loss at step 550: 0.04565472900867462 +Loss at step 600: 0.04093862324953079 +Loss at step 650: 0.03860726207494736 +Loss at step 700: 0.05767451226711273 +Loss at step 750: 0.05455547943711281 +Loss at step 800: 0.04439374431967735 +Loss at step 850: 0.06780869513750076 +Loss at step 900: 0.036375537514686584 +Mean training loss after epoch 175: 0.04779444205195411 + +EPOCH: 176 +Loss at step 0: 0.040001560002565384 +Loss at step 50: 0.03882373869419098 +Loss at step 100: 0.03838953748345375 +Loss at step 150: 0.04933854937553406 +Loss at step 200: 0.06696529686450958 +Loss at step 250: 0.05438848212361336 +Loss at step 300: 0.03607292100787163 +Loss at step 350: 0.0425918772816658 +Loss at step 400: 0.03666551783680916 +Loss at step 450: 0.05698138102889061 +Loss at step 500: 0.04309232905507088 +Loss at step 550: 0.04739917069673538 +Loss at step 600: 0.07291050255298615 +Loss at step 650: 0.04511267691850662 +Loss at step 700: 0.04103662073612213 +Loss at step 750: 0.056768640875816345 +Loss at step 800: 0.04761182889342308 +Loss at step 850: 0.03921983763575554 +Loss at step 900: 0.052924562245607376 +Mean training loss after epoch 176: 0.04786713044844203 + +EPOCH: 177 +Loss at step 0: 0.03719140589237213 +Loss at step 50: 0.054833974689245224 +Loss at step 100: 0.042535051703453064 +Loss at step 150: 0.05122394859790802 +Loss at step 200: 0.05404502525925636 +Loss at step 250: 0.046931661665439606 +Loss at step 300: 0.05378315597772598 +Loss at step 350: 0.05709729716181755 +Loss at step 400: 0.039831679314374924 +Loss at step 450: 0.05173439159989357 +Loss at step 500: 0.05373867228627205 +Loss at step 550: 0.04308965057134628 +Loss at step 600: 0.04798905551433563 +Loss at step 650: 0.058071766048669815 +Loss at step 700: 0.03824401646852493 +Loss at step 750: 0.04338771477341652 +Loss at step 800: 0.04543429985642433 +Loss at step 850: 0.03673559054732323 +Loss at step 900: 0.05093778297305107 +Mean training loss after epoch 177: 0.04770984406521452 + +EPOCH: 178 +Loss at step 0: 0.03889928013086319 +Loss at step 50: 0.038725294172763824 +Loss at step 100: 0.05214297026395798 +Loss at step 150: 0.03752566874027252 +Loss at step 200: 0.051187656819820404 +Loss at step 250: 0.043889258056879044 +Loss at step 300: 0.037656743079423904 +Loss at step 350: 0.0435580313205719 +Loss at step 400: 0.044449854642152786 +Loss at step 450: 0.05996621772646904 +Loss at step 500: 0.04108566418290138 +Loss at step 550: 0.04017878323793411 +Loss at step 600: 0.042209696024656296 +Loss at step 650: 0.04749433323740959 +Loss at step 700: 0.04319657385349274 +Loss at step 750: 0.04151131957769394 +Loss at step 800: 0.035941604524850845 +Loss at step 850: 0.05634953826665878 +Loss at step 900: 0.0608520582318306 +Mean training loss after epoch 178: 0.04793926947978514 + +EPOCH: 179 +Loss at step 0: 0.06153324618935585 +Loss at step 50: 0.043950047343969345 +Loss at step 100: 0.050524745136499405 +Loss at step 150: 0.055241577327251434 +Loss at step 200: 0.06451208144426346 +Loss at step 250: 0.05579543113708496 +Loss at step 300: 0.03944617137312889 +Loss at step 350: 0.03841405361890793 +Loss at step 400: 0.053001053631305695 +Loss at step 450: 0.03794353827834129 +Loss at step 500: 0.04541242867708206 +Loss at step 550: 0.06758102774620056 +Loss at step 600: 0.058663833886384964 +Loss at step 650: 0.04135841131210327 +Loss at step 700: 0.041594091802835464 +Loss at step 750: 0.0413949228823185 +Loss at step 800: 0.06963195651769638 +Loss at step 850: 0.04178568720817566 +Loss at step 900: 0.041012246161699295 +Mean training loss after epoch 179: 0.047935921265912465 + +EPOCH: 180 +Loss at step 0: 0.054800860583782196 +Loss at step 50: 0.057918839156627655 +Loss at step 100: 0.06530427187681198 +Loss at step 150: 0.04080653935670853 +Loss at step 200: 0.06178249418735504 +Loss at step 250: 0.03474525362253189 +Loss at step 300: 0.03998439013957977 +Loss at step 350: 0.040060240775346756 +Loss at step 400: 0.05585366114974022 +Loss at step 450: 0.03464942052960396 +Loss at step 500: 0.04138392582535744 +Loss at step 550: 0.044589634984731674 +Loss at step 600: 0.03505365550518036 +Loss at step 650: 0.039933279156684875 +Loss at step 700: 0.04146985709667206 +Loss at step 750: 0.03817606717348099 +Loss at step 800: 0.04251636192202568 +Loss at step 850: 0.04755231365561485 +Loss at step 900: 0.0561416856944561 +Mean training loss after epoch 180: 0.04803674862678371 + +EPOCH: 181 +Loss at step 0: 0.04383077472448349 +Loss at step 50: 0.04233729839324951 +Loss at step 100: 0.041766028851270676 +Loss at step 150: 0.050506655126810074 +Loss at step 200: 0.04898097366094589 +Loss at step 250: 0.05893184617161751 +Loss at step 300: 0.0461932010948658 +Loss at step 350: 0.042738884687423706 +Loss at step 400: 0.06247379630804062 +Loss at step 450: 0.04725629463791847 +Loss at step 500: 0.03814047947525978 +Loss at step 550: 0.041461292654275894 +Loss at step 600: 0.034317221492528915 +Loss at step 650: 0.06429808586835861 +Loss at step 700: 0.04430270940065384 +Loss at step 750: 0.07002732902765274 +Loss at step 800: 0.04496608301997185 +Loss at step 850: 0.04879084229469299 +Loss at step 900: 0.06839743256568909 +Mean training loss after epoch 181: 0.048074375732795896 + +EPOCH: 182 +Loss at step 0: 0.03871675580739975 +Loss at step 50: 0.05230184271931648 +Loss at step 100: 0.04270555078983307 +Loss at step 150: 0.038462307304143906 +Loss at step 200: 0.04052406921982765 +Loss at step 250: 0.040313564240932465 +Loss at step 300: 0.04183410853147507 +Loss at step 350: 0.035924844443798065 +Loss at step 400: 0.056299399584531784 +Loss at step 450: 0.057996124029159546 +Loss at step 500: 0.05014818534255028 +Loss at step 550: 0.04200182482600212 +Loss at step 600: 0.038449592888355255 +Loss at step 650: 0.046056024730205536 +Loss at step 700: 0.0405617393553257 +Loss at step 750: 0.03934327885508537 +Loss at step 800: 0.0610644556581974 +Loss at step 850: 0.04535294324159622 +Loss at step 900: 0.033430129289627075 +Mean training loss after epoch 182: 0.04832562191018672 + +EPOCH: 183 +Loss at step 0: 0.036010876297950745 +Loss at step 50: 0.04224606975913048 +Loss at step 100: 0.03600187972187996 +Loss at step 150: 0.03781977295875549 +Loss at step 200: 0.059864919632673264 +Loss at step 250: 0.042338885366916656 +Loss at step 300: 0.056968189775943756 +Loss at step 350: 0.03725806623697281 +Loss at step 400: 0.042822204530239105 +Loss at step 450: 0.0370936244726181 +Loss at step 500: 0.039331045001745224 +Loss at step 550: 0.04539065435528755 +Loss at step 600: 0.03994951769709587 +Loss at step 650: 0.041090138256549835 +Loss at step 700: 0.046655185520648956 +Loss at step 750: 0.043534260243177414 +Loss at step 800: 0.04265182837843895 +Loss at step 850: 0.0385807529091835 +Loss at step 900: 0.037442490458488464 +Mean training loss after epoch 183: 0.047833521693531895 + +EPOCH: 184 +Loss at step 0: 0.04919074475765228 +Loss at step 50: 0.045014142990112305 +Loss at step 100: 0.045465223491191864 +Loss at step 150: 0.05177324637770653 +Loss at step 200: 0.04405376687645912 +Loss at step 250: 0.04481072723865509 +Loss at step 300: 0.06025009602308273 +Loss at step 350: 0.04140651971101761 +Loss at step 400: 0.06253649294376373 +Loss at step 450: 0.048434264957904816 +Loss at step 500: 0.05416630208492279 +Loss at step 550: 0.08864015340805054 +Loss at step 600: 0.04183298721909523 +Loss at step 650: 0.04423489421606064 +Loss at step 700: 0.048669204115867615 +Loss at step 750: 0.04699452966451645 +Loss at step 800: 0.05200556665658951 +Loss at step 850: 0.035744450986385345 +Loss at step 900: 0.06793856620788574 +Mean training loss after epoch 184: 0.04786371748282838 + +EPOCH: 185 +Loss at step 0: 0.05523157864809036 +Loss at step 50: 0.03955930098891258 +Loss at step 100: 0.039453573524951935 +Loss at step 150: 0.0442165732383728 +Loss at step 200: 0.04149215295910835 +Loss at step 250: 0.043978556990623474 +Loss at step 300: 0.04654141142964363 +Loss at step 350: 0.04084505885839462 +Loss at step 400: 0.04865162447094917 +Loss at step 450: 0.04154777526855469 +Loss at step 500: 0.04474378377199173 +Loss at step 550: 0.042184848338365555 +Loss at step 600: 0.054101720452308655 +Loss at step 650: 0.05183820798993111 +Loss at step 700: 0.04103800654411316 +Loss at step 750: 0.04112875089049339 +Loss at step 800: 0.038881879299879074 +Loss at step 850: 0.038138825446367264 +Loss at step 900: 0.047496359795331955 +Mean training loss after epoch 185: 0.048118152242622524 + +EPOCH: 186 +Loss at step 0: 0.03754881024360657 +Loss at step 50: 0.07332713901996613 +Loss at step 100: 0.038701653480529785 +Loss at step 150: 0.03860720619559288 +Loss at step 200: 0.04194875434041023 +Loss at step 250: 0.048693232238292694 +Loss at step 300: 0.044113025069236755 +Loss at step 350: 0.04328901693224907 +Loss at step 400: 0.061431173235177994 +Loss at step 450: 0.05555334687232971 +Loss at step 500: 0.038392748683691025 +Loss at step 550: 0.04468535631895065 +Loss at step 600: 0.04965001717209816 +Loss at step 650: 0.04109157249331474 +Loss at step 700: 0.06959868967533112 +Loss at step 750: 0.052380774170160294 +Loss at step 800: 0.04139813408255577 +Loss at step 850: 0.04160447046160698 +Loss at step 900: 0.04466529190540314 +Mean training loss after epoch 186: 0.047223423047662416 + +EPOCH: 187 +Loss at step 0: 0.039763227105140686 +Loss at step 50: 0.03643863648176193 +Loss at step 100: 0.05508458614349365 +Loss at step 150: 0.07337236404418945 +Loss at step 200: 0.05853257700800896 +Loss at step 250: 0.05075812712311745 +Loss at step 300: 0.03620835393667221 +Loss at step 350: 0.04197229444980621 +Loss at step 400: 0.06066759303212166 +Loss at step 450: 0.04113887622952461 +Loss at step 500: 0.04262823238968849 +Loss at step 550: 0.037297651171684265 +Loss at step 600: 0.046386927366256714 +Loss at step 650: 0.042545318603515625 +Loss at step 700: 0.04918098449707031 +Loss at step 750: 0.04709876328706741 +Loss at step 800: 0.04110825061798096 +Loss at step 850: 0.04179352894425392 +Loss at step 900: 0.036416348069906235 +Mean training loss after epoch 187: 0.04767845810126903 + +EPOCH: 188 +Loss at step 0: 0.048182617872953415 +Loss at step 50: 0.04360995814204216 +Loss at step 100: 0.04184760898351669 +Loss at step 150: 0.04823771119117737 +Loss at step 200: 0.04970955103635788 +Loss at step 250: 0.042916812002658844 +Loss at step 300: 0.043156836181879044 +Loss at step 350: 0.037737615406513214 +Loss at step 400: 0.04413697123527527 +Loss at step 450: 0.040749531239271164 +Loss at step 500: 0.03961421549320221 +Loss at step 550: 0.05225567892193794 +Loss at step 600: 0.0359407477080822 +Loss at step 650: 0.04625893384218216 +Loss at step 700: 0.05923420935869217 +Loss at step 750: 0.06793612986803055 +Loss at step 800: 0.04449528083205223 +Loss at step 850: 0.04622213914990425 +Loss at step 900: 0.04093151167035103 +Mean training loss after epoch 188: 0.047770752911088565 + +EPOCH: 189 +Loss at step 0: 0.04627120494842529 +Loss at step 50: 0.07442046701908112 +Loss at step 100: 0.04691905155777931 +Loss at step 150: 0.040141426026821136 +Loss at step 200: 0.08012638986110687 +Loss at step 250: 0.03636894002556801 +Loss at step 300: 0.042231544852256775 +Loss at step 350: 0.048193302005529404 +Loss at step 400: 0.057147249579429626 +Loss at step 450: 0.04204179719090462 +Loss at step 500: 0.057568732649087906 +Loss at step 550: 0.03927038609981537 +Loss at step 600: 0.06474674493074417 +Loss at step 650: 0.04288960248231888 +Loss at step 700: 0.05222388729453087 +Loss at step 750: 0.046212323009967804 +Loss at step 800: 0.03785299137234688 +Loss at step 850: 0.043860163539648056 +Loss at step 900: 0.038221947848796844 +Mean training loss after epoch 189: 0.04751784460885184 + +EPOCH: 190 +Loss at step 0: 0.05741728097200394 +Loss at step 50: 0.0408351793885231 +Loss at step 100: 0.04271361604332924 +Loss at step 150: 0.06173841655254364 +Loss at step 200: 0.046138834208250046 +Loss at step 250: 0.03720716014504433 +Loss at step 300: 0.04398762434720993 +Loss at step 350: 0.051786743104457855 +Loss at step 400: 0.057780444622039795 +Loss at step 450: 0.045605409890413284 +Loss at step 500: 0.06638097763061523 +Loss at step 550: 0.03936495631933212 +Loss at step 600: 0.041021354496479034 +Loss at step 650: 0.042361654341220856 +Loss at step 700: 0.03739864006638527 +Loss at step 750: 0.04387105628848076 +Loss at step 800: 0.04251284897327423 +Loss at step 850: 0.060001153498888016 +Loss at step 900: 0.05815451964735985 +Mean training loss after epoch 190: 0.04705080561943527 + +EPOCH: 191 +Loss at step 0: 0.03420727699995041 +Loss at step 50: 0.04255710169672966 +Loss at step 100: 0.04586543142795563 +Loss at step 150: 0.03845306113362312 +Loss at step 200: 0.04510979726910591 +Loss at step 250: 0.045417420566082 +Loss at step 300: 0.05335865169763565 +Loss at step 350: 0.03454035520553589 +Loss at step 400: 0.048875611275434494 +Loss at step 450: 0.046793606132268906 +Loss at step 500: 0.04844740033149719 +Loss at step 550: 0.04104892536997795 +Loss at step 600: 0.03835626691579819 +Loss at step 650: 0.0478467158973217 +Loss at step 700: 0.045346345752477646 +Loss at step 750: 0.03927779570221901 +Loss at step 800: 0.042425572872161865 +Loss at step 850: 0.04071808233857155 +Loss at step 900: 0.06463237851858139 +Mean training loss after epoch 191: 0.04754166946863569 + +EPOCH: 192 +Loss at step 0: 0.0414058119058609 +Loss at step 50: 0.03874916210770607 +Loss at step 100: 0.05025690421462059 +Loss at step 150: 0.04372229799628258 +Loss at step 200: 0.07018507272005081 +Loss at step 250: 0.046107593923807144 +Loss at step 300: 0.05362960323691368 +Loss at step 350: 0.042961690574884415 +Loss at step 400: 0.048974912613630295 +Loss at step 450: 0.04408952221274376 +Loss at step 500: 0.04607249051332474 +Loss at step 550: 0.03952783718705177 +Loss at step 600: 0.03549397736787796 +Loss at step 650: 0.0400603748857975 +Loss at step 700: 0.04696718603372574 +Loss at step 750: 0.04149600863456726 +Loss at step 800: 0.036627303808927536 +Loss at step 850: 0.054502200335264206 +Loss at step 900: 0.04651680588722229 +Mean training loss after epoch 192: 0.04756815710079187 + +EPOCH: 193 +Loss at step 0: 0.041548047214746475 +Loss at step 50: 0.043952494859695435 +Loss at step 100: 0.039594512432813644 +Loss at step 150: 0.056488435715436935 +Loss at step 200: 0.043292734771966934 +Loss at step 250: 0.04938404634594917 +Loss at step 300: 0.046673402190208435 +Loss at step 350: 0.040909361094236374 +Loss at step 400: 0.04096968099474907 +Loss at step 450: 0.05209429934620857 +Loss at step 500: 0.04939398914575577 +Loss at step 550: 0.06082567572593689 +Loss at step 600: 0.06392515450716019 +Loss at step 650: 0.045031577348709106 +Loss at step 700: 0.0657660961151123 +Loss at step 750: 0.039967190474271774 +Loss at step 800: 0.05111250653862953 +Loss at step 850: 0.04423690214753151 +Loss at step 900: 0.06263870745897293 +Mean training loss after epoch 193: 0.04807765843835213 + +EPOCH: 194 +Loss at step 0: 0.0478762686252594 +Loss at step 50: 0.03860266134142876 +Loss at step 100: 0.05493359640240669 +Loss at step 150: 0.044733427464962006 +Loss at step 200: 0.059110164642333984 +Loss at step 250: 0.05802267789840698 +Loss at step 300: 0.049866076558828354 +Loss at step 350: 0.04751328006386757 +Loss at step 400: 0.05284408852458 +Loss at step 450: 0.04603955149650574 +Loss at step 500: 0.05355359986424446 +Loss at step 550: 0.04578927904367447 +Loss at step 600: 0.044791433960199356 +Loss at step 650: 0.04587395861744881 +Loss at step 700: 0.03297532722353935 +Loss at step 750: 0.0493096224963665 +Loss at step 800: 0.037354834377765656 +Loss at step 850: 0.044524941593408585 +Loss at step 900: 0.03973622992634773 +Mean training loss after epoch 194: 0.047645290682056564 + +EPOCH: 195 +Loss at step 0: 0.03826490417122841 +Loss at step 50: 0.04147506505250931 +Loss at step 100: 0.0383659228682518 +Loss at step 150: 0.043682750314474106 +Loss at step 200: 0.04631073400378227 +Loss at step 250: 0.04294190928339958 +Loss at step 300: 0.05628957599401474 +Loss at step 350: 0.04553340747952461 +Loss at step 400: 0.04653447866439819 +Loss at step 450: 0.049798622727394104 +Loss at step 500: 0.04200226813554764 +Loss at step 550: 0.0341331847012043 +Loss at step 600: 0.04317701980471611 +Loss at step 650: 0.048169977962970734 +Loss at step 700: 0.03558465465903282 +Loss at step 750: 0.04354158788919449 +Loss at step 800: 0.04243353754281998 +Loss at step 850: 0.04070356488227844 +Loss at step 900: 0.05452003329992294 +Mean training loss after epoch 195: 0.047155337900654085 + +EPOCH: 196 +Loss at step 0: 0.06160542741417885 +Loss at step 50: 0.05587358772754669 +Loss at step 100: 0.04359889030456543 +Loss at step 150: 0.04628841578960419 +Loss at step 200: 0.03491334617137909 +Loss at step 250: 0.0406942293047905 +Loss at step 300: 0.03971292823553085 +Loss at step 350: 0.04201624169945717 +Loss at step 400: 0.03683924674987793 +Loss at step 450: 0.03664592653512955 +Loss at step 500: 0.05013517290353775 +Loss at step 550: 0.03480082377791405 +Loss at step 600: 0.06040562316775322 +Loss at step 650: 0.04107019677758217 +Loss at step 700: 0.04015062004327774 +Loss at step 750: 0.041376277804374695 +Loss at step 800: 0.039271436631679535 +Loss at step 850: 0.04940023645758629 +Loss at step 900: 0.060004059225320816 +Mean training loss after epoch 196: 0.04772178263171141 + +EPOCH: 197 +Loss at step 0: 0.04268595948815346 +Loss at step 50: 0.046157389879226685 +Loss at step 100: 0.035638563334941864 +Loss at step 150: 0.043725695461034775 +Loss at step 200: 0.04692140221595764 +Loss at step 250: 0.043637800961732864 +Loss at step 300: 0.038067035377025604 +Loss at step 350: 0.046212151646614075 +Loss at step 400: 0.04152873158454895 +Loss at step 450: 0.0443502813577652 +Loss at step 500: 0.039653100073337555 +Loss at step 550: 0.04365496337413788 +Loss at step 600: 0.060731545090675354 +Loss at step 650: 0.04539009928703308 +Loss at step 700: 0.043849341571331024 +Loss at step 750: 0.05376070737838745 +Loss at step 800: 0.05751893296837807 +Loss at step 850: 0.03861932456493378 +Loss at step 900: 0.04247673973441124 +Mean training loss after epoch 197: 0.04677098898935928 + +EPOCH: 198 +Loss at step 0: 0.036840375512838364 +Loss at step 50: 0.04715429246425629 +Loss at step 100: 0.047932133078575134 +Loss at step 150: 0.05844486877322197 +Loss at step 200: 0.04241935536265373 +Loss at step 250: 0.04418175294995308 +Loss at step 300: 0.04179241135716438 +Loss at step 350: 0.04610389098525047 +Loss at step 400: 0.04456351697444916 +Loss at step 450: 0.04448409751057625 +Loss at step 500: 0.04764093458652496 +Loss at step 550: 0.04509415850043297 +Loss at step 600: 0.039800357073545456 +Loss at step 650: 0.04029176011681557 +Loss at step 700: 0.04482760280370712 +Loss at step 750: 0.03845113888382912 +Loss at step 800: 0.04769859462976456 +Loss at step 850: 0.03872300684452057 +Loss at step 900: 0.05363779515028 +Mean training loss after epoch 198: 0.04708673674930959 + +EPOCH: 199 +Loss at step 0: 0.05655549466609955 +Loss at step 50: 0.048789456486701965 +Loss at step 100: 0.03765055909752846 +Loss at step 150: 0.07644180208444595 +Loss at step 200: 0.05269927158951759 +Loss at step 250: 0.05317987501621246 +Loss at step 300: 0.041730593889951706 +Loss at step 350: 0.045045170933008194 +Loss at step 400: 0.05730314552783966 +Loss at step 450: 0.07255282253026962 +Loss at step 500: 0.0479620136320591 +Loss at step 550: 0.040809620171785355 +Loss at step 600: 0.04493159428238869 +Loss at step 650: 0.04195355251431465 +Loss at step 700: 0.052201781421899796 +Loss at step 750: 0.04445454478263855 +Loss at step 800: 0.051893822848796844 +Loss at step 850: 0.03980604559183121 +Loss at step 900: 0.04029034823179245 +Mean training loss after epoch 199: 0.04745917953153663 + +EPOCH: 200 +Loss at step 0: 0.04165381193161011 +Loss at step 50: 0.04718153551220894 +Loss at step 100: 0.0432785227894783 +Loss at step 150: 0.04036586359143257 +Loss at step 200: 0.04541498050093651 +Loss at step 250: 0.04272327572107315 +Loss at step 300: 0.044632911682128906 +Loss at step 350: 0.05616108700633049 +Loss at step 400: 0.038284145295619965 +Loss at step 450: 0.044899724423885345 +Loss at step 500: 0.044379718601703644 +Loss at step 550: 0.036458030343055725 +Loss at step 600: 0.044261299073696136 +Loss at step 650: 0.04642018303275108 +Loss at step 700: 0.037503812462091446 +Loss at step 750: 0.04117279127240181 +Loss at step 800: 0.05372374877333641 +Loss at step 850: 0.036662474274635315 +Loss at step 900: 0.04429645836353302 +Mean training loss after epoch 200: 0.04756052539086164 + +EPOCH: 201 +Loss at step 0: 0.03706429526209831 +Loss at step 50: 0.04278825223445892 +Loss at step 100: 0.046013131737709045 +Loss at step 150: 0.05672388896346092 +Loss at step 200: 0.05708850547671318 +Loss at step 250: 0.061364877969026566 +Loss at step 300: 0.04516593739390373 +Loss at step 350: 0.04070292040705681 +Loss at step 400: 0.04406823217868805 +Loss at step 450: 0.03722516819834709 +Loss at step 500: 0.0718354657292366 +Loss at step 550: 0.04639776423573494 +Loss at step 600: 0.05333671718835831 +Loss at step 650: 0.04893007129430771 +Loss at step 700: 0.05592630058526993 +Loss at step 750: 0.06873811036348343 +Loss at step 800: 0.03710801154375076 +Loss at step 850: 0.042208123952150345 +Loss at step 900: 0.04473268240690231 +Mean training loss after epoch 201: 0.04686431107180777 + +EPOCH: 202 +Loss at step 0: 0.041791778057813644 +Loss at step 50: 0.04284992069005966 +Loss at step 100: 0.04192139953374863 +Loss at step 150: 0.04662506282329559 +Loss at step 200: 0.04492378979921341 +Loss at step 250: 0.04429016262292862 +Loss at step 300: 0.04375737905502319 +Loss at step 350: 0.04445771127939224 +Loss at step 400: 0.04503568634390831 +Loss at step 450: 0.04155305027961731 +Loss at step 500: 0.04711078852415085 +Loss at step 550: 0.039482831954956055 +Loss at step 600: 0.05751878395676613 +Loss at step 650: 0.061351265758275986 +Loss at step 700: 0.03946603089570999 +Loss at step 750: 0.04432506486773491 +Loss at step 800: 0.056940797716379166 +Loss at step 850: 0.055885810405015945 +Loss at step 900: 0.04237936809659004 +Mean training loss after epoch 202: 0.04772862195492045 + +EPOCH: 203 +Loss at step 0: 0.04773397743701935 +Loss at step 50: 0.04327407479286194 +Loss at step 100: 0.04508579894900322 +Loss at step 150: 0.043461427092552185 +Loss at step 200: 0.05130351707339287 +Loss at step 250: 0.04438156262040138 +Loss at step 300: 0.04453274607658386 +Loss at step 350: 0.042909543961286545 +Loss at step 400: 0.06393526494503021 +Loss at step 450: 0.04609208181500435 +Loss at step 500: 0.03904154151678085 +Loss at step 550: 0.0392855666577816 +Loss at step 600: 0.04210763797163963 +Loss at step 650: 0.03892316669225693 +Loss at step 700: 0.05992093309760094 +Loss at step 750: 0.0550675243139267 +Loss at step 800: 0.06265009194612503 +Loss at step 850: 0.06310317665338516 +Loss at step 900: 0.054041001945734024 +Mean training loss after epoch 203: 0.04766706726166295 + +EPOCH: 204 +Loss at step 0: 0.04879172518849373 +Loss at step 50: 0.054548487067222595 +Loss at step 100: 0.04368896782398224 +Loss at step 150: 0.04377016797661781 +Loss at step 200: 0.04682786390185356 +Loss at step 250: 0.04571085423231125 +Loss at step 300: 0.03425014764070511 +Loss at step 350: 0.04309099540114403 +Loss at step 400: 0.03687101975083351 +Loss at step 450: 0.03914305940270424 +Loss at step 500: 0.07001496851444244 +Loss at step 550: 0.05368474870920181 +Loss at step 600: 0.045788541436195374 +Loss at step 650: 0.04695022106170654 +Loss at step 700: 0.04823220521211624 +Loss at step 750: 0.0431053563952446 +Loss at step 800: 0.040326520800590515 +Loss at step 850: 0.04794798046350479 +Loss at step 900: 0.06846459954977036 +Mean training loss after epoch 204: 0.04748466620241592 + +EPOCH: 205 +Loss at step 0: 0.038869936019182205 +Loss at step 50: 0.0483323410153389 +Loss at step 100: 0.03647097200155258 +Loss at step 150: 0.07116826623678207 +Loss at step 200: 0.04692705348134041 +Loss at step 250: 0.03802592307329178 +Loss at step 300: 0.0458889938890934 +Loss at step 350: 0.046889692544937134 +Loss at step 400: 0.054742179811000824 +Loss at step 450: 0.03963998332619667 +Loss at step 500: 0.044107746332883835 +Loss at step 550: 0.048281230032444 +Loss at step 600: 0.053254593163728714 +Loss at step 650: 0.03416450321674347 +Loss at step 700: 0.07941679656505585 +Loss at step 750: 0.053226251155138016 +Loss at step 800: 0.04940042272210121 +Loss at step 850: 0.04608737677335739 +Loss at step 900: 0.05112919583916664 +Mean training loss after epoch 205: 0.04787254124593887 + +EPOCH: 206 +Loss at step 0: 0.04056898131966591 +Loss at step 50: 0.04409470781683922 +Loss at step 100: 0.052106477320194244 +Loss at step 150: 0.05386366695165634 +Loss at step 200: 0.04252477362751961 +Loss at step 250: 0.042529188096523285 +Loss at step 300: 0.04070581495761871 +Loss at step 350: 0.04777435213327408 +Loss at step 400: 0.04477166011929512 +Loss at step 450: 0.0518009327352047 +Loss at step 500: 0.04954981803894043 +Loss at step 550: 0.044055819511413574 +Loss at step 600: 0.04137140139937401 +Loss at step 650: 0.046945735812187195 +Loss at step 700: 0.05715560168027878 +Loss at step 750: 0.05292150750756264 +Loss at step 800: 0.051926590502262115 +Loss at step 850: 0.05012289434671402 +Loss at step 900: 0.04178960993885994 +Mean training loss after epoch 206: 0.04723712336470578 + +EPOCH: 207 +Loss at step 0: 0.039465125650167465 +Loss at step 50: 0.04640473052859306 +Loss at step 100: 0.045263804495334625 +Loss at step 150: 0.044337090104818344 +Loss at step 200: 0.05436590313911438 +Loss at step 250: 0.0401122085750103 +Loss at step 300: 0.03829149901866913 +Loss at step 350: 0.035889968276023865 +Loss at step 400: 0.039059098809957504 +Loss at step 450: 0.04057930037379265 +Loss at step 500: 0.047848891466856 +Loss at step 550: 0.04386613890528679 +Loss at step 600: 0.050865452736616135 +Loss at step 650: 0.04878632724285126 +Loss at step 700: 0.05158521607518196 +Loss at step 750: 0.04516533017158508 +Loss at step 800: 0.05034850537776947 +Loss at step 850: 0.04253244400024414 +Loss at step 900: 0.04081761837005615 +Mean training loss after epoch 207: 0.04768012097474736 + +EPOCH: 208 +Loss at step 0: 0.05140314996242523 +Loss at step 50: 0.04839057847857475 +Loss at step 100: 0.03876051679253578 +Loss at step 150: 0.043330371379852295 +Loss at step 200: 0.05247886851429939 +Loss at step 250: 0.0504843071103096 +Loss at step 300: 0.05746863782405853 +Loss at step 350: 0.04373469576239586 +Loss at step 400: 0.046105559915304184 +Loss at step 450: 0.046895015984773636 +Loss at step 500: 0.04299665987491608 +Loss at step 550: 0.059203166514635086 +Loss at step 600: 0.043251022696495056 +Loss at step 650: 0.06101146712899208 +Loss at step 700: 0.06282281130552292 +Loss at step 750: 0.046965207904577255 +Loss at step 800: 0.04413428530097008 +Loss at step 850: 0.03447035700082779 +Loss at step 900: 0.046553969383239746 +Mean training loss after epoch 208: 0.04761110454686542 + +EPOCH: 209 +Loss at step 0: 0.03972448781132698 +Loss at step 50: 0.03806695714592934 +Loss at step 100: 0.047880638390779495 +Loss at step 150: 0.0413251630961895 +Loss at step 200: 0.0397164560854435 +Loss at step 250: 0.04009636864066124 +Loss at step 300: 0.052486807107925415 +Loss at step 350: 0.04533063992857933 +Loss at step 400: 0.060858145356178284 +Loss at step 450: 0.04786223545670509 +Loss at step 500: 0.03934984654188156 +Loss at step 550: 0.04608427733182907 +Loss at step 600: 0.03994673117995262 +Loss at step 650: 0.05109752342104912 +Loss at step 700: 0.040626563131809235 +Loss at step 750: 0.042479824274778366 +Loss at step 800: 0.04133524000644684 +Loss at step 850: 0.04531863331794739 +Loss at step 900: 0.040771231055259705 +Mean training loss after epoch 209: 0.047952822776936264 + +EPOCH: 210 +Loss at step 0: 0.04377039149403572 +Loss at step 50: 0.04541727527976036 +Loss at step 100: 0.03588651493191719 +Loss at step 150: 0.05156930908560753 +Loss at step 200: 0.043680135160684586 +Loss at step 250: 0.053486987948417664 +Loss at step 300: 0.044207800179719925 +Loss at step 350: 0.04613008722662926 +Loss at step 400: 0.0558336079120636 +Loss at step 450: 0.042584385722875595 +Loss at step 500: 0.059597209095954895 +Loss at step 550: 0.056688111275434494 +Loss at step 600: 0.05070928484201431 +Loss at step 650: 0.04619111865758896 +Loss at step 700: 0.07332200556993484 +Loss at step 750: 0.04834013432264328 +Loss at step 800: 0.038321759551763535 +Loss at step 850: 0.05740134045481682 +Loss at step 900: 0.0568084754049778 +Mean training loss after epoch 210: 0.04771014568068262 + +EPOCH: 211 +Loss at step 0: 0.05112064629793167 +Loss at step 50: 0.05299792066216469 +Loss at step 100: 0.08274813741445541 +Loss at step 150: 0.04609530046582222 +Loss at step 200: 0.0453093983232975 +Loss at step 250: 0.041503746062517166 +Loss at step 300: 0.04886996001005173 +Loss at step 350: 0.04115530848503113 +Loss at step 400: 0.04516785219311714 +Loss at step 450: 0.05548004060983658 +Loss at step 500: 0.04185987263917923 +Loss at step 550: 0.05570684000849724 +Loss at step 600: 0.038409288972616196 +Loss at step 650: 0.03741353005170822 +Loss at step 700: 0.04396037012338638 +Loss at step 750: 0.048935666680336 +Loss at step 800: 0.03652661293745041 +Loss at step 850: 0.04117825999855995 +Loss at step 900: 0.03989038243889809 +Mean training loss after epoch 211: 0.04716580521577457 + +EPOCH: 212 +Loss at step 0: 0.04275142773985863 +Loss at step 50: 0.07302436977624893 +Loss at step 100: 0.04180949553847313 +Loss at step 150: 0.050561610609292984 +Loss at step 200: 0.053041815757751465 +Loss at step 250: 0.04191895201802254 +Loss at step 300: 0.036568038165569305 +Loss at step 350: 0.05690322071313858 +Loss at step 400: 0.05871741473674774 +Loss at step 450: 0.04028346389532089 +Loss at step 500: 0.0477386973798275 +Loss at step 550: 0.047106653451919556 +Loss at step 600: 0.04557666927576065 +Loss at step 650: 0.0409613773226738 +Loss at step 700: 0.0591888427734375 +Loss at step 750: 0.04181051254272461 +Loss at step 800: 0.052574288100004196 +Loss at step 850: 0.057426102459430695 +Loss at step 900: 0.04389532282948494 +Mean training loss after epoch 212: 0.047675337505413654 + +EPOCH: 213 +Loss at step 0: 0.045751966536045074 +Loss at step 50: 0.06599219143390656 +Loss at step 100: 0.056379590183496475 +Loss at step 150: 0.05545878782868385 +Loss at step 200: 0.04602925106883049 +Loss at step 250: 0.045319922268390656 +Loss at step 300: 0.04198266938328743 +Loss at step 350: 0.05146772786974907 +Loss at step 400: 0.04212498664855957 +Loss at step 450: 0.041515596210956573 +Loss at step 500: 0.05858281999826431 +Loss at step 550: 0.0436418354511261 +Loss at step 600: 0.0538911409676075 +Loss at step 650: 0.045281633734703064 +Loss at step 700: 0.040368396788835526 +Loss at step 750: 0.0417974516749382 +Loss at step 800: 0.06293785572052002 +Loss at step 850: 0.054669834673404694 +Loss at step 900: 0.053220782428979874 +Mean training loss after epoch 213: 0.04730810587610136 + +EPOCH: 214 +Loss at step 0: 0.05505721643567085 +Loss at step 50: 0.04407787322998047 +Loss at step 100: 0.04359830543398857 +Loss at step 150: 0.043503452092409134 +Loss at step 200: 0.051094286143779755 +Loss at step 250: 0.0470748096704483 +Loss at step 300: 0.04078816622495651 +Loss at step 350: 0.050563130527734756 +Loss at step 400: 0.06379721313714981 +Loss at step 450: 0.0460142157971859 +Loss at step 500: 0.042432211339473724 +Loss at step 550: 0.052870120853185654 +Loss at step 600: 0.04530122131109238 +Loss at step 650: 0.04430260881781578 +Loss at step 700: 0.04900128021836281 +Loss at step 750: 0.04145069792866707 +Loss at step 800: 0.041504740715026855 +Loss at step 850: 0.04399711266160011 +Loss at step 900: 0.050034299492836 +Mean training loss after epoch 214: 0.04735010903654322 + +EPOCH: 215 +Loss at step 0: 0.035147394984960556 +Loss at step 50: 0.07092860341072083 +Loss at step 100: 0.047119829803705215 +Loss at step 150: 0.044086702167987823 +Loss at step 200: 0.04092658311128616 +Loss at step 250: 0.05814220756292343 +Loss at step 300: 0.05727098882198334 +Loss at step 350: 0.039931993931531906 +Loss at step 400: 0.05919593572616577 +Loss at step 450: 0.04752025753259659 +Loss at step 500: 0.06127823516726494 +Loss at step 550: 0.041031986474990845 +Loss at step 600: 0.056928861886262894 +Loss at step 650: 0.037312570959329605 +Loss at step 700: 0.06454388052225113 +Loss at step 750: 0.04356052353978157 +Loss at step 800: 0.04385578632354736 +Loss at step 850: 0.04536022990942001 +Loss at step 900: 0.0454561747610569 +Mean training loss after epoch 215: 0.04758852161864228 + +EPOCH: 216 +Loss at step 0: 0.052799828350543976 +Loss at step 50: 0.04092658311128616 +Loss at step 100: 0.039658449590206146 +Loss at step 150: 0.06032325327396393 +Loss at step 200: 0.042359016835689545 +Loss at step 250: 0.07213549315929413 +Loss at step 300: 0.04846827685832977 +Loss at step 350: 0.04231898486614227 +Loss at step 400: 0.043453361839056015 +Loss at step 450: 0.05299398675560951 +Loss at step 500: 0.03673096001148224 +Loss at step 550: 0.06061755120754242 +Loss at step 600: 0.060114070773124695 +Loss at step 650: 0.05758287385106087 +Loss at step 700: 0.05680226534605026 +Loss at step 750: 0.040130745619535446 +Loss at step 800: 0.03982475399971008 +Loss at step 850: 0.04455851390957832 +Loss at step 900: 0.04347148537635803 +Mean training loss after epoch 216: 0.04698104819239202 + +EPOCH: 217 +Loss at step 0: 0.038384810090065 +Loss at step 50: 0.0590120330452919 +Loss at step 100: 0.03498348966240883 +Loss at step 150: 0.044721007347106934 +Loss at step 200: 0.05726437643170357 +Loss at step 250: 0.06335853785276413 +Loss at step 300: 0.052697841078042984 +Loss at step 350: 0.04317633435130119 +Loss at step 400: 0.03703523054718971 +Loss at step 450: 0.03691527619957924 +Loss at step 500: 0.04270530119538307 +Loss at step 550: 0.03907637298107147 +Loss at step 600: 0.07956478744745255 +Loss at step 650: 0.06561874598264694 +Loss at step 700: 0.04116937890648842 +Loss at step 750: 0.04935334622859955 +Loss at step 800: 0.044599100947380066 +Loss at step 850: 0.08549793064594269 +Loss at step 900: 0.06955025345087051 +Mean training loss after epoch 217: 0.04741869410122636 + +EPOCH: 218 +Loss at step 0: 0.06665004044771194 +Loss at step 50: 0.04330757260322571 +Loss at step 100: 0.0472387857735157 +Loss at step 150: 0.038241881877183914 +Loss at step 200: 0.03820139542222023 +Loss at step 250: 0.05501837655901909 +Loss at step 300: 0.04767284169793129 +Loss at step 350: 0.04072294011712074 +Loss at step 400: 0.034375522285699844 +Loss at step 450: 0.04513421654701233 +Loss at step 500: 0.03796324506402016 +Loss at step 550: 0.07669896632432938 +Loss at step 600: 0.03900623694062233 +Loss at step 650: 0.040642306208610535 +Loss at step 700: 0.040662724524736404 +Loss at step 750: 0.04617395997047424 +Loss at step 800: 0.04423138499259949 +Loss at step 850: 0.04463059827685356 +Loss at step 900: 0.04386137053370476 +Mean training loss after epoch 218: 0.047484128170017244 + +EPOCH: 219 +Loss at step 0: 0.041837047785520554 +Loss at step 50: 0.04610703885555267 +Loss at step 100: 0.039767805486917496 +Loss at step 150: 0.0410487987101078 +Loss at step 200: 0.05247655138373375 +Loss at step 250: 0.03966361656785011 +Loss at step 300: 0.03629960119724274 +Loss at step 350: 0.0468531996011734 +Loss at step 400: 0.05286935716867447 +Loss at step 450: 0.04778807982802391 +Loss at step 500: 0.05883286893367767 +Loss at step 550: 0.04666287451982498 +Loss at step 600: 0.044475652277469635 +Loss at step 650: 0.05771327763795853 +Loss at step 700: 0.06397523730993271 +Loss at step 750: 0.04072614014148712 +Loss at step 800: 0.039477065205574036 +Loss at step 850: 0.051850784569978714 +Loss at step 900: 0.0376930758357048 +Mean training loss after epoch 219: 0.047274095429254495 + +EPOCH: 220 +Loss at step 0: 0.04366578906774521 +Loss at step 50: 0.03873101994395256 +Loss at step 100: 0.03911314159631729 +Loss at step 150: 0.04766039922833443 +Loss at step 200: 0.05401189997792244 +Loss at step 250: 0.07961780577898026 +Loss at step 300: 0.04614045470952988 +Loss at step 350: 0.03977677598595619 +Loss at step 400: 0.054996393620967865 +Loss at step 450: 0.05609653890132904 +Loss at step 500: 0.05580352246761322 +Loss at step 550: 0.04408884420990944 +Loss at step 600: 0.038714561611413956 +Loss at step 650: 0.051280710846185684 +Loss at step 700: 0.05853607505559921 +Loss at step 750: 0.04544028267264366 +Loss at step 800: 0.041277285665273666 +Loss at step 850: 0.03493333235383034 +Loss at step 900: 0.04718414694070816 +Mean training loss after epoch 220: 0.04716123327978257 + +EPOCH: 221 +Loss at step 0: 0.04863929748535156 +Loss at step 50: 0.0390445701777935 +Loss at step 100: 0.04034244269132614 +Loss at step 150: 0.03342617675662041 +Loss at step 200: 0.03680483624339104 +Loss at step 250: 0.05681845545768738 +Loss at step 300: 0.03859260305762291 +Loss at step 350: 0.058830276131629944 +Loss at step 400: 0.04245946928858757 +Loss at step 450: 0.04263346269726753 +Loss at step 500: 0.04099033400416374 +Loss at step 550: 0.04028167575597763 +Loss at step 600: 0.037266429513692856 +Loss at step 650: 0.04039876535534859 +Loss at step 700: 0.05658671259880066 +Loss at step 750: 0.07674726843833923 +Loss at step 800: 0.059442825615406036 +Loss at step 850: 0.039951808750629425 +Loss at step 900: 0.04243859276175499 +Mean training loss after epoch 221: 0.04774948721453706 + +EPOCH: 222 +Loss at step 0: 0.053153473883867264 +Loss at step 50: 0.05122070759534836 +Loss at step 100: 0.039500556886196136 +Loss at step 150: 0.041711028665304184 +Loss at step 200: 0.04218527302145958 +Loss at step 250: 0.0447305403649807 +Loss at step 300: 0.04137835279107094 +Loss at step 350: 0.05255337432026863 +Loss at step 400: 0.0459631010890007 +Loss at step 450: 0.04235805571079254 +Loss at step 500: 0.05904815346002579 +Loss at step 550: 0.058946140110492706 +Loss at step 600: 0.047744330018758774 +Loss at step 650: 0.05897083878517151 +Loss at step 700: 0.045228008180856705 +Loss at step 750: 0.07652023434638977 +Loss at step 800: 0.06658067554235458 +Loss at step 850: 0.044594231992959976 +Loss at step 900: 0.04255520552396774 +Mean training loss after epoch 222: 0.04744304697523748 + +EPOCH: 223 +Loss at step 0: 0.047031596302986145 +Loss at step 50: 0.04045679047703743 +Loss at step 100: 0.04767059534788132 +Loss at step 150: 0.03950158879160881 +Loss at step 200: 0.04135603830218315 +Loss at step 250: 0.0434175580739975 +Loss at step 300: 0.05399486795067787 +Loss at step 350: 0.06458107382059097 +Loss at step 400: 0.044949959963560104 +Loss at step 450: 0.05583903193473816 +Loss at step 500: 0.06282207369804382 +Loss at step 550: 0.06257403641939163 +Loss at step 600: 0.03512616828083992 +Loss at step 650: 0.042726799845695496 +Loss at step 700: 0.053434744477272034 +Loss at step 750: 0.042995236814022064 +Loss at step 800: 0.05334443226456642 +Loss at step 850: 0.04633363336324692 +Loss at step 900: 0.04433233663439751 +Mean training loss after epoch 223: 0.047436836133919545 + +EPOCH: 224 +Loss at step 0: 0.04007522389292717 +Loss at step 50: 0.059121374040842056 +Loss at step 100: 0.04390476271510124 +Loss at step 150: 0.048143550753593445 +Loss at step 200: 0.055707111954689026 +Loss at step 250: 0.04048534110188484 +Loss at step 300: 0.0380890816450119 +Loss at step 350: 0.06077149510383606 +Loss at step 400: 0.04606717452406883 +Loss at step 450: 0.05691640079021454 +Loss at step 500: 0.038611020892858505 +Loss at step 550: 0.03844698518514633 +Loss at step 600: 0.04319305717945099 +Loss at step 650: 0.0398542694747448 +Loss at step 700: 0.03613333776593208 +Loss at step 750: 0.03880419582128525 +Loss at step 800: 0.05817781016230583 +Loss at step 850: 0.04662526398897171 +Loss at step 900: 0.039097584784030914 +Mean training loss after epoch 224: 0.04707898342930305 + +EPOCH: 225 +Loss at step 0: 0.04353899881243706 +Loss at step 50: 0.050924018025398254 +Loss at step 100: 0.0431477427482605 +Loss at step 150: 0.05140654742717743 +Loss at step 200: 0.06268211454153061 +Loss at step 250: 0.05623786523938179 +Loss at step 300: 0.0463687926530838 +Loss at step 350: 0.039990004152059555 +Loss at step 400: 0.03682544082403183 +Loss at step 450: 0.04200984910130501 +Loss at step 500: 0.04898510128259659 +Loss at step 550: 0.043444860726594925 +Loss at step 600: 0.040474530309438705 +Loss at step 650: 0.048006415367126465 +Loss at step 700: 0.04029586538672447 +Loss at step 750: 0.04451839253306389 +Loss at step 800: 0.051514215767383575 +Loss at step 850: 0.04700411111116409 +Loss at step 900: 0.058472760021686554 +Mean training loss after epoch 225: 0.047064537603431926 + +EPOCH: 226 +Loss at step 0: 0.04394407197833061 +Loss at step 50: 0.05346597731113434 +Loss at step 100: 0.05548607558012009 +Loss at step 150: 0.055829621851444244 +Loss at step 200: 0.0653204694390297 +Loss at step 250: 0.048385318368673325 +Loss at step 300: 0.03922620415687561 +Loss at step 350: 0.04363767430186272 +Loss at step 400: 0.039703067392110825 +Loss at step 450: 0.041631124913692474 +Loss at step 500: 0.04573330655694008 +Loss at step 550: 0.040640123188495636 +Loss at step 600: 0.07323020696640015 +Loss at step 650: 0.03348701447248459 +Loss at step 700: 0.05324455723166466 +Loss at step 750: 0.043708935379981995 +Loss at step 800: 0.05747424066066742 +Loss at step 850: 0.03639143332839012 +Loss at step 900: 0.0512024462223053 +Mean training loss after epoch 226: 0.04675335999030167 + +EPOCH: 227 +Loss at step 0: 0.048987869173288345 +Loss at step 50: 0.035255033522844315 +Loss at step 100: 0.04800346493721008 +Loss at step 150: 0.04644560068845749 +Loss at step 200: 0.05829877033829689 +Loss at step 250: 0.047439973801374435 +Loss at step 300: 0.04464838281273842 +Loss at step 350: 0.054507020860910416 +Loss at step 400: 0.04488610476255417 +Loss at step 450: 0.03926553577184677 +Loss at step 500: 0.04310767352581024 +Loss at step 550: 0.06332356482744217 +Loss at step 600: 0.05981716513633728 +Loss at step 650: 0.0617796815931797 +Loss at step 700: 0.06153799965977669 +Loss at step 750: 0.04407687112689018 +Loss at step 800: 0.040865469723939896 +Loss at step 850: 0.04663880914449692 +Loss at step 900: 0.06127316877245903 +Mean training loss after epoch 227: 0.047458961268446086 + +EPOCH: 228 +Loss at step 0: 0.07598868757486343 +Loss at step 50: 0.04697668179869652 +Loss at step 100: 0.04006880149245262 +Loss at step 150: 0.036018721759319305 +Loss at step 200: 0.061186742037534714 +Loss at step 250: 0.04012208431959152 +Loss at step 300: 0.04649828001856804 +Loss at step 350: 0.045614294707775116 +Loss at step 400: 0.03645792976021767 +Loss at step 450: 0.04029053449630737 +Loss at step 500: 0.041784532368183136 +Loss at step 550: 0.04227086901664734 +Loss at step 600: 0.06699317693710327 +Loss at step 650: 0.04080168902873993 +Loss at step 700: 0.05825301632285118 +Loss at step 750: 0.043675485998392105 +Loss at step 800: 0.0551060251891613 +Loss at step 850: 0.05499584600329399 +Loss at step 900: 0.04213486239314079 +Mean training loss after epoch 228: 0.0466448109644626 + +EPOCH: 229 +Loss at step 0: 0.053296320140361786 +Loss at step 50: 0.034150946885347366 +Loss at step 100: 0.04021020606160164 +Loss at step 150: 0.05806738883256912 +Loss at step 200: 0.04106394946575165 +Loss at step 250: 0.03447636961936951 +Loss at step 300: 0.045713938772678375 +Loss at step 350: 0.0461614653468132 +Loss at step 400: 0.04305008426308632 +Loss at step 450: 0.04936772957444191 +Loss at step 500: 0.06266023218631744 +Loss at step 550: 0.04445520043373108 +Loss at step 600: 0.04292086139321327 +Loss at step 650: 0.04346507787704468 +Loss at step 700: 0.039561931043863297 +Loss at step 750: 0.038913287222385406 +Loss at step 800: 0.03614294156432152 +Loss at step 850: 0.052126191556453705 +Loss at step 900: 0.08678729087114334 +Mean training loss after epoch 229: 0.04697026088357226 + +EPOCH: 230 +Loss at step 0: 0.048786457628011703 +Loss at step 50: 0.04212988540530205 +Loss at step 100: 0.07441911101341248 +Loss at step 150: 0.04295492172241211 +Loss at step 200: 0.041588328778743744 +Loss at step 250: 0.04133371263742447 +Loss at step 300: 0.04392175376415253 +Loss at step 350: 0.035162027925252914 +Loss at step 400: 0.05061633139848709 +Loss at step 450: 0.054585494101047516 +Loss at step 500: 0.05167056620121002 +Loss at step 550: 0.04089488834142685 +Loss at step 600: 0.06832866370677948 +Loss at step 650: 0.06363660097122192 +Loss at step 700: 0.03954530507326126 +Loss at step 750: 0.04157429561018944 +Loss at step 800: 0.04410552978515625 +Loss at step 850: 0.03722862899303436 +Loss at step 900: 0.04667089506983757 +Mean training loss after epoch 230: 0.04746422755009711 + +EPOCH: 231 +Loss at step 0: 0.06268423050642014 +Loss at step 50: 0.03882352262735367 +Loss at step 100: 0.048733919858932495 +Loss at step 150: 0.039344415068626404 +Loss at step 200: 0.036332037299871445 +Loss at step 250: 0.03771339729428291 +Loss at step 300: 0.0636814534664154 +Loss at step 350: 0.043056681752204895 +Loss at step 400: 0.05330965295433998 +Loss at step 450: 0.05905596539378166 +Loss at step 500: 0.05970004200935364 +Loss at step 550: 0.04497944563627243 +Loss at step 600: 0.055606722831726074 +Loss at step 650: 0.04521320387721062 +Loss at step 700: 0.04246276617050171 +Loss at step 750: 0.05280632898211479 +Loss at step 800: 0.04493777081370354 +Loss at step 850: 0.03721395134925842 +Loss at step 900: 0.04620326682925224 +Mean training loss after epoch 231: 0.04741931698128168 + +EPOCH: 232 +Loss at step 0: 0.04619834944605827 +Loss at step 50: 0.04343336448073387 +Loss at step 100: 0.06402186304330826 +Loss at step 150: 0.059715017676353455 +Loss at step 200: 0.05935760214924812 +Loss at step 250: 0.05277004837989807 +Loss at step 300: 0.044334717094898224 +Loss at step 350: 0.03390008956193924 +Loss at step 400: 0.03994208946824074 +Loss at step 450: 0.046327754855155945 +Loss at step 500: 0.03900787606835365 +Loss at step 550: 0.04249203950166702 +Loss at step 600: 0.03887832164764404 +Loss at step 650: 0.042341552674770355 +Loss at step 700: 0.03972656652331352 +Loss at step 750: 0.053369395434856415 +Loss at step 800: 0.045512761920690536 +Loss at step 850: 0.05577278882265091 +Loss at step 900: 0.05153714120388031 +Mean training loss after epoch 232: 0.047221900037388555 + +EPOCH: 233 +Loss at step 0: 0.042657602578401566 +Loss at step 50: 0.04294698312878609 +Loss at step 100: 0.03562244400382042 +Loss at step 150: 0.03930288180708885 +Loss at step 200: 0.04112962633371353 +Loss at step 250: 0.04507805407047272 +Loss at step 300: 0.03694811090826988 +Loss at step 350: 0.05841729789972305 +Loss at step 400: 0.04024740308523178 +Loss at step 450: 0.039725203067064285 +Loss at step 500: 0.04341194033622742 +Loss at step 550: 0.03842465952038765 +Loss at step 600: 0.039659276604652405 +Loss at step 650: 0.041395362466573715 +Loss at step 700: 0.04072455316781998 +Loss at step 750: 0.05474300682544708 +Loss at step 800: 0.04525244981050491 +Loss at step 850: 0.04803088307380676 +Loss at step 900: 0.041800759732723236 +Mean training loss after epoch 233: 0.047675394161201236 + +EPOCH: 234 +Loss at step 0: 0.047601815313100815 +Loss at step 50: 0.06194665655493736 +Loss at step 100: 0.059563398361206055 +Loss at step 150: 0.041149597615003586 +Loss at step 200: 0.04458397999405861 +Loss at step 250: 0.0376550666987896 +Loss at step 300: 0.05714569240808487 +Loss at step 350: 0.0439092181622982 +Loss at step 400: 0.04379114881157875 +Loss at step 450: 0.046425964683294296 +Loss at step 500: 0.03247520327568054 +Loss at step 550: 0.03677426651120186 +Loss at step 600: 0.05446552112698555 +Loss at step 650: 0.042451877146959305 +Loss at step 700: 0.06022040173411369 +Loss at step 750: 0.040039267390966415 +Loss at step 800: 0.03847021982073784 +Loss at step 850: 0.03691675513982773 +Loss at step 900: 0.040188200771808624 +Mean training loss after epoch 234: 0.04741387071608226 + +EPOCH: 235 +Loss at step 0: 0.040337491780519485 +Loss at step 50: 0.04153384268283844 +Loss at step 100: 0.037336643785238266 +Loss at step 150: 0.04118182137608528 +Loss at step 200: 0.041416674852371216 +Loss at step 250: 0.055867038667201996 +Loss at step 300: 0.044471994042396545 +Loss at step 350: 0.04303606227040291 +Loss at step 400: 0.05068814381957054 +Loss at step 450: 0.03922541067004204 +Loss at step 500: 0.057405758649110794 +Loss at step 550: 0.0728236511349678 +Loss at step 600: 0.03998977690935135 +Loss at step 650: 0.03862689435482025 +Loss at step 700: 0.03816212713718414 +Loss at step 750: 0.03907797113060951 +Loss at step 800: 0.03543504700064659 +Loss at step 850: 0.045930106192827225 +Loss at step 900: 0.04171489551663399 +Mean training loss after epoch 235: 0.04743372510943903 + +EPOCH: 236 +Loss at step 0: 0.035620808601379395 +Loss at step 50: 0.040052562952041626 +Loss at step 100: 0.05313817784190178 +Loss at step 150: 0.06370919942855835 +Loss at step 200: 0.036507148295640945 +Loss at step 250: 0.03502552956342697 +Loss at step 300: 0.04301616549491882 +Loss at step 350: 0.03871866688132286 +Loss at step 400: 0.04344163462519646 +Loss at step 450: 0.0470571406185627 +Loss at step 500: 0.042631007730960846 +Loss at step 550: 0.07669208198785782 +Loss at step 600: 0.03793485835194588 +Loss at step 650: 0.04107014834880829 +Loss at step 700: 0.05810288339853287 +Loss at step 750: 0.04152897745370865 +Loss at step 800: 0.056434329599142075 +Loss at step 850: 0.04527691379189491 +Loss at step 900: 0.05430632084608078 +Mean training loss after epoch 236: 0.04720110496057313 + +EPOCH: 237 +Loss at step 0: 0.05052132532000542 +Loss at step 50: 0.08012547343969345 +Loss at step 100: 0.058045610785484314 +Loss at step 150: 0.04248403012752533 +Loss at step 200: 0.03539876639842987 +Loss at step 250: 0.05631788447499275 +Loss at step 300: 0.045057542622089386 +Loss at step 350: 0.03877414017915726 +Loss at step 400: 0.05071646720170975 +Loss at step 450: 0.04051457718014717 +Loss at step 500: 0.03610832989215851 +Loss at step 550: 0.05710848793387413 +Loss at step 600: 0.046471673995256424 +Loss at step 650: 0.05935698002576828 +Loss at step 700: 0.041645292192697525 +Loss at step 750: 0.04495110735297203 +Loss at step 800: 0.04978562146425247 +Loss at step 850: 0.06922519952058792 +Loss at step 900: 0.056647542864084244 +Mean training loss after epoch 237: 0.04707003830830807 + +EPOCH: 238 +Loss at step 0: 0.05067695677280426 +Loss at step 50: 0.04787314683198929 +Loss at step 100: 0.04050994664430618 +Loss at step 150: 0.03658545762300491 +Loss at step 200: 0.055724214762449265 +Loss at step 250: 0.0383535660803318 +Loss at step 300: 0.043187085539102554 +Loss at step 350: 0.04068366438150406 +Loss at step 400: 0.048598501831293106 +Loss at step 450: 0.051716651767492294 +Loss at step 500: 0.0451512411236763 +Loss at step 550: 0.04616754129528999 +Loss at step 600: 0.0420907661318779 +Loss at step 650: 0.0488111637532711 +Loss at step 700: 0.04188105836510658 +Loss at step 750: 0.034969791769981384 +Loss at step 800: 0.08604151010513306 +Loss at step 850: 0.043520957231521606 +Loss at step 900: 0.07170840352773666 +Mean training loss after epoch 238: 0.0473295567688276 + +EPOCH: 239 +Loss at step 0: 0.04014192521572113 +Loss at step 50: 0.038639672100543976 +Loss at step 100: 0.04928378388285637 +Loss at step 150: 0.03701779246330261 +Loss at step 200: 0.04564748331904411 +Loss at step 250: 0.06821008771657944 +Loss at step 300: 0.05658178776502609 +Loss at step 350: 0.04335528984665871 +Loss at step 400: 0.04013707861304283 +Loss at step 450: 0.0400187149643898 +Loss at step 500: 0.04198727756738663 +Loss at step 550: 0.04488756135106087 +Loss at step 600: 0.058134496212005615 +Loss at step 650: 0.044846270233392715 +Loss at step 700: 0.05701998248696327 +Loss at step 750: 0.04405871033668518 +Loss at step 800: 0.04495856165885925 +Loss at step 850: 0.04108881205320358 +Loss at step 900: 0.03564365580677986 +Mean training loss after epoch 239: 0.047027412236038686 + +EPOCH: 240 +Loss at step 0: 0.05919811874628067 +Loss at step 50: 0.04205004498362541 +Loss at step 100: 0.036110155284404755 +Loss at step 150: 0.03948517516255379 +Loss at step 200: 0.043891001492738724 +Loss at step 250: 0.0431203655898571 +Loss at step 300: 0.04255463182926178 +Loss at step 350: 0.04341713339090347 +Loss at step 400: 0.0569964163005352 +Loss at step 450: 0.043533556163311005 +Loss at step 500: 0.04148015007376671 +Loss at step 550: 0.049632586538791656 +Loss at step 600: 0.04524538293480873 +Loss at step 650: 0.03986542299389839 +Loss at step 700: 0.05919334292411804 +Loss at step 750: 0.04900319129228592 +Loss at step 800: 0.04924944415688515 +Loss at step 850: 0.04429350793361664 +Loss at step 900: 0.04349055141210556 +Mean training loss after epoch 240: 0.047333256295844436 + +EPOCH: 241 +Loss at step 0: 0.04207955673336983 +Loss at step 50: 0.038583554327487946 +Loss at step 100: 0.0386909544467926 +Loss at step 150: 0.04193786531686783 +Loss at step 200: 0.04368520528078079 +Loss at step 250: 0.03882809355854988 +Loss at step 300: 0.06384032964706421 +Loss at step 350: 0.04240868613123894 +Loss at step 400: 0.03901847079396248 +Loss at step 450: 0.04022093862295151 +Loss at step 500: 0.05895869433879852 +Loss at step 550: 0.04181547462940216 +Loss at step 600: 0.04146876558661461 +Loss at step 650: 0.049333926290273666 +Loss at step 700: 0.04821881279349327 +Loss at step 750: 0.0656735897064209 +Loss at step 800: 0.04182281345129013 +Loss at step 850: 0.054167602211236954 +Loss at step 900: 0.06341934949159622 +Mean training loss after epoch 241: 0.046659974270124936 + +EPOCH: 242 +Loss at step 0: 0.03915708139538765 +Loss at step 50: 0.047171518206596375 +Loss at step 100: 0.056036826223134995 +Loss at step 150: 0.03442196547985077 +Loss at step 200: 0.03871029615402222 +Loss at step 250: 0.05094204097986221 +Loss at step 300: 0.04389587789773941 +Loss at step 350: 0.05669848248362541 +Loss at step 400: 0.053994324058294296 +Loss at step 450: 0.039314061403274536 +Loss at step 500: 0.04054942727088928 +Loss at step 550: 0.041670020669698715 +Loss at step 600: 0.04545379430055618 +Loss at step 650: 0.04204105585813522 +Loss at step 700: 0.057025615125894547 +Loss at step 750: 0.04496810957789421 +Loss at step 800: 0.04684646800160408 +Loss at step 850: 0.03260652348399162 +Loss at step 900: 0.045404549688100815 +Mean training loss after epoch 242: 0.04722864735823895 + +EPOCH: 243 +Loss at step 0: 0.04056708887219429 +Loss at step 50: 0.058635562658309937 +Loss at step 100: 0.06075160577893257 +Loss at step 150: 0.049098290503025055 +Loss at step 200: 0.03618660569190979 +Loss at step 250: 0.045571278780698776 +Loss at step 300: 0.04873976483941078 +Loss at step 350: 0.06908378005027771 +Loss at step 400: 0.05161737650632858 +Loss at step 450: 0.0382942371070385 +Loss at step 500: 0.04672587662935257 +Loss at step 550: 0.048816800117492676 +Loss at step 600: 0.03904996067285538 +Loss at step 650: 0.03952929377555847 +Loss at step 700: 0.03677036613225937 +Loss at step 750: 0.04177812114357948 +Loss at step 800: 0.04302003234624863 +Loss at step 850: 0.041392795741558075 +Loss at step 900: 0.05374664068222046 +Mean training loss after epoch 243: 0.04673918570949833 + +EPOCH: 244 +Loss at step 0: 0.05397259443998337 +Loss at step 50: 0.051949262619018555 +Loss at step 100: 0.037971723824739456 +Loss at step 150: 0.040903352200984955 +Loss at step 200: 0.05542260780930519 +Loss at step 250: 0.07131517678499222 +Loss at step 300: 0.0620107427239418 +Loss at step 350: 0.046937648206949234 +Loss at step 400: 0.044788774102926254 +Loss at step 450: 0.03686676546931267 +Loss at step 500: 0.05122985690832138 +Loss at step 550: 0.04788661375641823 +Loss at step 600: 0.04377329349517822 +Loss at step 650: 0.04340304061770439 +Loss at step 700: 0.04081183299422264 +Loss at step 750: 0.04269346222281456 +Loss at step 800: 0.03235775604844093 +Loss at step 850: 0.057394132018089294 +Loss at step 900: 0.03920556604862213 +Mean training loss after epoch 244: 0.04690273205386296 + +EPOCH: 245 +Loss at step 0: 0.06356802582740784 +Loss at step 50: 0.05200378969311714 +Loss at step 100: 0.05641835555434227 +Loss at step 150: 0.04435671493411064 +Loss at step 200: 0.04495227336883545 +Loss at step 250: 0.06070738285779953 +Loss at step 300: 0.04533640295267105 +Loss at step 350: 0.03971453011035919 +Loss at step 400: 0.05741814523935318 +Loss at step 450: 0.03619606792926788 +Loss at step 500: 0.03923622518777847 +Loss at step 550: 0.058454450219869614 +Loss at step 600: 0.053241584450006485 +Loss at step 650: 0.04373021796345711 +Loss at step 700: 0.04292839393019676 +Loss at step 750: 0.04143700376152992 +Loss at step 800: 0.038410406559705734 +Loss at step 850: 0.0428939051926136 +Loss at step 900: 0.039994798600673676 +Mean training loss after epoch 245: 0.04652742850882158 + +EPOCH: 246 +Loss at step 0: 0.053880155086517334 +Loss at step 50: 0.03900417312979698 +Loss at step 100: 0.04321351274847984 +Loss at step 150: 0.04429652541875839 +Loss at step 200: 0.04508763179183006 +Loss at step 250: 0.04875646159052849 +Loss at step 300: 0.04245120659470558 +Loss at step 350: 0.03354842588305473 +Loss at step 400: 0.04555755481123924 +Loss at step 450: 0.05434974655508995 +Loss at step 500: 0.04873263090848923 +Loss at step 550: 0.061547745019197464 +Loss at step 600: 0.040138132870197296 +Loss at step 650: 0.04374224692583084 +Loss at step 700: 0.05633601173758507 +Loss at step 750: 0.04022585600614548 +Loss at step 800: 0.04465155303478241 +Loss at step 850: 0.058879394084215164 +Loss at step 900: 0.04596970975399017 +Mean training loss after epoch 246: 0.04621862697957167 + +EPOCH: 247 +Loss at step 0: 0.03740677610039711 +Loss at step 50: 0.03778629004955292 +Loss at step 100: 0.05837121233344078 +Loss at step 150: 0.042970210313797 +Loss at step 200: 0.04055590182542801 +Loss at step 250: 0.04510975629091263 +Loss at step 300: 0.04198705032467842 +Loss at step 350: 0.03874443098902702 +Loss at step 400: 0.049550916999578476 +Loss at step 450: 0.04024399816989899 +Loss at step 500: 0.05571698024868965 +Loss at step 550: 0.043419331312179565 +Loss at step 600: 0.04440811276435852 +Loss at step 650: 0.040983110666275024 +Loss at step 700: 0.04399896040558815 +Loss at step 750: 0.03868815302848816 +Loss at step 800: 0.040444061160087585 +Loss at step 850: 0.047447554767131805 +Loss at step 900: 0.043713346123695374 +Mean training loss after epoch 247: 0.04691247007192007 + +EPOCH: 248 +Loss at step 0: 0.04006945341825485 +Loss at step 50: 0.03535091131925583 +Loss at step 100: 0.04494907706975937 +Loss at step 150: 0.04120033606886864 +Loss at step 200: 0.04759150370955467 +Loss at step 250: 0.05994563549757004 +Loss at step 300: 0.03854096680879593 +Loss at step 350: 0.04434619098901749 +Loss at step 400: 0.045870307832956314 +Loss at step 450: 0.050577688962221146 +Loss at step 500: 0.04680788144469261 +Loss at step 550: 0.05531508848071098 +Loss at step 600: 0.049306634813547134 +Loss at step 650: 0.034269288182258606 +Loss at step 700: 0.0644838735461235 +Loss at step 750: 0.03583788871765137 +Loss at step 800: 0.05227086320519447 +Loss at step 850: 0.04294784739613533 +Loss at step 900: 0.04728400707244873 +Mean training loss after epoch 248: 0.04668322092553637 + +EPOCH: 249 +Loss at step 0: 0.04188712313771248 +Loss at step 50: 0.036383070051670074 +Loss at step 100: 0.0610353983938694 +Loss at step 150: 0.03815960884094238 +Loss at step 200: 0.034124597907066345 +Loss at step 250: 0.062154851853847504 +Loss at step 300: 0.042882829904556274 +Loss at step 350: 0.05679420381784439 +Loss at step 400: 0.04250594973564148 +Loss at step 450: 0.04011699929833412 +Loss at step 500: 0.05614793300628662 +Loss at step 550: 0.07353191822767258 +Loss at step 600: 0.047859035432338715 +Loss at step 650: 0.045483119785785675 +Loss at step 700: 0.04289074242115021 +Loss at step 750: 0.056176040321588516 +Loss at step 800: 0.05899858474731445 +Loss at step 850: 0.04015076160430908 +Loss at step 900: 0.05441942811012268 +Mean training loss after epoch 249: 0.04700331613898023 + +EPOCH: 250 +Loss at step 0: 0.03961891308426857 +Loss at step 50: 0.06441330909729004 +Loss at step 100: 0.04343613609671593 +Loss at step 150: 0.0401047058403492 +Loss at step 200: 0.03948841243982315 +Loss at step 250: 0.04260681942105293 +Loss at step 300: 0.058747801929712296 +Loss at step 350: 0.03994559124112129 +Loss at step 400: 0.04251643642783165 +Loss at step 450: 0.04467840492725372 +Loss at step 500: 0.042453572154045105 +Loss at step 550: 0.03803998604416847 +Loss at step 600: 0.04704098775982857 +Loss at step 650: 0.04498320072889328 +Loss at step 700: 0.0504445917904377 +Loss at step 750: 0.0568452924489975 +Loss at step 800: 0.06857742369174957 +Loss at step 850: 0.04511500895023346 +Loss at step 900: 0.04446408152580261 +Mean training loss after epoch 250: 0.04667696272179898 + +EPOCH: 251 +Loss at step 0: 0.036432988941669464 +Loss at step 50: 0.039983708411455154 +Loss at step 100: 0.06488723307847977 +Loss at step 150: 0.04448876529932022 +Loss at step 200: 0.04631435498595238 +Loss at step 250: 0.04306500032544136 +Loss at step 300: 0.055690228939056396 +Loss at step 350: 0.05130288749933243 +Loss at step 400: 0.05100983753800392 +Loss at step 450: 0.046761877834796906 +Loss at step 500: 0.04531388357281685 +Loss at step 550: 0.04471338912844658 +Loss at step 600: 0.038645148277282715 +Loss at step 650: 0.05674571171402931 +Loss at step 700: 0.03916317969560623 +Loss at step 750: 0.05132918804883957 +Loss at step 800: 0.036909040063619614 +Loss at step 850: 0.04756888002157211 +Loss at step 900: 0.04153941571712494 +Mean training loss after epoch 251: 0.04704573198453958 + +EPOCH: 252 +Loss at step 0: 0.04232493042945862 +Loss at step 50: 0.04586152732372284 +Loss at step 100: 0.03852195292711258 +Loss at step 150: 0.038407791405916214 +Loss at step 200: 0.042226601392030716 +Loss at step 250: 0.04161105304956436 +Loss at step 300: 0.05575171485543251 +Loss at step 350: 0.05562499165534973 +Loss at step 400: 0.04631282761693001 +Loss at step 450: 0.05888797715306282 +Loss at step 500: 0.04068775847554207 +Loss at step 550: 0.04985671117901802 +Loss at step 600: 0.03735966607928276 +Loss at step 650: 0.0401538610458374 +Loss at step 700: 0.05814424529671669 +Loss at step 750: 0.04114815965294838 +Loss at step 800: 0.03903987631201744 +Loss at step 850: 0.041900113224983215 +Loss at step 900: 0.04376242682337761 +Mean training loss after epoch 252: 0.04689805341292737 + +EPOCH: 253 +Loss at step 0: 0.04712213575839996 +Loss at step 50: 0.03744637593626976 +Loss at step 100: 0.04753965139389038 +Loss at step 150: 0.043236613273620605 +Loss at step 200: 0.0362410768866539 +Loss at step 250: 0.038369320333004 +Loss at step 300: 0.04537704586982727 +Loss at step 350: 0.04625498503446579 +Loss at step 400: 0.07098408043384552 +Loss at step 450: 0.03732592239975929 +Loss at step 500: 0.05638297647237778 +Loss at step 550: 0.037737686187028885 +Loss at step 600: 0.07242827117443085 +Loss at step 650: 0.03800950199365616 +Loss at step 700: 0.04793880507349968 +Loss at step 750: 0.04170050844550133 +Loss at step 800: 0.03860677778720856 +Loss at step 850: 0.05417443811893463 +Loss at step 900: 0.043968137353658676 +Mean training loss after epoch 253: 0.04692439014898307 + +EPOCH: 254 +Loss at step 0: 0.051419589668512344 +Loss at step 50: 0.05988432466983795 +Loss at step 100: 0.0616888590157032 +Loss at step 150: 0.04247802495956421 +Loss at step 200: 0.0398801788687706 +Loss at step 250: 0.04294807091355324 +Loss at step 300: 0.07003361731767654 +Loss at step 350: 0.06442959606647491 +Loss at step 400: 0.04889055714011192 +Loss at step 450: 0.049414005130529404 +Loss at step 500: 0.04413381963968277 +Loss at step 550: 0.046305976808071136 +Loss at step 600: 0.0539136603474617 +Loss at step 650: 0.058604806661605835 +Loss at step 700: 0.04037125036120415 +Loss at step 750: 0.05922140181064606 +Loss at step 800: 0.044783320277929306 +Loss at step 850: 0.04637423902750015 +Loss at step 900: 0.0419696569442749 +Mean training loss after epoch 254: 0.04710233976830171 + +EPOCH: 255 +Loss at step 0: 0.03589440882205963 +Loss at step 50: 0.03913043066859245 +Loss at step 100: 0.054772406816482544 +Loss at step 150: 0.038391511887311935 +Loss at step 200: 0.043922554701566696 +Loss at step 250: 0.036541394889354706 +Loss at step 300: 0.04650618135929108 +Loss at step 350: 0.041059959679841995 +Loss at step 400: 0.0396684855222702 +Loss at step 450: 0.04587254300713539 +Loss at step 500: 0.04433554410934448 +Loss at step 550: 0.03697195649147034 +Loss at step 600: 0.06151704117655754 +Loss at step 650: 0.04050551354885101 +Loss at step 700: 0.07169540971517563 +Loss at step 750: 0.044441260397434235 +Loss at step 800: 0.04001397266983986 +Loss at step 850: 0.045147642493247986 +Loss at step 900: 0.05151965469121933 +Mean training loss after epoch 255: 0.04706612190823438 + +EPOCH: 256 +Loss at step 0: 0.04150315746665001 +Loss at step 50: 0.0580219142138958 +Loss at step 100: 0.04887067899107933 +Loss at step 150: 0.0464482307434082 +Loss at step 200: 0.0443718358874321 +Loss at step 250: 0.04127774015069008 +Loss at step 300: 0.03330198675394058 +Loss at step 350: 0.060376089066267014 +Loss at step 400: 0.03948952630162239 +Loss at step 450: 0.03571775183081627 +Loss at step 500: 0.04317997023463249 +Loss at step 550: 0.038010261952877045 +Loss at step 600: 0.06672787666320801 +Loss at step 650: 0.04856971278786659 +Loss at step 700: 0.03457494080066681 +Loss at step 750: 0.037759508937597275 +Loss at step 800: 0.047269709408283234 +Loss at step 850: 0.04098012298345566 +Loss at step 900: 0.03900384157896042 +Mean training loss after epoch 256: 0.04700150484564716 +/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/athenahomes/gabrijel/miniconda3/envs/track-generator/lib/python3.11/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( +Schedule: linear +Cfg: False +Output path: /scratch/shared/beegfs/gabrijel/m2l/mini +Patch Size: 2 +Device: cuda:7 +===================================================================================== +Layer (type:depth-idx) Param # +===================================================================================== +DiT 75,264 +├─PatchEmbed: 1-1 -- +│ └─Conv2d: 2-1 1,920 +├─TimestepEmbedder: 1-2 -- +│ └─Mlp: 2-2 -- +│ │ └─Linear: 3-1 98,688 +│ │ └─SiLU: 3-2 -- +│ │ └─Linear: 3-3 147,840 +├─ModuleList: 1-3 -- +│ └─DiTBlock: 2-3 -- +│ │ └─LayerNorm: 3-4 -- +│ │ └─MultiheadAttention: 3-5 591,360 +│ │ └─LayerNorm: 3-6 -- +│ │ └─Mlp: 3-7 1,181,568 +│ │ └─Sequential: 3-8 887,040 +│ └─DiTBlock: 2-4 -- +│ │ └─LayerNorm: 3-9 -- +│ │ └─MultiheadAttention: 3-10 591,360 +│ │ └─LayerNorm: 3-11 -- +│ │ └─Mlp: 3-12 1,181,568 +│ │ └─Sequential: 3-13 887,040 +│ └─DiTBlock: 2-5 -- +│ │ └─LayerNorm: 3-14 -- +│ │ └─MultiheadAttention: 3-15 591,360 +│ │ └─LayerNorm: 3-16 -- +│ │ └─Mlp: 3-17 1,181,568 +│ │ └─Sequential: 3-18 887,040 +│ └─DiTBlock: 2-6 -- +│ │ └─LayerNorm: 3-19 -- +│ │ └─MultiheadAttention: 3-20 591,360 +│ │ └─LayerNorm: 3-21 -- +│ │ └─Mlp: 3-22 1,181,568 +│ │ └─Sequential: 3-23 887,040 +│ └─DiTBlock: 2-7 -- +│ │ └─LayerNorm: 3-24 -- +│ │ └─MultiheadAttention: 3-25 591,360 +│ │ └─LayerNorm: 3-26 -- +│ │ └─Mlp: 3-27 1,181,568 +│ │ └─Sequential: 3-28 887,040 +│ └─DiTBlock: 2-8 -- +│ │ └─LayerNorm: 3-29 -- +│ │ └─MultiheadAttention: 3-30 591,360 +│ │ └─LayerNorm: 3-31 -- +│ │ └─Mlp: 3-32 1,181,568 +│ │ └─Sequential: 3-33 887,040 +├─FinalLayer: 1-4 -- +│ └─LayerNorm: 2-9 -- +│ └─Linear: 2-10 1,540 +│ └─Sequential: 2-11 -- +│ │ └─SiLU: 3-34 -- +│ │ └─Linear: 3-35 295,680 +├─Unpatchify: 1-5 -- +===================================================================================== +Total params: 16,580,740 +Trainable params: 16,505,476 +Non-trainable params: 75,264 +===================================================================================== + +EPOCH: 1 +Loss at step 0: 1.0075995922088623 +Loss at step 50: 0.35373929142951965 +Loss at step 100: 0.20512253046035767 +Loss at step 150: 0.19249480962753296 +Loss at step 200: 0.13474738597869873 +Loss at step 250: 0.12876151502132416 +Loss at step 300: 0.12794511020183563 +Loss at step 350: 0.1289183497428894 +Loss at step 400: 0.12760040163993835 +Loss at step 450: 0.10254672169685364 +Loss at step 500: 0.1335090696811676 +Loss at step 550: 0.10882318019866943 +Loss at step 600: 0.12476962059736252 +Loss at step 650: 0.1439245641231537 +Loss at step 700: 0.10602888464927673 +Loss at step 750: 0.09511090815067291 +Loss at step 800: 0.10600529611110687 +Loss at step 850: 0.10087983310222626 +Loss at step 900: 0.08842425048351288 +Mean training loss after epoch 1: 0.15980786996990887 + +EPOCH: 2 +Loss at step 0: 0.09463421255350113 +Loss at step 50: 0.1224726065993309 +Loss at step 100: 0.09829632192850113 +Loss at step 150: 0.08635515719652176 +Loss at step 200: 0.07561938464641571 +Loss at step 250: 0.10052552074193954 +Loss at step 300: 0.09483928233385086 +Loss at step 350: 0.08772549778223038 +Loss at step 400: 0.08831494301557541 +Loss at step 450: 0.07784131169319153 +Loss at step 500: 0.07359441369771957 +Loss at step 550: 0.06896582990884781 +Loss at step 600: 0.07551287114620209 +Loss at step 650: 0.06922806799411774 +Loss at step 700: 0.06913796067237854 +Loss at step 750: 0.07675760984420776 +Loss at step 800: 0.06652159243822098 +Loss at step 850: 0.07553744316101074 +Loss at step 900: 0.0821470096707344 +Mean training loss after epoch 2: 0.08193719030808666 + +EPOCH: 3 +Loss at step 0: 0.06631435453891754 +Loss at step 50: 0.05638815462589264 +Loss at step 100: 0.07071908563375473 +Loss at step 150: 0.0715002492070198 +Loss at step 200: 0.07420272380113602 +Loss at step 250: 0.06595490872859955 +Loss at step 300: 0.08998582512140274 +Loss at step 350: 0.07675451785326004 +Loss at step 400: 0.06650132685899734 +Loss at step 450: 0.059282708913087845 +Loss at step 500: 0.06055615842342377 +Loss at step 550: 0.07107555866241455 +Loss at step 600: 0.06431345641613007 +Loss at step 650: 0.06158051639795303 +Loss at step 700: 0.08001387864351273 +Loss at step 750: 0.059054236859083176 +Loss at step 800: 0.07788924872875214 +Loss at step 850: 0.06556981056928635 +Loss at step 900: 0.05886811390519142 +Mean training loss after epoch 3: 0.07109683882325951 + +EPOCH: 4 +Loss at step 0: 0.0762590542435646 +Loss at step 50: 0.058827415108680725 +Loss at step 100: 0.06701105087995529 +Loss at step 150: 0.060533128678798676 +Loss at step 200: 0.05596846714615822 +Loss at step 250: 0.07146752625703812 +Loss at step 300: 0.06272891163825989 +Loss at step 350: 0.057031989097595215 +Loss at step 400: 0.0878763347864151 +Loss at step 450: 0.08566630631685257 +Loss at step 500: 0.06220609322190285 +Loss at step 550: 0.07066660374403 +Loss at step 600: 0.06180073320865631 +Loss at step 650: 0.07413389533758163 +Loss at step 700: 0.0667230561375618 +Loss at step 750: 0.08372411131858826 +Loss at step 800: 0.06152018532156944 +Loss at step 850: 0.07910443842411041 +Loss at step 900: 0.06704096496105194 +Mean training loss after epoch 4: 0.06699823619110752 + +EPOCH: 5 +Loss at step 0: 0.05685015022754669 +Loss at step 50: 0.06583278626203537 +Loss at step 100: 0.08272454887628555 +Loss at step 150: 0.07149537652730942 +Loss at step 200: 0.05819565802812576 +Loss at step 250: 0.05651334673166275 +Loss at step 300: 0.05213393270969391 +Loss at step 350: 0.07279562205076218 +Loss at step 400: 0.058949489146471024 +Loss at step 450: 0.07658764719963074 +Loss at step 500: 0.07114110887050629 +Loss at step 550: 0.05609923228621483 +Loss at step 600: 0.057970426976680756 +Loss at step 650: 0.05682592839002609 +Loss at step 700: 0.05577715486288071 +Loss at step 750: 0.061985500156879425 +Loss at step 800: 0.05465920642018318 +Loss at step 850: 0.058645788580179214 +Loss at step 900: 0.05570255219936371 +Mean training loss after epoch 5: 0.06316233371128278 + +EPOCH: 6 +Loss at step 0: 0.0619986429810524 +Loss at step 50: 0.060633618384599686 +Loss at step 100: 0.0616658553481102 +Loss at step 150: 0.058560483157634735 +Loss at step 200: 0.07018821686506271 +Loss at step 250: 0.06125182285904884 +Loss at step 300: 0.0612216517329216 +Loss at step 350: 0.057443685829639435 +Loss at step 400: 0.06583740562200546 +Loss at step 450: 0.06606021523475647 +Loss at step 500: 0.05745938420295715 +Loss at step 550: 0.07223519682884216 +Loss at step 600: 0.057167790830135345 +Loss at step 650: 0.0698588490486145 +Loss at step 700: 0.054305512458086014 +Loss at step 750: 0.04509487748146057 +Loss at step 800: 0.04614029824733734 +Loss at step 850: 0.0552181601524353 +Loss at step 900: 0.05041469633579254 +Mean training loss after epoch 6: 0.062095056301844656 + +EPOCH: 7 +Loss at step 0: 0.06194700300693512 +Loss at step 50: 0.05655692517757416 +Loss at step 100: 0.05771471560001373 +Loss at step 150: 0.0612168088555336 +Loss at step 200: 0.05316911265254021 +Loss at step 250: 0.08371683955192566 +Loss at step 300: 0.05929751321673393 +Loss at step 350: 0.07051137834787369 +Loss at step 400: 0.05091992765665054 +Loss at step 450: 0.061792246997356415 +Loss at step 500: 0.07786475867033005 +Loss at step 550: 0.0953163430094719 +Loss at step 600: 0.05070318281650543 +Loss at step 650: 0.057140737771987915 +Loss at step 700: 0.0754731222987175 +Loss at step 750: 0.06058558449149132 +Loss at step 800: 0.06910645216703415 +Loss at step 850: 0.06022610515356064 +Loss at step 900: 0.06358420848846436 +Mean training loss after epoch 7: 0.060837885781900206 + +EPOCH: 8 +Loss at step 0: 0.048634011298418045 +Loss at step 50: 0.046894509345293045 +Loss at step 100: 0.07354782521724701 +Loss at step 150: 0.05142446234822273 +Loss at step 200: 0.07011204957962036 +Loss at step 250: 0.07290949672460556 +Loss at step 300: 0.05525421351194382 +Loss at step 350: 0.05742413550615311 +Loss at step 400: 0.04870324581861496 +Loss at step 450: 0.06417348980903625 +Loss at step 500: 0.06928613781929016 +Loss at step 550: 0.05433778092265129 +Loss at step 600: 0.06168806180357933 +Loss at step 650: 0.06319282948970795 +Loss at step 700: 0.053914204239845276 +Loss at step 750: 0.07081945240497589 +Loss at step 800: 0.05305833742022514 +Loss at step 850: 0.058883801102638245 +Loss at step 900: 0.05913586542010307 +Mean training loss after epoch 8: 0.059929611590275884 + +EPOCH: 9 +Loss at step 0: 0.050412438809871674 +Loss at step 50: 0.05737834796309471 +Loss at step 100: 0.06300380825996399 +Loss at step 150: 0.05657083913683891 +Loss at step 200: 0.05534239113330841 +Loss at step 250: 0.05135667324066162 +Loss at step 300: 0.05326739698648453 +Loss at step 350: 0.06971684843301773 +Loss at step 400: 0.05204862356185913 +Loss at step 450: 0.05111617222428322 +Loss at step 500: 0.048222415149211884 +Loss at step 550: 0.07252516597509384 +Loss at step 600: 0.044828228652477264 +Loss at step 650: 0.04854441061615944 +Loss at step 700: 0.07063836604356766 +Loss at step 750: 0.05104904621839523 +Loss at step 800: 0.04823701083660126 +Loss at step 850: 0.052563365548849106 +Loss at step 900: 0.07150944322347641 +Mean training loss after epoch 9: 0.05857146157622973 + +EPOCH: 10 +Loss at step 0: 0.05714458227157593 +Loss at step 50: 0.054938219487667084 +Loss at step 100: 0.0573851652443409 +Loss at step 150: 0.057579681277275085 +Loss at step 200: 0.05514637008309364 +Loss at step 250: 0.04756045714020729 +Loss at step 300: 0.048124171793460846 +Loss at step 350: 0.06632916629314423 +Loss at step 400: 0.04589516669511795 +Loss at step 450: 0.047400813549757004 +Loss at step 500: 0.06649143248796463 +Loss at step 550: 0.07798301428556442 +Loss at step 600: 0.050804074853658676 +Loss at step 650: 0.05981069430708885 +Loss at step 700: 0.05568210780620575 +Loss at step 750: 0.05025237798690796 +Loss at step 800: 0.05304818972945213 +Loss at step 850: 0.05245773866772652 +Loss at step 900: 0.052751343697309494 +Mean training loss after epoch 10: 0.0577129253875345 + +EPOCH: 11 +Loss at step 0: 0.06588388979434967 +Loss at step 50: 0.06909658014774323 +Loss at step 100: 0.05105556175112724 +Loss at step 150: 0.05537199229001999 +Loss at step 200: 0.050810638815164566 +Loss at step 250: 0.06206526607275009 +Loss at step 300: 0.049683354794979095 +Loss at step 350: 0.05939784646034241 +Loss at step 400: 0.054627902805805206 +Loss at step 450: 0.0550394132733345 +Loss at step 500: 0.05084659159183502 +Loss at step 550: 0.05426187813282013 +Loss at step 600: 0.05465870350599289 +Loss at step 650: 0.05165308713912964 +Loss at step 700: 0.047603197395801544 +Loss at step 750: 0.04595797881484032 +Loss at step 800: 0.04408068582415581 +Loss at step 850: 0.05537965148687363 +Loss at step 900: 0.06405893713235855 +Mean training loss after epoch 11: 0.05671106054505178 + +EPOCH: 12 +Loss at step 0: 0.06417083740234375 +Loss at step 50: 0.06911082565784454 +Loss at step 100: 0.05815726891160011 +Loss at step 150: 0.046254076063632965 +Loss at step 200: 0.04874599725008011 +Loss at step 250: 0.05554010719060898 +Loss at step 300: 0.057270597666502 +Loss at step 350: 0.045894086360931396 +Loss at step 400: 0.04543235898017883 +Loss at step 450: 0.051934659481048584 +Loss at step 500: 0.052351921796798706 +Loss at step 550: 0.06272941827774048 +Loss at step 600: 0.04996446520090103 +Loss at step 650: 0.042777273803949356 +Loss at step 700: 0.08207011222839355 +Loss at step 750: 0.06261762976646423 +Loss at step 800: 0.04665262624621391 +Loss at step 850: 0.0601629801094532 +Loss at step 900: 0.06918733566999435 +Mean training loss after epoch 12: 0.05601017613972682 + +EPOCH: 13 +Loss at step 0: 0.05149199441075325 +Loss at step 50: 0.05454690381884575 +Loss at step 100: 0.05053124949336052 +Loss at step 150: 0.046606387943029404 +Loss at step 200: 0.05475420877337456 +Loss at step 250: 0.04629240930080414 +Loss at step 300: 0.045945633202791214 +Loss at step 350: 0.07261192798614502 +Loss at step 400: 0.04167588800191879 +Loss at step 450: 0.054250169545412064 +Loss at step 500: 0.044457338750362396 +Loss at step 550: 0.06483884900808334 +Loss at step 600: 0.05219787731766701 +Loss at step 650: 0.06185373291373253 +Loss at step 700: 0.06286443024873734 +Loss at step 750: 0.04679613932967186 +Loss at step 800: 0.05496010556817055 +Loss at step 850: 0.04822064936161041 +Loss at step 900: 0.06116361916065216 +Mean training loss after epoch 13: 0.05563471382503698 + +EPOCH: 14 +Loss at step 0: 0.052052225917577744 +Loss at step 50: 0.06278275698423386 +Loss at step 100: 0.06238124147057533 +Loss at step 150: 0.061954498291015625 +Loss at step 200: 0.04625730216503143 +Loss at step 250: 0.051161989569664 +Loss at step 300: 0.04852226749062538 +Loss at step 350: 0.053745754063129425 +Loss at step 400: 0.052604325115680695 +Loss at step 450: 0.045151662081480026 +Loss at step 500: 0.07446572184562683 +Loss at step 550: 0.05244326964020729 +Loss at step 600: 0.0493132621049881 +Loss at step 650: 0.04428206384181976 +Loss at step 700: 0.05138056352734566 +Loss at step 750: 0.055240605026483536 +Loss at step 800: 0.050788916647434235 +Loss at step 850: 0.05183254927396774 +Loss at step 900: 0.05565812438726425 +Mean training loss after epoch 14: 0.05502966848978483 + +EPOCH: 15 +Loss at step 0: 0.046297427266836166 +Loss at step 50: 0.05458499863743782 +Loss at step 100: 0.04571607708930969 +Loss at step 150: 0.05076564848423004 +Loss at step 200: 0.04872443526983261 +Loss at step 250: 0.04921704903244972 +Loss at step 300: 0.06756416708230972 +Loss at step 350: 0.07087595015764236 +Loss at step 400: 0.0532766692340374 +Loss at step 450: 0.05435803532600403 +Loss at step 500: 0.05162213742733002 +Loss at step 550: 0.04767274111509323 +Loss at step 600: 0.06485608965158463 +Loss at step 650: 0.04865581914782524 +Loss at step 700: 0.04675605148077011 +Loss at step 750: 0.04527098312973976 +Loss at step 800: 0.048395030200481415 +Loss at step 850: 0.07290451228618622 +Loss at step 900: 0.054566770792007446 +Mean training loss after epoch 15: 0.054631041176219995 + +EPOCH: 16 +Loss at step 0: 0.06164667382836342 +Loss at step 50: 0.0577978678047657 +Loss at step 100: 0.049172017723321915 +Loss at step 150: 0.04947298765182495 +Loss at step 200: 0.04651055857539177 +Loss at step 250: 0.05367325618863106 +Loss at step 300: 0.04757886752486229 +Loss at step 350: 0.05227513238787651 +Loss at step 400: 0.04909190163016319 +Loss at step 450: 0.05389949679374695 +Loss at step 500: 0.06675230711698532 +Loss at step 550: 0.04681697487831116 +Loss at step 600: 0.05404640734195709 +Loss at step 650: 0.04095203056931496 +Loss at step 700: 0.05199974402785301 +Loss at step 750: 0.07341897487640381 +Loss at step 800: 0.05079803243279457 +Loss at step 850: 0.048575498163700104 +Loss at step 900: 0.06440459191799164 +Mean training loss after epoch 16: 0.054061861180547455 + +EPOCH: 17 +Loss at step 0: 0.04553738236427307 +Loss at step 50: 0.06358225643634796 +Loss at step 100: 0.06717240065336227 +Loss at step 150: 0.06445685029029846 +Loss at step 200: 0.05771683529019356 +Loss at step 250: 0.050301384180784225 +Loss at step 300: 0.07634672522544861 +Loss at step 350: 0.04480393975973129 +Loss at step 400: 0.04892796650528908 +Loss at step 450: 0.04602775722742081 +Loss at step 500: 0.04639355465769768 +Loss at step 550: 0.045715853571891785 +Loss at step 600: 0.04254934564232826 +Loss at step 650: 0.04145136475563049 +Loss at step 700: 0.04441535472869873 +Loss at step 750: 0.04946678504347801 +Loss at step 800: 0.04965391755104065 +Loss at step 850: 0.0459962822496891 +Loss at step 900: 0.0501675121486187 +Mean training loss after epoch 17: 0.054101661117727566 + +EPOCH: 18 +Loss at step 0: 0.056217774748802185 +Loss at step 50: 0.05200424790382385 +Loss at step 100: 0.0668075829744339 +Loss at step 150: 0.04610215127468109 +Loss at step 200: 0.05222228914499283 +Loss at step 250: 0.0420372299849987 +Loss at step 300: 0.050539370626211166 +Loss at step 350: 0.05716679245233536 +Loss at step 400: 0.04419241473078728 +Loss at step 450: 0.04776414483785629 +Loss at step 500: 0.05182192474603653 +Loss at step 550: 0.04701581224799156 +Loss at step 600: 0.0810161903500557 +Loss at step 650: 0.04901707172393799 +Loss at step 700: 0.045345958322286606 +Loss at step 750: 0.045655202120542526 +Loss at step 800: 0.07321522384881973 +Loss at step 850: 0.0783211961388588 +Loss at step 900: 0.05769915506243706 +Mean training loss after epoch 18: 0.05276883952319622 + +EPOCH: 19 +Loss at step 0: 0.062434516847133636 +Loss at step 50: 0.04063642770051956 +Loss at step 100: 0.051540955901145935 +Loss at step 150: 0.048084940761327744 +Loss at step 200: 0.07581979781389236 +Loss at step 250: 0.04238951951265335 +Loss at step 300: 0.05079697072505951 +Loss at step 350: 0.06121717020869255 +Loss at step 400: 0.05678032711148262 +Loss at step 450: 0.041780199855566025 +Loss at step 500: 0.06636585295200348 +Loss at step 550: 0.05419893190264702 +Loss at step 600: 0.05819569155573845 +Loss at step 650: 0.049129780381917953 +Loss at step 700: 0.05700291693210602 +Loss at step 750: 0.05222182720899582 +Loss at step 800: 0.03833548724651337 +Loss at step 850: 0.052667684853076935 +Loss at step 900: 0.03975658118724823 +Mean training loss after epoch 19: 0.053116394465030636 + +EPOCH: 20 +Loss at step 0: 0.059991300106048584 +Loss at step 50: 0.05217447131872177 +Loss at step 100: 0.05107884481549263 +Loss at step 150: 0.0668342188000679 +Loss at step 200: 0.046785417944192886 +Loss at step 250: 0.042817097157239914 +Loss at step 300: 0.04819391295313835 +Loss at step 350: 0.06594890356063843 +Loss at step 400: 0.0886029377579689 +Loss at step 450: 0.05893642455339432 +Loss at step 500: 0.04310228303074837 +Loss at step 550: 0.05890632048249245 +Loss at step 600: 0.04422086477279663 +Loss at step 650: 0.04329510033130646 +Loss at step 700: 0.04558916762471199 +Loss at step 750: 0.0437995046377182 +Loss at step 800: 0.04750428721308708 +Loss at step 850: 0.0488579198718071 +Loss at step 900: 0.07720398902893066 +Mean training loss after epoch 20: 0.052237708590178095 + +EPOCH: 21 +Loss at step 0: 0.05259215086698532 +Loss at step 50: 0.04788410663604736 +Loss at step 100: 0.05145316198468208 +Loss at step 150: 0.0593537800014019 +Loss at step 200: 0.047262296080589294 +Loss at step 250: 0.03939272463321686 +Loss at step 300: 0.051646556705236435 +Loss at step 350: 0.0628833919763565 +Loss at step 400: 0.06001042202115059 +Loss at step 450: 0.0456918329000473 +Loss at step 500: 0.042765870690345764 +Loss at step 550: 0.056801993399858475 +Loss at step 600: 0.04275143891572952 +Loss at step 650: 0.04953426495194435 +Loss at step 700: 0.04525814950466156 +Loss at step 750: 0.04910612106323242 +Loss at step 800: 0.04751391336321831 +Loss at step 850: 0.06661234050989151 +Loss at step 900: 0.061157193034887314 +Mean training loss after epoch 21: 0.05161733940434354 + +EPOCH: 22 +Loss at step 0: 0.04405367374420166 +Loss at step 50: 0.05590132996439934 +Loss at step 100: 0.04485447332262993 +Loss at step 150: 0.04281968995928764 +Loss at step 200: 0.043407946825027466 +Loss at step 250: 0.04665181413292885 +Loss at step 300: 0.04848379269242287 +Loss at step 350: 0.07914330065250397 +Loss at step 400: 0.05594984069466591 +Loss at step 450: 0.050728943198919296 +Loss at step 500: 0.06559362262487411 +Loss at step 550: 0.05932296812534332 +Loss at step 600: 0.04063454270362854 +Loss at step 650: 0.06339822709560394 +Loss at step 700: 0.04818549007177353 +Loss at step 750: 0.0414045974612236 +Loss at step 800: 0.048732493072748184 +Loss at step 850: 0.06304115056991577 +Loss at step 900: 0.04507843405008316 +Mean training loss after epoch 22: 0.05192577388129636 + +EPOCH: 23 +Loss at step 0: 0.04445141181349754 +Loss at step 50: 0.04926789551973343 +Loss at step 100: 0.05867208540439606 +Loss at step 150: 0.059803787618875504 +Loss at step 200: 0.04820335656404495 +Loss at step 250: 0.0530536025762558 +Loss at step 300: 0.05899187922477722 +Loss at step 350: 0.044584088027477264 +Loss at step 400: 0.05066234990954399 +Loss at step 450: 0.06244485080242157 +Loss at step 500: 0.04398374632000923 +Loss at step 550: 0.06435897946357727 +Loss at step 600: 0.04494236782193184 +Loss at step 650: 0.0625678151845932 +Loss at step 700: 0.06937924772500992 +Loss at step 750: 0.036715101450681686 +Loss at step 800: 0.04341406747698784 +Loss at step 850: 0.04124848544597626 +Loss at step 900: 0.0456598624587059 +Mean training loss after epoch 23: 0.05132618372533113 + +EPOCH: 24 +Loss at step 0: 0.04877892881631851 +Loss at step 50: 0.05424458533525467 +Loss at step 100: 0.05974975600838661 +Loss at step 150: 0.04766708239912987 +Loss at step 200: 0.045475251972675323 +Loss at step 250: 0.05872789025306702 +Loss at step 300: 0.048234716057777405 +Loss at step 350: 0.045290764421224594 +Loss at step 400: 0.04282333329319954 +Loss at step 450: 0.058333445340394974 +Loss at step 500: 0.06469010561704636 +Loss at step 550: 0.07500461488962173 +Loss at step 600: 0.07830505073070526 +Loss at step 650: 0.06130191311240196 +Loss at step 700: 0.04635435715317726 +Loss at step 750: 0.041943423449993134 +Loss at step 800: 0.04361499845981598 +Loss at step 850: 0.060573626309633255 +Loss at step 900: 0.047398995608091354 +Mean training loss after epoch 24: 0.05160684263242333 + +EPOCH: 25 +Loss at step 0: 0.04251198470592499 +Loss at step 50: 0.05717554688453674 +Loss at step 100: 0.0491350032389164 +Loss at step 150: 0.04539532586932182 +Loss at step 200: 0.03891652449965477 +Loss at step 250: 0.04979914054274559 +Loss at step 300: 0.03949011489748955 +Loss at step 350: 0.05669165030121803 +Loss at step 400: 0.043263647705316544 +Loss at step 450: 0.04343562200665474 +Loss at step 500: 0.04691024497151375 +Loss at step 550: 0.0470493845641613 +Loss at step 600: 0.06308495253324509 +Loss at step 650: 0.04470175877213478 +Loss at step 700: 0.04631304740905762 +Loss at step 750: 0.048905499279499054 +Loss at step 800: 0.06554816663265228 +Loss at step 850: 0.050218984484672546 +Loss at step 900: 0.049364395439624786 +Mean training loss after epoch 25: 0.050819979885271364 + +EPOCH: 26 +Loss at step 0: 0.03715969994664192 +Loss at step 50: 0.06091685965657234 +Loss at step 100: 0.063819520175457 +Loss at step 150: 0.04815630987286568 +Loss at step 200: 0.04438916593790054 +Loss at step 250: 0.059880390763282776 +Loss at step 300: 0.042906735092401505 +Loss at step 350: 0.06107606738805771 +Loss at step 400: 0.04574216157197952 +Loss at step 450: 0.06003245338797569 +Loss at step 500: 0.061320655047893524 +Loss at step 550: 0.060623303055763245 +Loss at step 600: 0.036161016672849655 +Loss at step 650: 0.05289304256439209 +Loss at step 700: 0.0639103427529335 +Loss at step 750: 0.04415803775191307 +Loss at step 800: 0.06678518652915955 +Loss at step 850: 0.04489147290587425 +Loss at step 900: 0.0701601505279541 +Mean training loss after epoch 26: 0.05099492970861987 + +EPOCH: 27 +Loss at step 0: 0.04834058880805969 +Loss at step 50: 0.041343607008457184 +Loss at step 100: 0.03553866595029831 +Loss at step 150: 0.05271811783313751 +Loss at step 200: 0.044921182096004486 +Loss at step 250: 0.058446336537599564 +Loss at step 300: 0.052902523428201675 +Loss at step 350: 0.04670695587992668 +Loss at step 400: 0.048855800181627274 +Loss at step 450: 0.06302586197853088 +Loss at step 500: 0.04730769991874695 +Loss at step 550: 0.048699017614126205 +Loss at step 600: 0.035041410475969315 +Loss at step 650: 0.05921318382024765 +Loss at step 700: 0.0437665656208992 +Loss at step 750: 0.04930931702256203 +Loss at step 800: 0.04900570213794708 +Loss at step 850: 0.05415530502796173 +Loss at step 900: 0.046043891459703445 +Mean training loss after epoch 27: 0.05043758259319674 + +EPOCH: 28 +Loss at step 0: 0.042533595114946365 +Loss at step 50: 0.06361493468284607 +Loss at step 100: 0.06110368296504021 +Loss at step 150: 0.04034252092242241 +Loss at step 200: 0.04347515106201172 +Loss at step 250: 0.045848894864320755 +Loss at step 300: 0.042350199073553085 +Loss at step 350: 0.04258806258440018 +Loss at step 400: 0.04175529628992081 +Loss at step 450: 0.05830493941903114 +Loss at step 500: 0.06097899377346039 +Loss at step 550: 0.03716123104095459 +Loss at step 600: 0.04144633933901787 +Loss at step 650: 0.04639289155602455 +Loss at step 700: 0.06039012223482132 +Loss at step 750: 0.04440205171704292 +Loss at step 800: 0.04231288284063339 +Loss at step 850: 0.04419805482029915 +Loss at step 900: 0.04500934109091759 +Mean training loss after epoch 28: 0.050906813391712685 + +EPOCH: 29 +Loss at step 0: 0.055881500244140625 +Loss at step 50: 0.06231289356946945 +Loss at step 100: 0.04882965236902237 +Loss at step 150: 0.06262312829494476 +Loss at step 200: 0.06828181445598602 +Loss at step 250: 0.058023616671562195 +Loss at step 300: 0.04948877543210983 +Loss at step 350: 0.05341454967856407 +Loss at step 400: 0.04189556464552879 +Loss at step 450: 0.06160283088684082 +Loss at step 500: 0.058573994785547256 +Loss at step 550: 0.048130813986063004 +Loss at step 600: 0.04303271323442459 +Loss at step 650: 0.04687873274087906 +Loss at step 700: 0.04802021011710167 +Loss at step 750: 0.04121152311563492 +Loss at step 800: 0.052675824612379074 +Loss at step 850: 0.03745349124073982 +Loss at step 900: 0.04453900456428528 +Mean training loss after epoch 29: 0.05022250445905144 + +EPOCH: 30 +Loss at step 0: 0.0670885369181633 +Loss at step 50: 0.044897690415382385 +Loss at step 100: 0.050211936235427856 +Loss at step 150: 0.04805487021803856 +Loss at step 200: 0.0658612847328186 +Loss at step 250: 0.04369752109050751 +Loss at step 300: 0.03850596398115158 +Loss at step 350: 0.06756297498941422 +Loss at step 400: 0.06072334200143814 +Loss at step 450: 0.042236171662807465 +Loss at step 500: 0.05478910729289055 +Loss at step 550: 0.04254112392663956 +Loss at step 600: 0.03927816078066826 +Loss at step 650: 0.059359557926654816 +Loss at step 700: 0.050366565585136414 +Loss at step 750: 0.0479729026556015 +Loss at step 800: 0.044043682515621185 +Loss at step 850: 0.05858621746301651 +Loss at step 900: 0.04365117475390434 +Mean training loss after epoch 30: 0.050371239839522824 + +EPOCH: 31 +Loss at step 0: 0.054852455854415894 +Loss at step 50: 0.049651432782411575 +Loss at step 100: 0.048459991812705994 +Loss at step 150: 0.04741988703608513 +Loss at step 200: 0.04980848357081413 +Loss at step 250: 0.04312898591160774 +Loss at step 300: 0.04154994338750839 +Loss at step 350: 0.03851347789168358 +Loss at step 400: 0.04307034984230995 +Loss at step 450: 0.046164583414793015 +Loss at step 500: 0.052177317440509796 +Loss at step 550: 0.04778030514717102 +Loss at step 600: 0.04357085004448891 +Loss at step 650: 0.06223141402006149 +Loss at step 700: 0.05079686641693115 +Loss at step 750: 0.04232007637619972 +Loss at step 800: 0.04990779608488083 +Loss at step 850: 0.06623338162899017 +Loss at step 900: 0.09189185500144958 +Mean training loss after epoch 31: 0.05019639524172491 + +EPOCH: 32 +Loss at step 0: 0.04429066926240921 +Loss at step 50: 0.043835610151290894 +Loss at step 100: 0.0585714727640152 +Loss at step 150: 0.05228110030293465 +Loss at step 200: 0.05839800089597702 +Loss at step 250: 0.054243192076683044 +Loss at step 300: 0.05252903327345848 +Loss at step 350: 0.043195340782403946 +Loss at step 400: 0.04521758109331131 +Loss at step 450: 0.056195490062236786 +Loss at step 500: 0.04714139923453331 +Loss at step 550: 0.04732469096779823 +Loss at step 600: 0.03780977055430412 +Loss at step 650: 0.072181336581707 +Loss at step 700: 0.061914000660181046 +Loss at step 750: 0.041920144110918045 +Loss at step 800: 0.04962042346596718 +Loss at step 850: 0.059754401445388794 +Loss at step 900: 0.04798690974712372 +Mean training loss after epoch 32: 0.05042258716388933 + +EPOCH: 33 +Loss at step 0: 0.0643845871090889 +Loss at step 50: 0.044662799686193466 +Loss at step 100: 0.04836899787187576 +Loss at step 150: 0.04399054870009422 +Loss at step 200: 0.04428299143910408 +Loss at step 250: 0.04246906191110611 +Loss at step 300: 0.04072093963623047 +Loss at step 350: 0.04853418841958046 +Loss at step 400: 0.0455172061920166 +Loss at step 450: 0.0458773635327816 +Loss at step 500: 0.07973543554544449 +Loss at step 550: 0.051296427845954895 +Loss at step 600: 0.045639071613550186 +Loss at step 650: 0.06466232240200043 +Loss at step 700: 0.049812301993370056 +Loss at step 750: 0.05495742708444595 +Loss at step 800: 0.06231904402375221 +Loss at step 850: 0.04908932372927666 +Loss at step 900: 0.042002804577350616 +Mean training loss after epoch 33: 0.05006800416007098 + +EPOCH: 34 +Loss at step 0: 0.0410429984331131 +Loss at step 50: 0.04335249587893486 +Loss at step 100: 0.04271542653441429 +Loss at step 150: 0.04844750091433525 +Loss at step 200: 0.04638352617621422 +Loss at step 250: 0.04805449768900871 +Loss at step 300: 0.06073756143450737 +Loss at step 350: 0.03467216342687607 +Loss at step 400: 0.0450374037027359 +Loss at step 450: 0.05206913873553276 +Loss at step 500: 0.04162357747554779 +Loss at step 550: 0.05870816111564636 +Loss at step 600: 0.03519956395030022 +Loss at step 650: 0.058811575174331665 +Loss at step 700: 0.04412572458386421 +Loss at step 750: 0.0419934019446373 +Loss at step 800: 0.04975217580795288 +Loss at step 850: 0.0574377067387104 +Loss at step 900: 0.03829919919371605 +Mean training loss after epoch 34: 0.0499035893901706 + +EPOCH: 35 +Loss at step 0: 0.044991299510002136 +Loss at step 50: 0.05055626854300499 +Loss at step 100: 0.03677325323224068 +Loss at step 150: 0.037130892276763916 +Loss at step 200: 0.04066538065671921 +Loss at step 250: 0.041030410677194595 +Loss at step 300: 0.04623274877667427 +Loss at step 350: 0.04382021352648735 +Loss at step 400: 0.05231018364429474 +Loss at step 450: 0.05125284940004349 +Loss at step 500: 0.04078878462314606 +Loss at step 550: 0.04617936536669731 +Loss at step 600: 0.05754070729017258 +Loss at step 650: 0.05753948166966438 +Loss at step 700: 0.046890463680028915 +Loss at step 750: 0.04283486306667328 +Loss at step 800: 0.04463444650173187 +Loss at step 850: 0.04075390473008156 +Loss at step 900: 0.05805470049381256 +Mean training loss after epoch 35: 0.04980792412593929 + +EPOCH: 36 +Loss at step 0: 0.06121768057346344 +Loss at step 50: 0.06321048736572266 +Loss at step 100: 0.046906907111406326 +Loss at step 150: 0.04423534497618675 +Loss at step 200: 0.04593561962246895 +Loss at step 250: 0.057250477373600006 +Loss at step 300: 0.053563009947538376 +Loss at step 350: 0.05470506474375725 +Loss at step 400: 0.05173540115356445 +Loss at step 450: 0.05924222618341446 +Loss at step 500: 0.044110074639320374 +Loss at step 550: 0.0416620597243309 +Loss at step 600: 0.053804896771907806 +Loss at step 650: 0.04386766627430916 +Loss at step 700: 0.04909532144665718 +Loss at step 750: 0.053284160792827606 +Loss at step 800: 0.07890906184911728 +Loss at step 850: 0.038951218128204346 +Loss at step 900: 0.04595833271741867 +Mean training loss after epoch 36: 0.049352509785753326 + +EPOCH: 37 +Loss at step 0: 0.04700087383389473 +Loss at step 50: 0.06418884545564651 +Loss at step 100: 0.05069563537836075 +Loss at step 150: 0.0500437468290329 +Loss at step 200: 0.0683513730764389 +Loss at step 250: 0.05381709709763527 +Loss at step 300: 0.04601358249783516 +Loss at step 350: 0.04279237240552902 +Loss at step 400: 0.044277921319007874 +Loss at step 450: 0.04211915656924248 +Loss at step 500: 0.05392518267035484 +Loss at step 550: 0.044013332575559616 +Loss at step 600: 0.04268023371696472 +Loss at step 650: 0.043030720204114914 +Loss at step 700: 0.04672050476074219 +Loss at step 750: 0.05868598818778992 +Loss at step 800: 0.05074331536889076 +Loss at step 850: 0.04076524078845978 +Loss at step 900: 0.048133332282304764 +Mean training loss after epoch 37: 0.04927070460308081 + +EPOCH: 38 +Loss at step 0: 0.043423768132925034 +Loss at step 50: 0.05626245215535164 +Loss at step 100: 0.05275999754667282 +Loss at step 150: 0.03959965333342552 +Loss at step 200: 0.04147305712103844 +Loss at step 250: 0.04808575659990311 +Loss at step 300: 0.04279503598809242 +Loss at step 350: 0.05690730735659599 +Loss at step 400: 0.041516438126564026 +Loss at step 450: 0.04280721768736839 +Loss at step 500: 0.0445721372961998 +Loss at step 550: 0.04869619756937027 +Loss at step 600: 0.04467467963695526 +Loss at step 650: 0.03923617675900459 +Loss at step 700: 0.04328928515315056 +Loss at step 750: 0.06143806129693985 +Loss at step 800: 0.05247138440608978 +Loss at step 850: 0.04985184967517853 +Loss at step 900: 0.05711742863059044 +Mean training loss after epoch 38: 0.049564735294738684 + +EPOCH: 39 +Loss at step 0: 0.052482593804597855 +Loss at step 50: 0.05154209956526756 +Loss at step 100: 0.04020345211029053 +Loss at step 150: 0.05520298331975937 +Loss at step 200: 0.06086026132106781 +Loss at step 250: 0.04489890858530998 +Loss at step 300: 0.04966682568192482 +Loss at step 350: 0.04010385274887085 +Loss at step 400: 0.04077764227986336 +Loss at step 450: 0.0764329582452774 +Loss at step 500: 0.03747834637761116 +Loss at step 550: 0.04236274212598801 +Loss at step 600: 0.05438420921564102 +Loss at step 650: 0.051374856382608414 +Loss at step 700: 0.06341550499200821 +Loss at step 750: 0.039869170635938644 +Loss at step 800: 0.039742011576890945 +Loss at step 850: 0.052026260644197464 +Loss at step 900: 0.04109233245253563 +Mean training loss after epoch 39: 0.04974328056136682 + +EPOCH: 40 +Loss at step 0: 0.06371866166591644 +Loss at step 50: 0.040017832070589066 +Loss at step 100: 0.03901951387524605 +Loss at step 150: 0.041860293596982956 +Loss at step 200: 0.04036795347929001 +Loss at step 250: 0.04513201490044594 +Loss at step 300: 0.04086354374885559 +Loss at step 350: 0.0428619459271431 +Loss at step 400: 0.05866026133298874 +Loss at step 450: 0.04151544347405434 +Loss at step 500: 0.04968944936990738 +Loss at step 550: 0.06488409638404846 +Loss at step 600: 0.034048136323690414 +Loss at step 650: 0.038840699940919876 +Loss at step 700: 0.03685002774000168 +Loss at step 750: 0.0434001199901104 +Loss at step 800: 0.04271155968308449 +Loss at step 850: 0.04367971420288086 +Loss at step 900: 0.05182309076189995 +Mean training loss after epoch 40: 0.049370104506580054 + +EPOCH: 41 +Loss at step 0: 0.04788302630186081 +Loss at step 50: 0.04710985720157623 +Loss at step 100: 0.06721906363964081 +Loss at step 150: 0.06526092439889908 +Loss at step 200: 0.043196044862270355 +Loss at step 250: 0.043683011084795 +Loss at step 300: 0.039374545216560364 +Loss at step 350: 0.04694144427776337 +Loss at step 400: 0.04459935799241066 +Loss at step 450: 0.038564059883356094 +Loss at step 500: 0.03336578235030174 +Loss at step 550: 0.0400950089097023 +Loss at step 600: 0.045206982642412186 +Loss at step 650: 0.06758737564086914 +Loss at step 700: 0.06302134692668915 +Loss at step 750: 0.041137587279081345 +Loss at step 800: 0.044409070163965225 +Loss at step 850: 0.06189911067485809 +Loss at step 900: 0.04958909749984741 +Mean training loss after epoch 41: 0.049163244033196586 + +EPOCH: 42 +Loss at step 0: 0.06178005412220955 +Loss at step 50: 0.047716062515974045 +Loss at step 100: 0.06076199933886528 +Loss at step 150: 0.04326554015278816 +Loss at step 200: 0.05236411467194557 +Loss at step 250: 0.03990398719906807 +Loss at step 300: 0.04151206091046333 +Loss at step 350: 0.04638168588280678 +Loss at step 400: 0.056435633450746536 +Loss at step 450: 0.04092627018690109 +Loss at step 500: 0.0387774296104908 +Loss at step 550: 0.034973982721567154 +Loss at step 600: 0.04130071774125099 +Loss at step 650: 0.04746625944972038 +Loss at step 700: 0.047542594373226166 +Loss at step 750: 0.038054801523685455 +Loss at step 800: 0.05185136944055557 +Loss at step 850: 0.04502439871430397 +Loss at step 900: 0.04021717235445976 +Mean training loss after epoch 42: 0.048367911425512486 + +EPOCH: 43 +Loss at step 0: 0.04168252646923065 +Loss at step 50: 0.04136538878083229 +Loss at step 100: 0.0367027148604393 +Loss at step 150: 0.07566062361001968 +Loss at step 200: 0.044693440198898315 +Loss at step 250: 0.055736199021339417 +Loss at step 300: 0.0643707662820816 +Loss at step 350: 0.06569212675094604 +Loss at step 400: 0.04497396573424339 +Loss at step 450: 0.03956783935427666 +Loss at step 500: 0.04919717088341713 +Loss at step 550: 0.03983253240585327 +Loss at step 600: 0.0422062948346138 +Loss at step 650: 0.04591282829642296 +Loss at step 700: 0.04368511587381363 +Loss at step 750: 0.052725572139024734 +Loss at step 800: 0.05176183208823204 +Loss at step 850: 0.04265700653195381 +Loss at step 900: 0.04052303731441498 +Mean training loss after epoch 43: 0.048530283806992494 + +EPOCH: 44 +Loss at step 0: 0.04437807947397232 +Loss at step 50: 0.03821272775530815 +Loss at step 100: 0.042116209864616394 +Loss at step 150: 0.07781299203634262 +Loss at step 200: 0.04366188496351242 +Loss at step 250: 0.04133245721459389 +Loss at step 300: 0.045699361711740494 +Loss at step 350: 0.05984176695346832 +Loss at step 400: 0.05875586345791817 +Loss at step 450: 0.04496976360678673 +Loss at step 500: 0.05785861238837242 +Loss at step 550: 0.04218743368983269 +Loss at step 600: 0.03997413069009781 +Loss at step 650: 0.044579748064279556 +Loss at step 700: 0.03963901847600937 +Loss at step 750: 0.03772449493408203 +Loss at step 800: 0.04375980421900749 +Loss at step 850: 0.055860940366983414 +Loss at step 900: 0.0456465482711792 +Mean training loss after epoch 44: 0.04919719643223641 + +EPOCH: 45 +Loss at step 0: 0.056695207953453064 +Loss at step 50: 0.041634611785411835 +Loss at step 100: 0.05877414345741272 +Loss at step 150: 0.0455675907433033 +Loss at step 200: 0.04037399962544441 +Loss at step 250: 0.04223078861832619 +Loss at step 300: 0.04242297634482384 +Loss at step 350: 0.04520575702190399 +Loss at step 400: 0.051949094980955124 +Loss at step 450: 0.060411237180233 +Loss at step 500: 0.04765932634472847 +Loss at step 550: 0.04172941669821739 +Loss at step 600: 0.05463941767811775 +Loss at step 650: 0.0545109324157238 +Loss at step 700: 0.04387296736240387 +Loss at step 750: 0.03995988890528679 +Loss at step 800: 0.060580719262361526 +Loss at step 850: 0.045094870030879974 +Loss at step 900: 0.041054558008909225 +Mean training loss after epoch 45: 0.0485664135627528 + +EPOCH: 46 +Loss at step 0: 0.06592614948749542 +Loss at step 50: 0.056329987943172455 +Loss at step 100: 0.038764022290706635 +Loss at step 150: 0.05430060252547264 +Loss at step 200: 0.04260764271020889 +Loss at step 250: 0.0636754184961319 +Loss at step 300: 0.0442098006606102 +Loss at step 350: 0.038463469594717026 +Loss at step 400: 0.07078070938587189 +Loss at step 450: 0.03963498771190643 +Loss at step 500: 0.04048417508602142 +Loss at step 550: 0.04701978340744972 +Loss at step 600: 0.041770223528146744 +Loss at step 650: 0.04414483532309532 +Loss at step 700: 0.041967447847127914 +Loss at step 750: 0.05635293573141098 +Loss at step 800: 0.03649277985095978 +Loss at step 850: 0.045459356158971786 +Loss at step 900: 0.06429880112409592 +Mean training loss after epoch 46: 0.04838154225874303 + +EPOCH: 47 +Loss at step 0: 0.071054607629776 +Loss at step 50: 0.038554903119802475 +Loss at step 100: 0.050750359892845154 +Loss at step 150: 0.04123790189623833 +Loss at step 200: 0.05786864832043648 +Loss at step 250: 0.0756785199046135 +Loss at step 300: 0.03726591914892197 +Loss at step 350: 0.052801862359046936 +Loss at step 400: 0.04964063689112663 +Loss at step 450: 0.06271123141050339 +Loss at step 500: 0.0403226800262928 +Loss at step 550: 0.07195508480072021 +Loss at step 600: 0.059376101940870285 +Loss at step 650: 0.061720505356788635 +Loss at step 700: 0.053682491183280945 +Loss at step 750: 0.04990333318710327 +Loss at step 800: 0.04928659647703171 +Loss at step 850: 0.04075830802321434 +Loss at step 900: 0.036789149045944214 +Mean training loss after epoch 47: 0.04894758796474255 + +EPOCH: 48 +Loss at step 0: 0.04153459146618843 +Loss at step 50: 0.055467404425144196 +Loss at step 100: 0.042366329580545425 +Loss at step 150: 0.05131144821643829 +Loss at step 200: 0.043921127915382385 +Loss at step 250: 0.03675081208348274 +Loss at step 300: 0.04621627926826477 +Loss at step 350: 0.04254616051912308 +Loss at step 400: 0.04048822820186615 +Loss at step 450: 0.03754987567663193 +Loss at step 500: 0.06753776222467422 +Loss at step 550: 0.049380525946617126 +Loss at step 600: 0.04735976830124855 +Loss at step 650: 0.04307052865624428 +Loss at step 700: 0.05318225920200348 +Loss at step 750: 0.04880767688155174 +Loss at step 800: 0.057614535093307495 +Loss at step 850: 0.055632758885622025 +Loss at step 900: 0.0650673657655716 +Mean training loss after epoch 48: 0.04849924578039504 + +EPOCH: 49 +Loss at step 0: 0.04387345537543297 +Loss at step 50: 0.03771733492612839 +Loss at step 100: 0.03908662497997284 +Loss at step 150: 0.06957180798053741 +Loss at step 200: 0.04146793484687805 +Loss at step 250: 0.059470757842063904 +Loss at step 300: 0.06440120190382004 +Loss at step 350: 0.057556189596652985 +Loss at step 400: 0.0584796667098999 +Loss at step 450: 0.0389089472591877 +Loss at step 500: 0.05996524915099144 +Loss at step 550: 0.0584297850728035 +Loss at step 600: 0.05103912949562073 +Loss at step 650: 0.04569408670067787 +Loss at step 700: 0.05261000618338585 +Loss at step 750: 0.05376506224274635 +Loss at step 800: 0.04005511477589607 +Loss at step 850: 0.04126730561256409 +Loss at step 900: 0.04470016062259674 +Mean training loss after epoch 49: 0.04814462217170674 + +EPOCH: 50 +Loss at step 0: 0.046654462814331055 +Loss at step 50: 0.04599568620324135 +Loss at step 100: 0.04337119311094284 +Loss at step 150: 0.06170908734202385 +Loss at step 200: 0.04295524209737778 +Loss at step 250: 0.05097200721502304 +Loss at step 300: 0.0575161837041378 +Loss at step 350: 0.04440409690141678 +Loss at step 400: 0.03948948159813881 +Loss at step 450: 0.0466308631002903 +Loss at step 500: 0.05660380423069 +Loss at step 550: 0.0514553003013134 +Loss at step 600: 0.041717059910297394 +Loss at step 650: 0.0479588583111763 +Loss at step 700: 0.03928200900554657 +Loss at step 750: 0.0435732863843441 +Loss at step 800: 0.04843609407544136 +Loss at step 850: 0.0392707958817482 +Loss at step 900: 0.040503840893507004 +Mean training loss after epoch 50: 0.04808060409846718 + +EPOCH: 51 +Loss at step 0: 0.046570416539907455 +Loss at step 50: 0.03836369514465332 +Loss at step 100: 0.03985976055264473 +Loss at step 150: 0.054616279900074005 +Loss at step 200: 0.046012330800294876 +Loss at step 250: 0.038674358278512955 +Loss at step 300: 0.0401156023144722 +Loss at step 350: 0.07588022202253342 +Loss at step 400: 0.04220349341630936 +Loss at step 450: 0.0421881377696991 +Loss at step 500: 0.037213124334812164 +Loss at step 550: 0.03510317578911781 +Loss at step 600: 0.0416124053299427 +Loss at step 650: 0.04913988709449768 +Loss at step 700: 0.040654223412275314 +Loss at step 750: 0.04730251431465149 +Loss at step 800: 0.04148053377866745 +Loss at step 850: 0.06634870916604996 +Loss at step 900: 0.0449465736746788 +Mean training loss after epoch 51: 0.048010213562706384 + +EPOCH: 52 +Loss at step 0: 0.039007481187582016 +Loss at step 50: 0.04762705788016319 +Loss at step 100: 0.04501299932599068 +Loss at step 150: 0.07696259766817093 +Loss at step 200: 0.036894191056489944 +Loss at step 250: 0.05176336318254471 +Loss at step 300: 0.07270673662424088 +Loss at step 350: 0.03624795749783516 +Loss at step 400: 0.04916340485215187 +Loss at step 450: 0.055940959602594376 +Loss at step 500: 0.04801143705844879 +Loss at step 550: 0.04896067455410957 +Loss at step 600: 0.043537288904190063 +Loss at step 650: 0.058036286383867264 +Loss at step 700: 0.037920866161584854 +Loss at step 750: 0.04269323870539665 +Loss at step 800: 0.05025801807641983 +Loss at step 850: 0.037121109664440155 +Loss at step 900: 0.05860688537359238 +Mean training loss after epoch 52: 0.04807030894894844 + +EPOCH: 53 +Loss at step 0: 0.048174209892749786 +Loss at step 50: 0.04657326638698578 +Loss at step 100: 0.054728906601667404 +Loss at step 150: 0.0501372404396534 +Loss at step 200: 0.047292061150074005 +Loss at step 250: 0.04920757934451103 +Loss at step 300: 0.060553137212991714 +Loss at step 350: 0.04657771438360214 +Loss at step 400: 0.041407495737075806 +Loss at step 450: 0.04021656513214111 +Loss at step 500: 0.056851327419281006 +Loss at step 550: 0.05130401998758316 +Loss at step 600: 0.06028762459754944 +Loss at step 650: 0.04113583639264107 +Loss at step 700: 0.04342133179306984 +Loss at step 750: 0.041321054100990295 +Loss at step 800: 0.06136662885546684 +Loss at step 850: 0.04680129513144493 +Loss at step 900: 0.05718997120857239 +Mean training loss after epoch 53: 0.048173754466081985 + +EPOCH: 54 +Loss at step 0: 0.047833774238824844 +Loss at step 50: 0.04050271585583687 +Loss at step 100: 0.044445689767599106 +Loss at step 150: 0.05799761041998863 +Loss at step 200: 0.039338573813438416 +Loss at step 250: 0.05313317850232124 +Loss at step 300: 0.03940863907337189 +Loss at step 350: 0.05045534670352936 +Loss at step 400: 0.04396917670965195 +Loss at step 450: 0.04656389355659485 +Loss at step 500: 0.039620548486709595 +Loss at step 550: 0.054363951086997986 +Loss at step 600: 0.040903858840465546 +Loss at step 650: 0.04502870887517929 +Loss at step 700: 0.03964342176914215 +Loss at step 750: 0.04582899436354637 +Loss at step 800: 0.0710272490978241 +Loss at step 850: 0.04358744993805885 +Loss at step 900: 0.04310936853289604 +Mean training loss after epoch 54: 0.04815446674776103 + +EPOCH: 55 +Loss at step 0: 0.051317248493433 +Loss at step 50: 0.04444019868969917 +Loss at step 100: 0.03839680179953575 +Loss at step 150: 0.0455445758998394 +Loss at step 200: 0.04904934763908386 +Loss at step 250: 0.03952544555068016 +Loss at step 300: 0.04834434762597084 +Loss at step 350: 0.05323101580142975 +Loss at step 400: 0.039261650294065475 +Loss at step 450: 0.05736960470676422 +Loss at step 500: 0.04086262732744217 +Loss at step 550: 0.04510224983096123 +Loss at step 600: 0.03801380842924118 +Loss at step 650: 0.0654502809047699 +Loss at step 700: 0.04227443039417267 +Loss at step 750: 0.04313640296459198 +Loss at step 800: 0.04186270758509636 +Loss at step 850: 0.03945465013384819 +Loss at step 900: 0.044514819979667664 +Mean training loss after epoch 55: 0.048105461034439266 + +EPOCH: 56 +Loss at step 0: 0.043589457869529724 +Loss at step 50: 0.05869336798787117 +Loss at step 100: 0.042453307658433914 +Loss at step 150: 0.03535155579447746 +Loss at step 200: 0.04363277927041054 +Loss at step 250: 0.0467090979218483 +Loss at step 300: 0.045258987694978714 +Loss at step 350: 0.06672685593366623 +Loss at step 400: 0.046818021684885025 +Loss at step 450: 0.048656366765499115 +Loss at step 500: 0.03977872431278229 +Loss at step 550: 0.04043937101960182 +Loss at step 600: 0.03763727843761444 +Loss at step 650: 0.057002998888492584 +Loss at step 700: 0.048450011759996414 +Loss at step 750: 0.05234970897436142 +Loss at step 800: 0.03915264457464218 +Loss at step 850: 0.04743446037173271 +Loss at step 900: 0.050388772040605545 +Mean training loss after epoch 56: 0.048076416284386025 + +EPOCH: 57 +Loss at step 0: 0.042722828686237335 +Loss at step 50: 0.03941310942173004 +Loss at step 100: 0.041868336498737335 +Loss at step 150: 0.06520471721887589 +Loss at step 200: 0.05975699797272682 +Loss at step 250: 0.0423443466424942 +Loss at step 300: 0.03480306267738342 +Loss at step 350: 0.05211271345615387 +Loss at step 400: 0.04373591020703316 +Loss at step 450: 0.04422935098409653 +Loss at step 500: 0.040033187717199326 +Loss at step 550: 0.03651152551174164 +Loss at step 600: 0.045828089118003845 +Loss at step 650: 0.04409458115696907 +Loss at step 700: 0.04460090398788452 +Loss at step 750: 0.05023349076509476 +Loss at step 800: 0.039303459227085114 +Loss at step 850: 0.05849732086062431 +Loss at step 900: 0.044465143233537674 +Mean training loss after epoch 57: 0.04758283211144684 + +EPOCH: 58 +Loss at step 0: 0.07793443650007248 +Loss at step 50: 0.0465412437915802 +Loss at step 100: 0.039574287831783295 +Loss at step 150: 0.04919656738638878 +Loss at step 200: 0.04616691917181015 +Loss at step 250: 0.03959481045603752 +Loss at step 300: 0.03907226771116257 +Loss at step 350: 0.0577220618724823 +Loss at step 400: 0.03834746032953262 +Loss at step 450: 0.03915717452764511 +Loss at step 500: 0.05389558896422386 +Loss at step 550: 0.04508379101753235 +Loss at step 600: 0.06411364674568176 +Loss at step 650: 0.04608369246125221 +Loss at step 700: 0.04650292918086052 +Loss at step 750: 0.047246210277080536 +Loss at step 800: 0.0373479425907135 +Loss at step 850: 0.061928194016218185 +Loss at step 900: 0.042106423527002335 +Mean training loss after epoch 58: 0.047664002310842085 + +EPOCH: 59 +Loss at step 0: 0.03607901185750961 +Loss at step 50: 0.04769814759492874 +Loss at step 100: 0.05873991549015045 +Loss at step 150: 0.04440714418888092 +Loss at step 200: 0.04103497043251991 +Loss at step 250: 0.05442721024155617 +Loss at step 300: 0.04037656635046005 +Loss at step 350: 0.04745452105998993 +Loss at step 400: 0.0433388315141201 +Loss at step 450: 0.04134483262896538 +Loss at step 500: 0.06291520595550537 +Loss at step 550: 0.047317299991846085 +Loss at step 600: 0.0400472953915596 +Loss at step 650: 0.0391976572573185 +Loss at step 700: 0.04422984644770622 +Loss at step 750: 0.03773192688822746 +Loss at step 800: 0.03493031859397888 +Loss at step 850: 0.04502164572477341 +Loss at step 900: 0.03844921663403511 +Mean training loss after epoch 59: 0.04732135745849627 + +EPOCH: 60 +Loss at step 0: 0.04118996113538742 +Loss at step 50: 0.04305626451969147 +Loss at step 100: 0.04878903180360794 +Loss at step 150: 0.04232143610715866 +Loss at step 200: 0.04509598761796951 +Loss at step 250: 0.04246195778250694 +Loss at step 300: 0.044587139040231705 +Loss at step 350: 0.03914811089634895 +Loss at step 400: 0.046557433903217316 +Loss at step 450: 0.044400572776794434 +Loss at step 500: 0.04035966098308563 +Loss at step 550: 0.0393017940223217 +Loss at step 600: 0.044563695788383484 +Loss at step 650: 0.07715646177530289 +Loss at step 700: 0.04785558581352234 +Loss at step 750: 0.055963966995477676 +Loss at step 800: 0.04714891314506531 +Loss at step 850: 0.0540463924407959 +Loss at step 900: 0.060808874666690826 +Mean training loss after epoch 60: 0.047633435565239586 + +EPOCH: 61 +Loss at step 0: 0.04512996971607208 +Loss at step 50: 0.047515619546175 +Loss at step 100: 0.04961966350674629 +Loss at step 150: 0.0491953045129776 +Loss at step 200: 0.04204387962818146 +Loss at step 250: 0.04525350406765938 +Loss at step 300: 0.04634835198521614 +Loss at step 350: 0.03779623284935951 +Loss at step 400: 0.04411294311285019 +Loss at step 450: 0.036454617977142334 +Loss at step 500: 0.044686879962682724 +Loss at step 550: 0.062407054007053375 +Loss at step 600: 0.05763769894838333 +Loss at step 650: 0.04948614165186882 +Loss at step 700: 0.05643412470817566 +Loss at step 750: 0.03737170621752739 +Loss at step 800: 0.04201466217637062 +Loss at step 850: 0.05516848340630531 +Loss at step 900: 0.03836461529135704 +Mean training loss after epoch 61: 0.04757110587855392 + +EPOCH: 62 +Loss at step 0: 0.03898627310991287 +Loss at step 50: 0.06902450323104858 +Loss at step 100: 0.0726465955376625 +Loss at step 150: 0.04058443754911423 +Loss at step 200: 0.04343476891517639 +Loss at step 250: 0.0432150699198246 +Loss at step 300: 0.054757602512836456 +Loss at step 350: 0.04615873098373413 +Loss at step 400: 0.03766966238617897 +Loss at step 450: 0.06961634755134583 +Loss at step 500: 0.04850998520851135 +Loss at step 550: 0.05906073749065399 +Loss at step 600: 0.05508122220635414 +Loss at step 650: 0.039961911737918854 +Loss at step 700: 0.04223618283867836 +Loss at step 750: 0.04493911191821098 +Loss at step 800: 0.03980834409594536 +Loss at step 850: 0.038753628730773926 +Loss at step 900: 0.04183216392993927 +Mean training loss after epoch 62: 0.04773911904853417 + +EPOCH: 63 +Loss at step 0: 0.03987065702676773 +Loss at step 50: 0.038716014474630356 +Loss at step 100: 0.04608650505542755 +Loss at step 150: 0.0546819232404232 +Loss at step 200: 0.047643981873989105 +Loss at step 250: 0.04000033438205719 +Loss at step 300: 0.04256588593125343 +Loss at step 350: 0.04783113673329353 +Loss at step 400: 0.04281673580408096 +Loss at step 450: 0.04405758157372475 +Loss at step 500: 0.04527699947357178 +Loss at step 550: 0.037707142531871796 +Loss at step 600: 0.041260574012994766 +Loss at step 650: 0.057914432138204575 +Loss at step 700: 0.043336961418390274 +Loss at step 750: 0.05859449878334999 +Loss at step 800: 0.05876313894987106 +Loss at step 850: 0.055046480149030685 +Loss at step 900: 0.04020686075091362 +Mean training loss after epoch 63: 0.04782804918251058 + +EPOCH: 64 +Loss at step 0: 0.045894671231508255 +Loss at step 50: 0.04077927768230438 +Loss at step 100: 0.038366664201021194 +Loss at step 150: 0.039174336940050125 +Loss at step 200: 0.040383230894804 +Loss at step 250: 0.05649757385253906 +Loss at step 300: 0.04329577088356018 +Loss at step 350: 0.05102843418717384 +Loss at step 400: 0.038274023681879044 +Loss at step 450: 0.055590637028217316 +Loss at step 500: 0.05023416504263878 +Loss at step 550: 0.04705743119120598 +Loss at step 600: 0.045185744762420654 +Loss at step 650: 0.044194016605615616 +Loss at step 700: 0.0459500290453434 +Loss at step 750: 0.06974977254867554 +Loss at step 800: 0.04306946322321892 +Loss at step 850: 0.04421523958444595 +Loss at step 900: 0.03792290389537811 +Mean training loss after epoch 64: 0.04694063862217769 + +EPOCH: 65 +Loss at step 0: 0.042738817632198334 +Loss at step 50: 0.04411863535642624 +Loss at step 100: 0.053950633853673935 +Loss at step 150: 0.041000086814165115 +Loss at step 200: 0.043539103120565414 +Loss at step 250: 0.05965327471494675 +Loss at step 300: 0.03150484338402748 +Loss at step 350: 0.05821863189339638 +Loss at step 400: 0.04638699069619179 +Loss at step 450: 0.034982286393642426 +Loss at step 500: 0.04657517373561859 +Loss at step 550: 0.046452607959508896 +Loss at step 600: 0.0387742705643177 +Loss at step 650: 0.03545985743403435 +Loss at step 700: 0.06301256269216537 +Loss at step 750: 0.058682553470134735 +Loss at step 800: 0.042928650975227356 +Loss at step 850: 0.07728426158428192 +Loss at step 900: 0.03938567638397217 +Mean training loss after epoch 65: 0.047827781781728966 + +EPOCH: 66 +Loss at step 0: 0.0570816695690155 +Loss at step 50: 0.04733069986104965 +Loss at step 100: 0.04297904670238495 +Loss at step 150: 0.04814530164003372 +Loss at step 200: 0.046018172055482864 +Loss at step 250: 0.04306814447045326 +Loss at step 300: 0.06010618805885315 +Loss at step 350: 0.04113643243908882 +Loss at step 400: 0.044291891157627106 +Loss at step 450: 0.055407844483852386 +Loss at step 500: 0.04783373326063156 +Loss at step 550: 0.037621911615133286 +Loss at step 600: 0.04562778398394585 +Loss at step 650: 0.05813739448785782 +Loss at step 700: 0.04404817149043083 +Loss at step 750: 0.09041885286569595 +Loss at step 800: 0.04984774813055992 +Loss at step 850: 0.07523173838853836 +Loss at step 900: 0.04097443073987961 +Mean training loss after epoch 66: 0.04752154274980651 + +EPOCH: 67 +Loss at step 0: 0.04143616557121277 +Loss at step 50: 0.08796269446611404 +Loss at step 100: 0.054262831807136536 +Loss at step 150: 0.04591996222734451 +Loss at step 200: 0.04350891336798668 +Loss at step 250: 0.042350586503744125 +Loss at step 300: 0.03840792924165726 +Loss at step 350: 0.07431153953075409 +Loss at step 400: 0.05722375586628914 +Loss at step 450: 0.05687207356095314 +Loss at step 500: 0.03768659010529518 +Loss at step 550: 0.050057314336299896 +Loss at step 600: 0.07760375738143921 +Loss at step 650: 0.04933237284421921 +Loss at step 700: 0.03790581226348877 +Loss at step 750: 0.04498936980962753 +Loss at step 800: 0.03740730881690979 +Loss at step 850: 0.039160117506980896 +Loss at step 900: 0.0413285456597805 +Mean training loss after epoch 67: 0.0473792935088118 + +EPOCH: 68 +Loss at step 0: 0.04615543410181999 +Loss at step 50: 0.05738696828484535 +Loss at step 100: 0.03822232410311699 +Loss at step 150: 0.058881159871816635 +Loss at step 200: 0.04019222408533096 +Loss at step 250: 0.05622420087456703 +Loss at step 300: 0.04398506507277489 +Loss at step 350: 0.053851667791604996 +Loss at step 400: 0.03941120207309723 +Loss at step 450: 0.03599734976887703 +Loss at step 500: 0.03791561722755432 +Loss at step 550: 0.07174922525882721 +Loss at step 600: 0.05973157286643982 +Loss at step 650: 0.03945891559123993 +Loss at step 700: 0.04410084709525108 +Loss at step 750: 0.059830255806446075 +Loss at step 800: 0.07018953561782837 +Loss at step 850: 0.041691336780786514 +Loss at step 900: 0.040409721434116364 +Mean training loss after epoch 68: 0.04741711242954487 + +EPOCH: 69 +Loss at step 0: 0.06822960823774338 +Loss at step 50: 0.03677573427557945 +Loss at step 100: 0.047872308641672134 +Loss at step 150: 0.040340084582567215 +Loss at step 200: 0.05889175087213516 +Loss at step 250: 0.03868604451417923 +Loss at step 300: 0.04078621789813042 +Loss at step 350: 0.07734039425849915 +Loss at step 400: 0.0581878200173378 +Loss at step 450: 0.053424540907144547 +Loss at step 500: 0.03666060417890549 +Loss at step 550: 0.04429972916841507 +Loss at step 600: 0.06112226843833923 +Loss at step 650: 0.04150120168924332 +Loss at step 700: 0.04775947704911232 +Loss at step 750: 0.05095682293176651 +Loss at step 800: 0.04396393895149231 +Loss at step 850: 0.056271474808454514 +Loss at step 900: 0.048048894852399826 +Mean training loss after epoch 69: 0.04758670814295631 + +EPOCH: 70 +Loss at step 0: 0.05127924308180809 +Loss at step 50: 0.03587474673986435 +Loss at step 100: 0.04129428043961525 +Loss at step 150: 0.053819216787815094 +Loss at step 200: 0.03955303505063057 +Loss at step 250: 0.04259289428591728 +Loss at step 300: 0.04341386258602142 +Loss at step 350: 0.04002078250050545 +Loss at step 400: 0.04395505413413048 +Loss at step 450: 0.04200160503387451 +Loss at step 500: 0.08032595366239548 +Loss at step 550: 0.05446016415953636 +Loss at step 600: 0.07261621206998825 +Loss at step 650: 0.04178087040781975 +Loss at step 700: 0.038220349699258804 +Loss at step 750: 0.03684394806623459 +Loss at step 800: 0.04394752159714699 +Loss at step 850: 0.05843261629343033 +Loss at step 900: 0.07759283483028412 +Mean training loss after epoch 70: 0.04754900262315771 + +EPOCH: 71 +Loss at step 0: 0.050888143479824066 +Loss at step 50: 0.041283752769231796 +Loss at step 100: 0.04285949096083641 +Loss at step 150: 0.0846635028719902 +Loss at step 200: 0.04025402292609215 +Loss at step 250: 0.03653615713119507 +Loss at step 300: 0.03696757182478905 +Loss at step 350: 0.04051235690712929 +Loss at step 400: 0.043806347995996475 +Loss at step 450: 0.053630709648132324 +Loss at step 500: 0.05728735774755478 +Loss at step 550: 0.06122060492634773 +Loss at step 600: 0.041555218398571014 +Loss at step 650: 0.037492621690034866 +Loss at step 700: 0.04742196202278137 +Loss at step 750: 0.06283852458000183 +Loss at step 800: 0.03955661132931709 +Loss at step 850: 0.07465499639511108 +Loss at step 900: 0.054073721170425415 +Mean training loss after epoch 71: 0.04743520222874338 + +EPOCH: 72 +Loss at step 0: 0.041515085846185684 +Loss at step 50: 0.04594232514500618 +Loss at step 100: 0.06486237794160843 +Loss at step 150: 0.05722973495721817 +Loss at step 200: 0.040343545377254486 +Loss at step 250: 0.035639747977256775 +Loss at step 300: 0.04347945749759674 +Loss at step 350: 0.041574690490961075 +Loss at step 400: 0.05635509639978409 +Loss at step 450: 0.06067449599504471 +Loss at step 500: 0.03956625238060951 +Loss at step 550: 0.038249675184488297 +Loss at step 600: 0.040968749672174454 +Loss at step 650: 0.05996912345290184 +Loss at step 700: 0.04712619632482529 +Loss at step 750: 0.04036037251353264 +Loss at step 800: 0.0441366583108902 +Loss at step 850: 0.045532695949077606 +Loss at step 900: 0.04408133774995804 +Mean training loss after epoch 72: 0.046714516845084965 + +EPOCH: 73 +Loss at step 0: 0.04482561722397804 +Loss at step 50: 0.052709031850099564 +Loss at step 100: 0.05423188582062721 +Loss at step 150: 0.03632858768105507 +Loss at step 200: 0.038407932966947556 +Loss at step 250: 0.04293006286025047 +Loss at step 300: 0.07024706155061722 +Loss at step 350: 0.046826377511024475 +Loss at step 400: 0.05834141746163368 +Loss at step 450: 0.045909322798252106 +Loss at step 500: 0.042169976979494095 +Loss at step 550: 0.04086346551775932 +Loss at step 600: 0.03868945315480232 +Loss at step 650: 0.04697159305214882 +Loss at step 700: 0.04510665312409401 +Loss at step 750: 0.04176634922623634 +Loss at step 800: 0.04598810523748398 +Loss at step 850: 0.04379308223724365 +Loss at step 900: 0.04705145210027695 +Mean training loss after epoch 73: 0.04697709750614441 + +EPOCH: 74 +Loss at step 0: 0.048745185136795044 +Loss at step 50: 0.04157671704888344 +Loss at step 100: 0.03920102119445801 +Loss at step 150: 0.04256703332066536 +Loss at step 200: 0.05496145784854889 +Loss at step 250: 0.04215114936232567 +Loss at step 300: 0.061007458716630936 +Loss at step 350: 0.054140470921993256 +Loss at step 400: 0.05783966928720474 +Loss at step 450: 0.04721471294760704 +Loss at step 500: 0.06627503037452698 +Loss at step 550: 0.06257064640522003 +Loss at step 600: 0.07903435826301575 +Loss at step 650: 0.04348352923989296 +Loss at step 700: 0.03569105640053749 +Loss at step 750: 0.0532129742205143 +Loss at step 800: 0.04187948256731033 +Loss at step 850: 0.045226309448480606 +Loss at step 900: 0.050558362156152725 +Mean training loss after epoch 74: 0.0471976174633386 + +EPOCH: 75 +Loss at step 0: 0.040759578347206116 +Loss at step 50: 0.04871371388435364 +Loss at step 100: 0.04250158369541168 +Loss at step 150: 0.043691352009773254 +Loss at step 200: 0.03760522976517677 +Loss at step 250: 0.05975149944424629 +Loss at step 300: 0.04176358878612518 +Loss at step 350: 0.05473127216100693 +Loss at step 400: 0.06540390104055405 +Loss at step 450: 0.05255338177084923 +Loss at step 500: 0.04147755727171898 +Loss at step 550: 0.04068160429596901 +Loss at step 600: 0.06024593114852905 +Loss at step 650: 0.04236413910984993 +Loss at step 700: 0.0397140197455883 +Loss at step 750: 0.05809806287288666 +Loss at step 800: 0.04391433298587799 +Loss at step 850: 0.03449312970042229 +Loss at step 900: 0.0414324514567852 +Mean training loss after epoch 75: 0.04708102619104675 + +EPOCH: 76 +Loss at step 0: 0.0410638153553009 +Loss at step 50: 0.036824703216552734 +Loss at step 100: 0.06616359204053879 +Loss at step 150: 0.041972700506448746 +Loss at step 200: 0.0848299115896225 +Loss at step 250: 0.03932303562760353 +Loss at step 300: 0.040949221700429916 +Loss at step 350: 0.041197892278432846 +Loss at step 400: 0.03768625855445862 +Loss at step 450: 0.039091046899557114 +Loss at step 500: 0.03969506546854973 +Loss at step 550: 0.039847347885370255 +Loss at step 600: 0.040176793932914734 +Loss at step 650: 0.03941885754466057 +Loss at step 700: 0.05517857149243355 +Loss at step 750: 0.03895547240972519 +Loss at step 800: 0.041440192610025406 +Loss at step 850: 0.04371171072125435 +Loss at step 900: 0.04424459487199783 +Mean training loss after epoch 76: 0.046886064755573456 + +EPOCH: 77 +Loss at step 0: 0.05327722802758217 +Loss at step 50: 0.047982797026634216 +Loss at step 100: 0.047688763588666916 +Loss at step 150: 0.0443866103887558 +Loss at step 200: 0.046429701149463654 +Loss at step 250: 0.040853723883628845 +Loss at step 300: 0.05173882469534874 +Loss at step 350: 0.03788483142852783 +Loss at step 400: 0.040624164044857025 +Loss at step 450: 0.047464050352573395 +Loss at step 500: 0.040677011013031006 +Loss at step 550: 0.056968025863170624 +Loss at step 600: 0.03436114266514778 +Loss at step 650: 0.03963804617524147 +Loss at step 700: 0.03771521896123886 +Loss at step 750: 0.04085657000541687 +Loss at step 800: 0.038767196238040924 +Loss at step 850: 0.05170619487762451 +Loss at step 900: 0.04272765293717384 +Mean training loss after epoch 77: 0.04667229726235432 + +EPOCH: 78 +Loss at step 0: 0.04097730293869972 +Loss at step 50: 0.04055343195796013 +Loss at step 100: 0.033500295132398605 +Loss at step 150: 0.0574973039329052 +Loss at step 200: 0.04137289524078369 +Loss at step 250: 0.05968521907925606 +Loss at step 300: 0.0517006479203701 +Loss at step 350: 0.04575495794415474 +Loss at step 400: 0.038898590952157974 +Loss at step 450: 0.04337453842163086 +Loss at step 500: 0.04327523335814476 +Loss at step 550: 0.04569193720817566 +Loss at step 600: 0.045522697269916534 +Loss at step 650: 0.04721605032682419 +Loss at step 700: 0.04273367300629616 +Loss at step 750: 0.057780031114816666 +Loss at step 800: 0.04293037950992584 +Loss at step 850: 0.04057444632053375 +Loss at step 900: 0.055183615535497665 +Mean training loss after epoch 78: 0.047239946122013175 + +EPOCH: 79 +Loss at step 0: 0.04912033677101135 +Loss at step 50: 0.06817284226417542 +Loss at step 100: 0.043235428631305695 +Loss at step 150: 0.042690206319093704 +Loss at step 200: 0.050953786820173264 +Loss at step 250: 0.0426013320684433 +Loss at step 300: 0.038686323910951614 +Loss at step 350: 0.060366131365299225 +Loss at step 400: 0.05650699883699417 +Loss at step 450: 0.04733030125498772 +Loss at step 500: 0.060853373259305954 +Loss at step 550: 0.0391446128487587 +Loss at step 600: 0.05376390367746353 +Loss at step 650: 0.04451599717140198 +Loss at step 700: 0.04333176836371422 +Loss at step 750: 0.041507039219141006 +Loss at step 800: 0.036948103457689285 +Loss at step 850: 0.040453240275382996 +Loss at step 900: 0.036246802657842636 +Mean training loss after epoch 79: 0.04713592149897107 + +EPOCH: 80 +Loss at step 0: 0.05095755681395531 +Loss at step 50: 0.04211694747209549 +Loss at step 100: 0.04212859645485878 +Loss at step 150: 0.03907565772533417 +Loss at step 200: 0.04415880888700485 +Loss at step 250: 0.043855030089616776 +Loss at step 300: 0.042434077709913254 +Loss at step 350: 0.05637623369693756 +Loss at step 400: 0.04558110237121582 +Loss at step 450: 0.050731852650642395 +Loss at step 500: 0.05155442655086517 +Loss at step 550: 0.04046908766031265 +Loss at step 600: 0.06025182455778122 +Loss at step 650: 0.05209634453058243 +Loss at step 700: 0.043861255049705505 +Loss at step 750: 0.04493732750415802 +Loss at step 800: 0.03920512646436691 +Loss at step 850: 0.06101406738162041 +Loss at step 900: 0.04162540286779404 +Mean training loss after epoch 80: 0.047833663425338804 + +EPOCH: 81 +Loss at step 0: 0.03846841678023338 +Loss at step 50: 0.03800700604915619 +Loss at step 100: 0.0367264449596405 +Loss at step 150: 0.04300251230597496 +Loss at step 200: 0.03383096680045128 +Loss at step 250: 0.042511820793151855 +Loss at step 300: 0.045681387186050415 +Loss at step 350: 0.03893804922699928 +Loss at step 400: 0.05360741913318634 +Loss at step 450: 0.03366854041814804 +Loss at step 500: 0.043903399258852005 +Loss at step 550: 0.04172151908278465 +Loss at step 600: 0.042890891432762146 +Loss at step 650: 0.04399767145514488 +Loss at step 700: 0.04112936556339264 +Loss at step 750: 0.043750178068876266 +Loss at step 800: 0.03798866271972656 +Loss at step 850: 0.044352538883686066 +Loss at step 900: 0.04746660962700844 +Mean training loss after epoch 81: 0.04701745503349726 + +EPOCH: 82 +Loss at step 0: 0.03662620484828949 +Loss at step 50: 0.04461151733994484 +Loss at step 100: 0.07133741676807404 +Loss at step 150: 0.055357009172439575 +Loss at step 200: 0.04047105088829994 +Loss at step 250: 0.04308281093835831 +Loss at step 300: 0.0494520403444767 +Loss at step 350: 0.039743274450302124 +Loss at step 400: 0.036175236105918884 +Loss at step 450: 0.03906268626451492 +Loss at step 500: 0.07653811573982239 +Loss at step 550: 0.03863096237182617 +Loss at step 600: 0.05767972022294998 +Loss at step 650: 0.03567032516002655 +Loss at step 700: 0.038375139236450195 +Loss at step 750: 0.038932915776968 +Loss at step 800: 0.04408343508839607 +Loss at step 850: 0.06424372643232346 +Loss at step 900: 0.04954082891345024 +Mean training loss after epoch 82: 0.04718503172296896 + +EPOCH: 83 +Loss at step 0: 0.0524853877723217 +Loss at step 50: 0.04582804813981056 +Loss at step 100: 0.04933055862784386 +Loss at step 150: 0.05652166157960892 +Loss at step 200: 0.07147083431482315 +Loss at step 250: 0.04865981638431549 +Loss at step 300: 0.041352517902851105 +Loss at step 350: 0.04174588620662689 +Loss at step 400: 0.0558997206389904 +Loss at step 450: 0.04223625734448433 +Loss at step 500: 0.04813778027892113 +Loss at step 550: 0.07026468962430954 +Loss at step 600: 0.040337368845939636 +Loss at step 650: 0.07219982147216797 +Loss at step 700: 0.0650184154510498 +Loss at step 750: 0.046563148498535156 +Loss at step 800: 0.042573802173137665 +Loss at step 850: 0.04335649684071541 +Loss at step 900: 0.04042411595582962 +Mean training loss after epoch 83: 0.04719024992534029 + +EPOCH: 84 +Loss at step 0: 0.04806012287735939 +Loss at step 50: 0.04488600417971611 +Loss at step 100: 0.037725940346717834 +Loss at step 150: 0.04491431266069412 +Loss at step 200: 0.03242922201752663 +Loss at step 250: 0.04460249841213226 +Loss at step 300: 0.04300040006637573 +Loss at step 350: 0.04044809192419052 +Loss at step 400: 0.04025943949818611 +Loss at step 450: 0.0418863482773304 +Loss at step 500: 0.05103612318634987 +Loss at step 550: 0.045727066695690155 +Loss at step 600: 0.04193394258618355 +Loss at step 650: 0.05514995753765106 +Loss at step 700: 0.04191268980503082 +Loss at step 750: 0.044210828840732574 +Loss at step 800: 0.04739311337471008 +Loss at step 850: 0.03731629624962807 +Loss at step 900: 0.03570638224482536 +Mean training loss after epoch 84: 0.046592896264086144 + +EPOCH: 85 +Loss at step 0: 0.06211300194263458 +Loss at step 50: 0.04774096608161926 +Loss at step 100: 0.04384761303663254 +Loss at step 150: 0.04292440414428711 +Loss at step 200: 0.04449387267231941 +Loss at step 250: 0.05317692086100578 +Loss at step 300: 0.04303763806819916 +Loss at step 350: 0.05342160165309906 +Loss at step 400: 0.03743242472410202 +Loss at step 450: 0.04069390892982483 +Loss at step 500: 0.05557990446686745 +Loss at step 550: 0.03696519508957863 +Loss at step 600: 0.041040439158678055 +Loss at step 650: 0.05005531758069992 +Loss at step 700: 0.03598293289542198 +Loss at step 750: 0.04115710034966469 +Loss at step 800: 0.03746853396296501 +Loss at step 850: 0.04052548483014107 +Loss at step 900: 0.037609584629535675 +Mean training loss after epoch 85: 0.046546222246897376 + +EPOCH: 86 +Loss at step 0: 0.04030114784836769 +Loss at step 50: 0.04651087895035744 +Loss at step 100: 0.05847875401377678 +Loss at step 150: 0.043544359505176544 +Loss at step 200: 0.05292382463812828 +Loss at step 250: 0.057944778352975845 +Loss at step 300: 0.057547859847545624 +Loss at step 350: 0.04556535929441452 +Loss at step 400: 0.046927228569984436 +Loss at step 450: 0.039028167724609375 +Loss at step 500: 0.0458497516810894 +Loss at step 550: 0.05151519924402237 +Loss at step 600: 0.04135338217020035 +Loss at step 650: 0.038992609828710556 +Loss at step 700: 0.04243936389684677 +Loss at step 750: 0.04529775679111481 +Loss at step 800: 0.04758916422724724 +Loss at step 850: 0.053747836500406265 +Loss at step 900: 0.058469176292419434 +Mean training loss after epoch 86: 0.04620548715389995 + +EPOCH: 87 +Loss at step 0: 0.05536244809627533 +Loss at step 50: 0.04030721262097359 +Loss at step 100: 0.0553092323243618 +Loss at step 150: 0.04915754124522209 +Loss at step 200: 0.0423746183514595 +Loss at step 250: 0.043581295758485794 +Loss at step 300: 0.06138043478131294 +Loss at step 350: 0.05909031257033348 +Loss at step 400: 0.055286046117544174 +Loss at step 450: 0.04307536780834198 +Loss at step 500: 0.04408692196011543 +Loss at step 550: 0.03895223140716553 +Loss at step 600: 0.05573630705475807 +Loss at step 650: 0.0503263846039772 +Loss at step 700: 0.05658665671944618 +Loss at step 750: 0.03863617777824402 +Loss at step 800: 0.04650202393531799 +Loss at step 850: 0.04139861837029457 +Loss at step 900: 0.03975027799606323 +Mean training loss after epoch 87: 0.046502727310238745 + +EPOCH: 88 +Loss at step 0: 0.045563776046037674 +Loss at step 50: 0.041026610881090164 +Loss at step 100: 0.041799869388341904 +Loss at step 150: 0.05045092850923538 +Loss at step 200: 0.040286578238010406 +Loss at step 250: 0.050262533128261566 +Loss at step 300: 0.039544302970170975 +Loss at step 350: 0.0478665865957737 +Loss at step 400: 0.06087902560830116 +Loss at step 450: 0.055986080318689346 +Loss at step 500: 0.045001428574323654 +Loss at step 550: 0.04655374586582184 +Loss at step 600: 0.04062845557928085 +Loss at step 650: 0.03524141013622284 +Loss at step 700: 0.04796488955616951 +Loss at step 750: 0.04325706139206886 +Loss at step 800: 0.05093172937631607 +Loss at step 850: 0.04269649460911751 +Loss at step 900: 0.04599309340119362 +Mean training loss after epoch 88: 0.0467292155935439 + +EPOCH: 89 +Loss at step 0: 0.03498572111129761 +Loss at step 50: 0.04505831375718117 +Loss at step 100: 0.032936498522758484 +Loss at step 150: 0.043412357568740845 +Loss at step 200: 0.05033085122704506 +Loss at step 250: 0.04830761253833771 +Loss at step 300: 0.04398735612630844 +Loss at step 350: 0.05050428956747055 +Loss at step 400: 0.041426882147789 +Loss at step 450: 0.03829129412770271 +Loss at step 500: 0.05605369433760643 +Loss at step 550: 0.03872028738260269 +Loss at step 600: 0.037425778806209564 +Loss at step 650: 0.06310445070266724 +Loss at step 700: 0.039658624678850174 +Loss at step 750: 0.05665264651179314 +Loss at step 800: 0.03846779465675354 +Loss at step 850: 0.038600143045186996 +Loss at step 900: 0.04491173475980759 +Mean training loss after epoch 89: 0.04753672016629659 + +EPOCH: 90 +Loss at step 0: 0.04258023947477341 +Loss at step 50: 0.044276945292949677 +Loss at step 100: 0.04213239625096321 +Loss at step 150: 0.06880049407482147 +Loss at step 200: 0.04651037976145744 +Loss at step 250: 0.04323754832148552 +Loss at step 300: 0.03721318766474724 +Loss at step 350: 0.04560566321015358 +Loss at step 400: 0.05704891309142113 +Loss at step 450: 0.042375389486551285 +Loss at step 500: 0.0591731071472168 +Loss at step 550: 0.04094204679131508 +Loss at step 600: 0.053052760660648346 +Loss at step 650: 0.04108680039644241 +Loss at step 700: 0.0458051823079586 +Loss at step 750: 0.054628919810056686 +Loss at step 800: 0.04508604109287262 +Loss at step 850: 0.043105728924274445 +Loss at step 900: 0.0373399592936039 +Mean training loss after epoch 90: 0.04637778279131283 + +EPOCH: 91 +Loss at step 0: 0.0430794321000576 +Loss at step 50: 0.03386101871728897 +Loss at step 100: 0.03983902186155319 +Loss at step 150: 0.043143779039382935 +Loss at step 200: 0.05573510006070137 +Loss at step 250: 0.05860931798815727 +Loss at step 300: 0.0569300651550293 +Loss at step 350: 0.04307941347360611 +Loss at step 400: 0.05956780165433884 +Loss at step 450: 0.053321488201618195 +Loss at step 500: 0.042492058128118515 +Loss at step 550: 0.042753588408231735 +Loss at step 600: 0.04421638697385788 +Loss at step 650: 0.04033626616001129 +Loss at step 700: 0.04685452580451965 +Loss at step 750: 0.057677291333675385 +Loss at step 800: 0.041686028242111206 +Loss at step 850: 0.0433565117418766 +Loss at step 900: 0.047797273844480515 +Mean training loss after epoch 91: 0.04683416457509181 + +EPOCH: 92 +Loss at step 0: 0.04526752233505249 +Loss at step 50: 0.0441841296851635 +Loss at step 100: 0.057255759835243225 +Loss at step 150: 0.05717546492815018 +Loss at step 200: 0.0645579993724823 +Loss at step 250: 0.049342211335897446 +Loss at step 300: 0.03824814036488533 +Loss at step 350: 0.04087137430906296 +Loss at step 400: 0.06547455489635468 +Loss at step 450: 0.043666303157806396 +Loss at step 500: 0.07136151194572449 +Loss at step 550: 0.04037679731845856 +Loss at step 600: 0.045818764716386795 +Loss at step 650: 0.03880315274000168 +Loss at step 700: 0.03931305930018425 +Loss at step 750: 0.04683949798345566 +Loss at step 800: 0.036871861666440964 +Loss at step 850: 0.04416428506374359 +Loss at step 900: 0.06325354427099228 +Mean training loss after epoch 92: 0.04600482769032468 + +EPOCH: 93 +Loss at step 0: 0.06294853985309601 +Loss at step 50: 0.04256632924079895 +Loss at step 100: 0.054423704743385315 +Loss at step 150: 0.04159044474363327 +Loss at step 200: 0.03959943354129791 +Loss at step 250: 0.035830531269311905 +Loss at step 300: 0.040464695543050766 +Loss at step 350: 0.06768246740102768 +Loss at step 400: 0.05149000510573387 +Loss at step 450: 0.038842108100652695 +Loss at step 500: 0.04130643606185913 +Loss at step 550: 0.03519388288259506 +Loss at step 600: 0.05303003266453743 +Loss at step 650: 0.05530502274632454 +Loss at step 700: 0.03809872642159462 +Loss at step 750: 0.043990518897771835 +Loss at step 800: 0.0452754870057106 +Loss at step 850: 0.05544896796345711 +Loss at step 900: 0.055524323135614395 +Mean training loss after epoch 93: 0.04659848777390619 + +EPOCH: 94 +Loss at step 0: 0.04012817516922951 +Loss at step 50: 0.03704731911420822 +Loss at step 100: 0.04741260036826134 +Loss at step 150: 0.057084113359451294 +Loss at step 200: 0.03886241093277931 +Loss at step 250: 0.0521102137863636 +Loss at step 300: 0.045050811022520065 +Loss at step 350: 0.07392949610948563 +Loss at step 400: 0.039289481937885284 +Loss at step 450: 0.056882958859205246 +Loss at step 500: 0.10334857553243637 +Loss at step 550: 0.0412553995847702 +Loss at step 600: 0.034896694123744965 +Loss at step 650: 0.03679481893777847 +Loss at step 700: 0.050942663103342056 +Loss at step 750: 0.039942581206560135 +Loss at step 800: 0.07243037223815918 +Loss at step 850: 0.035497840493917465 +Loss at step 900: 0.037712402641773224 +Mean training loss after epoch 94: 0.046959693257663165 + +EPOCH: 95 +Loss at step 0: 0.054403457790613174 +Loss at step 50: 0.0661437064409256 +Loss at step 100: 0.042066242545843124 +Loss at step 150: 0.05207742750644684 +Loss at step 200: 0.04484620317816734 +Loss at step 250: 0.04082221910357475 +Loss at step 300: 0.04688990116119385 +Loss at step 350: 0.043736789375543594 +Loss at step 400: 0.055164869874715805 +Loss at step 450: 0.05996564403176308 +Loss at step 500: 0.04181736335158348 +Loss at step 550: 0.0405745655298233 +Loss at step 600: 0.06672053784132004 +Loss at step 650: 0.054868850857019424 +Loss at step 700: 0.04629993438720703 +Loss at step 750: 0.065439872443676 +Loss at step 800: 0.0440790168941021 +Loss at step 850: 0.04349711909890175 +Loss at step 900: 0.03859855979681015 +Mean training loss after epoch 95: 0.04599744187735482 + +EPOCH: 96 +Loss at step 0: 0.040530234575271606 +Loss at step 50: 0.05591876059770584 +Loss at step 100: 0.04553547129034996 +Loss at step 150: 0.05504228547215462 +Loss at step 200: 0.050723232328891754 +Loss at step 250: 0.03782333806157112 +Loss at step 300: 0.04495280981063843 +Loss at step 350: 0.04400955140590668 +Loss at step 400: 0.05539476498961449 +Loss at step 450: 0.0490998812019825 +Loss at step 500: 0.05460197851061821 +Loss at step 550: 0.04273224622011185 +Loss at step 600: 0.06938333064317703 +Loss at step 650: 0.040606457740068436 +Loss at step 700: 0.03727785497903824 +Loss at step 750: 0.046667106449604034 +Loss at step 800: 0.04326537624001503 +Loss at step 850: 0.04375113174319267 +Loss at step 900: 0.06577808409929276 +Mean training loss after epoch 96: 0.04626949657715841 + +EPOCH: 97 +Loss at step 0: 0.07202793657779694 +Loss at step 50: 0.047440625727176666 +Loss at step 100: 0.03818235918879509 +Loss at step 150: 0.044339776039123535 +Loss at step 200: 0.04141019284725189 +Loss at step 250: 0.07133287936449051 +Loss at step 300: 0.06798458099365234 +Loss at step 350: 0.05456782132387161 +Loss at step 400: 0.061687350273132324 +Loss at step 450: 0.053673937916755676 +Loss at step 500: 0.039644405245780945 +Loss at step 550: 0.04686584323644638 +Loss at step 600: 0.04945634305477142 +Loss at step 650: 0.0490257665514946 +Loss at step 700: 0.03872289881110191 +Loss at step 750: 0.05673569440841675 +Loss at step 800: 0.03633664548397064 +Loss at step 850: 0.059356894344091415 +Loss at step 900: 0.05027332156896591 +Mean training loss after epoch 97: 0.04676683567591441 + +EPOCH: 98 +Loss at step 0: 0.04440366476774216 +Loss at step 50: 0.04177078977227211 +Loss at step 100: 0.0452820360660553 +Loss at step 150: 0.0384245291352272 +Loss at step 200: 0.04596066474914551 +Loss at step 250: 0.048838935792446136 +Loss at step 300: 0.061357393860816956 +Loss at step 350: 0.06618337333202362 +Loss at step 400: 0.05215151235461235 +Loss at step 450: 0.04230041801929474 +Loss at step 500: 0.04279365390539169 +Loss at step 550: 0.037335582077503204 +Loss at step 600: 0.041467972099781036 +Loss at step 650: 0.04180183634161949 +Loss at step 700: 0.03996708244085312 +Loss at step 750: 0.04186198115348816 +Loss at step 800: 0.03563302382826805 +Loss at step 850: 0.04271884635090828 +Loss at step 900: 0.04177500680088997 +Mean training loss after epoch 98: 0.04692643187018727 + +EPOCH: 99 +Loss at step 0: 0.04024875909090042 +Loss at step 50: 0.040785618126392365 +Loss at step 100: 0.04529747739434242 +Loss at step 150: 0.050547268241643906 +Loss at step 200: 0.05665706843137741 +Loss at step 250: 0.045099228620529175 +Loss at step 300: 0.07350451499223709 +Loss at step 350: 0.040784575045108795 +Loss at step 400: 0.04848353564739227 +Loss at step 450: 0.04601228982210159 +Loss at step 500: 0.05875125154852867 +Loss at step 550: 0.03984513878822327 +Loss at step 600: 0.03362663462758064 +Loss at step 650: 0.04296380653977394 +Loss at step 700: 0.07485966384410858 +Loss at step 750: 0.04666467010974884 +Loss at step 800: 0.041170015931129456 +Loss at step 850: 0.043535225093364716 +Loss at step 900: 0.04665414243936539 +Mean training loss after epoch 99: 0.04672886201663058 + +EPOCH: 100 +Loss at step 0: 0.03907690569758415 +Loss at step 50: 0.04161510989069939 +Loss at step 100: 0.04537966847419739 +Loss at step 150: 0.041130367666482925 +Loss at step 200: 0.034240514039993286 +Loss at step 250: 0.04232052341103554 +Loss at step 300: 0.040373578667640686 +Loss at step 350: 0.04542124271392822 +Loss at step 400: 0.0378911979496479 +Loss at step 450: 0.05430685728788376 +Loss at step 500: 0.07519795000553131 +Loss at step 550: 0.04281020537018776 +Loss at step 600: 0.0402553528547287 +Loss at step 650: 0.052460674196481705 +Loss at step 700: 0.038558125495910645 +Loss at step 750: 0.0648709163069725 +Loss at step 800: 0.04978698119521141 +Loss at step 850: 0.04303291440010071 +Loss at step 900: 0.060903795063495636 +Mean training loss after epoch 100: 0.04635449066789928 + +EPOCH: 101 +Loss at step 0: 0.05339902266860008 +Loss at step 50: 0.040514107793569565 +Loss at step 100: 0.04715293273329735 +Loss at step 150: 0.040969353169202805 +Loss at step 200: 0.03878287598490715 +Loss at step 250: 0.0709432065486908 +Loss at step 300: 0.054566748440265656 +Loss at step 350: 0.038880303502082825 +Loss at step 400: 0.03952517360448837 +Loss at step 450: 0.036260757595300674 +Loss at step 500: 0.038245972245931625 +Loss at step 550: 0.03711141273379326 +Loss at step 600: 0.043874118477106094 +Loss at step 650: 0.03878263011574745 +Loss at step 700: 0.059451546519994736 +Loss at step 750: 0.04312610626220703 +Loss at step 800: 0.04129006713628769 +Loss at step 850: 0.04413214325904846 +Loss at step 900: 0.07639607042074203 +Mean training loss after epoch 101: 0.04611626348055121 + +EPOCH: 102 +Loss at step 0: 0.04296611249446869 +Loss at step 50: 0.03962035849690437 +Loss at step 100: 0.03798158839344978 +Loss at step 150: 0.041246864944696426 +Loss at step 200: 0.03905134275555611 +Loss at step 250: 0.0561823844909668 +Loss at step 300: 0.04799499362707138 +Loss at step 350: 0.06448386609554291 +Loss at step 400: 0.0575253963470459 +Loss at step 450: 0.039056289941072464 +Loss at step 500: 0.05260954424738884 +Loss at step 550: 0.043101824820041656 +Loss at step 600: 0.04934180527925491 +Loss at step 650: 0.062109071761369705 +Loss at step 700: 0.03551768884062767 +Loss at step 750: 0.05262370780110359 +Loss at step 800: 0.07427023351192474 +Loss at step 850: 0.04252688214182854 +Loss at step 900: 0.04384399205446243 +Mean training loss after epoch 102: 0.04602831927563018 + +EPOCH: 103 +Loss at step 0: 0.04268811643123627 +Loss at step 50: 0.04289247468113899 +Loss at step 100: 0.03939443454146385 +Loss at step 150: 0.03925861418247223 +Loss at step 200: 0.039750441908836365 +Loss at step 250: 0.058280959725379944 +Loss at step 300: 0.041140809655189514 +Loss at step 350: 0.05711166188120842 +Loss at step 400: 0.03726119548082352 +Loss at step 450: 0.04400964453816414 +Loss at step 500: 0.043159328401088715 +Loss at step 550: 0.03763923421502113 +Loss at step 600: 0.052103664726018906 +Loss at step 650: 0.0364920012652874 +Loss at step 700: 0.07342993468046188 +Loss at step 750: 0.05685194954276085 +Loss at step 800: 0.06026710197329521 +Loss at step 850: 0.047543320804834366 +Loss at step 900: 0.059920359402894974 +Mean training loss after epoch 103: 0.046579691108816594 + +EPOCH: 104 +Loss at step 0: 0.059332530945539474 +Loss at step 50: 0.03954814746975899 +Loss at step 100: 0.05902203172445297 +Loss at step 150: 0.0401795394718647 +Loss at step 200: 0.0448736846446991 +Loss at step 250: 0.038291528820991516 +Loss at step 300: 0.043111491948366165 +Loss at step 350: 0.04092888906598091 +Loss at step 400: 0.06010512635111809 +Loss at step 450: 0.04024302586913109 +Loss at step 500: 0.043869923800230026 +Loss at step 550: 0.05269040912389755 +Loss at step 600: 0.05494172126054764 +Loss at step 650: 0.04899188503623009 +Loss at step 700: 0.07332980632781982 +Loss at step 750: 0.04546227678656578 +Loss at step 800: 0.05833340436220169 +Loss at step 850: 0.03538486734032631 +Loss at step 900: 0.07298975437879562 +Mean training loss after epoch 104: 0.046396581543438724 + +EPOCH: 105 +Loss at step 0: 0.039035432040691376 +Loss at step 50: 0.049349650740623474 +Loss at step 100: 0.04110094904899597 +Loss at step 150: 0.07577597349882126 +Loss at step 200: 0.03782310709357262 +Loss at step 250: 0.04330572858452797 +Loss at step 300: 0.06284419447183609 +Loss at step 350: 0.04099452495574951 +Loss at step 400: 0.054274700582027435 +Loss at step 450: 0.04495414346456528 +Loss at step 500: 0.036758579313755035 +Loss at step 550: 0.059087108820676804 +Loss at step 600: 0.046631455421447754 +Loss at step 650: 0.04340684041380882 +Loss at step 700: 0.032876383513212204 +Loss at step 750: 0.05427297204732895 +Loss at step 800: 0.041013892740011215 +Loss at step 850: 0.07250183075666428 +Loss at step 900: 0.04028826206922531 +Mean training loss after epoch 105: 0.04655577463985506 + +EPOCH: 106 +Loss at step 0: 0.03419685736298561 +Loss at step 50: 0.039862923324108124 +Loss at step 100: 0.064559705555439 +Loss at step 150: 0.04381544515490532 +Loss at step 200: 0.03627309203147888 +Loss at step 250: 0.054370440542697906 +Loss at step 300: 0.043504197150468826 +Loss at step 350: 0.040975604206323624 +Loss at step 400: 0.06387734413146973 +Loss at step 450: 0.04055153951048851 +Loss at step 500: 0.04148499295115471 +Loss at step 550: 0.043211258947849274 +Loss at step 600: 0.04447993263602257 +Loss at step 650: 0.054644715040922165 +Loss at step 700: 0.04371237754821777 +Loss at step 750: 0.03501531481742859 +Loss at step 800: 0.06553583592176437 +Loss at step 850: 0.039397649466991425 +Loss at step 900: 0.04437081888318062 +Mean training loss after epoch 106: 0.04637773413814779 + +EPOCH: 107 +Loss at step 0: 0.05842943489551544 +Loss at step 50: 0.05536622926592827 +Loss at step 100: 0.04273920878767967 +Loss at step 150: 0.03758486732840538 +Loss at step 200: 0.05066695809364319 +Loss at step 250: 0.03833737596869469 +Loss at step 300: 0.04791479557752609 +Loss at step 350: 0.05754756182432175 +Loss at step 400: 0.04466303065419197 +Loss at step 450: 0.06724495440721512 +Loss at step 500: 0.05370062589645386 +Loss at step 550: 0.04716326668858528 +Loss at step 600: 0.042901404201984406 +Loss at step 650: 0.04186997562646866 +Loss at step 700: 0.046877022832632065 +Loss at step 750: 0.04178445041179657 +Loss at step 800: 0.03975269943475723 +Loss at step 850: 0.04837210103869438 +Loss at step 900: 0.07422516494989395 +Mean training loss after epoch 107: 0.04603555354116949 + +EPOCH: 108 +Loss at step 0: 0.04077273979783058 +Loss at step 50: 0.0416000559926033 +Loss at step 100: 0.037800516933202744 +Loss at step 150: 0.05095312371850014 +Loss at step 200: 0.039310406893491745 +Loss at step 250: 0.04412652924656868 +Loss at step 300: 0.04020579904317856 +Loss at step 350: 0.06208536773920059 +Loss at step 400: 0.041482310742139816 +Loss at step 450: 0.03684047609567642 +Loss at step 500: 0.044366635382175446 +Loss at step 550: 0.04781442508101463 +Loss at step 600: 0.03964928537607193 +Loss at step 650: 0.03971539065241814 +Loss at step 700: 0.04243103414773941 +Loss at step 750: 0.05631023272871971 +Loss at step 800: 0.04848416894674301 +Loss at step 850: 0.0394274927675724 +Loss at step 900: 0.0508500300347805 +Mean training loss after epoch 108: 0.046317251419017054 + +EPOCH: 109 +Loss at step 0: 0.08095970749855042 +Loss at step 50: 0.04784369841217995 +Loss at step 100: 0.06245036423206329 +Loss at step 150: 0.0419779010117054 +Loss at step 200: 0.03566703945398331 +Loss at step 250: 0.04204497113823891 +Loss at step 300: 0.050340164452791214 +Loss at step 350: 0.04158265143632889 +Loss at step 400: 0.03868810832500458 +Loss at step 450: 0.0391940139234066 +Loss at step 500: 0.04052245616912842 +Loss at step 550: 0.05940769985318184 +Loss at step 600: 0.06038498133420944 +Loss at step 650: 0.04475552588701248 +Loss at step 700: 0.04765903949737549 +Loss at step 750: 0.04082473739981651 +Loss at step 800: 0.036262065172195435 +Loss at step 850: 0.046245064586400986 +Loss at step 900: 0.03684520721435547 +Mean training loss after epoch 109: 0.045711824670434 + +EPOCH: 110 +Loss at step 0: 0.030645083636045456 +Loss at step 50: 0.04015166684985161 +Loss at step 100: 0.04643114283680916 +Loss at step 150: 0.04262892156839371 +Loss at step 200: 0.041889745742082596 +Loss at step 250: 0.03882431983947754 +Loss at step 300: 0.03965645655989647 +Loss at step 350: 0.05659327656030655 +Loss at step 400: 0.03698767349123955 +Loss at step 450: 0.04747560992836952 +Loss at step 500: 0.05945868417620659 +Loss at step 550: 0.04171829670667648 +Loss at step 600: 0.03555087745189667 +Loss at step 650: 0.04049551486968994 +Loss at step 700: 0.042998410761356354 +Loss at step 750: 0.05536282807588577 +Loss at step 800: 0.03336206078529358 +Loss at step 850: 0.05179869383573532 +Loss at step 900: 0.03831992298364639 +Mean training loss after epoch 110: 0.046206923243381194 + +EPOCH: 111 +Loss at step 0: 0.04484570771455765 +Loss at step 50: 0.04083515703678131 +Loss at step 100: 0.03923998773097992 +Loss at step 150: 0.0517459437251091 +Loss at step 200: 0.040324896574020386 +Loss at step 250: 0.03986585885286331 +Loss at step 300: 0.04124492406845093 +Loss at step 350: 0.040269963443279266 +Loss at step 400: 0.046555034816265106 +Loss at step 450: 0.04533685743808746 +Loss at step 500: 0.04282549396157265 +Loss at step 550: 0.03972497582435608 +Loss at step 600: 0.049212925136089325 +Loss at step 650: 0.03900828957557678 +Loss at step 700: 0.04603997990489006 +Loss at step 750: 0.056413665413856506 +Loss at step 800: 0.0426405593752861 +Loss at step 850: 0.036932650953531265 +Loss at step 900: 0.04290015622973442 +Mean training loss after epoch 111: 0.046077357136459746 + +EPOCH: 112 +Loss at step 0: 0.05669388547539711 +Loss at step 50: 0.036204006522893906 +Loss at step 100: 0.03623444214463234 +Loss at step 150: 0.04464982450008392 +Loss at step 200: 0.044757310301065445 +Loss at step 250: 0.0417352095246315 +Loss at step 300: 0.036208026111125946 +Loss at step 350: 0.03865165635943413 +Loss at step 400: 0.06228946894407272 +Loss at step 450: 0.05004267022013664 +Loss at step 500: 0.04246343672275543 +Loss at step 550: 0.03977387398481369 +Loss at step 600: 0.04470471292734146 +Loss at step 650: 0.06702602654695511 +Loss at step 700: 0.045339301228523254 +Loss at step 750: 0.04207688197493553 +Loss at step 800: 0.035957638174295425 +Loss at step 850: 0.05529293790459633 +Loss at step 900: 0.044689472764730453 +Mean training loss after epoch 112: 0.04646850054833426 + +EPOCH: 113 +Loss at step 0: 0.05731961876153946 +Loss at step 50: 0.048718031495809555 +Loss at step 100: 0.03754620999097824 +Loss at step 150: 0.0398227758705616 +Loss at step 200: 0.04112764820456505 +Loss at step 250: 0.04593532159924507 +Loss at step 300: 0.04434916377067566 +Loss at step 350: 0.04119689017534256 +Loss at step 400: 0.04239797592163086 +Loss at step 450: 0.05920061096549034 +Loss at step 500: 0.058681391179561615 +Loss at step 550: 0.044205211102962494 +Loss at step 600: 0.05011138692498207 +Loss at step 650: 0.041396234184503555 +Loss at step 700: 0.0415150485932827 +Loss at step 750: 0.050309259444475174 +Loss at step 800: 0.049963776022195816 +Loss at step 850: 0.05925675109028816 +Loss at step 900: 0.051933471113443375 +Mean training loss after epoch 113: 0.04634149961753377 + +EPOCH: 114 +Loss at step 0: 0.057218555361032486 +Loss at step 50: 0.038528770208358765 +Loss at step 100: 0.036585792899131775 +Loss at step 150: 0.038837626576423645 +Loss at step 200: 0.04759618267416954 +Loss at step 250: 0.052498359233140945 +Loss at step 300: 0.04247991740703583 +Loss at step 350: 0.06130564957857132 +Loss at step 400: 0.035507287830114365 +Loss at step 450: 0.04961086064577103 +Loss at step 500: 0.06836702674627304 +Loss at step 550: 0.03405602648854256 +Loss at step 600: 0.04634140059351921 +Loss at step 650: 0.059668924659490585 +Loss at step 700: 0.0648765042424202 +Loss at step 750: 0.0533207468688488 +Loss at step 800: 0.05213884264230728 +Loss at step 850: 0.0393037348985672 +Loss at step 900: 0.04062580317258835 +Mean training loss after epoch 114: 0.04633027143172745 + +EPOCH: 115 +Loss at step 0: 0.03240546956658363 +Loss at step 50: 0.0400904044508934 +Loss at step 100: 0.04299856722354889 +Loss at step 150: 0.04431208595633507 +Loss at step 200: 0.05355609208345413 +Loss at step 250: 0.04266049340367317 +Loss at step 300: 0.06450315564870834 +Loss at step 350: 0.05760972201824188 +Loss at step 400: 0.044622279703617096 +Loss at step 450: 0.03800900653004646 +Loss at step 500: 0.04497479274868965 +Loss at step 550: 0.047578390687704086 +Loss at step 600: 0.05184239521622658 +Loss at step 650: 0.03965822607278824 +Loss at step 700: 0.04492467641830444 +Loss at step 750: 0.07390051335096359 +Loss at step 800: 0.047471221536397934 +Loss at step 850: 0.038541559129953384 +Loss at step 900: 0.04088141396641731 +Mean training loss after epoch 115: 0.04589878086810872 + +EPOCH: 116 +Loss at step 0: 0.037789762020111084 +Loss at step 50: 0.03849641978740692 +Loss at step 100: 0.037546537816524506 +Loss at step 150: 0.04194404557347298 +Loss at step 200: 0.05341813713312149 +Loss at step 250: 0.04351886734366417 +Loss at step 300: 0.04950180649757385 +Loss at step 350: 0.0374019555747509 +Loss at step 400: 0.03940993547439575 +Loss at step 450: 0.039905428886413574 +Loss at step 500: 0.05122208967804909 +Loss at step 550: 0.040826376527547836 +Loss at step 600: 0.04701400548219681 +Loss at step 650: 0.05695256218314171 +Loss at step 700: 0.0626857578754425 +Loss at step 750: 0.044478483498096466 +Loss at step 800: 0.04216416925191879 +Loss at step 850: 0.0601658895611763 +Loss at step 900: 0.04678907245397568 +Mean training loss after epoch 116: 0.04651021857315035 + +EPOCH: 117 +Loss at step 0: 0.037471212446689606 +Loss at step 50: 0.043972447514534 +Loss at step 100: 0.06001920998096466 +Loss at step 150: 0.03786353021860123 +Loss at step 200: 0.06093040481209755 +Loss at step 250: 0.04418031871318817 +Loss at step 300: 0.03976394236087799 +Loss at step 350: 0.036719322204589844 +Loss at step 400: 0.04779161512851715 +Loss at step 450: 0.05750448256731033 +Loss at step 500: 0.04163109511137009 +Loss at step 550: 0.05361882969737053 +Loss at step 600: 0.04255877807736397 +Loss at step 650: 0.05427778512239456 +Loss at step 700: 0.0404336042702198 +Loss at step 750: 0.04462295398116112 +Loss at step 800: 0.0645865723490715 +Loss at step 850: 0.04527711868286133 +Loss at step 900: 0.04018669202923775 +Mean training loss after epoch 117: 0.045971217761828956 + +EPOCH: 118 +Loss at step 0: 0.03755127638578415 +Loss at step 50: 0.038924314081668854 +Loss at step 100: 0.03860301151871681 +Loss at step 150: 0.04419249668717384 +Loss at step 200: 0.04153229668736458 +Loss at step 250: 0.039285022765398026 +Loss at step 300: 0.039819516241550446 +Loss at step 350: 0.05134643614292145 +Loss at step 400: 0.04172617569565773 +Loss at step 450: 0.04393266141414642 +Loss at step 500: 0.04356786236166954 +Loss at step 550: 0.04360780864953995 +Loss at step 600: 0.07231322675943375 +Loss at step 650: 0.0467379130423069 +Loss at step 700: 0.04267450049519539 +Loss at step 750: 0.045808371156454086 +Loss at step 800: 0.04864206910133362 +Loss at step 850: 0.0354076512157917 +Loss at step 900: 0.04114735871553421 +Mean training loss after epoch 118: 0.04586054847391048 + +EPOCH: 119 +Loss at step 0: 0.051709577441215515 +Loss at step 50: 0.037069033831357956 +Loss at step 100: 0.03780200332403183 +Loss at step 150: 0.04707491397857666 +Loss at step 200: 0.037481363862752914 +Loss at step 250: 0.04747417941689491 +Loss at step 300: 0.04326586425304413 +Loss at step 350: 0.03653787076473236 +Loss at step 400: 0.037612587213516235 +Loss at step 450: 0.04148251563310623 +Loss at step 500: 0.04048687219619751 +Loss at step 550: 0.050548721104860306 +Loss at step 600: 0.041866809129714966 +Loss at step 650: 0.042547255754470825 +Loss at step 700: 0.038675274699926376 +Loss at step 750: 0.07011289894580841 +Loss at step 800: 0.04232465848326683 +Loss at step 850: 0.05413239076733589 +Loss at step 900: 0.04107024148106575 +Mean training loss after epoch 119: 0.0459741363103297 + +EPOCH: 120 +Loss at step 0: 0.038504891097545624 +Loss at step 50: 0.05093608796596527 +Loss at step 100: 0.04420357197523117 +Loss at step 150: 0.03867683932185173 +Loss at step 200: 0.041248466819524765 +Loss at step 250: 0.045260462909936905 +Loss at step 300: 0.04531749337911606 +Loss at step 350: 0.04064628854393959 +Loss at step 400: 0.04476870223879814 +Loss at step 450: 0.045387718826532364 +Loss at step 500: 0.054671142250299454 +Loss at step 550: 0.04121801629662514 +Loss at step 600: 0.03822920471429825 +Loss at step 650: 0.04424199089407921 +Loss at step 700: 0.06056874990463257 +Loss at step 750: 0.07144053280353546 +Loss at step 800: 0.04469859600067139 +Loss at step 850: 0.05118793249130249 +Loss at step 900: 0.03993112966418266 +Mean training loss after epoch 120: 0.0461361338354663 + +EPOCH: 121 +Loss at step 0: 0.048598166555166245 +Loss at step 50: 0.046460285782814026 +Loss at step 100: 0.037309709936380386 +Loss at step 150: 0.035007044672966 +Loss at step 200: 0.042252421379089355 +Loss at step 250: 0.04417074844241142 +Loss at step 300: 0.048708923161029816 +Loss at step 350: 0.04290599003434181 +Loss at step 400: 0.059995606541633606 +Loss at step 450: 0.0568898543715477 +Loss at step 500: 0.040973760187625885 +Loss at step 550: 0.039172057062387466 +Loss at step 600: 0.04389740526676178 +Loss at step 650: 0.053678564727306366 +Loss at step 700: 0.03402849659323692 +Loss at step 750: 0.04648209363222122 +Loss at step 800: 0.06095977500081062 +Loss at step 850: 0.041144050657749176 +Loss at step 900: 0.040352340787649155 +Mean training loss after epoch 121: 0.045643532465992454 + +EPOCH: 122 +Loss at step 0: 0.04100384935736656 +Loss at step 50: 0.049978889524936676 +Loss at step 100: 0.05517248064279556 +Loss at step 150: 0.031055454164743423 +Loss at step 200: 0.041113726794719696 +Loss at step 250: 0.040144968777894974 +Loss at step 300: 0.04757293313741684 +Loss at step 350: 0.03453289344906807 +Loss at step 400: 0.04535461217164993 +Loss at step 450: 0.04796682298183441 +Loss at step 500: 0.047278258949518204 +Loss at step 550: 0.04443208500742912 +Loss at step 600: 0.06164064258337021 +Loss at step 650: 0.042972173541784286 +Loss at step 700: 0.044566426426172256 +Loss at step 750: 0.04029228165745735 +Loss at step 800: 0.08674780279397964 +Loss at step 850: 0.04078933969140053 +Loss at step 900: 0.041573911905288696 +Mean training loss after epoch 122: 0.04543305902696177 + +EPOCH: 123 +Loss at step 0: 0.040762461721897125 +Loss at step 50: 0.06050635129213333 +Loss at step 100: 0.03502603992819786 +Loss at step 150: 0.042347244918346405 +Loss at step 200: 0.042769353836774826 +Loss at step 250: 0.05909914895892143 +Loss at step 300: 0.042644500732421875 +Loss at step 350: 0.0611402653157711 +Loss at step 400: 0.042645882815122604 +Loss at step 450: 0.04270201548933983 +Loss at step 500: 0.05965986102819443 +Loss at step 550: 0.06133631244301796 +Loss at step 600: 0.05163673311471939 +Loss at step 650: 0.05557316541671753 +Loss at step 700: 0.044210486114025116 +Loss at step 750: 0.07129313796758652 +Loss at step 800: 0.05499745160341263 +Loss at step 850: 0.0390758290886879 +Loss at step 900: 0.04831209033727646 +Mean training loss after epoch 123: 0.04600447345215247 + +EPOCH: 124 +Loss at step 0: 0.05026842653751373 +Loss at step 50: 0.043064311146736145 +Loss at step 100: 0.046557847410440445 +Loss at step 150: 0.03744037076830864 +Loss at step 200: 0.03734718635678291 +Loss at step 250: 0.04483895003795624 +Loss at step 300: 0.04188080504536629 +Loss at step 350: 0.03980600833892822 +Loss at step 400: 0.053175196051597595 +Loss at step 450: 0.038369517773389816 +Loss at step 500: 0.036276157945394516 +Loss at step 550: 0.044941678643226624 +Loss at step 600: 0.03974446654319763 +Loss at step 650: 0.03789375722408295 +Loss at step 700: 0.04138591140508652 +Loss at step 750: 0.054783109575510025 +Loss at step 800: 0.056210268288850784 +Loss at step 850: 0.05452005937695503 +Loss at step 900: 0.05974148213863373 +Mean training loss after epoch 124: 0.04581771659325244 + +EPOCH: 125 +Loss at step 0: 0.041972484439611435 +Loss at step 50: 0.044825561344623566 +Loss at step 100: 0.039463579654693604 +Loss at step 150: 0.0397324413061142 +Loss at step 200: 0.03820671886205673 +Loss at step 250: 0.056320372968912125 +Loss at step 300: 0.03598359227180481 +Loss at step 350: 0.038493890315294266 +Loss at step 400: 0.04525696486234665 +Loss at step 450: 0.03702262043952942 +Loss at step 500: 0.04839034751057625 +Loss at step 550: 0.05297674238681793 +Loss at step 600: 0.05814902484416962 +Loss at step 650: 0.032197099179029465 +Loss at step 700: 0.04624997824430466 +Loss at step 750: 0.04012664407491684 +Loss at step 800: 0.03970684856176376 +Loss at step 850: 0.03787669166922569 +Loss at step 900: 0.045866481959819794 +Mean training loss after epoch 125: 0.04531624829217887 + +EPOCH: 126 +Loss at step 0: 0.05826280266046524 +Loss at step 50: 0.04089495167136192 +Loss at step 100: 0.041354529559612274 +Loss at step 150: 0.03855195641517639 +Loss at step 200: 0.04952150955796242 +Loss at step 250: 0.04119924455881119 +Loss at step 300: 0.0543883815407753 +Loss at step 350: 0.04115807265043259 +Loss at step 400: 0.0343698151409626 +Loss at step 450: 0.04259225353598595 +Loss at step 500: 0.04278476908802986 +Loss at step 550: 0.052609384059906006 +Loss at step 600: 0.04406945779919624 +Loss at step 650: 0.05707991123199463 +Loss at step 700: 0.04246300086379051 +Loss at step 750: 0.04478396475315094 +Loss at step 800: 0.044683922082185745 +Loss at step 850: 0.0439789742231369 +Loss at step 900: 0.04476439207792282 +Mean training loss after epoch 126: 0.045250286509010834 + +EPOCH: 127 +Loss at step 0: 0.040831420570611954 +Loss at step 50: 0.04936080798506737 +Loss at step 100: 0.039828091859817505 +Loss at step 150: 0.04344824329018593 +Loss at step 200: 0.07785246521234512 +Loss at step 250: 0.041770193725824356 +Loss at step 300: 0.05306349694728851 +Loss at step 350: 0.040510062128305435 +Loss at step 400: 0.03641214966773987 +Loss at step 450: 0.04353412240743637 +Loss at step 500: 0.03834361955523491 +Loss at step 550: 0.03922133147716522 +Loss at step 600: 0.055905070155858994 +Loss at step 650: 0.04407220706343651 +Loss at step 700: 0.04791129752993584 +Loss at step 750: 0.05337601155042648 +Loss at step 800: 0.035851772874593735 +Loss at step 850: 0.03793738782405853 +Loss at step 900: 0.0419594831764698 +Mean training loss after epoch 127: 0.04599338613831794 + +EPOCH: 128 +Loss at step 0: 0.03862268105149269 +Loss at step 50: 0.0405275896191597 +Loss at step 100: 0.03689344599843025 +Loss at step 150: 0.042172908782958984 +Loss at step 200: 0.04073482006788254 +Loss at step 250: 0.04244450479745865 +Loss at step 300: 0.043165381997823715 +Loss at step 350: 0.040223345160484314 +Loss at step 400: 0.06073211506009102 +Loss at step 450: 0.04653718322515488 +Loss at step 500: 0.0404045507311821 +Loss at step 550: 0.05022686347365379 +Loss at step 600: 0.056685853749513626 +Loss at step 650: 0.05552152916789055 +Loss at step 700: 0.0357365682721138 +Loss at step 750: 0.039526671171188354 +Loss at step 800: 0.046645183116197586 +Loss at step 850: 0.047234948724508286 +Loss at step 900: 0.0433802492916584 +Mean training loss after epoch 128: 0.0459720018341629 + +EPOCH: 129 +Loss at step 0: 0.03502752259373665 +Loss at step 50: 0.05473744496703148 +Loss at step 100: 0.03878574073314667 +Loss at step 150: 0.05897366255521774 +Loss at step 200: 0.04994506761431694 +Loss at step 250: 0.04663032293319702 +Loss at step 300: 0.04252858832478523 +Loss at step 350: 0.042376160621643066 +Loss at step 400: 0.04295406863093376 +Loss at step 450: 0.03534657135605812 +Loss at step 500: 0.05883531644940376 +Loss at step 550: 0.041431523859500885 +Loss at step 600: 0.058561936020851135 +Loss at step 650: 0.03677096962928772 +Loss at step 700: 0.03849569335579872 +Loss at step 750: 0.03586084023118019 +Loss at step 800: 0.04240897297859192 +Loss at step 850: 0.04333720728754997 +Loss at step 900: 0.05081409215927124 +Mean training loss after epoch 129: 0.04512793424604798 + +EPOCH: 130 +Loss at step 0: 0.033854663372039795 +Loss at step 50: 0.052894338965415955 +Loss at step 100: 0.04445578157901764 +Loss at step 150: 0.04172798991203308 +Loss at step 200: 0.04548418149352074 +Loss at step 250: 0.036784593015909195 +Loss at step 300: 0.04260953888297081 +Loss at step 350: 0.06172532960772514 +Loss at step 400: 0.039861489087343216 +Loss at step 450: 0.04947962984442711 +Loss at step 500: 0.07458124309778214 +Loss at step 550: 0.039255235344171524 +Loss at step 600: 0.03499902784824371 +Loss at step 650: 0.03839031606912613 +Loss at step 700: 0.036956459283828735 +Loss at step 750: 0.04086356982588768 +Loss at step 800: 0.06321340799331665 +Loss at step 850: 0.03970802575349808 +Loss at step 900: 0.04106743261218071 +Mean training loss after epoch 130: 0.04547428623286646 + +EPOCH: 131 +Loss at step 0: 0.0371694453060627 +Loss at step 50: 0.04349057376384735 +Loss at step 100: 0.04466467350721359 +Loss at step 150: 0.045540228486061096 +Loss at step 200: 0.04217632859945297 +Loss at step 250: 0.03948524594306946 +Loss at step 300: 0.05858780816197395 +Loss at step 350: 0.06880269944667816 +Loss at step 400: 0.037473414093256 +Loss at step 450: 0.039207573980093 +Loss at step 500: 0.05114085227251053 +Loss at step 550: 0.04645032063126564 +Loss at step 600: 0.038855068385601044 +Loss at step 650: 0.05013133957982063 +Loss at step 700: 0.041280124336481094 +Loss at step 750: 0.03917493671178818 +Loss at step 800: 0.03518728166818619 +Loss at step 850: 0.04183494672179222 +Loss at step 900: 0.05750581994652748 +Mean training loss after epoch 131: 0.04626308222696471 + +EPOCH: 132 +Loss at step 0: 0.041072480380535126 +Loss at step 50: 0.04396248236298561 +Loss at step 100: 0.05704709514975548 +Loss at step 150: 0.045102186501026154 +Loss at step 200: 0.03581955283880234 +Loss at step 250: 0.06802576035261154 +Loss at step 300: 0.04626915976405144 +Loss at step 350: 0.036233335733413696 +Loss at step 400: 0.05813797935843468 +Loss at step 450: 0.03625818341970444 +Loss at step 500: 0.038066864013671875 +Loss at step 550: 0.05933103710412979 +Loss at step 600: 0.03980471193790436 +Loss at step 650: 0.04284186661243439 +Loss at step 700: 0.05383782833814621 +Loss at step 750: 0.0436798557639122 +Loss at step 800: 0.04281007498502731 +Loss at step 850: 0.05944066867232323 +Loss at step 900: 0.04448529705405235 +Mean training loss after epoch 132: 0.045641672376519456 + +EPOCH: 133 +Loss at step 0: 0.05789121612906456 +Loss at step 50: 0.03706563636660576 +Loss at step 100: 0.04219038411974907 +Loss at step 150: 0.04092131927609444 +Loss at step 200: 0.055233344435691833 +Loss at step 250: 0.04096319153904915 +Loss at step 300: 0.053505729883909225 +Loss at step 350: 0.04395528510212898 +Loss at step 400: 0.04027791693806648 +Loss at step 450: 0.05819796398282051 +Loss at step 500: 0.05279282480478287 +Loss at step 550: 0.04257214069366455 +Loss at step 600: 0.04531795531511307 +Loss at step 650: 0.0362158864736557 +Loss at step 700: 0.04103061929345131 +Loss at step 750: 0.05990707129240036 +Loss at step 800: 0.039924755692481995 +Loss at step 850: 0.03959909826517105 +Loss at step 900: 0.05487546697258949 +Mean training loss after epoch 133: 0.04584319568249081 + +EPOCH: 134 +Loss at step 0: 0.03977786377072334 +Loss at step 50: 0.0435795783996582 +Loss at step 100: 0.0525563582777977 +Loss at step 150: 0.04441561922430992 +Loss at step 200: 0.04466961324214935 +Loss at step 250: 0.0559440515935421 +Loss at step 300: 0.036405693739652634 +Loss at step 350: 0.057199254631996155 +Loss at step 400: 0.03914961963891983 +Loss at step 450: 0.04516276344656944 +Loss at step 500: 0.046975865960121155 +Loss at step 550: 0.06159335747361183 +Loss at step 600: 0.03958824276924133 +Loss at step 650: 0.034010034054517746 +Loss at step 700: 0.03919408842921257 +Loss at step 750: 0.04500383511185646 +Loss at step 800: 0.04093002527952194 +Loss at step 850: 0.043238770216703415 +Loss at step 900: 0.04340122267603874 +Mean training loss after epoch 134: 0.046198522606526994 + +EPOCH: 135 +Loss at step 0: 0.05791408196091652 +Loss at step 50: 0.07993082702159882 +Loss at step 100: 0.03890639916062355 +Loss at step 150: 0.047152455896139145 +Loss at step 200: 0.04214800149202347 +Loss at step 250: 0.053371671587228775 +Loss at step 300: 0.04034150391817093 +Loss at step 350: 0.03971869871020317 +Loss at step 400: 0.036422330886125565 +Loss at step 450: 0.053443558514118195 +Loss at step 500: 0.042906515300273895 +Loss at step 550: 0.039306722581386566 +Loss at step 600: 0.0388847179710865 +Loss at step 650: 0.04005836695432663 +Loss at step 700: 0.05823518708348274 +Loss at step 750: 0.04140573367476463 +Loss at step 800: 0.03347420692443848 +Loss at step 850: 0.041324496269226074 +Loss at step 900: 0.03632740676403046 +Mean training loss after epoch 135: 0.04562917453790906 + +EPOCH: 136 +Loss at step 0: 0.040670428425073624 +Loss at step 50: 0.03875201195478439 +Loss at step 100: 0.043917179107666016 +Loss at step 150: 0.06483276188373566 +Loss at step 200: 0.03349987044930458 +Loss at step 250: 0.051613204181194305 +Loss at step 300: 0.04034166410565376 +Loss at step 350: 0.03682360798120499 +Loss at step 400: 0.038947463035583496 +Loss at step 450: 0.051983773708343506 +Loss at step 500: 0.05303558334708214 +Loss at step 550: 0.039053842425346375 +Loss at step 600: 0.046976201236248016 +Loss at step 650: 0.045643147081136703 +Loss at step 700: 0.034928034991025925 +Loss at step 750: 0.044426947832107544 +Loss at step 800: 0.041393619030714035 +Loss at step 850: 0.042128901928663254 +Loss at step 900: 0.03995876386761665 +Mean training loss after epoch 136: 0.04578306689969639 + +EPOCH: 137 +Loss at step 0: 0.03910467028617859 +Loss at step 50: 0.03934948518872261 +Loss at step 100: 0.037075240164995193 +Loss at step 150: 0.0392436683177948 +Loss at step 200: 0.05861879885196686 +Loss at step 250: 0.03837770223617554 +Loss at step 300: 0.0688202828168869 +Loss at step 350: 0.04230966418981552 +Loss at step 400: 0.05877862498164177 +Loss at step 450: 0.05266865715384483 +Loss at step 500: 0.03888147324323654 +Loss at step 550: 0.037766627967357635 +Loss at step 600: 0.038470346480607986 +Loss at step 650: 0.05293159931898117 +Loss at step 700: 0.038872748613357544 +Loss at step 750: 0.05478575825691223 +Loss at step 800: 0.04210440814495087 +Loss at step 850: 0.03872475028038025 +Loss at step 900: 0.03433847054839134 +Mean training loss after epoch 137: 0.04534758429632767 + +EPOCH: 138 +Loss at step 0: 0.036849357187747955 +Loss at step 50: 0.04454248771071434 +Loss at step 100: 0.04018501937389374 +Loss at step 150: 0.04496845230460167 +Loss at step 200: 0.04221091419458389 +Loss at step 250: 0.05172024667263031 +Loss at step 300: 0.06087923049926758 +Loss at step 350: 0.059260811656713486 +Loss at step 400: 0.05331677198410034 +Loss at step 450: 0.05469122156500816 +Loss at step 500: 0.05487817898392677 +Loss at step 550: 0.03229672089219093 +Loss at step 600: 0.04171815514564514 +Loss at step 650: 0.056774429976940155 +Loss at step 700: 0.033217065036296844 +Loss at step 750: 0.03716914355754852 +Loss at step 800: 0.03678344935178757 +Loss at step 850: 0.03627348691225052 +Loss at step 900: 0.034149955958127975 +Mean training loss after epoch 138: 0.04598082240615318 + +EPOCH: 139 +Loss at step 0: 0.04552232474088669 +Loss at step 50: 0.053638193756341934 +Loss at step 100: 0.05953767150640488 +Loss at step 150: 0.04585081711411476 +Loss at step 200: 0.03536457195878029 +Loss at step 250: 0.04270733520388603 +Loss at step 300: 0.047890517860651016 +Loss at step 350: 0.047405242919921875 +Loss at step 400: 0.05747320130467415 +Loss at step 450: 0.04017220810055733 +Loss at step 500: 0.0424514003098011 +Loss at step 550: 0.05905534699559212 +Loss at step 600: 0.05135008320212364 +Loss at step 650: 0.03714684024453163 +Loss at step 700: 0.039433714002370834 +Loss at step 750: 0.040111348032951355 +Loss at step 800: 0.044568419456481934 +Loss at step 850: 0.05558893084526062 +Loss at step 900: 0.049497321248054504 +Mean training loss after epoch 139: 0.04576002788553233 + +EPOCH: 140 +Loss at step 0: 0.05574393644928932 +Loss at step 50: 0.0405304878950119 +Loss at step 100: 0.037973348051309586 +Loss at step 150: 0.04575343802571297 +Loss at step 200: 0.03651919588446617 +Loss at step 250: 0.051694780588150024 +Loss at step 300: 0.04050759971141815 +Loss at step 350: 0.053625427186489105 +Loss at step 400: 0.036020658910274506 +Loss at step 450: 0.04109492152929306 +Loss at step 500: 0.03977413475513458 +Loss at step 550: 0.040321312844753265 +Loss at step 600: 0.04473036900162697 +Loss at step 650: 0.06308896839618683 +Loss at step 700: 0.05576609447598457 +Loss at step 750: 0.03580252081155777 +Loss at step 800: 0.04580359533429146 +Loss at step 850: 0.04389779269695282 +Loss at step 900: 0.04559674486517906 +Mean training loss after epoch 140: 0.045368334477040556 + +EPOCH: 141 +Loss at step 0: 0.04766719043254852 +Loss at step 50: 0.043549906462430954 +Loss at step 100: 0.04613008350133896 +Loss at step 150: 0.05635123327374458 +Loss at step 200: 0.05265991762280464 +Loss at step 250: 0.04313316196203232 +Loss at step 300: 0.04594475403428078 +Loss at step 350: 0.060311976820230484 +Loss at step 400: 0.042665306478738785 +Loss at step 450: 0.042269911617040634 +Loss at step 500: 0.06722458451986313 +Loss at step 550: 0.03511561453342438 +Loss at step 600: 0.04270784184336662 +Loss at step 650: 0.04206705465912819 +Loss at step 700: 0.03623741492629051 +Loss at step 750: 0.03980948030948639 +Loss at step 800: 0.050176285207271576 +Loss at step 850: 0.03945159167051315 +Loss at step 900: 0.05429309234023094 +Mean training loss after epoch 141: 0.04637172566190648 + +EPOCH: 142 +Loss at step 0: 0.04240446165204048 +Loss at step 50: 0.05365457758307457 +Loss at step 100: 0.05313526839017868 +Loss at step 150: 0.04133016988635063 +Loss at step 200: 0.03692440688610077 +Loss at step 250: 0.04010925069451332 +Loss at step 300: 0.04152604565024376 +Loss at step 350: 0.052026428282260895 +Loss at step 400: 0.0384528785943985 +Loss at step 450: 0.03092273324728012 +Loss at step 500: 0.04847479239106178 +Loss at step 550: 0.03802608326077461 +Loss at step 600: 0.043377961963415146 +Loss at step 650: 0.046527136117219925 +Loss at step 700: 0.04104185104370117 +Loss at step 750: 0.041326746344566345 +Loss at step 800: 0.0401303693652153 +Loss at step 850: 0.056367017328739166 +Loss at step 900: 0.04089125618338585 +Mean training loss after epoch 142: 0.046012986546704 + +EPOCH: 143 +Loss at step 0: 0.06598270684480667 +Loss at step 50: 0.05495079979300499 +Loss at step 100: 0.04046940803527832 +Loss at step 150: 0.05609951168298721 +Loss at step 200: 0.0539410263299942 +Loss at step 250: 0.04219687730073929 +Loss at step 300: 0.037942685186862946 +Loss at step 350: 0.031891148537397385 +Loss at step 400: 0.05980689078569412 +Loss at step 450: 0.04871036112308502 +Loss at step 500: 0.04264569655060768 +Loss at step 550: 0.04309279844164848 +Loss at step 600: 0.06642031669616699 +Loss at step 650: 0.042909782379865646 +Loss at step 700: 0.04131978750228882 +Loss at step 750: 0.06211844086647034 +Loss at step 800: 0.05496831610798836 +Loss at step 850: 0.03892774507403374 +Loss at step 900: 0.039776165038347244 +Mean training loss after epoch 143: 0.04625846146306059 + +EPOCH: 144 +Loss at step 0: 0.04006717726588249 +Loss at step 50: 0.037215106189250946 +Loss at step 100: 0.04224834591150284 +Loss at step 150: 0.055948562920093536 +Loss at step 200: 0.06023121625185013 +Loss at step 250: 0.03742901608347893 +Loss at step 300: 0.036462996155023575 +Loss at step 350: 0.0405937060713768 +Loss at step 400: 0.03346812725067139 +Loss at step 450: 0.0524187795817852 +Loss at step 500: 0.0422993004322052 +Loss at step 550: 0.04444640502333641 +Loss at step 600: 0.037637852132320404 +Loss at step 650: 0.03817237541079521 +Loss at step 700: 0.03713873028755188 +Loss at step 750: 0.07052743434906006 +Loss at step 800: 0.05144381895661354 +Loss at step 850: 0.039708103984594345 +Loss at step 900: 0.042540404945611954 +Mean training loss after epoch 144: 0.04556358749193868 + +EPOCH: 145 +Loss at step 0: 0.04786206781864166 +Loss at step 50: 0.03650808706879616 +Loss at step 100: 0.03653348237276077 +Loss at step 150: 0.05578773841261864 +Loss at step 200: 0.04559545964002609 +Loss at step 250: 0.05184699967503548 +Loss at step 300: 0.04513511806726456 +Loss at step 350: 0.03866441920399666 +Loss at step 400: 0.042488545179367065 +Loss at step 450: 0.04186408221721649 +Loss at step 500: 0.036692872643470764 +Loss at step 550: 0.038832735270261765 +Loss at step 600: 0.0368347242474556 +Loss at step 650: 0.06775815784931183 +Loss at step 700: 0.03885146602988243 +Loss at step 750: 0.03941559046506882 +Loss at step 800: 0.03837131708860397 +Loss at step 850: 0.03325490653514862 +Loss at step 900: 0.043703410774469376 +Mean training loss after epoch 145: 0.04502220525900756 + +EPOCH: 146 +Loss at step 0: 0.035921309143304825 +Loss at step 50: 0.06218082830309868 +Loss at step 100: 0.04185305908322334 +Loss at step 150: 0.03764014691114426 +Loss at step 200: 0.05643292888998985 +Loss at step 250: 0.037144020199775696 +Loss at step 300: 0.04180319234728813 +Loss at step 350: 0.04509423300623894 +Loss at step 400: 0.04134766757488251 +Loss at step 450: 0.04754731059074402 +Loss at step 500: 0.05761423707008362 +Loss at step 550: 0.05948308855295181 +Loss at step 600: 0.04126744344830513 +Loss at step 650: 0.043336980044841766 +Loss at step 700: 0.050717536360025406 +Loss at step 750: 0.05504738911986351 +Loss at step 800: 0.05443797633051872 +Loss at step 850: 0.038941845297813416 +Loss at step 900: 0.03910057246685028 +Mean training loss after epoch 146: 0.045475439496163624 + +EPOCH: 147 +Loss at step 0: 0.05990152433514595 +Loss at step 50: 0.05167654529213905 +Loss at step 100: 0.03947964683175087 +Loss at step 150: 0.03925156220793724 +Loss at step 200: 0.05298918858170509 +Loss at step 250: 0.07117048650979996 +Loss at step 300: 0.04586714133620262 +Loss at step 350: 0.047423604875802994 +Loss at step 400: 0.05126449838280678 +Loss at step 450: 0.07629740238189697 +Loss at step 500: 0.04263874888420105 +Loss at step 550: 0.04026797413825989 +Loss at step 600: 0.05961938574910164 +Loss at step 650: 0.0441543348133564 +Loss at step 700: 0.08301635831594467 +Loss at step 750: 0.04251045361161232 +Loss at step 800: 0.05179477110505104 +Loss at step 850: 0.0435745008289814 +Loss at step 900: 0.045577626675367355 +Mean training loss after epoch 147: 0.04551575672445394 + +EPOCH: 148 +Loss at step 0: 0.045178480446338654 +Loss at step 50: 0.03654982149600983 +Loss at step 100: 0.05339212715625763 +Loss at step 150: 0.04239986464381218 +Loss at step 200: 0.044157423079013824 +Loss at step 250: 0.04163976013660431 +Loss at step 300: 0.03853766620159149 +Loss at step 350: 0.04205953702330589 +Loss at step 400: 0.03890163078904152 +Loss at step 450: 0.04245118796825409 +Loss at step 500: 0.03657424449920654 +Loss at step 550: 0.07423318922519684 +Loss at step 600: 0.03808849677443504 +Loss at step 650: 0.03773798793554306 +Loss at step 700: 0.05481389909982681 +Loss at step 750: 0.036997225135564804 +Loss at step 800: 0.06140691787004471 +Loss at step 850: 0.04195407032966614 +Loss at step 900: 0.03827495872974396 +Mean training loss after epoch 148: 0.04573833345890299 + +EPOCH: 149 +Loss at step 0: 0.04809282347559929 +Loss at step 50: 0.05493403226137161 +Loss at step 100: 0.04131351038813591 +Loss at step 150: 0.03981474041938782 +Loss at step 200: 0.05969184264540672 +Loss at step 250: 0.04067220166325569 +Loss at step 300: 0.04409331455826759 +Loss at step 350: 0.04014787822961807 +Loss at step 400: 0.046923354268074036 +Loss at step 450: 0.04042065888643265 +Loss at step 500: 0.04225459322333336 +Loss at step 550: 0.0412532240152359 +Loss at step 600: 0.05636883154511452 +Loss at step 650: 0.05289283022284508 +Loss at step 700: 0.032057926058769226 +Loss at step 750: 0.041756950318813324 +Loss at step 800: 0.05284972861409187 +Loss at step 850: 0.06394171714782715 +Loss at step 900: 0.04469018802046776 +Mean training loss after epoch 149: 0.045230526524756766 + +EPOCH: 150 +Loss at step 0: 0.043388642370700836 +Loss at step 50: 0.052566979080438614 +Loss at step 100: 0.0421791709959507 +Loss at step 150: 0.03338681906461716 +Loss at step 200: 0.03972415626049042 +Loss at step 250: 0.03962627053260803 +Loss at step 300: 0.06661241501569748 +Loss at step 350: 0.046421118080616 +Loss at step 400: 0.04191231727600098 +Loss at step 450: 0.03613325580954552 +Loss at step 500: 0.041502777487039566 +Loss at step 550: 0.04492828622460365 +Loss at step 600: 0.037504687905311584 +Loss at step 650: 0.049275368452072144 +Loss at step 700: 0.051390472799539566 +Loss at step 750: 0.03766629844903946 +Loss at step 800: 0.04697275906801224 +Loss at step 850: 0.03758503869175911 +Loss at step 900: 0.03459473326802254 +Mean training loss after epoch 150: 0.04578903002509557 + +EPOCH: 151 +Loss at step 0: 0.03707846626639366 +Loss at step 50: 0.04145946353673935 +Loss at step 100: 0.0949907973408699 +Loss at step 150: 0.05517743527889252 +Loss at step 200: 0.039290156215429306 +Loss at step 250: 0.05039934068918228 +Loss at step 300: 0.040898192673921585 +Loss at step 350: 0.05561210587620735 +Loss at step 400: 0.038385841995477676 +Loss at step 450: 0.05343605577945709 +Loss at step 500: 0.03507044166326523 +Loss at step 550: 0.0568498857319355 +Loss at step 600: 0.04534614831209183 +Loss at step 650: 0.03592472895979881 +Loss at step 700: 0.03634046018123627 +Loss at step 750: 0.03859354928135872 +Loss at step 800: 0.05330267548561096 +Loss at step 850: 0.03291086107492447 +Loss at step 900: 0.05305907875299454 +Mean training loss after epoch 151: 0.045274031372355625 + +EPOCH: 152 +Loss at step 0: 0.044275812804698944 +Loss at step 50: 0.04843960702419281 +Loss at step 100: 0.052989374846220016 +Loss at step 150: 0.04196736216545105 +Loss at step 200: 0.04173903539776802 +Loss at step 250: 0.04352372884750366 +Loss at step 300: 0.036866143345832825 +Loss at step 350: 0.04701049253344536 +Loss at step 400: 0.042806681245565414 +Loss at step 450: 0.03811962530016899 +Loss at step 500: 0.0360080786049366 +Loss at step 550: 0.03569600731134415 +Loss at step 600: 0.04361372068524361 +Loss at step 650: 0.040773551911115646 +Loss at step 700: 0.042050592601299286 +Loss at step 750: 0.04778597131371498 +Loss at step 800: 0.03920777514576912 +Loss at step 850: 0.050363898277282715 +Loss at step 900: 0.054238736629486084 +Mean training loss after epoch 152: 0.04526704357766203 + +EPOCH: 153 +Loss at step 0: 0.039691705256700516 +Loss at step 50: 0.041748300194740295 +Loss at step 100: 0.059794433414936066 +Loss at step 150: 0.042257245630025864 +Loss at step 200: 0.03810141980648041 +Loss at step 250: 0.05992001295089722 +Loss at step 300: 0.03939557075500488 +Loss at step 350: 0.03731861338019371 +Loss at step 400: 0.053389254957437515 +Loss at step 450: 0.06360919028520584 +Loss at step 500: 0.05024760961532593 +Loss at step 550: 0.03947141021490097 +Loss at step 600: 0.03574201092123985 +Loss at step 650: 0.03473079204559326 +Loss at step 700: 0.04438640922307968 +Loss at step 750: 0.06018330901861191 +Loss at step 800: 0.04331561550498009 +Loss at step 850: 0.05163159593939781 +Loss at step 900: 0.037725917994976044 +Mean training loss after epoch 153: 0.04605184269270727 + +EPOCH: 154 +Loss at step 0: 0.043428413569927216 +Loss at step 50: 0.06728603690862656 +Loss at step 100: 0.033477336168289185 +Loss at step 150: 0.0416242741048336 +Loss at step 200: 0.0435677133500576 +Loss at step 250: 0.03733016178011894 +Loss at step 300: 0.04144220054149628 +Loss at step 350: 0.06810735911130905 +Loss at step 400: 0.04457342252135277 +Loss at step 450: 0.04851897805929184 +Loss at step 500: 0.04180512949824333 +Loss at step 550: 0.03760434314608574 +Loss at step 600: 0.0396546833217144 +Loss at step 650: 0.04627712443470955 +Loss at step 700: 0.04180819168686867 +Loss at step 750: 0.056805189698934555 +Loss at step 800: 0.05428408086299896 +Loss at step 850: 0.03998415172100067 +Loss at step 900: 0.03821880370378494 +Mean training loss after epoch 154: 0.04552875211172457 + +EPOCH: 155 +Loss at step 0: 0.04474431648850441 +Loss at step 50: 0.04658782482147217 +Loss at step 100: 0.052998948842287064 +Loss at step 150: 0.05457345023751259 +Loss at step 200: 0.037124812602996826 +Loss at step 250: 0.04672694951295853 +Loss at step 300: 0.059743404388427734 +Loss at step 350: 0.05665802210569382 +Loss at step 400: 0.03814123198390007 +Loss at step 450: 0.054547786712646484 +Loss at step 500: 0.041575852781534195 +Loss at step 550: 0.05408891290426254 +Loss at step 600: 0.04946219176054001 +Loss at step 650: 0.05613132193684578 +Loss at step 700: 0.03633062541484833 +Loss at step 750: 0.03935260325670242 +Loss at step 800: 0.03483184427022934 +Loss at step 850: 0.05248025059700012 +Loss at step 900: 0.03814980387687683 +Mean training loss after epoch 155: 0.04548057709643836 + +EPOCH: 156 +Loss at step 0: 0.042536843568086624 +Loss at step 50: 0.05924232676625252 +Loss at step 100: 0.04061850905418396 +Loss at step 150: 0.05129500851035118 +Loss at step 200: 0.04443904384970665 +Loss at step 250: 0.042334310710430145 +Loss at step 300: 0.058757148683071136 +Loss at step 350: 0.06559840589761734 +Loss at step 400: 0.04199784994125366 +Loss at step 450: 0.03611515089869499 +Loss at step 500: 0.05993063375353813 +Loss at step 550: 0.04257189482450485 +Loss at step 600: 0.044019874185323715 +Loss at step 650: 0.09105333685874939 +Loss at step 700: 0.06172659248113632 +Loss at step 750: 0.03914230689406395 +Loss at step 800: 0.038897331804037094 +Loss at step 850: 0.045345887541770935 +Loss at step 900: 0.03844953328371048 +Mean training loss after epoch 156: 0.04548532924434142 + +EPOCH: 157 +Loss at step 0: 0.044992923736572266 +Loss at step 50: 0.044939108192920685 +Loss at step 100: 0.065875343978405 +Loss at step 150: 0.04289773106575012 +Loss at step 200: 0.05809518322348595 +Loss at step 250: 0.039758339524269104 +Loss at step 300: 0.039036646485328674 +Loss at step 350: 0.036862581968307495 +Loss at step 400: 0.03962497040629387 +Loss at step 450: 0.04077516868710518 +Loss at step 500: 0.06700360029935837 +Loss at step 550: 0.03938477113842964 +Loss at step 600: 0.04124846309423447 +Loss at step 650: 0.04274895042181015 +Loss at step 700: 0.044688545167446136 +Loss at step 750: 0.05564628541469574 +Loss at step 800: 0.04488258808851242 +Loss at step 850: 0.06354968994855881 +Loss at step 900: 0.03879409655928612 +Mean training loss after epoch 157: 0.045047152837091035 + +EPOCH: 158 +Loss at step 0: 0.04908401146531105 +Loss at step 50: 0.039189089089632034 +Loss at step 100: 0.058827657252550125 +Loss at step 150: 0.04006434977054596 +Loss at step 200: 0.04048857092857361 +Loss at step 250: 0.05965008586645126 +Loss at step 300: 0.05151711404323578 +Loss at step 350: 0.036515846848487854 +Loss at step 400: 0.040616486221551895 +Loss at step 450: 0.04044902324676514 +Loss at step 500: 0.05114457383751869 +Loss at step 550: 0.03619116172194481 +Loss at step 600: 0.038298461586236954 +Loss at step 650: 0.04230763390660286 +Loss at step 700: 0.04716203734278679 +Loss at step 750: 0.04186031594872475 +Loss at step 800: 0.04172353819012642 +Loss at step 850: 0.0344061404466629 +Loss at step 900: 0.031093526631593704 +Mean training loss after epoch 158: 0.04496006951578009 + +EPOCH: 159 +Loss at step 0: 0.03923662006855011 +Loss at step 50: 0.06269536912441254 +Loss at step 100: 0.04445149749517441 +Loss at step 150: 0.037026695907115936 +Loss at step 200: 0.06086227297782898 +Loss at step 250: 0.05890753120183945 +Loss at step 300: 0.04148515313863754 +Loss at step 350: 0.04519776999950409 +Loss at step 400: 0.03972244635224342 +Loss at step 450: 0.06374932825565338 +Loss at step 500: 0.04154137149453163 +Loss at step 550: 0.03708213195204735 +Loss at step 600: 0.03716995194554329 +Loss at step 650: 0.0348193496465683 +Loss at step 700: 0.04227104038000107 +Loss at step 750: 0.041377026587724686 +Loss at step 800: 0.03751562535762787 +Loss at step 850: 0.03657463937997818 +Loss at step 900: 0.05648829787969589 +Mean training loss after epoch 159: 0.04557349732014607 + +EPOCH: 160 +Loss at step 0: 0.05733686313033104 +Loss at step 50: 0.04218154773116112 +Loss at step 100: 0.04229149594902992 +Loss at step 150: 0.07705415040254593 +Loss at step 200: 0.04073299467563629 +Loss at step 250: 0.06253980845212936 +Loss at step 300: 0.04513583332300186 +Loss at step 350: 0.041486360132694244 +Loss at step 400: 0.04332815855741501 +Loss at step 450: 0.05480773001909256 +Loss at step 500: 0.052909351885318756 +Loss at step 550: 0.04117535427212715 +Loss at step 600: 0.03290819376707077 +Loss at step 650: 0.04302818700671196 +Loss at step 700: 0.041606031358242035 +Loss at step 750: 0.0447634682059288 +Loss at step 800: 0.05180688574910164 +Loss at step 850: 0.041656531393527985 +Loss at step 900: 0.06960886716842651 +Mean training loss after epoch 160: 0.04543006612953029 + +EPOCH: 161 +Loss at step 0: 0.03588409349322319 +Loss at step 50: 0.05434464290738106 +Loss at step 100: 0.03598414734005928 +Loss at step 150: 0.04042995721101761 +Loss at step 200: 0.05349036678671837 +Loss at step 250: 0.04168925806879997 +Loss at step 300: 0.036396291106939316 +Loss at step 350: 0.04340725764632225 +Loss at step 400: 0.03639363497495651 +Loss at step 450: 0.044818513095378876 +Loss at step 500: 0.05899752303957939 +Loss at step 550: 0.049398187547922134 +Loss at step 600: 0.043555911630392075 +Loss at step 650: 0.03922441601753235 +Loss at step 700: 0.051882170140743256 +Loss at step 750: 0.053881142288446426 +Loss at step 800: 0.05903434753417969 +Loss at step 850: 0.0446619987487793 +Loss at step 900: 0.042840100824832916 +Mean training loss after epoch 161: 0.045339322218032024 + +EPOCH: 162 +Loss at step 0: 0.04105321690440178 +Loss at step 50: 0.04220481589436531 +Loss at step 100: 0.0395016185939312 +Loss at step 150: 0.04435631260275841 +Loss at step 200: 0.049334146082401276 +Loss at step 250: 0.036160457879304886 +Loss at step 300: 0.03686879202723503 +Loss at step 350: 0.05342920497059822 +Loss at step 400: 0.033774420619010925 +Loss at step 450: 0.05329883098602295 +Loss at step 500: 0.039375629276037216 +Loss at step 550: 0.03732048720121384 +Loss at step 600: 0.052394505590200424 +Loss at step 650: 0.03998233750462532 +Loss at step 700: 0.07979018241167068 +Loss at step 750: 0.03776806220412254 +Loss at step 800: 0.046552594751119614 +Loss at step 850: 0.04259389638900757 +Loss at step 900: 0.044911470264196396 +Mean training loss after epoch 162: 0.045351764548625516 + +EPOCH: 163 +Loss at step 0: 0.04571383073925972 +Loss at step 50: 0.04169123247265816 +Loss at step 100: 0.041882432997226715 +Loss at step 150: 0.045187823474407196 +Loss at step 200: 0.036972526460886 +Loss at step 250: 0.055245254188776016 +Loss at step 300: 0.05190960317850113 +Loss at step 350: 0.04296186938881874 +Loss at step 400: 0.05747760832309723 +Loss at step 450: 0.052875205874443054 +Loss at step 500: 0.0420052595436573 +Loss at step 550: 0.04518750309944153 +Loss at step 600: 0.06262321025133133 +Loss at step 650: 0.03867659345269203 +Loss at step 700: 0.03831100091338158 +Loss at step 750: 0.0365147590637207 +Loss at step 800: 0.03939880058169365 +Loss at step 850: 0.03972802683711052 +Loss at step 900: 0.04097052291035652 +Mean training loss after epoch 163: 0.045367360637306786 + +EPOCH: 164 +Loss at step 0: 0.04127742350101471 +Loss at step 50: 0.03679262474179268 +Loss at step 100: 0.03651072457432747 +Loss at step 150: 0.04246117174625397 +Loss at step 200: 0.04623989760875702 +Loss at step 250: 0.048463836312294006 +Loss at step 300: 0.03811728209257126 +Loss at step 350: 0.03821835666894913 +Loss at step 400: 0.03935026377439499 +Loss at step 450: 0.037943240255117416 +Loss at step 500: 0.03789034113287926 +Loss at step 550: 0.03749782592058182 +Loss at step 600: 0.03543435409665108 +Loss at step 650: 0.03416849672794342 +Loss at step 700: 0.058213137090206146 +Loss at step 750: 0.05304522439837456 +Loss at step 800: 0.04203004017472267 +Loss at step 850: 0.050420477986335754 +Loss at step 900: 0.04690000042319298 +Mean training loss after epoch 164: 0.04543109026664038 + +EPOCH: 165 +Loss at step 0: 0.04869041591882706 +Loss at step 50: 0.03911471739411354 +Loss at step 100: 0.07036586850881577 +Loss at step 150: 0.03977502882480621 +Loss at step 200: 0.046984925866127014 +Loss at step 250: 0.043369609862565994 +Loss at step 300: 0.03979567438364029 +Loss at step 350: 0.034934449940919876 +Loss at step 400: 0.05723438411951065 +Loss at step 450: 0.05895712971687317 +Loss at step 500: 0.05698237195611 +Loss at step 550: 0.05524788796901703 +Loss at step 600: 0.0413394458591938 +Loss at step 650: 0.04716252535581589 +Loss at step 700: 0.042079079896211624 +Loss at step 750: 0.03934825584292412 +Loss at step 800: 0.03994974121451378 +Loss at step 850: 0.0448455810546875 +Loss at step 900: 0.04078914597630501 +Mean training loss after epoch 165: 0.044910616064662616 + +EPOCH: 166 +Loss at step 0: 0.03775063902139664 +Loss at step 50: 0.0360499769449234 +Loss at step 100: 0.037946317344903946 +Loss at step 150: 0.04141062870621681 +Loss at step 200: 0.04351644217967987 +Loss at step 250: 0.05158928409218788 +Loss at step 300: 0.06622082740068436 +Loss at step 350: 0.037192486226558685 +Loss at step 400: 0.04484424367547035 +Loss at step 450: 0.06915963441133499 +Loss at step 500: 0.056484825909137726 +Loss at step 550: 0.036653466522693634 +Loss at step 600: 0.03878455609083176 +Loss at step 650: 0.04279294237494469 +Loss at step 700: 0.04087692126631737 +Loss at step 750: 0.035178739577531815 +Loss at step 800: 0.04093317687511444 +Loss at step 850: 0.037604402750730515 +Loss at step 900: 0.040088482201099396 +Mean training loss after epoch 166: 0.045408313500204446 + +EPOCH: 167 +Loss at step 0: 0.03491378575563431 +Loss at step 50: 0.0409066379070282 +Loss at step 100: 0.05718398466706276 +Loss at step 150: 0.049059536308050156 +Loss at step 200: 0.05322270467877388 +Loss at step 250: 0.05449123680591583 +Loss at step 300: 0.03513224422931671 +Loss at step 350: 0.06100922077894211 +Loss at step 400: 0.03214343637228012 +Loss at step 450: 0.04762778803706169 +Loss at step 500: 0.060712311416864395 +Loss at step 550: 0.03510888293385506 +Loss at step 600: 0.05594760924577713 +Loss at step 650: 0.059307195246219635 +Loss at step 700: 0.04680970311164856 +Loss at step 750: 0.039606012403964996 +Loss at step 800: 0.03687047213315964 +Loss at step 850: 0.0415286049246788 +Loss at step 900: 0.04276140406727791 +Mean training loss after epoch 167: 0.04544779821547237 + +EPOCH: 168 +Loss at step 0: 0.03918340057134628 +Loss at step 50: 0.04150276258587837 +Loss at step 100: 0.04396246746182442 +Loss at step 150: 0.03944762796163559 +Loss at step 200: 0.03900706395506859 +Loss at step 250: 0.04237803444266319 +Loss at step 300: 0.037626154720783234 +Loss at step 350: 0.05572063848376274 +Loss at step 400: 0.03949202597141266 +Loss at step 450: 0.0373062938451767 +Loss at step 500: 0.038857705891132355 +Loss at step 550: 0.03877405822277069 +Loss at step 600: 0.03712764382362366 +Loss at step 650: 0.05993238463997841 +Loss at step 700: 0.042831260710954666 +Loss at step 750: 0.03752463310956955 +Loss at step 800: 0.03890422359108925 +Loss at step 850: 0.05146149545907974 +Loss at step 900: 0.05556134507060051 +Mean training loss after epoch 168: 0.04499623994591203 + +EPOCH: 169 +Loss at step 0: 0.03803873434662819 +Loss at step 50: 0.04022381827235222 +Loss at step 100: 0.05772239714860916 +Loss at step 150: 0.03760824725031853 +Loss at step 200: 0.041374195367097855 +Loss at step 250: 0.040693480521440506 +Loss at step 300: 0.038720596581697464 +Loss at step 350: 0.05135650932788849 +Loss at step 400: 0.060134224593639374 +Loss at step 450: 0.050536561757326126 +Loss at step 500: 0.03865848481655121 +Loss at step 550: 0.038190919905900955 +Loss at step 600: 0.05113137513399124 +Loss at step 650: 0.03493344038724899 +Loss at step 700: 0.04136475548148155 +Loss at step 750: 0.03572293743491173 +Loss at step 800: 0.06611035019159317 +Loss at step 850: 0.03651556372642517 +Loss at step 900: 0.039149269461631775 +Mean training loss after epoch 169: 0.045075246593607135 + +EPOCH: 170 +Loss at step 0: 0.033221058547496796 +Loss at step 50: 0.06467200070619583 +Loss at step 100: 0.042436376214027405 +Loss at step 150: 0.0380876287817955 +Loss at step 200: 0.03676445037126541 +Loss at step 250: 0.043210145086050034 +Loss at step 300: 0.032982319593429565 +Loss at step 350: 0.042823273688554764 +Loss at step 400: 0.03820237144827843 +Loss at step 450: 0.04563350975513458 +Loss at step 500: 0.04353397712111473 +Loss at step 550: 0.03746328130364418 +Loss at step 600: 0.04638344421982765 +Loss at step 650: 0.0410890094935894 +Loss at step 700: 0.07334408164024353 +Loss at step 750: 0.05744574964046478 +Loss at step 800: 0.035864830017089844 +Loss at step 850: 0.054765813052654266 +Loss at step 900: 0.03467581048607826 +Mean training loss after epoch 170: 0.04475828348351186 + +EPOCH: 171 +Loss at step 0: 0.03946680948138237 +Loss at step 50: 0.048724908381700516 +Loss at step 100: 0.04298014938831329 +Loss at step 150: 0.05222751572728157 +Loss at step 200: 0.03487500920891762 +Loss at step 250: 0.04806940630078316 +Loss at step 300: 0.052578944712877274 +Loss at step 350: 0.03708478435873985 +Loss at step 400: 0.037626273930072784 +Loss at step 450: 0.041827492415905 +Loss at step 500: 0.04390996694564819 +Loss at step 550: 0.043514225631952286 +Loss at step 600: 0.04226106405258179 +Loss at step 650: 0.033735670149326324 +Loss at step 700: 0.04193149879574776 +Loss at step 750: 0.04369864612817764 +Loss at step 800: 0.04188934713602066 +Loss at step 850: 0.04063555970788002 +Loss at step 900: 0.04550199955701828 +Mean training loss after epoch 171: 0.04538312486247785 + +EPOCH: 172 +Loss at step 0: 0.040509019047021866 +Loss at step 50: 0.03912323713302612 +Loss at step 100: 0.03942808881402016 +Loss at step 150: 0.034295473247766495 +Loss at step 200: 0.043597251176834106 +Loss at step 250: 0.04419099912047386 +Loss at step 300: 0.04572097584605217 +Loss at step 350: 0.04360748827457428 +Loss at step 400: 0.04081692919135094 +Loss at step 450: 0.03865746408700943 +Loss at step 500: 0.04633477330207825 +Loss at step 550: 0.041448138654232025 +Loss at step 600: 0.039717141538858414 +Loss at step 650: 0.04645777493715286 +Loss at step 700: 0.04155682772397995 +Loss at step 750: 0.03898464888334274 +Loss at step 800: 0.03769000247120857 +Loss at step 850: 0.03987337276339531 +Loss at step 900: 0.06085821986198425 +Mean training loss after epoch 172: 0.045138670782894214 + +EPOCH: 173 +Loss at step 0: 0.043256886303424835 +Loss at step 50: 0.036804549396038055 +Loss at step 100: 0.03744056075811386 +Loss at step 150: 0.04074953496456146 +Loss at step 200: 0.05458853393793106 +Loss at step 250: 0.04172831028699875 +Loss at step 300: 0.04577206075191498 +Loss at step 350: 0.060451701283454895 +Loss at step 400: 0.05195830762386322 +Loss at step 450: 0.0359431654214859 +Loss at step 500: 0.04396955296397209 +Loss at step 550: 0.061281684786081314 +Loss at step 600: 0.03914988040924072 +Loss at step 650: 0.04025467857718468 +Loss at step 700: 0.03316447511315346 +Loss at step 750: 0.07375076413154602 +Loss at step 800: 0.06582111120223999 +Loss at step 850: 0.04545915126800537 +Loss at step 900: 0.03918759524822235 +Mean training loss after epoch 173: 0.04522931279499393 + +EPOCH: 174 +Loss at step 0: 0.0425800085067749 +Loss at step 50: 0.04178735613822937 +Loss at step 100: 0.03696626052260399 +Loss at step 150: 0.03959193080663681 +Loss at step 200: 0.06498877704143524 +Loss at step 250: 0.037115003913640976 +Loss at step 300: 0.044266797602176666 +Loss at step 350: 0.060023147612810135 +Loss at step 400: 0.045276395976543427 +Loss at step 450: 0.03692108020186424 +Loss at step 500: 0.037775736302137375 +Loss at step 550: 0.038444884121418 +Loss at step 600: 0.04087059572339058 +Loss at step 650: 0.041457731276750565 +Loss at step 700: 0.04112256318330765 +Loss at step 750: 0.03821401298046112 +Loss at step 800: 0.04169802367687225 +Loss at step 850: 0.0467117503285408 +Loss at step 900: 0.0379408523440361 +Mean training loss after epoch 174: 0.04471652687731773 + +EPOCH: 175 +Loss at step 0: 0.04008456692099571 +Loss at step 50: 0.06706783920526505 +Loss at step 100: 0.06730801612138748 +Loss at step 150: 0.041434142738580704 +Loss at step 200: 0.041888099163770676 +Loss at step 250: 0.039836540818214417 +Loss at step 300: 0.048300761729478836 +Loss at step 350: 0.03916306421160698 +Loss at step 400: 0.05398337170481682 +Loss at step 450: 0.04164503887295723 +Loss at step 500: 0.04049764201045036 +Loss at step 550: 0.048616036772727966 +Loss at step 600: 0.0469265840947628 +Loss at step 650: 0.04091997072100639 +Loss at step 700: 0.04289554804563522 +Loss at step 750: 0.03553273156285286 +Loss at step 800: 0.05355411022901535 +Loss at step 850: 0.03872230276465416 +Loss at step 900: 0.04539531096816063 +Mean training loss after epoch 175: 0.04586763229213162 + +EPOCH: 176 +Loss at step 0: 0.07550738751888275 +Loss at step 50: 0.04106152802705765 +Loss at step 100: 0.03829363361001015 +Loss at step 150: 0.03770073503255844 +Loss at step 200: 0.08625390380620956 +Loss at step 250: 0.039955414831638336 +Loss at step 300: 0.06588955968618393 +Loss at step 350: 0.05732441693544388 +Loss at step 400: 0.03934857249259949 +Loss at step 450: 0.03775576502084732 +Loss at step 500: 0.031632594764232635 +Loss at step 550: 0.0339723601937294 +Loss at step 600: 0.03468909114599228 +Loss at step 650: 0.04268242418766022 +Loss at step 700: 0.04577663913369179 +Loss at step 750: 0.041514378041028976 +Loss at step 800: 0.0365169458091259 +Loss at step 850: 0.03689917176961899 +Loss at step 900: 0.03469032049179077 +Mean training loss after epoch 176: 0.0448228287250439 + +EPOCH: 177 +Loss at step 0: 0.055793117731809616 +Loss at step 50: 0.04907522350549698 +Loss at step 100: 0.05273120105266571 +Loss at step 150: 0.05470646545290947 +Loss at step 200: 0.06475169956684113 +Loss at step 250: 0.05420820415019989 +Loss at step 300: 0.049815282225608826 +Loss at step 350: 0.04672538861632347 +Loss at step 400: 0.0459635965526104 +Loss at step 450: 0.03694138675928116 +Loss at step 500: 0.03449946641921997 +Loss at step 550: 0.038890909403562546 +Loss at step 600: 0.042428404092788696 +Loss at step 650: 0.0732579231262207 +Loss at step 700: 0.05469639226794243 +Loss at step 750: 0.04410136863589287 +Loss at step 800: 0.06672464311122894 +Loss at step 850: 0.043602120131254196 +Loss at step 900: 0.03942745551466942 +Mean training loss after epoch 177: 0.04534658207409163 + +EPOCH: 178 +Loss at step 0: 0.04446268826723099 +Loss at step 50: 0.040215834975242615 +Loss at step 100: 0.042135920375585556 +Loss at step 150: 0.03886118158698082 +Loss at step 200: 0.03644685447216034 +Loss at step 250: 0.0459577850997448 +Loss at step 300: 0.04270868003368378 +Loss at step 350: 0.04443345591425896 +Loss at step 400: 0.0518028624355793 +Loss at step 450: 0.068543940782547 +Loss at step 500: 0.06209459900856018 +Loss at step 550: 0.07251634448766708 +Loss at step 600: 0.04095359146595001 +Loss at step 650: 0.058598246425390244 +Loss at step 700: 0.038213543593883514 +Loss at step 750: 0.042158592492341995 +Loss at step 800: 0.03564770147204399 +Loss at step 850: 0.05169399455189705 +Loss at step 900: 0.04700364172458649 +Mean training loss after epoch 178: 0.0453221335816485 + +EPOCH: 179 +Loss at step 0: 0.03976796939969063 +Loss at step 50: 0.04033729061484337 +Loss at step 100: 0.04179409518837929 +Loss at step 150: 0.03324873372912407 +Loss at step 200: 0.032916195690631866 +Loss at step 250: 0.04117589816451073 +Loss at step 300: 0.03998921811580658 +Loss at step 350: 0.06475420296192169 +Loss at step 400: 0.049541935324668884 +Loss at step 450: 0.060608234256505966 +Loss at step 500: 0.04295986518263817 +Loss at step 550: 0.047141678631305695 +Loss at step 600: 0.044318657368421555 +Loss at step 650: 0.05866311490535736 +Loss at step 700: 0.04052457585930824 +Loss at step 750: 0.06378310173749924 +Loss at step 800: 0.03989827632904053 +Loss at step 850: 0.04073023796081543 +Loss at step 900: 0.0349675789475441 +Mean training loss after epoch 179: 0.04525799386060314 + +EPOCH: 180 +Loss at step 0: 0.05747155472636223 +Loss at step 50: 0.05218419060111046 +Loss at step 100: 0.03754124790430069 +Loss at step 150: 0.04091673344373703 +Loss at step 200: 0.03725485876202583 +Loss at step 250: 0.07596219331026077 +Loss at step 300: 0.038437388837337494 +Loss at step 350: 0.03755740076303482 +Loss at step 400: 0.04112968593835831 +Loss at step 450: 0.03631724789738655 +Loss at step 500: 0.06737659126520157 +Loss at step 550: 0.03913811221718788 +Loss at step 600: 0.04556744173169136 +Loss at step 650: 0.0318446047604084 +Loss at step 700: 0.042834825813770294 +Loss at step 750: 0.04301831126213074 +Loss at step 800: 0.055800650268793106 +Loss at step 850: 0.04816875979304314 +Loss at step 900: 0.038564447313547134 +Mean training loss after epoch 180: 0.045385692032304276 + +EPOCH: 181 +Loss at step 0: 0.03608238697052002 +Loss at step 50: 0.06165727227926254 +Loss at step 100: 0.037536874413490295 +Loss at step 150: 0.04545210674405098 +Loss at step 200: 0.04086097329854965 +Loss at step 250: 0.03823969140648842 +Loss at step 300: 0.039707962423563004 +Loss at step 350: 0.059448592364788055 +Loss at step 400: 0.05524700507521629 +Loss at step 450: 0.03831179812550545 +Loss at step 500: 0.04520692676305771 +Loss at step 550: 0.043809860944747925 +Loss at step 600: 0.03104153834283352 +Loss at step 650: 0.04460705444216728 +Loss at step 700: 0.038301981985569 +Loss at step 750: 0.04287278652191162 +Loss at step 800: 0.039255548268556595 +Loss at step 850: 0.04299783706665039 +Loss at step 900: 0.03345674276351929 +Mean training loss after epoch 181: 0.04531496447095993 + +EPOCH: 182 +Loss at step 0: 0.041448090225458145 +Loss at step 50: 0.0361943319439888 +Loss at step 100: 0.0438222736120224 +Loss at step 150: 0.03845402970910072 +Loss at step 200: 0.03787136450409889 +Loss at step 250: 0.03723537176847458 +Loss at step 300: 0.03597729653120041 +Loss at step 350: 0.03099764510989189 +Loss at step 400: 0.06997344642877579 +Loss at step 450: 0.04262690618634224 +Loss at step 500: 0.041986748576164246 +Loss at step 550: 0.058174073696136475 +Loss at step 600: 0.04249894246459007 +Loss at step 650: 0.03965020552277565 +Loss at step 700: 0.052103664726018906 +Loss at step 750: 0.050087276846170425 +Loss at step 800: 0.07151447236537933 +Loss at step 850: 0.04376678168773651 +Loss at step 900: 0.05046973004937172 +Mean training loss after epoch 182: 0.04521241540442715 + +EPOCH: 183 +Loss at step 0: 0.03671390563249588 +Loss at step 50: 0.050042033195495605 +Loss at step 100: 0.04647556692361832 +Loss at step 150: 0.058942753821611404 +Loss at step 200: 0.037544336169958115 +Loss at step 250: 0.0569617934525013 +Loss at step 300: 0.045460816472768784 +Loss at step 350: 0.03858570381999016 +Loss at step 400: 0.03519095480442047 +Loss at step 450: 0.04146328568458557 +Loss at step 500: 0.04040391370654106 +Loss at step 550: 0.03958556056022644 +Loss at step 600: 0.04218403995037079 +Loss at step 650: 0.05361258238554001 +Loss at step 700: 0.03571748733520508 +Loss at step 750: 0.05799006298184395 +Loss at step 800: 0.03634899482131004 +Loss at step 850: 0.03730541095137596 +Loss at step 900: 0.03377115726470947 +Mean training loss after epoch 183: 0.04536729490658495 + +EPOCH: 184 +Loss at step 0: 0.04464931786060333 +Loss at step 50: 0.03904951363801956 +Loss at step 100: 0.04003390297293663 +Loss at step 150: 0.042056743055582047 +Loss at step 200: 0.03906440734863281 +Loss at step 250: 0.03890271857380867 +Loss at step 300: 0.04435289651155472 +Loss at step 350: 0.055097248405218124 +Loss at step 400: 0.04195256158709526 +Loss at step 450: 0.06254102289676666 +Loss at step 500: 0.05101059749722481 +Loss at step 550: 0.06676074117422104 +Loss at step 600: 0.042417895048856735 +Loss at step 650: 0.0414905920624733 +Loss at step 700: 0.061441801488399506 +Loss at step 750: 0.05699063092470169 +Loss at step 800: 0.04057307913899422 +Loss at step 850: 0.03859991207718849 +Loss at step 900: 0.03835051879286766 +Mean training loss after epoch 184: 0.04516869814577959 + +EPOCH: 185 +Loss at step 0: 0.051421746611595154 +Loss at step 50: 0.0399099625647068 +Loss at step 100: 0.04042365774512291 +Loss at step 150: 0.03791704773902893 +Loss at step 200: 0.0510861910879612 +Loss at step 250: 0.042042430490255356 +Loss at step 300: 0.037371791899204254 +Loss at step 350: 0.036745086312294006 +Loss at step 400: 0.0370803028345108 +Loss at step 450: 0.04322125390172005 +Loss at step 500: 0.06187579780817032 +Loss at step 550: 0.05119670182466507 +Loss at step 600: 0.054280854761600494 +Loss at step 650: 0.03828447684645653 +Loss at step 700: 0.04740214720368385 +Loss at step 750: 0.03791907802224159 +Loss at step 800: 0.036331482231616974 +Loss at step 850: 0.04813044145703316 +Loss at step 900: 0.03952896222472191 +Mean training loss after epoch 185: 0.045180073837989936 + +EPOCH: 186 +Loss at step 0: 0.04498914256691933 +Loss at step 50: 0.04715968668460846 +Loss at step 100: 0.05770222842693329 +Loss at step 150: 0.03872722387313843 +Loss at step 200: 0.037074651569128036 +Loss at step 250: 0.04110514745116234 +Loss at step 300: 0.03831303119659424 +Loss at step 350: 0.04751241207122803 +Loss at step 400: 0.045530661940574646 +Loss at step 450: 0.0462823286652565 +Loss at step 500: 0.06901341676712036 +Loss at step 550: 0.05835563689470291 +Loss at step 600: 0.054393261671066284 +Loss at step 650: 0.03972972184419632 +Loss at step 700: 0.038423001766204834 +Loss at step 750: 0.036091580986976624 +Loss at step 800: 0.044919703155756 +Loss at step 850: 0.04537256434559822 +Loss at step 900: 0.05405249819159508 +Mean training loss after epoch 186: 0.04532928395865441 + +EPOCH: 187 +Loss at step 0: 0.05729985982179642 +Loss at step 50: 0.058677252382040024 +Loss at step 100: 0.03162126615643501 +Loss at step 150: 0.04010162129998207 +Loss at step 200: 0.051025308668613434 +Loss at step 250: 0.0658147782087326 +Loss at step 300: 0.05688386783003807 +Loss at step 350: 0.04197298735380173 +Loss at step 400: 0.038468893617391586 +Loss at step 450: 0.05380532518029213 +Loss at step 500: 0.05603237450122833 +Loss at step 550: 0.03497246652841568 +Loss at step 600: 0.06134669855237007 +Loss at step 650: 0.04213636741042137 +Loss at step 700: 0.04698992520570755 +Loss at step 750: 0.04133175313472748 +Loss at step 800: 0.04680101200938225 +Loss at step 850: 0.03554354980587959 +Loss at step 900: 0.060700688511133194 +Mean training loss after epoch 187: 0.04549471488488572 + +EPOCH: 188 +Loss at step 0: 0.06150700896978378 +Loss at step 50: 0.03992079570889473 +Loss at step 100: 0.03656752035021782 +Loss at step 150: 0.04420023411512375 +Loss at step 200: 0.04109982028603554 +Loss at step 250: 0.051780328154563904 +Loss at step 300: 0.05179851874709129 +Loss at step 350: 0.05215723067522049 +Loss at step 400: 0.04282285273075104 +Loss at step 450: 0.054367948323488235 +Loss at step 500: 0.04113909974694252 +Loss at step 550: 0.044183120131492615 +Loss at step 600: 0.03922979533672333 +Loss at step 650: 0.04203737527132034 +Loss at step 700: 0.04239813610911369 +Loss at step 750: 0.03945309296250343 +Loss at step 800: 0.04305345192551613 +Loss at step 850: 0.06255516409873962 +Loss at step 900: 0.06415887922048569 +Mean training loss after epoch 188: 0.04541649123125557 + +EPOCH: 189 +Loss at step 0: 0.03606250882148743 +Loss at step 50: 0.03933314234018326 +Loss at step 100: 0.04676578566431999 +Loss at step 150: 0.03927544504404068 +Loss at step 200: 0.04321090877056122 +Loss at step 250: 0.05096959322690964 +Loss at step 300: 0.06481492519378662 +Loss at step 350: 0.03770839050412178 +Loss at step 400: 0.03531435877084732 +Loss at step 450: 0.04448651522397995 +Loss at step 500: 0.05828762426972389 +Loss at step 550: 0.04090716689825058 +Loss at step 600: 0.0789261981844902 +Loss at step 650: 0.05484716594219208 +Loss at step 700: 0.05484338104724884 +Loss at step 750: 0.04225367680191994 +Loss at step 800: 0.0344744436442852 +Loss at step 850: 0.03508119285106659 +Loss at step 900: 0.05666442587971687 +Mean training loss after epoch 189: 0.0449572853100643 + +EPOCH: 190 +Loss at step 0: 0.036354102194309235 +Loss at step 50: 0.039934489876031876 +Loss at step 100: 0.04111945629119873 +Loss at step 150: 0.040813714265823364 +Loss at step 200: 0.04762617498636246 +Loss at step 250: 0.038000114262104034 +Loss at step 300: 0.03428737819194794 +Loss at step 350: 0.03944721445441246 +Loss at step 400: 0.04759282246232033 +Loss at step 450: 0.044604476541280746 +Loss at step 500: 0.0633435845375061 +Loss at step 550: 0.03882480412721634 +Loss at step 600: 0.054397474974393845 +Loss at step 650: 0.05618679150938988 +Loss at step 700: 0.04229629784822464 +Loss at step 750: 0.03871293365955353 +Loss at step 800: 0.06301354616880417 +Loss at step 850: 0.03817937150597572 +Loss at step 900: 0.05107194557785988 +Mean training loss after epoch 190: 0.04462667460094637 + +EPOCH: 191 +Loss at step 0: 0.042228054255247116 +Loss at step 50: 0.050701119005680084 +Loss at step 100: 0.052508533000946045 +Loss at step 150: 0.039106011390686035 +Loss at step 200: 0.03532364219427109 +Loss at step 250: 0.04400981217622757 +Loss at step 300: 0.04086195304989815 +Loss at step 350: 0.04484499618411064 +Loss at step 400: 0.03560597449541092 +Loss at step 450: 0.05991164967417717 +Loss at step 500: 0.049849800765514374 +Loss at step 550: 0.03425825387239456 +Loss at step 600: 0.03875529766082764 +Loss at step 650: 0.04517103731632233 +Loss at step 700: 0.04152282327413559 +Loss at step 750: 0.04119221121072769 +Loss at step 800: 0.03203398361802101 +Loss at step 850: 0.037602610886096954 +Loss at step 900: 0.058391060680150986 +Mean training loss after epoch 191: 0.04558049460678403 + +EPOCH: 192 +Loss at step 0: 0.03846770152449608 +Loss at step 50: 0.05294552445411682 +Loss at step 100: 0.039572786539793015 +Loss at step 150: 0.054135531187057495 +Loss at step 200: 0.05206996574997902 +Loss at step 250: 0.03535915166139603 +Loss at step 300: 0.0513426698744297 +Loss at step 350: 0.038878701627254486 +Loss at step 400: 0.04100796580314636 +Loss at step 450: 0.03391486033797264 +Loss at step 500: 0.03219745308160782 +Loss at step 550: 0.03580109402537346 +Loss at step 600: 0.04475495219230652 +Loss at step 650: 0.03344600647687912 +Loss at step 700: 0.042744945734739304 +Loss at step 750: 0.04524318501353264 +Loss at step 800: 0.039950307458639145 +Loss at step 850: 0.05638068914413452 +Loss at step 900: 0.04225417971611023 +Mean training loss after epoch 192: 0.04473041640971896 + +EPOCH: 193 +Loss at step 0: 0.052307259291410446 +Loss at step 50: 0.041180260479450226 +Loss at step 100: 0.039213791489601135 +Loss at step 150: 0.04440511018037796 +Loss at step 200: 0.0383504256606102 +Loss at step 250: 0.04201306030154228 +Loss at step 300: 0.042279839515686035 +Loss at step 350: 0.035858143121004105 +Loss at step 400: 0.05396916717290878 +Loss at step 450: 0.041582655161619186 +Loss at step 500: 0.040042273700237274 +Loss at step 550: 0.038964465260505676 +Loss at step 600: 0.04344325140118599 +Loss at step 650: 0.05793509632349014 +Loss at step 700: 0.04047052189707756 +Loss at step 750: 0.04266829416155815 +Loss at step 800: 0.05522885546088219 +Loss at step 850: 0.03658655285835266 +Loss at step 900: 0.04531051591038704 +Mean training loss after epoch 193: 0.04490299261947558 + +EPOCH: 194 +Loss at step 0: 0.07624045014381409 +Loss at step 50: 0.06038878858089447 +Loss at step 100: 0.037549685686826706 +Loss at step 150: 0.03951912373304367 +Loss at step 200: 0.05546077340841293 +Loss at step 250: 0.03394531458616257 +Loss at step 300: 0.034961998462677 +Loss at step 350: 0.0561039038002491 +Loss at step 400: 0.0449695810675621 +Loss at step 450: 0.03996847942471504 +Loss at step 500: 0.03228776529431343 +Loss at step 550: 0.03723064064979553 +Loss at step 600: 0.05874907225370407 +Loss at step 650: 0.04251663386821747 +Loss at step 700: 0.05911070853471756 +Loss at step 750: 0.038170743733644485 +Loss at step 800: 0.04410136491060257 +Loss at step 850: 0.06028928607702255 +Loss at step 900: 0.040294378995895386 +Mean training loss after epoch 194: 0.044851225637979726 + +EPOCH: 195 +Loss at step 0: 0.03390637785196304 +Loss at step 50: 0.03562233969569206 +Loss at step 100: 0.033955685794353485 +Loss at step 150: 0.04058881476521492 +Loss at step 200: 0.04444936662912369 +Loss at step 250: 0.03238574042916298 +Loss at step 300: 0.05959978699684143 +Loss at step 350: 0.03507523983716965 +Loss at step 400: 0.04626872017979622 +Loss at step 450: 0.03496425598859787 +Loss at step 500: 0.03930877894163132 +Loss at step 550: 0.0654229000210762 +Loss at step 600: 0.05976109579205513 +Loss at step 650: 0.03499690815806389 +Loss at step 700: 0.04473992809653282 +Loss at step 750: 0.0406518429517746 +Loss at step 800: 0.03665410354733467 +Loss at step 850: 0.04552303999662399 +Loss at step 900: 0.044045720249414444 +Mean training loss after epoch 195: 0.045044549903683444 + +EPOCH: 196 +Loss at step 0: 0.05425940454006195 +Loss at step 50: 0.038902126252651215 +Loss at step 100: 0.040790215134620667 +Loss at step 150: 0.04072971269488335 +Loss at step 200: 0.03681737557053566 +Loss at step 250: 0.05410180613398552 +Loss at step 300: 0.065887451171875 +Loss at step 350: 0.04180807247757912 +Loss at step 400: 0.055970754474401474 +Loss at step 450: 0.055888351052999496 +Loss at step 500: 0.04597299173474312 +Loss at step 550: 0.05372442305088043 +Loss at step 600: 0.03480617329478264 +Loss at step 650: 0.04213497042655945 +Loss at step 700: 0.039121706038713455 +Loss at step 750: 0.040950238704681396 +Loss at step 800: 0.040735840797424316 +Loss at step 850: 0.056777238845825195 +Loss at step 900: 0.03371454030275345 +Mean training loss after epoch 196: 0.04554240391857779 + +EPOCH: 197 +Loss at step 0: 0.03983287140727043 +Loss at step 50: 0.06950125098228455 +Loss at step 100: 0.048171963542699814 +Loss at step 150: 0.05457686260342598 +Loss at step 200: 0.05584825575351715 +Loss at step 250: 0.05925960838794708 +Loss at step 300: 0.06430219113826752 +Loss at step 350: 0.042543333023786545 +Loss at step 400: 0.039240721613168716 +Loss at step 450: 0.0402703732252121 +Loss at step 500: 0.035046592354774475 +Loss at step 550: 0.04093187674880028 +Loss at step 600: 0.04115005210042 +Loss at step 650: 0.048454176634550095 +Loss at step 700: 0.03801300376653671 +Loss at step 750: 0.05521786957979202 +Loss at step 800: 0.047219499945640564 +Loss at step 850: 0.053783055394887924 +Loss at step 900: 0.0434098094701767 +Mean training loss after epoch 197: 0.04502933196572543 + +EPOCH: 198 +Loss at step 0: 0.04508521407842636 +Loss at step 50: 0.04187328740954399 +Loss at step 100: 0.043052975088357925 +Loss at step 150: 0.039457760751247406 +Loss at step 200: 0.03886120021343231 +Loss at step 250: 0.05653925612568855 +Loss at step 300: 0.04228125140070915 +Loss at step 350: 0.07102609425783157 +Loss at step 400: 0.04130791500210762 +Loss at step 450: 0.0439828559756279 +Loss at step 500: 0.0435461662709713 +Loss at step 550: 0.04238437861204147 +Loss at step 600: 0.05712062865495682 +Loss at step 650: 0.039715319871902466 +Loss at step 700: 0.04362877085804939 +Loss at step 750: 0.040013596415519714 +Loss at step 800: 0.036481063812971115 +Loss at step 850: 0.054095275700092316 +Loss at step 900: 0.038235854357481 +Mean training loss after epoch 198: 0.044582640420573985 + +EPOCH: 199 +Loss at step 0: 0.0348518081009388 +Loss at step 50: 0.065972238779068 +Loss at step 100: 0.03954975679516792 +Loss at step 150: 0.05651426687836647 +Loss at step 200: 0.07011231780052185 +Loss at step 250: 0.0420880988240242 +Loss at step 300: 0.04116426780819893 +Loss at step 350: 0.041534073650836945 +Loss at step 400: 0.03733363747596741 +Loss at step 450: 0.03985462710261345 +Loss at step 500: 0.03901340439915657 +Loss at step 550: 0.034180961549282074 +Loss at step 600: 0.031110573559999466 +Loss at step 650: 0.03391683101654053 +Loss at step 700: 0.05362424999475479 +Loss at step 750: 0.04446065425872803 +Loss at step 800: 0.053976841270923615 +Loss at step 850: 0.04202162101864815 +Loss at step 900: 0.04723302274942398 +Mean training loss after epoch 199: 0.04507658939792721 + +EPOCH: 200 +Loss at step 0: 0.03779774531722069 +Loss at step 50: 0.04198591783642769 +Loss at step 100: 0.0515945665538311 +Loss at step 150: 0.050251834094524384 +Loss at step 200: 0.05951753631234169 +Loss at step 250: 0.04200168326497078 +Loss at step 300: 0.04172653332352638 +Loss at step 350: 0.058568164706230164 +Loss at step 400: 0.0403682105243206 +Loss at step 450: 0.0426095649600029 +Loss at step 500: 0.043776560574769974 +Loss at step 550: 0.039078567177057266 +Loss at step 600: 0.040481287986040115 +Loss at step 650: 0.03760899603366852 +Loss at step 700: 0.04248422756791115 +Loss at step 750: 0.035797569900751114 +Loss at step 800: 0.03926771506667137 +Loss at step 850: 0.041401926428079605 +Loss at step 900: 0.03748273104429245 +Mean training loss after epoch 200: 0.04495057783155108 + +EPOCH: 201 +Loss at step 0: 0.03884557634592056 +Loss at step 50: 0.050161633640527725 +Loss at step 100: 0.0302677471190691 +Loss at step 150: 0.0514724962413311 +Loss at step 200: 0.03972337394952774 +Loss at step 250: 0.04437831789255142 +Loss at step 300: 0.0542287677526474 +Loss at step 350: 0.04234259948134422 +Loss at step 400: 0.0351545475423336 +Loss at step 450: 0.04100866615772247 +Loss at step 500: 0.03383158519864082 +Loss at step 550: 0.03359280154109001 +Loss at step 600: 0.03861341252923012 +Loss at step 650: 0.042006317526102066 +Loss at step 700: 0.04126603156328201 +Loss at step 750: 0.04565607011318207 +Loss at step 800: 0.03581497073173523 +Loss at step 850: 0.07403262704610825 +Loss at step 900: 0.0407702662050724 +Mean training loss after epoch 201: 0.04467799335614895 + +EPOCH: 202 +Loss at step 0: 0.036459699273109436 +Loss at step 50: 0.05457698926329613 +Loss at step 100: 0.04484090954065323 +Loss at step 150: 0.040278706699609756 +Loss at step 200: 0.047560740262269974 +Loss at step 250: 0.03637447953224182 +Loss at step 300: 0.04308488219976425 +Loss at step 350: 0.0718325674533844 +Loss at step 400: 0.07049299776554108 +Loss at step 450: 0.06196274980902672 +Loss at step 500: 0.04516976699233055 +Loss at step 550: 0.0500350184738636 +Loss at step 600: 0.03862667828798294 +Loss at step 650: 0.042549293488264084 +Loss at step 700: 0.04753423482179642 +Loss at step 750: 0.052133627235889435 +Loss at step 800: 0.04212679713964462 +Loss at step 850: 0.038050681352615356 +Loss at step 900: 0.03241612762212753 +Mean training loss after epoch 202: 0.04472457406633317 + +EPOCH: 203 +Loss at step 0: 0.037215664982795715 +Loss at step 50: 0.032891206443309784 +Loss at step 100: 0.045018360018730164 +Loss at step 150: 0.06143582612276077 +Loss at step 200: 0.03886640816926956 +Loss at step 250: 0.035291288048028946 +Loss at step 300: 0.057504940778017044 +Loss at step 350: 0.05473543703556061 +Loss at step 400: 0.03625832125544548 +Loss at step 450: 0.035616517066955566 +Loss at step 500: 0.03926508128643036 +Loss at step 550: 0.05199974402785301 +Loss at step 600: 0.039189863950014114 +Loss at step 650: 0.0431167371571064 +Loss at step 700: 0.052426017820835114 +Loss at step 750: 0.036755528301000595 +Loss at step 800: 0.03921264782547951 +Loss at step 850: 0.05505433306097984 +Loss at step 900: 0.05308936908841133 +Mean training loss after epoch 203: 0.04469277201565916 + +EPOCH: 204 +Loss at step 0: 0.056327663362026215 +Loss at step 50: 0.05358109250664711 +Loss at step 100: 0.041813381016254425 +Loss at step 150: 0.04506402835249901 +Loss at step 200: 0.10275832563638687 +Loss at step 250: 0.04550132527947426 +Loss at step 300: 0.04188266396522522 +Loss at step 350: 0.03988751396536827 +Loss at step 400: 0.04163822904229164 +Loss at step 450: 0.04411735758185387 +Loss at step 500: 0.05453697219491005 +Loss at step 550: 0.04448207840323448 +Loss at step 600: 0.04429555684328079 +Loss at step 650: 0.03348460793495178 +Loss at step 700: 0.04059666022658348 +Loss at step 750: 0.042830582708120346 +Loss at step 800: 0.040740229189395905 +Loss at step 850: 0.036354295909404755 +Loss at step 900: 0.042115114629268646 +Mean training loss after epoch 204: 0.04494994351548998 + +EPOCH: 205 +Loss at step 0: 0.0346858985722065 +Loss at step 50: 0.048479605466127396 +Loss at step 100: 0.037831686437129974 +Loss at step 150: 0.05968513339757919 +Loss at step 200: 0.0407082624733448 +Loss at step 250: 0.033279113471508026 +Loss at step 300: 0.03869318217039108 +Loss at step 350: 0.03500447794795036 +Loss at step 400: 0.05637707933783531 +Loss at step 450: 0.03902927041053772 +Loss at step 500: 0.03469936549663544 +Loss at step 550: 0.051097504794597626 +Loss at step 600: 0.053846679627895355 +Loss at step 650: 0.04048370197415352 +Loss at step 700: 0.03743143007159233 +Loss at step 750: 0.03328872472047806 +Loss at step 800: 0.03637666627764702 +Loss at step 850: 0.045726414769887924 +Loss at step 900: 0.03341805189847946 +Mean training loss after epoch 205: 0.045167320318567725 + +EPOCH: 206 +Loss at step 0: 0.037857044488191605 +Loss at step 50: 0.036084700375795364 +Loss at step 100: 0.05032286420464516 +Loss at step 150: 0.04375385493040085 +Loss at step 200: 0.054346516728401184 +Loss at step 250: 0.04114299267530441 +Loss at step 300: 0.043806493282318115 +Loss at step 350: 0.04038502275943756 +Loss at step 400: 0.04043358191847801 +Loss at step 450: 0.05199912190437317 +Loss at step 500: 0.03018077462911606 +Loss at step 550: 0.04163268208503723 +Loss at step 600: 0.06307704746723175 +Loss at step 650: 0.060263652354478836 +Loss at step 700: 0.04152842238545418 +Loss at step 750: 0.04058264568448067 +Loss at step 800: 0.0321500301361084 +Loss at step 850: 0.03421973064541817 +Loss at step 900: 0.03646183758974075 +Mean training loss after epoch 206: 0.045155012471748315 + +EPOCH: 207 +Loss at step 0: 0.05408443510532379 +Loss at step 50: 0.04282351955771446 +Loss at step 100: 0.05253462865948677 +Loss at step 150: 0.04871230944991112 +Loss at step 200: 0.03780169412493706 +Loss at step 250: 0.052702970802783966 +Loss at step 300: 0.041660260409116745 +Loss at step 350: 0.041011910885572433 +Loss at step 400: 0.04359430447220802 +Loss at step 450: 0.04130280762910843 +Loss at step 500: 0.05702915042638779 +Loss at step 550: 0.07156169414520264 +Loss at step 600: 0.03919968754053116 +Loss at step 650: 0.037663061171770096 +Loss at step 700: 0.05655404552817345 +Loss at step 750: 0.03995484113693237 +Loss at step 800: 0.03517705574631691 +Loss at step 850: 0.0407596118748188 +Loss at step 900: 0.043017756193876266 +Mean training loss after epoch 207: 0.04509733991423395 + +EPOCH: 208 +Loss at step 0: 0.039507992565631866 +Loss at step 50: 0.042232248932123184 +Loss at step 100: 0.04128454625606537 +Loss at step 150: 0.03922766074538231 +Loss at step 200: 0.03584706038236618 +Loss at step 250: 0.04830395430326462 +Loss at step 300: 0.05362813174724579 +Loss at step 350: 0.036093439906835556 +Loss at step 400: 0.054257966578006744 +Loss at step 450: 0.0522276796400547 +Loss at step 500: 0.036641769111156464 +Loss at step 550: 0.03452599421143532 +Loss at step 600: 0.04778366535902023 +Loss at step 650: 0.042172931134700775 +Loss at step 700: 0.039752472192049026 +Loss at step 750: 0.03692365437746048 +Loss at step 800: 0.03955672308802605 +Loss at step 850: 0.05519081652164459 +Loss at step 900: 0.03449350222945213 +Mean training loss after epoch 208: 0.04503057867503052 + +EPOCH: 209 +Loss at step 0: 0.044961389154195786 +Loss at step 50: 0.03376142680644989 +Loss at step 100: 0.04507856070995331 +Loss at step 150: 0.03825966268777847 +Loss at step 200: 0.04015452042222023 +Loss at step 250: 0.07117459923028946 +Loss at step 300: 0.0363958366215229 +Loss at step 350: 0.04067688062787056 +Loss at step 400: 0.03948739916086197 +Loss at step 450: 0.05396018177270889 +Loss at step 500: 0.04289233684539795 +Loss at step 550: 0.039055243134498596 +Loss at step 600: 0.05426563322544098 +Loss at step 650: 0.0574934221804142 +Loss at step 700: 0.05382296442985535 +Loss at step 750: 0.03529094159603119 +Loss at step 800: 0.05435455963015556 +Loss at step 850: 0.03684995695948601 +Loss at step 900: 0.0297185517847538 +Mean training loss after epoch 209: 0.04526096670977724 + +EPOCH: 210 +Loss at step 0: 0.04058726876974106 +Loss at step 50: 0.05923517793416977 +Loss at step 100: 0.03565850481390953 +Loss at step 150: 0.04380166530609131 +Loss at step 200: 0.04746098816394806 +Loss at step 250: 0.04354723170399666 +Loss at step 300: 0.03719792515039444 +Loss at step 350: 0.039609652012586594 +Loss at step 400: 0.04834899678826332 +Loss at step 450: 0.05389108508825302 +Loss at step 500: 0.06888122111558914 +Loss at step 550: 0.040637437254190445 +Loss at step 600: 0.03724227473139763 +Loss at step 650: 0.03726249188184738 +Loss at step 700: 0.035851676017045975 +Loss at step 750: 0.03935164585709572 +Loss at step 800: 0.045359667390584946 +Loss at step 850: 0.03593292459845543 +Loss at step 900: 0.03959966450929642 +Mean training loss after epoch 210: 0.045010104400119676 + +EPOCH: 211 +Loss at step 0: 0.05412076413631439 +Loss at step 50: 0.035833537578582764 +Loss at step 100: 0.04077209532260895 +Loss at step 150: 0.04086793214082718 +Loss at step 200: 0.03996654227375984 +Loss at step 250: 0.03943168371915817 +Loss at step 300: 0.04712972790002823 +Loss at step 350: 0.04212001711130142 +Loss at step 400: 0.03831418231129646 +Loss at step 450: 0.05440318211913109 +Loss at step 500: 0.058869414031505585 +Loss at step 550: 0.05010746791958809 +Loss at step 600: 0.04150356352329254 +Loss at step 650: 0.03864441066980362 +Loss at step 700: 0.056679073721170425 +Loss at step 750: 0.0612194761633873 +Loss at step 800: 0.041027311235666275 +Loss at step 850: 0.04049273207783699 +Loss at step 900: 0.04306565970182419 +Mean training loss after epoch 211: 0.04496387814654153 + +EPOCH: 212 +Loss at step 0: 0.040428005158901215 +Loss at step 50: 0.044881947338581085 +Loss at step 100: 0.035693928599357605 +Loss at step 150: 0.03877878189086914 +Loss at step 200: 0.06194490194320679 +Loss at step 250: 0.038871366530656815 +Loss at step 300: 0.03686348348855972 +Loss at step 350: 0.037648845463991165 +Loss at step 400: 0.03799467533826828 +Loss at step 450: 0.04925158619880676 +Loss at step 500: 0.0659669041633606 +Loss at step 550: 0.05052737146615982 +Loss at step 600: 0.0591677762567997 +Loss at step 650: 0.050599370151758194 +Loss at step 700: 0.04115499556064606 +Loss at step 750: 0.052049197256565094 +Loss at step 800: 0.06113138422369957 +Loss at step 850: 0.04366876184940338 +Loss at step 900: 0.03861301764845848 +Mean training loss after epoch 212: 0.04513690286258391 + +EPOCH: 213 +Loss at step 0: 0.05285370349884033 +Loss at step 50: 0.05193154886364937 +Loss at step 100: 0.03753611817955971 +Loss at step 150: 0.035451602190732956 +Loss at step 200: 0.03897653520107269 +Loss at step 250: 0.03773925453424454 +Loss at step 300: 0.06806177645921707 +Loss at step 350: 0.03877243027091026 +Loss at step 400: 0.05672023445367813 +Loss at step 450: 0.07924725860357285 +Loss at step 500: 0.04213910922408104 +Loss at step 550: 0.03636140003800392 +Loss at step 600: 0.039767637848854065 +Loss at step 650: 0.03515815734863281 +Loss at step 700: 0.048144981265068054 +Loss at step 750: 0.04423777759075165 +Loss at step 800: 0.03412081301212311 +Loss at step 850: 0.03691580146551132 +Loss at step 900: 0.04119018465280533 +Mean training loss after epoch 213: 0.044650168328889524 + +EPOCH: 214 +Loss at step 0: 0.05457661300897598 +Loss at step 50: 0.03794348984956741 +Loss at step 100: 0.03659582510590553 +Loss at step 150: 0.046614423394203186 +Loss at step 200: 0.04457275941967964 +Loss at step 250: 0.043618783354759216 +Loss at step 300: 0.04286082834005356 +Loss at step 350: 0.0513736717402935 +Loss at step 400: 0.04138994216918945 +Loss at step 450: 0.06595193594694138 +Loss at step 500: 0.0429360531270504 +Loss at step 550: 0.038039978593587875 +Loss at step 600: 0.038520269095897675 +Loss at step 650: 0.04079781472682953 +Loss at step 700: 0.053815118968486786 +Loss at step 750: 0.04545110836625099 +Loss at step 800: 0.07158294320106506 +Loss at step 850: 0.06954755634069443 +Loss at step 900: 0.04495590552687645 +Mean training loss after epoch 214: 0.04522798301528957 + +EPOCH: 215 +Loss at step 0: 0.049665819853544235 +Loss at step 50: 0.03529171645641327 +Loss at step 100: 0.05482344701886177 +Loss at step 150: 0.048784878104925156 +Loss at step 200: 0.04111328348517418 +Loss at step 250: 0.03428546339273453 +Loss at step 300: 0.03472927585244179 +Loss at step 350: 0.054579637944698334 +Loss at step 400: 0.06024343892931938 +Loss at step 450: 0.042097724974155426 +Loss at step 500: 0.04843873158097267 +Loss at step 550: 0.05022285133600235 +Loss at step 600: 0.04291398078203201 +Loss at step 650: 0.03270597383379936 +Loss at step 700: 0.04082271084189415 +Loss at step 750: 0.03781318664550781 +Loss at step 800: 0.03819233924150467 +Loss at step 850: 0.04560283571481705 +Loss at step 900: 0.03179331496357918 +Mean training loss after epoch 215: 0.04550626027240936 + +EPOCH: 216 +Loss at step 0: 0.043373238295316696 +Loss at step 50: 0.04261237755417824 +Loss at step 100: 0.03885236755013466 +Loss at step 150: 0.04303615167737007 +Loss at step 200: 0.04896118491888046 +Loss at step 250: 0.039159659296274185 +Loss at step 300: 0.05741894245147705 +Loss at step 350: 0.044325944036245346 +Loss at step 400: 0.03783963993191719 +Loss at step 450: 0.03157109394669533 +Loss at step 500: 0.041627589613199234 +Loss at step 550: 0.03817334398627281 +Loss at step 600: 0.045340098440647125 +Loss at step 650: 0.04011277109384537 +Loss at step 700: 0.044557858258485794 +Loss at step 750: 0.03209077939391136 +Loss at step 800: 0.03295816481113434 +Loss at step 850: 0.033359039574861526 +Loss at step 900: 0.03858625888824463 +Mean training loss after epoch 216: 0.04502291343550184 + +EPOCH: 217 +Loss at step 0: 0.05681389570236206 +Loss at step 50: 0.03845852240920067 +Loss at step 100: 0.056077923625707626 +Loss at step 150: 0.035981036722660065 +Loss at step 200: 0.05595504865050316 +Loss at step 250: 0.041053224354982376 +Loss at step 300: 0.03926733508706093 +Loss at step 350: 0.04627830162644386 +Loss at step 400: 0.03268348053097725 +Loss at step 450: 0.04414980486035347 +Loss at step 500: 0.034855637699365616 +Loss at step 550: 0.05574671924114227 +Loss at step 600: 0.055037789046764374 +Loss at step 650: 0.05297153815627098 +Loss at step 700: 0.04475615546107292 +Loss at step 750: 0.03842373192310333 +Loss at step 800: 0.035571083426475525 +Loss at step 850: 0.056201059371232986 +Loss at step 900: 0.043609797954559326 +Mean training loss after epoch 217: 0.04421219696550926 + +EPOCH: 218 +Loss at step 0: 0.052963174879550934 +Loss at step 50: 0.048171114176511765 +Loss at step 100: 0.0396120585501194 +Loss at step 150: 0.0504167303442955 +Loss at step 200: 0.058281030505895615 +Loss at step 250: 0.03602902591228485 +Loss at step 300: 0.032333582639694214 +Loss at step 350: 0.050377704203128815 +Loss at step 400: 0.038287870585918427 +Loss at step 450: 0.04581325873732567 +Loss at step 500: 0.05600558966398239 +Loss at step 550: 0.049919210374355316 +Loss at step 600: 0.05820900574326515 +Loss at step 650: 0.03368315473198891 +Loss at step 700: 0.0380982868373394 +Loss at step 750: 0.05515281483530998 +Loss at step 800: 0.0401419922709465 +Loss at step 850: 0.04098648950457573 +Loss at step 900: 0.036737143993377686 +Mean training loss after epoch 218: 0.04497726156966074 + +EPOCH: 219 +Loss at step 0: 0.03259208798408508 +Loss at step 50: 0.03930714726448059 +Loss at step 100: 0.04052778705954552 +Loss at step 150: 0.034389447420835495 +Loss at step 200: 0.06045966595411301 +Loss at step 250: 0.03595029562711716 +Loss at step 300: 0.05116666853427887 +Loss at step 350: 0.04187660291790962 +Loss at step 400: 0.03299875184893608 +Loss at step 450: 0.04430915787816048 +Loss at step 500: 0.03914002329111099 +Loss at step 550: 0.058175891637802124 +Loss at step 600: 0.03808284550905228 +Loss at step 650: 0.04047476127743721 +Loss at step 700: 0.040011998265981674 +Loss at step 750: 0.04318106919527054 +Loss at step 800: 0.04279594123363495 +Loss at step 850: 0.04298307001590729 +Loss at step 900: 0.03839527815580368 +Mean training loss after epoch 219: 0.04495206636700358 + +EPOCH: 220 +Loss at step 0: 0.06102355942130089 +Loss at step 50: 0.04265911504626274 +Loss at step 100: 0.03871495649218559 +Loss at step 150: 0.03565714880824089 +Loss at step 200: 0.0888419821858406 +Loss at step 250: 0.05545511096715927 +Loss at step 300: 0.051695115864276886 +Loss at step 350: 0.04234030842781067 +Loss at step 400: 0.04717526212334633 +Loss at step 450: 0.05097170174121857 +Loss at step 500: 0.038573749363422394 +Loss at step 550: 0.03916428983211517 +Loss at step 600: 0.06650888174772263 +Loss at step 650: 0.04581679031252861 +Loss at step 700: 0.03397097438573837 +Loss at step 750: 0.061474841088056564 +Loss at step 800: 0.03696952387690544 +Loss at step 850: 0.04420066624879837 +Loss at step 900: 0.03729543834924698 +Mean training loss after epoch 220: 0.044966987175330805 + +EPOCH: 221 +Loss at step 0: 0.036624446511268616 +Loss at step 50: 0.05607498437166214 +Loss at step 100: 0.0401344895362854 +Loss at step 150: 0.03695680946111679 +Loss at step 200: 0.038157038390636444 +Loss at step 250: 0.04234910011291504 +Loss at step 300: 0.05087887495756149 +Loss at step 350: 0.03369126468896866 +Loss at step 400: 0.04398808255791664 +Loss at step 450: 0.04650338739156723 +Loss at step 500: 0.042170874774456024 +Loss at step 550: 0.04143141582608223 +Loss at step 600: 0.035873983055353165 +Loss at step 650: 0.06861819326877594 +Loss at step 700: 0.06882251799106598 +Loss at step 750: 0.0421731062233448 +Loss at step 800: 0.04273369908332825 +Loss at step 850: 0.05213435739278793 +Loss at step 900: 0.04682132229208946 +Mean training loss after epoch 221: 0.0447278780612483 + +EPOCH: 222 +Loss at step 0: 0.03775576502084732 +Loss at step 50: 0.053810253739356995 +Loss at step 100: 0.04413290321826935 +Loss at step 150: 0.039237361401319504 +Loss at step 200: 0.06173623353242874 +Loss at step 250: 0.04083361476659775 +Loss at step 300: 0.04168682545423508 +Loss at step 350: 0.049414049834012985 +Loss at step 400: 0.05077894404530525 +Loss at step 450: 0.05252714455127716 +Loss at step 500: 0.0364990159869194 +Loss at step 550: 0.04104870557785034 +Loss at step 600: 0.06324135512113571 +Loss at step 650: 0.04146245867013931 +Loss at step 700: 0.03638554364442825 +Loss at step 750: 0.038369908928871155 +Loss at step 800: 0.03872247040271759 +Loss at step 850: 0.03753077983856201 +Loss at step 900: 0.03175564482808113 +Mean training loss after epoch 222: 0.044707719135338435 + +EPOCH: 223 +Loss at step 0: 0.042600058019161224 +Loss at step 50: 0.0361095629632473 +Loss at step 100: 0.042472511529922485 +Loss at step 150: 0.05590534210205078 +Loss at step 200: 0.045976608991622925 +Loss at step 250: 0.0428466722369194 +Loss at step 300: 0.054691363126039505 +Loss at step 350: 0.04458990320563316 +Loss at step 400: 0.048201583325862885 +Loss at step 450: 0.059085775166749954 +Loss at step 500: 0.055965278297662735 +Loss at step 550: 0.04840507358312607 +Loss at step 600: 0.05460871011018753 +Loss at step 650: 0.043371252715587616 +Loss at step 700: 0.03476656228303909 +Loss at step 750: 0.055277228355407715 +Loss at step 800: 0.03754817321896553 +Loss at step 850: 0.03959881514310837 +Loss at step 900: 0.04707843437790871 +Mean training loss after epoch 223: 0.044869944457210965 + +EPOCH: 224 +Loss at step 0: 0.04216871038079262 +Loss at step 50: 0.0509503036737442 +Loss at step 100: 0.03815936669707298 +Loss at step 150: 0.03966128081083298 +Loss at step 200: 0.03773387894034386 +Loss at step 250: 0.03810032457113266 +Loss at step 300: 0.03918851166963577 +Loss at step 350: 0.05037347972393036 +Loss at step 400: 0.03610139340162277 +Loss at step 450: 0.04785896837711334 +Loss at step 500: 0.042082108557224274 +Loss at step 550: 0.04990096762776375 +Loss at step 600: 0.05488254129886627 +Loss at step 650: 0.049004245549440384 +Loss at step 700: 0.04066131263971329 +Loss at step 750: 0.039992645382881165 +Loss at step 800: 0.048601362854242325 +Loss at step 850: 0.05271443352103233 +Loss at step 900: 0.0707949623465538 +Mean training loss after epoch 224: 0.04473640054051302 + +EPOCH: 225 +Loss at step 0: 0.03759758174419403 +Loss at step 50: 0.040139682590961456 +Loss at step 100: 0.037246230989694595 +Loss at step 150: 0.052819427102804184 +Loss at step 200: 0.04761229082942009 +Loss at step 250: 0.03445148095488548 +Loss at step 300: 0.03948068246245384 +Loss at step 350: 0.045023079961538315 +Loss at step 400: 0.04875577241182327 +Loss at step 450: 0.04386817663908005 +Loss at step 500: 0.04167744517326355 +Loss at step 550: 0.042385250329971313 +Loss at step 600: 0.044522128999233246 +Loss at step 650: 0.07706721872091293 +Loss at step 700: 0.041859012097120285 +Loss at step 750: 0.04114852473139763 +Loss at step 800: 0.061039477586746216 +Loss at step 850: 0.04040620103478432 +Loss at step 900: 0.03796588256955147 +Mean training loss after epoch 225: 0.04497609099051528 + +EPOCH: 226 +Loss at step 0: 0.03994573652744293 +Loss at step 50: 0.04332776740193367 +Loss at step 100: 0.04103902354836464 +Loss at step 150: 0.06540388613939285 +Loss at step 200: 0.07239285111427307 +Loss at step 250: 0.03270655497908592 +Loss at step 300: 0.037417493760585785 +Loss at step 350: 0.043828971683979034 +Loss at step 400: 0.046033453196287155 +Loss at step 450: 0.05358675494790077 +Loss at step 500: 0.040970269590616226 +Loss at step 550: 0.03782530128955841 +Loss at step 600: 0.039207715541124344 +Loss at step 650: 0.03836981579661369 +Loss at step 700: 0.049658969044685364 +Loss at step 750: 0.041786376386880875 +Loss at step 800: 0.04532037675380707 +Loss at step 850: 0.0385938324034214 +Loss at step 900: 0.05464131012558937 +Mean training loss after epoch 226: 0.04525555488365546 + +EPOCH: 227 +Loss at step 0: 0.049732666462659836 +Loss at step 50: 0.08904262632131577 +Loss at step 100: 0.0422864630818367 +Loss at step 150: 0.0387810617685318 +Loss at step 200: 0.038394052535295486 +Loss at step 250: 0.039801888167858124 +Loss at step 300: 0.03375151753425598 +Loss at step 350: 0.03412676230072975 +Loss at step 400: 0.043739572167396545 +Loss at step 450: 0.03239547833800316 +Loss at step 500: 0.057137902826070786 +Loss at step 550: 0.04229942709207535 +Loss at step 600: 0.04437802731990814 +Loss at step 650: 0.0532885417342186 +Loss at step 700: 0.055092040449380875 +Loss at step 750: 0.04317266866564751 +Loss at step 800: 0.045014284551143646 +Loss at step 850: 0.04303862899541855 +Loss at step 900: 0.05575668811798096 +Mean training loss after epoch 227: 0.044724021202274984 + +EPOCH: 228 +Loss at step 0: 0.04252273216843605 +Loss at step 50: 0.03665199875831604 +Loss at step 100: 0.03841029852628708 +Loss at step 150: 0.08242028951644897 +Loss at step 200: 0.05563180148601532 +Loss at step 250: 0.0397631973028183 +Loss at step 300: 0.07340270280838013 +Loss at step 350: 0.038010433316230774 +Loss at step 400: 0.04204168915748596 +Loss at step 450: 0.03747735172510147 +Loss at step 500: 0.04167129099369049 +Loss at step 550: 0.04536835476756096 +Loss at step 600: 0.043278586119413376 +Loss at step 650: 0.034653812646865845 +Loss at step 700: 0.04286987707018852 +Loss at step 750: 0.04035497084259987 +Loss at step 800: 0.052002497017383575 +Loss at step 850: 0.03951137885451317 +Loss at step 900: 0.052672963589429855 +Mean training loss after epoch 228: 0.04490817954013152 + +EPOCH: 229 +Loss at step 0: 0.05666624754667282 +Loss at step 50: 0.04129583761096001 +Loss at step 100: 0.040395479649305344 +Loss at step 150: 0.037631504237651825 +Loss at step 200: 0.04560894891619682 +Loss at step 250: 0.04400213807821274 +Loss at step 300: 0.05677139386534691 +Loss at step 350: 0.04267866536974907 +Loss at step 400: 0.054595232009887695 +Loss at step 450: 0.04197762906551361 +Loss at step 500: 0.03964201733469963 +Loss at step 550: 0.03835482895374298 +Loss at step 600: 0.04364710673689842 +Loss at step 650: 0.03945349529385567 +Loss at step 700: 0.05060425028204918 +Loss at step 750: 0.0383380651473999 +Loss at step 800: 0.03871174901723862 +Loss at step 850: 0.0911061242222786 +Loss at step 900: 0.07000719010829926 +Mean training loss after epoch 229: 0.04477876428363801 + +EPOCH: 230 +Loss at step 0: 0.03434494882822037 +Loss at step 50: 0.040060460567474365 +Loss at step 100: 0.04690749570727348 +Loss at step 150: 0.05711350217461586 +Loss at step 200: 0.04166051372885704 +Loss at step 250: 0.040621962398290634 +Loss at step 300: 0.040453843772411346 +Loss at step 350: 0.04803289473056793 +Loss at step 400: 0.038030534982681274 +Loss at step 450: 0.037674348801374435 +Loss at step 500: 0.03321957215666771 +Loss at step 550: 0.038866400718688965 +Loss at step 600: 0.042013779282569885 +Loss at step 650: 0.04012990742921829 +Loss at step 700: 0.03919626772403717 +Loss at step 750: 0.04081185907125473 +Loss at step 800: 0.03656207025051117 +Loss at step 850: 0.05564993619918823 +Loss at step 900: 0.061874303966760635 +Mean training loss after epoch 230: 0.04482146266744589 + +EPOCH: 231 +Loss at step 0: 0.057358138263225555 +Loss at step 50: 0.054861679673194885 +Loss at step 100: 0.057305946946144104 +Loss at step 150: 0.03653672710061073 +Loss at step 200: 0.04058054834604263 +Loss at step 250: 0.03630441799759865 +Loss at step 300: 0.04022179916501045 +Loss at step 350: 0.043208301067352295 +Loss at step 400: 0.041075047105550766 +Loss at step 450: 0.04240778461098671 +Loss at step 500: 0.042642347514629364 +Loss at step 550: 0.052777230739593506 +Loss at step 600: 0.039186976850032806 +Loss at step 650: 0.03977037966251373 +Loss at step 700: 0.04155982658267021 +Loss at step 750: 0.03896843641996384 +Loss at step 800: 0.038188107311725616 +Loss at step 850: 0.0390721894800663 +Loss at step 900: 0.03189798817038536 +Mean training loss after epoch 231: 0.045035481206707355 + +EPOCH: 232 +Loss at step 0: 0.05203377455472946 +Loss at step 50: 0.032070085406303406 +Loss at step 100: 0.04165864363312721 +Loss at step 150: 0.044198133051395416 +Loss at step 200: 0.05873839557170868 +Loss at step 250: 0.041851989924907684 +Loss at step 300: 0.034125711768865585 +Loss at step 350: 0.04675750806927681 +Loss at step 400: 0.03594406321644783 +Loss at step 450: 0.04332752525806427 +Loss at step 500: 0.044951438903808594 +Loss at step 550: 0.03468693420290947 +Loss at step 600: 0.05695968493819237 +Loss at step 650: 0.04225953668355942 +Loss at step 700: 0.051165319979190826 +Loss at step 750: 0.04052015766501427 +Loss at step 800: 0.056319672614336014 +Loss at step 850: 0.04222318157553673 +Loss at step 900: 0.04253341257572174 +Mean training loss after epoch 232: 0.044663353070521404 + +EPOCH: 233 +Loss at step 0: 0.043165430426597595 +Loss at step 50: 0.04060262069106102 +Loss at step 100: 0.033727094531059265 +Loss at step 150: 0.04066385328769684 +Loss at step 200: 0.037190794944763184 +Loss at step 250: 0.037413518875837326 +Loss at step 300: 0.041389066725969315 +Loss at step 350: 0.052861087024211884 +Loss at step 400: 0.04043381288647652 +Loss at step 450: 0.03807887062430382 +Loss at step 500: 0.07082954794168472 +Loss at step 550: 0.04320300370454788 +Loss at step 600: 0.04592312499880791 +Loss at step 650: 0.038250718265771866 +Loss at step 700: 0.03693762049078941 +Loss at step 750: 0.04395647719502449 +Loss at step 800: 0.04293740913271904 +Loss at step 850: 0.04935115948319435 +Loss at step 900: 0.05848970264196396 +Mean training loss after epoch 233: 0.04456619074397377 + +EPOCH: 234 +Loss at step 0: 0.05889430269598961 +Loss at step 50: 0.04189841449260712 +Loss at step 100: 0.0425519235432148 +Loss at step 150: 0.04033303260803223 +Loss at step 200: 0.03849061578512192 +Loss at step 250: 0.03834646940231323 +Loss at step 300: 0.054117921739816666 +Loss at step 350: 0.039138007909059525 +Loss at step 400: 0.038986776024103165 +Loss at step 450: 0.03748530149459839 +Loss at step 500: 0.06927170604467392 +Loss at step 550: 0.050579484552145004 +Loss at step 600: 0.05106034502387047 +Loss at step 650: 0.04545694217085838 +Loss at step 700: 0.05148870125412941 +Loss at step 750: 0.05205365642905235 +Loss at step 800: 0.039206478744745255 +Loss at step 850: 0.03897245228290558 +Loss at step 900: 0.034680310636758804 +Mean training loss after epoch 234: 0.04481489199406303 + +EPOCH: 235 +Loss at step 0: 0.08158397674560547 +Loss at step 50: 0.03938521444797516 +Loss at step 100: 0.043921031057834625 +Loss at step 150: 0.05398615077137947 +Loss at step 200: 0.05478700250387192 +Loss at step 250: 0.05557892471551895 +Loss at step 300: 0.04292656481266022 +Loss at step 350: 0.037867505103349686 +Loss at step 400: 0.040583230555057526 +Loss at step 450: 0.04054031893610954 +Loss at step 500: 0.038507431745529175 +Loss at step 550: 0.03390727564692497 +Loss at step 600: 0.03952161595225334 +Loss at step 650: 0.05023124814033508 +Loss at step 700: 0.054421987384557724 +Loss at step 750: 0.04453371837735176 +Loss at step 800: 0.036133889108896255 +Loss at step 850: 0.03931332379579544 +Loss at step 900: 0.04957771301269531 +Mean training loss after epoch 235: 0.04500210646595528 + +EPOCH: 236 +Loss at step 0: 0.04138018935918808 +Loss at step 50: 0.0410768985748291 +Loss at step 100: 0.04716905206441879 +Loss at step 150: 0.037718143314123154 +Loss at step 200: 0.03558113053441048 +Loss at step 250: 0.051637355238199234 +Loss at step 300: 0.04293530061841011 +Loss at step 350: 0.04584492743015289 +Loss at step 400: 0.041622091084718704 +Loss at step 450: 0.04335742071270943 +Loss at step 500: 0.04358315467834473 +Loss at step 550: 0.037794582545757294 +Loss at step 600: 0.06632398813962936 +Loss at step 650: 0.038531284779310226 +Loss at step 700: 0.055280305445194244 +Loss at step 750: 0.05984099954366684 +Loss at step 800: 0.0339098758995533 +Loss at step 850: 0.04214135929942131 +Loss at step 900: 0.040118806064128876 +Mean training loss after epoch 236: 0.044860685321647345 + +EPOCH: 237 +Loss at step 0: 0.044292692095041275 +Loss at step 50: 0.04102764278650284 +Loss at step 100: 0.040488895028829575 +Loss at step 150: 0.03925291448831558 +Loss at step 200: 0.051615528762340546 +Loss at step 250: 0.05971384793519974 +Loss at step 300: 0.0350177139043808 +Loss at step 350: 0.03716812655329704 +Loss at step 400: 0.03430664539337158 +Loss at step 450: 0.03838789835572243 +Loss at step 500: 0.054119765758514404 +Loss at step 550: 0.05195821821689606 +Loss at step 600: 0.031489208340644836 +Loss at step 650: 0.0379963219165802 +Loss at step 700: 0.04961507022380829 +Loss at step 750: 0.04009483382105827 +Loss at step 800: 0.07326838374137878 +Loss at step 850: 0.051296159625053406 +Loss at step 900: 0.03729995712637901 +Mean training loss after epoch 237: 0.044610461817859715 + +EPOCH: 238 +Loss at step 0: 0.042948514223098755 +Loss at step 50: 0.04202233627438545 +Loss at step 100: 0.04033079743385315 +Loss at step 150: 0.043983303010463715 +Loss at step 200: 0.044319335371255875 +Loss at step 250: 0.033823177218437195 +Loss at step 300: 0.04068777710199356 +Loss at step 350: 0.04168499633669853 +Loss at step 400: 0.052600763738155365 +Loss at step 450: 0.03858019784092903 +Loss at step 500: 0.040227260440588 +Loss at step 550: 0.04275752976536751 +Loss at step 600: 0.08539959043264389 +Loss at step 650: 0.04307982325553894 +Loss at step 700: 0.0384836383163929 +Loss at step 750: 0.04045542702078819 +Loss at step 800: 0.03990759700536728 +Loss at step 850: 0.05407320708036423 +Loss at step 900: 0.03998187184333801 +Mean training loss after epoch 238: 0.04446868840724166 + +EPOCH: 239 +Loss at step 0: 0.03785493224859238 +Loss at step 50: 0.04436418041586876 +Loss at step 100: 0.05409589037299156 +Loss at step 150: 0.044592611491680145 +Loss at step 200: 0.06618251651525497 +Loss at step 250: 0.041909780353307724 +Loss at step 300: 0.037626031786203384 +Loss at step 350: 0.0448286198079586 +Loss at step 400: 0.06135351583361626 +Loss at step 450: 0.060119789093732834 +Loss at step 500: 0.03888460993766785 +Loss at step 550: 0.03902534395456314 +Loss at step 600: 0.06106070801615715 +Loss at step 650: 0.045656196773052216 +Loss at step 700: 0.05269518122076988 +Loss at step 750: 0.05720165744423866 +Loss at step 800: 0.043440721929073334 +Loss at step 850: 0.03504670038819313 +Loss at step 900: 0.04462891444563866 +Mean training loss after epoch 239: 0.04510823841184886 + +EPOCH: 240 +Loss at step 0: 0.04483906179666519 +Loss at step 50: 0.04642260819673538 +Loss at step 100: 0.03795284405350685 +Loss at step 150: 0.03979024663567543 +Loss at step 200: 0.035046957433223724 +Loss at step 250: 0.057414375245571136 +Loss at step 300: 0.03562944754958153 +Loss at step 350: 0.05575153976678848 +Loss at step 400: 0.038566555827856064 +Loss at step 450: 0.05375271290540695 +Loss at step 500: 0.040569838136434555 +Loss at step 550: 0.050674013793468475 +Loss at step 600: 0.03350871056318283 +Loss at step 650: 0.03773665428161621 +Loss at step 700: 0.06452560424804688 +Loss at step 750: 0.03790397569537163 +Loss at step 800: 0.03755227476358414 +Loss at step 850: 0.055200040340423584 +Loss at step 900: 0.04047031328082085 +Mean training loss after epoch 240: 0.04476882062002476 + +EPOCH: 241 +Loss at step 0: 0.05416002497076988 +Loss at step 50: 0.03903517499566078 +Loss at step 100: 0.04650753363966942 +Loss at step 150: 0.04292520135641098 +Loss at step 200: 0.052666760981082916 +Loss at step 250: 0.04869057983160019 +Loss at step 300: 0.05322317034006119 +Loss at step 350: 0.04197530448436737 +Loss at step 400: 0.05593036860227585 +Loss at step 450: 0.04518859088420868 +Loss at step 500: 0.03525535389780998 +Loss at step 550: 0.06501279771327972 +Loss at step 600: 0.0375157967209816 +Loss at step 650: 0.04103926196694374 +Loss at step 700: 0.06551089137792587 +Loss at step 750: 0.038558341562747955 +Loss at step 800: 0.06308089941740036 +Loss at step 850: 0.04044673219323158 +Loss at step 900: 0.04032853990793228 +Mean training loss after epoch 241: 0.04456710113859825 + +EPOCH: 242 +Loss at step 0: 0.053582120686769485 +Loss at step 50: 0.035377200692892075 +Loss at step 100: 0.03758418560028076 +Loss at step 150: 0.03972852975130081 +Loss at step 200: 0.03694651648402214 +Loss at step 250: 0.039644137024879456 +Loss at step 300: 0.03829959034919739 +Loss at step 350: 0.03956885263323784 +Loss at step 400: 0.035739634186029434 +Loss at step 450: 0.038445767015218735 +Loss at step 500: 0.04465411603450775 +Loss at step 550: 0.03384018689393997 +Loss at step 600: 0.05751475319266319 +Loss at step 650: 0.044355764985084534 +Loss at step 700: 0.04282025247812271 +Loss at step 750: 0.043712906539440155 +Loss at step 800: 0.0361250564455986 +Loss at step 850: 0.04287397488951683 +Loss at step 900: 0.0370936319231987 +Mean training loss after epoch 242: 0.04469253835298105 + +EPOCH: 243 +Loss at step 0: 0.0418296717107296 +Loss at step 50: 0.047556519508361816 +Loss at step 100: 0.051378361880779266 +Loss at step 150: 0.042577408254146576 +Loss at step 200: 0.0369725227355957 +Loss at step 250: 0.04857877641916275 +Loss at step 300: 0.05720876529812813 +Loss at step 350: 0.05732298269867897 +Loss at step 400: 0.07769433408975601 +Loss at step 450: 0.042948994785547256 +Loss at step 500: 0.03954272344708443 +Loss at step 550: 0.04278720170259476 +Loss at step 600: 0.03669304773211479 +Loss at step 650: 0.043697506189346313 +Loss at step 700: 0.03610610589385033 +Loss at step 750: 0.040720947086811066 +Loss at step 800: 0.05602330341935158 +Loss at step 850: 0.04530062526464462 +Loss at step 900: 0.0683240219950676 +Mean training loss after epoch 243: 0.044250055798081195 + +EPOCH: 244 +Loss at step 0: 0.03642163798213005 +Loss at step 50: 0.037944864481687546 +Loss at step 100: 0.034634463489055634 +Loss at step 150: 0.03996540606021881 +Loss at step 200: 0.040756955742836 +Loss at step 250: 0.04065973684191704 +Loss at step 300: 0.04489821568131447 +Loss at step 350: 0.0636506974697113 +Loss at step 400: 0.05513636767864227 +Loss at step 450: 0.0457930862903595 +Loss at step 500: 0.03920304775238037 +Loss at step 550: 0.038328900933265686 +Loss at step 600: 0.030833875760436058 +Loss at step 650: 0.03486771881580353 +Loss at step 700: 0.048656705766916275 +Loss at step 750: 0.03840745612978935 +Loss at step 800: 0.05285422503948212 +Loss at step 850: 0.041253913193941116 +Loss at step 900: 0.03950178623199463 +Mean training loss after epoch 244: 0.044261600373428 + +EPOCH: 245 +Loss at step 0: 0.04619377851486206 +Loss at step 50: 0.04145263880491257 +Loss at step 100: 0.03602668270468712 +Loss at step 150: 0.05439251288771629 +Loss at step 200: 0.05764438584446907 +Loss at step 250: 0.06927946209907532 +Loss at step 300: 0.05830817297101021 +Loss at step 350: 0.036539625376462936 +Loss at step 400: 0.03861076757311821 +Loss at step 450: 0.037898194044828415 +Loss at step 500: 0.04033428430557251 +Loss at step 550: 0.08732940256595612 +Loss at step 600: 0.04207482561469078 +Loss at step 650: 0.050010379403829575 +Loss at step 700: 0.039302993565797806 +Loss at step 750: 0.04205681383609772 +Loss at step 800: 0.03592861443758011 +Loss at step 850: 0.02947244979441166 +Loss at step 900: 0.0527786910533905 +Mean training loss after epoch 245: 0.044361620255783676 + +EPOCH: 246 +Loss at step 0: 0.041245196014642715 +Loss at step 50: 0.03672011196613312 +Loss at step 100: 0.03628725931048393 +Loss at step 150: 0.057274896651506424 +Loss at step 200: 0.0555652491748333 +Loss at step 250: 0.043276552110910416 +Loss at step 300: 0.05015305429697037 +Loss at step 350: 0.0455438457429409 +Loss at step 400: 0.05552965775132179 +Loss at step 450: 0.03813120722770691 +Loss at step 500: 0.04435499757528305 +Loss at step 550: 0.06181136891245842 +Loss at step 600: 0.04896199330687523 +Loss at step 650: 0.06912817806005478 +Loss at step 700: 0.052352894097566605 +Loss at step 750: 0.052597131580114365 +Loss at step 800: 0.03395609185099602 +Loss at step 850: 0.05153059959411621 +Loss at step 900: 0.05455060675740242 +Mean training loss after epoch 246: 0.045143949588748816 + +EPOCH: 247 +Loss at step 0: 0.03641307353973389 +Loss at step 50: 0.03872988000512123 +Loss at step 100: 0.052318017929792404 +Loss at step 150: 0.03512036055326462 +Loss at step 200: 0.03669019415974617 +Loss at step 250: 0.03964295610785484 +Loss at step 300: 0.06011725217103958 +Loss at step 350: 0.03900329768657684 +Loss at step 400: 0.03485759347677231 +Loss at step 450: 0.039891231805086136 +Loss at step 500: 0.03382882848381996 +Loss at step 550: 0.039159636944532394 +Loss at step 600: 0.03741539269685745 +Loss at step 650: 0.03465544804930687 +Loss at step 700: 0.041061703115701675 +Loss at step 750: 0.04405885562300682 +Loss at step 800: 0.05437891557812691 +Loss at step 850: 0.0420810729265213 +Loss at step 900: 0.03921925649046898 +Mean training loss after epoch 247: 0.045000420261754284 + +EPOCH: 248 +Loss at step 0: 0.06635798513889313 +Loss at step 50: 0.06660524755716324 +Loss at step 100: 0.040310803800821304 +Loss at step 150: 0.056621745228767395 +Loss at step 200: 0.040016863495111465 +Loss at step 250: 0.03802765533328056 +Loss at step 300: 0.03586029261350632 +Loss at step 350: 0.03618862107396126 +Loss at step 400: 0.04300381615757942 +Loss at step 450: 0.0597403384745121 +Loss at step 500: 0.03901727870106697 +Loss at step 550: 0.0580160990357399 +Loss at step 600: 0.04214231297373772 +Loss at step 650: 0.04932720214128494 +Loss at step 700: 0.03509383648633957 +Loss at step 750: 0.04216500744223595 +Loss at step 800: 0.05994817614555359 +Loss at step 850: 0.0720582902431488 +Loss at step 900: 0.05517829954624176 +Mean training loss after epoch 248: 0.04450346884538116 + +EPOCH: 249 +Loss at step 0: 0.04463783651590347 +Loss at step 50: 0.03758547082543373 +Loss at step 100: 0.042582422494888306 +Loss at step 150: 0.03628899157047272 +Loss at step 200: 0.07420549541711807 +Loss at step 250: 0.05580678582191467 +Loss at step 300: 0.04314513877034187 +Loss at step 350: 0.03938824310898781 +Loss at step 400: 0.03405723348259926 +Loss at step 450: 0.0672236829996109 +Loss at step 500: 0.045462023466825485 +Loss at step 550: 0.039151985198259354 +Loss at step 600: 0.04036668315529823 +Loss at step 650: 0.06410281360149384 +Loss at step 700: 0.04101645573973656 +Loss at step 750: 0.040529631078243256 +Loss at step 800: 0.043987613171339035 +Loss at step 850: 0.033559225499629974 +Loss at step 900: 0.05273624137043953 +Mean training loss after epoch 249: 0.04516314866462115 + +EPOCH: 250 +Loss at step 0: 0.054309941828250885 +Loss at step 50: 0.0413537323474884 +Loss at step 100: 0.038513585925102234 +Loss at step 150: 0.03851177543401718 +Loss at step 200: 0.03764740750193596 +Loss at step 250: 0.032610274851322174 +Loss at step 300: 0.04569524526596069 +Loss at step 350: 0.04612278938293457 +Loss at step 400: 0.042659685015678406 +Loss at step 450: 0.03714698553085327 +Loss at step 500: 0.03636813163757324 +Loss at step 550: 0.04427662491798401 +Loss at step 600: 0.03525147959589958 +Loss at step 650: 0.03932900354266167 +Loss at step 700: 0.03503448888659477 +Loss at step 750: 0.058959297835826874 +Loss at step 800: 0.04120393469929695 +Loss at step 850: 0.044425517320632935 +Loss at step 900: 0.040592897683382034 +Mean training loss after epoch 250: 0.04448213370274633 + +EPOCH: 251 +Loss at step 0: 0.03864029794931412 +Loss at step 50: 0.03901481628417969 +Loss at step 100: 0.039011433720588684 +Loss at step 150: 0.04142244532704353 +Loss at step 200: 0.03568299487233162 +Loss at step 250: 0.041677020490169525 +Loss at step 300: 0.04524819552898407 +Loss at step 350: 0.03092583268880844 +Loss at step 400: 0.08884777873754501 +Loss at step 450: 0.03914395719766617 +Loss at step 500: 0.04016636312007904 +Loss at step 550: 0.0450739748775959 +Loss at step 600: 0.038682762533426285 +Loss at step 650: 0.043217163532972336 +Loss at step 700: 0.03636080399155617 +Loss at step 750: 0.029114747419953346 +Loss at step 800: 0.03528822213411331 +Loss at step 850: 0.0347789041697979 +Loss at step 900: 0.04094332456588745 +Mean training loss after epoch 251: 0.044786198445951254 + +EPOCH: 252 +Loss at step 0: 0.06574662774801254 +Loss at step 50: 0.057829611003398895 +Loss at step 100: 0.05110575631260872 +Loss at step 150: 0.037508122622966766 +Loss at step 200: 0.03761693090200424 +Loss at step 250: 0.043437499552965164 +Loss at step 300: 0.05702256038784981 +Loss at step 350: 0.033763155341148376 +Loss at step 400: 0.04185625910758972 +Loss at step 450: 0.03316926211118698 +Loss at step 500: 0.03681012988090515 +Loss at step 550: 0.05191672593355179 +Loss at step 600: 0.03989740088582039 +Loss at step 650: 0.06346093118190765 +Loss at step 700: 0.03768730163574219 +Loss at step 750: 0.034530941396951675 +Loss at step 800: 0.03948193043470383 +Loss at step 850: 0.04731574282050133 +Loss at step 900: 0.045385509729385376 +Mean training loss after epoch 252: 0.04453521531059353 + +EPOCH: 253 +Loss at step 0: 0.041396431624889374 +Loss at step 50: 0.03518753871321678 +Loss at step 100: 0.03264223411679268 +Loss at step 150: 0.0390775203704834 +Loss at step 200: 0.04928487539291382 +Loss at step 250: 0.048573993146419525 +Loss at step 300: 0.039923325181007385 +Loss at step 350: 0.039626359939575195 +Loss at step 400: 0.05669177696108818 +Loss at step 450: 0.03901868313550949 +Loss at step 500: 0.05529722943902016 +Loss at step 550: 0.03444294258952141 +Loss at step 600: 0.03714100271463394 +Loss at step 650: 0.05709639564156532 +Loss at step 700: 0.0428038015961647 +Loss at step 750: 0.03960294649004936 +Loss at step 800: 0.03291668742895126 +Loss at step 850: 0.05569472536444664 +Loss at step 900: 0.03604894131422043 +Mean training loss after epoch 253: 0.04444395763469912 + +EPOCH: 254 +Loss at step 0: 0.053607672452926636 +Loss at step 50: 0.036721326410770416 +Loss at step 100: 0.046456318348646164 +Loss at step 150: 0.03913817182183266 +Loss at step 200: 0.0369490385055542 +Loss at step 250: 0.042121268808841705 +Loss at step 300: 0.04145378991961479 +Loss at step 350: 0.05772620812058449 +Loss at step 400: 0.03847204148769379 +Loss at step 450: 0.03718934208154678 +Loss at step 500: 0.03975844383239746 +Loss at step 550: 0.04130327329039574 +Loss at step 600: 0.0561111681163311 +Loss at step 650: 0.052411098033189774 +Loss at step 700: 0.06321009993553162 +Loss at step 750: 0.03840126842260361 +Loss at step 800: 0.041070323437452316 +Loss at step 850: 0.04388450086116791 +Loss at step 900: 0.058426544070243835 +Mean training loss after epoch 254: 0.04487379144495929 + +EPOCH: 255 +Loss at step 0: 0.055367182940244675 +Loss at step 50: 0.039185795933008194 +Loss at step 100: 0.0385357066988945 +Loss at step 150: 0.06740199774503708 +Loss at step 200: 0.033642131835222244 +Loss at step 250: 0.03386206924915314 +Loss at step 300: 0.038438718765974045 +Loss at step 350: 0.04159536957740784 +Loss at step 400: 0.03636397048830986 +Loss at step 450: 0.03433806449174881 +Loss at step 500: 0.04151605814695358 +Loss at step 550: 0.054261527955532074 +Loss at step 600: 0.05456285923719406 +Loss at step 650: 0.05478399619460106 +Loss at step 700: 0.03770003095269203 +Loss at step 750: 0.03772468492388725 +Loss at step 800: 0.04290911182761192 +Loss at step 850: 0.03391693904995918 +Loss at step 900: 0.04064300283789635 +Mean training loss after epoch 255: 0.044759759181010314 + +EPOCH: 256 +Loss at step 0: 0.05573343113064766 +Loss at step 50: 0.03389902785420418 +Loss at step 100: 0.04108601436018944 +Loss at step 150: 0.036001987755298615 +Loss at step 200: 0.06409583985805511 +Loss at step 250: 0.040740005671978 +Loss at step 300: 0.05385272577404976 +Loss at step 350: 0.03897770121693611 +Loss at step 400: 0.05051175504922867 +Loss at step 450: 0.06579221785068512 +Loss at step 500: 0.03865097835659981 +Loss at step 550: 0.044112592935562134 +Loss at step 600: 0.04062749445438385 +Loss at step 650: 0.052707258611917496 +Loss at step 700: 0.03706040233373642 +Loss at step 750: 0.0546928308904171 +Loss at step 800: 0.03638901934027672 +Loss at step 850: 0.04192202165722847 +Loss at step 900: 0.035858165472745895 +Mean training loss after epoch 256: 0.04468840133271682