diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/partial_model_weights.pth b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/partial_model_weights.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cc9fbaa4e8a06a34e24fde2130d927f7695747e --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/partial_model_weights.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d69d3076cf36901714a712ef246c8f8aa8be34bc6c5d9aedb36b51c90e6cd90 +size 1975288322 diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_config.json b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..af9da6f0b7292abd6082ed630f08e870d30f570b --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_config.json @@ -0,0 +1,29 @@ +{ + "stage": "stage2", + "lr": 3e-05, + "epochs": 10, + "log_interval": 4, + "gradient_clip": 1.0, + "tr_batch_size": 4, + "te_batch_size": 4, + "gradient_accumulation_steps": 1, + "update_params": [ + "all" + ], + "corpus": "math_derivation", + "num_of_sents": [ + 12, + 12 + ], + "encoder": "bert-base-cased", + "repeat": 1, + "max_num_each_cat": 2000, + "fb_mode": 0.0, + "set_loss_mask": false, + "use_label_dec": true, + "use_label_enc": false, + "decoder": "Qwen/Qwen2.5-0.5B", + "pretrained_path": null, + "device": "cuda", + "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000" +} \ No newline at end of file diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_log.log b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_log.log new file mode 100644 index 0000000000000000000000000000000000000000..70f8c468c9476abdb2a8d8fab57b517b46fb79aa --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_2000/train_log.log @@ -0,0 +1,8785 @@ +* training corpus: math_derivation +* total num: 14000 +* epochs: 10 +* batch size: 4 +* gradient_accumulation_steps: 1 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 2800 batches | train loss 2.2020216 +| epoch 1 | 7/ 2800 batches | train loss 2.1149039 +| epoch 1 | 11/ 2800 batches | train loss 1.9697452 +| epoch 1 | 15/ 2800 batches | train loss 2.0499644 +| epoch 1 | 19/ 2800 batches | train loss 1.5329959 +| epoch 1 | 23/ 2800 batches | train loss 1.2747960 +| epoch 1 | 27/ 2800 batches | train loss 0.9322165 +| epoch 1 | 31/ 2800 batches | train loss 0.8983100 +| epoch 1 | 35/ 2800 batches | train loss 0.6782925 +| epoch 1 | 39/ 2800 batches | train loss 0.7890149 +| epoch 1 | 43/ 2800 batches | train loss 0.5866985 +| epoch 1 | 47/ 2800 batches | train loss 0.6262555 +| epoch 1 | 51/ 2800 batches | train loss 0.5860358 +| epoch 1 | 55/ 2800 batches | train loss 0.8258903 +| epoch 1 | 59/ 2800 batches | train loss 0.5337468 +| epoch 1 | 63/ 2800 batches | train loss 0.5952193 +| epoch 1 | 67/ 2800 batches | train loss 0.8418536 +| epoch 1 | 71/ 2800 batches | train loss 0.6024867 +| epoch 1 | 75/ 2800 batches | train loss 0.6220992 +| epoch 1 | 79/ 2800 batches | train loss 0.7026230 +| epoch 1 | 83/ 2800 batches | train loss 0.5123565 +| epoch 1 | 87/ 2800 batches | train loss 0.7483655 +| epoch 1 | 91/ 2800 batches | train loss 0.8240635 +| epoch 1 | 95/ 2800 batches | train loss 0.7012210 +| epoch 1 | 99/ 2800 batches | train loss 0.5420492 +| epoch 1 | 103/ 2800 batches | train loss 0.5841335 +| epoch 1 | 107/ 2800 batches | train loss 0.5669057 +| epoch 1 | 111/ 2800 batches | train loss 0.6388200 +| epoch 1 | 115/ 2800 batches | train loss 0.5682598 +| epoch 1 | 119/ 2800 batches | train loss 0.5626957 +| epoch 1 | 123/ 2800 batches | train loss 0.4469690 +| epoch 1 | 127/ 2800 batches | train loss 0.6130927 +| epoch 1 | 131/ 2800 batches | train loss 0.5025209 +| epoch 1 | 135/ 2800 batches | train loss 0.4566346 +| epoch 1 | 139/ 2800 batches | train loss 0.4748950 +| epoch 1 | 143/ 2800 batches | train loss 0.5163067 +| epoch 1 | 147/ 2800 batches | train loss 0.5860047 +| epoch 1 | 151/ 2800 batches | train loss 0.5687558 +| epoch 1 | 155/ 2800 batches | train loss 0.4557992 +| epoch 1 | 159/ 2800 batches | train loss 0.6358055 +| epoch 1 | 163/ 2800 batches | train loss 0.6121570 +| epoch 1 | 167/ 2800 batches | train loss 0.6853542 +| epoch 1 | 171/ 2800 batches | train loss 0.5233868 +| epoch 1 | 175/ 2800 batches | train loss 0.5910053 +| epoch 1 | 179/ 2800 batches | train loss 0.6220578 +| epoch 1 | 183/ 2800 batches | train loss 0.6984842 +| epoch 1 | 187/ 2800 batches | train loss 0.4568812 +| epoch 1 | 191/ 2800 batches | train loss 0.4416384 +| epoch 1 | 195/ 2800 batches | train loss 0.6342903 +| epoch 1 | 199/ 2800 batches | train loss 0.6297632 +| epoch 1 | 203/ 2800 batches | train loss 0.5750951 +| epoch 1 | 207/ 2800 batches | train loss 0.6191089 +| epoch 1 | 211/ 2800 batches | train loss 0.5729115 +| epoch 1 | 215/ 2800 batches | train loss 0.5030589 +| epoch 1 | 219/ 2800 batches | train loss 0.7118088 +| epoch 1 | 223/ 2800 batches | train loss 0.6471116 +| epoch 1 | 227/ 2800 batches | train loss 0.5523973 +| epoch 1 | 231/ 2800 batches | train loss 0.5300097 +| epoch 1 | 235/ 2800 batches | train loss 0.5203780 +| epoch 1 | 239/ 2800 batches | train loss 0.6078067 +| epoch 1 | 243/ 2800 batches | train loss 0.6921397 +| epoch 1 | 247/ 2800 batches | train loss 0.6289350 +| epoch 1 | 251/ 2800 batches | train loss 0.6552084 +| epoch 1 | 255/ 2800 batches | train loss 0.6014233 +| epoch 1 | 259/ 2800 batches | train loss 0.7178446 +| epoch 1 | 263/ 2800 batches | train loss 0.8605973 +| epoch 1 | 267/ 2800 batches | train loss 0.5017668 +| epoch 1 | 271/ 2800 batches | train loss 0.5084022 +| epoch 1 | 275/ 2800 batches | train loss 0.5281534 +| epoch 1 | 279/ 2800 batches | train loss 0.6261023 +| epoch 1 | 283/ 2800 batches | train loss 0.4714875 +| epoch 1 | 287/ 2800 batches | train loss 0.6932990 +| epoch 1 | 291/ 2800 batches | train loss 0.4775437 +| epoch 1 | 295/ 2800 batches | train loss 0.6369916 +| epoch 1 | 299/ 2800 batches | train loss 0.4895060 +| epoch 1 | 303/ 2800 batches | train loss 0.6407928 +| epoch 1 | 307/ 2800 batches | train loss 0.6184205 +| epoch 1 | 311/ 2800 batches | train loss 0.4599852 +| epoch 1 | 315/ 2800 batches | train loss 0.4911909 +| epoch 1 | 319/ 2800 batches | train loss 0.6276218 +| epoch 1 | 323/ 2800 batches | train loss 0.5685492 +| epoch 1 | 327/ 2800 batches | train loss 0.5319932 +| epoch 1 | 331/ 2800 batches | train loss 0.5564360 +| epoch 1 | 335/ 2800 batches | train loss 0.5310606 +| epoch 1 | 339/ 2800 batches | train loss 0.4106426 +| epoch 1 | 343/ 2800 batches | train loss 0.5066261 +| epoch 1 | 347/ 2800 batches | train loss 0.6075306 +| epoch 1 | 351/ 2800 batches | train loss 0.5364986 +| epoch 1 | 355/ 2800 batches | train loss 0.6667399 +| epoch 1 | 359/ 2800 batches | train loss 0.5054867 +| epoch 1 | 363/ 2800 batches | train loss 0.4914863 +| epoch 1 | 367/ 2800 batches | train loss 0.5193712 +| epoch 1 | 371/ 2800 batches | train loss 0.5703886 +| epoch 1 | 375/ 2800 batches | train loss 0.4595285 +| epoch 1 | 379/ 2800 batches | train loss 0.7044475 +| epoch 1 | 383/ 2800 batches | train loss 0.4774646 +| epoch 1 | 387/ 2800 batches | train loss 0.4866432 +| epoch 1 | 391/ 2800 batches | train loss 0.5691451 +| epoch 1 | 395/ 2800 batches | train loss 0.6119294 +| epoch 1 | 399/ 2800 batches | train loss 0.6618832 +| epoch 1 | 403/ 2800 batches | train loss 0.5823749 +| epoch 1 | 407/ 2800 batches | train loss 0.6409843 +| epoch 1 | 411/ 2800 batches | train loss 0.4584346 +| epoch 1 | 415/ 2800 batches | train loss 0.6188012 +| epoch 1 | 419/ 2800 batches | train loss 0.4995615 +| epoch 1 | 423/ 2800 batches | train loss 0.4939079 +| epoch 1 | 427/ 2800 batches | train loss 0.4843548 +| epoch 1 | 431/ 2800 batches | train loss 0.4602552 +| epoch 1 | 435/ 2800 batches | train loss 0.5391015 +| epoch 1 | 439/ 2800 batches | train loss 0.5824575 +| epoch 1 | 443/ 2800 batches | train loss 0.4472552 +| epoch 1 | 447/ 2800 batches | train loss 0.4671012 +| epoch 1 | 451/ 2800 batches | train loss 0.5471854 +| epoch 1 | 455/ 2800 batches | train loss 0.5389341 +| epoch 1 | 459/ 2800 batches | train loss 0.4909654 +| epoch 1 | 463/ 2800 batches | train loss 0.6232703 +| epoch 1 | 467/ 2800 batches | train loss 0.4920681 +| epoch 1 | 471/ 2800 batches | train loss 0.5889614 +| epoch 1 | 475/ 2800 batches | train loss 0.5276216 +| epoch 1 | 479/ 2800 batches | train loss 0.5265723 +| epoch 1 | 483/ 2800 batches | train loss 0.5034848 +| epoch 1 | 487/ 2800 batches | train loss 0.5419223 +| epoch 1 | 491/ 2800 batches | train loss 0.3955886 +| epoch 1 | 495/ 2800 batches | train loss 0.4049598 +| epoch 1 | 499/ 2800 batches | train loss 0.5565974 +| epoch 1 | 503/ 2800 batches | train loss 0.4883701 +| epoch 1 | 507/ 2800 batches | train loss 0.7406322 +| epoch 1 | 511/ 2800 batches | train loss 0.6124833 +| epoch 1 | 515/ 2800 batches | train loss 0.4469721 +| epoch 1 | 519/ 2800 batches | train loss 0.6265420 +| epoch 1 | 523/ 2800 batches | train loss 0.4671845 +| epoch 1 | 527/ 2800 batches | train loss 0.4690690 +| epoch 1 | 531/ 2800 batches | train loss 0.5765749 +| epoch 1 | 535/ 2800 batches | train loss 0.4807640 +| epoch 1 | 539/ 2800 batches | train loss 0.5165646 +| epoch 1 | 543/ 2800 batches | train loss 0.4926535 +| epoch 1 | 547/ 2800 batches | train loss 0.4489402 +| epoch 1 | 551/ 2800 batches | train loss 0.5041710 +| epoch 1 | 555/ 2800 batches | train loss 0.5700620 +| epoch 1 | 559/ 2800 batches | train loss 0.5691777 +| epoch 1 | 563/ 2800 batches | train loss 0.5367559 +| epoch 1 | 567/ 2800 batches | train loss 0.4312826 +| epoch 1 | 571/ 2800 batches | train loss 0.5466287 +| epoch 1 | 575/ 2800 batches | train loss 0.4145494 +| epoch 1 | 579/ 2800 batches | train loss 0.4483250 +| epoch 1 | 583/ 2800 batches | train loss 0.5673369 +| epoch 1 | 587/ 2800 batches | train loss 1.0100275 +| epoch 1 | 591/ 2800 batches | train loss 0.4825935 +| epoch 1 | 595/ 2800 batches | train loss 0.4523577 +| epoch 1 | 599/ 2800 batches | train loss 0.4776897 +| epoch 1 | 603/ 2800 batches | train loss 0.4146764 +| epoch 1 | 607/ 2800 batches | train loss 0.4550419 +| epoch 1 | 611/ 2800 batches | train loss 0.4180249 +| epoch 1 | 615/ 2800 batches | train loss 0.4414200 +| epoch 1 | 619/ 2800 batches | train loss 0.5340481 +| epoch 1 | 623/ 2800 batches | train loss 0.5192973 +| epoch 1 | 627/ 2800 batches | train loss 0.5189964 +| epoch 1 | 631/ 2800 batches | train loss 0.3644260 +| epoch 1 | 635/ 2800 batches | train loss 0.5526770 +| epoch 1 | 639/ 2800 batches | train loss 0.4729619 +| epoch 1 | 643/ 2800 batches | train loss 0.5497622 +| epoch 1 | 647/ 2800 batches | train loss 0.5130231 +| epoch 1 | 651/ 2800 batches | train loss 0.4990191 +| epoch 1 | 655/ 2800 batches | train loss 0.4357119 +| epoch 1 | 659/ 2800 batches | train loss 0.4286630 +| epoch 1 | 663/ 2800 batches | train loss 0.6891166 +| epoch 1 | 667/ 2800 batches | train loss 0.5974586 +| epoch 1 | 671/ 2800 batches | train loss 0.5109333 +| epoch 1 | 675/ 2800 batches | train loss 0.4544605 +| epoch 1 | 679/ 2800 batches | train loss 0.5348004 +| epoch 1 | 683/ 2800 batches | train loss 0.6589206 +| epoch 1 | 687/ 2800 batches | train loss 0.4546715 +| epoch 1 | 691/ 2800 batches | train loss 0.4839939 +| epoch 1 | 695/ 2800 batches | train loss 0.5718029 +| epoch 1 | 699/ 2800 batches | train loss 0.4595102 +| epoch 1 | 703/ 2800 batches | train loss 0.5468065 +| epoch 1 | 707/ 2800 batches | train loss 0.4217767 +| epoch 1 | 711/ 2800 batches | train loss 0.4010866 +| epoch 1 | 715/ 2800 batches | train loss 0.4795959 +| epoch 1 | 719/ 2800 batches | train loss 0.5422595 +| epoch 1 | 723/ 2800 batches | train loss 0.5089815 +| epoch 1 | 727/ 2800 batches | train loss 0.5811104 +| epoch 1 | 731/ 2800 batches | train loss 0.5459651 +| epoch 1 | 735/ 2800 batches | train loss 0.4682018 +| epoch 1 | 739/ 2800 batches | train loss 0.5124077 +| epoch 1 | 743/ 2800 batches | train loss 0.4825717 +| epoch 1 | 747/ 2800 batches | train loss 0.6125820 +| epoch 1 | 751/ 2800 batches | train loss 0.4108935 +| epoch 1 | 755/ 2800 batches | train loss 0.4462435 +| epoch 1 | 759/ 2800 batches | train loss 0.4813511 +| epoch 1 | 763/ 2800 batches | train loss 0.6342444 +| epoch 1 | 767/ 2800 batches | train loss 0.5594472 +| epoch 1 | 771/ 2800 batches | train loss 0.5984166 +| epoch 1 | 775/ 2800 batches | train loss 0.6466655 +| epoch 1 | 779/ 2800 batches | train loss 0.4844991 +| epoch 1 | 783/ 2800 batches | train loss 0.4121030 +| epoch 1 | 787/ 2800 batches | train loss 0.4838561 +| epoch 1 | 791/ 2800 batches | train loss 0.4527972 +| epoch 1 | 795/ 2800 batches | train loss 0.5566658 +| epoch 1 | 799/ 2800 batches | train loss 0.4952623 +| epoch 1 | 803/ 2800 batches | train loss 0.3967353 +| epoch 1 | 807/ 2800 batches | train loss 0.5524163 +| epoch 1 | 811/ 2800 batches | train loss 0.4547111 +| epoch 1 | 815/ 2800 batches | train loss 0.5221047 +| epoch 1 | 819/ 2800 batches | train loss 0.4834194 +| epoch 1 | 823/ 2800 batches | train loss 0.4058651 +| epoch 1 | 827/ 2800 batches | train loss 0.3964179 +| epoch 1 | 831/ 2800 batches | train loss 0.5418170 +| epoch 1 | 835/ 2800 batches | train loss 0.5113438 +| epoch 1 | 839/ 2800 batches | train loss 0.3981040 +| epoch 1 | 843/ 2800 batches | train loss 0.4836026 +| epoch 1 | 847/ 2800 batches | train loss 0.5238943 +| epoch 1 | 851/ 2800 batches | train loss 0.4994019 +| epoch 1 | 855/ 2800 batches | train loss 0.5437150 +| epoch 1 | 859/ 2800 batches | train loss 0.5415858 +| epoch 1 | 863/ 2800 batches | train loss 0.4861547 +| epoch 1 | 867/ 2800 batches | train loss 0.5353647 +| epoch 1 | 871/ 2800 batches | train loss 0.5192751 +| epoch 1 | 875/ 2800 batches | train loss 0.4421036 +| epoch 1 | 879/ 2800 batches | train loss 0.4069194 +| epoch 1 | 883/ 2800 batches | train loss 0.5063661 +| epoch 1 | 887/ 2800 batches | train loss 0.4258303 +| epoch 1 | 891/ 2800 batches | train loss 0.6309304 +| epoch 1 | 895/ 2800 batches | train loss 0.5853977 +| epoch 1 | 899/ 2800 batches | train loss 0.5635772 +| epoch 1 | 903/ 2800 batches | train loss 0.5254437 +| epoch 1 | 907/ 2800 batches | train loss 0.5071553 +| epoch 1 | 911/ 2800 batches | train loss 0.4746926 +| epoch 1 | 915/ 2800 batches | train loss 0.4380308 +| epoch 1 | 919/ 2800 batches | train loss 0.4448147 +| epoch 1 | 923/ 2800 batches | train loss 0.5059874 +| epoch 1 | 927/ 2800 batches | train loss 0.4555414 +| epoch 1 | 931/ 2800 batches | train loss 0.4664444 +| epoch 1 | 935/ 2800 batches | train loss 0.4404963 +| epoch 1 | 939/ 2800 batches | train loss 0.4108750 +| epoch 1 | 943/ 2800 batches | train loss 0.5159959 +| epoch 1 | 947/ 2800 batches | train loss 0.4391544 +| epoch 1 | 951/ 2800 batches | train loss 0.5599073 +| epoch 1 | 955/ 2800 batches | train loss 0.5365632 +| epoch 1 | 959/ 2800 batches | train loss 0.4626333 +| epoch 1 | 963/ 2800 batches | train loss 0.5312564 +| epoch 1 | 967/ 2800 batches | train loss 0.5277774 +| epoch 1 | 971/ 2800 batches | train loss 0.5755202 +| epoch 1 | 975/ 2800 batches | train loss 0.4179085 +| epoch 1 | 979/ 2800 batches | train loss 0.5003977 +| epoch 1 | 983/ 2800 batches | train loss 0.4459077 +| epoch 1 | 987/ 2800 batches | train loss 0.5473162 +| epoch 1 | 991/ 2800 batches | train loss 0.3686861 +| epoch 1 | 995/ 2800 batches | train loss 0.3997256 +| epoch 1 | 999/ 2800 batches | train loss 0.5555695 +| epoch 1 | 1003/ 2800 batches | train loss 0.5038894 +| epoch 1 | 1007/ 2800 batches | train loss 0.4594295 +| epoch 1 | 1011/ 2800 batches | train loss 0.3930951 +| epoch 1 | 1015/ 2800 batches | train loss 0.4679394 +| epoch 1 | 1019/ 2800 batches | train loss 0.4512810 +| epoch 1 | 1023/ 2800 batches | train loss 0.4664630 +| epoch 1 | 1027/ 2800 batches | train loss 0.5982182 +| epoch 1 | 1031/ 2800 batches | train loss 0.4956509 +| epoch 1 | 1035/ 2800 batches | train loss 0.4937563 +| epoch 1 | 1039/ 2800 batches | train loss 0.5363517 +| epoch 1 | 1043/ 2800 batches | train loss 0.6252121 +| epoch 1 | 1047/ 2800 batches | train loss 0.5038141 +| epoch 1 | 1051/ 2800 batches | train loss 0.5287214 +| epoch 1 | 1055/ 2800 batches | train loss 0.5299402 +| epoch 1 | 1059/ 2800 batches | train loss 0.4480768 +| epoch 1 | 1063/ 2800 batches | train loss 0.5926060 +| epoch 1 | 1067/ 2800 batches | train loss 0.6162962 +| epoch 1 | 1071/ 2800 batches | train loss 0.4493462 +| epoch 1 | 1075/ 2800 batches | train loss 0.7230890 +| epoch 1 | 1079/ 2800 batches | train loss 0.4094940 +| epoch 1 | 1083/ 2800 batches | train loss 0.5992534 +| epoch 1 | 1087/ 2800 batches | train loss 0.4278825 +| epoch 1 | 1091/ 2800 batches | train loss 0.5403731 +| epoch 1 | 1095/ 2800 batches | train loss 0.5283014 +| epoch 1 | 1099/ 2800 batches | train loss 0.6294410 +| epoch 1 | 1103/ 2800 batches | train loss 0.5986919 +| epoch 1 | 1107/ 2800 batches | train loss 0.4225841 +| epoch 1 | 1111/ 2800 batches | train loss 0.5230483 +| epoch 1 | 1115/ 2800 batches | train loss 0.4186629 +| epoch 1 | 1119/ 2800 batches | train loss 0.4951383 +| epoch 1 | 1123/ 2800 batches | train loss 0.4604374 +| epoch 1 | 1127/ 2800 batches | train loss 0.4018077 +| epoch 1 | 1131/ 2800 batches | train loss 0.5176679 +| epoch 1 | 1135/ 2800 batches | train loss 0.4961989 +| epoch 1 | 1139/ 2800 batches | train loss 0.4313323 +| epoch 1 | 1143/ 2800 batches | train loss 0.5060882 +| epoch 1 | 1147/ 2800 batches | train loss 0.5960996 +| epoch 1 | 1151/ 2800 batches | train loss 0.4674763 +| epoch 1 | 1155/ 2800 batches | train loss 0.6103439 +| epoch 1 | 1159/ 2800 batches | train loss 0.5286791 +| epoch 1 | 1163/ 2800 batches | train loss 0.5214823 +| epoch 1 | 1167/ 2800 batches | train loss 0.4524164 +| epoch 1 | 1171/ 2800 batches | train loss 0.4827156 +| epoch 1 | 1175/ 2800 batches | train loss 0.6195606 +| epoch 1 | 1179/ 2800 batches | train loss 0.3961822 +| epoch 1 | 1183/ 2800 batches | train loss 0.4451470 +| epoch 1 | 1187/ 2800 batches | train loss 0.5911757 +| epoch 1 | 1191/ 2800 batches | train loss 0.6597587 +| epoch 1 | 1195/ 2800 batches | train loss 0.4480816 +| epoch 1 | 1199/ 2800 batches | train loss 0.5026225 +| epoch 1 | 1203/ 2800 batches | train loss 0.4485638 +| epoch 1 | 1207/ 2800 batches | train loss 0.4640584 +| epoch 1 | 1211/ 2800 batches | train loss 0.4705491 +| epoch 1 | 1215/ 2800 batches | train loss 0.5019318 +| epoch 1 | 1219/ 2800 batches | train loss 0.4923105 +| epoch 1 | 1223/ 2800 batches | train loss 0.5349467 +| epoch 1 | 1227/ 2800 batches | train loss 0.4135371 +| epoch 1 | 1231/ 2800 batches | train loss 0.5009989 +| epoch 1 | 1235/ 2800 batches | train loss 0.5146751 +| epoch 1 | 1239/ 2800 batches | train loss 0.5322430 +| epoch 1 | 1243/ 2800 batches | train loss 0.5155782 +| epoch 1 | 1247/ 2800 batches | train loss 0.4973457 +| epoch 1 | 1251/ 2800 batches | train loss 0.4324036 +| epoch 1 | 1255/ 2800 batches | train loss 0.4077443 +| epoch 1 | 1259/ 2800 batches | train loss 0.5273088 +| epoch 1 | 1263/ 2800 batches | train loss 0.5002948 +| epoch 1 | 1267/ 2800 batches | train loss 0.5803083 +| epoch 1 | 1271/ 2800 batches | train loss 0.4308412 +| epoch 1 | 1275/ 2800 batches | train loss 0.4953591 +| epoch 1 | 1279/ 2800 batches | train loss 0.4281947 +| epoch 1 | 1283/ 2800 batches | train loss 0.4382922 +| epoch 1 | 1287/ 2800 batches | train loss 0.5472810 +| epoch 1 | 1291/ 2800 batches | train loss 0.4791310 +| epoch 1 | 1295/ 2800 batches | train loss 0.3486379 +| epoch 1 | 1299/ 2800 batches | train loss 0.6188250 +| epoch 1 | 1303/ 2800 batches | train loss 0.4843029 +| epoch 1 | 1307/ 2800 batches | train loss 0.4843673 +| epoch 1 | 1311/ 2800 batches | train loss 0.5500463 +| epoch 1 | 1315/ 2800 batches | train loss 0.4705332 +| epoch 1 | 1319/ 2800 batches | train loss 0.4136729 +| epoch 1 | 1323/ 2800 batches | train loss 0.5422068 +| epoch 1 | 1327/ 2800 batches | train loss 0.4593726 +| epoch 1 | 1331/ 2800 batches | train loss 0.5059538 +| epoch 1 | 1335/ 2800 batches | train loss 0.6678383 +| epoch 1 | 1339/ 2800 batches | train loss 0.5469314 +| epoch 1 | 1343/ 2800 batches | train loss 0.4892066 +| epoch 1 | 1347/ 2800 batches | train loss 0.4430577 +| epoch 1 | 1351/ 2800 batches | train loss 0.4788913 +| epoch 1 | 1355/ 2800 batches | train loss 0.4326786 +| epoch 1 | 1359/ 2800 batches | train loss 0.5489132 +| epoch 1 | 1363/ 2800 batches | train loss 0.4100528 +| epoch 1 | 1367/ 2800 batches | train loss 0.4364434 +| epoch 1 | 1371/ 2800 batches | train loss 0.4419226 +| epoch 1 | 1375/ 2800 batches | train loss 0.4699704 +| epoch 1 | 1379/ 2800 batches | train loss 0.5355183 +| epoch 1 | 1383/ 2800 batches | train loss 0.5226727 +| epoch 1 | 1387/ 2800 batches | train loss 0.4365052 +| epoch 1 | 1391/ 2800 batches | train loss 0.4639739 +| epoch 1 | 1395/ 2800 batches | train loss 0.5028676 +| epoch 1 | 1399/ 2800 batches | train loss 0.4022093 +| epoch 1 | 1403/ 2800 batches | train loss 0.5008948 +| epoch 1 | 1407/ 2800 batches | train loss 0.5684798 +| epoch 1 | 1411/ 2800 batches | train loss 0.6847533 +| epoch 1 | 1415/ 2800 batches | train loss 0.4900282 +| epoch 1 | 1419/ 2800 batches | train loss 0.4705383 +| epoch 1 | 1423/ 2800 batches | train loss 0.4389376 +| epoch 1 | 1427/ 2800 batches | train loss 0.4570740 +| epoch 1 | 1431/ 2800 batches | train loss 0.5240908 +| epoch 1 | 1435/ 2800 batches | train loss 0.4633069 +| epoch 1 | 1439/ 2800 batches | train loss 0.4422211 +| epoch 1 | 1443/ 2800 batches | train loss 0.4094550 +| epoch 1 | 1447/ 2800 batches | train loss 0.5138534 +| epoch 1 | 1451/ 2800 batches | train loss 0.5391464 +| epoch 1 | 1455/ 2800 batches | train loss 0.5058972 +| epoch 1 | 1459/ 2800 batches | train loss 0.5011028 +| epoch 1 | 1463/ 2800 batches | train loss 0.4365595 +| epoch 1 | 1467/ 2800 batches | train loss 0.3966548 +| epoch 1 | 1471/ 2800 batches | train loss 0.3713274 +| epoch 1 | 1475/ 2800 batches | train loss 0.4512116 +| epoch 1 | 1479/ 2800 batches | train loss 0.4632071 +| epoch 1 | 1483/ 2800 batches | train loss 0.3800195 +| epoch 1 | 1487/ 2800 batches | train loss 0.4099075 +| epoch 1 | 1491/ 2800 batches | train loss 0.4503702 +| epoch 1 | 1495/ 2800 batches | train loss 0.4855320 +| epoch 1 | 1499/ 2800 batches | train loss 0.5296011 +| epoch 1 | 1503/ 2800 batches | train loss 0.4833373 +| epoch 1 | 1507/ 2800 batches | train loss 0.5054702 +| epoch 1 | 1511/ 2800 batches | train loss 0.4909423 +| epoch 1 | 1515/ 2800 batches | train loss 0.3767366 +| epoch 1 | 1519/ 2800 batches | train loss 0.4290552 +| epoch 1 | 1523/ 2800 batches | train loss 0.4255001 +| epoch 1 | 1527/ 2800 batches | train loss 0.4157024 +| epoch 1 | 1531/ 2800 batches | train loss 0.4190479 +| epoch 1 | 1535/ 2800 batches | train loss 0.6027162 +| epoch 1 | 1539/ 2800 batches | train loss 0.3733830 +| epoch 1 | 1543/ 2800 batches | train loss 0.5601729 +| epoch 1 | 1547/ 2800 batches | train loss 0.5236377 +| epoch 1 | 1551/ 2800 batches | train loss 0.5404846 +| epoch 1 | 1555/ 2800 batches | train loss 0.4655181 +| epoch 1 | 1559/ 2800 batches | train loss 0.3843268 +| epoch 1 | 1563/ 2800 batches | train loss 0.4779443 +| epoch 1 | 1567/ 2800 batches | train loss 0.4947690 +| epoch 1 | 1571/ 2800 batches | train loss 0.4494528 +| epoch 1 | 1575/ 2800 batches | train loss 0.5491884 +| epoch 1 | 1579/ 2800 batches | train loss 0.4789843 +| epoch 1 | 1583/ 2800 batches | train loss 0.5282558 +| epoch 1 | 1587/ 2800 batches | train loss 0.4877274 +| epoch 1 | 1591/ 2800 batches | train loss 0.4335879 +| epoch 1 | 1595/ 2800 batches | train loss 0.5414695 +| epoch 1 | 1599/ 2800 batches | train loss 0.4188586 +| epoch 1 | 1603/ 2800 batches | train loss 0.4846039 +| epoch 1 | 1607/ 2800 batches | train loss 0.5759864 +| epoch 1 | 1611/ 2800 batches | train loss 0.4505355 +| epoch 1 | 1615/ 2800 batches | train loss 0.5782081 +| epoch 1 | 1619/ 2800 batches | train loss 0.5685381 +| epoch 1 | 1623/ 2800 batches | train loss 0.5426090 +| epoch 1 | 1627/ 2800 batches | train loss 0.4721750 +| epoch 1 | 1631/ 2800 batches | train loss 0.4924566 +| epoch 1 | 1635/ 2800 batches | train loss 0.4097168 +| epoch 1 | 1639/ 2800 batches | train loss 0.4298247 +| epoch 1 | 1643/ 2800 batches | train loss 0.3721400 +| epoch 1 | 1647/ 2800 batches | train loss 0.4487706 +| epoch 1 | 1651/ 2800 batches | train loss 0.5734849 +| epoch 1 | 1655/ 2800 batches | train loss 0.4471001 +| epoch 1 | 1659/ 2800 batches | train loss 0.5121630 +| epoch 1 | 1663/ 2800 batches | train loss 0.5107815 +| epoch 1 | 1667/ 2800 batches | train loss 0.4293182 +| epoch 1 | 1671/ 2800 batches | train loss 0.5265590 +| epoch 1 | 1675/ 2800 batches | train loss 0.4645196 +| epoch 1 | 1679/ 2800 batches | train loss 0.4511667 +| epoch 1 | 1683/ 2800 batches | train loss 0.4095600 +| epoch 1 | 1687/ 2800 batches | train loss 0.4495184 +| epoch 1 | 1691/ 2800 batches | train loss 0.5242280 +| epoch 1 | 1695/ 2800 batches | train loss 0.3800748 +| epoch 1 | 1699/ 2800 batches | train loss 0.4800725 +| epoch 1 | 1703/ 2800 batches | train loss 0.4407192 +| epoch 1 | 1707/ 2800 batches | train loss 0.3792078 +| epoch 1 | 1711/ 2800 batches | train loss 0.5920833 +| epoch 1 | 1715/ 2800 batches | train loss 0.4357568 +| epoch 1 | 1719/ 2800 batches | train loss 0.5031924 +| epoch 1 | 1723/ 2800 batches | train loss 0.4746692 +| epoch 1 | 1727/ 2800 batches | train loss 0.3861404 +| epoch 1 | 1731/ 2800 batches | train loss 0.3383968 +| epoch 1 | 1735/ 2800 batches | train loss 0.4637406 +| epoch 1 | 1739/ 2800 batches | train loss 0.4481408 +| epoch 1 | 1743/ 2800 batches | train loss 0.4452527 +| epoch 1 | 1747/ 2800 batches | train loss 0.4974204 +| epoch 1 | 1751/ 2800 batches | train loss 0.5425600 +| epoch 1 | 1755/ 2800 batches | train loss 0.5056432 +| epoch 1 | 1759/ 2800 batches | train loss 0.4303957 +| epoch 1 | 1763/ 2800 batches | train loss 0.4063053 +| epoch 1 | 1767/ 2800 batches | train loss 0.5456790 +| epoch 1 | 1771/ 2800 batches | train loss 0.4494587 +| epoch 1 | 1775/ 2800 batches | train loss 0.6022917 +| epoch 1 | 1779/ 2800 batches | train loss 0.4890642 +| epoch 1 | 1783/ 2800 batches | train loss 0.3809088 +| epoch 1 | 1787/ 2800 batches | train loss 0.4207139 +| epoch 1 | 1791/ 2800 batches | train loss 0.4034679 +| epoch 1 | 1795/ 2800 batches | train loss 0.5450027 +| epoch 1 | 1799/ 2800 batches | train loss 0.3909850 +| epoch 1 | 1803/ 2800 batches | train loss 0.5502327 +| epoch 1 | 1807/ 2800 batches | train loss 0.5444447 +| epoch 1 | 1811/ 2800 batches | train loss 0.4216071 +| epoch 1 | 1815/ 2800 batches | train loss 0.5080860 +| epoch 1 | 1819/ 2800 batches | train loss 0.4855970 +| epoch 1 | 1823/ 2800 batches | train loss 0.5180454 +| epoch 1 | 1827/ 2800 batches | train loss 0.4907956 +| epoch 1 | 1831/ 2800 batches | train loss 0.5076772 +| epoch 1 | 1835/ 2800 batches | train loss 0.3766361 +| epoch 1 | 1839/ 2800 batches | train loss 0.5267165 +| epoch 1 | 1843/ 2800 batches | train loss 0.5928068 +| epoch 1 | 1847/ 2800 batches | train loss 0.4911760 +| epoch 1 | 1851/ 2800 batches | train loss 0.5559819 +| epoch 1 | 1855/ 2800 batches | train loss 0.3670713 +| epoch 1 | 1859/ 2800 batches | train loss 0.4184081 +| epoch 1 | 1863/ 2800 batches | train loss 0.5234499 +| epoch 1 | 1867/ 2800 batches | train loss 0.4623513 +| epoch 1 | 1871/ 2800 batches | train loss 0.5511841 +| epoch 1 | 1875/ 2800 batches | train loss 0.5764277 +| epoch 1 | 1879/ 2800 batches | train loss 0.3821413 +| epoch 1 | 1883/ 2800 batches | train loss 0.4225038 +| epoch 1 | 1887/ 2800 batches | train loss 0.4661316 +| epoch 1 | 1891/ 2800 batches | train loss 0.4851467 +| epoch 1 | 1895/ 2800 batches | train loss 0.5933904 +| epoch 1 | 1899/ 2800 batches | train loss 0.4124867 +| epoch 1 | 1903/ 2800 batches | train loss 0.5034956 +| epoch 1 | 1907/ 2800 batches | train loss 0.4745803 +| epoch 1 | 1911/ 2800 batches | train loss 0.5585215 +| epoch 1 | 1915/ 2800 batches | train loss 0.4801933 +| epoch 1 | 1919/ 2800 batches | train loss 0.4399416 +| epoch 1 | 1923/ 2800 batches | train loss 0.5239004 +| epoch 1 | 1927/ 2800 batches | train loss 0.3828907 +| epoch 1 | 1931/ 2800 batches | train loss 0.4754931 +| epoch 1 | 1935/ 2800 batches | train loss 0.4400192 +| epoch 1 | 1939/ 2800 batches | train loss 0.4886030 +| epoch 1 | 1943/ 2800 batches | train loss 0.4277847 +| epoch 1 | 1947/ 2800 batches | train loss 0.5094630 +| epoch 1 | 1951/ 2800 batches | train loss 0.4349436 +| epoch 1 | 1955/ 2800 batches | train loss 0.4664753 +| epoch 1 | 1959/ 2800 batches | train loss 0.4901925 +| epoch 1 | 1963/ 2800 batches | train loss 0.4557864 +| epoch 1 | 1967/ 2800 batches | train loss 0.5686505 +| epoch 1 | 1971/ 2800 batches | train loss 0.4858281 +| epoch 1 | 1975/ 2800 batches | train loss 0.3789474 +| epoch 1 | 1979/ 2800 batches | train loss 0.5011266 +| epoch 1 | 1983/ 2800 batches | train loss 0.4309384 +| epoch 1 | 1987/ 2800 batches | train loss 0.5295305 +| epoch 1 | 1991/ 2800 batches | train loss 0.5177435 +| epoch 1 | 1995/ 2800 batches | train loss 0.4946018 +| epoch 1 | 1999/ 2800 batches | train loss 0.4653175 +| epoch 1 | 2003/ 2800 batches | train loss 0.4632832 +| epoch 1 | 2007/ 2800 batches | train loss 0.4700167 +| epoch 1 | 2011/ 2800 batches | train loss 0.5190422 +| epoch 1 | 2015/ 2800 batches | train loss 0.5417208 +| epoch 1 | 2019/ 2800 batches | train loss 0.4587304 +| epoch 1 | 2023/ 2800 batches | train loss 0.4845886 +| epoch 1 | 2027/ 2800 batches | train loss 0.5450559 +| epoch 1 | 2031/ 2800 batches | train loss 0.5054083 +| epoch 1 | 2035/ 2800 batches | train loss 0.5744972 +| epoch 1 | 2039/ 2800 batches | train loss 0.5357602 +| epoch 1 | 2043/ 2800 batches | train loss 0.4193232 +| epoch 1 | 2047/ 2800 batches | train loss 0.5251123 +| epoch 1 | 2051/ 2800 batches | train loss 0.4005441 +| epoch 1 | 2055/ 2800 batches | train loss 0.4099370 +| epoch 1 | 2059/ 2800 batches | train loss 0.3738704 +| epoch 1 | 2063/ 2800 batches | train loss 0.5257016 +| epoch 1 | 2067/ 2800 batches | train loss 0.4130371 +| epoch 1 | 2071/ 2800 batches | train loss 0.4398539 +| epoch 1 | 2075/ 2800 batches | train loss 0.4289374 +| epoch 1 | 2079/ 2800 batches | train loss 0.4197134 +| epoch 1 | 2083/ 2800 batches | train loss 0.3993872 +| epoch 1 | 2087/ 2800 batches | train loss 0.4854288 +| epoch 1 | 2091/ 2800 batches | train loss 0.4496441 +| epoch 1 | 2095/ 2800 batches | train loss 0.4856478 +| epoch 1 | 2099/ 2800 batches | train loss 0.4921284 +| epoch 1 | 2103/ 2800 batches | train loss 0.4764284 +| epoch 1 | 2107/ 2800 batches | train loss 0.4892685 +| epoch 1 | 2111/ 2800 batches | train loss 0.4914037 +| epoch 1 | 2115/ 2800 batches | train loss 0.5013446 +| epoch 1 | 2119/ 2800 batches | train loss 0.3902774 +| epoch 1 | 2123/ 2800 batches | train loss 0.3884445 +| epoch 1 | 2127/ 2800 batches | train loss 0.3748969 +| epoch 1 | 2131/ 2800 batches | train loss 0.4582627 +| epoch 1 | 2135/ 2800 batches | train loss 0.5142173 +| epoch 1 | 2139/ 2800 batches | train loss 0.4251509 +| epoch 1 | 2143/ 2800 batches | train loss 0.4165471 +| epoch 1 | 2147/ 2800 batches | train loss 0.4746243 +| epoch 1 | 2151/ 2800 batches | train loss 0.4607552 +| epoch 1 | 2155/ 2800 batches | train loss 0.3066052 +| epoch 1 | 2159/ 2800 batches | train loss 0.5815446 +| epoch 1 | 2163/ 2800 batches | train loss 0.4837364 +| epoch 1 | 2167/ 2800 batches | train loss 0.5107360 +| epoch 1 | 2171/ 2800 batches | train loss 0.4566422 +| epoch 1 | 2175/ 2800 batches | train loss 0.5482365 +| epoch 1 | 2179/ 2800 batches | train loss 0.4303191 +| epoch 1 | 2183/ 2800 batches | train loss 0.4317698 +| epoch 1 | 2187/ 2800 batches | train loss 0.4599772 +| epoch 1 | 2191/ 2800 batches | train loss 0.5445828 +| epoch 1 | 2195/ 2800 batches | train loss 0.5723112 +| epoch 1 | 2199/ 2800 batches | train loss 0.5769248 +| epoch 1 | 2203/ 2800 batches | train loss 0.3680879 +| epoch 1 | 2207/ 2800 batches | train loss 0.5597544 +| epoch 1 | 2211/ 2800 batches | train loss 0.3980556 +| epoch 1 | 2215/ 2800 batches | train loss 0.4197377 +| epoch 1 | 2219/ 2800 batches | train loss 0.4766383 +| epoch 1 | 2223/ 2800 batches | train loss 0.5184728 +| epoch 1 | 2227/ 2800 batches | train loss 0.4333090 +| epoch 1 | 2231/ 2800 batches | train loss 0.3724797 +| epoch 1 | 2235/ 2800 batches | train loss 0.3974508 +| epoch 1 | 2239/ 2800 batches | train loss 0.6650735 +| epoch 1 | 2243/ 2800 batches | train loss 0.5277365 +| epoch 1 | 2247/ 2800 batches | train loss 0.4792911 +| epoch 1 | 2251/ 2800 batches | train loss 0.4633062 +| epoch 1 | 2255/ 2800 batches | train loss 0.3578637 +| epoch 1 | 2259/ 2800 batches | train loss 0.4610165 +| epoch 1 | 2263/ 2800 batches | train loss 0.4814970 +| epoch 1 | 2267/ 2800 batches | train loss 0.3987545 +| epoch 1 | 2271/ 2800 batches | train loss 0.4483413 +| epoch 1 | 2275/ 2800 batches | train loss 0.4825107 +| epoch 1 | 2279/ 2800 batches | train loss 0.4915794 +| epoch 1 | 2283/ 2800 batches | train loss 0.4244657 +| epoch 1 | 2287/ 2800 batches | train loss 0.4368293 +| epoch 1 | 2291/ 2800 batches | train loss 0.4233796 +| epoch 1 | 2295/ 2800 batches | train loss 0.4912027 +| epoch 1 | 2299/ 2800 batches | train loss 0.4952728 +| epoch 1 | 2303/ 2800 batches | train loss 0.4700705 +| epoch 1 | 2307/ 2800 batches | train loss 0.5037056 +| epoch 1 | 2311/ 2800 batches | train loss 0.4026176 +| epoch 1 | 2315/ 2800 batches | train loss 0.5475981 +| epoch 1 | 2319/ 2800 batches | train loss 0.3622584 +| epoch 1 | 2323/ 2800 batches | train loss 0.5203458 +| epoch 1 | 2327/ 2800 batches | train loss 0.4327952 +| epoch 1 | 2331/ 2800 batches | train loss 0.5341064 +| epoch 1 | 2335/ 2800 batches | train loss 0.4215441 +| epoch 1 | 2339/ 2800 batches | train loss 0.4532116 +| epoch 1 | 2343/ 2800 batches | train loss 0.4524862 +| epoch 1 | 2347/ 2800 batches | train loss 0.4146232 +| epoch 1 | 2351/ 2800 batches | train loss 0.3421383 +| epoch 1 | 2355/ 2800 batches | train loss 0.4494522 +| epoch 1 | 2359/ 2800 batches | train loss 0.4136313 +| epoch 1 | 2363/ 2800 batches | train loss 0.4527032 +| epoch 1 | 2367/ 2800 batches | train loss 0.4626319 +| epoch 1 | 2371/ 2800 batches | train loss 0.4903069 +| epoch 1 | 2375/ 2800 batches | train loss 0.5853989 +| epoch 1 | 2379/ 2800 batches | train loss 0.3992636 +| epoch 1 | 2383/ 2800 batches | train loss 0.4386411 +| epoch 1 | 2387/ 2800 batches | train loss 0.4699988 +| epoch 1 | 2391/ 2800 batches | train loss 0.5004065 +| epoch 1 | 2395/ 2800 batches | train loss 0.4129267 +| epoch 1 | 2399/ 2800 batches | train loss 0.4193683 +| epoch 1 | 2403/ 2800 batches | train loss 0.3865312 +| epoch 1 | 2407/ 2800 batches | train loss 0.4726045 +| epoch 1 | 2411/ 2800 batches | train loss 0.4644837 +| epoch 1 | 2415/ 2800 batches | train loss 0.4846400 +| epoch 1 | 2419/ 2800 batches | train loss 0.3938409 +| epoch 1 | 2423/ 2800 batches | train loss 0.5889875 +| epoch 1 | 2427/ 2800 batches | train loss 0.5193507 +| epoch 1 | 2431/ 2800 batches | train loss 0.4469704 +| epoch 1 | 2435/ 2800 batches | train loss 0.5168564 +| epoch 1 | 2439/ 2800 batches | train loss 0.4560899 +| epoch 1 | 2443/ 2800 batches | train loss 0.5136650 +| epoch 1 | 2447/ 2800 batches | train loss 0.4383927 +| epoch 1 | 2451/ 2800 batches | train loss 0.4315455 +| epoch 1 | 2455/ 2800 batches | train loss 0.4297639 +| epoch 1 | 2459/ 2800 batches | train loss 0.4769897 +| epoch 1 | 2463/ 2800 batches | train loss 0.3935646 +| epoch 1 | 2467/ 2800 batches | train loss 0.5282755 +| epoch 1 | 2471/ 2800 batches | train loss 0.4505222 +| epoch 1 | 2475/ 2800 batches | train loss 0.4526175 +| epoch 1 | 2479/ 2800 batches | train loss 0.6197653 +| epoch 1 | 2483/ 2800 batches | train loss 0.4542735 +| epoch 1 | 2487/ 2800 batches | train loss 0.4859233 +| epoch 1 | 2491/ 2800 batches | train loss 0.4174760 +| epoch 1 | 2495/ 2800 batches | train loss 0.5244979 +| epoch 1 | 2499/ 2800 batches | train loss 0.3654686 +| epoch 1 | 2503/ 2800 batches | train loss 0.4200141 +| epoch 1 | 2507/ 2800 batches | train loss 0.5117890 +| epoch 1 | 2511/ 2800 batches | train loss 0.4742432 +| epoch 1 | 2515/ 2800 batches | train loss 0.4142436 +| epoch 1 | 2519/ 2800 batches | train loss 0.5208866 +| epoch 1 | 2523/ 2800 batches | train loss 0.4360470 +| epoch 1 | 2527/ 2800 batches | train loss 0.5163345 +| epoch 1 | 2531/ 2800 batches | train loss 0.4211277 +| epoch 1 | 2535/ 2800 batches | train loss 0.5054313 +| epoch 1 | 2539/ 2800 batches | train loss 0.3528347 +| epoch 1 | 2543/ 2800 batches | train loss 0.5529426 +| epoch 1 | 2547/ 2800 batches | train loss 0.4130142 +| epoch 1 | 2551/ 2800 batches | train loss 0.3894074 +| epoch 1 | 2555/ 2800 batches | train loss 0.6009760 +| epoch 1 | 2559/ 2800 batches | train loss 0.4975429 +| epoch 1 | 2563/ 2800 batches | train loss 0.4754961 +| epoch 1 | 2567/ 2800 batches | train loss 0.5481845 +| epoch 1 | 2571/ 2800 batches | train loss 0.4959910 +| epoch 1 | 2575/ 2800 batches | train loss 0.5427504 +| epoch 1 | 2579/ 2800 batches | train loss 0.5441189 +| epoch 1 | 2583/ 2800 batches | train loss 0.5845964 +| epoch 1 | 2587/ 2800 batches | train loss 0.5071992 +| epoch 1 | 2591/ 2800 batches | train loss 0.4657200 +| epoch 1 | 2595/ 2800 batches | train loss 0.4425658 +| epoch 1 | 2599/ 2800 batches | train loss 0.4127115 +| epoch 1 | 2603/ 2800 batches | train loss 0.4408393 +| epoch 1 | 2607/ 2800 batches | train loss 0.4490016 +| epoch 1 | 2611/ 2800 batches | train loss 0.4873215 +| epoch 1 | 2615/ 2800 batches | train loss 0.4152213 +| epoch 1 | 2619/ 2800 batches | train loss 0.4459743 +| epoch 1 | 2623/ 2800 batches | train loss 0.4236138 +| epoch 1 | 2627/ 2800 batches | train loss 0.4738506 +| epoch 1 | 2631/ 2800 batches | train loss 0.5054228 +| epoch 1 | 2635/ 2800 batches | train loss 0.3984571 +| epoch 1 | 2639/ 2800 batches | train loss 0.3876235 +| epoch 1 | 2643/ 2800 batches | train loss 0.4584529 +| epoch 1 | 2647/ 2800 batches | train loss 0.4252188 +| epoch 1 | 2651/ 2800 batches | train loss 0.4967329 +| epoch 1 | 2655/ 2800 batches | train loss 0.5413947 +| epoch 1 | 2659/ 2800 batches | train loss 0.4533857 +| epoch 1 | 2663/ 2800 batches | train loss 0.6355494 +| epoch 1 | 2667/ 2800 batches | train loss 0.4356295 +| epoch 1 | 2671/ 2800 batches | train loss 0.6820162 +| epoch 1 | 2675/ 2800 batches | train loss 0.5706841 +| epoch 1 | 2679/ 2800 batches | train loss 0.4585831 +| epoch 1 | 2683/ 2800 batches | train loss 0.3186634 +| epoch 1 | 2687/ 2800 batches | train loss 0.3997449 +| epoch 1 | 2691/ 2800 batches | train loss 0.4683376 +| epoch 1 | 2695/ 2800 batches | train loss 0.4691739 +| epoch 1 | 2699/ 2800 batches | train loss 0.5413548 +| epoch 1 | 2703/ 2800 batches | train loss 0.4501043 +| epoch 1 | 2707/ 2800 batches | train loss 0.4432201 +| epoch 1 | 2711/ 2800 batches | train loss 0.5404228 +| epoch 1 | 2715/ 2800 batches | train loss 0.4622658 +| epoch 1 | 2719/ 2800 batches | train loss 0.5236773 +| epoch 1 | 2723/ 2800 batches | train loss 0.4130864 +| epoch 1 | 2727/ 2800 batches | train loss 0.4464462 +| epoch 1 | 2731/ 2800 batches | train loss 0.4367320 +| epoch 1 | 2735/ 2800 batches | train loss 0.4305120 +| epoch 1 | 2739/ 2800 batches | train loss 0.5565162 +| epoch 1 | 2743/ 2800 batches | train loss 0.4750898 +| epoch 1 | 2747/ 2800 batches | train loss 0.5436531 +| epoch 1 | 2751/ 2800 batches | train loss 0.4176089 +| epoch 1 | 2755/ 2800 batches | train loss 0.4328041 +| epoch 1 | 2759/ 2800 batches | train loss 0.4464582 +| epoch 1 | 2763/ 2800 batches | train loss 0.4001442 +| epoch 1 | 2767/ 2800 batches | train loss 0.5567628 +| epoch 1 | 2771/ 2800 batches | train loss 0.3609576 +| epoch 1 | 2775/ 2800 batches | train loss 0.4867220 +| epoch 1 | 2779/ 2800 batches | train loss 0.4296061 +| epoch 1 | 2783/ 2800 batches | train loss 0.4446099 +| epoch 1 | 2787/ 2800 batches | train loss 0.4708154 +| epoch 1 | 2791/ 2800 batches | train loss 0.4840951 +| epoch 1 | 2795/ 2800 batches | train loss 0.4474767 +| epoch 1 | 2799/ 2800 batches | train loss 0.4167202 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 2800 batches | test loss 0.4389935 +| epoch 1 | 7/ 2800 batches | test loss 0.5194660 +| epoch 1 | 11/ 2800 batches | test loss 0.4683138 +| epoch 1 | 15/ 2800 batches | test loss 0.5024632 +| epoch 1 | 19/ 2800 batches | test loss 0.4583189 +| epoch 1 | 23/ 2800 batches | test loss 0.5220083 +| epoch 1 | 27/ 2800 batches | test loss 0.4801008 +| epoch 1 | 31/ 2800 batches | test loss 0.3478616 +| epoch 1 | 35/ 2800 batches | test loss 0.4286295 +| epoch 1 | 39/ 2800 batches | test loss 0.4523042 +| epoch 1 | 43/ 2800 batches | test loss 0.4276894 +| epoch 1 | 47/ 2800 batches | test loss 0.4007383 +| epoch 1 | 51/ 2800 batches | test loss 0.3861062 +| epoch 1 | 55/ 2800 batches | test loss 0.5710780 +| epoch 1 | 59/ 2800 batches | test loss 0.4883986 +| epoch 1 | 63/ 2800 batches | test loss 0.5046685 +| epoch 1 | 67/ 2800 batches | test loss 0.4307051 +| epoch 1 | 71/ 2800 batches | test loss 0.4904421 +| epoch 1 | 75/ 2800 batches | test loss 0.4707628 +| epoch 1 | 79/ 2800 batches | test loss 0.4412766 +| epoch 1 | 83/ 2800 batches | test loss 0.4075232 +| epoch 1 | 87/ 2800 batches | test loss 0.3715004 +| epoch 1 | 91/ 2800 batches | test loss 0.4623886 +| epoch 1 | 95/ 2800 batches | test loss 0.4885040 +| epoch 1 | 99/ 2800 batches | test loss 0.5172502 +| epoch 1 | 103/ 2800 batches | test loss 0.4030899 +| epoch 1 | 107/ 2800 batches | test loss 0.4450304 +| epoch 1 | 111/ 2800 batches | test loss 0.4321930 +| epoch 1 | 115/ 2800 batches | test loss 0.4108972 +| epoch 1 | 119/ 2800 batches | test loss 0.4160588 +| epoch 1 | 123/ 2800 batches | test loss 0.4649709 +| epoch 1 | 127/ 2800 batches | test loss 0.5010011 +| epoch 1 | 131/ 2800 batches | test loss 0.4095434 +| epoch 1 | 135/ 2800 batches | test loss 0.6460873 +| epoch 1 | 139/ 2800 batches | test loss 0.5756428 +| epoch 1 | 143/ 2800 batches | test loss 0.3960165 +| epoch 1 | 147/ 2800 batches | test loss 0.3376868 +| epoch 1 | 151/ 2800 batches | test loss 0.4334392 +| epoch 1 | 155/ 2800 batches | test loss 0.4322112 +| epoch 1 | 159/ 2800 batches | test loss 0.4947662 +| epoch 1 | 163/ 2800 batches | test loss 0.5144436 +| epoch 1 | 167/ 2800 batches | test loss 0.4758366 +| epoch 1 | 171/ 2800 batches | test loss 0.4796649 +| epoch 1 | 175/ 2800 batches | test loss 0.5027733 +| epoch 1 | 179/ 2800 batches | test loss 0.4086775 +| epoch 1 | 183/ 2800 batches | test loss 0.4784831 +| epoch 1 | 187/ 2800 batches | test loss 0.4198326 +| epoch 1 | 191/ 2800 batches | test loss 0.5418273 +| epoch 1 | 195/ 2800 batches | test loss 0.4621742 +| epoch 1 | 199/ 2800 batches | test loss 0.4606395 +| epoch 1 | 203/ 2800 batches | test loss 0.4613924 +| epoch 1 | 207/ 2800 batches | test loss 0.4619678 +| epoch 1 | 211/ 2800 batches | test loss 0.4321483 +| epoch 1 | 215/ 2800 batches | test loss 0.5229800 +| epoch 1 | 219/ 2800 batches | test loss 0.5070372 +| epoch 1 | 223/ 2800 batches | test loss 0.4372866 +| epoch 1 | 227/ 2800 batches | test loss 0.4831394 +| epoch 1 | 231/ 2800 batches | test loss 0.4368348 +| epoch 1 | 235/ 2800 batches | test loss 0.6279612 +| epoch 1 | 239/ 2800 batches | test loss 0.3387697 +| epoch 1 | 243/ 2800 batches | test loss 0.4457711 +| epoch 1 | 247/ 2800 batches | test loss 0.4678808 +| epoch 1 | 251/ 2800 batches | test loss 0.4331232 +| epoch 1 | 255/ 2800 batches | test loss 0.4215406 +| epoch 1 | 259/ 2800 batches | test loss 0.4822374 +| epoch 1 | 263/ 2800 batches | test loss 0.4801182 +| epoch 1 | 267/ 2800 batches | test loss 0.4879822 +| epoch 1 | 271/ 2800 batches | test loss 0.4543334 +| epoch 1 | 275/ 2800 batches | test loss 0.4401581 +| epoch 1 | 279/ 2800 batches | test loss 0.3746572 +| epoch 1 | 283/ 2800 batches | test loss 0.4807541 +| epoch 1 | 287/ 2800 batches | test loss 0.3952104 +| epoch 1 | 291/ 2800 batches | test loss 0.4909123 +| epoch 1 | 295/ 2800 batches | test loss 0.3888081 +| epoch 1 | 299/ 2800 batches | test loss 0.3951769 +| epoch 1 | 303/ 2800 batches | test loss 0.4205818 +| epoch 1 | 307/ 2800 batches | test loss 0.3915096 +| epoch 1 | 311/ 2800 batches | test loss 0.5160240 +| epoch 1 | 315/ 2800 batches | test loss 0.4151044 +| epoch 1 | 319/ 2800 batches | test loss 0.4812964 +| epoch 1 | 323/ 2800 batches | test loss 0.4828308 +| epoch 1 | 327/ 2800 batches | test loss 0.5416832 +| epoch 1 | 331/ 2800 batches | test loss 0.4422660 +| epoch 1 | 335/ 2800 batches | test loss 0.4733617 +| epoch 1 | 339/ 2800 batches | test loss 0.4626338 +| epoch 1 | 343/ 2800 batches | test loss 0.4566988 +| epoch 1 | 347/ 2800 batches | test loss 0.4721340 +| epoch 1 | 351/ 2800 batches | test loss 0.4788257 +| epoch 1 | 355/ 2800 batches | test loss 0.4314158 +| epoch 1 | 359/ 2800 batches | test loss 0.3992428 +| epoch 1 | 363/ 2800 batches | test loss 0.3754002 +| epoch 1 | 367/ 2800 batches | test loss 0.5502565 +| epoch 1 | 371/ 2800 batches | test loss 0.5334891 +| epoch 1 | 375/ 2800 batches | test loss 0.5135837 +| epoch 1 | 379/ 2800 batches | test loss 0.4262379 +| epoch 1 | 383/ 2800 batches | test loss 0.5360141 +| epoch 1 | 387/ 2800 batches | test loss 0.5229900 +| epoch 1 | 391/ 2800 batches | test loss 0.4383558 +| epoch 1 | 395/ 2800 batches | test loss 0.3795666 +| epoch 1 | 399/ 2800 batches | test loss 0.5366781 +| epoch 1 | 403/ 2800 batches | test loss 0.5604934 +| epoch 1 | 407/ 2800 batches | test loss 0.4100235 +| epoch 1 | 411/ 2800 batches | test loss 0.3643626 +| epoch 1 | 415/ 2800 batches | test loss 0.4658885 +| epoch 1 | 419/ 2800 batches | test loss 0.5432851 +| epoch 1 | 423/ 2800 batches | test loss 0.3716265 +| epoch 1 | 427/ 2800 batches | test loss 0.3859804 +| epoch 1 | 431/ 2800 batches | test loss 0.4676363 +| epoch 1 | 435/ 2800 batches | test loss 0.4236991 +| epoch 1 | 439/ 2800 batches | test loss 0.5122083 +| epoch 1 | 443/ 2800 batches | test loss 0.3619400 +| epoch 1 | 447/ 2800 batches | test loss 0.4885893 +| epoch 1 | 451/ 2800 batches | test loss 0.3971581 +| epoch 1 | 455/ 2800 batches | test loss 0.4045677 +| epoch 1 | 459/ 2800 batches | test loss 0.4229557 +| epoch 1 | 463/ 2800 batches | test loss 0.3472890 +| epoch 1 | 467/ 2800 batches | test loss 0.5496774 +| epoch 1 | 471/ 2800 batches | test loss 0.5081009 +| epoch 1 | 475/ 2800 batches | test loss 0.4414252 +| epoch 1 | 479/ 2800 batches | test loss 0.4362465 +| epoch 1 | 483/ 2800 batches | test loss 0.4700671 +| epoch 1 | 487/ 2800 batches | test loss 0.5759003 +| epoch 1 | 491/ 2800 batches | test loss 0.6114309 +| epoch 1 | 495/ 2800 batches | test loss 0.3736557 +| epoch 1 | 499/ 2800 batches | test loss 0.4822226 +| epoch 1 | 503/ 2800 batches | test loss 0.3707365 +| epoch 1 | 507/ 2800 batches | test loss 0.6087053 +| epoch 1 | 511/ 2800 batches | test loss 0.4803171 +| epoch 1 | 515/ 2800 batches | test loss 0.5396947 +| epoch 1 | 519/ 2800 batches | test loss 0.4397177 +| epoch 1 | 523/ 2800 batches | test loss 0.4799599 +| epoch 1 | 527/ 2800 batches | test loss 0.4661466 +| epoch 1 | 531/ 2800 batches | test loss 0.5257246 +| epoch 1 | 535/ 2800 batches | test loss 0.3700360 +| epoch 1 | 539/ 2800 batches | test loss 0.4651444 +| epoch 1 | 543/ 2800 batches | test loss 0.4104305 +| epoch 1 | 547/ 2800 batches | test loss 0.5328023 +| epoch 1 | 551/ 2800 batches | test loss 0.5281737 +| epoch 1 | 555/ 2800 batches | test loss 0.5038491 +| epoch 1 | 559/ 2800 batches | test loss 0.5221434 +| epoch 1 | 563/ 2800 batches | test loss 0.3962513 +| epoch 1 | 567/ 2800 batches | test loss 0.5565747 +| epoch 1 | 571/ 2800 batches | test loss 0.5031670 +| epoch 1 | 575/ 2800 batches | test loss 0.4902875 +| epoch 1 | 579/ 2800 batches | test loss 0.4984934 +| epoch 1 | 583/ 2800 batches | test loss 0.4024166 +| epoch 1 | 587/ 2800 batches | test loss 0.3783377 +| epoch 1 | 591/ 2800 batches | test loss 0.5505606 +| epoch 1 | 595/ 2800 batches | test loss 0.4974920 +| epoch 1 | 599/ 2800 batches | test loss 0.4328534 +| epoch 1 | 603/ 2800 batches | test loss 0.4685238 +| epoch 1 | 607/ 2800 batches | test loss 0.3758541 +| epoch 1 | 611/ 2800 batches | test loss 0.4484152 +| epoch 1 | 615/ 2800 batches | test loss 0.4131344 +| epoch 1 | 619/ 2800 batches | test loss 0.4697218 +| epoch 1 | 623/ 2800 batches | test loss 0.4780459 +| epoch 1 | 627/ 2800 batches | test loss 0.4131713 +| epoch 1 | 631/ 2800 batches | test loss 0.4764066 +| epoch 1 | 635/ 2800 batches | test loss 0.6041948 +| epoch 1 | 639/ 2800 batches | test loss 0.4680351 +| epoch 1 | 643/ 2800 batches | test loss 0.5711833 +| epoch 1 | 647/ 2800 batches | test loss 0.4899628 +| epoch 1 | 651/ 2800 batches | test loss 0.4993559 +| epoch 1 | 655/ 2800 batches | test loss 0.4587371 +| epoch 1 | 659/ 2800 batches | test loss 0.4944685 +| epoch 1 | 663/ 2800 batches | test loss 0.4047614 +| epoch 1 | 667/ 2800 batches | test loss 0.5389886 +| epoch 1 | 671/ 2800 batches | test loss 0.4351008 +| epoch 1 | 675/ 2800 batches | test loss 0.7072098 +| epoch 1 | 679/ 2800 batches | test loss 0.4368964 +| epoch 1 | 683/ 2800 batches | test loss 0.3888397 +| epoch 1 | 687/ 2800 batches | test loss 0.4311260 +| epoch 1 | 691/ 2800 batches | test loss 0.4485843 +| epoch 1 | 695/ 2800 batches | test loss 0.4615120 +| epoch 1 | 699/ 2800 batches | test loss 0.4668613 +| epoch 1 | final test loss 0.4650, save model! +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 2800 batches | train loss 0.3811271 +| epoch 2 | 7/ 2800 batches | train loss 0.4626871 +| epoch 2 | 11/ 2800 batches | train loss 0.4321542 +| epoch 2 | 15/ 2800 batches | train loss 0.4738529 +| epoch 2 | 19/ 2800 batches | train loss 0.5035285 +| epoch 2 | 23/ 2800 batches | train loss 0.5083270 +| epoch 2 | 27/ 2800 batches | train loss 0.4635752 +| epoch 2 | 31/ 2800 batches | train loss 0.4338359 +| epoch 2 | 35/ 2800 batches | train loss 0.4221844 +| epoch 2 | 39/ 2800 batches | train loss 0.2372000 +| epoch 2 | 43/ 2800 batches | train loss 0.4530051 +| epoch 2 | 47/ 2800 batches | train loss 0.5067532 +| epoch 2 | 51/ 2800 batches | train loss 0.4170953 +| epoch 2 | 55/ 2800 batches | train loss 0.5022687 +| epoch 2 | 59/ 2800 batches | train loss 0.3325064 +| epoch 2 | 63/ 2800 batches | train loss 0.3813705 +| epoch 2 | 67/ 2800 batches | train loss 0.4416319 +| epoch 2 | 71/ 2800 batches | train loss 0.3850610 +| epoch 2 | 75/ 2800 batches | train loss 0.4744503 +| epoch 2 | 79/ 2800 batches | train loss 0.5016470 +| epoch 2 | 83/ 2800 batches | train loss 0.4532813 +| epoch 2 | 87/ 2800 batches | train loss 0.3686725 +| epoch 2 | 91/ 2800 batches | train loss 0.4442031 +| epoch 2 | 95/ 2800 batches | train loss 0.4906155 +| epoch 2 | 99/ 2800 batches | train loss 0.5631646 +| epoch 2 | 103/ 2800 batches | train loss 0.4310207 +| epoch 2 | 107/ 2800 batches | train loss 0.5695239 +| epoch 2 | 111/ 2800 batches | train loss 0.4253701 +| epoch 2 | 115/ 2800 batches | train loss 0.4891412 +| epoch 2 | 119/ 2800 batches | train loss 0.3946670 +| epoch 2 | 123/ 2800 batches | train loss 0.4630075 +| epoch 2 | 127/ 2800 batches | train loss 0.4604017 +| epoch 2 | 131/ 2800 batches | train loss 0.4818787 +| epoch 2 | 135/ 2800 batches | train loss 0.4055558 +| epoch 2 | 139/ 2800 batches | train loss 0.3748560 +| epoch 2 | 143/ 2800 batches | train loss 0.4967083 +| epoch 2 | 147/ 2800 batches | train loss 0.4350009 +| epoch 2 | 151/ 2800 batches | train loss 0.4318262 +| epoch 2 | 155/ 2800 batches | train loss 0.3718905 +| epoch 2 | 159/ 2800 batches | train loss 0.5710565 +| epoch 2 | 163/ 2800 batches | train loss 0.3476709 +| epoch 2 | 167/ 2800 batches | train loss 0.4852734 +| epoch 2 | 171/ 2800 batches | train loss 0.5023404 +| epoch 2 | 175/ 2800 batches | train loss 0.4670958 +| epoch 2 | 179/ 2800 batches | train loss 0.3796215 +| epoch 2 | 183/ 2800 batches | train loss 0.4431964 +| epoch 2 | 187/ 2800 batches | train loss 0.5190756 +| epoch 2 | 191/ 2800 batches | train loss 0.4537640 +| epoch 2 | 195/ 2800 batches | train loss 0.5042354 +| epoch 2 | 199/ 2800 batches | train loss 0.5608445 +| epoch 2 | 203/ 2800 batches | train loss 0.4666483 +| epoch 2 | 207/ 2800 batches | train loss 0.4003159 +| epoch 2 | 211/ 2800 batches | train loss 0.3788770 +| epoch 2 | 215/ 2800 batches | train loss 0.5710148 +| epoch 2 | 219/ 2800 batches | train loss 0.5255919 +| epoch 2 | 223/ 2800 batches | train loss 0.4535156 +| epoch 2 | 227/ 2800 batches | train loss 0.5011410 +| epoch 2 | 231/ 2800 batches | train loss 0.5424023 +| epoch 2 | 235/ 2800 batches | train loss 0.5243012 +| epoch 2 | 239/ 2800 batches | train loss 0.6610659 +| epoch 2 | 243/ 2800 batches | train loss 0.4520863 +| epoch 2 | 247/ 2800 batches | train loss 0.5433022 +| epoch 2 | 251/ 2800 batches | train loss 0.4587935 +| epoch 2 | 255/ 2800 batches | train loss 0.4023640 +| epoch 2 | 259/ 2800 batches | train loss 0.5713179 +| epoch 2 | 263/ 2800 batches | train loss 0.4288449 +| epoch 2 | 267/ 2800 batches | train loss 0.4122949 +| epoch 2 | 271/ 2800 batches | train loss 0.5062334 +| epoch 2 | 275/ 2800 batches | train loss 0.3920296 +| epoch 2 | 279/ 2800 batches | train loss 0.4565556 +| epoch 2 | 283/ 2800 batches | train loss 0.4895002 +| epoch 2 | 287/ 2800 batches | train loss 0.5231091 +| epoch 2 | 291/ 2800 batches | train loss 0.3591492 +| epoch 2 | 295/ 2800 batches | train loss 0.3813560 +| epoch 2 | 299/ 2800 batches | train loss 0.3594409 +| epoch 2 | 303/ 2800 batches | train loss 0.3494494 +| epoch 2 | 307/ 2800 batches | train loss 0.5405153 +| epoch 2 | 311/ 2800 batches | train loss 0.3307335 +| epoch 2 | 315/ 2800 batches | train loss 0.4751128 +| epoch 2 | 319/ 2800 batches | train loss 0.5205493 +| epoch 2 | 323/ 2800 batches | train loss 0.4238505 +| epoch 2 | 327/ 2800 batches | train loss 0.3911513 +| epoch 2 | 331/ 2800 batches | train loss 0.5225353 +| epoch 2 | 335/ 2800 batches | train loss 0.4948814 +| epoch 2 | 339/ 2800 batches | train loss 0.3758949 +| epoch 2 | 343/ 2800 batches | train loss 0.4124345 +| epoch 2 | 347/ 2800 batches | train loss 0.4318812 +| epoch 2 | 351/ 2800 batches | train loss 0.4257521 +| epoch 2 | 355/ 2800 batches | train loss 0.3913827 +| epoch 2 | 359/ 2800 batches | train loss 0.5012269 +| epoch 2 | 363/ 2800 batches | train loss 0.4095215 +| epoch 2 | 367/ 2800 batches | train loss 0.4513606 +| epoch 2 | 371/ 2800 batches | train loss 0.4354728 +| epoch 2 | 375/ 2800 batches | train loss 0.4269858 +| epoch 2 | 379/ 2800 batches | train loss 0.5116255 +| epoch 2 | 383/ 2800 batches | train loss 0.4966434 +| epoch 2 | 387/ 2800 batches | train loss 0.4653462 +| epoch 2 | 391/ 2800 batches | train loss 0.4802595 +| epoch 2 | 395/ 2800 batches | train loss 0.3893237 +| epoch 2 | 399/ 2800 batches | train loss 0.4133030 +| epoch 2 | 403/ 2800 batches | train loss 0.4303513 +| epoch 2 | 407/ 2800 batches | train loss 0.4120629 +| epoch 2 | 411/ 2800 batches | train loss 0.3476796 +| epoch 2 | 415/ 2800 batches | train loss 0.4625182 +| epoch 2 | 419/ 2800 batches | train loss 0.5092174 +| epoch 2 | 423/ 2800 batches | train loss 0.4923681 +| epoch 2 | 427/ 2800 batches | train loss 0.4343489 +| epoch 2 | 431/ 2800 batches | train loss 0.4328842 +| epoch 2 | 435/ 2800 batches | train loss 0.4543908 +| epoch 2 | 439/ 2800 batches | train loss 0.3917967 +| epoch 2 | 443/ 2800 batches | train loss 0.3841732 +| epoch 2 | 447/ 2800 batches | train loss 0.4679470 +| epoch 2 | 451/ 2800 batches | train loss 0.4726909 +| epoch 2 | 455/ 2800 batches | train loss 0.3994731 +| epoch 2 | 459/ 2800 batches | train loss 0.4043773 +| epoch 2 | 463/ 2800 batches | train loss 0.3855835 +| epoch 2 | 467/ 2800 batches | train loss 0.3548046 +| epoch 2 | 471/ 2800 batches | train loss 0.4528150 +| epoch 2 | 475/ 2800 batches | train loss 0.4370787 +| epoch 2 | 479/ 2800 batches | train loss 0.4169326 +| epoch 2 | 483/ 2800 batches | train loss 0.3988466 +| epoch 2 | 487/ 2800 batches | train loss 0.4829754 +| epoch 2 | 491/ 2800 batches | train loss 0.3973124 +| epoch 2 | 495/ 2800 batches | train loss 0.4456136 +| epoch 2 | 499/ 2800 batches | train loss 0.3788894 +| epoch 2 | 503/ 2800 batches | train loss 0.4163460 +| epoch 2 | 507/ 2800 batches | train loss 0.4652062 +| epoch 2 | 511/ 2800 batches | train loss 0.3924318 +| epoch 2 | 515/ 2800 batches | train loss 0.4481857 +| epoch 2 | 519/ 2800 batches | train loss 0.3615829 +| epoch 2 | 523/ 2800 batches | train loss 0.4455074 +| epoch 2 | 527/ 2800 batches | train loss 0.4949004 +| epoch 2 | 531/ 2800 batches | train loss 0.5353408 +| epoch 2 | 535/ 2800 batches | train loss 0.3905566 +| epoch 2 | 539/ 2800 batches | train loss 0.5079507 +| epoch 2 | 543/ 2800 batches | train loss 0.3827958 +| epoch 2 | 547/ 2800 batches | train loss 0.5245920 +| epoch 2 | 551/ 2800 batches | train loss 0.4236313 +| epoch 2 | 555/ 2800 batches | train loss 0.3924514 +| epoch 2 | 559/ 2800 batches | train loss 0.5553377 +| epoch 2 | 563/ 2800 batches | train loss 0.4315988 +| epoch 2 | 567/ 2800 batches | train loss 0.4228120 +| epoch 2 | 571/ 2800 batches | train loss 0.4491678 +| epoch 2 | 575/ 2800 batches | train loss 0.3079703 +| epoch 2 | 579/ 2800 batches | train loss 0.4612514 +| epoch 2 | 583/ 2800 batches | train loss 0.4927627 +| epoch 2 | 587/ 2800 batches | train loss 0.4942470 +| epoch 2 | 591/ 2800 batches | train loss 0.4691391 +| epoch 2 | 595/ 2800 batches | train loss 0.4805734 +| epoch 2 | 599/ 2800 batches | train loss 0.4383348 +| epoch 2 | 603/ 2800 batches | train loss 0.3451718 +| epoch 2 | 607/ 2800 batches | train loss 0.4631471 +| epoch 2 | 611/ 2800 batches | train loss 0.3835837 +| epoch 2 | 615/ 2800 batches | train loss 0.3830586 +| epoch 2 | 619/ 2800 batches | train loss 0.5227854 +| epoch 2 | 623/ 2800 batches | train loss 0.5038424 +| epoch 2 | 627/ 2800 batches | train loss 0.5037646 +| epoch 2 | 631/ 2800 batches | train loss 0.3720019 +| epoch 2 | 635/ 2800 batches | train loss 0.4452391 +| epoch 2 | 639/ 2800 batches | train loss 0.4449825 +| epoch 2 | 643/ 2800 batches | train loss 0.4513757 +| epoch 2 | 647/ 2800 batches | train loss 0.3899816 +| epoch 2 | 651/ 2800 batches | train loss 0.4260798 +| epoch 2 | 655/ 2800 batches | train loss 0.5257467 +| epoch 2 | 659/ 2800 batches | train loss 0.4583325 +| epoch 2 | 663/ 2800 batches | train loss 0.3906067 +| epoch 2 | 667/ 2800 batches | train loss 0.3867030 +| epoch 2 | 671/ 2800 batches | train loss 0.6485579 +| epoch 2 | 675/ 2800 batches | train loss 0.4456132 +| epoch 2 | 679/ 2800 batches | train loss 0.5288363 +| epoch 2 | 683/ 2800 batches | train loss 0.4351309 +| epoch 2 | 687/ 2800 batches | train loss 0.4470272 +| epoch 2 | 691/ 2800 batches | train loss 0.3117336 +| epoch 2 | 695/ 2800 batches | train loss 0.3494790 +| epoch 2 | 699/ 2800 batches | train loss 0.3870350 +| epoch 2 | 703/ 2800 batches | train loss 0.5104282 +| epoch 2 | 707/ 2800 batches | train loss 0.4315245 +| epoch 2 | 711/ 2800 batches | train loss 0.4302948 +| epoch 2 | 715/ 2800 batches | train loss 0.4702298 +| epoch 2 | 719/ 2800 batches | train loss 0.4586290 +| epoch 2 | 723/ 2800 batches | train loss 0.4678326 +| epoch 2 | 727/ 2800 batches | train loss 0.4770128 +| epoch 2 | 731/ 2800 batches | train loss 0.4234307 +| epoch 2 | 735/ 2800 batches | train loss 0.5316730 +| epoch 2 | 739/ 2800 batches | train loss 0.4676037 +| epoch 2 | 743/ 2800 batches | train loss 0.4297038 +| epoch 2 | 747/ 2800 batches | train loss 0.4543464 +| epoch 2 | 751/ 2800 batches | train loss 0.5012459 +| epoch 2 | 755/ 2800 batches | train loss 0.4464392 +| epoch 2 | 759/ 2800 batches | train loss 0.4921103 +| epoch 2 | 763/ 2800 batches | train loss 0.3801484 +| epoch 2 | 767/ 2800 batches | train loss 0.5402746 +| epoch 2 | 771/ 2800 batches | train loss 0.5479714 +| epoch 2 | 775/ 2800 batches | train loss 0.4620798 +| epoch 2 | 779/ 2800 batches | train loss 0.4582528 +| epoch 2 | 783/ 2800 batches | train loss 0.4324473 +| epoch 2 | 787/ 2800 batches | train loss 0.4802330 +| epoch 2 | 791/ 2800 batches | train loss 0.4344670 +| epoch 2 | 795/ 2800 batches | train loss 0.5529969 +| epoch 2 | 799/ 2800 batches | train loss 0.4502378 +| epoch 2 | 803/ 2800 batches | train loss 0.5254643 +| epoch 2 | 807/ 2800 batches | train loss 0.3972807 +| epoch 2 | 811/ 2800 batches | train loss 0.4900045 +| epoch 2 | 815/ 2800 batches | train loss 0.4228690 +| epoch 2 | 819/ 2800 batches | train loss 0.5838322 +| epoch 2 | 823/ 2800 batches | train loss 0.4366000 +| epoch 2 | 827/ 2800 batches | train loss 0.4543967 +| epoch 2 | 831/ 2800 batches | train loss 0.4480785 +| epoch 2 | 835/ 2800 batches | train loss 0.3661053 +| epoch 2 | 839/ 2800 batches | train loss 0.6137636 +| epoch 2 | 843/ 2800 batches | train loss 0.4443342 +| epoch 2 | 847/ 2800 batches | train loss 0.4643660 +| epoch 2 | 851/ 2800 batches | train loss 0.5211540 +| epoch 2 | 855/ 2800 batches | train loss 0.4389006 +| epoch 2 | 859/ 2800 batches | train loss 0.4546298 +| epoch 2 | 863/ 2800 batches | train loss 0.4616195 +| epoch 2 | 867/ 2800 batches | train loss 0.5116366 +| epoch 2 | 871/ 2800 batches | train loss 0.4115418 +| epoch 2 | 875/ 2800 batches | train loss 0.4460102 +| epoch 2 | 879/ 2800 batches | train loss 0.4316396 +| epoch 2 | 883/ 2800 batches | train loss 0.4118907 +| epoch 2 | 887/ 2800 batches | train loss 0.4303472 +| epoch 2 | 891/ 2800 batches | train loss 0.4607024 +| epoch 2 | 895/ 2800 batches | train loss 0.5222567 +| epoch 2 | 899/ 2800 batches | train loss 0.3605507 +| epoch 2 | 903/ 2800 batches | train loss 0.4638890 +| epoch 2 | 907/ 2800 batches | train loss 0.4129083 +| epoch 2 | 911/ 2800 batches | train loss 0.4425235 +| epoch 2 | 915/ 2800 batches | train loss 0.4119598 +| epoch 2 | 919/ 2800 batches | train loss 0.3733197 +| epoch 2 | 923/ 2800 batches | train loss 0.4112264 +| epoch 2 | 927/ 2800 batches | train loss 0.4460387 +| epoch 2 | 931/ 2800 batches | train loss 0.4398082 +| epoch 2 | 935/ 2800 batches | train loss 0.5004552 +| epoch 2 | 939/ 2800 batches | train loss 0.4553304 +| epoch 2 | 943/ 2800 batches | train loss 0.5790910 +| epoch 2 | 947/ 2800 batches | train loss 0.5007447 +| epoch 2 | 951/ 2800 batches | train loss 0.4604053 +| epoch 2 | 955/ 2800 batches | train loss 0.4296958 +| epoch 2 | 959/ 2800 batches | train loss 0.4479076 +| epoch 2 | 963/ 2800 batches | train loss 0.4709558 +| epoch 2 | 967/ 2800 batches | train loss 0.4357831 +| epoch 2 | 971/ 2800 batches | train loss 0.4966828 +| epoch 2 | 975/ 2800 batches | train loss 0.4321802 +| epoch 2 | 979/ 2800 batches | train loss 0.3710250 +| epoch 2 | 983/ 2800 batches | train loss 0.4739124 +| epoch 2 | 987/ 2800 batches | train loss 0.4045529 +| epoch 2 | 991/ 2800 batches | train loss 0.3191053 +| epoch 2 | 995/ 2800 batches | train loss 0.4902273 +| epoch 2 | 999/ 2800 batches | train loss 0.4918661 +| epoch 2 | 1003/ 2800 batches | train loss 0.4142773 +| epoch 2 | 1007/ 2800 batches | train loss 0.5413485 +| epoch 2 | 1011/ 2800 batches | train loss 0.5071911 +| epoch 2 | 1015/ 2800 batches | train loss 0.4070266 +| epoch 2 | 1019/ 2800 batches | train loss 0.4756930 +| epoch 2 | 1023/ 2800 batches | train loss 0.3857906 +| epoch 2 | 1027/ 2800 batches | train loss 0.4299597 +| epoch 2 | 1031/ 2800 batches | train loss 0.5051552 +| epoch 2 | 1035/ 2800 batches | train loss 0.4009470 +| epoch 2 | 1039/ 2800 batches | train loss 0.4780328 +| epoch 2 | 1043/ 2800 batches | train loss 0.4339088 +| epoch 2 | 1047/ 2800 batches | train loss 0.5408769 +| epoch 2 | 1051/ 2800 batches | train loss 0.4530486 +| epoch 2 | 1055/ 2800 batches | train loss 0.4188107 +| epoch 2 | 1059/ 2800 batches | train loss 0.4116381 +| epoch 2 | 1063/ 2800 batches | train loss 0.4501374 +| epoch 2 | 1067/ 2800 batches | train loss 0.4127793 +| epoch 2 | 1071/ 2800 batches | train loss 0.5348634 +| epoch 2 | 1075/ 2800 batches | train loss 0.4227650 +| epoch 2 | 1079/ 2800 batches | train loss 0.4450073 +| epoch 2 | 1083/ 2800 batches | train loss 0.4420068 +| epoch 2 | 1087/ 2800 batches | train loss 0.4314218 +| epoch 2 | 1091/ 2800 batches | train loss 0.4388654 +| epoch 2 | 1095/ 2800 batches | train loss 0.4033369 +| epoch 2 | 1099/ 2800 batches | train loss 0.5100027 +| epoch 2 | 1103/ 2800 batches | train loss 0.4178987 +| epoch 2 | 1107/ 2800 batches | train loss 0.3540257 +| epoch 2 | 1111/ 2800 batches | train loss 0.5143665 +| epoch 2 | 1115/ 2800 batches | train loss 0.4359669 +| epoch 2 | 1119/ 2800 batches | train loss 0.5257991 +| epoch 2 | 1123/ 2800 batches | train loss 0.4009600 +| epoch 2 | 1127/ 2800 batches | train loss 0.4342840 +| epoch 2 | 1131/ 2800 batches | train loss 0.4797473 +| epoch 2 | 1135/ 2800 batches | train loss 0.4594472 +| epoch 2 | 1139/ 2800 batches | train loss 0.4251719 +| epoch 2 | 1143/ 2800 batches | train loss 0.4105916 +| epoch 2 | 1147/ 2800 batches | train loss 0.4129993 +| epoch 2 | 1151/ 2800 batches | train loss 0.4815417 +| epoch 2 | 1155/ 2800 batches | train loss 0.4951254 +| epoch 2 | 1159/ 2800 batches | train loss 0.4906404 +| epoch 2 | 1163/ 2800 batches | train loss 0.4822955 +| epoch 2 | 1167/ 2800 batches | train loss 0.4639271 +| epoch 2 | 1171/ 2800 batches | train loss 0.4527554 +| epoch 2 | 1175/ 2800 batches | train loss 0.4533165 +| epoch 2 | 1179/ 2800 batches | train loss 0.5016596 +| epoch 2 | 1183/ 2800 batches | train loss 0.4757650 +| epoch 2 | 1187/ 2800 batches | train loss 0.5421224 +| epoch 2 | 1191/ 2800 batches | train loss 0.5113759 +| epoch 2 | 1195/ 2800 batches | train loss 0.5321848 +| epoch 2 | 1199/ 2800 batches | train loss 0.4173613 +| epoch 2 | 1203/ 2800 batches | train loss 0.4127053 +| epoch 2 | 1207/ 2800 batches | train loss 0.4049380 +| epoch 2 | 1211/ 2800 batches | train loss 0.3863762 +| epoch 2 | 1215/ 2800 batches | train loss 0.3898436 +| epoch 2 | 1219/ 2800 batches | train loss 0.4204020 +| epoch 2 | 1223/ 2800 batches | train loss 0.4988259 +| epoch 2 | 1227/ 2800 batches | train loss 0.5403289 +| epoch 2 | 1231/ 2800 batches | train loss 0.3829378 +| epoch 2 | 1235/ 2800 batches | train loss 0.4849150 +| epoch 2 | 1239/ 2800 batches | train loss 0.4872727 +| epoch 2 | 1243/ 2800 batches | train loss 0.4097850 +| epoch 2 | 1247/ 2800 batches | train loss 0.4776702 +| epoch 2 | 1251/ 2800 batches | train loss 0.4591105 +| epoch 2 | 1255/ 2800 batches | train loss 0.4080397 +| epoch 2 | 1259/ 2800 batches | train loss 0.3772192 +| epoch 2 | 1263/ 2800 batches | train loss 0.4674986 +| epoch 2 | 1267/ 2800 batches | train loss 0.4930409 +| epoch 2 | 1271/ 2800 batches | train loss 0.4906058 +| epoch 2 | 1275/ 2800 batches | train loss 0.4214104 +| epoch 2 | 1279/ 2800 batches | train loss 0.4333375 +| epoch 2 | 1283/ 2800 batches | train loss 0.4052027 +| epoch 2 | 1287/ 2800 batches | train loss 0.4910396 +| epoch 2 | 1291/ 2800 batches | train loss 0.5337358 +| epoch 2 | 1295/ 2800 batches | train loss 0.4001134 +| epoch 2 | 1299/ 2800 batches | train loss 0.4322607 +| epoch 2 | 1303/ 2800 batches | train loss 0.5621481 +| epoch 2 | 1307/ 2800 batches | train loss 0.3491707 +| epoch 2 | 1311/ 2800 batches | train loss 0.5097631 +| epoch 2 | 1315/ 2800 batches | train loss 0.3973177 +| epoch 2 | 1319/ 2800 batches | train loss 0.5470278 +| epoch 2 | 1323/ 2800 batches | train loss 0.4985687 +| epoch 2 | 1327/ 2800 batches | train loss 0.4247349 +| epoch 2 | 1331/ 2800 batches | train loss 0.4936565 +| epoch 2 | 1335/ 2800 batches | train loss 0.3893157 +| epoch 2 | 1339/ 2800 batches | train loss 0.4032789 +| epoch 2 | 1343/ 2800 batches | train loss 0.5196466 +| epoch 2 | 1347/ 2800 batches | train loss 0.4721280 +| epoch 2 | 1351/ 2800 batches | train loss 0.4582187 +| epoch 2 | 1355/ 2800 batches | train loss 0.4760795 +| epoch 2 | 1359/ 2800 batches | train loss 0.4438350 +| epoch 2 | 1363/ 2800 batches | train loss 0.4564022 +| epoch 2 | 1367/ 2800 batches | train loss 0.4408749 +| epoch 2 | 1371/ 2800 batches | train loss 0.4664350 +| epoch 2 | 1375/ 2800 batches | train loss 0.5058856 +| epoch 2 | 1379/ 2800 batches | train loss 0.4265297 +| epoch 2 | 1383/ 2800 batches | train loss 0.3917756 +| epoch 2 | 1387/ 2800 batches | train loss 0.4549355 +| epoch 2 | 1391/ 2800 batches | train loss 0.4342715 +| epoch 2 | 1395/ 2800 batches | train loss 0.4062056 +| epoch 2 | 1399/ 2800 batches | train loss 0.3928122 +| epoch 2 | 1403/ 2800 batches | train loss 0.4627220 +| epoch 2 | 1407/ 2800 batches | train loss 0.4615152 +| epoch 2 | 1411/ 2800 batches | train loss 0.5097424 +| epoch 2 | 1415/ 2800 batches | train loss 0.4075157 +| epoch 2 | 1419/ 2800 batches | train loss 0.4620005 +| epoch 2 | 1423/ 2800 batches | train loss 0.4437301 +| epoch 2 | 1427/ 2800 batches | train loss 0.4406903 +| epoch 2 | 1431/ 2800 batches | train loss 0.3956366 +| epoch 2 | 1435/ 2800 batches | train loss 0.4059104 +| epoch 2 | 1439/ 2800 batches | train loss 0.4428267 +| epoch 2 | 1443/ 2800 batches | train loss 0.2921286 +| epoch 2 | 1447/ 2800 batches | train loss 0.3696012 +| epoch 2 | 1451/ 2800 batches | train loss 0.4319737 +| epoch 2 | 1455/ 2800 batches | train loss 0.3785365 +| epoch 2 | 1459/ 2800 batches | train loss 0.4959877 +| epoch 2 | 1463/ 2800 batches | train loss 0.4546932 +| epoch 2 | 1467/ 2800 batches | train loss 0.3588575 +| epoch 2 | 1471/ 2800 batches | train loss 0.5378387 +| epoch 2 | 1475/ 2800 batches | train loss 0.4712236 +| epoch 2 | 1479/ 2800 batches | train loss 0.4101979 +| epoch 2 | 1483/ 2800 batches | train loss 0.4618784 +| epoch 2 | 1487/ 2800 batches | train loss 0.3573419 +| epoch 2 | 1491/ 2800 batches | train loss 0.4951983 +| epoch 2 | 1495/ 2800 batches | train loss 0.4372171 +| epoch 2 | 1499/ 2800 batches | train loss 0.4258381 +| epoch 2 | 1503/ 2800 batches | train loss 0.3454111 +| epoch 2 | 1507/ 2800 batches | train loss 0.4726451 +| epoch 2 | 1511/ 2800 batches | train loss 0.4213558 +| epoch 2 | 1515/ 2800 batches | train loss 0.5246319 +| epoch 2 | 1519/ 2800 batches | train loss 0.4104688 +| epoch 2 | 1523/ 2800 batches | train loss 0.4747547 +| epoch 2 | 1527/ 2800 batches | train loss 0.5246226 +| epoch 2 | 1531/ 2800 batches | train loss 0.5033349 +| epoch 2 | 1535/ 2800 batches | train loss 0.4993302 +| epoch 2 | 1539/ 2800 batches | train loss 0.4779934 +| epoch 2 | 1543/ 2800 batches | train loss 0.5790686 +| epoch 2 | 1547/ 2800 batches | train loss 0.5288104 +| epoch 2 | 1551/ 2800 batches | train loss 0.4533915 +| epoch 2 | 1555/ 2800 batches | train loss 0.5078993 +| epoch 2 | 1559/ 2800 batches | train loss 0.4795070 +| epoch 2 | 1563/ 2800 batches | train loss 0.4232819 +| epoch 2 | 1567/ 2800 batches | train loss 0.3764274 +| epoch 2 | 1571/ 2800 batches | train loss 0.5021323 +| epoch 2 | 1575/ 2800 batches | train loss 0.4974982 +| epoch 2 | 1579/ 2800 batches | train loss 0.4793032 +| epoch 2 | 1583/ 2800 batches | train loss 0.5141742 +| epoch 2 | 1587/ 2800 batches | train loss 0.4893372 +| epoch 2 | 1591/ 2800 batches | train loss 0.4503145 +| epoch 2 | 1595/ 2800 batches | train loss 0.4082649 +| epoch 2 | 1599/ 2800 batches | train loss 0.4601863 +| epoch 2 | 1603/ 2800 batches | train loss 0.4168353 +| epoch 2 | 1607/ 2800 batches | train loss 0.4394351 +| epoch 2 | 1611/ 2800 batches | train loss 0.4442509 +| epoch 2 | 1615/ 2800 batches | train loss 0.4882978 +| epoch 2 | 1619/ 2800 batches | train loss 0.4056796 +| epoch 2 | 1623/ 2800 batches | train loss 0.3863330 +| epoch 2 | 1627/ 2800 batches | train loss 0.4635281 +| epoch 2 | 1631/ 2800 batches | train loss 0.4351618 +| epoch 2 | 1635/ 2800 batches | train loss 0.4435634 +| epoch 2 | 1639/ 2800 batches | train loss 0.5021278 +| epoch 2 | 1643/ 2800 batches | train loss 0.4388962 +| epoch 2 | 1647/ 2800 batches | train loss 0.4260520 +| epoch 2 | 1651/ 2800 batches | train loss 0.4617295 +| epoch 2 | 1655/ 2800 batches | train loss 0.4591645 +| epoch 2 | 1659/ 2800 batches | train loss 0.4438442 +| epoch 2 | 1663/ 2800 batches | train loss 0.4395856 +| epoch 2 | 1667/ 2800 batches | train loss 0.4870393 +| epoch 2 | 1671/ 2800 batches | train loss 0.4848451 +| epoch 2 | 1675/ 2800 batches | train loss 0.3930767 +| epoch 2 | 1679/ 2800 batches | train loss 0.4075162 +| epoch 2 | 1683/ 2800 batches | train loss 0.4710853 +| epoch 2 | 1687/ 2800 batches | train loss 0.5022917 +| epoch 2 | 1691/ 2800 batches | train loss 0.4624565 +| epoch 2 | 1695/ 2800 batches | train loss 0.6241195 +| epoch 2 | 1699/ 2800 batches | train loss 0.6390396 +| epoch 2 | 1703/ 2800 batches | train loss 0.5943784 +| epoch 2 | 1707/ 2800 batches | train loss 0.4372118 +| epoch 2 | 1711/ 2800 batches | train loss 0.4030561 +| epoch 2 | 1715/ 2800 batches | train loss 0.4809677 +| epoch 2 | 1719/ 2800 batches | train loss 0.4705356 +| epoch 2 | 1723/ 2800 batches | train loss 0.4734679 +| epoch 2 | 1727/ 2800 batches | train loss 0.5081261 +| epoch 2 | 1731/ 2800 batches | train loss 0.4116316 +| epoch 2 | 1735/ 2800 batches | train loss 0.3954395 +| epoch 2 | 1739/ 2800 batches | train loss 0.5220802 +| epoch 2 | 1743/ 2800 batches | train loss 0.4156252 +| epoch 2 | 1747/ 2800 batches | train loss 0.5260314 +| epoch 2 | 1751/ 2800 batches | train loss 0.4595739 +| epoch 2 | 1755/ 2800 batches | train loss 0.3855965 +| epoch 2 | 1759/ 2800 batches | train loss 0.4500840 +| epoch 2 | 1763/ 2800 batches | train loss 0.4817352 +| epoch 2 | 1767/ 2800 batches | train loss 0.4550061 +| epoch 2 | 1771/ 2800 batches | train loss 0.4206291 +| epoch 2 | 1775/ 2800 batches | train loss 0.4482178 +| epoch 2 | 1779/ 2800 batches | train loss 0.4180829 +| epoch 2 | 1783/ 2800 batches | train loss 0.4566343 +| epoch 2 | 1787/ 2800 batches | train loss 0.3850636 +| epoch 2 | 1791/ 2800 batches | train loss 0.4627444 +| epoch 2 | 1795/ 2800 batches | train loss 0.4514326 +| epoch 2 | 1799/ 2800 batches | train loss 0.4619006 +| epoch 2 | 1803/ 2800 batches | train loss 0.5702960 +| epoch 2 | 1807/ 2800 batches | train loss 0.5182426 +| epoch 2 | 1811/ 2800 batches | train loss 0.3481191 +| epoch 2 | 1815/ 2800 batches | train loss 0.5178784 +| epoch 2 | 1819/ 2800 batches | train loss 0.4287143 +| epoch 2 | 1823/ 2800 batches | train loss 0.4548221 +| epoch 2 | 1827/ 2800 batches | train loss 0.4288325 +| epoch 2 | 1831/ 2800 batches | train loss 0.4813898 +| epoch 2 | 1835/ 2800 batches | train loss 0.4737554 +| epoch 2 | 1839/ 2800 batches | train loss 0.3588779 +| epoch 2 | 1843/ 2800 batches | train loss 0.4174393 +| epoch 2 | 1847/ 2800 batches | train loss 0.4986295 +| epoch 2 | 1851/ 2800 batches | train loss 0.4712220 +| epoch 2 | 1855/ 2800 batches | train loss 0.4138292 +| epoch 2 | 1859/ 2800 batches | train loss 0.3974335 +| epoch 2 | 1863/ 2800 batches | train loss 0.4709811 +| epoch 2 | 1867/ 2800 batches | train loss 0.4006036 +| epoch 2 | 1871/ 2800 batches | train loss 0.4392042 +| epoch 2 | 1875/ 2800 batches | train loss 0.3948261 +| epoch 2 | 1879/ 2800 batches | train loss 0.4482459 +| epoch 2 | 1883/ 2800 batches | train loss 0.3654478 +| epoch 2 | 1887/ 2800 batches | train loss 0.4627403 +| epoch 2 | 1891/ 2800 batches | train loss 0.5178269 +| epoch 2 | 1895/ 2800 batches | train loss 0.4626476 +| epoch 2 | 1899/ 2800 batches | train loss 0.4731479 +| epoch 2 | 1903/ 2800 batches | train loss 0.4164232 +| epoch 2 | 1907/ 2800 batches | train loss 0.4796278 +| epoch 2 | 1911/ 2800 batches | train loss 0.4203406 +| epoch 2 | 1915/ 2800 batches | train loss 0.4271923 +| epoch 2 | 1919/ 2800 batches | train loss 0.5008761 +| epoch 2 | 1923/ 2800 batches | train loss 0.3894904 +| epoch 2 | 1927/ 2800 batches | train loss 0.4252682 +| epoch 2 | 1931/ 2800 batches | train loss 0.4516029 +| epoch 2 | 1935/ 2800 batches | train loss 0.5332143 +| epoch 2 | 1939/ 2800 batches | train loss 0.4746932 +| epoch 2 | 1943/ 2800 batches | train loss 0.4323208 +| epoch 2 | 1947/ 2800 batches | train loss 0.4412684 +| epoch 2 | 1951/ 2800 batches | train loss 0.4943211 +| epoch 2 | 1955/ 2800 batches | train loss 0.4007703 +| epoch 2 | 1959/ 2800 batches | train loss 0.3902989 +| epoch 2 | 1963/ 2800 batches | train loss 0.4843399 +| epoch 2 | 1967/ 2800 batches | train loss 0.5040300 +| epoch 2 | 1971/ 2800 batches | train loss 0.4654229 +| epoch 2 | 1975/ 2800 batches | train loss 0.5578399 +| epoch 2 | 1979/ 2800 batches | train loss 0.4920152 +| epoch 2 | 1983/ 2800 batches | train loss 0.5653864 +| epoch 2 | 1987/ 2800 batches | train loss 0.3392743 +| epoch 2 | 1991/ 2800 batches | train loss 0.4067597 +| epoch 2 | 1995/ 2800 batches | train loss 0.4657171 +| epoch 2 | 1999/ 2800 batches | train loss 0.4761074 +| epoch 2 | 2003/ 2800 batches | train loss 0.4898701 +| epoch 2 | 2007/ 2800 batches | train loss 0.3796963 +| epoch 2 | 2011/ 2800 batches | train loss 0.4418550 +| epoch 2 | 2015/ 2800 batches | train loss 0.4154779 +| epoch 2 | 2019/ 2800 batches | train loss 0.4449770 +| epoch 2 | 2023/ 2800 batches | train loss 0.4745356 +| epoch 2 | 2027/ 2800 batches | train loss 0.3526391 +| epoch 2 | 2031/ 2800 batches | train loss 0.4777471 +| epoch 2 | 2035/ 2800 batches | train loss 0.4820673 +| epoch 2 | 2039/ 2800 batches | train loss 0.4317772 +| epoch 2 | 2043/ 2800 batches | train loss 0.3685275 +| epoch 2 | 2047/ 2800 batches | train loss 0.4383198 +| epoch 2 | 2051/ 2800 batches | train loss 0.4893293 +| epoch 2 | 2055/ 2800 batches | train loss 0.4024464 +| epoch 2 | 2059/ 2800 batches | train loss 0.2055446 +| epoch 2 | 2063/ 2800 batches | train loss 0.3727049 +| epoch 2 | 2067/ 2800 batches | train loss 0.4222184 +| epoch 2 | 2071/ 2800 batches | train loss 0.4704439 +| epoch 2 | 2075/ 2800 batches | train loss 0.5667003 +| epoch 2 | 2079/ 2800 batches | train loss 0.4278044 +| epoch 2 | 2083/ 2800 batches | train loss 0.4765782 +| epoch 2 | 2087/ 2800 batches | train loss 0.4364819 +| epoch 2 | 2091/ 2800 batches | train loss 0.3661317 +| epoch 2 | 2095/ 2800 batches | train loss 0.3814402 +| epoch 2 | 2099/ 2800 batches | train loss 0.4734731 +| epoch 2 | 2103/ 2800 batches | train loss 0.4725043 +| epoch 2 | 2107/ 2800 batches | train loss 0.4322873 +| epoch 2 | 2111/ 2800 batches | train loss 0.5179701 +| epoch 2 | 2115/ 2800 batches | train loss 0.4838604 +| epoch 2 | 2119/ 2800 batches | train loss 0.4107907 +| epoch 2 | 2123/ 2800 batches | train loss 0.4672498 +| epoch 2 | 2127/ 2800 batches | train loss 0.4720881 +| epoch 2 | 2131/ 2800 batches | train loss 0.4994873 +| epoch 2 | 2135/ 2800 batches | train loss 0.4040333 +| epoch 2 | 2139/ 2800 batches | train loss 0.4239651 +| epoch 2 | 2143/ 2800 batches | train loss 0.6033756 +| epoch 2 | 2147/ 2800 batches | train loss 0.4908107 +| epoch 2 | 2151/ 2800 batches | train loss 0.4568755 +| epoch 2 | 2155/ 2800 batches | train loss 0.4503907 +| epoch 2 | 2159/ 2800 batches | train loss 0.4711703 +| epoch 2 | 2163/ 2800 batches | train loss 0.5135688 +| epoch 2 | 2167/ 2800 batches | train loss 0.4788396 +| epoch 2 | 2171/ 2800 batches | train loss 0.3604994 +| epoch 2 | 2175/ 2800 batches | train loss 0.3812063 +| epoch 2 | 2179/ 2800 batches | train loss 0.3695118 +| epoch 2 | 2183/ 2800 batches | train loss 0.3332299 +| epoch 2 | 2187/ 2800 batches | train loss 0.4480660 +| epoch 2 | 2191/ 2800 batches | train loss 0.4502240 +| epoch 2 | 2195/ 2800 batches | train loss 0.5030013 +| epoch 2 | 2199/ 2800 batches | train loss 0.4785157 +| epoch 2 | 2203/ 2800 batches | train loss 0.4663146 +| epoch 2 | 2207/ 2800 batches | train loss 0.4442221 +| epoch 2 | 2211/ 2800 batches | train loss 0.4579082 +| epoch 2 | 2215/ 2800 batches | train loss 0.4040958 +| epoch 2 | 2219/ 2800 batches | train loss 0.2939728 +| epoch 2 | 2223/ 2800 batches | train loss 0.3463209 +| epoch 2 | 2227/ 2800 batches | train loss 0.6268757 +| epoch 2 | 2231/ 2800 batches | train loss 0.3718224 +| epoch 2 | 2235/ 2800 batches | train loss 0.4278657 +| epoch 2 | 2239/ 2800 batches | train loss 0.4706835 +| epoch 2 | 2243/ 2800 batches | train loss 0.3849286 +| epoch 2 | 2247/ 2800 batches | train loss 0.4661320 +| epoch 2 | 2251/ 2800 batches | train loss 0.3645075 +| epoch 2 | 2255/ 2800 batches | train loss 0.4129333 +| epoch 2 | 2259/ 2800 batches | train loss 0.5126445 +| epoch 2 | 2263/ 2800 batches | train loss 0.4707782 +| epoch 2 | 2267/ 2800 batches | train loss 0.5500788 +| epoch 2 | 2271/ 2800 batches | train loss 0.4595234 +| epoch 2 | 2275/ 2800 batches | train loss 0.2117064 +| epoch 2 | 2279/ 2800 batches | train loss 0.4708264 +| epoch 2 | 2283/ 2800 batches | train loss 0.5134270 +| epoch 2 | 2287/ 2800 batches | train loss 0.4039695 +| epoch 2 | 2291/ 2800 batches | train loss 0.4083625 +| epoch 2 | 2295/ 2800 batches | train loss 0.4992381 +| epoch 2 | 2299/ 2800 batches | train loss 0.4334051 +| epoch 2 | 2303/ 2800 batches | train loss 0.4423911 +| epoch 2 | 2307/ 2800 batches | train loss 0.3790030 +| epoch 2 | 2311/ 2800 batches | train loss 0.4592713 +| epoch 2 | 2315/ 2800 batches | train loss 0.4676585 +| epoch 2 | 2319/ 2800 batches | train loss 0.4151401 +| epoch 2 | 2323/ 2800 batches | train loss 0.4001794 +| epoch 2 | 2327/ 2800 batches | train loss 0.5173748 +| epoch 2 | 2331/ 2800 batches | train loss 0.4976286 +| epoch 2 | 2335/ 2800 batches | train loss 0.4790941 +| epoch 2 | 2339/ 2800 batches | train loss 0.3963633 +| epoch 2 | 2343/ 2800 batches | train loss 0.5711703 +| epoch 2 | 2347/ 2800 batches | train loss 0.6142586 +| epoch 2 | 2351/ 2800 batches | train loss 0.3755533 +| epoch 2 | 2355/ 2800 batches | train loss 0.5112764 +| epoch 2 | 2359/ 2800 batches | train loss 0.3866606 +| epoch 2 | 2363/ 2800 batches | train loss 0.4308395 +| epoch 2 | 2367/ 2800 batches | train loss 0.5361233 +| epoch 2 | 2371/ 2800 batches | train loss 0.3835062 +| epoch 2 | 2375/ 2800 batches | train loss 0.4619182 +| epoch 2 | 2379/ 2800 batches | train loss 0.4610549 +| epoch 2 | 2383/ 2800 batches | train loss 0.4873530 +| epoch 2 | 2387/ 2800 batches | train loss 0.4229435 +| epoch 2 | 2391/ 2800 batches | train loss 0.4696947 +| epoch 2 | 2395/ 2800 batches | train loss 0.3533904 +| epoch 2 | 2399/ 2800 batches | train loss 0.4556270 +| epoch 2 | 2403/ 2800 batches | train loss 0.4154822 +| epoch 2 | 2407/ 2800 batches | train loss 0.4790860 +| epoch 2 | 2411/ 2800 batches | train loss 0.4153506 +| epoch 2 | 2415/ 2800 batches | train loss 0.3496753 +| epoch 2 | 2419/ 2800 batches | train loss 0.5072231 +| epoch 2 | 2423/ 2800 batches | train loss 0.4392529 +| epoch 2 | 2427/ 2800 batches | train loss 0.4147559 +| epoch 2 | 2431/ 2800 batches | train loss 0.4647568 +| epoch 2 | 2435/ 2800 batches | train loss 0.4580896 +| epoch 2 | 2439/ 2800 batches | train loss 0.3573499 +| epoch 2 | 2443/ 2800 batches | train loss 0.5297246 +| epoch 2 | 2447/ 2800 batches | train loss 0.4962139 +| epoch 2 | 2451/ 2800 batches | train loss 0.4035212 +| epoch 2 | 2455/ 2800 batches | train loss 0.4339462 +| epoch 2 | 2459/ 2800 batches | train loss 0.4857787 +| epoch 2 | 2463/ 2800 batches | train loss 0.4739095 +| epoch 2 | 2467/ 2800 batches | train loss 0.5065748 +| epoch 2 | 2471/ 2800 batches | train loss 0.4189784 +| epoch 2 | 2475/ 2800 batches | train loss 0.4306488 +| epoch 2 | 2479/ 2800 batches | train loss 0.4435352 +| epoch 2 | 2483/ 2800 batches | train loss 0.4820409 +| epoch 2 | 2487/ 2800 batches | train loss 0.4366160 +| epoch 2 | 2491/ 2800 batches | train loss 0.4493032 +| epoch 2 | 2495/ 2800 batches | train loss 0.4420917 +| epoch 2 | 2499/ 2800 batches | train loss 0.4975302 +| epoch 2 | 2503/ 2800 batches | train loss 0.3851183 +| epoch 2 | 2507/ 2800 batches | train loss 0.4683477 +| epoch 2 | 2511/ 2800 batches | train loss 0.5121533 +| epoch 2 | 2515/ 2800 batches | train loss 0.4296294 +| epoch 2 | 2519/ 2800 batches | train loss 0.4469453 +| epoch 2 | 2523/ 2800 batches | train loss 0.5008301 +| epoch 2 | 2527/ 2800 batches | train loss 0.4212031 +| epoch 2 | 2531/ 2800 batches | train loss 0.4218891 +| epoch 2 | 2535/ 2800 batches | train loss 0.4557544 +| epoch 2 | 2539/ 2800 batches | train loss 0.4257748 +| epoch 2 | 2543/ 2800 batches | train loss 0.4666208 +| epoch 2 | 2547/ 2800 batches | train loss 0.4149053 +| epoch 2 | 2551/ 2800 batches | train loss 0.4742810 +| epoch 2 | 2555/ 2800 batches | train loss 0.4704439 +| epoch 2 | 2559/ 2800 batches | train loss 0.4764247 +| epoch 2 | 2563/ 2800 batches | train loss 0.4528548 +| epoch 2 | 2567/ 2800 batches | train loss 0.4356369 +| epoch 2 | 2571/ 2800 batches | train loss 0.5467026 +| epoch 2 | 2575/ 2800 batches | train loss 0.3554885 +| epoch 2 | 2579/ 2800 batches | train loss 0.4902672 +| epoch 2 | 2583/ 2800 batches | train loss 0.4171873 +| epoch 2 | 2587/ 2800 batches | train loss 0.4777888 +| epoch 2 | 2591/ 2800 batches | train loss 0.4484214 +| epoch 2 | 2595/ 2800 batches | train loss 0.5599463 +| epoch 2 | 2599/ 2800 batches | train loss 0.4533256 +| epoch 2 | 2603/ 2800 batches | train loss 0.4339023 +| epoch 2 | 2607/ 2800 batches | train loss 0.3565988 +| epoch 2 | 2611/ 2800 batches | train loss 0.4911475 +| epoch 2 | 2615/ 2800 batches | train loss 0.4103681 +| epoch 2 | 2619/ 2800 batches | train loss 0.4493349 +| epoch 2 | 2623/ 2800 batches | train loss 0.4918053 +| epoch 2 | 2627/ 2800 batches | train loss 0.3849248 +| epoch 2 | 2631/ 2800 batches | train loss 0.4673167 +| epoch 2 | 2635/ 2800 batches | train loss 0.4757965 +| epoch 2 | 2639/ 2800 batches | train loss 0.5444368 +| epoch 2 | 2643/ 2800 batches | train loss 0.4051609 +| epoch 2 | 2647/ 2800 batches | train loss 0.4603779 +| epoch 2 | 2651/ 2800 batches | train loss 0.4924538 +| epoch 2 | 2655/ 2800 batches | train loss 0.5431579 +| epoch 2 | 2659/ 2800 batches | train loss 0.4248927 +| epoch 2 | 2663/ 2800 batches | train loss 0.4601076 +| epoch 2 | 2667/ 2800 batches | train loss 0.3139250 +| epoch 2 | 2671/ 2800 batches | train loss 0.3944605 +| epoch 2 | 2675/ 2800 batches | train loss 0.4479046 +| epoch 2 | 2679/ 2800 batches | train loss 0.3625035 +| epoch 2 | 2683/ 2800 batches | train loss 0.4947081 +| epoch 2 | 2687/ 2800 batches | train loss 0.4214146 +| epoch 2 | 2691/ 2800 batches | train loss 0.5115495 +| epoch 2 | 2695/ 2800 batches | train loss 0.3973511 +| epoch 2 | 2699/ 2800 batches | train loss 0.4521736 +| epoch 2 | 2703/ 2800 batches | train loss 0.4097875 +| epoch 2 | 2707/ 2800 batches | train loss 0.5431559 +| epoch 2 | 2711/ 2800 batches | train loss 0.4447654 +| epoch 2 | 2715/ 2800 batches | train loss 0.4397442 +| epoch 2 | 2719/ 2800 batches | train loss 0.3376305 +| epoch 2 | 2723/ 2800 batches | train loss 0.4332831 +| epoch 2 | 2727/ 2800 batches | train loss 0.4689602 +| epoch 2 | 2731/ 2800 batches | train loss 0.4614126 +| epoch 2 | 2735/ 2800 batches | train loss 0.4213990 +| epoch 2 | 2739/ 2800 batches | train loss 0.4086056 +| epoch 2 | 2743/ 2800 batches | train loss 0.4490624 +| epoch 2 | 2747/ 2800 batches | train loss 0.4506192 +| epoch 2 | 2751/ 2800 batches | train loss 0.4660586 +| epoch 2 | 2755/ 2800 batches | train loss 0.4817017 +| epoch 2 | 2759/ 2800 batches | train loss 0.5196201 +| epoch 2 | 2763/ 2800 batches | train loss 0.4559009 +| epoch 2 | 2767/ 2800 batches | train loss 0.3978995 +| epoch 2 | 2771/ 2800 batches | train loss 0.4023739 +| epoch 2 | 2775/ 2800 batches | train loss 0.4692814 +| epoch 2 | 2779/ 2800 batches | train loss 0.3807990 +| epoch 2 | 2783/ 2800 batches | train loss 0.5354024 +| epoch 2 | 2787/ 2800 batches | train loss 0.2921708 +| epoch 2 | 2791/ 2800 batches | train loss 0.4252195 +| epoch 2 | 2795/ 2800 batches | train loss 0.4891270 +| epoch 2 | 2799/ 2800 batches | train loss 0.4312936 +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 2800 batches | test loss 0.5232878 +| epoch 2 | 7/ 2800 batches | test loss 0.4683285 +| epoch 2 | 11/ 2800 batches | test loss 0.4126173 +| epoch 2 | 15/ 2800 batches | test loss 0.4697418 +| epoch 2 | 19/ 2800 batches | test loss 0.4154184 +| epoch 2 | 23/ 2800 batches | test loss 0.4480073 +| epoch 2 | 27/ 2800 batches | test loss 0.4721535 +| epoch 2 | 31/ 2800 batches | test loss 0.4749947 +| epoch 2 | 35/ 2800 batches | test loss 0.4673318 +| epoch 2 | 39/ 2800 batches | test loss 0.5324794 +| epoch 2 | 43/ 2800 batches | test loss 0.4402598 +| epoch 2 | 47/ 2800 batches | test loss 0.4738473 +| epoch 2 | 51/ 2800 batches | test loss 0.4897513 +| epoch 2 | 55/ 2800 batches | test loss 0.3408017 +| epoch 2 | 59/ 2800 batches | test loss 0.5201396 +| epoch 2 | 63/ 2800 batches | test loss 0.5312167 +| epoch 2 | 67/ 2800 batches | test loss 0.4813979 +| epoch 2 | 71/ 2800 batches | test loss 0.4421744 +| epoch 2 | 75/ 2800 batches | test loss 0.5192711 +| epoch 2 | 79/ 2800 batches | test loss 0.4193758 +| epoch 2 | 83/ 2800 batches | test loss 0.3336459 +| epoch 2 | 87/ 2800 batches | test loss 0.4263607 +| epoch 2 | 91/ 2800 batches | test loss 0.5019450 +| epoch 2 | 95/ 2800 batches | test loss 0.4775255 +| epoch 2 | 99/ 2800 batches | test loss 0.4117057 +| epoch 2 | 103/ 2800 batches | test loss 0.4884515 +| epoch 2 | 107/ 2800 batches | test loss 0.5387912 +| epoch 2 | 111/ 2800 batches | test loss 0.4510639 +| epoch 2 | 115/ 2800 batches | test loss 0.4368986 +| epoch 2 | 119/ 2800 batches | test loss 0.4586731 +| epoch 2 | 123/ 2800 batches | test loss 0.3308769 +| epoch 2 | 127/ 2800 batches | test loss 0.4614264 +| epoch 2 | 131/ 2800 batches | test loss 0.4133461 +| epoch 2 | 135/ 2800 batches | test loss 0.4151056 +| epoch 2 | 139/ 2800 batches | test loss 0.3777802 +| epoch 2 | 143/ 2800 batches | test loss 0.4726337 +| epoch 2 | 147/ 2800 batches | test loss 0.4966695 +| epoch 2 | 151/ 2800 batches | test loss 0.5012437 +| epoch 2 | 155/ 2800 batches | test loss 0.3765896 +| epoch 2 | 159/ 2800 batches | test loss 0.4725139 +| epoch 2 | 163/ 2800 batches | test loss 0.5115609 +| epoch 2 | 167/ 2800 batches | test loss 0.4480646 +| epoch 2 | 171/ 2800 batches | test loss 0.4114740 +| epoch 2 | 175/ 2800 batches | test loss 0.5348982 +| epoch 2 | 179/ 2800 batches | test loss 0.4898159 +| epoch 2 | 183/ 2800 batches | test loss 0.4721739 +| epoch 2 | 187/ 2800 batches | test loss 0.4142612 +| epoch 2 | 191/ 2800 batches | test loss 0.4593256 +| epoch 2 | 195/ 2800 batches | test loss 0.4711369 +| epoch 2 | 199/ 2800 batches | test loss 0.4585865 +| epoch 2 | 203/ 2800 batches | test loss 0.4743104 +| epoch 2 | 207/ 2800 batches | test loss 0.4775003 +| epoch 2 | 211/ 2800 batches | test loss 0.4333078 +| epoch 2 | 215/ 2800 batches | test loss 0.4304208 +| epoch 2 | 219/ 2800 batches | test loss 0.4098345 +| epoch 2 | 223/ 2800 batches | test loss 0.5133691 +| epoch 2 | 227/ 2800 batches | test loss 0.4292347 +| epoch 2 | 231/ 2800 batches | test loss 0.4309061 +| epoch 2 | 235/ 2800 batches | test loss 0.4397737 +| epoch 2 | 239/ 2800 batches | test loss 0.4997742 +| epoch 2 | 243/ 2800 batches | test loss 0.5412508 +| epoch 2 | 247/ 2800 batches | test loss 0.4438891 +| epoch 2 | 251/ 2800 batches | test loss 0.4535372 +| epoch 2 | 255/ 2800 batches | test loss 0.5547884 +| epoch 2 | 259/ 2800 batches | test loss 0.3954635 +| epoch 2 | 263/ 2800 batches | test loss 0.4020814 +| epoch 2 | 267/ 2800 batches | test loss 0.4551358 +| epoch 2 | 271/ 2800 batches | test loss 0.5343168 +| epoch 2 | 275/ 2800 batches | test loss 0.4443219 +| epoch 2 | 279/ 2800 batches | test loss 0.4149777 +| epoch 2 | 283/ 2800 batches | test loss 0.3976318 +| epoch 2 | 287/ 2800 batches | test loss 0.4207072 +| epoch 2 | 291/ 2800 batches | test loss 0.5585695 +| epoch 2 | 295/ 2800 batches | test loss 0.4874782 +| epoch 2 | 299/ 2800 batches | test loss 0.4261833 +| epoch 2 | 303/ 2800 batches | test loss 0.4357547 +| epoch 2 | 307/ 2800 batches | test loss 0.3811727 +| epoch 2 | 311/ 2800 batches | test loss 0.5502947 +| epoch 2 | 315/ 2800 batches | test loss 0.4129769 +| epoch 2 | 319/ 2800 batches | test loss 0.4697291 +| epoch 2 | 323/ 2800 batches | test loss 0.4224835 +| epoch 2 | 327/ 2800 batches | test loss 0.4685842 +| epoch 2 | 331/ 2800 batches | test loss 0.4326941 +| epoch 2 | 335/ 2800 batches | test loss 0.4526818 +| epoch 2 | 339/ 2800 batches | test loss 0.3780622 +| epoch 2 | 343/ 2800 batches | test loss 0.4737834 +| epoch 2 | 347/ 2800 batches | test loss 0.3053282 +| epoch 2 | 351/ 2800 batches | test loss 0.4916722 +| epoch 2 | 355/ 2800 batches | test loss 0.5336680 +| epoch 2 | 359/ 2800 batches | test loss 0.5299052 +| epoch 2 | 363/ 2800 batches | test loss 0.4574631 +| epoch 2 | 367/ 2800 batches | test loss 0.4579806 +| epoch 2 | 371/ 2800 batches | test loss 0.4315276 +| epoch 2 | 375/ 2800 batches | test loss 0.4928948 +| epoch 2 | 379/ 2800 batches | test loss 0.5109484 +| epoch 2 | 383/ 2800 batches | test loss 0.4694780 +| epoch 2 | 387/ 2800 batches | test loss 0.5161328 +| epoch 2 | 391/ 2800 batches | test loss 0.4478546 +| epoch 2 | 395/ 2800 batches | test loss 0.5036120 +| epoch 2 | 399/ 2800 batches | test loss 0.4436015 +| epoch 2 | 403/ 2800 batches | test loss 0.4840587 +| epoch 2 | 407/ 2800 batches | test loss 0.4850461 +| epoch 2 | 411/ 2800 batches | test loss 0.5897328 +| epoch 2 | 415/ 2800 batches | test loss 0.4060819 +| epoch 2 | 419/ 2800 batches | test loss 0.4205198 +| epoch 2 | 423/ 2800 batches | test loss 0.5156432 +| epoch 2 | 427/ 2800 batches | test loss 0.5929978 +| epoch 2 | 431/ 2800 batches | test loss 0.4974034 +| epoch 2 | 435/ 2800 batches | test loss 0.5108457 +| epoch 2 | 439/ 2800 batches | test loss 0.4524794 +| epoch 2 | 443/ 2800 batches | test loss 0.4213062 +| epoch 2 | 447/ 2800 batches | test loss 0.3855439 +| epoch 2 | 451/ 2800 batches | test loss 0.4051354 +| epoch 2 | 455/ 2800 batches | test loss 0.5186788 +| epoch 2 | 459/ 2800 batches | test loss 0.4619870 +| epoch 2 | 463/ 2800 batches | test loss 0.3943442 +| epoch 2 | 467/ 2800 batches | test loss 0.4778387 +| epoch 2 | 471/ 2800 batches | test loss 0.4918758 +| epoch 2 | 475/ 2800 batches | test loss 0.4880772 +| epoch 2 | 479/ 2800 batches | test loss 0.4086790 +| epoch 2 | 483/ 2800 batches | test loss 0.4782826 +| epoch 2 | 487/ 2800 batches | test loss 0.5255842 +| epoch 2 | 491/ 2800 batches | test loss 0.4558123 +| epoch 2 | 495/ 2800 batches | test loss 0.4519730 +| epoch 2 | 499/ 2800 batches | test loss 0.4326527 +| epoch 2 | 503/ 2800 batches | test loss 0.4209088 +| epoch 2 | 507/ 2800 batches | test loss 0.4159306 +| epoch 2 | 511/ 2800 batches | test loss 0.4961393 +| epoch 2 | 515/ 2800 batches | test loss 0.4063616 +| epoch 2 | 519/ 2800 batches | test loss 0.4264519 +| epoch 2 | 523/ 2800 batches | test loss 0.4000192 +| epoch 2 | 527/ 2800 batches | test loss 0.4713491 +| epoch 2 | 531/ 2800 batches | test loss 0.4332909 +| epoch 2 | 535/ 2800 batches | test loss 0.3861344 +| epoch 2 | 539/ 2800 batches | test loss 0.5352275 +| epoch 2 | 543/ 2800 batches | test loss 0.5204253 +| epoch 2 | 547/ 2800 batches | test loss 0.4084204 +| epoch 2 | 551/ 2800 batches | test loss 0.5339264 +| epoch 2 | 555/ 2800 batches | test loss 0.4553852 +| epoch 2 | 559/ 2800 batches | test loss 0.4790889 +| epoch 2 | 563/ 2800 batches | test loss 0.4355171 +| epoch 2 | 567/ 2800 batches | test loss 0.4492971 +| epoch 2 | 571/ 2800 batches | test loss 0.4750047 +| epoch 2 | 575/ 2800 batches | test loss 0.4175327 +| epoch 2 | 579/ 2800 batches | test loss 0.4764944 +| epoch 2 | 583/ 2800 batches | test loss 0.4095207 +| epoch 2 | 587/ 2800 batches | test loss 0.4607068 +| epoch 2 | 591/ 2800 batches | test loss 0.3982055 +| epoch 2 | 595/ 2800 batches | test loss 0.3603368 +| epoch 2 | 599/ 2800 batches | test loss 0.4371673 +| epoch 2 | 603/ 2800 batches | test loss 0.3948286 +| epoch 2 | 607/ 2800 batches | test loss 0.4966057 +| epoch 2 | 611/ 2800 batches | test loss 0.4250763 +| epoch 2 | 615/ 2800 batches | test loss 0.4069941 +| epoch 2 | 619/ 2800 batches | test loss 0.4278206 +| epoch 2 | 623/ 2800 batches | test loss 0.4714335 +| epoch 2 | 627/ 2800 batches | test loss 0.4898192 +| epoch 2 | 631/ 2800 batches | test loss 0.4206027 +| epoch 2 | 635/ 2800 batches | test loss 0.4593972 +| epoch 2 | 639/ 2800 batches | test loss 0.4831571 +| epoch 2 | 643/ 2800 batches | test loss 0.4506583 +| epoch 2 | 647/ 2800 batches | test loss 0.4186963 +| epoch 2 | 651/ 2800 batches | test loss 0.3984824 +| epoch 2 | 655/ 2800 batches | test loss 0.4213030 +| epoch 2 | 659/ 2800 batches | test loss 0.5040513 +| epoch 2 | 663/ 2800 batches | test loss 0.4226098 +| epoch 2 | 667/ 2800 batches | test loss 0.5046645 +| epoch 2 | 671/ 2800 batches | test loss 0.5812368 +| epoch 2 | 675/ 2800 batches | test loss 0.5261053 +| epoch 2 | 679/ 2800 batches | test loss 0.4983081 +| epoch 2 | 683/ 2800 batches | test loss 0.4080486 +| epoch 2 | 687/ 2800 batches | test loss 0.4507325 +| epoch 2 | 691/ 2800 batches | test loss 0.4822459 +| epoch 2 | 695/ 2800 batches | test loss 0.5477077 +| epoch 2 | 699/ 2800 batches | test loss 0.4811338 +| epoch 2 | final test loss 0.4554, save model! +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 2800 batches | train loss 0.4105433 +| epoch 3 | 7/ 2800 batches | train loss 0.4579675 +| epoch 3 | 11/ 2800 batches | train loss 0.4357134 +| epoch 3 | 15/ 2800 batches | train loss 0.4198657 +| epoch 3 | 19/ 2800 batches | train loss 0.4431823 +| epoch 3 | 23/ 2800 batches | train loss 0.4198375 +| epoch 3 | 27/ 2800 batches | train loss 0.4182959 +| epoch 3 | 31/ 2800 batches | train loss 0.4968922 +| epoch 3 | 35/ 2800 batches | train loss 0.4092172 +| epoch 3 | 39/ 2800 batches | train loss 0.5758330 +| epoch 3 | 43/ 2800 batches | train loss 0.5127612 +| epoch 3 | 47/ 2800 batches | train loss 0.4414711 +| epoch 3 | 51/ 2800 batches | train loss 0.5353831 +| epoch 3 | 55/ 2800 batches | train loss 0.4338151 +| epoch 3 | 59/ 2800 batches | train loss 0.4298454 +| epoch 3 | 63/ 2800 batches | train loss 0.4321272 +| epoch 3 | 67/ 2800 batches | train loss 0.4340793 +| epoch 3 | 71/ 2800 batches | train loss 0.5868769 +| epoch 3 | 75/ 2800 batches | train loss 0.4649112 +| epoch 3 | 79/ 2800 batches | train loss 0.5631734 +| epoch 3 | 83/ 2800 batches | train loss 0.3561602 +| epoch 3 | 87/ 2800 batches | train loss 0.3834195 +| epoch 3 | 91/ 2800 batches | train loss 0.4313735 +| epoch 3 | 95/ 2800 batches | train loss 0.4958230 +| epoch 3 | 99/ 2800 batches | train loss 0.4626665 +| epoch 3 | 103/ 2800 batches | train loss 0.3854941 +| epoch 3 | 107/ 2800 batches | train loss 0.4220502 +| epoch 3 | 111/ 2800 batches | train loss 0.5347933 +| epoch 3 | 115/ 2800 batches | train loss 0.4341067 +| epoch 3 | 119/ 2800 batches | train loss 0.4181002 +| epoch 3 | 123/ 2800 batches | train loss 0.4077959 +| epoch 3 | 127/ 2800 batches | train loss 0.4645240 +| epoch 3 | 131/ 2800 batches | train loss 0.4053062 +| epoch 3 | 135/ 2800 batches | train loss 0.4265890 +| epoch 3 | 139/ 2800 batches | train loss 0.5063599 +| epoch 3 | 143/ 2800 batches | train loss 0.3952933 +| epoch 3 | 147/ 2800 batches | train loss 0.4406570 +| epoch 3 | 151/ 2800 batches | train loss 0.3392561 +| epoch 3 | 155/ 2800 batches | train loss 0.3663281 +| epoch 3 | 159/ 2800 batches | train loss 0.5008313 +| epoch 3 | 163/ 2800 batches | train loss 0.5464561 +| epoch 3 | 167/ 2800 batches | train loss 0.4507903 +| epoch 3 | 171/ 2800 batches | train loss 0.4640923 +| epoch 3 | 175/ 2800 batches | train loss 0.3820466 +| epoch 3 | 179/ 2800 batches | train loss 0.5382119 +| epoch 3 | 183/ 2800 batches | train loss 0.4661563 +| epoch 3 | 187/ 2800 batches | train loss 0.3898868 +| epoch 3 | 191/ 2800 batches | train loss 0.4115297 +| epoch 3 | 195/ 2800 batches | train loss 0.4335406 +| epoch 3 | 199/ 2800 batches | train loss 0.4036421 +| epoch 3 | 203/ 2800 batches | train loss 0.4244933 +| epoch 3 | 207/ 2800 batches | train loss 0.3675411 +| epoch 3 | 211/ 2800 batches | train loss 0.3701153 +| epoch 3 | 215/ 2800 batches | train loss 0.3718534 +| epoch 3 | 219/ 2800 batches | train loss 0.4376782 +| epoch 3 | 223/ 2800 batches | train loss 0.4382818 +| epoch 3 | 227/ 2800 batches | train loss 0.4766544 +| epoch 3 | 231/ 2800 batches | train loss 0.4092157 +| epoch 3 | 235/ 2800 batches | train loss 0.4785674 +| epoch 3 | 239/ 2800 batches | train loss 0.5522065 +| epoch 3 | 243/ 2800 batches | train loss 0.5111634 +| epoch 3 | 247/ 2800 batches | train loss 0.4736213 +| epoch 3 | 251/ 2800 batches | train loss 0.4385487 +| epoch 3 | 255/ 2800 batches | train loss 0.3749106 +| epoch 3 | 259/ 2800 batches | train loss 0.4280028 +| epoch 3 | 263/ 2800 batches | train loss 0.4737985 +| epoch 3 | 267/ 2800 batches | train loss 0.3798953 +| epoch 3 | 271/ 2800 batches | train loss 0.4035908 +| epoch 3 | 275/ 2800 batches | train loss 0.4966347 +| epoch 3 | 279/ 2800 batches | train loss 0.4798065 +| epoch 3 | 283/ 2800 batches | train loss 0.2442749 +| epoch 3 | 287/ 2800 batches | train loss 0.4890723 +| epoch 3 | 291/ 2800 batches | train loss 0.4823532 +| epoch 3 | 295/ 2800 batches | train loss 0.4434992 +| epoch 3 | 299/ 2800 batches | train loss 0.4338607 +| epoch 3 | 303/ 2800 batches | train loss 0.4328920 +| epoch 3 | 307/ 2800 batches | train loss 0.4552524 +| epoch 3 | 311/ 2800 batches | train loss 0.3818716 +| epoch 3 | 315/ 2800 batches | train loss 0.4274700 +| epoch 3 | 319/ 2800 batches | train loss 0.4044143 +| epoch 3 | 323/ 2800 batches | train loss 0.4247671 +| epoch 3 | 327/ 2800 batches | train loss 0.4071321 +| epoch 3 | 331/ 2800 batches | train loss 0.4261954 +| epoch 3 | 335/ 2800 batches | train loss 0.3968362 +| epoch 3 | 339/ 2800 batches | train loss 0.4055942 +| epoch 3 | 343/ 2800 batches | train loss 0.4748609 +| epoch 3 | 347/ 2800 batches | train loss 0.4657122 +| epoch 3 | 351/ 2800 batches | train loss 0.4054157 +| epoch 3 | 355/ 2800 batches | train loss 0.4859201 +| epoch 3 | 359/ 2800 batches | train loss 0.4333568 +| epoch 3 | 363/ 2800 batches | train loss 0.3941413 +| epoch 3 | 367/ 2800 batches | train loss 0.3423884 +| epoch 3 | 371/ 2800 batches | train loss 0.3539498 +| epoch 3 | 375/ 2800 batches | train loss 0.4612633 +| epoch 3 | 379/ 2800 batches | train loss 0.4602146 +| epoch 3 | 383/ 2800 batches | train loss 0.4304098 +| epoch 3 | 387/ 2800 batches | train loss 0.3713253 +| epoch 3 | 391/ 2800 batches | train loss 0.4382371 +| epoch 3 | 395/ 2800 batches | train loss 0.4089968 +| epoch 3 | 399/ 2800 batches | train loss 0.2685489 +| epoch 3 | 403/ 2800 batches | train loss 0.4120426 +| epoch 3 | 407/ 2800 batches | train loss 0.4116284 +| epoch 3 | 411/ 2800 batches | train loss 0.4652164 +| epoch 3 | 415/ 2800 batches | train loss 0.3037093 +| epoch 3 | 419/ 2800 batches | train loss 0.3429003 +| epoch 3 | 423/ 2800 batches | train loss 0.4065161 +| epoch 3 | 427/ 2800 batches | train loss 0.4240603 +| epoch 3 | 431/ 2800 batches | train loss 0.4003509 +| epoch 3 | 435/ 2800 batches | train loss 0.5179089 +| epoch 3 | 439/ 2800 batches | train loss 0.4441096 +| epoch 3 | 443/ 2800 batches | train loss 0.5252730 +| epoch 3 | 447/ 2800 batches | train loss 0.3759257 +| epoch 3 | 451/ 2800 batches | train loss 0.4737902 +| epoch 3 | 455/ 2800 batches | train loss 0.4032415 +| epoch 3 | 459/ 2800 batches | train loss 0.4884554 +| epoch 3 | 463/ 2800 batches | train loss 0.4010958 +| epoch 3 | 467/ 2800 batches | train loss 0.4399390 +| epoch 3 | 471/ 2800 batches | train loss 0.5518051 +| epoch 3 | 475/ 2800 batches | train loss 0.4702255 +| epoch 3 | 479/ 2800 batches | train loss 0.5004654 +| epoch 3 | 483/ 2800 batches | train loss 0.3606586 +| epoch 3 | 487/ 2800 batches | train loss 0.4076539 +| epoch 3 | 491/ 2800 batches | train loss 0.3765267 +| epoch 3 | 495/ 2800 batches | train loss 0.4419314 +| epoch 3 | 499/ 2800 batches | train loss 0.4506128 +| epoch 3 | 503/ 2800 batches | train loss 0.5683337 +| epoch 3 | 507/ 2800 batches | train loss 0.3936191 +| epoch 3 | 511/ 2800 batches | train loss 0.4012188 +| epoch 3 | 515/ 2800 batches | train loss 0.3453304 +| epoch 3 | 519/ 2800 batches | train loss 0.4054186 +| epoch 3 | 523/ 2800 batches | train loss 0.4559435 +| epoch 3 | 527/ 2800 batches | train loss 0.4784343 +| epoch 3 | 531/ 2800 batches | train loss 0.4719193 +| epoch 3 | 535/ 2800 batches | train loss 0.4763731 +| epoch 3 | 539/ 2800 batches | train loss 0.4391937 +| epoch 3 | 543/ 2800 batches | train loss 0.3615656 +| epoch 3 | 547/ 2800 batches | train loss 0.4520609 +| epoch 3 | 551/ 2800 batches | train loss 0.3548553 +| epoch 3 | 555/ 2800 batches | train loss 0.3995430 +| epoch 3 | 559/ 2800 batches | train loss 0.3906064 +| epoch 3 | 563/ 2800 batches | train loss 0.2815515 +| epoch 3 | 567/ 2800 batches | train loss 0.5325354 +| epoch 3 | 571/ 2800 batches | train loss 0.5250503 +| epoch 3 | 575/ 2800 batches | train loss 0.4068738 +| epoch 3 | 579/ 2800 batches | train loss 0.4419007 +| epoch 3 | 583/ 2800 batches | train loss 0.2503987 +| epoch 3 | 587/ 2800 batches | train loss 0.3284976 +| epoch 3 | 591/ 2800 batches | train loss 0.4783491 +| epoch 3 | 595/ 2800 batches | train loss 0.4776498 +| epoch 3 | 599/ 2800 batches | train loss 0.4614543 +| epoch 3 | 603/ 2800 batches | train loss 0.4712729 +| epoch 3 | 607/ 2800 batches | train loss 0.4956661 +| epoch 3 | 611/ 2800 batches | train loss 0.4756745 +| epoch 3 | 615/ 2800 batches | train loss 0.4488443 +| epoch 3 | 619/ 2800 batches | train loss 0.4074942 +| epoch 3 | 623/ 2800 batches | train loss 0.3491517 +| epoch 3 | 627/ 2800 batches | train loss 0.4051123 +| epoch 3 | 631/ 2800 batches | train loss 0.4651123 +| epoch 3 | 635/ 2800 batches | train loss 0.5668973 +| epoch 3 | 639/ 2800 batches | train loss 0.4412682 +| epoch 3 | 643/ 2800 batches | train loss 0.4281949 +| epoch 3 | 647/ 2800 batches | train loss 0.4789656 +| epoch 3 | 651/ 2800 batches | train loss 0.4603179 +| epoch 3 | 655/ 2800 batches | train loss 0.4595999 +| epoch 3 | 659/ 2800 batches | train loss 0.5480245 +| epoch 3 | 663/ 2800 batches | train loss 0.4585043 +| epoch 3 | 667/ 2800 batches | train loss 0.4288186 +| epoch 3 | 671/ 2800 batches | train loss 0.4418711 +| epoch 3 | 675/ 2800 batches | train loss 0.2388747 +| epoch 3 | 679/ 2800 batches | train loss 0.5023906 +| epoch 3 | 683/ 2800 batches | train loss 0.4340322 +| epoch 3 | 687/ 2800 batches | train loss 0.5244756 +| epoch 3 | 691/ 2800 batches | train loss 0.5784230 +| epoch 3 | 695/ 2800 batches | train loss 0.4833699 +| epoch 3 | 699/ 2800 batches | train loss 0.3589152 +| epoch 3 | 703/ 2800 batches | train loss 0.4546295 +| epoch 3 | 707/ 2800 batches | train loss 0.3990646 +| epoch 3 | 711/ 2800 batches | train loss 0.4481113 +| epoch 3 | 715/ 2800 batches | train loss 0.3458750 +| epoch 3 | 719/ 2800 batches | train loss 0.3928614 +| epoch 3 | 723/ 2800 batches | train loss 0.5133156 +| epoch 3 | 727/ 2800 batches | train loss 0.3823340 +| epoch 3 | 731/ 2800 batches | train loss 0.4776016 +| epoch 3 | 735/ 2800 batches | train loss 0.4497914 +| epoch 3 | 739/ 2800 batches | train loss 0.4288356 +| epoch 3 | 743/ 2800 batches | train loss 0.3827524 +| epoch 3 | 747/ 2800 batches | train loss 0.4877673 +| epoch 3 | 751/ 2800 batches | train loss 0.3908973 +| epoch 3 | 755/ 2800 batches | train loss 0.4582818 +| epoch 3 | 759/ 2800 batches | train loss 0.4588766 +| epoch 3 | 763/ 2800 batches | train loss 0.3618612 +| epoch 3 | 767/ 2800 batches | train loss 0.4161779 +| epoch 3 | 771/ 2800 batches | train loss 0.5172204 +| epoch 3 | 775/ 2800 batches | train loss 0.4379208 +| epoch 3 | 779/ 2800 batches | train loss 0.4800780 +| epoch 3 | 783/ 2800 batches | train loss 0.4177109 +| epoch 3 | 787/ 2800 batches | train loss 0.4480754 +| epoch 3 | 791/ 2800 batches | train loss 0.2895827 +| epoch 3 | 795/ 2800 batches | train loss 0.4445174 +| epoch 3 | 799/ 2800 batches | train loss 0.4901642 +| epoch 3 | 803/ 2800 batches | train loss 0.4319944 +| epoch 3 | 807/ 2800 batches | train loss 0.5777824 +| epoch 3 | 811/ 2800 batches | train loss 0.5925952 +| epoch 3 | 815/ 2800 batches | train loss 0.4100521 +| epoch 3 | 819/ 2800 batches | train loss 0.3969917 +| epoch 3 | 823/ 2800 batches | train loss 0.4114803 +| epoch 3 | 827/ 2800 batches | train loss 0.3971868 +| epoch 3 | 831/ 2800 batches | train loss 0.4356220 +| epoch 3 | 835/ 2800 batches | train loss 0.5681646 +| epoch 3 | 839/ 2800 batches | train loss 0.4720785 +| epoch 3 | 843/ 2800 batches | train loss 0.5172246 +| epoch 3 | 847/ 2800 batches | train loss 0.4536240 +| epoch 3 | 851/ 2800 batches | train loss 0.4897391 +| epoch 3 | 855/ 2800 batches | train loss 0.3444234 +| epoch 3 | 859/ 2800 batches | train loss 0.3791724 +| epoch 3 | 863/ 2800 batches | train loss 0.4306021 +| epoch 3 | 867/ 2800 batches | train loss 0.3920354 +| epoch 3 | 871/ 2800 batches | train loss 0.5272510 +| epoch 3 | 875/ 2800 batches | train loss 0.3679408 +| epoch 3 | 879/ 2800 batches | train loss 0.4309839 +| epoch 3 | 883/ 2800 batches | train loss 0.4972476 +| epoch 3 | 887/ 2800 batches | train loss 0.4224983 +| epoch 3 | 891/ 2800 batches | train loss 0.4810211 +| epoch 3 | 895/ 2800 batches | train loss 0.4863834 +| epoch 3 | 899/ 2800 batches | train loss 0.4608306 +| epoch 3 | 903/ 2800 batches | train loss 0.4136417 +| epoch 3 | 907/ 2800 batches | train loss 0.5578597 +| epoch 3 | 911/ 2800 batches | train loss 0.4470971 +| epoch 3 | 915/ 2800 batches | train loss 0.3783144 +| epoch 3 | 919/ 2800 batches | train loss 0.3900200 +| epoch 3 | 923/ 2800 batches | train loss 0.3601856 +| epoch 3 | 927/ 2800 batches | train loss 0.4931474 +| epoch 3 | 931/ 2800 batches | train loss 0.5388578 +| epoch 3 | 935/ 2800 batches | train loss 0.4491069 +| epoch 3 | 939/ 2800 batches | train loss 0.4371812 +| epoch 3 | 943/ 2800 batches | train loss 0.3935309 +| epoch 3 | 947/ 2800 batches | train loss 0.5509058 +| epoch 3 | 951/ 2800 batches | train loss 0.5085825 +| epoch 3 | 955/ 2800 batches | train loss 0.4900202 +| epoch 3 | 959/ 2800 batches | train loss 0.4876474 +| epoch 3 | 963/ 2800 batches | train loss 0.3516687 +| epoch 3 | 967/ 2800 batches | train loss 0.4407849 +| epoch 3 | 971/ 2800 batches | train loss 0.5075715 +| epoch 3 | 975/ 2800 batches | train loss 0.4536886 +| epoch 3 | 979/ 2800 batches | train loss 0.3908936 +| epoch 3 | 983/ 2800 batches | train loss 0.4347029 +| epoch 3 | 987/ 2800 batches | train loss 0.4457499 +| epoch 3 | 991/ 2800 batches | train loss 0.4790763 +| epoch 3 | 995/ 2800 batches | train loss 0.4919513 +| epoch 3 | 999/ 2800 batches | train loss 0.4995550 +| epoch 3 | 1003/ 2800 batches | train loss 0.4946401 +| epoch 3 | 1007/ 2800 batches | train loss 0.5112972 +| epoch 3 | 1011/ 2800 batches | train loss 0.5003361 +| epoch 3 | 1015/ 2800 batches | train loss 0.4996992 +| epoch 3 | 1019/ 2800 batches | train loss 0.4365032 +| epoch 3 | 1023/ 2800 batches | train loss 0.4003093 +| epoch 3 | 1027/ 2800 batches | train loss 0.4201446 +| epoch 3 | 1031/ 2800 batches | train loss 0.4277446 +| epoch 3 | 1035/ 2800 batches | train loss 0.4834580 +| epoch 3 | 1039/ 2800 batches | train loss 0.4489563 +| epoch 3 | 1043/ 2800 batches | train loss 0.4953541 +| epoch 3 | 1047/ 2800 batches | train loss 0.3993167 +| epoch 3 | 1051/ 2800 batches | train loss 0.4809987 +| epoch 3 | 1055/ 2800 batches | train loss 0.4305111 +| epoch 3 | 1059/ 2800 batches | train loss 0.3846282 +| epoch 3 | 1063/ 2800 batches | train loss 0.4627314 +| epoch 3 | 1067/ 2800 batches | train loss 0.4499389 +| epoch 3 | 1071/ 2800 batches | train loss 0.4839808 +| epoch 3 | 1075/ 2800 batches | train loss 0.4170305 +| epoch 3 | 1079/ 2800 batches | train loss 0.4341896 +| epoch 3 | 1083/ 2800 batches | train loss 0.4591066 +| epoch 3 | 1087/ 2800 batches | train loss 0.3271855 +| epoch 3 | 1091/ 2800 batches | train loss 0.3867252 +| epoch 3 | 1095/ 2800 batches | train loss 0.4763819 +| epoch 3 | 1099/ 2800 batches | train loss 0.4663027 +| epoch 3 | 1103/ 2800 batches | train loss 0.4404639 +| epoch 3 | 1107/ 2800 batches | train loss 0.3794208 +| epoch 3 | 1111/ 2800 batches | train loss 0.4532183 +| epoch 3 | 1115/ 2800 batches | train loss 0.4762927 +| epoch 3 | 1119/ 2800 batches | train loss 0.3458180 +| epoch 3 | 1123/ 2800 batches | train loss 0.4623430 +| epoch 3 | 1127/ 2800 batches | train loss 0.4018216 +| epoch 3 | 1131/ 2800 batches | train loss 0.4035924 +| epoch 3 | 1135/ 2800 batches | train loss 0.4750564 +| epoch 3 | 1139/ 2800 batches | train loss 0.4485806 +| epoch 3 | 1143/ 2800 batches | train loss 0.5281061 +| epoch 3 | 1147/ 2800 batches | train loss 0.3893039 +| epoch 3 | 1151/ 2800 batches | train loss 0.3472007 +| epoch 3 | 1155/ 2800 batches | train loss 0.3703246 +| epoch 3 | 1159/ 2800 batches | train loss 0.4555724 +| epoch 3 | 1163/ 2800 batches | train loss 0.4351888 +| epoch 3 | 1167/ 2800 batches | train loss 0.4012532 +| epoch 3 | 1171/ 2800 batches | train loss 0.4391194 +| epoch 3 | 1175/ 2800 batches | train loss 0.5048035 +| epoch 3 | 1179/ 2800 batches | train loss 0.3945054 +| epoch 3 | 1183/ 2800 batches | train loss 0.4694770 +| epoch 3 | 1187/ 2800 batches | train loss 0.4602894 +| epoch 3 | 1191/ 2800 batches | train loss 0.4579124 +| epoch 3 | 1195/ 2800 batches | train loss 0.4212576 +| epoch 3 | 1199/ 2800 batches | train loss 0.4155319 +| epoch 3 | 1203/ 2800 batches | train loss 0.4448824 +| epoch 3 | 1207/ 2800 batches | train loss 0.4479846 +| epoch 3 | 1211/ 2800 batches | train loss 0.4549203 +| epoch 3 | 1215/ 2800 batches | train loss 0.4355088 +| epoch 3 | 1219/ 2800 batches | train loss 0.4792823 +| epoch 3 | 1223/ 2800 batches | train loss 0.4867627 +| epoch 3 | 1227/ 2800 batches | train loss 0.5112753 +| epoch 3 | 1231/ 2800 batches | train loss 0.5661674 +| epoch 3 | 1235/ 2800 batches | train loss 0.4190463 +| epoch 3 | 1239/ 2800 batches | train loss 0.4092581 +| epoch 3 | 1243/ 2800 batches | train loss 0.4580467 +| epoch 3 | 1247/ 2800 batches | train loss 0.4880299 +| epoch 3 | 1251/ 2800 batches | train loss 0.3703458 +| epoch 3 | 1255/ 2800 batches | train loss 0.3793252 +| epoch 3 | 1259/ 2800 batches | train loss 0.4775129 +| epoch 3 | 1263/ 2800 batches | train loss 0.4728179 +| epoch 3 | 1267/ 2800 batches | train loss 0.4107352 +| epoch 3 | 1271/ 2800 batches | train loss 0.4913082 +| epoch 3 | 1275/ 2800 batches | train loss 0.4640073 +| epoch 3 | 1279/ 2800 batches | train loss 0.3981546 +| epoch 3 | 1283/ 2800 batches | train loss 0.3089900 +| epoch 3 | 1287/ 2800 batches | train loss 0.4257742 +| epoch 3 | 1291/ 2800 batches | train loss 0.4180874 +| epoch 3 | 1295/ 2800 batches | train loss 0.4711261 +| epoch 3 | 1299/ 2800 batches | train loss 0.4072177 +| epoch 3 | 1303/ 2800 batches | train loss 0.4442051 +| epoch 3 | 1307/ 2800 batches | train loss 0.4284402 +| epoch 3 | 1311/ 2800 batches | train loss 0.4325392 +| epoch 3 | 1315/ 2800 batches | train loss 0.3971525 +| epoch 3 | 1319/ 2800 batches | train loss 0.4233544 +| epoch 3 | 1323/ 2800 batches | train loss 0.3408270 +| epoch 3 | 1327/ 2800 batches | train loss 0.5285891 +| epoch 3 | 1331/ 2800 batches | train loss 0.4210240 +| epoch 3 | 1335/ 2800 batches | train loss 0.4155412 +| epoch 3 | 1339/ 2800 batches | train loss 0.5164253 +| epoch 3 | 1343/ 2800 batches | train loss 0.4372522 +| epoch 3 | 1347/ 2800 batches | train loss 0.4012404 +| epoch 3 | 1351/ 2800 batches | train loss 0.4618519 +| epoch 3 | 1355/ 2800 batches | train loss 0.4164277 +| epoch 3 | 1359/ 2800 batches | train loss 0.3450423 +| epoch 3 | 1363/ 2800 batches | train loss 0.4008706 +| epoch 3 | 1367/ 2800 batches | train loss 0.3664019 +| epoch 3 | 1371/ 2800 batches | train loss 0.4488473 +| epoch 3 | 1375/ 2800 batches | train loss 0.4593292 +| epoch 3 | 1379/ 2800 batches | train loss 0.4906740 +| epoch 3 | 1383/ 2800 batches | train loss 0.4856032 +| epoch 3 | 1387/ 2800 batches | train loss 0.3767618 +| epoch 3 | 1391/ 2800 batches | train loss 0.3311666 +| epoch 3 | 1395/ 2800 batches | train loss 0.4863704 +| epoch 3 | 1399/ 2800 batches | train loss 0.3791789 +| epoch 3 | 1403/ 2800 batches | train loss 0.4953970 +| epoch 3 | 1407/ 2800 batches | train loss 0.4485709 +| epoch 3 | 1411/ 2800 batches | train loss 0.4330650 +| epoch 3 | 1415/ 2800 batches | train loss 0.4601515 +| epoch 3 | 1419/ 2800 batches | train loss 0.4343282 +| epoch 3 | 1423/ 2800 batches | train loss 0.4662114 +| epoch 3 | 1427/ 2800 batches | train loss 0.5722541 +| epoch 3 | 1431/ 2800 batches | train loss 0.3581424 +| epoch 3 | 1435/ 2800 batches | train loss 0.3661241 +| epoch 3 | 1439/ 2800 batches | train loss 0.5045397 +| epoch 3 | 1443/ 2800 batches | train loss 0.4618289 +| epoch 3 | 1447/ 2800 batches | train loss 0.3673139 +| epoch 3 | 1451/ 2800 batches | train loss 0.4828529 +| epoch 3 | 1455/ 2800 batches | train loss 0.5825658 +| epoch 3 | 1459/ 2800 batches | train loss 0.4293644 +| epoch 3 | 1463/ 2800 batches | train loss 0.4711225 +| epoch 3 | 1467/ 2800 batches | train loss 0.5659235 +| epoch 3 | 1471/ 2800 batches | train loss 0.4288470 +| epoch 3 | 1475/ 2800 batches | train loss 0.4193411 +| epoch 3 | 1479/ 2800 batches | train loss 0.4456294 +| epoch 3 | 1483/ 2800 batches | train loss 0.4076756 +| epoch 3 | 1487/ 2800 batches | train loss 0.4704668 +| epoch 3 | 1491/ 2800 batches | train loss 0.4670830 +| epoch 3 | 1495/ 2800 batches | train loss 0.3719957 +| epoch 3 | 1499/ 2800 batches | train loss 0.4424301 +| epoch 3 | 1503/ 2800 batches | train loss 0.4017543 +| epoch 3 | 1507/ 2800 batches | train loss 0.4692183 +| epoch 3 | 1511/ 2800 batches | train loss 0.4487245 +| epoch 3 | 1515/ 2800 batches | train loss 0.5006629 +| epoch 3 | 1519/ 2800 batches | train loss 0.4774241 +| epoch 3 | 1523/ 2800 batches | train loss 0.5251901 +| epoch 3 | 1527/ 2800 batches | train loss 0.4352694 +| epoch 3 | 1531/ 2800 batches | train loss 0.5237701 +| epoch 3 | 1535/ 2800 batches | train loss 0.5064568 +| epoch 3 | 1539/ 2800 batches | train loss 0.4203318 +| epoch 3 | 1543/ 2800 batches | train loss 0.3468820 +| epoch 3 | 1547/ 2800 batches | train loss 0.4306944 +| epoch 3 | 1551/ 2800 batches | train loss 0.5351468 +| epoch 3 | 1555/ 2800 batches | train loss 0.4620930 +| epoch 3 | 1559/ 2800 batches | train loss 0.4401935 +| epoch 3 | 1563/ 2800 batches | train loss 0.4352355 +| epoch 3 | 1567/ 2800 batches | train loss 0.4150593 +| epoch 3 | 1571/ 2800 batches | train loss 0.4659969 +| epoch 3 | 1575/ 2800 batches | train loss 0.5181659 +| epoch 3 | 1579/ 2800 batches | train loss 0.4676389 +| epoch 3 | 1583/ 2800 batches | train loss 0.5131367 +| epoch 3 | 1587/ 2800 batches | train loss 0.5005234 +| epoch 3 | 1591/ 2800 batches | train loss 0.4657559 +| epoch 3 | 1595/ 2800 batches | train loss 0.3980829 +| epoch 3 | 1599/ 2800 batches | train loss 0.3832717 +| epoch 3 | 1603/ 2800 batches | train loss 0.5092852 +| epoch 3 | 1607/ 2800 batches | train loss 0.4007024 +| epoch 3 | 1611/ 2800 batches | train loss 0.4471258 +| epoch 3 | 1615/ 2800 batches | train loss 0.3720877 +| epoch 3 | 1619/ 2800 batches | train loss 0.4410505 +| epoch 3 | 1623/ 2800 batches | train loss 0.4716371 +| epoch 3 | 1627/ 2800 batches | train loss 0.4515590 +| epoch 3 | 1631/ 2800 batches | train loss 0.4588687 +| epoch 3 | 1635/ 2800 batches | train loss 0.5236397 +| epoch 3 | 1639/ 2800 batches | train loss 0.4810125 +| epoch 3 | 1643/ 2800 batches | train loss 0.4441538 +| epoch 3 | 1647/ 2800 batches | train loss 0.4079045 +| epoch 3 | 1651/ 2800 batches | train loss 0.5027336 +| epoch 3 | 1655/ 2800 batches | train loss 0.4295031 +| epoch 3 | 1659/ 2800 batches | train loss 0.3998654 +| epoch 3 | 1663/ 2800 batches | train loss 0.4076186 +| epoch 3 | 1667/ 2800 batches | train loss 0.4162447 +| epoch 3 | 1671/ 2800 batches | train loss 0.4115177 +| epoch 3 | 1675/ 2800 batches | train loss 0.4057838 +| epoch 3 | 1679/ 2800 batches | train loss 0.4164329 +| epoch 3 | 1683/ 2800 batches | train loss 0.4466978 +| epoch 3 | 1687/ 2800 batches | train loss 0.4408664 +| epoch 3 | 1691/ 2800 batches | train loss 0.4157184 +| epoch 3 | 1695/ 2800 batches | train loss 0.3842164 +| epoch 3 | 1699/ 2800 batches | train loss 0.5401734 +| epoch 3 | 1703/ 2800 batches | train loss 0.4111449 +| epoch 3 | 1707/ 2800 batches | train loss 0.4297181 +| epoch 3 | 1711/ 2800 batches | train loss 0.4526067 +| epoch 3 | 1715/ 2800 batches | train loss 0.5190195 +| epoch 3 | 1719/ 2800 batches | train loss 0.4313974 +| epoch 3 | 1723/ 2800 batches | train loss 0.3993708 +| epoch 3 | 1727/ 2800 batches | train loss 0.4313502 +| epoch 3 | 1731/ 2800 batches | train loss 0.4519168 +| epoch 3 | 1735/ 2800 batches | train loss 0.4605664 +| epoch 3 | 1739/ 2800 batches | train loss 0.5130868 +| epoch 3 | 1743/ 2800 batches | train loss 0.5279102 +| epoch 3 | 1747/ 2800 batches | train loss 0.4812671 +| epoch 3 | 1751/ 2800 batches | train loss 0.5212623 +| epoch 3 | 1755/ 2800 batches | train loss 0.4053501 +| epoch 3 | 1759/ 2800 batches | train loss 0.4952128 +| epoch 3 | 1763/ 2800 batches | train loss 0.4260138 +| epoch 3 | 1767/ 2800 batches | train loss 0.4209146 +| epoch 3 | 1771/ 2800 batches | train loss 0.4078167 +| epoch 3 | 1775/ 2800 batches | train loss 0.5404804 +| epoch 3 | 1779/ 2800 batches | train loss 0.4680149 +| epoch 3 | 1783/ 2800 batches | train loss 0.4147279 +| epoch 3 | 1787/ 2800 batches | train loss 0.4811757 +| epoch 3 | 1791/ 2800 batches | train loss 0.4368815 +| epoch 3 | 1795/ 2800 batches | train loss 0.4379465 +| epoch 3 | 1799/ 2800 batches | train loss 0.4881055 +| epoch 3 | 1803/ 2800 batches | train loss 0.3663052 +| epoch 3 | 1807/ 2800 batches | train loss 0.4244404 +| epoch 3 | 1811/ 2800 batches | train loss 0.4766447 +| epoch 3 | 1815/ 2800 batches | train loss 0.4562581 +| epoch 3 | 1819/ 2800 batches | train loss 0.4258052 +| epoch 3 | 1823/ 2800 batches | train loss 0.5116553 +| epoch 3 | 1827/ 2800 batches | train loss 0.4285977 +| epoch 3 | 1831/ 2800 batches | train loss 0.4001860 +| epoch 3 | 1835/ 2800 batches | train loss 0.4297181 +| epoch 3 | 1839/ 2800 batches | train loss 0.4287972 +| epoch 3 | 1843/ 2800 batches | train loss 0.3710985 +| epoch 3 | 1847/ 2800 batches | train loss 0.4664956 +| epoch 3 | 1851/ 2800 batches | train loss 0.4855645 +| epoch 3 | 1855/ 2800 batches | train loss 0.4397956 +| epoch 3 | 1859/ 2800 batches | train loss 0.3980333 +| epoch 3 | 1863/ 2800 batches | train loss 0.1783833 +| epoch 3 | 1867/ 2800 batches | train loss 0.4053388 +| epoch 3 | 1871/ 2800 batches | train loss 0.5083970 +| epoch 3 | 1875/ 2800 batches | train loss 0.3898907 +| epoch 3 | 1879/ 2800 batches | train loss 0.4604982 +| epoch 3 | 1883/ 2800 batches | train loss 0.4351043 +| epoch 3 | 1887/ 2800 batches | train loss 0.4490936 +| epoch 3 | 1891/ 2800 batches | train loss 0.4159199 +| epoch 3 | 1895/ 2800 batches | train loss 0.4879612 +| epoch 3 | 1899/ 2800 batches | train loss 0.1932664 +| epoch 3 | 1903/ 2800 batches | train loss 0.4085219 +| epoch 3 | 1907/ 2800 batches | train loss 0.3768956 +| epoch 3 | 1911/ 2800 batches | train loss 0.3797319 +| epoch 3 | 1915/ 2800 batches | train loss 0.5430334 +| epoch 3 | 1919/ 2800 batches | train loss 0.4182483 +| epoch 3 | 1923/ 2800 batches | train loss 0.4390912 +| epoch 3 | 1927/ 2800 batches | train loss 0.6054895 +| epoch 3 | 1931/ 2800 batches | train loss 0.4047621 +| epoch 3 | 1935/ 2800 batches | train loss 0.4779730 +| epoch 3 | 1939/ 2800 batches | train loss 0.5534332 +| epoch 3 | 1943/ 2800 batches | train loss 0.4319287 +| epoch 3 | 1947/ 2800 batches | train loss 0.4599417 +| epoch 3 | 1951/ 2800 batches | train loss 0.4846015 +| epoch 3 | 1955/ 2800 batches | train loss 0.4969569 +| epoch 3 | 1959/ 2800 batches | train loss 0.4755745 +| epoch 3 | 1963/ 2800 batches | train loss 0.3376969 +| epoch 3 | 1967/ 2800 batches | train loss 0.3510215 +| epoch 3 | 1971/ 2800 batches | train loss 0.3962554 +| epoch 3 | 1975/ 2800 batches | train loss 0.4114545 +| epoch 3 | 1979/ 2800 batches | train loss 0.4525261 +| epoch 3 | 1983/ 2800 batches | train loss 0.4389329 +| epoch 3 | 1987/ 2800 batches | train loss 0.5263887 +| epoch 3 | 1991/ 2800 batches | train loss 0.3572226 +| epoch 3 | 1995/ 2800 batches | train loss 0.4090117 +| epoch 3 | 1999/ 2800 batches | train loss 0.4763520 +| epoch 3 | 2003/ 2800 batches | train loss 0.3794320 +| epoch 3 | 2007/ 2800 batches | train loss 0.4582764 +| epoch 3 | 2011/ 2800 batches | train loss 0.4320628 +| epoch 3 | 2015/ 2800 batches | train loss 0.4306648 +| epoch 3 | 2019/ 2800 batches | train loss 0.4524844 +| epoch 3 | 2023/ 2800 batches | train loss 0.4923115 +| epoch 3 | 2027/ 2800 batches | train loss 0.4639493 +| epoch 3 | 2031/ 2800 batches | train loss 0.5149682 +| epoch 3 | 2035/ 2800 batches | train loss 0.4462768 +| epoch 3 | 2039/ 2800 batches | train loss 0.4485348 +| epoch 3 | 2043/ 2800 batches | train loss 0.3534113 +| epoch 3 | 2047/ 2800 batches | train loss 0.5497655 +| epoch 3 | 2051/ 2800 batches | train loss 0.4970599 +| epoch 3 | 2055/ 2800 batches | train loss 0.3983309 +| epoch 3 | 2059/ 2800 batches | train loss 0.6011239 +| epoch 3 | 2063/ 2800 batches | train loss 0.4717138 +| epoch 3 | 2067/ 2800 batches | train loss 0.4284772 +| epoch 3 | 2071/ 2800 batches | train loss 0.4841809 +| epoch 3 | 2075/ 2800 batches | train loss 0.3257567 +| epoch 3 | 2079/ 2800 batches | train loss 0.4794306 +| epoch 3 | 2083/ 2800 batches | train loss 0.3698191 +| epoch 3 | 2087/ 2800 batches | train loss 0.4371525 +| epoch 3 | 2091/ 2800 batches | train loss 0.4745820 +| epoch 3 | 2095/ 2800 batches | train loss 0.3591136 +| epoch 3 | 2099/ 2800 batches | train loss 0.4679007 +| epoch 3 | 2103/ 2800 batches | train loss 0.4899907 +| epoch 3 | 2107/ 2800 batches | train loss 0.3622096 +| epoch 3 | 2111/ 2800 batches | train loss 0.3870106 +| epoch 3 | 2115/ 2800 batches | train loss 0.4115575 +| epoch 3 | 2119/ 2800 batches | train loss 0.3921303 +| epoch 3 | 2123/ 2800 batches | train loss 0.4937803 +| epoch 3 | 2127/ 2800 batches | train loss 0.4316483 +| epoch 3 | 2131/ 2800 batches | train loss 0.4161436 +| epoch 3 | 2135/ 2800 batches | train loss 0.3936597 +| epoch 3 | 2139/ 2800 batches | train loss 0.4374461 +| epoch 3 | 2143/ 2800 batches | train loss 0.3932249 +| epoch 3 | 2147/ 2800 batches | train loss 0.3752041 +| epoch 3 | 2151/ 2800 batches | train loss 0.4586848 +| epoch 3 | 2155/ 2800 batches | train loss 0.4262404 +| epoch 3 | 2159/ 2800 batches | train loss 0.4613714 +| epoch 3 | 2163/ 2800 batches | train loss 0.4624256 +| epoch 3 | 2167/ 2800 batches | train loss 0.4853269 +| epoch 3 | 2171/ 2800 batches | train loss 0.3797055 +| epoch 3 | 2175/ 2800 batches | train loss 0.4820258 +| epoch 3 | 2179/ 2800 batches | train loss 0.4687307 +| epoch 3 | 2183/ 2800 batches | train loss 0.4682101 +| epoch 3 | 2187/ 2800 batches | train loss 0.4083034 +| epoch 3 | 2191/ 2800 batches | train loss 0.4328706 +| epoch 3 | 2195/ 2800 batches | train loss 0.3729290 +| epoch 3 | 2199/ 2800 batches | train loss 0.4573902 +| epoch 3 | 2203/ 2800 batches | train loss 0.4906523 +| epoch 3 | 2207/ 2800 batches | train loss 0.5217566 +| epoch 3 | 2211/ 2800 batches | train loss 0.5025211 +| epoch 3 | 2215/ 2800 batches | train loss 0.5090883 +| epoch 3 | 2219/ 2800 batches | train loss 0.4262365 +| epoch 3 | 2223/ 2800 batches | train loss 0.4947374 +| epoch 3 | 2227/ 2800 batches | train loss 0.4790908 +| epoch 3 | 2231/ 2800 batches | train loss 0.3661986 +| epoch 3 | 2235/ 2800 batches | train loss 0.4557744 +| epoch 3 | 2239/ 2800 batches | train loss 0.1903262 +| epoch 3 | 2243/ 2800 batches | train loss 0.4309824 +| epoch 3 | 2247/ 2800 batches | train loss 0.3073032 +| epoch 3 | 2251/ 2800 batches | train loss 0.4504591 +| epoch 3 | 2255/ 2800 batches | train loss 0.4236307 +| epoch 3 | 2259/ 2800 batches | train loss 0.3504271 +| epoch 3 | 2263/ 2800 batches | train loss 0.4047191 +| epoch 3 | 2267/ 2800 batches | train loss 0.3798292 +| epoch 3 | 2271/ 2800 batches | train loss 0.4313149 +| epoch 3 | 2275/ 2800 batches | train loss 0.3939037 +| epoch 3 | 2279/ 2800 batches | train loss 0.4390127 +| epoch 3 | 2283/ 2800 batches | train loss 0.3556743 +| epoch 3 | 2287/ 2800 batches | train loss 0.4380934 +| epoch 3 | 2291/ 2800 batches | train loss 0.4192737 +| epoch 3 | 2295/ 2800 batches | train loss 0.4458361 +| epoch 3 | 2299/ 2800 batches | train loss 0.4549782 +| epoch 3 | 2303/ 2800 batches | train loss 0.5187408 +| epoch 3 | 2307/ 2800 batches | train loss 0.3989824 +| epoch 3 | 2311/ 2800 batches | train loss 0.3252929 +| epoch 3 | 2315/ 2800 batches | train loss 0.3190875 +| epoch 3 | 2319/ 2800 batches | train loss 0.3753228 +| epoch 3 | 2323/ 2800 batches | train loss 0.4783198 +| epoch 3 | 2327/ 2800 batches | train loss 0.4311435 +| epoch 3 | 2331/ 2800 batches | train loss 0.3904635 +| epoch 3 | 2335/ 2800 batches | train loss 0.4636423 +| epoch 3 | 2339/ 2800 batches | train loss 0.4098824 +| epoch 3 | 2343/ 2800 batches | train loss 0.5287606 +| epoch 3 | 2347/ 2800 batches | train loss 0.5682890 +| epoch 3 | 2351/ 2800 batches | train loss 0.4275708 +| epoch 3 | 2355/ 2800 batches | train loss 0.5099807 +| epoch 3 | 2359/ 2800 batches | train loss 0.4279356 +| epoch 3 | 2363/ 2800 batches | train loss 0.3592441 +| epoch 3 | 2367/ 2800 batches | train loss 0.3542234 +| epoch 3 | 2371/ 2800 batches | train loss 0.4092791 +| epoch 3 | 2375/ 2800 batches | train loss 0.4049574 +| epoch 3 | 2379/ 2800 batches | train loss 0.4990605 +| epoch 3 | 2383/ 2800 batches | train loss 0.3396130 +| epoch 3 | 2387/ 2800 batches | train loss 0.4266641 +| epoch 3 | 2391/ 2800 batches | train loss 0.3848806 +| epoch 3 | 2395/ 2800 batches | train loss 0.5052772 +| epoch 3 | 2399/ 2800 batches | train loss 0.4080613 +| epoch 3 | 2403/ 2800 batches | train loss 0.5029035 +| epoch 3 | 2407/ 2800 batches | train loss 0.4479403 +| epoch 3 | 2411/ 2800 batches | train loss 0.5025204 +| epoch 3 | 2415/ 2800 batches | train loss 0.3412725 +| epoch 3 | 2419/ 2800 batches | train loss 0.5018252 +| epoch 3 | 2423/ 2800 batches | train loss 0.4172713 +| epoch 3 | 2427/ 2800 batches | train loss 0.5894967 +| epoch 3 | 2431/ 2800 batches | train loss 0.4401578 +| epoch 3 | 2435/ 2800 batches | train loss 0.3787211 +| epoch 3 | 2439/ 2800 batches | train loss 0.4941114 +| epoch 3 | 2443/ 2800 batches | train loss 0.4446633 +| epoch 3 | 2447/ 2800 batches | train loss 0.4848042 +| epoch 3 | 2451/ 2800 batches | train loss 0.5229849 +| epoch 3 | 2455/ 2800 batches | train loss 0.4291602 +| epoch 3 | 2459/ 2800 batches | train loss 0.3792064 +| epoch 3 | 2463/ 2800 batches | train loss 0.2935204 +| epoch 3 | 2467/ 2800 batches | train loss 0.4591012 +| epoch 3 | 2471/ 2800 batches | train loss 0.3773724 +| epoch 3 | 2475/ 2800 batches | train loss 0.5081263 +| epoch 3 | 2479/ 2800 batches | train loss 0.4862643 +| epoch 3 | 2483/ 2800 batches | train loss 0.3904355 +| epoch 3 | 2487/ 2800 batches | train loss 0.4973785 +| epoch 3 | 2491/ 2800 batches | train loss 0.3693527 +| epoch 3 | 2495/ 2800 batches | train loss 0.4325075 +| epoch 3 | 2499/ 2800 batches | train loss 0.4617579 +| epoch 3 | 2503/ 2800 batches | train loss 0.4303977 +| epoch 3 | 2507/ 2800 batches | train loss 0.4983634 +| epoch 3 | 2511/ 2800 batches | train loss 0.5048674 +| epoch 3 | 2515/ 2800 batches | train loss 0.3712938 +| epoch 3 | 2519/ 2800 batches | train loss 0.4879987 +| epoch 3 | 2523/ 2800 batches | train loss 0.3939896 +| epoch 3 | 2527/ 2800 batches | train loss 0.5178457 +| epoch 3 | 2531/ 2800 batches | train loss 0.4114374 +| epoch 3 | 2535/ 2800 batches | train loss 0.4067439 +| epoch 3 | 2539/ 2800 batches | train loss 0.5285606 +| epoch 3 | 2543/ 2800 batches | train loss 0.4251925 +| epoch 3 | 2547/ 2800 batches | train loss 0.4738443 +| epoch 3 | 2551/ 2800 batches | train loss 0.3757596 +| epoch 3 | 2555/ 2800 batches | train loss 0.5075553 +| epoch 3 | 2559/ 2800 batches | train loss 0.4905689 +| epoch 3 | 2563/ 2800 batches | train loss 0.3778695 +| epoch 3 | 2567/ 2800 batches | train loss 0.4799541 +| epoch 3 | 2571/ 2800 batches | train loss 0.4899490 +| epoch 3 | 2575/ 2800 batches | train loss 0.4734615 +| epoch 3 | 2579/ 2800 batches | train loss 0.4425048 +| epoch 3 | 2583/ 2800 batches | train loss 0.5221907 +| epoch 3 | 2587/ 2800 batches | train loss 0.4418169 +| epoch 3 | 2591/ 2800 batches | train loss 0.4981845 +| epoch 3 | 2595/ 2800 batches | train loss 0.4895714 +| epoch 3 | 2599/ 2800 batches | train loss 0.4692281 +| epoch 3 | 2603/ 2800 batches | train loss 0.4925404 +| epoch 3 | 2607/ 2800 batches | train loss 0.3452806 +| epoch 3 | 2611/ 2800 batches | train loss 0.4405537 +| epoch 3 | 2615/ 2800 batches | train loss 0.5483592 +| epoch 3 | 2619/ 2800 batches | train loss 0.4386246 +| epoch 3 | 2623/ 2800 batches | train loss 0.4317822 +| epoch 3 | 2627/ 2800 batches | train loss 0.4436204 +| epoch 3 | 2631/ 2800 batches | train loss 0.4784618 +| epoch 3 | 2635/ 2800 batches | train loss 0.5296594 +| epoch 3 | 2639/ 2800 batches | train loss 0.4088664 +| epoch 3 | 2643/ 2800 batches | train loss 0.4104355 +| epoch 3 | 2647/ 2800 batches | train loss 0.4105899 +| epoch 3 | 2651/ 2800 batches | train loss 0.4139388 +| epoch 3 | 2655/ 2800 batches | train loss 0.4848329 +| epoch 3 | 2659/ 2800 batches | train loss 0.4993533 +| epoch 3 | 2663/ 2800 batches | train loss 0.4745022 +| epoch 3 | 2667/ 2800 batches | train loss 0.5389447 +| epoch 3 | 2671/ 2800 batches | train loss 0.3697710 +| epoch 3 | 2675/ 2800 batches | train loss 0.5096992 +| epoch 3 | 2679/ 2800 batches | train loss 0.4310651 +| epoch 3 | 2683/ 2800 batches | train loss 0.5210042 +| epoch 3 | 2687/ 2800 batches | train loss 0.4672098 +| epoch 3 | 2691/ 2800 batches | train loss 0.4241959 +| epoch 3 | 2695/ 2800 batches | train loss 0.3255473 +| epoch 3 | 2699/ 2800 batches | train loss 0.3637179 +| epoch 3 | 2703/ 2800 batches | train loss 0.3891156 +| epoch 3 | 2707/ 2800 batches | train loss 0.4501317 +| epoch 3 | 2711/ 2800 batches | train loss 0.4289080 +| epoch 3 | 2715/ 2800 batches | train loss 0.4227586 +| epoch 3 | 2719/ 2800 batches | train loss 0.4637509 +| epoch 3 | 2723/ 2800 batches | train loss 0.4374717 +| epoch 3 | 2727/ 2800 batches | train loss 0.4422187 +| epoch 3 | 2731/ 2800 batches | train loss 0.4640759 +| epoch 3 | 2735/ 2800 batches | train loss 0.5864416 +| epoch 3 | 2739/ 2800 batches | train loss 0.4081598 +| epoch 3 | 2743/ 2800 batches | train loss 0.4759772 +| epoch 3 | 2747/ 2800 batches | train loss 0.4768685 +| epoch 3 | 2751/ 2800 batches | train loss 0.3816802 +| epoch 3 | 2755/ 2800 batches | train loss 0.5217354 +| epoch 3 | 2759/ 2800 batches | train loss 0.4434056 +| epoch 3 | 2763/ 2800 batches | train loss 0.3585429 +| epoch 3 | 2767/ 2800 batches | train loss 0.3634012 +| epoch 3 | 2771/ 2800 batches | train loss 0.4480574 +| epoch 3 | 2775/ 2800 batches | train loss 0.5145907 +| epoch 3 | 2779/ 2800 batches | train loss 0.4181406 +| epoch 3 | 2783/ 2800 batches | train loss 0.5475377 +| epoch 3 | 2787/ 2800 batches | train loss 0.4409027 +| epoch 3 | 2791/ 2800 batches | train loss 0.4267612 +| epoch 3 | 2795/ 2800 batches | train loss 0.4132286 +| epoch 3 | 2799/ 2800 batches | train loss 0.4370237 +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 2800 batches | test loss 0.5169294 +| epoch 3 | 7/ 2800 batches | test loss 0.4649446 +| epoch 3 | 11/ 2800 batches | test loss 0.3096460 +| epoch 3 | 15/ 2800 batches | test loss 0.3616727 +| epoch 3 | 19/ 2800 batches | test loss 0.4171140 +| epoch 3 | 23/ 2800 batches | test loss 0.4728486 +| epoch 3 | 27/ 2800 batches | test loss 0.4712887 +| epoch 3 | 31/ 2800 batches | test loss 0.3581490 +| epoch 3 | 35/ 2800 batches | test loss 0.3959392 +| epoch 3 | 39/ 2800 batches | test loss 0.4401752 +| epoch 3 | 43/ 2800 batches | test loss 0.4075714 +| epoch 3 | 47/ 2800 batches | test loss 0.4207710 +| epoch 3 | 51/ 2800 batches | test loss 0.4357671 +| epoch 3 | 55/ 2800 batches | test loss 0.3749139 +| epoch 3 | 59/ 2800 batches | test loss 0.4256636 +| epoch 3 | 63/ 2800 batches | test loss 0.4568670 +| epoch 3 | 67/ 2800 batches | test loss 0.4740769 +| epoch 3 | 71/ 2800 batches | test loss 0.4399251 +| epoch 3 | 75/ 2800 batches | test loss 0.5112203 +| epoch 3 | 79/ 2800 batches | test loss 0.4166378 +| epoch 3 | 83/ 2800 batches | test loss 0.4412371 +| epoch 3 | 87/ 2800 batches | test loss 0.4547672 +| epoch 3 | 91/ 2800 batches | test loss 0.4427016 +| epoch 3 | 95/ 2800 batches | test loss 0.4831227 +| epoch 3 | 99/ 2800 batches | test loss 0.4453110 +| epoch 3 | 103/ 2800 batches | test loss 0.4287367 +| epoch 3 | 107/ 2800 batches | test loss 0.4973518 +| epoch 3 | 111/ 2800 batches | test loss 0.5112407 +| epoch 3 | 115/ 2800 batches | test loss 0.4211007 +| epoch 3 | 119/ 2800 batches | test loss 0.4638974 +| epoch 3 | 123/ 2800 batches | test loss 0.4179459 +| epoch 3 | 127/ 2800 batches | test loss 0.4447578 +| epoch 3 | 131/ 2800 batches | test loss 0.4339584 +| epoch 3 | 135/ 2800 batches | test loss 0.4158952 +| epoch 3 | 139/ 2800 batches | test loss 0.3937335 +| epoch 3 | 143/ 2800 batches | test loss 0.5906985 +| epoch 3 | 147/ 2800 batches | test loss 0.4140146 +| epoch 3 | 151/ 2800 batches | test loss 0.4428812 +| epoch 3 | 155/ 2800 batches | test loss 0.4460838 +| epoch 3 | 159/ 2800 batches | test loss 0.4919471 +| epoch 3 | 163/ 2800 batches | test loss 0.3577073 +| epoch 3 | 167/ 2800 batches | test loss 0.3637984 +| epoch 3 | 171/ 2800 batches | test loss 0.4184693 +| epoch 3 | 175/ 2800 batches | test loss 0.5069515 +| epoch 3 | 179/ 2800 batches | test loss 0.5131004 +| epoch 3 | 183/ 2800 batches | test loss 0.4760957 +| epoch 3 | 187/ 2800 batches | test loss 0.4575951 +| epoch 3 | 191/ 2800 batches | test loss 0.3995699 +| epoch 3 | 195/ 2800 batches | test loss 0.4278991 +| epoch 3 | 199/ 2800 batches | test loss 0.4410270 +| epoch 3 | 203/ 2800 batches | test loss 0.4342015 +| epoch 3 | 207/ 2800 batches | test loss 0.4908076 +| epoch 3 | 211/ 2800 batches | test loss 0.5124588 +| epoch 3 | 215/ 2800 batches | test loss 0.4693818 +| epoch 3 | 219/ 2800 batches | test loss 0.4748710 +| epoch 3 | 223/ 2800 batches | test loss 0.4731482 +| epoch 3 | 227/ 2800 batches | test loss 0.3959123 +| epoch 3 | 231/ 2800 batches | test loss 0.4637720 +| epoch 3 | 235/ 2800 batches | test loss 0.4666740 +| epoch 3 | 239/ 2800 batches | test loss 0.4620340 +| epoch 3 | 243/ 2800 batches | test loss 0.3839295 +| epoch 3 | 247/ 2800 batches | test loss 0.3667917 +| epoch 3 | 251/ 2800 batches | test loss 0.4571404 +| epoch 3 | 255/ 2800 batches | test loss 0.4618052 +| epoch 3 | 259/ 2800 batches | test loss 0.2822135 +| epoch 3 | 263/ 2800 batches | test loss 0.3238862 +| epoch 3 | 267/ 2800 batches | test loss 0.4335778 +| epoch 3 | 271/ 2800 batches | test loss 0.4306282 +| epoch 3 | 275/ 2800 batches | test loss 0.4159773 +| epoch 3 | 279/ 2800 batches | test loss 0.4815519 +| epoch 3 | 283/ 2800 batches | test loss 0.4840892 +| epoch 3 | 287/ 2800 batches | test loss 0.4720685 +| epoch 3 | 291/ 2800 batches | test loss 0.4504758 +| epoch 3 | 295/ 2800 batches | test loss 0.4659166 +| epoch 3 | 299/ 2800 batches | test loss 0.4160449 +| epoch 3 | 303/ 2800 batches | test loss 0.5359734 +| epoch 3 | 307/ 2800 batches | test loss 0.4112599 +| epoch 3 | 311/ 2800 batches | test loss 0.3823540 +| epoch 3 | 315/ 2800 batches | test loss 0.4600146 +| epoch 3 | 319/ 2800 batches | test loss 0.3871272 +| epoch 3 | 323/ 2800 batches | test loss 0.5248725 +| epoch 3 | 327/ 2800 batches | test loss 0.5102069 +| epoch 3 | 331/ 2800 batches | test loss 0.6003343 +| epoch 3 | 335/ 2800 batches | test loss 0.4478969 +| epoch 3 | 339/ 2800 batches | test loss 0.4217449 +| epoch 3 | 343/ 2800 batches | test loss 0.4101008 +| epoch 3 | 347/ 2800 batches | test loss 0.4841971 +| epoch 3 | 351/ 2800 batches | test loss 0.4488678 +| epoch 3 | 355/ 2800 batches | test loss 0.4785885 +| epoch 3 | 359/ 2800 batches | test loss 0.4211367 +| epoch 3 | 363/ 2800 batches | test loss 0.4956213 +| epoch 3 | 367/ 2800 batches | test loss 0.4958122 +| epoch 3 | 371/ 2800 batches | test loss 0.4001355 +| epoch 3 | 375/ 2800 batches | test loss 0.4044694 +| epoch 3 | 379/ 2800 batches | test loss 0.4324628 +| epoch 3 | 383/ 2800 batches | test loss 0.4206187 +| epoch 3 | 387/ 2800 batches | test loss 0.4930736 +| epoch 3 | 391/ 2800 batches | test loss 0.3653297 +| epoch 3 | 395/ 2800 batches | test loss 0.5361236 +| epoch 3 | 399/ 2800 batches | test loss 0.4971868 +| epoch 3 | 403/ 2800 batches | test loss 0.4366732 +| epoch 3 | 407/ 2800 batches | test loss 0.4814156 +| epoch 3 | 411/ 2800 batches | test loss 0.4889894 +| epoch 3 | 415/ 2800 batches | test loss 0.4555815 +| epoch 3 | 419/ 2800 batches | test loss 0.4373989 +| epoch 3 | 423/ 2800 batches | test loss 0.4023121 +| epoch 3 | 427/ 2800 batches | test loss 0.4613273 +| epoch 3 | 431/ 2800 batches | test loss 0.4841399 +| epoch 3 | 435/ 2800 batches | test loss 0.4198020 +| epoch 3 | 439/ 2800 batches | test loss 0.3894035 +| epoch 3 | 443/ 2800 batches | test loss 0.4693322 +| epoch 3 | 447/ 2800 batches | test loss 0.5657884 +| epoch 3 | 451/ 2800 batches | test loss 0.4395626 +| epoch 3 | 455/ 2800 batches | test loss 0.4845806 +| epoch 3 | 459/ 2800 batches | test loss 0.4206587 +| epoch 3 | 463/ 2800 batches | test loss 0.4637471 +| epoch 3 | 467/ 2800 batches | test loss 0.4723493 +| epoch 3 | 471/ 2800 batches | test loss 0.5205645 +| epoch 3 | 475/ 2800 batches | test loss 0.5181330 +| epoch 3 | 479/ 2800 batches | test loss 0.4789375 +| epoch 3 | 483/ 2800 batches | test loss 0.4394146 +| epoch 3 | 487/ 2800 batches | test loss 0.4081091 +| epoch 3 | 491/ 2800 batches | test loss 0.4175997 +| epoch 3 | 495/ 2800 batches | test loss 0.3678926 +| epoch 3 | 499/ 2800 batches | test loss 0.4500932 +| epoch 3 | 503/ 2800 batches | test loss 0.3449983 +| epoch 3 | 507/ 2800 batches | test loss 0.4327165 +| epoch 3 | 511/ 2800 batches | test loss 0.4225624 +| epoch 3 | 515/ 2800 batches | test loss 0.4328034 +| epoch 3 | 519/ 2800 batches | test loss 0.4981858 +| epoch 3 | 523/ 2800 batches | test loss 0.3933508 +| epoch 3 | 527/ 2800 batches | test loss 0.4743059 +| epoch 3 | 531/ 2800 batches | test loss 0.3946448 +| epoch 3 | 535/ 2800 batches | test loss 0.6974591 +| epoch 3 | 539/ 2800 batches | test loss 0.3301187 +| epoch 3 | 543/ 2800 batches | test loss 0.4017628 +| epoch 3 | 547/ 2800 batches | test loss 0.4943102 +| epoch 3 | 551/ 2800 batches | test loss 0.4447594 +| epoch 3 | 555/ 2800 batches | test loss 0.4208943 +| epoch 3 | 559/ 2800 batches | test loss 0.3788597 +| epoch 3 | 563/ 2800 batches | test loss 0.4612139 +| epoch 3 | 567/ 2800 batches | test loss 0.4234188 +| epoch 3 | 571/ 2800 batches | test loss 0.3796824 +| epoch 3 | 575/ 2800 batches | test loss 0.3527602 +| epoch 3 | 579/ 2800 batches | test loss 0.4269416 +| epoch 3 | 583/ 2800 batches | test loss 0.4763232 +| epoch 3 | 587/ 2800 batches | test loss 0.4184099 +| epoch 3 | 591/ 2800 batches | test loss 0.4152759 +| epoch 3 | 595/ 2800 batches | test loss 0.4358627 +| epoch 3 | 599/ 2800 batches | test loss 0.4697216 +| epoch 3 | 603/ 2800 batches | test loss 0.4441870 +| epoch 3 | 607/ 2800 batches | test loss 0.3047507 +| epoch 3 | 611/ 2800 batches | test loss 0.5499826 +| epoch 3 | 615/ 2800 batches | test loss 0.4607521 +| epoch 3 | 619/ 2800 batches | test loss 0.4640993 +| epoch 3 | 623/ 2800 batches | test loss 0.4345995 +| epoch 3 | 627/ 2800 batches | test loss 0.3755267 +| epoch 3 | 631/ 2800 batches | test loss 0.4217219 +| epoch 3 | 635/ 2800 batches | test loss 0.4837005 +| epoch 3 | 639/ 2800 batches | test loss 0.4485481 +| epoch 3 | 643/ 2800 batches | test loss 0.4976286 +| epoch 3 | 647/ 2800 batches | test loss 0.5670731 +| epoch 3 | 651/ 2800 batches | test loss 0.4356546 +| epoch 3 | 655/ 2800 batches | test loss 0.4922785 +| epoch 3 | 659/ 2800 batches | test loss 0.4291468 +| epoch 3 | 663/ 2800 batches | test loss 0.3780039 +| epoch 3 | 667/ 2800 batches | test loss 0.4065132 +| epoch 3 | 671/ 2800 batches | test loss 0.2937895 +| epoch 3 | 675/ 2800 batches | test loss 0.5334076 +| epoch 3 | 679/ 2800 batches | test loss 0.5026523 +| epoch 3 | 683/ 2800 batches | test loss 0.4999399 +| epoch 3 | 687/ 2800 batches | test loss 0.5323702 +| epoch 3 | 691/ 2800 batches | test loss 0.4355992 +| epoch 3 | 695/ 2800 batches | test loss 0.4033564 +| epoch 3 | 699/ 2800 batches | test loss 0.4595608 +| epoch 3 | final test loss 0.4486, save model! +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 2800 batches | train loss 0.3807892 +| epoch 4 | 7/ 2800 batches | train loss 0.4327366 +| epoch 4 | 11/ 2800 batches | train loss 0.3933196 +| epoch 4 | 15/ 2800 batches | train loss 0.2882172 +| epoch 4 | 19/ 2800 batches | train loss 0.4341776 +| epoch 4 | 23/ 2800 batches | train loss 0.4718451 +| epoch 4 | 27/ 2800 batches | train loss 0.4379196 +| epoch 4 | 31/ 2800 batches | train loss 0.4691088 +| epoch 4 | 35/ 2800 batches | train loss 0.3972698 +| epoch 4 | 39/ 2800 batches | train loss 0.4006738 +| epoch 4 | 43/ 2800 batches | train loss 0.4484283 +| epoch 4 | 47/ 2800 batches | train loss 0.4028733 +| epoch 4 | 51/ 2800 batches | train loss 0.5516562 +| epoch 4 | 55/ 2800 batches | train loss 0.4158955 +| epoch 4 | 59/ 2800 batches | train loss 0.3673841 +| epoch 4 | 63/ 2800 batches | train loss 0.4403861 +| epoch 4 | 67/ 2800 batches | train loss 0.3543867 +| epoch 4 | 71/ 2800 batches | train loss 0.4884466 +| epoch 4 | 75/ 2800 batches | train loss 0.4551626 +| epoch 4 | 79/ 2800 batches | train loss 0.4836235 +| epoch 4 | 83/ 2800 batches | train loss 0.4989775 +| epoch 4 | 87/ 2800 batches | train loss 0.4186031 +| epoch 4 | 91/ 2800 batches | train loss 0.4309883 +| epoch 4 | 95/ 2800 batches | train loss 0.3990323 +| epoch 4 | 99/ 2800 batches | train loss 0.4508370 +| epoch 4 | 103/ 2800 batches | train loss 0.4347593 +| epoch 4 | 107/ 2800 batches | train loss 0.3999450 +| epoch 4 | 111/ 2800 batches | train loss 0.4145579 +| epoch 4 | 115/ 2800 batches | train loss 0.3432305 +| epoch 4 | 119/ 2800 batches | train loss 0.4512502 +| epoch 4 | 123/ 2800 batches | train loss 0.4444728 +| epoch 4 | 127/ 2800 batches | train loss 0.3783288 +| epoch 4 | 131/ 2800 batches | train loss 0.3610344 +| epoch 4 | 135/ 2800 batches | train loss 0.4163404 +| epoch 4 | 139/ 2800 batches | train loss 0.3706053 +| epoch 4 | 143/ 2800 batches | train loss 0.5263619 +| epoch 4 | 147/ 2800 batches | train loss 0.3353453 +| epoch 4 | 151/ 2800 batches | train loss 0.3660761 +| epoch 4 | 155/ 2800 batches | train loss 0.4000579 +| epoch 4 | 159/ 2800 batches | train loss 0.4652469 +| epoch 4 | 163/ 2800 batches | train loss 0.5009544 +| epoch 4 | 167/ 2800 batches | train loss 0.4160841 +| epoch 4 | 171/ 2800 batches | train loss 0.4580387 +| epoch 4 | 175/ 2800 batches | train loss 0.4098404 +| epoch 4 | 179/ 2800 batches | train loss 0.4845084 +| epoch 4 | 183/ 2800 batches | train loss 0.4556896 +| epoch 4 | 187/ 2800 batches | train loss 0.4572098 +| epoch 4 | 191/ 2800 batches | train loss 0.4152211 +| epoch 4 | 195/ 2800 batches | train loss 0.3929092 +| epoch 4 | 199/ 2800 batches | train loss 0.3463849 +| epoch 4 | 203/ 2800 batches | train loss 0.4102643 +| epoch 4 | 207/ 2800 batches | train loss 0.4972053 +| epoch 4 | 211/ 2800 batches | train loss 0.4005270 +| epoch 4 | 215/ 2800 batches | train loss 0.4494336 +| epoch 4 | 219/ 2800 batches | train loss 0.3778976 +| epoch 4 | 223/ 2800 batches | train loss 0.4620713 +| epoch 4 | 227/ 2800 batches | train loss 0.3873421 +| epoch 4 | 231/ 2800 batches | train loss 0.4662261 +| epoch 4 | 235/ 2800 batches | train loss 0.4468675 +| epoch 4 | 239/ 2800 batches | train loss 0.3949063 +| epoch 4 | 243/ 2800 batches | train loss 0.3549862 +| epoch 4 | 247/ 2800 batches | train loss 0.4567482 +| epoch 4 | 251/ 2800 batches | train loss 0.4196270 +| epoch 4 | 255/ 2800 batches | train loss 0.4534202 +| epoch 4 | 259/ 2800 batches | train loss 0.5535647 +| epoch 4 | 263/ 2800 batches | train loss 0.3858877 +| epoch 4 | 267/ 2800 batches | train loss 0.4231195 +| epoch 4 | 271/ 2800 batches | train loss 0.4390133 +| epoch 4 | 275/ 2800 batches | train loss 0.3337414 +| epoch 4 | 279/ 2800 batches | train loss 0.4335029 +| epoch 4 | 283/ 2800 batches | train loss 0.3706448 +| epoch 4 | 287/ 2800 batches | train loss 0.3706741 +| epoch 4 | 291/ 2800 batches | train loss 0.4343744 +| epoch 4 | 295/ 2800 batches | train loss 0.2724938 +| epoch 4 | 299/ 2800 batches | train loss 0.4499624 +| epoch 4 | 303/ 2800 batches | train loss 0.4156540 +| epoch 4 | 307/ 2800 batches | train loss 0.3903113 +| epoch 4 | 311/ 2800 batches | train loss 0.4998903 +| epoch 4 | 315/ 2800 batches | train loss 0.4819362 +| epoch 4 | 319/ 2800 batches | train loss 0.3361868 +| epoch 4 | 323/ 2800 batches | train loss 0.4167761 +| epoch 4 | 327/ 2800 batches | train loss 0.4483750 +| epoch 4 | 331/ 2800 batches | train loss 0.5102581 +| epoch 4 | 335/ 2800 batches | train loss 0.4347227 +| epoch 4 | 339/ 2800 batches | train loss 0.3334676 +| epoch 4 | 343/ 2800 batches | train loss 0.4201748 +| epoch 4 | 347/ 2800 batches | train loss 0.4859869 +| epoch 4 | 351/ 2800 batches | train loss 0.2737948 +| epoch 4 | 355/ 2800 batches | train loss 0.3917616 +| epoch 4 | 359/ 2800 batches | train loss 0.5112423 +| epoch 4 | 363/ 2800 batches | train loss 0.4200287 +| epoch 4 | 367/ 2800 batches | train loss 0.3944864 +| epoch 4 | 371/ 2800 batches | train loss 0.4468338 +| epoch 4 | 375/ 2800 batches | train loss 0.3371941 +| epoch 4 | 379/ 2800 batches | train loss 0.4223474 +| epoch 4 | 383/ 2800 batches | train loss 0.3803867 +| epoch 4 | 387/ 2800 batches | train loss 0.4320677 +| epoch 4 | 391/ 2800 batches | train loss 0.4170201 +| epoch 4 | 395/ 2800 batches | train loss 0.4782382 +| epoch 4 | 399/ 2800 batches | train loss 0.4593951 +| epoch 4 | 403/ 2800 batches | train loss 0.3788517 +| epoch 4 | 407/ 2800 batches | train loss 0.4250963 +| epoch 4 | 411/ 2800 batches | train loss 0.4029689 +| epoch 4 | 415/ 2800 batches | train loss 0.3901547 +| epoch 4 | 419/ 2800 batches | train loss 0.4537830 +| epoch 4 | 423/ 2800 batches | train loss 0.3595997 +| epoch 4 | 427/ 2800 batches | train loss 0.3895594 +| epoch 4 | 431/ 2800 batches | train loss 0.4394678 +| epoch 4 | 435/ 2800 batches | train loss 0.4191786 +| epoch 4 | 439/ 2800 batches | train loss 0.4729309 +| epoch 4 | 443/ 2800 batches | train loss 0.4399012 +| epoch 4 | 447/ 2800 batches | train loss 0.5000207 +| epoch 4 | 451/ 2800 batches | train loss 0.3291306 +| epoch 4 | 455/ 2800 batches | train loss 0.4407893 +| epoch 4 | 459/ 2800 batches | train loss 0.3924664 +| epoch 4 | 463/ 2800 batches | train loss 0.4509891 +| epoch 4 | 467/ 2800 batches | train loss 0.4616760 +| epoch 4 | 471/ 2800 batches | train loss 0.4766268 +| epoch 4 | 475/ 2800 batches | train loss 0.3299351 +| epoch 4 | 479/ 2800 batches | train loss 0.4265634 +| epoch 4 | 483/ 2800 batches | train loss 0.5339648 +| epoch 4 | 487/ 2800 batches | train loss 0.4741933 +| epoch 4 | 491/ 2800 batches | train loss 0.3638532 +| epoch 4 | 495/ 2800 batches | train loss 0.3563235 +| epoch 4 | 499/ 2800 batches | train loss 0.4143401 +| epoch 4 | 503/ 2800 batches | train loss 0.4190674 +| epoch 4 | 507/ 2800 batches | train loss 0.3818240 +| epoch 4 | 511/ 2800 batches | train loss 0.3797594 +| epoch 4 | 515/ 2800 batches | train loss 0.4401146 +| epoch 4 | 519/ 2800 batches | train loss 0.4510165 +| epoch 4 | 523/ 2800 batches | train loss 0.3990402 +| epoch 4 | 527/ 2800 batches | train loss 0.5143753 +| epoch 4 | 531/ 2800 batches | train loss 0.4277020 +| epoch 4 | 535/ 2800 batches | train loss 0.4953715 +| epoch 4 | 539/ 2800 batches | train loss 0.4391167 +| epoch 4 | 543/ 2800 batches | train loss 0.4060019 +| epoch 4 | 547/ 2800 batches | train loss 0.4299866 +| epoch 4 | 551/ 2800 batches | train loss 0.4408130 +| epoch 4 | 555/ 2800 batches | train loss 0.4126831 +| epoch 4 | 559/ 2800 batches | train loss 0.5191752 +| epoch 4 | 563/ 2800 batches | train loss 0.3481320 +| epoch 4 | 567/ 2800 batches | train loss 0.4708153 +| epoch 4 | 571/ 2800 batches | train loss 0.4260373 +| epoch 4 | 575/ 2800 batches | train loss 0.4081831 +| epoch 4 | 579/ 2800 batches | train loss 0.4859315 +| epoch 4 | 583/ 2800 batches | train loss 0.5344330 +| epoch 4 | 587/ 2800 batches | train loss 0.4044951 +| epoch 4 | 591/ 2800 batches | train loss 0.4195178 +| epoch 4 | 595/ 2800 batches | train loss 0.4356373 +| epoch 4 | 599/ 2800 batches | train loss 0.4983318 +| epoch 4 | 603/ 2800 batches | train loss 0.3508736 +| epoch 4 | 607/ 2800 batches | train loss 0.4221396 +| epoch 4 | 611/ 2800 batches | train loss 0.4914631 +| epoch 4 | 615/ 2800 batches | train loss 0.4588630 +| epoch 4 | 619/ 2800 batches | train loss 0.4371254 +| epoch 4 | 623/ 2800 batches | train loss 0.5230628 +| epoch 4 | 627/ 2800 batches | train loss 0.4580069 +| epoch 4 | 631/ 2800 batches | train loss 0.5442522 +| epoch 4 | 635/ 2800 batches | train loss 0.4300294 +| epoch 4 | 639/ 2800 batches | train loss 0.4810307 +| epoch 4 | 643/ 2800 batches | train loss 0.4166374 +| epoch 4 | 647/ 2800 batches | train loss 0.4276723 +| epoch 4 | 651/ 2800 batches | train loss 0.3709273 +| epoch 4 | 655/ 2800 batches | train loss 0.4902788 +| epoch 4 | 659/ 2800 batches | train loss 0.5273699 +| epoch 4 | 663/ 2800 batches | train loss 0.4220800 +| epoch 4 | 667/ 2800 batches | train loss 0.4380122 +| epoch 4 | 671/ 2800 batches | train loss 0.4508222 +| epoch 4 | 675/ 2800 batches | train loss 0.3958138 +| epoch 4 | 679/ 2800 batches | train loss 0.3896924 +| epoch 4 | 683/ 2800 batches | train loss 0.4779308 +| epoch 4 | 687/ 2800 batches | train loss 0.4245033 +| epoch 4 | 691/ 2800 batches | train loss 0.3922537 +| epoch 4 | 695/ 2800 batches | train loss 0.4632821 +| epoch 4 | 699/ 2800 batches | train loss 0.3769447 +| epoch 4 | 703/ 2800 batches | train loss 0.4926245 +| epoch 4 | 707/ 2800 batches | train loss 0.3824167 +| epoch 4 | 711/ 2800 batches | train loss 0.3755336 +| epoch 4 | 715/ 2800 batches | train loss 0.4658063 +| epoch 4 | 719/ 2800 batches | train loss 0.4386201 +| epoch 4 | 723/ 2800 batches | train loss 0.3928597 +| epoch 4 | 727/ 2800 batches | train loss 0.3660596 +| epoch 4 | 731/ 2800 batches | train loss 0.4378826 +| epoch 4 | 735/ 2800 batches | train loss 0.5404111 +| epoch 4 | 739/ 2800 batches | train loss 0.3798577 +| epoch 4 | 743/ 2800 batches | train loss 0.3994635 +| epoch 4 | 747/ 2800 batches | train loss 0.4318661 +| epoch 4 | 751/ 2800 batches | train loss 0.3860425 +| epoch 4 | 755/ 2800 batches | train loss 0.4278659 +| epoch 4 | 759/ 2800 batches | train loss 0.4714098 +| epoch 4 | 763/ 2800 batches | train loss 0.3898705 +| epoch 4 | 767/ 2800 batches | train loss 0.3595657 +| epoch 4 | 771/ 2800 batches | train loss 0.4806418 +| epoch 4 | 775/ 2800 batches | train loss 0.4058961 +| epoch 4 | 779/ 2800 batches | train loss 0.4353687 +| epoch 4 | 783/ 2800 batches | train loss 0.5115061 +| epoch 4 | 787/ 2800 batches | train loss 0.4523278 +| epoch 4 | 791/ 2800 batches | train loss 0.4857516 +| epoch 4 | 795/ 2800 batches | train loss 0.4650756 +| epoch 4 | 799/ 2800 batches | train loss 0.4297880 +| epoch 4 | 803/ 2800 batches | train loss 0.3901075 +| epoch 4 | 807/ 2800 batches | train loss 0.4307800 +| epoch 4 | 811/ 2800 batches | train loss 0.4296959 +| epoch 4 | 815/ 2800 batches | train loss 0.4907073 +| epoch 4 | 819/ 2800 batches | train loss 0.3607629 +| epoch 4 | 823/ 2800 batches | train loss 0.4532564 +| epoch 4 | 827/ 2800 batches | train loss 0.3236586 +| epoch 4 | 831/ 2800 batches | train loss 0.2658108 +| epoch 4 | 835/ 2800 batches | train loss 0.4932857 +| epoch 4 | 839/ 2800 batches | train loss 0.4345552 +| epoch 4 | 843/ 2800 batches | train loss 0.3505558 +| epoch 4 | 847/ 2800 batches | train loss 0.3827487 +| epoch 4 | 851/ 2800 batches | train loss 0.4701724 +| epoch 4 | 855/ 2800 batches | train loss 0.4411092 +| epoch 4 | 859/ 2800 batches | train loss 0.3867397 +| epoch 4 | 863/ 2800 batches | train loss 0.5039889 +| epoch 4 | 867/ 2800 batches | train loss 0.3928934 +| epoch 4 | 871/ 2800 batches | train loss 0.4176269 +| epoch 4 | 875/ 2800 batches | train loss 0.2824875 +| epoch 4 | 879/ 2800 batches | train loss 0.3614700 +| epoch 4 | 883/ 2800 batches | train loss 0.4442566 +| epoch 4 | 887/ 2800 batches | train loss 0.4856340 +| epoch 4 | 891/ 2800 batches | train loss 0.3279968 +| epoch 4 | 895/ 2800 batches | train loss 0.3998623 +| epoch 4 | 899/ 2800 batches | train loss 0.3987588 +| epoch 4 | 903/ 2800 batches | train loss 0.3401665 +| epoch 4 | 907/ 2800 batches | train loss 0.3731319 +| epoch 4 | 911/ 2800 batches | train loss 0.4646314 +| epoch 4 | 915/ 2800 batches | train loss 0.4351989 +| epoch 4 | 919/ 2800 batches | train loss 0.4132232 +| epoch 4 | 923/ 2800 batches | train loss 0.3509880 +| epoch 4 | 927/ 2800 batches | train loss 0.3450477 +| epoch 4 | 931/ 2800 batches | train loss 0.4689947 +| epoch 4 | 935/ 2800 batches | train loss 0.4767335 +| epoch 4 | 939/ 2800 batches | train loss 0.4361434 +| epoch 4 | 943/ 2800 batches | train loss 0.3845633 +| epoch 4 | 947/ 2800 batches | train loss 0.4890644 +| epoch 4 | 951/ 2800 batches | train loss 0.4672423 +| epoch 4 | 955/ 2800 batches | train loss 0.4526602 +| epoch 4 | 959/ 2800 batches | train loss 0.4605063 +| epoch 4 | 963/ 2800 batches | train loss 0.4912190 +| epoch 4 | 967/ 2800 batches | train loss 0.4508826 +| epoch 4 | 971/ 2800 batches | train loss 0.4288567 +| epoch 4 | 975/ 2800 batches | train loss 0.3834156 +| epoch 4 | 979/ 2800 batches | train loss 0.4953575 +| epoch 4 | 983/ 2800 batches | train loss 0.4293537 +| epoch 4 | 987/ 2800 batches | train loss 0.4416866 +| epoch 4 | 991/ 2800 batches | train loss 0.4082681 +| epoch 4 | 995/ 2800 batches | train loss 0.4710357 +| epoch 4 | 999/ 2800 batches | train loss 0.5143676 +| epoch 4 | 1003/ 2800 batches | train loss 0.5032701 +| epoch 4 | 1007/ 2800 batches | train loss 0.3759529 +| epoch 4 | 1011/ 2800 batches | train loss 0.4153539 +| epoch 4 | 1015/ 2800 batches | train loss 0.3797227 +| epoch 4 | 1019/ 2800 batches | train loss 0.4499643 +| epoch 4 | 1023/ 2800 batches | train loss 0.4405591 +| epoch 4 | 1027/ 2800 batches | train loss 0.3561358 +| epoch 4 | 1031/ 2800 batches | train loss 0.4090281 +| epoch 4 | 1035/ 2800 batches | train loss 0.4883691 +| epoch 4 | 1039/ 2800 batches | train loss 0.3821460 +| epoch 4 | 1043/ 2800 batches | train loss 0.4530144 +| epoch 4 | 1047/ 2800 batches | train loss 0.4126643 +| epoch 4 | 1051/ 2800 batches | train loss 0.3493542 +| epoch 4 | 1055/ 2800 batches | train loss 0.4868817 +| epoch 4 | 1059/ 2800 batches | train loss 0.2735451 +| epoch 4 | 1063/ 2800 batches | train loss 0.4471140 +| epoch 4 | 1067/ 2800 batches | train loss 0.4054983 +| epoch 4 | 1071/ 2800 batches | train loss 0.4372856 +| epoch 4 | 1075/ 2800 batches | train loss 0.4357463 +| epoch 4 | 1079/ 2800 batches | train loss 0.4450167 +| epoch 4 | 1083/ 2800 batches | train loss 0.3335402 +| epoch 4 | 1087/ 2800 batches | train loss 0.4287249 +| epoch 4 | 1091/ 2800 batches | train loss 0.3883672 +| epoch 4 | 1095/ 2800 batches | train loss 0.3760507 +| epoch 4 | 1099/ 2800 batches | train loss 0.3638108 +| epoch 4 | 1103/ 2800 batches | train loss 0.3786476 +| epoch 4 | 1107/ 2800 batches | train loss 0.3965746 +| epoch 4 | 1111/ 2800 batches | train loss 0.4073740 +| epoch 4 | 1115/ 2800 batches | train loss 0.5082476 +| epoch 4 | 1119/ 2800 batches | train loss 0.4356938 +| epoch 4 | 1123/ 2800 batches | train loss 0.4117298 +| epoch 4 | 1127/ 2800 batches | train loss 0.4867409 +| epoch 4 | 1131/ 2800 batches | train loss 0.3900387 +| epoch 4 | 1135/ 2800 batches | train loss 0.3579667 +| epoch 4 | 1139/ 2800 batches | train loss 0.5118164 +| epoch 4 | 1143/ 2800 batches | train loss 0.4274577 +| epoch 4 | 1147/ 2800 batches | train loss 0.4921213 +| epoch 4 | 1151/ 2800 batches | train loss 0.4008069 +| epoch 4 | 1155/ 2800 batches | train loss 0.4385698 +| epoch 4 | 1159/ 2800 batches | train loss 0.4351121 +| epoch 4 | 1163/ 2800 batches | train loss 0.4350432 +| epoch 4 | 1167/ 2800 batches | train loss 0.4911017 +| epoch 4 | 1171/ 2800 batches | train loss 0.3878296 +| epoch 4 | 1175/ 2800 batches | train loss 0.3987718 +| epoch 4 | 1179/ 2800 batches | train loss 0.3996913 +| epoch 4 | 1183/ 2800 batches | train loss 0.4173974 +| epoch 4 | 1187/ 2800 batches | train loss 0.4254635 +| epoch 4 | 1191/ 2800 batches | train loss 0.5392342 +| epoch 4 | 1195/ 2800 batches | train loss 0.4585904 +| epoch 4 | 1199/ 2800 batches | train loss 0.4742798 +| epoch 4 | 1203/ 2800 batches | train loss 0.4171285 +| epoch 4 | 1207/ 2800 batches | train loss 0.4591135 +| epoch 4 | 1211/ 2800 batches | train loss 0.4102921 +| epoch 4 | 1215/ 2800 batches | train loss 0.4500834 +| epoch 4 | 1219/ 2800 batches | train loss 0.4569396 +| epoch 4 | 1223/ 2800 batches | train loss 0.3734360 +| epoch 4 | 1227/ 2800 batches | train loss 0.4403961 +| epoch 4 | 1231/ 2800 batches | train loss 0.5060055 +| epoch 4 | 1235/ 2800 batches | train loss 0.4441109 +| epoch 4 | 1239/ 2800 batches | train loss 0.4176336 +| epoch 4 | 1243/ 2800 batches | train loss 0.5683839 +| epoch 4 | 1247/ 2800 batches | train loss 0.5477296 +| epoch 4 | 1251/ 2800 batches | train loss 0.3522807 +| epoch 4 | 1255/ 2800 batches | train loss 0.4773045 +| epoch 4 | 1259/ 2800 batches | train loss 0.4372045 +| epoch 4 | 1263/ 2800 batches | train loss 0.4274986 +| epoch 4 | 1267/ 2800 batches | train loss 0.4319971 +| epoch 4 | 1271/ 2800 batches | train loss 0.5011964 +| epoch 4 | 1275/ 2800 batches | train loss 0.3967544 +| epoch 4 | 1279/ 2800 batches | train loss 0.3573767 +| epoch 4 | 1283/ 2800 batches | train loss 0.4105774 +| epoch 4 | 1287/ 2800 batches | train loss 0.3496591 +| epoch 4 | 1291/ 2800 batches | train loss 0.3467269 +| epoch 4 | 1295/ 2800 batches | train loss 0.4171301 +| epoch 4 | 1299/ 2800 batches | train loss 0.5520028 +| epoch 4 | 1303/ 2800 batches | train loss 0.4709271 +| epoch 4 | 1307/ 2800 batches | train loss 0.5380092 +| epoch 4 | 1311/ 2800 batches | train loss 0.3790525 +| epoch 4 | 1315/ 2800 batches | train loss 0.3947511 +| epoch 4 | 1319/ 2800 batches | train loss 0.3802879 +| epoch 4 | 1323/ 2800 batches | train loss 0.4248491 +| epoch 4 | 1327/ 2800 batches | train loss 0.4332802 +| epoch 4 | 1331/ 2800 batches | train loss 0.3538620 +| epoch 4 | 1335/ 2800 batches | train loss 0.3640678 +| epoch 4 | 1339/ 2800 batches | train loss 0.3945463 +| epoch 4 | 1343/ 2800 batches | train loss 0.4039631 +| epoch 4 | 1347/ 2800 batches | train loss 0.4469104 +| epoch 4 | 1351/ 2800 batches | train loss 0.4114411 +| epoch 4 | 1355/ 2800 batches | train loss 0.3903440 +| epoch 4 | 1359/ 2800 batches | train loss 0.4757760 +| epoch 4 | 1363/ 2800 batches | train loss 0.4520878 +| epoch 4 | 1367/ 2800 batches | train loss 0.4874703 +| epoch 4 | 1371/ 2800 batches | train loss 0.5704100 +| epoch 4 | 1375/ 2800 batches | train loss 0.3937174 +| epoch 4 | 1379/ 2800 batches | train loss 0.5108832 +| epoch 4 | 1383/ 2800 batches | train loss 0.4776112 +| epoch 4 | 1387/ 2800 batches | train loss 0.4456153 +| epoch 4 | 1391/ 2800 batches | train loss 0.4308698 +| epoch 4 | 1395/ 2800 batches | train loss 0.4078263 +| epoch 4 | 1399/ 2800 batches | train loss 0.4172375 +| epoch 4 | 1403/ 2800 batches | train loss 0.5338280 +| epoch 4 | 1407/ 2800 batches | train loss 0.5582901 +| epoch 4 | 1411/ 2800 batches | train loss 0.4415487 +| epoch 4 | 1415/ 2800 batches | train loss 0.4347566 +| epoch 4 | 1419/ 2800 batches | train loss 0.3294352 +| epoch 4 | 1423/ 2800 batches | train loss 0.4808874 +| epoch 4 | 1427/ 2800 batches | train loss 0.3974089 +| epoch 4 | 1431/ 2800 batches | train loss 0.3456378 +| epoch 4 | 1435/ 2800 batches | train loss 0.4249370 +| epoch 4 | 1439/ 2800 batches | train loss 0.3689719 +| epoch 4 | 1443/ 2800 batches | train loss 0.5091169 +| epoch 4 | 1447/ 2800 batches | train loss 0.4355061 +| epoch 4 | 1451/ 2800 batches | train loss 0.3821731 +| epoch 4 | 1455/ 2800 batches | train loss 0.5217779 +| epoch 4 | 1459/ 2800 batches | train loss 0.4227813 +| epoch 4 | 1463/ 2800 batches | train loss 0.6062583 +| epoch 4 | 1467/ 2800 batches | train loss 0.4094181 +| epoch 4 | 1471/ 2800 batches | train loss 0.4443445 +| epoch 4 | 1475/ 2800 batches | train loss 0.4096760 +| epoch 4 | 1479/ 2800 batches | train loss 0.5241559 +| epoch 4 | 1483/ 2800 batches | train loss 0.4640905 +| epoch 4 | 1487/ 2800 batches | train loss 0.4184015 +| epoch 4 | 1491/ 2800 batches | train loss 0.4032050 +| epoch 4 | 1495/ 2800 batches | train loss 0.4538146 +| epoch 4 | 1499/ 2800 batches | train loss 0.3570542 +| epoch 4 | 1503/ 2800 batches | train loss 0.4818923 +| epoch 4 | 1507/ 2800 batches | train loss 0.4294989 +| epoch 4 | 1511/ 2800 batches | train loss 0.3455500 +| epoch 4 | 1515/ 2800 batches | train loss 0.4144495 +| epoch 4 | 1519/ 2800 batches | train loss 0.4500695 +| epoch 4 | 1523/ 2800 batches | train loss 0.3682891 +| epoch 4 | 1527/ 2800 batches | train loss 0.4100387 +| epoch 4 | 1531/ 2800 batches | train loss 0.4345427 +| epoch 4 | 1535/ 2800 batches | train loss 0.4100213 +| epoch 4 | 1539/ 2800 batches | train loss 0.3816719 +| epoch 4 | 1543/ 2800 batches | train loss 0.5752071 +| epoch 4 | 1547/ 2800 batches | train loss 0.4449517 +| epoch 4 | 1551/ 2800 batches | train loss 0.5433226 +| epoch 4 | 1555/ 2800 batches | train loss 0.4717475 +| epoch 4 | 1559/ 2800 batches | train loss 0.4611452 +| epoch 4 | 1563/ 2800 batches | train loss 0.4103033 +| epoch 4 | 1567/ 2800 batches | train loss 0.3735802 +| epoch 4 | 1571/ 2800 batches | train loss 0.4146456 +| epoch 4 | 1575/ 2800 batches | train loss 0.4750096 +| epoch 4 | 1579/ 2800 batches | train loss 0.3338496 +| epoch 4 | 1583/ 2800 batches | train loss 0.3684617 +| epoch 4 | 1587/ 2800 batches | train loss 0.4323243 +| epoch 4 | 1591/ 2800 batches | train loss 0.4357673 +| epoch 4 | 1595/ 2800 batches | train loss 0.4560449 +| epoch 4 | 1599/ 2800 batches | train loss 0.4263699 +| epoch 4 | 1603/ 2800 batches | train loss 0.4344503 +| epoch 4 | 1607/ 2800 batches | train loss 0.5734880 +| epoch 4 | 1611/ 2800 batches | train loss 0.5035388 +| epoch 4 | 1615/ 2800 batches | train loss 0.4249167 +| epoch 4 | 1619/ 2800 batches | train loss 0.1848044 +| epoch 4 | 1623/ 2800 batches | train loss 0.3056748 +| epoch 4 | 1627/ 2800 batches | train loss 0.3825528 +| epoch 4 | 1631/ 2800 batches | train loss 0.3522843 +| epoch 4 | 1635/ 2800 batches | train loss 0.4136845 +| epoch 4 | 1639/ 2800 batches | train loss 0.4004478 +| epoch 4 | 1643/ 2800 batches | train loss 0.5231240 +| epoch 4 | 1647/ 2800 batches | train loss 0.4214219 +| epoch 4 | 1651/ 2800 batches | train loss 0.3572175 +| epoch 4 | 1655/ 2800 batches | train loss 0.5068051 +| epoch 4 | 1659/ 2800 batches | train loss 0.5032315 +| epoch 4 | 1663/ 2800 batches | train loss 0.4110891 +| epoch 4 | 1667/ 2800 batches | train loss 0.4185504 +| epoch 4 | 1671/ 2800 batches | train loss 0.4009119 +| epoch 4 | 1675/ 2800 batches | train loss 0.3825265 +| epoch 4 | 1679/ 2800 batches | train loss 0.4478067 +| epoch 4 | 1683/ 2800 batches | train loss 0.4430377 +| epoch 4 | 1687/ 2800 batches | train loss 0.4106717 +| epoch 4 | 1691/ 2800 batches | train loss 0.3571878 +| epoch 4 | 1695/ 2800 batches | train loss 0.4450181 +| epoch 4 | 1699/ 2800 batches | train loss 0.4338910 +| epoch 4 | 1703/ 2800 batches | train loss 0.4896673 +| epoch 4 | 1707/ 2800 batches | train loss 0.3784067 +| epoch 4 | 1711/ 2800 batches | train loss 0.4523618 +| epoch 4 | 1715/ 2800 batches | train loss 0.5216243 +| epoch 4 | 1719/ 2800 batches | train loss 0.4707917 +| epoch 4 | 1723/ 2800 batches | train loss 0.3288680 +| epoch 4 | 1727/ 2800 batches | train loss 0.4912602 +| epoch 4 | 1731/ 2800 batches | train loss 0.3415779 +| epoch 4 | 1735/ 2800 batches | train loss 0.3939235 +| epoch 4 | 1739/ 2800 batches | train loss 0.4997156 +| epoch 4 | 1743/ 2800 batches | train loss 0.4070843 +| epoch 4 | 1747/ 2800 batches | train loss 0.4302805 +| epoch 4 | 1751/ 2800 batches | train loss 0.4170448 +| epoch 4 | 1755/ 2800 batches | train loss 0.3909729 +| epoch 4 | 1759/ 2800 batches | train loss 0.3538946 +| epoch 4 | 1763/ 2800 batches | train loss 0.3667088 +| epoch 4 | 1767/ 2800 batches | train loss 0.4056922 +| epoch 4 | 1771/ 2800 batches | train loss 0.4385011 +| epoch 4 | 1775/ 2800 batches | train loss 0.4553897 +| epoch 4 | 1779/ 2800 batches | train loss 0.4155046 +| epoch 4 | 1783/ 2800 batches | train loss 0.4105145 +| epoch 4 | 1787/ 2800 batches | train loss 0.4325611 +| epoch 4 | 1791/ 2800 batches | train loss 0.3675399 +| epoch 4 | 1795/ 2800 batches | train loss 0.4616413 +| epoch 4 | 1799/ 2800 batches | train loss 0.3937179 +| epoch 4 | 1803/ 2800 batches | train loss 0.4341493 +| epoch 4 | 1807/ 2800 batches | train loss 0.3585544 +| epoch 4 | 1811/ 2800 batches | train loss 0.4071352 +| epoch 4 | 1815/ 2800 batches | train loss 0.4535607 +| epoch 4 | 1819/ 2800 batches | train loss 0.3860053 +| epoch 4 | 1823/ 2800 batches | train loss 0.5030226 +| epoch 4 | 1827/ 2800 batches | train loss 0.4368085 +| epoch 4 | 1831/ 2800 batches | train loss 0.4147686 +| epoch 4 | 1835/ 2800 batches | train loss 0.3938298 +| epoch 4 | 1839/ 2800 batches | train loss 0.4100221 +| epoch 4 | 1843/ 2800 batches | train loss 0.4630504 +| epoch 4 | 1847/ 2800 batches | train loss 0.4050877 +| epoch 4 | 1851/ 2800 batches | train loss 0.4732453 +| epoch 4 | 1855/ 2800 batches | train loss 0.4592618 +| epoch 4 | 1859/ 2800 batches | train loss 0.5299255 +| epoch 4 | 1863/ 2800 batches | train loss 0.5081754 +| epoch 4 | 1867/ 2800 batches | train loss 0.4129832 +| epoch 4 | 1871/ 2800 batches | train loss 0.3542678 +| epoch 4 | 1875/ 2800 batches | train loss 0.4293976 +| epoch 4 | 1879/ 2800 batches | train loss 0.4090213 +| epoch 4 | 1883/ 2800 batches | train loss 0.3834620 +| epoch 4 | 1887/ 2800 batches | train loss 0.4228610 +| epoch 4 | 1891/ 2800 batches | train loss 0.4150091 +| epoch 4 | 1895/ 2800 batches | train loss 0.5549601 +| epoch 4 | 1899/ 2800 batches | train loss 0.2224847 +| epoch 4 | 1903/ 2800 batches | train loss 0.3287275 +| epoch 4 | 1907/ 2800 batches | train loss 0.3347608 +| epoch 4 | 1911/ 2800 batches | train loss 0.4107589 +| epoch 4 | 1915/ 2800 batches | train loss 0.5192481 +| epoch 4 | 1919/ 2800 batches | train loss 0.4377674 +| epoch 4 | 1923/ 2800 batches | train loss 0.4356008 +| epoch 4 | 1927/ 2800 batches | train loss 0.4602901 +| epoch 4 | 1931/ 2800 batches | train loss 0.4664937 +| epoch 4 | 1935/ 2800 batches | train loss 0.4606049 +| epoch 4 | 1939/ 2800 batches | train loss 0.4426089 +| epoch 4 | 1943/ 2800 batches | train loss 0.4393557 +| epoch 4 | 1947/ 2800 batches | train loss 0.3771903 +| epoch 4 | 1951/ 2800 batches | train loss 0.4212575 +| epoch 4 | 1955/ 2800 batches | train loss 0.4048901 +| epoch 4 | 1959/ 2800 batches | train loss 0.4048957 +| epoch 4 | 1963/ 2800 batches | train loss 0.4413042 +| epoch 4 | 1967/ 2800 batches | train loss 0.4438805 +| epoch 4 | 1971/ 2800 batches | train loss 0.4647383 +| epoch 4 | 1975/ 2800 batches | train loss 0.4662334 +| epoch 4 | 1979/ 2800 batches | train loss 0.3356475 +| epoch 4 | 1983/ 2800 batches | train loss 0.4947466 +| epoch 4 | 1987/ 2800 batches | train loss 0.4295152 +| epoch 4 | 1991/ 2800 batches | train loss 0.4899555 +| epoch 4 | 1995/ 2800 batches | train loss 0.4247700 +| epoch 4 | 1999/ 2800 batches | train loss 0.4814957 +| epoch 4 | 2003/ 2800 batches | train loss 0.4057340 +| epoch 4 | 2007/ 2800 batches | train loss 0.4322219 +| epoch 4 | 2011/ 2800 batches | train loss 0.3887328 +| epoch 4 | 2015/ 2800 batches | train loss 0.4822366 +| epoch 4 | 2019/ 2800 batches | train loss 0.4391982 +| epoch 4 | 2023/ 2800 batches | train loss 0.4701206 +| epoch 4 | 2027/ 2800 batches | train loss 0.4015890 +| epoch 4 | 2031/ 2800 batches | train loss 0.3663772 +| epoch 4 | 2035/ 2800 batches | train loss 0.3838284 +| epoch 4 | 2039/ 2800 batches | train loss 0.5060769 +| epoch 4 | 2043/ 2800 batches | train loss 0.4403812 +| epoch 4 | 2047/ 2800 batches | train loss 0.4377115 +| epoch 4 | 2051/ 2800 batches | train loss 0.4027440 +| epoch 4 | 2055/ 2800 batches | train loss 0.3451445 +| epoch 4 | 2059/ 2800 batches | train loss 0.3943876 +| epoch 4 | 2063/ 2800 batches | train loss 0.4991161 +| epoch 4 | 2067/ 2800 batches | train loss 0.4160466 +| epoch 4 | 2071/ 2800 batches | train loss 0.4538833 +| epoch 4 | 2075/ 2800 batches | train loss 0.4130841 +| epoch 4 | 2079/ 2800 batches | train loss 0.3939214 +| epoch 4 | 2083/ 2800 batches | train loss 0.5080702 +| epoch 4 | 2087/ 2800 batches | train loss 0.3364095 +| epoch 4 | 2091/ 2800 batches | train loss 0.4524014 +| epoch 4 | 2095/ 2800 batches | train loss 0.4805465 +| epoch 4 | 2099/ 2800 batches | train loss 0.4434573 +| epoch 4 | 2103/ 2800 batches | train loss 0.6203411 +| epoch 4 | 2107/ 2800 batches | train loss 0.5119929 +| epoch 4 | 2111/ 2800 batches | train loss 0.3789548 +| epoch 4 | 2115/ 2800 batches | train loss 0.3792223 +| epoch 4 | 2119/ 2800 batches | train loss 0.4784471 +| epoch 4 | 2123/ 2800 batches | train loss 0.4108900 +| epoch 4 | 2127/ 2800 batches | train loss 0.4572393 +| epoch 4 | 2131/ 2800 batches | train loss 0.4181283 +| epoch 4 | 2135/ 2800 batches | train loss 0.4466442 +| epoch 4 | 2139/ 2800 batches | train loss 0.3685301 +| epoch 4 | 2143/ 2800 batches | train loss 0.4220639 +| epoch 4 | 2147/ 2800 batches | train loss 0.4860970 +| epoch 4 | 2151/ 2800 batches | train loss 0.4221105 +| epoch 4 | 2155/ 2800 batches | train loss 0.4009200 +| epoch 4 | 2159/ 2800 batches | train loss 0.4368253 +| epoch 4 | 2163/ 2800 batches | train loss 0.4668211 +| epoch 4 | 2167/ 2800 batches | train loss 0.4700757 +| epoch 4 | 2171/ 2800 batches | train loss 0.4778611 +| epoch 4 | 2175/ 2800 batches | train loss 0.4775338 +| epoch 4 | 2179/ 2800 batches | train loss 0.3343064 +| epoch 4 | 2183/ 2800 batches | train loss 0.4858443 +| epoch 4 | 2187/ 2800 batches | train loss 0.4217458 +| epoch 4 | 2191/ 2800 batches | train loss 0.3689537 +| epoch 4 | 2195/ 2800 batches | train loss 0.3345143 +| epoch 4 | 2199/ 2800 batches | train loss 0.3730813 +| epoch 4 | 2203/ 2800 batches | train loss 0.4083702 +| epoch 4 | 2207/ 2800 batches | train loss 0.4426413 +| epoch 4 | 2211/ 2800 batches | train loss 0.4001494 +| epoch 4 | 2215/ 2800 batches | train loss 0.4592700 +| epoch 4 | 2219/ 2800 batches | train loss 0.4682502 +| epoch 4 | 2223/ 2800 batches | train loss 0.4557722 +| epoch 4 | 2227/ 2800 batches | train loss 0.4712475 +| epoch 4 | 2231/ 2800 batches | train loss 0.5710424 +| epoch 4 | 2235/ 2800 batches | train loss 0.4628642 +| epoch 4 | 2239/ 2800 batches | train loss 0.3346013 +| epoch 4 | 2243/ 2800 batches | train loss 0.3871705 +| epoch 4 | 2247/ 2800 batches | train loss 0.3597330 +| epoch 4 | 2251/ 2800 batches | train loss 0.4287077 +| epoch 4 | 2255/ 2800 batches | train loss 0.3833598 +| epoch 4 | 2259/ 2800 batches | train loss 0.4330959 +| epoch 4 | 2263/ 2800 batches | train loss 0.4499797 +| epoch 4 | 2267/ 2800 batches | train loss 0.5325627 +| epoch 4 | 2271/ 2800 batches | train loss 0.4652851 +| epoch 4 | 2275/ 2800 batches | train loss 0.3789589 +| epoch 4 | 2279/ 2800 batches | train loss 0.4802116 +| epoch 4 | 2283/ 2800 batches | train loss 0.4708987 +| epoch 4 | 2287/ 2800 batches | train loss 0.3454916 +| epoch 4 | 2291/ 2800 batches | train loss 0.3964013 +| epoch 4 | 2295/ 2800 batches | train loss 0.5472106 +| epoch 4 | 2299/ 2800 batches | train loss 0.4360846 +| epoch 4 | 2303/ 2800 batches | train loss 0.4314629 +| epoch 4 | 2307/ 2800 batches | train loss 0.4751589 +| epoch 4 | 2311/ 2800 batches | train loss 0.4190728 +| epoch 4 | 2315/ 2800 batches | train loss 0.4517300 +| epoch 4 | 2319/ 2800 batches | train loss 0.3917307 +| epoch 4 | 2323/ 2800 batches | train loss 0.3942689 +| epoch 4 | 2327/ 2800 batches | train loss 0.4677246 +| epoch 4 | 2331/ 2800 batches | train loss 0.4449592 +| epoch 4 | 2335/ 2800 batches | train loss 0.4024921 +| epoch 4 | 2339/ 2800 batches | train loss 0.4292452 +| epoch 4 | 2343/ 2800 batches | train loss 0.4227991 +| epoch 4 | 2347/ 2800 batches | train loss 0.4611026 +| epoch 4 | 2351/ 2800 batches | train loss 0.3888589 +| epoch 4 | 2355/ 2800 batches | train loss 0.4274600 +| epoch 4 | 2359/ 2800 batches | train loss 0.4185463 +| epoch 4 | 2363/ 2800 batches | train loss 0.4935468 +| epoch 4 | 2367/ 2800 batches | train loss 0.4430729 +| epoch 4 | 2371/ 2800 batches | train loss 0.4328853 +| epoch 4 | 2375/ 2800 batches | train loss 0.4302966 +| epoch 4 | 2379/ 2800 batches | train loss 0.3692284 +| epoch 4 | 2383/ 2800 batches | train loss 0.4205548 +| epoch 4 | 2387/ 2800 batches | train loss 0.3505651 +| epoch 4 | 2391/ 2800 batches | train loss 0.4991598 +| epoch 4 | 2395/ 2800 batches | train loss 0.3806151 +| epoch 4 | 2399/ 2800 batches | train loss 0.4082516 +| epoch 4 | 2403/ 2800 batches | train loss 0.4222749 +| epoch 4 | 2407/ 2800 batches | train loss 0.4267902 +| epoch 4 | 2411/ 2800 batches | train loss 0.4793240 +| epoch 4 | 2415/ 2800 batches | train loss 0.4640130 +| epoch 4 | 2419/ 2800 batches | train loss 0.4890309 +| epoch 4 | 2423/ 2800 batches | train loss 0.4481943 +| epoch 4 | 2427/ 2800 batches | train loss 0.4088435 +| epoch 4 | 2431/ 2800 batches | train loss 0.4704008 +| epoch 4 | 2435/ 2800 batches | train loss 0.4405621 +| epoch 4 | 2439/ 2800 batches | train loss 0.4272099 +| epoch 4 | 2443/ 2800 batches | train loss 0.3957317 +| epoch 4 | 2447/ 2800 batches | train loss 0.4333327 +| epoch 4 | 2451/ 2800 batches | train loss 0.5274425 +| epoch 4 | 2455/ 2800 batches | train loss 0.3538514 +| epoch 4 | 2459/ 2800 batches | train loss 0.5547366 +| epoch 4 | 2463/ 2800 batches | train loss 0.3909250 +| epoch 4 | 2467/ 2800 batches | train loss 0.4237434 +| epoch 4 | 2471/ 2800 batches | train loss 0.4049704 +| epoch 4 | 2475/ 2800 batches | train loss 0.3810127 +| epoch 4 | 2479/ 2800 batches | train loss 0.4351971 +| epoch 4 | 2483/ 2800 batches | train loss 0.4287395 +| epoch 4 | 2487/ 2800 batches | train loss 0.3738407 +| epoch 4 | 2491/ 2800 batches | train loss 0.4296121 +| epoch 4 | 2495/ 2800 batches | train loss 0.3844055 +| epoch 4 | 2499/ 2800 batches | train loss 0.3704369 +| epoch 4 | 2503/ 2800 batches | train loss 0.4270892 +| epoch 4 | 2507/ 2800 batches | train loss 0.4361452 +| epoch 4 | 2511/ 2800 batches | train loss 0.4872730 +| epoch 4 | 2515/ 2800 batches | train loss 0.4146689 +| epoch 4 | 2519/ 2800 batches | train loss 0.3971919 +| epoch 4 | 2523/ 2800 batches | train loss 0.4313716 +| epoch 4 | 2527/ 2800 batches | train loss 0.4058864 +| epoch 4 | 2531/ 2800 batches | train loss 0.4159083 +| epoch 4 | 2535/ 2800 batches | train loss 0.4060773 +| epoch 4 | 2539/ 2800 batches | train loss 0.4560958 +| epoch 4 | 2543/ 2800 batches | train loss 0.4642864 +| epoch 4 | 2547/ 2800 batches | train loss 0.4482787 +| epoch 4 | 2551/ 2800 batches | train loss 0.4410629 +| epoch 4 | 2555/ 2800 batches | train loss 0.4500536 +| epoch 4 | 2559/ 2800 batches | train loss 0.4406340 +| epoch 4 | 2563/ 2800 batches | train loss 0.4169302 +| epoch 4 | 2567/ 2800 batches | train loss 0.5009117 +| epoch 4 | 2571/ 2800 batches | train loss 0.4410385 +| epoch 4 | 2575/ 2800 batches | train loss 0.4093117 +| epoch 4 | 2579/ 2800 batches | train loss 0.3678061 +| epoch 4 | 2583/ 2800 batches | train loss 0.4533792 +| epoch 4 | 2587/ 2800 batches | train loss 0.5612571 +| epoch 4 | 2591/ 2800 batches | train loss 0.3784393 +| epoch 4 | 2595/ 2800 batches | train loss 0.4428793 +| epoch 4 | 2599/ 2800 batches | train loss 0.3426209 +| epoch 4 | 2603/ 2800 batches | train loss 0.3914161 +| epoch 4 | 2607/ 2800 batches | train loss 0.4555431 +| epoch 4 | 2611/ 2800 batches | train loss 0.3709452 +| epoch 4 | 2615/ 2800 batches | train loss 0.4289933 +| epoch 4 | 2619/ 2800 batches | train loss 0.3996613 +| epoch 4 | 2623/ 2800 batches | train loss 0.3902507 +| epoch 4 | 2627/ 2800 batches | train loss 0.3941134 +| epoch 4 | 2631/ 2800 batches | train loss 0.3178323 +| epoch 4 | 2635/ 2800 batches | train loss 0.3945998 +| epoch 4 | 2639/ 2800 batches | train loss 0.4056052 +| epoch 4 | 2643/ 2800 batches | train loss 0.4378371 +| epoch 4 | 2647/ 2800 batches | train loss 0.4717978 +| epoch 4 | 2651/ 2800 batches | train loss 0.4297237 +| epoch 4 | 2655/ 2800 batches | train loss 0.3950437 +| epoch 4 | 2659/ 2800 batches | train loss 0.4393911 +| epoch 4 | 2663/ 2800 batches | train loss 0.3555033 +| epoch 4 | 2667/ 2800 batches | train loss 0.4289663 +| epoch 4 | 2671/ 2800 batches | train loss 0.4490962 +| epoch 4 | 2675/ 2800 batches | train loss 0.4345220 +| epoch 4 | 2679/ 2800 batches | train loss 0.4621304 +| epoch 4 | 2683/ 2800 batches | train loss 0.4489809 +| epoch 4 | 2687/ 2800 batches | train loss 0.4542688 +| epoch 4 | 2691/ 2800 batches | train loss 0.4607689 +| epoch 4 | 2695/ 2800 batches | train loss 0.4139331 +| epoch 4 | 2699/ 2800 batches | train loss 0.4094478 +| epoch 4 | 2703/ 2800 batches | train loss 0.4652317 +| epoch 4 | 2707/ 2800 batches | train loss 0.4097383 +| epoch 4 | 2711/ 2800 batches | train loss 0.4609524 +| epoch 4 | 2715/ 2800 batches | train loss 0.4908342 +| epoch 4 | 2719/ 2800 batches | train loss 0.5068669 +| epoch 4 | 2723/ 2800 batches | train loss 0.4180198 +| epoch 4 | 2727/ 2800 batches | train loss 0.3903865 +| epoch 4 | 2731/ 2800 batches | train loss 0.3333111 +| epoch 4 | 2735/ 2800 batches | train loss 0.5253251 +| epoch 4 | 2739/ 2800 batches | train loss 0.4306920 +| epoch 4 | 2743/ 2800 batches | train loss 0.4576845 +| epoch 4 | 2747/ 2800 batches | train loss 0.3885031 +| epoch 4 | 2751/ 2800 batches | train loss 0.4589127 +| epoch 4 | 2755/ 2800 batches | train loss 0.2679921 +| epoch 4 | 2759/ 2800 batches | train loss 0.5159762 +| epoch 4 | 2763/ 2800 batches | train loss 0.4198998 +| epoch 4 | 2767/ 2800 batches | train loss 0.4167250 +| epoch 4 | 2771/ 2800 batches | train loss 0.4926130 +| epoch 4 | 2775/ 2800 batches | train loss 0.5947277 +| epoch 4 | 2779/ 2800 batches | train loss 0.3876947 +| epoch 4 | 2783/ 2800 batches | train loss 0.4100856 +| epoch 4 | 2787/ 2800 batches | train loss 0.4276762 +| epoch 4 | 2791/ 2800 batches | train loss 0.3964183 +| epoch 4 | 2795/ 2800 batches | train loss 0.5102024 +| epoch 4 | 2799/ 2800 batches | train loss 0.3385561 +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 2800 batches | test loss 0.4919740 +| epoch 4 | 7/ 2800 batches | test loss 0.4911575 +| epoch 4 | 11/ 2800 batches | test loss 0.3923791 +| epoch 4 | 15/ 2800 batches | test loss 0.3599579 +| epoch 4 | 19/ 2800 batches | test loss 0.3537427 +| epoch 4 | 23/ 2800 batches | test loss 0.4272306 +| epoch 4 | 27/ 2800 batches | test loss 0.4900556 +| epoch 4 | 31/ 2800 batches | test loss 0.4487306 +| epoch 4 | 35/ 2800 batches | test loss 0.4692821 +| epoch 4 | 39/ 2800 batches | test loss 0.5063593 +| epoch 4 | 43/ 2800 batches | test loss 0.4676699 +| epoch 4 | 47/ 2800 batches | test loss 0.4898216 +| epoch 4 | 51/ 2800 batches | test loss 0.4239202 +| epoch 4 | 55/ 2800 batches | test loss 0.4173586 +| epoch 4 | 59/ 2800 batches | test loss 0.3682920 +| epoch 4 | 63/ 2800 batches | test loss 0.4054934 +| epoch 4 | 67/ 2800 batches | test loss 0.4674021 +| epoch 4 | 71/ 2800 batches | test loss 0.4702383 +| epoch 4 | 75/ 2800 batches | test loss 0.4586660 +| epoch 4 | 79/ 2800 batches | test loss 0.5334131 +| epoch 4 | 83/ 2800 batches | test loss 0.4761699 +| epoch 4 | 87/ 2800 batches | test loss 0.4330797 +| epoch 4 | 91/ 2800 batches | test loss 0.4353113 +| epoch 4 | 95/ 2800 batches | test loss 0.4936264 +| epoch 4 | 99/ 2800 batches | test loss 0.3895102 +| epoch 4 | 103/ 2800 batches | test loss 0.3747488 +| epoch 4 | 107/ 2800 batches | test loss 0.5429114 +| epoch 4 | 111/ 2800 batches | test loss 0.3825004 +| epoch 4 | 115/ 2800 batches | test loss 0.4428762 +| epoch 4 | 119/ 2800 batches | test loss 0.4063340 +| epoch 4 | 123/ 2800 batches | test loss 0.4392786 +| epoch 4 | 127/ 2800 batches | test loss 0.4251083 +| epoch 4 | 131/ 2800 batches | test loss 0.4869820 +| epoch 4 | 135/ 2800 batches | test loss 0.4459745 +| epoch 4 | 139/ 2800 batches | test loss 0.3710446 +| epoch 4 | 143/ 2800 batches | test loss 0.4304878 +| epoch 4 | 147/ 2800 batches | test loss 0.3805480 +| epoch 4 | 151/ 2800 batches | test loss 0.5589656 +| epoch 4 | 155/ 2800 batches | test loss 0.5263748 +| epoch 4 | 159/ 2800 batches | test loss 0.3743851 +| epoch 4 | 163/ 2800 batches | test loss 0.4071954 +| epoch 4 | 167/ 2800 batches | test loss 0.5355073 +| epoch 4 | 171/ 2800 batches | test loss 0.4621982 +| epoch 4 | 175/ 2800 batches | test loss 0.4547339 +| epoch 4 | 179/ 2800 batches | test loss 0.5188532 +| epoch 4 | 183/ 2800 batches | test loss 0.5763208 +| epoch 4 | 187/ 2800 batches | test loss 0.3581689 +| epoch 4 | 191/ 2800 batches | test loss 0.4567097 +| epoch 4 | 195/ 2800 batches | test loss 0.4467806 +| epoch 4 | 199/ 2800 batches | test loss 0.4061048 +| epoch 4 | 203/ 2800 batches | test loss 0.5001928 +| epoch 4 | 207/ 2800 batches | test loss 0.4139689 +| epoch 4 | 211/ 2800 batches | test loss 0.4479793 +| epoch 4 | 215/ 2800 batches | test loss 0.5045635 +| epoch 4 | 219/ 2800 batches | test loss 0.6270289 +| epoch 4 | 223/ 2800 batches | test loss 0.3772445 +| epoch 4 | 227/ 2800 batches | test loss 0.3600639 +| epoch 4 | 231/ 2800 batches | test loss 0.4143935 +| epoch 4 | 235/ 2800 batches | test loss 0.4838632 +| epoch 4 | 239/ 2800 batches | test loss 0.5135267 +| epoch 4 | 243/ 2800 batches | test loss 0.5455267 +| epoch 4 | 247/ 2800 batches | test loss 0.4199602 +| epoch 4 | 251/ 2800 batches | test loss 0.4539979 +| epoch 4 | 255/ 2800 batches | test loss 0.4706591 +| epoch 4 | 259/ 2800 batches | test loss 0.3659360 +| epoch 4 | 263/ 2800 batches | test loss 0.5260783 +| epoch 4 | 267/ 2800 batches | test loss 0.5336707 +| epoch 4 | 271/ 2800 batches | test loss 0.4516129 +| epoch 4 | 275/ 2800 batches | test loss 0.4289247 +| epoch 4 | 279/ 2800 batches | test loss 0.5083264 +| epoch 4 | 283/ 2800 batches | test loss 0.4923216 +| epoch 4 | 287/ 2800 batches | test loss 0.3240967 +| epoch 4 | 291/ 2800 batches | test loss 0.5085077 +| epoch 4 | 295/ 2800 batches | test loss 0.3918353 +| epoch 4 | 299/ 2800 batches | test loss 0.4279593 +| epoch 4 | 303/ 2800 batches | test loss 0.3687596 +| epoch 4 | 307/ 2800 batches | test loss 0.4565874 +| epoch 4 | 311/ 2800 batches | test loss 0.4255113 +| epoch 4 | 315/ 2800 batches | test loss 0.4441029 +| epoch 4 | 319/ 2800 batches | test loss 0.4499781 +| epoch 4 | 323/ 2800 batches | test loss 0.4476847 +| epoch 4 | 327/ 2800 batches | test loss 0.5409712 +| epoch 4 | 331/ 2800 batches | test loss 0.4612468 +| epoch 4 | 335/ 2800 batches | test loss 0.3307382 +| epoch 4 | 339/ 2800 batches | test loss 0.4537775 +| epoch 4 | 343/ 2800 batches | test loss 0.3980873 +| epoch 4 | 347/ 2800 batches | test loss 0.4183573 +| epoch 4 | 351/ 2800 batches | test loss 0.4061968 +| epoch 4 | 355/ 2800 batches | test loss 0.3856526 +| epoch 4 | 359/ 2800 batches | test loss 0.4057038 +| epoch 4 | 363/ 2800 batches | test loss 0.3795769 +| epoch 4 | 367/ 2800 batches | test loss 0.3794146 +| epoch 4 | 371/ 2800 batches | test loss 0.5554135 +| epoch 4 | 375/ 2800 batches | test loss 0.4269409 +| epoch 4 | 379/ 2800 batches | test loss 0.4480713 +| epoch 4 | 383/ 2800 batches | test loss 0.2723461 +| epoch 4 | 387/ 2800 batches | test loss 0.5537360 +| epoch 4 | 391/ 2800 batches | test loss 0.3396708 +| epoch 4 | 395/ 2800 batches | test loss 0.4859841 +| epoch 4 | 399/ 2800 batches | test loss 0.4766225 +| epoch 4 | 403/ 2800 batches | test loss 0.5156159 +| epoch 4 | 407/ 2800 batches | test loss 0.4296578 +| epoch 4 | 411/ 2800 batches | test loss 0.4565055 +| epoch 4 | 415/ 2800 batches | test loss 0.3931860 +| epoch 4 | 419/ 2800 batches | test loss 0.5271620 +| epoch 4 | 423/ 2800 batches | test loss 0.4787940 +| epoch 4 | 427/ 2800 batches | test loss 0.4181963 +| epoch 4 | 431/ 2800 batches | test loss 0.4552109 +| epoch 4 | 435/ 2800 batches | test loss 0.5037614 +| epoch 4 | 439/ 2800 batches | test loss 0.5188806 +| epoch 4 | 443/ 2800 batches | test loss 0.4251251 +| epoch 4 | 447/ 2800 batches | test loss 0.5390349 +| epoch 4 | 451/ 2800 batches | test loss 0.4615715 +| epoch 4 | 455/ 2800 batches | test loss 0.4775494 +| epoch 4 | 459/ 2800 batches | test loss 0.3080355 +| epoch 4 | 463/ 2800 batches | test loss 0.4521914 +| epoch 4 | 467/ 2800 batches | test loss 0.4162133 +| epoch 4 | 471/ 2800 batches | test loss 0.3903903 +| epoch 4 | 475/ 2800 batches | test loss 0.4143828 +| epoch 4 | 479/ 2800 batches | test loss 0.4797834 +| epoch 4 | 483/ 2800 batches | test loss 0.4459648 +| epoch 4 | 487/ 2800 batches | test loss 0.4511796 +| epoch 4 | 491/ 2800 batches | test loss 0.4591531 +| epoch 4 | 495/ 2800 batches | test loss 0.4063091 +| epoch 4 | 499/ 2800 batches | test loss 0.4968208 +| epoch 4 | 503/ 2800 batches | test loss 0.3896914 +| epoch 4 | 507/ 2800 batches | test loss 0.4060429 +| epoch 4 | 511/ 2800 batches | test loss 0.3952516 +| epoch 4 | 515/ 2800 batches | test loss 0.3857152 +| epoch 4 | 519/ 2800 batches | test loss 0.3939400 +| epoch 4 | 523/ 2800 batches | test loss 0.4457459 +| epoch 4 | 527/ 2800 batches | test loss 0.3793060 +| epoch 4 | 531/ 2800 batches | test loss 0.4183581 +| epoch 4 | 535/ 2800 batches | test loss 0.4355259 +| epoch 4 | 539/ 2800 batches | test loss 0.4115964 +| epoch 4 | 543/ 2800 batches | test loss 0.4750012 +| epoch 4 | 547/ 2800 batches | test loss 0.4453985 +| epoch 4 | 551/ 2800 batches | test loss 0.4716077 +| epoch 4 | 555/ 2800 batches | test loss 0.4465209 +| epoch 4 | 559/ 2800 batches | test loss 0.5147691 +| epoch 4 | 563/ 2800 batches | test loss 0.4455045 +| epoch 4 | 567/ 2800 batches | test loss 0.3718055 +| epoch 4 | 571/ 2800 batches | test loss 0.3703730 +| epoch 4 | 575/ 2800 batches | test loss 0.4981763 +| epoch 4 | 579/ 2800 batches | test loss 0.3903583 +| epoch 4 | 583/ 2800 batches | test loss 0.4339848 +| epoch 4 | 587/ 2800 batches | test loss 0.4716940 +| epoch 4 | 591/ 2800 batches | test loss 0.3994444 +| epoch 4 | 595/ 2800 batches | test loss 0.4524627 +| epoch 4 | 599/ 2800 batches | test loss 0.4303739 +| epoch 4 | 603/ 2800 batches | test loss 0.3538792 +| epoch 4 | 607/ 2800 batches | test loss 0.4474870 +| epoch 4 | 611/ 2800 batches | test loss 0.3900634 +| epoch 4 | 615/ 2800 batches | test loss 0.5366908 +| epoch 4 | 619/ 2800 batches | test loss 0.4445144 +| epoch 4 | 623/ 2800 batches | test loss 0.5058870 +| epoch 4 | 627/ 2800 batches | test loss 0.3663864 +| epoch 4 | 631/ 2800 batches | test loss 0.4567490 +| epoch 4 | 635/ 2800 batches | test loss 0.4687533 +| epoch 4 | 639/ 2800 batches | test loss 0.5279980 +| epoch 4 | 643/ 2800 batches | test loss 0.5081175 +| epoch 4 | 647/ 2800 batches | test loss 0.4285221 +| epoch 4 | 651/ 2800 batches | test loss 0.4456691 +| epoch 4 | 655/ 2800 batches | test loss 0.4424514 +| epoch 4 | 659/ 2800 batches | test loss 0.3991839 +| epoch 4 | 663/ 2800 batches | test loss 0.4315283 +| epoch 4 | 667/ 2800 batches | test loss 0.4455409 +| epoch 4 | 671/ 2800 batches | test loss 0.4307350 +| epoch 4 | 675/ 2800 batches | test loss 0.4085278 +| epoch 4 | 679/ 2800 batches | test loss 0.4059895 +| epoch 4 | 683/ 2800 batches | test loss 0.4743522 +| epoch 4 | 687/ 2800 batches | test loss 0.4750212 +| epoch 4 | 691/ 2800 batches | test loss 0.3918314 +| epoch 4 | 695/ 2800 batches | test loss 0.5857305 +| epoch 4 | 699/ 2800 batches | test loss 0.3591468 +| epoch 4 | final test loss 0.4451, save model! +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 2800 batches | train loss 0.3793967 +| epoch 5 | 7/ 2800 batches | train loss 0.4437954 +| epoch 5 | 11/ 2800 batches | train loss 0.4990571 +| epoch 5 | 15/ 2800 batches | train loss 0.3705745 +| epoch 5 | 19/ 2800 batches | train loss 0.3977699 +| epoch 5 | 23/ 2800 batches | train loss 0.4211416 +| epoch 5 | 27/ 2800 batches | train loss 0.3842563 +| epoch 5 | 31/ 2800 batches | train loss 0.4619216 +| epoch 5 | 35/ 2800 batches | train loss 0.4080739 +| epoch 5 | 39/ 2800 batches | train loss 0.4707264 +| epoch 5 | 43/ 2800 batches | train loss 0.4560899 +| epoch 5 | 47/ 2800 batches | train loss 0.4717397 +| epoch 5 | 51/ 2800 batches | train loss 0.4073296 +| epoch 5 | 55/ 2800 batches | train loss 0.4828787 +| epoch 5 | 59/ 2800 batches | train loss 0.3884903 +| epoch 5 | 63/ 2800 batches | train loss 0.3402263 +| epoch 5 | 67/ 2800 batches | train loss 0.4175579 +| epoch 5 | 71/ 2800 batches | train loss 0.3973159 +| epoch 5 | 75/ 2800 batches | train loss 0.3980605 +| epoch 5 | 79/ 2800 batches | train loss 0.4132595 +| epoch 5 | 83/ 2800 batches | train loss 0.3891628 +| epoch 5 | 87/ 2800 batches | train loss 0.4480270 +| epoch 5 | 91/ 2800 batches | train loss 0.4166961 +| epoch 5 | 95/ 2800 batches | train loss 0.3193950 +| epoch 5 | 99/ 2800 batches | train loss 0.3892093 +| epoch 5 | 103/ 2800 batches | train loss 0.4543775 +| epoch 5 | 107/ 2800 batches | train loss 0.4380332 +| epoch 5 | 111/ 2800 batches | train loss 0.4007111 +| epoch 5 | 115/ 2800 batches | train loss 0.4794589 +| epoch 5 | 119/ 2800 batches | train loss 0.4696122 +| epoch 5 | 123/ 2800 batches | train loss 0.3905368 +| epoch 5 | 127/ 2800 batches | train loss 0.3833467 +| epoch 5 | 131/ 2800 batches | train loss 0.3551472 +| epoch 5 | 135/ 2800 batches | train loss 0.3513881 +| epoch 5 | 139/ 2800 batches | train loss 0.3758847 +| epoch 5 | 143/ 2800 batches | train loss 0.3169828 +| epoch 5 | 147/ 2800 batches | train loss 0.3485664 +| epoch 5 | 151/ 2800 batches | train loss 0.3825141 +| epoch 5 | 155/ 2800 batches | train loss 0.4278940 +| epoch 5 | 159/ 2800 batches | train loss 0.4552855 +| epoch 5 | 163/ 2800 batches | train loss 0.3720232 +| epoch 5 | 167/ 2800 batches | train loss 0.3638482 +| epoch 5 | 171/ 2800 batches | train loss 0.4133196 +| epoch 5 | 175/ 2800 batches | train loss 0.4124584 +| epoch 5 | 179/ 2800 batches | train loss 0.4766659 +| epoch 5 | 183/ 2800 batches | train loss 0.4664388 +| epoch 5 | 187/ 2800 batches | train loss 0.4355538 +| epoch 5 | 191/ 2800 batches | train loss 0.4575062 +| epoch 5 | 195/ 2800 batches | train loss 0.4331713 +| epoch 5 | 199/ 2800 batches | train loss 0.4238951 +| epoch 5 | 203/ 2800 batches | train loss 0.4362762 +| epoch 5 | 207/ 2800 batches | train loss 0.4511300 +| epoch 5 | 211/ 2800 batches | train loss 0.3801779 +| epoch 5 | 215/ 2800 batches | train loss 0.4936019 +| epoch 5 | 219/ 2800 batches | train loss 0.4635815 +| epoch 5 | 223/ 2800 batches | train loss 0.3973376 +| epoch 5 | 227/ 2800 batches | train loss 0.4346094 +| epoch 5 | 231/ 2800 batches | train loss 0.3270850 +| epoch 5 | 235/ 2800 batches | train loss 0.4266925 +| epoch 5 | 239/ 2800 batches | train loss 0.3815249 +| epoch 5 | 243/ 2800 batches | train loss 0.3533221 +| epoch 5 | 247/ 2800 batches | train loss 0.4569813 +| epoch 5 | 251/ 2800 batches | train loss 0.4091923 +| epoch 5 | 255/ 2800 batches | train loss 0.4335608 +| epoch 5 | 259/ 2800 batches | train loss 0.3722845 +| epoch 5 | 263/ 2800 batches | train loss 0.4508958 +| epoch 5 | 267/ 2800 batches | train loss 0.4664594 +| epoch 5 | 271/ 2800 batches | train loss 0.3419056 +| epoch 5 | 275/ 2800 batches | train loss 0.5106205 +| epoch 5 | 279/ 2800 batches | train loss 0.5078056 +| epoch 5 | 283/ 2800 batches | train loss 0.5449964 +| epoch 5 | 287/ 2800 batches | train loss 0.4113706 +| epoch 5 | 291/ 2800 batches | train loss 0.3432652 +| epoch 5 | 295/ 2800 batches | train loss 0.3968611 +| epoch 5 | 299/ 2800 batches | train loss 0.3937067 +| epoch 5 | 303/ 2800 batches | train loss 0.3684824 +| epoch 5 | 307/ 2800 batches | train loss 0.3909893 +| epoch 5 | 311/ 2800 batches | train loss 0.4261751 +| epoch 5 | 315/ 2800 batches | train loss 0.3980858 +| epoch 5 | 319/ 2800 batches | train loss 0.3975236 +| epoch 5 | 323/ 2800 batches | train loss 0.3804604 +| epoch 5 | 327/ 2800 batches | train loss 0.3813543 +| epoch 5 | 331/ 2800 batches | train loss 0.3901252 +| epoch 5 | 335/ 2800 batches | train loss 0.4051479 +| epoch 5 | 339/ 2800 batches | train loss 0.3703399 +| epoch 5 | 343/ 2800 batches | train loss 0.4996177 +| epoch 5 | 347/ 2800 batches | train loss 0.3782518 +| epoch 5 | 351/ 2800 batches | train loss 0.4338005 +| epoch 5 | 355/ 2800 batches | train loss 0.5304585 +| epoch 5 | 359/ 2800 batches | train loss 0.4453000 +| epoch 5 | 363/ 2800 batches | train loss 0.3639694 +| epoch 5 | 367/ 2800 batches | train loss 0.4796655 +| epoch 5 | 371/ 2800 batches | train loss 0.4174385 +| epoch 5 | 375/ 2800 batches | train loss 0.2999250 +| epoch 5 | 379/ 2800 batches | train loss 0.3463587 +| epoch 5 | 383/ 2800 batches | train loss 0.4063882 +| epoch 5 | 387/ 2800 batches | train loss 0.3992981 +| epoch 5 | 391/ 2800 batches | train loss 0.3645369 +| epoch 5 | 395/ 2800 batches | train loss 0.3666576 +| epoch 5 | 399/ 2800 batches | train loss 0.4683358 +| epoch 5 | 403/ 2800 batches | train loss 0.4449457 +| epoch 5 | 407/ 2800 batches | train loss 0.4148464 +| epoch 5 | 411/ 2800 batches | train loss 0.3996174 +| epoch 5 | 415/ 2800 batches | train loss 0.4317682 +| epoch 5 | 419/ 2800 batches | train loss 0.4184936 +| epoch 5 | 423/ 2800 batches | train loss 0.4994005 +| epoch 5 | 427/ 2800 batches | train loss 0.3666485 +| epoch 5 | 431/ 2800 batches | train loss 0.4055186 +| epoch 5 | 435/ 2800 batches | train loss 0.4627469 +| epoch 5 | 439/ 2800 batches | train loss 0.3942437 +| epoch 5 | 443/ 2800 batches | train loss 0.4536338 +| epoch 5 | 447/ 2800 batches | train loss 0.4181991 +| epoch 5 | 451/ 2800 batches | train loss 0.3434196 +| epoch 5 | 455/ 2800 batches | train loss 0.4437295 +| epoch 5 | 459/ 2800 batches | train loss 0.3997554 +| epoch 5 | 463/ 2800 batches | train loss 0.4319986 +| epoch 5 | 467/ 2800 batches | train loss 0.4805304 +| epoch 5 | 471/ 2800 batches | train loss 0.4065284 +| epoch 5 | 475/ 2800 batches | train loss 0.4029185 +| epoch 5 | 479/ 2800 batches | train loss 0.4040272 +| epoch 5 | 483/ 2800 batches | train loss 0.4362423 +| epoch 5 | 487/ 2800 batches | train loss 0.3476859 +| epoch 5 | 491/ 2800 batches | train loss 0.4242653 +| epoch 5 | 495/ 2800 batches | train loss 0.3631733 +| epoch 5 | 499/ 2800 batches | train loss 0.3640322 +| epoch 5 | 503/ 2800 batches | train loss 0.4108592 +| epoch 5 | 507/ 2800 batches | train loss 0.3687657 +| epoch 5 | 511/ 2800 batches | train loss 0.4323560 +| epoch 5 | 515/ 2800 batches | train loss 0.4395173 +| epoch 5 | 519/ 2800 batches | train loss 0.3875670 +| epoch 5 | 523/ 2800 batches | train loss 0.3890425 +| epoch 5 | 527/ 2800 batches | train loss 0.3558481 +| epoch 5 | 531/ 2800 batches | train loss 0.4498467 +| epoch 5 | 535/ 2800 batches | train loss 0.3676677 +| epoch 5 | 539/ 2800 batches | train loss 0.3849245 +| epoch 5 | 543/ 2800 batches | train loss 0.4235575 +| epoch 5 | 547/ 2800 batches | train loss 0.2861559 +| epoch 5 | 551/ 2800 batches | train loss 0.4459317 +| epoch 5 | 555/ 2800 batches | train loss 0.3863065 +| epoch 5 | 559/ 2800 batches | train loss 0.4167795 +| epoch 5 | 563/ 2800 batches | train loss 0.4258911 +| epoch 5 | 567/ 2800 batches | train loss 0.4221090 +| epoch 5 | 571/ 2800 batches | train loss 0.4218164 +| epoch 5 | 575/ 2800 batches | train loss 0.4290995 +| epoch 5 | 579/ 2800 batches | train loss 0.4488684 +| epoch 5 | 583/ 2800 batches | train loss 0.4195226 +| epoch 5 | 587/ 2800 batches | train loss 0.4028909 +| epoch 5 | 591/ 2800 batches | train loss 0.4395108 +| epoch 5 | 595/ 2800 batches | train loss 0.4350115 +| epoch 5 | 599/ 2800 batches | train loss 0.4836783 +| epoch 5 | 603/ 2800 batches | train loss 0.4309551 +| epoch 5 | 607/ 2800 batches | train loss 0.4521930 +| epoch 5 | 611/ 2800 batches | train loss 0.4329932 +| epoch 5 | 615/ 2800 batches | train loss 0.3945583 +| epoch 5 | 619/ 2800 batches | train loss 0.4197497 +| epoch 5 | 623/ 2800 batches | train loss 0.4006796 +| epoch 5 | 627/ 2800 batches | train loss 0.4207484 +| epoch 5 | 631/ 2800 batches | train loss 0.4861066 +| epoch 5 | 635/ 2800 batches | train loss 0.4466239 +| epoch 5 | 639/ 2800 batches | train loss 0.4818923 +| epoch 5 | 643/ 2800 batches | train loss 0.4177034 +| epoch 5 | 647/ 2800 batches | train loss 0.4353291 +| epoch 5 | 651/ 2800 batches | train loss 0.3449662 +| epoch 5 | 655/ 2800 batches | train loss 0.3098059 +| epoch 5 | 659/ 2800 batches | train loss 0.3644840 +| epoch 5 | 663/ 2800 batches | train loss 0.4139398 +| epoch 5 | 667/ 2800 batches | train loss 0.4014466 +| epoch 5 | 671/ 2800 batches | train loss 0.4364865 +| epoch 5 | 675/ 2800 batches | train loss 0.3974363 +| epoch 5 | 679/ 2800 batches | train loss 0.3636145 +| epoch 5 | 683/ 2800 batches | train loss 0.4378254 +| epoch 5 | 687/ 2800 batches | train loss 0.4623954 +| epoch 5 | 691/ 2800 batches | train loss 0.4083071 +| epoch 5 | 695/ 2800 batches | train loss 0.5177555 +| epoch 5 | 699/ 2800 batches | train loss 0.3717789 +| epoch 5 | 703/ 2800 batches | train loss 0.4364485 +| epoch 5 | 707/ 2800 batches | train loss 0.4509102 +| epoch 5 | 711/ 2800 batches | train loss 0.3717794 +| epoch 5 | 715/ 2800 batches | train loss 0.4610232 +| epoch 5 | 719/ 2800 batches | train loss 0.3621545 +| epoch 5 | 723/ 2800 batches | train loss 0.4880182 +| epoch 5 | 727/ 2800 batches | train loss 0.4115984 +| epoch 5 | 731/ 2800 batches | train loss 0.4533382 +| epoch 5 | 735/ 2800 batches | train loss 0.3974121 +| epoch 5 | 739/ 2800 batches | train loss 0.3006637 +| epoch 5 | 743/ 2800 batches | train loss 0.4582469 +| epoch 5 | 747/ 2800 batches | train loss 0.4604970 +| epoch 5 | 751/ 2800 batches | train loss 0.3306727 +| epoch 5 | 755/ 2800 batches | train loss 0.4358575 +| epoch 5 | 759/ 2800 batches | train loss 0.4498738 +| epoch 5 | 763/ 2800 batches | train loss 0.4221900 +| epoch 5 | 767/ 2800 batches | train loss 0.4795816 +| epoch 5 | 771/ 2800 batches | train loss 0.3536854 +| epoch 5 | 775/ 2800 batches | train loss 0.3636625 +| epoch 5 | 779/ 2800 batches | train loss 0.3492759 +| epoch 5 | 783/ 2800 batches | train loss 0.5103890 +| epoch 5 | 787/ 2800 batches | train loss 0.4191924 +| epoch 5 | 791/ 2800 batches | train loss 0.2886751 +| epoch 5 | 795/ 2800 batches | train loss 0.4033983 +| epoch 5 | 799/ 2800 batches | train loss 0.3561582 +| epoch 5 | 803/ 2800 batches | train loss 0.4364904 +| epoch 5 | 807/ 2800 batches | train loss 0.4055197 +| epoch 5 | 811/ 2800 batches | train loss 0.4113516 +| epoch 5 | 815/ 2800 batches | train loss 0.4034260 +| epoch 5 | 819/ 2800 batches | train loss 0.4272191 +| epoch 5 | 823/ 2800 batches | train loss 0.3961577 +| epoch 5 | 827/ 2800 batches | train loss 0.4093665 +| epoch 5 | 831/ 2800 batches | train loss 0.4832522 +| epoch 5 | 835/ 2800 batches | train loss 0.3919676 +| epoch 5 | 839/ 2800 batches | train loss 0.4924660 +| epoch 5 | 843/ 2800 batches | train loss 0.1573388 +| epoch 5 | 847/ 2800 batches | train loss 0.4207276 +| epoch 5 | 851/ 2800 batches | train loss 0.3852314 +| epoch 5 | 855/ 2800 batches | train loss 0.3871254 +| epoch 5 | 859/ 2800 batches | train loss 0.3771343 +| epoch 5 | 863/ 2800 batches | train loss 0.4078764 +| epoch 5 | 867/ 2800 batches | train loss 0.4570614 +| epoch 5 | 871/ 2800 batches | train loss 0.4507237 +| epoch 5 | 875/ 2800 batches | train loss 0.4049167 +| epoch 5 | 879/ 2800 batches | train loss 0.3801212 +| epoch 5 | 883/ 2800 batches | train loss 0.3460361 +| epoch 5 | 887/ 2800 batches | train loss 0.4615175 +| epoch 5 | 891/ 2800 batches | train loss 0.4390040 +| epoch 5 | 895/ 2800 batches | train loss 0.4731463 +| epoch 5 | 899/ 2800 batches | train loss 0.5038010 +| epoch 5 | 903/ 2800 batches | train loss 0.5227344 +| epoch 5 | 907/ 2800 batches | train loss 0.4191123 +| epoch 5 | 911/ 2800 batches | train loss 0.3948570 +| epoch 5 | 915/ 2800 batches | train loss 0.4298696 +| epoch 5 | 919/ 2800 batches | train loss 0.3994861 +| epoch 5 | 923/ 2800 batches | train loss 0.4752688 +| epoch 5 | 927/ 2800 batches | train loss 0.3905115 +| epoch 5 | 931/ 2800 batches | train loss 0.4299044 +| epoch 5 | 935/ 2800 batches | train loss 0.3917199 +| epoch 5 | 939/ 2800 batches | train loss 0.3781352 +| epoch 5 | 943/ 2800 batches | train loss 0.4396688 +| epoch 5 | 947/ 2800 batches | train loss 0.4316940 +| epoch 5 | 951/ 2800 batches | train loss 0.4386109 +| epoch 5 | 955/ 2800 batches | train loss 0.3892777 +| epoch 5 | 959/ 2800 batches | train loss 0.3861567 +| epoch 5 | 963/ 2800 batches | train loss 0.3172169 +| epoch 5 | 967/ 2800 batches | train loss 0.3830306 +| epoch 5 | 971/ 2800 batches | train loss 0.3857433 +| epoch 5 | 975/ 2800 batches | train loss 0.4642936 +| epoch 5 | 979/ 2800 batches | train loss 0.3475411 +| epoch 5 | 983/ 2800 batches | train loss 0.4253039 +| epoch 5 | 987/ 2800 batches | train loss 0.4329828 +| epoch 5 | 991/ 2800 batches | train loss 0.5118235 +| epoch 5 | 995/ 2800 batches | train loss 0.4038511 +| epoch 5 | 999/ 2800 batches | train loss 0.4290341 +| epoch 5 | 1003/ 2800 batches | train loss 0.4167790 +| epoch 5 | 1007/ 2800 batches | train loss 0.4141878 +| epoch 5 | 1011/ 2800 batches | train loss 0.5164765 +| epoch 5 | 1015/ 2800 batches | train loss 0.5091793 +| epoch 5 | 1019/ 2800 batches | train loss 0.4021771 +| epoch 5 | 1023/ 2800 batches | train loss 0.4520007 +| epoch 5 | 1027/ 2800 batches | train loss 0.3425144 +| epoch 5 | 1031/ 2800 batches | train loss 0.4754259 +| epoch 5 | 1035/ 2800 batches | train loss 0.3870310 +| epoch 5 | 1039/ 2800 batches | train loss 0.4548271 +| epoch 5 | 1043/ 2800 batches | train loss 0.4447088 +| epoch 5 | 1047/ 2800 batches | train loss 0.3765981 +| epoch 5 | 1051/ 2800 batches | train loss 0.4001952 +| epoch 5 | 1055/ 2800 batches | train loss 0.4083785 +| epoch 5 | 1059/ 2800 batches | train loss 0.5214144 +| epoch 5 | 1063/ 2800 batches | train loss 0.4083799 +| epoch 5 | 1067/ 2800 batches | train loss 0.4021823 +| epoch 5 | 1071/ 2800 batches | train loss 0.4162305 +| epoch 5 | 1075/ 2800 batches | train loss 0.3685730 +| epoch 5 | 1079/ 2800 batches | train loss 0.3604925 +| epoch 5 | 1083/ 2800 batches | train loss 0.4461686 +| epoch 5 | 1087/ 2800 batches | train loss 0.3420826 +| epoch 5 | 1091/ 2800 batches | train loss 0.3983757 +| epoch 5 | 1095/ 2800 batches | train loss 0.4221064 +| epoch 5 | 1099/ 2800 batches | train loss 0.4023090 +| epoch 5 | 1103/ 2800 batches | train loss 0.3801173 +| epoch 5 | 1107/ 2800 batches | train loss 0.5417129 +| epoch 5 | 1111/ 2800 batches | train loss 0.4213092 +| epoch 5 | 1115/ 2800 batches | train loss 0.3856134 +| epoch 5 | 1119/ 2800 batches | train loss 0.3369890 +| epoch 5 | 1123/ 2800 batches | train loss 0.4813277 +| epoch 5 | 1127/ 2800 batches | train loss 0.4375792 +| epoch 5 | 1131/ 2800 batches | train loss 0.3547794 +| epoch 5 | 1135/ 2800 batches | train loss 0.4352936 +| epoch 5 | 1139/ 2800 batches | train loss 0.4262182 +| epoch 5 | 1143/ 2800 batches | train loss 0.3390806 +| epoch 5 | 1147/ 2800 batches | train loss 0.3702293 +| epoch 5 | 1151/ 2800 batches | train loss 0.3409397 +| epoch 5 | 1155/ 2800 batches | train loss 0.4443011 +| epoch 5 | 1159/ 2800 batches | train loss 0.3627850 +| epoch 5 | 1163/ 2800 batches | train loss 0.3560613 +| epoch 5 | 1167/ 2800 batches | train loss 0.3896838 +| epoch 5 | 1171/ 2800 batches | train loss 0.3864224 +| epoch 5 | 1175/ 2800 batches | train loss 0.3743239 +| epoch 5 | 1179/ 2800 batches | train loss 0.4581582 +| epoch 5 | 1183/ 2800 batches | train loss 0.4051726 +| epoch 5 | 1187/ 2800 batches | train loss 0.4670490 +| epoch 5 | 1191/ 2800 batches | train loss 0.3823254 +| epoch 5 | 1195/ 2800 batches | train loss 0.3512160 +| epoch 5 | 1199/ 2800 batches | train loss 0.4096496 +| epoch 5 | 1203/ 2800 batches | train loss 0.3794620 +| epoch 5 | 1207/ 2800 batches | train loss 0.5081843 +| epoch 5 | 1211/ 2800 batches | train loss 0.3119145 +| epoch 5 | 1215/ 2800 batches | train loss 0.4220861 +| epoch 5 | 1219/ 2800 batches | train loss 0.4072877 +| epoch 5 | 1223/ 2800 batches | train loss 0.4375113 +| epoch 5 | 1227/ 2800 batches | train loss 0.4039557 +| epoch 5 | 1231/ 2800 batches | train loss 0.4506795 +| epoch 5 | 1235/ 2800 batches | train loss 0.3730647 +| epoch 5 | 1239/ 2800 batches | train loss 0.4168418 +| epoch 5 | 1243/ 2800 batches | train loss 0.4748657 +| epoch 5 | 1247/ 2800 batches | train loss 0.3903334 +| epoch 5 | 1251/ 2800 batches | train loss 0.4157518 +| epoch 5 | 1255/ 2800 batches | train loss 0.4127036 +| epoch 5 | 1259/ 2800 batches | train loss 0.4026989 +| epoch 5 | 1263/ 2800 batches | train loss 0.5519112 +| epoch 5 | 1267/ 2800 batches | train loss 0.4176034 +| epoch 5 | 1271/ 2800 batches | train loss 0.2656506 +| epoch 5 | 1275/ 2800 batches | train loss 0.4526432 +| epoch 5 | 1279/ 2800 batches | train loss 0.3423672 +| epoch 5 | 1283/ 2800 batches | train loss 0.3548142 +| epoch 5 | 1287/ 2800 batches | train loss 0.3478134 +| epoch 5 | 1291/ 2800 batches | train loss 0.4252136 +| epoch 5 | 1295/ 2800 batches | train loss 0.4250453 +| epoch 5 | 1299/ 2800 batches | train loss 0.4275368 +| epoch 5 | 1303/ 2800 batches | train loss 0.4178499 +| epoch 5 | 1307/ 2800 batches | train loss 0.4417329 +| epoch 5 | 1311/ 2800 batches | train loss 0.3592115 +| epoch 5 | 1315/ 2800 batches | train loss 0.4301079 +| epoch 5 | 1319/ 2800 batches | train loss 0.4092982 +| epoch 5 | 1323/ 2800 batches | train loss 0.3531408 +| epoch 5 | 1327/ 2800 batches | train loss 0.4269395 +| epoch 5 | 1331/ 2800 batches | train loss 0.4461159 +| epoch 5 | 1335/ 2800 batches | train loss 0.4140298 +| epoch 5 | 1339/ 2800 batches | train loss 0.3589274 +| epoch 5 | 1343/ 2800 batches | train loss 0.3636680 +| epoch 5 | 1347/ 2800 batches | train loss 0.4468865 +| epoch 5 | 1351/ 2800 batches | train loss 0.3412932 +| epoch 5 | 1355/ 2800 batches | train loss 0.4843794 +| epoch 5 | 1359/ 2800 batches | train loss 0.4650464 +| epoch 5 | 1363/ 2800 batches | train loss 0.4188645 +| epoch 5 | 1367/ 2800 batches | train loss 0.3756415 +| epoch 5 | 1371/ 2800 batches | train loss 0.4437618 +| epoch 5 | 1375/ 2800 batches | train loss 0.5518322 +| epoch 5 | 1379/ 2800 batches | train loss 0.4466232 +| epoch 5 | 1383/ 2800 batches | train loss 0.3869067 +| epoch 5 | 1387/ 2800 batches | train loss 0.3841470 +| epoch 5 | 1391/ 2800 batches | train loss 0.4843955 +| epoch 5 | 1395/ 2800 batches | train loss 0.3874288 +| epoch 5 | 1399/ 2800 batches | train loss 0.4290257 +| epoch 5 | 1403/ 2800 batches | train loss 0.3586856 +| epoch 5 | 1407/ 2800 batches | train loss 0.4080085 +| epoch 5 | 1411/ 2800 batches | train loss 0.3535740 +| epoch 5 | 1415/ 2800 batches | train loss 0.3760005 +| epoch 5 | 1419/ 2800 batches | train loss 0.4323970 +| epoch 5 | 1423/ 2800 batches | train loss 0.3955452 +| epoch 5 | 1427/ 2800 batches | train loss 0.4795513 +| epoch 5 | 1431/ 2800 batches | train loss 0.3799176 +| epoch 5 | 1435/ 2800 batches | train loss 0.3945886 +| epoch 5 | 1439/ 2800 batches | train loss 0.4275368 +| epoch 5 | 1443/ 2800 batches | train loss 0.4095129 +| epoch 5 | 1447/ 2800 batches | train loss 0.4886277 +| epoch 5 | 1451/ 2800 batches | train loss 0.3846238 +| epoch 5 | 1455/ 2800 batches | train loss 0.4120297 +| epoch 5 | 1459/ 2800 batches | train loss 0.4056121 +| epoch 5 | 1463/ 2800 batches | train loss 0.3057043 +| epoch 5 | 1467/ 2800 batches | train loss 0.4621797 +| epoch 5 | 1471/ 2800 batches | train loss 0.3582169 +| epoch 5 | 1475/ 2800 batches | train loss 0.4194845 +| epoch 5 | 1479/ 2800 batches | train loss 0.4508489 +| epoch 5 | 1483/ 2800 batches | train loss 0.4553166 +| epoch 5 | 1487/ 2800 batches | train loss 0.4137121 +| epoch 5 | 1491/ 2800 batches | train loss 0.4081466 +| epoch 5 | 1495/ 2800 batches | train loss 0.4182070 +| epoch 5 | 1499/ 2800 batches | train loss 0.4129955 +| epoch 5 | 1503/ 2800 batches | train loss 0.3617932 +| epoch 5 | 1507/ 2800 batches | train loss 0.3902134 +| epoch 5 | 1511/ 2800 batches | train loss 0.3573759 +| epoch 5 | 1515/ 2800 batches | train loss 0.2975397 +| epoch 5 | 1519/ 2800 batches | train loss 0.3158749 +| epoch 5 | 1523/ 2800 batches | train loss 0.4897681 +| epoch 5 | 1527/ 2800 batches | train loss 0.4487871 +| epoch 5 | 1531/ 2800 batches | train loss 0.3814488 +| epoch 5 | 1535/ 2800 batches | train loss 0.4958942 +| epoch 5 | 1539/ 2800 batches | train loss 0.3654239 +| epoch 5 | 1543/ 2800 batches | train loss 0.3365416 +| epoch 5 | 1547/ 2800 batches | train loss 0.3913418 +| epoch 5 | 1551/ 2800 batches | train loss 0.4841289 +| epoch 5 | 1555/ 2800 batches | train loss 0.4060117 +| epoch 5 | 1559/ 2800 batches | train loss 0.4191172 +| epoch 5 | 1563/ 2800 batches | train loss 0.4188219 +| epoch 5 | 1567/ 2800 batches | train loss 0.4070101 +| epoch 5 | 1571/ 2800 batches | train loss 0.4627681 +| epoch 5 | 1575/ 2800 batches | train loss 0.3295499 +| epoch 5 | 1579/ 2800 batches | train loss 0.3914023 +| epoch 5 | 1583/ 2800 batches | train loss 0.4149337 +| epoch 5 | 1587/ 2800 batches | train loss 0.3905392 +| epoch 5 | 1591/ 2800 batches | train loss 0.3482262 +| epoch 5 | 1595/ 2800 batches | train loss 0.3831239 +| epoch 5 | 1599/ 2800 batches | train loss 0.3988171 +| epoch 5 | 1603/ 2800 batches | train loss 0.3849890 +| epoch 5 | 1607/ 2800 batches | train loss 0.4039187 +| epoch 5 | 1611/ 2800 batches | train loss 0.3980380 +| epoch 5 | 1615/ 2800 batches | train loss 0.4146959 +| epoch 5 | 1619/ 2800 batches | train loss 0.5812357 +| epoch 5 | 1623/ 2800 batches | train loss 0.3437328 +| epoch 5 | 1627/ 2800 batches | train loss 0.3561834 +| epoch 5 | 1631/ 2800 batches | train loss 0.3973020 +| epoch 5 | 1635/ 2800 batches | train loss 0.4435305 +| epoch 5 | 1639/ 2800 batches | train loss 0.4247757 +| epoch 5 | 1643/ 2800 batches | train loss 0.3507839 +| epoch 5 | 1647/ 2800 batches | train loss 0.4154149 +| epoch 5 | 1651/ 2800 batches | train loss 0.4194526 +| epoch 5 | 1655/ 2800 batches | train loss 0.4719496 +| epoch 5 | 1659/ 2800 batches | train loss 0.3654176 +| epoch 5 | 1663/ 2800 batches | train loss 0.4096958 +| epoch 5 | 1667/ 2800 batches | train loss 0.4574173 +| epoch 5 | 1671/ 2800 batches | train loss 0.5491189 +| epoch 5 | 1675/ 2800 batches | train loss 0.4159433 +| epoch 5 | 1679/ 2800 batches | train loss 0.4658981 +| epoch 5 | 1683/ 2800 batches | train loss 0.4126450 +| epoch 5 | 1687/ 2800 batches | train loss 0.3928214 +| epoch 5 | 1691/ 2800 batches | train loss 0.4137586 +| epoch 5 | 1695/ 2800 batches | train loss 0.3769796 +| epoch 5 | 1699/ 2800 batches | train loss 0.3925887 +| epoch 5 | 1703/ 2800 batches | train loss 0.4471868 +| epoch 5 | 1707/ 2800 batches | train loss 0.4175402 +| epoch 5 | 1711/ 2800 batches | train loss 0.3805140 +| epoch 5 | 1715/ 2800 batches | train loss 0.3650576 +| epoch 5 | 1719/ 2800 batches | train loss 0.4037941 +| epoch 5 | 1723/ 2800 batches | train loss 0.4102476 +| epoch 5 | 1727/ 2800 batches | train loss 0.3570808 +| epoch 5 | 1731/ 2800 batches | train loss 0.3965490 +| epoch 5 | 1735/ 2800 batches | train loss 0.4140968 +| epoch 5 | 1739/ 2800 batches | train loss 0.4174324 +| epoch 5 | 1743/ 2800 batches | train loss 0.3665716 +| epoch 5 | 1747/ 2800 batches | train loss 0.3573655 +| epoch 5 | 1751/ 2800 batches | train loss 0.4524181 +| epoch 5 | 1755/ 2800 batches | train loss 0.4835252 +| epoch 5 | 1759/ 2800 batches | train loss 0.3546021 +| epoch 5 | 1763/ 2800 batches | train loss 0.4726351 +| epoch 5 | 1767/ 2800 batches | train loss 0.4677343 +| epoch 5 | 1771/ 2800 batches | train loss 0.5125272 +| epoch 5 | 1775/ 2800 batches | train loss 0.3831500 +| epoch 5 | 1779/ 2800 batches | train loss 0.4474675 +| epoch 5 | 1783/ 2800 batches | train loss 0.4126096 +| epoch 5 | 1787/ 2800 batches | train loss 0.4264260 +| epoch 5 | 1791/ 2800 batches | train loss 0.4569139 +| epoch 5 | 1795/ 2800 batches | train loss 0.3724079 +| epoch 5 | 1799/ 2800 batches | train loss 0.3629231 +| epoch 5 | 1803/ 2800 batches | train loss 0.3656326 +| epoch 5 | 1807/ 2800 batches | train loss 0.4096439 +| epoch 5 | 1811/ 2800 batches | train loss 0.3984789 +| epoch 5 | 1815/ 2800 batches | train loss 0.4119842 +| epoch 5 | 1819/ 2800 batches | train loss 0.2094076 +| epoch 5 | 1823/ 2800 batches | train loss 0.4053899 +| epoch 5 | 1827/ 2800 batches | train loss 0.3892902 +| epoch 5 | 1831/ 2800 batches | train loss 0.3742354 +| epoch 5 | 1835/ 2800 batches | train loss 0.4873109 +| epoch 5 | 1839/ 2800 batches | train loss 0.3878035 +| epoch 5 | 1843/ 2800 batches | train loss 0.3908200 +| epoch 5 | 1847/ 2800 batches | train loss 0.4494884 +| epoch 5 | 1851/ 2800 batches | train loss 0.3738004 +| epoch 5 | 1855/ 2800 batches | train loss 0.3862329 +| epoch 5 | 1859/ 2800 batches | train loss 0.3688470 +| epoch 5 | 1863/ 2800 batches | train loss 0.4077012 +| epoch 5 | 1867/ 2800 batches | train loss 0.3769209 +| epoch 5 | 1871/ 2800 batches | train loss 0.4787306 +| epoch 5 | 1875/ 2800 batches | train loss 0.4903888 +| epoch 5 | 1879/ 2800 batches | train loss 0.4000450 +| epoch 5 | 1883/ 2800 batches | train loss 0.3438305 +| epoch 5 | 1887/ 2800 batches | train loss 0.4573304 +| epoch 5 | 1891/ 2800 batches | train loss 0.4682743 +| epoch 5 | 1895/ 2800 batches | train loss 0.4829013 +| epoch 5 | 1899/ 2800 batches | train loss 0.4632179 +| epoch 5 | 1903/ 2800 batches | train loss 0.4989810 +| epoch 5 | 1907/ 2800 batches | train loss 0.4992064 +| epoch 5 | 1911/ 2800 batches | train loss 0.3652018 +| epoch 5 | 1915/ 2800 batches | train loss 0.3652729 +| epoch 5 | 1919/ 2800 batches | train loss 0.4552694 +| epoch 5 | 1923/ 2800 batches | train loss 0.3636026 +| epoch 5 | 1927/ 2800 batches | train loss 0.5446275 +| epoch 5 | 1931/ 2800 batches | train loss 0.4418726 +| epoch 5 | 1935/ 2800 batches | train loss 0.5393034 +| epoch 5 | 1939/ 2800 batches | train loss 0.4182689 +| epoch 5 | 1943/ 2800 batches | train loss 0.4418222 +| epoch 5 | 1947/ 2800 batches | train loss 0.4353607 +| epoch 5 | 1951/ 2800 batches | train loss 0.3208365 +| epoch 5 | 1955/ 2800 batches | train loss 0.3597898 +| epoch 5 | 1959/ 2800 batches | train loss 0.4219513 +| epoch 5 | 1963/ 2800 batches | train loss 0.4695338 +| epoch 5 | 1967/ 2800 batches | train loss 0.5033591 +| epoch 5 | 1971/ 2800 batches | train loss 0.4865420 +| epoch 5 | 1975/ 2800 batches | train loss 0.4180202 +| epoch 5 | 1979/ 2800 batches | train loss 0.4661372 +| epoch 5 | 1983/ 2800 batches | train loss 0.4270057 +| epoch 5 | 1987/ 2800 batches | train loss 0.4256161 +| epoch 5 | 1991/ 2800 batches | train loss 0.4109860 +| epoch 5 | 1995/ 2800 batches | train loss 0.3436701 +| epoch 5 | 1999/ 2800 batches | train loss 0.3747688 +| epoch 5 | 2003/ 2800 batches | train loss 0.4057989 +| epoch 5 | 2007/ 2800 batches | train loss 0.4703327 +| epoch 5 | 2011/ 2800 batches | train loss 0.4113738 +| epoch 5 | 2015/ 2800 batches | train loss 0.4134963 +| epoch 5 | 2019/ 2800 batches | train loss 0.4117139 +| epoch 5 | 2023/ 2800 batches | train loss 0.4365504 +| epoch 5 | 2027/ 2800 batches | train loss 0.4276523 +| epoch 5 | 2031/ 2800 batches | train loss 0.5301428 +| epoch 5 | 2035/ 2800 batches | train loss 0.4193190 +| epoch 5 | 2039/ 2800 batches | train loss 0.4116669 +| epoch 5 | 2043/ 2800 batches | train loss 0.4692869 +| epoch 5 | 2047/ 2800 batches | train loss 0.4270789 +| epoch 5 | 2051/ 2800 batches | train loss 0.3895268 +| epoch 5 | 2055/ 2800 batches | train loss 0.4556011 +| epoch 5 | 2059/ 2800 batches | train loss 0.4354300 +| epoch 5 | 2063/ 2800 batches | train loss 0.4244687 +| epoch 5 | 2067/ 2800 batches | train loss 0.3838458 +| epoch 5 | 2071/ 2800 batches | train loss 0.4172718 +| epoch 5 | 2075/ 2800 batches | train loss 0.4389495 +| epoch 5 | 2079/ 2800 batches | train loss 0.4285513 +| epoch 5 | 2083/ 2800 batches | train loss 0.4536281 +| epoch 5 | 2087/ 2800 batches | train loss 0.4710895 +| epoch 5 | 2091/ 2800 batches | train loss 0.3920019 +| epoch 5 | 2095/ 2800 batches | train loss 0.3812258 +| epoch 5 | 2099/ 2800 batches | train loss 0.4510913 +| epoch 5 | 2103/ 2800 batches | train loss 0.4156591 +| epoch 5 | 2107/ 2800 batches | train loss 0.4298744 +| epoch 5 | 2111/ 2800 batches | train loss 0.3932872 +| epoch 5 | 2115/ 2800 batches | train loss 0.4631914 +| epoch 5 | 2119/ 2800 batches | train loss 0.4191380 +| epoch 5 | 2123/ 2800 batches | train loss 0.4169301 +| epoch 5 | 2127/ 2800 batches | train loss 0.4608860 +| epoch 5 | 2131/ 2800 batches | train loss 0.3949450 +| epoch 5 | 2135/ 2800 batches | train loss 0.4379472 +| epoch 5 | 2139/ 2800 batches | train loss 0.4030553 +| epoch 5 | 2143/ 2800 batches | train loss 0.3727344 +| epoch 5 | 2147/ 2800 batches | train loss 0.4677110 +| epoch 5 | 2151/ 2800 batches | train loss 0.4540632 +| epoch 5 | 2155/ 2800 batches | train loss 0.4044628 +| epoch 5 | 2159/ 2800 batches | train loss 0.3508415 +| epoch 5 | 2163/ 2800 batches | train loss 0.3848715 +| epoch 5 | 2167/ 2800 batches | train loss 0.4729264 +| epoch 5 | 2171/ 2800 batches | train loss 0.4100918 +| epoch 5 | 2175/ 2800 batches | train loss 0.4395277 +| epoch 5 | 2179/ 2800 batches | train loss 0.3930213 +| epoch 5 | 2183/ 2800 batches | train loss 0.3615819 +| epoch 5 | 2187/ 2800 batches | train loss 0.4146310 +| epoch 5 | 2191/ 2800 batches | train loss 0.4591349 +| epoch 5 | 2195/ 2800 batches | train loss 0.4194365 +| epoch 5 | 2199/ 2800 batches | train loss 0.4327492 +| epoch 5 | 2203/ 2800 batches | train loss 0.3539978 +| epoch 5 | 2207/ 2800 batches | train loss 0.4363554 +| epoch 5 | 2211/ 2800 batches | train loss 0.3834204 +| epoch 5 | 2215/ 2800 batches | train loss 0.4147828 +| epoch 5 | 2219/ 2800 batches | train loss 0.4721999 +| epoch 5 | 2223/ 2800 batches | train loss 0.3674442 +| epoch 5 | 2227/ 2800 batches | train loss 0.4006568 +| epoch 5 | 2231/ 2800 batches | train loss 0.4841483 +| epoch 5 | 2235/ 2800 batches | train loss 0.4142972 +| epoch 5 | 2239/ 2800 batches | train loss 0.5075034 +| epoch 5 | 2243/ 2800 batches | train loss 0.5659838 +| epoch 5 | 2247/ 2800 batches | train loss 0.4239153 +| epoch 5 | 2251/ 2800 batches | train loss 0.3838190 +| epoch 5 | 2255/ 2800 batches | train loss 0.4031565 +| epoch 5 | 2259/ 2800 batches | train loss 0.3540463 +| epoch 5 | 2263/ 2800 batches | train loss 0.4473625 +| epoch 5 | 2267/ 2800 batches | train loss 0.3992963 +| epoch 5 | 2271/ 2800 batches | train loss 0.3282120 +| epoch 5 | 2275/ 2800 batches | train loss 0.4798153 +| epoch 5 | 2279/ 2800 batches | train loss 0.4229521 +| epoch 5 | 2283/ 2800 batches | train loss 0.4200797 +| epoch 5 | 2287/ 2800 batches | train loss 0.5206702 +| epoch 5 | 2291/ 2800 batches | train loss 0.4103613 +| epoch 5 | 2295/ 2800 batches | train loss 0.4668134 +| epoch 5 | 2299/ 2800 batches | train loss 0.4425214 +| epoch 5 | 2303/ 2800 batches | train loss 0.4328211 +| epoch 5 | 2307/ 2800 batches | train loss 0.4138513 +| epoch 5 | 2311/ 2800 batches | train loss 0.4531565 +| epoch 5 | 2315/ 2800 batches | train loss 0.3770909 +| epoch 5 | 2319/ 2800 batches | train loss 0.4277920 +| epoch 5 | 2323/ 2800 batches | train loss 0.4153822 +| epoch 5 | 2327/ 2800 batches | train loss 0.4537163 +| epoch 5 | 2331/ 2800 batches | train loss 0.4749673 +| epoch 5 | 2335/ 2800 batches | train loss 0.4000696 +| epoch 5 | 2339/ 2800 batches | train loss 0.3748748 +| epoch 5 | 2343/ 2800 batches | train loss 0.2685018 +| epoch 5 | 2347/ 2800 batches | train loss 0.3719286 +| epoch 5 | 2351/ 2800 batches | train loss 0.3639136 +| epoch 5 | 2355/ 2800 batches | train loss 0.4481221 +| epoch 5 | 2359/ 2800 batches | train loss 0.3761613 +| epoch 5 | 2363/ 2800 batches | train loss 0.3910609 +| epoch 5 | 2367/ 2800 batches | train loss 0.4412404 +| epoch 5 | 2371/ 2800 batches | train loss 0.4100659 +| epoch 5 | 2375/ 2800 batches | train loss 0.5066139 +| epoch 5 | 2379/ 2800 batches | train loss 0.3456586 +| epoch 5 | 2383/ 2800 batches | train loss 0.4628463 +| epoch 5 | 2387/ 2800 batches | train loss 0.3817035 +| epoch 5 | 2391/ 2800 batches | train loss 0.4733225 +| epoch 5 | 2395/ 2800 batches | train loss 0.4232987 +| epoch 5 | 2399/ 2800 batches | train loss 0.3422531 +| epoch 5 | 2403/ 2800 batches | train loss 0.5629946 +| epoch 5 | 2407/ 2800 batches | train loss 0.4180157 +| epoch 5 | 2411/ 2800 batches | train loss 0.3302338 +| epoch 5 | 2415/ 2800 batches | train loss 0.4563596 +| epoch 5 | 2419/ 2800 batches | train loss 0.3883771 +| epoch 5 | 2423/ 2800 batches | train loss 0.3007491 +| epoch 5 | 2427/ 2800 batches | train loss 0.4700952 +| epoch 5 | 2431/ 2800 batches | train loss 0.4843003 +| epoch 5 | 2435/ 2800 batches | train loss 0.4154576 +| epoch 5 | 2439/ 2800 batches | train loss 0.5003462 +| epoch 5 | 2443/ 2800 batches | train loss 0.3919440 +| epoch 5 | 2447/ 2800 batches | train loss 0.4547766 +| epoch 5 | 2451/ 2800 batches | train loss 0.3826733 +| epoch 5 | 2455/ 2800 batches | train loss 0.3666398 +| epoch 5 | 2459/ 2800 batches | train loss 0.4567145 +| epoch 5 | 2463/ 2800 batches | train loss 0.3855523 +| epoch 5 | 2467/ 2800 batches | train loss 0.4207748 +| epoch 5 | 2471/ 2800 batches | train loss 0.3969280 +| epoch 5 | 2475/ 2800 batches | train loss 0.4306755 +| epoch 5 | 2479/ 2800 batches | train loss 0.4411299 +| epoch 5 | 2483/ 2800 batches | train loss 0.3742284 +| epoch 5 | 2487/ 2800 batches | train loss 0.3919124 +| epoch 5 | 2491/ 2800 batches | train loss 0.3721779 +| epoch 5 | 2495/ 2800 batches | train loss 0.3302924 +| epoch 5 | 2499/ 2800 batches | train loss 0.3972465 +| epoch 5 | 2503/ 2800 batches | train loss 0.3807420 +| epoch 5 | 2507/ 2800 batches | train loss 0.4670558 +| epoch 5 | 2511/ 2800 batches | train loss 0.5555319 +| epoch 5 | 2515/ 2800 batches | train loss 0.3792816 +| epoch 5 | 2519/ 2800 batches | train loss 0.4070380 +| epoch 5 | 2523/ 2800 batches | train loss 0.3918220 +| epoch 5 | 2527/ 2800 batches | train loss 0.4898818 +| epoch 5 | 2531/ 2800 batches | train loss 0.4385929 +| epoch 5 | 2535/ 2800 batches | train loss 0.4144335 +| epoch 5 | 2539/ 2800 batches | train loss 0.3996956 +| epoch 5 | 2543/ 2800 batches | train loss 0.4571822 +| epoch 5 | 2547/ 2800 batches | train loss 0.3999964 +| epoch 5 | 2551/ 2800 batches | train loss 0.5343997 +| epoch 5 | 2555/ 2800 batches | train loss 0.4012869 +| epoch 5 | 2559/ 2800 batches | train loss 0.5088413 +| epoch 5 | 2563/ 2800 batches | train loss 0.4260070 +| epoch 5 | 2567/ 2800 batches | train loss 0.3409905 +| epoch 5 | 2571/ 2800 batches | train loss 0.3851509 +| epoch 5 | 2575/ 2800 batches | train loss 0.4280163 +| epoch 5 | 2579/ 2800 batches | train loss 0.3926377 +| epoch 5 | 2583/ 2800 batches | train loss 0.4385846 +| epoch 5 | 2587/ 2800 batches | train loss 0.4146294 +| epoch 5 | 2591/ 2800 batches | train loss 0.4223447 +| epoch 5 | 2595/ 2800 batches | train loss 0.4314744 +| epoch 5 | 2599/ 2800 batches | train loss 0.5679001 +| epoch 5 | 2603/ 2800 batches | train loss 0.3701520 +| epoch 5 | 2607/ 2800 batches | train loss 0.4253659 +| epoch 5 | 2611/ 2800 batches | train loss 0.4336016 +| epoch 5 | 2615/ 2800 batches | train loss 0.4413290 +| epoch 5 | 2619/ 2800 batches | train loss 0.4794556 +| epoch 5 | 2623/ 2800 batches | train loss 0.4364986 +| epoch 5 | 2627/ 2800 batches | train loss 0.4685789 +| epoch 5 | 2631/ 2800 batches | train loss 0.4327176 +| epoch 5 | 2635/ 2800 batches | train loss 0.3926595 +| epoch 5 | 2639/ 2800 batches | train loss 0.4165643 +| epoch 5 | 2643/ 2800 batches | train loss 0.4722287 +| epoch 5 | 2647/ 2800 batches | train loss 0.3962935 +| epoch 5 | 2651/ 2800 batches | train loss 0.3777874 +| epoch 5 | 2655/ 2800 batches | train loss 0.3294331 +| epoch 5 | 2659/ 2800 batches | train loss 0.3409679 +| epoch 5 | 2663/ 2800 batches | train loss 0.3982848 +| epoch 5 | 2667/ 2800 batches | train loss 0.4954495 +| epoch 5 | 2671/ 2800 batches | train loss 0.3694321 +| epoch 5 | 2675/ 2800 batches | train loss 0.3775035 +| epoch 5 | 2679/ 2800 batches | train loss 0.3466995 +| epoch 5 | 2683/ 2800 batches | train loss 0.3595048 +| epoch 5 | 2687/ 2800 batches | train loss 0.4046527 +| epoch 5 | 2691/ 2800 batches | train loss 0.4217512 +| epoch 5 | 2695/ 2800 batches | train loss 0.4016856 +| epoch 5 | 2699/ 2800 batches | train loss 0.4317846 +| epoch 5 | 2703/ 2800 batches | train loss 0.4044623 +| epoch 5 | 2707/ 2800 batches | train loss 0.5230591 +| epoch 5 | 2711/ 2800 batches | train loss 0.4830320 +| epoch 5 | 2715/ 2800 batches | train loss 0.4308186 +| epoch 5 | 2719/ 2800 batches | train loss 0.4097176 +| epoch 5 | 2723/ 2800 batches | train loss 0.4464365 +| epoch 5 | 2727/ 2800 batches | train loss 0.3587511 +| epoch 5 | 2731/ 2800 batches | train loss 0.3777575 +| epoch 5 | 2735/ 2800 batches | train loss 0.3932522 +| epoch 5 | 2739/ 2800 batches | train loss 0.4238059 +| epoch 5 | 2743/ 2800 batches | train loss 0.4400680 +| epoch 5 | 2747/ 2800 batches | train loss 0.4034806 +| epoch 5 | 2751/ 2800 batches | train loss 0.5110314 +| epoch 5 | 2755/ 2800 batches | train loss 0.4370120 +| epoch 5 | 2759/ 2800 batches | train loss 0.4319288 +| epoch 5 | 2763/ 2800 batches | train loss 0.5152006 +| epoch 5 | 2767/ 2800 batches | train loss 0.5174773 +| epoch 5 | 2771/ 2800 batches | train loss 0.3775792 +| epoch 5 | 2775/ 2800 batches | train loss 0.3373286 +| epoch 5 | 2779/ 2800 batches | train loss 0.3629774 +| epoch 5 | 2783/ 2800 batches | train loss 0.3864515 +| epoch 5 | 2787/ 2800 batches | train loss 0.4613822 +| epoch 5 | 2791/ 2800 batches | train loss 0.4285756 +| epoch 5 | 2795/ 2800 batches | train loss 0.3558514 +| epoch 5 | 2799/ 2800 batches | train loss 0.4877154 +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 2800 batches | test loss 0.6191929 +| epoch 5 | 7/ 2800 batches | test loss 0.3715943 +| epoch 5 | 11/ 2800 batches | test loss 0.4346339 +| epoch 5 | 15/ 2800 batches | test loss 0.4802201 +| epoch 5 | 19/ 2800 batches | test loss 0.5525638 +| epoch 5 | 23/ 2800 batches | test loss 0.5232369 +| epoch 5 | 27/ 2800 batches | test loss 0.4242901 +| epoch 5 | 31/ 2800 batches | test loss 0.4914761 +| epoch 5 | 35/ 2800 batches | test loss 0.4703593 +| epoch 5 | 39/ 2800 batches | test loss 0.4773129 +| epoch 5 | 43/ 2800 batches | test loss 0.3900209 +| epoch 5 | 47/ 2800 batches | test loss 0.4391286 +| epoch 5 | 51/ 2800 batches | test loss 0.4981300 +| epoch 5 | 55/ 2800 batches | test loss 0.4649750 +| epoch 5 | 59/ 2800 batches | test loss 0.4394449 +| epoch 5 | 63/ 2800 batches | test loss 0.4607123 +| epoch 5 | 67/ 2800 batches | test loss 0.4963171 +| epoch 5 | 71/ 2800 batches | test loss 0.5054814 +| epoch 5 | 75/ 2800 batches | test loss 0.3547502 +| epoch 5 | 79/ 2800 batches | test loss 0.4749043 +| epoch 5 | 83/ 2800 batches | test loss 0.4657522 +| epoch 5 | 87/ 2800 batches | test loss 0.4799162 +| epoch 5 | 91/ 2800 batches | test loss 0.4267209 +| epoch 5 | 95/ 2800 batches | test loss 0.4637982 +| epoch 5 | 99/ 2800 batches | test loss 0.5488644 +| epoch 5 | 103/ 2800 batches | test loss 0.4335110 +| epoch 5 | 107/ 2800 batches | test loss 0.4263653 +| epoch 5 | 111/ 2800 batches | test loss 0.3994005 +| epoch 5 | 115/ 2800 batches | test loss 0.4634360 +| epoch 5 | 119/ 2800 batches | test loss 0.5529593 +| epoch 5 | 123/ 2800 batches | test loss 0.3561338 +| epoch 5 | 127/ 2800 batches | test loss 0.4154443 +| epoch 5 | 131/ 2800 batches | test loss 0.4463313 +| epoch 5 | 135/ 2800 batches | test loss 0.3803678 +| epoch 5 | 139/ 2800 batches | test loss 0.3357319 +| epoch 5 | 143/ 2800 batches | test loss 0.3952062 +| epoch 5 | 147/ 2800 batches | test loss 0.4413412 +| epoch 5 | 151/ 2800 batches | test loss 0.4636185 +| epoch 5 | 155/ 2800 batches | test loss 0.4264198 +| epoch 5 | 159/ 2800 batches | test loss 0.4570908 +| epoch 5 | 163/ 2800 batches | test loss 0.4013911 +| epoch 5 | 167/ 2800 batches | test loss 0.5058808 +| epoch 5 | 171/ 2800 batches | test loss 0.4270197 +| epoch 5 | 175/ 2800 batches | test loss 0.4753033 +| epoch 5 | 179/ 2800 batches | test loss 0.4452955 +| epoch 5 | 183/ 2800 batches | test loss 0.4458138 +| epoch 5 | 187/ 2800 batches | test loss 0.3785246 +| epoch 5 | 191/ 2800 batches | test loss 0.3862970 +| epoch 5 | 195/ 2800 batches | test loss 0.4324162 +| epoch 5 | 199/ 2800 batches | test loss 0.7402608 +| epoch 5 | 203/ 2800 batches | test loss 0.4079690 +| epoch 5 | 207/ 2800 batches | test loss 0.3702506 +| epoch 5 | 211/ 2800 batches | test loss 0.4311636 +| epoch 5 | 215/ 2800 batches | test loss 0.4616240 +| epoch 5 | 219/ 2800 batches | test loss 0.5534256 +| epoch 5 | 223/ 2800 batches | test loss 0.5167660 +| epoch 5 | 227/ 2800 batches | test loss 0.4453467 +| epoch 5 | 231/ 2800 batches | test loss 0.4244707 +| epoch 5 | 235/ 2800 batches | test loss 0.3864086 +| epoch 5 | 239/ 2800 batches | test loss 0.4150688 +| epoch 5 | 243/ 2800 batches | test loss 0.4167249 +| epoch 5 | 247/ 2800 batches | test loss 0.4863200 +| epoch 5 | 251/ 2800 batches | test loss 0.4194565 +| epoch 5 | 255/ 2800 batches | test loss 0.3894130 +| epoch 5 | 259/ 2800 batches | test loss 0.4556327 +| epoch 5 | 263/ 2800 batches | test loss 0.4850999 +| epoch 5 | 267/ 2800 batches | test loss 0.3390546 +| epoch 5 | 271/ 2800 batches | test loss 0.3846282 +| epoch 5 | 275/ 2800 batches | test loss 0.4408176 +| epoch 5 | 279/ 2800 batches | test loss 0.3804339 +| epoch 5 | 283/ 2800 batches | test loss 0.3235270 +| epoch 5 | 287/ 2800 batches | test loss 0.4495099 +| epoch 5 | 291/ 2800 batches | test loss 0.4685192 +| epoch 5 | 295/ 2800 batches | test loss 0.3726254 +| epoch 5 | 299/ 2800 batches | test loss 0.3442868 +| epoch 5 | 303/ 2800 batches | test loss 0.4595016 +| epoch 5 | 307/ 2800 batches | test loss 0.4452932 +| epoch 5 | 311/ 2800 batches | test loss 0.5314672 +| epoch 5 | 315/ 2800 batches | test loss 0.4764813 +| epoch 5 | 319/ 2800 batches | test loss 0.4713638 +| epoch 5 | 323/ 2800 batches | test loss 0.3828391 +| epoch 5 | 327/ 2800 batches | test loss 0.3669626 +| epoch 5 | 331/ 2800 batches | test loss 0.4065652 +| epoch 5 | 335/ 2800 batches | test loss 0.4588050 +| epoch 5 | 339/ 2800 batches | test loss 0.3633033 +| epoch 5 | 343/ 2800 batches | test loss 0.4499413 +| epoch 5 | 347/ 2800 batches | test loss 0.3913086 +| epoch 5 | 351/ 2800 batches | test loss 0.4424137 +| epoch 5 | 355/ 2800 batches | test loss 0.4047118 +| epoch 5 | 359/ 2800 batches | test loss 0.4876114 +| epoch 5 | 363/ 2800 batches | test loss 0.4171421 +| epoch 5 | 367/ 2800 batches | test loss 0.5187622 +| epoch 5 | 371/ 2800 batches | test loss 0.4418567 +| epoch 5 | 375/ 2800 batches | test loss 0.4880649 +| epoch 5 | 379/ 2800 batches | test loss 0.4938836 +| epoch 5 | 383/ 2800 batches | test loss 0.5696413 +| epoch 5 | 387/ 2800 batches | test loss 0.4836606 +| epoch 5 | 391/ 2800 batches | test loss 0.5365367 +| epoch 5 | 395/ 2800 batches | test loss 0.4250683 +| epoch 5 | 399/ 2800 batches | test loss 0.4735498 +| epoch 5 | 403/ 2800 batches | test loss 0.4900396 +| epoch 5 | 407/ 2800 batches | test loss 0.5457038 +| epoch 5 | 411/ 2800 batches | test loss 0.4279185 +| epoch 5 | 415/ 2800 batches | test loss 0.4845679 +| epoch 5 | 419/ 2800 batches | test loss 0.4734716 +| epoch 5 | 423/ 2800 batches | test loss 0.3860150 +| epoch 5 | 427/ 2800 batches | test loss 0.5245735 +| epoch 5 | 431/ 2800 batches | test loss 0.5066794 +| epoch 5 | 435/ 2800 batches | test loss 0.5556289 +| epoch 5 | 439/ 2800 batches | test loss 0.4156290 +| epoch 5 | 443/ 2800 batches | test loss 0.6002981 +| epoch 5 | 447/ 2800 batches | test loss 0.5062312 +| epoch 5 | 451/ 2800 batches | test loss 0.4172767 +| epoch 5 | 455/ 2800 batches | test loss 0.5196486 +| epoch 5 | 459/ 2800 batches | test loss 0.3748465 +| epoch 5 | 463/ 2800 batches | test loss 0.4848760 +| epoch 5 | 467/ 2800 batches | test loss 0.3780059 +| epoch 5 | 471/ 2800 batches | test loss 0.4815592 +| epoch 5 | 475/ 2800 batches | test loss 0.4574282 +| epoch 5 | 479/ 2800 batches | test loss 0.5544189 +| epoch 5 | 483/ 2800 batches | test loss 0.3756416 +| epoch 5 | 487/ 2800 batches | test loss 0.4492126 +| epoch 5 | 491/ 2800 batches | test loss 0.3672001 +| epoch 5 | 495/ 2800 batches | test loss 0.4248043 +| epoch 5 | 499/ 2800 batches | test loss 0.4028297 +| epoch 5 | 503/ 2800 batches | test loss 0.4674823 +| epoch 5 | 507/ 2800 batches | test loss 0.4571869 +| epoch 5 | 511/ 2800 batches | test loss 0.4781469 +| epoch 5 | 515/ 2800 batches | test loss 0.4165836 +| epoch 5 | 519/ 2800 batches | test loss 0.4805354 +| epoch 5 | 523/ 2800 batches | test loss 0.4249217 +| epoch 5 | 527/ 2800 batches | test loss 0.5014155 +| epoch 5 | 531/ 2800 batches | test loss 0.4679318 +| epoch 5 | 535/ 2800 batches | test loss 0.5257229 +| epoch 5 | 539/ 2800 batches | test loss 0.5033573 +| epoch 5 | 543/ 2800 batches | test loss 0.4764312 +| epoch 5 | 547/ 2800 batches | test loss 0.4659438 +| epoch 5 | 551/ 2800 batches | test loss 0.5859892 +| epoch 5 | 555/ 2800 batches | test loss 0.4594808 +| epoch 5 | 559/ 2800 batches | test loss 0.4811697 +| epoch 5 | 563/ 2800 batches | test loss 0.4365583 +| epoch 5 | 567/ 2800 batches | test loss 0.4431944 +| epoch 5 | 571/ 2800 batches | test loss 0.4449312 +| epoch 5 | 575/ 2800 batches | test loss 0.4724479 +| epoch 5 | 579/ 2800 batches | test loss 0.3883981 +| epoch 5 | 583/ 2800 batches | test loss 0.3980494 +| epoch 5 | 587/ 2800 batches | test loss 0.4080851 +| epoch 5 | 591/ 2800 batches | test loss 0.5231883 +| epoch 5 | 595/ 2800 batches | test loss 0.3701268 +| epoch 5 | 599/ 2800 batches | test loss 0.4281744 +| epoch 5 | 603/ 2800 batches | test loss 0.5327054 +| epoch 5 | 607/ 2800 batches | test loss 0.4051557 +| epoch 5 | 611/ 2800 batches | test loss 0.4238441 +| epoch 5 | 615/ 2800 batches | test loss 0.4310258 +| epoch 5 | 619/ 2800 batches | test loss 0.3739243 +| epoch 5 | 623/ 2800 batches | test loss 0.4974566 +| epoch 5 | 627/ 2800 batches | test loss 0.3994256 +| epoch 5 | 631/ 2800 batches | test loss 0.4572163 +| epoch 5 | 635/ 2800 batches | test loss 0.4533792 +| epoch 5 | 639/ 2800 batches | test loss 0.4145175 +| epoch 5 | 643/ 2800 batches | test loss 0.4081367 +| epoch 5 | 647/ 2800 batches | test loss 0.4321409 +| epoch 5 | 651/ 2800 batches | test loss 0.4353990 +| epoch 5 | 655/ 2800 batches | test loss 0.3697620 +| epoch 5 | 659/ 2800 batches | test loss 0.4457775 +| epoch 5 | 663/ 2800 batches | test loss 0.4509129 +| epoch 5 | 667/ 2800 batches | test loss 0.3590881 +| epoch 5 | 671/ 2800 batches | test loss 0.5572060 +| epoch 5 | 675/ 2800 batches | test loss 0.4278007 +| epoch 5 | 679/ 2800 batches | test loss 0.4404930 +| epoch 5 | 683/ 2800 batches | test loss 0.3996302 +| epoch 5 | 687/ 2800 batches | test loss 0.4760506 +| epoch 5 | 691/ 2800 batches | test loss 0.5395583 +| epoch 5 | 695/ 2800 batches | test loss 0.4637029 +| epoch 5 | 699/ 2800 batches | test loss 0.4423940 +| epoch 5 | final test loss 0.4484, do not save model! +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 2800 batches | train loss 0.4509172 +| epoch 6 | 7/ 2800 batches | train loss 0.3992621 +| epoch 6 | 11/ 2800 batches | train loss 0.3905996 +| epoch 6 | 15/ 2800 batches | train loss 0.3398199 +| epoch 6 | 19/ 2800 batches | train loss 0.3908527 +| epoch 6 | 23/ 2800 batches | train loss 0.3167901 +| epoch 6 | 27/ 2800 batches | train loss 0.3702567 +| epoch 6 | 31/ 2800 batches | train loss 0.3914857 +| epoch 6 | 35/ 2800 batches | train loss 0.4437449 +| epoch 6 | 39/ 2800 batches | train loss 0.3911364 +| epoch 6 | 43/ 2800 batches | train loss 0.4295539 +| epoch 6 | 47/ 2800 batches | train loss 0.3653174 +| epoch 6 | 51/ 2800 batches | train loss 0.4153043 +| epoch 6 | 55/ 2800 batches | train loss 0.3659362 +| epoch 6 | 59/ 2800 batches | train loss 0.4365826 +| epoch 6 | 63/ 2800 batches | train loss 0.3880310 +| epoch 6 | 67/ 2800 batches | train loss 0.4746453 +| epoch 6 | 71/ 2800 batches | train loss 0.3282841 +| epoch 6 | 75/ 2800 batches | train loss 0.4568343 +| epoch 6 | 79/ 2800 batches | train loss 0.3637952 +| epoch 6 | 83/ 2800 batches | train loss 0.4009619 +| epoch 6 | 87/ 2800 batches | train loss 0.4099206 +| epoch 6 | 91/ 2800 batches | train loss 0.3796301 +| epoch 6 | 95/ 2800 batches | train loss 0.4080571 +| epoch 6 | 99/ 2800 batches | train loss 0.3455979 +| epoch 6 | 103/ 2800 batches | train loss 0.3741289 +| epoch 6 | 107/ 2800 batches | train loss 0.3746317 +| epoch 6 | 111/ 2800 batches | train loss 0.4063075 +| epoch 6 | 115/ 2800 batches | train loss 0.3693450 +| epoch 6 | 119/ 2800 batches | train loss 0.3770829 +| epoch 6 | 123/ 2800 batches | train loss 0.3992559 +| epoch 6 | 127/ 2800 batches | train loss 0.3859933 +| epoch 6 | 131/ 2800 batches | train loss 0.3557754 +| epoch 6 | 135/ 2800 batches | train loss 0.2788138 +| epoch 6 | 139/ 2800 batches | train loss 0.4770949 +| epoch 6 | 143/ 2800 batches | train loss 0.3546595 +| epoch 6 | 147/ 2800 batches | train loss 0.4097062 +| epoch 6 | 151/ 2800 batches | train loss 0.3513751 +| epoch 6 | 155/ 2800 batches | train loss 0.3223870 +| epoch 6 | 159/ 2800 batches | train loss 0.4160263 +| epoch 6 | 163/ 2800 batches | train loss 0.3129150 +| epoch 6 | 167/ 2800 batches | train loss 0.3951907 +| epoch 6 | 171/ 2800 batches | train loss 0.4160975 +| epoch 6 | 175/ 2800 batches | train loss 0.3174350 +| epoch 6 | 179/ 2800 batches | train loss 0.4175281 +| epoch 6 | 183/ 2800 batches | train loss 0.4000210 +| epoch 6 | 187/ 2800 batches | train loss 0.3564927 +| epoch 6 | 191/ 2800 batches | train loss 0.3464776 +| epoch 6 | 195/ 2800 batches | train loss 0.3927885 +| epoch 6 | 199/ 2800 batches | train loss 0.3947333 +| epoch 6 | 203/ 2800 batches | train loss 0.3357590 +| epoch 6 | 207/ 2800 batches | train loss 0.4365048 +| epoch 6 | 211/ 2800 batches | train loss 0.4339417 +| epoch 6 | 215/ 2800 batches | train loss 0.3932850 +| epoch 6 | 219/ 2800 batches | train loss 0.4675336 +| epoch 6 | 223/ 2800 batches | train loss 0.3873463 +| epoch 6 | 227/ 2800 batches | train loss 0.3325904 +| epoch 6 | 231/ 2800 batches | train loss 0.3216337 +| epoch 6 | 235/ 2800 batches | train loss 0.4088145 +| epoch 6 | 239/ 2800 batches | train loss 0.3631310 +| epoch 6 | 243/ 2800 batches | train loss 0.4245219 +| epoch 6 | 247/ 2800 batches | train loss 0.4671947 +| epoch 6 | 251/ 2800 batches | train loss 0.4285930 +| epoch 6 | 255/ 2800 batches | train loss 0.4179195 +| epoch 6 | 259/ 2800 batches | train loss 0.4390547 +| epoch 6 | 263/ 2800 batches | train loss 0.4046309 +| epoch 6 | 267/ 2800 batches | train loss 0.3324691 +| epoch 6 | 271/ 2800 batches | train loss 0.4368789 +| epoch 6 | 275/ 2800 batches | train loss 0.4160997 +| epoch 6 | 279/ 2800 batches | train loss 0.4234389 +| epoch 6 | 283/ 2800 batches | train loss 0.4009683 +| epoch 6 | 287/ 2800 batches | train loss 0.3087370 +| epoch 6 | 291/ 2800 batches | train loss 0.2841707 +| epoch 6 | 295/ 2800 batches | train loss 0.3724232 +| epoch 6 | 299/ 2800 batches | train loss 0.4082966 +| epoch 6 | 303/ 2800 batches | train loss 0.3518966 +| epoch 6 | 307/ 2800 batches | train loss 0.3701771 +| epoch 6 | 311/ 2800 batches | train loss 0.4348273 +| epoch 6 | 315/ 2800 batches | train loss 0.4109831 +| epoch 6 | 319/ 2800 batches | train loss 0.3734830 +| epoch 6 | 323/ 2800 batches | train loss 0.3471524 +| epoch 6 | 327/ 2800 batches | train loss 0.3934640 +| epoch 6 | 331/ 2800 batches | train loss 0.4216297 +| epoch 6 | 335/ 2800 batches | train loss 0.3844841 +| epoch 6 | 339/ 2800 batches | train loss 0.3524322 +| epoch 6 | 343/ 2800 batches | train loss 0.3341535 +| epoch 6 | 347/ 2800 batches | train loss 0.3310836 +| epoch 6 | 351/ 2800 batches | train loss 0.3623325 +| epoch 6 | 355/ 2800 batches | train loss 0.3298552 +| epoch 6 | 359/ 2800 batches | train loss 0.4010208 +| epoch 6 | 363/ 2800 batches | train loss 0.4272878 +| epoch 6 | 367/ 2800 batches | train loss 0.3928416 +| epoch 6 | 371/ 2800 batches | train loss 0.3682222 +| epoch 6 | 375/ 2800 batches | train loss 0.3550912 +| epoch 6 | 379/ 2800 batches | train loss 0.3778999 +| epoch 6 | 383/ 2800 batches | train loss 0.4234113 +| epoch 6 | 387/ 2800 batches | train loss 0.4131380 +| epoch 6 | 391/ 2800 batches | train loss 0.3914745 +| epoch 6 | 395/ 2800 batches | train loss 0.3440538 +| epoch 6 | 399/ 2800 batches | train loss 0.3338237 +| epoch 6 | 403/ 2800 batches | train loss 0.3239010 +| epoch 6 | 407/ 2800 batches | train loss 0.4272287 +| epoch 6 | 411/ 2800 batches | train loss 0.4037234 +| epoch 6 | 415/ 2800 batches | train loss 0.4030960 +| epoch 6 | 419/ 2800 batches | train loss 0.3545389 +| epoch 6 | 423/ 2800 batches | train loss 0.4120240 +| epoch 6 | 427/ 2800 batches | train loss 0.3903494 +| epoch 6 | 431/ 2800 batches | train loss 0.3614347 +| epoch 6 | 435/ 2800 batches | train loss 0.3269492 +| epoch 6 | 439/ 2800 batches | train loss 0.4004184 +| epoch 6 | 443/ 2800 batches | train loss 0.3239630 +| epoch 6 | 447/ 2800 batches | train loss 0.3901119 +| epoch 6 | 451/ 2800 batches | train loss 0.3743305 +| epoch 6 | 455/ 2800 batches | train loss 0.4367905 +| epoch 6 | 459/ 2800 batches | train loss 0.3945705 +| epoch 6 | 463/ 2800 batches | train loss 0.3732518 +| epoch 6 | 467/ 2800 batches | train loss 0.3307675 +| epoch 6 | 471/ 2800 batches | train loss 0.3854476 +| epoch 6 | 475/ 2800 batches | train loss 0.4169435 +| epoch 6 | 479/ 2800 batches | train loss 0.3597981 +| epoch 6 | 483/ 2800 batches | train loss 0.4170067 +| epoch 6 | 487/ 2800 batches | train loss 0.4157882 +| epoch 6 | 491/ 2800 batches | train loss 0.3385536 +| epoch 6 | 495/ 2800 batches | train loss 0.3436194 +| epoch 6 | 499/ 2800 batches | train loss 0.3326471 +| epoch 6 | 503/ 2800 batches | train loss 0.3576151 +| epoch 6 | 507/ 2800 batches | train loss 0.4148650 +| epoch 6 | 511/ 2800 batches | train loss 0.4259696 +| epoch 6 | 515/ 2800 batches | train loss 0.3938229 +| epoch 6 | 519/ 2800 batches | train loss 0.3687829 +| epoch 6 | 523/ 2800 batches | train loss 0.4709249 +| epoch 6 | 527/ 2800 batches | train loss 0.4452182 +| epoch 6 | 531/ 2800 batches | train loss 0.4138675 +| epoch 6 | 535/ 2800 batches | train loss 0.4005012 +| epoch 6 | 539/ 2800 batches | train loss 0.3962831 +| epoch 6 | 543/ 2800 batches | train loss 0.3743002 +| epoch 6 | 547/ 2800 batches | train loss 0.4401835 +| epoch 6 | 551/ 2800 batches | train loss 0.3727329 +| epoch 6 | 555/ 2800 batches | train loss 0.4820641 +| epoch 6 | 559/ 2800 batches | train loss 0.4399997 +| epoch 6 | 563/ 2800 batches | train loss 0.3849882 +| epoch 6 | 567/ 2800 batches | train loss 0.3873470 +| epoch 6 | 571/ 2800 batches | train loss 0.4544472 +| epoch 6 | 575/ 2800 batches | train loss 0.3355472 +| epoch 6 | 579/ 2800 batches | train loss 0.2882369 +| epoch 6 | 583/ 2800 batches | train loss 0.3502604 +| epoch 6 | 587/ 2800 batches | train loss 0.3437362 +| epoch 6 | 591/ 2800 batches | train loss 0.3902586 +| epoch 6 | 595/ 2800 batches | train loss 0.5291036 +| epoch 6 | 599/ 2800 batches | train loss 0.4031475 +| epoch 6 | 603/ 2800 batches | train loss 0.4492904 +| epoch 6 | 607/ 2800 batches | train loss 0.3321836 +| epoch 6 | 611/ 2800 batches | train loss 0.3177524 +| epoch 6 | 615/ 2800 batches | train loss 0.4057586 +| epoch 6 | 619/ 2800 batches | train loss 0.3470559 +| epoch 6 | 623/ 2800 batches | train loss 0.3857115 +| epoch 6 | 627/ 2800 batches | train loss 0.4102788 +| epoch 6 | 631/ 2800 batches | train loss 0.4009504 +| epoch 6 | 635/ 2800 batches | train loss 0.3631113 +| epoch 6 | 639/ 2800 batches | train loss 0.3781013 +| epoch 6 | 643/ 2800 batches | train loss 0.3626659 +| epoch 6 | 647/ 2800 batches | train loss 0.3660114 +| epoch 6 | 651/ 2800 batches | train loss 0.3669664 +| epoch 6 | 655/ 2800 batches | train loss 0.3510441 +| epoch 6 | 659/ 2800 batches | train loss 0.3416876 +| epoch 6 | 663/ 2800 batches | train loss 0.3308990 +| epoch 6 | 667/ 2800 batches | train loss 0.3517970 +| epoch 6 | 671/ 2800 batches | train loss 0.4094564 +| epoch 6 | 675/ 2800 batches | train loss 0.3034140 +| epoch 6 | 679/ 2800 batches | train loss 0.4116268 +| epoch 6 | 683/ 2800 batches | train loss 0.3906866 +| epoch 6 | 687/ 2800 batches | train loss 0.4656415 +| epoch 6 | 691/ 2800 batches | train loss 0.3274194 +| epoch 6 | 695/ 2800 batches | train loss 0.4020058 +| epoch 6 | 699/ 2800 batches | train loss 0.4075042 +| epoch 6 | 703/ 2800 batches | train loss 0.4058180 +| epoch 6 | 707/ 2800 batches | train loss 0.3479463 +| epoch 6 | 711/ 2800 batches | train loss 0.4083437 +| epoch 6 | 715/ 2800 batches | train loss 0.3936631 +| epoch 6 | 719/ 2800 batches | train loss 0.3107380 +| epoch 6 | 723/ 2800 batches | train loss 0.3704149 +| epoch 6 | 727/ 2800 batches | train loss 0.4097306 +| epoch 6 | 731/ 2800 batches | train loss 0.3877239 +| epoch 6 | 735/ 2800 batches | train loss 0.4217004 +| epoch 6 | 739/ 2800 batches | train loss 0.3384236 +| epoch 6 | 743/ 2800 batches | train loss 0.3252345 +| epoch 6 | 747/ 2800 batches | train loss 0.3744950 +| epoch 6 | 751/ 2800 batches | train loss 0.4039915 +| epoch 6 | 755/ 2800 batches | train loss 0.3821571 +| epoch 6 | 759/ 2800 batches | train loss 0.3190659 +| epoch 6 | 763/ 2800 batches | train loss 0.3269646 +| epoch 6 | 767/ 2800 batches | train loss 0.3690365 +| epoch 6 | 771/ 2800 batches | train loss 0.3637626 +| epoch 6 | 775/ 2800 batches | train loss 0.3986847 +| epoch 6 | 779/ 2800 batches | train loss 0.3254588 +| epoch 6 | 783/ 2800 batches | train loss 0.3287295 +| epoch 6 | 787/ 2800 batches | train loss 0.3627301 +| epoch 6 | 791/ 2800 batches | train loss 0.3641838 +| epoch 6 | 795/ 2800 batches | train loss 0.3788727 +| epoch 6 | 799/ 2800 batches | train loss 0.3621905 +| epoch 6 | 803/ 2800 batches | train loss 0.3702951 +| epoch 6 | 807/ 2800 batches | train loss 0.3539917 +| epoch 6 | 811/ 2800 batches | train loss 0.4928470 +| epoch 6 | 815/ 2800 batches | train loss 0.3393130 +| epoch 6 | 819/ 2800 batches | train loss 0.3998399 +| epoch 6 | 823/ 2800 batches | train loss 0.3890750 +| epoch 6 | 827/ 2800 batches | train loss 0.2692925 +| epoch 6 | 831/ 2800 batches | train loss 0.3586245 +| epoch 6 | 835/ 2800 batches | train loss 0.4152910 +| epoch 6 | 839/ 2800 batches | train loss 0.3379543 +| epoch 6 | 843/ 2800 batches | train loss 0.4925651 +| epoch 6 | 847/ 2800 batches | train loss 0.4413388 +| epoch 6 | 851/ 2800 batches | train loss 0.4408023 +| epoch 6 | 855/ 2800 batches | train loss 0.3791152 +| epoch 6 | 859/ 2800 batches | train loss 0.3642900 +| epoch 6 | 863/ 2800 batches | train loss 0.3480728 +| epoch 6 | 867/ 2800 batches | train loss 0.4718807 +| epoch 6 | 871/ 2800 batches | train loss 0.3823590 +| epoch 6 | 875/ 2800 batches | train loss 0.4882070 +| epoch 6 | 879/ 2800 batches | train loss 0.3736849 +| epoch 6 | 883/ 2800 batches | train loss 0.4326899 +| epoch 6 | 887/ 2800 batches | train loss 0.3267587 +| epoch 6 | 891/ 2800 batches | train loss 0.4493150 +| epoch 6 | 895/ 2800 batches | train loss 0.4513252 +| epoch 6 | 899/ 2800 batches | train loss 0.3523636 +| epoch 6 | 903/ 2800 batches | train loss 0.3390101 +| epoch 6 | 907/ 2800 batches | train loss 0.3339080 +| epoch 6 | 911/ 2800 batches | train loss 0.4128318 +| epoch 6 | 915/ 2800 batches | train loss 0.3773144 +| epoch 6 | 919/ 2800 batches | train loss 0.3852757 +| epoch 6 | 923/ 2800 batches | train loss 0.3808370 +| epoch 6 | 927/ 2800 batches | train loss 0.4235452 +| epoch 6 | 931/ 2800 batches | train loss 0.3639455 +| epoch 6 | 935/ 2800 batches | train loss 0.3196943 +| epoch 6 | 939/ 2800 batches | train loss 0.4398130 +| epoch 6 | 943/ 2800 batches | train loss 0.4037567 +| epoch 6 | 947/ 2800 batches | train loss 0.4061132 +| epoch 6 | 951/ 2800 batches | train loss 0.3824120 +| epoch 6 | 955/ 2800 batches | train loss 0.4238198 +| epoch 6 | 959/ 2800 batches | train loss 0.3419189 +| epoch 6 | 963/ 2800 batches | train loss 0.3866539 +| epoch 6 | 967/ 2800 batches | train loss 0.5202675 +| epoch 6 | 971/ 2800 batches | train loss 0.4271837 +| epoch 6 | 975/ 2800 batches | train loss 0.3875816 +| epoch 6 | 979/ 2800 batches | train loss 0.4337991 +| epoch 6 | 983/ 2800 batches | train loss 0.3779075 +| epoch 6 | 987/ 2800 batches | train loss 0.4105841 +| epoch 6 | 991/ 2800 batches | train loss 0.3937186 +| epoch 6 | 995/ 2800 batches | train loss 0.3972456 +| epoch 6 | 999/ 2800 batches | train loss 0.4327748 +| epoch 6 | 1003/ 2800 batches | train loss 0.3629451 +| epoch 6 | 1007/ 2800 batches | train loss 0.4576858 +| epoch 6 | 1011/ 2800 batches | train loss 0.2512837 +| epoch 6 | 1015/ 2800 batches | train loss 0.3531993 +| epoch 6 | 1019/ 2800 batches | train loss 0.4844381 +| epoch 6 | 1023/ 2800 batches | train loss 0.3407066 +| epoch 6 | 1027/ 2800 batches | train loss 0.4361388 +| epoch 6 | 1031/ 2800 batches | train loss 0.4057867 +| epoch 6 | 1035/ 2800 batches | train loss 0.3455244 +| epoch 6 | 1039/ 2800 batches | train loss 0.4042771 +| epoch 6 | 1043/ 2800 batches | train loss 0.3538218 +| epoch 6 | 1047/ 2800 batches | train loss 0.3526931 +| epoch 6 | 1051/ 2800 batches | train loss 0.3756000 +| epoch 6 | 1055/ 2800 batches | train loss 0.1611323 +| epoch 6 | 1059/ 2800 batches | train loss 0.2996199 +| epoch 6 | 1063/ 2800 batches | train loss 0.4325331 +| epoch 6 | 1067/ 2800 batches | train loss 0.4433303 +| epoch 6 | 1071/ 2800 batches | train loss 0.4139003 +| epoch 6 | 1075/ 2800 batches | train loss 0.2901524 +| epoch 6 | 1079/ 2800 batches | train loss 0.3817409 +| epoch 6 | 1083/ 2800 batches | train loss 0.3921004 +| epoch 6 | 1087/ 2800 batches | train loss 0.3691334 +| epoch 6 | 1091/ 2800 batches | train loss 0.4638658 +| epoch 6 | 1095/ 2800 batches | train loss 0.3933839 +| epoch 6 | 1099/ 2800 batches | train loss 0.3871013 +| epoch 6 | 1103/ 2800 batches | train loss 0.3571173 +| epoch 6 | 1107/ 2800 batches | train loss 0.4006040 +| epoch 6 | 1111/ 2800 batches | train loss 0.3473135 +| epoch 6 | 1115/ 2800 batches | train loss 0.3176242 +| epoch 6 | 1119/ 2800 batches | train loss 0.4117354 +| epoch 6 | 1123/ 2800 batches | train loss 0.4368996 +| epoch 6 | 1127/ 2800 batches | train loss 0.4027559 +| epoch 6 | 1131/ 2800 batches | train loss 0.1588360 +| epoch 6 | 1135/ 2800 batches | train loss 0.2992531 +| epoch 6 | 1139/ 2800 batches | train loss 0.4128753 +| epoch 6 | 1143/ 2800 batches | train loss 0.4079363 +| epoch 6 | 1147/ 2800 batches | train loss 0.3901634 +| epoch 6 | 1151/ 2800 batches | train loss 0.3045056 +| epoch 6 | 1155/ 2800 batches | train loss 0.3262748 +| epoch 6 | 1159/ 2800 batches | train loss 0.3939973 +| epoch 6 | 1163/ 2800 batches | train loss 0.4249969 +| epoch 6 | 1167/ 2800 batches | train loss 0.3407717 +| epoch 6 | 1171/ 2800 batches | train loss 0.3706384 +| epoch 6 | 1175/ 2800 batches | train loss 0.4104999 +| epoch 6 | 1179/ 2800 batches | train loss 0.4157103 +| epoch 6 | 1183/ 2800 batches | train loss 0.4284478 +| epoch 6 | 1187/ 2800 batches | train loss 0.3354623 +| epoch 6 | 1191/ 2800 batches | train loss 0.3450675 +| epoch 6 | 1195/ 2800 batches | train loss 0.4109554 +| epoch 6 | 1199/ 2800 batches | train loss 0.3996941 +| epoch 6 | 1203/ 2800 batches | train loss 0.4098954 +| epoch 6 | 1207/ 2800 batches | train loss 0.3955864 +| epoch 6 | 1211/ 2800 batches | train loss 0.3169640 +| epoch 6 | 1215/ 2800 batches | train loss 0.3232689 +| epoch 6 | 1219/ 2800 batches | train loss 0.4795950 +| epoch 6 | 1223/ 2800 batches | train loss 0.3107439 +| epoch 6 | 1227/ 2800 batches | train loss 0.3654966 +| epoch 6 | 1231/ 2800 batches | train loss 0.4025038 +| epoch 6 | 1235/ 2800 batches | train loss 0.4368550 +| epoch 6 | 1239/ 2800 batches | train loss 0.4974378 +| epoch 6 | 1243/ 2800 batches | train loss 0.3822380 +| epoch 6 | 1247/ 2800 batches | train loss 0.3681185 +| epoch 6 | 1251/ 2800 batches | train loss 0.3394265 +| epoch 6 | 1255/ 2800 batches | train loss 0.4183888 +| epoch 6 | 1259/ 2800 batches | train loss 0.3772328 +| epoch 6 | 1263/ 2800 batches | train loss 0.3485668 +| epoch 6 | 1267/ 2800 batches | train loss 0.3785599 +| epoch 6 | 1271/ 2800 batches | train loss 0.3819456 +| epoch 6 | 1275/ 2800 batches | train loss 0.3911284 +| epoch 6 | 1279/ 2800 batches | train loss 0.4045484 +| epoch 6 | 1283/ 2800 batches | train loss 0.4079490 +| epoch 6 | 1287/ 2800 batches | train loss 0.4859305 +| epoch 6 | 1291/ 2800 batches | train loss 0.3691139 +| epoch 6 | 1295/ 2800 batches | train loss 0.4131835 +| epoch 6 | 1299/ 2800 batches | train loss 0.3432194 +| epoch 6 | 1303/ 2800 batches | train loss 0.4102636 +| epoch 6 | 1307/ 2800 batches | train loss 0.4447819 +| epoch 6 | 1311/ 2800 batches | train loss 0.3377939 +| epoch 6 | 1315/ 2800 batches | train loss 0.3668028 +| epoch 6 | 1319/ 2800 batches | train loss 0.4054558 +| epoch 6 | 1323/ 2800 batches | train loss 0.3401354 +| epoch 6 | 1327/ 2800 batches | train loss 0.4117766 +| epoch 6 | 1331/ 2800 batches | train loss 0.4079654 +| epoch 6 | 1335/ 2800 batches | train loss 0.4051880 +| epoch 6 | 1339/ 2800 batches | train loss 0.4062604 +| epoch 6 | 1343/ 2800 batches | train loss 0.3971221 +| epoch 6 | 1347/ 2800 batches | train loss 0.3356591 +| epoch 6 | 1351/ 2800 batches | train loss 0.4078158 +| epoch 6 | 1355/ 2800 batches | train loss 0.3994094 +| epoch 6 | 1359/ 2800 batches | train loss 0.3192857 +| epoch 6 | 1363/ 2800 batches | train loss 0.3582073 +| epoch 6 | 1367/ 2800 batches | train loss 0.3782648 +| epoch 6 | 1371/ 2800 batches | train loss 0.3754539 +| epoch 6 | 1375/ 2800 batches | train loss 0.3864732 +| epoch 6 | 1379/ 2800 batches | train loss 0.4396928 +| epoch 6 | 1383/ 2800 batches | train loss 0.4243716 +| epoch 6 | 1387/ 2800 batches | train loss 0.3854069 +| epoch 6 | 1391/ 2800 batches | train loss 0.4246152 +| epoch 6 | 1395/ 2800 batches | train loss 0.3567074 +| epoch 6 | 1399/ 2800 batches | train loss 0.4286488 +| epoch 6 | 1403/ 2800 batches | train loss 0.4216765 +| epoch 6 | 1407/ 2800 batches | train loss 0.4106759 +| epoch 6 | 1411/ 2800 batches | train loss 0.4593132 +| epoch 6 | 1415/ 2800 batches | train loss 0.3868778 +| epoch 6 | 1419/ 2800 batches | train loss 0.4510694 +| epoch 6 | 1423/ 2800 batches | train loss 0.3577136 +| epoch 6 | 1427/ 2800 batches | train loss 0.3865306 +| epoch 6 | 1431/ 2800 batches | train loss 0.4087391 +| epoch 6 | 1435/ 2800 batches | train loss 0.3719798 +| epoch 6 | 1439/ 2800 batches | train loss 0.4085500 +| epoch 6 | 1443/ 2800 batches | train loss 0.3755053 +| epoch 6 | 1447/ 2800 batches | train loss 0.3916681 +| epoch 6 | 1451/ 2800 batches | train loss 0.3590136 +| epoch 6 | 1455/ 2800 batches | train loss 0.3849798 +| epoch 6 | 1459/ 2800 batches | train loss 0.3242370 +| epoch 6 | 1463/ 2800 batches | train loss 0.4165263 +| epoch 6 | 1467/ 2800 batches | train loss 0.3318490 +| epoch 6 | 1471/ 2800 batches | train loss 0.3970303 +| epoch 6 | 1475/ 2800 batches | train loss 0.4157393 +| epoch 6 | 1479/ 2800 batches | train loss 0.3498307 +| epoch 6 | 1483/ 2800 batches | train loss 0.3568064 +| epoch 6 | 1487/ 2800 batches | train loss 0.3664214 +| epoch 6 | 1491/ 2800 batches | train loss 0.3487821 +| epoch 6 | 1495/ 2800 batches | train loss 0.3716947 +| epoch 6 | 1499/ 2800 batches | train loss 0.3593085 +| epoch 6 | 1503/ 2800 batches | train loss 0.3578224 +| epoch 6 | 1507/ 2800 batches | train loss 0.3908918 +| epoch 6 | 1511/ 2800 batches | train loss 0.4177640 +| epoch 6 | 1515/ 2800 batches | train loss 0.4181915 +| epoch 6 | 1519/ 2800 batches | train loss 0.3959222 +| epoch 6 | 1523/ 2800 batches | train loss 0.3603318 +| epoch 6 | 1527/ 2800 batches | train loss 0.4378249 +| epoch 6 | 1531/ 2800 batches | train loss 0.4154552 +| epoch 6 | 1535/ 2800 batches | train loss 0.4065442 +| epoch 6 | 1539/ 2800 batches | train loss 0.3988927 +| epoch 6 | 1543/ 2800 batches | train loss 0.3426109 +| epoch 6 | 1547/ 2800 batches | train loss 0.4112602 +| epoch 6 | 1551/ 2800 batches | train loss 0.4255902 +| epoch 6 | 1555/ 2800 batches | train loss 0.3972615 +| epoch 6 | 1559/ 2800 batches | train loss 0.4065337 +| epoch 6 | 1563/ 2800 batches | train loss 0.3549297 +| epoch 6 | 1567/ 2800 batches | train loss 0.2727369 +| epoch 6 | 1571/ 2800 batches | train loss 0.3662552 +| epoch 6 | 1575/ 2800 batches | train loss 0.3807384 +| epoch 6 | 1579/ 2800 batches | train loss 0.4221233 +| epoch 6 | 1583/ 2800 batches | train loss 0.3884497 +| epoch 6 | 1587/ 2800 batches | train loss 0.3722354 +| epoch 6 | 1591/ 2800 batches | train loss 0.4015854 +| epoch 6 | 1595/ 2800 batches | train loss 0.4328843 +| epoch 6 | 1599/ 2800 batches | train loss 0.3950836 +| epoch 6 | 1603/ 2800 batches | train loss 0.4031179 +| epoch 6 | 1607/ 2800 batches | train loss 0.3687326 +| epoch 6 | 1611/ 2800 batches | train loss 0.3739209 +| epoch 6 | 1615/ 2800 batches | train loss 0.3588656 +| epoch 6 | 1619/ 2800 batches | train loss 0.4291758 +| epoch 6 | 1623/ 2800 batches | train loss 0.4097854 +| epoch 6 | 1627/ 2800 batches | train loss 0.3901438 +| epoch 6 | 1631/ 2800 batches | train loss 0.3318148 +| epoch 6 | 1635/ 2800 batches | train loss 0.3749998 +| epoch 6 | 1639/ 2800 batches | train loss 0.4649402 +| epoch 6 | 1643/ 2800 batches | train loss 0.3950883 +| epoch 6 | 1647/ 2800 batches | train loss 0.4035993 +| epoch 6 | 1651/ 2800 batches | train loss 0.4239922 +| epoch 6 | 1655/ 2800 batches | train loss 0.3689275 +| epoch 6 | 1659/ 2800 batches | train loss 0.3508118 +| epoch 6 | 1663/ 2800 batches | train loss 0.3544515 +| epoch 6 | 1667/ 2800 batches | train loss 0.3901207 +| epoch 6 | 1671/ 2800 batches | train loss 0.3984654 +| epoch 6 | 1675/ 2800 batches | train loss 0.3891043 +| epoch 6 | 1679/ 2800 batches | train loss 0.3047628 +| epoch 6 | 1683/ 2800 batches | train loss 0.3735127 +| epoch 6 | 1687/ 2800 batches | train loss 0.4648746 +| epoch 6 | 1691/ 2800 batches | train loss 0.3852394 +| epoch 6 | 1695/ 2800 batches | train loss 0.3802004 +| epoch 6 | 1699/ 2800 batches | train loss 0.3912048 +| epoch 6 | 1703/ 2800 batches | train loss 0.3691202 +| epoch 6 | 1707/ 2800 batches | train loss 0.4097902 +| epoch 6 | 1711/ 2800 batches | train loss 0.3979564 +| epoch 6 | 1715/ 2800 batches | train loss 0.4939254 +| epoch 6 | 1719/ 2800 batches | train loss 0.3507968 +| epoch 6 | 1723/ 2800 batches | train loss 0.3626511 +| epoch 6 | 1727/ 2800 batches | train loss 0.3536428 +| epoch 6 | 1731/ 2800 batches | train loss 0.4151525 +| epoch 6 | 1735/ 2800 batches | train loss 0.3530225 +| epoch 6 | 1739/ 2800 batches | train loss 0.3907972 +| epoch 6 | 1743/ 2800 batches | train loss 0.4315357 +| epoch 6 | 1747/ 2800 batches | train loss 0.3528532 +| epoch 6 | 1751/ 2800 batches | train loss 0.3713259 +| epoch 6 | 1755/ 2800 batches | train loss 0.3298516 +| epoch 6 | 1759/ 2800 batches | train loss 0.3951078 +| epoch 6 | 1763/ 2800 batches | train loss 0.3560793 +| epoch 6 | 1767/ 2800 batches | train loss 0.3935781 +| epoch 6 | 1771/ 2800 batches | train loss 0.3645962 +| epoch 6 | 1775/ 2800 batches | train loss 0.3796422 +| epoch 6 | 1779/ 2800 batches | train loss 0.3440414 +| epoch 6 | 1783/ 2800 batches | train loss 0.3615646 +| epoch 6 | 1787/ 2800 batches | train loss 0.3574309 +| epoch 6 | 1791/ 2800 batches | train loss 0.4307434 +| epoch 6 | 1795/ 2800 batches | train loss 0.3839703 +| epoch 6 | 1799/ 2800 batches | train loss 0.4055020 +| epoch 6 | 1803/ 2800 batches | train loss 0.3822191 +| epoch 6 | 1807/ 2800 batches | train loss 0.3637734 +| epoch 6 | 1811/ 2800 batches | train loss 0.4618625 +| epoch 6 | 1815/ 2800 batches | train loss 0.3638045 +| epoch 6 | 1819/ 2800 batches | train loss 0.3634612 +| epoch 6 | 1823/ 2800 batches | train loss 0.3162496 +| epoch 6 | 1827/ 2800 batches | train loss 0.3850689 +| epoch 6 | 1831/ 2800 batches | train loss 0.3419944 +| epoch 6 | 1835/ 2800 batches | train loss 0.4047414 +| epoch 6 | 1839/ 2800 batches | train loss 0.3652943 +| epoch 6 | 1843/ 2800 batches | train loss 0.3803087 +| epoch 6 | 1847/ 2800 batches | train loss 0.3035134 +| epoch 6 | 1851/ 2800 batches | train loss 0.4165731 +| epoch 6 | 1855/ 2800 batches | train loss 0.3978417 +| epoch 6 | 1859/ 2800 batches | train loss 0.3442496 +| epoch 6 | 1863/ 2800 batches | train loss 0.3997671 +| epoch 6 | 1867/ 2800 batches | train loss 0.3882884 +| epoch 6 | 1871/ 2800 batches | train loss 0.3387585 +| epoch 6 | 1875/ 2800 batches | train loss 0.4303862 +| epoch 6 | 1879/ 2800 batches | train loss 0.2820891 +| epoch 6 | 1883/ 2800 batches | train loss 0.3318252 +| epoch 6 | 1887/ 2800 batches | train loss 0.4158859 +| epoch 6 | 1891/ 2800 batches | train loss 0.3159892 +| epoch 6 | 1895/ 2800 batches | train loss 0.4491495 +| epoch 6 | 1899/ 2800 batches | train loss 0.4079036 +| epoch 6 | 1903/ 2800 batches | train loss 0.3227460 +| epoch 6 | 1907/ 2800 batches | train loss 0.4131170 +| epoch 6 | 1911/ 2800 batches | train loss 0.4284773 +| epoch 6 | 1915/ 2800 batches | train loss 0.3933845 +| epoch 6 | 1919/ 2800 batches | train loss 0.4283474 +| epoch 6 | 1923/ 2800 batches | train loss 0.3991598 +| epoch 6 | 1927/ 2800 batches | train loss 0.3987091 +| epoch 6 | 1931/ 2800 batches | train loss 0.3582711 +| epoch 6 | 1935/ 2800 batches | train loss 0.3087929 +| epoch 6 | 1939/ 2800 batches | train loss 0.3857584 +| epoch 6 | 1943/ 2800 batches | train loss 0.4040073 +| epoch 6 | 1947/ 2800 batches | train loss 0.3897882 +| epoch 6 | 1951/ 2800 batches | train loss 0.4025612 +| epoch 6 | 1955/ 2800 batches | train loss 0.4142032 +| epoch 6 | 1959/ 2800 batches | train loss 0.3657759 +| epoch 6 | 1963/ 2800 batches | train loss 0.4182692 +| epoch 6 | 1967/ 2800 batches | train loss 0.4109654 +| epoch 6 | 1971/ 2800 batches | train loss 0.4652192 +| epoch 6 | 1975/ 2800 batches | train loss 0.4775360 +| epoch 6 | 1979/ 2800 batches | train loss 0.3843733 +| epoch 6 | 1983/ 2800 batches | train loss 0.3370168 +| epoch 6 | 1987/ 2800 batches | train loss 0.4341329 +| epoch 6 | 1991/ 2800 batches | train loss 0.3903493 +| epoch 6 | 1995/ 2800 batches | train loss 0.4509858 +| epoch 6 | 1999/ 2800 batches | train loss 0.4194513 +| epoch 6 | 2003/ 2800 batches | train loss 0.3299288 +| epoch 6 | 2007/ 2800 batches | train loss 0.3960708 +| epoch 6 | 2011/ 2800 batches | train loss 0.3631413 +| epoch 6 | 2015/ 2800 batches | train loss 0.3752100 +| epoch 6 | 2019/ 2800 batches | train loss 0.4264119 +| epoch 6 | 2023/ 2800 batches | train loss 0.3787044 +| epoch 6 | 2027/ 2800 batches | train loss 0.4264826 +| epoch 6 | 2031/ 2800 batches | train loss 0.3849796 +| epoch 6 | 2035/ 2800 batches | train loss 0.3511677 +| epoch 6 | 2039/ 2800 batches | train loss 0.3643061 +| epoch 6 | 2043/ 2800 batches | train loss 0.3195056 +| epoch 6 | 2047/ 2800 batches | train loss 0.2769763 +| epoch 6 | 2051/ 2800 batches | train loss 0.3559313 +| epoch 6 | 2055/ 2800 batches | train loss 0.3960118 +| epoch 6 | 2059/ 2800 batches | train loss 0.4172182 +| epoch 6 | 2063/ 2800 batches | train loss 0.3856570 +| epoch 6 | 2067/ 2800 batches | train loss 0.3791579 +| epoch 6 | 2071/ 2800 batches | train loss 0.3401769 +| epoch 6 | 2075/ 2800 batches | train loss 0.3471568 +| epoch 6 | 2079/ 2800 batches | train loss 0.3409320 +| epoch 6 | 2083/ 2800 batches | train loss 0.4071878 +| epoch 6 | 2087/ 2800 batches | train loss 0.3970603 +| epoch 6 | 2091/ 2800 batches | train loss 0.3852500 +| epoch 6 | 2095/ 2800 batches | train loss 0.3917591 +| epoch 6 | 2099/ 2800 batches | train loss 0.4135363 +| epoch 6 | 2103/ 2800 batches | train loss 0.4129934 +| epoch 6 | 2107/ 2800 batches | train loss 0.4010542 +| epoch 6 | 2111/ 2800 batches | train loss 0.4208994 +| epoch 6 | 2115/ 2800 batches | train loss 0.2537960 +| epoch 6 | 2119/ 2800 batches | train loss 0.4217876 +| epoch 6 | 2123/ 2800 batches | train loss 0.4145316 +| epoch 6 | 2127/ 2800 batches | train loss 0.3488705 +| epoch 6 | 2131/ 2800 batches | train loss 0.4099008 +| epoch 6 | 2135/ 2800 batches | train loss 0.4188343 +| epoch 6 | 2139/ 2800 batches | train loss 0.3712916 +| epoch 6 | 2143/ 2800 batches | train loss 0.4122305 +| epoch 6 | 2147/ 2800 batches | train loss 0.3822024 +| epoch 6 | 2151/ 2800 batches | train loss 0.3843378 +| epoch 6 | 2155/ 2800 batches | train loss 0.4404615 +| epoch 6 | 2159/ 2800 batches | train loss 0.4232899 +| epoch 6 | 2163/ 2800 batches | train loss 0.3838100 +| epoch 6 | 2167/ 2800 batches | train loss 0.3400711 +| epoch 6 | 2171/ 2800 batches | train loss 0.4144025 +| epoch 6 | 2175/ 2800 batches | train loss 0.3515426 +| epoch 6 | 2179/ 2800 batches | train loss 0.4051198 +| epoch 6 | 2183/ 2800 batches | train loss 0.3525325 +| epoch 6 | 2187/ 2800 batches | train loss 0.4338645 +| epoch 6 | 2191/ 2800 batches | train loss 0.4708327 +| epoch 6 | 2195/ 2800 batches | train loss 0.3971553 +| epoch 6 | 2199/ 2800 batches | train loss 0.4889037 +| epoch 6 | 2203/ 2800 batches | train loss 0.4003127 +| epoch 6 | 2207/ 2800 batches | train loss 0.3792701 +| epoch 6 | 2211/ 2800 batches | train loss 0.3053282 +| epoch 6 | 2215/ 2800 batches | train loss 0.4487881 +| epoch 6 | 2219/ 2800 batches | train loss 0.4268124 +| epoch 6 | 2223/ 2800 batches | train loss 0.4134361 +| epoch 6 | 2227/ 2800 batches | train loss 0.3629848 +| epoch 6 | 2231/ 2800 batches | train loss 0.4179281 +| epoch 6 | 2235/ 2800 batches | train loss 0.4137555 +| epoch 6 | 2239/ 2800 batches | train loss 0.3284330 +| epoch 6 | 2243/ 2800 batches | train loss 0.4384779 +| epoch 6 | 2247/ 2800 batches | train loss 0.3144172 +| epoch 6 | 2251/ 2800 batches | train loss 0.3751758 +| epoch 6 | 2255/ 2800 batches | train loss 0.4029031 +| epoch 6 | 2259/ 2800 batches | train loss 0.3921942 +| epoch 6 | 2263/ 2800 batches | train loss 0.4528330 +| epoch 6 | 2267/ 2800 batches | train loss 0.3880767 +| epoch 6 | 2271/ 2800 batches | train loss 0.4524513 +| epoch 6 | 2275/ 2800 batches | train loss 0.4576496 +| epoch 6 | 2279/ 2800 batches | train loss 0.4646348 +| epoch 6 | 2283/ 2800 batches | train loss 0.4030601 +| epoch 6 | 2287/ 2800 batches | train loss 0.3874045 +| epoch 6 | 2291/ 2800 batches | train loss 0.4156891 +| epoch 6 | 2295/ 2800 batches | train loss 0.3865973 +| epoch 6 | 2299/ 2800 batches | train loss 0.3876772 +| epoch 6 | 2303/ 2800 batches | train loss 0.3413765 +| epoch 6 | 2307/ 2800 batches | train loss 0.3828638 +| epoch 6 | 2311/ 2800 batches | train loss 0.3460093 +| epoch 6 | 2315/ 2800 batches | train loss 0.3423393 +| epoch 6 | 2319/ 2800 batches | train loss 0.3586971 +| epoch 6 | 2323/ 2800 batches | train loss 0.3734063 +| epoch 6 | 2327/ 2800 batches | train loss 0.4434784 +| epoch 6 | 2331/ 2800 batches | train loss 0.3902913 +| epoch 6 | 2335/ 2800 batches | train loss 0.4100744 +| epoch 6 | 2339/ 2800 batches | train loss 0.4478703 +| epoch 6 | 2343/ 2800 batches | train loss 0.3858759 +| epoch 6 | 2347/ 2800 batches | train loss 0.3353900 +| epoch 6 | 2351/ 2800 batches | train loss 0.3986279 +| epoch 6 | 2355/ 2800 batches | train loss 0.4186879 +| epoch 6 | 2359/ 2800 batches | train loss 0.3108888 +| epoch 6 | 2363/ 2800 batches | train loss 0.4180987 +| epoch 6 | 2367/ 2800 batches | train loss 0.3902397 +| epoch 6 | 2371/ 2800 batches | train loss 0.2552391 +| epoch 6 | 2375/ 2800 batches | train loss 0.4038779 +| epoch 6 | 2379/ 2800 batches | train loss 0.3824328 +| epoch 6 | 2383/ 2800 batches | train loss 0.4137351 +| epoch 6 | 2387/ 2800 batches | train loss 0.4320053 +| epoch 6 | 2391/ 2800 batches | train loss 0.4254349 +| epoch 6 | 2395/ 2800 batches | train loss 0.4201302 +| epoch 6 | 2399/ 2800 batches | train loss 0.3862569 +| epoch 6 | 2403/ 2800 batches | train loss 0.3634346 +| epoch 6 | 2407/ 2800 batches | train loss 0.4196286 +| epoch 6 | 2411/ 2800 batches | train loss 0.3480275 +| epoch 6 | 2415/ 2800 batches | train loss 0.3662243 +| epoch 6 | 2419/ 2800 batches | train loss 0.4128705 +| epoch 6 | 2423/ 2800 batches | train loss 0.3669134 +| epoch 6 | 2427/ 2800 batches | train loss 0.3738073 +| epoch 6 | 2431/ 2800 batches | train loss 0.3929752 +| epoch 6 | 2435/ 2800 batches | train loss 0.3277262 +| epoch 6 | 2439/ 2800 batches | train loss 0.4537084 +| epoch 6 | 2443/ 2800 batches | train loss 0.3027067 +| epoch 6 | 2447/ 2800 batches | train loss 0.4123296 +| epoch 6 | 2451/ 2800 batches | train loss 0.3949256 +| epoch 6 | 2455/ 2800 batches | train loss 0.3196333 +| epoch 6 | 2459/ 2800 batches | train loss 0.3005551 +| epoch 6 | 2463/ 2800 batches | train loss 0.3863489 +| epoch 6 | 2467/ 2800 batches | train loss 0.3460370 +| epoch 6 | 2471/ 2800 batches | train loss 0.3844752 +| epoch 6 | 2475/ 2800 batches | train loss 0.3445759 +| epoch 6 | 2479/ 2800 batches | train loss 0.3656366 +| epoch 6 | 2483/ 2800 batches | train loss 0.4138638 +| epoch 6 | 2487/ 2800 batches | train loss 0.3699187 +| epoch 6 | 2491/ 2800 batches | train loss 0.4035138 +| epoch 6 | 2495/ 2800 batches | train loss 0.3757805 +| epoch 6 | 2499/ 2800 batches | train loss 0.3756520 +| epoch 6 | 2503/ 2800 batches | train loss 0.3900161 +| epoch 6 | 2507/ 2800 batches | train loss 0.3976161 +| epoch 6 | 2511/ 2800 batches | train loss 0.4230070 +| epoch 6 | 2515/ 2800 batches | train loss 0.4190249 +| epoch 6 | 2519/ 2800 batches | train loss 0.3698575 +| epoch 6 | 2523/ 2800 batches | train loss 0.3904724 +| epoch 6 | 2527/ 2800 batches | train loss 0.3193843 +| epoch 6 | 2531/ 2800 batches | train loss 0.3755537 +| epoch 6 | 2535/ 2800 batches | train loss 0.4543418 +| epoch 6 | 2539/ 2800 batches | train loss 0.4197856 +| epoch 6 | 2543/ 2800 batches | train loss 0.3761985 +| epoch 6 | 2547/ 2800 batches | train loss 0.4595512 +| epoch 6 | 2551/ 2800 batches | train loss 0.3375363 +| epoch 6 | 2555/ 2800 batches | train loss 0.4448138 +| epoch 6 | 2559/ 2800 batches | train loss 0.3291956 +| epoch 6 | 2563/ 2800 batches | train loss 0.3465982 +| epoch 6 | 2567/ 2800 batches | train loss 0.3476825 +| epoch 6 | 2571/ 2800 batches | train loss 0.3226331 +| epoch 6 | 2575/ 2800 batches | train loss 0.3877332 +| epoch 6 | 2579/ 2800 batches | train loss 0.3512495 +| epoch 6 | 2583/ 2800 batches | train loss 0.4212583 +| epoch 6 | 2587/ 2800 batches | train loss 0.3855073 +| epoch 6 | 2591/ 2800 batches | train loss 0.3568057 +| epoch 6 | 2595/ 2800 batches | train loss 0.4522595 +| epoch 6 | 2599/ 2800 batches | train loss 0.3216383 +| epoch 6 | 2603/ 2800 batches | train loss 0.3837944 +| epoch 6 | 2607/ 2800 batches | train loss 0.4028583 +| epoch 6 | 2611/ 2800 batches | train loss 0.3869968 +| epoch 6 | 2615/ 2800 batches | train loss 0.3973718 +| epoch 6 | 2619/ 2800 batches | train loss 0.3831848 +| epoch 6 | 2623/ 2800 batches | train loss 0.3908134 +| epoch 6 | 2627/ 2800 batches | train loss 0.3979803 +| epoch 6 | 2631/ 2800 batches | train loss 0.3991202 +| epoch 6 | 2635/ 2800 batches | train loss 0.3906385 +| epoch 6 | 2639/ 2800 batches | train loss 0.5029713 +| epoch 6 | 2643/ 2800 batches | train loss 0.3598906 +| epoch 6 | 2647/ 2800 batches | train loss 0.4357992 +| epoch 6 | 2651/ 2800 batches | train loss 0.4054654 +| epoch 6 | 2655/ 2800 batches | train loss 0.4149531 +| epoch 6 | 2659/ 2800 batches | train loss 0.4317974 +| epoch 6 | 2663/ 2800 batches | train loss 0.4721629 +| epoch 6 | 2667/ 2800 batches | train loss 0.3920015 +| epoch 6 | 2671/ 2800 batches | train loss 0.4120203 +| epoch 6 | 2675/ 2800 batches | train loss 0.4227551 +| epoch 6 | 2679/ 2800 batches | train loss 0.3676787 +| epoch 6 | 2683/ 2800 batches | train loss 0.4175317 +| epoch 6 | 2687/ 2800 batches | train loss 0.3959782 +| epoch 6 | 2691/ 2800 batches | train loss 0.4127617 +| epoch 6 | 2695/ 2800 batches | train loss 0.3941715 +| epoch 6 | 2699/ 2800 batches | train loss 0.3551510 +| epoch 6 | 2703/ 2800 batches | train loss 0.3654213 +| epoch 6 | 2707/ 2800 batches | train loss 0.3261493 +| epoch 6 | 2711/ 2800 batches | train loss 0.4217894 +| epoch 6 | 2715/ 2800 batches | train loss 0.3999910 +| epoch 6 | 2719/ 2800 batches | train loss 0.3711165 +| epoch 6 | 2723/ 2800 batches | train loss 0.3583169 +| epoch 6 | 2727/ 2800 batches | train loss 0.4155502 +| epoch 6 | 2731/ 2800 batches | train loss 0.3580269 +| epoch 6 | 2735/ 2800 batches | train loss 0.3908138 +| epoch 6 | 2739/ 2800 batches | train loss 0.4574762 +| epoch 6 | 2743/ 2800 batches | train loss 0.4681781 +| epoch 6 | 2747/ 2800 batches | train loss 0.4038841 +| epoch 6 | 2751/ 2800 batches | train loss 0.3921337 +| epoch 6 | 2755/ 2800 batches | train loss 0.4289887 +| epoch 6 | 2759/ 2800 batches | train loss 0.3645599 +| epoch 6 | 2763/ 2800 batches | train loss 0.4392399 +| epoch 6 | 2767/ 2800 batches | train loss 0.3164409 +| epoch 6 | 2771/ 2800 batches | train loss 0.4070086 +| epoch 6 | 2775/ 2800 batches | train loss 0.4355505 +| epoch 6 | 2779/ 2800 batches | train loss 0.3398313 +| epoch 6 | 2783/ 2800 batches | train loss 0.3549770 +| epoch 6 | 2787/ 2800 batches | train loss 0.3846818 +| epoch 6 | 2791/ 2800 batches | train loss 0.4817479 +| epoch 6 | 2795/ 2800 batches | train loss 0.4005568 +| epoch 6 | 2799/ 2800 batches | train loss 0.3652272 +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 2800 batches | test loss 0.4490427 +| epoch 6 | 7/ 2800 batches | test loss 0.4341687 +| epoch 6 | 11/ 2800 batches | test loss 0.3096265 +| epoch 6 | 15/ 2800 batches | test loss 0.4200359 +| epoch 6 | 19/ 2800 batches | test loss 0.4667063 +| epoch 6 | 23/ 2800 batches | test loss 0.4574357 +| epoch 6 | 27/ 2800 batches | test loss 0.4181734 +| epoch 6 | 31/ 2800 batches | test loss 0.3897303 +| epoch 6 | 35/ 2800 batches | test loss 0.4069260 +| epoch 6 | 39/ 2800 batches | test loss 0.4548666 +| epoch 6 | 43/ 2800 batches | test loss 0.4466105 +| epoch 6 | 47/ 2800 batches | test loss 0.4362918 +| epoch 6 | 51/ 2800 batches | test loss 0.5471625 +| epoch 6 | 55/ 2800 batches | test loss 0.5461363 +| epoch 6 | 59/ 2800 batches | test loss 0.3952306 +| epoch 6 | 63/ 2800 batches | test loss 0.4362592 +| epoch 6 | 67/ 2800 batches | test loss 0.4297116 +| epoch 6 | 71/ 2800 batches | test loss 0.5484987 +| epoch 6 | 75/ 2800 batches | test loss 0.3372541 +| epoch 6 | 79/ 2800 batches | test loss 0.4293182 +| epoch 6 | 83/ 2800 batches | test loss 0.4871223 +| epoch 6 | 87/ 2800 batches | test loss 0.3878674 +| epoch 6 | 91/ 2800 batches | test loss 0.4693552 +| epoch 6 | 95/ 2800 batches | test loss 0.4017314 +| epoch 6 | 99/ 2800 batches | test loss 0.4839148 +| epoch 6 | 103/ 2800 batches | test loss 0.4279189 +| epoch 6 | 107/ 2800 batches | test loss 0.4500915 +| epoch 6 | 111/ 2800 batches | test loss 0.4538133 +| epoch 6 | 115/ 2800 batches | test loss 0.4875496 +| epoch 6 | 119/ 2800 batches | test loss 0.4319398 +| epoch 6 | 123/ 2800 batches | test loss 0.3755554 +| epoch 6 | 127/ 2800 batches | test loss 0.4575383 +| epoch 6 | 131/ 2800 batches | test loss 0.3991042 +| epoch 6 | 135/ 2800 batches | test loss 0.4796314 +| epoch 6 | 139/ 2800 batches | test loss 0.4715561 +| epoch 6 | 143/ 2800 batches | test loss 0.4315991 +| epoch 6 | 147/ 2800 batches | test loss 0.5703436 +| epoch 6 | 151/ 2800 batches | test loss 0.4217934 +| epoch 6 | 155/ 2800 batches | test loss 0.5101433 +| epoch 6 | 159/ 2800 batches | test loss 0.3798362 +| epoch 6 | 163/ 2800 batches | test loss 0.4611795 +| epoch 6 | 167/ 2800 batches | test loss 0.4244871 +| epoch 6 | 171/ 2800 batches | test loss 0.4826043 +| epoch 6 | 175/ 2800 batches | test loss 0.4047811 +| epoch 6 | 179/ 2800 batches | test loss 0.4426713 +| epoch 6 | 183/ 2800 batches | test loss 0.4565427 +| epoch 6 | 187/ 2800 batches | test loss 0.4616451 +| epoch 6 | 191/ 2800 batches | test loss 0.4869815 +| epoch 6 | 195/ 2800 batches | test loss 0.3585598 +| epoch 6 | 199/ 2800 batches | test loss 0.4965184 +| epoch 6 | 203/ 2800 batches | test loss 0.4250640 +| epoch 6 | 207/ 2800 batches | test loss 0.3883275 +| epoch 6 | 211/ 2800 batches | test loss 0.5496306 +| epoch 6 | 215/ 2800 batches | test loss 0.4124213 +| epoch 6 | 219/ 2800 batches | test loss 0.5134141 +| epoch 6 | 223/ 2800 batches | test loss 0.5084510 +| epoch 6 | 227/ 2800 batches | test loss 0.5795346 +| epoch 6 | 231/ 2800 batches | test loss 0.4981308 +| epoch 6 | 235/ 2800 batches | test loss 0.4182048 +| epoch 6 | 239/ 2800 batches | test loss 0.6051588 +| epoch 6 | 243/ 2800 batches | test loss 0.4033242 +| epoch 6 | 247/ 2800 batches | test loss 0.5520929 +| epoch 6 | 251/ 2800 batches | test loss 0.5737986 +| epoch 6 | 255/ 2800 batches | test loss 0.4686020 +| epoch 6 | 259/ 2800 batches | test loss 0.5671042 +| epoch 6 | 263/ 2800 batches | test loss 0.3856773 +| epoch 6 | 267/ 2800 batches | test loss 0.4794413 +| epoch 6 | 271/ 2800 batches | test loss 0.4893323 +| epoch 6 | 275/ 2800 batches | test loss 0.4570268 +| epoch 6 | 279/ 2800 batches | test loss 0.6006893 +| epoch 6 | 283/ 2800 batches | test loss 0.4348801 +| epoch 6 | 287/ 2800 batches | test loss 0.4118937 +| epoch 6 | 291/ 2800 batches | test loss 0.4619255 +| epoch 6 | 295/ 2800 batches | test loss 0.4010648 +| epoch 6 | 299/ 2800 batches | test loss 0.4759617 +| epoch 6 | 303/ 2800 batches | test loss 0.5405874 +| epoch 6 | 307/ 2800 batches | test loss 0.5436129 +| epoch 6 | 311/ 2800 batches | test loss 0.4135492 +| epoch 6 | 315/ 2800 batches | test loss 0.4350455 +| epoch 6 | 319/ 2800 batches | test loss 0.4259388 +| epoch 6 | 323/ 2800 batches | test loss 0.4408053 +| epoch 6 | 327/ 2800 batches | test loss 0.4974714 +| epoch 6 | 331/ 2800 batches | test loss 0.5638800 +| epoch 6 | 335/ 2800 batches | test loss 0.3864645 +| epoch 6 | 339/ 2800 batches | test loss 0.4362435 +| epoch 6 | 343/ 2800 batches | test loss 0.4574401 +| epoch 6 | 347/ 2800 batches | test loss 0.4473535 +| epoch 6 | 351/ 2800 batches | test loss 0.4937421 +| epoch 6 | 355/ 2800 batches | test loss 0.4886155 +| epoch 6 | 359/ 2800 batches | test loss 0.4587506 +| epoch 6 | 363/ 2800 batches | test loss 0.5226493 +| epoch 6 | 367/ 2800 batches | test loss 0.4287481 +| epoch 6 | 371/ 2800 batches | test loss 0.4503865 +| epoch 6 | 375/ 2800 batches | test loss 0.3781777 +| epoch 6 | 379/ 2800 batches | test loss 0.5032505 +| epoch 6 | 383/ 2800 batches | test loss 0.4362425 +| epoch 6 | 387/ 2800 batches | test loss 0.4966731 +| epoch 6 | 391/ 2800 batches | test loss 0.4888715 +| epoch 6 | 395/ 2800 batches | test loss 0.5532181 +| epoch 6 | 399/ 2800 batches | test loss 0.3891572 +| epoch 6 | 403/ 2800 batches | test loss 0.4409358 +| epoch 6 | 407/ 2800 batches | test loss 0.4666986 +| epoch 6 | 411/ 2800 batches | test loss 0.4490708 +| epoch 6 | 415/ 2800 batches | test loss 0.4858993 +| epoch 6 | 419/ 2800 batches | test loss 0.5941558 +| epoch 6 | 423/ 2800 batches | test loss 0.4072019 +| epoch 6 | 427/ 2800 batches | test loss 0.4453407 +| epoch 6 | 431/ 2800 batches | test loss 0.4819256 +| epoch 6 | 435/ 2800 batches | test loss 0.4776165 +| epoch 6 | 439/ 2800 batches | test loss 0.4279676 +| epoch 6 | 443/ 2800 batches | test loss 0.4322526 +| epoch 6 | 447/ 2800 batches | test loss 0.4121982 +| epoch 6 | 451/ 2800 batches | test loss 0.4373511 +| epoch 6 | 455/ 2800 batches | test loss 0.4359653 +| epoch 6 | 459/ 2800 batches | test loss 0.3840516 +| epoch 6 | 463/ 2800 batches | test loss 0.4259804 +| epoch 6 | 467/ 2800 batches | test loss 0.3800870 +| epoch 6 | 471/ 2800 batches | test loss 0.3362842 +| epoch 6 | 475/ 2800 batches | test loss 0.4171779 +| epoch 6 | 479/ 2800 batches | test loss 0.4150624 +| epoch 6 | 483/ 2800 batches | test loss 0.4124261 +| epoch 6 | 487/ 2800 batches | test loss 0.4369906 +| epoch 6 | 491/ 2800 batches | test loss 0.4653218 +| epoch 6 | 495/ 2800 batches | test loss 0.4432923 +| epoch 6 | 499/ 2800 batches | test loss 0.5849345 +| epoch 6 | 503/ 2800 batches | test loss 0.5644259 +| epoch 6 | 507/ 2800 batches | test loss 0.3978911 +| epoch 6 | 511/ 2800 batches | test loss 0.5511426 +| epoch 6 | 515/ 2800 batches | test loss 0.5014846 +| epoch 6 | 519/ 2800 batches | test loss 0.4103953 +| epoch 6 | 523/ 2800 batches | test loss 0.4105698 +| epoch 6 | 527/ 2800 batches | test loss 0.4209135 +| epoch 6 | 531/ 2800 batches | test loss 0.4458861 +| epoch 6 | 535/ 2800 batches | test loss 0.4564019 +| epoch 6 | 539/ 2800 batches | test loss 0.4837489 +| epoch 6 | 543/ 2800 batches | test loss 0.4810838 +| epoch 6 | 547/ 2800 batches | test loss 0.4267504 +| epoch 6 | 551/ 2800 batches | test loss 0.4375775 +| epoch 6 | 555/ 2800 batches | test loss 0.4300041 +| epoch 6 | 559/ 2800 batches | test loss 0.4345038 +| epoch 6 | 563/ 2800 batches | test loss 0.5306202 +| epoch 6 | 567/ 2800 batches | test loss 0.4365491 +| epoch 6 | 571/ 2800 batches | test loss 0.4908713 +| epoch 6 | 575/ 2800 batches | test loss 0.3945586 +| epoch 6 | 579/ 2800 batches | test loss 0.4243883 +| epoch 6 | 583/ 2800 batches | test loss 0.5005120 +| epoch 6 | 587/ 2800 batches | test loss 0.4437026 +| epoch 6 | 591/ 2800 batches | test loss 0.4224835 +| epoch 6 | 595/ 2800 batches | test loss 0.4962125 +| epoch 6 | 599/ 2800 batches | test loss 0.4162090 +| epoch 6 | 603/ 2800 batches | test loss 0.4392630 +| epoch 6 | 607/ 2800 batches | test loss 0.5577253 +| epoch 6 | 611/ 2800 batches | test loss 0.4006222 +| epoch 6 | 615/ 2800 batches | test loss 0.3492247 +| epoch 6 | 619/ 2800 batches | test loss 0.4134925 +| epoch 6 | 623/ 2800 batches | test loss 0.4700774 +| epoch 6 | 627/ 2800 batches | test loss 0.4155279 +| epoch 6 | 631/ 2800 batches | test loss 0.5145572 +| epoch 6 | 635/ 2800 batches | test loss 0.4902973 +| epoch 6 | 639/ 2800 batches | test loss 0.4914753 +| epoch 6 | 643/ 2800 batches | test loss 0.4872665 +| epoch 6 | 647/ 2800 batches | test loss 0.4975351 +| epoch 6 | 651/ 2800 batches | test loss 0.4750877 +| epoch 6 | 655/ 2800 batches | test loss 0.4587968 +| epoch 6 | 659/ 2800 batches | test loss 0.4771199 +| epoch 6 | 663/ 2800 batches | test loss 0.4027244 +| epoch 6 | 667/ 2800 batches | test loss 0.6157579 +| epoch 6 | 671/ 2800 batches | test loss 0.3713360 +| epoch 6 | 675/ 2800 batches | test loss 0.5225878 +| epoch 6 | 679/ 2800 batches | test loss 0.4679001 +| epoch 6 | 683/ 2800 batches | test loss 0.4282135 +| epoch 6 | 687/ 2800 batches | test loss 0.4532724 +| epoch 6 | 691/ 2800 batches | test loss 0.4302177 +| epoch 6 | 695/ 2800 batches | test loss 0.4874991 +| epoch 6 | 699/ 2800 batches | test loss 0.4396578 +| epoch 6 | final test loss 0.4601, do not save model! +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 2800 batches | train loss 0.3979583 +| epoch 7 | 7/ 2800 batches | train loss 0.3192588 +| epoch 7 | 11/ 2800 batches | train loss 0.3922989 +| epoch 7 | 15/ 2800 batches | train loss 0.4065014 +| epoch 7 | 19/ 2800 batches | train loss 0.3704165 +| epoch 7 | 23/ 2800 batches | train loss 0.3501792 +| epoch 7 | 27/ 2800 batches | train loss 0.3422261 +| epoch 7 | 31/ 2800 batches | train loss 0.3401504 +| epoch 7 | 35/ 2800 batches | train loss 0.3556501 +| epoch 7 | 39/ 2800 batches | train loss 0.4266050 +| epoch 7 | 43/ 2800 batches | train loss 0.3753300 +| epoch 7 | 47/ 2800 batches | train loss 0.4523987 +| epoch 7 | 51/ 2800 batches | train loss 0.3425301 +| epoch 7 | 55/ 2800 batches | train loss 0.2573217 +| epoch 7 | 59/ 2800 batches | train loss 0.3443500 +| epoch 7 | 63/ 2800 batches | train loss 0.3910050 +| epoch 7 | 67/ 2800 batches | train loss 0.3898488 +| epoch 7 | 71/ 2800 batches | train loss 0.2856963 +| epoch 7 | 75/ 2800 batches | train loss 0.3443674 +| epoch 7 | 79/ 2800 batches | train loss 0.2637004 +| epoch 7 | 83/ 2800 batches | train loss 0.2431349 +| epoch 7 | 87/ 2800 batches | train loss 0.3517731 +| epoch 7 | 91/ 2800 batches | train loss 0.3454834 +| epoch 7 | 95/ 2800 batches | train loss 0.3281415 +| epoch 7 | 99/ 2800 batches | train loss 0.1331854 +| epoch 7 | 103/ 2800 batches | train loss 0.3621056 +| epoch 7 | 107/ 2800 batches | train loss 0.3689624 +| epoch 7 | 111/ 2800 batches | train loss 0.3436530 +| epoch 7 | 115/ 2800 batches | train loss 0.3103422 +| epoch 7 | 119/ 2800 batches | train loss 0.3483969 +| epoch 7 | 123/ 2800 batches | train loss 0.3443944 +| epoch 7 | 127/ 2800 batches | train loss 0.3281771 +| epoch 7 | 131/ 2800 batches | train loss 0.3864089 +| epoch 7 | 135/ 2800 batches | train loss 0.3853753 +| epoch 7 | 139/ 2800 batches | train loss 0.3878759 +| epoch 7 | 143/ 2800 batches | train loss 0.3444669 +| epoch 7 | 147/ 2800 batches | train loss 0.3517868 +| epoch 7 | 151/ 2800 batches | train loss 0.3688580 +| epoch 7 | 155/ 2800 batches | train loss 0.2924701 +| epoch 7 | 159/ 2800 batches | train loss 0.2994976 +| epoch 7 | 163/ 2800 batches | train loss 0.3324713 +| epoch 7 | 167/ 2800 batches | train loss 0.3157297 +| epoch 7 | 171/ 2800 batches | train loss 0.3594978 +| epoch 7 | 175/ 2800 batches | train loss 0.3865038 +| epoch 7 | 179/ 2800 batches | train loss 0.3939027 +| epoch 7 | 183/ 2800 batches | train loss 0.3098149 +| epoch 7 | 187/ 2800 batches | train loss 0.3503577 +| epoch 7 | 191/ 2800 batches | train loss 0.3043551 +| epoch 7 | 195/ 2800 batches | train loss 0.3376508 +| epoch 7 | 199/ 2800 batches | train loss 0.4125554 +| epoch 7 | 203/ 2800 batches | train loss 0.3655473 +| epoch 7 | 207/ 2800 batches | train loss 0.3706267 +| epoch 7 | 211/ 2800 batches | train loss 0.3403476 +| epoch 7 | 215/ 2800 batches | train loss 0.3501659 +| epoch 7 | 219/ 2800 batches | train loss 0.2859934 +| epoch 7 | 223/ 2800 batches | train loss 0.3503450 +| epoch 7 | 227/ 2800 batches | train loss 0.4424088 +| epoch 7 | 231/ 2800 batches | train loss 0.3881370 +| epoch 7 | 235/ 2800 batches | train loss 0.2933549 +| epoch 7 | 239/ 2800 batches | train loss 0.3784387 +| epoch 7 | 243/ 2800 batches | train loss 0.4190338 +| epoch 7 | 247/ 2800 batches | train loss 0.3924004 +| epoch 7 | 251/ 2800 batches | train loss 0.3764642 +| epoch 7 | 255/ 2800 batches | train loss 0.3704675 +| epoch 7 | 259/ 2800 batches | train loss 0.3523987 +| epoch 7 | 263/ 2800 batches | train loss 0.3203860 +| epoch 7 | 267/ 2800 batches | train loss 0.3579287 +| epoch 7 | 271/ 2800 batches | train loss 0.3304997 +| epoch 7 | 275/ 2800 batches | train loss 0.3773566 +| epoch 7 | 279/ 2800 batches | train loss 0.3276847 +| epoch 7 | 283/ 2800 batches | train loss 0.3488652 +| epoch 7 | 287/ 2800 batches | train loss 0.3402678 +| epoch 7 | 291/ 2800 batches | train loss 0.3171011 +| epoch 7 | 295/ 2800 batches | train loss 0.3489657 +| epoch 7 | 299/ 2800 batches | train loss 0.3648134 +| epoch 7 | 303/ 2800 batches | train loss 0.3587608 +| epoch 7 | 307/ 2800 batches | train loss 0.3583444 +| epoch 7 | 311/ 2800 batches | train loss 0.3530189 +| epoch 7 | 315/ 2800 batches | train loss 0.3642778 +| epoch 7 | 319/ 2800 batches | train loss 0.3340139 +| epoch 7 | 323/ 2800 batches | train loss 0.3256389 +| epoch 7 | 327/ 2800 batches | train loss 0.3033928 +| epoch 7 | 331/ 2800 batches | train loss 0.3878213 +| epoch 7 | 335/ 2800 batches | train loss 0.3982599 +| epoch 7 | 339/ 2800 batches | train loss 0.3717906 +| epoch 7 | 343/ 2800 batches | train loss 0.3327609 +| epoch 7 | 347/ 2800 batches | train loss 0.3567959 +| epoch 7 | 351/ 2800 batches | train loss 0.4116335 +| epoch 7 | 355/ 2800 batches | train loss 0.3937225 +| epoch 7 | 359/ 2800 batches | train loss 0.3775832 +| epoch 7 | 363/ 2800 batches | train loss 0.2945811 +| epoch 7 | 367/ 2800 batches | train loss 0.3516893 +| epoch 7 | 371/ 2800 batches | train loss 0.3056087 +| epoch 7 | 375/ 2800 batches | train loss 0.3972133 +| epoch 7 | 379/ 2800 batches | train loss 0.3583986 +| epoch 7 | 383/ 2800 batches | train loss 0.3233756 +| epoch 7 | 387/ 2800 batches | train loss 0.3362387 +| epoch 7 | 391/ 2800 batches | train loss 0.3558343 +| epoch 7 | 395/ 2800 batches | train loss 0.3670427 +| epoch 7 | 399/ 2800 batches | train loss 0.2900288 +| epoch 7 | 403/ 2800 batches | train loss 0.3529023 +| epoch 7 | 407/ 2800 batches | train loss 0.3042620 +| epoch 7 | 411/ 2800 batches | train loss 0.3658398 +| epoch 7 | 415/ 2800 batches | train loss 0.3354307 +| epoch 7 | 419/ 2800 batches | train loss 0.3503404 +| epoch 7 | 423/ 2800 batches | train loss 0.3659561 +| epoch 7 | 427/ 2800 batches | train loss 0.3071237 +| epoch 7 | 431/ 2800 batches | train loss 0.3400118 +| epoch 7 | 435/ 2800 batches | train loss 0.3597683 +| epoch 7 | 439/ 2800 batches | train loss 0.3688426 +| epoch 7 | 443/ 2800 batches | train loss 0.3285468 +| epoch 7 | 447/ 2800 batches | train loss 0.3199904 +| epoch 7 | 451/ 2800 batches | train loss 0.3418738 +| epoch 7 | 455/ 2800 batches | train loss 0.3515806 +| epoch 7 | 459/ 2800 batches | train loss 0.3831892 +| epoch 7 | 463/ 2800 batches | train loss 0.3912745 +| epoch 7 | 467/ 2800 batches | train loss 0.3139553 +| epoch 7 | 471/ 2800 batches | train loss 0.3119042 +| epoch 7 | 475/ 2800 batches | train loss 0.3024866 +| epoch 7 | 479/ 2800 batches | train loss 0.3037522 +| epoch 7 | 483/ 2800 batches | train loss 0.3698918 +| epoch 7 | 487/ 2800 batches | train loss 0.2896712 +| epoch 7 | 491/ 2800 batches | train loss 0.3229790 +| epoch 7 | 495/ 2800 batches | train loss 0.3821739 +| epoch 7 | 499/ 2800 batches | train loss 0.3724327 +| epoch 7 | 503/ 2800 batches | train loss 0.3513483 +| epoch 7 | 507/ 2800 batches | train loss 0.3159399 +| epoch 7 | 511/ 2800 batches | train loss 0.2923925 +| epoch 7 | 515/ 2800 batches | train loss 0.3194228 +| epoch 7 | 519/ 2800 batches | train loss 0.3357458 +| epoch 7 | 523/ 2800 batches | train loss 0.3519959 +| epoch 7 | 527/ 2800 batches | train loss 0.3738103 +| epoch 7 | 531/ 2800 batches | train loss 0.3933611 +| epoch 7 | 535/ 2800 batches | train loss 0.3564531 +| epoch 7 | 539/ 2800 batches | train loss 0.3731979 +| epoch 7 | 543/ 2800 batches | train loss 0.3686365 +| epoch 7 | 547/ 2800 batches | train loss 0.3268543 +| epoch 7 | 551/ 2800 batches | train loss 0.4311295 +| epoch 7 | 555/ 2800 batches | train loss 0.3719237 +| epoch 7 | 559/ 2800 batches | train loss 0.3275846 +| epoch 7 | 563/ 2800 batches | train loss 0.4125631 +| epoch 7 | 567/ 2800 batches | train loss 0.3383156 +| epoch 7 | 571/ 2800 batches | train loss 0.3610655 +| epoch 7 | 575/ 2800 batches | train loss 0.3156918 +| epoch 7 | 579/ 2800 batches | train loss 0.3603489 +| epoch 7 | 583/ 2800 batches | train loss 0.4237631 +| epoch 7 | 587/ 2800 batches | train loss 0.3873977 +| epoch 7 | 591/ 2800 batches | train loss 0.3668207 +| epoch 7 | 595/ 2800 batches | train loss 0.3556915 +| epoch 7 | 599/ 2800 batches | train loss 0.4230815 +| epoch 7 | 603/ 2800 batches | train loss 0.3456677 +| epoch 7 | 607/ 2800 batches | train loss 0.3241155 +| epoch 7 | 611/ 2800 batches | train loss 0.3490457 +| epoch 7 | 615/ 2800 batches | train loss 0.3076209 +| epoch 7 | 619/ 2800 batches | train loss 0.3633438 +| epoch 7 | 623/ 2800 batches | train loss 0.3534574 +| epoch 7 | 627/ 2800 batches | train loss 0.3834143 +| epoch 7 | 631/ 2800 batches | train loss 0.4258060 +| epoch 7 | 635/ 2800 batches | train loss 0.3701265 +| epoch 7 | 639/ 2800 batches | train loss 0.1432900 +| epoch 7 | 643/ 2800 batches | train loss 0.3823779 +| epoch 7 | 647/ 2800 batches | train loss 0.3205299 +| epoch 7 | 651/ 2800 batches | train loss 0.3554977 +| epoch 7 | 655/ 2800 batches | train loss 0.3574773 +| epoch 7 | 659/ 2800 batches | train loss 0.3339954 +| epoch 7 | 663/ 2800 batches | train loss 0.3416296 +| epoch 7 | 667/ 2800 batches | train loss 0.3213316 +| epoch 7 | 671/ 2800 batches | train loss 0.2785554 +| epoch 7 | 675/ 2800 batches | train loss 0.3271278 +| epoch 7 | 679/ 2800 batches | train loss 0.3465745 +| epoch 7 | 683/ 2800 batches | train loss 0.4390594 +| epoch 7 | 687/ 2800 batches | train loss 0.3398566 +| epoch 7 | 691/ 2800 batches | train loss 0.3708261 +| epoch 7 | 695/ 2800 batches | train loss 0.3761269 +| epoch 7 | 699/ 2800 batches | train loss 0.3644633 +| epoch 7 | 703/ 2800 batches | train loss 0.3872825 +| epoch 7 | 707/ 2800 batches | train loss 0.2844560 +| epoch 7 | 711/ 2800 batches | train loss 0.3597217 +| epoch 7 | 715/ 2800 batches | train loss 0.3194134 +| epoch 7 | 719/ 2800 batches | train loss 0.3286649 +| epoch 7 | 723/ 2800 batches | train loss 0.4255371 +| epoch 7 | 727/ 2800 batches | train loss 0.3551923 +| epoch 7 | 731/ 2800 batches | train loss 0.3526565 +| epoch 7 | 735/ 2800 batches | train loss 0.3342923 +| epoch 7 | 739/ 2800 batches | train loss 0.3632864 +| epoch 7 | 743/ 2800 batches | train loss 0.2863384 +| epoch 7 | 747/ 2800 batches | train loss 0.4157819 +| epoch 7 | 751/ 2800 batches | train loss 0.3683413 +| epoch 7 | 755/ 2800 batches | train loss 0.2748213 +| epoch 7 | 759/ 2800 batches | train loss 0.2969137 +| epoch 7 | 763/ 2800 batches | train loss 0.3161058 +| epoch 7 | 767/ 2800 batches | train loss 0.4072548 +| epoch 7 | 771/ 2800 batches | train loss 0.3493504 +| epoch 7 | 775/ 2800 batches | train loss 0.2928171 +| epoch 7 | 779/ 2800 batches | train loss 0.2541188 +| epoch 7 | 783/ 2800 batches | train loss 0.3122895 +| epoch 7 | 787/ 2800 batches | train loss 0.3704069 +| epoch 7 | 791/ 2800 batches | train loss 0.4313180 +| epoch 7 | 795/ 2800 batches | train loss 0.3374669 +| epoch 7 | 799/ 2800 batches | train loss 0.3914965 +| epoch 7 | 803/ 2800 batches | train loss 0.3808547 +| epoch 7 | 807/ 2800 batches | train loss 0.3812973 +| epoch 7 | 811/ 2800 batches | train loss 0.3296817 +| epoch 7 | 815/ 2800 batches | train loss 0.3321380 +| epoch 7 | 819/ 2800 batches | train loss 0.3547651 +| epoch 7 | 823/ 2800 batches | train loss 0.3995493 +| epoch 7 | 827/ 2800 batches | train loss 0.3539250 +| epoch 7 | 831/ 2800 batches | train loss 0.3459341 +| epoch 7 | 835/ 2800 batches | train loss 0.4476006 +| epoch 7 | 839/ 2800 batches | train loss 0.3721794 +| epoch 7 | 843/ 2800 batches | train loss 0.3945993 +| epoch 7 | 847/ 2800 batches | train loss 0.4098607 +| epoch 7 | 851/ 2800 batches | train loss 0.3647668 +| epoch 7 | 855/ 2800 batches | train loss 0.4132689 +| epoch 7 | 859/ 2800 batches | train loss 0.3897432 +| epoch 7 | 863/ 2800 batches | train loss 0.3362224 +| epoch 7 | 867/ 2800 batches | train loss 0.3879572 +| epoch 7 | 871/ 2800 batches | train loss 0.3295215 +| epoch 7 | 875/ 2800 batches | train loss 0.4609173 +| epoch 7 | 879/ 2800 batches | train loss 0.3884865 +| epoch 7 | 883/ 2800 batches | train loss 0.4148248 +| epoch 7 | 887/ 2800 batches | train loss 0.3231266 +| epoch 7 | 891/ 2800 batches | train loss 0.3819892 +| epoch 7 | 895/ 2800 batches | train loss 0.4109296 +| epoch 7 | 899/ 2800 batches | train loss 0.2733878 +| epoch 7 | 903/ 2800 batches | train loss 0.3801827 +| epoch 7 | 907/ 2800 batches | train loss 0.3851913 +| epoch 7 | 911/ 2800 batches | train loss 0.3910713 +| epoch 7 | 915/ 2800 batches | train loss 0.4087944 +| epoch 7 | 919/ 2800 batches | train loss 0.3539233 +| epoch 7 | 923/ 2800 batches | train loss 0.3735571 +| epoch 7 | 927/ 2800 batches | train loss 0.3368691 +| epoch 7 | 931/ 2800 batches | train loss 0.3898373 +| epoch 7 | 935/ 2800 batches | train loss 0.3615609 +| epoch 7 | 939/ 2800 batches | train loss 0.4529414 +| epoch 7 | 943/ 2800 batches | train loss 0.3797410 +| epoch 7 | 947/ 2800 batches | train loss 0.3721742 +| epoch 7 | 951/ 2800 batches | train loss 0.3472726 +| epoch 7 | 955/ 2800 batches | train loss 0.3566748 +| epoch 7 | 959/ 2800 batches | train loss 0.3459560 +| epoch 7 | 963/ 2800 batches | train loss 0.3870173 +| epoch 7 | 967/ 2800 batches | train loss 0.3967004 +| epoch 7 | 971/ 2800 batches | train loss 0.3361791 +| epoch 7 | 975/ 2800 batches | train loss 0.3512806 +| epoch 7 | 979/ 2800 batches | train loss 0.2856334 +| epoch 7 | 983/ 2800 batches | train loss 0.4633467 +| epoch 7 | 987/ 2800 batches | train loss 0.2838400 +| epoch 7 | 991/ 2800 batches | train loss 0.3710042 +| epoch 7 | 995/ 2800 batches | train loss 0.4000718 +| epoch 7 | 999/ 2800 batches | train loss 0.4635372 +| epoch 7 | 1003/ 2800 batches | train loss 0.3312089 +| epoch 7 | 1007/ 2800 batches | train loss 0.3890681 +| epoch 7 | 1011/ 2800 batches | train loss 0.3656256 +| epoch 7 | 1015/ 2800 batches | train loss 0.3603380 +| epoch 7 | 1019/ 2800 batches | train loss 0.3011145 +| epoch 7 | 1023/ 2800 batches | train loss 0.3865079 +| epoch 7 | 1027/ 2800 batches | train loss 0.3328848 +| epoch 7 | 1031/ 2800 batches | train loss 0.3566124 +| epoch 7 | 1035/ 2800 batches | train loss 0.3152870 +| epoch 7 | 1039/ 2800 batches | train loss 0.3108158 +| epoch 7 | 1043/ 2800 batches | train loss 0.3265983 +| epoch 7 | 1047/ 2800 batches | train loss 0.3420712 +| epoch 7 | 1051/ 2800 batches | train loss 0.3513597 +| epoch 7 | 1055/ 2800 batches | train loss 0.3138526 +| epoch 7 | 1059/ 2800 batches | train loss 0.4279386 +| epoch 7 | 1063/ 2800 batches | train loss 0.5111067 +| epoch 7 | 1067/ 2800 batches | train loss 0.3935052 +| epoch 7 | 1071/ 2800 batches | train loss 0.3050683 +| epoch 7 | 1075/ 2800 batches | train loss 0.3807118 +| epoch 7 | 1079/ 2800 batches | train loss 0.3205119 +| epoch 7 | 1083/ 2800 batches | train loss 0.4698197 +| epoch 7 | 1087/ 2800 batches | train loss 0.3698249 +| epoch 7 | 1091/ 2800 batches | train loss 0.4113396 +| epoch 7 | 1095/ 2800 batches | train loss 0.3612042 +| epoch 7 | 1099/ 2800 batches | train loss 0.3334888 +| epoch 7 | 1103/ 2800 batches | train loss 0.3733801 +| epoch 7 | 1107/ 2800 batches | train loss 0.2507488 +| epoch 7 | 1111/ 2800 batches | train loss 0.3564855 +| epoch 7 | 1115/ 2800 batches | train loss 0.3582070 +| epoch 7 | 1119/ 2800 batches | train loss 0.3281506 +| epoch 7 | 1123/ 2800 batches | train loss 0.4264738 +| epoch 7 | 1127/ 2800 batches | train loss 0.3112258 +| epoch 7 | 1131/ 2800 batches | train loss 0.4048230 +| epoch 7 | 1135/ 2800 batches | train loss 0.3356898 +| epoch 7 | 1139/ 2800 batches | train loss 0.3145527 +| epoch 7 | 1143/ 2800 batches | train loss 0.3831781 +| epoch 7 | 1147/ 2800 batches | train loss 0.3589555 +| epoch 7 | 1151/ 2800 batches | train loss 0.3692595 +| epoch 7 | 1155/ 2800 batches | train loss 0.3392597 +| epoch 7 | 1159/ 2800 batches | train loss 0.3803692 +| epoch 7 | 1163/ 2800 batches | train loss 0.3917565 +| epoch 7 | 1167/ 2800 batches | train loss 0.4346440 +| epoch 7 | 1171/ 2800 batches | train loss 0.3552420 +| epoch 7 | 1175/ 2800 batches | train loss 0.4122663 +| epoch 7 | 1179/ 2800 batches | train loss 0.4083019 +| epoch 7 | 1183/ 2800 batches | train loss 0.4101181 +| epoch 7 | 1187/ 2800 batches | train loss 0.3653234 +| epoch 7 | 1191/ 2800 batches | train loss 0.3309298 +| epoch 7 | 1195/ 2800 batches | train loss 0.3701308 +| epoch 7 | 1199/ 2800 batches | train loss 0.3786700 +| epoch 7 | 1203/ 2800 batches | train loss 0.3100041 +| epoch 7 | 1207/ 2800 batches | train loss 0.3692243 +| epoch 7 | 1211/ 2800 batches | train loss 0.3448488 +| epoch 7 | 1215/ 2800 batches | train loss 0.3220535 +| epoch 7 | 1219/ 2800 batches | train loss 0.3368981 +| epoch 7 | 1223/ 2800 batches | train loss 0.4243058 +| epoch 7 | 1227/ 2800 batches | train loss 0.3676628 +| epoch 7 | 1231/ 2800 batches | train loss 0.3635058 +| epoch 7 | 1235/ 2800 batches | train loss 0.4106190 +| epoch 7 | 1239/ 2800 batches | train loss 0.3759998 +| epoch 7 | 1243/ 2800 batches | train loss 0.3182481 +| epoch 7 | 1247/ 2800 batches | train loss 0.3689723 +| epoch 7 | 1251/ 2800 batches | train loss 0.3520553 +| epoch 7 | 1255/ 2800 batches | train loss 0.3367054 +| epoch 7 | 1259/ 2800 batches | train loss 0.3832546 +| epoch 7 | 1263/ 2800 batches | train loss 0.3534153 +| epoch 7 | 1267/ 2800 batches | train loss 0.4060015 +| epoch 7 | 1271/ 2800 batches | train loss 0.3883626 +| epoch 7 | 1275/ 2800 batches | train loss 0.3712791 +| epoch 7 | 1279/ 2800 batches | train loss 0.3098601 +| epoch 7 | 1283/ 2800 batches | train loss 0.3853617 +| epoch 7 | 1287/ 2800 batches | train loss 0.3276861 +| epoch 7 | 1291/ 2800 batches | train loss 0.3976434 +| epoch 7 | 1295/ 2800 batches | train loss 0.3985853 +| epoch 7 | 1299/ 2800 batches | train loss 0.4620227 +| epoch 7 | 1303/ 2800 batches | train loss 0.3466098 +| epoch 7 | 1307/ 2800 batches | train loss 0.3881201 +| epoch 7 | 1311/ 2800 batches | train loss 0.3808964 +| epoch 7 | 1315/ 2800 batches | train loss 0.3443466 +| epoch 7 | 1319/ 2800 batches | train loss 0.4178787 +| epoch 7 | 1323/ 2800 batches | train loss 0.3825300 +| epoch 7 | 1327/ 2800 batches | train loss 0.4270363 +| epoch 7 | 1331/ 2800 batches | train loss 0.3185573 +| epoch 7 | 1335/ 2800 batches | train loss 0.3059752 +| epoch 7 | 1339/ 2800 batches | train loss 0.3238876 +| epoch 7 | 1343/ 2800 batches | train loss 0.3672442 +| epoch 7 | 1347/ 2800 batches | train loss 0.4960617 +| epoch 7 | 1351/ 2800 batches | train loss 0.4394344 +| epoch 7 | 1355/ 2800 batches | train loss 0.4031293 +| epoch 7 | 1359/ 2800 batches | train loss 0.3198456 +| epoch 7 | 1363/ 2800 batches | train loss 0.3252238 +| epoch 7 | 1367/ 2800 batches | train loss 0.3575973 +| epoch 7 | 1371/ 2800 batches | train loss 0.2813552 +| epoch 7 | 1375/ 2800 batches | train loss 0.3284593 +| epoch 7 | 1379/ 2800 batches | train loss 0.3552573 +| epoch 7 | 1383/ 2800 batches | train loss 0.3777052 +| epoch 7 | 1387/ 2800 batches | train loss 0.3766443 +| epoch 7 | 1391/ 2800 batches | train loss 0.3748514 +| epoch 7 | 1395/ 2800 batches | train loss 0.3756969 +| epoch 7 | 1399/ 2800 batches | train loss 0.3455245 +| epoch 7 | 1403/ 2800 batches | train loss 0.3454922 +| epoch 7 | 1407/ 2800 batches | train loss 0.3663625 +| epoch 7 | 1411/ 2800 batches | train loss 0.3895204 +| epoch 7 | 1415/ 2800 batches | train loss 0.3631027 +| epoch 7 | 1419/ 2800 batches | train loss 0.3454508 +| epoch 7 | 1423/ 2800 batches | train loss 0.3517136 +| epoch 7 | 1427/ 2800 batches | train loss 0.3211255 +| epoch 7 | 1431/ 2800 batches | train loss 0.3384852 +| epoch 7 | 1435/ 2800 batches | train loss 0.3389831 +| epoch 7 | 1439/ 2800 batches | train loss 0.3349254 +| epoch 7 | 1443/ 2800 batches | train loss 0.2988995 +| epoch 7 | 1447/ 2800 batches | train loss 0.3064131 +| epoch 7 | 1451/ 2800 batches | train loss 0.2605718 +| epoch 7 | 1455/ 2800 batches | train loss 0.3558441 +| epoch 7 | 1459/ 2800 batches | train loss 0.3307048 +| epoch 7 | 1463/ 2800 batches | train loss 0.3436851 +| epoch 7 | 1467/ 2800 batches | train loss 0.3305799 +| epoch 7 | 1471/ 2800 batches | train loss 0.3680121 +| epoch 7 | 1475/ 2800 batches | train loss 0.3244609 +| epoch 7 | 1479/ 2800 batches | train loss 0.3653610 +| epoch 7 | 1483/ 2800 batches | train loss 0.3260409 +| epoch 7 | 1487/ 2800 batches | train loss 0.3407492 +| epoch 7 | 1491/ 2800 batches | train loss 0.3665083 +| epoch 7 | 1495/ 2800 batches | train loss 0.3706855 +| epoch 7 | 1499/ 2800 batches | train loss 0.3586217 +| epoch 7 | 1503/ 2800 batches | train loss 0.3077231 +| epoch 7 | 1507/ 2800 batches | train loss 0.4184955 +| epoch 7 | 1511/ 2800 batches | train loss 0.3946781 +| epoch 7 | 1515/ 2800 batches | train loss 0.3719351 +| epoch 7 | 1519/ 2800 batches | train loss 0.4016826 +| epoch 7 | 1523/ 2800 batches | train loss 0.3762789 +| epoch 7 | 1527/ 2800 batches | train loss 0.3406601 +| epoch 7 | 1531/ 2800 batches | train loss 0.3959095 +| epoch 7 | 1535/ 2800 batches | train loss 0.3166815 +| epoch 7 | 1539/ 2800 batches | train loss 0.3861291 +| epoch 7 | 1543/ 2800 batches | train loss 0.3211943 +| epoch 7 | 1547/ 2800 batches | train loss 0.3293056 +| epoch 7 | 1551/ 2800 batches | train loss 0.4340141 +| epoch 7 | 1555/ 2800 batches | train loss 0.3649849 +| epoch 7 | 1559/ 2800 batches | train loss 0.3835492 +| epoch 7 | 1563/ 2800 batches | train loss 0.3529149 +| epoch 7 | 1567/ 2800 batches | train loss 0.4047303 +| epoch 7 | 1571/ 2800 batches | train loss 0.3183728 +| epoch 7 | 1575/ 2800 batches | train loss 0.3328250 +| epoch 7 | 1579/ 2800 batches | train loss 0.3438553 +| epoch 7 | 1583/ 2800 batches | train loss 0.3652984 +| epoch 7 | 1587/ 2800 batches | train loss 0.3057839 +| epoch 7 | 1591/ 2800 batches | train loss 0.3809492 +| epoch 7 | 1595/ 2800 batches | train loss 0.3822238 +| epoch 7 | 1599/ 2800 batches | train loss 0.3333292 +| epoch 7 | 1603/ 2800 batches | train loss 0.3608120 +| epoch 7 | 1607/ 2800 batches | train loss 0.3636084 +| epoch 7 | 1611/ 2800 batches | train loss 0.2996601 +| epoch 7 | 1615/ 2800 batches | train loss 0.3999005 +| epoch 7 | 1619/ 2800 batches | train loss 0.3847913 +| epoch 7 | 1623/ 2800 batches | train loss 0.3240798 +| epoch 7 | 1627/ 2800 batches | train loss 0.4054399 +| epoch 7 | 1631/ 2800 batches | train loss 0.4289907 +| epoch 7 | 1635/ 2800 batches | train loss 0.4741864 +| epoch 7 | 1639/ 2800 batches | train loss 0.3955186 +| epoch 7 | 1643/ 2800 batches | train loss 0.3011146 +| epoch 7 | 1647/ 2800 batches | train loss 0.3332102 +| epoch 7 | 1651/ 2800 batches | train loss 0.2600110 +| epoch 7 | 1655/ 2800 batches | train loss 0.3615637 +| epoch 7 | 1659/ 2800 batches | train loss 0.3731259 +| epoch 7 | 1663/ 2800 batches | train loss 0.3533295 +| epoch 7 | 1667/ 2800 batches | train loss 0.3347362 +| epoch 7 | 1671/ 2800 batches | train loss 0.3316447 +| epoch 7 | 1675/ 2800 batches | train loss 0.3504325 +| epoch 7 | 1679/ 2800 batches | train loss 0.3315176 +| epoch 7 | 1683/ 2800 batches | train loss 0.3437821 +| epoch 7 | 1687/ 2800 batches | train loss 0.3307063 +| epoch 7 | 1691/ 2800 batches | train loss 0.3627558 +| epoch 7 | 1695/ 2800 batches | train loss 0.3911517 +| epoch 7 | 1699/ 2800 batches | train loss 0.4160532 +| epoch 7 | 1703/ 2800 batches | train loss 0.3400852 +| epoch 7 | 1707/ 2800 batches | train loss 0.3516034 +| epoch 7 | 1711/ 2800 batches | train loss 0.3393756 +| epoch 7 | 1715/ 2800 batches | train loss 0.3308770 +| epoch 7 | 1719/ 2800 batches | train loss 0.3280097 +| epoch 7 | 1723/ 2800 batches | train loss 0.4666137 +| epoch 7 | 1727/ 2800 batches | train loss 0.3906744 +| epoch 7 | 1731/ 2800 batches | train loss 0.3640061 +| epoch 7 | 1735/ 2800 batches | train loss 0.3643233 +| epoch 7 | 1739/ 2800 batches | train loss 0.4457852 +| epoch 7 | 1743/ 2800 batches | train loss 0.3712947 +| epoch 7 | 1747/ 2800 batches | train loss 0.3376506 +| epoch 7 | 1751/ 2800 batches | train loss 0.2536729 +| epoch 7 | 1755/ 2800 batches | train loss 0.3375588 +| epoch 7 | 1759/ 2800 batches | train loss 0.3323960 +| epoch 7 | 1763/ 2800 batches | train loss 0.4152502 +| epoch 7 | 1767/ 2800 batches | train loss 0.3971475 +| epoch 7 | 1771/ 2800 batches | train loss 0.3237112 +| epoch 7 | 1775/ 2800 batches | train loss 0.3966816 +| epoch 7 | 1779/ 2800 batches | train loss 0.3683846 +| epoch 7 | 1783/ 2800 batches | train loss 0.3628306 +| epoch 7 | 1787/ 2800 batches | train loss 0.3099933 +| epoch 7 | 1791/ 2800 batches | train loss 0.3869988 +| epoch 7 | 1795/ 2800 batches | train loss 0.3659185 +| epoch 7 | 1799/ 2800 batches | train loss 0.4104798 +| epoch 7 | 1803/ 2800 batches | train loss 0.3302003 +| epoch 7 | 1807/ 2800 batches | train loss 0.3847146 +| epoch 7 | 1811/ 2800 batches | train loss 0.3627960 +| epoch 7 | 1815/ 2800 batches | train loss 0.3916713 +| epoch 7 | 1819/ 2800 batches | train loss 0.4065534 +| epoch 7 | 1823/ 2800 batches | train loss 0.4401979 +| epoch 7 | 1827/ 2800 batches | train loss 0.3791793 +| epoch 7 | 1831/ 2800 batches | train loss 0.3909439 +| epoch 7 | 1835/ 2800 batches | train loss 0.3725387 +| epoch 7 | 1839/ 2800 batches | train loss 0.3273254 +| epoch 7 | 1843/ 2800 batches | train loss 0.2357242 +| epoch 7 | 1847/ 2800 batches | train loss 0.3867338 +| epoch 7 | 1851/ 2800 batches | train loss 0.1668726 +| epoch 7 | 1855/ 2800 batches | train loss 0.3419740 +| epoch 7 | 1859/ 2800 batches | train loss 0.3381517 +| epoch 7 | 1863/ 2800 batches | train loss 0.3845562 +| epoch 7 | 1867/ 2800 batches | train loss 0.3394164 +| epoch 7 | 1871/ 2800 batches | train loss 0.4228481 +| epoch 7 | 1875/ 2800 batches | train loss 0.3610417 +| epoch 7 | 1879/ 2800 batches | train loss 0.2927122 +| epoch 7 | 1883/ 2800 batches | train loss 0.3696561 +| epoch 7 | 1887/ 2800 batches | train loss 0.3068234 +| epoch 7 | 1891/ 2800 batches | train loss 0.3008545 +| epoch 7 | 1895/ 2800 batches | train loss 0.3053287 +| epoch 7 | 1899/ 2800 batches | train loss 0.3456246 +| epoch 7 | 1903/ 2800 batches | train loss 0.3769244 +| epoch 7 | 1907/ 2800 batches | train loss 0.3637258 +| epoch 7 | 1911/ 2800 batches | train loss 0.4152788 +| epoch 7 | 1915/ 2800 batches | train loss 0.3747130 +| epoch 7 | 1919/ 2800 batches | train loss 0.3957911 +| epoch 7 | 1923/ 2800 batches | train loss 0.3408271 +| epoch 7 | 1927/ 2800 batches | train loss 0.4008314 +| epoch 7 | 1931/ 2800 batches | train loss 0.3289582 +| epoch 7 | 1935/ 2800 batches | train loss 0.4014561 +| epoch 7 | 1939/ 2800 batches | train loss 0.4121335 +| epoch 7 | 1943/ 2800 batches | train loss 0.4327192 +| epoch 7 | 1947/ 2800 batches | train loss 0.4272908 +| epoch 7 | 1951/ 2800 batches | train loss 0.3867306 +| epoch 7 | 1955/ 2800 batches | train loss 0.4267607 +| epoch 7 | 1959/ 2800 batches | train loss 0.2431842 +| epoch 7 | 1963/ 2800 batches | train loss 0.3912274 +| epoch 7 | 1967/ 2800 batches | train loss 0.3172702 +| epoch 7 | 1971/ 2800 batches | train loss 0.3869626 +| epoch 7 | 1975/ 2800 batches | train loss 0.3971007 +| epoch 7 | 1979/ 2800 batches | train loss 0.3625891 +| epoch 7 | 1983/ 2800 batches | train loss 0.3440980 +| epoch 7 | 1987/ 2800 batches | train loss 0.3333147 +| epoch 7 | 1991/ 2800 batches | train loss 0.3265749 +| epoch 7 | 1995/ 2800 batches | train loss 0.3345798 +| epoch 7 | 1999/ 2800 batches | train loss 0.3589461 +| epoch 7 | 2003/ 2800 batches | train loss 0.3771123 +| epoch 7 | 2007/ 2800 batches | train loss 0.3526230 +| epoch 7 | 2011/ 2800 batches | train loss 0.3207724 +| epoch 7 | 2015/ 2800 batches | train loss 0.4059905 +| epoch 7 | 2019/ 2800 batches | train loss 0.3827474 +| epoch 7 | 2023/ 2800 batches | train loss 0.3392239 +| epoch 7 | 2027/ 2800 batches | train loss 0.3996516 +| epoch 7 | 2031/ 2800 batches | train loss 0.3520077 +| epoch 7 | 2035/ 2800 batches | train loss 0.2527621 +| epoch 7 | 2039/ 2800 batches | train loss 0.3487717 +| epoch 7 | 2043/ 2800 batches | train loss 0.3007004 +| epoch 7 | 2047/ 2800 batches | train loss 0.3971747 +| epoch 7 | 2051/ 2800 batches | train loss 0.3433914 +| epoch 7 | 2055/ 2800 batches | train loss 0.3930078 +| epoch 7 | 2059/ 2800 batches | train loss 0.3742254 +| epoch 7 | 2063/ 2800 batches | train loss 0.3114783 +| epoch 7 | 2067/ 2800 batches | train loss 0.3800392 +| epoch 7 | 2071/ 2800 batches | train loss 0.3582004 +| epoch 7 | 2075/ 2800 batches | train loss 0.3605689 +| epoch 7 | 2079/ 2800 batches | train loss 0.3037620 +| epoch 7 | 2083/ 2800 batches | train loss 0.2657667 +| epoch 7 | 2087/ 2800 batches | train loss 0.3472314 +| epoch 7 | 2091/ 2800 batches | train loss 0.3118139 +| epoch 7 | 2095/ 2800 batches | train loss 0.3601193 +| epoch 7 | 2099/ 2800 batches | train loss 0.3306299 +| epoch 7 | 2103/ 2800 batches | train loss 0.3196405 +| epoch 7 | 2107/ 2800 batches | train loss 0.4415168 +| epoch 7 | 2111/ 2800 batches | train loss 0.2932009 +| epoch 7 | 2115/ 2800 batches | train loss 0.3661244 +| epoch 7 | 2119/ 2800 batches | train loss 0.4090498 +| epoch 7 | 2123/ 2800 batches | train loss 0.3541172 +| epoch 7 | 2127/ 2800 batches | train loss 0.3305987 +| epoch 7 | 2131/ 2800 batches | train loss 0.3955081 +| epoch 7 | 2135/ 2800 batches | train loss 0.4059688 +| epoch 7 | 2139/ 2800 batches | train loss 0.4646295 +| epoch 7 | 2143/ 2800 batches | train loss 0.3528238 +| epoch 7 | 2147/ 2800 batches | train loss 0.3752037 +| epoch 7 | 2151/ 2800 batches | train loss 0.4121796 +| epoch 7 | 2155/ 2800 batches | train loss 0.3097441 +| epoch 7 | 2159/ 2800 batches | train loss 0.3287908 +| epoch 7 | 2163/ 2800 batches | train loss 0.3578062 +| epoch 7 | 2167/ 2800 batches | train loss 0.3595292 +| epoch 7 | 2171/ 2800 batches | train loss 0.3732692 +| epoch 7 | 2175/ 2800 batches | train loss 0.3393326 +| epoch 7 | 2179/ 2800 batches | train loss 0.3550173 +| epoch 7 | 2183/ 2800 batches | train loss 0.2967223 +| epoch 7 | 2187/ 2800 batches | train loss 0.3228072 +| epoch 7 | 2191/ 2800 batches | train loss 0.3530587 +| epoch 7 | 2195/ 2800 batches | train loss 0.4543901 +| epoch 7 | 2199/ 2800 batches | train loss 0.4242686 +| epoch 7 | 2203/ 2800 batches | train loss 0.3657899 +| epoch 7 | 2207/ 2800 batches | train loss 0.3910830 +| epoch 7 | 2211/ 2800 batches | train loss 0.3780370 +| epoch 7 | 2215/ 2800 batches | train loss 0.4377308 +| epoch 7 | 2219/ 2800 batches | train loss 0.4099298 +| epoch 7 | 2223/ 2800 batches | train loss 0.4155068 +| epoch 7 | 2227/ 2800 batches | train loss 0.3646652 +| epoch 7 | 2231/ 2800 batches | train loss 0.4260843 +| epoch 7 | 2235/ 2800 batches | train loss 0.3933586 +| epoch 7 | 2239/ 2800 batches | train loss 0.3968304 +| epoch 7 | 2243/ 2800 batches | train loss 0.4161529 +| epoch 7 | 2247/ 2800 batches | train loss 0.3474356 +| epoch 7 | 2251/ 2800 batches | train loss 0.3626380 +| epoch 7 | 2255/ 2800 batches | train loss 0.3886392 +| epoch 7 | 2259/ 2800 batches | train loss 0.3394085 +| epoch 7 | 2263/ 2800 batches | train loss 0.3550370 +| epoch 7 | 2267/ 2800 batches | train loss 0.4068065 +| epoch 7 | 2271/ 2800 batches | train loss 0.3843020 +| epoch 7 | 2275/ 2800 batches | train loss 0.2936404 +| epoch 7 | 2279/ 2800 batches | train loss 0.3300580 +| epoch 7 | 2283/ 2800 batches | train loss 0.4178496 +| epoch 7 | 2287/ 2800 batches | train loss 0.2106158 +| epoch 7 | 2291/ 2800 batches | train loss 0.3452841 +| epoch 7 | 2295/ 2800 batches | train loss 0.3880039 +| epoch 7 | 2299/ 2800 batches | train loss 0.3698711 +| epoch 7 | 2303/ 2800 batches | train loss 0.3231387 +| epoch 7 | 2307/ 2800 batches | train loss 0.3588090 +| epoch 7 | 2311/ 2800 batches | train loss 0.3147527 +| epoch 7 | 2315/ 2800 batches | train loss 0.3205034 +| epoch 7 | 2319/ 2800 batches | train loss 0.2496670 +| epoch 7 | 2323/ 2800 batches | train loss 0.4201843 +| epoch 7 | 2327/ 2800 batches | train loss 0.3929560 +| epoch 7 | 2331/ 2800 batches | train loss 0.4264113 +| epoch 7 | 2335/ 2800 batches | train loss 0.3531457 +| epoch 7 | 2339/ 2800 batches | train loss 0.3261988 +| epoch 7 | 2343/ 2800 batches | train loss 0.3827838 +| epoch 7 | 2347/ 2800 batches | train loss 0.3150593 +| epoch 7 | 2351/ 2800 batches | train loss 0.2815511 +| epoch 7 | 2355/ 2800 batches | train loss 0.3970471 +| epoch 7 | 2359/ 2800 batches | train loss 0.3895387 +| epoch 7 | 2363/ 2800 batches | train loss 0.4354640 +| epoch 7 | 2367/ 2800 batches | train loss 0.4278631 +| epoch 7 | 2371/ 2800 batches | train loss 0.4153233 +| epoch 7 | 2375/ 2800 batches | train loss 0.3906316 +| epoch 7 | 2379/ 2800 batches | train loss 0.3761692 +| epoch 7 | 2383/ 2800 batches | train loss 0.3722834 +| epoch 7 | 2387/ 2800 batches | train loss 0.3555024 +| epoch 7 | 2391/ 2800 batches | train loss 0.3968647 +| epoch 7 | 2395/ 2800 batches | train loss 0.3635997 +| epoch 7 | 2399/ 2800 batches | train loss 0.3693130 +| epoch 7 | 2403/ 2800 batches | train loss 0.3447595 +| epoch 7 | 2407/ 2800 batches | train loss 0.4365419 +| epoch 7 | 2411/ 2800 batches | train loss 0.3708406 +| epoch 7 | 2415/ 2800 batches | train loss 0.2877910 +| epoch 7 | 2419/ 2800 batches | train loss 0.3222626 +| epoch 7 | 2423/ 2800 batches | train loss 0.3387735 +| epoch 7 | 2427/ 2800 batches | train loss 0.3901830 +| epoch 7 | 2431/ 2800 batches | train loss 0.2820805 +| epoch 7 | 2435/ 2800 batches | train loss 0.3563537 +| epoch 7 | 2439/ 2800 batches | train loss 0.4021157 +| epoch 7 | 2443/ 2800 batches | train loss 0.3892097 +| epoch 7 | 2447/ 2800 batches | train loss 0.3971071 +| epoch 7 | 2451/ 2800 batches | train loss 0.3403469 +| epoch 7 | 2455/ 2800 batches | train loss 0.3914536 +| epoch 7 | 2459/ 2800 batches | train loss 0.2904154 +| epoch 7 | 2463/ 2800 batches | train loss 0.4687277 +| epoch 7 | 2467/ 2800 batches | train loss 0.3618991 +| epoch 7 | 2471/ 2800 batches | train loss 0.4525234 +| epoch 7 | 2475/ 2800 batches | train loss 0.2426482 +| epoch 7 | 2479/ 2800 batches | train loss 0.4419960 +| epoch 7 | 2483/ 2800 batches | train loss 0.3399481 +| epoch 7 | 2487/ 2800 batches | train loss 0.3666625 +| epoch 7 | 2491/ 2800 batches | train loss 0.3347631 +| epoch 7 | 2495/ 2800 batches | train loss 0.3564939 +| epoch 7 | 2499/ 2800 batches | train loss 0.2851666 +| epoch 7 | 2503/ 2800 batches | train loss 0.3495605 +| epoch 7 | 2507/ 2800 batches | train loss 0.3680031 +| epoch 7 | 2511/ 2800 batches | train loss 0.4015349 +| epoch 7 | 2515/ 2800 batches | train loss 0.3578161 +| epoch 7 | 2519/ 2800 batches | train loss 0.3452825 +| epoch 7 | 2523/ 2800 batches | train loss 0.3446955 +| epoch 7 | 2527/ 2800 batches | train loss 0.3577203 +| epoch 7 | 2531/ 2800 batches | train loss 0.3095404 +| epoch 7 | 2535/ 2800 batches | train loss 0.2786283 +| epoch 7 | 2539/ 2800 batches | train loss 0.3526537 +| epoch 7 | 2543/ 2800 batches | train loss 0.3905330 +| epoch 7 | 2547/ 2800 batches | train loss 0.3699531 +| epoch 7 | 2551/ 2800 batches | train loss 0.3747238 +| epoch 7 | 2555/ 2800 batches | train loss 0.3580424 +| epoch 7 | 2559/ 2800 batches | train loss 0.3519202 +| epoch 7 | 2563/ 2800 batches | train loss 0.4043494 +| epoch 7 | 2567/ 2800 batches | train loss 0.3915518 +| epoch 7 | 2571/ 2800 batches | train loss 0.3339443 +| epoch 7 | 2575/ 2800 batches | train loss 0.3373268 +| epoch 7 | 2579/ 2800 batches | train loss 0.4115012 +| epoch 7 | 2583/ 2800 batches | train loss 0.3029721 +| epoch 7 | 2587/ 2800 batches | train loss 0.4163057 +| epoch 7 | 2591/ 2800 batches | train loss 0.3304967 +| epoch 7 | 2595/ 2800 batches | train loss 0.3221592 +| epoch 7 | 2599/ 2800 batches | train loss 0.3642963 +| epoch 7 | 2603/ 2800 batches | train loss 0.2908776 +| epoch 7 | 2607/ 2800 batches | train loss 0.4085839 +| epoch 7 | 2611/ 2800 batches | train loss 0.3885190 +| epoch 7 | 2615/ 2800 batches | train loss 0.3523042 +| epoch 7 | 2619/ 2800 batches | train loss 0.3011354 +| epoch 7 | 2623/ 2800 batches | train loss 0.3109870 +| epoch 7 | 2627/ 2800 batches | train loss 0.3764160 +| epoch 7 | 2631/ 2800 batches | train loss 0.3623190 +| epoch 7 | 2635/ 2800 batches | train loss 0.4578578 +| epoch 7 | 2639/ 2800 batches | train loss 0.3640223 +| epoch 7 | 2643/ 2800 batches | train loss 0.4101021 +| epoch 7 | 2647/ 2800 batches | train loss 0.3944377 +| epoch 7 | 2651/ 2800 batches | train loss 0.3529707 +| epoch 7 | 2655/ 2800 batches | train loss 0.4086833 +| epoch 7 | 2659/ 2800 batches | train loss 0.2823545 +| epoch 7 | 2663/ 2800 batches | train loss 0.4151182 +| epoch 7 | 2667/ 2800 batches | train loss 0.3839273 +| epoch 7 | 2671/ 2800 batches | train loss 0.2971001 +| epoch 7 | 2675/ 2800 batches | train loss 0.3667136 +| epoch 7 | 2679/ 2800 batches | train loss 0.3736833 +| epoch 7 | 2683/ 2800 batches | train loss 0.3622596 +| epoch 7 | 2687/ 2800 batches | train loss 0.3282691 +| epoch 7 | 2691/ 2800 batches | train loss 0.3508562 +| epoch 7 | 2695/ 2800 batches | train loss 0.3857372 +| epoch 7 | 2699/ 2800 batches | train loss 0.3765803 +| epoch 7 | 2703/ 2800 batches | train loss 0.3862244 +| epoch 7 | 2707/ 2800 batches | train loss 0.3325135 +| epoch 7 | 2711/ 2800 batches | train loss 0.3944290 +| epoch 7 | 2715/ 2800 batches | train loss 0.4896258 +| epoch 7 | 2719/ 2800 batches | train loss 0.3478422 +| epoch 7 | 2723/ 2800 batches | train loss 0.3393914 +| epoch 7 | 2727/ 2800 batches | train loss 0.3597974 +| epoch 7 | 2731/ 2800 batches | train loss 0.3370109 +| epoch 7 | 2735/ 2800 batches | train loss 0.4179820 +| epoch 7 | 2739/ 2800 batches | train loss 0.3451702 +| epoch 7 | 2743/ 2800 batches | train loss 0.3518550 +| epoch 7 | 2747/ 2800 batches | train loss 0.3388920 +| epoch 7 | 2751/ 2800 batches | train loss 0.3884616 +| epoch 7 | 2755/ 2800 batches | train loss 0.4039087 +| epoch 7 | 2759/ 2800 batches | train loss 0.3832127 +| epoch 7 | 2763/ 2800 batches | train loss 0.3746148 +| epoch 7 | 2767/ 2800 batches | train loss 0.3760789 +| epoch 7 | 2771/ 2800 batches | train loss 0.3688651 +| epoch 7 | 2775/ 2800 batches | train loss 0.3816455 +| epoch 7 | 2779/ 2800 batches | train loss 0.4181151 +| epoch 7 | 2783/ 2800 batches | train loss 0.3817746 +| epoch 7 | 2787/ 2800 batches | train loss 0.3445862 +| epoch 7 | 2791/ 2800 batches | train loss 0.3306986 +| epoch 7 | 2795/ 2800 batches | train loss 0.3518404 +| epoch 7 | 2799/ 2800 batches | train loss 0.3114580 +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 2800 batches | test loss 0.5077752 +| epoch 7 | 7/ 2800 batches | test loss 0.4435452 +| epoch 7 | 11/ 2800 batches | test loss 0.3941751 +| epoch 7 | 15/ 2800 batches | test loss 0.5648985 +| epoch 7 | 19/ 2800 batches | test loss 0.5755360 +| epoch 7 | 23/ 2800 batches | test loss 0.3934594 +| epoch 7 | 27/ 2800 batches | test loss 0.3742010 +| epoch 7 | 31/ 2800 batches | test loss 0.5791382 +| epoch 7 | 35/ 2800 batches | test loss 0.5010300 +| epoch 7 | 39/ 2800 batches | test loss 0.4970757 +| epoch 7 | 43/ 2800 batches | test loss 0.4804649 +| epoch 7 | 47/ 2800 batches | test loss 0.5424148 +| epoch 7 | 51/ 2800 batches | test loss 0.5460074 +| epoch 7 | 55/ 2800 batches | test loss 0.4406656 +| epoch 7 | 59/ 2800 batches | test loss 0.3929342 +| epoch 7 | 63/ 2800 batches | test loss 0.5366561 +| epoch 7 | 67/ 2800 batches | test loss 0.4778844 +| epoch 7 | 71/ 2800 batches | test loss 0.4438653 +| epoch 7 | 75/ 2800 batches | test loss 0.5122945 +| epoch 7 | 79/ 2800 batches | test loss 0.4054781 +| epoch 7 | 83/ 2800 batches | test loss 0.4387192 +| epoch 7 | 87/ 2800 batches | test loss 0.4621640 +| epoch 7 | 91/ 2800 batches | test loss 0.4939016 +| epoch 7 | 95/ 2800 batches | test loss 0.5501004 +| epoch 7 | 99/ 2800 batches | test loss 0.8139144 +| epoch 7 | 103/ 2800 batches | test loss 0.5296293 +| epoch 7 | 107/ 2800 batches | test loss 0.6579833 +| epoch 7 | 111/ 2800 batches | test loss 0.5303842 +| epoch 7 | 115/ 2800 batches | test loss 0.4034418 +| epoch 7 | 119/ 2800 batches | test loss 0.4166447 +| epoch 7 | 123/ 2800 batches | test loss 0.4315562 +| epoch 7 | 127/ 2800 batches | test loss 0.3521261 +| epoch 7 | 131/ 2800 batches | test loss 0.6158830 +| epoch 7 | 135/ 2800 batches | test loss 0.4603536 +| epoch 7 | 139/ 2800 batches | test loss 0.3397229 +| epoch 7 | 143/ 2800 batches | test loss 0.3748147 +| epoch 7 | 147/ 2800 batches | test loss 0.4390913 +| epoch 7 | 151/ 2800 batches | test loss 0.5243665 +| epoch 7 | 155/ 2800 batches | test loss 0.5894244 +| epoch 7 | 159/ 2800 batches | test loss 0.6010748 +| epoch 7 | 163/ 2800 batches | test loss 0.4975522 +| epoch 7 | 167/ 2800 batches | test loss 0.5202873 +| epoch 7 | 171/ 2800 batches | test loss 0.4990349 +| epoch 7 | 175/ 2800 batches | test loss 0.4663207 +| epoch 7 | 179/ 2800 batches | test loss 0.5695395 +| epoch 7 | 183/ 2800 batches | test loss 0.4475835 +| epoch 7 | 187/ 2800 batches | test loss 0.5898719 +| epoch 7 | 191/ 2800 batches | test loss 0.5666555 +| epoch 7 | 195/ 2800 batches | test loss 0.4009925 +| epoch 7 | 199/ 2800 batches | test loss 0.5645608 +| epoch 7 | 203/ 2800 batches | test loss 0.5385960 +| epoch 7 | 207/ 2800 batches | test loss 0.5886902 +| epoch 7 | 211/ 2800 batches | test loss 0.3135762 +| epoch 7 | 215/ 2800 batches | test loss 0.4658192 +| epoch 7 | 219/ 2800 batches | test loss 0.4599816 +| epoch 7 | 223/ 2800 batches | test loss 0.6811926 +| epoch 7 | 227/ 2800 batches | test loss 0.5934213 +| epoch 7 | 231/ 2800 batches | test loss 0.5361804 +| epoch 7 | 235/ 2800 batches | test loss 0.4332949 +| epoch 7 | 239/ 2800 batches | test loss 0.4287401 +| epoch 7 | 243/ 2800 batches | test loss 0.4355486 +| epoch 7 | 247/ 2800 batches | test loss 0.4092965 +| epoch 7 | 251/ 2800 batches | test loss 0.4277009 +| epoch 7 | 255/ 2800 batches | test loss 0.4409218 +| epoch 7 | 259/ 2800 batches | test loss 0.4998934 +| epoch 7 | 263/ 2800 batches | test loss 0.3803553 +| epoch 7 | 267/ 2800 batches | test loss 0.5428258 +| epoch 7 | 271/ 2800 batches | test loss 0.4777265 +| epoch 7 | 275/ 2800 batches | test loss 0.3552468 +| epoch 7 | 279/ 2800 batches | test loss 0.5140915 +| epoch 7 | 283/ 2800 batches | test loss 0.4554866 +| epoch 7 | 287/ 2800 batches | test loss 0.4783574 +| epoch 7 | 291/ 2800 batches | test loss 0.4563638 +| epoch 7 | 295/ 2800 batches | test loss 0.4882379 +| epoch 7 | 299/ 2800 batches | test loss 0.5888679 +| epoch 7 | 303/ 2800 batches | test loss 0.5699664 +| epoch 7 | 307/ 2800 batches | test loss 0.5312603 +| epoch 7 | 311/ 2800 batches | test loss 0.4813143 +| epoch 7 | 315/ 2800 batches | test loss 0.5149555 +| epoch 7 | 319/ 2800 batches | test loss 0.3837437 +| epoch 7 | 323/ 2800 batches | test loss 0.5792387 +| epoch 7 | 327/ 2800 batches | test loss 0.5364991 +| epoch 7 | 331/ 2800 batches | test loss 0.3913201 +| epoch 7 | 335/ 2800 batches | test loss 0.5673564 +| epoch 7 | 339/ 2800 batches | test loss 0.5670233 +| epoch 7 | 343/ 2800 batches | test loss 0.5068365 +| epoch 7 | 347/ 2800 batches | test loss 0.6395526 +| epoch 7 | 351/ 2800 batches | test loss 0.4571574 +| epoch 7 | 355/ 2800 batches | test loss 0.3449813 +| epoch 7 | 359/ 2800 batches | test loss 0.4698987 +| epoch 7 | 363/ 2800 batches | test loss 0.5417398 +| epoch 7 | 367/ 2800 batches | test loss 0.3651725 +| epoch 7 | 371/ 2800 batches | test loss 0.4641380 +| epoch 7 | 375/ 2800 batches | test loss 0.6123497 +| epoch 7 | 379/ 2800 batches | test loss 0.5430081 +| epoch 7 | 383/ 2800 batches | test loss 0.4466618 +| epoch 7 | 387/ 2800 batches | test loss 0.4797303 +| epoch 7 | 391/ 2800 batches | test loss 0.3875873 +| epoch 7 | 395/ 2800 batches | test loss 0.6048930 +| epoch 7 | 399/ 2800 batches | test loss 0.4319689 +| epoch 7 | 403/ 2800 batches | test loss 0.4721528 +| epoch 7 | 407/ 2800 batches | test loss 0.3687541 +| epoch 7 | 411/ 2800 batches | test loss 0.6919146 +| epoch 7 | 415/ 2800 batches | test loss 0.4009501 +| epoch 7 | 419/ 2800 batches | test loss 0.6237300 +| epoch 7 | 423/ 2800 batches | test loss 0.5498790 +| epoch 7 | 427/ 2800 batches | test loss 0.5081613 +| epoch 7 | 431/ 2800 batches | test loss 0.6599317 +| epoch 7 | 435/ 2800 batches | test loss 0.4554773 +| epoch 7 | 439/ 2800 batches | test loss 0.5392078 +| epoch 7 | 443/ 2800 batches | test loss 0.4287221 +| epoch 7 | 447/ 2800 batches | test loss 0.4935081 +| epoch 7 | 451/ 2800 batches | test loss 0.4041281 +| epoch 7 | 455/ 2800 batches | test loss 0.3962771 +| epoch 7 | 459/ 2800 batches | test loss 0.4338968 +| epoch 7 | 463/ 2800 batches | test loss 0.4724733 +| epoch 7 | 467/ 2800 batches | test loss 0.5238991 +| epoch 7 | 471/ 2800 batches | test loss 0.4816707 +| epoch 7 | 475/ 2800 batches | test loss 0.5231832 +| epoch 7 | 479/ 2800 batches | test loss 0.4978411 +| epoch 7 | 483/ 2800 batches | test loss 0.5783191 +| epoch 7 | 487/ 2800 batches | test loss 0.4926659 +| epoch 7 | 491/ 2800 batches | test loss 0.4550481 +| epoch 7 | 495/ 2800 batches | test loss 0.4440692 +| epoch 7 | 499/ 2800 batches | test loss 0.4564684 +| epoch 7 | 503/ 2800 batches | test loss 0.4925337 +| epoch 7 | 507/ 2800 batches | test loss 0.4634597 +| epoch 7 | 511/ 2800 batches | test loss 0.5285319 +| epoch 7 | 515/ 2800 batches | test loss 0.4667286 +| epoch 7 | 519/ 2800 batches | test loss 0.5878050 +| epoch 7 | 523/ 2800 batches | test loss 0.5395341 +| epoch 7 | 527/ 2800 batches | test loss 0.3838577 +| epoch 7 | 531/ 2800 batches | test loss 0.4675539 +| epoch 7 | 535/ 2800 batches | test loss 0.4711144 +| epoch 7 | 539/ 2800 batches | test loss 0.3822912 +| epoch 7 | 543/ 2800 batches | test loss 0.6013713 +| epoch 7 | 547/ 2800 batches | test loss 0.4525638 +| epoch 7 | 551/ 2800 batches | test loss 0.5137588 +| epoch 7 | 555/ 2800 batches | test loss 0.4986758 +| epoch 7 | 559/ 2800 batches | test loss 0.5195304 +| epoch 7 | 563/ 2800 batches | test loss 0.4686208 +| epoch 7 | 567/ 2800 batches | test loss 0.4516723 +| epoch 7 | 571/ 2800 batches | test loss 0.5291201 +| epoch 7 | 575/ 2800 batches | test loss 0.6457435 +| epoch 7 | 579/ 2800 batches | test loss 0.4406856 +| epoch 7 | 583/ 2800 batches | test loss 0.4423309 +| epoch 7 | 587/ 2800 batches | test loss 0.5607366 +| epoch 7 | 591/ 2800 batches | test loss 0.5161164 +| epoch 7 | 595/ 2800 batches | test loss 0.2969013 +| epoch 7 | 599/ 2800 batches | test loss 0.4207780 +| epoch 7 | 603/ 2800 batches | test loss 0.4344356 +| epoch 7 | 607/ 2800 batches | test loss 0.5124624 +| epoch 7 | 611/ 2800 batches | test loss 0.4532950 +| epoch 7 | 615/ 2800 batches | test loss 0.4685751 +| epoch 7 | 619/ 2800 batches | test loss 0.6114532 +| epoch 7 | 623/ 2800 batches | test loss 0.5825683 +| epoch 7 | 627/ 2800 batches | test loss 0.6716598 +| epoch 7 | 631/ 2800 batches | test loss 0.5475045 +| epoch 7 | 635/ 2800 batches | test loss 0.5805717 +| epoch 7 | 639/ 2800 batches | test loss 0.5317178 +| epoch 7 | 643/ 2800 batches | test loss 0.4927356 +| epoch 7 | 647/ 2800 batches | test loss 0.3805230 +| epoch 7 | 651/ 2800 batches | test loss 0.5376776 +| epoch 7 | 655/ 2800 batches | test loss 0.4284830 +| epoch 7 | 659/ 2800 batches | test loss 0.4589590 +| epoch 7 | 663/ 2800 batches | test loss 0.4601696 +| epoch 7 | 667/ 2800 batches | test loss 0.5186832 +| epoch 7 | 671/ 2800 batches | test loss 0.5637182 +| epoch 7 | 675/ 2800 batches | test loss 0.5062217 +| epoch 7 | 679/ 2800 batches | test loss 0.3982415 +| epoch 7 | 683/ 2800 batches | test loss 0.4147130 +| epoch 7 | 687/ 2800 batches | test loss 0.6290660 +| epoch 7 | 691/ 2800 batches | test loss 0.4293702 +| epoch 7 | 695/ 2800 batches | test loss 0.4121187 +| epoch 7 | 699/ 2800 batches | test loss 0.4370305 +| epoch 7 | final test loss 0.4920, do not save model! +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 2800 batches | train loss 0.2972332 +| epoch 8 | 7/ 2800 batches | train loss 0.2916184 +| epoch 8 | 11/ 2800 batches | train loss 0.4204843 +| epoch 8 | 15/ 2800 batches | train loss 0.3378643 +| epoch 8 | 19/ 2800 batches | train loss 0.3465049 +| epoch 8 | 23/ 2800 batches | train loss 0.2972284 +| epoch 8 | 27/ 2800 batches | train loss 0.2815152 +| epoch 8 | 31/ 2800 batches | train loss 0.3560289 +| epoch 8 | 35/ 2800 batches | train loss 0.3530794 +| epoch 8 | 39/ 2800 batches | train loss 0.2897188 +| epoch 8 | 43/ 2800 batches | train loss 0.3177562 +| epoch 8 | 47/ 2800 batches | train loss 0.3472637 +| epoch 8 | 51/ 2800 batches | train loss 0.3842348 +| epoch 8 | 55/ 2800 batches | train loss 0.3566434 +| epoch 8 | 59/ 2800 batches | train loss 0.2818988 +| epoch 8 | 63/ 2800 batches | train loss 0.3321393 +| epoch 8 | 67/ 2800 batches | train loss 0.3383120 +| epoch 8 | 71/ 2800 batches | train loss 0.2937343 +| epoch 8 | 75/ 2800 batches | train loss 0.3119967 +| epoch 8 | 79/ 2800 batches | train loss 0.3066607 +| epoch 8 | 83/ 2800 batches | train loss 0.3328708 +| epoch 8 | 87/ 2800 batches | train loss 0.3364678 +| epoch 8 | 91/ 2800 batches | train loss 0.4096974 +| epoch 8 | 95/ 2800 batches | train loss 0.3512676 +| epoch 8 | 99/ 2800 batches | train loss 0.2729569 +| epoch 8 | 103/ 2800 batches | train loss 0.3334668 +| epoch 8 | 107/ 2800 batches | train loss 0.3912849 +| epoch 8 | 111/ 2800 batches | train loss 0.3545099 +| epoch 8 | 115/ 2800 batches | train loss 0.3236607 +| epoch 8 | 119/ 2800 batches | train loss 0.2937896 +| epoch 8 | 123/ 2800 batches | train loss 0.3442137 +| epoch 8 | 127/ 2800 batches | train loss 0.3137139 +| epoch 8 | 131/ 2800 batches | train loss 0.3314713 +| epoch 8 | 135/ 2800 batches | train loss 0.3327470 +| epoch 8 | 139/ 2800 batches | train loss 0.3181979 +| epoch 8 | 143/ 2800 batches | train loss 0.3515837 +| epoch 8 | 147/ 2800 batches | train loss 0.1985905 +| epoch 8 | 151/ 2800 batches | train loss 0.2803490 +| epoch 8 | 155/ 2800 batches | train loss 0.2550831 +| epoch 8 | 159/ 2800 batches | train loss 0.3156260 +| epoch 8 | 163/ 2800 batches | train loss 0.2880922 +| epoch 8 | 167/ 2800 batches | train loss 0.2814462 +| epoch 8 | 171/ 2800 batches | train loss 0.3301715 +| epoch 8 | 175/ 2800 batches | train loss 0.4108769 +| epoch 8 | 179/ 2800 batches | train loss 0.3330605 +| epoch 8 | 183/ 2800 batches | train loss 0.3158009 +| epoch 8 | 187/ 2800 batches | train loss 0.3479173 +| epoch 8 | 191/ 2800 batches | train loss 0.2987083 +| epoch 8 | 195/ 2800 batches | train loss 0.3030728 +| epoch 8 | 199/ 2800 batches | train loss 0.3958703 +| epoch 8 | 203/ 2800 batches | train loss 0.3791336 +| epoch 8 | 207/ 2800 batches | train loss 0.3781931 +| epoch 8 | 211/ 2800 batches | train loss 0.2859796 +| epoch 8 | 215/ 2800 batches | train loss 0.3249370 +| epoch 8 | 219/ 2800 batches | train loss 0.3571307 +| epoch 8 | 223/ 2800 batches | train loss 0.2760806 +| epoch 8 | 227/ 2800 batches | train loss 0.3595714 +| epoch 8 | 231/ 2800 batches | train loss 0.2940708 +| epoch 8 | 235/ 2800 batches | train loss 0.3564828 +| epoch 8 | 239/ 2800 batches | train loss 0.3225749 +| epoch 8 | 243/ 2800 batches | train loss 0.3364209 +| epoch 8 | 247/ 2800 batches | train loss 0.3493682 +| epoch 8 | 251/ 2800 batches | train loss 0.3960842 +| epoch 8 | 255/ 2800 batches | train loss 0.2716620 +| epoch 8 | 259/ 2800 batches | train loss 0.3259383 +| epoch 8 | 263/ 2800 batches | train loss 0.3500841 +| epoch 8 | 267/ 2800 batches | train loss 0.2657773 +| epoch 8 | 271/ 2800 batches | train loss 0.3346146 +| epoch 8 | 275/ 2800 batches | train loss 0.2886877 +| epoch 8 | 279/ 2800 batches | train loss 0.3275447 +| epoch 8 | 283/ 2800 batches | train loss 0.3842897 +| epoch 8 | 287/ 2800 batches | train loss 0.3623680 +| epoch 8 | 291/ 2800 batches | train loss 0.3411218 +| epoch 8 | 295/ 2800 batches | train loss 0.3608076 +| epoch 8 | 299/ 2800 batches | train loss 0.3024180 +| epoch 8 | 303/ 2800 batches | train loss 0.3041397 +| epoch 8 | 307/ 2800 batches | train loss 0.3085670 +| epoch 8 | 311/ 2800 batches | train loss 0.3258146 +| epoch 8 | 315/ 2800 batches | train loss 0.3764538 +| epoch 8 | 319/ 2800 batches | train loss 0.3044850 +| epoch 8 | 323/ 2800 batches | train loss 0.3701699 +| epoch 8 | 327/ 2800 batches | train loss 0.3617582 +| epoch 8 | 331/ 2800 batches | train loss 0.3522585 +| epoch 8 | 335/ 2800 batches | train loss 0.3123382 +| epoch 8 | 339/ 2800 batches | train loss 0.4043537 +| epoch 8 | 343/ 2800 batches | train loss 0.3368452 +| epoch 8 | 347/ 2800 batches | train loss 0.3415231 +| epoch 8 | 351/ 2800 batches | train loss 0.2955272 +| epoch 8 | 355/ 2800 batches | train loss 0.3305886 +| epoch 8 | 359/ 2800 batches | train loss 0.3019193 +| epoch 8 | 363/ 2800 batches | train loss 0.3388769 +| epoch 8 | 367/ 2800 batches | train loss 0.3484031 +| epoch 8 | 371/ 2800 batches | train loss 0.3323844 +| epoch 8 | 375/ 2800 batches | train loss 0.3396837 +| epoch 8 | 379/ 2800 batches | train loss 0.3293495 +| epoch 8 | 383/ 2800 batches | train loss 0.3073542 +| epoch 8 | 387/ 2800 batches | train loss 0.3077088 +| epoch 8 | 391/ 2800 batches | train loss 0.2619885 +| epoch 8 | 395/ 2800 batches | train loss 0.3045867 +| epoch 8 | 399/ 2800 batches | train loss 0.3135627 +| epoch 8 | 403/ 2800 batches | train loss 0.4163736 +| epoch 8 | 407/ 2800 batches | train loss 0.2780842 +| epoch 8 | 411/ 2800 batches | train loss 0.3795824 +| epoch 8 | 415/ 2800 batches | train loss 0.3166267 +| epoch 8 | 419/ 2800 batches | train loss 0.3289213 +| epoch 8 | 423/ 2800 batches | train loss 0.2433039 +| epoch 8 | 427/ 2800 batches | train loss 0.3337577 +| epoch 8 | 431/ 2800 batches | train loss 0.4330170 +| epoch 8 | 435/ 2800 batches | train loss 0.3182900 +| epoch 8 | 439/ 2800 batches | train loss 0.3468485 +| epoch 8 | 443/ 2800 batches | train loss 0.2788198 +| epoch 8 | 447/ 2800 batches | train loss 0.2970365 +| epoch 8 | 451/ 2800 batches | train loss 0.2971386 +| epoch 8 | 455/ 2800 batches | train loss 0.2891416 +| epoch 8 | 459/ 2800 batches | train loss 0.3158466 +| epoch 8 | 463/ 2800 batches | train loss 0.3541179 +| epoch 8 | 467/ 2800 batches | train loss 0.3153260 +| epoch 8 | 471/ 2800 batches | train loss 0.2915727 +| epoch 8 | 475/ 2800 batches | train loss 0.2603338 +| epoch 8 | 479/ 2800 batches | train loss 0.2932023 +| epoch 8 | 483/ 2800 batches | train loss 0.3045503 +| epoch 8 | 487/ 2800 batches | train loss 0.3988415 +| epoch 8 | 491/ 2800 batches | train loss 0.3013292 +| epoch 8 | 495/ 2800 batches | train loss 0.2768000 +| epoch 8 | 499/ 2800 batches | train loss 0.3723939 +| epoch 8 | 503/ 2800 batches | train loss 0.3342016 +| epoch 8 | 507/ 2800 batches | train loss 0.3133495 +| epoch 8 | 511/ 2800 batches | train loss 0.2769442 +| epoch 8 | 515/ 2800 batches | train loss 0.2762412 +| epoch 8 | 519/ 2800 batches | train loss 0.2707557 +| epoch 8 | 523/ 2800 batches | train loss 0.2992112 +| epoch 8 | 527/ 2800 batches | train loss 0.3630154 +| epoch 8 | 531/ 2800 batches | train loss 0.2865462 +| epoch 8 | 535/ 2800 batches | train loss 0.2836992 +| epoch 8 | 539/ 2800 batches | train loss 0.3033283 +| epoch 8 | 543/ 2800 batches | train loss 0.3374395 +| epoch 8 | 547/ 2800 batches | train loss 0.3166823 +| epoch 8 | 551/ 2800 batches | train loss 0.3744363 +| epoch 8 | 555/ 2800 batches | train loss 0.3396770 +| epoch 8 | 559/ 2800 batches | train loss 0.2770470 +| epoch 8 | 563/ 2800 batches | train loss 0.4202607 +| epoch 8 | 567/ 2800 batches | train loss 0.4639869 +| epoch 8 | 571/ 2800 batches | train loss 0.3577464 +| epoch 8 | 575/ 2800 batches | train loss 0.3327393 +| epoch 8 | 579/ 2800 batches | train loss 0.3545964 +| epoch 8 | 583/ 2800 batches | train loss 0.2933186 +| epoch 8 | 587/ 2800 batches | train loss 0.3118776 +| epoch 8 | 591/ 2800 batches | train loss 0.3201090 +| epoch 8 | 595/ 2800 batches | train loss 0.2669754 +| epoch 8 | 599/ 2800 batches | train loss 0.3428210 +| epoch 8 | 603/ 2800 batches | train loss 0.3593716 +| epoch 8 | 607/ 2800 batches | train loss 0.3085420 +| epoch 8 | 611/ 2800 batches | train loss 0.3713695 +| epoch 8 | 615/ 2800 batches | train loss 0.3649834 +| epoch 8 | 619/ 2800 batches | train loss 0.3474168 +| epoch 8 | 623/ 2800 batches | train loss 0.3571914 +| epoch 8 | 627/ 2800 batches | train loss 0.3390087 +| epoch 8 | 631/ 2800 batches | train loss 0.3244322 +| epoch 8 | 635/ 2800 batches | train loss 0.3816780 +| epoch 8 | 639/ 2800 batches | train loss 0.2900980 +| epoch 8 | 643/ 2800 batches | train loss 0.3362958 +| epoch 8 | 647/ 2800 batches | train loss 0.3191591 +| epoch 8 | 651/ 2800 batches | train loss 0.3602423 +| epoch 8 | 655/ 2800 batches | train loss 0.3254831 +| epoch 8 | 659/ 2800 batches | train loss 0.3779852 +| epoch 8 | 663/ 2800 batches | train loss 0.3069111 +| epoch 8 | 667/ 2800 batches | train loss 0.3002205 +| epoch 8 | 671/ 2800 batches | train loss 0.3163567 +| epoch 8 | 675/ 2800 batches | train loss 0.2563427 +| epoch 8 | 679/ 2800 batches | train loss 0.3334551 +| epoch 8 | 683/ 2800 batches | train loss 0.2945163 +| epoch 8 | 687/ 2800 batches | train loss 0.3101507 +| epoch 8 | 691/ 2800 batches | train loss 0.3694669 +| epoch 8 | 695/ 2800 batches | train loss 0.3396486 +| epoch 8 | 699/ 2800 batches | train loss 0.3639562 +| epoch 8 | 703/ 2800 batches | train loss 0.3159829 +| epoch 8 | 707/ 2800 batches | train loss 0.2847294 +| epoch 8 | 711/ 2800 batches | train loss 0.3038000 +| epoch 8 | 715/ 2800 batches | train loss 0.2864587 +| epoch 8 | 719/ 2800 batches | train loss 0.3041620 +| epoch 8 | 723/ 2800 batches | train loss 0.3180282 +| epoch 8 | 727/ 2800 batches | train loss 0.3173023 +| epoch 8 | 731/ 2800 batches | train loss 0.3514827 +| epoch 8 | 735/ 2800 batches | train loss 0.2922849 +| epoch 8 | 739/ 2800 batches | train loss 0.3155190 +| epoch 8 | 743/ 2800 batches | train loss 0.2961824 +| epoch 8 | 747/ 2800 batches | train loss 0.3003388 +| epoch 8 | 751/ 2800 batches | train loss 0.2732952 +| epoch 8 | 755/ 2800 batches | train loss 0.3569930 +| epoch 8 | 759/ 2800 batches | train loss 0.3334831 +| epoch 8 | 763/ 2800 batches | train loss 0.3182198 +| epoch 8 | 767/ 2800 batches | train loss 0.3362253 +| epoch 8 | 771/ 2800 batches | train loss 0.3126910 +| epoch 8 | 775/ 2800 batches | train loss 0.3211966 +| epoch 8 | 779/ 2800 batches | train loss 0.3838350 +| epoch 8 | 783/ 2800 batches | train loss 0.3395598 +| epoch 8 | 787/ 2800 batches | train loss 0.2856908 +| epoch 8 | 791/ 2800 batches | train loss 0.3696911 +| epoch 8 | 795/ 2800 batches | train loss 0.3164408 +| epoch 8 | 799/ 2800 batches | train loss 0.3199244 +| epoch 8 | 803/ 2800 batches | train loss 0.3451295 +| epoch 8 | 807/ 2800 batches | train loss 0.3279605 +| epoch 8 | 811/ 2800 batches | train loss 0.3549668 +| epoch 8 | 815/ 2800 batches | train loss 0.3624984 +| epoch 8 | 819/ 2800 batches | train loss 0.3691582 +| epoch 8 | 823/ 2800 batches | train loss 0.3409393 +| epoch 8 | 827/ 2800 batches | train loss 0.2917536 +| epoch 8 | 831/ 2800 batches | train loss 0.3519516 +| epoch 8 | 835/ 2800 batches | train loss 0.3409562 +| epoch 8 | 839/ 2800 batches | train loss 0.3309845 +| epoch 8 | 843/ 2800 batches | train loss 0.3097865 +| epoch 8 | 847/ 2800 batches | train loss 0.2990711 +| epoch 8 | 851/ 2800 batches | train loss 0.3006627 +| epoch 8 | 855/ 2800 batches | train loss 0.3248916 +| epoch 8 | 859/ 2800 batches | train loss 0.3090461 +| epoch 8 | 863/ 2800 batches | train loss 0.3836630 +| epoch 8 | 867/ 2800 batches | train loss 0.2759950 +| epoch 8 | 871/ 2800 batches | train loss 0.2850708 +| epoch 8 | 875/ 2800 batches | train loss 0.3649260 +| epoch 8 | 879/ 2800 batches | train loss 0.2708354 +| epoch 8 | 883/ 2800 batches | train loss 0.4418461 +| epoch 8 | 887/ 2800 batches | train loss 0.3349776 +| epoch 8 | 891/ 2800 batches | train loss 0.3063543 +| epoch 8 | 895/ 2800 batches | train loss 0.2736077 +| epoch 8 | 899/ 2800 batches | train loss 0.3138047 +| epoch 8 | 903/ 2800 batches | train loss 0.3407496 +| epoch 8 | 907/ 2800 batches | train loss 0.3522075 +| epoch 8 | 911/ 2800 batches | train loss 0.3248535 +| epoch 8 | 915/ 2800 batches | train loss 0.2815077 +| epoch 8 | 919/ 2800 batches | train loss 0.3945302 +| epoch 8 | 923/ 2800 batches | train loss 0.2940339 +| epoch 8 | 927/ 2800 batches | train loss 0.2885546 +| epoch 8 | 931/ 2800 batches | train loss 0.3416523 +| epoch 8 | 935/ 2800 batches | train loss 0.3550812 +| epoch 8 | 939/ 2800 batches | train loss 0.4469422 +| epoch 8 | 943/ 2800 batches | train loss 0.3170855 +| epoch 8 | 947/ 2800 batches | train loss 0.2986404 +| epoch 8 | 951/ 2800 batches | train loss 0.4016641 +| epoch 8 | 955/ 2800 batches | train loss 0.2772862 +| epoch 8 | 959/ 2800 batches | train loss 0.3437534 +| epoch 8 | 963/ 2800 batches | train loss 0.3420576 +| epoch 8 | 967/ 2800 batches | train loss 0.4327755 +| epoch 8 | 971/ 2800 batches | train loss 0.3369165 +| epoch 8 | 975/ 2800 batches | train loss 0.3673226 +| epoch 8 | 979/ 2800 batches | train loss 0.3935516 +| epoch 8 | 983/ 2800 batches | train loss 0.3213627 +| epoch 8 | 987/ 2800 batches | train loss 0.2936587 +| epoch 8 | 991/ 2800 batches | train loss 0.3762641 +| epoch 8 | 995/ 2800 batches | train loss 0.1252978 +| epoch 8 | 999/ 2800 batches | train loss 0.3136228 +| epoch 8 | 1003/ 2800 batches | train loss 0.3558203 +| epoch 8 | 1007/ 2800 batches | train loss 0.3516163 +| epoch 8 | 1011/ 2800 batches | train loss 0.2819038 +| epoch 8 | 1015/ 2800 batches | train loss 0.3526340 +| epoch 8 | 1019/ 2800 batches | train loss 0.2349727 +| epoch 8 | 1023/ 2800 batches | train loss 0.3427012 +| epoch 8 | 1027/ 2800 batches | train loss 0.2701744 +| epoch 8 | 1031/ 2800 batches | train loss 0.3499861 +| epoch 8 | 1035/ 2800 batches | train loss 0.3459699 +| epoch 8 | 1039/ 2800 batches | train loss 0.3263167 +| epoch 8 | 1043/ 2800 batches | train loss 0.3857492 +| epoch 8 | 1047/ 2800 batches | train loss 0.1300410 +| epoch 8 | 1051/ 2800 batches | train loss 0.3686308 +| epoch 8 | 1055/ 2800 batches | train loss 0.3343827 +| epoch 8 | 1059/ 2800 batches | train loss 0.2741174 +| epoch 8 | 1063/ 2800 batches | train loss 0.4234708 +| epoch 8 | 1067/ 2800 batches | train loss 0.3118447 +| epoch 8 | 1071/ 2800 batches | train loss 0.2450769 +| epoch 8 | 1075/ 2800 batches | train loss 0.3588480 +| epoch 8 | 1079/ 2800 batches | train loss 0.4457151 +| epoch 8 | 1083/ 2800 batches | train loss 0.2569353 +| epoch 8 | 1087/ 2800 batches | train loss 0.3736789 +| epoch 8 | 1091/ 2800 batches | train loss 0.3642049 +| epoch 8 | 1095/ 2800 batches | train loss 0.2526192 +| epoch 8 | 1099/ 2800 batches | train loss 0.2769311 +| epoch 8 | 1103/ 2800 batches | train loss 0.3640654 +| epoch 8 | 1107/ 2800 batches | train loss 0.3744933 +| epoch 8 | 1111/ 2800 batches | train loss 0.3028875 +| epoch 8 | 1115/ 2800 batches | train loss 0.3006902 +| epoch 8 | 1119/ 2800 batches | train loss 0.3684945 +| epoch 8 | 1123/ 2800 batches | train loss 0.3999600 +| epoch 8 | 1127/ 2800 batches | train loss 0.3695683 +| epoch 8 | 1131/ 2800 batches | train loss 0.3473157 +| epoch 8 | 1135/ 2800 batches | train loss 0.3093559 +| epoch 8 | 1139/ 2800 batches | train loss 0.3199522 +| epoch 8 | 1143/ 2800 batches | train loss 0.3602773 +| epoch 8 | 1147/ 2800 batches | train loss 0.4063725 +| epoch 8 | 1151/ 2800 batches | train loss 0.3445408 +| epoch 8 | 1155/ 2800 batches | train loss 0.3505684 +| epoch 8 | 1159/ 2800 batches | train loss 0.2475955 +| epoch 8 | 1163/ 2800 batches | train loss 0.2687088 +| epoch 8 | 1167/ 2800 batches | train loss 0.3410389 +| epoch 8 | 1171/ 2800 batches | train loss 0.3441731 +| epoch 8 | 1175/ 2800 batches | train loss 0.3472177 +| epoch 8 | 1179/ 2800 batches | train loss 0.3389479 +| epoch 8 | 1183/ 2800 batches | train loss 0.3457015 +| epoch 8 | 1187/ 2800 batches | train loss 0.3107464 +| epoch 8 | 1191/ 2800 batches | train loss 0.3112637 +| epoch 8 | 1195/ 2800 batches | train loss 0.3332721 +| epoch 8 | 1199/ 2800 batches | train loss 0.2816663 +| epoch 8 | 1203/ 2800 batches | train loss 0.3549502 +| epoch 8 | 1207/ 2800 batches | train loss 0.3536913 +| epoch 8 | 1211/ 2800 batches | train loss 0.2904769 +| epoch 8 | 1215/ 2800 batches | train loss 0.2904401 +| epoch 8 | 1219/ 2800 batches | train loss 0.3253584 +| epoch 8 | 1223/ 2800 batches | train loss 0.3328519 +| epoch 8 | 1227/ 2800 batches | train loss 0.3436449 +| epoch 8 | 1231/ 2800 batches | train loss 0.3278950 +| epoch 8 | 1235/ 2800 batches | train loss 0.2739334 +| epoch 8 | 1239/ 2800 batches | train loss 0.3207582 +| epoch 8 | 1243/ 2800 batches | train loss 0.4134808 +| epoch 8 | 1247/ 2800 batches | train loss 0.3123137 +| epoch 8 | 1251/ 2800 batches | train loss 0.3002194 +| epoch 8 | 1255/ 2800 batches | train loss 0.2991992 +| epoch 8 | 1259/ 2800 batches | train loss 0.3105381 +| epoch 8 | 1263/ 2800 batches | train loss 0.3384854 +| epoch 8 | 1267/ 2800 batches | train loss 0.3675980 +| epoch 8 | 1271/ 2800 batches | train loss 0.2848434 +| epoch 8 | 1275/ 2800 batches | train loss 0.3869094 +| epoch 8 | 1279/ 2800 batches | train loss 0.2795778 +| epoch 8 | 1283/ 2800 batches | train loss 0.3317587 +| epoch 8 | 1287/ 2800 batches | train loss 0.2985148 +| epoch 8 | 1291/ 2800 batches | train loss 0.3452491 +| epoch 8 | 1295/ 2800 batches | train loss 0.3114786 +| epoch 8 | 1299/ 2800 batches | train loss 0.3584496 +| epoch 8 | 1303/ 2800 batches | train loss 0.3732173 +| epoch 8 | 1307/ 2800 batches | train loss 0.3412991 +| epoch 8 | 1311/ 2800 batches | train loss 0.3797405 +| epoch 8 | 1315/ 2800 batches | train loss 0.3110406 +| epoch 8 | 1319/ 2800 batches | train loss 0.3444149 +| epoch 8 | 1323/ 2800 batches | train loss 0.3416162 +| epoch 8 | 1327/ 2800 batches | train loss 0.3042913 +| epoch 8 | 1331/ 2800 batches | train loss 0.3393236 +| epoch 8 | 1335/ 2800 batches | train loss 0.4178247 +| epoch 8 | 1339/ 2800 batches | train loss 0.3128378 +| epoch 8 | 1343/ 2800 batches | train loss 0.3512678 +| epoch 8 | 1347/ 2800 batches | train loss 0.2786306 +| epoch 8 | 1351/ 2800 batches | train loss 0.3229292 +| epoch 8 | 1355/ 2800 batches | train loss 0.2697719 +| epoch 8 | 1359/ 2800 batches | train loss 0.3204882 +| epoch 8 | 1363/ 2800 batches | train loss 0.3268990 +| epoch 8 | 1367/ 2800 batches | train loss 0.3393813 +| epoch 8 | 1371/ 2800 batches | train loss 0.2523524 +| epoch 8 | 1375/ 2800 batches | train loss 0.3521839 +| epoch 8 | 1379/ 2800 batches | train loss 0.2647786 +| epoch 8 | 1383/ 2800 batches | train loss 0.3341029 +| epoch 8 | 1387/ 2800 batches | train loss 0.3046749 +| epoch 8 | 1391/ 2800 batches | train loss 0.3535383 +| epoch 8 | 1395/ 2800 batches | train loss 0.3556495 +| epoch 8 | 1399/ 2800 batches | train loss 0.3358765 +| epoch 8 | 1403/ 2800 batches | train loss 0.3271971 +| epoch 8 | 1407/ 2800 batches | train loss 0.3259662 +| epoch 8 | 1411/ 2800 batches | train loss 0.3826008 +| epoch 8 | 1415/ 2800 batches | train loss 0.3168290 +| epoch 8 | 1419/ 2800 batches | train loss 0.2712368 +| epoch 8 | 1423/ 2800 batches | train loss 0.3500922 +| epoch 8 | 1427/ 2800 batches | train loss 0.3648345 +| epoch 8 | 1431/ 2800 batches | train loss 0.3527376 +| epoch 8 | 1435/ 2800 batches | train loss 0.2894277 +| epoch 8 | 1439/ 2800 batches | train loss 0.3535284 +| epoch 8 | 1443/ 2800 batches | train loss 0.4228474 +| epoch 8 | 1447/ 2800 batches | train loss 0.3311391 +| epoch 8 | 1451/ 2800 batches | train loss 0.3906361 +| epoch 8 | 1455/ 2800 batches | train loss 0.3186199 +| epoch 8 | 1459/ 2800 batches | train loss 0.3131535 +| epoch 8 | 1463/ 2800 batches | train loss 0.2612956 +| epoch 8 | 1467/ 2800 batches | train loss 0.4076486 +| epoch 8 | 1471/ 2800 batches | train loss 0.3743741 +| epoch 8 | 1475/ 2800 batches | train loss 0.3079205 +| epoch 8 | 1479/ 2800 batches | train loss 0.3452841 +| epoch 8 | 1483/ 2800 batches | train loss 0.3396920 +| epoch 8 | 1487/ 2800 batches | train loss 0.3922377 +| epoch 8 | 1491/ 2800 batches | train loss 0.2905290 +| epoch 8 | 1495/ 2800 batches | train loss 0.3518546 +| epoch 8 | 1499/ 2800 batches | train loss 0.2637510 +| epoch 8 | 1503/ 2800 batches | train loss 0.2918468 +| epoch 8 | 1507/ 2800 batches | train loss 0.3263518 +| epoch 8 | 1511/ 2800 batches | train loss 0.3114598 +| epoch 8 | 1515/ 2800 batches | train loss 0.3764710 +| epoch 8 | 1519/ 2800 batches | train loss 0.3088861 +| epoch 8 | 1523/ 2800 batches | train loss 0.3053498 +| epoch 8 | 1527/ 2800 batches | train loss 0.3377054 +| epoch 8 | 1531/ 2800 batches | train loss 0.3235221 +| epoch 8 | 1535/ 2800 batches | train loss 0.2889608 +| epoch 8 | 1539/ 2800 batches | train loss 0.2459096 +| epoch 8 | 1543/ 2800 batches | train loss 0.3796004 +| epoch 8 | 1547/ 2800 batches | train loss 0.3488810 +| epoch 8 | 1551/ 2800 batches | train loss 0.2748757 +| epoch 8 | 1555/ 2800 batches | train loss 0.3808537 +| epoch 8 | 1559/ 2800 batches | train loss 0.3566656 +| epoch 8 | 1563/ 2800 batches | train loss 0.2984277 +| epoch 8 | 1567/ 2800 batches | train loss 0.3630397 +| epoch 8 | 1571/ 2800 batches | train loss 0.2641417 +| epoch 8 | 1575/ 2800 batches | train loss 0.2618010 +| epoch 8 | 1579/ 2800 batches | train loss 0.3391250 +| epoch 8 | 1583/ 2800 batches | train loss 0.3422520 +| epoch 8 | 1587/ 2800 batches | train loss 0.3415410 +| epoch 8 | 1591/ 2800 batches | train loss 0.3903698 +| epoch 8 | 1595/ 2800 batches | train loss 0.2624837 +| epoch 8 | 1599/ 2800 batches | train loss 0.3508593 +| epoch 8 | 1603/ 2800 batches | train loss 0.3612267 +| epoch 8 | 1607/ 2800 batches | train loss 0.4178642 +| epoch 8 | 1611/ 2800 batches | train loss 0.3096461 +| epoch 8 | 1615/ 2800 batches | train loss 0.3231727 +| epoch 8 | 1619/ 2800 batches | train loss 0.3031752 +| epoch 8 | 1623/ 2800 batches | train loss 0.3237293 +| epoch 8 | 1627/ 2800 batches | train loss 0.3162857 +| epoch 8 | 1631/ 2800 batches | train loss 0.3560316 +| epoch 8 | 1635/ 2800 batches | train loss 0.3704452 +| epoch 8 | 1639/ 2800 batches | train loss 0.2810642 +| epoch 8 | 1643/ 2800 batches | train loss 0.3881580 +| epoch 8 | 1647/ 2800 batches | train loss 0.2923135 +| epoch 8 | 1651/ 2800 batches | train loss 0.3204945 +| epoch 8 | 1655/ 2800 batches | train loss 0.2919130 +| epoch 8 | 1659/ 2800 batches | train loss 0.4015473 +| epoch 8 | 1663/ 2800 batches | train loss 0.3194863 +| epoch 8 | 1667/ 2800 batches | train loss 0.3532258 +| epoch 8 | 1671/ 2800 batches | train loss 0.3005724 +| epoch 8 | 1675/ 2800 batches | train loss 0.3643332 +| epoch 8 | 1679/ 2800 batches | train loss 0.2859822 +| epoch 8 | 1683/ 2800 batches | train loss 0.3139255 +| epoch 8 | 1687/ 2800 batches | train loss 0.3119759 +| epoch 8 | 1691/ 2800 batches | train loss 0.2906341 +| epoch 8 | 1695/ 2800 batches | train loss 0.3731361 +| epoch 8 | 1699/ 2800 batches | train loss 0.3048209 +| epoch 8 | 1703/ 2800 batches | train loss 0.3167249 +| epoch 8 | 1707/ 2800 batches | train loss 0.3677737 +| epoch 8 | 1711/ 2800 batches | train loss 0.2808760 +| epoch 8 | 1715/ 2800 batches | train loss 0.3484478 +| epoch 8 | 1719/ 2800 batches | train loss 0.3016372 +| epoch 8 | 1723/ 2800 batches | train loss 0.3269115 +| epoch 8 | 1727/ 2800 batches | train loss 0.3114192 +| epoch 8 | 1731/ 2800 batches | train loss 0.3475028 +| epoch 8 | 1735/ 2800 batches | train loss 0.3495471 +| epoch 8 | 1739/ 2800 batches | train loss 0.3115157 +| epoch 8 | 1743/ 2800 batches | train loss 0.3663539 +| epoch 8 | 1747/ 2800 batches | train loss 0.3606988 +| epoch 8 | 1751/ 2800 batches | train loss 0.2587336 +| epoch 8 | 1755/ 2800 batches | train loss 0.3511936 +| epoch 8 | 1759/ 2800 batches | train loss 0.3244107 +| epoch 8 | 1763/ 2800 batches | train loss 0.2703069 +| epoch 8 | 1767/ 2800 batches | train loss 0.3410591 +| epoch 8 | 1771/ 2800 batches | train loss 0.3499656 +| epoch 8 | 1775/ 2800 batches | train loss 0.3509939 +| epoch 8 | 1779/ 2800 batches | train loss 0.3409986 +| epoch 8 | 1783/ 2800 batches | train loss 0.3140088 +| epoch 8 | 1787/ 2800 batches | train loss 0.3699304 +| epoch 8 | 1791/ 2800 batches | train loss 0.3166741 +| epoch 8 | 1795/ 2800 batches | train loss 0.3117536 +| epoch 8 | 1799/ 2800 batches | train loss 0.3609630 +| epoch 8 | 1803/ 2800 batches | train loss 0.2830946 +| epoch 8 | 1807/ 2800 batches | train loss 0.3681023 +| epoch 8 | 1811/ 2800 batches | train loss 0.3655756 +| epoch 8 | 1815/ 2800 batches | train loss 0.3186969 +| epoch 8 | 1819/ 2800 batches | train loss 0.2840692 +| epoch 8 | 1823/ 2800 batches | train loss 0.3571136 +| epoch 8 | 1827/ 2800 batches | train loss 0.4026830 +| epoch 8 | 1831/ 2800 batches | train loss 0.3359315 +| epoch 8 | 1835/ 2800 batches | train loss 0.3311328 +| epoch 8 | 1839/ 2800 batches | train loss 0.2332442 +| epoch 8 | 1843/ 2800 batches | train loss 0.3508492 +| epoch 8 | 1847/ 2800 batches | train loss 0.3711058 +| epoch 8 | 1851/ 2800 batches | train loss 0.2840142 +| epoch 8 | 1855/ 2800 batches | train loss 0.3088339 +| epoch 8 | 1859/ 2800 batches | train loss 0.2955447 +| epoch 8 | 1863/ 2800 batches | train loss 0.3473493 +| epoch 8 | 1867/ 2800 batches | train loss 0.3359345 +| epoch 8 | 1871/ 2800 batches | train loss 0.3326746 +| epoch 8 | 1875/ 2800 batches | train loss 0.4569127 +| epoch 8 | 1879/ 2800 batches | train loss 0.2733728 +| epoch 8 | 1883/ 2800 batches | train loss 0.2956262 +| epoch 8 | 1887/ 2800 batches | train loss 0.3212796 +| epoch 8 | 1891/ 2800 batches | train loss 0.3682882 +| epoch 8 | 1895/ 2800 batches | train loss 0.3340008 +| epoch 8 | 1899/ 2800 batches | train loss 0.2928912 +| epoch 8 | 1903/ 2800 batches | train loss 0.3351670 +| epoch 8 | 1907/ 2800 batches | train loss 0.3465507 +| epoch 8 | 1911/ 2800 batches | train loss 0.3746949 +| epoch 8 | 1915/ 2800 batches | train loss 0.2980517 +| epoch 8 | 1919/ 2800 batches | train loss 0.3515745 +| epoch 8 | 1923/ 2800 batches | train loss 0.3414910 +| epoch 8 | 1927/ 2800 batches | train loss 0.2993252 +| epoch 8 | 1931/ 2800 batches | train loss 0.3740425 +| epoch 8 | 1935/ 2800 batches | train loss 0.3335825 +| epoch 8 | 1939/ 2800 batches | train loss 0.3082820 +| epoch 8 | 1943/ 2800 batches | train loss 0.3447025 +| epoch 8 | 1947/ 2800 batches | train loss 0.3434263 +| epoch 8 | 1951/ 2800 batches | train loss 0.3035072 +| epoch 8 | 1955/ 2800 batches | train loss 0.3694042 +| epoch 8 | 1959/ 2800 batches | train loss 0.3498694 +| epoch 8 | 1963/ 2800 batches | train loss 0.3202620 +| epoch 8 | 1967/ 2800 batches | train loss 0.3101195 +| epoch 8 | 1971/ 2800 batches | train loss 0.3338183 +| epoch 8 | 1975/ 2800 batches | train loss 0.2679960 +| epoch 8 | 1979/ 2800 batches | train loss 0.2859369 +| epoch 8 | 1983/ 2800 batches | train loss 0.3301531 +| epoch 8 | 1987/ 2800 batches | train loss 0.2914440 +| epoch 8 | 1991/ 2800 batches | train loss 0.3367746 +| epoch 8 | 1995/ 2800 batches | train loss 0.3540283 +| epoch 8 | 1999/ 2800 batches | train loss 0.3532016 +| epoch 8 | 2003/ 2800 batches | train loss 0.3747510 +| epoch 8 | 2007/ 2800 batches | train loss 0.3214120 +| epoch 8 | 2011/ 2800 batches | train loss 0.3554079 +| epoch 8 | 2015/ 2800 batches | train loss 0.3482521 +| epoch 8 | 2019/ 2800 batches | train loss 0.3453603 +| epoch 8 | 2023/ 2800 batches | train loss 0.3022773 +| epoch 8 | 2027/ 2800 batches | train loss 0.3089226 +| epoch 8 | 2031/ 2800 batches | train loss 0.3487529 +| epoch 8 | 2035/ 2800 batches | train loss 0.3429189 +| epoch 8 | 2039/ 2800 batches | train loss 0.3609641 +| epoch 8 | 2043/ 2800 batches | train loss 0.3339585 +| epoch 8 | 2047/ 2800 batches | train loss 0.3493437 +| epoch 8 | 2051/ 2800 batches | train loss 0.3087951 +| epoch 8 | 2055/ 2800 batches | train loss 0.3412160 +| epoch 8 | 2059/ 2800 batches | train loss 0.2936447 +| epoch 8 | 2063/ 2800 batches | train loss 0.3434885 +| epoch 8 | 2067/ 2800 batches | train loss 0.2878652 +| epoch 8 | 2071/ 2800 batches | train loss 0.3159986 +| epoch 8 | 2075/ 2800 batches | train loss 0.3241186 +| epoch 8 | 2079/ 2800 batches | train loss 0.4085459 +| epoch 8 | 2083/ 2800 batches | train loss 0.2795243 +| epoch 8 | 2087/ 2800 batches | train loss 0.3297751 +| epoch 8 | 2091/ 2800 batches | train loss 0.1243975 +| epoch 8 | 2095/ 2800 batches | train loss 0.2443276 +| epoch 8 | 2099/ 2800 batches | train loss 0.3127971 +| epoch 8 | 2103/ 2800 batches | train loss 0.3407458 +| epoch 8 | 2107/ 2800 batches | train loss 0.3307759 +| epoch 8 | 2111/ 2800 batches | train loss 0.3211545 +| epoch 8 | 2115/ 2800 batches | train loss 0.3062631 +| epoch 8 | 2119/ 2800 batches | train loss 0.3351579 +| epoch 8 | 2123/ 2800 batches | train loss 0.3438802 +| epoch 8 | 2127/ 2800 batches | train loss 0.2758720 +| epoch 8 | 2131/ 2800 batches | train loss 0.3279059 +| epoch 8 | 2135/ 2800 batches | train loss 0.2792102 +| epoch 8 | 2139/ 2800 batches | train loss 0.3189924 +| epoch 8 | 2143/ 2800 batches | train loss 0.3426241 +| epoch 8 | 2147/ 2800 batches | train loss 0.2915496 +| epoch 8 | 2151/ 2800 batches | train loss 0.2806140 +| epoch 8 | 2155/ 2800 batches | train loss 0.4226146 +| epoch 8 | 2159/ 2800 batches | train loss 0.3573718 +| epoch 8 | 2163/ 2800 batches | train loss 0.3146971 +| epoch 8 | 2167/ 2800 batches | train loss 0.3459713 +| epoch 8 | 2171/ 2800 batches | train loss 0.2699227 +| epoch 8 | 2175/ 2800 batches | train loss 0.3946170 +| epoch 8 | 2179/ 2800 batches | train loss 0.2996542 +| epoch 8 | 2183/ 2800 batches | train loss 0.2931061 +| epoch 8 | 2187/ 2800 batches | train loss 0.3205568 +| epoch 8 | 2191/ 2800 batches | train loss 0.3155321 +| epoch 8 | 2195/ 2800 batches | train loss 0.3668167 +| epoch 8 | 2199/ 2800 batches | train loss 0.3122390 +| epoch 8 | 2203/ 2800 batches | train loss 0.3274621 +| epoch 8 | 2207/ 2800 batches | train loss 0.3629118 +| epoch 8 | 2211/ 2800 batches | train loss 0.3732410 +| epoch 8 | 2215/ 2800 batches | train loss 0.3114424 +| epoch 8 | 2219/ 2800 batches | train loss 0.2921661 +| epoch 8 | 2223/ 2800 batches | train loss 0.3380228 +| epoch 8 | 2227/ 2800 batches | train loss 0.3654155 +| epoch 8 | 2231/ 2800 batches | train loss 0.3616673 +| epoch 8 | 2235/ 2800 batches | train loss 0.3043579 +| epoch 8 | 2239/ 2800 batches | train loss 0.2972625 +| epoch 8 | 2243/ 2800 batches | train loss 0.3533530 +| epoch 8 | 2247/ 2800 batches | train loss 0.3034130 +| epoch 8 | 2251/ 2800 batches | train loss 0.3477103 +| epoch 8 | 2255/ 2800 batches | train loss 0.2991144 +| epoch 8 | 2259/ 2800 batches | train loss 0.2924139 +| epoch 8 | 2263/ 2800 batches | train loss 0.4342728 +| epoch 8 | 2267/ 2800 batches | train loss 0.3601270 +| epoch 8 | 2271/ 2800 batches | train loss 0.3291228 +| epoch 8 | 2275/ 2800 batches | train loss 0.3432029 +| epoch 8 | 2279/ 2800 batches | train loss 0.3327447 +| epoch 8 | 2283/ 2800 batches | train loss 0.3400780 +| epoch 8 | 2287/ 2800 batches | train loss 0.3985092 +| epoch 8 | 2291/ 2800 batches | train loss 0.2730092 +| epoch 8 | 2295/ 2800 batches | train loss 0.3697318 +| epoch 8 | 2299/ 2800 batches | train loss 0.3314121 +| epoch 8 | 2303/ 2800 batches | train loss 0.3131917 +| epoch 8 | 2307/ 2800 batches | train loss 0.3057129 +| epoch 8 | 2311/ 2800 batches | train loss 0.3130890 +| epoch 8 | 2315/ 2800 batches | train loss 0.2765270 +| epoch 8 | 2319/ 2800 batches | train loss 0.3231960 +| epoch 8 | 2323/ 2800 batches | train loss 0.3331629 +| epoch 8 | 2327/ 2800 batches | train loss 0.3094705 +| epoch 8 | 2331/ 2800 batches | train loss 0.2747855 +| epoch 8 | 2335/ 2800 batches | train loss 0.3481286 +| epoch 8 | 2339/ 2800 batches | train loss 0.3276002 +| epoch 8 | 2343/ 2800 batches | train loss 0.3927181 +| epoch 8 | 2347/ 2800 batches | train loss 0.4069520 +| epoch 8 | 2351/ 2800 batches | train loss 0.3803270 +| epoch 8 | 2355/ 2800 batches | train loss 0.3972599 +| epoch 8 | 2359/ 2800 batches | train loss 0.2598484 +| epoch 8 | 2363/ 2800 batches | train loss 0.3999953 +| epoch 8 | 2367/ 2800 batches | train loss 0.3369508 +| epoch 8 | 2371/ 2800 batches | train loss 0.2817058 +| epoch 8 | 2375/ 2800 batches | train loss 0.3404898 +| epoch 8 | 2379/ 2800 batches | train loss 0.3448941 +| epoch 8 | 2383/ 2800 batches | train loss 0.2838233 +| epoch 8 | 2387/ 2800 batches | train loss 0.3415776 +| epoch 8 | 2391/ 2800 batches | train loss 0.2724029 +| epoch 8 | 2395/ 2800 batches | train loss 0.2493030 +| epoch 8 | 2399/ 2800 batches | train loss 0.3478157 +| epoch 8 | 2403/ 2800 batches | train loss 0.3511841 +| epoch 8 | 2407/ 2800 batches | train loss 0.3429150 +| epoch 8 | 2411/ 2800 batches | train loss 0.3273736 +| epoch 8 | 2415/ 2800 batches | train loss 0.3333684 +| epoch 8 | 2419/ 2800 batches | train loss 0.3051087 +| epoch 8 | 2423/ 2800 batches | train loss 0.2694541 +| epoch 8 | 2427/ 2800 batches | train loss 0.3199456 +| epoch 8 | 2431/ 2800 batches | train loss 0.3685253 +| epoch 8 | 2435/ 2800 batches | train loss 0.3578671 +| epoch 8 | 2439/ 2800 batches | train loss 0.3295883 +| epoch 8 | 2443/ 2800 batches | train loss 0.3209806 +| epoch 8 | 2447/ 2800 batches | train loss 0.3718402 +| epoch 8 | 2451/ 2800 batches | train loss 0.3855348 +| epoch 8 | 2455/ 2800 batches | train loss 0.3514818 +| epoch 8 | 2459/ 2800 batches | train loss 0.3171403 +| epoch 8 | 2463/ 2800 batches | train loss 0.3318031 +| epoch 8 | 2467/ 2800 batches | train loss 0.3535326 +| epoch 8 | 2471/ 2800 batches | train loss 0.2219431 +| epoch 8 | 2475/ 2800 batches | train loss 0.3654359 +| epoch 8 | 2479/ 2800 batches | train loss 0.3502426 +| epoch 8 | 2483/ 2800 batches | train loss 0.3853880 +| epoch 8 | 2487/ 2800 batches | train loss 0.2636304 +| epoch 8 | 2491/ 2800 batches | train loss 0.3393236 +| epoch 8 | 2495/ 2800 batches | train loss 0.3558141 +| epoch 8 | 2499/ 2800 batches | train loss 0.3138211 +| epoch 8 | 2503/ 2800 batches | train loss 0.3271841 +| epoch 8 | 2507/ 2800 batches | train loss 0.3626758 +| epoch 8 | 2511/ 2800 batches | train loss 0.2879394 +| epoch 8 | 2515/ 2800 batches | train loss 0.3312777 +| epoch 8 | 2519/ 2800 batches | train loss 0.3615299 +| epoch 8 | 2523/ 2800 batches | train loss 0.4221072 +| epoch 8 | 2527/ 2800 batches | train loss 0.3480104 +| epoch 8 | 2531/ 2800 batches | train loss 0.3857745 +| epoch 8 | 2535/ 2800 batches | train loss 0.3148419 +| epoch 8 | 2539/ 2800 batches | train loss 0.3933326 +| epoch 8 | 2543/ 2800 batches | train loss 0.3056165 +| epoch 8 | 2547/ 2800 batches | train loss 0.3486028 +| epoch 8 | 2551/ 2800 batches | train loss 0.3161132 +| epoch 8 | 2555/ 2800 batches | train loss 0.2839572 +| epoch 8 | 2559/ 2800 batches | train loss 0.3653885 +| epoch 8 | 2563/ 2800 batches | train loss 0.3425800 +| epoch 8 | 2567/ 2800 batches | train loss 0.3345022 +| epoch 8 | 2571/ 2800 batches | train loss 0.3356551 +| epoch 8 | 2575/ 2800 batches | train loss 0.3190837 +| epoch 8 | 2579/ 2800 batches | train loss 0.2342902 +| epoch 8 | 2583/ 2800 batches | train loss 0.3738931 +| epoch 8 | 2587/ 2800 batches | train loss 0.3993567 +| epoch 8 | 2591/ 2800 batches | train loss 0.2911220 +| epoch 8 | 2595/ 2800 batches | train loss 0.3447379 +| epoch 8 | 2599/ 2800 batches | train loss 0.2768741 +| epoch 8 | 2603/ 2800 batches | train loss 0.3376251 +| epoch 8 | 2607/ 2800 batches | train loss 0.4144413 +| epoch 8 | 2611/ 2800 batches | train loss 0.3075520 +| epoch 8 | 2615/ 2800 batches | train loss 0.2583296 +| epoch 8 | 2619/ 2800 batches | train loss 0.3640696 +| epoch 8 | 2623/ 2800 batches | train loss 0.3425593 +| epoch 8 | 2627/ 2800 batches | train loss 0.3614115 +| epoch 8 | 2631/ 2800 batches | train loss 0.2812214 +| epoch 8 | 2635/ 2800 batches | train loss 0.3299029 +| epoch 8 | 2639/ 2800 batches | train loss 0.2902356 +| epoch 8 | 2643/ 2800 batches | train loss 0.3775806 +| epoch 8 | 2647/ 2800 batches | train loss 0.3086361 +| epoch 8 | 2651/ 2800 batches | train loss 0.3067384 +| epoch 8 | 2655/ 2800 batches | train loss 0.3578240 +| epoch 8 | 2659/ 2800 batches | train loss 0.3626152 +| epoch 8 | 2663/ 2800 batches | train loss 0.3703219 +| epoch 8 | 2667/ 2800 batches | train loss 0.3563026 +| epoch 8 | 2671/ 2800 batches | train loss 0.3570373 +| epoch 8 | 2675/ 2800 batches | train loss 0.3321386 +| epoch 8 | 2679/ 2800 batches | train loss 0.3641763 +| epoch 8 | 2683/ 2800 batches | train loss 0.3743261 +| epoch 8 | 2687/ 2800 batches | train loss 0.3114031 +| epoch 8 | 2691/ 2800 batches | train loss 0.2461196 +| epoch 8 | 2695/ 2800 batches | train loss 0.3439526 +| epoch 8 | 2699/ 2800 batches | train loss 0.3077354 +| epoch 8 | 2703/ 2800 batches | train loss 0.3279308 +| epoch 8 | 2707/ 2800 batches | train loss 0.3041511 +| epoch 8 | 2711/ 2800 batches | train loss 0.3795912 +| epoch 8 | 2715/ 2800 batches | train loss 0.3512741 +| epoch 8 | 2719/ 2800 batches | train loss 0.2853166 +| epoch 8 | 2723/ 2800 batches | train loss 0.3136500 +| epoch 8 | 2727/ 2800 batches | train loss 0.3503476 +| epoch 8 | 2731/ 2800 batches | train loss 0.3190166 +| epoch 8 | 2735/ 2800 batches | train loss 0.3335922 +| epoch 8 | 2739/ 2800 batches | train loss 0.3242907 +| epoch 8 | 2743/ 2800 batches | train loss 0.3010023 +| epoch 8 | 2747/ 2800 batches | train loss 0.4109325 +| epoch 8 | 2751/ 2800 batches | train loss 0.3322293 +| epoch 8 | 2755/ 2800 batches | train loss 0.4042184 +| epoch 8 | 2759/ 2800 batches | train loss 0.3444617 +| epoch 8 | 2763/ 2800 batches | train loss 0.3242766 +| epoch 8 | 2767/ 2800 batches | train loss 0.2954276 +| epoch 8 | 2771/ 2800 batches | train loss 0.3053329 +| epoch 8 | 2775/ 2800 batches | train loss 0.3118627 +| epoch 8 | 2779/ 2800 batches | train loss 0.3832216 +| epoch 8 | 2783/ 2800 batches | train loss 0.3883051 +| epoch 8 | 2787/ 2800 batches | train loss 0.3108369 +| epoch 8 | 2791/ 2800 batches | train loss 0.3170072 +| epoch 8 | 2795/ 2800 batches | train loss 0.3789142 +| epoch 8 | 2799/ 2800 batches | train loss 0.3356530 +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 2800 batches | test loss 0.5078281 +| epoch 8 | 7/ 2800 batches | test loss 0.3662616 +| epoch 8 | 11/ 2800 batches | test loss 0.4439098 +| epoch 8 | 15/ 2800 batches | test loss 0.6258856 +| epoch 8 | 19/ 2800 batches | test loss 0.5701656 +| epoch 8 | 23/ 2800 batches | test loss 0.6933634 +| epoch 8 | 27/ 2800 batches | test loss 0.5019227 +| epoch 8 | 31/ 2800 batches | test loss 0.4130674 +| epoch 8 | 35/ 2800 batches | test loss 0.5887611 +| epoch 8 | 39/ 2800 batches | test loss 0.6817564 +| epoch 8 | 43/ 2800 batches | test loss 0.5215200 +| epoch 8 | 47/ 2800 batches | test loss 0.5599293 +| epoch 8 | 51/ 2800 batches | test loss 0.4224497 +| epoch 8 | 55/ 2800 batches | test loss 0.5574170 +| epoch 8 | 59/ 2800 batches | test loss 0.5968378 +| epoch 8 | 63/ 2800 batches | test loss 0.4348431 +| epoch 8 | 67/ 2800 batches | test loss 0.4367338 +| epoch 8 | 71/ 2800 batches | test loss 0.6724957 +| epoch 8 | 75/ 2800 batches | test loss 0.4063399 +| epoch 8 | 79/ 2800 batches | test loss 0.4718919 +| epoch 8 | 83/ 2800 batches | test loss 0.6536629 +| epoch 8 | 87/ 2800 batches | test loss 0.6441940 +| epoch 8 | 91/ 2800 batches | test loss 0.5328766 +| epoch 8 | 95/ 2800 batches | test loss 0.5792619 +| epoch 8 | 99/ 2800 batches | test loss 0.4905722 +| epoch 8 | 103/ 2800 batches | test loss 0.3945376 +| epoch 8 | 107/ 2800 batches | test loss 0.4026013 +| epoch 8 | 111/ 2800 batches | test loss 0.4392682 +| epoch 8 | 115/ 2800 batches | test loss 0.4869053 +| epoch 8 | 119/ 2800 batches | test loss 0.5777332 +| epoch 8 | 123/ 2800 batches | test loss 0.5083752 +| epoch 8 | 127/ 2800 batches | test loss 0.3922920 +| epoch 8 | 131/ 2800 batches | test loss 0.5636764 +| epoch 8 | 135/ 2800 batches | test loss 0.4293064 +| epoch 8 | 139/ 2800 batches | test loss 0.6595802 +| epoch 8 | 143/ 2800 batches | test loss 0.6759303 +| epoch 8 | 147/ 2800 batches | test loss 0.5885590 +| epoch 8 | 151/ 2800 batches | test loss 0.4546042 +| epoch 8 | 155/ 2800 batches | test loss 0.5513375 +| epoch 8 | 159/ 2800 batches | test loss 0.5052087 +| epoch 8 | 163/ 2800 batches | test loss 0.5101970 +| epoch 8 | 167/ 2800 batches | test loss 0.3986976 +| epoch 8 | 171/ 2800 batches | test loss 0.4646076 +| epoch 8 | 175/ 2800 batches | test loss 0.4399750 +| epoch 8 | 179/ 2800 batches | test loss 0.6124792 +| epoch 8 | 183/ 2800 batches | test loss 0.4750535 +| epoch 8 | 187/ 2800 batches | test loss 0.5775960 +| epoch 8 | 191/ 2800 batches | test loss 0.6038942 +| epoch 8 | 195/ 2800 batches | test loss 0.4426790 +| epoch 8 | 199/ 2800 batches | test loss 0.4305543 +| epoch 8 | 203/ 2800 batches | test loss 0.8201780 +| epoch 8 | 207/ 2800 batches | test loss 0.5579250 +| epoch 8 | 211/ 2800 batches | test loss 0.6585153 +| epoch 8 | 215/ 2800 batches | test loss 0.7860010 +| epoch 8 | 219/ 2800 batches | test loss 0.4739953 +| epoch 8 | 223/ 2800 batches | test loss 0.5988336 +| epoch 8 | 227/ 2800 batches | test loss 0.7236719 +| epoch 8 | 231/ 2800 batches | test loss 0.6494073 +| epoch 8 | 235/ 2800 batches | test loss 0.7235557 +| epoch 8 | 239/ 2800 batches | test loss 0.4378499 +| epoch 8 | 243/ 2800 batches | test loss 0.4725201 +| epoch 8 | 247/ 2800 batches | test loss 0.4391958 +| epoch 8 | 251/ 2800 batches | test loss 0.4409592 +| epoch 8 | 255/ 2800 batches | test loss 0.5956452 +| epoch 8 | 259/ 2800 batches | test loss 0.6704308 +| epoch 8 | 263/ 2800 batches | test loss 0.5528213 +| epoch 8 | 267/ 2800 batches | test loss 0.4588328 +| epoch 8 | 271/ 2800 batches | test loss 0.5664635 +| epoch 8 | 275/ 2800 batches | test loss 0.4168571 +| epoch 8 | 279/ 2800 batches | test loss 0.4570540 +| epoch 8 | 283/ 2800 batches | test loss 0.6449987 +| epoch 8 | 287/ 2800 batches | test loss 0.4046680 +| epoch 8 | 291/ 2800 batches | test loss 0.5040466 +| epoch 8 | 295/ 2800 batches | test loss 0.5507567 +| epoch 8 | 299/ 2800 batches | test loss 0.4064604 +| epoch 8 | 303/ 2800 batches | test loss 0.6428521 +| epoch 8 | 307/ 2800 batches | test loss 0.5843657 +| epoch 8 | 311/ 2800 batches | test loss 0.5648920 +| epoch 8 | 315/ 2800 batches | test loss 0.4779511 +| epoch 8 | 319/ 2800 batches | test loss 0.5132051 +| epoch 8 | 323/ 2800 batches | test loss 0.5667001 +| epoch 8 | 327/ 2800 batches | test loss 0.6269759 +| epoch 8 | 331/ 2800 batches | test loss 0.5465424 +| epoch 8 | 335/ 2800 batches | test loss 0.5868372 +| epoch 8 | 339/ 2800 batches | test loss 0.3888116 +| epoch 8 | 343/ 2800 batches | test loss 0.4959593 +| epoch 8 | 347/ 2800 batches | test loss 0.5100112 +| epoch 8 | 351/ 2800 batches | test loss 0.5037690 +| epoch 8 | 355/ 2800 batches | test loss 0.6052505 +| epoch 8 | 359/ 2800 batches | test loss 0.4163720 +| epoch 8 | 363/ 2800 batches | test loss 0.6452839 +| epoch 8 | 367/ 2800 batches | test loss 0.5073149 +| epoch 8 | 371/ 2800 batches | test loss 0.5693252 +| epoch 8 | 375/ 2800 batches | test loss 0.4544371 +| epoch 8 | 379/ 2800 batches | test loss 0.4037821 +| epoch 8 | 383/ 2800 batches | test loss 0.5397879 +| epoch 8 | 387/ 2800 batches | test loss 0.5507130 +| epoch 8 | 391/ 2800 batches | test loss 0.5419597 +| epoch 8 | 395/ 2800 batches | test loss 0.4826837 +| epoch 8 | 399/ 2800 batches | test loss 0.5514777 +| epoch 8 | 403/ 2800 batches | test loss 0.4877657 +| epoch 8 | 407/ 2800 batches | test loss 0.6281025 +| epoch 8 | 411/ 2800 batches | test loss 0.5503794 +| epoch 8 | 415/ 2800 batches | test loss 0.6150907 +| epoch 8 | 419/ 2800 batches | test loss 0.5493089 +| epoch 8 | 423/ 2800 batches | test loss 0.4946041 +| epoch 8 | 427/ 2800 batches | test loss 0.4856763 +| epoch 8 | 431/ 2800 batches | test loss 0.6183240 +| epoch 8 | 435/ 2800 batches | test loss 0.6942880 +| epoch 8 | 439/ 2800 batches | test loss 0.5102686 +| epoch 8 | 443/ 2800 batches | test loss 0.8708238 +| epoch 8 | 447/ 2800 batches | test loss 0.5792772 +| epoch 8 | 451/ 2800 batches | test loss 0.5027052 +| epoch 8 | 455/ 2800 batches | test loss 0.6381123 +| epoch 8 | 459/ 2800 batches | test loss 0.7255287 +| epoch 8 | 463/ 2800 batches | test loss 0.6077421 +| epoch 8 | 467/ 2800 batches | test loss 0.5076811 +| epoch 8 | 471/ 2800 batches | test loss 0.5133624 +| epoch 8 | 475/ 2800 batches | test loss 0.5668998 +| epoch 8 | 479/ 2800 batches | test loss 0.4475265 +| epoch 8 | 483/ 2800 batches | test loss 0.4559371 +| epoch 8 | 487/ 2800 batches | test loss 0.5333345 +| epoch 8 | 491/ 2800 batches | test loss 0.6533030 +| epoch 8 | 495/ 2800 batches | test loss 0.5213386 +| epoch 8 | 499/ 2800 batches | test loss 0.5122900 +| epoch 8 | 503/ 2800 batches | test loss 0.7135027 +| epoch 8 | 507/ 2800 batches | test loss 0.4879549 +| epoch 8 | 511/ 2800 batches | test loss 0.6794543 +| epoch 8 | 515/ 2800 batches | test loss 0.5817182 +| epoch 8 | 519/ 2800 batches | test loss 0.4632114 +| epoch 8 | 523/ 2800 batches | test loss 0.5043346 +| epoch 8 | 527/ 2800 batches | test loss 0.4413856 +| epoch 8 | 531/ 2800 batches | test loss 0.4377880 +| epoch 8 | 535/ 2800 batches | test loss 0.6467820 +| epoch 8 | 539/ 2800 batches | test loss 0.4266030 +| epoch 8 | 543/ 2800 batches | test loss 0.4798656 +| epoch 8 | 547/ 2800 batches | test loss 0.4694819 +| epoch 8 | 551/ 2800 batches | test loss 0.4654121 +| epoch 8 | 555/ 2800 batches | test loss 0.6662662 +| epoch 8 | 559/ 2800 batches | test loss 0.4686591 +| epoch 8 | 563/ 2800 batches | test loss 0.6023666 +| epoch 8 | 567/ 2800 batches | test loss 0.5639658 +| epoch 8 | 571/ 2800 batches | test loss 0.3644849 +| epoch 8 | 575/ 2800 batches | test loss 0.4489688 +| epoch 8 | 579/ 2800 batches | test loss 0.6676696 +| epoch 8 | 583/ 2800 batches | test loss 0.5906693 +| epoch 8 | 587/ 2800 batches | test loss 0.4818341 +| epoch 8 | 591/ 2800 batches | test loss 0.4522473 +| epoch 8 | 595/ 2800 batches | test loss 0.5151606 +| epoch 8 | 599/ 2800 batches | test loss 0.4316722 +| epoch 8 | 603/ 2800 batches | test loss 0.6234999 +| epoch 8 | 607/ 2800 batches | test loss 0.5448573 +| epoch 8 | 611/ 2800 batches | test loss 0.6814144 +| epoch 8 | 615/ 2800 batches | test loss 0.5109053 +| epoch 8 | 619/ 2800 batches | test loss 0.4957161 +| epoch 8 | 623/ 2800 batches | test loss 0.7089619 +| epoch 8 | 627/ 2800 batches | test loss 0.6865315 +| epoch 8 | 631/ 2800 batches | test loss 0.4494183 +| epoch 8 | 635/ 2800 batches | test loss 0.7168102 +| epoch 8 | 639/ 2800 batches | test loss 0.5199395 +| epoch 8 | 643/ 2800 batches | test loss 0.5601706 +| epoch 8 | 647/ 2800 batches | test loss 0.4456800 +| epoch 8 | 651/ 2800 batches | test loss 0.6815026 +| epoch 8 | 655/ 2800 batches | test loss 0.4791888 +| epoch 8 | 659/ 2800 batches | test loss 0.4706629 +| epoch 8 | 663/ 2800 batches | test loss 0.5047003 +| epoch 8 | 667/ 2800 batches | test loss 0.4766999 +| epoch 8 | 671/ 2800 batches | test loss 0.5277734 +| epoch 8 | 675/ 2800 batches | test loss 0.5214510 +| epoch 8 | 679/ 2800 batches | test loss 0.5291889 +| epoch 8 | 683/ 2800 batches | test loss 0.5214841 +| epoch 8 | 687/ 2800 batches | test loss 0.4086252 +| epoch 8 | 691/ 2800 batches | test loss 0.4832144 +| epoch 8 | 695/ 2800 batches | test loss 0.4827784 +| epoch 8 | 699/ 2800 batches | test loss 0.3945782 +| epoch 8 | final test loss 0.5325, do not save model! +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 2800 batches | train loss 0.3199317 +| epoch 9 | 7/ 2800 batches | train loss 0.2967202 +| epoch 9 | 11/ 2800 batches | train loss 0.2620899 +| epoch 9 | 15/ 2800 batches | train loss 0.3107787 +| epoch 9 | 19/ 2800 batches | train loss 0.2328263 +| epoch 9 | 23/ 2800 batches | train loss 0.2931320 +| epoch 9 | 27/ 2800 batches | train loss 0.2882358 +| epoch 9 | 31/ 2800 batches | train loss 0.1970280 +| epoch 9 | 35/ 2800 batches | train loss 0.2839122 +| epoch 9 | 39/ 2800 batches | train loss 0.2521792 +| epoch 9 | 43/ 2800 batches | train loss 0.3419712 +| epoch 9 | 47/ 2800 batches | train loss 0.2711176 +| epoch 9 | 51/ 2800 batches | train loss 0.2292305 +| epoch 9 | 55/ 2800 batches | train loss 0.2667348 +| epoch 9 | 59/ 2800 batches | train loss 0.2775513 +| epoch 9 | 63/ 2800 batches | train loss 0.2797869 +| epoch 9 | 67/ 2800 batches | train loss 0.2800155 +| epoch 9 | 71/ 2800 batches | train loss 0.3302028 +| epoch 9 | 75/ 2800 batches | train loss 0.2777324 +| epoch 9 | 79/ 2800 batches | train loss 0.3488941 +| epoch 9 | 83/ 2800 batches | train loss 0.2839718 +| epoch 9 | 87/ 2800 batches | train loss 0.3235368 +| epoch 9 | 91/ 2800 batches | train loss 0.2577561 +| epoch 9 | 95/ 2800 batches | train loss 0.3368354 +| epoch 9 | 99/ 2800 batches | train loss 0.3233721 +| epoch 9 | 103/ 2800 batches | train loss 0.3174810 +| epoch 9 | 107/ 2800 batches | train loss 0.3679833 +| epoch 9 | 111/ 2800 batches | train loss 0.2331627 +| epoch 9 | 115/ 2800 batches | train loss 0.2971346 +| epoch 9 | 119/ 2800 batches | train loss 0.2826804 +| epoch 9 | 123/ 2800 batches | train loss 0.2765691 +| epoch 9 | 127/ 2800 batches | train loss 0.3593215 +| epoch 9 | 131/ 2800 batches | train loss 0.3253596 +| epoch 9 | 135/ 2800 batches | train loss 0.2868860 +| epoch 9 | 139/ 2800 batches | train loss 0.2317416 +| epoch 9 | 143/ 2800 batches | train loss 0.3284456 +| epoch 9 | 147/ 2800 batches | train loss 0.2983526 +| epoch 9 | 151/ 2800 batches | train loss 0.3132160 +| epoch 9 | 155/ 2800 batches | train loss 0.2323841 +| epoch 9 | 159/ 2800 batches | train loss 0.2341310 +| epoch 9 | 163/ 2800 batches | train loss 0.3338636 +| epoch 9 | 167/ 2800 batches | train loss 0.3022329 +| epoch 9 | 171/ 2800 batches | train loss 0.3003829 +| epoch 9 | 175/ 2800 batches | train loss 0.2971155 +| epoch 9 | 179/ 2800 batches | train loss 0.3082716 +| epoch 9 | 183/ 2800 batches | train loss 0.2978920 +| epoch 9 | 187/ 2800 batches | train loss 0.3243917 +| epoch 9 | 191/ 2800 batches | train loss 0.3661332 +| epoch 9 | 195/ 2800 batches | train loss 0.2895523 +| epoch 9 | 199/ 2800 batches | train loss 0.3690690 +| epoch 9 | 203/ 2800 batches | train loss 0.2670707 +| epoch 9 | 207/ 2800 batches | train loss 0.3194066 +| epoch 9 | 211/ 2800 batches | train loss 0.2938356 +| epoch 9 | 215/ 2800 batches | train loss 0.3265966 +| epoch 9 | 219/ 2800 batches | train loss 0.3070897 +| epoch 9 | 223/ 2800 batches | train loss 0.2834504 +| epoch 9 | 227/ 2800 batches | train loss 0.2624227 +| epoch 9 | 231/ 2800 batches | train loss 0.3138956 +| epoch 9 | 235/ 2800 batches | train loss 0.3235320 +| epoch 9 | 239/ 2800 batches | train loss 0.3141841 +| epoch 9 | 243/ 2800 batches | train loss 0.2462512 +| epoch 9 | 247/ 2800 batches | train loss 0.2908385 +| epoch 9 | 251/ 2800 batches | train loss 0.2515398 +| epoch 9 | 255/ 2800 batches | train loss 0.3144229 +| epoch 9 | 259/ 2800 batches | train loss 0.2557423 +| epoch 9 | 263/ 2800 batches | train loss 0.3177931 +| epoch 9 | 267/ 2800 batches | train loss 0.3324576 +| epoch 9 | 271/ 2800 batches | train loss 0.3343805 +| epoch 9 | 275/ 2800 batches | train loss 0.3197216 +| epoch 9 | 279/ 2800 batches | train loss 0.2682505 +| epoch 9 | 283/ 2800 batches | train loss 0.2448024 +| epoch 9 | 287/ 2800 batches | train loss 0.3159710 +| epoch 9 | 291/ 2800 batches | train loss 0.3440824 +| epoch 9 | 295/ 2800 batches | train loss 0.2602437 +| epoch 9 | 299/ 2800 batches | train loss 0.2600385 +| epoch 9 | 303/ 2800 batches | train loss 0.3148060 +| epoch 9 | 307/ 2800 batches | train loss 0.2499447 +| epoch 9 | 311/ 2800 batches | train loss 0.3175115 +| epoch 9 | 315/ 2800 batches | train loss 0.2575635 +| epoch 9 | 319/ 2800 batches | train loss 0.2808646 +| epoch 9 | 323/ 2800 batches | train loss 0.2703043 +| epoch 9 | 327/ 2800 batches | train loss 0.3292227 +| epoch 9 | 331/ 2800 batches | train loss 0.3233533 +| epoch 9 | 335/ 2800 batches | train loss 0.2717940 +| epoch 9 | 339/ 2800 batches | train loss 0.3588252 +| epoch 9 | 343/ 2800 batches | train loss 0.2569357 +| epoch 9 | 347/ 2800 batches | train loss 0.3661240 +| epoch 9 | 351/ 2800 batches | train loss 0.3058068 +| epoch 9 | 355/ 2800 batches | train loss 0.3104767 +| epoch 9 | 359/ 2800 batches | train loss 0.3148579 +| epoch 9 | 363/ 2800 batches | train loss 0.2475540 +| epoch 9 | 367/ 2800 batches | train loss 0.3020988 +| epoch 9 | 371/ 2800 batches | train loss 0.3155500 +| epoch 9 | 375/ 2800 batches | train loss 0.2922015 +| epoch 9 | 379/ 2800 batches | train loss 0.2812653 +| epoch 9 | 383/ 2800 batches | train loss 0.3044710 +| epoch 9 | 387/ 2800 batches | train loss 0.2351849 +| epoch 9 | 391/ 2800 batches | train loss 0.1985355 +| epoch 9 | 395/ 2800 batches | train loss 0.2588319 +| epoch 9 | 399/ 2800 batches | train loss 0.2804927 +| epoch 9 | 403/ 2800 batches | train loss 0.2658542 +| epoch 9 | 407/ 2800 batches | train loss 0.2376473 +| epoch 9 | 411/ 2800 batches | train loss 0.3077676 +| epoch 9 | 415/ 2800 batches | train loss 0.3057281 +| epoch 9 | 419/ 2800 batches | train loss 0.3521140 +| epoch 9 | 423/ 2800 batches | train loss 0.2951000 +| epoch 9 | 427/ 2800 batches | train loss 0.3244700 +| epoch 9 | 431/ 2800 batches | train loss 0.2069047 +| epoch 9 | 435/ 2800 batches | train loss 0.2824786 +| epoch 9 | 439/ 2800 batches | train loss 0.1977105 +| epoch 9 | 443/ 2800 batches | train loss 0.3020554 +| epoch 9 | 447/ 2800 batches | train loss 0.2963503 +| epoch 9 | 451/ 2800 batches | train loss 0.2451769 +| epoch 9 | 455/ 2800 batches | train loss 0.2700512 +| epoch 9 | 459/ 2800 batches | train loss 0.2835744 +| epoch 9 | 463/ 2800 batches | train loss 0.3686357 +| epoch 9 | 467/ 2800 batches | train loss 0.2818689 +| epoch 9 | 471/ 2800 batches | train loss 0.3557377 +| epoch 9 | 475/ 2800 batches | train loss 0.2896528 +| epoch 9 | 479/ 2800 batches | train loss 0.3416961 +| epoch 9 | 483/ 2800 batches | train loss 0.2984993 +| epoch 9 | 487/ 2800 batches | train loss 0.2568560 +| epoch 9 | 491/ 2800 batches | train loss 0.3971637 +| epoch 9 | 495/ 2800 batches | train loss 0.3299125 +| epoch 9 | 499/ 2800 batches | train loss 0.3226148 +| epoch 9 | 503/ 2800 batches | train loss 0.3700368 +| epoch 9 | 507/ 2800 batches | train loss 0.2627778 +| epoch 9 | 511/ 2800 batches | train loss 0.3660963 +| epoch 9 | 515/ 2800 batches | train loss 0.2938729 +| epoch 9 | 519/ 2800 batches | train loss 0.2828253 +| epoch 9 | 523/ 2800 batches | train loss 0.2928731 +| epoch 9 | 527/ 2800 batches | train loss 0.2805951 +| epoch 9 | 531/ 2800 batches | train loss 0.2588638 +| epoch 9 | 535/ 2800 batches | train loss 0.3371612 +| epoch 9 | 539/ 2800 batches | train loss 0.3471282 +| epoch 9 | 543/ 2800 batches | train loss 0.2724701 +| epoch 9 | 547/ 2800 batches | train loss 0.2807786 +| epoch 9 | 551/ 2800 batches | train loss 0.1939987 +| epoch 9 | 555/ 2800 batches | train loss 0.3332474 +| epoch 9 | 559/ 2800 batches | train loss 0.3176697 +| epoch 9 | 563/ 2800 batches | train loss 0.3126923 +| epoch 9 | 567/ 2800 batches | train loss 0.2953717 +| epoch 9 | 571/ 2800 batches | train loss 0.3015815 +| epoch 9 | 575/ 2800 batches | train loss 0.3718358 +| epoch 9 | 579/ 2800 batches | train loss 0.3557075 +| epoch 9 | 583/ 2800 batches | train loss 0.2757699 +| epoch 9 | 587/ 2800 batches | train loss 0.3134620 +| epoch 9 | 591/ 2800 batches | train loss 0.2734619 +| epoch 9 | 595/ 2800 batches | train loss 0.2631932 +| epoch 9 | 599/ 2800 batches | train loss 0.2725621 +| epoch 9 | 603/ 2800 batches | train loss 0.3088495 +| epoch 9 | 607/ 2800 batches | train loss 0.3499971 +| epoch 9 | 611/ 2800 batches | train loss 0.2746783 +| epoch 9 | 615/ 2800 batches | train loss 0.2856020 +| epoch 9 | 619/ 2800 batches | train loss 0.2976097 +| epoch 9 | 623/ 2800 batches | train loss 0.3000979 +| epoch 9 | 627/ 2800 batches | train loss 0.3115226 +| epoch 9 | 631/ 2800 batches | train loss 0.3008426 +| epoch 9 | 635/ 2800 batches | train loss 0.3269486 +| epoch 9 | 639/ 2800 batches | train loss 0.3194674 +| epoch 9 | 643/ 2800 batches | train loss 0.2740003 +| epoch 9 | 647/ 2800 batches | train loss 0.3415127 +| epoch 9 | 651/ 2800 batches | train loss 0.4289087 +| epoch 9 | 655/ 2800 batches | train loss 0.3270388 +| epoch 9 | 659/ 2800 batches | train loss 0.3324998 +| epoch 9 | 663/ 2800 batches | train loss 0.2751091 +| epoch 9 | 667/ 2800 batches | train loss 0.2561081 +| epoch 9 | 671/ 2800 batches | train loss 0.2694494 +| epoch 9 | 675/ 2800 batches | train loss 0.3849741 +| epoch 9 | 679/ 2800 batches | train loss 0.3031057 +| epoch 9 | 683/ 2800 batches | train loss 0.2983746 +| epoch 9 | 687/ 2800 batches | train loss 0.3454533 +| epoch 9 | 691/ 2800 batches | train loss 0.4196919 +| epoch 9 | 695/ 2800 batches | train loss 0.2814274 +| epoch 9 | 699/ 2800 batches | train loss 0.3782684 +| epoch 9 | 703/ 2800 batches | train loss 0.3280249 +| epoch 9 | 707/ 2800 batches | train loss 0.2876328 +| epoch 9 | 711/ 2800 batches | train loss 0.2465757 +| epoch 9 | 715/ 2800 batches | train loss 0.3861841 +| epoch 9 | 719/ 2800 batches | train loss 0.2452381 +| epoch 9 | 723/ 2800 batches | train loss 0.2778228 +| epoch 9 | 727/ 2800 batches | train loss 0.3286633 +| epoch 9 | 731/ 2800 batches | train loss 0.2682037 +| epoch 9 | 735/ 2800 batches | train loss 0.3286442 +| epoch 9 | 739/ 2800 batches | train loss 0.3390009 +| epoch 9 | 743/ 2800 batches | train loss 0.2593614 +| epoch 9 | 747/ 2800 batches | train loss 0.3039067 +| epoch 9 | 751/ 2800 batches | train loss 0.3242009 +| epoch 9 | 755/ 2800 batches | train loss 0.3399414 +| epoch 9 | 759/ 2800 batches | train loss 0.2980427 +| epoch 9 | 763/ 2800 batches | train loss 0.3144129 +| epoch 9 | 767/ 2800 batches | train loss 0.2957837 +| epoch 9 | 771/ 2800 batches | train loss 0.3337373 +| epoch 9 | 775/ 2800 batches | train loss 0.3404347 +| epoch 9 | 779/ 2800 batches | train loss 0.3054453 +| epoch 9 | 783/ 2800 batches | train loss 0.3072273 +| epoch 9 | 787/ 2800 batches | train loss 0.2848733 +| epoch 9 | 791/ 2800 batches | train loss 0.2988285 +| epoch 9 | 795/ 2800 batches | train loss 0.3200070 +| epoch 9 | 799/ 2800 batches | train loss 0.3065127 +| epoch 9 | 803/ 2800 batches | train loss 0.2972471 +| epoch 9 | 807/ 2800 batches | train loss 0.3202206 +| epoch 9 | 811/ 2800 batches | train loss 0.3580720 +| epoch 9 | 815/ 2800 batches | train loss 0.3358255 +| epoch 9 | 819/ 2800 batches | train loss 0.3204234 +| epoch 9 | 823/ 2800 batches | train loss 0.2600026 +| epoch 9 | 827/ 2800 batches | train loss 0.3368640 +| epoch 9 | 831/ 2800 batches | train loss 0.3041692 +| epoch 9 | 835/ 2800 batches | train loss 0.2787350 +| epoch 9 | 839/ 2800 batches | train loss 0.3270900 +| epoch 9 | 843/ 2800 batches | train loss 0.4209865 +| epoch 9 | 847/ 2800 batches | train loss 0.3139939 +| epoch 9 | 851/ 2800 batches | train loss 0.2829917 +| epoch 9 | 855/ 2800 batches | train loss 0.3003676 +| epoch 9 | 859/ 2800 batches | train loss 0.2784680 +| epoch 9 | 863/ 2800 batches | train loss 0.2879916 +| epoch 9 | 867/ 2800 batches | train loss 0.2996196 +| epoch 9 | 871/ 2800 batches | train loss 0.3582772 +| epoch 9 | 875/ 2800 batches | train loss 0.3030475 +| epoch 9 | 879/ 2800 batches | train loss 0.3342837 +| epoch 9 | 883/ 2800 batches | train loss 0.3180220 +| epoch 9 | 887/ 2800 batches | train loss 0.3216027 +| epoch 9 | 891/ 2800 batches | train loss 0.2846782 +| epoch 9 | 895/ 2800 batches | train loss 0.3093298 +| epoch 9 | 899/ 2800 batches | train loss 0.2908967 +| epoch 9 | 903/ 2800 batches | train loss 0.3179754 +| epoch 9 | 907/ 2800 batches | train loss 0.3362014 +| epoch 9 | 911/ 2800 batches | train loss 0.2747164 +| epoch 9 | 915/ 2800 batches | train loss 0.3574662 +| epoch 9 | 919/ 2800 batches | train loss 0.2727722 +| epoch 9 | 923/ 2800 batches | train loss 0.2804221 +| epoch 9 | 927/ 2800 batches | train loss 0.2204116 +| epoch 9 | 931/ 2800 batches | train loss 0.3526553 +| epoch 9 | 935/ 2800 batches | train loss 0.2878632 +| epoch 9 | 939/ 2800 batches | train loss 0.2926033 +| epoch 9 | 943/ 2800 batches | train loss 0.2536253 +| epoch 9 | 947/ 2800 batches | train loss 0.2924424 +| epoch 9 | 951/ 2800 batches | train loss 0.3106425 +| epoch 9 | 955/ 2800 batches | train loss 0.3019990 +| epoch 9 | 959/ 2800 batches | train loss 0.2735271 +| epoch 9 | 963/ 2800 batches | train loss 0.2407841 +| epoch 9 | 967/ 2800 batches | train loss 0.2799483 +| epoch 9 | 971/ 2800 batches | train loss 0.3071294 +| epoch 9 | 975/ 2800 batches | train loss 0.2419916 +| epoch 9 | 979/ 2800 batches | train loss 0.3241380 +| epoch 9 | 983/ 2800 batches | train loss 0.3041127 +| epoch 9 | 987/ 2800 batches | train loss 0.3258899 +| epoch 9 | 991/ 2800 batches | train loss 0.3064294 +| epoch 9 | 995/ 2800 batches | train loss 0.3299328 +| epoch 9 | 999/ 2800 batches | train loss 0.3086756 +| epoch 9 | 1003/ 2800 batches | train loss 0.3636376 +| epoch 9 | 1007/ 2800 batches | train loss 0.3628845 +| epoch 9 | 1011/ 2800 batches | train loss 0.2820116 +| epoch 9 | 1015/ 2800 batches | train loss 0.2871560 +| epoch 9 | 1019/ 2800 batches | train loss 0.3414461 +| epoch 9 | 1023/ 2800 batches | train loss 0.3052519 +| epoch 9 | 1027/ 2800 batches | train loss 0.3115493 +| epoch 9 | 1031/ 2800 batches | train loss 0.3463041 +| epoch 9 | 1035/ 2800 batches | train loss 0.2852737 +| epoch 9 | 1039/ 2800 batches | train loss 0.3358895 +| epoch 9 | 1043/ 2800 batches | train loss 0.3631253 +| epoch 9 | 1047/ 2800 batches | train loss 0.2974944 +| epoch 9 | 1051/ 2800 batches | train loss 0.3400381 +| epoch 9 | 1055/ 2800 batches | train loss 0.2735445 +| epoch 9 | 1059/ 2800 batches | train loss 0.3771741 +| epoch 9 | 1063/ 2800 batches | train loss 0.2760479 +| epoch 9 | 1067/ 2800 batches | train loss 0.2528068 +| epoch 9 | 1071/ 2800 batches | train loss 0.3264199 +| epoch 9 | 1075/ 2800 batches | train loss 0.3547118 +| epoch 9 | 1079/ 2800 batches | train loss 0.3364591 +| epoch 9 | 1083/ 2800 batches | train loss 0.3318842 +| epoch 9 | 1087/ 2800 batches | train loss 0.3448608 +| epoch 9 | 1091/ 2800 batches | train loss 0.3196920 +| epoch 9 | 1095/ 2800 batches | train loss 0.3016995 +| epoch 9 | 1099/ 2800 batches | train loss 0.3500328 +| epoch 9 | 1103/ 2800 batches | train loss 0.2900206 +| epoch 9 | 1107/ 2800 batches | train loss 0.2995962 +| epoch 9 | 1111/ 2800 batches | train loss 0.2928768 +| epoch 9 | 1115/ 2800 batches | train loss 0.3295366 +| epoch 9 | 1119/ 2800 batches | train loss 0.3126410 +| epoch 9 | 1123/ 2800 batches | train loss 0.2611665 +| epoch 9 | 1127/ 2800 batches | train loss 0.2519665 +| epoch 9 | 1131/ 2800 batches | train loss 0.2911317 +| epoch 9 | 1135/ 2800 batches | train loss 0.2426419 +| epoch 9 | 1139/ 2800 batches | train loss 0.2954155 +| epoch 9 | 1143/ 2800 batches | train loss 0.1955677 +| epoch 9 | 1147/ 2800 batches | train loss 0.2749683 +| epoch 9 | 1151/ 2800 batches | train loss 0.3469580 +| epoch 9 | 1155/ 2800 batches | train loss 0.3527752 +| epoch 9 | 1159/ 2800 batches | train loss 0.3541676 +| epoch 9 | 1163/ 2800 batches | train loss 0.2718122 +| epoch 9 | 1167/ 2800 batches | train loss 0.3003638 +| epoch 9 | 1171/ 2800 batches | train loss 0.3112299 +| epoch 9 | 1175/ 2800 batches | train loss 0.3409740 +| epoch 9 | 1179/ 2800 batches | train loss 0.3410186 +| epoch 9 | 1183/ 2800 batches | train loss 0.2863399 +| epoch 9 | 1187/ 2800 batches | train loss 0.2706564 +| epoch 9 | 1191/ 2800 batches | train loss 0.2942007 +| epoch 9 | 1195/ 2800 batches | train loss 0.2857867 +| epoch 9 | 1199/ 2800 batches | train loss 0.2689963 +| epoch 9 | 1203/ 2800 batches | train loss 0.4594211 +| epoch 9 | 1207/ 2800 batches | train loss 0.2947035 +| epoch 9 | 1211/ 2800 batches | train loss 0.2801958 +| epoch 9 | 1215/ 2800 batches | train loss 0.3006366 +| epoch 9 | 1219/ 2800 batches | train loss 0.3203779 +| epoch 9 | 1223/ 2800 batches | train loss 0.3321413 +| epoch 9 | 1227/ 2800 batches | train loss 0.3366830 +| epoch 9 | 1231/ 2800 batches | train loss 0.3345884 +| epoch 9 | 1235/ 2800 batches | train loss 0.3047365 +| epoch 9 | 1239/ 2800 batches | train loss 0.4130462 +| epoch 9 | 1243/ 2800 batches | train loss 0.2913731 +| epoch 9 | 1247/ 2800 batches | train loss 0.2718608 +| epoch 9 | 1251/ 2800 batches | train loss 0.3127147 +| epoch 9 | 1255/ 2800 batches | train loss 0.3144796 +| epoch 9 | 1259/ 2800 batches | train loss 0.2633901 +| epoch 9 | 1263/ 2800 batches | train loss 0.3071342 +| epoch 9 | 1267/ 2800 batches | train loss 0.2738826 +| epoch 9 | 1271/ 2800 batches | train loss 0.2925895 +| epoch 9 | 1275/ 2800 batches | train loss 0.2614369 +| epoch 9 | 1279/ 2800 batches | train loss 0.4049575 +| epoch 9 | 1283/ 2800 batches | train loss 0.3056483 +| epoch 9 | 1287/ 2800 batches | train loss 0.3049931 +| epoch 9 | 1291/ 2800 batches | train loss 0.2712156 +| epoch 9 | 1295/ 2800 batches | train loss 0.2658080 +| epoch 9 | 1299/ 2800 batches | train loss 0.3163208 +| epoch 9 | 1303/ 2800 batches | train loss 0.2969397 +| epoch 9 | 1307/ 2800 batches | train loss 0.1991754 +| epoch 9 | 1311/ 2800 batches | train loss 0.3008451 +| epoch 9 | 1315/ 2800 batches | train loss 0.3072354 +| epoch 9 | 1319/ 2800 batches | train loss 0.3432564 +| epoch 9 | 1323/ 2800 batches | train loss 0.2110786 +| epoch 9 | 1327/ 2800 batches | train loss 0.3339471 +| epoch 9 | 1331/ 2800 batches | train loss 0.2752802 +| epoch 9 | 1335/ 2800 batches | train loss 0.3469564 +| epoch 9 | 1339/ 2800 batches | train loss 0.3178977 +| epoch 9 | 1343/ 2800 batches | train loss 0.4160205 +| epoch 9 | 1347/ 2800 batches | train loss 0.3136267 +| epoch 9 | 1351/ 2800 batches | train loss 0.3487433 +| epoch 9 | 1355/ 2800 batches | train loss 0.3249622 +| epoch 9 | 1359/ 2800 batches | train loss 0.3269995 +| epoch 9 | 1363/ 2800 batches | train loss 0.2565268 +| epoch 9 | 1367/ 2800 batches | train loss 0.3753574 +| epoch 9 | 1371/ 2800 batches | train loss 0.3137638 +| epoch 9 | 1375/ 2800 batches | train loss 0.2896572 +| epoch 9 | 1379/ 2800 batches | train loss 0.2716550 +| epoch 9 | 1383/ 2800 batches | train loss 0.2733362 +| epoch 9 | 1387/ 2800 batches | train loss 0.3417920 +| epoch 9 | 1391/ 2800 batches | train loss 0.4088832 +| epoch 9 | 1395/ 2800 batches | train loss 0.3028199 +| epoch 9 | 1399/ 2800 batches | train loss 0.2824892 +| epoch 9 | 1403/ 2800 batches | train loss 0.2910127 +| epoch 9 | 1407/ 2800 batches | train loss 0.3474752 +| epoch 9 | 1411/ 2800 batches | train loss 0.3087389 +| epoch 9 | 1415/ 2800 batches | train loss 0.2643429 +| epoch 9 | 1419/ 2800 batches | train loss 0.3448496 +| epoch 9 | 1423/ 2800 batches | train loss 0.2446980 +| epoch 9 | 1427/ 2800 batches | train loss 0.3453193 +| epoch 9 | 1431/ 2800 batches | train loss 0.3591141 +| epoch 9 | 1435/ 2800 batches | train loss 0.3503243 +| epoch 9 | 1439/ 2800 batches | train loss 0.3466596 +| epoch 9 | 1443/ 2800 batches | train loss 0.3488600 +| epoch 9 | 1447/ 2800 batches | train loss 0.2860169 +| epoch 9 | 1451/ 2800 batches | train loss 0.3607357 +| epoch 9 | 1455/ 2800 batches | train loss 0.2606319 +| epoch 9 | 1459/ 2800 batches | train loss 0.2932673 +| epoch 9 | 1463/ 2800 batches | train loss 0.3016435 +| epoch 9 | 1467/ 2800 batches | train loss 0.2862773 +| epoch 9 | 1471/ 2800 batches | train loss 0.3437455 +| epoch 9 | 1475/ 2800 batches | train loss 0.3438884 +| epoch 9 | 1479/ 2800 batches | train loss 0.2881804 +| epoch 9 | 1483/ 2800 batches | train loss 0.2173124 +| epoch 9 | 1487/ 2800 batches | train loss 0.3679586 +| epoch 9 | 1491/ 2800 batches | train loss 0.2740926 +| epoch 9 | 1495/ 2800 batches | train loss 0.3518593 +| epoch 9 | 1499/ 2800 batches | train loss 0.3075981 +| epoch 9 | 1503/ 2800 batches | train loss 0.2296843 +| epoch 9 | 1507/ 2800 batches | train loss 0.2989332 +| epoch 9 | 1511/ 2800 batches | train loss 0.3001178 +| epoch 9 | 1515/ 2800 batches | train loss 0.3322067 +| epoch 9 | 1519/ 2800 batches | train loss 0.3130742 +| epoch 9 | 1523/ 2800 batches | train loss 0.3043777 +| epoch 9 | 1527/ 2800 batches | train loss 0.2836616 +| epoch 9 | 1531/ 2800 batches | train loss 0.2815749 +| epoch 9 | 1535/ 2800 batches | train loss 0.3376121 +| epoch 9 | 1539/ 2800 batches | train loss 0.2877075 +| epoch 9 | 1543/ 2800 batches | train loss 0.3663209 +| epoch 9 | 1547/ 2800 batches | train loss 0.2784203 +| epoch 9 | 1551/ 2800 batches | train loss 0.2835551 +| epoch 9 | 1555/ 2800 batches | train loss 0.3347591 +| epoch 9 | 1559/ 2800 batches | train loss 0.3197483 +| epoch 9 | 1563/ 2800 batches | train loss 0.2952705 +| epoch 9 | 1567/ 2800 batches | train loss 0.2727427 +| epoch 9 | 1571/ 2800 batches | train loss 0.2969864 +| epoch 9 | 1575/ 2800 batches | train loss 0.3210097 +| epoch 9 | 1579/ 2800 batches | train loss 0.2508864 +| epoch 9 | 1583/ 2800 batches | train loss 0.2904592 +| epoch 9 | 1587/ 2800 batches | train loss 0.3001777 +| epoch 9 | 1591/ 2800 batches | train loss 0.3137857 +| epoch 9 | 1595/ 2800 batches | train loss 0.3386564 +| epoch 9 | 1599/ 2800 batches | train loss 0.3071191 +| epoch 9 | 1603/ 2800 batches | train loss 0.2881624 +| epoch 9 | 1607/ 2800 batches | train loss 0.3373202 +| epoch 9 | 1611/ 2800 batches | train loss 0.3477653 +| epoch 9 | 1615/ 2800 batches | train loss 0.2720995 +| epoch 9 | 1619/ 2800 batches | train loss 0.3291525 +| epoch 9 | 1623/ 2800 batches | train loss 0.2609586 +| epoch 9 | 1627/ 2800 batches | train loss 0.3314387 +| epoch 9 | 1631/ 2800 batches | train loss 0.2505973 +| epoch 9 | 1635/ 2800 batches | train loss 0.3492092 +| epoch 9 | 1639/ 2800 batches | train loss 0.3146334 +| epoch 9 | 1643/ 2800 batches | train loss 0.3241233 +| epoch 9 | 1647/ 2800 batches | train loss 0.2949015 +| epoch 9 | 1651/ 2800 batches | train loss 0.2917408 +| epoch 9 | 1655/ 2800 batches | train loss 0.3109764 +| epoch 9 | 1659/ 2800 batches | train loss 0.2939419 +| epoch 9 | 1663/ 2800 batches | train loss 0.2950831 +| epoch 9 | 1667/ 2800 batches | train loss 0.3797483 +| epoch 9 | 1671/ 2800 batches | train loss 0.2549676 +| epoch 9 | 1675/ 2800 batches | train loss 0.3584279 +| epoch 9 | 1679/ 2800 batches | train loss 0.2958565 +| epoch 9 | 1683/ 2800 batches | train loss 0.2779596 +| epoch 9 | 1687/ 2800 batches | train loss 0.3060691 +| epoch 9 | 1691/ 2800 batches | train loss 0.2903319 +| epoch 9 | 1695/ 2800 batches | train loss 0.3241404 +| epoch 9 | 1699/ 2800 batches | train loss 0.2798826 +| epoch 9 | 1703/ 2800 batches | train loss 0.2847404 +| epoch 9 | 1707/ 2800 batches | train loss 0.3094186 +| epoch 9 | 1711/ 2800 batches | train loss 0.2873829 +| epoch 9 | 1715/ 2800 batches | train loss 0.2020942 +| epoch 9 | 1719/ 2800 batches | train loss 0.3615134 +| epoch 9 | 1723/ 2800 batches | train loss 0.3057060 +| epoch 9 | 1727/ 2800 batches | train loss 0.3411888 +| epoch 9 | 1731/ 2800 batches | train loss 0.3434323 +| epoch 9 | 1735/ 2800 batches | train loss 0.3123560 +| epoch 9 | 1739/ 2800 batches | train loss 0.2668888 +| epoch 9 | 1743/ 2800 batches | train loss 0.2816147 +| epoch 9 | 1747/ 2800 batches | train loss 0.3530121 +| epoch 9 | 1751/ 2800 batches | train loss 0.2858671 +| epoch 9 | 1755/ 2800 batches | train loss 0.3366700 +| epoch 9 | 1759/ 2800 batches | train loss 0.4000580 +| epoch 9 | 1763/ 2800 batches | train loss 0.2829690 +| epoch 9 | 1767/ 2800 batches | train loss 0.3059143 +| epoch 9 | 1771/ 2800 batches | train loss 0.3097599 +| epoch 9 | 1775/ 2800 batches | train loss 0.3310729 +| epoch 9 | 1779/ 2800 batches | train loss 0.3231222 +| epoch 9 | 1783/ 2800 batches | train loss 0.3426662 +| epoch 9 | 1787/ 2800 batches | train loss 0.2774565 +| epoch 9 | 1791/ 2800 batches | train loss 0.2732913 +| epoch 9 | 1795/ 2800 batches | train loss 0.3199065 +| epoch 9 | 1799/ 2800 batches | train loss 0.2877939 +| epoch 9 | 1803/ 2800 batches | train loss 0.2670189 +| epoch 9 | 1807/ 2800 batches | train loss 0.3209354 +| epoch 9 | 1811/ 2800 batches | train loss 0.3003999 +| epoch 9 | 1815/ 2800 batches | train loss 0.3694388 +| epoch 9 | 1819/ 2800 batches | train loss 0.2592854 +| epoch 9 | 1823/ 2800 batches | train loss 0.2874211 +| epoch 9 | 1827/ 2800 batches | train loss 0.3411867 +| epoch 9 | 1831/ 2800 batches | train loss 0.2690163 +| epoch 9 | 1835/ 2800 batches | train loss 0.2980651 +| epoch 9 | 1839/ 2800 batches | train loss 0.3462716 +| epoch 9 | 1843/ 2800 batches | train loss 0.3459007 +| epoch 9 | 1847/ 2800 batches | train loss 0.2861266 +| epoch 9 | 1851/ 2800 batches | train loss 0.3229440 +| epoch 9 | 1855/ 2800 batches | train loss 0.2579717 +| epoch 9 | 1859/ 2800 batches | train loss 0.3396556 +| epoch 9 | 1863/ 2800 batches | train loss 0.3022180 +| epoch 9 | 1867/ 2800 batches | train loss 0.2809058 +| epoch 9 | 1871/ 2800 batches | train loss 0.3443508 +| epoch 9 | 1875/ 2800 batches | train loss 0.3343403 +| epoch 9 | 1879/ 2800 batches | train loss 0.3244756 +| epoch 9 | 1883/ 2800 batches | train loss 0.3080969 +| epoch 9 | 1887/ 2800 batches | train loss 0.3150594 +| epoch 9 | 1891/ 2800 batches | train loss 0.2941259 +| epoch 9 | 1895/ 2800 batches | train loss 0.2456684 +| epoch 9 | 1899/ 2800 batches | train loss 0.3026578 +| epoch 9 | 1903/ 2800 batches | train loss 0.3364616 +| epoch 9 | 1907/ 2800 batches | train loss 0.3192973 +| epoch 9 | 1911/ 2800 batches | train loss 0.3268858 +| epoch 9 | 1915/ 2800 batches | train loss 0.3126660 +| epoch 9 | 1919/ 2800 batches | train loss 0.3528344 +| epoch 9 | 1923/ 2800 batches | train loss 0.2895041 +| epoch 9 | 1927/ 2800 batches | train loss 0.3351020 +| epoch 9 | 1931/ 2800 batches | train loss 0.3171432 +| epoch 9 | 1935/ 2800 batches | train loss 0.3010162 +| epoch 9 | 1939/ 2800 batches | train loss 0.3694848 +| epoch 9 | 1943/ 2800 batches | train loss 0.1287627 +| epoch 9 | 1947/ 2800 batches | train loss 0.3059108 +| epoch 9 | 1951/ 2800 batches | train loss 0.3111936 +| epoch 9 | 1955/ 2800 batches | train loss 0.2777776 +| epoch 9 | 1959/ 2800 batches | train loss 0.3376158 +| epoch 9 | 1963/ 2800 batches | train loss 0.3112474 +| epoch 9 | 1967/ 2800 batches | train loss 0.3225057 +| epoch 9 | 1971/ 2800 batches | train loss 0.3472070 +| epoch 9 | 1975/ 2800 batches | train loss 0.2671060 +| epoch 9 | 1979/ 2800 batches | train loss 0.2383567 +| epoch 9 | 1983/ 2800 batches | train loss 0.3876588 +| epoch 9 | 1987/ 2800 batches | train loss 0.2748165 +| epoch 9 | 1991/ 2800 batches | train loss 0.3505584 +| epoch 9 | 1995/ 2800 batches | train loss 0.3202879 +| epoch 9 | 1999/ 2800 batches | train loss 0.3267037 +| epoch 9 | 2003/ 2800 batches | train loss 0.2905585 +| epoch 9 | 2007/ 2800 batches | train loss 0.2982946 +| epoch 9 | 2011/ 2800 batches | train loss 0.3258532 +| epoch 9 | 2015/ 2800 batches | train loss 0.2872379 +| epoch 9 | 2019/ 2800 batches | train loss 0.2600940 +| epoch 9 | 2023/ 2800 batches | train loss 0.3113332 +| epoch 9 | 2027/ 2800 batches | train loss 0.3003037 +| epoch 9 | 2031/ 2800 batches | train loss 0.3174519 +| epoch 9 | 2035/ 2800 batches | train loss 0.2817625 +| epoch 9 | 2039/ 2800 batches | train loss 0.3123966 +| epoch 9 | 2043/ 2800 batches | train loss 0.2769353 +| epoch 9 | 2047/ 2800 batches | train loss 0.3678387 +| epoch 9 | 2051/ 2800 batches | train loss 0.2783136 +| epoch 9 | 2055/ 2800 batches | train loss 0.3167485 +| epoch 9 | 2059/ 2800 batches | train loss 0.3207564 +| epoch 9 | 2063/ 2800 batches | train loss 0.4197620 +| epoch 9 | 2067/ 2800 batches | train loss 0.3371113 +| epoch 9 | 2071/ 2800 batches | train loss 0.3369134 +| epoch 9 | 2075/ 2800 batches | train loss 0.3947011 +| epoch 9 | 2079/ 2800 batches | train loss 0.2893895 +| epoch 9 | 2083/ 2800 batches | train loss 0.2648704 +| epoch 9 | 2087/ 2800 batches | train loss 0.2824361 +| epoch 9 | 2091/ 2800 batches | train loss 0.3074428 +| epoch 9 | 2095/ 2800 batches | train loss 0.2859939 +| epoch 9 | 2099/ 2800 batches | train loss 0.3036100 +| epoch 9 | 2103/ 2800 batches | train loss 0.2777277 +| epoch 9 | 2107/ 2800 batches | train loss 0.3188946 +| epoch 9 | 2111/ 2800 batches | train loss 0.3174734 +| epoch 9 | 2115/ 2800 batches | train loss 0.2710096 +| epoch 9 | 2119/ 2800 batches | train loss 0.2668312 +| epoch 9 | 2123/ 2800 batches | train loss 0.2807764 +| epoch 9 | 2127/ 2800 batches | train loss 0.3118299 +| epoch 9 | 2131/ 2800 batches | train loss 0.3446501 +| epoch 9 | 2135/ 2800 batches | train loss 0.3067604 +| epoch 9 | 2139/ 2800 batches | train loss 0.3203161 +| epoch 9 | 2143/ 2800 batches | train loss 0.3354369 +| epoch 9 | 2147/ 2800 batches | train loss 0.3052045 +| epoch 9 | 2151/ 2800 batches | train loss 0.2385961 +| epoch 9 | 2155/ 2800 batches | train loss 0.2684505 +| epoch 9 | 2159/ 2800 batches | train loss 0.2636278 +| epoch 9 | 2163/ 2800 batches | train loss 0.3483310 +| epoch 9 | 2167/ 2800 batches | train loss 0.2899859 +| epoch 9 | 2171/ 2800 batches | train loss 0.2894769 +| epoch 9 | 2175/ 2800 batches | train loss 0.3441691 +| epoch 9 | 2179/ 2800 batches | train loss 0.3039972 +| epoch 9 | 2183/ 2800 batches | train loss 0.3128651 +| epoch 9 | 2187/ 2800 batches | train loss 0.3136132 +| epoch 9 | 2191/ 2800 batches | train loss 0.3140364 +| epoch 9 | 2195/ 2800 batches | train loss 0.3119119 +| epoch 9 | 2199/ 2800 batches | train loss 0.2607416 +| epoch 9 | 2203/ 2800 batches | train loss 0.3918587 +| epoch 9 | 2207/ 2800 batches | train loss 0.2913972 +| epoch 9 | 2211/ 2800 batches | train loss 0.2718750 +| epoch 9 | 2215/ 2800 batches | train loss 0.2897996 +| epoch 9 | 2219/ 2800 batches | train loss 0.3486081 +| epoch 9 | 2223/ 2800 batches | train loss 0.3412774 +| epoch 9 | 2227/ 2800 batches | train loss 0.3721987 +| epoch 9 | 2231/ 2800 batches | train loss 0.3221334 +| epoch 9 | 2235/ 2800 batches | train loss 0.2508351 +| epoch 9 | 2239/ 2800 batches | train loss 0.3764285 +| epoch 9 | 2243/ 2800 batches | train loss 0.3604842 +| epoch 9 | 2247/ 2800 batches | train loss 0.3190878 +| epoch 9 | 2251/ 2800 batches | train loss 0.2914820 +| epoch 9 | 2255/ 2800 batches | train loss 0.3323649 +| epoch 9 | 2259/ 2800 batches | train loss 0.3224854 +| epoch 9 | 2263/ 2800 batches | train loss 0.2823522 +| epoch 9 | 2267/ 2800 batches | train loss 0.2986955 +| epoch 9 | 2271/ 2800 batches | train loss 0.2584264 +| epoch 9 | 2275/ 2800 batches | train loss 0.3108866 +| epoch 9 | 2279/ 2800 batches | train loss 0.2797994 +| epoch 9 | 2283/ 2800 batches | train loss 0.3528763 +| epoch 9 | 2287/ 2800 batches | train loss 0.3764148 +| epoch 9 | 2291/ 2800 batches | train loss 0.3213073 +| epoch 9 | 2295/ 2800 batches | train loss 0.3240993 +| epoch 9 | 2299/ 2800 batches | train loss 0.3367499 +| epoch 9 | 2303/ 2800 batches | train loss 0.3036671 +| epoch 9 | 2307/ 2800 batches | train loss 0.3434692 +| epoch 9 | 2311/ 2800 batches | train loss 0.2567936 +| epoch 9 | 2315/ 2800 batches | train loss 0.3387162 +| epoch 9 | 2319/ 2800 batches | train loss 0.3329280 +| epoch 9 | 2323/ 2800 batches | train loss 0.2877560 +| epoch 9 | 2327/ 2800 batches | train loss 0.3136849 +| epoch 9 | 2331/ 2800 batches | train loss 0.3359499 +| epoch 9 | 2335/ 2800 batches | train loss 0.2852676 +| epoch 9 | 2339/ 2800 batches | train loss 0.3115005 +| epoch 9 | 2343/ 2800 batches | train loss 0.2802305 +| epoch 9 | 2347/ 2800 batches | train loss 0.2740486 +| epoch 9 | 2351/ 2800 batches | train loss 0.3718278 +| epoch 9 | 2355/ 2800 batches | train loss 0.3135680 +| epoch 9 | 2359/ 2800 batches | train loss 0.2824967 +| epoch 9 | 2363/ 2800 batches | train loss 0.2877912 +| epoch 9 | 2367/ 2800 batches | train loss 0.3179181 +| epoch 9 | 2371/ 2800 batches | train loss 0.3405850 +| epoch 9 | 2375/ 2800 batches | train loss 0.3758650 +| epoch 9 | 2379/ 2800 batches | train loss 0.2731597 +| epoch 9 | 2383/ 2800 batches | train loss 0.3010010 +| epoch 9 | 2387/ 2800 batches | train loss 0.3990810 +| epoch 9 | 2391/ 2800 batches | train loss 0.3338234 +| epoch 9 | 2395/ 2800 batches | train loss 0.3212030 +| epoch 9 | 2399/ 2800 batches | train loss 0.2881930 +| epoch 9 | 2403/ 2800 batches | train loss 0.2827892 +| epoch 9 | 2407/ 2800 batches | train loss 0.3870143 +| epoch 9 | 2411/ 2800 batches | train loss 0.1183896 +| epoch 9 | 2415/ 2800 batches | train loss 0.3258994 +| epoch 9 | 2419/ 2800 batches | train loss 0.2828006 +| epoch 9 | 2423/ 2800 batches | train loss 0.3452403 +| epoch 9 | 2427/ 2800 batches | train loss 0.2966396 +| epoch 9 | 2431/ 2800 batches | train loss 0.3301374 +| epoch 9 | 2435/ 2800 batches | train loss 0.2773654 +| epoch 9 | 2439/ 2800 batches | train loss 0.3378876 +| epoch 9 | 2443/ 2800 batches | train loss 0.2646354 +| epoch 9 | 2447/ 2800 batches | train loss 0.2719839 +| epoch 9 | 2451/ 2800 batches | train loss 0.2701766 +| epoch 9 | 2455/ 2800 batches | train loss 0.2933937 +| epoch 9 | 2459/ 2800 batches | train loss 0.3369592 +| epoch 9 | 2463/ 2800 batches | train loss 0.3487294 +| epoch 9 | 2467/ 2800 batches | train loss 0.3503475 +| epoch 9 | 2471/ 2800 batches | train loss 0.3122612 +| epoch 9 | 2475/ 2800 batches | train loss 0.3617928 +| epoch 9 | 2479/ 2800 batches | train loss 0.3786681 +| epoch 9 | 2483/ 2800 batches | train loss 0.3291068 +| epoch 9 | 2487/ 2800 batches | train loss 0.2684293 +| epoch 9 | 2491/ 2800 batches | train loss 0.2740755 +| epoch 9 | 2495/ 2800 batches | train loss 0.2894703 +| epoch 9 | 2499/ 2800 batches | train loss 0.2912601 +| epoch 9 | 2503/ 2800 batches | train loss 0.3086804 +| epoch 9 | 2507/ 2800 batches | train loss 0.3159536 +| epoch 9 | 2511/ 2800 batches | train loss 0.3907239 +| epoch 9 | 2515/ 2800 batches | train loss 0.2591692 +| epoch 9 | 2519/ 2800 batches | train loss 0.2998779 +| epoch 9 | 2523/ 2800 batches | train loss 0.2663691 +| epoch 9 | 2527/ 2800 batches | train loss 0.2936983 +| epoch 9 | 2531/ 2800 batches | train loss 0.3147944 +| epoch 9 | 2535/ 2800 batches | train loss 0.3286000 +| epoch 9 | 2539/ 2800 batches | train loss 0.2735509 +| epoch 9 | 2543/ 2800 batches | train loss 0.3348638 +| epoch 9 | 2547/ 2800 batches | train loss 0.3173120 +| epoch 9 | 2551/ 2800 batches | train loss 0.3365633 +| epoch 9 | 2555/ 2800 batches | train loss 0.3137087 +| epoch 9 | 2559/ 2800 batches | train loss 0.2786165 +| epoch 9 | 2563/ 2800 batches | train loss 0.2718723 +| epoch 9 | 2567/ 2800 batches | train loss 0.3224980 +| epoch 9 | 2571/ 2800 batches | train loss 0.3062731 +| epoch 9 | 2575/ 2800 batches | train loss 0.2829413 +| epoch 9 | 2579/ 2800 batches | train loss 0.3919491 +| epoch 9 | 2583/ 2800 batches | train loss 0.3271141 +| epoch 9 | 2587/ 2800 batches | train loss 0.3354522 +| epoch 9 | 2591/ 2800 batches | train loss 0.3696140 +| epoch 9 | 2595/ 2800 batches | train loss 0.3382658 +| epoch 9 | 2599/ 2800 batches | train loss 0.2749253 +| epoch 9 | 2603/ 2800 batches | train loss 0.3746513 +| epoch 9 | 2607/ 2800 batches | train loss 0.3653318 +| epoch 9 | 2611/ 2800 batches | train loss 0.2731614 +| epoch 9 | 2615/ 2800 batches | train loss 0.2949010 +| epoch 9 | 2619/ 2800 batches | train loss 0.3529584 +| epoch 9 | 2623/ 2800 batches | train loss 0.3723107 +| epoch 9 | 2627/ 2800 batches | train loss 0.2992359 +| epoch 9 | 2631/ 2800 batches | train loss 0.3849333 +| epoch 9 | 2635/ 2800 batches | train loss 0.3676902 +| epoch 9 | 2639/ 2800 batches | train loss 0.3247280 +| epoch 9 | 2643/ 2800 batches | train loss 0.3063438 +| epoch 9 | 2647/ 2800 batches | train loss 0.3272486 +| epoch 9 | 2651/ 2800 batches | train loss 0.3865118 +| epoch 9 | 2655/ 2800 batches | train loss 0.3258220 +| epoch 9 | 2659/ 2800 batches | train loss 0.3126454 +| epoch 9 | 2663/ 2800 batches | train loss 0.3146307 +| epoch 9 | 2667/ 2800 batches | train loss 0.3388903 +| epoch 9 | 2671/ 2800 batches | train loss 0.3204968 +| epoch 9 | 2675/ 2800 batches | train loss 0.2665421 +| epoch 9 | 2679/ 2800 batches | train loss 0.2683485 +| epoch 9 | 2683/ 2800 batches | train loss 0.3614719 +| epoch 9 | 2687/ 2800 batches | train loss 0.3137563 +| epoch 9 | 2691/ 2800 batches | train loss 0.3197438 +| epoch 9 | 2695/ 2800 batches | train loss 0.3440449 +| epoch 9 | 2699/ 2800 batches | train loss 0.3145870 +| epoch 9 | 2703/ 2800 batches | train loss 0.3574555 +| epoch 9 | 2707/ 2800 batches | train loss 0.3211162 +| epoch 9 | 2711/ 2800 batches | train loss 0.2875624 +| epoch 9 | 2715/ 2800 batches | train loss 0.3354865 +| epoch 9 | 2719/ 2800 batches | train loss 0.2983618 +| epoch 9 | 2723/ 2800 batches | train loss 0.3136337 +| epoch 9 | 2727/ 2800 batches | train loss 0.3472726 +| epoch 9 | 2731/ 2800 batches | train loss 0.3413786 +| epoch 9 | 2735/ 2800 batches | train loss 0.3285336 +| epoch 9 | 2739/ 2800 batches | train loss 0.3029189 +| epoch 9 | 2743/ 2800 batches | train loss 0.3224231 +| epoch 9 | 2747/ 2800 batches | train loss 0.3116577 +| epoch 9 | 2751/ 2800 batches | train loss 0.3746629 +| epoch 9 | 2755/ 2800 batches | train loss 0.3093216 +| epoch 9 | 2759/ 2800 batches | train loss 0.3101005 +| epoch 9 | 2763/ 2800 batches | train loss 0.3277687 +| epoch 9 | 2767/ 2800 batches | train loss 0.2902088 +| epoch 9 | 2771/ 2800 batches | train loss 0.2914102 +| epoch 9 | 2775/ 2800 batches | train loss 0.2702022 +| epoch 9 | 2779/ 2800 batches | train loss 0.3434135 +| epoch 9 | 2783/ 2800 batches | train loss 0.3149445 +| epoch 9 | 2787/ 2800 batches | train loss 0.3037072 +| epoch 9 | 2791/ 2800 batches | train loss 0.3254253 +| epoch 9 | 2795/ 2800 batches | train loss 0.2888793 +| epoch 9 | 2799/ 2800 batches | train loss 0.3305366 +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 2800 batches | test loss 0.4082764 +| epoch 9 | 7/ 2800 batches | test loss 0.6684874 +| epoch 9 | 11/ 2800 batches | test loss 0.5073451 +| epoch 9 | 15/ 2800 batches | test loss 0.6024228 +| epoch 9 | 19/ 2800 batches | test loss 0.4878204 +| epoch 9 | 23/ 2800 batches | test loss 0.6134668 +| epoch 9 | 27/ 2800 batches | test loss 0.5764965 +| epoch 9 | 31/ 2800 batches | test loss 0.7384372 +| epoch 9 | 35/ 2800 batches | test loss 0.6070712 +| epoch 9 | 39/ 2800 batches | test loss 0.4164784 +| epoch 9 | 43/ 2800 batches | test loss 0.4975621 +| epoch 9 | 47/ 2800 batches | test loss 0.6901503 +| epoch 9 | 51/ 2800 batches | test loss 0.5018418 +| epoch 9 | 55/ 2800 batches | test loss 0.7289706 +| epoch 9 | 59/ 2800 batches | test loss 0.6266592 +| epoch 9 | 63/ 2800 batches | test loss 0.7157618 +| epoch 9 | 67/ 2800 batches | test loss 0.5373676 +| epoch 9 | 71/ 2800 batches | test loss 0.7478765 +| epoch 9 | 75/ 2800 batches | test loss 0.4761427 +| epoch 9 | 79/ 2800 batches | test loss 0.5688534 +| epoch 9 | 83/ 2800 batches | test loss 0.4884301 +| epoch 9 | 87/ 2800 batches | test loss 0.4837137 +| epoch 9 | 91/ 2800 batches | test loss 0.6480246 +| epoch 9 | 95/ 2800 batches | test loss 0.5435605 +| epoch 9 | 99/ 2800 batches | test loss 0.5751008 +| epoch 9 | 103/ 2800 batches | test loss 0.8396552 +| epoch 9 | 107/ 2800 batches | test loss 0.6516351 +| epoch 9 | 111/ 2800 batches | test loss 0.5515026 +| epoch 9 | 115/ 2800 batches | test loss 0.5327238 +| epoch 9 | 119/ 2800 batches | test loss 1.0516477 +| epoch 9 | 123/ 2800 batches | test loss 0.6705423 +| epoch 9 | 127/ 2800 batches | test loss 0.5860513 +| epoch 9 | 131/ 2800 batches | test loss 0.8326866 +| epoch 9 | 135/ 2800 batches | test loss 0.4740184 +| epoch 9 | 139/ 2800 batches | test loss 0.8414112 +| epoch 9 | 143/ 2800 batches | test loss 0.6982202 +| epoch 9 | 147/ 2800 batches | test loss 0.4694467 +| epoch 9 | 151/ 2800 batches | test loss 0.6109419 +| epoch 9 | 155/ 2800 batches | test loss 0.5996158 +| epoch 9 | 159/ 2800 batches | test loss 0.5814663 +| epoch 9 | 163/ 2800 batches | test loss 0.5858066 +| epoch 9 | 167/ 2800 batches | test loss 0.5072606 +| epoch 9 | 171/ 2800 batches | test loss 0.5706271 +| epoch 9 | 175/ 2800 batches | test loss 0.6308795 +| epoch 9 | 179/ 2800 batches | test loss 0.5377339 +| epoch 9 | 183/ 2800 batches | test loss 0.6825609 +| epoch 9 | 187/ 2800 batches | test loss 0.5999385 +| epoch 9 | 191/ 2800 batches | test loss 0.4061014 +| epoch 9 | 195/ 2800 batches | test loss 0.6348343 +| epoch 9 | 199/ 2800 batches | test loss 0.5621536 +| epoch 9 | 203/ 2800 batches | test loss 0.5425689 +| epoch 9 | 207/ 2800 batches | test loss 0.6420845 +| epoch 9 | 211/ 2800 batches | test loss 0.5946299 +| epoch 9 | 215/ 2800 batches | test loss 0.5572445 +| epoch 9 | 219/ 2800 batches | test loss 0.7871999 +| epoch 9 | 223/ 2800 batches | test loss 0.3757695 +| epoch 9 | 227/ 2800 batches | test loss 0.7034279 +| epoch 9 | 231/ 2800 batches | test loss 0.5697480 +| epoch 9 | 235/ 2800 batches | test loss 0.4938911 +| epoch 9 | 239/ 2800 batches | test loss 0.4832351 +| epoch 9 | 243/ 2800 batches | test loss 0.7160151 +| epoch 9 | 247/ 2800 batches | test loss 0.3990401 +| epoch 9 | 251/ 2800 batches | test loss 0.7548835 +| epoch 9 | 255/ 2800 batches | test loss 0.8687642 +| epoch 9 | 259/ 2800 batches | test loss 0.5316263 +| epoch 9 | 263/ 2800 batches | test loss 0.5960785 +| epoch 9 | 267/ 2800 batches | test loss 0.4182929 +| epoch 9 | 271/ 2800 batches | test loss 0.6721787 +| epoch 9 | 275/ 2800 batches | test loss 0.4886368 +| epoch 9 | 279/ 2800 batches | test loss 0.6119902 +| epoch 9 | 283/ 2800 batches | test loss 0.3167953 +| epoch 9 | 287/ 2800 batches | test loss 0.4758354 +| epoch 9 | 291/ 2800 batches | test loss 0.6410215 +| epoch 9 | 295/ 2800 batches | test loss 0.5992377 +| epoch 9 | 299/ 2800 batches | test loss 0.6767461 +| epoch 9 | 303/ 2800 batches | test loss 0.7776352 +| epoch 9 | 307/ 2800 batches | test loss 0.6414518 +| epoch 9 | 311/ 2800 batches | test loss 0.5053840 +| epoch 9 | 315/ 2800 batches | test loss 0.4929298 +| epoch 9 | 319/ 2800 batches | test loss 0.6563423 +| epoch 9 | 323/ 2800 batches | test loss 0.3856817 +| epoch 9 | 327/ 2800 batches | test loss 0.6865852 +| epoch 9 | 331/ 2800 batches | test loss 0.6067944 +| epoch 9 | 335/ 2800 batches | test loss 0.6041197 +| epoch 9 | 339/ 2800 batches | test loss 0.4653715 +| epoch 9 | 343/ 2800 batches | test loss 0.6709388 +| epoch 9 | 347/ 2800 batches | test loss 0.7307169 +| epoch 9 | 351/ 2800 batches | test loss 0.4372440 +| epoch 9 | 355/ 2800 batches | test loss 0.4165484 +| epoch 9 | 359/ 2800 batches | test loss 0.5747851 +| epoch 9 | 363/ 2800 batches | test loss 0.7354480 +| epoch 9 | 367/ 2800 batches | test loss 0.5233259 +| epoch 9 | 371/ 2800 batches | test loss 0.6095200 +| epoch 9 | 375/ 2800 batches | test loss 0.5129271 +| epoch 9 | 379/ 2800 batches | test loss 0.6905119 +| epoch 9 | 383/ 2800 batches | test loss 0.7980691 +| epoch 9 | 387/ 2800 batches | test loss 0.5553832 +| epoch 9 | 391/ 2800 batches | test loss 0.4629939 +| epoch 9 | 395/ 2800 batches | test loss 0.6232854 +| epoch 9 | 399/ 2800 batches | test loss 0.5245970 +| epoch 9 | 403/ 2800 batches | test loss 0.9777642 +| epoch 9 | 407/ 2800 batches | test loss 0.6992419 +| epoch 9 | 411/ 2800 batches | test loss 0.7096879 +| epoch 9 | 415/ 2800 batches | test loss 0.5074912 +| epoch 9 | 419/ 2800 batches | test loss 0.3996552 +| epoch 9 | 423/ 2800 batches | test loss 0.4541686 +| epoch 9 | 427/ 2800 batches | test loss 0.5687295 +| epoch 9 | 431/ 2800 batches | test loss 0.4209141 +| epoch 9 | 435/ 2800 batches | test loss 0.4986649 +| epoch 9 | 439/ 2800 batches | test loss 0.7319589 +| epoch 9 | 443/ 2800 batches | test loss 0.9293369 +| epoch 9 | 447/ 2800 batches | test loss 0.7484139 +| epoch 9 | 451/ 2800 batches | test loss 0.4029941 +| epoch 9 | 455/ 2800 batches | test loss 0.4260620 +| epoch 9 | 459/ 2800 batches | test loss 0.6536440 +| epoch 9 | 463/ 2800 batches | test loss 0.6549864 +| epoch 9 | 467/ 2800 batches | test loss 0.5145680 +| epoch 9 | 471/ 2800 batches | test loss 0.5184134 +| epoch 9 | 475/ 2800 batches | test loss 0.7306960 +| epoch 9 | 479/ 2800 batches | test loss 0.5810134 +| epoch 9 | 483/ 2800 batches | test loss 0.7495231 +| epoch 9 | 487/ 2800 batches | test loss 0.7676117 +| epoch 9 | 491/ 2800 batches | test loss 0.4908382 +| epoch 9 | 495/ 2800 batches | test loss 0.5941232 +| epoch 9 | 499/ 2800 batches | test loss 0.5694824 +| epoch 9 | 503/ 2800 batches | test loss 0.6045237 +| epoch 9 | 507/ 2800 batches | test loss 0.6677343 +| epoch 9 | 511/ 2800 batches | test loss 0.5645787 +| epoch 9 | 515/ 2800 batches | test loss 0.4957299 +| epoch 9 | 519/ 2800 batches | test loss 0.3239064 +| epoch 9 | 523/ 2800 batches | test loss 0.5514607 +| epoch 9 | 527/ 2800 batches | test loss 0.6697280 +| epoch 9 | 531/ 2800 batches | test loss 0.5868545 +| epoch 9 | 535/ 2800 batches | test loss 0.5451204 +| epoch 9 | 539/ 2800 batches | test loss 0.7051930 +| epoch 9 | 543/ 2800 batches | test loss 0.4855602 +| epoch 9 | 547/ 2800 batches | test loss 0.6785364 +| epoch 9 | 551/ 2800 batches | test loss 0.4048008 +| epoch 9 | 555/ 2800 batches | test loss 0.5100873 +| epoch 9 | 559/ 2800 batches | test loss 0.4997252 +| epoch 9 | 563/ 2800 batches | test loss 0.5134439 +| epoch 9 | 567/ 2800 batches | test loss 0.7076159 +| epoch 9 | 571/ 2800 batches | test loss 0.5265446 +| epoch 9 | 575/ 2800 batches | test loss 0.4422880 +| epoch 9 | 579/ 2800 batches | test loss 0.4015457 +| epoch 9 | 583/ 2800 batches | test loss 0.6335754 +| epoch 9 | 587/ 2800 batches | test loss 0.5382611 +| epoch 9 | 591/ 2800 batches | test loss 0.5712200 +| epoch 9 | 595/ 2800 batches | test loss 0.4495537 +| epoch 9 | 599/ 2800 batches | test loss 0.5243131 +| epoch 9 | 603/ 2800 batches | test loss 0.7187907 +| epoch 9 | 607/ 2800 batches | test loss 0.4949501 +| epoch 9 | 611/ 2800 batches | test loss 0.5263298 +| epoch 9 | 615/ 2800 batches | test loss 0.6129566 +| epoch 9 | 619/ 2800 batches | test loss 0.5703043 +| epoch 9 | 623/ 2800 batches | test loss 0.7163926 +| epoch 9 | 627/ 2800 batches | test loss 0.6290069 +| epoch 9 | 631/ 2800 batches | test loss 0.4678606 +| epoch 9 | 635/ 2800 batches | test loss 0.4962995 +| epoch 9 | 639/ 2800 batches | test loss 0.5157506 +| epoch 9 | 643/ 2800 batches | test loss 0.7667272 +| epoch 9 | 647/ 2800 batches | test loss 0.4880416 +| epoch 9 | 651/ 2800 batches | test loss 0.4863949 +| epoch 9 | 655/ 2800 batches | test loss 0.5747768 +| epoch 9 | 659/ 2800 batches | test loss 0.5062641 +| epoch 9 | 663/ 2800 batches | test loss 0.5386879 +| epoch 9 | 667/ 2800 batches | test loss 0.5074807 +| epoch 9 | 671/ 2800 batches | test loss 0.5801394 +| epoch 9 | 675/ 2800 batches | test loss 0.6079704 +| epoch 9 | 679/ 2800 batches | test loss 0.5260928 +| epoch 9 | 683/ 2800 batches | test loss 0.5544791 +| epoch 9 | 687/ 2800 batches | test loss 0.6218052 +| epoch 9 | 691/ 2800 batches | test loss 0.4407982 +| epoch 9 | 695/ 2800 batches | test loss 0.5807954 +| epoch 9 | 699/ 2800 batches | test loss 0.5808843 +| epoch 9 | final test loss 0.5758, do not save model! +-------------------------------------------------------------------------------- +| epoch 10 | 3/ 2800 batches | train loss 0.2837988 +| epoch 10 | 7/ 2800 batches | train loss 0.2542890 +| epoch 10 | 11/ 2800 batches | train loss 0.2377156 +| epoch 10 | 15/ 2800 batches | train loss 0.2840766 +| epoch 10 | 19/ 2800 batches | train loss 0.2657101 +| epoch 10 | 23/ 2800 batches | train loss 0.3028252 +| epoch 10 | 27/ 2800 batches | train loss 0.3437132 +| epoch 10 | 31/ 2800 batches | train loss 0.2707814 +| epoch 10 | 35/ 2800 batches | train loss 0.2934166 +| epoch 10 | 39/ 2800 batches | train loss 0.3251261 +| epoch 10 | 43/ 2800 batches | train loss 0.2842772 +| epoch 10 | 47/ 2800 batches | train loss 0.2993408 +| epoch 10 | 51/ 2800 batches | train loss 0.2878763 +| epoch 10 | 55/ 2800 batches | train loss 0.3190262 +| epoch 10 | 59/ 2800 batches | train loss 0.2658101 +| epoch 10 | 63/ 2800 batches | train loss 0.2700734 +| epoch 10 | 67/ 2800 batches | train loss 0.2365963 +| epoch 10 | 71/ 2800 batches | train loss 0.2675688 +| epoch 10 | 75/ 2800 batches | train loss 0.2802282 +| epoch 10 | 79/ 2800 batches | train loss 0.2641235 +| epoch 10 | 83/ 2800 batches | train loss 0.3021455 +| epoch 10 | 87/ 2800 batches | train loss 0.2925573 +| epoch 10 | 91/ 2800 batches | train loss 0.2490766 +| epoch 10 | 95/ 2800 batches | train loss 0.2587329 +| epoch 10 | 99/ 2800 batches | train loss 0.3075079 +| epoch 10 | 103/ 2800 batches | train loss 0.2763173 +| epoch 10 | 107/ 2800 batches | train loss 0.3552863 +| epoch 10 | 111/ 2800 batches | train loss 0.2607000 +| epoch 10 | 115/ 2800 batches | train loss 0.2843301 +| epoch 10 | 119/ 2800 batches | train loss 0.2960889 +| epoch 10 | 123/ 2800 batches | train loss 0.2982486 +| epoch 10 | 127/ 2800 batches | train loss 0.3066332 +| epoch 10 | 131/ 2800 batches | train loss 0.2362916 +| epoch 10 | 135/ 2800 batches | train loss 0.3080269 +| epoch 10 | 139/ 2800 batches | train loss 0.3163506 +| epoch 10 | 143/ 2800 batches | train loss 0.2511707 +| epoch 10 | 147/ 2800 batches | train loss 0.3048688 +| epoch 10 | 151/ 2800 batches | train loss 0.3124799 +| epoch 10 | 155/ 2800 batches | train loss 0.2870552 +| epoch 10 | 159/ 2800 batches | train loss 0.3374431 +| epoch 10 | 163/ 2800 batches | train loss 0.1862038 +| epoch 10 | 167/ 2800 batches | train loss 0.2583878 +| epoch 10 | 171/ 2800 batches | train loss 0.3535134 +| epoch 10 | 175/ 2800 batches | train loss 0.2845853 +| epoch 10 | 179/ 2800 batches | train loss 0.3062889 +| epoch 10 | 183/ 2800 batches | train loss 0.2400150 +| epoch 10 | 187/ 2800 batches | train loss 0.2855591 +| epoch 10 | 191/ 2800 batches | train loss 0.2547096 +| epoch 10 | 195/ 2800 batches | train loss 0.2540023 +| epoch 10 | 199/ 2800 batches | train loss 0.2927302 +| epoch 10 | 203/ 2800 batches | train loss 0.2674754 +| epoch 10 | 207/ 2800 batches | train loss 0.3007008 +| epoch 10 | 211/ 2800 batches | train loss 0.3194398 +| epoch 10 | 215/ 2800 batches | train loss 0.3054899 +| epoch 10 | 219/ 2800 batches | train loss 0.2741052 +| epoch 10 | 223/ 2800 batches | train loss 0.3565454 +| epoch 10 | 227/ 2800 batches | train loss 0.3464173 +| epoch 10 | 231/ 2800 batches | train loss 0.2505773 +| epoch 10 | 235/ 2800 batches | train loss 0.2928603 +| epoch 10 | 239/ 2800 batches | train loss 0.2793518 +| epoch 10 | 243/ 2800 batches | train loss 0.2467046 +| epoch 10 | 247/ 2800 batches | train loss 0.2341457 +| epoch 10 | 251/ 2800 batches | train loss 0.2613223 +| epoch 10 | 255/ 2800 batches | train loss 0.2377989 +| epoch 10 | 259/ 2800 batches | train loss 0.2791911 +| epoch 10 | 263/ 2800 batches | train loss 0.2534997 +| epoch 10 | 267/ 2800 batches | train loss 0.3149356 +| epoch 10 | 271/ 2800 batches | train loss 0.2530836 +| epoch 10 | 275/ 2800 batches | train loss 0.2358334 +| epoch 10 | 279/ 2800 batches | train loss 0.3317038 +| epoch 10 | 283/ 2800 batches | train loss 0.2645729 +| epoch 10 | 287/ 2800 batches | train loss 0.2592271 +| epoch 10 | 291/ 2800 batches | train loss 0.2614136 +| epoch 10 | 295/ 2800 batches | train loss 0.2990748 +| epoch 10 | 299/ 2800 batches | train loss 0.2902916 +| epoch 10 | 303/ 2800 batches | train loss 0.2678174 +| epoch 10 | 307/ 2800 batches | train loss 0.2651162 +| epoch 10 | 311/ 2800 batches | train loss 0.2589153 +| epoch 10 | 315/ 2800 batches | train loss 0.2795311 +| epoch 10 | 319/ 2800 batches | train loss 0.2647638 +| epoch 10 | 323/ 2800 batches | train loss 0.2805421 +| epoch 10 | 327/ 2800 batches | train loss 0.2825192 +| epoch 10 | 331/ 2800 batches | train loss 0.2196744 +| epoch 10 | 335/ 2800 batches | train loss 0.2969789 +| epoch 10 | 339/ 2800 batches | train loss 0.2870261 +| epoch 10 | 343/ 2800 batches | train loss 0.2766591 +| epoch 10 | 347/ 2800 batches | train loss 0.2754996 +| epoch 10 | 351/ 2800 batches | train loss 0.2670348 +| epoch 10 | 355/ 2800 batches | train loss 0.3075375 +| epoch 10 | 359/ 2800 batches | train loss 0.2951514 +| epoch 10 | 363/ 2800 batches | train loss 0.2557576 +| epoch 10 | 367/ 2800 batches | train loss 0.2997905 +| epoch 10 | 371/ 2800 batches | train loss 0.2632630 +| epoch 10 | 375/ 2800 batches | train loss 0.2543866 +| epoch 10 | 379/ 2800 batches | train loss 0.2611024 +| epoch 10 | 383/ 2800 batches | train loss 0.2790863 +| epoch 10 | 387/ 2800 batches | train loss 0.2575414 +| epoch 10 | 391/ 2800 batches | train loss 0.2590808 +| epoch 10 | 395/ 2800 batches | train loss 0.2665160 +| epoch 10 | 399/ 2800 batches | train loss 0.2800539 +| epoch 10 | 403/ 2800 batches | train loss 0.2747634 +| epoch 10 | 407/ 2800 batches | train loss 0.3652465 +| epoch 10 | 411/ 2800 batches | train loss 0.2870274 +| epoch 10 | 415/ 2800 batches | train loss 0.2891691 +| epoch 10 | 419/ 2800 batches | train loss 0.2728755 +| epoch 10 | 423/ 2800 batches | train loss 0.2861739 +| epoch 10 | 427/ 2800 batches | train loss 0.3131618 +| epoch 10 | 431/ 2800 batches | train loss 0.2783377 +| epoch 10 | 435/ 2800 batches | train loss 0.3039752 +| epoch 10 | 439/ 2800 batches | train loss 0.2494726 +| epoch 10 | 443/ 2800 batches | train loss 0.2983963 +| epoch 10 | 447/ 2800 batches | train loss 0.2615442 +| epoch 10 | 451/ 2800 batches | train loss 0.2759540 +| epoch 10 | 455/ 2800 batches | train loss 0.2734495 +| epoch 10 | 459/ 2800 batches | train loss 0.2624617 +| epoch 10 | 463/ 2800 batches | train loss 0.2796345 +| epoch 10 | 467/ 2800 batches | train loss 0.2307749 +| epoch 10 | 471/ 2800 batches | train loss 0.2883750 +| epoch 10 | 475/ 2800 batches | train loss 0.3128124 +| epoch 10 | 479/ 2800 batches | train loss 0.2440735 +| epoch 10 | 483/ 2800 batches | train loss 0.3286335 +| epoch 10 | 487/ 2800 batches | train loss 0.2930411 +| epoch 10 | 491/ 2800 batches | train loss 0.3064634 +| epoch 10 | 495/ 2800 batches | train loss 0.2916621 +| epoch 10 | 499/ 2800 batches | train loss 0.2486942 +| epoch 10 | 503/ 2800 batches | train loss 0.3090151 +| epoch 10 | 507/ 2800 batches | train loss 0.2417660 +| epoch 10 | 511/ 2800 batches | train loss 0.2665252 +| epoch 10 | 515/ 2800 batches | train loss 0.3018176 +| epoch 10 | 519/ 2800 batches | train loss 0.2970488 +| epoch 10 | 523/ 2800 batches | train loss 0.2808356 +| epoch 10 | 527/ 2800 batches | train loss 0.2327544 +| epoch 10 | 531/ 2800 batches | train loss 0.2564077 +| epoch 10 | 535/ 2800 batches | train loss 0.3110668 +| epoch 10 | 539/ 2800 batches | train loss 0.2630082 +| epoch 10 | 543/ 2800 batches | train loss 0.2863905 +| epoch 10 | 547/ 2800 batches | train loss 0.2724799 +| epoch 10 | 551/ 2800 batches | train loss 0.2528425 +| epoch 10 | 555/ 2800 batches | train loss 0.3221703 +| epoch 10 | 559/ 2800 batches | train loss 0.2996007 +| epoch 10 | 563/ 2800 batches | train loss 0.3239254 +| epoch 10 | 567/ 2800 batches | train loss 0.2739965 +| epoch 10 | 571/ 2800 batches | train loss 0.3013928 +| epoch 10 | 575/ 2800 batches | train loss 0.3364900 +| epoch 10 | 579/ 2800 batches | train loss 0.3480272 +| epoch 10 | 583/ 2800 batches | train loss 0.2781913 +| epoch 10 | 587/ 2800 batches | train loss 0.2961705 +| epoch 10 | 591/ 2800 batches | train loss 0.2432714 +| epoch 10 | 595/ 2800 batches | train loss 0.2918018 +| epoch 10 | 599/ 2800 batches | train loss 0.2924065 +| epoch 10 | 603/ 2800 batches | train loss 0.3252223 +| epoch 10 | 607/ 2800 batches | train loss 0.1163002 +| epoch 10 | 611/ 2800 batches | train loss 0.3304954 +| epoch 10 | 615/ 2800 batches | train loss 0.2765199 +| epoch 10 | 619/ 2800 batches | train loss 0.2914734 +| epoch 10 | 623/ 2800 batches | train loss 0.2845157 +| epoch 10 | 627/ 2800 batches | train loss 0.3605397 +| epoch 10 | 631/ 2800 batches | train loss 0.3301150 +| epoch 10 | 635/ 2800 batches | train loss 0.3228125 +| epoch 10 | 639/ 2800 batches | train loss 0.3080243 +| epoch 10 | 643/ 2800 batches | train loss 0.2606946 +| epoch 10 | 647/ 2800 batches | train loss 0.2602400 +| epoch 10 | 651/ 2800 batches | train loss 0.2765074 +| epoch 10 | 655/ 2800 batches | train loss 0.3131244 +| epoch 10 | 659/ 2800 batches | train loss 0.3217579 +| epoch 10 | 663/ 2800 batches | train loss 0.2748343 +| epoch 10 | 667/ 2800 batches | train loss 0.2695936 +| epoch 10 | 671/ 2800 batches | train loss 0.3005656 +| epoch 10 | 675/ 2800 batches | train loss 0.2932089 +| epoch 10 | 679/ 2800 batches | train loss 0.3583032 +| epoch 10 | 683/ 2800 batches | train loss 0.3198895 +| epoch 10 | 687/ 2800 batches | train loss 0.2591021 +| epoch 10 | 691/ 2800 batches | train loss 0.2611669 +| epoch 10 | 695/ 2800 batches | train loss 0.3027817 +| epoch 10 | 699/ 2800 batches | train loss 0.3278190 +| epoch 10 | 703/ 2800 batches | train loss 0.3205241 +| epoch 10 | 707/ 2800 batches | train loss 0.3229492 +| epoch 10 | 711/ 2800 batches | train loss 0.2892862 +| epoch 10 | 715/ 2800 batches | train loss 0.2795503 +| epoch 10 | 719/ 2800 batches | train loss 0.2909724 +| epoch 10 | 723/ 2800 batches | train loss 0.2914680 +| epoch 10 | 727/ 2800 batches | train loss 0.3013853 +| epoch 10 | 731/ 2800 batches | train loss 0.2918532 +| epoch 10 | 735/ 2800 batches | train loss 0.3079848 +| epoch 10 | 739/ 2800 batches | train loss 0.2799231 +| epoch 10 | 743/ 2800 batches | train loss 0.3099398 +| epoch 10 | 747/ 2800 batches | train loss 0.2733266 +| epoch 10 | 751/ 2800 batches | train loss 0.3075439 +| epoch 10 | 755/ 2800 batches | train loss 0.3181683 +| epoch 10 | 759/ 2800 batches | train loss 0.3010851 +| epoch 10 | 763/ 2800 batches | train loss 0.3147709 +| epoch 10 | 767/ 2800 batches | train loss 0.3167087 +| epoch 10 | 771/ 2800 batches | train loss 0.3051811 +| epoch 10 | 775/ 2800 batches | train loss 0.2924427 +| epoch 10 | 779/ 2800 batches | train loss 0.2687225 +| epoch 10 | 783/ 2800 batches | train loss 0.2848793 +| epoch 10 | 787/ 2800 batches | train loss 0.2303441 +| epoch 10 | 791/ 2800 batches | train loss 0.2624548 +| epoch 10 | 795/ 2800 batches | train loss 0.3259931 +| epoch 10 | 799/ 2800 batches | train loss 0.3018958 +| epoch 10 | 803/ 2800 batches | train loss 0.3280657 +| epoch 10 | 807/ 2800 batches | train loss 0.2958758 +| epoch 10 | 811/ 2800 batches | train loss 0.3001675 +| epoch 10 | 815/ 2800 batches | train loss 0.2634825 +| epoch 10 | 819/ 2800 batches | train loss 0.2795542 +| epoch 10 | 823/ 2800 batches | train loss 0.2363863 +| epoch 10 | 827/ 2800 batches | train loss 0.2915107 +| epoch 10 | 831/ 2800 batches | train loss 0.3067066 +| epoch 10 | 835/ 2800 batches | train loss 0.2682271 +| epoch 10 | 839/ 2800 batches | train loss 0.2944076 +| epoch 10 | 843/ 2800 batches | train loss 0.2971052 +| epoch 10 | 847/ 2800 batches | train loss 0.2748374 +| epoch 10 | 851/ 2800 batches | train loss 0.2788562 +| epoch 10 | 855/ 2800 batches | train loss 0.2726670 +| epoch 10 | 859/ 2800 batches | train loss 0.2645178 +| epoch 10 | 863/ 2800 batches | train loss 0.2411704 +| epoch 10 | 867/ 2800 batches | train loss 0.3461367 +| epoch 10 | 871/ 2800 batches | train loss 0.3263754 +| epoch 10 | 875/ 2800 batches | train loss 0.3145024 +| epoch 10 | 879/ 2800 batches | train loss 0.2745517 +| epoch 10 | 883/ 2800 batches | train loss 0.3535686 +| epoch 10 | 887/ 2800 batches | train loss 0.2764899 +| epoch 10 | 891/ 2800 batches | train loss 0.2474702 +| epoch 10 | 895/ 2800 batches | train loss 0.3132941 +| epoch 10 | 899/ 2800 batches | train loss 0.3236853 +| epoch 10 | 903/ 2800 batches | train loss 0.3707917 +| epoch 10 | 907/ 2800 batches | train loss 0.3129498 +| epoch 10 | 911/ 2800 batches | train loss 0.3049572 +| epoch 10 | 915/ 2800 batches | train loss 0.2761315 +| epoch 10 | 919/ 2800 batches | train loss 0.3059312 +| epoch 10 | 923/ 2800 batches | train loss 0.2904113 +| epoch 10 | 927/ 2800 batches | train loss 0.2330985 +| epoch 10 | 931/ 2800 batches | train loss 0.3163675 +| epoch 10 | 935/ 2800 batches | train loss 0.3238412 +| epoch 10 | 939/ 2800 batches | train loss 0.3108099 +| epoch 10 | 943/ 2800 batches | train loss 0.2822601 +| epoch 10 | 947/ 2800 batches | train loss 0.2974466 +| epoch 10 | 951/ 2800 batches | train loss 0.2464249 +| epoch 10 | 955/ 2800 batches | train loss 0.3388177 +| epoch 10 | 959/ 2800 batches | train loss 0.2861697 +| epoch 10 | 963/ 2800 batches | train loss 0.3399035 +| epoch 10 | 967/ 2800 batches | train loss 0.2593715 +| epoch 10 | 971/ 2800 batches | train loss 0.2928778 +| epoch 10 | 975/ 2800 batches | train loss 0.2484003 +| epoch 10 | 979/ 2800 batches | train loss 0.2878366 +| epoch 10 | 983/ 2800 batches | train loss 0.3126895 +| epoch 10 | 987/ 2800 batches | train loss 0.3230075 +| epoch 10 | 991/ 2800 batches | train loss 0.3191866 +| epoch 10 | 995/ 2800 batches | train loss 0.3213628 +| epoch 10 | 999/ 2800 batches | train loss 0.3278300 +| epoch 10 | 1003/ 2800 batches | train loss 0.2643497 +| epoch 10 | 1007/ 2800 batches | train loss 0.2623430 +| epoch 10 | 1011/ 2800 batches | train loss 0.3055089 +| epoch 10 | 1015/ 2800 batches | train loss 0.2956600 +| epoch 10 | 1019/ 2800 batches | train loss 0.2946105 +| epoch 10 | 1023/ 2800 batches | train loss 0.2291252 +| epoch 10 | 1027/ 2800 batches | train loss 0.3476006 +| epoch 10 | 1031/ 2800 batches | train loss 0.3322283 +| epoch 10 | 1035/ 2800 batches | train loss 0.3096337 +| epoch 10 | 1039/ 2800 batches | train loss 0.2970745 +| epoch 10 | 1043/ 2800 batches | train loss 0.2698111 +| epoch 10 | 1047/ 2800 batches | train loss 0.2403833 +| epoch 10 | 1051/ 2800 batches | train loss 0.2751326 +| epoch 10 | 1055/ 2800 batches | train loss 0.2808938 +| epoch 10 | 1059/ 2800 batches | train loss 0.3128483 +| epoch 10 | 1063/ 2800 batches | train loss 0.2175114 +| epoch 10 | 1067/ 2800 batches | train loss 0.3195271 +| epoch 10 | 1071/ 2800 batches | train loss 0.3047226 +| epoch 10 | 1075/ 2800 batches | train loss 0.3331619 +| epoch 10 | 1079/ 2800 batches | train loss 0.2471311 +| epoch 10 | 1083/ 2800 batches | train loss 0.2782311 +| epoch 10 | 1087/ 2800 batches | train loss 0.2673763 +| epoch 10 | 1091/ 2800 batches | train loss 0.2834427 +| epoch 10 | 1095/ 2800 batches | train loss 0.2975471 +| epoch 10 | 1099/ 2800 batches | train loss 0.3113217 +| epoch 10 | 1103/ 2800 batches | train loss 0.2523732 +| epoch 10 | 1107/ 2800 batches | train loss 0.2914304 +| epoch 10 | 1111/ 2800 batches | train loss 0.2277420 +| epoch 10 | 1115/ 2800 batches | train loss 0.3093892 +| epoch 10 | 1119/ 2800 batches | train loss 0.2538071 +| epoch 10 | 1123/ 2800 batches | train loss 0.2917374 +| epoch 10 | 1127/ 2800 batches | train loss 0.2611488 +| epoch 10 | 1131/ 2800 batches | train loss 0.2619084 +| epoch 10 | 1135/ 2800 batches | train loss 0.2697705 +| epoch 10 | 1139/ 2800 batches | train loss 0.2621555 +| epoch 10 | 1143/ 2800 batches | train loss 0.3138369 +| epoch 10 | 1147/ 2800 batches | train loss 0.3185123 +| epoch 10 | 1151/ 2800 batches | train loss 0.2238197 +| epoch 10 | 1155/ 2800 batches | train loss 0.3098912 +| epoch 10 | 1159/ 2800 batches | train loss 0.2518494 +| epoch 10 | 1163/ 2800 batches | train loss 0.3167339 +| epoch 10 | 1167/ 2800 batches | train loss 0.3017150 +| epoch 10 | 1171/ 2800 batches | train loss 0.3277532 +| epoch 10 | 1175/ 2800 batches | train loss 0.2722653 +| epoch 10 | 1179/ 2800 batches | train loss 0.2915224 +| epoch 10 | 1183/ 2800 batches | train loss 0.3029600 +| epoch 10 | 1187/ 2800 batches | train loss 0.2910733 +| epoch 10 | 1191/ 2800 batches | train loss 0.2705761 +| epoch 10 | 1195/ 2800 batches | train loss 0.3404051 +| epoch 10 | 1199/ 2800 batches | train loss 0.2721301 +| epoch 10 | 1203/ 2800 batches | train loss 0.2844170 +| epoch 10 | 1207/ 2800 batches | train loss 0.2920905 +| epoch 10 | 1211/ 2800 batches | train loss 0.2761305 +| epoch 10 | 1215/ 2800 batches | train loss 0.3078531 +| epoch 10 | 1219/ 2800 batches | train loss 0.2683360 +| epoch 10 | 1223/ 2800 batches | train loss 0.2890113 +| epoch 10 | 1227/ 2800 batches | train loss 0.2967713 +| epoch 10 | 1231/ 2800 batches | train loss 0.2503186 +| epoch 10 | 1235/ 2800 batches | train loss 0.3023732 +| epoch 10 | 1239/ 2800 batches | train loss 0.2675691 +| epoch 10 | 1243/ 2800 batches | train loss 0.3093562 +| epoch 10 | 1247/ 2800 batches | train loss 0.2389087 +| epoch 10 | 1251/ 2800 batches | train loss 0.1127802 +| epoch 10 | 1255/ 2800 batches | train loss 0.3304011 +| epoch 10 | 1259/ 2800 batches | train loss 0.3207440 +| epoch 10 | 1263/ 2800 batches | train loss 0.2805686 +| epoch 10 | 1267/ 2800 batches | train loss 0.2721267 +| epoch 10 | 1271/ 2800 batches | train loss 0.3284695 +| epoch 10 | 1275/ 2800 batches | train loss 0.3440557 +| epoch 10 | 1279/ 2800 batches | train loss 0.3105776 +| epoch 10 | 1283/ 2800 batches | train loss 0.2933491 +| epoch 10 | 1287/ 2800 batches | train loss 0.2901450 +| epoch 10 | 1291/ 2800 batches | train loss 0.3708082 +| epoch 10 | 1295/ 2800 batches | train loss 0.2538799 +| epoch 10 | 1299/ 2800 batches | train loss 0.2397461 +| epoch 10 | 1303/ 2800 batches | train loss 0.2472952 +| epoch 10 | 1307/ 2800 batches | train loss 0.3121616 +| epoch 10 | 1311/ 2800 batches | train loss 0.2888330 +| epoch 10 | 1315/ 2800 batches | train loss 0.3182876 +| epoch 10 | 1319/ 2800 batches | train loss 0.2580886 +| epoch 10 | 1323/ 2800 batches | train loss 0.2680020 +| epoch 10 | 1327/ 2800 batches | train loss 0.3094675 +| epoch 10 | 1331/ 2800 batches | train loss 0.3094340 +| epoch 10 | 1335/ 2800 batches | train loss 0.2434778 +| epoch 10 | 1339/ 2800 batches | train loss 0.2742630 +| epoch 10 | 1343/ 2800 batches | train loss 0.2832903 +| epoch 10 | 1347/ 2800 batches | train loss 0.3044542 +| epoch 10 | 1351/ 2800 batches | train loss 0.2972461 +| epoch 10 | 1355/ 2800 batches | train loss 0.2970091 +| epoch 10 | 1359/ 2800 batches | train loss 0.2770052 +| epoch 10 | 1363/ 2800 batches | train loss 0.2904024 +| epoch 10 | 1367/ 2800 batches | train loss 0.3130364 +| epoch 10 | 1371/ 2800 batches | train loss 0.3128068 +| epoch 10 | 1375/ 2800 batches | train loss 0.2501791 +| epoch 10 | 1379/ 2800 batches | train loss 0.2950341 +| epoch 10 | 1383/ 2800 batches | train loss 0.2610647 +| epoch 10 | 1387/ 2800 batches | train loss 0.3168138 +| epoch 10 | 1391/ 2800 batches | train loss 0.2714199 +| epoch 10 | 1395/ 2800 batches | train loss 0.2452184 +| epoch 10 | 1399/ 2800 batches | train loss 0.2894319 +| epoch 10 | 1403/ 2800 batches | train loss 0.3233763 +| epoch 10 | 1407/ 2800 batches | train loss 0.3091510 +| epoch 10 | 1411/ 2800 batches | train loss 0.3098936 +| epoch 10 | 1415/ 2800 batches | train loss 0.2916481 +| epoch 10 | 1419/ 2800 batches | train loss 0.3013068 +| epoch 10 | 1423/ 2800 batches | train loss 0.2960420 +| epoch 10 | 1427/ 2800 batches | train loss 0.2458557 +| epoch 10 | 1431/ 2800 batches | train loss 0.2837605 +| epoch 10 | 1435/ 2800 batches | train loss 0.2996912 +| epoch 10 | 1439/ 2800 batches | train loss 0.2819453 +| epoch 10 | 1443/ 2800 batches | train loss 0.2891172 +| epoch 10 | 1447/ 2800 batches | train loss 0.2723676 +| epoch 10 | 1451/ 2800 batches | train loss 0.3193785 +| epoch 10 | 1455/ 2800 batches | train loss 0.3403729 +| epoch 10 | 1459/ 2800 batches | train loss 0.2578145 +| epoch 10 | 1463/ 2800 batches | train loss 0.3175068 +| epoch 10 | 1467/ 2800 batches | train loss 0.2582119 +| epoch 10 | 1471/ 2800 batches | train loss 0.2808559 +| epoch 10 | 1475/ 2800 batches | train loss 0.3437484 +| epoch 10 | 1479/ 2800 batches | train loss 0.2723942 +| epoch 10 | 1483/ 2800 batches | train loss 0.3567981 +| epoch 10 | 1487/ 2800 batches | train loss 0.2982877 +| epoch 10 | 1491/ 2800 batches | train loss 0.2447352 +| epoch 10 | 1495/ 2800 batches | train loss 0.3173020 +| epoch 10 | 1499/ 2800 batches | train loss 0.2724899 +| epoch 10 | 1503/ 2800 batches | train loss 0.2562053 +| epoch 10 | 1507/ 2800 batches | train loss 0.3022034 +| epoch 10 | 1511/ 2800 batches | train loss 0.2691593 +| epoch 10 | 1515/ 2800 batches | train loss 0.3240669 +| epoch 10 | 1519/ 2800 batches | train loss 0.2986355 +| epoch 10 | 1523/ 2800 batches | train loss 0.3075179 +| epoch 10 | 1527/ 2800 batches | train loss 0.3129807 +| epoch 10 | 1531/ 2800 batches | train loss 0.2968046 +| epoch 10 | 1535/ 2800 batches | train loss 0.2604999 +| epoch 10 | 1539/ 2800 batches | train loss 0.2611660 +| epoch 10 | 1543/ 2800 batches | train loss 0.2684386 +| epoch 10 | 1547/ 2800 batches | train loss 0.3436793 +| epoch 10 | 1551/ 2800 batches | train loss 0.2837272 +| epoch 10 | 1555/ 2800 batches | train loss 0.3057558 +| epoch 10 | 1559/ 2800 batches | train loss 0.2504358 +| epoch 10 | 1563/ 2800 batches | train loss 0.2751479 +| epoch 10 | 1567/ 2800 batches | train loss 0.2532011 +| epoch 10 | 1571/ 2800 batches | train loss 0.2803006 +| epoch 10 | 1575/ 2800 batches | train loss 0.2588608 +| epoch 10 | 1579/ 2800 batches | train loss 0.2664848 +| epoch 10 | 1583/ 2800 batches | train loss 0.2461252 +| epoch 10 | 1587/ 2800 batches | train loss 0.2813251 +| epoch 10 | 1591/ 2800 batches | train loss 0.2972276 +| epoch 10 | 1595/ 2800 batches | train loss 0.2953974 +| epoch 10 | 1599/ 2800 batches | train loss 0.3326205 +| epoch 10 | 1603/ 2800 batches | train loss 0.3119374 +| epoch 10 | 1607/ 2800 batches | train loss 0.3174842 +| epoch 10 | 1611/ 2800 batches | train loss 0.3036302 +| epoch 10 | 1615/ 2800 batches | train loss 0.3191860 +| epoch 10 | 1619/ 2800 batches | train loss 0.2569436 +| epoch 10 | 1623/ 2800 batches | train loss 0.2766265 +| epoch 10 | 1627/ 2800 batches | train loss 0.2510203 +| epoch 10 | 1631/ 2800 batches | train loss 0.2732643 +| epoch 10 | 1635/ 2800 batches | train loss 0.2965744 +| epoch 10 | 1639/ 2800 batches | train loss 0.2505130 +| epoch 10 | 1643/ 2800 batches | train loss 0.2969561 +| epoch 10 | 1647/ 2800 batches | train loss 0.3197318 +| epoch 10 | 1651/ 2800 batches | train loss 0.2752565 +| epoch 10 | 1655/ 2800 batches | train loss 0.2524500 +| epoch 10 | 1659/ 2800 batches | train loss 0.3206193 +| epoch 10 | 1663/ 2800 batches | train loss 0.2680779 +| epoch 10 | 1667/ 2800 batches | train loss 0.2734307 +| epoch 10 | 1671/ 2800 batches | train loss 0.3113307 +| epoch 10 | 1675/ 2800 batches | train loss 0.2953435 +| epoch 10 | 1679/ 2800 batches | train loss 0.1138283 +| epoch 10 | 1683/ 2800 batches | train loss 0.3406232 +| epoch 10 | 1687/ 2800 batches | train loss 0.3245293 +| epoch 10 | 1691/ 2800 batches | train loss 0.3241104 +| epoch 10 | 1695/ 2800 batches | train loss 0.2792984 +| epoch 10 | 1699/ 2800 batches | train loss 0.2801435 +| epoch 10 | 1703/ 2800 batches | train loss 0.3191274 +| epoch 10 | 1707/ 2800 batches | train loss 0.3076583 +| epoch 10 | 1711/ 2800 batches | train loss 0.3103113 +| epoch 10 | 1715/ 2800 batches | train loss 0.3260409 +| epoch 10 | 1719/ 2800 batches | train loss 0.2536442 +| epoch 10 | 1723/ 2800 batches | train loss 0.3524505 +| epoch 10 | 1727/ 2800 batches | train loss 0.3211297 +| epoch 10 | 1731/ 2800 batches | train loss 0.2956470 +| epoch 10 | 1735/ 2800 batches | train loss 0.3186127 +| epoch 10 | 1739/ 2800 batches | train loss 0.2508351 +| epoch 10 | 1743/ 2800 batches | train loss 0.3279923 +| epoch 10 | 1747/ 2800 batches | train loss 0.3113877 +| epoch 10 | 1751/ 2800 batches | train loss 0.2713217 +| epoch 10 | 1755/ 2800 batches | train loss 0.3100232 +| epoch 10 | 1759/ 2800 batches | train loss 0.3129624 +| epoch 10 | 1763/ 2800 batches | train loss 0.2911268 +| epoch 10 | 1767/ 2800 batches | train loss 0.2630638 +| epoch 10 | 1771/ 2800 batches | train loss 0.3097000 +| epoch 10 | 1775/ 2800 batches | train loss 0.3384520 +| epoch 10 | 1779/ 2800 batches | train loss 0.2593633 +| epoch 10 | 1783/ 2800 batches | train loss 0.3011886 +| epoch 10 | 1787/ 2800 batches | train loss 0.3418911 +| epoch 10 | 1791/ 2800 batches | train loss 0.3312325 +| epoch 10 | 1795/ 2800 batches | train loss 0.3371261 +| epoch 10 | 1799/ 2800 batches | train loss 0.3027805 +| epoch 10 | 1803/ 2800 batches | train loss 0.3036225 +| epoch 10 | 1807/ 2800 batches | train loss 0.2969956 +| epoch 10 | 1811/ 2800 batches | train loss 0.3148136 +| epoch 10 | 1815/ 2800 batches | train loss 0.3337114 +| epoch 10 | 1819/ 2800 batches | train loss 0.2645434 +| epoch 10 | 1823/ 2800 batches | train loss 0.2722082 +| epoch 10 | 1827/ 2800 batches | train loss 0.3259318 +| epoch 10 | 1831/ 2800 batches | train loss 0.2740484 +| epoch 10 | 1835/ 2800 batches | train loss 0.3169363 +| epoch 10 | 1839/ 2800 batches | train loss 0.2607061 +| epoch 10 | 1843/ 2800 batches | train loss 0.2509722 +| epoch 10 | 1847/ 2800 batches | train loss 0.3328978 +| epoch 10 | 1851/ 2800 batches | train loss 0.3897833 +| epoch 10 | 1855/ 2800 batches | train loss 0.2648348 +| epoch 10 | 1859/ 2800 batches | train loss 0.3111821 +| epoch 10 | 1863/ 2800 batches | train loss 0.3156462 +| epoch 10 | 1867/ 2800 batches | train loss 0.1984960 +| epoch 10 | 1871/ 2800 batches | train loss 0.2724660 +| epoch 10 | 1875/ 2800 batches | train loss 0.2795105 +| epoch 10 | 1879/ 2800 batches | train loss 0.2539995 +| epoch 10 | 1883/ 2800 batches | train loss 0.2906226 +| epoch 10 | 1887/ 2800 batches | train loss 0.3152232 +| epoch 10 | 1891/ 2800 batches | train loss 0.2916623 +| epoch 10 | 1895/ 2800 batches | train loss 0.2616024 +| epoch 10 | 1899/ 2800 batches | train loss 0.3180469 +| epoch 10 | 1903/ 2800 batches | train loss 0.3431798 +| epoch 10 | 1907/ 2800 batches | train loss 0.2800849 +| epoch 10 | 1911/ 2800 batches | train loss 0.2852792 +| epoch 10 | 1915/ 2800 batches | train loss 0.3112298 +| epoch 10 | 1919/ 2800 batches | train loss 0.2578012 +| epoch 10 | 1923/ 2800 batches | train loss 0.3229362 +| epoch 10 | 1927/ 2800 batches | train loss 0.3027593 +| epoch 10 | 1931/ 2800 batches | train loss 0.3792984 +| epoch 10 | 1935/ 2800 batches | train loss 0.3303411 +| epoch 10 | 1939/ 2800 batches | train loss 0.3203295 +| epoch 10 | 1943/ 2800 batches | train loss 0.2419063 +| epoch 10 | 1947/ 2800 batches | train loss 0.3115318 +| epoch 10 | 1951/ 2800 batches | train loss 0.2781575 +| epoch 10 | 1955/ 2800 batches | train loss 0.2569528 +| epoch 10 | 1959/ 2800 batches | train loss 0.3345669 +| epoch 10 | 1963/ 2800 batches | train loss 0.2839000 +| epoch 10 | 1967/ 2800 batches | train loss 0.2658909 +| epoch 10 | 1971/ 2800 batches | train loss 0.3111906 +| epoch 10 | 1975/ 2800 batches | train loss 0.3072116 +| epoch 10 | 1979/ 2800 batches | train loss 0.2364637 +| epoch 10 | 1983/ 2800 batches | train loss 0.2952773 +| epoch 10 | 1987/ 2800 batches | train loss 0.3301899 +| epoch 10 | 1991/ 2800 batches | train loss 0.2668443 +| epoch 10 | 1995/ 2800 batches | train loss 0.2862811 +| epoch 10 | 1999/ 2800 batches | train loss 0.3164684 +| epoch 10 | 2003/ 2800 batches | train loss 0.3155852 +| epoch 10 | 2007/ 2800 batches | train loss 0.3265872 +| epoch 10 | 2011/ 2800 batches | train loss 0.3206478 +| epoch 10 | 2015/ 2800 batches | train loss 0.2748376 +| epoch 10 | 2019/ 2800 batches | train loss 0.3172233 +| epoch 10 | 2023/ 2800 batches | train loss 0.3573665 +| epoch 10 | 2027/ 2800 batches | train loss 0.3239366 +| epoch 10 | 2031/ 2800 batches | train loss 0.2662220 +| epoch 10 | 2035/ 2800 batches | train loss 0.2528988 +| epoch 10 | 2039/ 2800 batches | train loss 0.2940849 +| epoch 10 | 2043/ 2800 batches | train loss 0.2801529 +| epoch 10 | 2047/ 2800 batches | train loss 0.3105201 +| epoch 10 | 2051/ 2800 batches | train loss 0.3032402 +| epoch 10 | 2055/ 2800 batches | train loss 0.2979382 +| epoch 10 | 2059/ 2800 batches | train loss 0.1190368 +| epoch 10 | 2063/ 2800 batches | train loss 0.2554944 +| epoch 10 | 2067/ 2800 batches | train loss 0.3215449 +| epoch 10 | 2071/ 2800 batches | train loss 0.2949273 +| epoch 10 | 2075/ 2800 batches | train loss 0.2949694 +| epoch 10 | 2079/ 2800 batches | train loss 0.2862405 +| epoch 10 | 2083/ 2800 batches | train loss 0.2912036 +| epoch 10 | 2087/ 2800 batches | train loss 0.3420187 +| epoch 10 | 2091/ 2800 batches | train loss 0.3045433 +| epoch 10 | 2095/ 2800 batches | train loss 0.2519501 +| epoch 10 | 2099/ 2800 batches | train loss 0.2294407 +| epoch 10 | 2103/ 2800 batches | train loss 0.2558145 +| epoch 10 | 2107/ 2800 batches | train loss 0.3015552 +| epoch 10 | 2111/ 2800 batches | train loss 0.3138420 +| epoch 10 | 2115/ 2800 batches | train loss 0.3168329 +| epoch 10 | 2119/ 2800 batches | train loss 0.3122904 +| epoch 10 | 2123/ 2800 batches | train loss 0.3083993 +| epoch 10 | 2127/ 2800 batches | train loss 0.3479671 +| epoch 10 | 2131/ 2800 batches | train loss 0.2981507 +| epoch 10 | 2135/ 2800 batches | train loss 0.3171976 +| epoch 10 | 2139/ 2800 batches | train loss 0.2756248 +| epoch 10 | 2143/ 2800 batches | train loss 0.3271894 +| epoch 10 | 2147/ 2800 batches | train loss 0.2841881 +| epoch 10 | 2151/ 2800 batches | train loss 0.3200687 +| epoch 10 | 2155/ 2800 batches | train loss 0.2751561 +| epoch 10 | 2159/ 2800 batches | train loss 0.3073632 +| epoch 10 | 2163/ 2800 batches | train loss 0.2914033 +| epoch 10 | 2167/ 2800 batches | train loss 0.2891004 +| epoch 10 | 2171/ 2800 batches | train loss 0.3061847 +| epoch 10 | 2175/ 2800 batches | train loss 0.2687992 +| epoch 10 | 2179/ 2800 batches | train loss 0.2840990 +| epoch 10 | 2183/ 2800 batches | train loss 0.3149713 +| epoch 10 | 2187/ 2800 batches | train loss 0.3057712 +| epoch 10 | 2191/ 2800 batches | train loss 0.3113939 +| epoch 10 | 2195/ 2800 batches | train loss 0.2912481 +| epoch 10 | 2199/ 2800 batches | train loss 0.3514864 +| epoch 10 | 2203/ 2800 batches | train loss 0.2913142 +| epoch 10 | 2207/ 2800 batches | train loss 0.2714781 +| epoch 10 | 2211/ 2800 batches | train loss 0.3033707 +| epoch 10 | 2215/ 2800 batches | train loss 0.3170312 +| epoch 10 | 2219/ 2800 batches | train loss 0.3551109 +| epoch 10 | 2223/ 2800 batches | train loss 0.2947959 +| epoch 10 | 2227/ 2800 batches | train loss 0.3032887 +| epoch 10 | 2231/ 2800 batches | train loss 0.3398768 +| epoch 10 | 2235/ 2800 batches | train loss 0.3056216 +| epoch 10 | 2239/ 2800 batches | train loss 0.2513229 +| epoch 10 | 2243/ 2800 batches | train loss 0.3087306 +| epoch 10 | 2247/ 2800 batches | train loss 0.3327056 +| epoch 10 | 2251/ 2800 batches | train loss 0.3078202 +| epoch 10 | 2255/ 2800 batches | train loss 0.3198066 +| epoch 10 | 2259/ 2800 batches | train loss 0.3088380 +| epoch 10 | 2263/ 2800 batches | train loss 0.3547863 +| epoch 10 | 2267/ 2800 batches | train loss 0.3364309 +| epoch 10 | 2271/ 2800 batches | train loss 0.2952367 +| epoch 10 | 2275/ 2800 batches | train loss 0.3107235 +| epoch 10 | 2279/ 2800 batches | train loss 0.2575070 +| epoch 10 | 2283/ 2800 batches | train loss 0.3054277 +| epoch 10 | 2287/ 2800 batches | train loss 0.2772309 +| epoch 10 | 2291/ 2800 batches | train loss 0.2827994 +| epoch 10 | 2295/ 2800 batches | train loss 0.3512810 +| epoch 10 | 2299/ 2800 batches | train loss 0.2782952 +| epoch 10 | 2303/ 2800 batches | train loss 0.2953420 +| epoch 10 | 2307/ 2800 batches | train loss 0.3137994 +| epoch 10 | 2311/ 2800 batches | train loss 0.2803475 +| epoch 10 | 2315/ 2800 batches | train loss 0.2600355 +| epoch 10 | 2319/ 2800 batches | train loss 0.2410653 +| epoch 10 | 2323/ 2800 batches | train loss 0.3274056 +| epoch 10 | 2327/ 2800 batches | train loss 0.2758646 +| epoch 10 | 2331/ 2800 batches | train loss 0.3039910 +| epoch 10 | 2335/ 2800 batches | train loss 0.3000992 +| epoch 10 | 2339/ 2800 batches | train loss 0.2833216 +| epoch 10 | 2343/ 2800 batches | train loss 0.2839495 +| epoch 10 | 2347/ 2800 batches | train loss 0.3244864 +| epoch 10 | 2351/ 2800 batches | train loss 0.2508985 +| epoch 10 | 2355/ 2800 batches | train loss 0.2815743 +| epoch 10 | 2359/ 2800 batches | train loss 0.2890827 +| epoch 10 | 2363/ 2800 batches | train loss 0.3238405 +| epoch 10 | 2367/ 2800 batches | train loss 0.2561396 +| epoch 10 | 2371/ 2800 batches | train loss 0.2436496 +| epoch 10 | 2375/ 2800 batches | train loss 0.3456715 +| epoch 10 | 2379/ 2800 batches | train loss 0.3291739 +| epoch 10 | 2383/ 2800 batches | train loss 0.2844173 +| epoch 10 | 2387/ 2800 batches | train loss 0.3291215 +| epoch 10 | 2391/ 2800 batches | train loss 0.2723505 +| epoch 10 | 2395/ 2800 batches | train loss 0.2788729 +| epoch 10 | 2399/ 2800 batches | train loss 0.2722236 +| epoch 10 | 2403/ 2800 batches | train loss 0.3763774 +| epoch 10 | 2407/ 2800 batches | train loss 0.2895704 +| epoch 10 | 2411/ 2800 batches | train loss 0.3486634 +| epoch 10 | 2415/ 2800 batches | train loss 0.2542497 +| epoch 10 | 2419/ 2800 batches | train loss 0.3030971 +| epoch 10 | 2423/ 2800 batches | train loss 0.3210551 +| epoch 10 | 2427/ 2800 batches | train loss 0.2916179 +| epoch 10 | 2431/ 2800 batches | train loss 0.2691686 +| epoch 10 | 2435/ 2800 batches | train loss 0.2876788 +| epoch 10 | 2439/ 2800 batches | train loss 0.3302628 +| epoch 10 | 2443/ 2800 batches | train loss 0.2840903 +| epoch 10 | 2447/ 2800 batches | train loss 0.3085269 +| epoch 10 | 2451/ 2800 batches | train loss 0.3157645 +| epoch 10 | 2455/ 2800 batches | train loss 0.3070333 +| epoch 10 | 2459/ 2800 batches | train loss 0.3131449 +| epoch 10 | 2463/ 2800 batches | train loss 0.3161151 +| epoch 10 | 2467/ 2800 batches | train loss 0.2769089 +| epoch 10 | 2471/ 2800 batches | train loss 0.3184898 +| epoch 10 | 2475/ 2800 batches | train loss 0.1903206 +| epoch 10 | 2479/ 2800 batches | train loss 0.3240823 +| epoch 10 | 2483/ 2800 batches | train loss 0.3104755 +| epoch 10 | 2487/ 2800 batches | train loss 0.2981274 +| epoch 10 | 2491/ 2800 batches | train loss 0.3215239 +| epoch 10 | 2495/ 2800 batches | train loss 0.2718752 +| epoch 10 | 2499/ 2800 batches | train loss 0.2973396 +| epoch 10 | 2503/ 2800 batches | train loss 0.3120751 +| epoch 10 | 2507/ 2800 batches | train loss 0.2527767 +| epoch 10 | 2511/ 2800 batches | train loss 0.2787289 +| epoch 10 | 2515/ 2800 batches | train loss 0.2500692 +| epoch 10 | 2519/ 2800 batches | train loss 0.2582002 +| epoch 10 | 2523/ 2800 batches | train loss 0.3259757 +| epoch 10 | 2527/ 2800 batches | train loss 0.3125084 +| epoch 10 | 2531/ 2800 batches | train loss 0.2968684 +| epoch 10 | 2535/ 2800 batches | train loss 0.2561847 +| epoch 10 | 2539/ 2800 batches | train loss 0.3140164 +| epoch 10 | 2543/ 2800 batches | train loss 0.3145362 +| epoch 10 | 2547/ 2800 batches | train loss 0.2759919 +| epoch 10 | 2551/ 2800 batches | train loss 0.2848623 +| epoch 10 | 2555/ 2800 batches | train loss 0.2564158 +| epoch 10 | 2559/ 2800 batches | train loss 0.3005636 +| epoch 10 | 2563/ 2800 batches | train loss 0.2869365 +| epoch 10 | 2567/ 2800 batches | train loss 0.3017029 +| epoch 10 | 2571/ 2800 batches | train loss 0.3081698 +| epoch 10 | 2575/ 2800 batches | train loss 0.3153285 +| epoch 10 | 2579/ 2800 batches | train loss 0.2740248 +| epoch 10 | 2583/ 2800 batches | train loss 0.3062275 +| epoch 10 | 2587/ 2800 batches | train loss 0.2593669 +| epoch 10 | 2591/ 2800 batches | train loss 0.3310436 +| epoch 10 | 2595/ 2800 batches | train loss 0.3312292 +| epoch 10 | 2599/ 2800 batches | train loss 0.2866895 +| epoch 10 | 2603/ 2800 batches | train loss 0.2844597 +| epoch 10 | 2607/ 2800 batches | train loss 0.2985013 +| epoch 10 | 2611/ 2800 batches | train loss 0.2850806 +| epoch 10 | 2615/ 2800 batches | train loss 0.3518824 +| epoch 10 | 2619/ 2800 batches | train loss 0.3135576 +| epoch 10 | 2623/ 2800 batches | train loss 0.3583370 +| epoch 10 | 2627/ 2800 batches | train loss 0.2771090 +| epoch 10 | 2631/ 2800 batches | train loss 0.2879975 +| epoch 10 | 2635/ 2800 batches | train loss 0.3159901 +| epoch 10 | 2639/ 2800 batches | train loss 0.3126967 +| epoch 10 | 2643/ 2800 batches | train loss 0.3309334 +| epoch 10 | 2647/ 2800 batches | train loss 0.3061485 +| epoch 10 | 2651/ 2800 batches | train loss 0.3060627 +| epoch 10 | 2655/ 2800 batches | train loss 0.2786766 +| epoch 10 | 2659/ 2800 batches | train loss 0.3260692 +| epoch 10 | 2663/ 2800 batches | train loss 0.3149448 +| epoch 10 | 2667/ 2800 batches | train loss 0.1869683 +| epoch 10 | 2671/ 2800 batches | train loss 0.3403881 +| epoch 10 | 2675/ 2800 batches | train loss 0.2650672 +| epoch 10 | 2679/ 2800 batches | train loss 0.3221778 +| epoch 10 | 2683/ 2800 batches | train loss 0.2605942 +| epoch 10 | 2687/ 2800 batches | train loss 0.2886504 +| epoch 10 | 2691/ 2800 batches | train loss 0.3051441 +| epoch 10 | 2695/ 2800 batches | train loss 0.3102413 +| epoch 10 | 2699/ 2800 batches | train loss 0.2668332 +| epoch 10 | 2703/ 2800 batches | train loss 0.3804442 +| epoch 10 | 2707/ 2800 batches | train loss 0.3029413 +| epoch 10 | 2711/ 2800 batches | train loss 0.3265842 +| epoch 10 | 2715/ 2800 batches | train loss 0.3637712 +| epoch 10 | 2719/ 2800 batches | train loss 0.2889486 +| epoch 10 | 2723/ 2800 batches | train loss 0.3185973 +| epoch 10 | 2727/ 2800 batches | train loss 0.3187689 +| epoch 10 | 2731/ 2800 batches | train loss 0.3434786 +| epoch 10 | 2735/ 2800 batches | train loss 0.2532389 +| epoch 10 | 2739/ 2800 batches | train loss 0.3145443 +| epoch 10 | 2743/ 2800 batches | train loss 0.2386336 +| epoch 10 | 2747/ 2800 batches | train loss 0.2832433 +| epoch 10 | 2751/ 2800 batches | train loss 0.3105043 +| epoch 10 | 2755/ 2800 batches | train loss 0.2714906 +| epoch 10 | 2759/ 2800 batches | train loss 0.2939847 +| epoch 10 | 2763/ 2800 batches | train loss 0.2537581 +| epoch 10 | 2767/ 2800 batches | train loss 0.2828511 +| epoch 10 | 2771/ 2800 batches | train loss 0.3189869 +| epoch 10 | 2775/ 2800 batches | train loss 0.2855949 +| epoch 10 | 2779/ 2800 batches | train loss 0.2827351 +| epoch 10 | 2783/ 2800 batches | train loss 0.2971925 +| epoch 10 | 2787/ 2800 batches | train loss 0.2949694 +| epoch 10 | 2791/ 2800 batches | train loss 0.3137600 +| epoch 10 | 2795/ 2800 batches | train loss 0.3064043 +| epoch 10 | 2799/ 2800 batches | train loss 0.2337307 +-------------------------------------------------------------------------------- +| epoch 10 | 3/ 2800 batches | test loss 0.7848230 +| epoch 10 | 7/ 2800 batches | test loss 0.8438213 +| epoch 10 | 11/ 2800 batches | test loss 0.5364562 +| epoch 10 | 15/ 2800 batches | test loss 0.6201767 +| epoch 10 | 19/ 2800 batches | test loss 0.4458298 +| epoch 10 | 23/ 2800 batches | test loss 0.4061688 +| epoch 10 | 27/ 2800 batches | test loss 0.7922405 +| epoch 10 | 31/ 2800 batches | test loss 0.9030291 +| epoch 10 | 35/ 2800 batches | test loss 0.5696181 +| epoch 10 | 39/ 2800 batches | test loss 0.4975663 +| epoch 10 | 43/ 2800 batches | test loss 0.7657561 +| epoch 10 | 47/ 2800 batches | test loss 0.4705218 +| epoch 10 | 51/ 2800 batches | test loss 0.8057936 +| epoch 10 | 55/ 2800 batches | test loss 0.5736546 +| epoch 10 | 59/ 2800 batches | test loss 0.4991413 +| epoch 10 | 63/ 2800 batches | test loss 0.4081082 +| epoch 10 | 67/ 2800 batches | test loss 0.6746889 +| epoch 10 | 71/ 2800 batches | test loss 0.8825628 +| epoch 10 | 75/ 2800 batches | test loss 0.5367418 +| epoch 10 | 79/ 2800 batches | test loss 0.6334084 +| epoch 10 | 83/ 2800 batches | test loss 0.7399070 +| epoch 10 | 87/ 2800 batches | test loss 0.4200848 +| epoch 10 | 91/ 2800 batches | test loss 0.5104741 +| epoch 10 | 95/ 2800 batches | test loss 0.5405000 +| epoch 10 | 99/ 2800 batches | test loss 0.5950724 +| epoch 10 | 103/ 2800 batches | test loss 0.7690908 +| epoch 10 | 107/ 2800 batches | test loss 0.5159650 +| epoch 10 | 111/ 2800 batches | test loss 0.4157373 +| epoch 10 | 115/ 2800 batches | test loss 0.5219396 +| epoch 10 | 119/ 2800 batches | test loss 1.1111128 +| epoch 10 | 123/ 2800 batches | test loss 0.4772600 +| epoch 10 | 127/ 2800 batches | test loss 0.4184983 +| epoch 10 | 131/ 2800 batches | test loss 0.7644535 +| epoch 10 | 135/ 2800 batches | test loss 0.4365654 +| epoch 10 | 139/ 2800 batches | test loss 0.6160207 +| epoch 10 | 143/ 2800 batches | test loss 0.6905756 +| epoch 10 | 147/ 2800 batches | test loss 0.6597078 +| epoch 10 | 151/ 2800 batches | test loss 0.9495423 +| epoch 10 | 155/ 2800 batches | test loss 0.6570095 +| epoch 10 | 159/ 2800 batches | test loss 0.5378278 +| epoch 10 | 163/ 2800 batches | test loss 0.7028770 +| epoch 10 | 167/ 2800 batches | test loss 0.5180213 +| epoch 10 | 171/ 2800 batches | test loss 0.7545133 +| epoch 10 | 175/ 2800 batches | test loss 0.5368174 +| epoch 10 | 179/ 2800 batches | test loss 0.6385447 +| epoch 10 | 183/ 2800 batches | test loss 0.4510094 +| epoch 10 | 187/ 2800 batches | test loss 0.6576228 +| epoch 10 | 191/ 2800 batches | test loss 0.5820192 +| epoch 10 | 195/ 2800 batches | test loss 0.7241269 +| epoch 10 | 199/ 2800 batches | test loss 0.5062503 +| epoch 10 | 203/ 2800 batches | test loss 0.5597834 +| epoch 10 | 207/ 2800 batches | test loss 0.6133246 +| epoch 10 | 211/ 2800 batches | test loss 0.6612653 +| epoch 10 | 215/ 2800 batches | test loss 0.5895460 +| epoch 10 | 219/ 2800 batches | test loss 0.5013292 +| epoch 10 | 223/ 2800 batches | test loss 0.7999366 +| epoch 10 | 227/ 2800 batches | test loss 0.5561142 +| epoch 10 | 231/ 2800 batches | test loss 0.5283781 +| epoch 10 | 235/ 2800 batches | test loss 1.0312953 +| epoch 10 | 239/ 2800 batches | test loss 0.4435361 +| epoch 10 | 243/ 2800 batches | test loss 0.4714187 +| epoch 10 | 247/ 2800 batches | test loss 0.7823470 +| epoch 10 | 251/ 2800 batches | test loss 0.5175272 +| epoch 10 | 255/ 2800 batches | test loss 0.8996737 +| epoch 10 | 259/ 2800 batches | test loss 0.4526417 +| epoch 10 | 263/ 2800 batches | test loss 0.5491581 +| epoch 10 | 267/ 2800 batches | test loss 0.5128369 +| epoch 10 | 271/ 2800 batches | test loss 0.8859183 +| epoch 10 | 275/ 2800 batches | test loss 0.6007297 +| epoch 10 | 279/ 2800 batches | test loss 0.6856833 +| epoch 10 | 283/ 2800 batches | test loss 0.5357201 +| epoch 10 | 287/ 2800 batches | test loss 0.7261552 +| epoch 10 | 291/ 2800 batches | test loss 0.4458182 +| epoch 10 | 295/ 2800 batches | test loss 0.4216187 +| epoch 10 | 299/ 2800 batches | test loss 0.6078417 +| epoch 10 | 303/ 2800 batches | test loss 0.5981948 +| epoch 10 | 307/ 2800 batches | test loss 0.4110689 +| epoch 10 | 311/ 2800 batches | test loss 0.4693054 +| epoch 10 | 315/ 2800 batches | test loss 0.6608272 +| epoch 10 | 319/ 2800 batches | test loss 0.4169740 +| epoch 10 | 323/ 2800 batches | test loss 0.5897564 +| epoch 10 | 327/ 2800 batches | test loss 0.6691872 +| epoch 10 | 331/ 2800 batches | test loss 0.5199742 +| epoch 10 | 335/ 2800 batches | test loss 0.4670727 +| epoch 10 | 339/ 2800 batches | test loss 0.5653321 +| epoch 10 | 343/ 2800 batches | test loss 0.8983682 +| epoch 10 | 347/ 2800 batches | test loss 0.6564670 +| epoch 10 | 351/ 2800 batches | test loss 0.6869086 +| epoch 10 | 355/ 2800 batches | test loss 0.7831565 +| epoch 10 | 359/ 2800 batches | test loss 0.4794977 +| epoch 10 | 363/ 2800 batches | test loss 0.4609131 +| epoch 10 | 367/ 2800 batches | test loss 0.4047937 +| epoch 10 | 371/ 2800 batches | test loss 0.6977445 +| epoch 10 | 375/ 2800 batches | test loss 0.5812454 +| epoch 10 | 379/ 2800 batches | test loss 0.6472186 +| epoch 10 | 383/ 2800 batches | test loss 0.7912677 +| epoch 10 | 387/ 2800 batches | test loss 0.5335256 +| epoch 10 | 391/ 2800 batches | test loss 0.5491673 +| epoch 10 | 395/ 2800 batches | test loss 0.3399187 +| epoch 10 | 399/ 2800 batches | test loss 0.8348643 +| epoch 10 | 403/ 2800 batches | test loss 0.6775533 +| epoch 10 | 407/ 2800 batches | test loss 0.5400509 +| epoch 10 | 411/ 2800 batches | test loss 0.6726384 +| epoch 10 | 415/ 2800 batches | test loss 0.7435468 +| epoch 10 | 419/ 2800 batches | test loss 0.5649228 +| epoch 10 | 423/ 2800 batches | test loss 0.7227160 +| epoch 10 | 427/ 2800 batches | test loss 0.5216274 +| epoch 10 | 431/ 2800 batches | test loss 0.9544329 +| epoch 10 | 435/ 2800 batches | test loss 0.4020764 +| epoch 10 | 439/ 2800 batches | test loss 0.5730275 +| epoch 10 | 443/ 2800 batches | test loss 0.6588098 +| epoch 10 | 447/ 2800 batches | test loss 0.6246970 +| epoch 10 | 451/ 2800 batches | test loss 0.4411240 +| epoch 10 | 455/ 2800 batches | test loss 0.5145816 +| epoch 10 | 459/ 2800 batches | test loss 0.7930493 +| epoch 10 | 463/ 2800 batches | test loss 1.1050490 +| epoch 10 | 467/ 2800 batches | test loss 0.5469399 +| epoch 10 | 471/ 2800 batches | test loss 0.5370989 +| epoch 10 | 475/ 2800 batches | test loss 0.6627459 +| epoch 10 | 479/ 2800 batches | test loss 0.6880459 +| epoch 10 | 483/ 2800 batches | test loss 0.7376671 +| epoch 10 | 487/ 2800 batches | test loss 0.5315414 +| epoch 10 | 491/ 2800 batches | test loss 0.8062339 +| epoch 10 | 495/ 2800 batches | test loss 0.5358605 +| epoch 10 | 499/ 2800 batches | test loss 0.7805253 +| epoch 10 | 503/ 2800 batches | test loss 0.4819397 +| epoch 10 | 507/ 2800 batches | test loss 0.4973963 +| epoch 10 | 511/ 2800 batches | test loss 1.0342108 +| epoch 10 | 515/ 2800 batches | test loss 0.5301802 +| epoch 10 | 519/ 2800 batches | test loss 0.8042853 +| epoch 10 | 523/ 2800 batches | test loss 0.5907403 +| epoch 10 | 527/ 2800 batches | test loss 0.7604453 +| epoch 10 | 531/ 2800 batches | test loss 0.7010232 +| epoch 10 | 535/ 2800 batches | test loss 0.5775387 +| epoch 10 | 539/ 2800 batches | test loss 0.6387542 +| epoch 10 | 543/ 2800 batches | test loss 0.7459205 +| epoch 10 | 547/ 2800 batches | test loss 0.5682229 +| epoch 10 | 551/ 2800 batches | test loss 0.6112845 +| epoch 10 | 555/ 2800 batches | test loss 0.5218268 +| epoch 10 | 559/ 2800 batches | test loss 0.4398752 +| epoch 10 | 563/ 2800 batches | test loss 0.5789756 +| epoch 10 | 567/ 2800 batches | test loss 0.5291409 +| epoch 10 | 571/ 2800 batches | test loss 0.5642874 +| epoch 10 | 575/ 2800 batches | test loss 0.6562037 +| epoch 10 | 579/ 2800 batches | test loss 0.6138113 +| epoch 10 | 583/ 2800 batches | test loss 0.4291363 +| epoch 10 | 587/ 2800 batches | test loss 0.6289151 +| epoch 10 | 591/ 2800 batches | test loss 0.5510758 +| epoch 10 | 595/ 2800 batches | test loss 0.6622139 +| epoch 10 | 599/ 2800 batches | test loss 0.6688059 +| epoch 10 | 603/ 2800 batches | test loss 0.5906464 +| epoch 10 | 607/ 2800 batches | test loss 0.5289934 +| epoch 10 | 611/ 2800 batches | test loss 0.4464752 +| epoch 10 | 615/ 2800 batches | test loss 0.5060351 +| epoch 10 | 619/ 2800 batches | test loss 0.6619603 +| epoch 10 | 623/ 2800 batches | test loss 0.7613887 +| epoch 10 | 627/ 2800 batches | test loss 0.4451824 +| epoch 10 | 631/ 2800 batches | test loss 0.7163399 +| epoch 10 | 635/ 2800 batches | test loss 0.4471694 +| epoch 10 | 639/ 2800 batches | test loss 0.9240367 +| epoch 10 | 643/ 2800 batches | test loss 0.7106956 +| epoch 10 | 647/ 2800 batches | test loss 0.4818737 +| epoch 10 | 651/ 2800 batches | test loss 0.9722596 +| epoch 10 | 655/ 2800 batches | test loss 0.5319129 +| epoch 10 | 659/ 2800 batches | test loss 0.3860292 +| epoch 10 | 663/ 2800 batches | test loss 0.6103733 +| epoch 10 | 667/ 2800 batches | test loss 0.6146235 +| epoch 10 | 671/ 2800 batches | test loss 0.5657567 +| epoch 10 | 675/ 2800 batches | test loss 0.6050847 +| epoch 10 | 679/ 2800 batches | test loss 0.5486230 +| epoch 10 | 683/ 2800 batches | test loss 0.4062689 +| epoch 10 | 687/ 2800 batches | test loss 0.5351673 +| epoch 10 | 691/ 2800 batches | test loss 0.8957242 +| epoch 10 | 695/ 2800 batches | test loss 0.4219657 +| epoch 10 | 699/ 2800 batches | test loss 0.6612644 +| epoch 10 | final test loss 0.6083, do not save model! diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/partial_model_weights.pth b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/partial_model_weights.pth new file mode 100644 index 0000000000000000000000000000000000000000..321c69a2e0da01ae2add84647c6ca2daa980b58f --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/partial_model_weights.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9653bb2737dacaff5ad743be55f8f68ef2cda481c11b7a2555407d52656c32f0 +size 1975288322 diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_config.json b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c0cc7dc15e04f1c040e1660f159b5b09db64be8 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_config.json @@ -0,0 +1,29 @@ +{ + "stage": "stage2", + "lr": 3e-05, + "epochs": 10, + "log_interval": 4, + "gradient_clip": 1.0, + "tr_batch_size": 4, + "te_batch_size": 4, + "gradient_accumulation_steps": 1, + "update_params": [ + "all" + ], + "corpus": "math_derivation", + "num_of_sents": [ + 12, + 12 + ], + "encoder": "bert-base-cased", + "repeat": 1, + "max_num_each_cat": 4000, + "fb_mode": 0.0, + "set_loss_mask": false, + "use_label_dec": true, + "use_label_enc": false, + "decoder": "Qwen/Qwen2.5-0.5B", + "pretrained_path": null, + "device": "cuda", + "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000" +} \ No newline at end of file diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_log.log b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_log.log new file mode 100644 index 0000000000000000000000000000000000000000..2b74ac43321852a3bb9fb2b65fc8905657759f70 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_4000/train_log.log @@ -0,0 +1,17535 @@ +* training corpus: math_derivation +* total num: 28000 +* epochs: 10 +* batch size: 4 +* gradient_accumulation_steps: 1 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 5600 batches | train loss 3.3133447 +| epoch 1 | 7/ 5600 batches | train loss 2.0758586 +| epoch 1 | 11/ 5600 batches | train loss 2.1315722 +| epoch 1 | 15/ 5600 batches | train loss 2.1108468 +| epoch 1 | 19/ 5600 batches | train loss 1.7367579 +| epoch 1 | 23/ 5600 batches | train loss 1.0708959 +| epoch 1 | 27/ 5600 batches | train loss 0.8775748 +| epoch 1 | 31/ 5600 batches | train loss 0.8153957 +| epoch 1 | 35/ 5600 batches | train loss 0.9221532 +| epoch 1 | 39/ 5600 batches | train loss 0.7726994 +| epoch 1 | 43/ 5600 batches | train loss 0.7099791 +| epoch 1 | 47/ 5600 batches | train loss 1.2659805 +| epoch 1 | 51/ 5600 batches | train loss 0.7885312 +| epoch 1 | 55/ 5600 batches | train loss 0.6651708 +| epoch 1 | 59/ 5600 batches | train loss 0.4952407 +| epoch 1 | 63/ 5600 batches | train loss 0.5369329 +| epoch 1 | 67/ 5600 batches | train loss 0.5986235 +| epoch 1 | 71/ 5600 batches | train loss 0.8185696 +| epoch 1 | 75/ 5600 batches | train loss 0.7255831 +| epoch 1 | 79/ 5600 batches | train loss 0.5928910 +| epoch 1 | 83/ 5600 batches | train loss 0.8410126 +| epoch 1 | 87/ 5600 batches | train loss 0.6867917 +| epoch 1 | 91/ 5600 batches | train loss 0.6213816 +| epoch 1 | 95/ 5600 batches | train loss 0.6910181 +| epoch 1 | 99/ 5600 batches | train loss 0.7539612 +| epoch 1 | 103/ 5600 batches | train loss 0.6838274 +| epoch 1 | 107/ 5600 batches | train loss 0.6516335 +| epoch 1 | 111/ 5600 batches | train loss 0.4935870 +| epoch 1 | 115/ 5600 batches | train loss 0.5630779 +| epoch 1 | 119/ 5600 batches | train loss 0.5639338 +| epoch 1 | 123/ 5600 batches | train loss 0.6693232 +| epoch 1 | 127/ 5600 batches | train loss 0.7935075 +| epoch 1 | 131/ 5600 batches | train loss 0.5250092 +| epoch 1 | 135/ 5600 batches | train loss 0.5945840 +| epoch 1 | 139/ 5600 batches | train loss 0.5564717 +| epoch 1 | 143/ 5600 batches | train loss 0.5823095 +| epoch 1 | 147/ 5600 batches | train loss 0.4666017 +| epoch 1 | 151/ 5600 batches | train loss 0.4731266 +| epoch 1 | 155/ 5600 batches | train loss 0.5904341 +| epoch 1 | 159/ 5600 batches | train loss 0.5813676 +| epoch 1 | 163/ 5600 batches | train loss 0.4952699 +| epoch 1 | 167/ 5600 batches | train loss 0.5804496 +| epoch 1 | 171/ 5600 batches | train loss 0.6173522 +| epoch 1 | 175/ 5600 batches | train loss 0.6933423 +| epoch 1 | 179/ 5600 batches | train loss 0.5579094 +| epoch 1 | 183/ 5600 batches | train loss 0.4912142 +| epoch 1 | 187/ 5600 batches | train loss 0.5250177 +| epoch 1 | 191/ 5600 batches | train loss 0.5496048 +| epoch 1 | 195/ 5600 batches | train loss 0.5190498 +| epoch 1 | 199/ 5600 batches | train loss 0.5113035 +| epoch 1 | 203/ 5600 batches | train loss 0.7020054 +| epoch 1 | 207/ 5600 batches | train loss 0.5127919 +| epoch 1 | 211/ 5600 batches | train loss 0.4862161 +| epoch 1 | 215/ 5600 batches | train loss 0.6613528 +| epoch 1 | 219/ 5600 batches | train loss 0.6051615 +| epoch 1 | 223/ 5600 batches | train loss 0.6903039 +| epoch 1 | 227/ 5600 batches | train loss 0.4991980 +| epoch 1 | 231/ 5600 batches | train loss 0.5961658 +| epoch 1 | 235/ 5600 batches | train loss 0.6172767 +| epoch 1 | 239/ 5600 batches | train loss 0.5011738 +| epoch 1 | 243/ 5600 batches | train loss 0.4304037 +| epoch 1 | 247/ 5600 batches | train loss 0.5828505 +| epoch 1 | 251/ 5600 batches | train loss 0.4784932 +| epoch 1 | 255/ 5600 batches | train loss 0.5943760 +| epoch 1 | 259/ 5600 batches | train loss 0.5359232 +| epoch 1 | 263/ 5600 batches | train loss 0.5878261 +| epoch 1 | 267/ 5600 batches | train loss 0.4436319 +| epoch 1 | 271/ 5600 batches | train loss 0.5816644 +| epoch 1 | 275/ 5600 batches | train loss 0.5322912 +| epoch 1 | 279/ 5600 batches | train loss 0.6315144 +| epoch 1 | 283/ 5600 batches | train loss 0.4610661 +| epoch 1 | 287/ 5600 batches | train loss 0.5946109 +| epoch 1 | 291/ 5600 batches | train loss 0.6709569 +| epoch 1 | 295/ 5600 batches | train loss 0.5235999 +| epoch 1 | 299/ 5600 batches | train loss 0.4893663 +| epoch 1 | 303/ 5600 batches | train loss 0.7048007 +| epoch 1 | 307/ 5600 batches | train loss 0.6085953 +| epoch 1 | 311/ 5600 batches | train loss 0.4287734 +| epoch 1 | 315/ 5600 batches | train loss 0.5632508 +| epoch 1 | 319/ 5600 batches | train loss 0.6583062 +| epoch 1 | 323/ 5600 batches | train loss 0.4556877 +| epoch 1 | 327/ 5600 batches | train loss 0.6923782 +| epoch 1 | 331/ 5600 batches | train loss 0.6992607 +| epoch 1 | 335/ 5600 batches | train loss 0.6734372 +| epoch 1 | 339/ 5600 batches | train loss 0.5930737 +| epoch 1 | 343/ 5600 batches | train loss 0.6609282 +| epoch 1 | 347/ 5600 batches | train loss 0.5551747 +| epoch 1 | 351/ 5600 batches | train loss 0.6128871 +| epoch 1 | 355/ 5600 batches | train loss 0.5202906 +| epoch 1 | 359/ 5600 batches | train loss 0.4530511 +| epoch 1 | 363/ 5600 batches | train loss 0.4494618 +| epoch 1 | 367/ 5600 batches | train loss 0.6358787 +| epoch 1 | 371/ 5600 batches | train loss 0.4419897 +| epoch 1 | 375/ 5600 batches | train loss 0.5435529 +| epoch 1 | 379/ 5600 batches | train loss 0.4863274 +| epoch 1 | 383/ 5600 batches | train loss 0.5477592 +| epoch 1 | 387/ 5600 batches | train loss 0.5687806 +| epoch 1 | 391/ 5600 batches | train loss 0.6244823 +| epoch 1 | 395/ 5600 batches | train loss 0.5138503 +| epoch 1 | 399/ 5600 batches | train loss 0.4943646 +| epoch 1 | 403/ 5600 batches | train loss 0.7652339 +| epoch 1 | 407/ 5600 batches | train loss 0.5973601 +| epoch 1 | 411/ 5600 batches | train loss 0.5930209 +| epoch 1 | 415/ 5600 batches | train loss 0.5866001 +| epoch 1 | 419/ 5600 batches | train loss 0.5045822 +| epoch 1 | 423/ 5600 batches | train loss 0.6480458 +| epoch 1 | 427/ 5600 batches | train loss 0.4461710 +| epoch 1 | 431/ 5600 batches | train loss 0.4923908 +| epoch 1 | 435/ 5600 batches | train loss 0.5079139 +| epoch 1 | 439/ 5600 batches | train loss 0.4061791 +| epoch 1 | 443/ 5600 batches | train loss 0.6046994 +| epoch 1 | 447/ 5600 batches | train loss 0.4708787 +| epoch 1 | 451/ 5600 batches | train loss 0.6118962 +| epoch 1 | 455/ 5600 batches | train loss 0.4579494 +| epoch 1 | 459/ 5600 batches | train loss 0.5512557 +| epoch 1 | 463/ 5600 batches | train loss 0.6129863 +| epoch 1 | 467/ 5600 batches | train loss 0.5202197 +| epoch 1 | 471/ 5600 batches | train loss 0.5443246 +| epoch 1 | 475/ 5600 batches | train loss 0.5400996 +| epoch 1 | 479/ 5600 batches | train loss 0.4674771 +| epoch 1 | 483/ 5600 batches | train loss 0.5928821 +| epoch 1 | 487/ 5600 batches | train loss 0.5277351 +| epoch 1 | 491/ 5600 batches | train loss 0.5524150 +| epoch 1 | 495/ 5600 batches | train loss 0.6934844 +| epoch 1 | 499/ 5600 batches | train loss 0.5439299 +| epoch 1 | 503/ 5600 batches | train loss 0.4319586 +| epoch 1 | 507/ 5600 batches | train loss 0.4943466 +| epoch 1 | 511/ 5600 batches | train loss 0.5202398 +| epoch 1 | 515/ 5600 batches | train loss 0.6673074 +| epoch 1 | 519/ 5600 batches | train loss 0.4221221 +| epoch 1 | 523/ 5600 batches | train loss 0.5141737 +| epoch 1 | 527/ 5600 batches | train loss 0.5512957 +| epoch 1 | 531/ 5600 batches | train loss 0.5232006 +| epoch 1 | 535/ 5600 batches | train loss 0.5398598 +| epoch 1 | 539/ 5600 batches | train loss 0.4596859 +| epoch 1 | 543/ 5600 batches | train loss 0.4910014 +| epoch 1 | 547/ 5600 batches | train loss 0.4313131 +| epoch 1 | 551/ 5600 batches | train loss 0.5778501 +| epoch 1 | 555/ 5600 batches | train loss 0.5322535 +| epoch 1 | 559/ 5600 batches | train loss 0.4929832 +| epoch 1 | 563/ 5600 batches | train loss 0.5272602 +| epoch 1 | 567/ 5600 batches | train loss 0.5918156 +| epoch 1 | 571/ 5600 batches | train loss 0.5883304 +| epoch 1 | 575/ 5600 batches | train loss 0.3925229 +| epoch 1 | 579/ 5600 batches | train loss 0.4686307 +| epoch 1 | 583/ 5600 batches | train loss 0.4924507 +| epoch 1 | 587/ 5600 batches | train loss 0.5128553 +| epoch 1 | 591/ 5600 batches | train loss 0.5381929 +| epoch 1 | 595/ 5600 batches | train loss 0.4814757 +| epoch 1 | 599/ 5600 batches | train loss 0.4629661 +| epoch 1 | 603/ 5600 batches | train loss 0.6008903 +| epoch 1 | 607/ 5600 batches | train loss 0.5139576 +| epoch 1 | 611/ 5600 batches | train loss 0.5733581 +| epoch 1 | 615/ 5600 batches | train loss 0.5350846 +| epoch 1 | 619/ 5600 batches | train loss 0.6265988 +| epoch 1 | 623/ 5600 batches | train loss 0.4890959 +| epoch 1 | 627/ 5600 batches | train loss 0.4873736 +| epoch 1 | 631/ 5600 batches | train loss 0.5445471 +| epoch 1 | 635/ 5600 batches | train loss 0.4173234 +| epoch 1 | 639/ 5600 batches | train loss 0.4220459 +| epoch 1 | 643/ 5600 batches | train loss 0.4692485 +| epoch 1 | 647/ 5600 batches | train loss 0.6381805 +| epoch 1 | 651/ 5600 batches | train loss 0.4738812 +| epoch 1 | 655/ 5600 batches | train loss 0.4718007 +| epoch 1 | 659/ 5600 batches | train loss 0.3970064 +| epoch 1 | 663/ 5600 batches | train loss 0.6215916 +| epoch 1 | 667/ 5600 batches | train loss 0.5014836 +| epoch 1 | 671/ 5600 batches | train loss 0.5586456 +| epoch 1 | 675/ 5600 batches | train loss 0.5086534 +| epoch 1 | 679/ 5600 batches | train loss 0.4115275 +| epoch 1 | 683/ 5600 batches | train loss 0.5085486 +| epoch 1 | 687/ 5600 batches | train loss 0.5108876 +| epoch 1 | 691/ 5600 batches | train loss 0.5557371 +| epoch 1 | 695/ 5600 batches | train loss 0.4990545 +| epoch 1 | 699/ 5600 batches | train loss 0.4173401 +| epoch 1 | 703/ 5600 batches | train loss 0.5847116 +| epoch 1 | 707/ 5600 batches | train loss 0.5054849 +| epoch 1 | 711/ 5600 batches | train loss 0.5076065 +| epoch 1 | 715/ 5600 batches | train loss 0.3862225 +| epoch 1 | 719/ 5600 batches | train loss 0.4808805 +| epoch 1 | 723/ 5600 batches | train loss 0.4273820 +| epoch 1 | 727/ 5600 batches | train loss 0.5952323 +| epoch 1 | 731/ 5600 batches | train loss 0.4532765 +| epoch 1 | 735/ 5600 batches | train loss 0.5488924 +| epoch 1 | 739/ 5600 batches | train loss 0.4732034 +| epoch 1 | 743/ 5600 batches | train loss 0.4731188 +| epoch 1 | 747/ 5600 batches | train loss 0.4282040 +| epoch 1 | 751/ 5600 batches | train loss 0.5778198 +| epoch 1 | 755/ 5600 batches | train loss 0.6211195 +| epoch 1 | 759/ 5600 batches | train loss 0.4935620 +| epoch 1 | 763/ 5600 batches | train loss 0.4375107 +| epoch 1 | 767/ 5600 batches | train loss 0.5209572 +| epoch 1 | 771/ 5600 batches | train loss 0.4460109 +| epoch 1 | 775/ 5600 batches | train loss 0.5704132 +| epoch 1 | 779/ 5600 batches | train loss 0.6726553 +| epoch 1 | 783/ 5600 batches | train loss 0.5950947 +| epoch 1 | 787/ 5600 batches | train loss 0.4816056 +| epoch 1 | 791/ 5600 batches | train loss 0.4387454 +| epoch 1 | 795/ 5600 batches | train loss 0.5045794 +| epoch 1 | 799/ 5600 batches | train loss 0.4437639 +| epoch 1 | 803/ 5600 batches | train loss 0.4905578 +| epoch 1 | 807/ 5600 batches | train loss 0.5368279 +| epoch 1 | 811/ 5600 batches | train loss 0.5132293 +| epoch 1 | 815/ 5600 batches | train loss 0.5499268 +| epoch 1 | 819/ 5600 batches | train loss 0.6516056 +| epoch 1 | 823/ 5600 batches | train loss 0.4551177 +| epoch 1 | 827/ 5600 batches | train loss 0.5153025 +| epoch 1 | 831/ 5600 batches | train loss 0.5010906 +| epoch 1 | 835/ 5600 batches | train loss 0.4483464 +| epoch 1 | 839/ 5600 batches | train loss 0.4673225 +| epoch 1 | 843/ 5600 batches | train loss 0.4394499 +| epoch 1 | 847/ 5600 batches | train loss 0.3941830 +| epoch 1 | 851/ 5600 batches | train loss 0.5181873 +| epoch 1 | 855/ 5600 batches | train loss 0.4919385 +| epoch 1 | 859/ 5600 batches | train loss 0.4657471 +| epoch 1 | 863/ 5600 batches | train loss 0.5349509 +| epoch 1 | 867/ 5600 batches | train loss 0.6023873 +| epoch 1 | 871/ 5600 batches | train loss 0.6358197 +| epoch 1 | 875/ 5600 batches | train loss 0.5380447 +| epoch 1 | 879/ 5600 batches | train loss 0.5830106 +| epoch 1 | 883/ 5600 batches | train loss 0.4924961 +| epoch 1 | 887/ 5600 batches | train loss 0.5317803 +| epoch 1 | 891/ 5600 batches | train loss 0.5326496 +| epoch 1 | 895/ 5600 batches | train loss 0.4353643 +| epoch 1 | 899/ 5600 batches | train loss 0.4875642 +| epoch 1 | 903/ 5600 batches | train loss 0.5200240 +| epoch 1 | 907/ 5600 batches | train loss 0.4964597 +| epoch 1 | 911/ 5600 batches | train loss 0.4931636 +| epoch 1 | 915/ 5600 batches | train loss 0.4353269 +| epoch 1 | 919/ 5600 batches | train loss 0.4648139 +| epoch 1 | 923/ 5600 batches | train loss 0.6198268 +| epoch 1 | 927/ 5600 batches | train loss 0.4519708 +| epoch 1 | 931/ 5600 batches | train loss 0.4428665 +| epoch 1 | 935/ 5600 batches | train loss 0.4883670 +| epoch 1 | 939/ 5600 batches | train loss 0.4687600 +| epoch 1 | 943/ 5600 batches | train loss 0.4840597 +| epoch 1 | 947/ 5600 batches | train loss 0.4745246 +| epoch 1 | 951/ 5600 batches | train loss 0.4554381 +| epoch 1 | 955/ 5600 batches | train loss 0.3962733 +| epoch 1 | 959/ 5600 batches | train loss 0.5254318 +| epoch 1 | 963/ 5600 batches | train loss 0.5253713 +| epoch 1 | 967/ 5600 batches | train loss 0.4627275 +| epoch 1 | 971/ 5600 batches | train loss 0.4665143 +| epoch 1 | 975/ 5600 batches | train loss 0.4100786 +| epoch 1 | 979/ 5600 batches | train loss 0.5679672 +| epoch 1 | 983/ 5600 batches | train loss 0.5550607 +| epoch 1 | 987/ 5600 batches | train loss 0.4659416 +| epoch 1 | 991/ 5600 batches | train loss 0.5419417 +| epoch 1 | 995/ 5600 batches | train loss 0.4127226 +| epoch 1 | 999/ 5600 batches | train loss 0.6106113 +| epoch 1 | 1003/ 5600 batches | train loss 0.4166090 +| epoch 1 | 1007/ 5600 batches | train loss 0.3785040 +| epoch 1 | 1011/ 5600 batches | train loss 0.5809187 +| epoch 1 | 1015/ 5600 batches | train loss 0.5931811 +| epoch 1 | 1019/ 5600 batches | train loss 0.4278075 +| epoch 1 | 1023/ 5600 batches | train loss 0.4504914 +| epoch 1 | 1027/ 5600 batches | train loss 0.4570848 +| epoch 1 | 1031/ 5600 batches | train loss 0.5169023 +| epoch 1 | 1035/ 5600 batches | train loss 0.4849310 +| epoch 1 | 1039/ 5600 batches | train loss 0.4176098 +| epoch 1 | 1043/ 5600 batches | train loss 0.5584053 +| epoch 1 | 1047/ 5600 batches | train loss 0.5086404 +| epoch 1 | 1051/ 5600 batches | train loss 0.5177863 +| epoch 1 | 1055/ 5600 batches | train loss 0.5010082 +| epoch 1 | 1059/ 5600 batches | train loss 0.4777384 +| epoch 1 | 1063/ 5600 batches | train loss 0.5770459 +| epoch 1 | 1067/ 5600 batches | train loss 0.5084990 +| epoch 1 | 1071/ 5600 batches | train loss 0.7094997 +| epoch 1 | 1075/ 5600 batches | train loss 0.4399643 +| epoch 1 | 1079/ 5600 batches | train loss 0.4965402 +| epoch 1 | 1083/ 5600 batches | train loss 0.5961457 +| epoch 1 | 1087/ 5600 batches | train loss 0.6633607 +| epoch 1 | 1091/ 5600 batches | train loss 0.5700520 +| epoch 1 | 1095/ 5600 batches | train loss 0.4919399 +| epoch 1 | 1099/ 5600 batches | train loss 0.6646791 +| epoch 1 | 1103/ 5600 batches | train loss 0.4658458 +| epoch 1 | 1107/ 5600 batches | train loss 0.4748836 +| epoch 1 | 1111/ 5600 batches | train loss 0.4743096 +| epoch 1 | 1115/ 5600 batches | train loss 0.5500696 +| epoch 1 | 1119/ 5600 batches | train loss 0.5864825 +| epoch 1 | 1123/ 5600 batches | train loss 0.5161558 +| epoch 1 | 1127/ 5600 batches | train loss 0.5624248 +| epoch 1 | 1131/ 5600 batches | train loss 0.4979311 +| epoch 1 | 1135/ 5600 batches | train loss 0.4057249 +| epoch 1 | 1139/ 5600 batches | train loss 0.5515344 +| epoch 1 | 1143/ 5600 batches | train loss 0.5774241 +| epoch 1 | 1147/ 5600 batches | train loss 0.5095015 +| epoch 1 | 1151/ 5600 batches | train loss 0.4638712 +| epoch 1 | 1155/ 5600 batches | train loss 0.4525580 +| epoch 1 | 1159/ 5600 batches | train loss 0.4678239 +| epoch 1 | 1163/ 5600 batches | train loss 0.4641597 +| epoch 1 | 1167/ 5600 batches | train loss 0.4746199 +| epoch 1 | 1171/ 5600 batches | train loss 0.4379134 +| epoch 1 | 1175/ 5600 batches | train loss 0.4901578 +| epoch 1 | 1179/ 5600 batches | train loss 0.5204020 +| epoch 1 | 1183/ 5600 batches | train loss 0.5034056 +| epoch 1 | 1187/ 5600 batches | train loss 0.3979225 +| epoch 1 | 1191/ 5600 batches | train loss 0.4465124 +| epoch 1 | 1195/ 5600 batches | train loss 0.6551200 +| epoch 1 | 1199/ 5600 batches | train loss 0.3970615 +| epoch 1 | 1203/ 5600 batches | train loss 0.4057406 +| epoch 1 | 1207/ 5600 batches | train loss 0.4506311 +| epoch 1 | 1211/ 5600 batches | train loss 0.5717695 +| epoch 1 | 1215/ 5600 batches | train loss 0.5154402 +| epoch 1 | 1219/ 5600 batches | train loss 0.4920496 +| epoch 1 | 1223/ 5600 batches | train loss 0.4732084 +| epoch 1 | 1227/ 5600 batches | train loss 0.4952665 +| epoch 1 | 1231/ 5600 batches | train loss 0.4141206 +| epoch 1 | 1235/ 5600 batches | train loss 0.4909577 +| epoch 1 | 1239/ 5600 batches | train loss 0.4790432 +| epoch 1 | 1243/ 5600 batches | train loss 0.5437380 +| epoch 1 | 1247/ 5600 batches | train loss 0.5792726 +| epoch 1 | 1251/ 5600 batches | train loss 0.4902326 +| epoch 1 | 1255/ 5600 batches | train loss 0.3875408 +| epoch 1 | 1259/ 5600 batches | train loss 0.4831710 +| epoch 1 | 1263/ 5600 batches | train loss 0.3536491 +| epoch 1 | 1267/ 5600 batches | train loss 0.5744312 +| epoch 1 | 1271/ 5600 batches | train loss 0.4552462 +| epoch 1 | 1275/ 5600 batches | train loss 0.4585338 +| epoch 1 | 1279/ 5600 batches | train loss 0.5171164 +| epoch 1 | 1283/ 5600 batches | train loss 0.5325869 +| epoch 1 | 1287/ 5600 batches | train loss 0.4957192 +| epoch 1 | 1291/ 5600 batches | train loss 0.4473569 +| epoch 1 | 1295/ 5600 batches | train loss 0.4168393 +| epoch 1 | 1299/ 5600 batches | train loss 0.4971792 +| epoch 1 | 1303/ 5600 batches | train loss 0.5024359 +| epoch 1 | 1307/ 5600 batches | train loss 0.4992308 +| epoch 1 | 1311/ 5600 batches | train loss 0.4969214 +| epoch 1 | 1315/ 5600 batches | train loss 0.5355764 +| epoch 1 | 1319/ 5600 batches | train loss 0.5518603 +| epoch 1 | 1323/ 5600 batches | train loss 0.4427712 +| epoch 1 | 1327/ 5600 batches | train loss 0.5185073 +| epoch 1 | 1331/ 5600 batches | train loss 0.4860440 +| epoch 1 | 1335/ 5600 batches | train loss 0.4619315 +| epoch 1 | 1339/ 5600 batches | train loss 0.4920947 +| epoch 1 | 1343/ 5600 batches | train loss 0.4717280 +| epoch 1 | 1347/ 5600 batches | train loss 0.3979060 +| epoch 1 | 1351/ 5600 batches | train loss 0.4566374 +| epoch 1 | 1355/ 5600 batches | train loss 0.4044107 +| epoch 1 | 1359/ 5600 batches | train loss 0.4022487 +| epoch 1 | 1363/ 5600 batches | train loss 0.4837241 +| epoch 1 | 1367/ 5600 batches | train loss 0.5244812 +| epoch 1 | 1371/ 5600 batches | train loss 0.7190092 +| epoch 1 | 1375/ 5600 batches | train loss 0.5224617 +| epoch 1 | 1379/ 5600 batches | train loss 0.5262831 +| epoch 1 | 1383/ 5600 batches | train loss 0.5441014 +| epoch 1 | 1387/ 5600 batches | train loss 0.4914721 +| epoch 1 | 1391/ 5600 batches | train loss 0.4351192 +| epoch 1 | 1395/ 5600 batches | train loss 0.3658772 +| epoch 1 | 1399/ 5600 batches | train loss 0.4800627 +| epoch 1 | 1403/ 5600 batches | train loss 0.5224352 +| epoch 1 | 1407/ 5600 batches | train loss 0.4772417 +| epoch 1 | 1411/ 5600 batches | train loss 0.3999725 +| epoch 1 | 1415/ 5600 batches | train loss 0.4156454 +| epoch 1 | 1419/ 5600 batches | train loss 0.4722460 +| epoch 1 | 1423/ 5600 batches | train loss 0.4746714 +| epoch 1 | 1427/ 5600 batches | train loss 0.4957118 +| epoch 1 | 1431/ 5600 batches | train loss 0.4067321 +| epoch 1 | 1435/ 5600 batches | train loss 0.4103143 +| epoch 1 | 1439/ 5600 batches | train loss 0.4885409 +| epoch 1 | 1443/ 5600 batches | train loss 0.4834242 +| epoch 1 | 1447/ 5600 batches | train loss 0.4228095 +| epoch 1 | 1451/ 5600 batches | train loss 0.5318086 +| epoch 1 | 1455/ 5600 batches | train loss 0.4575752 +| epoch 1 | 1459/ 5600 batches | train loss 0.5005083 +| epoch 1 | 1463/ 5600 batches | train loss 0.4941356 +| epoch 1 | 1467/ 5600 batches | train loss 0.5618662 +| epoch 1 | 1471/ 5600 batches | train loss 0.3949169 +| epoch 1 | 1475/ 5600 batches | train loss 0.5009227 +| epoch 1 | 1479/ 5600 batches | train loss 0.4854052 +| epoch 1 | 1483/ 5600 batches | train loss 0.4807441 +| epoch 1 | 1487/ 5600 batches | train loss 0.6564406 +| epoch 1 | 1491/ 5600 batches | train loss 0.5367171 +| epoch 1 | 1495/ 5600 batches | train loss 0.5143370 +| epoch 1 | 1499/ 5600 batches | train loss 0.4497066 +| epoch 1 | 1503/ 5600 batches | train loss 0.5583011 +| epoch 1 | 1507/ 5600 batches | train loss 0.5290906 +| epoch 1 | 1511/ 5600 batches | train loss 0.4627480 +| epoch 1 | 1515/ 5600 batches | train loss 0.3973831 +| epoch 1 | 1519/ 5600 batches | train loss 0.4644001 +| epoch 1 | 1523/ 5600 batches | train loss 0.4948011 +| epoch 1 | 1527/ 5600 batches | train loss 0.4602465 +| epoch 1 | 1531/ 5600 batches | train loss 0.4623015 +| epoch 1 | 1535/ 5600 batches | train loss 0.4720019 +| epoch 1 | 1539/ 5600 batches | train loss 0.4056984 +| epoch 1 | 1543/ 5600 batches | train loss 0.5327101 +| epoch 1 | 1547/ 5600 batches | train loss 0.4518654 +| epoch 1 | 1551/ 5600 batches | train loss 0.4734226 +| epoch 1 | 1555/ 5600 batches | train loss 0.6510432 +| epoch 1 | 1559/ 5600 batches | train loss 0.4495581 +| epoch 1 | 1563/ 5600 batches | train loss 0.5430139 +| epoch 1 | 1567/ 5600 batches | train loss 0.4623941 +| epoch 1 | 1571/ 5600 batches | train loss 0.4710521 +| epoch 1 | 1575/ 5600 batches | train loss 0.4981773 +| epoch 1 | 1579/ 5600 batches | train loss 0.4721211 +| epoch 1 | 1583/ 5600 batches | train loss 0.4900665 +| epoch 1 | 1587/ 5600 batches | train loss 0.5729754 +| epoch 1 | 1591/ 5600 batches | train loss 0.4854618 +| epoch 1 | 1595/ 5600 batches | train loss 0.4784206 +| epoch 1 | 1599/ 5600 batches | train loss 0.4336490 +| epoch 1 | 1603/ 5600 batches | train loss 0.4873249 +| epoch 1 | 1607/ 5600 batches | train loss 0.4015589 +| epoch 1 | 1611/ 5600 batches | train loss 0.5375119 +| epoch 1 | 1615/ 5600 batches | train loss 0.4753075 +| epoch 1 | 1619/ 5600 batches | train loss 0.5450990 +| epoch 1 | 1623/ 5600 batches | train loss 0.4245117 +| epoch 1 | 1627/ 5600 batches | train loss 0.3608338 +| epoch 1 | 1631/ 5600 batches | train loss 0.5190017 +| epoch 1 | 1635/ 5600 batches | train loss 0.4742129 +| epoch 1 | 1639/ 5600 batches | train loss 0.4762683 +| epoch 1 | 1643/ 5600 batches | train loss 0.3906673 +| epoch 1 | 1647/ 5600 batches | train loss 0.5742714 +| epoch 1 | 1651/ 5600 batches | train loss 0.5677395 +| epoch 1 | 1655/ 5600 batches | train loss 0.5178397 +| epoch 1 | 1659/ 5600 batches | train loss 0.4866145 +| epoch 1 | 1663/ 5600 batches | train loss 0.5053368 +| epoch 1 | 1667/ 5600 batches | train loss 0.4259848 +| epoch 1 | 1671/ 5600 batches | train loss 0.4461163 +| epoch 1 | 1675/ 5600 batches | train loss 0.3446868 +| epoch 1 | 1679/ 5600 batches | train loss 0.6104684 +| epoch 1 | 1683/ 5600 batches | train loss 0.4232045 +| epoch 1 | 1687/ 5600 batches | train loss 0.4653296 +| epoch 1 | 1691/ 5600 batches | train loss 0.4840508 +| epoch 1 | 1695/ 5600 batches | train loss 0.4211758 +| epoch 1 | 1699/ 5600 batches | train loss 0.3737716 +| epoch 1 | 1703/ 5600 batches | train loss 0.5131824 +| epoch 1 | 1707/ 5600 batches | train loss 0.5297855 +| epoch 1 | 1711/ 5600 batches | train loss 0.4676027 +| epoch 1 | 1715/ 5600 batches | train loss 0.5764700 +| epoch 1 | 1719/ 5600 batches | train loss 0.3974354 +| epoch 1 | 1723/ 5600 batches | train loss 0.7416838 +| epoch 1 | 1727/ 5600 batches | train loss 0.4893106 +| epoch 1 | 1731/ 5600 batches | train loss 0.4394752 +| epoch 1 | 1735/ 5600 batches | train loss 0.6255120 +| epoch 1 | 1739/ 5600 batches | train loss 0.5411444 +| epoch 1 | 1743/ 5600 batches | train loss 0.4823126 +| epoch 1 | 1747/ 5600 batches | train loss 0.3946345 +| epoch 1 | 1751/ 5600 batches | train loss 0.5538396 +| epoch 1 | 1755/ 5600 batches | train loss 0.4993849 +| epoch 1 | 1759/ 5600 batches | train loss 0.3752137 +| epoch 1 | 1763/ 5600 batches | train loss 0.3969754 +| epoch 1 | 1767/ 5600 batches | train loss 0.4498310 +| epoch 1 | 1771/ 5600 batches | train loss 0.4654546 +| epoch 1 | 1775/ 5600 batches | train loss 0.4551740 +| epoch 1 | 1779/ 5600 batches | train loss 0.4665406 +| epoch 1 | 1783/ 5600 batches | train loss 0.4613776 +| epoch 1 | 1787/ 5600 batches | train loss 0.6039833 +| epoch 1 | 1791/ 5600 batches | train loss 0.5540807 +| epoch 1 | 1795/ 5600 batches | train loss 0.4657132 +| epoch 1 | 1799/ 5600 batches | train loss 0.4377107 +| epoch 1 | 1803/ 5600 batches | train loss 0.3915895 +| epoch 1 | 1807/ 5600 batches | train loss 0.3572749 +| epoch 1 | 1811/ 5600 batches | train loss 0.3750111 +| epoch 1 | 1815/ 5600 batches | train loss 0.4547806 +| epoch 1 | 1819/ 5600 batches | train loss 0.5236828 +| epoch 1 | 1823/ 5600 batches | train loss 0.4617789 +| epoch 1 | 1827/ 5600 batches | train loss 0.4365767 +| epoch 1 | 1831/ 5600 batches | train loss 0.5345322 +| epoch 1 | 1835/ 5600 batches | train loss 0.4647672 +| epoch 1 | 1839/ 5600 batches | train loss 0.4565738 +| epoch 1 | 1843/ 5600 batches | train loss 0.4933134 +| epoch 1 | 1847/ 5600 batches | train loss 0.4689440 +| epoch 1 | 1851/ 5600 batches | train loss 0.3667986 +| epoch 1 | 1855/ 5600 batches | train loss 0.3918616 +| epoch 1 | 1859/ 5600 batches | train loss 0.5323954 +| epoch 1 | 1863/ 5600 batches | train loss 0.4322564 +| epoch 1 | 1867/ 5600 batches | train loss 0.4534891 +| epoch 1 | 1871/ 5600 batches | train loss 0.5089151 +| epoch 1 | 1875/ 5600 batches | train loss 0.3523630 +| epoch 1 | 1879/ 5600 batches | train loss 0.5225185 +| epoch 1 | 1883/ 5600 batches | train loss 0.5527865 +| epoch 1 | 1887/ 5600 batches | train loss 0.3922178 +| epoch 1 | 1891/ 5600 batches | train loss 0.5813125 +| epoch 1 | 1895/ 5600 batches | train loss 0.4585605 +| epoch 1 | 1899/ 5600 batches | train loss 0.4540738 +| epoch 1 | 1903/ 5600 batches | train loss 0.3697260 +| epoch 1 | 1907/ 5600 batches | train loss 0.5941091 +| epoch 1 | 1911/ 5600 batches | train loss 0.4324286 +| epoch 1 | 1915/ 5600 batches | train loss 0.5283126 +| epoch 1 | 1919/ 5600 batches | train loss 0.5618026 +| epoch 1 | 1923/ 5600 batches | train loss 0.5024570 +| epoch 1 | 1927/ 5600 batches | train loss 0.4912200 +| epoch 1 | 1931/ 5600 batches | train loss 0.5150352 +| epoch 1 | 1935/ 5600 batches | train loss 0.4268386 +| epoch 1 | 1939/ 5600 batches | train loss 0.4944856 +| epoch 1 | 1943/ 5600 batches | train loss 0.4758014 +| epoch 1 | 1947/ 5600 batches | train loss 0.4205822 +| epoch 1 | 1951/ 5600 batches | train loss 0.5110755 +| epoch 1 | 1955/ 5600 batches | train loss 0.4901678 +| epoch 1 | 1959/ 5600 batches | train loss 0.5310152 +| epoch 1 | 1963/ 5600 batches | train loss 0.3921106 +| epoch 1 | 1967/ 5600 batches | train loss 0.3948178 +| epoch 1 | 1971/ 5600 batches | train loss 0.4252369 +| epoch 1 | 1975/ 5600 batches | train loss 0.5261616 +| epoch 1 | 1979/ 5600 batches | train loss 0.4818265 +| epoch 1 | 1983/ 5600 batches | train loss 0.4259442 +| epoch 1 | 1987/ 5600 batches | train loss 0.4243112 +| epoch 1 | 1991/ 5600 batches | train loss 0.4824405 +| epoch 1 | 1995/ 5600 batches | train loss 0.4466439 +| epoch 1 | 1999/ 5600 batches | train loss 0.5105960 +| epoch 1 | 2003/ 5600 batches | train loss 0.4760236 +| epoch 1 | 2007/ 5600 batches | train loss 0.3511917 +| epoch 1 | 2011/ 5600 batches | train loss 0.4834991 +| epoch 1 | 2015/ 5600 batches | train loss 0.4543375 +| epoch 1 | 2019/ 5600 batches | train loss 0.4758838 +| epoch 1 | 2023/ 5600 batches | train loss 0.4947075 +| epoch 1 | 2027/ 5600 batches | train loss 0.5835710 +| epoch 1 | 2031/ 5600 batches | train loss 0.4909217 +| epoch 1 | 2035/ 5600 batches | train loss 0.5966637 +| epoch 1 | 2039/ 5600 batches | train loss 0.4441630 +| epoch 1 | 2043/ 5600 batches | train loss 0.5682949 +| epoch 1 | 2047/ 5600 batches | train loss 0.5778780 +| epoch 1 | 2051/ 5600 batches | train loss 0.4621871 +| epoch 1 | 2055/ 5600 batches | train loss 0.4432112 +| epoch 1 | 2059/ 5600 batches | train loss 0.5007496 +| epoch 1 | 2063/ 5600 batches | train loss 0.5721867 +| epoch 1 | 2067/ 5600 batches | train loss 0.5016954 +| epoch 1 | 2071/ 5600 batches | train loss 0.4476094 +| epoch 1 | 2075/ 5600 batches | train loss 0.4867335 +| epoch 1 | 2079/ 5600 batches | train loss 0.4178138 +| epoch 1 | 2083/ 5600 batches | train loss 0.4919722 +| epoch 1 | 2087/ 5600 batches | train loss 0.4499795 +| epoch 1 | 2091/ 5600 batches | train loss 0.5188857 +| epoch 1 | 2095/ 5600 batches | train loss 0.5257286 +| epoch 1 | 2099/ 5600 batches | train loss 0.4820985 +| epoch 1 | 2103/ 5600 batches | train loss 0.4873977 +| epoch 1 | 2107/ 5600 batches | train loss 0.4263785 +| epoch 1 | 2111/ 5600 batches | train loss 0.4463661 +| epoch 1 | 2115/ 5600 batches | train loss 0.4788821 +| epoch 1 | 2119/ 5600 batches | train loss 0.4635593 +| epoch 1 | 2123/ 5600 batches | train loss 0.3972196 +| epoch 1 | 2127/ 5600 batches | train loss 0.3951063 +| epoch 1 | 2131/ 5600 batches | train loss 0.4523551 +| epoch 1 | 2135/ 5600 batches | train loss 0.4368637 +| epoch 1 | 2139/ 5600 batches | train loss 0.4017196 +| epoch 1 | 2143/ 5600 batches | train loss 0.4056234 +| epoch 1 | 2147/ 5600 batches | train loss 0.3784243 +| epoch 1 | 2151/ 5600 batches | train loss 0.5460722 +| epoch 1 | 2155/ 5600 batches | train loss 0.8132807 +| epoch 1 | 2159/ 5600 batches | train loss 0.5848260 +| epoch 1 | 2163/ 5600 batches | train loss 0.3779948 +| epoch 1 | 2167/ 5600 batches | train loss 0.5490317 +| epoch 1 | 2171/ 5600 batches | train loss 0.6026075 +| epoch 1 | 2175/ 5600 batches | train loss 0.4516944 +| epoch 1 | 2179/ 5600 batches | train loss 0.4353039 +| epoch 1 | 2183/ 5600 batches | train loss 0.5361631 +| epoch 1 | 2187/ 5600 batches | train loss 0.4225531 +| epoch 1 | 2191/ 5600 batches | train loss 0.5908254 +| epoch 1 | 2195/ 5600 batches | train loss 0.4317517 +| epoch 1 | 2199/ 5600 batches | train loss 0.5352062 +| epoch 1 | 2203/ 5600 batches | train loss 0.3467984 +| epoch 1 | 2207/ 5600 batches | train loss 0.4343090 +| epoch 1 | 2211/ 5600 batches | train loss 0.4614492 +| epoch 1 | 2215/ 5600 batches | train loss 0.3810523 +| epoch 1 | 2219/ 5600 batches | train loss 0.4033041 +| epoch 1 | 2223/ 5600 batches | train loss 0.5046809 +| epoch 1 | 2227/ 5600 batches | train loss 0.5853559 +| epoch 1 | 2231/ 5600 batches | train loss 0.5417895 +| epoch 1 | 2235/ 5600 batches | train loss 0.4259868 +| epoch 1 | 2239/ 5600 batches | train loss 0.4264571 +| epoch 1 | 2243/ 5600 batches | train loss 0.4140121 +| epoch 1 | 2247/ 5600 batches | train loss 0.5119973 +| epoch 1 | 2251/ 5600 batches | train loss 0.4292823 +| epoch 1 | 2255/ 5600 batches | train loss 0.4985166 +| epoch 1 | 2259/ 5600 batches | train loss 0.4079715 +| epoch 1 | 2263/ 5600 batches | train loss 0.4414428 +| epoch 1 | 2267/ 5600 batches | train loss 0.4044074 +| epoch 1 | 2271/ 5600 batches | train loss 0.4803379 +| epoch 1 | 2275/ 5600 batches | train loss 0.4640632 +| epoch 1 | 2279/ 5600 batches | train loss 0.4928516 +| epoch 1 | 2283/ 5600 batches | train loss 0.4709914 +| epoch 1 | 2287/ 5600 batches | train loss 0.4505612 +| epoch 1 | 2291/ 5600 batches | train loss 0.4310411 +| epoch 1 | 2295/ 5600 batches | train loss 0.5268811 +| epoch 1 | 2299/ 5600 batches | train loss 0.6006271 +| epoch 1 | 2303/ 5600 batches | train loss 0.5608326 +| epoch 1 | 2307/ 5600 batches | train loss 0.5134827 +| epoch 1 | 2311/ 5600 batches | train loss 0.5152854 +| epoch 1 | 2315/ 5600 batches | train loss 0.3133149 +| epoch 1 | 2319/ 5600 batches | train loss 0.4257945 +| epoch 1 | 2323/ 5600 batches | train loss 0.3732975 +| epoch 1 | 2327/ 5600 batches | train loss 0.4824586 +| epoch 1 | 2331/ 5600 batches | train loss 0.4843015 +| epoch 1 | 2335/ 5600 batches | train loss 0.4270354 +| epoch 1 | 2339/ 5600 batches | train loss 0.4621478 +| epoch 1 | 2343/ 5600 batches | train loss 0.4041989 +| epoch 1 | 2347/ 5600 batches | train loss 0.5063095 +| epoch 1 | 2351/ 5600 batches | train loss 0.4483372 +| epoch 1 | 2355/ 5600 batches | train loss 0.4613212 +| epoch 1 | 2359/ 5600 batches | train loss 0.3969824 +| epoch 1 | 2363/ 5600 batches | train loss 0.5300509 +| epoch 1 | 2367/ 5600 batches | train loss 0.4233947 +| epoch 1 | 2371/ 5600 batches | train loss 0.4787352 +| epoch 1 | 2375/ 5600 batches | train loss 0.4353071 +| epoch 1 | 2379/ 5600 batches | train loss 0.3353725 +| epoch 1 | 2383/ 5600 batches | train loss 0.4099729 +| epoch 1 | 2387/ 5600 batches | train loss 0.4597946 +| epoch 1 | 2391/ 5600 batches | train loss 0.4837102 +| epoch 1 | 2395/ 5600 batches | train loss 0.4213898 +| epoch 1 | 2399/ 5600 batches | train loss 0.5354124 +| epoch 1 | 2403/ 5600 batches | train loss 0.5240568 +| epoch 1 | 2407/ 5600 batches | train loss 0.5570633 +| epoch 1 | 2411/ 5600 batches | train loss 0.4330457 +| epoch 1 | 2415/ 5600 batches | train loss 0.6662574 +| epoch 1 | 2419/ 5600 batches | train loss 0.4957835 +| epoch 1 | 2423/ 5600 batches | train loss 0.5484108 +| epoch 1 | 2427/ 5600 batches | train loss 0.4544025 +| epoch 1 | 2431/ 5600 batches | train loss 0.5228748 +| epoch 1 | 2435/ 5600 batches | train loss 0.5120775 +| epoch 1 | 2439/ 5600 batches | train loss 0.4807270 +| epoch 1 | 2443/ 5600 batches | train loss 0.4769019 +| epoch 1 | 2447/ 5600 batches | train loss 0.4281411 +| epoch 1 | 2451/ 5600 batches | train loss 0.3920985 +| epoch 1 | 2455/ 5600 batches | train loss 0.4212230 +| epoch 1 | 2459/ 5600 batches | train loss 0.5138054 +| epoch 1 | 2463/ 5600 batches | train loss 0.5325805 +| epoch 1 | 2467/ 5600 batches | train loss 0.4144287 +| epoch 1 | 2471/ 5600 batches | train loss 0.4216407 +| epoch 1 | 2475/ 5600 batches | train loss 0.4998453 +| epoch 1 | 2479/ 5600 batches | train loss 0.4674722 +| epoch 1 | 2483/ 5600 batches | train loss 0.4658223 +| epoch 1 | 2487/ 5600 batches | train loss 0.3060280 +| epoch 1 | 2491/ 5600 batches | train loss 0.6255093 +| epoch 1 | 2495/ 5600 batches | train loss 0.5905625 +| epoch 1 | 2499/ 5600 batches | train loss 0.3648643 +| epoch 1 | 2503/ 5600 batches | train loss 0.4086157 +| epoch 1 | 2507/ 5600 batches | train loss 0.4265554 +| epoch 1 | 2511/ 5600 batches | train loss 0.4758821 +| epoch 1 | 2515/ 5600 batches | train loss 0.5645791 +| epoch 1 | 2519/ 5600 batches | train loss 0.4621581 +| epoch 1 | 2523/ 5600 batches | train loss 0.5189205 +| epoch 1 | 2527/ 5600 batches | train loss 0.4009226 +| epoch 1 | 2531/ 5600 batches | train loss 0.5640410 +| epoch 1 | 2535/ 5600 batches | train loss 0.4459904 +| epoch 1 | 2539/ 5600 batches | train loss 0.4579313 +| epoch 1 | 2543/ 5600 batches | train loss 0.4652305 +| epoch 1 | 2547/ 5600 batches | train loss 0.4081080 +| epoch 1 | 2551/ 5600 batches | train loss 0.4496194 +| epoch 1 | 2555/ 5600 batches | train loss 0.4653063 +| epoch 1 | 2559/ 5600 batches | train loss 0.4615616 +| epoch 1 | 2563/ 5600 batches | train loss 0.5258319 +| epoch 1 | 2567/ 5600 batches | train loss 0.5568210 +| epoch 1 | 2571/ 5600 batches | train loss 0.4317974 +| epoch 1 | 2575/ 5600 batches | train loss 0.4137252 +| epoch 1 | 2579/ 5600 batches | train loss 0.4562183 +| epoch 1 | 2583/ 5600 batches | train loss 0.5562797 +| epoch 1 | 2587/ 5600 batches | train loss 0.4702410 +| epoch 1 | 2591/ 5600 batches | train loss 0.4270251 +| epoch 1 | 2595/ 5600 batches | train loss 0.3895694 +| epoch 1 | 2599/ 5600 batches | train loss 0.4533715 +| epoch 1 | 2603/ 5600 batches | train loss 0.4713866 +| epoch 1 | 2607/ 5600 batches | train loss 0.3529832 +| epoch 1 | 2611/ 5600 batches | train loss 0.3975232 +| epoch 1 | 2615/ 5600 batches | train loss 0.5237927 +| epoch 1 | 2619/ 5600 batches | train loss 0.4942499 +| epoch 1 | 2623/ 5600 batches | train loss 0.4756626 +| epoch 1 | 2627/ 5600 batches | train loss 0.4416001 +| epoch 1 | 2631/ 5600 batches | train loss 0.3830590 +| epoch 1 | 2635/ 5600 batches | train loss 0.4846966 +| epoch 1 | 2639/ 5600 batches | train loss 0.3884430 +| epoch 1 | 2643/ 5600 batches | train loss 0.4416148 +| epoch 1 | 2647/ 5600 batches | train loss 0.3714934 +| epoch 1 | 2651/ 5600 batches | train loss 0.4612607 +| epoch 1 | 2655/ 5600 batches | train loss 0.5904009 +| epoch 1 | 2659/ 5600 batches | train loss 0.3666878 +| epoch 1 | 2663/ 5600 batches | train loss 0.5413646 +| epoch 1 | 2667/ 5600 batches | train loss 0.4388816 +| epoch 1 | 2671/ 5600 batches | train loss 0.4810251 +| epoch 1 | 2675/ 5600 batches | train loss 0.4501908 +| epoch 1 | 2679/ 5600 batches | train loss 0.4797720 +| epoch 1 | 2683/ 5600 batches | train loss 0.5064560 +| epoch 1 | 2687/ 5600 batches | train loss 0.5450571 +| epoch 1 | 2691/ 5600 batches | train loss 0.4564881 +| epoch 1 | 2695/ 5600 batches | train loss 0.4889610 +| epoch 1 | 2699/ 5600 batches | train loss 0.4224920 +| epoch 1 | 2703/ 5600 batches | train loss 0.4495855 +| epoch 1 | 2707/ 5600 batches | train loss 0.3803179 +| epoch 1 | 2711/ 5600 batches | train loss 0.5015252 +| epoch 1 | 2715/ 5600 batches | train loss 0.4612293 +| epoch 1 | 2719/ 5600 batches | train loss 0.4729725 +| epoch 1 | 2723/ 5600 batches | train loss 0.4300107 +| epoch 1 | 2727/ 5600 batches | train loss 0.4085605 +| epoch 1 | 2731/ 5600 batches | train loss 0.4584942 +| epoch 1 | 2735/ 5600 batches | train loss 0.4860761 +| epoch 1 | 2739/ 5600 batches | train loss 0.4041861 +| epoch 1 | 2743/ 5600 batches | train loss 0.3734671 +| epoch 1 | 2747/ 5600 batches | train loss 0.5271676 +| epoch 1 | 2751/ 5600 batches | train loss 0.3994094 +| epoch 1 | 2755/ 5600 batches | train loss 0.4229200 +| epoch 1 | 2759/ 5600 batches | train loss 0.4219907 +| epoch 1 | 2763/ 5600 batches | train loss 0.5093839 +| epoch 1 | 2767/ 5600 batches | train loss 0.5170504 +| epoch 1 | 2771/ 5600 batches | train loss 0.4455617 +| epoch 1 | 2775/ 5600 batches | train loss 0.3832216 +| epoch 1 | 2779/ 5600 batches | train loss 0.5608247 +| epoch 1 | 2783/ 5600 batches | train loss 0.4222837 +| epoch 1 | 2787/ 5600 batches | train loss 0.3769296 +| epoch 1 | 2791/ 5600 batches | train loss 0.4444249 +| epoch 1 | 2795/ 5600 batches | train loss 0.5244836 +| epoch 1 | 2799/ 5600 batches | train loss 0.3568063 +| epoch 1 | 2803/ 5600 batches | train loss 0.3691855 +| epoch 1 | 2807/ 5600 batches | train loss 0.6182540 +| epoch 1 | 2811/ 5600 batches | train loss 0.5730197 +| epoch 1 | 2815/ 5600 batches | train loss 0.4687834 +| epoch 1 | 2819/ 5600 batches | train loss 0.4570900 +| epoch 1 | 2823/ 5600 batches | train loss 0.4236129 +| epoch 1 | 2827/ 5600 batches | train loss 0.4228235 +| epoch 1 | 2831/ 5600 batches | train loss 0.4986216 +| epoch 1 | 2835/ 5600 batches | train loss 0.4483718 +| epoch 1 | 2839/ 5600 batches | train loss 0.6101334 +| epoch 1 | 2843/ 5600 batches | train loss 0.4056059 +| epoch 1 | 2847/ 5600 batches | train loss 0.3940744 +| epoch 1 | 2851/ 5600 batches | train loss 0.4482177 +| epoch 1 | 2855/ 5600 batches | train loss 0.4951231 +| epoch 1 | 2859/ 5600 batches | train loss 0.4575271 +| epoch 1 | 2863/ 5600 batches | train loss 0.4044596 +| epoch 1 | 2867/ 5600 batches | train loss 0.4308935 +| epoch 1 | 2871/ 5600 batches | train loss 0.3937408 +| epoch 1 | 2875/ 5600 batches | train loss 0.4332629 +| epoch 1 | 2879/ 5600 batches | train loss 0.4508239 +| epoch 1 | 2883/ 5600 batches | train loss 0.5342829 +| epoch 1 | 2887/ 5600 batches | train loss 0.4417273 +| epoch 1 | 2891/ 5600 batches | train loss 0.4554764 +| epoch 1 | 2895/ 5600 batches | train loss 0.4753658 +| epoch 1 | 2899/ 5600 batches | train loss 0.3999353 +| epoch 1 | 2903/ 5600 batches | train loss 0.4746965 +| epoch 1 | 2907/ 5600 batches | train loss 0.5036488 +| epoch 1 | 2911/ 5600 batches | train loss 0.3166573 +| epoch 1 | 2915/ 5600 batches | train loss 0.5955505 +| epoch 1 | 2919/ 5600 batches | train loss 0.4624256 +| epoch 1 | 2923/ 5600 batches | train loss 0.4903650 +| epoch 1 | 2927/ 5600 batches | train loss 0.4374830 +| epoch 1 | 2931/ 5600 batches | train loss 0.3950338 +| epoch 1 | 2935/ 5600 batches | train loss 0.4448671 +| epoch 1 | 2939/ 5600 batches | train loss 0.3600772 +| epoch 1 | 2943/ 5600 batches | train loss 0.4367391 +| epoch 1 | 2947/ 5600 batches | train loss 0.4930876 +| epoch 1 | 2951/ 5600 batches | train loss 0.4550186 +| epoch 1 | 2955/ 5600 batches | train loss 0.4776113 +| epoch 1 | 2959/ 5600 batches | train loss 0.5060768 +| epoch 1 | 2963/ 5600 batches | train loss 0.3780842 +| epoch 1 | 2967/ 5600 batches | train loss 0.7109404 +| epoch 1 | 2971/ 5600 batches | train loss 0.6083491 +| epoch 1 | 2975/ 5600 batches | train loss 0.4149856 +| epoch 1 | 2979/ 5600 batches | train loss 0.4798849 +| epoch 1 | 2983/ 5600 batches | train loss 0.5535545 +| epoch 1 | 2987/ 5600 batches | train loss 0.3977965 +| epoch 1 | 2991/ 5600 batches | train loss 0.3620906 +| epoch 1 | 2995/ 5600 batches | train loss 0.4404823 +| epoch 1 | 2999/ 5600 batches | train loss 0.5376676 +| epoch 1 | 3003/ 5600 batches | train loss 0.4203705 +| epoch 1 | 3007/ 5600 batches | train loss 0.5424635 +| epoch 1 | 3011/ 5600 batches | train loss 0.3775180 +| epoch 1 | 3015/ 5600 batches | train loss 0.3954435 +| epoch 1 | 3019/ 5600 batches | train loss 0.3945578 +| epoch 1 | 3023/ 5600 batches | train loss 0.5077878 +| epoch 1 | 3027/ 5600 batches | train loss 0.4284548 +| epoch 1 | 3031/ 5600 batches | train loss 0.4211437 +| epoch 1 | 3035/ 5600 batches | train loss 0.3661614 +| epoch 1 | 3039/ 5600 batches | train loss 0.4609284 +| epoch 1 | 3043/ 5600 batches | train loss 0.3612620 +| epoch 1 | 3047/ 5600 batches | train loss 0.3579297 +| epoch 1 | 3051/ 5600 batches | train loss 0.4163353 +| epoch 1 | 3055/ 5600 batches | train loss 0.4808232 +| epoch 1 | 3059/ 5600 batches | train loss 0.4703113 +| epoch 1 | 3063/ 5600 batches | train loss 0.4645382 +| epoch 1 | 3067/ 5600 batches | train loss 0.4824848 +| epoch 1 | 3071/ 5600 batches | train loss 0.4235457 +| epoch 1 | 3075/ 5600 batches | train loss 0.4960499 +| epoch 1 | 3079/ 5600 batches | train loss 0.5916886 +| epoch 1 | 3083/ 5600 batches | train loss 0.4381795 +| epoch 1 | 3087/ 5600 batches | train loss 0.4136462 +| epoch 1 | 3091/ 5600 batches | train loss 0.4615260 +| epoch 1 | 3095/ 5600 batches | train loss 0.5549889 +| epoch 1 | 3099/ 5600 batches | train loss 0.5248071 +| epoch 1 | 3103/ 5600 batches | train loss 0.5588535 +| epoch 1 | 3107/ 5600 batches | train loss 0.5168670 +| epoch 1 | 3111/ 5600 batches | train loss 0.4787083 +| epoch 1 | 3115/ 5600 batches | train loss 0.4276714 +| epoch 1 | 3119/ 5600 batches | train loss 0.5117438 +| epoch 1 | 3123/ 5600 batches | train loss 0.4327529 +| epoch 1 | 3127/ 5600 batches | train loss 0.4724131 +| epoch 1 | 3131/ 5600 batches | train loss 0.3892405 +| epoch 1 | 3135/ 5600 batches | train loss 0.5101907 +| epoch 1 | 3139/ 5600 batches | train loss 0.4340642 +| epoch 1 | 3143/ 5600 batches | train loss 0.4648277 +| epoch 1 | 3147/ 5600 batches | train loss 0.4722641 +| epoch 1 | 3151/ 5600 batches | train loss 0.5936143 +| epoch 1 | 3155/ 5600 batches | train loss 0.5178580 +| epoch 1 | 3159/ 5600 batches | train loss 0.4549387 +| epoch 1 | 3163/ 5600 batches | train loss 0.4143138 +| epoch 1 | 3167/ 5600 batches | train loss 0.5190272 +| epoch 1 | 3171/ 5600 batches | train loss 0.4392420 +| epoch 1 | 3175/ 5600 batches | train loss 0.4213494 +| epoch 1 | 3179/ 5600 batches | train loss 0.4942086 +| epoch 1 | 3183/ 5600 batches | train loss 0.5377790 +| epoch 1 | 3187/ 5600 batches | train loss 0.5356135 +| epoch 1 | 3191/ 5600 batches | train loss 0.4495867 +| epoch 1 | 3195/ 5600 batches | train loss 0.6160561 +| epoch 1 | 3199/ 5600 batches | train loss 0.5746571 +| epoch 1 | 3203/ 5600 batches | train loss 0.4305821 +| epoch 1 | 3207/ 5600 batches | train loss 0.4919342 +| epoch 1 | 3211/ 5600 batches | train loss 0.4121004 +| epoch 1 | 3215/ 5600 batches | train loss 0.3968020 +| epoch 1 | 3219/ 5600 batches | train loss 0.5263575 +| epoch 1 | 3223/ 5600 batches | train loss 0.4565450 +| epoch 1 | 3227/ 5600 batches | train loss 0.4627198 +| epoch 1 | 3231/ 5600 batches | train loss 0.4424434 +| epoch 1 | 3235/ 5600 batches | train loss 0.4587343 +| epoch 1 | 3239/ 5600 batches | train loss 0.4892044 +| epoch 1 | 3243/ 5600 batches | train loss 0.5680609 +| epoch 1 | 3247/ 5600 batches | train loss 0.3652572 +| epoch 1 | 3251/ 5600 batches | train loss 0.4479944 +| epoch 1 | 3255/ 5600 batches | train loss 0.4530227 +| epoch 1 | 3259/ 5600 batches | train loss 0.4268871 +| epoch 1 | 3263/ 5600 batches | train loss 0.4792871 +| epoch 1 | 3267/ 5600 batches | train loss 0.4492526 +| epoch 1 | 3271/ 5600 batches | train loss 0.3353665 +| epoch 1 | 3275/ 5600 batches | train loss 0.3969139 +| epoch 1 | 3279/ 5600 batches | train loss 0.4721785 +| epoch 1 | 3283/ 5600 batches | train loss 0.4488181 +| epoch 1 | 3287/ 5600 batches | train loss 0.4871584 +| epoch 1 | 3291/ 5600 batches | train loss 0.4901597 +| epoch 1 | 3295/ 5600 batches | train loss 0.4535862 +| epoch 1 | 3299/ 5600 batches | train loss 0.4146205 +| epoch 1 | 3303/ 5600 batches | train loss 0.3979863 +| epoch 1 | 3307/ 5600 batches | train loss 0.5379314 +| epoch 1 | 3311/ 5600 batches | train loss 0.4538651 +| epoch 1 | 3315/ 5600 batches | train loss 0.5057446 +| epoch 1 | 3319/ 5600 batches | train loss 0.5092642 +| epoch 1 | 3323/ 5600 batches | train loss 0.4883169 +| epoch 1 | 3327/ 5600 batches | train loss 0.4220133 +| epoch 1 | 3331/ 5600 batches | train loss 0.3803871 +| epoch 1 | 3335/ 5600 batches | train loss 0.3555273 +| epoch 1 | 3339/ 5600 batches | train loss 0.5400752 +| epoch 1 | 3343/ 5600 batches | train loss 0.3750537 +| epoch 1 | 3347/ 5600 batches | train loss 0.4818487 +| epoch 1 | 3351/ 5600 batches | train loss 0.5533177 +| epoch 1 | 3355/ 5600 batches | train loss 0.4915430 +| epoch 1 | 3359/ 5600 batches | train loss 0.4386976 +| epoch 1 | 3363/ 5600 batches | train loss 0.5222720 +| epoch 1 | 3367/ 5600 batches | train loss 0.4700283 +| epoch 1 | 3371/ 5600 batches | train loss 0.4265604 +| epoch 1 | 3375/ 5600 batches | train loss 0.4842393 +| epoch 1 | 3379/ 5600 batches | train loss 0.4100693 +| epoch 1 | 3383/ 5600 batches | train loss 0.4431047 +| epoch 1 | 3387/ 5600 batches | train loss 0.4920318 +| epoch 1 | 3391/ 5600 batches | train loss 0.4697558 +| epoch 1 | 3395/ 5600 batches | train loss 0.5618541 +| epoch 1 | 3399/ 5600 batches | train loss 0.3708346 +| epoch 1 | 3403/ 5600 batches | train loss 0.4574845 +| epoch 1 | 3407/ 5600 batches | train loss 0.5018492 +| epoch 1 | 3411/ 5600 batches | train loss 0.5387220 +| epoch 1 | 3415/ 5600 batches | train loss 0.4440482 +| epoch 1 | 3419/ 5600 batches | train loss 0.3832965 +| epoch 1 | 3423/ 5600 batches | train loss 0.5091390 +| epoch 1 | 3427/ 5600 batches | train loss 0.4213623 +| epoch 1 | 3431/ 5600 batches | train loss 0.4788649 +| epoch 1 | 3435/ 5600 batches | train loss 0.4677851 +| epoch 1 | 3439/ 5600 batches | train loss 0.4979328 +| epoch 1 | 3443/ 5600 batches | train loss 0.3676080 +| epoch 1 | 3447/ 5600 batches | train loss 0.3807502 +| epoch 1 | 3451/ 5600 batches | train loss 0.4245819 +| epoch 1 | 3455/ 5600 batches | train loss 0.4534436 +| epoch 1 | 3459/ 5600 batches | train loss 0.5431814 +| epoch 1 | 3463/ 5600 batches | train loss 0.4726817 +| epoch 1 | 3467/ 5600 batches | train loss 0.5061088 +| epoch 1 | 3471/ 5600 batches | train loss 0.5175202 +| epoch 1 | 3475/ 5600 batches | train loss 0.5915965 +| epoch 1 | 3479/ 5600 batches | train loss 0.4566636 +| epoch 1 | 3483/ 5600 batches | train loss 0.3716994 +| epoch 1 | 3487/ 5600 batches | train loss 0.5175923 +| epoch 1 | 3491/ 5600 batches | train loss 0.5832630 +| epoch 1 | 3495/ 5600 batches | train loss 0.5123756 +| epoch 1 | 3499/ 5600 batches | train loss 0.5240794 +| epoch 1 | 3503/ 5600 batches | train loss 0.4569184 +| epoch 1 | 3507/ 5600 batches | train loss 0.4873781 +| epoch 1 | 3511/ 5600 batches | train loss 0.4157811 +| epoch 1 | 3515/ 5600 batches | train loss 0.4928941 +| epoch 1 | 3519/ 5600 batches | train loss 0.4861958 +| epoch 1 | 3523/ 5600 batches | train loss 0.5260471 +| epoch 1 | 3527/ 5600 batches | train loss 0.4465203 +| epoch 1 | 3531/ 5600 batches | train loss 0.5064341 +| epoch 1 | 3535/ 5600 batches | train loss 0.4616203 +| epoch 1 | 3539/ 5600 batches | train loss 0.4077640 +| epoch 1 | 3543/ 5600 batches | train loss 0.4613791 +| epoch 1 | 3547/ 5600 batches | train loss 0.4444009 +| epoch 1 | 3551/ 5600 batches | train loss 0.4545948 +| epoch 1 | 3555/ 5600 batches | train loss 0.5004340 +| epoch 1 | 3559/ 5600 batches | train loss 0.4669823 +| epoch 1 | 3563/ 5600 batches | train loss 0.3726542 +| epoch 1 | 3567/ 5600 batches | train loss 0.4139763 +| epoch 1 | 3571/ 5600 batches | train loss 0.4232709 +| epoch 1 | 3575/ 5600 batches | train loss 0.4935820 +| epoch 1 | 3579/ 5600 batches | train loss 0.4040625 +| epoch 1 | 3583/ 5600 batches | train loss 0.4260690 +| epoch 1 | 3587/ 5600 batches | train loss 0.4544157 +| epoch 1 | 3591/ 5600 batches | train loss 0.5603418 +| epoch 1 | 3595/ 5600 batches | train loss 0.4242870 +| epoch 1 | 3599/ 5600 batches | train loss 0.5732625 +| epoch 1 | 3603/ 5600 batches | train loss 0.4037177 +| epoch 1 | 3607/ 5600 batches | train loss 0.4599219 +| epoch 1 | 3611/ 5600 batches | train loss 0.3940868 +| epoch 1 | 3615/ 5600 batches | train loss 0.4087777 +| epoch 1 | 3619/ 5600 batches | train loss 0.5324863 +| epoch 1 | 3623/ 5600 batches | train loss 0.4436680 +| epoch 1 | 3627/ 5600 batches | train loss 0.3706204 +| epoch 1 | 3631/ 5600 batches | train loss 0.3626059 +| epoch 1 | 3635/ 5600 batches | train loss 0.3965943 +| epoch 1 | 3639/ 5600 batches | train loss 0.4177159 +| epoch 1 | 3643/ 5600 batches | train loss 0.4953332 +| epoch 1 | 3647/ 5600 batches | train loss 0.4855038 +| epoch 1 | 3651/ 5600 batches | train loss 0.4454514 +| epoch 1 | 3655/ 5600 batches | train loss 0.4502287 +| epoch 1 | 3659/ 5600 batches | train loss 0.5623422 +| epoch 1 | 3663/ 5600 batches | train loss 0.4016857 +| epoch 1 | 3667/ 5600 batches | train loss 0.4323574 +| epoch 1 | 3671/ 5600 batches | train loss 0.5016009 +| epoch 1 | 3675/ 5600 batches | train loss 0.4959015 +| epoch 1 | 3679/ 5600 batches | train loss 0.4462346 +| epoch 1 | 3683/ 5600 batches | train loss 0.4850987 +| epoch 1 | 3687/ 5600 batches | train loss 0.4600517 +| epoch 1 | 3691/ 5600 batches | train loss 0.3181780 +| epoch 1 | 3695/ 5600 batches | train loss 0.4067253 +| epoch 1 | 3699/ 5600 batches | train loss 0.5123846 +| epoch 1 | 3703/ 5600 batches | train loss 0.4279406 +| epoch 1 | 3707/ 5600 batches | train loss 0.4221320 +| epoch 1 | 3711/ 5600 batches | train loss 0.4323111 +| epoch 1 | 3715/ 5600 batches | train loss 0.5591534 +| epoch 1 | 3719/ 5600 batches | train loss 0.4096780 +| epoch 1 | 3723/ 5600 batches | train loss 0.4597140 +| epoch 1 | 3727/ 5600 batches | train loss 0.4507034 +| epoch 1 | 3731/ 5600 batches | train loss 0.5600961 +| epoch 1 | 3735/ 5600 batches | train loss 0.4116622 +| epoch 1 | 3739/ 5600 batches | train loss 0.3805999 +| epoch 1 | 3743/ 5600 batches | train loss 0.3782209 +| epoch 1 | 3747/ 5600 batches | train loss 0.4406242 +| epoch 1 | 3751/ 5600 batches | train loss 0.3924898 +| epoch 1 | 3755/ 5600 batches | train loss 0.4052637 +| epoch 1 | 3759/ 5600 batches | train loss 0.4927862 +| epoch 1 | 3763/ 5600 batches | train loss 0.4904290 +| epoch 1 | 3767/ 5600 batches | train loss 0.5691465 +| epoch 1 | 3771/ 5600 batches | train loss 0.4659338 +| epoch 1 | 3775/ 5600 batches | train loss 0.5108979 +| epoch 1 | 3779/ 5600 batches | train loss 0.5426710 +| epoch 1 | 3783/ 5600 batches | train loss 0.4071932 +| epoch 1 | 3787/ 5600 batches | train loss 0.4789529 +| epoch 1 | 3791/ 5600 batches | train loss 0.4622443 +| epoch 1 | 3795/ 5600 batches | train loss 0.5286264 +| epoch 1 | 3799/ 5600 batches | train loss 0.5436944 +| epoch 1 | 3803/ 5600 batches | train loss 0.4256968 +| epoch 1 | 3807/ 5600 batches | train loss 0.4946608 +| epoch 1 | 3811/ 5600 batches | train loss 0.4377457 +| epoch 1 | 3815/ 5600 batches | train loss 0.5117986 +| epoch 1 | 3819/ 5600 batches | train loss 0.4147567 +| epoch 1 | 3823/ 5600 batches | train loss 0.3549392 +| epoch 1 | 3827/ 5600 batches | train loss 0.5440488 +| epoch 1 | 3831/ 5600 batches | train loss 0.5124861 +| epoch 1 | 3835/ 5600 batches | train loss 0.4940816 +| epoch 1 | 3839/ 5600 batches | train loss 0.5619411 +| epoch 1 | 3843/ 5600 batches | train loss 0.4481805 +| epoch 1 | 3847/ 5600 batches | train loss 0.4330665 +| epoch 1 | 3851/ 5600 batches | train loss 0.4281029 +| epoch 1 | 3855/ 5600 batches | train loss 0.3794211 +| epoch 1 | 3859/ 5600 batches | train loss 0.5454891 +| epoch 1 | 3863/ 5600 batches | train loss 0.4714652 +| epoch 1 | 3867/ 5600 batches | train loss 0.4583532 +| epoch 1 | 3871/ 5600 batches | train loss 0.4745832 +| epoch 1 | 3875/ 5600 batches | train loss 0.4202687 +| epoch 1 | 3879/ 5600 batches | train loss 0.4740205 +| epoch 1 | 3883/ 5600 batches | train loss 0.4874060 +| epoch 1 | 3887/ 5600 batches | train loss 0.4820097 +| epoch 1 | 3891/ 5600 batches | train loss 0.4497069 +| epoch 1 | 3895/ 5600 batches | train loss 0.4590911 +| epoch 1 | 3899/ 5600 batches | train loss 0.4573157 +| epoch 1 | 3903/ 5600 batches | train loss 0.5389441 +| epoch 1 | 3907/ 5600 batches | train loss 0.4010859 +| epoch 1 | 3911/ 5600 batches | train loss 0.5835031 +| epoch 1 | 3915/ 5600 batches | train loss 0.5115585 +| epoch 1 | 3919/ 5600 batches | train loss 0.4565112 +| epoch 1 | 3923/ 5600 batches | train loss 0.4915624 +| epoch 1 | 3927/ 5600 batches | train loss 0.4098884 +| epoch 1 | 3931/ 5600 batches | train loss 0.4036846 +| epoch 1 | 3935/ 5600 batches | train loss 0.3611696 +| epoch 1 | 3939/ 5600 batches | train loss 0.4026996 +| epoch 1 | 3943/ 5600 batches | train loss 0.3135713 +| epoch 1 | 3947/ 5600 batches | train loss 0.4432410 +| epoch 1 | 3951/ 5600 batches | train loss 0.4869865 +| epoch 1 | 3955/ 5600 batches | train loss 0.5846100 +| epoch 1 | 3959/ 5600 batches | train loss 0.4699513 +| epoch 1 | 3963/ 5600 batches | train loss 0.4779439 +| epoch 1 | 3967/ 5600 batches | train loss 0.4125530 +| epoch 1 | 3971/ 5600 batches | train loss 0.3782312 +| epoch 1 | 3975/ 5600 batches | train loss 0.4633311 +| epoch 1 | 3979/ 5600 batches | train loss 0.5067551 +| epoch 1 | 3983/ 5600 batches | train loss 0.5868401 +| epoch 1 | 3987/ 5600 batches | train loss 0.5583447 +| epoch 1 | 3991/ 5600 batches | train loss 0.4578797 +| epoch 1 | 3995/ 5600 batches | train loss 0.4264232 +| epoch 1 | 3999/ 5600 batches | train loss 0.4999781 +| epoch 1 | 4003/ 5600 batches | train loss 0.4728870 +| epoch 1 | 4007/ 5600 batches | train loss 0.4025570 +| epoch 1 | 4011/ 5600 batches | train loss 0.4251129 +| epoch 1 | 4015/ 5600 batches | train loss 0.5604575 +| epoch 1 | 4019/ 5600 batches | train loss 0.4974589 +| epoch 1 | 4023/ 5600 batches | train loss 0.3950051 +| epoch 1 | 4027/ 5600 batches | train loss 0.3758920 +| epoch 1 | 4031/ 5600 batches | train loss 0.4517400 +| epoch 1 | 4035/ 5600 batches | train loss 0.5561459 +| epoch 1 | 4039/ 5600 batches | train loss 0.5255171 +| epoch 1 | 4043/ 5600 batches | train loss 0.4043436 +| epoch 1 | 4047/ 5600 batches | train loss 0.5180738 +| epoch 1 | 4051/ 5600 batches | train loss 0.3784792 +| epoch 1 | 4055/ 5600 batches | train loss 0.4561926 +| epoch 1 | 4059/ 5600 batches | train loss 0.4881208 +| epoch 1 | 4063/ 5600 batches | train loss 0.4477469 +| epoch 1 | 4067/ 5600 batches | train loss 0.4595560 +| epoch 1 | 4071/ 5600 batches | train loss 0.4775738 +| epoch 1 | 4075/ 5600 batches | train loss 0.4013975 +| epoch 1 | 4079/ 5600 batches | train loss 0.4352792 +| epoch 1 | 4083/ 5600 batches | train loss 0.4493943 +| epoch 1 | 4087/ 5600 batches | train loss 0.5493371 +| epoch 1 | 4091/ 5600 batches | train loss 0.5458015 +| epoch 1 | 4095/ 5600 batches | train loss 0.6575759 +| epoch 1 | 4099/ 5600 batches | train loss 0.5091960 +| epoch 1 | 4103/ 5600 batches | train loss 0.3990905 +| epoch 1 | 4107/ 5600 batches | train loss 0.4523346 +| epoch 1 | 4111/ 5600 batches | train loss 0.4781649 +| epoch 1 | 4115/ 5600 batches | train loss 0.5155532 +| epoch 1 | 4119/ 5600 batches | train loss 0.3988008 +| epoch 1 | 4123/ 5600 batches | train loss 0.4706920 +| epoch 1 | 4127/ 5600 batches | train loss 0.4791014 +| epoch 1 | 4131/ 5600 batches | train loss 0.3667734 +| epoch 1 | 4135/ 5600 batches | train loss 0.4882951 +| epoch 1 | 4139/ 5600 batches | train loss 0.4786378 +| epoch 1 | 4143/ 5600 batches | train loss 0.5282380 +| epoch 1 | 4147/ 5600 batches | train loss 0.4338112 +| epoch 1 | 4151/ 5600 batches | train loss 0.4108990 +| epoch 1 | 4155/ 5600 batches | train loss 0.4816831 +| epoch 1 | 4159/ 5600 batches | train loss 0.4484292 +| epoch 1 | 4163/ 5600 batches | train loss 0.5341735 +| epoch 1 | 4167/ 5600 batches | train loss 0.4059494 +| epoch 1 | 4171/ 5600 batches | train loss 0.4004329 +| epoch 1 | 4175/ 5600 batches | train loss 0.4151503 +| epoch 1 | 4179/ 5600 batches | train loss 0.4941683 +| epoch 1 | 4183/ 5600 batches | train loss 0.3933965 +| epoch 1 | 4187/ 5600 batches | train loss 0.4974484 +| epoch 1 | 4191/ 5600 batches | train loss 0.4080346 +| epoch 1 | 4195/ 5600 batches | train loss 0.4057766 +| epoch 1 | 4199/ 5600 batches | train loss 0.3898595 +| epoch 1 | 4203/ 5600 batches | train loss 0.4906552 +| epoch 1 | 4207/ 5600 batches | train loss 0.4581912 +| epoch 1 | 4211/ 5600 batches | train loss 0.4609284 +| epoch 1 | 4215/ 5600 batches | train loss 0.4638166 +| epoch 1 | 4219/ 5600 batches | train loss 0.4478141 +| epoch 1 | 4223/ 5600 batches | train loss 0.5115995 +| epoch 1 | 4227/ 5600 batches | train loss 0.4052555 +| epoch 1 | 4231/ 5600 batches | train loss 0.5313212 +| epoch 1 | 4235/ 5600 batches | train loss 0.4655433 +| epoch 1 | 4239/ 5600 batches | train loss 0.3730962 +| epoch 1 | 4243/ 5600 batches | train loss 0.4920191 +| epoch 1 | 4247/ 5600 batches | train loss 0.5039712 +| epoch 1 | 4251/ 5600 batches | train loss 0.5265429 +| epoch 1 | 4255/ 5600 batches | train loss 0.5443292 +| epoch 1 | 4259/ 5600 batches | train loss 0.5291620 +| epoch 1 | 4263/ 5600 batches | train loss 0.4455838 +| epoch 1 | 4267/ 5600 batches | train loss 0.4016382 +| epoch 1 | 4271/ 5600 batches | train loss 0.4273507 +| epoch 1 | 4275/ 5600 batches | train loss 0.4160401 +| epoch 1 | 4279/ 5600 batches | train loss 0.3723443 +| epoch 1 | 4283/ 5600 batches | train loss 0.4475563 +| epoch 1 | 4287/ 5600 batches | train loss 0.3817609 +| epoch 1 | 4291/ 5600 batches | train loss 0.4083884 +| epoch 1 | 4295/ 5600 batches | train loss 0.4799844 +| epoch 1 | 4299/ 5600 batches | train loss 0.5295474 +| epoch 1 | 4303/ 5600 batches | train loss 0.4580065 +| epoch 1 | 4307/ 5600 batches | train loss 0.4567660 +| epoch 1 | 4311/ 5600 batches | train loss 0.4299845 +| epoch 1 | 4315/ 5600 batches | train loss 0.4490457 +| epoch 1 | 4319/ 5600 batches | train loss 0.4684587 +| epoch 1 | 4323/ 5600 batches | train loss 0.4098384 +| epoch 1 | 4327/ 5600 batches | train loss 0.4713361 +| epoch 1 | 4331/ 5600 batches | train loss 0.4009778 +| epoch 1 | 4335/ 5600 batches | train loss 0.4104576 +| epoch 1 | 4339/ 5600 batches | train loss 0.4369993 +| epoch 1 | 4343/ 5600 batches | train loss 0.4548393 +| epoch 1 | 4347/ 5600 batches | train loss 0.5732180 +| epoch 1 | 4351/ 5600 batches | train loss 0.3624404 +| epoch 1 | 4355/ 5600 batches | train loss 0.4627702 +| epoch 1 | 4359/ 5600 batches | train loss 0.4708641 +| epoch 1 | 4363/ 5600 batches | train loss 0.4679721 +| epoch 1 | 4367/ 5600 batches | train loss 0.3717743 +| epoch 1 | 4371/ 5600 batches | train loss 0.5289552 +| epoch 1 | 4375/ 5600 batches | train loss 0.4970630 +| epoch 1 | 4379/ 5600 batches | train loss 0.4407494 +| epoch 1 | 4383/ 5600 batches | train loss 0.3606116 +| epoch 1 | 4387/ 5600 batches | train loss 0.3826548 +| epoch 1 | 4391/ 5600 batches | train loss 0.4798526 +| epoch 1 | 4395/ 5600 batches | train loss 0.4273584 +| epoch 1 | 4399/ 5600 batches | train loss 0.4554494 +| epoch 1 | 4403/ 5600 batches | train loss 0.5246119 +| epoch 1 | 4407/ 5600 batches | train loss 0.4091742 +| epoch 1 | 4411/ 5600 batches | train loss 0.4781318 +| epoch 1 | 4415/ 5600 batches | train loss 0.4160026 +| epoch 1 | 4419/ 5600 batches | train loss 0.3671083 +| epoch 1 | 4423/ 5600 batches | train loss 0.4846776 +| epoch 1 | 4427/ 5600 batches | train loss 0.3867391 +| epoch 1 | 4431/ 5600 batches | train loss 0.4301780 +| epoch 1 | 4435/ 5600 batches | train loss 0.5342010 +| epoch 1 | 4439/ 5600 batches | train loss 0.4619882 +| epoch 1 | 4443/ 5600 batches | train loss 0.5138509 +| epoch 1 | 4447/ 5600 batches | train loss 0.3786267 +| epoch 1 | 4451/ 5600 batches | train loss 0.4513839 +| epoch 1 | 4455/ 5600 batches | train loss 0.3589383 +| epoch 1 | 4459/ 5600 batches | train loss 0.5616577 +| epoch 1 | 4463/ 5600 batches | train loss 0.4974181 +| epoch 1 | 4467/ 5600 batches | train loss 0.4966974 +| epoch 1 | 4471/ 5600 batches | train loss 0.4804605 +| epoch 1 | 4475/ 5600 batches | train loss 0.3897859 +| epoch 1 | 4479/ 5600 batches | train loss 0.4770253 +| epoch 1 | 4483/ 5600 batches | train loss 0.3218126 +| epoch 1 | 4487/ 5600 batches | train loss 0.4411565 +| epoch 1 | 4491/ 5600 batches | train loss 0.4372759 +| epoch 1 | 4495/ 5600 batches | train loss 0.3647102 +| epoch 1 | 4499/ 5600 batches | train loss 0.3712603 +| epoch 1 | 4503/ 5600 batches | train loss 0.4815283 +| epoch 1 | 4507/ 5600 batches | train loss 0.4249680 +| epoch 1 | 4511/ 5600 batches | train loss 0.4071704 +| epoch 1 | 4515/ 5600 batches | train loss 0.4971500 +| epoch 1 | 4519/ 5600 batches | train loss 0.4559127 +| epoch 1 | 4523/ 5600 batches | train loss 0.5141855 +| epoch 1 | 4527/ 5600 batches | train loss 0.3644698 +| epoch 1 | 4531/ 5600 batches | train loss 0.4345518 +| epoch 1 | 4535/ 5600 batches | train loss 0.4460751 +| epoch 1 | 4539/ 5600 batches | train loss 0.4582362 +| epoch 1 | 4543/ 5600 batches | train loss 0.4989831 +| epoch 1 | 4547/ 5600 batches | train loss 0.4577681 +| epoch 1 | 4551/ 5600 batches | train loss 0.4860574 +| epoch 1 | 4555/ 5600 batches | train loss 0.4648922 +| epoch 1 | 4559/ 5600 batches | train loss 0.4935458 +| epoch 1 | 4563/ 5600 batches | train loss 0.5097706 +| epoch 1 | 4567/ 5600 batches | train loss 0.4433591 +| epoch 1 | 4571/ 5600 batches | train loss 0.4980274 +| epoch 1 | 4575/ 5600 batches | train loss 0.4490138 +| epoch 1 | 4579/ 5600 batches | train loss 0.4520190 +| epoch 1 | 4583/ 5600 batches | train loss 0.4164645 +| epoch 1 | 4587/ 5600 batches | train loss 0.4497655 +| epoch 1 | 4591/ 5600 batches | train loss 0.4479624 +| epoch 1 | 4595/ 5600 batches | train loss 0.6529557 +| epoch 1 | 4599/ 5600 batches | train loss 0.4685968 +| epoch 1 | 4603/ 5600 batches | train loss 0.3975543 +| epoch 1 | 4607/ 5600 batches | train loss 0.5357361 +| epoch 1 | 4611/ 5600 batches | train loss 0.4184021 +| epoch 1 | 4615/ 5600 batches | train loss 0.3943226 +| epoch 1 | 4619/ 5600 batches | train loss 0.5240097 +| epoch 1 | 4623/ 5600 batches | train loss 0.5557206 +| epoch 1 | 4627/ 5600 batches | train loss 0.4433510 +| epoch 1 | 4631/ 5600 batches | train loss 0.4670643 +| epoch 1 | 4635/ 5600 batches | train loss 0.4921134 +| epoch 1 | 4639/ 5600 batches | train loss 0.4652994 +| epoch 1 | 4643/ 5600 batches | train loss 0.4050732 +| epoch 1 | 4647/ 5600 batches | train loss 0.4968996 +| epoch 1 | 4651/ 5600 batches | train loss 0.3733474 +| epoch 1 | 4655/ 5600 batches | train loss 0.4257248 +| epoch 1 | 4659/ 5600 batches | train loss 0.2976911 +| epoch 1 | 4663/ 5600 batches | train loss 0.4779161 +| epoch 1 | 4667/ 5600 batches | train loss 0.4613543 +| epoch 1 | 4671/ 5600 batches | train loss 0.4684731 +| epoch 1 | 4675/ 5600 batches | train loss 0.4632284 +| epoch 1 | 4679/ 5600 batches | train loss 0.4005275 +| epoch 1 | 4683/ 5600 batches | train loss 0.5007036 +| epoch 1 | 4687/ 5600 batches | train loss 0.4856960 +| epoch 1 | 4691/ 5600 batches | train loss 0.4486930 +| epoch 1 | 4695/ 5600 batches | train loss 0.4574770 +| epoch 1 | 4699/ 5600 batches | train loss 0.4110291 +| epoch 1 | 4703/ 5600 batches | train loss 0.4883224 +| epoch 1 | 4707/ 5600 batches | train loss 0.4216039 +| epoch 1 | 4711/ 5600 batches | train loss 0.4192386 +| epoch 1 | 4715/ 5600 batches | train loss 0.5279648 +| epoch 1 | 4719/ 5600 batches | train loss 0.4438800 +| epoch 1 | 4723/ 5600 batches | train loss 0.5049695 +| epoch 1 | 4727/ 5600 batches | train loss 0.4486070 +| epoch 1 | 4731/ 5600 batches | train loss 0.4852529 +| epoch 1 | 4735/ 5600 batches | train loss 0.2938491 +| epoch 1 | 4739/ 5600 batches | train loss 0.4373445 +| epoch 1 | 4743/ 5600 batches | train loss 0.4355931 +| epoch 1 | 4747/ 5600 batches | train loss 0.4682098 +| epoch 1 | 4751/ 5600 batches | train loss 0.5615485 +| epoch 1 | 4755/ 5600 batches | train loss 0.5080807 +| epoch 1 | 4759/ 5600 batches | train loss 0.4716691 +| epoch 1 | 4763/ 5600 batches | train loss 0.4481652 +| epoch 1 | 4767/ 5600 batches | train loss 0.5252100 +| epoch 1 | 4771/ 5600 batches | train loss 0.5484704 +| epoch 1 | 4775/ 5600 batches | train loss 0.4272976 +| epoch 1 | 4779/ 5600 batches | train loss 0.4542418 +| epoch 1 | 4783/ 5600 batches | train loss 0.3496889 +| epoch 1 | 4787/ 5600 batches | train loss 0.4067416 +| epoch 1 | 4791/ 5600 batches | train loss 0.4434361 +| epoch 1 | 4795/ 5600 batches | train loss 0.4961417 +| epoch 1 | 4799/ 5600 batches | train loss 0.5794912 +| epoch 1 | 4803/ 5600 batches | train loss 0.5029666 +| epoch 1 | 4807/ 5600 batches | train loss 0.4645793 +| epoch 1 | 4811/ 5600 batches | train loss 0.4626864 +| epoch 1 | 4815/ 5600 batches | train loss 0.4807317 +| epoch 1 | 4819/ 5600 batches | train loss 0.4296269 +| epoch 1 | 4823/ 5600 batches | train loss 0.4715279 +| epoch 1 | 4827/ 5600 batches | train loss 0.4325870 +| epoch 1 | 4831/ 5600 batches | train loss 0.5339657 +| epoch 1 | 4835/ 5600 batches | train loss 0.4665034 +| epoch 1 | 4839/ 5600 batches | train loss 0.4365445 +| epoch 1 | 4843/ 5600 batches | train loss 0.4210049 +| epoch 1 | 4847/ 5600 batches | train loss 0.7068779 +| epoch 1 | 4851/ 5600 batches | train loss 0.4449800 +| epoch 1 | 4855/ 5600 batches | train loss 0.3969381 +| epoch 1 | 4859/ 5600 batches | train loss 0.3645958 +| epoch 1 | 4863/ 5600 batches | train loss 0.4957295 +| epoch 1 | 4867/ 5600 batches | train loss 0.4493861 +| epoch 1 | 4871/ 5600 batches | train loss 0.4395373 +| epoch 1 | 4875/ 5600 batches | train loss 0.4477232 +| epoch 1 | 4879/ 5600 batches | train loss 0.5682062 +| epoch 1 | 4883/ 5600 batches | train loss 0.4287879 +| epoch 1 | 4887/ 5600 batches | train loss 0.3653037 +| epoch 1 | 4891/ 5600 batches | train loss 0.4181667 +| epoch 1 | 4895/ 5600 batches | train loss 0.4533248 +| epoch 1 | 4899/ 5600 batches | train loss 0.4980300 +| epoch 1 | 4903/ 5600 batches | train loss 0.4395733 +| epoch 1 | 4907/ 5600 batches | train loss 0.4316941 +| epoch 1 | 4911/ 5600 batches | train loss 0.4593310 +| epoch 1 | 4915/ 5600 batches | train loss 0.3313034 +| epoch 1 | 4919/ 5600 batches | train loss 0.5776969 +| epoch 1 | 4923/ 5600 batches | train loss 0.4816586 +| epoch 1 | 4927/ 5600 batches | train loss 0.3622531 +| epoch 1 | 4931/ 5600 batches | train loss 0.3952461 +| epoch 1 | 4935/ 5600 batches | train loss 0.4088280 +| epoch 1 | 4939/ 5600 batches | train loss 0.5251186 +| epoch 1 | 4943/ 5600 batches | train loss 0.4733342 +| epoch 1 | 4947/ 5600 batches | train loss 0.4928683 +| epoch 1 | 4951/ 5600 batches | train loss 0.4274975 +| epoch 1 | 4955/ 5600 batches | train loss 0.4335735 +| epoch 1 | 4959/ 5600 batches | train loss 0.4119572 +| epoch 1 | 4963/ 5600 batches | train loss 0.4590771 +| epoch 1 | 4967/ 5600 batches | train loss 0.4128127 +| epoch 1 | 4971/ 5600 batches | train loss 0.3939312 +| epoch 1 | 4975/ 5600 batches | train loss 0.5279958 +| epoch 1 | 4979/ 5600 batches | train loss 0.3838157 +| epoch 1 | 4983/ 5600 batches | train loss 0.4542981 +| epoch 1 | 4987/ 5600 batches | train loss 0.5726322 +| epoch 1 | 4991/ 5600 batches | train loss 0.3739221 +| epoch 1 | 4995/ 5600 batches | train loss 0.4036675 +| epoch 1 | 4999/ 5600 batches | train loss 0.4431809 +| epoch 1 | 5003/ 5600 batches | train loss 0.3612355 +| epoch 1 | 5007/ 5600 batches | train loss 0.4987982 +| epoch 1 | 5011/ 5600 batches | train loss 0.4762493 +| epoch 1 | 5015/ 5600 batches | train loss 0.3787966 +| epoch 1 | 5019/ 5600 batches | train loss 0.4230015 +| epoch 1 | 5023/ 5600 batches | train loss 0.5052902 +| epoch 1 | 5027/ 5600 batches | train loss 0.4798607 +| epoch 1 | 5031/ 5600 batches | train loss 0.5699164 +| epoch 1 | 5035/ 5600 batches | train loss 0.4608316 +| epoch 1 | 5039/ 5600 batches | train loss 0.4977309 +| epoch 1 | 5043/ 5600 batches | train loss 0.5008745 +| epoch 1 | 5047/ 5600 batches | train loss 0.4758075 +| epoch 1 | 5051/ 5600 batches | train loss 0.4389161 +| epoch 1 | 5055/ 5600 batches | train loss 0.4411451 +| epoch 1 | 5059/ 5600 batches | train loss 0.4613935 +| epoch 1 | 5063/ 5600 batches | train loss 0.4228776 +| epoch 1 | 5067/ 5600 batches | train loss 0.4915241 +| epoch 1 | 5071/ 5600 batches | train loss 0.3701249 +| epoch 1 | 5075/ 5600 batches | train loss 0.3766943 +| epoch 1 | 5079/ 5600 batches | train loss 0.4346635 +| epoch 1 | 5083/ 5600 batches | train loss 0.5060350 +| epoch 1 | 5087/ 5600 batches | train loss 0.4354095 +| epoch 1 | 5091/ 5600 batches | train loss 0.5336078 +| epoch 1 | 5095/ 5600 batches | train loss 0.5367700 +| epoch 1 | 5099/ 5600 batches | train loss 0.4477816 +| epoch 1 | 5103/ 5600 batches | train loss 0.4476193 +| epoch 1 | 5107/ 5600 batches | train loss 0.4673462 +| epoch 1 | 5111/ 5600 batches | train loss 0.5790390 +| epoch 1 | 5115/ 5600 batches | train loss 0.4869278 +| epoch 1 | 5119/ 5600 batches | train loss 0.4586633 +| epoch 1 | 5123/ 5600 batches | train loss 0.4020240 +| epoch 1 | 5127/ 5600 batches | train loss 0.3865579 +| epoch 1 | 5131/ 5600 batches | train loss 0.4721070 +| epoch 1 | 5135/ 5600 batches | train loss 0.4158720 +| epoch 1 | 5139/ 5600 batches | train loss 0.4397854 +| epoch 1 | 5143/ 5600 batches | train loss 0.4523872 +| epoch 1 | 5147/ 5600 batches | train loss 0.4425131 +| epoch 1 | 5151/ 5600 batches | train loss 0.5579927 +| epoch 1 | 5155/ 5600 batches | train loss 0.4763892 +| epoch 1 | 5159/ 5600 batches | train loss 0.4568165 +| epoch 1 | 5163/ 5600 batches | train loss 0.4310713 +| epoch 1 | 5167/ 5600 batches | train loss 0.4414120 +| epoch 1 | 5171/ 5600 batches | train loss 0.4961094 +| epoch 1 | 5175/ 5600 batches | train loss 0.3303370 +| epoch 1 | 5179/ 5600 batches | train loss 0.4435194 +| epoch 1 | 5183/ 5600 batches | train loss 0.4263199 +| epoch 1 | 5187/ 5600 batches | train loss 0.4950864 +| epoch 1 | 5191/ 5600 batches | train loss 0.4606279 +| epoch 1 | 5195/ 5600 batches | train loss 0.5507064 +| epoch 1 | 5199/ 5600 batches | train loss 0.3652090 +| epoch 1 | 5203/ 5600 batches | train loss 0.3767374 +| epoch 1 | 5207/ 5600 batches | train loss 0.4080671 +| epoch 1 | 5211/ 5600 batches | train loss 0.4117194 +| epoch 1 | 5215/ 5600 batches | train loss 0.3926224 +| epoch 1 | 5219/ 5600 batches | train loss 0.5070274 +| epoch 1 | 5223/ 5600 batches | train loss 0.4860566 +| epoch 1 | 5227/ 5600 batches | train loss 0.2496394 +| epoch 1 | 5231/ 5600 batches | train loss 0.4323878 +| epoch 1 | 5235/ 5600 batches | train loss 0.4774867 +| epoch 1 | 5239/ 5600 batches | train loss 0.4456257 +| epoch 1 | 5243/ 5600 batches | train loss 0.5059770 +| epoch 1 | 5247/ 5600 batches | train loss 0.4236316 +| epoch 1 | 5251/ 5600 batches | train loss 0.5169114 +| epoch 1 | 5255/ 5600 batches | train loss 0.3903289 +| epoch 1 | 5259/ 5600 batches | train loss 0.4390333 +| epoch 1 | 5263/ 5600 batches | train loss 0.4294921 +| epoch 1 | 5267/ 5600 batches | train loss 0.3907861 +| epoch 1 | 5271/ 5600 batches | train loss 0.5153983 +| epoch 1 | 5275/ 5600 batches | train loss 0.4704050 +| epoch 1 | 5279/ 5600 batches | train loss 0.4501371 +| epoch 1 | 5283/ 5600 batches | train loss 0.4532477 +| epoch 1 | 5287/ 5600 batches | train loss 0.5412799 +| epoch 1 | 5291/ 5600 batches | train loss 0.4242664 +| epoch 1 | 5295/ 5600 batches | train loss 0.4433115 +| epoch 1 | 5299/ 5600 batches | train loss 0.4409230 +| epoch 1 | 5303/ 5600 batches | train loss 0.4539884 +| epoch 1 | 5307/ 5600 batches | train loss 0.3028637 +| epoch 1 | 5311/ 5600 batches | train loss 0.5573090 +| epoch 1 | 5315/ 5600 batches | train loss 0.4747325 +| epoch 1 | 5319/ 5600 batches | train loss 0.4083256 +| epoch 1 | 5323/ 5600 batches | train loss 0.4602559 +| epoch 1 | 5327/ 5600 batches | train loss 0.4484693 +| epoch 1 | 5331/ 5600 batches | train loss 0.4400347 +| epoch 1 | 5335/ 5600 batches | train loss 0.4974102 +| epoch 1 | 5339/ 5600 batches | train loss 0.3742090 +| epoch 1 | 5343/ 5600 batches | train loss 0.4439768 +| epoch 1 | 5347/ 5600 batches | train loss 0.4929139 +| epoch 1 | 5351/ 5600 batches | train loss 0.4909332 +| epoch 1 | 5355/ 5600 batches | train loss 0.4531844 +| epoch 1 | 5359/ 5600 batches | train loss 0.5243430 +| epoch 1 | 5363/ 5600 batches | train loss 0.4395741 +| epoch 1 | 5367/ 5600 batches | train loss 0.5426828 +| epoch 1 | 5371/ 5600 batches | train loss 0.4158657 +| epoch 1 | 5375/ 5600 batches | train loss 0.4725955 +| epoch 1 | 5379/ 5600 batches | train loss 0.4587605 +| epoch 1 | 5383/ 5600 batches | train loss 0.3444564 +| epoch 1 | 5387/ 5600 batches | train loss 0.4476353 +| epoch 1 | 5391/ 5600 batches | train loss 0.4874334 +| epoch 1 | 5395/ 5600 batches | train loss 0.6339544 +| epoch 1 | 5399/ 5600 batches | train loss 0.6009269 +| epoch 1 | 5403/ 5600 batches | train loss 0.4358736 +| epoch 1 | 5407/ 5600 batches | train loss 0.4170212 +| epoch 1 | 5411/ 5600 batches | train loss 0.4755042 +| epoch 1 | 5415/ 5600 batches | train loss 0.3963555 +| epoch 1 | 5419/ 5600 batches | train loss 0.3884752 +| epoch 1 | 5423/ 5600 batches | train loss 0.5566435 +| epoch 1 | 5427/ 5600 batches | train loss 0.3479128 +| epoch 1 | 5431/ 5600 batches | train loss 0.4803753 +| epoch 1 | 5435/ 5600 batches | train loss 0.4096144 +| epoch 1 | 5439/ 5600 batches | train loss 0.4149459 +| epoch 1 | 5443/ 5600 batches | train loss 0.4864314 +| epoch 1 | 5447/ 5600 batches | train loss 0.4693924 +| epoch 1 | 5451/ 5600 batches | train loss 0.4998794 +| epoch 1 | 5455/ 5600 batches | train loss 0.3403507 +| epoch 1 | 5459/ 5600 batches | train loss 0.5138746 +| epoch 1 | 5463/ 5600 batches | train loss 0.4796081 +| epoch 1 | 5467/ 5600 batches | train loss 0.3776376 +| epoch 1 | 5471/ 5600 batches | train loss 0.5520729 +| epoch 1 | 5475/ 5600 batches | train loss 0.5265374 +| epoch 1 | 5479/ 5600 batches | train loss 0.4244580 +| epoch 1 | 5483/ 5600 batches | train loss 0.4969416 +| epoch 1 | 5487/ 5600 batches | train loss 0.4861909 +| epoch 1 | 5491/ 5600 batches | train loss 0.4120762 +| epoch 1 | 5495/ 5600 batches | train loss 0.4822299 +| epoch 1 | 5499/ 5600 batches | train loss 0.3212591 +| epoch 1 | 5503/ 5600 batches | train loss 0.4504560 +| epoch 1 | 5507/ 5600 batches | train loss 0.5434288 +| epoch 1 | 5511/ 5600 batches | train loss 0.4913944 +| epoch 1 | 5515/ 5600 batches | train loss 0.4104462 +| epoch 1 | 5519/ 5600 batches | train loss 0.4176293 +| epoch 1 | 5523/ 5600 batches | train loss 0.4535906 +| epoch 1 | 5527/ 5600 batches | train loss 0.4843036 +| epoch 1 | 5531/ 5600 batches | train loss 0.4037657 +| epoch 1 | 5535/ 5600 batches | train loss 0.3547840 +| epoch 1 | 5539/ 5600 batches | train loss 0.4578703 +| epoch 1 | 5543/ 5600 batches | train loss 0.4975147 +| epoch 1 | 5547/ 5600 batches | train loss 0.3566615 +| epoch 1 | 5551/ 5600 batches | train loss 0.4916636 +| epoch 1 | 5555/ 5600 batches | train loss 0.4023015 +| epoch 1 | 5559/ 5600 batches | train loss 0.4914725 +| epoch 1 | 5563/ 5600 batches | train loss 0.4274288 +| epoch 1 | 5567/ 5600 batches | train loss 0.4750485 +| epoch 1 | 5571/ 5600 batches | train loss 0.1989989 +| epoch 1 | 5575/ 5600 batches | train loss 0.4739618 +| epoch 1 | 5579/ 5600 batches | train loss 0.4966640 +| epoch 1 | 5583/ 5600 batches | train loss 0.3777391 +| epoch 1 | 5587/ 5600 batches | train loss 0.3624712 +| epoch 1 | 5591/ 5600 batches | train loss 0.5133488 +| epoch 1 | 5595/ 5600 batches | train loss 0.4425669 +| epoch 1 | 5599/ 5600 batches | train loss 0.5239753 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 5600 batches | test loss 0.3589638 +| epoch 1 | 7/ 5600 batches | test loss 0.5320301 +| epoch 1 | 11/ 5600 batches | test loss 0.5077624 +| epoch 1 | 15/ 5600 batches | test loss 0.4545498 +| epoch 1 | 19/ 5600 batches | test loss 0.4857149 +| epoch 1 | 23/ 5600 batches | test loss 0.4922410 +| epoch 1 | 27/ 5600 batches | test loss 0.4114775 +| epoch 1 | 31/ 5600 batches | test loss 0.4660002 +| epoch 1 | 35/ 5600 batches | test loss 0.4897549 +| epoch 1 | 39/ 5600 batches | test loss 0.4256029 +| epoch 1 | 43/ 5600 batches | test loss 0.4954878 +| epoch 1 | 47/ 5600 batches | test loss 0.4294465 +| epoch 1 | 51/ 5600 batches | test loss 0.3985560 +| epoch 1 | 55/ 5600 batches | test loss 0.3810490 +| epoch 1 | 59/ 5600 batches | test loss 0.3835219 +| epoch 1 | 63/ 5600 batches | test loss 0.5340390 +| epoch 1 | 67/ 5600 batches | test loss 0.4643973 +| epoch 1 | 71/ 5600 batches | test loss 0.4357888 +| epoch 1 | 75/ 5600 batches | test loss 0.4105633 +| epoch 1 | 79/ 5600 batches | test loss 0.3913731 +| epoch 1 | 83/ 5600 batches | test loss 0.4456025 +| epoch 1 | 87/ 5600 batches | test loss 0.5103353 +| epoch 1 | 91/ 5600 batches | test loss 0.5516756 +| epoch 1 | 95/ 5600 batches | test loss 0.4144306 +| epoch 1 | 99/ 5600 batches | test loss 0.4598699 +| epoch 1 | 103/ 5600 batches | test loss 0.4000327 +| epoch 1 | 107/ 5600 batches | test loss 0.4769800 +| epoch 1 | 111/ 5600 batches | test loss 0.5916522 +| epoch 1 | 115/ 5600 batches | test loss 0.4265314 +| epoch 1 | 119/ 5600 batches | test loss 0.4222083 +| epoch 1 | 123/ 5600 batches | test loss 0.4943322 +| epoch 1 | 127/ 5600 batches | test loss 0.5744495 +| epoch 1 | 131/ 5600 batches | test loss 0.3550383 +| epoch 1 | 135/ 5600 batches | test loss 0.5186418 +| epoch 1 | 139/ 5600 batches | test loss 0.3590609 +| epoch 1 | 143/ 5600 batches | test loss 0.4702617 +| epoch 1 | 147/ 5600 batches | test loss 0.4402144 +| epoch 1 | 151/ 5600 batches | test loss 0.4842389 +| epoch 1 | 155/ 5600 batches | test loss 0.5673769 +| epoch 1 | 159/ 5600 batches | test loss 0.4919772 +| epoch 1 | 163/ 5600 batches | test loss 0.3530230 +| epoch 1 | 167/ 5600 batches | test loss 0.4087288 +| epoch 1 | 171/ 5600 batches | test loss 0.4227593 +| epoch 1 | 175/ 5600 batches | test loss 0.4760435 +| epoch 1 | 179/ 5600 batches | test loss 0.6274503 +| epoch 1 | 183/ 5600 batches | test loss 0.4580104 +| epoch 1 | 187/ 5600 batches | test loss 0.4393489 +| epoch 1 | 191/ 5600 batches | test loss 0.4383540 +| epoch 1 | 195/ 5600 batches | test loss 0.4196538 +| epoch 1 | 199/ 5600 batches | test loss 0.4539073 +| epoch 1 | 203/ 5600 batches | test loss 0.4384711 +| epoch 1 | 207/ 5600 batches | test loss 0.3630072 +| epoch 1 | 211/ 5600 batches | test loss 0.4690647 +| epoch 1 | 215/ 5600 batches | test loss 0.4310908 +| epoch 1 | 219/ 5600 batches | test loss 0.4393053 +| epoch 1 | 223/ 5600 batches | test loss 0.3976923 +| epoch 1 | 227/ 5600 batches | test loss 0.4167389 +| epoch 1 | 231/ 5600 batches | test loss 0.4413556 +| epoch 1 | 235/ 5600 batches | test loss 0.3426162 +| epoch 1 | 239/ 5600 batches | test loss 0.4392090 +| epoch 1 | 243/ 5600 batches | test loss 0.4416491 +| epoch 1 | 247/ 5600 batches | test loss 0.4390508 +| epoch 1 | 251/ 5600 batches | test loss 0.4140347 +| epoch 1 | 255/ 5600 batches | test loss 0.4355788 +| epoch 1 | 259/ 5600 batches | test loss 0.3802955 +| epoch 1 | 263/ 5600 batches | test loss 0.4063205 +| epoch 1 | 267/ 5600 batches | test loss 0.4678237 +| epoch 1 | 271/ 5600 batches | test loss 0.4826372 +| epoch 1 | 275/ 5600 batches | test loss 0.4524527 +| epoch 1 | 279/ 5600 batches | test loss 0.4308304 +| epoch 1 | 283/ 5600 batches | test loss 0.3855456 +| epoch 1 | 287/ 5600 batches | test loss 0.5155973 +| epoch 1 | 291/ 5600 batches | test loss 0.4824814 +| epoch 1 | 295/ 5600 batches | test loss 0.4906886 +| epoch 1 | 299/ 5600 batches | test loss 0.3546321 +| epoch 1 | 303/ 5600 batches | test loss 0.3958063 +| epoch 1 | 307/ 5600 batches | test loss 0.4975410 +| epoch 1 | 311/ 5600 batches | test loss 0.4622622 +| epoch 1 | 315/ 5600 batches | test loss 0.4191197 +| epoch 1 | 319/ 5600 batches | test loss 0.4473246 +| epoch 1 | 323/ 5600 batches | test loss 0.4612281 +| epoch 1 | 327/ 5600 batches | test loss 0.5098811 +| epoch 1 | 331/ 5600 batches | test loss 0.5506583 +| epoch 1 | 335/ 5600 batches | test loss 0.4530545 +| epoch 1 | 339/ 5600 batches | test loss 0.4293340 +| epoch 1 | 343/ 5600 batches | test loss 0.3454933 +| epoch 1 | 347/ 5600 batches | test loss 0.4415148 +| epoch 1 | 351/ 5600 batches | test loss 0.3766122 +| epoch 1 | 355/ 5600 batches | test loss 0.4202463 +| epoch 1 | 359/ 5600 batches | test loss 0.4307471 +| epoch 1 | 363/ 5600 batches | test loss 0.4885827 +| epoch 1 | 367/ 5600 batches | test loss 0.3758846 +| epoch 1 | 371/ 5600 batches | test loss 0.4825094 +| epoch 1 | 375/ 5600 batches | test loss 0.4096917 +| epoch 1 | 379/ 5600 batches | test loss 0.4422837 +| epoch 1 | 383/ 5600 batches | test loss 0.4248131 +| epoch 1 | 387/ 5600 batches | test loss 0.4752894 +| epoch 1 | 391/ 5600 batches | test loss 0.5014507 +| epoch 1 | 395/ 5600 batches | test loss 0.6129328 +| epoch 1 | 399/ 5600 batches | test loss 0.4698400 +| epoch 1 | 403/ 5600 batches | test loss 0.4915234 +| epoch 1 | 407/ 5600 batches | test loss 0.4847627 +| epoch 1 | 411/ 5600 batches | test loss 0.4964734 +| epoch 1 | 415/ 5600 batches | test loss 0.3773123 +| epoch 1 | 419/ 5600 batches | test loss 0.5528975 +| epoch 1 | 423/ 5600 batches | test loss 0.4461805 +| epoch 1 | 427/ 5600 batches | test loss 0.4144683 +| epoch 1 | 431/ 5600 batches | test loss 0.5241271 +| epoch 1 | 435/ 5600 batches | test loss 0.4230504 +| epoch 1 | 439/ 5600 batches | test loss 0.4683666 +| epoch 1 | 443/ 5600 batches | test loss 0.4938009 +| epoch 1 | 447/ 5600 batches | test loss 0.4947018 +| epoch 1 | 451/ 5600 batches | test loss 0.3913531 +| epoch 1 | 455/ 5600 batches | test loss 0.4320431 +| epoch 1 | 459/ 5600 batches | test loss 0.6036540 +| epoch 1 | 463/ 5600 batches | test loss 0.4218634 +| epoch 1 | 467/ 5600 batches | test loss 0.4076650 +| epoch 1 | 471/ 5600 batches | test loss 0.4998973 +| epoch 1 | 475/ 5600 batches | test loss 0.5305476 +| epoch 1 | 479/ 5600 batches | test loss 0.4787334 +| epoch 1 | 483/ 5600 batches | test loss 0.5284542 +| epoch 1 | 487/ 5600 batches | test loss 0.4141140 +| epoch 1 | 491/ 5600 batches | test loss 0.5452546 +| epoch 1 | 495/ 5600 batches | test loss 0.5013431 +| epoch 1 | 499/ 5600 batches | test loss 0.5102515 +| epoch 1 | 503/ 5600 batches | test loss 0.5257640 +| epoch 1 | 507/ 5600 batches | test loss 0.4471633 +| epoch 1 | 511/ 5600 batches | test loss 0.5259604 +| epoch 1 | 515/ 5600 batches | test loss 0.4734818 +| epoch 1 | 519/ 5600 batches | test loss 0.3920024 +| epoch 1 | 523/ 5600 batches | test loss 0.4308228 +| epoch 1 | 527/ 5600 batches | test loss 0.4735607 +| epoch 1 | 531/ 5600 batches | test loss 0.5882099 +| epoch 1 | 535/ 5600 batches | test loss 0.4420986 +| epoch 1 | 539/ 5600 batches | test loss 0.3484914 +| epoch 1 | 543/ 5600 batches | test loss 0.4931806 +| epoch 1 | 547/ 5600 batches | test loss 0.4308915 +| epoch 1 | 551/ 5600 batches | test loss 0.4783983 +| epoch 1 | 555/ 5600 batches | test loss 0.5414230 +| epoch 1 | 559/ 5600 batches | test loss 0.4654374 +| epoch 1 | 563/ 5600 batches | test loss 0.3842484 +| epoch 1 | 567/ 5600 batches | test loss 0.3223549 +| epoch 1 | 571/ 5600 batches | test loss 0.4002300 +| epoch 1 | 575/ 5600 batches | test loss 0.4549326 +| epoch 1 | 579/ 5600 batches | test loss 0.4696395 +| epoch 1 | 583/ 5600 batches | test loss 0.4225959 +| epoch 1 | 587/ 5600 batches | test loss 0.4496831 +| epoch 1 | 591/ 5600 batches | test loss 0.4488894 +| epoch 1 | 595/ 5600 batches | test loss 0.4862735 +| epoch 1 | 599/ 5600 batches | test loss 0.4190828 +| epoch 1 | 603/ 5600 batches | test loss 0.5111748 +| epoch 1 | 607/ 5600 batches | test loss 0.6039680 +| epoch 1 | 611/ 5600 batches | test loss 0.4186269 +| epoch 1 | 615/ 5600 batches | test loss 0.4422587 +| epoch 1 | 619/ 5600 batches | test loss 0.5679865 +| epoch 1 | 623/ 5600 batches | test loss 0.4130694 +| epoch 1 | 627/ 5600 batches | test loss 0.4609624 +| epoch 1 | 631/ 5600 batches | test loss 0.4475721 +| epoch 1 | 635/ 5600 batches | test loss 0.4423103 +| epoch 1 | 639/ 5600 batches | test loss 0.4653710 +| epoch 1 | 643/ 5600 batches | test loss 0.4236086 +| epoch 1 | 647/ 5600 batches | test loss 0.3716019 +| epoch 1 | 651/ 5600 batches | test loss 0.4316435 +| epoch 1 | 655/ 5600 batches | test loss 0.3247678 +| epoch 1 | 659/ 5600 batches | test loss 0.4584864 +| epoch 1 | 663/ 5600 batches | test loss 0.4560969 +| epoch 1 | 667/ 5600 batches | test loss 0.4749911 +| epoch 1 | 671/ 5600 batches | test loss 0.4036824 +| epoch 1 | 675/ 5600 batches | test loss 0.4037977 +| epoch 1 | 679/ 5600 batches | test loss 0.4529690 +| epoch 1 | 683/ 5600 batches | test loss 0.4623174 +| epoch 1 | 687/ 5600 batches | test loss 0.4637274 +| epoch 1 | 691/ 5600 batches | test loss 0.5270911 +| epoch 1 | 695/ 5600 batches | test loss 0.4137880 +| epoch 1 | 699/ 5600 batches | test loss 0.3760954 +| epoch 1 | 703/ 5600 batches | test loss 0.4035886 +| epoch 1 | 707/ 5600 batches | test loss 0.5118624 +| epoch 1 | 711/ 5600 batches | test loss 0.4494839 +| epoch 1 | 715/ 5600 batches | test loss 0.4084706 +| epoch 1 | 719/ 5600 batches | test loss 0.3600673 +| epoch 1 | 723/ 5600 batches | test loss 0.4594440 +| epoch 1 | 727/ 5600 batches | test loss 0.4934294 +| epoch 1 | 731/ 5600 batches | test loss 0.4030160 +| epoch 1 | 735/ 5600 batches | test loss 0.5994792 +| epoch 1 | 739/ 5600 batches | test loss 0.4446592 +| epoch 1 | 743/ 5600 batches | test loss 0.4614400 +| epoch 1 | 747/ 5600 batches | test loss 0.5082977 +| epoch 1 | 751/ 5600 batches | test loss 0.4506242 +| epoch 1 | 755/ 5600 batches | test loss 0.4237062 +| epoch 1 | 759/ 5600 batches | test loss 0.4625037 +| epoch 1 | 763/ 5600 batches | test loss 0.3907027 +| epoch 1 | 767/ 5600 batches | test loss 0.5344875 +| epoch 1 | 771/ 5600 batches | test loss 0.3657672 +| epoch 1 | 775/ 5600 batches | test loss 0.4495515 +| epoch 1 | 779/ 5600 batches | test loss 0.4314572 +| epoch 1 | 783/ 5600 batches | test loss 0.4594222 +| epoch 1 | 787/ 5600 batches | test loss 0.5702634 +| epoch 1 | 791/ 5600 batches | test loss 0.4417397 +| epoch 1 | 795/ 5600 batches | test loss 0.4358131 +| epoch 1 | 799/ 5600 batches | test loss 0.3604627 +| epoch 1 | 803/ 5600 batches | test loss 0.5260155 +| epoch 1 | 807/ 5600 batches | test loss 0.3967003 +| epoch 1 | 811/ 5600 batches | test loss 0.3868913 +| epoch 1 | 815/ 5600 batches | test loss 0.4175267 +| epoch 1 | 819/ 5600 batches | test loss 0.4706128 +| epoch 1 | 823/ 5600 batches | test loss 0.4306137 +| epoch 1 | 827/ 5600 batches | test loss 0.3932520 +| epoch 1 | 831/ 5600 batches | test loss 0.4522684 +| epoch 1 | 835/ 5600 batches | test loss 0.4706475 +| epoch 1 | 839/ 5600 batches | test loss 0.4487240 +| epoch 1 | 843/ 5600 batches | test loss 0.3925402 +| epoch 1 | 847/ 5600 batches | test loss 0.4544481 +| epoch 1 | 851/ 5600 batches | test loss 0.3619694 +| epoch 1 | 855/ 5600 batches | test loss 0.4510379 +| epoch 1 | 859/ 5600 batches | test loss 0.4384314 +| epoch 1 | 863/ 5600 batches | test loss 0.4756667 +| epoch 1 | 867/ 5600 batches | test loss 0.4520389 +| epoch 1 | 871/ 5600 batches | test loss 0.4002359 +| epoch 1 | 875/ 5600 batches | test loss 0.4693992 +| epoch 1 | 879/ 5600 batches | test loss 0.4861874 +| epoch 1 | 883/ 5600 batches | test loss 0.4199278 +| epoch 1 | 887/ 5600 batches | test loss 0.5755597 +| epoch 1 | 891/ 5600 batches | test loss 0.4136172 +| epoch 1 | 895/ 5600 batches | test loss 0.4717745 +| epoch 1 | 899/ 5600 batches | test loss 0.3976801 +| epoch 1 | 903/ 5600 batches | test loss 0.4906713 +| epoch 1 | 907/ 5600 batches | test loss 0.3889432 +| epoch 1 | 911/ 5600 batches | test loss 0.4560101 +| epoch 1 | 915/ 5600 batches | test loss 0.4444095 +| epoch 1 | 919/ 5600 batches | test loss 0.3893464 +| epoch 1 | 923/ 5600 batches | test loss 0.4563906 +| epoch 1 | 927/ 5600 batches | test loss 0.4665183 +| epoch 1 | 931/ 5600 batches | test loss 0.5156422 +| epoch 1 | 935/ 5600 batches | test loss 0.3755454 +| epoch 1 | 939/ 5600 batches | test loss 0.3986037 +| epoch 1 | 943/ 5600 batches | test loss 0.4417096 +| epoch 1 | 947/ 5600 batches | test loss 0.5233095 +| epoch 1 | 951/ 5600 batches | test loss 0.4344600 +| epoch 1 | 955/ 5600 batches | test loss 0.4176419 +| epoch 1 | 959/ 5600 batches | test loss 0.3758560 +| epoch 1 | 963/ 5600 batches | test loss 0.5562584 +| epoch 1 | 967/ 5600 batches | test loss 0.4799763 +| epoch 1 | 971/ 5600 batches | test loss 0.4769807 +| epoch 1 | 975/ 5600 batches | test loss 0.4750175 +| epoch 1 | 979/ 5600 batches | test loss 0.4629908 +| epoch 1 | 983/ 5600 batches | test loss 0.4580837 +| epoch 1 | 987/ 5600 batches | test loss 0.4162859 +| epoch 1 | 991/ 5600 batches | test loss 0.4932163 +| epoch 1 | 995/ 5600 batches | test loss 0.4320902 +| epoch 1 | 999/ 5600 batches | test loss 0.4834851 +| epoch 1 | 1003/ 5600 batches | test loss 0.4632693 +| epoch 1 | 1007/ 5600 batches | test loss 0.4710650 +| epoch 1 | 1011/ 5600 batches | test loss 0.4223771 +| epoch 1 | 1015/ 5600 batches | test loss 0.3930092 +| epoch 1 | 1019/ 5600 batches | test loss 0.4687476 +| epoch 1 | 1023/ 5600 batches | test loss 0.4261357 +| epoch 1 | 1027/ 5600 batches | test loss 0.4201626 +| epoch 1 | 1031/ 5600 batches | test loss 0.4200612 +| epoch 1 | 1035/ 5600 batches | test loss 0.3804434 +| epoch 1 | 1039/ 5600 batches | test loss 0.4376196 +| epoch 1 | 1043/ 5600 batches | test loss 0.4339779 +| epoch 1 | 1047/ 5600 batches | test loss 0.4468268 +| epoch 1 | 1051/ 5600 batches | test loss 0.4299601 +| epoch 1 | 1055/ 5600 batches | test loss 0.4141706 +| epoch 1 | 1059/ 5600 batches | test loss 0.4249856 +| epoch 1 | 1063/ 5600 batches | test loss 0.4199311 +| epoch 1 | 1067/ 5600 batches | test loss 0.4524697 +| epoch 1 | 1071/ 5600 batches | test loss 0.5285364 +| epoch 1 | 1075/ 5600 batches | test loss 0.4203615 +| epoch 1 | 1079/ 5600 batches | test loss 0.5182711 +| epoch 1 | 1083/ 5600 batches | test loss 0.5016956 +| epoch 1 | 1087/ 5600 batches | test loss 0.4195539 +| epoch 1 | 1091/ 5600 batches | test loss 0.4619899 +| epoch 1 | 1095/ 5600 batches | test loss 0.4251023 +| epoch 1 | 1099/ 5600 batches | test loss 0.4953088 +| epoch 1 | 1103/ 5600 batches | test loss 0.4116303 +| epoch 1 | 1107/ 5600 batches | test loss 0.6428229 +| epoch 1 | 1111/ 5600 batches | test loss 0.4631975 +| epoch 1 | 1115/ 5600 batches | test loss 0.4227533 +| epoch 1 | 1119/ 5600 batches | test loss 0.3544821 +| epoch 1 | 1123/ 5600 batches | test loss 0.5095030 +| epoch 1 | 1127/ 5600 batches | test loss 0.4785182 +| epoch 1 | 1131/ 5600 batches | test loss 0.3940267 +| epoch 1 | 1135/ 5600 batches | test loss 0.5863269 +| epoch 1 | 1139/ 5600 batches | test loss 0.4701074 +| epoch 1 | 1143/ 5600 batches | test loss 0.5105058 +| epoch 1 | 1147/ 5600 batches | test loss 0.4289939 +| epoch 1 | 1151/ 5600 batches | test loss 0.4268473 +| epoch 1 | 1155/ 5600 batches | test loss 0.4008879 +| epoch 1 | 1159/ 5600 batches | test loss 0.4382528 +| epoch 1 | 1163/ 5600 batches | test loss 0.4834513 +| epoch 1 | 1167/ 5600 batches | test loss 0.4469658 +| epoch 1 | 1171/ 5600 batches | test loss 0.4448043 +| epoch 1 | 1175/ 5600 batches | test loss 0.4726873 +| epoch 1 | 1179/ 5600 batches | test loss 0.4737349 +| epoch 1 | 1183/ 5600 batches | test loss 0.3691912 +| epoch 1 | 1187/ 5600 batches | test loss 0.4466630 +| epoch 1 | 1191/ 5600 batches | test loss 0.4826936 +| epoch 1 | 1195/ 5600 batches | test loss 0.4853628 +| epoch 1 | 1199/ 5600 batches | test loss 0.5492260 +| epoch 1 | 1203/ 5600 batches | test loss 0.4935898 +| epoch 1 | 1207/ 5600 batches | test loss 0.4755994 +| epoch 1 | 1211/ 5600 batches | test loss 0.4065962 +| epoch 1 | 1215/ 5600 batches | test loss 0.4733995 +| epoch 1 | 1219/ 5600 batches | test loss 0.3982493 +| epoch 1 | 1223/ 5600 batches | test loss 0.4188954 +| epoch 1 | 1227/ 5600 batches | test loss 0.4160486 +| epoch 1 | 1231/ 5600 batches | test loss 0.4337700 +| epoch 1 | 1235/ 5600 batches | test loss 0.4496860 +| epoch 1 | 1239/ 5600 batches | test loss 0.3716748 +| epoch 1 | 1243/ 5600 batches | test loss 0.4991866 +| epoch 1 | 1247/ 5600 batches | test loss 0.4818730 +| epoch 1 | 1251/ 5600 batches | test loss 0.3525522 +| epoch 1 | 1255/ 5600 batches | test loss 0.3975428 +| epoch 1 | 1259/ 5600 batches | test loss 0.4574915 +| epoch 1 | 1263/ 5600 batches | test loss 0.4249626 +| epoch 1 | 1267/ 5600 batches | test loss 0.4743510 +| epoch 1 | 1271/ 5600 batches | test loss 0.5323205 +| epoch 1 | 1275/ 5600 batches | test loss 0.4724395 +| epoch 1 | 1279/ 5600 batches | test loss 0.4413972 +| epoch 1 | 1283/ 5600 batches | test loss 0.5005608 +| epoch 1 | 1287/ 5600 batches | test loss 0.4078408 +| epoch 1 | 1291/ 5600 batches | test loss 0.4778053 +| epoch 1 | 1295/ 5600 batches | test loss 0.4338089 +| epoch 1 | 1299/ 5600 batches | test loss 0.4431087 +| epoch 1 | 1303/ 5600 batches | test loss 0.4629383 +| epoch 1 | 1307/ 5600 batches | test loss 0.4166628 +| epoch 1 | 1311/ 5600 batches | test loss 0.3618328 +| epoch 1 | 1315/ 5600 batches | test loss 0.4919462 +| epoch 1 | 1319/ 5600 batches | test loss 0.4671009 +| epoch 1 | 1323/ 5600 batches | test loss 0.4854103 +| epoch 1 | 1327/ 5600 batches | test loss 0.3874090 +| epoch 1 | 1331/ 5600 batches | test loss 0.4488173 +| epoch 1 | 1335/ 5600 batches | test loss 0.4231798 +| epoch 1 | 1339/ 5600 batches | test loss 0.4413936 +| epoch 1 | 1343/ 5600 batches | test loss 0.4633380 +| epoch 1 | 1347/ 5600 batches | test loss 0.5428691 +| epoch 1 | 1351/ 5600 batches | test loss 0.4874304 +| epoch 1 | 1355/ 5600 batches | test loss 0.5034043 +| epoch 1 | 1359/ 5600 batches | test loss 0.5187913 +| epoch 1 | 1363/ 5600 batches | test loss 0.5066669 +| epoch 1 | 1367/ 5600 batches | test loss 0.4545139 +| epoch 1 | 1371/ 5600 batches | test loss 0.4718704 +| epoch 1 | 1375/ 5600 batches | test loss 0.4080411 +| epoch 1 | 1379/ 5600 batches | test loss 0.4261133 +| epoch 1 | 1383/ 5600 batches | test loss 0.5262358 +| epoch 1 | 1387/ 5600 batches | test loss 0.4479322 +| epoch 1 | 1391/ 5600 batches | test loss 0.4310757 +| epoch 1 | 1395/ 5600 batches | test loss 0.4676135 +| epoch 1 | 1399/ 5600 batches | test loss 0.4120118 +| epoch 1 | final test loss 0.4553, save model! +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 5600 batches | train loss 0.4846484 +| epoch 2 | 7/ 5600 batches | train loss 0.4191690 +| epoch 2 | 11/ 5600 batches | train loss 0.4079680 +| epoch 2 | 15/ 5600 batches | train loss 0.4244682 +| epoch 2 | 19/ 5600 batches | train loss 0.4322517 +| epoch 2 | 23/ 5600 batches | train loss 0.3049103 +| epoch 2 | 27/ 5600 batches | train loss 0.4787770 +| epoch 2 | 31/ 5600 batches | train loss 0.4819290 +| epoch 2 | 35/ 5600 batches | train loss 0.4135132 +| epoch 2 | 39/ 5600 batches | train loss 0.4792550 +| epoch 2 | 43/ 5600 batches | train loss 0.4459303 +| epoch 2 | 47/ 5600 batches | train loss 0.4764750 +| epoch 2 | 51/ 5600 batches | train loss 0.4321368 +| epoch 2 | 55/ 5600 batches | train loss 0.4587001 +| epoch 2 | 59/ 5600 batches | train loss 0.4385103 +| epoch 2 | 63/ 5600 batches | train loss 0.4343700 +| epoch 2 | 67/ 5600 batches | train loss 0.4094712 +| epoch 2 | 71/ 5600 batches | train loss 0.5201941 +| epoch 2 | 75/ 5600 batches | train loss 0.4942226 +| epoch 2 | 79/ 5600 batches | train loss 0.4035313 +| epoch 2 | 83/ 5600 batches | train loss 0.5237136 +| epoch 2 | 87/ 5600 batches | train loss 0.4425309 +| epoch 2 | 91/ 5600 batches | train loss 0.4369817 +| epoch 2 | 95/ 5600 batches | train loss 0.4372985 +| epoch 2 | 99/ 5600 batches | train loss 0.4978276 +| epoch 2 | 103/ 5600 batches | train loss 0.3869736 +| epoch 2 | 107/ 5600 batches | train loss 0.3747490 +| epoch 2 | 111/ 5600 batches | train loss 0.4229427 +| epoch 2 | 115/ 5600 batches | train loss 0.4181295 +| epoch 2 | 119/ 5600 batches | train loss 0.5363687 +| epoch 2 | 123/ 5600 batches | train loss 0.4342864 +| epoch 2 | 127/ 5600 batches | train loss 0.4468445 +| epoch 2 | 131/ 5600 batches | train loss 0.5033332 +| epoch 2 | 135/ 5600 batches | train loss 0.4816266 +| epoch 2 | 139/ 5600 batches | train loss 0.4096197 +| epoch 2 | 143/ 5600 batches | train loss 0.3901384 +| epoch 2 | 147/ 5600 batches | train loss 0.4696940 +| epoch 2 | 151/ 5600 batches | train loss 0.4980974 +| epoch 2 | 155/ 5600 batches | train loss 0.4611664 +| epoch 2 | 159/ 5600 batches | train loss 0.4355647 +| epoch 2 | 163/ 5600 batches | train loss 0.4514006 +| epoch 2 | 167/ 5600 batches | train loss 0.4044395 +| epoch 2 | 171/ 5600 batches | train loss 0.2290988 +| epoch 2 | 175/ 5600 batches | train loss 0.4238434 +| epoch 2 | 179/ 5600 batches | train loss 0.4320198 +| epoch 2 | 183/ 5600 batches | train loss 0.4807873 +| epoch 2 | 187/ 5600 batches | train loss 0.4358228 +| epoch 2 | 191/ 5600 batches | train loss 0.3950376 +| epoch 2 | 195/ 5600 batches | train loss 0.3719571 +| epoch 2 | 199/ 5600 batches | train loss 0.4471626 +| epoch 2 | 203/ 5600 batches | train loss 0.4384392 +| epoch 2 | 207/ 5600 batches | train loss 0.4212983 +| epoch 2 | 211/ 5600 batches | train loss 0.4802123 +| epoch 2 | 215/ 5600 batches | train loss 0.4859935 +| epoch 2 | 219/ 5600 batches | train loss 0.3336306 +| epoch 2 | 223/ 5600 batches | train loss 0.4344713 +| epoch 2 | 227/ 5600 batches | train loss 0.4379914 +| epoch 2 | 231/ 5600 batches | train loss 0.4274454 +| epoch 2 | 235/ 5600 batches | train loss 0.4693549 +| epoch 2 | 239/ 5600 batches | train loss 0.4814959 +| epoch 2 | 243/ 5600 batches | train loss 0.5082788 +| epoch 2 | 247/ 5600 batches | train loss 0.4696269 +| epoch 2 | 251/ 5600 batches | train loss 0.4277343 +| epoch 2 | 255/ 5600 batches | train loss 0.4404748 +| epoch 2 | 259/ 5600 batches | train loss 0.5002291 +| epoch 2 | 263/ 5600 batches | train loss 0.4874319 +| epoch 2 | 267/ 5600 batches | train loss 0.3762930 +| epoch 2 | 271/ 5600 batches | train loss 0.3471134 +| epoch 2 | 275/ 5600 batches | train loss 0.3590412 +| epoch 2 | 279/ 5600 batches | train loss 0.4154926 +| epoch 2 | 283/ 5600 batches | train loss 0.4733720 +| epoch 2 | 287/ 5600 batches | train loss 0.4317936 +| epoch 2 | 291/ 5600 batches | train loss 0.4434558 +| epoch 2 | 295/ 5600 batches | train loss 0.3658080 +| epoch 2 | 299/ 5600 batches | train loss 0.4138424 +| epoch 2 | 303/ 5600 batches | train loss 0.4246335 +| epoch 2 | 307/ 5600 batches | train loss 0.4226875 +| epoch 2 | 311/ 5600 batches | train loss 0.4432138 +| epoch 2 | 315/ 5600 batches | train loss 0.4086466 +| epoch 2 | 319/ 5600 batches | train loss 0.4230721 +| epoch 2 | 323/ 5600 batches | train loss 0.4275479 +| epoch 2 | 327/ 5600 batches | train loss 0.2239931 +| epoch 2 | 331/ 5600 batches | train loss 0.4934379 +| epoch 2 | 335/ 5600 batches | train loss 0.5564194 +| epoch 2 | 339/ 5600 batches | train loss 0.5922652 +| epoch 2 | 343/ 5600 batches | train loss 0.4770417 +| epoch 2 | 347/ 5600 batches | train loss 0.4314607 +| epoch 2 | 351/ 5600 batches | train loss 0.4437340 +| epoch 2 | 355/ 5600 batches | train loss 0.4146106 +| epoch 2 | 359/ 5600 batches | train loss 0.4350353 +| epoch 2 | 363/ 5600 batches | train loss 0.4555013 +| epoch 2 | 367/ 5600 batches | train loss 0.4317573 +| epoch 2 | 371/ 5600 batches | train loss 0.4249922 +| epoch 2 | 375/ 5600 batches | train loss 0.3742818 +| epoch 2 | 379/ 5600 batches | train loss 0.3852099 +| epoch 2 | 383/ 5600 batches | train loss 0.5106187 +| epoch 2 | 387/ 5600 batches | train loss 0.5152810 +| epoch 2 | 391/ 5600 batches | train loss 0.5027899 +| epoch 2 | 395/ 5600 batches | train loss 0.4452178 +| epoch 2 | 399/ 5600 batches | train loss 0.4350626 +| epoch 2 | 403/ 5600 batches | train loss 0.3606626 +| epoch 2 | 407/ 5600 batches | train loss 0.5402102 +| epoch 2 | 411/ 5600 batches | train loss 0.5315328 +| epoch 2 | 415/ 5600 batches | train loss 0.4573919 +| epoch 2 | 419/ 5600 batches | train loss 0.4256342 +| epoch 2 | 423/ 5600 batches | train loss 0.4781439 +| epoch 2 | 427/ 5600 batches | train loss 0.4905997 +| epoch 2 | 431/ 5600 batches | train loss 0.4388587 +| epoch 2 | 435/ 5600 batches | train loss 0.3483134 +| epoch 2 | 439/ 5600 batches | train loss 0.5063794 +| epoch 2 | 443/ 5600 batches | train loss 0.5265402 +| epoch 2 | 447/ 5600 batches | train loss 0.5590013 +| epoch 2 | 451/ 5600 batches | train loss 0.3815675 +| epoch 2 | 455/ 5600 batches | train loss 0.4043280 +| epoch 2 | 459/ 5600 batches | train loss 0.3859677 +| epoch 2 | 463/ 5600 batches | train loss 0.4428357 +| epoch 2 | 467/ 5600 batches | train loss 0.4290812 +| epoch 2 | 471/ 5600 batches | train loss 0.4493035 +| epoch 2 | 475/ 5600 batches | train loss 0.3875586 +| epoch 2 | 479/ 5600 batches | train loss 0.3989415 +| epoch 2 | 483/ 5600 batches | train loss 0.4694184 +| epoch 2 | 487/ 5600 batches | train loss 0.4215510 +| epoch 2 | 491/ 5600 batches | train loss 0.3316494 +| epoch 2 | 495/ 5600 batches | train loss 0.4660629 +| epoch 2 | 499/ 5600 batches | train loss 0.4678427 +| epoch 2 | 503/ 5600 batches | train loss 0.4407607 +| epoch 2 | 507/ 5600 batches | train loss 0.4469196 +| epoch 2 | 511/ 5600 batches | train loss 0.3252557 +| epoch 2 | 515/ 5600 batches | train loss 0.4381378 +| epoch 2 | 519/ 5600 batches | train loss 0.4675310 +| epoch 2 | 523/ 5600 batches | train loss 0.5025362 +| epoch 2 | 527/ 5600 batches | train loss 0.4359919 +| epoch 2 | 531/ 5600 batches | train loss 0.6392128 +| epoch 2 | 535/ 5600 batches | train loss 0.4443228 +| epoch 2 | 539/ 5600 batches | train loss 0.4602851 +| epoch 2 | 543/ 5600 batches | train loss 0.4182498 +| epoch 2 | 547/ 5600 batches | train loss 0.4264001 +| epoch 2 | 551/ 5600 batches | train loss 0.3903968 +| epoch 2 | 555/ 5600 batches | train loss 0.4065478 +| epoch 2 | 559/ 5600 batches | train loss 0.4305822 +| epoch 2 | 563/ 5600 batches | train loss 0.4256210 +| epoch 2 | 567/ 5600 batches | train loss 0.3980613 +| epoch 2 | 571/ 5600 batches | train loss 0.3790136 +| epoch 2 | 575/ 5600 batches | train loss 0.4079875 +| epoch 2 | 579/ 5600 batches | train loss 0.4218259 +| epoch 2 | 583/ 5600 batches | train loss 0.4417951 +| epoch 2 | 587/ 5600 batches | train loss 0.4343023 +| epoch 2 | 591/ 5600 batches | train loss 0.3915465 +| epoch 2 | 595/ 5600 batches | train loss 0.5393430 +| epoch 2 | 599/ 5600 batches | train loss 0.3817525 +| epoch 2 | 603/ 5600 batches | train loss 0.4773142 +| epoch 2 | 607/ 5600 batches | train loss 0.3416865 +| epoch 2 | 611/ 5600 batches | train loss 0.5333049 +| epoch 2 | 615/ 5600 batches | train loss 0.4449773 +| epoch 2 | 619/ 5600 batches | train loss 0.3848871 +| epoch 2 | 623/ 5600 batches | train loss 0.4352831 +| epoch 2 | 627/ 5600 batches | train loss 0.4903785 +| epoch 2 | 631/ 5600 batches | train loss 0.4474512 +| epoch 2 | 635/ 5600 batches | train loss 0.4190648 +| epoch 2 | 639/ 5600 batches | train loss 0.4440147 +| epoch 2 | 643/ 5600 batches | train loss 0.3972835 +| epoch 2 | 647/ 5600 batches | train loss 0.4268103 +| epoch 2 | 651/ 5600 batches | train loss 0.4698091 +| epoch 2 | 655/ 5600 batches | train loss 0.5208402 +| epoch 2 | 659/ 5600 batches | train loss 0.4987274 +| epoch 2 | 663/ 5600 batches | train loss 0.4130062 +| epoch 2 | 667/ 5600 batches | train loss 0.4013564 +| epoch 2 | 671/ 5600 batches | train loss 0.4359338 +| epoch 2 | 675/ 5600 batches | train loss 0.4796363 +| epoch 2 | 679/ 5600 batches | train loss 0.4012409 +| epoch 2 | 683/ 5600 batches | train loss 0.4155603 +| epoch 2 | 687/ 5600 batches | train loss 0.4270967 +| epoch 2 | 691/ 5600 batches | train loss 0.5034668 +| epoch 2 | 695/ 5600 batches | train loss 0.4960203 +| epoch 2 | 699/ 5600 batches | train loss 0.3694646 +| epoch 2 | 703/ 5600 batches | train loss 0.3785395 +| epoch 2 | 707/ 5600 batches | train loss 0.6295956 +| epoch 2 | 711/ 5600 batches | train loss 0.4581585 +| epoch 2 | 715/ 5600 batches | train loss 0.4164565 +| epoch 2 | 719/ 5600 batches | train loss 0.5102288 +| epoch 2 | 723/ 5600 batches | train loss 0.5842186 +| epoch 2 | 727/ 5600 batches | train loss 0.5269750 +| epoch 2 | 731/ 5600 batches | train loss 0.3818576 +| epoch 2 | 735/ 5600 batches | train loss 0.4639589 +| epoch 2 | 739/ 5600 batches | train loss 0.4774276 +| epoch 2 | 743/ 5600 batches | train loss 0.4775080 +| epoch 2 | 747/ 5600 batches | train loss 0.4681529 +| epoch 2 | 751/ 5600 batches | train loss 0.4718078 +| epoch 2 | 755/ 5600 batches | train loss 0.4441060 +| epoch 2 | 759/ 5600 batches | train loss 0.4460973 +| epoch 2 | 763/ 5600 batches | train loss 0.3780971 +| epoch 2 | 767/ 5600 batches | train loss 0.3819444 +| epoch 2 | 771/ 5600 batches | train loss 0.4142941 +| epoch 2 | 775/ 5600 batches | train loss 0.4715832 +| epoch 2 | 779/ 5600 batches | train loss 0.5112577 +| epoch 2 | 783/ 5600 batches | train loss 0.5650378 +| epoch 2 | 787/ 5600 batches | train loss 0.3373156 +| epoch 2 | 791/ 5600 batches | train loss 0.4155394 +| epoch 2 | 795/ 5600 batches | train loss 0.4149629 +| epoch 2 | 799/ 5600 batches | train loss 0.4375308 +| epoch 2 | 803/ 5600 batches | train loss 0.5040358 +| epoch 2 | 807/ 5600 batches | train loss 0.4853462 +| epoch 2 | 811/ 5600 batches | train loss 0.4041343 +| epoch 2 | 815/ 5600 batches | train loss 0.4876938 +| epoch 2 | 819/ 5600 batches | train loss 0.5361857 +| epoch 2 | 823/ 5600 batches | train loss 0.4256513 +| epoch 2 | 827/ 5600 batches | train loss 0.3330273 +| epoch 2 | 831/ 5600 batches | train loss 0.5859827 +| epoch 2 | 835/ 5600 batches | train loss 0.4699035 +| epoch 2 | 839/ 5600 batches | train loss 0.4370855 +| epoch 2 | 843/ 5600 batches | train loss 0.4225487 +| epoch 2 | 847/ 5600 batches | train loss 0.4594650 +| epoch 2 | 851/ 5600 batches | train loss 0.5100259 +| epoch 2 | 855/ 5600 batches | train loss 0.4503745 +| epoch 2 | 859/ 5600 batches | train loss 0.4047478 +| epoch 2 | 863/ 5600 batches | train loss 0.3950934 +| epoch 2 | 867/ 5600 batches | train loss 0.5021706 +| epoch 2 | 871/ 5600 batches | train loss 0.5445446 +| epoch 2 | 875/ 5600 batches | train loss 0.4963531 +| epoch 2 | 879/ 5600 batches | train loss 0.4763684 +| epoch 2 | 883/ 5600 batches | train loss 0.3598472 +| epoch 2 | 887/ 5600 batches | train loss 0.3534974 +| epoch 2 | 891/ 5600 batches | train loss 0.3472953 +| epoch 2 | 895/ 5600 batches | train loss 0.3351049 +| epoch 2 | 899/ 5600 batches | train loss 0.3977386 +| epoch 2 | 903/ 5600 batches | train loss 0.4313419 +| epoch 2 | 907/ 5600 batches | train loss 0.4469215 +| epoch 2 | 911/ 5600 batches | train loss 0.4066998 +| epoch 2 | 915/ 5600 batches | train loss 0.4699729 +| epoch 2 | 919/ 5600 batches | train loss 0.4133019 +| epoch 2 | 923/ 5600 batches | train loss 0.4949882 +| epoch 2 | 927/ 5600 batches | train loss 0.4033630 +| epoch 2 | 931/ 5600 batches | train loss 0.4201933 +| epoch 2 | 935/ 5600 batches | train loss 0.5437998 +| epoch 2 | 939/ 5600 batches | train loss 0.4410938 +| epoch 2 | 943/ 5600 batches | train loss 0.4320232 +| epoch 2 | 947/ 5600 batches | train loss 0.4649370 +| epoch 2 | 951/ 5600 batches | train loss 0.4450359 +| epoch 2 | 955/ 5600 batches | train loss 0.4970894 +| epoch 2 | 959/ 5600 batches | train loss 0.4610034 +| epoch 2 | 963/ 5600 batches | train loss 0.4603431 +| epoch 2 | 967/ 5600 batches | train loss 0.5359393 +| epoch 2 | 971/ 5600 batches | train loss 0.4138923 +| epoch 2 | 975/ 5600 batches | train loss 0.3628987 +| epoch 2 | 979/ 5600 batches | train loss 0.5093364 +| epoch 2 | 983/ 5600 batches | train loss 0.4502822 +| epoch 2 | 987/ 5600 batches | train loss 0.3882629 +| epoch 2 | 991/ 5600 batches | train loss 0.5343184 +| epoch 2 | 995/ 5600 batches | train loss 0.4197324 +| epoch 2 | 999/ 5600 batches | train loss 0.4806952 +| epoch 2 | 1003/ 5600 batches | train loss 0.4304427 +| epoch 2 | 1007/ 5600 batches | train loss 0.3604002 +| epoch 2 | 1011/ 5600 batches | train loss 0.4255323 +| epoch 2 | 1015/ 5600 batches | train loss 0.4994697 +| epoch 2 | 1019/ 5600 batches | train loss 0.4437109 +| epoch 2 | 1023/ 5600 batches | train loss 0.4593057 +| epoch 2 | 1027/ 5600 batches | train loss 0.4075318 +| epoch 2 | 1031/ 5600 batches | train loss 0.4222036 +| epoch 2 | 1035/ 5600 batches | train loss 0.4928293 +| epoch 2 | 1039/ 5600 batches | train loss 0.4189730 +| epoch 2 | 1043/ 5600 batches | train loss 0.3366627 +| epoch 2 | 1047/ 5600 batches | train loss 0.4904363 +| epoch 2 | 1051/ 5600 batches | train loss 0.4442919 +| epoch 2 | 1055/ 5600 batches | train loss 0.4831916 +| epoch 2 | 1059/ 5600 batches | train loss 0.3972204 +| epoch 2 | 1063/ 5600 batches | train loss 0.4435898 +| epoch 2 | 1067/ 5600 batches | train loss 0.4172654 +| epoch 2 | 1071/ 5600 batches | train loss 0.3751013 +| epoch 2 | 1075/ 5600 batches | train loss 0.4152104 +| epoch 2 | 1079/ 5600 batches | train loss 0.4448097 +| epoch 2 | 1083/ 5600 batches | train loss 0.5249475 +| epoch 2 | 1087/ 5600 batches | train loss 0.4034808 +| epoch 2 | 1091/ 5600 batches | train loss 0.5792423 +| epoch 2 | 1095/ 5600 batches | train loss 0.4274521 +| epoch 2 | 1099/ 5600 batches | train loss 0.3947657 +| epoch 2 | 1103/ 5600 batches | train loss 0.3896069 +| epoch 2 | 1107/ 5600 batches | train loss 0.5771289 +| epoch 2 | 1111/ 5600 batches | train loss 0.5036529 +| epoch 2 | 1115/ 5600 batches | train loss 0.4211364 +| epoch 2 | 1119/ 5600 batches | train loss 0.4267647 +| epoch 2 | 1123/ 5600 batches | train loss 0.4477659 +| epoch 2 | 1127/ 5600 batches | train loss 0.3623011 +| epoch 2 | 1131/ 5600 batches | train loss 0.4907534 +| epoch 2 | 1135/ 5600 batches | train loss 0.5141941 +| epoch 2 | 1139/ 5600 batches | train loss 0.4930991 +| epoch 2 | 1143/ 5600 batches | train loss 0.3538750 +| epoch 2 | 1147/ 5600 batches | train loss 0.4933242 +| epoch 2 | 1151/ 5600 batches | train loss 0.4805239 +| epoch 2 | 1155/ 5600 batches | train loss 0.5020774 +| epoch 2 | 1159/ 5600 batches | train loss 0.4381992 +| epoch 2 | 1163/ 5600 batches | train loss 0.4730418 +| epoch 2 | 1167/ 5600 batches | train loss 0.4193426 +| epoch 2 | 1171/ 5600 batches | train loss 0.3788349 +| epoch 2 | 1175/ 5600 batches | train loss 0.5590099 +| epoch 2 | 1179/ 5600 batches | train loss 0.4104763 +| epoch 2 | 1183/ 5600 batches | train loss 0.3986649 +| epoch 2 | 1187/ 5600 batches | train loss 0.4721454 +| epoch 2 | 1191/ 5600 batches | train loss 0.3788531 +| epoch 2 | 1195/ 5600 batches | train loss 0.4328271 +| epoch 2 | 1199/ 5600 batches | train loss 0.4719958 +| epoch 2 | 1203/ 5600 batches | train loss 0.5335300 +| epoch 2 | 1207/ 5600 batches | train loss 0.4768466 +| epoch 2 | 1211/ 5600 batches | train loss 0.4696224 +| epoch 2 | 1215/ 5600 batches | train loss 0.5303708 +| epoch 2 | 1219/ 5600 batches | train loss 0.4067073 +| epoch 2 | 1223/ 5600 batches | train loss 0.6096076 +| epoch 2 | 1227/ 5600 batches | train loss 0.5392537 +| epoch 2 | 1231/ 5600 batches | train loss 0.5084476 +| epoch 2 | 1235/ 5600 batches | train loss 0.4960791 +| epoch 2 | 1239/ 5600 batches | train loss 0.4335454 +| epoch 2 | 1243/ 5600 batches | train loss 0.4030571 +| epoch 2 | 1247/ 5600 batches | train loss 0.3356515 +| epoch 2 | 1251/ 5600 batches | train loss 0.4378895 +| epoch 2 | 1255/ 5600 batches | train loss 0.4544784 +| epoch 2 | 1259/ 5600 batches | train loss 0.4549943 +| epoch 2 | 1263/ 5600 batches | train loss 0.3869049 +| epoch 2 | 1267/ 5600 batches | train loss 0.4477110 +| epoch 2 | 1271/ 5600 batches | train loss 0.4496360 +| epoch 2 | 1275/ 5600 batches | train loss 0.5608750 +| epoch 2 | 1279/ 5600 batches | train loss 0.3892668 +| epoch 2 | 1283/ 5600 batches | train loss 0.3369813 +| epoch 2 | 1287/ 5600 batches | train loss 0.5007370 +| epoch 2 | 1291/ 5600 batches | train loss 0.3585670 +| epoch 2 | 1295/ 5600 batches | train loss 0.5307053 +| epoch 2 | 1299/ 5600 batches | train loss 0.4416847 +| epoch 2 | 1303/ 5600 batches | train loss 0.4760743 +| epoch 2 | 1307/ 5600 batches | train loss 0.4729204 +| epoch 2 | 1311/ 5600 batches | train loss 0.4197509 +| epoch 2 | 1315/ 5600 batches | train loss 0.4311893 +| epoch 2 | 1319/ 5600 batches | train loss 0.3714786 +| epoch 2 | 1323/ 5600 batches | train loss 0.4038488 +| epoch 2 | 1327/ 5600 batches | train loss 0.4209963 +| epoch 2 | 1331/ 5600 batches | train loss 0.3419066 +| epoch 2 | 1335/ 5600 batches | train loss 0.3483582 +| epoch 2 | 1339/ 5600 batches | train loss 0.4721019 +| epoch 2 | 1343/ 5600 batches | train loss 0.5210156 +| epoch 2 | 1347/ 5600 batches | train loss 0.4555019 +| epoch 2 | 1351/ 5600 batches | train loss 0.4945139 +| epoch 2 | 1355/ 5600 batches | train loss 0.4686832 +| epoch 2 | 1359/ 5600 batches | train loss 0.4020048 +| epoch 2 | 1363/ 5600 batches | train loss 0.3762025 +| epoch 2 | 1367/ 5600 batches | train loss 0.4684259 +| epoch 2 | 1371/ 5600 batches | train loss 0.4100624 +| epoch 2 | 1375/ 5600 batches | train loss 0.3854966 +| epoch 2 | 1379/ 5600 batches | train loss 0.4200141 +| epoch 2 | 1383/ 5600 batches | train loss 0.5074292 +| epoch 2 | 1387/ 5600 batches | train loss 0.4862003 +| epoch 2 | 1391/ 5600 batches | train loss 0.4822383 +| epoch 2 | 1395/ 5600 batches | train loss 0.4116879 +| epoch 2 | 1399/ 5600 batches | train loss 0.4807755 +| epoch 2 | 1403/ 5600 batches | train loss 0.4049657 +| epoch 2 | 1407/ 5600 batches | train loss 0.3760501 +| epoch 2 | 1411/ 5600 batches | train loss 0.3868255 +| epoch 2 | 1415/ 5600 batches | train loss 0.4707948 +| epoch 2 | 1419/ 5600 batches | train loss 0.4593213 +| epoch 2 | 1423/ 5600 batches | train loss 0.3284283 +| epoch 2 | 1427/ 5600 batches | train loss 0.4007240 +| epoch 2 | 1431/ 5600 batches | train loss 0.4425900 +| epoch 2 | 1435/ 5600 batches | train loss 0.4947866 +| epoch 2 | 1439/ 5600 batches | train loss 0.5471636 +| epoch 2 | 1443/ 5600 batches | train loss 0.4191665 +| epoch 2 | 1447/ 5600 batches | train loss 0.4269090 +| epoch 2 | 1451/ 5600 batches | train loss 0.4552841 +| epoch 2 | 1455/ 5600 batches | train loss 0.4904104 +| epoch 2 | 1459/ 5600 batches | train loss 0.4522267 +| epoch 2 | 1463/ 5600 batches | train loss 0.5477582 +| epoch 2 | 1467/ 5600 batches | train loss 0.4758688 +| epoch 2 | 1471/ 5600 batches | train loss 0.3670383 +| epoch 2 | 1475/ 5600 batches | train loss 0.4201589 +| epoch 2 | 1479/ 5600 batches | train loss 0.4143091 +| epoch 2 | 1483/ 5600 batches | train loss 0.5106770 +| epoch 2 | 1487/ 5600 batches | train loss 0.4691057 +| epoch 2 | 1491/ 5600 batches | train loss 0.5087088 +| epoch 2 | 1495/ 5600 batches | train loss 0.4403791 +| epoch 2 | 1499/ 5600 batches | train loss 0.4343850 +| epoch 2 | 1503/ 5600 batches | train loss 0.4580504 +| epoch 2 | 1507/ 5600 batches | train loss 0.5072972 +| epoch 2 | 1511/ 5600 batches | train loss 0.5024029 +| epoch 2 | 1515/ 5600 batches | train loss 0.4776098 +| epoch 2 | 1519/ 5600 batches | train loss 0.5515192 +| epoch 2 | 1523/ 5600 batches | train loss 0.4530214 +| epoch 2 | 1527/ 5600 batches | train loss 0.5007383 +| epoch 2 | 1531/ 5600 batches | train loss 0.4676723 +| epoch 2 | 1535/ 5600 batches | train loss 0.4719065 +| epoch 2 | 1539/ 5600 batches | train loss 0.3801130 +| epoch 2 | 1543/ 5600 batches | train loss 0.3886684 +| epoch 2 | 1547/ 5600 batches | train loss 0.4533329 +| epoch 2 | 1551/ 5600 batches | train loss 0.3640497 +| epoch 2 | 1555/ 5600 batches | train loss 0.4894974 +| epoch 2 | 1559/ 5600 batches | train loss 0.4540650 +| epoch 2 | 1563/ 5600 batches | train loss 0.4642456 +| epoch 2 | 1567/ 5600 batches | train loss 0.5693934 +| epoch 2 | 1571/ 5600 batches | train loss 0.3716733 +| epoch 2 | 1575/ 5600 batches | train loss 0.3807311 +| epoch 2 | 1579/ 5600 batches | train loss 0.3522666 +| epoch 2 | 1583/ 5600 batches | train loss 0.4336077 +| epoch 2 | 1587/ 5600 batches | train loss 0.4056845 +| epoch 2 | 1591/ 5600 batches | train loss 0.3529910 +| epoch 2 | 1595/ 5600 batches | train loss 0.3599088 +| epoch 2 | 1599/ 5600 batches | train loss 0.4745892 +| epoch 2 | 1603/ 5600 batches | train loss 0.5399377 +| epoch 2 | 1607/ 5600 batches | train loss 0.5260139 +| epoch 2 | 1611/ 5600 batches | train loss 0.3839997 +| epoch 2 | 1615/ 5600 batches | train loss 0.5471377 +| epoch 2 | 1619/ 5600 batches | train loss 0.5087878 +| epoch 2 | 1623/ 5600 batches | train loss 0.4419333 +| epoch 2 | 1627/ 5600 batches | train loss 0.4174722 +| epoch 2 | 1631/ 5600 batches | train loss 0.3954589 +| epoch 2 | 1635/ 5600 batches | train loss 0.4029144 +| epoch 2 | 1639/ 5600 batches | train loss 0.4555044 +| epoch 2 | 1643/ 5600 batches | train loss 0.4505544 +| epoch 2 | 1647/ 5600 batches | train loss 0.5182030 +| epoch 2 | 1651/ 5600 batches | train loss 0.6344712 +| epoch 2 | 1655/ 5600 batches | train loss 0.4870257 +| epoch 2 | 1659/ 5600 batches | train loss 0.3850893 +| epoch 2 | 1663/ 5600 batches | train loss 0.4107230 +| epoch 2 | 1667/ 5600 batches | train loss 0.5660569 +| epoch 2 | 1671/ 5600 batches | train loss 0.4192564 +| epoch 2 | 1675/ 5600 batches | train loss 0.5484267 +| epoch 2 | 1679/ 5600 batches | train loss 0.3604199 +| epoch 2 | 1683/ 5600 batches | train loss 0.3999890 +| epoch 2 | 1687/ 5600 batches | train loss 0.4463496 +| epoch 2 | 1691/ 5600 batches | train loss 0.4592643 +| epoch 2 | 1695/ 5600 batches | train loss 0.4125516 +| epoch 2 | 1699/ 5600 batches | train loss 0.5088005 +| epoch 2 | 1703/ 5600 batches | train loss 0.4261331 +| epoch 2 | 1707/ 5600 batches | train loss 0.3752467 +| epoch 2 | 1711/ 5600 batches | train loss 0.4440436 +| epoch 2 | 1715/ 5600 batches | train loss 0.3493464 +| epoch 2 | 1719/ 5600 batches | train loss 0.4482471 +| epoch 2 | 1723/ 5600 batches | train loss 0.3679597 +| epoch 2 | 1727/ 5600 batches | train loss 0.4191869 +| epoch 2 | 1731/ 5600 batches | train loss 0.4715227 +| epoch 2 | 1735/ 5600 batches | train loss 0.4326045 +| epoch 2 | 1739/ 5600 batches | train loss 0.5229341 +| epoch 2 | 1743/ 5600 batches | train loss 0.5125259 +| epoch 2 | 1747/ 5600 batches | train loss 0.4308843 +| epoch 2 | 1751/ 5600 batches | train loss 0.3329382 +| epoch 2 | 1755/ 5600 batches | train loss 0.4249846 +| epoch 2 | 1759/ 5600 batches | train loss 0.4573545 +| epoch 2 | 1763/ 5600 batches | train loss 0.4919050 +| epoch 2 | 1767/ 5600 batches | train loss 0.4419567 +| epoch 2 | 1771/ 5600 batches | train loss 0.4578049 +| epoch 2 | 1775/ 5600 batches | train loss 0.4607700 +| epoch 2 | 1779/ 5600 batches | train loss 0.4605191 +| epoch 2 | 1783/ 5600 batches | train loss 0.3912696 +| epoch 2 | 1787/ 5600 batches | train loss 0.4704836 +| epoch 2 | 1791/ 5600 batches | train loss 0.4366079 +| epoch 2 | 1795/ 5600 batches | train loss 0.4172873 +| epoch 2 | 1799/ 5600 batches | train loss 0.4537373 +| epoch 2 | 1803/ 5600 batches | train loss 0.4563973 +| epoch 2 | 1807/ 5600 batches | train loss 0.5043658 +| epoch 2 | 1811/ 5600 batches | train loss 0.4142904 +| epoch 2 | 1815/ 5600 batches | train loss 0.4615920 +| epoch 2 | 1819/ 5600 batches | train loss 0.5099746 +| epoch 2 | 1823/ 5600 batches | train loss 0.3306162 +| epoch 2 | 1827/ 5600 batches | train loss 0.3612092 +| epoch 2 | 1831/ 5600 batches | train loss 0.5121068 +| epoch 2 | 1835/ 5600 batches | train loss 0.4482830 +| epoch 2 | 1839/ 5600 batches | train loss 0.3741663 +| epoch 2 | 1843/ 5600 batches | train loss 0.3769265 +| epoch 2 | 1847/ 5600 batches | train loss 0.4685669 +| epoch 2 | 1851/ 5600 batches | train loss 0.3821176 +| epoch 2 | 1855/ 5600 batches | train loss 0.4950376 +| epoch 2 | 1859/ 5600 batches | train loss 0.4954115 +| epoch 2 | 1863/ 5600 batches | train loss 0.4681551 +| epoch 2 | 1867/ 5600 batches | train loss 0.4556492 +| epoch 2 | 1871/ 5600 batches | train loss 0.4047847 +| epoch 2 | 1875/ 5600 batches | train loss 0.4137031 +| epoch 2 | 1879/ 5600 batches | train loss 0.4133909 +| epoch 2 | 1883/ 5600 batches | train loss 0.4112104 +| epoch 2 | 1887/ 5600 batches | train loss 0.4573481 +| epoch 2 | 1891/ 5600 batches | train loss 0.5269957 +| epoch 2 | 1895/ 5600 batches | train loss 0.4412948 +| epoch 2 | 1899/ 5600 batches | train loss 0.3812800 +| epoch 2 | 1903/ 5600 batches | train loss 0.3976547 +| epoch 2 | 1907/ 5600 batches | train loss 0.4432600 +| epoch 2 | 1911/ 5600 batches | train loss 0.5156049 +| epoch 2 | 1915/ 5600 batches | train loss 0.5164497 +| epoch 2 | 1919/ 5600 batches | train loss 0.4098588 +| epoch 2 | 1923/ 5600 batches | train loss 0.3654124 +| epoch 2 | 1927/ 5600 batches | train loss 0.3993953 +| epoch 2 | 1931/ 5600 batches | train loss 0.5874005 +| epoch 2 | 1935/ 5600 batches | train loss 0.4667777 +| epoch 2 | 1939/ 5600 batches | train loss 0.4431885 +| epoch 2 | 1943/ 5600 batches | train loss 0.5569580 +| epoch 2 | 1947/ 5600 batches | train loss 0.4701460 +| epoch 2 | 1951/ 5600 batches | train loss 0.4137835 +| epoch 2 | 1955/ 5600 batches | train loss 0.5162064 +| epoch 2 | 1959/ 5600 batches | train loss 0.4737987 +| epoch 2 | 1963/ 5600 batches | train loss 0.3755023 +| epoch 2 | 1967/ 5600 batches | train loss 0.4727711 +| epoch 2 | 1971/ 5600 batches | train loss 0.4941787 +| epoch 2 | 1975/ 5600 batches | train loss 0.4448513 +| epoch 2 | 1979/ 5600 batches | train loss 0.4405073 +| epoch 2 | 1983/ 5600 batches | train loss 0.4320168 +| epoch 2 | 1987/ 5600 batches | train loss 0.4722868 +| epoch 2 | 1991/ 5600 batches | train loss 0.4573571 +| epoch 2 | 1995/ 5600 batches | train loss 0.3936280 +| epoch 2 | 1999/ 5600 batches | train loss 0.4598811 +| epoch 2 | 2003/ 5600 batches | train loss 0.5070468 +| epoch 2 | 2007/ 5600 batches | train loss 0.3716732 +| epoch 2 | 2011/ 5600 batches | train loss 0.5151486 +| epoch 2 | 2015/ 5600 batches | train loss 0.4803654 +| epoch 2 | 2019/ 5600 batches | train loss 0.3777885 +| epoch 2 | 2023/ 5600 batches | train loss 0.4126613 +| epoch 2 | 2027/ 5600 batches | train loss 0.4087432 +| epoch 2 | 2031/ 5600 batches | train loss 0.3544958 +| epoch 2 | 2035/ 5600 batches | train loss 0.4763312 +| epoch 2 | 2039/ 5600 batches | train loss 0.4302729 +| epoch 2 | 2043/ 5600 batches | train loss 0.5124478 +| epoch 2 | 2047/ 5600 batches | train loss 0.4136851 +| epoch 2 | 2051/ 5600 batches | train loss 0.4155058 +| epoch 2 | 2055/ 5600 batches | train loss 0.4378569 +| epoch 2 | 2059/ 5600 batches | train loss 0.4133963 +| epoch 2 | 2063/ 5600 batches | train loss 0.4441358 +| epoch 2 | 2067/ 5600 batches | train loss 0.3991254 +| epoch 2 | 2071/ 5600 batches | train loss 0.4397390 +| epoch 2 | 2075/ 5600 batches | train loss 0.4716187 +| epoch 2 | 2079/ 5600 batches | train loss 0.4488094 +| epoch 2 | 2083/ 5600 batches | train loss 0.4358219 +| epoch 2 | 2087/ 5600 batches | train loss 0.4605413 +| epoch 2 | 2091/ 5600 batches | train loss 0.4055469 +| epoch 2 | 2095/ 5600 batches | train loss 0.4220627 +| epoch 2 | 2099/ 5600 batches | train loss 0.3997647 +| epoch 2 | 2103/ 5600 batches | train loss 0.3964646 +| epoch 2 | 2107/ 5600 batches | train loss 0.4463050 +| epoch 2 | 2111/ 5600 batches | train loss 0.4125048 +| epoch 2 | 2115/ 5600 batches | train loss 0.4293178 +| epoch 2 | 2119/ 5600 batches | train loss 0.4176322 +| epoch 2 | 2123/ 5600 batches | train loss 0.4690875 +| epoch 2 | 2127/ 5600 batches | train loss 0.5111638 +| epoch 2 | 2131/ 5600 batches | train loss 0.4773039 +| epoch 2 | 2135/ 5600 batches | train loss 0.4591744 +| epoch 2 | 2139/ 5600 batches | train loss 0.4103324 +| epoch 2 | 2143/ 5600 batches | train loss 0.4480172 +| epoch 2 | 2147/ 5600 batches | train loss 0.3495538 +| epoch 2 | 2151/ 5600 batches | train loss 0.4258965 +| epoch 2 | 2155/ 5600 batches | train loss 0.4034708 +| epoch 2 | 2159/ 5600 batches | train loss 0.5022625 +| epoch 2 | 2163/ 5600 batches | train loss 0.4275548 +| epoch 2 | 2167/ 5600 batches | train loss 0.3620068 +| epoch 2 | 2171/ 5600 batches | train loss 0.4154932 +| epoch 2 | 2175/ 5600 batches | train loss 0.4332776 +| epoch 2 | 2179/ 5600 batches | train loss 0.4291662 +| epoch 2 | 2183/ 5600 batches | train loss 0.4314796 +| epoch 2 | 2187/ 5600 batches | train loss 0.4594292 +| epoch 2 | 2191/ 5600 batches | train loss 0.3483693 +| epoch 2 | 2195/ 5600 batches | train loss 0.4692937 +| epoch 2 | 2199/ 5600 batches | train loss 0.4258223 +| epoch 2 | 2203/ 5600 batches | train loss 0.4498822 +| epoch 2 | 2207/ 5600 batches | train loss 0.4589058 +| epoch 2 | 2211/ 5600 batches | train loss 0.4768870 +| epoch 2 | 2215/ 5600 batches | train loss 0.3505657 +| epoch 2 | 2219/ 5600 batches | train loss 0.3669718 +| epoch 2 | 2223/ 5600 batches | train loss 0.4704757 +| epoch 2 | 2227/ 5600 batches | train loss 0.4054531 +| epoch 2 | 2231/ 5600 batches | train loss 0.4336447 +| epoch 2 | 2235/ 5600 batches | train loss 0.4162422 +| epoch 2 | 2239/ 5600 batches | train loss 0.4464211 +| epoch 2 | 2243/ 5600 batches | train loss 0.4336148 +| epoch 2 | 2247/ 5600 batches | train loss 0.4711565 +| epoch 2 | 2251/ 5600 batches | train loss 0.4138960 +| epoch 2 | 2255/ 5600 batches | train loss 0.4969432 +| epoch 2 | 2259/ 5600 batches | train loss 0.4858941 +| epoch 2 | 2263/ 5600 batches | train loss 0.3154998 +| epoch 2 | 2267/ 5600 batches | train loss 0.3395616 +| epoch 2 | 2271/ 5600 batches | train loss 0.3715605 +| epoch 2 | 2275/ 5600 batches | train loss 0.4479887 +| epoch 2 | 2279/ 5600 batches | train loss 0.4335181 +| epoch 2 | 2283/ 5600 batches | train loss 0.4588566 +| epoch 2 | 2287/ 5600 batches | train loss 0.4815493 +| epoch 2 | 2291/ 5600 batches | train loss 0.4254850 +| epoch 2 | 2295/ 5600 batches | train loss 0.3772436 +| epoch 2 | 2299/ 5600 batches | train loss 0.5627787 +| epoch 2 | 2303/ 5600 batches | train loss 0.4849100 +| epoch 2 | 2307/ 5600 batches | train loss 0.4640485 +| epoch 2 | 2311/ 5600 batches | train loss 0.4134527 +| epoch 2 | 2315/ 5600 batches | train loss 0.4846473 +| epoch 2 | 2319/ 5600 batches | train loss 0.3675583 +| epoch 2 | 2323/ 5600 batches | train loss 0.3993107 +| epoch 2 | 2327/ 5600 batches | train loss 0.4978599 +| epoch 2 | 2331/ 5600 batches | train loss 0.4629748 +| epoch 2 | 2335/ 5600 batches | train loss 0.3971645 +| epoch 2 | 2339/ 5600 batches | train loss 0.4745480 +| epoch 2 | 2343/ 5600 batches | train loss 0.3777255 +| epoch 2 | 2347/ 5600 batches | train loss 0.5171931 +| epoch 2 | 2351/ 5600 batches | train loss 0.3152268 +| epoch 2 | 2355/ 5600 batches | train loss 0.4101470 +| epoch 2 | 2359/ 5600 batches | train loss 0.4706660 +| epoch 2 | 2363/ 5600 batches | train loss 0.4849664 +| epoch 2 | 2367/ 5600 batches | train loss 0.4473095 +| epoch 2 | 2371/ 5600 batches | train loss 0.3960795 +| epoch 2 | 2375/ 5600 batches | train loss 0.4554454 +| epoch 2 | 2379/ 5600 batches | train loss 0.4817729 +| epoch 2 | 2383/ 5600 batches | train loss 0.4331189 +| epoch 2 | 2387/ 5600 batches | train loss 0.4910961 +| epoch 2 | 2391/ 5600 batches | train loss 0.4001014 +| epoch 2 | 2395/ 5600 batches | train loss 0.4217745 +| epoch 2 | 2399/ 5600 batches | train loss 0.4597690 +| epoch 2 | 2403/ 5600 batches | train loss 0.4810106 +| epoch 2 | 2407/ 5600 batches | train loss 0.4608281 +| epoch 2 | 2411/ 5600 batches | train loss 0.3595324 +| epoch 2 | 2415/ 5600 batches | train loss 0.5104452 +| epoch 2 | 2419/ 5600 batches | train loss 0.4709681 +| epoch 2 | 2423/ 5600 batches | train loss 0.4166837 +| epoch 2 | 2427/ 5600 batches | train loss 0.4974851 +| epoch 2 | 2431/ 5600 batches | train loss 0.4132214 +| epoch 2 | 2435/ 5600 batches | train loss 0.4601456 +| epoch 2 | 2439/ 5600 batches | train loss 0.3565542 +| epoch 2 | 2443/ 5600 batches | train loss 0.4575437 +| epoch 2 | 2447/ 5600 batches | train loss 0.4344181 +| epoch 2 | 2451/ 5600 batches | train loss 0.4619877 +| epoch 2 | 2455/ 5600 batches | train loss 0.5308558 +| epoch 2 | 2459/ 5600 batches | train loss 0.4700347 +| epoch 2 | 2463/ 5600 batches | train loss 0.6021177 +| epoch 2 | 2467/ 5600 batches | train loss 0.4174947 +| epoch 2 | 2471/ 5600 batches | train loss 0.4816989 +| epoch 2 | 2475/ 5600 batches | train loss 0.4685604 +| epoch 2 | 2479/ 5600 batches | train loss 0.3721579 +| epoch 2 | 2483/ 5600 batches | train loss 0.4942191 +| epoch 2 | 2487/ 5600 batches | train loss 0.4487894 +| epoch 2 | 2491/ 5600 batches | train loss 0.4449507 +| epoch 2 | 2495/ 5600 batches | train loss 0.4607757 +| epoch 2 | 2499/ 5600 batches | train loss 0.4643648 +| epoch 2 | 2503/ 5600 batches | train loss 0.5027278 +| epoch 2 | 2507/ 5600 batches | train loss 0.3886785 +| epoch 2 | 2511/ 5600 batches | train loss 0.4004147 +| epoch 2 | 2515/ 5600 batches | train loss 0.3536892 +| epoch 2 | 2519/ 5600 batches | train loss 0.3820989 +| epoch 2 | 2523/ 5600 batches | train loss 0.4122438 +| epoch 2 | 2527/ 5600 batches | train loss 0.4176911 +| epoch 2 | 2531/ 5600 batches | train loss 0.3891264 +| epoch 2 | 2535/ 5600 batches | train loss 0.4710099 +| epoch 2 | 2539/ 5600 batches | train loss 0.4330483 +| epoch 2 | 2543/ 5600 batches | train loss 0.3884266 +| epoch 2 | 2547/ 5600 batches | train loss 0.6330237 +| epoch 2 | 2551/ 5600 batches | train loss 0.3100862 +| epoch 2 | 2555/ 5600 batches | train loss 0.4458379 +| epoch 2 | 2559/ 5600 batches | train loss 0.4868694 +| epoch 2 | 2563/ 5600 batches | train loss 0.5184679 +| epoch 2 | 2567/ 5600 batches | train loss 0.6064844 +| epoch 2 | 2571/ 5600 batches | train loss 0.4885147 +| epoch 2 | 2575/ 5600 batches | train loss 0.3716696 +| epoch 2 | 2579/ 5600 batches | train loss 0.4769859 +| epoch 2 | 2583/ 5600 batches | train loss 0.5328770 +| epoch 2 | 2587/ 5600 batches | train loss 0.4514187 +| epoch 2 | 2591/ 5600 batches | train loss 0.4228287 +| epoch 2 | 2595/ 5600 batches | train loss 0.4416472 +| epoch 2 | 2599/ 5600 batches | train loss 0.4237011 +| epoch 2 | 2603/ 5600 batches | train loss 0.4670987 +| epoch 2 | 2607/ 5600 batches | train loss 0.4248513 +| epoch 2 | 2611/ 5600 batches | train loss 0.4156671 +| epoch 2 | 2615/ 5600 batches | train loss 0.4591805 +| epoch 2 | 2619/ 5600 batches | train loss 0.3686826 +| epoch 2 | 2623/ 5600 batches | train loss 0.4579608 +| epoch 2 | 2627/ 5600 batches | train loss 0.3687555 +| epoch 2 | 2631/ 5600 batches | train loss 0.4759711 +| epoch 2 | 2635/ 5600 batches | train loss 0.4756263 +| epoch 2 | 2639/ 5600 batches | train loss 0.4375302 +| epoch 2 | 2643/ 5600 batches | train loss 0.4607624 +| epoch 2 | 2647/ 5600 batches | train loss 0.4215752 +| epoch 2 | 2651/ 5600 batches | train loss 0.4351461 +| epoch 2 | 2655/ 5600 batches | train loss 0.4187046 +| epoch 2 | 2659/ 5600 batches | train loss 0.3838828 +| epoch 2 | 2663/ 5600 batches | train loss 0.4372696 +| epoch 2 | 2667/ 5600 batches | train loss 0.3510574 +| epoch 2 | 2671/ 5600 batches | train loss 0.4091307 +| epoch 2 | 2675/ 5600 batches | train loss 0.3587591 +| epoch 2 | 2679/ 5600 batches | train loss 0.4263857 +| epoch 2 | 2683/ 5600 batches | train loss 0.5178081 +| epoch 2 | 2687/ 5600 batches | train loss 0.4306461 +| epoch 2 | 2691/ 5600 batches | train loss 0.4083850 +| epoch 2 | 2695/ 5600 batches | train loss 0.4570985 +| epoch 2 | 2699/ 5600 batches | train loss 0.5014006 +| epoch 2 | 2703/ 5600 batches | train loss 0.4509267 +| epoch 2 | 2707/ 5600 batches | train loss 0.5388850 +| epoch 2 | 2711/ 5600 batches | train loss 0.3626217 +| epoch 2 | 2715/ 5600 batches | train loss 0.5195144 +| epoch 2 | 2719/ 5600 batches | train loss 0.5110604 +| epoch 2 | 2723/ 5600 batches | train loss 0.4746294 +| epoch 2 | 2727/ 5600 batches | train loss 0.4527207 +| epoch 2 | 2731/ 5600 batches | train loss 0.4551683 +| epoch 2 | 2735/ 5600 batches | train loss 0.5340832 +| epoch 2 | 2739/ 5600 batches | train loss 0.2831091 +| epoch 2 | 2743/ 5600 batches | train loss 0.5291548 +| epoch 2 | 2747/ 5600 batches | train loss 0.5283315 +| epoch 2 | 2751/ 5600 batches | train loss 0.4745742 +| epoch 2 | 2755/ 5600 batches | train loss 0.4635205 +| epoch 2 | 2759/ 5600 batches | train loss 0.4692655 +| epoch 2 | 2763/ 5600 batches | train loss 0.4388815 +| epoch 2 | 2767/ 5600 batches | train loss 0.5154797 +| epoch 2 | 2771/ 5600 batches | train loss 0.5363007 +| epoch 2 | 2775/ 5600 batches | train loss 0.4491544 +| epoch 2 | 2779/ 5600 batches | train loss 0.4126784 +| epoch 2 | 2783/ 5600 batches | train loss 0.4007597 +| epoch 2 | 2787/ 5600 batches | train loss 0.4550239 +| epoch 2 | 2791/ 5600 batches | train loss 0.4069005 +| epoch 2 | 2795/ 5600 batches | train loss 0.4503755 +| epoch 2 | 2799/ 5600 batches | train loss 0.2867558 +| epoch 2 | 2803/ 5600 batches | train loss 0.4721621 +| epoch 2 | 2807/ 5600 batches | train loss 0.3978367 +| epoch 2 | 2811/ 5600 batches | train loss 0.4615841 +| epoch 2 | 2815/ 5600 batches | train loss 0.2959251 +| epoch 2 | 2819/ 5600 batches | train loss 0.3922651 +| epoch 2 | 2823/ 5600 batches | train loss 0.4762527 +| epoch 2 | 2827/ 5600 batches | train loss 0.5239516 +| epoch 2 | 2831/ 5600 batches | train loss 0.4731838 +| epoch 2 | 2835/ 5600 batches | train loss 0.4146750 +| epoch 2 | 2839/ 5600 batches | train loss 0.4686199 +| epoch 2 | 2843/ 5600 batches | train loss 0.4461974 +| epoch 2 | 2847/ 5600 batches | train loss 0.5687598 +| epoch 2 | 2851/ 5600 batches | train loss 0.4164221 +| epoch 2 | 2855/ 5600 batches | train loss 0.3963780 +| epoch 2 | 2859/ 5600 batches | train loss 0.4772975 +| epoch 2 | 2863/ 5600 batches | train loss 0.5309905 +| epoch 2 | 2867/ 5600 batches | train loss 0.5032458 +| epoch 2 | 2871/ 5600 batches | train loss 0.3198534 +| epoch 2 | 2875/ 5600 batches | train loss 0.2843386 +| epoch 2 | 2879/ 5600 batches | train loss 0.4085808 +| epoch 2 | 2883/ 5600 batches | train loss 0.4617884 +| epoch 2 | 2887/ 5600 batches | train loss 0.4977275 +| epoch 2 | 2891/ 5600 batches | train loss 0.4251090 +| epoch 2 | 2895/ 5600 batches | train loss 0.4502965 +| epoch 2 | 2899/ 5600 batches | train loss 0.4054208 +| epoch 2 | 2903/ 5600 batches | train loss 0.5541931 +| epoch 2 | 2907/ 5600 batches | train loss 0.4615626 +| epoch 2 | 2911/ 5600 batches | train loss 0.4507143 +| epoch 2 | 2915/ 5600 batches | train loss 0.3879045 +| epoch 2 | 2919/ 5600 batches | train loss 0.4715779 +| epoch 2 | 2923/ 5600 batches | train loss 0.4132268 +| epoch 2 | 2927/ 5600 batches | train loss 0.5030133 +| epoch 2 | 2931/ 5600 batches | train loss 0.4481757 +| epoch 2 | 2935/ 5600 batches | train loss 0.4230248 +| epoch 2 | 2939/ 5600 batches | train loss 0.5458603 +| epoch 2 | 2943/ 5600 batches | train loss 0.3804038 +| epoch 2 | 2947/ 5600 batches | train loss 0.4507542 +| epoch 2 | 2951/ 5600 batches | train loss 0.4231442 +| epoch 2 | 2955/ 5600 batches | train loss 0.4384048 +| epoch 2 | 2959/ 5600 batches | train loss 0.4936926 +| epoch 2 | 2963/ 5600 batches | train loss 0.4201137 +| epoch 2 | 2967/ 5600 batches | train loss 0.5162366 +| epoch 2 | 2971/ 5600 batches | train loss 0.4107240 +| epoch 2 | 2975/ 5600 batches | train loss 0.4355863 +| epoch 2 | 2979/ 5600 batches | train loss 0.4570696 +| epoch 2 | 2983/ 5600 batches | train loss 0.4730203 +| epoch 2 | 2987/ 5600 batches | train loss 0.3299809 +| epoch 2 | 2991/ 5600 batches | train loss 0.4797997 +| epoch 2 | 2995/ 5600 batches | train loss 0.3700027 +| epoch 2 | 2999/ 5600 batches | train loss 0.4949353 +| epoch 2 | 3003/ 5600 batches | train loss 0.4553585 +| epoch 2 | 3007/ 5600 batches | train loss 0.4224002 +| epoch 2 | 3011/ 5600 batches | train loss 0.5401109 +| epoch 2 | 3015/ 5600 batches | train loss 0.4302680 +| epoch 2 | 3019/ 5600 batches | train loss 0.3767565 +| epoch 2 | 3023/ 5600 batches | train loss 0.4582587 +| epoch 2 | 3027/ 5600 batches | train loss 0.4432138 +| epoch 2 | 3031/ 5600 batches | train loss 0.3887092 +| epoch 2 | 3035/ 5600 batches | train loss 0.5460036 +| epoch 2 | 3039/ 5600 batches | train loss 0.4532143 +| epoch 2 | 3043/ 5600 batches | train loss 0.4127835 +| epoch 2 | 3047/ 5600 batches | train loss 0.4177639 +| epoch 2 | 3051/ 5600 batches | train loss 0.4354376 +| epoch 2 | 3055/ 5600 batches | train loss 0.4422860 +| epoch 2 | 3059/ 5600 batches | train loss 0.3911009 +| epoch 2 | 3063/ 5600 batches | train loss 0.4189651 +| epoch 2 | 3067/ 5600 batches | train loss 0.5326713 +| epoch 2 | 3071/ 5600 batches | train loss 0.4771625 +| epoch 2 | 3075/ 5600 batches | train loss 0.4223070 +| epoch 2 | 3079/ 5600 batches | train loss 0.4105856 +| epoch 2 | 3083/ 5600 batches | train loss 0.4317287 +| epoch 2 | 3087/ 5600 batches | train loss 0.4733944 +| epoch 2 | 3091/ 5600 batches | train loss 0.4240151 +| epoch 2 | 3095/ 5600 batches | train loss 0.4645284 +| epoch 2 | 3099/ 5600 batches | train loss 0.3880064 +| epoch 2 | 3103/ 5600 batches | train loss 0.4102652 +| epoch 2 | 3107/ 5600 batches | train loss 0.4686696 +| epoch 2 | 3111/ 5600 batches | train loss 0.4991488 +| epoch 2 | 3115/ 5600 batches | train loss 0.4684946 +| epoch 2 | 3119/ 5600 batches | train loss 0.4236297 +| epoch 2 | 3123/ 5600 batches | train loss 0.4539982 +| epoch 2 | 3127/ 5600 batches | train loss 0.4081040 +| epoch 2 | 3131/ 5600 batches | train loss 0.4826086 +| epoch 2 | 3135/ 5600 batches | train loss 0.3512870 +| epoch 2 | 3139/ 5600 batches | train loss 0.4546791 +| epoch 2 | 3143/ 5600 batches | train loss 0.5468977 +| epoch 2 | 3147/ 5600 batches | train loss 0.5662922 +| epoch 2 | 3151/ 5600 batches | train loss 0.4224679 +| epoch 2 | 3155/ 5600 batches | train loss 0.4411539 +| epoch 2 | 3159/ 5600 batches | train loss 0.4011238 +| epoch 2 | 3163/ 5600 batches | train loss 0.4895238 +| epoch 2 | 3167/ 5600 batches | train loss 0.3982527 +| epoch 2 | 3171/ 5600 batches | train loss 0.4617294 +| epoch 2 | 3175/ 5600 batches | train loss 0.3404775 +| epoch 2 | 3179/ 5600 batches | train loss 0.3753139 +| epoch 2 | 3183/ 5600 batches | train loss 0.4569921 +| epoch 2 | 3187/ 5600 batches | train loss 0.4206153 +| epoch 2 | 3191/ 5600 batches | train loss 0.4978107 +| epoch 2 | 3195/ 5600 batches | train loss 0.5449810 +| epoch 2 | 3199/ 5600 batches | train loss 0.4124415 +| epoch 2 | 3203/ 5600 batches | train loss 0.4164162 +| epoch 2 | 3207/ 5600 batches | train loss 0.4351508 +| epoch 2 | 3211/ 5600 batches | train loss 0.4254835 +| epoch 2 | 3215/ 5600 batches | train loss 0.4284465 +| epoch 2 | 3219/ 5600 batches | train loss 0.5246536 +| epoch 2 | 3223/ 5600 batches | train loss 0.4892963 +| epoch 2 | 3227/ 5600 batches | train loss 0.4715711 +| epoch 2 | 3231/ 5600 batches | train loss 0.5852801 +| epoch 2 | 3235/ 5600 batches | train loss 0.4246920 +| epoch 2 | 3239/ 5600 batches | train loss 0.4358634 +| epoch 2 | 3243/ 5600 batches | train loss 0.5202072 +| epoch 2 | 3247/ 5600 batches | train loss 0.4168837 +| epoch 2 | 3251/ 5600 batches | train loss 0.4732898 +| epoch 2 | 3255/ 5600 batches | train loss 0.4428133 +| epoch 2 | 3259/ 5600 batches | train loss 0.4736176 +| epoch 2 | 3263/ 5600 batches | train loss 0.5038531 +| epoch 2 | 3267/ 5600 batches | train loss 0.3004704 +| epoch 2 | 3271/ 5600 batches | train loss 0.4461613 +| epoch 2 | 3275/ 5600 batches | train loss 0.4386581 +| epoch 2 | 3279/ 5600 batches | train loss 0.4379454 +| epoch 2 | 3283/ 5600 batches | train loss 0.4853373 +| epoch 2 | 3287/ 5600 batches | train loss 0.3672370 +| epoch 2 | 3291/ 5600 batches | train loss 0.4671436 +| epoch 2 | 3295/ 5600 batches | train loss 0.4028924 +| epoch 2 | 3299/ 5600 batches | train loss 0.3076195 +| epoch 2 | 3303/ 5600 batches | train loss 0.4256265 +| epoch 2 | 3307/ 5600 batches | train loss 0.5301800 +| epoch 2 | 3311/ 5600 batches | train loss 0.5090387 +| epoch 2 | 3315/ 5600 batches | train loss 0.4315317 +| epoch 2 | 3319/ 5600 batches | train loss 0.4064901 +| epoch 2 | 3323/ 5600 batches | train loss 0.4518146 +| epoch 2 | 3327/ 5600 batches | train loss 0.4273396 +| epoch 2 | 3331/ 5600 batches | train loss 0.4934896 +| epoch 2 | 3335/ 5600 batches | train loss 0.4493540 +| epoch 2 | 3339/ 5600 batches | train loss 0.4838364 +| epoch 2 | 3343/ 5600 batches | train loss 0.4583791 +| epoch 2 | 3347/ 5600 batches | train loss 0.3545715 +| epoch 2 | 3351/ 5600 batches | train loss 0.4218286 +| epoch 2 | 3355/ 5600 batches | train loss 0.4655750 +| epoch 2 | 3359/ 5600 batches | train loss 0.5147858 +| epoch 2 | 3363/ 5600 batches | train loss 0.4083693 +| epoch 2 | 3367/ 5600 batches | train loss 0.4549179 +| epoch 2 | 3371/ 5600 batches | train loss 0.3524098 +| epoch 2 | 3375/ 5600 batches | train loss 0.4522269 +| epoch 2 | 3379/ 5600 batches | train loss 0.4489465 +| epoch 2 | 3383/ 5600 batches | train loss 0.4578975 +| epoch 2 | 3387/ 5600 batches | train loss 0.4473345 +| epoch 2 | 3391/ 5600 batches | train loss 0.4155479 +| epoch 2 | 3395/ 5600 batches | train loss 0.3947544 +| epoch 2 | 3399/ 5600 batches | train loss 0.4186001 +| epoch 2 | 3403/ 5600 batches | train loss 0.4490950 +| epoch 2 | 3407/ 5600 batches | train loss 0.4584203 +| epoch 2 | 3411/ 5600 batches | train loss 0.3743742 +| epoch 2 | 3415/ 5600 batches | train loss 0.4336787 +| epoch 2 | 3419/ 5600 batches | train loss 0.5086480 +| epoch 2 | 3423/ 5600 batches | train loss 0.5325880 +| epoch 2 | 3427/ 5600 batches | train loss 0.3335872 +| epoch 2 | 3431/ 5600 batches | train loss 0.4427752 +| epoch 2 | 3435/ 5600 batches | train loss 0.4126192 +| epoch 2 | 3439/ 5600 batches | train loss 0.4020243 +| epoch 2 | 3443/ 5600 batches | train loss 0.4212037 +| epoch 2 | 3447/ 5600 batches | train loss 0.4269980 +| epoch 2 | 3451/ 5600 batches | train loss 0.4998595 +| epoch 2 | 3455/ 5600 batches | train loss 0.3837009 +| epoch 2 | 3459/ 5600 batches | train loss 0.4970896 +| epoch 2 | 3463/ 5600 batches | train loss 0.4651323 +| epoch 2 | 3467/ 5600 batches | train loss 0.5922912 +| epoch 2 | 3471/ 5600 batches | train loss 0.4314894 +| epoch 2 | 3475/ 5600 batches | train loss 0.5049661 +| epoch 2 | 3479/ 5600 batches | train loss 0.4610401 +| epoch 2 | 3483/ 5600 batches | train loss 0.4277579 +| epoch 2 | 3487/ 5600 batches | train loss 0.4743099 +| epoch 2 | 3491/ 5600 batches | train loss 0.4106092 +| epoch 2 | 3495/ 5600 batches | train loss 0.4530513 +| epoch 2 | 3499/ 5600 batches | train loss 0.3924909 +| epoch 2 | 3503/ 5600 batches | train loss 0.4548354 +| epoch 2 | 3507/ 5600 batches | train loss 0.4243659 +| epoch 2 | 3511/ 5600 batches | train loss 0.4828198 +| epoch 2 | 3515/ 5600 batches | train loss 0.4101988 +| epoch 2 | 3519/ 5600 batches | train loss 0.4228880 +| epoch 2 | 3523/ 5600 batches | train loss 0.4602708 +| epoch 2 | 3527/ 5600 batches | train loss 0.4227896 +| epoch 2 | 3531/ 5600 batches | train loss 0.4712291 +| epoch 2 | 3535/ 5600 batches | train loss 0.3746750 +| epoch 2 | 3539/ 5600 batches | train loss 0.3911409 +| epoch 2 | 3543/ 5600 batches | train loss 0.4521301 +| epoch 2 | 3547/ 5600 batches | train loss 0.4234959 +| epoch 2 | 3551/ 5600 batches | train loss 0.3865995 +| epoch 2 | 3555/ 5600 batches | train loss 0.6879853 +| epoch 2 | 3559/ 5600 batches | train loss 0.5214698 +| epoch 2 | 3563/ 5600 batches | train loss 0.4014322 +| epoch 2 | 3567/ 5600 batches | train loss 0.4370340 +| epoch 2 | 3571/ 5600 batches | train loss 0.4437534 +| epoch 2 | 3575/ 5600 batches | train loss 0.3814631 +| epoch 2 | 3579/ 5600 batches | train loss 0.5618854 +| epoch 2 | 3583/ 5600 batches | train loss 0.4951243 +| epoch 2 | 3587/ 5600 batches | train loss 0.4128077 +| epoch 2 | 3591/ 5600 batches | train loss 0.3747147 +| epoch 2 | 3595/ 5600 batches | train loss 0.4871754 +| epoch 2 | 3599/ 5600 batches | train loss 0.3878328 +| epoch 2 | 3603/ 5600 batches | train loss 0.3822638 +| epoch 2 | 3607/ 5600 batches | train loss 0.3981159 +| epoch 2 | 3611/ 5600 batches | train loss 0.3562455 +| epoch 2 | 3615/ 5600 batches | train loss 0.4160352 +| epoch 2 | 3619/ 5600 batches | train loss 0.4297344 +| epoch 2 | 3623/ 5600 batches | train loss 0.4495763 +| epoch 2 | 3627/ 5600 batches | train loss 0.4679973 +| epoch 2 | 3631/ 5600 batches | train loss 0.4230078 +| epoch 2 | 3635/ 5600 batches | train loss 0.4365172 +| epoch 2 | 3639/ 5600 batches | train loss 0.4911969 +| epoch 2 | 3643/ 5600 batches | train loss 0.4584723 +| epoch 2 | 3647/ 5600 batches | train loss 0.4199055 +| epoch 2 | 3651/ 5600 batches | train loss 0.4371181 +| epoch 2 | 3655/ 5600 batches | train loss 0.5126778 +| epoch 2 | 3659/ 5600 batches | train loss 0.3867766 +| epoch 2 | 3663/ 5600 batches | train loss 0.4873124 +| epoch 2 | 3667/ 5600 batches | train loss 0.4822074 +| epoch 2 | 3671/ 5600 batches | train loss 0.4653633 +| epoch 2 | 3675/ 5600 batches | train loss 0.4541473 +| epoch 2 | 3679/ 5600 batches | train loss 0.4491008 +| epoch 2 | 3683/ 5600 batches | train loss 0.4826932 +| epoch 2 | 3687/ 5600 batches | train loss 0.5034250 +| epoch 2 | 3691/ 5600 batches | train loss 0.4089475 +| epoch 2 | 3695/ 5600 batches | train loss 0.5192621 +| epoch 2 | 3699/ 5600 batches | train loss 0.4490440 +| epoch 2 | 3703/ 5600 batches | train loss 0.4325980 +| epoch 2 | 3707/ 5600 batches | train loss 0.3761832 +| epoch 2 | 3711/ 5600 batches | train loss 0.4844289 +| epoch 2 | 3715/ 5600 batches | train loss 0.3830539 +| epoch 2 | 3719/ 5600 batches | train loss 0.4035942 +| epoch 2 | 3723/ 5600 batches | train loss 0.4451986 +| epoch 2 | 3727/ 5600 batches | train loss 0.4589018 +| epoch 2 | 3731/ 5600 batches | train loss 0.4399635 +| epoch 2 | 3735/ 5600 batches | train loss 0.3998706 +| epoch 2 | 3739/ 5600 batches | train loss 0.5362059 +| epoch 2 | 3743/ 5600 batches | train loss 0.3934767 +| epoch 2 | 3747/ 5600 batches | train loss 0.4272923 +| epoch 2 | 3751/ 5600 batches | train loss 0.4770598 +| epoch 2 | 3755/ 5600 batches | train loss 0.4927819 +| epoch 2 | 3759/ 5600 batches | train loss 0.3326682 +| epoch 2 | 3763/ 5600 batches | train loss 0.4607422 +| epoch 2 | 3767/ 5600 batches | train loss 0.4412971 +| epoch 2 | 3771/ 5600 batches | train loss 0.4586365 +| epoch 2 | 3775/ 5600 batches | train loss 0.4068527 +| epoch 2 | 3779/ 5600 batches | train loss 0.4623616 +| epoch 2 | 3783/ 5600 batches | train loss 0.4045108 +| epoch 2 | 3787/ 5600 batches | train loss 0.4882872 +| epoch 2 | 3791/ 5600 batches | train loss 0.3409624 +| epoch 2 | 3795/ 5600 batches | train loss 0.4764651 +| epoch 2 | 3799/ 5600 batches | train loss 0.5183997 +| epoch 2 | 3803/ 5600 batches | train loss 0.3865079 +| epoch 2 | 3807/ 5600 batches | train loss 0.4101505 +| epoch 2 | 3811/ 5600 batches | train loss 0.4546410 +| epoch 2 | 3815/ 5600 batches | train loss 0.4579427 +| epoch 2 | 3819/ 5600 batches | train loss 0.4063043 +| epoch 2 | 3823/ 5600 batches | train loss 0.5035492 +| epoch 2 | 3827/ 5600 batches | train loss 0.3355433 +| epoch 2 | 3831/ 5600 batches | train loss 0.4078193 +| epoch 2 | 3835/ 5600 batches | train loss 0.3682829 +| epoch 2 | 3839/ 5600 batches | train loss 0.4555463 +| epoch 2 | 3843/ 5600 batches | train loss 0.5211973 +| epoch 2 | 3847/ 5600 batches | train loss 0.5640141 +| epoch 2 | 3851/ 5600 batches | train loss 0.4271345 +| epoch 2 | 3855/ 5600 batches | train loss 0.4546927 +| epoch 2 | 3859/ 5600 batches | train loss 0.5921460 +| epoch 2 | 3863/ 5600 batches | train loss 0.3488622 +| epoch 2 | 3867/ 5600 batches | train loss 0.4000632 +| epoch 2 | 3871/ 5600 batches | train loss 0.4177081 +| epoch 2 | 3875/ 5600 batches | train loss 0.4160784 +| epoch 2 | 3879/ 5600 batches | train loss 0.3393726 +| epoch 2 | 3883/ 5600 batches | train loss 0.5390160 +| epoch 2 | 3887/ 5600 batches | train loss 0.3721434 +| epoch 2 | 3891/ 5600 batches | train loss 0.5385775 +| epoch 2 | 3895/ 5600 batches | train loss 0.4424688 +| epoch 2 | 3899/ 5600 batches | train loss 0.3749816 +| epoch 2 | 3903/ 5600 batches | train loss 0.4278985 +| epoch 2 | 3907/ 5600 batches | train loss 0.5142753 +| epoch 2 | 3911/ 5600 batches | train loss 0.3763090 +| epoch 2 | 3915/ 5600 batches | train loss 0.3935824 +| epoch 2 | 3919/ 5600 batches | train loss 0.4690101 +| epoch 2 | 3923/ 5600 batches | train loss 0.4308914 +| epoch 2 | 3927/ 5600 batches | train loss 0.4548031 +| epoch 2 | 3931/ 5600 batches | train loss 0.4180355 +| epoch 2 | 3935/ 5600 batches | train loss 0.4345170 +| epoch 2 | 3939/ 5600 batches | train loss 0.3365949 +| epoch 2 | 3943/ 5600 batches | train loss 0.3783427 +| epoch 2 | 3947/ 5600 batches | train loss 0.3825545 +| epoch 2 | 3951/ 5600 batches | train loss 0.4674487 +| epoch 2 | 3955/ 5600 batches | train loss 0.4319506 +| epoch 2 | 3959/ 5600 batches | train loss 0.4287656 +| epoch 2 | 3963/ 5600 batches | train loss 0.4612512 +| epoch 2 | 3967/ 5600 batches | train loss 0.4283958 +| epoch 2 | 3971/ 5600 batches | train loss 0.5110191 +| epoch 2 | 3975/ 5600 batches | train loss 0.4259228 +| epoch 2 | 3979/ 5600 batches | train loss 0.4748215 +| epoch 2 | 3983/ 5600 batches | train loss 0.4970393 +| epoch 2 | 3987/ 5600 batches | train loss 0.4919653 +| epoch 2 | 3991/ 5600 batches | train loss 0.3986348 +| epoch 2 | 3995/ 5600 batches | train loss 0.5795530 +| epoch 2 | 3999/ 5600 batches | train loss 0.3678969 +| epoch 2 | 4003/ 5600 batches | train loss 0.3758216 +| epoch 2 | 4007/ 5600 batches | train loss 0.4490964 +| epoch 2 | 4011/ 5600 batches | train loss 0.5346423 +| epoch 2 | 4015/ 5600 batches | train loss 0.4474991 +| epoch 2 | 4019/ 5600 batches | train loss 0.4547122 +| epoch 2 | 4023/ 5600 batches | train loss 0.4651682 +| epoch 2 | 4027/ 5600 batches | train loss 0.4355553 +| epoch 2 | 4031/ 5600 batches | train loss 0.4582453 +| epoch 2 | 4035/ 5600 batches | train loss 0.4058530 +| epoch 2 | 4039/ 5600 batches | train loss 0.4523208 +| epoch 2 | 4043/ 5600 batches | train loss 0.4370820 +| epoch 2 | 4047/ 5600 batches | train loss 0.4434477 +| epoch 2 | 4051/ 5600 batches | train loss 0.4060584 +| epoch 2 | 4055/ 5600 batches | train loss 0.3962259 +| epoch 2 | 4059/ 5600 batches | train loss 0.4518448 +| epoch 2 | 4063/ 5600 batches | train loss 0.5083176 +| epoch 2 | 4067/ 5600 batches | train loss 0.4073517 +| epoch 2 | 4071/ 5600 batches | train loss 0.5566409 +| epoch 2 | 4075/ 5600 batches | train loss 0.3600630 +| epoch 2 | 4079/ 5600 batches | train loss 0.4957379 +| epoch 2 | 4083/ 5600 batches | train loss 0.4202504 +| epoch 2 | 4087/ 5600 batches | train loss 0.5060383 +| epoch 2 | 4091/ 5600 batches | train loss 0.4233546 +| epoch 2 | 4095/ 5600 batches | train loss 0.4873443 +| epoch 2 | 4099/ 5600 batches | train loss 0.3584039 +| epoch 2 | 4103/ 5600 batches | train loss 0.4236172 +| epoch 2 | 4107/ 5600 batches | train loss 0.5024782 +| epoch 2 | 4111/ 5600 batches | train loss 0.4655725 +| epoch 2 | 4115/ 5600 batches | train loss 0.5249199 +| epoch 2 | 4119/ 5600 batches | train loss 0.4598626 +| epoch 2 | 4123/ 5600 batches | train loss 0.4387925 +| epoch 2 | 4127/ 5600 batches | train loss 0.4751156 +| epoch 2 | 4131/ 5600 batches | train loss 0.4887213 +| epoch 2 | 4135/ 5600 batches | train loss 0.4628000 +| epoch 2 | 4139/ 5600 batches | train loss 0.5430149 +| epoch 2 | 4143/ 5600 batches | train loss 0.4210371 +| epoch 2 | 4147/ 5600 batches | train loss 0.4882370 +| epoch 2 | 4151/ 5600 batches | train loss 0.4770001 +| epoch 2 | 4155/ 5600 batches | train loss 0.3744156 +| epoch 2 | 4159/ 5600 batches | train loss 0.5181062 +| epoch 2 | 4163/ 5600 batches | train loss 0.3823804 +| epoch 2 | 4167/ 5600 batches | train loss 0.4073721 +| epoch 2 | 4171/ 5600 batches | train loss 0.4977167 +| epoch 2 | 4175/ 5600 batches | train loss 0.4962063 +| epoch 2 | 4179/ 5600 batches | train loss 0.4689001 +| epoch 2 | 4183/ 5600 batches | train loss 0.5117974 +| epoch 2 | 4187/ 5600 batches | train loss 0.5002974 +| epoch 2 | 4191/ 5600 batches | train loss 0.4581299 +| epoch 2 | 4195/ 5600 batches | train loss 0.4270633 +| epoch 2 | 4199/ 5600 batches | train loss 0.4100353 +| epoch 2 | 4203/ 5600 batches | train loss 0.3684540 +| epoch 2 | 4207/ 5600 batches | train loss 0.3630099 +| epoch 2 | 4211/ 5600 batches | train loss 0.3718510 +| epoch 2 | 4215/ 5600 batches | train loss 0.4414395 +| epoch 2 | 4219/ 5600 batches | train loss 0.3772624 +| epoch 2 | 4223/ 5600 batches | train loss 0.4794021 +| epoch 2 | 4227/ 5600 batches | train loss 0.2941713 +| epoch 2 | 4231/ 5600 batches | train loss 0.4008823 +| epoch 2 | 4235/ 5600 batches | train loss 0.3955710 +| epoch 2 | 4239/ 5600 batches | train loss 0.3904094 +| epoch 2 | 4243/ 5600 batches | train loss 0.3715217 +| epoch 2 | 4247/ 5600 batches | train loss 0.5489557 +| epoch 2 | 4251/ 5600 batches | train loss 0.5030347 +| epoch 2 | 4255/ 5600 batches | train loss 0.3965001 +| epoch 2 | 4259/ 5600 batches | train loss 0.4188379 +| epoch 2 | 4263/ 5600 batches | train loss 0.5093090 +| epoch 2 | 4267/ 5600 batches | train loss 0.4613205 +| epoch 2 | 4271/ 5600 batches | train loss 0.3797289 +| epoch 2 | 4275/ 5600 batches | train loss 0.4813064 +| epoch 2 | 4279/ 5600 batches | train loss 0.3772082 +| epoch 2 | 4283/ 5600 batches | train loss 0.4397382 +| epoch 2 | 4287/ 5600 batches | train loss 0.4026945 +| epoch 2 | 4291/ 5600 batches | train loss 0.4657683 +| epoch 2 | 4295/ 5600 batches | train loss 0.4153232 +| epoch 2 | 4299/ 5600 batches | train loss 0.3916649 +| epoch 2 | 4303/ 5600 batches | train loss 0.5102600 +| epoch 2 | 4307/ 5600 batches | train loss 0.3773148 +| epoch 2 | 4311/ 5600 batches | train loss 0.5366832 +| epoch 2 | 4315/ 5600 batches | train loss 0.4430633 +| epoch 2 | 4319/ 5600 batches | train loss 0.6486310 +| epoch 2 | 4323/ 5600 batches | train loss 0.5221416 +| epoch 2 | 4327/ 5600 batches | train loss 0.4660839 +| epoch 2 | 4331/ 5600 batches | train loss 0.5076741 +| epoch 2 | 4335/ 5600 batches | train loss 0.3327085 +| epoch 2 | 4339/ 5600 batches | train loss 0.4382218 +| epoch 2 | 4343/ 5600 batches | train loss 0.5191894 +| epoch 2 | 4347/ 5600 batches | train loss 0.4067473 +| epoch 2 | 4351/ 5600 batches | train loss 0.4074319 +| epoch 2 | 4355/ 5600 batches | train loss 0.4126868 +| epoch 2 | 4359/ 5600 batches | train loss 0.4223193 +| epoch 2 | 4363/ 5600 batches | train loss 0.4078640 +| epoch 2 | 4367/ 5600 batches | train loss 0.4315726 +| epoch 2 | 4371/ 5600 batches | train loss 0.4392629 +| epoch 2 | 4375/ 5600 batches | train loss 0.3816914 +| epoch 2 | 4379/ 5600 batches | train loss 0.3593349 +| epoch 2 | 4383/ 5600 batches | train loss 0.4977371 +| epoch 2 | 4387/ 5600 batches | train loss 0.4209279 +| epoch 2 | 4391/ 5600 batches | train loss 0.4454204 +| epoch 2 | 4395/ 5600 batches | train loss 0.3395311 +| epoch 2 | 4399/ 5600 batches | train loss 0.4131939 +| epoch 2 | 4403/ 5600 batches | train loss 0.4441654 +| epoch 2 | 4407/ 5600 batches | train loss 0.4312212 +| epoch 2 | 4411/ 5600 batches | train loss 0.4061240 +| epoch 2 | 4415/ 5600 batches | train loss 0.4203827 +| epoch 2 | 4419/ 5600 batches | train loss 0.4373525 +| epoch 2 | 4423/ 5600 batches | train loss 0.3908015 +| epoch 2 | 4427/ 5600 batches | train loss 0.4143143 +| epoch 2 | 4431/ 5600 batches | train loss 0.3256076 +| epoch 2 | 4435/ 5600 batches | train loss 0.4317885 +| epoch 2 | 4439/ 5600 batches | train loss 0.4736932 +| epoch 2 | 4443/ 5600 batches | train loss 0.4422883 +| epoch 2 | 4447/ 5600 batches | train loss 0.4887973 +| epoch 2 | 4451/ 5600 batches | train loss 0.3899656 +| epoch 2 | 4455/ 5600 batches | train loss 0.4657616 +| epoch 2 | 4459/ 5600 batches | train loss 0.5172955 +| epoch 2 | 4463/ 5600 batches | train loss 0.4758436 +| epoch 2 | 4467/ 5600 batches | train loss 0.5348580 +| epoch 2 | 4471/ 5600 batches | train loss 0.4234464 +| epoch 2 | 4475/ 5600 batches | train loss 0.3674781 +| epoch 2 | 4479/ 5600 batches | train loss 0.4451483 +| epoch 2 | 4483/ 5600 batches | train loss 0.4084004 +| epoch 2 | 4487/ 5600 batches | train loss 0.4154665 +| epoch 2 | 4491/ 5600 batches | train loss 0.3875725 +| epoch 2 | 4495/ 5600 batches | train loss 0.4364819 +| epoch 2 | 4499/ 5600 batches | train loss 0.3899334 +| epoch 2 | 4503/ 5600 batches | train loss 0.5473663 +| epoch 2 | 4507/ 5600 batches | train loss 0.3647386 +| epoch 2 | 4511/ 5600 batches | train loss 0.4180239 +| epoch 2 | 4515/ 5600 batches | train loss 0.4697951 +| epoch 2 | 4519/ 5600 batches | train loss 0.4338708 +| epoch 2 | 4523/ 5600 batches | train loss 0.4208603 +| epoch 2 | 4527/ 5600 batches | train loss 0.4768353 +| epoch 2 | 4531/ 5600 batches | train loss 0.5163982 +| epoch 2 | 4535/ 5600 batches | train loss 0.3721527 +| epoch 2 | 4539/ 5600 batches | train loss 0.4814473 +| epoch 2 | 4543/ 5600 batches | train loss 0.3726901 +| epoch 2 | 4547/ 5600 batches | train loss 0.3655885 +| epoch 2 | 4551/ 5600 batches | train loss 0.4467214 +| epoch 2 | 4555/ 5600 batches | train loss 0.5345221 +| epoch 2 | 4559/ 5600 batches | train loss 0.5388165 +| epoch 2 | 4563/ 5600 batches | train loss 0.3773126 +| epoch 2 | 4567/ 5600 batches | train loss 0.3975894 +| epoch 2 | 4571/ 5600 batches | train loss 0.4767525 +| epoch 2 | 4575/ 5600 batches | train loss 0.3204498 +| epoch 2 | 4579/ 5600 batches | train loss 0.3667319 +| epoch 2 | 4583/ 5600 batches | train loss 0.4504562 +| epoch 2 | 4587/ 5600 batches | train loss 0.4012092 +| epoch 2 | 4591/ 5600 batches | train loss 0.5042007 +| epoch 2 | 4595/ 5600 batches | train loss 0.5018276 +| epoch 2 | 4599/ 5600 batches | train loss 0.3910475 +| epoch 2 | 4603/ 5600 batches | train loss 0.3970505 +| epoch 2 | 4607/ 5600 batches | train loss 0.3797924 +| epoch 2 | 4611/ 5600 batches | train loss 0.4146976 +| epoch 2 | 4615/ 5600 batches | train loss 0.3598462 +| epoch 2 | 4619/ 5600 batches | train loss 0.4579852 +| epoch 2 | 4623/ 5600 batches | train loss 0.3525761 +| epoch 2 | 4627/ 5600 batches | train loss 0.4410520 +| epoch 2 | 4631/ 5600 batches | train loss 0.4252129 +| epoch 2 | 4635/ 5600 batches | train loss 0.4593182 +| epoch 2 | 4639/ 5600 batches | train loss 0.5023967 +| epoch 2 | 4643/ 5600 batches | train loss 0.5960677 +| epoch 2 | 4647/ 5600 batches | train loss 0.4109411 +| epoch 2 | 4651/ 5600 batches | train loss 0.4913161 +| epoch 2 | 4655/ 5600 batches | train loss 0.5229824 +| epoch 2 | 4659/ 5600 batches | train loss 0.4588297 +| epoch 2 | 4663/ 5600 batches | train loss 0.4233307 +| epoch 2 | 4667/ 5600 batches | train loss 0.4113260 +| epoch 2 | 4671/ 5600 batches | train loss 0.4921188 +| epoch 2 | 4675/ 5600 batches | train loss 0.5282698 +| epoch 2 | 4679/ 5600 batches | train loss 0.4882159 +| epoch 2 | 4683/ 5600 batches | train loss 0.4366643 +| epoch 2 | 4687/ 5600 batches | train loss 0.3921379 +| epoch 2 | 4691/ 5600 batches | train loss 0.4598532 +| epoch 2 | 4695/ 5600 batches | train loss 0.5396042 +| epoch 2 | 4699/ 5600 batches | train loss 0.4111764 +| epoch 2 | 4703/ 5600 batches | train loss 0.4592351 +| epoch 2 | 4707/ 5600 batches | train loss 0.4528359 +| epoch 2 | 4711/ 5600 batches | train loss 0.3982272 +| epoch 2 | 4715/ 5600 batches | train loss 0.4471664 +| epoch 2 | 4719/ 5600 batches | train loss 0.4555741 +| epoch 2 | 4723/ 5600 batches | train loss 0.4592359 +| epoch 2 | 4727/ 5600 batches | train loss 0.4249344 +| epoch 2 | 4731/ 5600 batches | train loss 0.3081390 +| epoch 2 | 4735/ 5600 batches | train loss 0.4062714 +| epoch 2 | 4739/ 5600 batches | train loss 0.4672828 +| epoch 2 | 4743/ 5600 batches | train loss 0.3827149 +| epoch 2 | 4747/ 5600 batches | train loss 0.4682497 +| epoch 2 | 4751/ 5600 batches | train loss 0.3573256 +| epoch 2 | 4755/ 5600 batches | train loss 0.3541293 +| epoch 2 | 4759/ 5600 batches | train loss 0.4093467 +| epoch 2 | 4763/ 5600 batches | train loss 0.5754966 +| epoch 2 | 4767/ 5600 batches | train loss 0.4848173 +| epoch 2 | 4771/ 5600 batches | train loss 0.4739484 +| epoch 2 | 4775/ 5600 batches | train loss 0.4865458 +| epoch 2 | 4779/ 5600 batches | train loss 0.4671832 +| epoch 2 | 4783/ 5600 batches | train loss 0.4660223 +| epoch 2 | 4787/ 5600 batches | train loss 0.4867099 +| epoch 2 | 4791/ 5600 batches | train loss 0.4574796 +| epoch 2 | 4795/ 5600 batches | train loss 0.4770288 +| epoch 2 | 4799/ 5600 batches | train loss 0.4480227 +| epoch 2 | 4803/ 5600 batches | train loss 0.4100694 +| epoch 2 | 4807/ 5600 batches | train loss 0.4443746 +| epoch 2 | 4811/ 5600 batches | train loss 0.3518404 +| epoch 2 | 4815/ 5600 batches | train loss 0.4584441 +| epoch 2 | 4819/ 5600 batches | train loss 0.3943985 +| epoch 2 | 4823/ 5600 batches | train loss 0.4183724 +| epoch 2 | 4827/ 5600 batches | train loss 0.4510067 +| epoch 2 | 4831/ 5600 batches | train loss 0.4633560 +| epoch 2 | 4835/ 5600 batches | train loss 0.3659931 +| epoch 2 | 4839/ 5600 batches | train loss 0.4608249 +| epoch 2 | 4843/ 5600 batches | train loss 0.4367308 +| epoch 2 | 4847/ 5600 batches | train loss 0.4485772 +| epoch 2 | 4851/ 5600 batches | train loss 0.3856964 +| epoch 2 | 4855/ 5600 batches | train loss 0.4994791 +| epoch 2 | 4859/ 5600 batches | train loss 0.5433407 +| epoch 2 | 4863/ 5600 batches | train loss 0.4541186 +| epoch 2 | 4867/ 5600 batches | train loss 0.4039197 +| epoch 2 | 4871/ 5600 batches | train loss 0.4510322 +| epoch 2 | 4875/ 5600 batches | train loss 0.4295865 +| epoch 2 | 4879/ 5600 batches | train loss 0.3738361 +| epoch 2 | 4883/ 5600 batches | train loss 0.4624365 +| epoch 2 | 4887/ 5600 batches | train loss 0.4865873 +| epoch 2 | 4891/ 5600 batches | train loss 0.4491177 +| epoch 2 | 4895/ 5600 batches | train loss 0.4602076 +| epoch 2 | 4899/ 5600 batches | train loss 0.4500927 +| epoch 2 | 4903/ 5600 batches | train loss 0.4802861 +| epoch 2 | 4907/ 5600 batches | train loss 0.4723612 +| epoch 2 | 4911/ 5600 batches | train loss 0.4273061 +| epoch 2 | 4915/ 5600 batches | train loss 0.4463643 +| epoch 2 | 4919/ 5600 batches | train loss 0.4249796 +| epoch 2 | 4923/ 5600 batches | train loss 0.4606604 +| epoch 2 | 4927/ 5600 batches | train loss 0.3919528 +| epoch 2 | 4931/ 5600 batches | train loss 0.2715846 +| epoch 2 | 4935/ 5600 batches | train loss 0.4521362 +| epoch 2 | 4939/ 5600 batches | train loss 0.3496245 +| epoch 2 | 4943/ 5600 batches | train loss 0.4449361 +| epoch 2 | 4947/ 5600 batches | train loss 0.5261657 +| epoch 2 | 4951/ 5600 batches | train loss 0.4344154 +| epoch 2 | 4955/ 5600 batches | train loss 0.3791837 +| epoch 2 | 4959/ 5600 batches | train loss 0.4380879 +| epoch 2 | 4963/ 5600 batches | train loss 0.4317124 +| epoch 2 | 4967/ 5600 batches | train loss 0.3727867 +| epoch 2 | 4971/ 5600 batches | train loss 0.3479316 +| epoch 2 | 4975/ 5600 batches | train loss 0.4920605 +| epoch 2 | 4979/ 5600 batches | train loss 0.4673501 +| epoch 2 | 4983/ 5600 batches | train loss 0.4464504 +| epoch 2 | 4987/ 5600 batches | train loss 0.4948211 +| epoch 2 | 4991/ 5600 batches | train loss 0.3752613 +| epoch 2 | 4995/ 5600 batches | train loss 0.4726004 +| epoch 2 | 4999/ 5600 batches | train loss 0.4384400 +| epoch 2 | 5003/ 5600 batches | train loss 0.4990432 +| epoch 2 | 5007/ 5600 batches | train loss 0.4846060 +| epoch 2 | 5011/ 5600 batches | train loss 0.4455616 +| epoch 2 | 5015/ 5600 batches | train loss 0.5263664 +| epoch 2 | 5019/ 5600 batches | train loss 0.3919635 +| epoch 2 | 5023/ 5600 batches | train loss 0.4373266 +| epoch 2 | 5027/ 5600 batches | train loss 0.4693936 +| epoch 2 | 5031/ 5600 batches | train loss 0.5082921 +| epoch 2 | 5035/ 5600 batches | train loss 0.4765202 +| epoch 2 | 5039/ 5600 batches | train loss 0.4488368 +| epoch 2 | 5043/ 5600 batches | train loss 0.4097798 +| epoch 2 | 5047/ 5600 batches | train loss 0.5000862 +| epoch 2 | 5051/ 5600 batches | train loss 0.3858081 +| epoch 2 | 5055/ 5600 batches | train loss 0.4396004 +| epoch 2 | 5059/ 5600 batches | train loss 0.5350379 +| epoch 2 | 5063/ 5600 batches | train loss 0.4447317 +| epoch 2 | 5067/ 5600 batches | train loss 0.5243590 +| epoch 2 | 5071/ 5600 batches | train loss 0.4795291 +| epoch 2 | 5075/ 5600 batches | train loss 0.4571360 +| epoch 2 | 5079/ 5600 batches | train loss 0.4124612 +| epoch 2 | 5083/ 5600 batches | train loss 0.4343626 +| epoch 2 | 5087/ 5600 batches | train loss 0.4453903 +| epoch 2 | 5091/ 5600 batches | train loss 0.3943319 +| epoch 2 | 5095/ 5600 batches | train loss 0.3728716 +| epoch 2 | 5099/ 5600 batches | train loss 0.4281169 +| epoch 2 | 5103/ 5600 batches | train loss 0.4389821 +| epoch 2 | 5107/ 5600 batches | train loss 0.5062392 +| epoch 2 | 5111/ 5600 batches | train loss 0.4013538 +| epoch 2 | 5115/ 5600 batches | train loss 0.4479508 +| epoch 2 | 5119/ 5600 batches | train loss 0.3925293 +| epoch 2 | 5123/ 5600 batches | train loss 0.4963687 +| epoch 2 | 5127/ 5600 batches | train loss 0.4548543 +| epoch 2 | 5131/ 5600 batches | train loss 0.4964668 +| epoch 2 | 5135/ 5600 batches | train loss 0.4227513 +| epoch 2 | 5139/ 5600 batches | train loss 0.4708411 +| epoch 2 | 5143/ 5600 batches | train loss 0.5381901 +| epoch 2 | 5147/ 5600 batches | train loss 0.4483498 +| epoch 2 | 5151/ 5600 batches | train loss 0.4298190 +| epoch 2 | 5155/ 5600 batches | train loss 0.3946979 +| epoch 2 | 5159/ 5600 batches | train loss 0.3384082 +| epoch 2 | 5163/ 5600 batches | train loss 0.4769512 +| epoch 2 | 5167/ 5600 batches | train loss 0.5204139 +| epoch 2 | 5171/ 5600 batches | train loss 0.4115449 +| epoch 2 | 5175/ 5600 batches | train loss 0.4549329 +| epoch 2 | 5179/ 5600 batches | train loss 0.4527866 +| epoch 2 | 5183/ 5600 batches | train loss 0.5262887 +| epoch 2 | 5187/ 5600 batches | train loss 0.4437747 +| epoch 2 | 5191/ 5600 batches | train loss 0.5044764 +| epoch 2 | 5195/ 5600 batches | train loss 0.4313602 +| epoch 2 | 5199/ 5600 batches | train loss 0.4028626 +| epoch 2 | 5203/ 5600 batches | train loss 0.4526935 +| epoch 2 | 5207/ 5600 batches | train loss 0.4209414 +| epoch 2 | 5211/ 5600 batches | train loss 0.4482152 +| epoch 2 | 5215/ 5600 batches | train loss 0.4131697 +| epoch 2 | 5219/ 5600 batches | train loss 0.4016246 +| epoch 2 | 5223/ 5600 batches | train loss 0.3866525 +| epoch 2 | 5227/ 5600 batches | train loss 0.4056232 +| epoch 2 | 5231/ 5600 batches | train loss 0.5160246 +| epoch 2 | 5235/ 5600 batches | train loss 0.3814283 +| epoch 2 | 5239/ 5600 batches | train loss 0.4587539 +| epoch 2 | 5243/ 5600 batches | train loss 0.4666582 +| epoch 2 | 5247/ 5600 batches | train loss 0.3910675 +| epoch 2 | 5251/ 5600 batches | train loss 0.3902963 +| epoch 2 | 5255/ 5600 batches | train loss 0.3760468 +| epoch 2 | 5259/ 5600 batches | train loss 0.3476655 +| epoch 2 | 5263/ 5600 batches | train loss 0.4383090 +| epoch 2 | 5267/ 5600 batches | train loss 0.4123924 +| epoch 2 | 5271/ 5600 batches | train loss 0.6192884 +| epoch 2 | 5275/ 5600 batches | train loss 0.5191082 +| epoch 2 | 5279/ 5600 batches | train loss 0.4430309 +| epoch 2 | 5283/ 5600 batches | train loss 0.4634663 +| epoch 2 | 5287/ 5600 batches | train loss 0.3965467 +| epoch 2 | 5291/ 5600 batches | train loss 0.4344049 +| epoch 2 | 5295/ 5600 batches | train loss 0.3903260 +| epoch 2 | 5299/ 5600 batches | train loss 0.4404479 +| epoch 2 | 5303/ 5600 batches | train loss 0.4902810 +| epoch 2 | 5307/ 5600 batches | train loss 0.4017327 +| epoch 2 | 5311/ 5600 batches | train loss 0.3539245 +| epoch 2 | 5315/ 5600 batches | train loss 0.4526768 +| epoch 2 | 5319/ 5600 batches | train loss 0.4552312 +| epoch 2 | 5323/ 5600 batches | train loss 0.4236704 +| epoch 2 | 5327/ 5600 batches | train loss 0.3651127 +| epoch 2 | 5331/ 5600 batches | train loss 0.4026840 +| epoch 2 | 5335/ 5600 batches | train loss 0.4897469 +| epoch 2 | 5339/ 5600 batches | train loss 0.4082988 +| epoch 2 | 5343/ 5600 batches | train loss 0.4913019 +| epoch 2 | 5347/ 5600 batches | train loss 0.4776011 +| epoch 2 | 5351/ 5600 batches | train loss 0.5504945 +| epoch 2 | 5355/ 5600 batches | train loss 0.4103411 +| epoch 2 | 5359/ 5600 batches | train loss 0.4355100 +| epoch 2 | 5363/ 5600 batches | train loss 0.4206630 +| epoch 2 | 5367/ 5600 batches | train loss 0.4817601 +| epoch 2 | 5371/ 5600 batches | train loss 0.4773868 +| epoch 2 | 5375/ 5600 batches | train loss 0.3770548 +| epoch 2 | 5379/ 5600 batches | train loss 0.2953631 +| epoch 2 | 5383/ 5600 batches | train loss 0.4511570 +| epoch 2 | 5387/ 5600 batches | train loss 0.4692152 +| epoch 2 | 5391/ 5600 batches | train loss 0.4908349 +| epoch 2 | 5395/ 5600 batches | train loss 0.4655629 +| epoch 2 | 5399/ 5600 batches | train loss 0.4853193 +| epoch 2 | 5403/ 5600 batches | train loss 0.4800103 +| epoch 2 | 5407/ 5600 batches | train loss 0.4876809 +| epoch 2 | 5411/ 5600 batches | train loss 0.3141715 +| epoch 2 | 5415/ 5600 batches | train loss 0.4493196 +| epoch 2 | 5419/ 5600 batches | train loss 0.4431490 +| epoch 2 | 5423/ 5600 batches | train loss 0.3780558 +| epoch 2 | 5427/ 5600 batches | train loss 0.4711379 +| epoch 2 | 5431/ 5600 batches | train loss 0.4795521 +| epoch 2 | 5435/ 5600 batches | train loss 0.4579938 +| epoch 2 | 5439/ 5600 batches | train loss 0.3673310 +| epoch 2 | 5443/ 5600 batches | train loss 0.5539426 +| epoch 2 | 5447/ 5600 batches | train loss 0.4840942 +| epoch 2 | 5451/ 5600 batches | train loss 0.3549848 +| epoch 2 | 5455/ 5600 batches | train loss 0.4437675 +| epoch 2 | 5459/ 5600 batches | train loss 0.5118377 +| epoch 2 | 5463/ 5600 batches | train loss 0.4228743 +| epoch 2 | 5467/ 5600 batches | train loss 0.4327209 +| epoch 2 | 5471/ 5600 batches | train loss 0.3729967 +| epoch 2 | 5475/ 5600 batches | train loss 0.4641961 +| epoch 2 | 5479/ 5600 batches | train loss 0.4749466 +| epoch 2 | 5483/ 5600 batches | train loss 0.5179291 +| epoch 2 | 5487/ 5600 batches | train loss 0.5825300 +| epoch 2 | 5491/ 5600 batches | train loss 0.4994111 +| epoch 2 | 5495/ 5600 batches | train loss 0.4925715 +| epoch 2 | 5499/ 5600 batches | train loss 0.5252252 +| epoch 2 | 5503/ 5600 batches | train loss 0.4093102 +| epoch 2 | 5507/ 5600 batches | train loss 0.6256742 +| epoch 2 | 5511/ 5600 batches | train loss 0.4154730 +| epoch 2 | 5515/ 5600 batches | train loss 0.4314928 +| epoch 2 | 5519/ 5600 batches | train loss 0.4066962 +| epoch 2 | 5523/ 5600 batches | train loss 0.5012960 +| epoch 2 | 5527/ 5600 batches | train loss 0.5131326 +| epoch 2 | 5531/ 5600 batches | train loss 0.3904426 +| epoch 2 | 5535/ 5600 batches | train loss 0.5014663 +| epoch 2 | 5539/ 5600 batches | train loss 0.5064483 +| epoch 2 | 5543/ 5600 batches | train loss 0.4096518 +| epoch 2 | 5547/ 5600 batches | train loss 0.4125898 +| epoch 2 | 5551/ 5600 batches | train loss 0.5228854 +| epoch 2 | 5555/ 5600 batches | train loss 0.4775764 +| epoch 2 | 5559/ 5600 batches | train loss 0.4737009 +| epoch 2 | 5563/ 5600 batches | train loss 0.4184715 +| epoch 2 | 5567/ 5600 batches | train loss 0.4569168 +| epoch 2 | 5571/ 5600 batches | train loss 0.3829525 +| epoch 2 | 5575/ 5600 batches | train loss 0.4532464 +| epoch 2 | 5579/ 5600 batches | train loss 0.4842184 +| epoch 2 | 5583/ 5600 batches | train loss 0.5203774 +| epoch 2 | 5587/ 5600 batches | train loss 0.5058120 +| epoch 2 | 5591/ 5600 batches | train loss 0.3939964 +| epoch 2 | 5595/ 5600 batches | train loss 0.4985756 +| epoch 2 | 5599/ 5600 batches | train loss 0.4586041 +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 5600 batches | test loss 0.5294788 +| epoch 2 | 7/ 5600 batches | test loss 0.3936746 +| epoch 2 | 11/ 5600 batches | test loss 0.4958615 +| epoch 2 | 15/ 5600 batches | test loss 0.4733728 +| epoch 2 | 19/ 5600 batches | test loss 0.4244089 +| epoch 2 | 23/ 5600 batches | test loss 0.4368821 +| epoch 2 | 27/ 5600 batches | test loss 0.3428754 +| epoch 2 | 31/ 5600 batches | test loss 0.4616683 +| epoch 2 | 35/ 5600 batches | test loss 0.4390837 +| epoch 2 | 39/ 5600 batches | test loss 0.4563276 +| epoch 2 | 43/ 5600 batches | test loss 0.3970989 +| epoch 2 | 47/ 5600 batches | test loss 0.4729606 +| epoch 2 | 51/ 5600 batches | test loss 0.3980550 +| epoch 2 | 55/ 5600 batches | test loss 0.4708646 +| epoch 2 | 59/ 5600 batches | test loss 0.3905385 +| epoch 2 | 63/ 5600 batches | test loss 0.5587212 +| epoch 2 | 67/ 5600 batches | test loss 0.4516928 +| epoch 2 | 71/ 5600 batches | test loss 0.3669098 +| epoch 2 | 75/ 5600 batches | test loss 0.4931019 +| epoch 2 | 79/ 5600 batches | test loss 0.3713190 +| epoch 2 | 83/ 5600 batches | test loss 0.4082062 +| epoch 2 | 87/ 5600 batches | test loss 0.5331184 +| epoch 2 | 91/ 5600 batches | test loss 0.4789002 +| epoch 2 | 95/ 5600 batches | test loss 0.4394425 +| epoch 2 | 99/ 5600 batches | test loss 0.4312862 +| epoch 2 | 103/ 5600 batches | test loss 0.4626613 +| epoch 2 | 107/ 5600 batches | test loss 0.4927158 +| epoch 2 | 111/ 5600 batches | test loss 0.4596294 +| epoch 2 | 115/ 5600 batches | test loss 0.5046420 +| epoch 2 | 119/ 5600 batches | test loss 0.3803273 +| epoch 2 | 123/ 5600 batches | test loss 0.4800637 +| epoch 2 | 127/ 5600 batches | test loss 0.4899608 +| epoch 2 | 131/ 5600 batches | test loss 0.5273591 +| epoch 2 | 135/ 5600 batches | test loss 0.4231659 +| epoch 2 | 139/ 5600 batches | test loss 0.3828605 +| epoch 2 | 143/ 5600 batches | test loss 0.4451078 +| epoch 2 | 147/ 5600 batches | test loss 0.4389244 +| epoch 2 | 151/ 5600 batches | test loss 0.4401908 +| epoch 2 | 155/ 5600 batches | test loss 0.4306651 +| epoch 2 | 159/ 5600 batches | test loss 0.4278610 +| epoch 2 | 163/ 5600 batches | test loss 0.4744608 +| epoch 2 | 167/ 5600 batches | test loss 0.4180903 +| epoch 2 | 171/ 5600 batches | test loss 0.3758648 +| epoch 2 | 175/ 5600 batches | test loss 0.4079736 +| epoch 2 | 179/ 5600 batches | test loss 0.3594611 +| epoch 2 | 183/ 5600 batches | test loss 0.4980869 +| epoch 2 | 187/ 5600 batches | test loss 0.4964052 +| epoch 2 | 191/ 5600 batches | test loss 0.4307451 +| epoch 2 | 195/ 5600 batches | test loss 0.4423462 +| epoch 2 | 199/ 5600 batches | test loss 0.4056869 +| epoch 2 | 203/ 5600 batches | test loss 0.4513726 +| epoch 2 | 207/ 5600 batches | test loss 0.4753048 +| epoch 2 | 211/ 5600 batches | test loss 0.3512205 +| epoch 2 | 215/ 5600 batches | test loss 0.5188749 +| epoch 2 | 219/ 5600 batches | test loss 0.3884652 +| epoch 2 | 223/ 5600 batches | test loss 0.4592071 +| epoch 2 | 227/ 5600 batches | test loss 0.4438496 +| epoch 2 | 231/ 5600 batches | test loss 0.5069222 +| epoch 2 | 235/ 5600 batches | test loss 0.4035576 +| epoch 2 | 239/ 5600 batches | test loss 0.4042291 +| epoch 2 | 243/ 5600 batches | test loss 0.3727475 +| epoch 2 | 247/ 5600 batches | test loss 0.4340523 +| epoch 2 | 251/ 5600 batches | test loss 0.4222527 +| epoch 2 | 255/ 5600 batches | test loss 0.4312535 +| epoch 2 | 259/ 5600 batches | test loss 0.4243412 +| epoch 2 | 263/ 5600 batches | test loss 0.4850333 +| epoch 2 | 267/ 5600 batches | test loss 0.4529931 +| epoch 2 | 271/ 5600 batches | test loss 0.4309501 +| epoch 2 | 275/ 5600 batches | test loss 0.4961271 +| epoch 2 | 279/ 5600 batches | test loss 0.4147743 +| epoch 2 | 283/ 5600 batches | test loss 0.3521060 +| epoch 2 | 287/ 5600 batches | test loss 0.5447993 +| epoch 2 | 291/ 5600 batches | test loss 0.3779094 +| epoch 2 | 295/ 5600 batches | test loss 0.4307473 +| epoch 2 | 299/ 5600 batches | test loss 0.4704588 +| epoch 2 | 303/ 5600 batches | test loss 0.4440035 +| epoch 2 | 307/ 5600 batches | test loss 0.3814127 +| epoch 2 | 311/ 5600 batches | test loss 0.5084088 +| epoch 2 | 315/ 5600 batches | test loss 0.4435525 +| epoch 2 | 319/ 5600 batches | test loss 0.4421767 +| epoch 2 | 323/ 5600 batches | test loss 0.4896773 +| epoch 2 | 327/ 5600 batches | test loss 0.4651540 +| epoch 2 | 331/ 5600 batches | test loss 0.4845947 +| epoch 2 | 335/ 5600 batches | test loss 0.4285416 +| epoch 2 | 339/ 5600 batches | test loss 0.5364118 +| epoch 2 | 343/ 5600 batches | test loss 0.4455816 +| epoch 2 | 347/ 5600 batches | test loss 0.3920626 +| epoch 2 | 351/ 5600 batches | test loss 0.4176329 +| epoch 2 | 355/ 5600 batches | test loss 0.4427665 +| epoch 2 | 359/ 5600 batches | test loss 0.4775425 +| epoch 2 | 363/ 5600 batches | test loss 0.3859715 +| epoch 2 | 367/ 5600 batches | test loss 0.4448166 +| epoch 2 | 371/ 5600 batches | test loss 0.3976539 +| epoch 2 | 375/ 5600 batches | test loss 0.4628180 +| epoch 2 | 379/ 5600 batches | test loss 0.5151043 +| epoch 2 | 383/ 5600 batches | test loss 0.4749645 +| epoch 2 | 387/ 5600 batches | test loss 0.4337948 +| epoch 2 | 391/ 5600 batches | test loss 0.4695969 +| epoch 2 | 395/ 5600 batches | test loss 0.4001307 +| epoch 2 | 399/ 5600 batches | test loss 0.4450701 +| epoch 2 | 403/ 5600 batches | test loss 0.4036980 +| epoch 2 | 407/ 5600 batches | test loss 0.4441279 +| epoch 2 | 411/ 5600 batches | test loss 0.4444417 +| epoch 2 | 415/ 5600 batches | test loss 0.3782003 +| epoch 2 | 419/ 5600 batches | test loss 0.4499841 +| epoch 2 | 423/ 5600 batches | test loss 0.4851033 +| epoch 2 | 427/ 5600 batches | test loss 0.4506938 +| epoch 2 | 431/ 5600 batches | test loss 0.4431538 +| epoch 2 | 435/ 5600 batches | test loss 0.1990829 +| epoch 2 | 439/ 5600 batches | test loss 0.4077557 +| epoch 2 | 443/ 5600 batches | test loss 0.4911201 +| epoch 2 | 447/ 5600 batches | test loss 0.4352635 +| epoch 2 | 451/ 5600 batches | test loss 0.4613602 +| epoch 2 | 455/ 5600 batches | test loss 0.4117456 +| epoch 2 | 459/ 5600 batches | test loss 0.4225878 +| epoch 2 | 463/ 5600 batches | test loss 0.3983732 +| epoch 2 | 467/ 5600 batches | test loss 0.5533243 +| epoch 2 | 471/ 5600 batches | test loss 0.4348628 +| epoch 2 | 475/ 5600 batches | test loss 0.4683051 +| epoch 2 | 479/ 5600 batches | test loss 0.4872023 +| epoch 2 | 483/ 5600 batches | test loss 0.2462153 +| epoch 2 | 487/ 5600 batches | test loss 0.4977279 +| epoch 2 | 491/ 5600 batches | test loss 0.4575611 +| epoch 2 | 495/ 5600 batches | test loss 0.4580836 +| epoch 2 | 499/ 5600 batches | test loss 0.3513390 +| epoch 2 | 503/ 5600 batches | test loss 0.4363964 +| epoch 2 | 507/ 5600 batches | test loss 0.4680519 +| epoch 2 | 511/ 5600 batches | test loss 0.4034163 +| epoch 2 | 515/ 5600 batches | test loss 0.4810053 +| epoch 2 | 519/ 5600 batches | test loss 0.4190316 +| epoch 2 | 523/ 5600 batches | test loss 0.3464521 +| epoch 2 | 527/ 5600 batches | test loss 0.4113372 +| epoch 2 | 531/ 5600 batches | test loss 0.3759799 +| epoch 2 | 535/ 5600 batches | test loss 0.4446039 +| epoch 2 | 539/ 5600 batches | test loss 0.4610003 +| epoch 2 | 543/ 5600 batches | test loss 0.5139380 +| epoch 2 | 547/ 5600 batches | test loss 0.3455619 +| epoch 2 | 551/ 5600 batches | test loss 0.5186770 +| epoch 2 | 555/ 5600 batches | test loss 0.3681214 +| epoch 2 | 559/ 5600 batches | test loss 0.5282482 +| epoch 2 | 563/ 5600 batches | test loss 0.4078212 +| epoch 2 | 567/ 5600 batches | test loss 0.3627600 +| epoch 2 | 571/ 5600 batches | test loss 0.4387828 +| epoch 2 | 575/ 5600 batches | test loss 0.4460468 +| epoch 2 | 579/ 5600 batches | test loss 0.4405383 +| epoch 2 | 583/ 5600 batches | test loss 0.4038219 +| epoch 2 | 587/ 5600 batches | test loss 0.4957465 +| epoch 2 | 591/ 5600 batches | test loss 0.4784868 +| epoch 2 | 595/ 5600 batches | test loss 0.4131797 +| epoch 2 | 599/ 5600 batches | test loss 0.4130718 +| epoch 2 | 603/ 5600 batches | test loss 0.5070766 +| epoch 2 | 607/ 5600 batches | test loss 0.4301375 +| epoch 2 | 611/ 5600 batches | test loss 0.4236834 +| epoch 2 | 615/ 5600 batches | test loss 0.4953714 +| epoch 2 | 619/ 5600 batches | test loss 0.4825589 +| epoch 2 | 623/ 5600 batches | test loss 0.5116807 +| epoch 2 | 627/ 5600 batches | test loss 0.4579464 +| epoch 2 | 631/ 5600 batches | test loss 0.3996577 +| epoch 2 | 635/ 5600 batches | test loss 0.4374783 +| epoch 2 | 639/ 5600 batches | test loss 0.4093262 +| epoch 2 | 643/ 5600 batches | test loss 0.4014531 +| epoch 2 | 647/ 5600 batches | test loss 0.4909166 +| epoch 2 | 651/ 5600 batches | test loss 0.4191688 +| epoch 2 | 655/ 5600 batches | test loss 0.5177490 +| epoch 2 | 659/ 5600 batches | test loss 0.4173431 +| epoch 2 | 663/ 5600 batches | test loss 0.4501320 +| epoch 2 | 667/ 5600 batches | test loss 0.4702923 +| epoch 2 | 671/ 5600 batches | test loss 0.4219290 +| epoch 2 | 675/ 5600 batches | test loss 0.4198972 +| epoch 2 | 679/ 5600 batches | test loss 0.4358284 +| epoch 2 | 683/ 5600 batches | test loss 0.3998059 +| epoch 2 | 687/ 5600 batches | test loss 0.3618367 +| epoch 2 | 691/ 5600 batches | test loss 0.4739236 +| epoch 2 | 695/ 5600 batches | test loss 0.4804242 +| epoch 2 | 699/ 5600 batches | test loss 0.4883922 +| epoch 2 | 703/ 5600 batches | test loss 0.3108804 +| epoch 2 | 707/ 5600 batches | test loss 0.4136433 +| epoch 2 | 711/ 5600 batches | test loss 0.4916683 +| epoch 2 | 715/ 5600 batches | test loss 0.4309772 +| epoch 2 | 719/ 5600 batches | test loss 0.4610162 +| epoch 2 | 723/ 5600 batches | test loss 0.4203452 +| epoch 2 | 727/ 5600 batches | test loss 0.4867311 +| epoch 2 | 731/ 5600 batches | test loss 0.4184257 +| epoch 2 | 735/ 5600 batches | test loss 0.4571446 +| epoch 2 | 739/ 5600 batches | test loss 0.4543806 +| epoch 2 | 743/ 5600 batches | test loss 0.4858193 +| epoch 2 | 747/ 5600 batches | test loss 0.4525787 +| epoch 2 | 751/ 5600 batches | test loss 0.3799071 +| epoch 2 | 755/ 5600 batches | test loss 0.4486926 +| epoch 2 | 759/ 5600 batches | test loss 0.3828263 +| epoch 2 | 763/ 5600 batches | test loss 0.4201737 +| epoch 2 | 767/ 5600 batches | test loss 0.5233033 +| epoch 2 | 771/ 5600 batches | test loss 0.4031562 +| epoch 2 | 775/ 5600 batches | test loss 0.4767799 +| epoch 2 | 779/ 5600 batches | test loss 0.3943267 +| epoch 2 | 783/ 5600 batches | test loss 0.3575893 +| epoch 2 | 787/ 5600 batches | test loss 0.4500430 +| epoch 2 | 791/ 5600 batches | test loss 0.4218178 +| epoch 2 | 795/ 5600 batches | test loss 0.4607686 +| epoch 2 | 799/ 5600 batches | test loss 0.5145268 +| epoch 2 | 803/ 5600 batches | test loss 0.5008334 +| epoch 2 | 807/ 5600 batches | test loss 0.4599965 +| epoch 2 | 811/ 5600 batches | test loss 0.4896994 +| epoch 2 | 815/ 5600 batches | test loss 0.4889708 +| epoch 2 | 819/ 5600 batches | test loss 0.4178745 +| epoch 2 | 823/ 5600 batches | test loss 0.4902495 +| epoch 2 | 827/ 5600 batches | test loss 0.4734206 +| epoch 2 | 831/ 5600 batches | test loss 0.4792064 +| epoch 2 | 835/ 5600 batches | test loss 0.4629156 +| epoch 2 | 839/ 5600 batches | test loss 0.4894810 +| epoch 2 | 843/ 5600 batches | test loss 0.4400185 +| epoch 2 | 847/ 5600 batches | test loss 0.4966869 +| epoch 2 | 851/ 5600 batches | test loss 0.4509358 +| epoch 2 | 855/ 5600 batches | test loss 0.4569044 +| epoch 2 | 859/ 5600 batches | test loss 0.4919817 +| epoch 2 | 863/ 5600 batches | test loss 0.4314785 +| epoch 2 | 867/ 5600 batches | test loss 0.5882734 +| epoch 2 | 871/ 5600 batches | test loss 0.3935978 +| epoch 2 | 875/ 5600 batches | test loss 0.4537317 +| epoch 2 | 879/ 5600 batches | test loss 0.4565849 +| epoch 2 | 883/ 5600 batches | test loss 0.3970664 +| epoch 2 | 887/ 5600 batches | test loss 0.4666058 +| epoch 2 | 891/ 5600 batches | test loss 0.4558342 +| epoch 2 | 895/ 5600 batches | test loss 0.4672521 +| epoch 2 | 899/ 5600 batches | test loss 0.4287301 +| epoch 2 | 903/ 5600 batches | test loss 0.4035202 +| epoch 2 | 907/ 5600 batches | test loss 0.5003750 +| epoch 2 | 911/ 5600 batches | test loss 0.4534751 +| epoch 2 | 915/ 5600 batches | test loss 0.3748460 +| epoch 2 | 919/ 5600 batches | test loss 0.4299838 +| epoch 2 | 923/ 5600 batches | test loss 0.4437186 +| epoch 2 | 927/ 5600 batches | test loss 0.3855631 +| epoch 2 | 931/ 5600 batches | test loss 0.5442506 +| epoch 2 | 935/ 5600 batches | test loss 0.4819956 +| epoch 2 | 939/ 5600 batches | test loss 0.5679631 +| epoch 2 | 943/ 5600 batches | test loss 0.4171265 +| epoch 2 | 947/ 5600 batches | test loss 0.4496714 +| epoch 2 | 951/ 5600 batches | test loss 0.4766288 +| epoch 2 | 955/ 5600 batches | test loss 0.3645504 +| epoch 2 | 959/ 5600 batches | test loss 0.5046278 +| epoch 2 | 963/ 5600 batches | test loss 0.5572635 +| epoch 2 | 967/ 5600 batches | test loss 0.5294274 +| epoch 2 | 971/ 5600 batches | test loss 0.4743931 +| epoch 2 | 975/ 5600 batches | test loss 0.4841201 +| epoch 2 | 979/ 5600 batches | test loss 0.5102869 +| epoch 2 | 983/ 5600 batches | test loss 0.4269381 +| epoch 2 | 987/ 5600 batches | test loss 0.4747117 +| epoch 2 | 991/ 5600 batches | test loss 0.4493925 +| epoch 2 | 995/ 5600 batches | test loss 0.4098035 +| epoch 2 | 999/ 5600 batches | test loss 0.4582987 +| epoch 2 | 1003/ 5600 batches | test loss 0.4372271 +| epoch 2 | 1007/ 5600 batches | test loss 0.3827928 +| epoch 2 | 1011/ 5600 batches | test loss 0.4563020 +| epoch 2 | 1015/ 5600 batches | test loss 0.4507899 +| epoch 2 | 1019/ 5600 batches | test loss 0.3896596 +| epoch 2 | 1023/ 5600 batches | test loss 0.5142801 +| epoch 2 | 1027/ 5600 batches | test loss 0.3899459 +| epoch 2 | 1031/ 5600 batches | test loss 0.4336065 +| epoch 2 | 1035/ 5600 batches | test loss 0.4869898 +| epoch 2 | 1039/ 5600 batches | test loss 0.3945950 +| epoch 2 | 1043/ 5600 batches | test loss 0.4260129 +| epoch 2 | 1047/ 5600 batches | test loss 0.3707640 +| epoch 2 | 1051/ 5600 batches | test loss 0.4744250 +| epoch 2 | 1055/ 5600 batches | test loss 0.4205432 +| epoch 2 | 1059/ 5600 batches | test loss 0.5130545 +| epoch 2 | 1063/ 5600 batches | test loss 0.3744100 +| epoch 2 | 1067/ 5600 batches | test loss 0.4619585 +| epoch 2 | 1071/ 5600 batches | test loss 0.5029095 +| epoch 2 | 1075/ 5600 batches | test loss 0.4318440 +| epoch 2 | 1079/ 5600 batches | test loss 0.4481920 +| epoch 2 | 1083/ 5600 batches | test loss 0.4904291 +| epoch 2 | 1087/ 5600 batches | test loss 0.5614344 +| epoch 2 | 1091/ 5600 batches | test loss 0.4769459 +| epoch 2 | 1095/ 5600 batches | test loss 0.4413298 +| epoch 2 | 1099/ 5600 batches | test loss 0.4458325 +| epoch 2 | 1103/ 5600 batches | test loss 0.4212734 +| epoch 2 | 1107/ 5600 batches | test loss 0.4176937 +| epoch 2 | 1111/ 5600 batches | test loss 0.4113751 +| epoch 2 | 1115/ 5600 batches | test loss 0.4214145 +| epoch 2 | 1119/ 5600 batches | test loss 0.4775306 +| epoch 2 | 1123/ 5600 batches | test loss 0.4917523 +| epoch 2 | 1127/ 5600 batches | test loss 0.4017801 +| epoch 2 | 1131/ 5600 batches | test loss 0.3662769 +| epoch 2 | 1135/ 5600 batches | test loss 0.4426545 +| epoch 2 | 1139/ 5600 batches | test loss 0.5306097 +| epoch 2 | 1143/ 5600 batches | test loss 0.4082726 +| epoch 2 | 1147/ 5600 batches | test loss 0.5233223 +| epoch 2 | 1151/ 5600 batches | test loss 0.4440351 +| epoch 2 | 1155/ 5600 batches | test loss 0.4935429 +| epoch 2 | 1159/ 5600 batches | test loss 0.4344443 +| epoch 2 | 1163/ 5600 batches | test loss 0.4286885 +| epoch 2 | 1167/ 5600 batches | test loss 0.4706927 +| epoch 2 | 1171/ 5600 batches | test loss 0.4533408 +| epoch 2 | 1175/ 5600 batches | test loss 0.4595308 +| epoch 2 | 1179/ 5600 batches | test loss 0.4645579 +| epoch 2 | 1183/ 5600 batches | test loss 0.4549286 +| epoch 2 | 1187/ 5600 batches | test loss 0.3881274 +| epoch 2 | 1191/ 5600 batches | test loss 0.4433476 +| epoch 2 | 1195/ 5600 batches | test loss 0.4891167 +| epoch 2 | 1199/ 5600 batches | test loss 0.4163049 +| epoch 2 | 1203/ 5600 batches | test loss 0.4128153 +| epoch 2 | 1207/ 5600 batches | test loss 0.4572557 +| epoch 2 | 1211/ 5600 batches | test loss 0.4817984 +| epoch 2 | 1215/ 5600 batches | test loss 0.4804041 +| epoch 2 | 1219/ 5600 batches | test loss 0.4672561 +| epoch 2 | 1223/ 5600 batches | test loss 0.4274999 +| epoch 2 | 1227/ 5600 batches | test loss 0.4533115 +| epoch 2 | 1231/ 5600 batches | test loss 0.4558923 +| epoch 2 | 1235/ 5600 batches | test loss 0.3754147 +| epoch 2 | 1239/ 5600 batches | test loss 0.4694264 +| epoch 2 | 1243/ 5600 batches | test loss 0.4744396 +| epoch 2 | 1247/ 5600 batches | test loss 0.4425966 +| epoch 2 | 1251/ 5600 batches | test loss 0.4661197 +| epoch 2 | 1255/ 5600 batches | test loss 0.3506842 +| epoch 2 | 1259/ 5600 batches | test loss 0.5520291 +| epoch 2 | 1263/ 5600 batches | test loss 0.4325025 +| epoch 2 | 1267/ 5600 batches | test loss 0.4370350 +| epoch 2 | 1271/ 5600 batches | test loss 0.4380631 +| epoch 2 | 1275/ 5600 batches | test loss 0.4710184 +| epoch 2 | 1279/ 5600 batches | test loss 0.5085086 +| epoch 2 | 1283/ 5600 batches | test loss 0.4055558 +| epoch 2 | 1287/ 5600 batches | test loss 0.4522350 +| epoch 2 | 1291/ 5600 batches | test loss 0.4412490 +| epoch 2 | 1295/ 5600 batches | test loss 0.4099319 +| epoch 2 | 1299/ 5600 batches | test loss 0.4696454 +| epoch 2 | 1303/ 5600 batches | test loss 0.4331482 +| epoch 2 | 1307/ 5600 batches | test loss 0.4256797 +| epoch 2 | 1311/ 5600 batches | test loss 0.5091025 +| epoch 2 | 1315/ 5600 batches | test loss 0.3764947 +| epoch 2 | 1319/ 5600 batches | test loss 0.4018371 +| epoch 2 | 1323/ 5600 batches | test loss 0.3373111 +| epoch 2 | 1327/ 5600 batches | test loss 0.4360026 +| epoch 2 | 1331/ 5600 batches | test loss 0.3459902 +| epoch 2 | 1335/ 5600 batches | test loss 0.4552622 +| epoch 2 | 1339/ 5600 batches | test loss 0.4246556 +| epoch 2 | 1343/ 5600 batches | test loss 0.3786367 +| epoch 2 | 1347/ 5600 batches | test loss 0.4361471 +| epoch 2 | 1351/ 5600 batches | test loss 0.3700475 +| epoch 2 | 1355/ 5600 batches | test loss 0.4841396 +| epoch 2 | 1359/ 5600 batches | test loss 0.4699708 +| epoch 2 | 1363/ 5600 batches | test loss 0.3740468 +| epoch 2 | 1367/ 5600 batches | test loss 0.5145916 +| epoch 2 | 1371/ 5600 batches | test loss 0.4985439 +| epoch 2 | 1375/ 5600 batches | test loss 0.4516796 +| epoch 2 | 1379/ 5600 batches | test loss 0.5001269 +| epoch 2 | 1383/ 5600 batches | test loss 0.4304266 +| epoch 2 | 1387/ 5600 batches | test loss 0.4928216 +| epoch 2 | 1391/ 5600 batches | test loss 0.4527657 +| epoch 2 | 1395/ 5600 batches | test loss 0.5149847 +| epoch 2 | 1399/ 5600 batches | test loss 0.4490194 +| epoch 2 | final test loss 0.4441, save model! +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 5600 batches | train loss 0.4271923 +| epoch 3 | 7/ 5600 batches | train loss 0.4390239 +| epoch 3 | 11/ 5600 batches | train loss 0.4877919 +| epoch 3 | 15/ 5600 batches | train loss 0.4910704 +| epoch 3 | 19/ 5600 batches | train loss 0.4363193 +| epoch 3 | 23/ 5600 batches | train loss 0.3567539 +| epoch 3 | 27/ 5600 batches | train loss 0.3862947 +| epoch 3 | 31/ 5600 batches | train loss 0.4144392 +| epoch 3 | 35/ 5600 batches | train loss 0.5418842 +| epoch 3 | 39/ 5600 batches | train loss 0.4747173 +| epoch 3 | 43/ 5600 batches | train loss 0.3947745 +| epoch 3 | 47/ 5600 batches | train loss 0.4298339 +| epoch 3 | 51/ 5600 batches | train loss 0.4094233 +| epoch 3 | 55/ 5600 batches | train loss 0.4328707 +| epoch 3 | 59/ 5600 batches | train loss 0.4728249 +| epoch 3 | 63/ 5600 batches | train loss 0.4900059 +| epoch 3 | 67/ 5600 batches | train loss 0.3756631 +| epoch 3 | 71/ 5600 batches | train loss 0.3725985 +| epoch 3 | 75/ 5600 batches | train loss 0.4260246 +| epoch 3 | 79/ 5600 batches | train loss 0.4632054 +| epoch 3 | 83/ 5600 batches | train loss 0.4112996 +| epoch 3 | 87/ 5600 batches | train loss 0.5124202 +| epoch 3 | 91/ 5600 batches | train loss 0.3881525 +| epoch 3 | 95/ 5600 batches | train loss 0.3839028 +| epoch 3 | 99/ 5600 batches | train loss 0.5426949 +| epoch 3 | 103/ 5600 batches | train loss 0.4216326 +| epoch 3 | 107/ 5600 batches | train loss 0.4372342 +| epoch 3 | 111/ 5600 batches | train loss 0.4625973 +| epoch 3 | 115/ 5600 batches | train loss 0.5189461 +| epoch 3 | 119/ 5600 batches | train loss 0.4113768 +| epoch 3 | 123/ 5600 batches | train loss 0.4306938 +| epoch 3 | 127/ 5600 batches | train loss 0.4072965 +| epoch 3 | 131/ 5600 batches | train loss 0.3971349 +| epoch 3 | 135/ 5600 batches | train loss 0.4823847 +| epoch 3 | 139/ 5600 batches | train loss 0.4768159 +| epoch 3 | 143/ 5600 batches | train loss 0.4441011 +| epoch 3 | 147/ 5600 batches | train loss 0.5049501 +| epoch 3 | 151/ 5600 batches | train loss 0.4349777 +| epoch 3 | 155/ 5600 batches | train loss 0.5249531 +| epoch 3 | 159/ 5600 batches | train loss 0.4436717 +| epoch 3 | 163/ 5600 batches | train loss 0.4250141 +| epoch 3 | 167/ 5600 batches | train loss 0.4144585 +| epoch 3 | 171/ 5600 batches | train loss 0.3345834 +| epoch 3 | 175/ 5600 batches | train loss 0.3913626 +| epoch 3 | 179/ 5600 batches | train loss 0.4502129 +| epoch 3 | 183/ 5600 batches | train loss 0.4815257 +| epoch 3 | 187/ 5600 batches | train loss 0.4408067 +| epoch 3 | 191/ 5600 batches | train loss 0.4595851 +| epoch 3 | 195/ 5600 batches | train loss 0.3409231 +| epoch 3 | 199/ 5600 batches | train loss 0.4694963 +| epoch 3 | 203/ 5600 batches | train loss 0.4670717 +| epoch 3 | 207/ 5600 batches | train loss 0.4115562 +| epoch 3 | 211/ 5600 batches | train loss 0.3637217 +| epoch 3 | 215/ 5600 batches | train loss 0.3147471 +| epoch 3 | 219/ 5600 batches | train loss 0.5308429 +| epoch 3 | 223/ 5600 batches | train loss 0.3750511 +| epoch 3 | 227/ 5600 batches | train loss 0.4915626 +| epoch 3 | 231/ 5600 batches | train loss 0.4554880 +| epoch 3 | 235/ 5600 batches | train loss 0.4472014 +| epoch 3 | 239/ 5600 batches | train loss 0.4335824 +| epoch 3 | 243/ 5600 batches | train loss 0.4566395 +| epoch 3 | 247/ 5600 batches | train loss 0.3777994 +| epoch 3 | 251/ 5600 batches | train loss 0.3997136 +| epoch 3 | 255/ 5600 batches | train loss 0.3940528 +| epoch 3 | 259/ 5600 batches | train loss 0.4941971 +| epoch 3 | 263/ 5600 batches | train loss 0.4950351 +| epoch 3 | 267/ 5600 batches | train loss 0.4896756 +| epoch 3 | 271/ 5600 batches | train loss 0.4790010 +| epoch 3 | 275/ 5600 batches | train loss 0.5297507 +| epoch 3 | 279/ 5600 batches | train loss 0.4477301 +| epoch 3 | 283/ 5600 batches | train loss 0.4062842 +| epoch 3 | 287/ 5600 batches | train loss 0.4227501 +| epoch 3 | 291/ 5600 batches | train loss 0.4616978 +| epoch 3 | 295/ 5600 batches | train loss 0.4670438 +| epoch 3 | 299/ 5600 batches | train loss 0.3784675 +| epoch 3 | 303/ 5600 batches | train loss 0.4664268 +| epoch 3 | 307/ 5600 batches | train loss 0.4056404 +| epoch 3 | 311/ 5600 batches | train loss 0.4417580 +| epoch 3 | 315/ 5600 batches | train loss 0.4327900 +| epoch 3 | 319/ 5600 batches | train loss 0.4904797 +| epoch 3 | 323/ 5600 batches | train loss 0.4954429 +| epoch 3 | 327/ 5600 batches | train loss 0.4098440 +| epoch 3 | 331/ 5600 batches | train loss 0.4272933 +| epoch 3 | 335/ 5600 batches | train loss 0.4974366 +| epoch 3 | 339/ 5600 batches | train loss 0.4756115 +| epoch 3 | 343/ 5600 batches | train loss 0.3923286 +| epoch 3 | 347/ 5600 batches | train loss 0.3712208 +| epoch 3 | 351/ 5600 batches | train loss 0.4571657 +| epoch 3 | 355/ 5600 batches | train loss 0.4192032 +| epoch 3 | 359/ 5600 batches | train loss 0.4383708 +| epoch 3 | 363/ 5600 batches | train loss 0.4395967 +| epoch 3 | 367/ 5600 batches | train loss 0.3895456 +| epoch 3 | 371/ 5600 batches | train loss 0.4308503 +| epoch 3 | 375/ 5600 batches | train loss 0.4022632 +| epoch 3 | 379/ 5600 batches | train loss 0.4295225 +| epoch 3 | 383/ 5600 batches | train loss 0.3942703 +| epoch 3 | 387/ 5600 batches | train loss 0.4704461 +| epoch 3 | 391/ 5600 batches | train loss 0.5511678 +| epoch 3 | 395/ 5600 batches | train loss 0.4761330 +| epoch 3 | 399/ 5600 batches | train loss 0.4652605 +| epoch 3 | 403/ 5600 batches | train loss 0.4354391 +| epoch 3 | 407/ 5600 batches | train loss 0.4627499 +| epoch 3 | 411/ 5600 batches | train loss 0.4326349 +| epoch 3 | 415/ 5600 batches | train loss 0.4172734 +| epoch 3 | 419/ 5600 batches | train loss 0.4523243 +| epoch 3 | 423/ 5600 batches | train loss 0.4148727 +| epoch 3 | 427/ 5600 batches | train loss 0.3669046 +| epoch 3 | 431/ 5600 batches | train loss 0.4476857 +| epoch 3 | 435/ 5600 batches | train loss 0.4901201 +| epoch 3 | 439/ 5600 batches | train loss 0.3096261 +| epoch 3 | 443/ 5600 batches | train loss 0.4682921 +| epoch 3 | 447/ 5600 batches | train loss 0.4250532 +| epoch 3 | 451/ 5600 batches | train loss 0.4832255 +| epoch 3 | 455/ 5600 batches | train loss 0.4897599 +| epoch 3 | 459/ 5600 batches | train loss 0.3275934 +| epoch 3 | 463/ 5600 batches | train loss 0.4555253 +| epoch 3 | 467/ 5600 batches | train loss 0.5365545 +| epoch 3 | 471/ 5600 batches | train loss 0.4444909 +| epoch 3 | 475/ 5600 batches | train loss 0.4407139 +| epoch 3 | 479/ 5600 batches | train loss 0.4437700 +| epoch 3 | 483/ 5600 batches | train loss 0.4093034 +| epoch 3 | 487/ 5600 batches | train loss 0.3935206 +| epoch 3 | 491/ 5600 batches | train loss 0.4921893 +| epoch 3 | 495/ 5600 batches | train loss 0.4891613 +| epoch 3 | 499/ 5600 batches | train loss 0.4952833 +| epoch 3 | 503/ 5600 batches | train loss 0.4902258 +| epoch 3 | 507/ 5600 batches | train loss 0.4423666 +| epoch 3 | 511/ 5600 batches | train loss 0.3547415 +| epoch 3 | 515/ 5600 batches | train loss 0.3899372 +| epoch 3 | 519/ 5600 batches | train loss 0.4137575 +| epoch 3 | 523/ 5600 batches | train loss 0.4788973 +| epoch 3 | 527/ 5600 batches | train loss 0.3982667 +| epoch 3 | 531/ 5600 batches | train loss 0.3645518 +| epoch 3 | 535/ 5600 batches | train loss 0.4835024 +| epoch 3 | 539/ 5600 batches | train loss 0.3978182 +| epoch 3 | 543/ 5600 batches | train loss 0.4399501 +| epoch 3 | 547/ 5600 batches | train loss 0.4955948 +| epoch 3 | 551/ 5600 batches | train loss 0.3812752 +| epoch 3 | 555/ 5600 batches | train loss 0.4037354 +| epoch 3 | 559/ 5600 batches | train loss 0.3790075 +| epoch 3 | 563/ 5600 batches | train loss 0.5132306 +| epoch 3 | 567/ 5600 batches | train loss 0.4376362 +| epoch 3 | 571/ 5600 batches | train loss 0.4799456 +| epoch 3 | 575/ 5600 batches | train loss 0.4570894 +| epoch 3 | 579/ 5600 batches | train loss 0.3788078 +| epoch 3 | 583/ 5600 batches | train loss 0.4121761 +| epoch 3 | 587/ 5600 batches | train loss 0.4411914 +| epoch 3 | 591/ 5600 batches | train loss 0.4278868 +| epoch 3 | 595/ 5600 batches | train loss 0.4347519 +| epoch 3 | 599/ 5600 batches | train loss 0.2994060 +| epoch 3 | 603/ 5600 batches | train loss 0.3918295 +| epoch 3 | 607/ 5600 batches | train loss 0.4179766 +| epoch 3 | 611/ 5600 batches | train loss 0.4258592 +| epoch 3 | 615/ 5600 batches | train loss 0.4459682 +| epoch 3 | 619/ 5600 batches | train loss 0.4148130 +| epoch 3 | 623/ 5600 batches | train loss 0.4625228 +| epoch 3 | 627/ 5600 batches | train loss 0.3892313 +| epoch 3 | 631/ 5600 batches | train loss 0.3839891 +| epoch 3 | 635/ 5600 batches | train loss 0.4999600 +| epoch 3 | 639/ 5600 batches | train loss 0.4903290 +| epoch 3 | 643/ 5600 batches | train loss 0.4292960 +| epoch 3 | 647/ 5600 batches | train loss 0.4473794 +| epoch 3 | 651/ 5600 batches | train loss 0.3653863 +| epoch 3 | 655/ 5600 batches | train loss 0.4572130 +| epoch 3 | 659/ 5600 batches | train loss 0.4551918 +| epoch 3 | 663/ 5600 batches | train loss 0.3933414 +| epoch 3 | 667/ 5600 batches | train loss 0.4456485 +| epoch 3 | 671/ 5600 batches | train loss 0.3690495 +| epoch 3 | 675/ 5600 batches | train loss 0.4305170 +| epoch 3 | 679/ 5600 batches | train loss 0.5070423 +| epoch 3 | 683/ 5600 batches | train loss 0.4052486 +| epoch 3 | 687/ 5600 batches | train loss 0.4327565 +| epoch 3 | 691/ 5600 batches | train loss 0.3900793 +| epoch 3 | 695/ 5600 batches | train loss 0.4678176 +| epoch 3 | 699/ 5600 batches | train loss 0.4328440 +| epoch 3 | 703/ 5600 batches | train loss 0.4142609 +| epoch 3 | 707/ 5600 batches | train loss 0.3781615 +| epoch 3 | 711/ 5600 batches | train loss 0.3935946 +| epoch 3 | 715/ 5600 batches | train loss 0.4730611 +| epoch 3 | 719/ 5600 batches | train loss 0.4141806 +| epoch 3 | 723/ 5600 batches | train loss 0.4292192 +| epoch 3 | 727/ 5600 batches | train loss 0.4940368 +| epoch 3 | 731/ 5600 batches | train loss 0.4701291 +| epoch 3 | 735/ 5600 batches | train loss 0.4530609 +| epoch 3 | 739/ 5600 batches | train loss 0.3770766 +| epoch 3 | 743/ 5600 batches | train loss 0.4122234 +| epoch 3 | 747/ 5600 batches | train loss 0.3639061 +| epoch 3 | 751/ 5600 batches | train loss 0.3560339 +| epoch 3 | 755/ 5600 batches | train loss 0.4146546 +| epoch 3 | 759/ 5600 batches | train loss 0.3949732 +| epoch 3 | 763/ 5600 batches | train loss 0.5387446 +| epoch 3 | 767/ 5600 batches | train loss 0.4996930 +| epoch 3 | 771/ 5600 batches | train loss 0.5116661 +| epoch 3 | 775/ 5600 batches | train loss 0.5198611 +| epoch 3 | 779/ 5600 batches | train loss 0.4662759 +| epoch 3 | 783/ 5600 batches | train loss 0.3617437 +| epoch 3 | 787/ 5600 batches | train loss 0.4673272 +| epoch 3 | 791/ 5600 batches | train loss 0.3980324 +| epoch 3 | 795/ 5600 batches | train loss 0.2042260 +| epoch 3 | 799/ 5600 batches | train loss 0.4527915 +| epoch 3 | 803/ 5600 batches | train loss 0.4462359 +| epoch 3 | 807/ 5600 batches | train loss 0.3765056 +| epoch 3 | 811/ 5600 batches | train loss 0.4492986 +| epoch 3 | 815/ 5600 batches | train loss 0.4523899 +| epoch 3 | 819/ 5600 batches | train loss 0.4491021 +| epoch 3 | 823/ 5600 batches | train loss 0.4691430 +| epoch 3 | 827/ 5600 batches | train loss 0.4282834 +| epoch 3 | 831/ 5600 batches | train loss 0.4446515 +| epoch 3 | 835/ 5600 batches | train loss 0.4599247 +| epoch 3 | 839/ 5600 batches | train loss 0.5724695 +| epoch 3 | 843/ 5600 batches | train loss 0.4649230 +| epoch 3 | 847/ 5600 batches | train loss 0.3304528 +| epoch 3 | 851/ 5600 batches | train loss 0.4785154 +| epoch 3 | 855/ 5600 batches | train loss 0.4192917 +| epoch 3 | 859/ 5600 batches | train loss 0.4898252 +| epoch 3 | 863/ 5600 batches | train loss 0.4929881 +| epoch 3 | 867/ 5600 batches | train loss 0.4249444 +| epoch 3 | 871/ 5600 batches | train loss 0.3639047 +| epoch 3 | 875/ 5600 batches | train loss 0.4205503 +| epoch 3 | 879/ 5600 batches | train loss 0.4926236 +| epoch 3 | 883/ 5600 batches | train loss 0.3886304 +| epoch 3 | 887/ 5600 batches | train loss 0.4223007 +| epoch 3 | 891/ 5600 batches | train loss 0.4216575 +| epoch 3 | 895/ 5600 batches | train loss 0.4198434 +| epoch 3 | 899/ 5600 batches | train loss 0.4382452 +| epoch 3 | 903/ 5600 batches | train loss 0.5138204 +| epoch 3 | 907/ 5600 batches | train loss 0.4110046 +| epoch 3 | 911/ 5600 batches | train loss 0.4918474 +| epoch 3 | 915/ 5600 batches | train loss 0.4689963 +| epoch 3 | 919/ 5600 batches | train loss 0.4942966 +| epoch 3 | 923/ 5600 batches | train loss 0.4603353 +| epoch 3 | 927/ 5600 batches | train loss 0.3810428 +| epoch 3 | 931/ 5600 batches | train loss 0.3793029 +| epoch 3 | 935/ 5600 batches | train loss 0.4872721 +| epoch 3 | 939/ 5600 batches | train loss 0.4783680 +| epoch 3 | 943/ 5600 batches | train loss 0.4207427 +| epoch 3 | 947/ 5600 batches | train loss 0.3842666 +| epoch 3 | 951/ 5600 batches | train loss 0.3878585 +| epoch 3 | 955/ 5600 batches | train loss 0.4177919 +| epoch 3 | 959/ 5600 batches | train loss 0.4977333 +| epoch 3 | 963/ 5600 batches | train loss 0.2927804 +| epoch 3 | 967/ 5600 batches | train loss 0.3739997 +| epoch 3 | 971/ 5600 batches | train loss 0.5036408 +| epoch 3 | 975/ 5600 batches | train loss 0.4677635 +| epoch 3 | 979/ 5600 batches | train loss 0.4358692 +| epoch 3 | 983/ 5600 batches | train loss 0.3811452 +| epoch 3 | 987/ 5600 batches | train loss 0.4353430 +| epoch 3 | 991/ 5600 batches | train loss 0.4355201 +| epoch 3 | 995/ 5600 batches | train loss 0.4180086 +| epoch 3 | 999/ 5600 batches | train loss 0.4581674 +| epoch 3 | 1003/ 5600 batches | train loss 0.4399130 +| epoch 3 | 1007/ 5600 batches | train loss 0.4566167 +| epoch 3 | 1011/ 5600 batches | train loss 0.3974931 +| epoch 3 | 1015/ 5600 batches | train loss 0.3637634 +| epoch 3 | 1019/ 5600 batches | train loss 0.4275913 +| epoch 3 | 1023/ 5600 batches | train loss 0.3741025 +| epoch 3 | 1027/ 5600 batches | train loss 0.4286814 +| epoch 3 | 1031/ 5600 batches | train loss 0.4195264 +| epoch 3 | 1035/ 5600 batches | train loss 0.4355162 +| epoch 3 | 1039/ 5600 batches | train loss 0.4240561 +| epoch 3 | 1043/ 5600 batches | train loss 0.4185912 +| epoch 3 | 1047/ 5600 batches | train loss 0.4607069 +| epoch 3 | 1051/ 5600 batches | train loss 0.4504687 +| epoch 3 | 1055/ 5600 batches | train loss 0.4991445 +| epoch 3 | 1059/ 5600 batches | train loss 0.4950244 +| epoch 3 | 1063/ 5600 batches | train loss 0.4002782 +| epoch 3 | 1067/ 5600 batches | train loss 0.4578390 +| epoch 3 | 1071/ 5600 batches | train loss 0.4002240 +| epoch 3 | 1075/ 5600 batches | train loss 0.4761685 +| epoch 3 | 1079/ 5600 batches | train loss 0.3956087 +| epoch 3 | 1083/ 5600 batches | train loss 0.6885037 +| epoch 3 | 1087/ 5600 batches | train loss 0.5040990 +| epoch 3 | 1091/ 5600 batches | train loss 0.4208385 +| epoch 3 | 1095/ 5600 batches | train loss 0.4392938 +| epoch 3 | 1099/ 5600 batches | train loss 0.3758072 +| epoch 3 | 1103/ 5600 batches | train loss 0.4426543 +| epoch 3 | 1107/ 5600 batches | train loss 0.4352590 +| epoch 3 | 1111/ 5600 batches | train loss 0.4831676 +| epoch 3 | 1115/ 5600 batches | train loss 0.4276823 +| epoch 3 | 1119/ 5600 batches | train loss 0.4157274 +| epoch 3 | 1123/ 5600 batches | train loss 0.4398035 +| epoch 3 | 1127/ 5600 batches | train loss 0.4446085 +| epoch 3 | 1131/ 5600 batches | train loss 0.4818802 +| epoch 3 | 1135/ 5600 batches | train loss 0.4336858 +| epoch 3 | 1139/ 5600 batches | train loss 0.3952922 +| epoch 3 | 1143/ 5600 batches | train loss 0.4563702 +| epoch 3 | 1147/ 5600 batches | train loss 0.3725477 +| epoch 3 | 1151/ 5600 batches | train loss 0.4711072 +| epoch 3 | 1155/ 5600 batches | train loss 0.3728612 +| epoch 3 | 1159/ 5600 batches | train loss 0.4526260 +| epoch 3 | 1163/ 5600 batches | train loss 0.4273610 +| epoch 3 | 1167/ 5600 batches | train loss 0.3641599 +| epoch 3 | 1171/ 5600 batches | train loss 0.3873287 +| epoch 3 | 1175/ 5600 batches | train loss 0.5005571 +| epoch 3 | 1179/ 5600 batches | train loss 0.4492449 +| epoch 3 | 1183/ 5600 batches | train loss 0.3893371 +| epoch 3 | 1187/ 5600 batches | train loss 0.3468922 +| epoch 3 | 1191/ 5600 batches | train loss 0.3821515 +| epoch 3 | 1195/ 5600 batches | train loss 0.3650825 +| epoch 3 | 1199/ 5600 batches | train loss 0.3403019 +| epoch 3 | 1203/ 5600 batches | train loss 0.3596080 +| epoch 3 | 1207/ 5600 batches | train loss 0.4703729 +| epoch 3 | 1211/ 5600 batches | train loss 0.4276102 +| epoch 3 | 1215/ 5600 batches | train loss 0.3819673 +| epoch 3 | 1219/ 5600 batches | train loss 0.4108375 +| epoch 3 | 1223/ 5600 batches | train loss 0.3685470 +| epoch 3 | 1227/ 5600 batches | train loss 0.3835032 +| epoch 3 | 1231/ 5600 batches | train loss 0.3656515 +| epoch 3 | 1235/ 5600 batches | train loss 0.4450539 +| epoch 3 | 1239/ 5600 batches | train loss 0.3433461 +| epoch 3 | 1243/ 5600 batches | train loss 0.3624646 +| epoch 3 | 1247/ 5600 batches | train loss 0.4171492 +| epoch 3 | 1251/ 5600 batches | train loss 0.4314134 +| epoch 3 | 1255/ 5600 batches | train loss 0.3740044 +| epoch 3 | 1259/ 5600 batches | train loss 0.4527427 +| epoch 3 | 1263/ 5600 batches | train loss 0.4518725 +| epoch 3 | 1267/ 5600 batches | train loss 0.3871491 +| epoch 3 | 1271/ 5600 batches | train loss 0.4827009 +| epoch 3 | 1275/ 5600 batches | train loss 0.4917135 +| epoch 3 | 1279/ 5600 batches | train loss 0.4285792 +| epoch 3 | 1283/ 5600 batches | train loss 0.4334384 +| epoch 3 | 1287/ 5600 batches | train loss 0.3808350 +| epoch 3 | 1291/ 5600 batches | train loss 0.4388579 +| epoch 3 | 1295/ 5600 batches | train loss 0.4943334 +| epoch 3 | 1299/ 5600 batches | train loss 0.4397905 +| epoch 3 | 1303/ 5600 batches | train loss 0.4661012 +| epoch 3 | 1307/ 5600 batches | train loss 0.4708762 +| epoch 3 | 1311/ 5600 batches | train loss 0.3873042 +| epoch 3 | 1315/ 5600 batches | train loss 0.3438948 +| epoch 3 | 1319/ 5600 batches | train loss 0.4847784 +| epoch 3 | 1323/ 5600 batches | train loss 0.4130503 +| epoch 3 | 1327/ 5600 batches | train loss 0.4101827 +| epoch 3 | 1331/ 5600 batches | train loss 0.4212003 +| epoch 3 | 1335/ 5600 batches | train loss 0.4518874 +| epoch 3 | 1339/ 5600 batches | train loss 0.4437512 +| epoch 3 | 1343/ 5600 batches | train loss 0.4387287 +| epoch 3 | 1347/ 5600 batches | train loss 0.3996288 +| epoch 3 | 1351/ 5600 batches | train loss 0.4547049 +| epoch 3 | 1355/ 5600 batches | train loss 0.4629323 +| epoch 3 | 1359/ 5600 batches | train loss 0.4835515 +| epoch 3 | 1363/ 5600 batches | train loss 0.5136691 +| epoch 3 | 1367/ 5600 batches | train loss 0.4092703 +| epoch 3 | 1371/ 5600 batches | train loss 0.3685819 +| epoch 3 | 1375/ 5600 batches | train loss 0.6626897 +| epoch 3 | 1379/ 5600 batches | train loss 0.4477869 +| epoch 3 | 1383/ 5600 batches | train loss 0.3636816 +| epoch 3 | 1387/ 5600 batches | train loss 0.5202063 +| epoch 3 | 1391/ 5600 batches | train loss 0.4254940 +| epoch 3 | 1395/ 5600 batches | train loss 0.5458819 +| epoch 3 | 1399/ 5600 batches | train loss 0.2968850 +| epoch 3 | 1403/ 5600 batches | train loss 0.4595971 +| epoch 3 | 1407/ 5600 batches | train loss 0.4396550 +| epoch 3 | 1411/ 5600 batches | train loss 0.4955142 +| epoch 3 | 1415/ 5600 batches | train loss 0.4991437 +| epoch 3 | 1419/ 5600 batches | train loss 0.4300356 +| epoch 3 | 1423/ 5600 batches | train loss 0.3520814 +| epoch 3 | 1427/ 5600 batches | train loss 0.3804162 +| epoch 3 | 1431/ 5600 batches | train loss 0.4942873 +| epoch 3 | 1435/ 5600 batches | train loss 0.3581997 +| epoch 3 | 1439/ 5600 batches | train loss 0.5019779 +| epoch 3 | 1443/ 5600 batches | train loss 0.4419092 +| epoch 3 | 1447/ 5600 batches | train loss 0.4545332 +| epoch 3 | 1451/ 5600 batches | train loss 0.4681365 +| epoch 3 | 1455/ 5600 batches | train loss 0.3862998 +| epoch 3 | 1459/ 5600 batches | train loss 0.4501408 +| epoch 3 | 1463/ 5600 batches | train loss 0.4959511 +| epoch 3 | 1467/ 5600 batches | train loss 0.5227900 +| epoch 3 | 1471/ 5600 batches | train loss 0.4146433 +| epoch 3 | 1475/ 5600 batches | train loss 0.5100217 +| epoch 3 | 1479/ 5600 batches | train loss 0.4175250 +| epoch 3 | 1483/ 5600 batches | train loss 0.4372972 +| epoch 3 | 1487/ 5600 batches | train loss 0.3590872 +| epoch 3 | 1491/ 5600 batches | train loss 0.4766079 +| epoch 3 | 1495/ 5600 batches | train loss 0.4378357 +| epoch 3 | 1499/ 5600 batches | train loss 0.4166994 +| epoch 3 | 1503/ 5600 batches | train loss 0.3397409 +| epoch 3 | 1507/ 5600 batches | train loss 0.4234741 +| epoch 3 | 1511/ 5600 batches | train loss 0.5280885 +| epoch 3 | 1515/ 5600 batches | train loss 0.4077690 +| epoch 3 | 1519/ 5600 batches | train loss 0.4341491 +| epoch 3 | 1523/ 5600 batches | train loss 0.5244962 +| epoch 3 | 1527/ 5600 batches | train loss 0.4044733 +| epoch 3 | 1531/ 5600 batches | train loss 0.3425521 +| epoch 3 | 1535/ 5600 batches | train loss 0.3805822 +| epoch 3 | 1539/ 5600 batches | train loss 0.5132736 +| epoch 3 | 1543/ 5600 batches | train loss 0.4108182 +| epoch 3 | 1547/ 5600 batches | train loss 0.4571716 +| epoch 3 | 1551/ 5600 batches | train loss 0.3611102 +| epoch 3 | 1555/ 5600 batches | train loss 0.3548743 +| epoch 3 | 1559/ 5600 batches | train loss 0.4370487 +| epoch 3 | 1563/ 5600 batches | train loss 0.4349970 +| epoch 3 | 1567/ 5600 batches | train loss 0.5381369 +| epoch 3 | 1571/ 5600 batches | train loss 0.4154075 +| epoch 3 | 1575/ 5600 batches | train loss 0.5270684 +| epoch 3 | 1579/ 5600 batches | train loss 0.4431670 +| epoch 3 | 1583/ 5600 batches | train loss 0.4122829 +| epoch 3 | 1587/ 5600 batches | train loss 0.3672583 +| epoch 3 | 1591/ 5600 batches | train loss 0.4026505 +| epoch 3 | 1595/ 5600 batches | train loss 0.4852601 +| epoch 3 | 1599/ 5600 batches | train loss 0.5256846 +| epoch 3 | 1603/ 5600 batches | train loss 0.3596553 +| epoch 3 | 1607/ 5600 batches | train loss 0.3814669 +| epoch 3 | 1611/ 5600 batches | train loss 0.4181747 +| epoch 3 | 1615/ 5600 batches | train loss 0.4810449 +| epoch 3 | 1619/ 5600 batches | train loss 0.5109079 +| epoch 3 | 1623/ 5600 batches | train loss 0.4670941 +| epoch 3 | 1627/ 5600 batches | train loss 0.4157443 +| epoch 3 | 1631/ 5600 batches | train loss 0.4618612 +| epoch 3 | 1635/ 5600 batches | train loss 0.4371753 +| epoch 3 | 1639/ 5600 batches | train loss 0.3610586 +| epoch 3 | 1643/ 5600 batches | train loss 0.4032206 +| epoch 3 | 1647/ 5600 batches | train loss 0.4274648 +| epoch 3 | 1651/ 5600 batches | train loss 0.4231732 +| epoch 3 | 1655/ 5600 batches | train loss 0.3592424 +| epoch 3 | 1659/ 5600 batches | train loss 0.4244173 +| epoch 3 | 1663/ 5600 batches | train loss 0.3331443 +| epoch 3 | 1667/ 5600 batches | train loss 0.4125217 +| epoch 3 | 1671/ 5600 batches | train loss 0.4183689 +| epoch 3 | 1675/ 5600 batches | train loss 0.4472950 +| epoch 3 | 1679/ 5600 batches | train loss 0.4589075 +| epoch 3 | 1683/ 5600 batches | train loss 0.4297032 +| epoch 3 | 1687/ 5600 batches | train loss 0.4368901 +| epoch 3 | 1691/ 5600 batches | train loss 0.4170852 +| epoch 3 | 1695/ 5600 batches | train loss 0.3490794 +| epoch 3 | 1699/ 5600 batches | train loss 0.4308161 +| epoch 3 | 1703/ 5600 batches | train loss 0.4443434 +| epoch 3 | 1707/ 5600 batches | train loss 0.4121268 +| epoch 3 | 1711/ 5600 batches | train loss 0.3740270 +| epoch 3 | 1715/ 5600 batches | train loss 0.4528727 +| epoch 3 | 1719/ 5600 batches | train loss 0.3559087 +| epoch 3 | 1723/ 5600 batches | train loss 0.4529122 +| epoch 3 | 1727/ 5600 batches | train loss 0.4939189 +| epoch 3 | 1731/ 5600 batches | train loss 0.4064519 +| epoch 3 | 1735/ 5600 batches | train loss 0.4493723 +| epoch 3 | 1739/ 5600 batches | train loss 0.3458565 +| epoch 3 | 1743/ 5600 batches | train loss 0.4502728 +| epoch 3 | 1747/ 5600 batches | train loss 0.4513890 +| epoch 3 | 1751/ 5600 batches | train loss 0.3825247 +| epoch 3 | 1755/ 5600 batches | train loss 0.3784358 +| epoch 3 | 1759/ 5600 batches | train loss 0.3978540 +| epoch 3 | 1763/ 5600 batches | train loss 0.4370799 +| epoch 3 | 1767/ 5600 batches | train loss 0.4196816 +| epoch 3 | 1771/ 5600 batches | train loss 0.4704643 +| epoch 3 | 1775/ 5600 batches | train loss 0.4342930 +| epoch 3 | 1779/ 5600 batches | train loss 0.4251638 +| epoch 3 | 1783/ 5600 batches | train loss 0.4820433 +| epoch 3 | 1787/ 5600 batches | train loss 0.4432017 +| epoch 3 | 1791/ 5600 batches | train loss 0.3840971 +| epoch 3 | 1795/ 5600 batches | train loss 0.4818520 +| epoch 3 | 1799/ 5600 batches | train loss 0.3870623 +| epoch 3 | 1803/ 5600 batches | train loss 0.4584417 +| epoch 3 | 1807/ 5600 batches | train loss 0.4024227 +| epoch 3 | 1811/ 5600 batches | train loss 0.4536687 +| epoch 3 | 1815/ 5600 batches | train loss 0.4563760 +| epoch 3 | 1819/ 5600 batches | train loss 0.4229677 +| epoch 3 | 1823/ 5600 batches | train loss 0.3732540 +| epoch 3 | 1827/ 5600 batches | train loss 0.3946611 +| epoch 3 | 1831/ 5600 batches | train loss 0.4639878 +| epoch 3 | 1835/ 5600 batches | train loss 0.3962396 +| epoch 3 | 1839/ 5600 batches | train loss 0.4397101 +| epoch 3 | 1843/ 5600 batches | train loss 0.4796125 +| epoch 3 | 1847/ 5600 batches | train loss 0.4802003 +| epoch 3 | 1851/ 5600 batches | train loss 0.4184443 +| epoch 3 | 1855/ 5600 batches | train loss 0.2856187 +| epoch 3 | 1859/ 5600 batches | train loss 0.4150177 +| epoch 3 | 1863/ 5600 batches | train loss 0.3685188 +| epoch 3 | 1867/ 5600 batches | train loss 0.3915876 +| epoch 3 | 1871/ 5600 batches | train loss 0.4316143 +| epoch 3 | 1875/ 5600 batches | train loss 0.3816447 +| epoch 3 | 1879/ 5600 batches | train loss 0.4001453 +| epoch 3 | 1883/ 5600 batches | train loss 0.2460160 +| epoch 3 | 1887/ 5600 batches | train loss 0.4560868 +| epoch 3 | 1891/ 5600 batches | train loss 0.4816352 +| epoch 3 | 1895/ 5600 batches | train loss 0.4731663 +| epoch 3 | 1899/ 5600 batches | train loss 0.4545487 +| epoch 3 | 1903/ 5600 batches | train loss 0.4828691 +| epoch 3 | 1907/ 5600 batches | train loss 0.4485236 +| epoch 3 | 1911/ 5600 batches | train loss 0.4488093 +| epoch 3 | 1915/ 5600 batches | train loss 0.4557838 +| epoch 3 | 1919/ 5600 batches | train loss 0.4382421 +| epoch 3 | 1923/ 5600 batches | train loss 0.5143098 +| epoch 3 | 1927/ 5600 batches | train loss 0.4720048 +| epoch 3 | 1931/ 5600 batches | train loss 0.4127953 +| epoch 3 | 1935/ 5600 batches | train loss 0.4913286 +| epoch 3 | 1939/ 5600 batches | train loss 0.4803854 +| epoch 3 | 1943/ 5600 batches | train loss 0.4417454 +| epoch 3 | 1947/ 5600 batches | train loss 0.4287038 +| epoch 3 | 1951/ 5600 batches | train loss 0.3948976 +| epoch 3 | 1955/ 5600 batches | train loss 0.3944772 +| epoch 3 | 1959/ 5600 batches | train loss 0.4417471 +| epoch 3 | 1963/ 5600 batches | train loss 0.4519566 +| epoch 3 | 1967/ 5600 batches | train loss 0.4813421 +| epoch 3 | 1971/ 5600 batches | train loss 0.3866684 +| epoch 3 | 1975/ 5600 batches | train loss 0.4856890 +| epoch 3 | 1979/ 5600 batches | train loss 0.4525528 +| epoch 3 | 1983/ 5600 batches | train loss 0.5642681 +| epoch 3 | 1987/ 5600 batches | train loss 0.4693024 +| epoch 3 | 1991/ 5600 batches | train loss 0.5269911 +| epoch 3 | 1995/ 5600 batches | train loss 0.4385034 +| epoch 3 | 1999/ 5600 batches | train loss 0.3671507 +| epoch 3 | 2003/ 5600 batches | train loss 0.4434347 +| epoch 3 | 2007/ 5600 batches | train loss 0.3778410 +| epoch 3 | 2011/ 5600 batches | train loss 0.3380603 +| epoch 3 | 2015/ 5600 batches | train loss 0.3247853 +| epoch 3 | 2019/ 5600 batches | train loss 0.4253907 +| epoch 3 | 2023/ 5600 batches | train loss 0.4431949 +| epoch 3 | 2027/ 5600 batches | train loss 0.5637120 +| epoch 3 | 2031/ 5600 batches | train loss 0.3317515 +| epoch 3 | 2035/ 5600 batches | train loss 0.4388918 +| epoch 3 | 2039/ 5600 batches | train loss 0.4704610 +| epoch 3 | 2043/ 5600 batches | train loss 0.4492747 +| epoch 3 | 2047/ 5600 batches | train loss 0.5266271 +| epoch 3 | 2051/ 5600 batches | train loss 0.5303801 +| epoch 3 | 2055/ 5600 batches | train loss 0.5534399 +| epoch 3 | 2059/ 5600 batches | train loss 0.4948167 +| epoch 3 | 2063/ 5600 batches | train loss 0.3823694 +| epoch 3 | 2067/ 5600 batches | train loss 0.3793412 +| epoch 3 | 2071/ 5600 batches | train loss 0.3330562 +| epoch 3 | 2075/ 5600 batches | train loss 0.4272141 +| epoch 3 | 2079/ 5600 batches | train loss 0.3949151 +| epoch 3 | 2083/ 5600 batches | train loss 0.4140544 +| epoch 3 | 2087/ 5600 batches | train loss 0.4308771 +| epoch 3 | 2091/ 5600 batches | train loss 0.5381265 +| epoch 3 | 2095/ 5600 batches | train loss 0.4119792 +| epoch 3 | 2099/ 5600 batches | train loss 0.5313280 +| epoch 3 | 2103/ 5600 batches | train loss 0.3840926 +| epoch 3 | 2107/ 5600 batches | train loss 0.4406453 +| epoch 3 | 2111/ 5600 batches | train loss 0.4103207 +| epoch 3 | 2115/ 5600 batches | train loss 0.3483848 +| epoch 3 | 2119/ 5600 batches | train loss 0.4454698 +| epoch 3 | 2123/ 5600 batches | train loss 0.3585933 +| epoch 3 | 2127/ 5600 batches | train loss 0.4460423 +| epoch 3 | 2131/ 5600 batches | train loss 0.4803803 +| epoch 3 | 2135/ 5600 batches | train loss 0.4275307 +| epoch 3 | 2139/ 5600 batches | train loss 0.5056057 +| epoch 3 | 2143/ 5600 batches | train loss 0.4175521 +| epoch 3 | 2147/ 5600 batches | train loss 0.3554904 +| epoch 3 | 2151/ 5600 batches | train loss 0.4815789 +| epoch 3 | 2155/ 5600 batches | train loss 0.4272939 +| epoch 3 | 2159/ 5600 batches | train loss 0.3233493 +| epoch 3 | 2163/ 5600 batches | train loss 0.3281955 +| epoch 3 | 2167/ 5600 batches | train loss 0.4382375 +| epoch 3 | 2171/ 5600 batches | train loss 0.3796132 +| epoch 3 | 2175/ 5600 batches | train loss 0.4596741 +| epoch 3 | 2179/ 5600 batches | train loss 0.4409761 +| epoch 3 | 2183/ 5600 batches | train loss 0.4275502 +| epoch 3 | 2187/ 5600 batches | train loss 0.4252467 +| epoch 3 | 2191/ 5600 batches | train loss 0.3086446 +| epoch 3 | 2195/ 5600 batches | train loss 0.3945689 +| epoch 3 | 2199/ 5600 batches | train loss 0.4846128 +| epoch 3 | 2203/ 5600 batches | train loss 0.4693360 +| epoch 3 | 2207/ 5600 batches | train loss 0.5589397 +| epoch 3 | 2211/ 5600 batches | train loss 0.4010406 +| epoch 3 | 2215/ 5600 batches | train loss 0.4350644 +| epoch 3 | 2219/ 5600 batches | train loss 0.4365893 +| epoch 3 | 2223/ 5600 batches | train loss 0.4018378 +| epoch 3 | 2227/ 5600 batches | train loss 0.4270203 +| epoch 3 | 2231/ 5600 batches | train loss 0.3655969 +| epoch 3 | 2235/ 5600 batches | train loss 0.4038168 +| epoch 3 | 2239/ 5600 batches | train loss 0.4561525 +| epoch 3 | 2243/ 5600 batches | train loss 0.4535766 +| epoch 3 | 2247/ 5600 batches | train loss 0.3916153 +| epoch 3 | 2251/ 5600 batches | train loss 0.4328668 +| epoch 3 | 2255/ 5600 batches | train loss 0.4250706 +| epoch 3 | 2259/ 5600 batches | train loss 0.4957989 +| epoch 3 | 2263/ 5600 batches | train loss 0.4160810 +| epoch 3 | 2267/ 5600 batches | train loss 0.4879517 +| epoch 3 | 2271/ 5600 batches | train loss 0.4104763 +| epoch 3 | 2275/ 5600 batches | train loss 0.4027666 +| epoch 3 | 2279/ 5600 batches | train loss 0.5586016 +| epoch 3 | 2283/ 5600 batches | train loss 0.4258828 +| epoch 3 | 2287/ 5600 batches | train loss 0.4200495 +| epoch 3 | 2291/ 5600 batches | train loss 0.4890905 +| epoch 3 | 2295/ 5600 batches | train loss 0.4669720 +| epoch 3 | 2299/ 5600 batches | train loss 0.4658792 +| epoch 3 | 2303/ 5600 batches | train loss 0.4172492 +| epoch 3 | 2307/ 5600 batches | train loss 0.4049175 +| epoch 3 | 2311/ 5600 batches | train loss 0.4715238 +| epoch 3 | 2315/ 5600 batches | train loss 0.4356102 +| epoch 3 | 2319/ 5600 batches | train loss 0.5180558 +| epoch 3 | 2323/ 5600 batches | train loss 0.4360089 +| epoch 3 | 2327/ 5600 batches | train loss 0.4498170 +| epoch 3 | 2331/ 5600 batches | train loss 0.3687477 +| epoch 3 | 2335/ 5600 batches | train loss 0.4481199 +| epoch 3 | 2339/ 5600 batches | train loss 0.4869759 +| epoch 3 | 2343/ 5600 batches | train loss 0.4268932 +| epoch 3 | 2347/ 5600 batches | train loss 0.3975822 +| epoch 3 | 2351/ 5600 batches | train loss 0.4494705 +| epoch 3 | 2355/ 5600 batches | train loss 0.3828277 +| epoch 3 | 2359/ 5600 batches | train loss 0.4268037 +| epoch 3 | 2363/ 5600 batches | train loss 0.4187635 +| epoch 3 | 2367/ 5600 batches | train loss 0.5862464 +| epoch 3 | 2371/ 5600 batches | train loss 0.4319946 +| epoch 3 | 2375/ 5600 batches | train loss 0.5008773 +| epoch 3 | 2379/ 5600 batches | train loss 0.4534963 +| epoch 3 | 2383/ 5600 batches | train loss 0.4400151 +| epoch 3 | 2387/ 5600 batches | train loss 0.4305742 +| epoch 3 | 2391/ 5600 batches | train loss 0.3923495 +| epoch 3 | 2395/ 5600 batches | train loss 0.4162983 +| epoch 3 | 2399/ 5600 batches | train loss 0.5248997 +| epoch 3 | 2403/ 5600 batches | train loss 0.4186242 +| epoch 3 | 2407/ 5600 batches | train loss 0.3390631 +| epoch 3 | 2411/ 5600 batches | train loss 0.4551070 +| epoch 3 | 2415/ 5600 batches | train loss 0.4004099 +| epoch 3 | 2419/ 5600 batches | train loss 0.3777315 +| epoch 3 | 2423/ 5600 batches | train loss 0.4391294 +| epoch 3 | 2427/ 5600 batches | train loss 0.3455018 +| epoch 3 | 2431/ 5600 batches | train loss 0.3817391 +| epoch 3 | 2435/ 5600 batches | train loss 0.3801083 +| epoch 3 | 2439/ 5600 batches | train loss 0.4347232 +| epoch 3 | 2443/ 5600 batches | train loss 0.4164736 +| epoch 3 | 2447/ 5600 batches | train loss 0.3075967 +| epoch 3 | 2451/ 5600 batches | train loss 0.4274512 +| epoch 3 | 2455/ 5600 batches | train loss 0.5151923 +| epoch 3 | 2459/ 5600 batches | train loss 0.4522912 +| epoch 3 | 2463/ 5600 batches | train loss 0.4253532 +| epoch 3 | 2467/ 5600 batches | train loss 0.4194879 +| epoch 3 | 2471/ 5600 batches | train loss 0.4440391 +| epoch 3 | 2475/ 5600 batches | train loss 0.4882143 +| epoch 3 | 2479/ 5600 batches | train loss 0.3906908 +| epoch 3 | 2483/ 5600 batches | train loss 0.4351588 +| epoch 3 | 2487/ 5600 batches | train loss 0.3750558 +| epoch 3 | 2491/ 5600 batches | train loss 0.4099638 +| epoch 3 | 2495/ 5600 batches | train loss 0.3287035 +| epoch 3 | 2499/ 5600 batches | train loss 0.3810215 +| epoch 3 | 2503/ 5600 batches | train loss 0.4259641 +| epoch 3 | 2507/ 5600 batches | train loss 0.3876151 +| epoch 3 | 2511/ 5600 batches | train loss 0.4150361 +| epoch 3 | 2515/ 5600 batches | train loss 0.4125715 +| epoch 3 | 2519/ 5600 batches | train loss 0.4141045 +| epoch 3 | 2523/ 5600 batches | train loss 0.3924472 +| epoch 3 | 2527/ 5600 batches | train loss 0.4595839 +| epoch 3 | 2531/ 5600 batches | train loss 0.4517287 +| epoch 3 | 2535/ 5600 batches | train loss 0.3565506 +| epoch 3 | 2539/ 5600 batches | train loss 0.4323031 +| epoch 3 | 2543/ 5600 batches | train loss 0.3691944 +| epoch 3 | 2547/ 5600 batches | train loss 0.4954442 +| epoch 3 | 2551/ 5600 batches | train loss 0.1551204 +| epoch 3 | 2555/ 5600 batches | train loss 0.4430085 +| epoch 3 | 2559/ 5600 batches | train loss 0.4727004 +| epoch 3 | 2563/ 5600 batches | train loss 0.3650306 +| epoch 3 | 2567/ 5600 batches | train loss 0.3959099 +| epoch 3 | 2571/ 5600 batches | train loss 0.4737254 +| epoch 3 | 2575/ 5600 batches | train loss 0.5266953 +| epoch 3 | 2579/ 5600 batches | train loss 0.3803502 +| epoch 3 | 2583/ 5600 batches | train loss 0.4702818 +| epoch 3 | 2587/ 5600 batches | train loss 0.3921571 +| epoch 3 | 2591/ 5600 batches | train loss 0.4504243 +| epoch 3 | 2595/ 5600 batches | train loss 0.4751556 +| epoch 3 | 2599/ 5600 batches | train loss 0.3682747 +| epoch 3 | 2603/ 5600 batches | train loss 0.4735054 +| epoch 3 | 2607/ 5600 batches | train loss 0.3725368 +| epoch 3 | 2611/ 5600 batches | train loss 0.4252416 +| epoch 3 | 2615/ 5600 batches | train loss 0.4047374 +| epoch 3 | 2619/ 5600 batches | train loss 0.4682484 +| epoch 3 | 2623/ 5600 batches | train loss 0.4149379 +| epoch 3 | 2627/ 5600 batches | train loss 0.3795521 +| epoch 3 | 2631/ 5600 batches | train loss 0.2971289 +| epoch 3 | 2635/ 5600 batches | train loss 0.4443499 +| epoch 3 | 2639/ 5600 batches | train loss 0.4290803 +| epoch 3 | 2643/ 5600 batches | train loss 0.4465398 +| epoch 3 | 2647/ 5600 batches | train loss 0.4982803 +| epoch 3 | 2651/ 5600 batches | train loss 0.4226602 +| epoch 3 | 2655/ 5600 batches | train loss 0.4611031 +| epoch 3 | 2659/ 5600 batches | train loss 0.4584899 +| epoch 3 | 2663/ 5600 batches | train loss 0.4402172 +| epoch 3 | 2667/ 5600 batches | train loss 0.4976718 +| epoch 3 | 2671/ 5600 batches | train loss 0.3793170 +| epoch 3 | 2675/ 5600 batches | train loss 0.4913320 +| epoch 3 | 2679/ 5600 batches | train loss 0.4621410 +| epoch 3 | 2683/ 5600 batches | train loss 0.4552673 +| epoch 3 | 2687/ 5600 batches | train loss 0.4356105 +| epoch 3 | 2691/ 5600 batches | train loss 0.5785509 +| epoch 3 | 2695/ 5600 batches | train loss 0.3790524 +| epoch 3 | 2699/ 5600 batches | train loss 0.4435970 +| epoch 3 | 2703/ 5600 batches | train loss 0.4323158 +| epoch 3 | 2707/ 5600 batches | train loss 0.5126864 +| epoch 3 | 2711/ 5600 batches | train loss 0.2140304 +| epoch 3 | 2715/ 5600 batches | train loss 0.4145501 +| epoch 3 | 2719/ 5600 batches | train loss 0.4598257 +| epoch 3 | 2723/ 5600 batches | train loss 0.4336384 +| epoch 3 | 2727/ 5600 batches | train loss 0.4212318 +| epoch 3 | 2731/ 5600 batches | train loss 0.4933722 +| epoch 3 | 2735/ 5600 batches | train loss 0.4808586 +| epoch 3 | 2739/ 5600 batches | train loss 0.3129785 +| epoch 3 | 2743/ 5600 batches | train loss 0.5569119 +| epoch 3 | 2747/ 5600 batches | train loss 0.4621509 +| epoch 3 | 2751/ 5600 batches | train loss 0.4198858 +| epoch 3 | 2755/ 5600 batches | train loss 0.4591388 +| epoch 3 | 2759/ 5600 batches | train loss 0.4574731 +| epoch 3 | 2763/ 5600 batches | train loss 0.4294079 +| epoch 3 | 2767/ 5600 batches | train loss 0.4992768 +| epoch 3 | 2771/ 5600 batches | train loss 0.4362155 +| epoch 3 | 2775/ 5600 batches | train loss 0.4596055 +| epoch 3 | 2779/ 5600 batches | train loss 0.4270012 +| epoch 3 | 2783/ 5600 batches | train loss 0.4782768 +| epoch 3 | 2787/ 5600 batches | train loss 0.4303513 +| epoch 3 | 2791/ 5600 batches | train loss 0.4444219 +| epoch 3 | 2795/ 5600 batches | train loss 0.4286734 +| epoch 3 | 2799/ 5600 batches | train loss 0.4298581 +| epoch 3 | 2803/ 5600 batches | train loss 0.4366199 +| epoch 3 | 2807/ 5600 batches | train loss 0.4368083 +| epoch 3 | 2811/ 5600 batches | train loss 0.4628545 +| epoch 3 | 2815/ 5600 batches | train loss 0.4166577 +| epoch 3 | 2819/ 5600 batches | train loss 0.4348794 +| epoch 3 | 2823/ 5600 batches | train loss 0.3929672 +| epoch 3 | 2827/ 5600 batches | train loss 0.5012010 +| epoch 3 | 2831/ 5600 batches | train loss 0.3431709 +| epoch 3 | 2835/ 5600 batches | train loss 0.4715192 +| epoch 3 | 2839/ 5600 batches | train loss 0.3581712 +| epoch 3 | 2843/ 5600 batches | train loss 0.3794836 +| epoch 3 | 2847/ 5600 batches | train loss 0.4485537 +| epoch 3 | 2851/ 5600 batches | train loss 0.3936853 +| epoch 3 | 2855/ 5600 batches | train loss 0.3969200 +| epoch 3 | 2859/ 5600 batches | train loss 0.5463589 +| epoch 3 | 2863/ 5600 batches | train loss 0.4333013 +| epoch 3 | 2867/ 5600 batches | train loss 0.4760898 +| epoch 3 | 2871/ 5600 batches | train loss 0.5450434 +| epoch 3 | 2875/ 5600 batches | train loss 0.4118249 +| epoch 3 | 2879/ 5600 batches | train loss 0.4080748 +| epoch 3 | 2883/ 5600 batches | train loss 0.4740202 +| epoch 3 | 2887/ 5600 batches | train loss 0.4816137 +| epoch 3 | 2891/ 5600 batches | train loss 0.4415770 +| epoch 3 | 2895/ 5600 batches | train loss 0.3826049 +| epoch 3 | 2899/ 5600 batches | train loss 0.5027894 +| epoch 3 | 2903/ 5600 batches | train loss 0.4095104 +| epoch 3 | 2907/ 5600 batches | train loss 0.4308964 +| epoch 3 | 2911/ 5600 batches | train loss 0.5537713 +| epoch 3 | 2915/ 5600 batches | train loss 0.5530798 +| epoch 3 | 2919/ 5600 batches | train loss 0.3547646 +| epoch 3 | 2923/ 5600 batches | train loss 0.4452949 +| epoch 3 | 2927/ 5600 batches | train loss 0.3612161 +| epoch 3 | 2931/ 5600 batches | train loss 0.4101580 +| epoch 3 | 2935/ 5600 batches | train loss 0.5065758 +| epoch 3 | 2939/ 5600 batches | train loss 0.4430166 +| epoch 3 | 2943/ 5600 batches | train loss 0.3781770 +| epoch 3 | 2947/ 5600 batches | train loss 0.3354407 +| epoch 3 | 2951/ 5600 batches | train loss 0.4005396 +| epoch 3 | 2955/ 5600 batches | train loss 0.4477542 +| epoch 3 | 2959/ 5600 batches | train loss 0.5100254 +| epoch 3 | 2963/ 5600 batches | train loss 0.3852847 +| epoch 3 | 2967/ 5600 batches | train loss 0.3966184 +| epoch 3 | 2971/ 5600 batches | train loss 0.4148644 +| epoch 3 | 2975/ 5600 batches | train loss 0.4834833 +| epoch 3 | 2979/ 5600 batches | train loss 0.3772628 +| epoch 3 | 2983/ 5600 batches | train loss 0.3225821 +| epoch 3 | 2987/ 5600 batches | train loss 0.4375557 +| epoch 3 | 2991/ 5600 batches | train loss 0.4550110 +| epoch 3 | 2995/ 5600 batches | train loss 0.5034972 +| epoch 3 | 2999/ 5600 batches | train loss 0.3081013 +| epoch 3 | 3003/ 5600 batches | train loss 0.4274390 +| epoch 3 | 3007/ 5600 batches | train loss 0.4562964 +| epoch 3 | 3011/ 5600 batches | train loss 0.4305654 +| epoch 3 | 3015/ 5600 batches | train loss 0.3531386 +| epoch 3 | 3019/ 5600 batches | train loss 0.4390167 +| epoch 3 | 3023/ 5600 batches | train loss 0.4377738 +| epoch 3 | 3027/ 5600 batches | train loss 0.4590520 +| epoch 3 | 3031/ 5600 batches | train loss 0.4629960 +| epoch 3 | 3035/ 5600 batches | train loss 0.4080343 +| epoch 3 | 3039/ 5600 batches | train loss 0.4430034 +| epoch 3 | 3043/ 5600 batches | train loss 0.4438215 +| epoch 3 | 3047/ 5600 batches | train loss 0.4577793 +| epoch 3 | 3051/ 5600 batches | train loss 0.5192935 +| epoch 3 | 3055/ 5600 batches | train loss 0.4712799 +| epoch 3 | 3059/ 5600 batches | train loss 0.5036676 +| epoch 3 | 3063/ 5600 batches | train loss 0.4585949 +| epoch 3 | 3067/ 5600 batches | train loss 0.4072976 +| epoch 3 | 3071/ 5600 batches | train loss 0.4870169 +| epoch 3 | 3075/ 5600 batches | train loss 0.4016454 +| epoch 3 | 3079/ 5600 batches | train loss 0.4843413 +| epoch 3 | 3083/ 5600 batches | train loss 0.3254707 +| epoch 3 | 3087/ 5600 batches | train loss 0.4301428 +| epoch 3 | 3091/ 5600 batches | train loss 0.5660285 +| epoch 3 | 3095/ 5600 batches | train loss 0.3854578 +| epoch 3 | 3099/ 5600 batches | train loss 0.3682542 +| epoch 3 | 3103/ 5600 batches | train loss 0.3945570 +| epoch 3 | 3107/ 5600 batches | train loss 0.3525804 +| epoch 3 | 3111/ 5600 batches | train loss 0.4358040 +| epoch 3 | 3115/ 5600 batches | train loss 0.3573955 +| epoch 3 | 3119/ 5600 batches | train loss 0.4773556 +| epoch 3 | 3123/ 5600 batches | train loss 0.4260148 +| epoch 3 | 3127/ 5600 batches | train loss 0.4297944 +| epoch 3 | 3131/ 5600 batches | train loss 0.3955159 +| epoch 3 | 3135/ 5600 batches | train loss 0.4731648 +| epoch 3 | 3139/ 5600 batches | train loss 0.4463026 +| epoch 3 | 3143/ 5600 batches | train loss 0.3972510 +| epoch 3 | 3147/ 5600 batches | train loss 0.3858820 +| epoch 3 | 3151/ 5600 batches | train loss 0.4440197 +| epoch 3 | 3155/ 5600 batches | train loss 0.4074055 +| epoch 3 | 3159/ 5600 batches | train loss 0.4131092 +| epoch 3 | 3163/ 5600 batches | train loss 0.4553736 +| epoch 3 | 3167/ 5600 batches | train loss 0.4545988 +| epoch 3 | 3171/ 5600 batches | train loss 0.3514225 +| epoch 3 | 3175/ 5600 batches | train loss 0.4442571 +| epoch 3 | 3179/ 5600 batches | train loss 0.4617481 +| epoch 3 | 3183/ 5600 batches | train loss 0.4474911 +| epoch 3 | 3187/ 5600 batches | train loss 0.4798763 +| epoch 3 | 3191/ 5600 batches | train loss 0.4702465 +| epoch 3 | 3195/ 5600 batches | train loss 0.3897248 +| epoch 3 | 3199/ 5600 batches | train loss 0.4329901 +| epoch 3 | 3203/ 5600 batches | train loss 0.3963115 +| epoch 3 | 3207/ 5600 batches | train loss 0.4373814 +| epoch 3 | 3211/ 5600 batches | train loss 0.3918580 +| epoch 3 | 3215/ 5600 batches | train loss 0.4109465 +| epoch 3 | 3219/ 5600 batches | train loss 0.4375607 +| epoch 3 | 3223/ 5600 batches | train loss 0.3975623 +| epoch 3 | 3227/ 5600 batches | train loss 0.5218459 +| epoch 3 | 3231/ 5600 batches | train loss 0.4287913 +| epoch 3 | 3235/ 5600 batches | train loss 0.5510711 +| epoch 3 | 3239/ 5600 batches | train loss 0.2926959 +| epoch 3 | 3243/ 5600 batches | train loss 0.4233548 +| epoch 3 | 3247/ 5600 batches | train loss 0.5410064 +| epoch 3 | 3251/ 5600 batches | train loss 0.4535908 +| epoch 3 | 3255/ 5600 batches | train loss 0.4295977 +| epoch 3 | 3259/ 5600 batches | train loss 0.4853255 +| epoch 3 | 3263/ 5600 batches | train loss 0.5207707 +| epoch 3 | 3267/ 5600 batches | train loss 0.4626985 +| epoch 3 | 3271/ 5600 batches | train loss 0.4013826 +| epoch 3 | 3275/ 5600 batches | train loss 0.3977903 +| epoch 3 | 3279/ 5600 batches | train loss 0.4318963 +| epoch 3 | 3283/ 5600 batches | train loss 0.4649351 +| epoch 3 | 3287/ 5600 batches | train loss 0.4214757 +| epoch 3 | 3291/ 5600 batches | train loss 0.5020729 +| epoch 3 | 3295/ 5600 batches | train loss 0.4569792 +| epoch 3 | 3299/ 5600 batches | train loss 0.4051960 +| epoch 3 | 3303/ 5600 batches | train loss 0.4324094 +| epoch 3 | 3307/ 5600 batches | train loss 0.4638357 +| epoch 3 | 3311/ 5600 batches | train loss 0.3840593 +| epoch 3 | 3315/ 5600 batches | train loss 0.4145429 +| epoch 3 | 3319/ 5600 batches | train loss 0.5298375 +| epoch 3 | 3323/ 5600 batches | train loss 0.4345099 +| epoch 3 | 3327/ 5600 batches | train loss 0.3982954 +| epoch 3 | 3331/ 5600 batches | train loss 0.5593737 +| epoch 3 | 3335/ 5600 batches | train loss 0.4223287 +| epoch 3 | 3339/ 5600 batches | train loss 0.4226636 +| epoch 3 | 3343/ 5600 batches | train loss 0.4077691 +| epoch 3 | 3347/ 5600 batches | train loss 0.4073271 +| epoch 3 | 3351/ 5600 batches | train loss 0.3771558 +| epoch 3 | 3355/ 5600 batches | train loss 0.3760895 +| epoch 3 | 3359/ 5600 batches | train loss 0.4147867 +| epoch 3 | 3363/ 5600 batches | train loss 0.4702327 +| epoch 3 | 3367/ 5600 batches | train loss 0.4336767 +| epoch 3 | 3371/ 5600 batches | train loss 0.4259540 +| epoch 3 | 3375/ 5600 batches | train loss 0.4279770 +| epoch 3 | 3379/ 5600 batches | train loss 0.5467288 +| epoch 3 | 3383/ 5600 batches | train loss 0.4382202 +| epoch 3 | 3387/ 5600 batches | train loss 0.3753645 +| epoch 3 | 3391/ 5600 batches | train loss 0.4691730 +| epoch 3 | 3395/ 5600 batches | train loss 0.3836089 +| epoch 3 | 3399/ 5600 batches | train loss 0.4567532 +| epoch 3 | 3403/ 5600 batches | train loss 0.4449148 +| epoch 3 | 3407/ 5600 batches | train loss 0.4155768 +| epoch 3 | 3411/ 5600 batches | train loss 0.4495782 +| epoch 3 | 3415/ 5600 batches | train loss 0.4921300 +| epoch 3 | 3419/ 5600 batches | train loss 0.3600819 +| epoch 3 | 3423/ 5600 batches | train loss 0.4559231 +| epoch 3 | 3427/ 5600 batches | train loss 0.4520246 +| epoch 3 | 3431/ 5600 batches | train loss 0.4553074 +| epoch 3 | 3435/ 5600 batches | train loss 0.4216723 +| epoch 3 | 3439/ 5600 batches | train loss 0.4298914 +| epoch 3 | 3443/ 5600 batches | train loss 0.4442363 +| epoch 3 | 3447/ 5600 batches | train loss 0.3162858 +| epoch 3 | 3451/ 5600 batches | train loss 0.3646352 +| epoch 3 | 3455/ 5600 batches | train loss 0.4868588 +| epoch 3 | 3459/ 5600 batches | train loss 0.4043668 +| epoch 3 | 3463/ 5600 batches | train loss 0.4399540 +| epoch 3 | 3467/ 5600 batches | train loss 0.4780186 +| epoch 3 | 3471/ 5600 batches | train loss 0.5374588 +| epoch 3 | 3475/ 5600 batches | train loss 0.4096246 +| epoch 3 | 3479/ 5600 batches | train loss 0.4345670 +| epoch 3 | 3483/ 5600 batches | train loss 0.4327785 +| epoch 3 | 3487/ 5600 batches | train loss 0.4952297 +| epoch 3 | 3491/ 5600 batches | train loss 0.4109493 +| epoch 3 | 3495/ 5600 batches | train loss 0.4685126 +| epoch 3 | 3499/ 5600 batches | train loss 0.3471649 +| epoch 3 | 3503/ 5600 batches | train loss 0.3691339 +| epoch 3 | 3507/ 5600 batches | train loss 0.4430428 +| epoch 3 | 3511/ 5600 batches | train loss 0.4289240 +| epoch 3 | 3515/ 5600 batches | train loss 0.4450721 +| epoch 3 | 3519/ 5600 batches | train loss 0.4974033 +| epoch 3 | 3523/ 5600 batches | train loss 0.5365025 +| epoch 3 | 3527/ 5600 batches | train loss 0.4823743 +| epoch 3 | 3531/ 5600 batches | train loss 0.4450272 +| epoch 3 | 3535/ 5600 batches | train loss 0.4812955 +| epoch 3 | 3539/ 5600 batches | train loss 0.4472985 +| epoch 3 | 3543/ 5600 batches | train loss 0.4561077 +| epoch 3 | 3547/ 5600 batches | train loss 0.3487449 +| epoch 3 | 3551/ 5600 batches | train loss 0.4375067 +| epoch 3 | 3555/ 5600 batches | train loss 0.4873664 +| epoch 3 | 3559/ 5600 batches | train loss 0.4256160 +| epoch 3 | 3563/ 5600 batches | train loss 0.3191845 +| epoch 3 | 3567/ 5600 batches | train loss 0.4286108 +| epoch 3 | 3571/ 5600 batches | train loss 0.3941973 +| epoch 3 | 3575/ 5600 batches | train loss 0.4903461 +| epoch 3 | 3579/ 5600 batches | train loss 0.4608886 +| epoch 3 | 3583/ 5600 batches | train loss 0.4443572 +| epoch 3 | 3587/ 5600 batches | train loss 0.5057045 +| epoch 3 | 3591/ 5600 batches | train loss 0.5108396 +| epoch 3 | 3595/ 5600 batches | train loss 0.4263963 +| epoch 3 | 3599/ 5600 batches | train loss 0.5157658 +| epoch 3 | 3603/ 5600 batches | train loss 0.4676625 +| epoch 3 | 3607/ 5600 batches | train loss 0.5308497 +| epoch 3 | 3611/ 5600 batches | train loss 0.4445147 +| epoch 3 | 3615/ 5600 batches | train loss 0.4941773 +| epoch 3 | 3619/ 5600 batches | train loss 0.4644143 +| epoch 3 | 3623/ 5600 batches | train loss 0.4263128 +| epoch 3 | 3627/ 5600 batches | train loss 0.4302723 +| epoch 3 | 3631/ 5600 batches | train loss 0.4751206 +| epoch 3 | 3635/ 5600 batches | train loss 0.4662962 +| epoch 3 | 3639/ 5600 batches | train loss 0.4236796 +| epoch 3 | 3643/ 5600 batches | train loss 0.4056597 +| epoch 3 | 3647/ 5600 batches | train loss 0.3494277 +| epoch 3 | 3651/ 5600 batches | train loss 0.4171693 +| epoch 3 | 3655/ 5600 batches | train loss 0.3127797 +| epoch 3 | 3659/ 5600 batches | train loss 0.4186969 +| epoch 3 | 3663/ 5600 batches | train loss 0.3856733 +| epoch 3 | 3667/ 5600 batches | train loss 0.4103229 +| epoch 3 | 3671/ 5600 batches | train loss 0.4457956 +| epoch 3 | 3675/ 5600 batches | train loss 0.3642429 +| epoch 3 | 3679/ 5600 batches | train loss 0.4220151 +| epoch 3 | 3683/ 5600 batches | train loss 0.4204248 +| epoch 3 | 3687/ 5600 batches | train loss 0.4710018 +| epoch 3 | 3691/ 5600 batches | train loss 0.5073074 +| epoch 3 | 3695/ 5600 batches | train loss 0.5583131 +| epoch 3 | 3699/ 5600 batches | train loss 0.4663105 +| epoch 3 | 3703/ 5600 batches | train loss 0.4761376 +| epoch 3 | 3707/ 5600 batches | train loss 0.3475510 +| epoch 3 | 3711/ 5600 batches | train loss 0.4173228 +| epoch 3 | 3715/ 5600 batches | train loss 0.4859571 +| epoch 3 | 3719/ 5600 batches | train loss 0.5233752 +| epoch 3 | 3723/ 5600 batches | train loss 0.4288297 +| epoch 3 | 3727/ 5600 batches | train loss 0.4305155 +| epoch 3 | 3731/ 5600 batches | train loss 0.3591367 +| epoch 3 | 3735/ 5600 batches | train loss 0.4169669 +| epoch 3 | 3739/ 5600 batches | train loss 0.3463371 +| epoch 3 | 3743/ 5600 batches | train loss 0.4409924 +| epoch 3 | 3747/ 5600 batches | train loss 0.4186211 +| epoch 3 | 3751/ 5600 batches | train loss 0.4520994 +| epoch 3 | 3755/ 5600 batches | train loss 0.4273426 +| epoch 3 | 3759/ 5600 batches | train loss 0.4537365 +| epoch 3 | 3763/ 5600 batches | train loss 0.4206858 +| epoch 3 | 3767/ 5600 batches | train loss 0.4727583 +| epoch 3 | 3771/ 5600 batches | train loss 0.4265926 +| epoch 3 | 3775/ 5600 batches | train loss 0.4533525 +| epoch 3 | 3779/ 5600 batches | train loss 0.5788863 +| epoch 3 | 3783/ 5600 batches | train loss 0.4683944 +| epoch 3 | 3787/ 5600 batches | train loss 0.4326690 +| epoch 3 | 3791/ 5600 batches | train loss 0.4439241 +| epoch 3 | 3795/ 5600 batches | train loss 0.4761970 +| epoch 3 | 3799/ 5600 batches | train loss 0.3921517 +| epoch 3 | 3803/ 5600 batches | train loss 0.3845629 +| epoch 3 | 3807/ 5600 batches | train loss 0.4715689 +| epoch 3 | 3811/ 5600 batches | train loss 0.4023253 +| epoch 3 | 3815/ 5600 batches | train loss 0.3694595 +| epoch 3 | 3819/ 5600 batches | train loss 0.4321651 +| epoch 3 | 3823/ 5600 batches | train loss 0.4625160 +| epoch 3 | 3827/ 5600 batches | train loss 0.4372713 +| epoch 3 | 3831/ 5600 batches | train loss 0.5050381 +| epoch 3 | 3835/ 5600 batches | train loss 0.4232335 +| epoch 3 | 3839/ 5600 batches | train loss 0.5075492 +| epoch 3 | 3843/ 5600 batches | train loss 0.4631831 +| epoch 3 | 3847/ 5600 batches | train loss 0.5071313 +| epoch 3 | 3851/ 5600 batches | train loss 0.5018843 +| epoch 3 | 3855/ 5600 batches | train loss 0.4325926 +| epoch 3 | 3859/ 5600 batches | train loss 0.4321020 +| epoch 3 | 3863/ 5600 batches | train loss 0.3786122 +| epoch 3 | 3867/ 5600 batches | train loss 0.4737415 +| epoch 3 | 3871/ 5600 batches | train loss 0.4737089 +| epoch 3 | 3875/ 5600 batches | train loss 0.4192596 +| epoch 3 | 3879/ 5600 batches | train loss 0.4410571 +| epoch 3 | 3883/ 5600 batches | train loss 0.4488150 +| epoch 3 | 3887/ 5600 batches | train loss 0.3937226 +| epoch 3 | 3891/ 5600 batches | train loss 0.4042550 +| epoch 3 | 3895/ 5600 batches | train loss 0.4322048 +| epoch 3 | 3899/ 5600 batches | train loss 0.3668105 +| epoch 3 | 3903/ 5600 batches | train loss 0.4832436 +| epoch 3 | 3907/ 5600 batches | train loss 0.4399462 +| epoch 3 | 3911/ 5600 batches | train loss 0.3212576 +| epoch 3 | 3915/ 5600 batches | train loss 0.4086830 +| epoch 3 | 3919/ 5600 batches | train loss 0.4059554 +| epoch 3 | 3923/ 5600 batches | train loss 0.3842001 +| epoch 3 | 3927/ 5600 batches | train loss 0.4513513 +| epoch 3 | 3931/ 5600 batches | train loss 0.4971955 +| epoch 3 | 3935/ 5600 batches | train loss 0.4393809 +| epoch 3 | 3939/ 5600 batches | train loss 0.4438955 +| epoch 3 | 3943/ 5600 batches | train loss 0.4305923 +| epoch 3 | 3947/ 5600 batches | train loss 0.3371347 +| epoch 3 | 3951/ 5600 batches | train loss 0.4686104 +| epoch 3 | 3955/ 5600 batches | train loss 0.5757469 +| epoch 3 | 3959/ 5600 batches | train loss 0.4182433 +| epoch 3 | 3963/ 5600 batches | train loss 0.5571293 +| epoch 3 | 3967/ 5600 batches | train loss 0.4803419 +| epoch 3 | 3971/ 5600 batches | train loss 0.3817767 +| epoch 3 | 3975/ 5600 batches | train loss 0.4153407 +| epoch 3 | 3979/ 5600 batches | train loss 0.4577505 +| epoch 3 | 3983/ 5600 batches | train loss 0.4079793 +| epoch 3 | 3987/ 5600 batches | train loss 0.3883488 +| epoch 3 | 3991/ 5600 batches | train loss 0.3871285 +| epoch 3 | 3995/ 5600 batches | train loss 0.4218844 +| epoch 3 | 3999/ 5600 batches | train loss 0.3979127 +| epoch 3 | 4003/ 5600 batches | train loss 0.4308647 +| epoch 3 | 4007/ 5600 batches | train loss 0.3592063 +| epoch 3 | 4011/ 5600 batches | train loss 0.4627221 +| epoch 3 | 4015/ 5600 batches | train loss 0.4393153 +| epoch 3 | 4019/ 5600 batches | train loss 0.4096427 +| epoch 3 | 4023/ 5600 batches | train loss 0.4342410 +| epoch 3 | 4027/ 5600 batches | train loss 0.4083153 +| epoch 3 | 4031/ 5600 batches | train loss 0.3668137 +| epoch 3 | 4035/ 5600 batches | train loss 0.3954478 +| epoch 3 | 4039/ 5600 batches | train loss 0.4573950 +| epoch 3 | 4043/ 5600 batches | train loss 0.3783955 +| epoch 3 | 4047/ 5600 batches | train loss 0.4296241 +| epoch 3 | 4051/ 5600 batches | train loss 0.4727985 +| epoch 3 | 4055/ 5600 batches | train loss 0.3487841 +| epoch 3 | 4059/ 5600 batches | train loss 0.4694033 +| epoch 3 | 4063/ 5600 batches | train loss 0.4415171 +| epoch 3 | 4067/ 5600 batches | train loss 0.4795317 +| epoch 3 | 4071/ 5600 batches | train loss 0.3840500 +| epoch 3 | 4075/ 5600 batches | train loss 0.4421729 +| epoch 3 | 4079/ 5600 batches | train loss 0.5082331 +| epoch 3 | 4083/ 5600 batches | train loss 0.4638847 +| epoch 3 | 4087/ 5600 batches | train loss 0.4120113 +| epoch 3 | 4091/ 5600 batches | train loss 0.4128400 +| epoch 3 | 4095/ 5600 batches | train loss 0.5214349 +| epoch 3 | 4099/ 5600 batches | train loss 0.4438614 +| epoch 3 | 4103/ 5600 batches | train loss 0.3931668 +| epoch 3 | 4107/ 5600 batches | train loss 0.4195053 +| epoch 3 | 4111/ 5600 batches | train loss 0.3421085 +| epoch 3 | 4115/ 5600 batches | train loss 0.5253610 +| epoch 3 | 4119/ 5600 batches | train loss 0.3995913 +| epoch 3 | 4123/ 5600 batches | train loss 0.4286236 +| epoch 3 | 4127/ 5600 batches | train loss 0.4040203 +| epoch 3 | 4131/ 5600 batches | train loss 0.4449353 +| epoch 3 | 4135/ 5600 batches | train loss 0.5708619 +| epoch 3 | 4139/ 5600 batches | train loss 0.4237941 +| epoch 3 | 4143/ 5600 batches | train loss 0.4661436 +| epoch 3 | 4147/ 5600 batches | train loss 0.4634136 +| epoch 3 | 4151/ 5600 batches | train loss 0.3887826 +| epoch 3 | 4155/ 5600 batches | train loss 0.3970323 +| epoch 3 | 4159/ 5600 batches | train loss 0.4096599 +| epoch 3 | 4163/ 5600 batches | train loss 0.3742344 +| epoch 3 | 4167/ 5600 batches | train loss 0.4282011 +| epoch 3 | 4171/ 5600 batches | train loss 0.4834057 +| epoch 3 | 4175/ 5600 batches | train loss 0.4513690 +| epoch 3 | 4179/ 5600 batches | train loss 0.4007629 +| epoch 3 | 4183/ 5600 batches | train loss 0.4135308 +| epoch 3 | 4187/ 5600 batches | train loss 0.4456842 +| epoch 3 | 4191/ 5600 batches | train loss 0.5250396 +| epoch 3 | 4195/ 5600 batches | train loss 0.3873049 +| epoch 3 | 4199/ 5600 batches | train loss 0.4648950 +| epoch 3 | 4203/ 5600 batches | train loss 0.4733424 +| epoch 3 | 4207/ 5600 batches | train loss 0.4036259 +| epoch 3 | 4211/ 5600 batches | train loss 0.3842074 +| epoch 3 | 4215/ 5600 batches | train loss 0.4210353 +| epoch 3 | 4219/ 5600 batches | train loss 0.5508729 +| epoch 3 | 4223/ 5600 batches | train loss 0.4197926 +| epoch 3 | 4227/ 5600 batches | train loss 0.4053120 +| epoch 3 | 4231/ 5600 batches | train loss 0.4223233 +| epoch 3 | 4235/ 5600 batches | train loss 0.4367629 +| epoch 3 | 4239/ 5600 batches | train loss 0.4121823 +| epoch 3 | 4243/ 5600 batches | train loss 0.5281600 +| epoch 3 | 4247/ 5600 batches | train loss 0.3167803 +| epoch 3 | 4251/ 5600 batches | train loss 0.4048374 +| epoch 3 | 4255/ 5600 batches | train loss 0.4768332 +| epoch 3 | 4259/ 5600 batches | train loss 0.5653028 +| epoch 3 | 4263/ 5600 batches | train loss 0.4849761 +| epoch 3 | 4267/ 5600 batches | train loss 0.4895610 +| epoch 3 | 4271/ 5600 batches | train loss 0.3821619 +| epoch 3 | 4275/ 5600 batches | train loss 0.4665497 +| epoch 3 | 4279/ 5600 batches | train loss 0.5345424 +| epoch 3 | 4283/ 5600 batches | train loss 0.3774989 +| epoch 3 | 4287/ 5600 batches | train loss 0.4874251 +| epoch 3 | 4291/ 5600 batches | train loss 0.3891714 +| epoch 3 | 4295/ 5600 batches | train loss 0.5031585 +| epoch 3 | 4299/ 5600 batches | train loss 0.4468787 +| epoch 3 | 4303/ 5600 batches | train loss 0.4365646 +| epoch 3 | 4307/ 5600 batches | train loss 0.4533605 +| epoch 3 | 4311/ 5600 batches | train loss 0.4076800 +| epoch 3 | 4315/ 5600 batches | train loss 0.4360568 +| epoch 3 | 4319/ 5600 batches | train loss 0.4584280 +| epoch 3 | 4323/ 5600 batches | train loss 0.5141556 +| epoch 3 | 4327/ 5600 batches | train loss 0.4022160 +| epoch 3 | 4331/ 5600 batches | train loss 0.3689939 +| epoch 3 | 4335/ 5600 batches | train loss 0.4582818 +| epoch 3 | 4339/ 5600 batches | train loss 0.4882094 +| epoch 3 | 4343/ 5600 batches | train loss 0.4343041 +| epoch 3 | 4347/ 5600 batches | train loss 0.4077682 +| epoch 3 | 4351/ 5600 batches | train loss 0.4348923 +| epoch 3 | 4355/ 5600 batches | train loss 0.3387442 +| epoch 3 | 4359/ 5600 batches | train loss 0.4072863 +| epoch 3 | 4363/ 5600 batches | train loss 0.4493979 +| epoch 3 | 4367/ 5600 batches | train loss 0.4273002 +| epoch 3 | 4371/ 5600 batches | train loss 0.4872247 +| epoch 3 | 4375/ 5600 batches | train loss 0.4430691 +| epoch 3 | 4379/ 5600 batches | train loss 0.4315844 +| epoch 3 | 4383/ 5600 batches | train loss 0.3656222 +| epoch 3 | 4387/ 5600 batches | train loss 0.4854535 +| epoch 3 | 4391/ 5600 batches | train loss 0.4014715 +| epoch 3 | 4395/ 5600 batches | train loss 0.4171208 +| epoch 3 | 4399/ 5600 batches | train loss 0.5214595 +| epoch 3 | 4403/ 5600 batches | train loss 0.5083359 +| epoch 3 | 4407/ 5600 batches | train loss 0.3849486 +| epoch 3 | 4411/ 5600 batches | train loss 0.5102412 +| epoch 3 | 4415/ 5600 batches | train loss 0.3941130 +| epoch 3 | 4419/ 5600 batches | train loss 0.4011929 +| epoch 3 | 4423/ 5600 batches | train loss 0.4407375 +| epoch 3 | 4427/ 5600 batches | train loss 0.3343518 +| epoch 3 | 4431/ 5600 batches | train loss 0.4207348 +| epoch 3 | 4435/ 5600 batches | train loss 0.4411889 +| epoch 3 | 4439/ 5600 batches | train loss 0.5212187 +| epoch 3 | 4443/ 5600 batches | train loss 0.4807609 +| epoch 3 | 4447/ 5600 batches | train loss 0.4572819 +| epoch 3 | 4451/ 5600 batches | train loss 0.4263496 +| epoch 3 | 4455/ 5600 batches | train loss 0.5044882 +| epoch 3 | 4459/ 5600 batches | train loss 0.3536373 +| epoch 3 | 4463/ 5600 batches | train loss 0.4339090 +| epoch 3 | 4467/ 5600 batches | train loss 0.3790696 +| epoch 3 | 4471/ 5600 batches | train loss 0.4157659 +| epoch 3 | 4475/ 5600 batches | train loss 0.4247003 +| epoch 3 | 4479/ 5600 batches | train loss 0.3720878 +| epoch 3 | 4483/ 5600 batches | train loss 0.4250699 +| epoch 3 | 4487/ 5600 batches | train loss 0.4819219 +| epoch 3 | 4491/ 5600 batches | train loss 0.4920383 +| epoch 3 | 4495/ 5600 batches | train loss 0.3864862 +| epoch 3 | 4499/ 5600 batches | train loss 0.4379697 +| epoch 3 | 4503/ 5600 batches | train loss 0.4771738 +| epoch 3 | 4507/ 5600 batches | train loss 0.4270452 +| epoch 3 | 4511/ 5600 batches | train loss 0.4200868 +| epoch 3 | 4515/ 5600 batches | train loss 0.4454346 +| epoch 3 | 4519/ 5600 batches | train loss 0.5489385 +| epoch 3 | 4523/ 5600 batches | train loss 0.5055290 +| epoch 3 | 4527/ 5600 batches | train loss 0.3993215 +| epoch 3 | 4531/ 5600 batches | train loss 0.4257913 +| epoch 3 | 4535/ 5600 batches | train loss 0.3377058 +| epoch 3 | 4539/ 5600 batches | train loss 0.4662780 +| epoch 3 | 4543/ 5600 batches | train loss 0.3961416 +| epoch 3 | 4547/ 5600 batches | train loss 0.5278471 +| epoch 3 | 4551/ 5600 batches | train loss 0.2948909 +| epoch 3 | 4555/ 5600 batches | train loss 0.4420178 +| epoch 3 | 4559/ 5600 batches | train loss 0.4714656 +| epoch 3 | 4563/ 5600 batches | train loss 0.4866615 +| epoch 3 | 4567/ 5600 batches | train loss 0.4185169 +| epoch 3 | 4571/ 5600 batches | train loss 0.3838717 +| epoch 3 | 4575/ 5600 batches | train loss 0.4567983 +| epoch 3 | 4579/ 5600 batches | train loss 0.4691179 +| epoch 3 | 4583/ 5600 batches | train loss 0.4310558 +| epoch 3 | 4587/ 5600 batches | train loss 0.4378896 +| epoch 3 | 4591/ 5600 batches | train loss 0.3682601 +| epoch 3 | 4595/ 5600 batches | train loss 0.4933328 +| epoch 3 | 4599/ 5600 batches | train loss 0.3470039 +| epoch 3 | 4603/ 5600 batches | train loss 0.4583564 +| epoch 3 | 4607/ 5600 batches | train loss 0.4310948 +| epoch 3 | 4611/ 5600 batches | train loss 0.4693150 +| epoch 3 | 4615/ 5600 batches | train loss 0.3849925 +| epoch 3 | 4619/ 5600 batches | train loss 0.4762488 +| epoch 3 | 4623/ 5600 batches | train loss 0.4214932 +| epoch 3 | 4627/ 5600 batches | train loss 0.4172921 +| epoch 3 | 4631/ 5600 batches | train loss 0.4969559 +| epoch 3 | 4635/ 5600 batches | train loss 0.3969808 +| epoch 3 | 4639/ 5600 batches | train loss 0.4751256 +| epoch 3 | 4643/ 5600 batches | train loss 0.4189523 +| epoch 3 | 4647/ 5600 batches | train loss 0.4783245 +| epoch 3 | 4651/ 5600 batches | train loss 0.4295206 +| epoch 3 | 4655/ 5600 batches | train loss 0.5486791 +| epoch 3 | 4659/ 5600 batches | train loss 0.4542888 +| epoch 3 | 4663/ 5600 batches | train loss 0.4537375 +| epoch 3 | 4667/ 5600 batches | train loss 0.5181599 +| epoch 3 | 4671/ 5600 batches | train loss 0.4511838 +| epoch 3 | 4675/ 5600 batches | train loss 0.4342940 +| epoch 3 | 4679/ 5600 batches | train loss 0.4101075 +| epoch 3 | 4683/ 5600 batches | train loss 0.4139469 +| epoch 3 | 4687/ 5600 batches | train loss 0.4535292 +| epoch 3 | 4691/ 5600 batches | train loss 0.4074196 +| epoch 3 | 4695/ 5600 batches | train loss 0.4288292 +| epoch 3 | 4699/ 5600 batches | train loss 0.4821080 +| epoch 3 | 4703/ 5600 batches | train loss 0.4149006 +| epoch 3 | 4707/ 5600 batches | train loss 0.5120600 +| epoch 3 | 4711/ 5600 batches | train loss 0.5170691 +| epoch 3 | 4715/ 5600 batches | train loss 0.3575893 +| epoch 3 | 4719/ 5600 batches | train loss 0.3526710 +| epoch 3 | 4723/ 5600 batches | train loss 0.4995947 +| epoch 3 | 4727/ 5600 batches | train loss 0.4675295 +| epoch 3 | 4731/ 5600 batches | train loss 0.4288404 +| epoch 3 | 4735/ 5600 batches | train loss 0.3482005 +| epoch 3 | 4739/ 5600 batches | train loss 0.2885512 +| epoch 3 | 4743/ 5600 batches | train loss 0.5058483 +| epoch 3 | 4747/ 5600 batches | train loss 0.4022015 +| epoch 3 | 4751/ 5600 batches | train loss 0.4205616 +| epoch 3 | 4755/ 5600 batches | train loss 0.5691546 +| epoch 3 | 4759/ 5600 batches | train loss 0.4650783 +| epoch 3 | 4763/ 5600 batches | train loss 0.4584628 +| epoch 3 | 4767/ 5600 batches | train loss 0.4450406 +| epoch 3 | 4771/ 5600 batches | train loss 0.4154186 +| epoch 3 | 4775/ 5600 batches | train loss 0.3877782 +| epoch 3 | 4779/ 5600 batches | train loss 0.3498004 +| epoch 3 | 4783/ 5600 batches | train loss 0.4276288 +| epoch 3 | 4787/ 5600 batches | train loss 0.4913095 +| epoch 3 | 4791/ 5600 batches | train loss 0.4198384 +| epoch 3 | 4795/ 5600 batches | train loss 0.4315037 +| epoch 3 | 4799/ 5600 batches | train loss 0.4148070 +| epoch 3 | 4803/ 5600 batches | train loss 0.4467260 +| epoch 3 | 4807/ 5600 batches | train loss 0.4686309 +| epoch 3 | 4811/ 5600 batches | train loss 0.4833267 +| epoch 3 | 4815/ 5600 batches | train loss 0.5068455 +| epoch 3 | 4819/ 5600 batches | train loss 0.4025208 +| epoch 3 | 4823/ 5600 batches | train loss 0.3537770 +| epoch 3 | 4827/ 5600 batches | train loss 0.3574322 +| epoch 3 | 4831/ 5600 batches | train loss 0.4053536 +| epoch 3 | 4835/ 5600 batches | train loss 0.5359967 +| epoch 3 | 4839/ 5600 batches | train loss 0.4002200 +| epoch 3 | 4843/ 5600 batches | train loss 0.4265099 +| epoch 3 | 4847/ 5600 batches | train loss 0.4358175 +| epoch 3 | 4851/ 5600 batches | train loss 0.5062953 +| epoch 3 | 4855/ 5600 batches | train loss 0.3683007 +| epoch 3 | 4859/ 5600 batches | train loss 0.3473898 +| epoch 3 | 4863/ 5600 batches | train loss 0.4395453 +| epoch 3 | 4867/ 5600 batches | train loss 0.4461738 +| epoch 3 | 4871/ 5600 batches | train loss 0.3924973 +| epoch 3 | 4875/ 5600 batches | train loss 0.4117641 +| epoch 3 | 4879/ 5600 batches | train loss 0.4167763 +| epoch 3 | 4883/ 5600 batches | train loss 0.5274272 +| epoch 3 | 4887/ 5600 batches | train loss 0.4453602 +| epoch 3 | 4891/ 5600 batches | train loss 0.4464621 +| epoch 3 | 4895/ 5600 batches | train loss 0.4559398 +| epoch 3 | 4899/ 5600 batches | train loss 0.3826794 +| epoch 3 | 4903/ 5600 batches | train loss 0.5280125 +| epoch 3 | 4907/ 5600 batches | train loss 0.4424134 +| epoch 3 | 4911/ 5600 batches | train loss 0.4367490 +| epoch 3 | 4915/ 5600 batches | train loss 0.4316443 +| epoch 3 | 4919/ 5600 batches | train loss 0.5186869 +| epoch 3 | 4923/ 5600 batches | train loss 0.4385163 +| epoch 3 | 4927/ 5600 batches | train loss 0.3372045 +| epoch 3 | 4931/ 5600 batches | train loss 0.4016148 +| epoch 3 | 4935/ 5600 batches | train loss 0.4014484 +| epoch 3 | 4939/ 5600 batches | train loss 0.3859664 +| epoch 3 | 4943/ 5600 batches | train loss 0.3770071 +| epoch 3 | 4947/ 5600 batches | train loss 0.4669580 +| epoch 3 | 4951/ 5600 batches | train loss 0.4646429 +| epoch 3 | 4955/ 5600 batches | train loss 0.4794111 +| epoch 3 | 4959/ 5600 batches | train loss 0.4099801 +| epoch 3 | 4963/ 5600 batches | train loss 0.4518473 +| epoch 3 | 4967/ 5600 batches | train loss 0.4643258 +| epoch 3 | 4971/ 5600 batches | train loss 0.4071640 +| epoch 3 | 4975/ 5600 batches | train loss 0.4860736 +| epoch 3 | 4979/ 5600 batches | train loss 0.3464063 +| epoch 3 | 4983/ 5600 batches | train loss 0.3621034 +| epoch 3 | 4987/ 5600 batches | train loss 0.3386742 +| epoch 3 | 4991/ 5600 batches | train loss 0.3859203 +| epoch 3 | 4995/ 5600 batches | train loss 0.4594027 +| epoch 3 | 4999/ 5600 batches | train loss 0.4722853 +| epoch 3 | 5003/ 5600 batches | train loss 0.4144337 +| epoch 3 | 5007/ 5600 batches | train loss 0.4788744 +| epoch 3 | 5011/ 5600 batches | train loss 0.3547596 +| epoch 3 | 5015/ 5600 batches | train loss 0.3859072 +| epoch 3 | 5019/ 5600 batches | train loss 0.4310725 +| epoch 3 | 5023/ 5600 batches | train loss 0.4752210 +| epoch 3 | 5027/ 5600 batches | train loss 0.3702264 +| epoch 3 | 5031/ 5600 batches | train loss 0.3335214 +| epoch 3 | 5035/ 5600 batches | train loss 0.3738095 +| epoch 3 | 5039/ 5600 batches | train loss 0.2630160 +| epoch 3 | 5043/ 5600 batches | train loss 0.3718529 +| epoch 3 | 5047/ 5600 batches | train loss 0.5198519 +| epoch 3 | 5051/ 5600 batches | train loss 0.3889369 +| epoch 3 | 5055/ 5600 batches | train loss 0.4801240 +| epoch 3 | 5059/ 5600 batches | train loss 0.4668784 +| epoch 3 | 5063/ 5600 batches | train loss 0.5164362 +| epoch 3 | 5067/ 5600 batches | train loss 0.3435256 +| epoch 3 | 5071/ 5600 batches | train loss 0.3975760 +| epoch 3 | 5075/ 5600 batches | train loss 0.3684580 +| epoch 3 | 5079/ 5600 batches | train loss 0.3878467 +| epoch 3 | 5083/ 5600 batches | train loss 0.4585396 +| epoch 3 | 5087/ 5600 batches | train loss 0.4645756 +| epoch 3 | 5091/ 5600 batches | train loss 0.4258236 +| epoch 3 | 5095/ 5600 batches | train loss 0.4986678 +| epoch 3 | 5099/ 5600 batches | train loss 0.3154726 +| epoch 3 | 5103/ 5600 batches | train loss 0.4070895 +| epoch 3 | 5107/ 5600 batches | train loss 0.3875858 +| epoch 3 | 5111/ 5600 batches | train loss 0.4791747 +| epoch 3 | 5115/ 5600 batches | train loss 0.4412692 +| epoch 3 | 5119/ 5600 batches | train loss 0.4322127 +| epoch 3 | 5123/ 5600 batches | train loss 0.4453410 +| epoch 3 | 5127/ 5600 batches | train loss 0.4551459 +| epoch 3 | 5131/ 5600 batches | train loss 0.4477708 +| epoch 3 | 5135/ 5600 batches | train loss 0.3778468 +| epoch 3 | 5139/ 5600 batches | train loss 0.4831519 +| epoch 3 | 5143/ 5600 batches | train loss 0.5200400 +| epoch 3 | 5147/ 5600 batches | train loss 0.3857542 +| epoch 3 | 5151/ 5600 batches | train loss 0.3885433 +| epoch 3 | 5155/ 5600 batches | train loss 0.4661998 +| epoch 3 | 5159/ 5600 batches | train loss 0.5059516 +| epoch 3 | 5163/ 5600 batches | train loss 0.3559267 +| epoch 3 | 5167/ 5600 batches | train loss 0.4492541 +| epoch 3 | 5171/ 5600 batches | train loss 0.3822134 +| epoch 3 | 5175/ 5600 batches | train loss 0.3890851 +| epoch 3 | 5179/ 5600 batches | train loss 0.4466900 +| epoch 3 | 5183/ 5600 batches | train loss 0.4950698 +| epoch 3 | 5187/ 5600 batches | train loss 0.3707894 +| epoch 3 | 5191/ 5600 batches | train loss 0.4559082 +| epoch 3 | 5195/ 5600 batches | train loss 0.4601908 +| epoch 3 | 5199/ 5600 batches | train loss 0.4554346 +| epoch 3 | 5203/ 5600 batches | train loss 0.5209782 +| epoch 3 | 5207/ 5600 batches | train loss 0.4397540 +| epoch 3 | 5211/ 5600 batches | train loss 0.4299558 +| epoch 3 | 5215/ 5600 batches | train loss 0.4780206 +| epoch 3 | 5219/ 5600 batches | train loss 0.4810529 +| epoch 3 | 5223/ 5600 batches | train loss 0.4147907 +| epoch 3 | 5227/ 5600 batches | train loss 0.3648536 +| epoch 3 | 5231/ 5600 batches | train loss 0.4402556 +| epoch 3 | 5235/ 5600 batches | train loss 0.4048130 +| epoch 3 | 5239/ 5600 batches | train loss 0.3499224 +| epoch 3 | 5243/ 5600 batches | train loss 0.5442272 +| epoch 3 | 5247/ 5600 batches | train loss 0.5126254 +| epoch 3 | 5251/ 5600 batches | train loss 0.3936111 +| epoch 3 | 5255/ 5600 batches | train loss 0.5347829 +| epoch 3 | 5259/ 5600 batches | train loss 0.4077891 +| epoch 3 | 5263/ 5600 batches | train loss 0.4530931 +| epoch 3 | 5267/ 5600 batches | train loss 0.4276291 +| epoch 3 | 5271/ 5600 batches | train loss 0.3431069 +| epoch 3 | 5275/ 5600 batches | train loss 0.4205106 +| epoch 3 | 5279/ 5600 batches | train loss 0.3608408 +| epoch 3 | 5283/ 5600 batches | train loss 0.3632302 +| epoch 3 | 5287/ 5600 batches | train loss 0.4733286 +| epoch 3 | 5291/ 5600 batches | train loss 0.4298329 +| epoch 3 | 5295/ 5600 batches | train loss 0.4260285 +| epoch 3 | 5299/ 5600 batches | train loss 0.4507765 +| epoch 3 | 5303/ 5600 batches | train loss 0.4897984 +| epoch 3 | 5307/ 5600 batches | train loss 0.3784434 +| epoch 3 | 5311/ 5600 batches | train loss 0.5244337 +| epoch 3 | 5315/ 5600 batches | train loss 0.4459701 +| epoch 3 | 5319/ 5600 batches | train loss 0.4377145 +| epoch 3 | 5323/ 5600 batches | train loss 0.4694611 +| epoch 3 | 5327/ 5600 batches | train loss 0.4697883 +| epoch 3 | 5331/ 5600 batches | train loss 0.4009995 +| epoch 3 | 5335/ 5600 batches | train loss 0.3970987 +| epoch 3 | 5339/ 5600 batches | train loss 0.4464167 +| epoch 3 | 5343/ 5600 batches | train loss 0.5512725 +| epoch 3 | 5347/ 5600 batches | train loss 0.3684326 +| epoch 3 | 5351/ 5600 batches | train loss 0.3209206 +| epoch 3 | 5355/ 5600 batches | train loss 0.4330521 +| epoch 3 | 5359/ 5600 batches | train loss 0.4335207 +| epoch 3 | 5363/ 5600 batches | train loss 0.4611964 +| epoch 3 | 5367/ 5600 batches | train loss 0.4681481 +| epoch 3 | 5371/ 5600 batches | train loss 0.4752498 +| epoch 3 | 5375/ 5600 batches | train loss 0.3232527 +| epoch 3 | 5379/ 5600 batches | train loss 0.4518696 +| epoch 3 | 5383/ 5600 batches | train loss 0.4137031 +| epoch 3 | 5387/ 5600 batches | train loss 0.4978952 +| epoch 3 | 5391/ 5600 batches | train loss 0.4416341 +| epoch 3 | 5395/ 5600 batches | train loss 0.4626904 +| epoch 3 | 5399/ 5600 batches | train loss 0.4853056 +| epoch 3 | 5403/ 5600 batches | train loss 0.4420560 +| epoch 3 | 5407/ 5600 batches | train loss 0.3492326 +| epoch 3 | 5411/ 5600 batches | train loss 0.4098504 +| epoch 3 | 5415/ 5600 batches | train loss 0.5538447 +| epoch 3 | 5419/ 5600 batches | train loss 0.4098405 +| epoch 3 | 5423/ 5600 batches | train loss 0.4592195 +| epoch 3 | 5427/ 5600 batches | train loss 0.4231699 +| epoch 3 | 5431/ 5600 batches | train loss 0.4925768 +| epoch 3 | 5435/ 5600 batches | train loss 0.3999775 +| epoch 3 | 5439/ 5600 batches | train loss 0.4401366 +| epoch 3 | 5443/ 5600 batches | train loss 0.4488282 +| epoch 3 | 5447/ 5600 batches | train loss 0.4459643 +| epoch 3 | 5451/ 5600 batches | train loss 0.4408312 +| epoch 3 | 5455/ 5600 batches | train loss 0.4165313 +| epoch 3 | 5459/ 5600 batches | train loss 0.4370912 +| epoch 3 | 5463/ 5600 batches | train loss 0.2943360 +| epoch 3 | 5467/ 5600 batches | train loss 0.4516386 +| epoch 3 | 5471/ 5600 batches | train loss 0.3993952 +| epoch 3 | 5475/ 5600 batches | train loss 0.4688838 +| epoch 3 | 5479/ 5600 batches | train loss 0.4128251 +| epoch 3 | 5483/ 5600 batches | train loss 0.4914386 +| epoch 3 | 5487/ 5600 batches | train loss 0.4205581 +| epoch 3 | 5491/ 5600 batches | train loss 0.4332275 +| epoch 3 | 5495/ 5600 batches | train loss 0.4724156 +| epoch 3 | 5499/ 5600 batches | train loss 0.4503969 +| epoch 3 | 5503/ 5600 batches | train loss 0.3620658 +| epoch 3 | 5507/ 5600 batches | train loss 0.3499602 +| epoch 3 | 5511/ 5600 batches | train loss 0.4181179 +| epoch 3 | 5515/ 5600 batches | train loss 0.3737004 +| epoch 3 | 5519/ 5600 batches | train loss 0.4000061 +| epoch 3 | 5523/ 5600 batches | train loss 0.4628996 +| epoch 3 | 5527/ 5600 batches | train loss 0.4517257 +| epoch 3 | 5531/ 5600 batches | train loss 0.4211268 +| epoch 3 | 5535/ 5600 batches | train loss 0.4049473 +| epoch 3 | 5539/ 5600 batches | train loss 0.4770236 +| epoch 3 | 5543/ 5600 batches | train loss 0.4846233 +| epoch 3 | 5547/ 5600 batches | train loss 0.4335668 +| epoch 3 | 5551/ 5600 batches | train loss 0.4451946 +| epoch 3 | 5555/ 5600 batches | train loss 0.4087884 +| epoch 3 | 5559/ 5600 batches | train loss 0.4454355 +| epoch 3 | 5563/ 5600 batches | train loss 0.4213924 +| epoch 3 | 5567/ 5600 batches | train loss 0.5039622 +| epoch 3 | 5571/ 5600 batches | train loss 0.3727506 +| epoch 3 | 5575/ 5600 batches | train loss 0.4707736 +| epoch 3 | 5579/ 5600 batches | train loss 0.4314880 +| epoch 3 | 5583/ 5600 batches | train loss 0.5139623 +| epoch 3 | 5587/ 5600 batches | train loss 0.4662548 +| epoch 3 | 5591/ 5600 batches | train loss 0.4792375 +| epoch 3 | 5595/ 5600 batches | train loss 0.3957848 +| epoch 3 | 5599/ 5600 batches | train loss 0.3600895 +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 5600 batches | test loss 0.4348329 +| epoch 3 | 7/ 5600 batches | test loss 0.4979467 +| epoch 3 | 11/ 5600 batches | test loss 0.4386799 +| epoch 3 | 15/ 5600 batches | test loss 0.4990900 +| epoch 3 | 19/ 5600 batches | test loss 0.4284548 +| epoch 3 | 23/ 5600 batches | test loss 0.3931679 +| epoch 3 | 27/ 5600 batches | test loss 0.5817227 +| epoch 3 | 31/ 5600 batches | test loss 0.4906456 +| epoch 3 | 35/ 5600 batches | test loss 0.3841953 +| epoch 3 | 39/ 5600 batches | test loss 0.5735199 +| epoch 3 | 43/ 5600 batches | test loss 0.4657383 +| epoch 3 | 47/ 5600 batches | test loss 0.4911761 +| epoch 3 | 51/ 5600 batches | test loss 0.4821792 +| epoch 3 | 55/ 5600 batches | test loss 0.4500337 +| epoch 3 | 59/ 5600 batches | test loss 0.3924308 +| epoch 3 | 63/ 5600 batches | test loss 0.4093072 +| epoch 3 | 67/ 5600 batches | test loss 0.5026035 +| epoch 3 | 71/ 5600 batches | test loss 0.5242273 +| epoch 3 | 75/ 5600 batches | test loss 0.3898652 +| epoch 3 | 79/ 5600 batches | test loss 0.3933540 +| epoch 3 | 83/ 5600 batches | test loss 0.4863752 +| epoch 3 | 87/ 5600 batches | test loss 0.3560985 +| epoch 3 | 91/ 5600 batches | test loss 0.4749564 +| epoch 3 | 95/ 5600 batches | test loss 0.4865756 +| epoch 3 | 99/ 5600 batches | test loss 0.4561239 +| epoch 3 | 103/ 5600 batches | test loss 0.3787941 +| epoch 3 | 107/ 5600 batches | test loss 0.4007404 +| epoch 3 | 111/ 5600 batches | test loss 0.4805051 +| epoch 3 | 115/ 5600 batches | test loss 0.4318085 +| epoch 3 | 119/ 5600 batches | test loss 0.5253646 +| epoch 3 | 123/ 5600 batches | test loss 0.4355161 +| epoch 3 | 127/ 5600 batches | test loss 0.3536392 +| epoch 3 | 131/ 5600 batches | test loss 0.3180274 +| epoch 3 | 135/ 5600 batches | test loss 0.6085494 +| epoch 3 | 139/ 5600 batches | test loss 0.4742198 +| epoch 3 | 143/ 5600 batches | test loss 0.4506912 +| epoch 3 | 147/ 5600 batches | test loss 0.4182671 +| epoch 3 | 151/ 5600 batches | test loss 0.4392949 +| epoch 3 | 155/ 5600 batches | test loss 0.4139593 +| epoch 3 | 159/ 5600 batches | test loss 0.4271582 +| epoch 3 | 163/ 5600 batches | test loss 0.3466879 +| epoch 3 | 167/ 5600 batches | test loss 0.4259845 +| epoch 3 | 171/ 5600 batches | test loss 0.3620512 +| epoch 3 | 175/ 5600 batches | test loss 0.4611187 +| epoch 3 | 179/ 5600 batches | test loss 0.3763726 +| epoch 3 | 183/ 5600 batches | test loss 0.3696866 +| epoch 3 | 187/ 5600 batches | test loss 0.4144356 +| epoch 3 | 191/ 5600 batches | test loss 0.4778399 +| epoch 3 | 195/ 5600 batches | test loss 0.5865427 +| epoch 3 | 199/ 5600 batches | test loss 0.3648919 +| epoch 3 | 203/ 5600 batches | test loss 0.5340946 +| epoch 3 | 207/ 5600 batches | test loss 0.4854143 +| epoch 3 | 211/ 5600 batches | test loss 0.4525675 +| epoch 3 | 215/ 5600 batches | test loss 0.4615694 +| epoch 3 | 219/ 5600 batches | test loss 0.4510439 +| epoch 3 | 223/ 5600 batches | test loss 0.4695826 +| epoch 3 | 227/ 5600 batches | test loss 0.3868496 +| epoch 3 | 231/ 5600 batches | test loss 0.4440941 +| epoch 3 | 235/ 5600 batches | test loss 0.5056996 +| epoch 3 | 239/ 5600 batches | test loss 0.4247164 +| epoch 3 | 243/ 5600 batches | test loss 0.4263557 +| epoch 3 | 247/ 5600 batches | test loss 0.4831929 +| epoch 3 | 251/ 5600 batches | test loss 0.4955285 +| epoch 3 | 255/ 5600 batches | test loss 0.4809200 +| epoch 3 | 259/ 5600 batches | test loss 0.4881160 +| epoch 3 | 263/ 5600 batches | test loss 0.3662568 +| epoch 3 | 267/ 5600 batches | test loss 0.3967468 +| epoch 3 | 271/ 5600 batches | test loss 0.4854550 +| epoch 3 | 275/ 5600 batches | test loss 0.4078354 +| epoch 3 | 279/ 5600 batches | test loss 0.4557138 +| epoch 3 | 283/ 5600 batches | test loss 0.3834986 +| epoch 3 | 287/ 5600 batches | test loss 0.5002158 +| epoch 3 | 291/ 5600 batches | test loss 0.4062638 +| epoch 3 | 295/ 5600 batches | test loss 0.4557646 +| epoch 3 | 299/ 5600 batches | test loss 0.4023976 +| epoch 3 | 303/ 5600 batches | test loss 0.4382423 +| epoch 3 | 307/ 5600 batches | test loss 0.4018230 +| epoch 3 | 311/ 5600 batches | test loss 0.3887840 +| epoch 3 | 315/ 5600 batches | test loss 0.4116876 +| epoch 3 | 319/ 5600 batches | test loss 0.4201042 +| epoch 3 | 323/ 5600 batches | test loss 0.5349233 +| epoch 3 | 327/ 5600 batches | test loss 0.4999796 +| epoch 3 | 331/ 5600 batches | test loss 0.4108624 +| epoch 3 | 335/ 5600 batches | test loss 0.4467248 +| epoch 3 | 339/ 5600 batches | test loss 0.4590688 +| epoch 3 | 343/ 5600 batches | test loss 0.4376638 +| epoch 3 | 347/ 5600 batches | test loss 0.4688471 +| epoch 3 | 351/ 5600 batches | test loss 0.4311923 +| epoch 3 | 355/ 5600 batches | test loss 0.4226285 +| epoch 3 | 359/ 5600 batches | test loss 0.4583767 +| epoch 3 | 363/ 5600 batches | test loss 0.4802995 +| epoch 3 | 367/ 5600 batches | test loss 0.3661018 +| epoch 3 | 371/ 5600 batches | test loss 0.4501898 +| epoch 3 | 375/ 5600 batches | test loss 0.6001019 +| epoch 3 | 379/ 5600 batches | test loss 0.3748393 +| epoch 3 | 383/ 5600 batches | test loss 0.5064062 +| epoch 3 | 387/ 5600 batches | test loss 0.4436550 +| epoch 3 | 391/ 5600 batches | test loss 0.3970544 +| epoch 3 | 395/ 5600 batches | test loss 0.4132106 +| epoch 3 | 399/ 5600 batches | test loss 0.4711731 +| epoch 3 | 403/ 5600 batches | test loss 0.3541722 +| epoch 3 | 407/ 5600 batches | test loss 0.5164287 +| epoch 3 | 411/ 5600 batches | test loss 0.5241885 +| epoch 3 | 415/ 5600 batches | test loss 0.4712974 +| epoch 3 | 419/ 5600 batches | test loss 0.4591444 +| epoch 3 | 423/ 5600 batches | test loss 0.4647620 +| epoch 3 | 427/ 5600 batches | test loss 0.5131253 +| epoch 3 | 431/ 5600 batches | test loss 0.3970795 +| epoch 3 | 435/ 5600 batches | test loss 0.4708496 +| epoch 3 | 439/ 5600 batches | test loss 0.4539040 +| epoch 3 | 443/ 5600 batches | test loss 0.3834777 +| epoch 3 | 447/ 5600 batches | test loss 0.4409002 +| epoch 3 | 451/ 5600 batches | test loss 0.4227395 +| epoch 3 | 455/ 5600 batches | test loss 0.3961386 +| epoch 3 | 459/ 5600 batches | test loss 0.4661279 +| epoch 3 | 463/ 5600 batches | test loss 0.5127457 +| epoch 3 | 467/ 5600 batches | test loss 0.4812554 +| epoch 3 | 471/ 5600 batches | test loss 0.4304859 +| epoch 3 | 475/ 5600 batches | test loss 0.5206614 +| epoch 3 | 479/ 5600 batches | test loss 0.4390474 +| epoch 3 | 483/ 5600 batches | test loss 0.5199444 +| epoch 3 | 487/ 5600 batches | test loss 0.4887957 +| epoch 3 | 491/ 5600 batches | test loss 0.4948373 +| epoch 3 | 495/ 5600 batches | test loss 0.5430543 +| epoch 3 | 499/ 5600 batches | test loss 0.3906574 +| epoch 3 | 503/ 5600 batches | test loss 0.4367315 +| epoch 3 | 507/ 5600 batches | test loss 0.4406679 +| epoch 3 | 511/ 5600 batches | test loss 0.4310203 +| epoch 3 | 515/ 5600 batches | test loss 0.4101844 +| epoch 3 | 519/ 5600 batches | test loss 0.4287848 +| epoch 3 | 523/ 5600 batches | test loss 0.4906553 +| epoch 3 | 527/ 5600 batches | test loss 0.4106646 +| epoch 3 | 531/ 5600 batches | test loss 0.3871172 +| epoch 3 | 535/ 5600 batches | test loss 0.4738815 +| epoch 3 | 539/ 5600 batches | test loss 0.4363877 +| epoch 3 | 543/ 5600 batches | test loss 0.4926101 +| epoch 3 | 547/ 5600 batches | test loss 0.4254556 +| epoch 3 | 551/ 5600 batches | test loss 0.4470390 +| epoch 3 | 555/ 5600 batches | test loss 0.4579976 +| epoch 3 | 559/ 5600 batches | test loss 0.5060208 +| epoch 3 | 563/ 5600 batches | test loss 0.3516251 +| epoch 3 | 567/ 5600 batches | test loss 0.4175415 +| epoch 3 | 571/ 5600 batches | test loss 0.4499536 +| epoch 3 | 575/ 5600 batches | test loss 0.3889906 +| epoch 3 | 579/ 5600 batches | test loss 0.3525406 +| epoch 3 | 583/ 5600 batches | test loss 0.4216935 +| epoch 3 | 587/ 5600 batches | test loss 0.5489336 +| epoch 3 | 591/ 5600 batches | test loss 0.6408167 +| epoch 3 | 595/ 5600 batches | test loss 0.3809589 +| epoch 3 | 599/ 5600 batches | test loss 0.4138927 +| epoch 3 | 603/ 5600 batches | test loss 0.4259600 +| epoch 3 | 607/ 5600 batches | test loss 0.4415818 +| epoch 3 | 611/ 5600 batches | test loss 0.4564929 +| epoch 3 | 615/ 5600 batches | test loss 0.4760638 +| epoch 3 | 619/ 5600 batches | test loss 0.4287894 +| epoch 3 | 623/ 5600 batches | test loss 0.3735062 +| epoch 3 | 627/ 5600 batches | test loss 0.6125942 +| epoch 3 | 631/ 5600 batches | test loss 0.4063322 +| epoch 3 | 635/ 5600 batches | test loss 0.4476544 +| epoch 3 | 639/ 5600 batches | test loss 0.3913985 +| epoch 3 | 643/ 5600 batches | test loss 0.5529930 +| epoch 3 | 647/ 5600 batches | test loss 0.4592736 +| epoch 3 | 651/ 5600 batches | test loss 0.5031431 +| epoch 3 | 655/ 5600 batches | test loss 0.4198757 +| epoch 3 | 659/ 5600 batches | test loss 0.3879392 +| epoch 3 | 663/ 5600 batches | test loss 0.4296749 +| epoch 3 | 667/ 5600 batches | test loss 0.4736854 +| epoch 3 | 671/ 5600 batches | test loss 0.3704009 +| epoch 3 | 675/ 5600 batches | test loss 0.3948188 +| epoch 3 | 679/ 5600 batches | test loss 0.4375044 +| epoch 3 | 683/ 5600 batches | test loss 0.4302287 +| epoch 3 | 687/ 5600 batches | test loss 0.4681526 +| epoch 3 | 691/ 5600 batches | test loss 0.4186614 +| epoch 3 | 695/ 5600 batches | test loss 0.4067076 +| epoch 3 | 699/ 5600 batches | test loss 0.4768423 +| epoch 3 | 703/ 5600 batches | test loss 0.4722856 +| epoch 3 | 707/ 5600 batches | test loss 0.3851238 +| epoch 3 | 711/ 5600 batches | test loss 0.5095369 +| epoch 3 | 715/ 5600 batches | test loss 0.4298268 +| epoch 3 | 719/ 5600 batches | test loss 0.7092562 +| epoch 3 | 723/ 5600 batches | test loss 0.3948832 +| epoch 3 | 727/ 5600 batches | test loss 0.3692536 +| epoch 3 | 731/ 5600 batches | test loss 0.4658656 +| epoch 3 | 735/ 5600 batches | test loss 0.4157875 +| epoch 3 | 739/ 5600 batches | test loss 0.4988033 +| epoch 3 | 743/ 5600 batches | test loss 0.5040582 +| epoch 3 | 747/ 5600 batches | test loss 0.3776706 +| epoch 3 | 751/ 5600 batches | test loss 0.4288219 +| epoch 3 | 755/ 5600 batches | test loss 0.5224317 +| epoch 3 | 759/ 5600 batches | test loss 0.3746833 +| epoch 3 | 763/ 5600 batches | test loss 0.4886962 +| epoch 3 | 767/ 5600 batches | test loss 0.3782906 +| epoch 3 | 771/ 5600 batches | test loss 0.4739340 +| epoch 3 | 775/ 5600 batches | test loss 0.3405260 +| epoch 3 | 779/ 5600 batches | test loss 0.4052059 +| epoch 3 | 783/ 5600 batches | test loss 0.3830920 +| epoch 3 | 787/ 5600 batches | test loss 0.5017078 +| epoch 3 | 791/ 5600 batches | test loss 0.4279532 +| epoch 3 | 795/ 5600 batches | test loss 0.5583295 +| epoch 3 | 799/ 5600 batches | test loss 0.4742228 +| epoch 3 | 803/ 5600 batches | test loss 0.4091988 +| epoch 3 | 807/ 5600 batches | test loss 0.3649247 +| epoch 3 | 811/ 5600 batches | test loss 0.4762620 +| epoch 3 | 815/ 5600 batches | test loss 0.5064780 +| epoch 3 | 819/ 5600 batches | test loss 0.4503399 +| epoch 3 | 823/ 5600 batches | test loss 0.4731249 +| epoch 3 | 827/ 5600 batches | test loss 0.3860903 +| epoch 3 | 831/ 5600 batches | test loss 0.4586377 +| epoch 3 | 835/ 5600 batches | test loss 0.3079180 +| epoch 3 | 839/ 5600 batches | test loss 0.4545802 +| epoch 3 | 843/ 5600 batches | test loss 0.4512716 +| epoch 3 | 847/ 5600 batches | test loss 0.5210087 +| epoch 3 | 851/ 5600 batches | test loss 0.4993462 +| epoch 3 | 855/ 5600 batches | test loss 0.5284594 +| epoch 3 | 859/ 5600 batches | test loss 0.4426101 +| epoch 3 | 863/ 5600 batches | test loss 0.5027142 +| epoch 3 | 867/ 5600 batches | test loss 0.4815471 +| epoch 3 | 871/ 5600 batches | test loss 0.4837633 +| epoch 3 | 875/ 5600 batches | test loss 0.4825671 +| epoch 3 | 879/ 5600 batches | test loss 0.4420696 +| epoch 3 | 883/ 5600 batches | test loss 0.4718763 +| epoch 3 | 887/ 5600 batches | test loss 0.4591067 +| epoch 3 | 891/ 5600 batches | test loss 0.4538735 +| epoch 3 | 895/ 5600 batches | test loss 0.4955012 +| epoch 3 | 899/ 5600 batches | test loss 0.4046268 +| epoch 3 | 903/ 5600 batches | test loss 0.5183216 +| epoch 3 | 907/ 5600 batches | test loss 0.4750884 +| epoch 3 | 911/ 5600 batches | test loss 0.4266484 +| epoch 3 | 915/ 5600 batches | test loss 0.5407579 +| epoch 3 | 919/ 5600 batches | test loss 0.4169811 +| epoch 3 | 923/ 5600 batches | test loss 0.4644544 +| epoch 3 | 927/ 5600 batches | test loss 0.3976838 +| epoch 3 | 931/ 5600 batches | test loss 0.4348829 +| epoch 3 | 935/ 5600 batches | test loss 0.4186524 +| epoch 3 | 939/ 5600 batches | test loss 0.4710830 +| epoch 3 | 943/ 5600 batches | test loss 0.3689543 +| epoch 3 | 947/ 5600 batches | test loss 0.4401859 +| epoch 3 | 951/ 5600 batches | test loss 0.4357505 +| epoch 3 | 955/ 5600 batches | test loss 0.3925492 +| epoch 3 | 959/ 5600 batches | test loss 0.4717116 +| epoch 3 | 963/ 5600 batches | test loss 0.4258346 +| epoch 3 | 967/ 5600 batches | test loss 0.5275119 +| epoch 3 | 971/ 5600 batches | test loss 0.3520526 +| epoch 3 | 975/ 5600 batches | test loss 0.4651155 +| epoch 3 | 979/ 5600 batches | test loss 0.3988379 +| epoch 3 | 983/ 5600 batches | test loss 0.3939650 +| epoch 3 | 987/ 5600 batches | test loss 0.3336898 +| epoch 3 | 991/ 5600 batches | test loss 0.5272617 +| epoch 3 | 995/ 5600 batches | test loss 0.4066194 +| epoch 3 | 999/ 5600 batches | test loss 0.4630404 +| epoch 3 | 1003/ 5600 batches | test loss 0.4768568 +| epoch 3 | 1007/ 5600 batches | test loss 0.4414781 +| epoch 3 | 1011/ 5600 batches | test loss 0.4116430 +| epoch 3 | 1015/ 5600 batches | test loss 0.4203209 +| epoch 3 | 1019/ 5600 batches | test loss 0.4431416 +| epoch 3 | 1023/ 5600 batches | test loss 0.4158695 +| epoch 3 | 1027/ 5600 batches | test loss 0.3911240 +| epoch 3 | 1031/ 5600 batches | test loss 0.5361501 +| epoch 3 | 1035/ 5600 batches | test loss 0.4636791 +| epoch 3 | 1039/ 5600 batches | test loss 0.3917784 +| epoch 3 | 1043/ 5600 batches | test loss 0.4994192 +| epoch 3 | 1047/ 5600 batches | test loss 0.4264916 +| epoch 3 | 1051/ 5600 batches | test loss 0.4407488 +| epoch 3 | 1055/ 5600 batches | test loss 0.4807334 +| epoch 3 | 1059/ 5600 batches | test loss 0.4397216 +| epoch 3 | 1063/ 5600 batches | test loss 0.3937919 +| epoch 3 | 1067/ 5600 batches | test loss 0.3825989 +| epoch 3 | 1071/ 5600 batches | test loss 0.4674187 +| epoch 3 | 1075/ 5600 batches | test loss 0.4790248 +| epoch 3 | 1079/ 5600 batches | test loss 0.4946425 +| epoch 3 | 1083/ 5600 batches | test loss 0.4128089 +| epoch 3 | 1087/ 5600 batches | test loss 0.4048470 +| epoch 3 | 1091/ 5600 batches | test loss 0.4265485 +| epoch 3 | 1095/ 5600 batches | test loss 0.6018052 +| epoch 3 | 1099/ 5600 batches | test loss 0.4704123 +| epoch 3 | 1103/ 5600 batches | test loss 0.4038273 +| epoch 3 | 1107/ 5600 batches | test loss 0.4945393 +| epoch 3 | 1111/ 5600 batches | test loss 0.3676506 +| epoch 3 | 1115/ 5600 batches | test loss 0.4888121 +| epoch 3 | 1119/ 5600 batches | test loss 0.3696884 +| epoch 3 | 1123/ 5600 batches | test loss 0.4999957 +| epoch 3 | 1127/ 5600 batches | test loss 0.5096967 +| epoch 3 | 1131/ 5600 batches | test loss 0.5529186 +| epoch 3 | 1135/ 5600 batches | test loss 0.2769639 +| epoch 3 | 1139/ 5600 batches | test loss 0.3678384 +| epoch 3 | 1143/ 5600 batches | test loss 0.4758446 +| epoch 3 | 1147/ 5600 batches | test loss 0.3915289 +| epoch 3 | 1151/ 5600 batches | test loss 0.3740585 +| epoch 3 | 1155/ 5600 batches | test loss 0.4153049 +| epoch 3 | 1159/ 5600 batches | test loss 0.4034850 +| epoch 3 | 1163/ 5600 batches | test loss 0.4392944 +| epoch 3 | 1167/ 5600 batches | test loss 0.4149621 +| epoch 3 | 1171/ 5600 batches | test loss 0.4125665 +| epoch 3 | 1175/ 5600 batches | test loss 0.3919003 +| epoch 3 | 1179/ 5600 batches | test loss 0.4784642 +| epoch 3 | 1183/ 5600 batches | test loss 0.5015288 +| epoch 3 | 1187/ 5600 batches | test loss 0.4093712 +| epoch 3 | 1191/ 5600 batches | test loss 0.4593484 +| epoch 3 | 1195/ 5600 batches | test loss 0.4029872 +| epoch 3 | 1199/ 5600 batches | test loss 0.4720694 +| epoch 3 | 1203/ 5600 batches | test loss 0.4417254 +| epoch 3 | 1207/ 5600 batches | test loss 0.4527046 +| epoch 3 | 1211/ 5600 batches | test loss 0.3969346 +| epoch 3 | 1215/ 5600 batches | test loss 0.4202838 +| epoch 3 | 1219/ 5600 batches | test loss 0.4936886 +| epoch 3 | 1223/ 5600 batches | test loss 0.4846113 +| epoch 3 | 1227/ 5600 batches | test loss 0.4590123 +| epoch 3 | 1231/ 5600 batches | test loss 0.4440042 +| epoch 3 | 1235/ 5600 batches | test loss 0.4592690 +| epoch 3 | 1239/ 5600 batches | test loss 0.4880574 +| epoch 3 | 1243/ 5600 batches | test loss 0.3648209 +| epoch 3 | 1247/ 5600 batches | test loss 0.4363886 +| epoch 3 | 1251/ 5600 batches | test loss 0.4681684 +| epoch 3 | 1255/ 5600 batches | test loss 0.4981259 +| epoch 3 | 1259/ 5600 batches | test loss 0.3750817 +| epoch 3 | 1263/ 5600 batches | test loss 0.4617646 +| epoch 3 | 1267/ 5600 batches | test loss 0.4616972 +| epoch 3 | 1271/ 5600 batches | test loss 0.3656940 +| epoch 3 | 1275/ 5600 batches | test loss 0.4684183 +| epoch 3 | 1279/ 5600 batches | test loss 0.5019379 +| epoch 3 | 1283/ 5600 batches | test loss 0.5000444 +| epoch 3 | 1287/ 5600 batches | test loss 0.4737949 +| epoch 3 | 1291/ 5600 batches | test loss 0.4829532 +| epoch 3 | 1295/ 5600 batches | test loss 0.5466265 +| epoch 3 | 1299/ 5600 batches | test loss 0.4469146 +| epoch 3 | 1303/ 5600 batches | test loss 0.3764861 +| epoch 3 | 1307/ 5600 batches | test loss 0.4934147 +| epoch 3 | 1311/ 5600 batches | test loss 0.5256735 +| epoch 3 | 1315/ 5600 batches | test loss 0.3811051 +| epoch 3 | 1319/ 5600 batches | test loss 0.4452061 +| epoch 3 | 1323/ 5600 batches | test loss 0.4902319 +| epoch 3 | 1327/ 5600 batches | test loss 0.4130394 +| epoch 3 | 1331/ 5600 batches | test loss 0.4673037 +| epoch 3 | 1335/ 5600 batches | test loss 0.5311176 +| epoch 3 | 1339/ 5600 batches | test loss 0.3976865 +| epoch 3 | 1343/ 5600 batches | test loss 0.5490916 +| epoch 3 | 1347/ 5600 batches | test loss 0.4363428 +| epoch 3 | 1351/ 5600 batches | test loss 0.3424369 +| epoch 3 | 1355/ 5600 batches | test loss 0.4353712 +| epoch 3 | 1359/ 5600 batches | test loss 0.4267820 +| epoch 3 | 1363/ 5600 batches | test loss 0.5003044 +| epoch 3 | 1367/ 5600 batches | test loss 0.4045261 +| epoch 3 | 1371/ 5600 batches | test loss 0.4515987 +| epoch 3 | 1375/ 5600 batches | test loss 0.3570463 +| epoch 3 | 1379/ 5600 batches | test loss 0.4897378 +| epoch 3 | 1383/ 5600 batches | test loss 0.4361133 +| epoch 3 | 1387/ 5600 batches | test loss 0.4418396 +| epoch 3 | 1391/ 5600 batches | test loss 0.4201902 +| epoch 3 | 1395/ 5600 batches | test loss 0.4621608 +| epoch 3 | 1399/ 5600 batches | test loss 0.4985104 +| epoch 3 | final test loss 0.4440, save model! +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 5600 batches | train loss 0.4190884 +| epoch 4 | 7/ 5600 batches | train loss 0.4160017 +| epoch 4 | 11/ 5600 batches | train loss 0.3546186 +| epoch 4 | 15/ 5600 batches | train loss 0.4437737 +| epoch 4 | 19/ 5600 batches | train loss 0.3720314 +| epoch 4 | 23/ 5600 batches | train loss 0.4187099 +| epoch 4 | 27/ 5600 batches | train loss 0.4668888 +| epoch 4 | 31/ 5600 batches | train loss 0.3843437 +| epoch 4 | 35/ 5600 batches | train loss 0.3718364 +| epoch 4 | 39/ 5600 batches | train loss 0.4833029 +| epoch 4 | 43/ 5600 batches | train loss 0.4697198 +| epoch 4 | 47/ 5600 batches | train loss 0.5321737 +| epoch 4 | 51/ 5600 batches | train loss 0.4588133 +| epoch 4 | 55/ 5600 batches | train loss 0.4707809 +| epoch 4 | 59/ 5600 batches | train loss 0.4526604 +| epoch 4 | 63/ 5600 batches | train loss 0.4161766 +| epoch 4 | 67/ 5600 batches | train loss 0.4113023 +| epoch 4 | 71/ 5600 batches | train loss 0.4803765 +| epoch 4 | 75/ 5600 batches | train loss 0.4642387 +| epoch 4 | 79/ 5600 batches | train loss 0.4276638 +| epoch 4 | 83/ 5600 batches | train loss 0.4177818 +| epoch 4 | 87/ 5600 batches | train loss 0.3664286 +| epoch 4 | 91/ 5600 batches | train loss 0.3867154 +| epoch 4 | 95/ 5600 batches | train loss 0.3077107 +| epoch 4 | 99/ 5600 batches | train loss 0.5022750 +| epoch 4 | 103/ 5600 batches | train loss 0.4328740 +| epoch 4 | 107/ 5600 batches | train loss 0.4197631 +| epoch 4 | 111/ 5600 batches | train loss 0.3549783 +| epoch 4 | 115/ 5600 batches | train loss 0.4273572 +| epoch 4 | 119/ 5600 batches | train loss 0.3992880 +| epoch 4 | 123/ 5600 batches | train loss 0.3700565 +| epoch 4 | 127/ 5600 batches | train loss 0.4163857 +| epoch 4 | 131/ 5600 batches | train loss 0.4369729 +| epoch 4 | 135/ 5600 batches | train loss 0.3894947 +| epoch 4 | 139/ 5600 batches | train loss 0.3881796 +| epoch 4 | 143/ 5600 batches | train loss 0.5318316 +| epoch 4 | 147/ 5600 batches | train loss 0.3610095 +| epoch 4 | 151/ 5600 batches | train loss 0.4648507 +| epoch 4 | 155/ 5600 batches | train loss 0.3530077 +| epoch 4 | 159/ 5600 batches | train loss 0.3779121 +| epoch 4 | 163/ 5600 batches | train loss 0.4312732 +| epoch 4 | 167/ 5600 batches | train loss 0.4291730 +| epoch 4 | 171/ 5600 batches | train loss 0.4095625 +| epoch 4 | 175/ 5600 batches | train loss 0.4257452 +| epoch 4 | 179/ 5600 batches | train loss 0.4663111 +| epoch 4 | 183/ 5600 batches | train loss 0.3994474 +| epoch 4 | 187/ 5600 batches | train loss 0.4549671 +| epoch 4 | 191/ 5600 batches | train loss 0.4750466 +| epoch 4 | 195/ 5600 batches | train loss 0.4008166 +| epoch 4 | 199/ 5600 batches | train loss 0.4141548 +| epoch 4 | 203/ 5600 batches | train loss 0.4101961 +| epoch 4 | 207/ 5600 batches | train loss 0.3643329 +| epoch 4 | 211/ 5600 batches | train loss 0.3875285 +| epoch 4 | 215/ 5600 batches | train loss 0.3411758 +| epoch 4 | 219/ 5600 batches | train loss 0.2816148 +| epoch 4 | 223/ 5600 batches | train loss 0.5324780 +| epoch 4 | 227/ 5600 batches | train loss 0.3873262 +| epoch 4 | 231/ 5600 batches | train loss 0.4410426 +| epoch 4 | 235/ 5600 batches | train loss 0.4356505 +| epoch 4 | 239/ 5600 batches | train loss 0.4741670 +| epoch 4 | 243/ 5600 batches | train loss 0.2763520 +| epoch 4 | 247/ 5600 batches | train loss 0.5108224 +| epoch 4 | 251/ 5600 batches | train loss 0.4010169 +| epoch 4 | 255/ 5600 batches | train loss 0.4403188 +| epoch 4 | 259/ 5600 batches | train loss 0.4483217 +| epoch 4 | 263/ 5600 batches | train loss 0.4932331 +| epoch 4 | 267/ 5600 batches | train loss 0.5541831 +| epoch 4 | 271/ 5600 batches | train loss 0.4039191 +| epoch 4 | 275/ 5600 batches | train loss 0.5353172 +| epoch 4 | 279/ 5600 batches | train loss 0.3603250 +| epoch 4 | 283/ 5600 batches | train loss 0.4242027 +| epoch 4 | 287/ 5600 batches | train loss 0.3955062 +| epoch 4 | 291/ 5600 batches | train loss 0.3912282 +| epoch 4 | 295/ 5600 batches | train loss 0.4225195 +| epoch 4 | 299/ 5600 batches | train loss 0.4730230 +| epoch 4 | 303/ 5600 batches | train loss 0.4609828 +| epoch 4 | 307/ 5600 batches | train loss 0.3695803 +| epoch 4 | 311/ 5600 batches | train loss 0.3875979 +| epoch 4 | 315/ 5600 batches | train loss 0.3948593 +| epoch 4 | 319/ 5600 batches | train loss 0.2973353 +| epoch 4 | 323/ 5600 batches | train loss 0.3769025 +| epoch 4 | 327/ 5600 batches | train loss 0.3893436 +| epoch 4 | 331/ 5600 batches | train loss 0.3669622 +| epoch 4 | 335/ 5600 batches | train loss 0.4125659 +| epoch 4 | 339/ 5600 batches | train loss 0.3636876 +| epoch 4 | 343/ 5600 batches | train loss 0.5146129 +| epoch 4 | 347/ 5600 batches | train loss 0.4079774 +| epoch 4 | 351/ 5600 batches | train loss 0.3723340 +| epoch 4 | 355/ 5600 batches | train loss 0.4923099 +| epoch 4 | 359/ 5600 batches | train loss 0.4101925 +| epoch 4 | 363/ 5600 batches | train loss 0.4677785 +| epoch 4 | 367/ 5600 batches | train loss 0.4212319 +| epoch 4 | 371/ 5600 batches | train loss 0.4070886 +| epoch 4 | 375/ 5600 batches | train loss 0.4107812 +| epoch 4 | 379/ 5600 batches | train loss 0.4201827 +| epoch 4 | 383/ 5600 batches | train loss 0.3574064 +| epoch 4 | 387/ 5600 batches | train loss 0.4247522 +| epoch 4 | 391/ 5600 batches | train loss 0.4023155 +| epoch 4 | 395/ 5600 batches | train loss 0.3802717 +| epoch 4 | 399/ 5600 batches | train loss 0.3330905 +| epoch 4 | 403/ 5600 batches | train loss 0.4686388 +| epoch 4 | 407/ 5600 batches | train loss 0.5062689 +| epoch 4 | 411/ 5600 batches | train loss 0.4955542 +| epoch 4 | 415/ 5600 batches | train loss 0.4920347 +| epoch 4 | 419/ 5600 batches | train loss 0.3646981 +| epoch 4 | 423/ 5600 batches | train loss 0.4588113 +| epoch 4 | 427/ 5600 batches | train loss 0.4669082 +| epoch 4 | 431/ 5600 batches | train loss 0.4542751 +| epoch 4 | 435/ 5600 batches | train loss 0.4219275 +| epoch 4 | 439/ 5600 batches | train loss 0.3582158 +| epoch 4 | 443/ 5600 batches | train loss 0.3862128 +| epoch 4 | 447/ 5600 batches | train loss 0.4650944 +| epoch 4 | 451/ 5600 batches | train loss 0.4043207 +| epoch 4 | 455/ 5600 batches | train loss 0.4123053 +| epoch 4 | 459/ 5600 batches | train loss 0.4739149 +| epoch 4 | 463/ 5600 batches | train loss 0.3729306 +| epoch 4 | 467/ 5600 batches | train loss 0.4478046 +| epoch 4 | 471/ 5600 batches | train loss 0.3608517 +| epoch 4 | 475/ 5600 batches | train loss 0.3750383 +| epoch 4 | 479/ 5600 batches | train loss 0.4463353 +| epoch 4 | 483/ 5600 batches | train loss 0.2841336 +| epoch 4 | 487/ 5600 batches | train loss 0.3462114 +| epoch 4 | 491/ 5600 batches | train loss 0.3094528 +| epoch 4 | 495/ 5600 batches | train loss 0.4661865 +| epoch 4 | 499/ 5600 batches | train loss 0.4894448 +| epoch 4 | 503/ 5600 batches | train loss 0.4473757 +| epoch 4 | 507/ 5600 batches | train loss 0.4475406 +| epoch 4 | 511/ 5600 batches | train loss 0.4798029 +| epoch 4 | 515/ 5600 batches | train loss 0.4222305 +| epoch 4 | 519/ 5600 batches | train loss 0.3937533 +| epoch 4 | 523/ 5600 batches | train loss 0.3858315 +| epoch 4 | 527/ 5600 batches | train loss 0.4331644 +| epoch 4 | 531/ 5600 batches | train loss 0.3857189 +| epoch 4 | 535/ 5600 batches | train loss 0.4192352 +| epoch 4 | 539/ 5600 batches | train loss 0.4214408 +| epoch 4 | 543/ 5600 batches | train loss 0.4830179 +| epoch 4 | 547/ 5600 batches | train loss 0.3904578 +| epoch 4 | 551/ 5600 batches | train loss 0.3650934 +| epoch 4 | 555/ 5600 batches | train loss 0.4140702 +| epoch 4 | 559/ 5600 batches | train loss 0.4719210 +| epoch 4 | 563/ 5600 batches | train loss 0.3455884 +| epoch 4 | 567/ 5600 batches | train loss 0.5096920 +| epoch 4 | 571/ 5600 batches | train loss 0.4794288 +| epoch 4 | 575/ 5600 batches | train loss 0.4934592 +| epoch 4 | 579/ 5600 batches | train loss 0.3970350 +| epoch 4 | 583/ 5600 batches | train loss 0.4472319 +| epoch 4 | 587/ 5600 batches | train loss 0.4813991 +| epoch 4 | 591/ 5600 batches | train loss 0.3440765 +| epoch 4 | 595/ 5600 batches | train loss 0.4836303 +| epoch 4 | 599/ 5600 batches | train loss 0.3770320 +| epoch 4 | 603/ 5600 batches | train loss 0.4012719 +| epoch 4 | 607/ 5600 batches | train loss 0.3417121 +| epoch 4 | 611/ 5600 batches | train loss 0.1836578 +| epoch 4 | 615/ 5600 batches | train loss 0.3974445 +| epoch 4 | 619/ 5600 batches | train loss 0.3710099 +| epoch 4 | 623/ 5600 batches | train loss 0.3481343 +| epoch 4 | 627/ 5600 batches | train loss 0.3646945 +| epoch 4 | 631/ 5600 batches | train loss 0.3946805 +| epoch 4 | 635/ 5600 batches | train loss 0.4878678 +| epoch 4 | 639/ 5600 batches | train loss 0.3572750 +| epoch 4 | 643/ 5600 batches | train loss 0.3727975 +| epoch 4 | 647/ 5600 batches | train loss 0.3491135 +| epoch 4 | 651/ 5600 batches | train loss 0.3959767 +| epoch 4 | 655/ 5600 batches | train loss 0.4509766 +| epoch 4 | 659/ 5600 batches | train loss 0.3809928 +| epoch 4 | 663/ 5600 batches | train loss 0.4159586 +| epoch 4 | 667/ 5600 batches | train loss 0.4571943 +| epoch 4 | 671/ 5600 batches | train loss 0.4194445 +| epoch 4 | 675/ 5600 batches | train loss 0.4440193 +| epoch 4 | 679/ 5600 batches | train loss 0.3912024 +| epoch 4 | 683/ 5600 batches | train loss 0.4259495 +| epoch 4 | 687/ 5600 batches | train loss 0.3859442 +| epoch 4 | 691/ 5600 batches | train loss 0.3604597 +| epoch 4 | 695/ 5600 batches | train loss 0.4855217 +| epoch 4 | 699/ 5600 batches | train loss 0.4048098 +| epoch 4 | 703/ 5600 batches | train loss 0.4331701 +| epoch 4 | 707/ 5600 batches | train loss 0.4386407 +| epoch 4 | 711/ 5600 batches | train loss 0.3860661 +| epoch 4 | 715/ 5600 batches | train loss 0.4055658 +| epoch 4 | 719/ 5600 batches | train loss 0.4482841 +| epoch 4 | 723/ 5600 batches | train loss 0.5011600 +| epoch 4 | 727/ 5600 batches | train loss 0.5262328 +| epoch 4 | 731/ 5600 batches | train loss 0.4277899 +| epoch 4 | 735/ 5600 batches | train loss 0.3589883 +| epoch 4 | 739/ 5600 batches | train loss 0.4833672 +| epoch 4 | 743/ 5600 batches | train loss 0.4679015 +| epoch 4 | 747/ 5600 batches | train loss 0.3782025 +| epoch 4 | 751/ 5600 batches | train loss 0.4351850 +| epoch 4 | 755/ 5600 batches | train loss 0.5644772 +| epoch 4 | 759/ 5600 batches | train loss 0.3779288 +| epoch 4 | 763/ 5600 batches | train loss 0.4401466 +| epoch 4 | 767/ 5600 batches | train loss 0.4591461 +| epoch 4 | 771/ 5600 batches | train loss 0.4200915 +| epoch 4 | 775/ 5600 batches | train loss 0.4872816 +| epoch 4 | 779/ 5600 batches | train loss 0.3426490 +| epoch 4 | 783/ 5600 batches | train loss 0.4314141 +| epoch 4 | 787/ 5600 batches | train loss 0.4049272 +| epoch 4 | 791/ 5600 batches | train loss 0.3896651 +| epoch 4 | 795/ 5600 batches | train loss 0.4748634 +| epoch 4 | 799/ 5600 batches | train loss 0.3973594 +| epoch 4 | 803/ 5600 batches | train loss 0.4176992 +| epoch 4 | 807/ 5600 batches | train loss 0.3412399 +| epoch 4 | 811/ 5600 batches | train loss 0.4504470 +| epoch 4 | 815/ 5600 batches | train loss 0.4813079 +| epoch 4 | 819/ 5600 batches | train loss 0.4761590 +| epoch 4 | 823/ 5600 batches | train loss 0.4303868 +| epoch 4 | 827/ 5600 batches | train loss 0.4255683 +| epoch 4 | 831/ 5600 batches | train loss 0.4605970 +| epoch 4 | 835/ 5600 batches | train loss 0.4197264 +| epoch 4 | 839/ 5600 batches | train loss 0.5129649 +| epoch 4 | 843/ 5600 batches | train loss 0.4259618 +| epoch 4 | 847/ 5600 batches | train loss 0.3996546 +| epoch 4 | 851/ 5600 batches | train loss 0.4103180 +| epoch 4 | 855/ 5600 batches | train loss 0.3560893 +| epoch 4 | 859/ 5600 batches | train loss 0.4059726 +| epoch 4 | 863/ 5600 batches | train loss 0.3882548 +| epoch 4 | 867/ 5600 batches | train loss 0.3825666 +| epoch 4 | 871/ 5600 batches | train loss 0.3763527 +| epoch 4 | 875/ 5600 batches | train loss 0.5203635 +| epoch 4 | 879/ 5600 batches | train loss 0.3840700 +| epoch 4 | 883/ 5600 batches | train loss 0.4534681 +| epoch 4 | 887/ 5600 batches | train loss 0.4391128 +| epoch 4 | 891/ 5600 batches | train loss 0.4868767 +| epoch 4 | 895/ 5600 batches | train loss 0.4166473 +| epoch 4 | 899/ 5600 batches | train loss 0.4500083 +| epoch 4 | 903/ 5600 batches | train loss 0.3489315 +| epoch 4 | 907/ 5600 batches | train loss 0.4103647 +| epoch 4 | 911/ 5600 batches | train loss 0.4486199 +| epoch 4 | 915/ 5600 batches | train loss 0.4511292 +| epoch 4 | 919/ 5600 batches | train loss 0.5100331 +| epoch 4 | 923/ 5600 batches | train loss 0.4294606 +| epoch 4 | 927/ 5600 batches | train loss 0.4239302 +| epoch 4 | 931/ 5600 batches | train loss 0.4281038 +| epoch 4 | 935/ 5600 batches | train loss 0.3899766 +| epoch 4 | 939/ 5600 batches | train loss 0.4697772 +| epoch 4 | 943/ 5600 batches | train loss 0.4194281 +| epoch 4 | 947/ 5600 batches | train loss 0.4222926 +| epoch 4 | 951/ 5600 batches | train loss 0.4657148 +| epoch 4 | 955/ 5600 batches | train loss 0.4622055 +| epoch 4 | 959/ 5600 batches | train loss 0.3792124 +| epoch 4 | 963/ 5600 batches | train loss 0.4393739 +| epoch 4 | 967/ 5600 batches | train loss 0.3979185 +| epoch 4 | 971/ 5600 batches | train loss 0.4487226 +| epoch 4 | 975/ 5600 batches | train loss 0.4247065 +| epoch 4 | 979/ 5600 batches | train loss 0.4415141 +| epoch 4 | 983/ 5600 batches | train loss 0.4298870 +| epoch 4 | 987/ 5600 batches | train loss 0.3588840 +| epoch 4 | 991/ 5600 batches | train loss 0.3678306 +| epoch 4 | 995/ 5600 batches | train loss 0.5100521 +| epoch 4 | 999/ 5600 batches | train loss 0.3663733 +| epoch 4 | 1003/ 5600 batches | train loss 0.4837698 +| epoch 4 | 1007/ 5600 batches | train loss 0.4425305 +| epoch 4 | 1011/ 5600 batches | train loss 0.4071132 +| epoch 4 | 1015/ 5600 batches | train loss 0.3775003 +| epoch 4 | 1019/ 5600 batches | train loss 0.3883276 +| epoch 4 | 1023/ 5600 batches | train loss 0.3844642 +| epoch 4 | 1027/ 5600 batches | train loss 0.4259087 +| epoch 4 | 1031/ 5600 batches | train loss 0.4603086 +| epoch 4 | 1035/ 5600 batches | train loss 0.3988565 +| epoch 4 | 1039/ 5600 batches | train loss 0.5023845 +| epoch 4 | 1043/ 5600 batches | train loss 0.4963463 +| epoch 4 | 1047/ 5600 batches | train loss 0.5378276 +| epoch 4 | 1051/ 5600 batches | train loss 0.4222898 +| epoch 4 | 1055/ 5600 batches | train loss 0.4130760 +| epoch 4 | 1059/ 5600 batches | train loss 0.4694537 +| epoch 4 | 1063/ 5600 batches | train loss 0.3797291 +| epoch 4 | 1067/ 5600 batches | train loss 0.4939011 +| epoch 4 | 1071/ 5600 batches | train loss 0.3778152 +| epoch 4 | 1075/ 5600 batches | train loss 0.3869417 +| epoch 4 | 1079/ 5600 batches | train loss 0.3713775 +| epoch 4 | 1083/ 5600 batches | train loss 0.4073424 +| epoch 4 | 1087/ 5600 batches | train loss 0.4024076 +| epoch 4 | 1091/ 5600 batches | train loss 0.4950179 +| epoch 4 | 1095/ 5600 batches | train loss 0.4932267 +| epoch 4 | 1099/ 5600 batches | train loss 0.4462130 +| epoch 4 | 1103/ 5600 batches | train loss 0.4737949 +| epoch 4 | 1107/ 5600 batches | train loss 0.4550167 +| epoch 4 | 1111/ 5600 batches | train loss 0.4590712 +| epoch 4 | 1115/ 5600 batches | train loss 0.3782264 +| epoch 4 | 1119/ 5600 batches | train loss 0.3852066 +| epoch 4 | 1123/ 5600 batches | train loss 0.4529542 +| epoch 4 | 1127/ 5600 batches | train loss 0.4657646 +| epoch 4 | 1131/ 5600 batches | train loss 0.3635151 +| epoch 4 | 1135/ 5600 batches | train loss 0.1919722 +| epoch 4 | 1139/ 5600 batches | train loss 0.3831729 +| epoch 4 | 1143/ 5600 batches | train loss 0.3468226 +| epoch 4 | 1147/ 5600 batches | train loss 0.4536873 +| epoch 4 | 1151/ 5600 batches | train loss 0.3367576 +| epoch 4 | 1155/ 5600 batches | train loss 0.4932975 +| epoch 4 | 1159/ 5600 batches | train loss 0.4050580 +| epoch 4 | 1163/ 5600 batches | train loss 0.4172550 +| epoch 4 | 1167/ 5600 batches | train loss 0.4115505 +| epoch 4 | 1171/ 5600 batches | train loss 0.4445280 +| epoch 4 | 1175/ 5600 batches | train loss 0.4036595 +| epoch 4 | 1179/ 5600 batches | train loss 0.4517938 +| epoch 4 | 1183/ 5600 batches | train loss 0.3901675 +| epoch 4 | 1187/ 5600 batches | train loss 0.4144507 +| epoch 4 | 1191/ 5600 batches | train loss 0.4577700 +| epoch 4 | 1195/ 5600 batches | train loss 0.4947058 +| epoch 4 | 1199/ 5600 batches | train loss 0.3758876 +| epoch 4 | 1203/ 5600 batches | train loss 0.4049549 +| epoch 4 | 1207/ 5600 batches | train loss 0.4700553 +| epoch 4 | 1211/ 5600 batches | train loss 0.4641886 +| epoch 4 | 1215/ 5600 batches | train loss 0.4155415 +| epoch 4 | 1219/ 5600 batches | train loss 0.3851823 +| epoch 4 | 1223/ 5600 batches | train loss 0.4125165 +| epoch 4 | 1227/ 5600 batches | train loss 0.4353803 +| epoch 4 | 1231/ 5600 batches | train loss 0.4843247 +| epoch 4 | 1235/ 5600 batches | train loss 0.3716500 +| epoch 4 | 1239/ 5600 batches | train loss 0.5063398 +| epoch 4 | 1243/ 5600 batches | train loss 0.3599025 +| epoch 4 | 1247/ 5600 batches | train loss 0.4343401 +| epoch 4 | 1251/ 5600 batches | train loss 0.4076896 +| epoch 4 | 1255/ 5600 batches | train loss 0.4317484 +| epoch 4 | 1259/ 5600 batches | train loss 0.3901860 +| epoch 4 | 1263/ 5600 batches | train loss 0.3551331 +| epoch 4 | 1267/ 5600 batches | train loss 0.3387221 +| epoch 4 | 1271/ 5600 batches | train loss 0.4548238 +| epoch 4 | 1275/ 5600 batches | train loss 0.4909747 +| epoch 4 | 1279/ 5600 batches | train loss 0.4270518 +| epoch 4 | 1283/ 5600 batches | train loss 0.4681108 +| epoch 4 | 1287/ 5600 batches | train loss 0.4134285 +| epoch 4 | 1291/ 5600 batches | train loss 0.4719881 +| epoch 4 | 1295/ 5600 batches | train loss 0.3960939 +| epoch 4 | 1299/ 5600 batches | train loss 0.2592413 +| epoch 4 | 1303/ 5600 batches | train loss 0.3883594 +| epoch 4 | 1307/ 5600 batches | train loss 0.3019236 +| epoch 4 | 1311/ 5600 batches | train loss 0.4396944 +| epoch 4 | 1315/ 5600 batches | train loss 0.4195202 +| epoch 4 | 1319/ 5600 batches | train loss 0.2988233 +| epoch 4 | 1323/ 5600 batches | train loss 0.4703032 +| epoch 4 | 1327/ 5600 batches | train loss 0.4786674 +| epoch 4 | 1331/ 5600 batches | train loss 0.3961517 +| epoch 4 | 1335/ 5600 batches | train loss 0.4902071 +| epoch 4 | 1339/ 5600 batches | train loss 0.3943232 +| epoch 4 | 1343/ 5600 batches | train loss 0.5032979 +| epoch 4 | 1347/ 5600 batches | train loss 0.6027657 +| epoch 4 | 1351/ 5600 batches | train loss 0.3999614 +| epoch 4 | 1355/ 5600 batches | train loss 0.4282577 +| epoch 4 | 1359/ 5600 batches | train loss 0.3203613 +| epoch 4 | 1363/ 5600 batches | train loss 0.3495555 +| epoch 4 | 1367/ 5600 batches | train loss 0.4410936 +| epoch 4 | 1371/ 5600 batches | train loss 0.4851013 +| epoch 4 | 1375/ 5600 batches | train loss 0.3926811 +| epoch 4 | 1379/ 5600 batches | train loss 0.2680445 +| epoch 4 | 1383/ 5600 batches | train loss 0.3637397 +| epoch 4 | 1387/ 5600 batches | train loss 0.4029233 +| epoch 4 | 1391/ 5600 batches | train loss 0.4983208 +| epoch 4 | 1395/ 5600 batches | train loss 0.4153744 +| epoch 4 | 1399/ 5600 batches | train loss 0.3882304 +| epoch 4 | 1403/ 5600 batches | train loss 0.3877719 +| epoch 4 | 1407/ 5600 batches | train loss 0.4551086 +| epoch 4 | 1411/ 5600 batches | train loss 0.3968484 +| epoch 4 | 1415/ 5600 batches | train loss 0.4156893 +| epoch 4 | 1419/ 5600 batches | train loss 0.4823604 +| epoch 4 | 1423/ 5600 batches | train loss 0.4096445 +| epoch 4 | 1427/ 5600 batches | train loss 0.3502825 +| epoch 4 | 1431/ 5600 batches | train loss 0.4200409 +| epoch 4 | 1435/ 5600 batches | train loss 0.4855000 +| epoch 4 | 1439/ 5600 batches | train loss 0.3543762 +| epoch 4 | 1443/ 5600 batches | train loss 0.4355423 +| epoch 4 | 1447/ 5600 batches | train loss 0.3324610 +| epoch 4 | 1451/ 5600 batches | train loss 0.4542457 +| epoch 4 | 1455/ 5600 batches | train loss 0.3768916 +| epoch 4 | 1459/ 5600 batches | train loss 0.4376317 +| epoch 4 | 1463/ 5600 batches | train loss 0.4775043 +| epoch 4 | 1467/ 5600 batches | train loss 0.5041946 +| epoch 4 | 1471/ 5600 batches | train loss 0.5207326 +| epoch 4 | 1475/ 5600 batches | train loss 0.4904059 +| epoch 4 | 1479/ 5600 batches | train loss 0.3141315 +| epoch 4 | 1483/ 5600 batches | train loss 0.4514461 +| epoch 4 | 1487/ 5600 batches | train loss 0.4479516 +| epoch 4 | 1491/ 5600 batches | train loss 0.3916614 +| epoch 4 | 1495/ 5600 batches | train loss 0.3595101 +| epoch 4 | 1499/ 5600 batches | train loss 0.4037331 +| epoch 4 | 1503/ 5600 batches | train loss 0.4665710 +| epoch 4 | 1507/ 5600 batches | train loss 0.5946934 +| epoch 4 | 1511/ 5600 batches | train loss 0.4459223 +| epoch 4 | 1515/ 5600 batches | train loss 0.3897290 +| epoch 4 | 1519/ 5600 batches | train loss 0.4153457 +| epoch 4 | 1523/ 5600 batches | train loss 0.4639103 +| epoch 4 | 1527/ 5600 batches | train loss 0.4119572 +| epoch 4 | 1531/ 5600 batches | train loss 0.3759388 +| epoch 4 | 1535/ 5600 batches | train loss 0.4552619 +| epoch 4 | 1539/ 5600 batches | train loss 0.3542327 +| epoch 4 | 1543/ 5600 batches | train loss 0.4842701 +| epoch 4 | 1547/ 5600 batches | train loss 0.4267297 +| epoch 4 | 1551/ 5600 batches | train loss 0.4440934 +| epoch 4 | 1555/ 5600 batches | train loss 0.4166164 +| epoch 4 | 1559/ 5600 batches | train loss 0.3757989 +| epoch 4 | 1563/ 5600 batches | train loss 0.3862544 +| epoch 4 | 1567/ 5600 batches | train loss 0.4443280 +| epoch 4 | 1571/ 5600 batches | train loss 0.4747174 +| epoch 4 | 1575/ 5600 batches | train loss 0.4655518 +| epoch 4 | 1579/ 5600 batches | train loss 0.3926280 +| epoch 4 | 1583/ 5600 batches | train loss 0.4056890 +| epoch 4 | 1587/ 5600 batches | train loss 0.4119893 +| epoch 4 | 1591/ 5600 batches | train loss 0.4653510 +| epoch 4 | 1595/ 5600 batches | train loss 0.4732784 +| epoch 4 | 1599/ 5600 batches | train loss 0.3661720 +| epoch 4 | 1603/ 5600 batches | train loss 0.4405874 +| epoch 4 | 1607/ 5600 batches | train loss 0.3842919 +| epoch 4 | 1611/ 5600 batches | train loss 0.4289109 +| epoch 4 | 1615/ 5600 batches | train loss 0.4908324 +| epoch 4 | 1619/ 5600 batches | train loss 0.3862799 +| epoch 4 | 1623/ 5600 batches | train loss 0.4321204 +| epoch 4 | 1627/ 5600 batches | train loss 0.4299790 +| epoch 4 | 1631/ 5600 batches | train loss 0.4379737 +| epoch 4 | 1635/ 5600 batches | train loss 0.4064009 +| epoch 4 | 1639/ 5600 batches | train loss 0.4799170 +| epoch 4 | 1643/ 5600 batches | train loss 0.3437826 +| epoch 4 | 1647/ 5600 batches | train loss 0.4424534 +| epoch 4 | 1651/ 5600 batches | train loss 0.4360854 +| epoch 4 | 1655/ 5600 batches | train loss 0.1800650 +| epoch 4 | 1659/ 5600 batches | train loss 0.4211171 +| epoch 4 | 1663/ 5600 batches | train loss 0.5866342 +| epoch 4 | 1667/ 5600 batches | train loss 0.3679077 +| epoch 4 | 1671/ 5600 batches | train loss 0.5702616 +| epoch 4 | 1675/ 5600 batches | train loss 0.4813787 +| epoch 4 | 1679/ 5600 batches | train loss 0.4071237 +| epoch 4 | 1683/ 5600 batches | train loss 0.4813817 +| epoch 4 | 1687/ 5600 batches | train loss 0.4672062 +| epoch 4 | 1691/ 5600 batches | train loss 0.4437469 +| epoch 4 | 1695/ 5600 batches | train loss 0.4250744 +| epoch 4 | 1699/ 5600 batches | train loss 0.4103372 +| epoch 4 | 1703/ 5600 batches | train loss 0.4428574 +| epoch 4 | 1707/ 5600 batches | train loss 0.4423000 +| epoch 4 | 1711/ 5600 batches | train loss 0.4031012 +| epoch 4 | 1715/ 5600 batches | train loss 0.4168310 +| epoch 4 | 1719/ 5600 batches | train loss 0.4206158 +| epoch 4 | 1723/ 5600 batches | train loss 0.4957275 +| epoch 4 | 1727/ 5600 batches | train loss 0.4651234 +| epoch 4 | 1731/ 5600 batches | train loss 0.4899724 +| epoch 4 | 1735/ 5600 batches | train loss 0.4216712 +| epoch 4 | 1739/ 5600 batches | train loss 0.3744621 +| epoch 4 | 1743/ 5600 batches | train loss 0.4394888 +| epoch 4 | 1747/ 5600 batches | train loss 0.4573925 +| epoch 4 | 1751/ 5600 batches | train loss 0.4571944 +| epoch 4 | 1755/ 5600 batches | train loss 0.4112696 +| epoch 4 | 1759/ 5600 batches | train loss 0.4217098 +| epoch 4 | 1763/ 5600 batches | train loss 0.3735391 +| epoch 4 | 1767/ 5600 batches | train loss 0.3427168 +| epoch 4 | 1771/ 5600 batches | train loss 0.3911348 +| epoch 4 | 1775/ 5600 batches | train loss 0.5166330 +| epoch 4 | 1779/ 5600 batches | train loss 0.5143952 +| epoch 4 | 1783/ 5600 batches | train loss 0.3816829 +| epoch 4 | 1787/ 5600 batches | train loss 0.3988290 +| epoch 4 | 1791/ 5600 batches | train loss 0.4359839 +| epoch 4 | 1795/ 5600 batches | train loss 0.4291772 +| epoch 4 | 1799/ 5600 batches | train loss 0.3666891 +| epoch 4 | 1803/ 5600 batches | train loss 0.3665158 +| epoch 4 | 1807/ 5600 batches | train loss 0.3853028 +| epoch 4 | 1811/ 5600 batches | train loss 0.3882277 +| epoch 4 | 1815/ 5600 batches | train loss 0.4095648 +| epoch 4 | 1819/ 5600 batches | train loss 0.4242151 +| epoch 4 | 1823/ 5600 batches | train loss 0.3419250 +| epoch 4 | 1827/ 5600 batches | train loss 0.4369870 +| epoch 4 | 1831/ 5600 batches | train loss 0.4164262 +| epoch 4 | 1835/ 5600 batches | train loss 0.4437374 +| epoch 4 | 1839/ 5600 batches | train loss 0.4217651 +| epoch 4 | 1843/ 5600 batches | train loss 0.4444843 +| epoch 4 | 1847/ 5600 batches | train loss 0.3809807 +| epoch 4 | 1851/ 5600 batches | train loss 0.3852881 +| epoch 4 | 1855/ 5600 batches | train loss 0.4314585 +| epoch 4 | 1859/ 5600 batches | train loss 0.3699631 +| epoch 4 | 1863/ 5600 batches | train loss 0.4375134 +| epoch 4 | 1867/ 5600 batches | train loss 0.5461500 +| epoch 4 | 1871/ 5600 batches | train loss 0.4910183 +| epoch 4 | 1875/ 5600 batches | train loss 0.4135711 +| epoch 4 | 1879/ 5600 batches | train loss 0.4088469 +| epoch 4 | 1883/ 5600 batches | train loss 0.4216996 +| epoch 4 | 1887/ 5600 batches | train loss 0.3867198 +| epoch 4 | 1891/ 5600 batches | train loss 0.3571114 +| epoch 4 | 1895/ 5600 batches | train loss 0.3692347 +| epoch 4 | 1899/ 5600 batches | train loss 0.4258969 +| epoch 4 | 1903/ 5600 batches | train loss 0.4309664 +| epoch 4 | 1907/ 5600 batches | train loss 0.4998349 +| epoch 4 | 1911/ 5600 batches | train loss 0.4895943 +| epoch 4 | 1915/ 5600 batches | train loss 0.4243767 +| epoch 4 | 1919/ 5600 batches | train loss 0.4669973 +| epoch 4 | 1923/ 5600 batches | train loss 0.4466484 +| epoch 4 | 1927/ 5600 batches | train loss 0.4573669 +| epoch 4 | 1931/ 5600 batches | train loss 0.4005449 +| epoch 4 | 1935/ 5600 batches | train loss 0.3476658 +| epoch 4 | 1939/ 5600 batches | train loss 0.2351280 +| epoch 4 | 1943/ 5600 batches | train loss 0.4149822 +| epoch 4 | 1947/ 5600 batches | train loss 0.5067742 +| epoch 4 | 1951/ 5600 batches | train loss 0.5091150 +| epoch 4 | 1955/ 5600 batches | train loss 0.4198224 +| epoch 4 | 1959/ 5600 batches | train loss 0.4088296 +| epoch 4 | 1963/ 5600 batches | train loss 0.4585027 +| epoch 4 | 1967/ 5600 batches | train loss 0.3701698 +| epoch 4 | 1971/ 5600 batches | train loss 0.4504499 +| epoch 4 | 1975/ 5600 batches | train loss 0.4064633 +| epoch 4 | 1979/ 5600 batches | train loss 0.4781171 +| epoch 4 | 1983/ 5600 batches | train loss 0.4011117 +| epoch 4 | 1987/ 5600 batches | train loss 0.4447448 +| epoch 4 | 1991/ 5600 batches | train loss 0.3452493 +| epoch 4 | 1995/ 5600 batches | train loss 0.4115250 +| epoch 4 | 1999/ 5600 batches | train loss 0.5760372 +| epoch 4 | 2003/ 5600 batches | train loss 0.4841570 +| epoch 4 | 2007/ 5600 batches | train loss 0.4514295 +| epoch 4 | 2011/ 5600 batches | train loss 0.3809462 +| epoch 4 | 2015/ 5600 batches | train loss 0.4330056 +| epoch 4 | 2019/ 5600 batches | train loss 0.3376769 +| epoch 4 | 2023/ 5600 batches | train loss 0.5120527 +| epoch 4 | 2027/ 5600 batches | train loss 0.4556051 +| epoch 4 | 2031/ 5600 batches | train loss 0.4469999 +| epoch 4 | 2035/ 5600 batches | train loss 0.4075868 +| epoch 4 | 2039/ 5600 batches | train loss 0.5420973 +| epoch 4 | 2043/ 5600 batches | train loss 0.4796962 +| epoch 4 | 2047/ 5600 batches | train loss 0.3580919 +| epoch 4 | 2051/ 5600 batches | train loss 0.4158225 +| epoch 4 | 2055/ 5600 batches | train loss 0.3631473 +| epoch 4 | 2059/ 5600 batches | train loss 0.3144796 +| epoch 4 | 2063/ 5600 batches | train loss 0.3850931 +| epoch 4 | 2067/ 5600 batches | train loss 0.3260150 +| epoch 4 | 2071/ 5600 batches | train loss 0.3502078 +| epoch 4 | 2075/ 5600 batches | train loss 0.3701158 +| epoch 4 | 2079/ 5600 batches | train loss 0.4148868 +| epoch 4 | 2083/ 5600 batches | train loss 0.4775259 +| epoch 4 | 2087/ 5600 batches | train loss 0.5128403 +| epoch 4 | 2091/ 5600 batches | train loss 0.4344788 +| epoch 4 | 2095/ 5600 batches | train loss 0.3531306 +| epoch 4 | 2099/ 5600 batches | train loss 0.5040482 +| epoch 4 | 2103/ 5600 batches | train loss 0.3578904 +| epoch 4 | 2107/ 5600 batches | train loss 0.6137753 +| epoch 4 | 2111/ 5600 batches | train loss 0.4054883 +| epoch 4 | 2115/ 5600 batches | train loss 0.4324185 +| epoch 4 | 2119/ 5600 batches | train loss 0.5064965 +| epoch 4 | 2123/ 5600 batches | train loss 0.3876521 +| epoch 4 | 2127/ 5600 batches | train loss 0.5107836 +| epoch 4 | 2131/ 5600 batches | train loss 0.4164897 +| epoch 4 | 2135/ 5600 batches | train loss 0.4301371 +| epoch 4 | 2139/ 5600 batches | train loss 0.4930267 +| epoch 4 | 2143/ 5600 batches | train loss 0.3857124 +| epoch 4 | 2147/ 5600 batches | train loss 0.3596663 +| epoch 4 | 2151/ 5600 batches | train loss 0.4490303 +| epoch 4 | 2155/ 5600 batches | train loss 0.4430901 +| epoch 4 | 2159/ 5600 batches | train loss 0.3959748 +| epoch 4 | 2163/ 5600 batches | train loss 0.4498253 +| epoch 4 | 2167/ 5600 batches | train loss 0.3776144 +| epoch 4 | 2171/ 5600 batches | train loss 0.4780872 +| epoch 4 | 2175/ 5600 batches | train loss 0.4088014 +| epoch 4 | 2179/ 5600 batches | train loss 0.4517461 +| epoch 4 | 2183/ 5600 batches | train loss 0.4431545 +| epoch 4 | 2187/ 5600 batches | train loss 0.4541329 +| epoch 4 | 2191/ 5600 batches | train loss 0.3950435 +| epoch 4 | 2195/ 5600 batches | train loss 0.4250349 +| epoch 4 | 2199/ 5600 batches | train loss 0.3435582 +| epoch 4 | 2203/ 5600 batches | train loss 0.4485277 +| epoch 4 | 2207/ 5600 batches | train loss 0.4716949 +| epoch 4 | 2211/ 5600 batches | train loss 0.3561847 +| epoch 4 | 2215/ 5600 batches | train loss 0.4038738 +| epoch 4 | 2219/ 5600 batches | train loss 0.4653241 +| epoch 4 | 2223/ 5600 batches | train loss 0.4489447 +| epoch 4 | 2227/ 5600 batches | train loss 0.3612548 +| epoch 4 | 2231/ 5600 batches | train loss 0.4247680 +| epoch 4 | 2235/ 5600 batches | train loss 0.5031211 +| epoch 4 | 2239/ 5600 batches | train loss 0.3217955 +| epoch 4 | 2243/ 5600 batches | train loss 0.4183691 +| epoch 4 | 2247/ 5600 batches | train loss 0.4252514 +| epoch 4 | 2251/ 5600 batches | train loss 0.4986120 +| epoch 4 | 2255/ 5600 batches | train loss 0.4225094 +| epoch 4 | 2259/ 5600 batches | train loss 0.3650718 +| epoch 4 | 2263/ 5600 batches | train loss 0.4110323 +| epoch 4 | 2267/ 5600 batches | train loss 0.2968375 +| epoch 4 | 2271/ 5600 batches | train loss 0.4121440 +| epoch 4 | 2275/ 5600 batches | train loss 0.4057987 +| epoch 4 | 2279/ 5600 batches | train loss 0.3434146 +| epoch 4 | 2283/ 5600 batches | train loss 0.4422401 +| epoch 4 | 2287/ 5600 batches | train loss 0.3836656 +| epoch 4 | 2291/ 5600 batches | train loss 0.3603955 +| epoch 4 | 2295/ 5600 batches | train loss 0.4838067 +| epoch 4 | 2299/ 5600 batches | train loss 0.3780875 +| epoch 4 | 2303/ 5600 batches | train loss 0.4144851 +| epoch 4 | 2307/ 5600 batches | train loss 0.4195702 +| epoch 4 | 2311/ 5600 batches | train loss 0.4579926 +| epoch 4 | 2315/ 5600 batches | train loss 0.4755267 +| epoch 4 | 2319/ 5600 batches | train loss 0.4454927 +| epoch 4 | 2323/ 5600 batches | train loss 0.3976671 +| epoch 4 | 2327/ 5600 batches | train loss 0.3995177 +| epoch 4 | 2331/ 5600 batches | train loss 0.3990146 +| epoch 4 | 2335/ 5600 batches | train loss 0.4754510 +| epoch 4 | 2339/ 5600 batches | train loss 0.4277726 +| epoch 4 | 2343/ 5600 batches | train loss 0.4610485 +| epoch 4 | 2347/ 5600 batches | train loss 0.4492587 +| epoch 4 | 2351/ 5600 batches | train loss 0.4478148 +| epoch 4 | 2355/ 5600 batches | train loss 0.4108558 +| epoch 4 | 2359/ 5600 batches | train loss 0.3648599 +| epoch 4 | 2363/ 5600 batches | train loss 0.4209682 +| epoch 4 | 2367/ 5600 batches | train loss 0.4471137 +| epoch 4 | 2371/ 5600 batches | train loss 0.4493673 +| epoch 4 | 2375/ 5600 batches | train loss 0.3640267 +| epoch 4 | 2379/ 5600 batches | train loss 0.4295355 +| epoch 4 | 2383/ 5600 batches | train loss 0.4125386 +| epoch 4 | 2387/ 5600 batches | train loss 0.4256094 +| epoch 4 | 2391/ 5600 batches | train loss 0.3624701 +| epoch 4 | 2395/ 5600 batches | train loss 0.4469260 +| epoch 4 | 2399/ 5600 batches | train loss 0.5164121 +| epoch 4 | 2403/ 5600 batches | train loss 0.4171375 +| epoch 4 | 2407/ 5600 batches | train loss 0.4556585 +| epoch 4 | 2411/ 5600 batches | train loss 0.3222282 +| epoch 4 | 2415/ 5600 batches | train loss 0.4413071 +| epoch 4 | 2419/ 5600 batches | train loss 0.4985822 +| epoch 4 | 2423/ 5600 batches | train loss 0.4456725 +| epoch 4 | 2427/ 5600 batches | train loss 0.4103844 +| epoch 4 | 2431/ 5600 batches | train loss 0.3971827 +| epoch 4 | 2435/ 5600 batches | train loss 0.5167096 +| epoch 4 | 2439/ 5600 batches | train loss 0.3953017 +| epoch 4 | 2443/ 5600 batches | train loss 0.4154096 +| epoch 4 | 2447/ 5600 batches | train loss 0.4304639 +| epoch 4 | 2451/ 5600 batches | train loss 0.3272383 +| epoch 4 | 2455/ 5600 batches | train loss 0.3708355 +| epoch 4 | 2459/ 5600 batches | train loss 0.3642515 +| epoch 4 | 2463/ 5600 batches | train loss 0.4090292 +| epoch 4 | 2467/ 5600 batches | train loss 0.4035178 +| epoch 4 | 2471/ 5600 batches | train loss 0.4329590 +| epoch 4 | 2475/ 5600 batches | train loss 0.3517137 +| epoch 4 | 2479/ 5600 batches | train loss 0.4319819 +| epoch 4 | 2483/ 5600 batches | train loss 0.3766167 +| epoch 4 | 2487/ 5600 batches | train loss 0.4568669 +| epoch 4 | 2491/ 5600 batches | train loss 0.4239043 +| epoch 4 | 2495/ 5600 batches | train loss 0.4479727 +| epoch 4 | 2499/ 5600 batches | train loss 0.4715855 +| epoch 4 | 2503/ 5600 batches | train loss 0.4892638 +| epoch 4 | 2507/ 5600 batches | train loss 0.3907335 +| epoch 4 | 2511/ 5600 batches | train loss 0.4476681 +| epoch 4 | 2515/ 5600 batches | train loss 0.3803323 +| epoch 4 | 2519/ 5600 batches | train loss 0.4231571 +| epoch 4 | 2523/ 5600 batches | train loss 0.4016478 +| epoch 4 | 2527/ 5600 batches | train loss 0.3983559 +| epoch 4 | 2531/ 5600 batches | train loss 0.4279899 +| epoch 4 | 2535/ 5600 batches | train loss 0.4634015 +| epoch 4 | 2539/ 5600 batches | train loss 0.5659821 +| epoch 4 | 2543/ 5600 batches | train loss 0.4105321 +| epoch 4 | 2547/ 5600 batches | train loss 0.4527375 +| epoch 4 | 2551/ 5600 batches | train loss 0.3535742 +| epoch 4 | 2555/ 5600 batches | train loss 0.4788691 +| epoch 4 | 2559/ 5600 batches | train loss 0.4013609 +| epoch 4 | 2563/ 5600 batches | train loss 0.4178495 +| epoch 4 | 2567/ 5600 batches | train loss 0.3894159 +| epoch 4 | 2571/ 5600 batches | train loss 0.4385028 +| epoch 4 | 2575/ 5600 batches | train loss 0.3851108 +| epoch 4 | 2579/ 5600 batches | train loss 0.4157113 +| epoch 4 | 2583/ 5600 batches | train loss 0.4719691 +| epoch 4 | 2587/ 5600 batches | train loss 0.4451113 +| epoch 4 | 2591/ 5600 batches | train loss 0.4970910 +| epoch 4 | 2595/ 5600 batches | train loss 0.4474415 +| epoch 4 | 2599/ 5600 batches | train loss 0.4257589 +| epoch 4 | 2603/ 5600 batches | train loss 0.4548061 +| epoch 4 | 2607/ 5600 batches | train loss 0.3769132 +| epoch 4 | 2611/ 5600 batches | train loss 0.4559723 +| epoch 4 | 2615/ 5600 batches | train loss 0.4502371 +| epoch 4 | 2619/ 5600 batches | train loss 0.4047878 +| epoch 4 | 2623/ 5600 batches | train loss 0.4573703 +| epoch 4 | 2627/ 5600 batches | train loss 0.3563651 +| epoch 4 | 2631/ 5600 batches | train loss 0.3677237 +| epoch 4 | 2635/ 5600 batches | train loss 0.4244654 +| epoch 4 | 2639/ 5600 batches | train loss 0.3028800 +| epoch 4 | 2643/ 5600 batches | train loss 0.4278173 +| epoch 4 | 2647/ 5600 batches | train loss 0.4024755 +| epoch 4 | 2651/ 5600 batches | train loss 0.5678161 +| epoch 4 | 2655/ 5600 batches | train loss 0.4130037 +| epoch 4 | 2659/ 5600 batches | train loss 0.3936468 +| epoch 4 | 2663/ 5600 batches | train loss 0.4188161 +| epoch 4 | 2667/ 5600 batches | train loss 0.3826158 +| epoch 4 | 2671/ 5600 batches | train loss 0.4528563 +| epoch 4 | 2675/ 5600 batches | train loss 0.3636240 +| epoch 4 | 2679/ 5600 batches | train loss 0.4088197 +| epoch 4 | 2683/ 5600 batches | train loss 0.4380636 +| epoch 4 | 2687/ 5600 batches | train loss 0.4436797 +| epoch 4 | 2691/ 5600 batches | train loss 0.4682907 +| epoch 4 | 2695/ 5600 batches | train loss 0.3474478 +| epoch 4 | 2699/ 5600 batches | train loss 0.4089263 +| epoch 4 | 2703/ 5600 batches | train loss 0.3618564 +| epoch 4 | 2707/ 5600 batches | train loss 0.4485232 +| epoch 4 | 2711/ 5600 batches | train loss 0.4066994 +| epoch 4 | 2715/ 5600 batches | train loss 0.3751788 +| epoch 4 | 2719/ 5600 batches | train loss 0.4589605 +| epoch 4 | 2723/ 5600 batches | train loss 0.3727829 +| epoch 4 | 2727/ 5600 batches | train loss 0.4303173 +| epoch 4 | 2731/ 5600 batches | train loss 0.3675862 +| epoch 4 | 2735/ 5600 batches | train loss 0.5144243 +| epoch 4 | 2739/ 5600 batches | train loss 0.4197564 +| epoch 4 | 2743/ 5600 batches | train loss 0.4366699 +| epoch 4 | 2747/ 5600 batches | train loss 0.4208196 +| epoch 4 | 2751/ 5600 batches | train loss 0.4496254 +| epoch 4 | 2755/ 5600 batches | train loss 0.3578111 +| epoch 4 | 2759/ 5600 batches | train loss 0.3138784 +| epoch 4 | 2763/ 5600 batches | train loss 0.4220801 +| epoch 4 | 2767/ 5600 batches | train loss 0.4464294 +| epoch 4 | 2771/ 5600 batches | train loss 0.3831978 +| epoch 4 | 2775/ 5600 batches | train loss 0.4308566 +| epoch 4 | 2779/ 5600 batches | train loss 0.4774857 +| epoch 4 | 2783/ 5600 batches | train loss 0.4598220 +| epoch 4 | 2787/ 5600 batches | train loss 0.4330538 +| epoch 4 | 2791/ 5600 batches | train loss 0.3952857 +| epoch 4 | 2795/ 5600 batches | train loss 0.3910850 +| epoch 4 | 2799/ 5600 batches | train loss 0.4532082 +| epoch 4 | 2803/ 5600 batches | train loss 0.4588126 +| epoch 4 | 2807/ 5600 batches | train loss 0.4702792 +| epoch 4 | 2811/ 5600 batches | train loss 0.3806817 +| epoch 4 | 2815/ 5600 batches | train loss 0.4432853 +| epoch 4 | 2819/ 5600 batches | train loss 0.3659263 +| epoch 4 | 2823/ 5600 batches | train loss 0.4198545 +| epoch 4 | 2827/ 5600 batches | train loss 0.4600176 +| epoch 4 | 2831/ 5600 batches | train loss 0.4372673 +| epoch 4 | 2835/ 5600 batches | train loss 0.4085458 +| epoch 4 | 2839/ 5600 batches | train loss 0.4170040 +| epoch 4 | 2843/ 5600 batches | train loss 0.4679719 +| epoch 4 | 2847/ 5600 batches | train loss 0.4320531 +| epoch 4 | 2851/ 5600 batches | train loss 0.2958981 +| epoch 4 | 2855/ 5600 batches | train loss 0.3630089 +| epoch 4 | 2859/ 5600 batches | train loss 0.4791980 +| epoch 4 | 2863/ 5600 batches | train loss 0.4349336 +| epoch 4 | 2867/ 5600 batches | train loss 0.4151030 +| epoch 4 | 2871/ 5600 batches | train loss 0.4438379 +| epoch 4 | 2875/ 5600 batches | train loss 0.3236810 +| epoch 4 | 2879/ 5600 batches | train loss 0.4005893 +| epoch 4 | 2883/ 5600 batches | train loss 0.4455772 +| epoch 4 | 2887/ 5600 batches | train loss 0.2701015 +| epoch 4 | 2891/ 5600 batches | train loss 0.4623923 +| epoch 4 | 2895/ 5600 batches | train loss 0.3984223 +| epoch 4 | 2899/ 5600 batches | train loss 0.4320118 +| epoch 4 | 2903/ 5600 batches | train loss 0.4226620 +| epoch 4 | 2907/ 5600 batches | train loss 0.4401045 +| epoch 4 | 2911/ 5600 batches | train loss 0.3979728 +| epoch 4 | 2915/ 5600 batches | train loss 0.4000717 +| epoch 4 | 2919/ 5600 batches | train loss 0.4418212 +| epoch 4 | 2923/ 5600 batches | train loss 0.4110970 +| epoch 4 | 2927/ 5600 batches | train loss 0.4407138 +| epoch 4 | 2931/ 5600 batches | train loss 0.4519098 +| epoch 4 | 2935/ 5600 batches | train loss 0.1903656 +| epoch 4 | 2939/ 5600 batches | train loss 0.3526254 +| epoch 4 | 2943/ 5600 batches | train loss 0.4888505 +| epoch 4 | 2947/ 5600 batches | train loss 0.3668903 +| epoch 4 | 2951/ 5600 batches | train loss 0.4179922 +| epoch 4 | 2955/ 5600 batches | train loss 0.3707232 +| epoch 4 | 2959/ 5600 batches | train loss 0.3651194 +| epoch 4 | 2963/ 5600 batches | train loss 0.3573871 +| epoch 4 | 2967/ 5600 batches | train loss 0.3614814 +| epoch 4 | 2971/ 5600 batches | train loss 0.4487877 +| epoch 4 | 2975/ 5600 batches | train loss 0.4158449 +| epoch 4 | 2979/ 5600 batches | train loss 0.3991445 +| epoch 4 | 2983/ 5600 batches | train loss 0.4649602 +| epoch 4 | 2987/ 5600 batches | train loss 0.4447195 +| epoch 4 | 2991/ 5600 batches | train loss 0.4887373 +| epoch 4 | 2995/ 5600 batches | train loss 0.3341191 +| epoch 4 | 2999/ 5600 batches | train loss 0.4189852 +| epoch 4 | 3003/ 5600 batches | train loss 0.4237447 +| epoch 4 | 3007/ 5600 batches | train loss 0.3517051 +| epoch 4 | 3011/ 5600 batches | train loss 0.3715229 +| epoch 4 | 3015/ 5600 batches | train loss 0.3731121 +| epoch 4 | 3019/ 5600 batches | train loss 0.3736249 +| epoch 4 | 3023/ 5600 batches | train loss 0.3832774 +| epoch 4 | 3027/ 5600 batches | train loss 0.4403882 +| epoch 4 | 3031/ 5600 batches | train loss 0.4483193 +| epoch 4 | 3035/ 5600 batches | train loss 0.4149565 +| epoch 4 | 3039/ 5600 batches | train loss 0.3633496 +| epoch 4 | 3043/ 5600 batches | train loss 0.5168347 +| epoch 4 | 3047/ 5600 batches | train loss 0.3853115 +| epoch 4 | 3051/ 5600 batches | train loss 0.3451141 +| epoch 4 | 3055/ 5600 batches | train loss 0.4467726 +| epoch 4 | 3059/ 5600 batches | train loss 0.4524230 +| epoch 4 | 3063/ 5600 batches | train loss 0.4537819 +| epoch 4 | 3067/ 5600 batches | train loss 0.4511014 +| epoch 4 | 3071/ 5600 batches | train loss 0.5163594 +| epoch 4 | 3075/ 5600 batches | train loss 0.4101035 +| epoch 4 | 3079/ 5600 batches | train loss 0.4302571 +| epoch 4 | 3083/ 5600 batches | train loss 0.3790727 +| epoch 4 | 3087/ 5600 batches | train loss 0.3466322 +| epoch 4 | 3091/ 5600 batches | train loss 0.4956354 +| epoch 4 | 3095/ 5600 batches | train loss 0.4967006 +| epoch 4 | 3099/ 5600 batches | train loss 0.4027981 +| epoch 4 | 3103/ 5600 batches | train loss 0.3939118 +| epoch 4 | 3107/ 5600 batches | train loss 0.3903078 +| epoch 4 | 3111/ 5600 batches | train loss 0.3259692 +| epoch 4 | 3115/ 5600 batches | train loss 0.4257466 +| epoch 4 | 3119/ 5600 batches | train loss 0.4316773 +| epoch 4 | 3123/ 5600 batches | train loss 0.4379293 +| epoch 4 | 3127/ 5600 batches | train loss 0.4688275 +| epoch 4 | 3131/ 5600 batches | train loss 0.4326416 +| epoch 4 | 3135/ 5600 batches | train loss 0.5508233 +| epoch 4 | 3139/ 5600 batches | train loss 0.3329436 +| epoch 4 | 3143/ 5600 batches | train loss 0.5194609 +| epoch 4 | 3147/ 5600 batches | train loss 0.4361165 +| epoch 4 | 3151/ 5600 batches | train loss 0.4502463 +| epoch 4 | 3155/ 5600 batches | train loss 0.4405275 +| epoch 4 | 3159/ 5600 batches | train loss 0.4178715 +| epoch 4 | 3163/ 5600 batches | train loss 0.4113985 +| epoch 4 | 3167/ 5600 batches | train loss 0.4336052 +| epoch 4 | 3171/ 5600 batches | train loss 0.4058895 +| epoch 4 | 3175/ 5600 batches | train loss 0.4367784 +| epoch 4 | 3179/ 5600 batches | train loss 0.3000331 +| epoch 4 | 3183/ 5600 batches | train loss 0.4548548 +| epoch 4 | 3187/ 5600 batches | train loss 0.4020607 +| epoch 4 | 3191/ 5600 batches | train loss 0.4096524 +| epoch 4 | 3195/ 5600 batches | train loss 0.4132141 +| epoch 4 | 3199/ 5600 batches | train loss 0.3632458 +| epoch 4 | 3203/ 5600 batches | train loss 0.4549814 +| epoch 4 | 3207/ 5600 batches | train loss 0.4366817 +| epoch 4 | 3211/ 5600 batches | train loss 0.3742489 +| epoch 4 | 3215/ 5600 batches | train loss 0.4220995 +| epoch 4 | 3219/ 5600 batches | train loss 0.4632396 +| epoch 4 | 3223/ 5600 batches | train loss 0.4244588 +| epoch 4 | 3227/ 5600 batches | train loss 0.5148157 +| epoch 4 | 3231/ 5600 batches | train loss 0.3644252 +| epoch 4 | 3235/ 5600 batches | train loss 0.5517536 +| epoch 4 | 3239/ 5600 batches | train loss 0.4039542 +| epoch 4 | 3243/ 5600 batches | train loss 0.4945812 +| epoch 4 | 3247/ 5600 batches | train loss 0.4223917 +| epoch 4 | 3251/ 5600 batches | train loss 0.3992909 +| epoch 4 | 3255/ 5600 batches | train loss 0.3706149 +| epoch 4 | 3259/ 5600 batches | train loss 0.4297261 +| epoch 4 | 3263/ 5600 batches | train loss 0.3839167 +| epoch 4 | 3267/ 5600 batches | train loss 0.3490927 +| epoch 4 | 3271/ 5600 batches | train loss 0.3510646 +| epoch 4 | 3275/ 5600 batches | train loss 0.4956959 +| epoch 4 | 3279/ 5600 batches | train loss 0.3770502 +| epoch 4 | 3283/ 5600 batches | train loss 0.4216909 +| epoch 4 | 3287/ 5600 batches | train loss 0.4053599 +| epoch 4 | 3291/ 5600 batches | train loss 0.5075715 +| epoch 4 | 3295/ 5600 batches | train loss 0.3798963 +| epoch 4 | 3299/ 5600 batches | train loss 0.4958283 +| epoch 4 | 3303/ 5600 batches | train loss 0.4650040 +| epoch 4 | 3307/ 5600 batches | train loss 0.3909421 +| epoch 4 | 3311/ 5600 batches | train loss 0.3818374 +| epoch 4 | 3315/ 5600 batches | train loss 0.3698074 +| epoch 4 | 3319/ 5600 batches | train loss 0.4588837 +| epoch 4 | 3323/ 5600 batches | train loss 0.4069444 +| epoch 4 | 3327/ 5600 batches | train loss 0.4266629 +| epoch 4 | 3331/ 5600 batches | train loss 0.4698542 +| epoch 4 | 3335/ 5600 batches | train loss 0.3866257 +| epoch 4 | 3339/ 5600 batches | train loss 0.4816094 +| epoch 4 | 3343/ 5600 batches | train loss 0.3113642 +| epoch 4 | 3347/ 5600 batches | train loss 0.3627935 +| epoch 4 | 3351/ 5600 batches | train loss 0.4683720 +| epoch 4 | 3355/ 5600 batches | train loss 0.3789134 +| epoch 4 | 3359/ 5600 batches | train loss 0.4811325 +| epoch 4 | 3363/ 5600 batches | train loss 0.4156983 +| epoch 4 | 3367/ 5600 batches | train loss 0.3896350 +| epoch 4 | 3371/ 5600 batches | train loss 0.3683321 +| epoch 4 | 3375/ 5600 batches | train loss 0.4792706 +| epoch 4 | 3379/ 5600 batches | train loss 0.3944244 +| epoch 4 | 3383/ 5600 batches | train loss 0.3471412 +| epoch 4 | 3387/ 5600 batches | train loss 0.3591614 +| epoch 4 | 3391/ 5600 batches | train loss 0.3910675 +| epoch 4 | 3395/ 5600 batches | train loss 0.4269549 +| epoch 4 | 3399/ 5600 batches | train loss 0.4397386 +| epoch 4 | 3403/ 5600 batches | train loss 0.4125386 +| epoch 4 | 3407/ 5600 batches | train loss 0.4640269 +| epoch 4 | 3411/ 5600 batches | train loss 0.4230981 +| epoch 4 | 3415/ 5600 batches | train loss 0.4324945 +| epoch 4 | 3419/ 5600 batches | train loss 0.3525968 +| epoch 4 | 3423/ 5600 batches | train loss 0.4536750 +| epoch 4 | 3427/ 5600 batches | train loss 0.4568868 +| epoch 4 | 3431/ 5600 batches | train loss 0.4598106 +| epoch 4 | 3435/ 5600 batches | train loss 0.4729962 +| epoch 4 | 3439/ 5600 batches | train loss 0.3818956 +| epoch 4 | 3443/ 5600 batches | train loss 0.4117711 +| epoch 4 | 3447/ 5600 batches | train loss 0.4084285 +| epoch 4 | 3451/ 5600 batches | train loss 0.4565881 +| epoch 4 | 3455/ 5600 batches | train loss 0.3830490 +| epoch 4 | 3459/ 5600 batches | train loss 0.4310740 +| epoch 4 | 3463/ 5600 batches | train loss 0.3952556 +| epoch 4 | 3467/ 5600 batches | train loss 0.3939630 +| epoch 4 | 3471/ 5600 batches | train loss 0.4086343 +| epoch 4 | 3475/ 5600 batches | train loss 0.4767765 +| epoch 4 | 3479/ 5600 batches | train loss 0.3943832 +| epoch 4 | 3483/ 5600 batches | train loss 0.3993242 +| epoch 4 | 3487/ 5600 batches | train loss 0.3497525 +| epoch 4 | 3491/ 5600 batches | train loss 0.3474343 +| epoch 4 | 3495/ 5600 batches | train loss 0.4270377 +| epoch 4 | 3499/ 5600 batches | train loss 0.3753747 +| epoch 4 | 3503/ 5600 batches | train loss 0.4652417 +| epoch 4 | 3507/ 5600 batches | train loss 0.4163131 +| epoch 4 | 3511/ 5600 batches | train loss 0.4747350 +| epoch 4 | 3515/ 5600 batches | train loss 0.4907857 +| epoch 4 | 3519/ 5600 batches | train loss 0.4728831 +| epoch 4 | 3523/ 5600 batches | train loss 0.4668477 +| epoch 4 | 3527/ 5600 batches | train loss 0.4881209 +| epoch 4 | 3531/ 5600 batches | train loss 0.5093951 +| epoch 4 | 3535/ 5600 batches | train loss 0.4475259 +| epoch 4 | 3539/ 5600 batches | train loss 0.3923446 +| epoch 4 | 3543/ 5600 batches | train loss 0.4349786 +| epoch 4 | 3547/ 5600 batches | train loss 0.5019542 +| epoch 4 | 3551/ 5600 batches | train loss 0.3829837 +| epoch 4 | 3555/ 5600 batches | train loss 0.3601457 +| epoch 4 | 3559/ 5600 batches | train loss 0.4318563 +| epoch 4 | 3563/ 5600 batches | train loss 0.4611020 +| epoch 4 | 3567/ 5600 batches | train loss 0.2695860 +| epoch 4 | 3571/ 5600 batches | train loss 0.4300677 +| epoch 4 | 3575/ 5600 batches | train loss 0.4565103 +| epoch 4 | 3579/ 5600 batches | train loss 0.4041165 +| epoch 4 | 3583/ 5600 batches | train loss 0.3567711 +| epoch 4 | 3587/ 5600 batches | train loss 0.5127366 +| epoch 4 | 3591/ 5600 batches | train loss 0.4105094 +| epoch 4 | 3595/ 5600 batches | train loss 0.4391699 +| epoch 4 | 3599/ 5600 batches | train loss 0.3973310 +| epoch 4 | 3603/ 5600 batches | train loss 0.4305287 +| epoch 4 | 3607/ 5600 batches | train loss 0.4760387 +| epoch 4 | 3611/ 5600 batches | train loss 0.4182495 +| epoch 4 | 3615/ 5600 batches | train loss 0.3668259 +| epoch 4 | 3619/ 5600 batches | train loss 0.4626232 +| epoch 4 | 3623/ 5600 batches | train loss 0.3610384 +| epoch 4 | 3627/ 5600 batches | train loss 0.3991940 +| epoch 4 | 3631/ 5600 batches | train loss 0.4089911 +| epoch 4 | 3635/ 5600 batches | train loss 0.4053439 +| epoch 4 | 3639/ 5600 batches | train loss 0.4203717 +| epoch 4 | 3643/ 5600 batches | train loss 0.3903725 +| epoch 4 | 3647/ 5600 batches | train loss 0.4863410 +| epoch 4 | 3651/ 5600 batches | train loss 0.4103897 +| epoch 4 | 3655/ 5600 batches | train loss 0.4617995 +| epoch 4 | 3659/ 5600 batches | train loss 0.4510909 +| epoch 4 | 3663/ 5600 batches | train loss 0.3850649 +| epoch 4 | 3667/ 5600 batches | train loss 0.3806673 +| epoch 4 | 3671/ 5600 batches | train loss 0.4580359 +| epoch 4 | 3675/ 5600 batches | train loss 0.4904692 +| epoch 4 | 3679/ 5600 batches | train loss 0.4742209 +| epoch 4 | 3683/ 5600 batches | train loss 0.4694113 +| epoch 4 | 3687/ 5600 batches | train loss 0.4818651 +| epoch 4 | 3691/ 5600 batches | train loss 0.3939526 +| epoch 4 | 3695/ 5600 batches | train loss 0.3545073 +| epoch 4 | 3699/ 5600 batches | train loss 0.4170758 +| epoch 4 | 3703/ 5600 batches | train loss 0.3895988 +| epoch 4 | 3707/ 5600 batches | train loss 0.5287820 +| epoch 4 | 3711/ 5600 batches | train loss 0.4384142 +| epoch 4 | 3715/ 5600 batches | train loss 0.4740017 +| epoch 4 | 3719/ 5600 batches | train loss 0.2968711 +| epoch 4 | 3723/ 5600 batches | train loss 0.3997046 +| epoch 4 | 3727/ 5600 batches | train loss 0.4771065 +| epoch 4 | 3731/ 5600 batches | train loss 0.4910973 +| epoch 4 | 3735/ 5600 batches | train loss 0.4994076 +| epoch 4 | 3739/ 5600 batches | train loss 0.4009411 +| epoch 4 | 3743/ 5600 batches | train loss 0.3847963 +| epoch 4 | 3747/ 5600 batches | train loss 0.4419957 +| epoch 4 | 3751/ 5600 batches | train loss 0.5043742 +| epoch 4 | 3755/ 5600 batches | train loss 0.4148808 +| epoch 4 | 3759/ 5600 batches | train loss 0.3775578 +| epoch 4 | 3763/ 5600 batches | train loss 0.5711791 +| epoch 4 | 3767/ 5600 batches | train loss 0.4068530 +| epoch 4 | 3771/ 5600 batches | train loss 0.4096424 +| epoch 4 | 3775/ 5600 batches | train loss 0.4121100 +| epoch 4 | 3779/ 5600 batches | train loss 0.4985078 +| epoch 4 | 3783/ 5600 batches | train loss 0.5244951 +| epoch 4 | 3787/ 5600 batches | train loss 0.5124431 +| epoch 4 | 3791/ 5600 batches | train loss 0.3862817 +| epoch 4 | 3795/ 5600 batches | train loss 0.3988783 +| epoch 4 | 3799/ 5600 batches | train loss 0.3839214 +| epoch 4 | 3803/ 5600 batches | train loss 0.4601246 +| epoch 4 | 3807/ 5600 batches | train loss 0.3324512 +| epoch 4 | 3811/ 5600 batches | train loss 0.4099784 +| epoch 4 | 3815/ 5600 batches | train loss 0.4071229 +| epoch 4 | 3819/ 5600 batches | train loss 0.4595898 +| epoch 4 | 3823/ 5600 batches | train loss 0.4795383 +| epoch 4 | 3827/ 5600 batches | train loss 0.4379557 +| epoch 4 | 3831/ 5600 batches | train loss 0.3958082 +| epoch 4 | 3835/ 5600 batches | train loss 0.4405653 +| epoch 4 | 3839/ 5600 batches | train loss 0.4867929 +| epoch 4 | 3843/ 5600 batches | train loss 0.4356416 +| epoch 4 | 3847/ 5600 batches | train loss 0.4800732 +| epoch 4 | 3851/ 5600 batches | train loss 0.4095980 +| epoch 4 | 3855/ 5600 batches | train loss 0.4350404 +| epoch 4 | 3859/ 5600 batches | train loss 0.3745304 +| epoch 4 | 3863/ 5600 batches | train loss 0.4614952 +| epoch 4 | 3867/ 5600 batches | train loss 0.4979000 +| epoch 4 | 3871/ 5600 batches | train loss 0.4113087 +| epoch 4 | 3875/ 5600 batches | train loss 0.4596787 +| epoch 4 | 3879/ 5600 batches | train loss 0.3673879 +| epoch 4 | 3883/ 5600 batches | train loss 0.3319666 +| epoch 4 | 3887/ 5600 batches | train loss 0.4313426 +| epoch 4 | 3891/ 5600 batches | train loss 0.4634266 +| epoch 4 | 3895/ 5600 batches | train loss 0.4741969 +| epoch 4 | 3899/ 5600 batches | train loss 0.4060901 +| epoch 4 | 3903/ 5600 batches | train loss 0.3376118 +| epoch 4 | 3907/ 5600 batches | train loss 0.4043474 +| epoch 4 | 3911/ 5600 batches | train loss 0.3586575 +| epoch 4 | 3915/ 5600 batches | train loss 0.4642871 +| epoch 4 | 3919/ 5600 batches | train loss 0.4343347 +| epoch 4 | 3923/ 5600 batches | train loss 0.4096230 +| epoch 4 | 3927/ 5600 batches | train loss 0.2732776 +| epoch 4 | 3931/ 5600 batches | train loss 0.4659222 +| epoch 4 | 3935/ 5600 batches | train loss 0.3543547 +| epoch 4 | 3939/ 5600 batches | train loss 0.4087448 +| epoch 4 | 3943/ 5600 batches | train loss 0.4532589 +| epoch 4 | 3947/ 5600 batches | train loss 0.3887507 +| epoch 4 | 3951/ 5600 batches | train loss 0.4256208 +| epoch 4 | 3955/ 5600 batches | train loss 0.3850413 +| epoch 4 | 3959/ 5600 batches | train loss 0.4262847 +| epoch 4 | 3963/ 5600 batches | train loss 0.3876497 +| epoch 4 | 3967/ 5600 batches | train loss 0.5219135 +| epoch 4 | 3971/ 5600 batches | train loss 0.3808102 +| epoch 4 | 3975/ 5600 batches | train loss 0.3066245 +| epoch 4 | 3979/ 5600 batches | train loss 0.4343723 +| epoch 4 | 3983/ 5600 batches | train loss 0.3180597 +| epoch 4 | 3987/ 5600 batches | train loss 0.4619649 +| epoch 4 | 3991/ 5600 batches | train loss 0.4248462 +| epoch 4 | 3995/ 5600 batches | train loss 0.4472431 +| epoch 4 | 3999/ 5600 batches | train loss 0.3783735 +| epoch 4 | 4003/ 5600 batches | train loss 0.4387286 +| epoch 4 | 4007/ 5600 batches | train loss 0.3344719 +| epoch 4 | 4011/ 5600 batches | train loss 0.4770758 +| epoch 4 | 4015/ 5600 batches | train loss 0.4468794 +| epoch 4 | 4019/ 5600 batches | train loss 0.4899974 +| epoch 4 | 4023/ 5600 batches | train loss 0.4070131 +| epoch 4 | 4027/ 5600 batches | train loss 0.4802916 +| epoch 4 | 4031/ 5600 batches | train loss 0.4021658 +| epoch 4 | 4035/ 5600 batches | train loss 0.3948797 +| epoch 4 | 4039/ 5600 batches | train loss 0.5447859 +| epoch 4 | 4043/ 5600 batches | train loss 0.4156949 +| epoch 4 | 4047/ 5600 batches | train loss 0.4536642 +| epoch 4 | 4051/ 5600 batches | train loss 0.4550984 +| epoch 4 | 4055/ 5600 batches | train loss 0.4231889 +| epoch 4 | 4059/ 5600 batches | train loss 0.3755248 +| epoch 4 | 4063/ 5600 batches | train loss 0.4170315 +| epoch 4 | 4067/ 5600 batches | train loss 0.4208147 +| epoch 4 | 4071/ 5600 batches | train loss 0.5084841 +| epoch 4 | 4075/ 5600 batches | train loss 0.4073753 +| epoch 4 | 4079/ 5600 batches | train loss 0.4669021 +| epoch 4 | 4083/ 5600 batches | train loss 0.3452866 +| epoch 4 | 4087/ 5600 batches | train loss 0.4330478 +| epoch 4 | 4091/ 5600 batches | train loss 0.3468433 +| epoch 4 | 4095/ 5600 batches | train loss 0.3579243 +| epoch 4 | 4099/ 5600 batches | train loss 0.4680027 +| epoch 4 | 4103/ 5600 batches | train loss 0.3969160 +| epoch 4 | 4107/ 5600 batches | train loss 0.4496208 +| epoch 4 | 4111/ 5600 batches | train loss 0.4512132 +| epoch 4 | 4115/ 5600 batches | train loss 0.3934203 +| epoch 4 | 4119/ 5600 batches | train loss 0.4246808 +| epoch 4 | 4123/ 5600 batches | train loss 0.3987638 +| epoch 4 | 4127/ 5600 batches | train loss 0.4241516 +| epoch 4 | 4131/ 5600 batches | train loss 0.3752946 +| epoch 4 | 4135/ 5600 batches | train loss 0.4206845 +| epoch 4 | 4139/ 5600 batches | train loss 0.3767198 +| epoch 4 | 4143/ 5600 batches | train loss 0.4611220 +| epoch 4 | 4147/ 5600 batches | train loss 0.4449950 +| epoch 4 | 4151/ 5600 batches | train loss 0.5127202 +| epoch 4 | 4155/ 5600 batches | train loss 0.4475800 +| epoch 4 | 4159/ 5600 batches | train loss 0.4050618 +| epoch 4 | 4163/ 5600 batches | train loss 0.4295291 +| epoch 4 | 4167/ 5600 batches | train loss 0.3842709 +| epoch 4 | 4171/ 5600 batches | train loss 0.4163996 +| epoch 4 | 4175/ 5600 batches | train loss 0.4858613 +| epoch 4 | 4179/ 5600 batches | train loss 0.3119432 +| epoch 4 | 4183/ 5600 batches | train loss 0.4795676 +| epoch 4 | 4187/ 5600 batches | train loss 0.3605725 +| epoch 4 | 4191/ 5600 batches | train loss 0.3725128 +| epoch 4 | 4195/ 5600 batches | train loss 0.4896530 +| epoch 4 | 4199/ 5600 batches | train loss 0.4774184 +| epoch 4 | 4203/ 5600 batches | train loss 0.4044757 +| epoch 4 | 4207/ 5600 batches | train loss 0.4601434 +| epoch 4 | 4211/ 5600 batches | train loss 0.4601713 +| epoch 4 | 4215/ 5600 batches | train loss 0.3947807 +| epoch 4 | 4219/ 5600 batches | train loss 0.4012511 +| epoch 4 | 4223/ 5600 batches | train loss 0.4730980 +| epoch 4 | 4227/ 5600 batches | train loss 0.4560441 +| epoch 4 | 4231/ 5600 batches | train loss 0.4550771 +| epoch 4 | 4235/ 5600 batches | train loss 0.4198545 +| epoch 4 | 4239/ 5600 batches | train loss 0.4897421 +| epoch 4 | 4243/ 5600 batches | train loss 0.3255232 +| epoch 4 | 4247/ 5600 batches | train loss 0.4572387 +| epoch 4 | 4251/ 5600 batches | train loss 0.4152230 +| epoch 4 | 4255/ 5600 batches | train loss 0.4166420 +| epoch 4 | 4259/ 5600 batches | train loss 0.3514413 +| epoch 4 | 4263/ 5600 batches | train loss 0.5353163 +| epoch 4 | 4267/ 5600 batches | train loss 0.3500451 +| epoch 4 | 4271/ 5600 batches | train loss 0.4579270 +| epoch 4 | 4275/ 5600 batches | train loss 0.3923359 +| epoch 4 | 4279/ 5600 batches | train loss 0.4246123 +| epoch 4 | 4283/ 5600 batches | train loss 0.3594269 +| epoch 4 | 4287/ 5600 batches | train loss 0.4127828 +| epoch 4 | 4291/ 5600 batches | train loss 0.4642165 +| epoch 4 | 4295/ 5600 batches | train loss 0.4335732 +| epoch 4 | 4299/ 5600 batches | train loss 0.4706733 +| epoch 4 | 4303/ 5600 batches | train loss 0.3995056 +| epoch 4 | 4307/ 5600 batches | train loss 0.4360029 +| epoch 4 | 4311/ 5600 batches | train loss 0.4435876 +| epoch 4 | 4315/ 5600 batches | train loss 0.4080379 +| epoch 4 | 4319/ 5600 batches | train loss 0.5013145 +| epoch 4 | 4323/ 5600 batches | train loss 0.3783262 +| epoch 4 | 4327/ 5600 batches | train loss 0.5068430 +| epoch 4 | 4331/ 5600 batches | train loss 0.5243704 +| epoch 4 | 4335/ 5600 batches | train loss 0.4726872 +| epoch 4 | 4339/ 5600 batches | train loss 0.4627860 +| epoch 4 | 4343/ 5600 batches | train loss 0.3889933 +| epoch 4 | 4347/ 5600 batches | train loss 0.4654405 +| epoch 4 | 4351/ 5600 batches | train loss 0.4162496 +| epoch 4 | 4355/ 5600 batches | train loss 0.4419088 +| epoch 4 | 4359/ 5600 batches | train loss 0.3745251 +| epoch 4 | 4363/ 5600 batches | train loss 0.4453077 +| epoch 4 | 4367/ 5600 batches | train loss 0.5444402 +| epoch 4 | 4371/ 5600 batches | train loss 0.3843842 +| epoch 4 | 4375/ 5600 batches | train loss 0.4581083 +| epoch 4 | 4379/ 5600 batches | train loss 0.3806873 +| epoch 4 | 4383/ 5600 batches | train loss 0.4005804 +| epoch 4 | 4387/ 5600 batches | train loss 0.4402133 +| epoch 4 | 4391/ 5600 batches | train loss 0.4606542 +| epoch 4 | 4395/ 5600 batches | train loss 0.5087353 +| epoch 4 | 4399/ 5600 batches | train loss 0.3898736 +| epoch 4 | 4403/ 5600 batches | train loss 0.4457454 +| epoch 4 | 4407/ 5600 batches | train loss 0.4299732 +| epoch 4 | 4411/ 5600 batches | train loss 0.4011942 +| epoch 4 | 4415/ 5600 batches | train loss 0.3976706 +| epoch 4 | 4419/ 5600 batches | train loss 0.4014452 +| epoch 4 | 4423/ 5600 batches | train loss 0.4371983 +| epoch 4 | 4427/ 5600 batches | train loss 0.3802482 +| epoch 4 | 4431/ 5600 batches | train loss 0.5319477 +| epoch 4 | 4435/ 5600 batches | train loss 0.3632558 +| epoch 4 | 4439/ 5600 batches | train loss 0.4536047 +| epoch 4 | 4443/ 5600 batches | train loss 0.4563693 +| epoch 4 | 4447/ 5600 batches | train loss 0.4478151 +| epoch 4 | 4451/ 5600 batches | train loss 0.3576916 +| epoch 4 | 4455/ 5600 batches | train loss 0.4882609 +| epoch 4 | 4459/ 5600 batches | train loss 0.4026882 +| epoch 4 | 4463/ 5600 batches | train loss 0.3445435 +| epoch 4 | 4467/ 5600 batches | train loss 0.5127802 +| epoch 4 | 4471/ 5600 batches | train loss 0.4470795 +| epoch 4 | 4475/ 5600 batches | train loss 0.4651869 +| epoch 4 | 4479/ 5600 batches | train loss 0.4194487 +| epoch 4 | 4483/ 5600 batches | train loss 0.3724076 +| epoch 4 | 4487/ 5600 batches | train loss 0.4140746 +| epoch 4 | 4491/ 5600 batches | train loss 0.3970068 +| epoch 4 | 4495/ 5600 batches | train loss 0.3714907 +| epoch 4 | 4499/ 5600 batches | train loss 0.4568632 +| epoch 4 | 4503/ 5600 batches | train loss 0.4355394 +| epoch 4 | 4507/ 5600 batches | train loss 0.4984474 +| epoch 4 | 4511/ 5600 batches | train loss 0.3826451 +| epoch 4 | 4515/ 5600 batches | train loss 0.4070998 +| epoch 4 | 4519/ 5600 batches | train loss 0.4615196 +| epoch 4 | 4523/ 5600 batches | train loss 0.4000794 +| epoch 4 | 4527/ 5600 batches | train loss 0.3677313 +| epoch 4 | 4531/ 5600 batches | train loss 0.3905203 +| epoch 4 | 4535/ 5600 batches | train loss 0.3397551 +| epoch 4 | 4539/ 5600 batches | train loss 0.4862593 +| epoch 4 | 4543/ 5600 batches | train loss 0.4912583 +| epoch 4 | 4547/ 5600 batches | train loss 0.4513348 +| epoch 4 | 4551/ 5600 batches | train loss 0.4913911 +| epoch 4 | 4555/ 5600 batches | train loss 0.3618457 +| epoch 4 | 4559/ 5600 batches | train loss 0.3426997 +| epoch 4 | 4563/ 5600 batches | train loss 0.3878068 +| epoch 4 | 4567/ 5600 batches | train loss 0.3443245 +| epoch 4 | 4571/ 5600 batches | train loss 0.4127563 +| epoch 4 | 4575/ 5600 batches | train loss 0.4270853 +| epoch 4 | 4579/ 5600 batches | train loss 0.4237815 +| epoch 4 | 4583/ 5600 batches | train loss 0.4690081 +| epoch 4 | 4587/ 5600 batches | train loss 0.4197300 +| epoch 4 | 4591/ 5600 batches | train loss 0.4516849 +| epoch 4 | 4595/ 5600 batches | train loss 0.3507183 +| epoch 4 | 4599/ 5600 batches | train loss 0.3841026 +| epoch 4 | 4603/ 5600 batches | train loss 0.4903218 +| epoch 4 | 4607/ 5600 batches | train loss 0.4465202 +| epoch 4 | 4611/ 5600 batches | train loss 0.4643365 +| epoch 4 | 4615/ 5600 batches | train loss 0.4089835 +| epoch 4 | 4619/ 5600 batches | train loss 0.4405659 +| epoch 4 | 4623/ 5600 batches | train loss 0.3874714 +| epoch 4 | 4627/ 5600 batches | train loss 0.3750444 +| epoch 4 | 4631/ 5600 batches | train loss 0.4534487 +| epoch 4 | 4635/ 5600 batches | train loss 0.4195024 +| epoch 4 | 4639/ 5600 batches | train loss 0.4015315 +| epoch 4 | 4643/ 5600 batches | train loss 0.4839002 +| epoch 4 | 4647/ 5600 batches | train loss 0.3280555 +| epoch 4 | 4651/ 5600 batches | train loss 0.4634613 +| epoch 4 | 4655/ 5600 batches | train loss 0.4444900 +| epoch 4 | 4659/ 5600 batches | train loss 0.4894681 +| epoch 4 | 4663/ 5600 batches | train loss 0.4617490 +| epoch 4 | 4667/ 5600 batches | train loss 0.3887767 +| epoch 4 | 4671/ 5600 batches | train loss 0.3899638 +| epoch 4 | 4675/ 5600 batches | train loss 0.4517158 +| epoch 4 | 4679/ 5600 batches | train loss 0.4262219 +| epoch 4 | 4683/ 5600 batches | train loss 0.4353655 +| epoch 4 | 4687/ 5600 batches | train loss 0.3495166 +| epoch 4 | 4691/ 5600 batches | train loss 0.4495811 +| epoch 4 | 4695/ 5600 batches | train loss 0.4147326 +| epoch 4 | 4699/ 5600 batches | train loss 0.2569980 +| epoch 4 | 4703/ 5600 batches | train loss 0.3300332 +| epoch 4 | 4707/ 5600 batches | train loss 0.4223614 +| epoch 4 | 4711/ 5600 batches | train loss 0.3898373 +| epoch 4 | 4715/ 5600 batches | train loss 0.4232426 +| epoch 4 | 4719/ 5600 batches | train loss 0.4013793 +| epoch 4 | 4723/ 5600 batches | train loss 0.3764672 +| epoch 4 | 4727/ 5600 batches | train loss 0.4931320 +| epoch 4 | 4731/ 5600 batches | train loss 0.3993638 +| epoch 4 | 4735/ 5600 batches | train loss 0.4193130 +| epoch 4 | 4739/ 5600 batches | train loss 0.4272718 +| epoch 4 | 4743/ 5600 batches | train loss 0.3818889 +| epoch 4 | 4747/ 5600 batches | train loss 0.5058350 +| epoch 4 | 4751/ 5600 batches | train loss 0.4846981 +| epoch 4 | 4755/ 5600 batches | train loss 0.3706764 +| epoch 4 | 4759/ 5600 batches | train loss 0.3868483 +| epoch 4 | 4763/ 5600 batches | train loss 0.3399790 +| epoch 4 | 4767/ 5600 batches | train loss 0.4537531 +| epoch 4 | 4771/ 5600 batches | train loss 0.4091754 +| epoch 4 | 4775/ 5600 batches | train loss 0.4485268 +| epoch 4 | 4779/ 5600 batches | train loss 0.4502985 +| epoch 4 | 4783/ 5600 batches | train loss 0.5256407 +| epoch 4 | 4787/ 5600 batches | train loss 0.4068757 +| epoch 4 | 4791/ 5600 batches | train loss 0.4208389 +| epoch 4 | 4795/ 5600 batches | train loss 0.4208902 +| epoch 4 | 4799/ 5600 batches | train loss 0.4745704 +| epoch 4 | 4803/ 5600 batches | train loss 0.4328232 +| epoch 4 | 4807/ 5600 batches | train loss 0.4678311 +| epoch 4 | 4811/ 5600 batches | train loss 0.4577894 +| epoch 4 | 4815/ 5600 batches | train loss 0.4443228 +| epoch 4 | 4819/ 5600 batches | train loss 0.4937132 +| epoch 4 | 4823/ 5600 batches | train loss 0.4382493 +| epoch 4 | 4827/ 5600 batches | train loss 0.5234101 +| epoch 4 | 4831/ 5600 batches | train loss 0.3233472 +| epoch 4 | 4835/ 5600 batches | train loss 0.4459057 +| epoch 4 | 4839/ 5600 batches | train loss 0.3656731 +| epoch 4 | 4843/ 5600 batches | train loss 0.4541332 +| epoch 4 | 4847/ 5600 batches | train loss 0.3905385 +| epoch 4 | 4851/ 5600 batches | train loss 0.4452827 +| epoch 4 | 4855/ 5600 batches | train loss 0.4380552 +| epoch 4 | 4859/ 5600 batches | train loss 0.3572164 +| epoch 4 | 4863/ 5600 batches | train loss 0.2028315 +| epoch 4 | 4867/ 5600 batches | train loss 0.4369894 +| epoch 4 | 4871/ 5600 batches | train loss 0.4069335 +| epoch 4 | 4875/ 5600 batches | train loss 0.3207238 +| epoch 4 | 4879/ 5600 batches | train loss 0.4802381 +| epoch 4 | 4883/ 5600 batches | train loss 0.3920779 +| epoch 4 | 4887/ 5600 batches | train loss 0.3266039 +| epoch 4 | 4891/ 5600 batches | train loss 0.4173110 +| epoch 4 | 4895/ 5600 batches | train loss 0.4773352 +| epoch 4 | 4899/ 5600 batches | train loss 0.3429854 +| epoch 4 | 4903/ 5600 batches | train loss 0.4457224 +| epoch 4 | 4907/ 5600 batches | train loss 0.3531899 +| epoch 4 | 4911/ 5600 batches | train loss 0.4058663 +| epoch 4 | 4915/ 5600 batches | train loss 0.3789861 +| epoch 4 | 4919/ 5600 batches | train loss 0.5083046 +| epoch 4 | 4923/ 5600 batches | train loss 0.5626650 +| epoch 4 | 4927/ 5600 batches | train loss 0.4691098 +| epoch 4 | 4931/ 5600 batches | train loss 0.4676238 +| epoch 4 | 4935/ 5600 batches | train loss 0.3923465 +| epoch 4 | 4939/ 5600 batches | train loss 0.2753151 +| epoch 4 | 4943/ 5600 batches | train loss 0.4150183 +| epoch 4 | 4947/ 5600 batches | train loss 0.4754308 +| epoch 4 | 4951/ 5600 batches | train loss 0.3551332 +| epoch 4 | 4955/ 5600 batches | train loss 0.3309497 +| epoch 4 | 4959/ 5600 batches | train loss 0.4696305 +| epoch 4 | 4963/ 5600 batches | train loss 0.4081055 +| epoch 4 | 4967/ 5600 batches | train loss 0.3772191 +| epoch 4 | 4971/ 5600 batches | train loss 0.3764076 +| epoch 4 | 4975/ 5600 batches | train loss 0.3427544 +| epoch 4 | 4979/ 5600 batches | train loss 0.4186140 +| epoch 4 | 4983/ 5600 batches | train loss 0.4261135 +| epoch 4 | 4987/ 5600 batches | train loss 0.4761456 +| epoch 4 | 4991/ 5600 batches | train loss 0.3495618 +| epoch 4 | 4995/ 5600 batches | train loss 0.4338221 +| epoch 4 | 4999/ 5600 batches | train loss 0.4531689 +| epoch 4 | 5003/ 5600 batches | train loss 0.4646099 +| epoch 4 | 5007/ 5600 batches | train loss 0.4784964 +| epoch 4 | 5011/ 5600 batches | train loss 0.4912340 +| epoch 4 | 5015/ 5600 batches | train loss 0.3397743 +| epoch 4 | 5019/ 5600 batches | train loss 0.4186019 +| epoch 4 | 5023/ 5600 batches | train loss 0.3865629 +| epoch 4 | 5027/ 5600 batches | train loss 0.3664651 +| epoch 4 | 5031/ 5600 batches | train loss 0.3432852 +| epoch 4 | 5035/ 5600 batches | train loss 0.5016266 +| epoch 4 | 5039/ 5600 batches | train loss 0.4558254 +| epoch 4 | 5043/ 5600 batches | train loss 0.4566185 +| epoch 4 | 5047/ 5600 batches | train loss 0.4156425 +| epoch 4 | 5051/ 5600 batches | train loss 0.4268866 +| epoch 4 | 5055/ 5600 batches | train loss 0.4711205 +| epoch 4 | 5059/ 5600 batches | train loss 0.4942547 +| epoch 4 | 5063/ 5600 batches | train loss 0.4917006 +| epoch 4 | 5067/ 5600 batches | train loss 0.3839180 +| epoch 4 | 5071/ 5600 batches | train loss 0.4214194 +| epoch 4 | 5075/ 5600 batches | train loss 0.4108232 +| epoch 4 | 5079/ 5600 batches | train loss 0.4210067 +| epoch 4 | 5083/ 5600 batches | train loss 0.4488967 +| epoch 4 | 5087/ 5600 batches | train loss 0.3741376 +| epoch 4 | 5091/ 5600 batches | train loss 0.3711369 +| epoch 4 | 5095/ 5600 batches | train loss 0.3515655 +| epoch 4 | 5099/ 5600 batches | train loss 0.4789954 +| epoch 4 | 5103/ 5600 batches | train loss 0.3788283 +| epoch 4 | 5107/ 5600 batches | train loss 0.4401448 +| epoch 4 | 5111/ 5600 batches | train loss 0.4033214 +| epoch 4 | 5115/ 5600 batches | train loss 0.4705900 +| epoch 4 | 5119/ 5600 batches | train loss 0.3495463 +| epoch 4 | 5123/ 5600 batches | train loss 0.5333876 +| epoch 4 | 5127/ 5600 batches | train loss 0.3830968 +| epoch 4 | 5131/ 5600 batches | train loss 0.4187909 +| epoch 4 | 5135/ 5600 batches | train loss 0.3976463 +| epoch 4 | 5139/ 5600 batches | train loss 0.4360785 +| epoch 4 | 5143/ 5600 batches | train loss 0.3495827 +| epoch 4 | 5147/ 5600 batches | train loss 0.4856758 +| epoch 4 | 5151/ 5600 batches | train loss 0.4075162 +| epoch 4 | 5155/ 5600 batches | train loss 0.4898684 +| epoch 4 | 5159/ 5600 batches | train loss 0.4261782 +| epoch 4 | 5163/ 5600 batches | train loss 0.4002273 +| epoch 4 | 5167/ 5600 batches | train loss 0.4940185 +| epoch 4 | 5171/ 5600 batches | train loss 0.4353552 +| epoch 4 | 5175/ 5600 batches | train loss 0.4270639 +| epoch 4 | 5179/ 5600 batches | train loss 0.4684217 +| epoch 4 | 5183/ 5600 batches | train loss 0.4345016 +| epoch 4 | 5187/ 5600 batches | train loss 0.4016841 +| epoch 4 | 5191/ 5600 batches | train loss 0.4189940 +| epoch 4 | 5195/ 5600 batches | train loss 0.4619498 +| epoch 4 | 5199/ 5600 batches | train loss 0.4824176 +| epoch 4 | 5203/ 5600 batches | train loss 0.4281877 +| epoch 4 | 5207/ 5600 batches | train loss 0.4242949 +| epoch 4 | 5211/ 5600 batches | train loss 0.4044057 +| epoch 4 | 5215/ 5600 batches | train loss 0.4483515 +| epoch 4 | 5219/ 5600 batches | train loss 0.3490138 +| epoch 4 | 5223/ 5600 batches | train loss 0.4334035 +| epoch 4 | 5227/ 5600 batches | train loss 0.4285215 +| epoch 4 | 5231/ 5600 batches | train loss 0.4368865 +| epoch 4 | 5235/ 5600 batches | train loss 0.4239761 +| epoch 4 | 5239/ 5600 batches | train loss 0.4416373 +| epoch 4 | 5243/ 5600 batches | train loss 0.4609623 +| epoch 4 | 5247/ 5600 batches | train loss 0.4311654 +| epoch 4 | 5251/ 5600 batches | train loss 0.4136057 +| epoch 4 | 5255/ 5600 batches | train loss 0.4829032 +| epoch 4 | 5259/ 5600 batches | train loss 0.4198115 +| epoch 4 | 5263/ 5600 batches | train loss 0.5047043 +| epoch 4 | 5267/ 5600 batches | train loss 0.4495648 +| epoch 4 | 5271/ 5600 batches | train loss 0.4061282 +| epoch 4 | 5275/ 5600 batches | train loss 0.5426581 +| epoch 4 | 5279/ 5600 batches | train loss 0.3965868 +| epoch 4 | 5283/ 5600 batches | train loss 0.3483573 +| epoch 4 | 5287/ 5600 batches | train loss 0.4279841 +| epoch 4 | 5291/ 5600 batches | train loss 0.4341898 +| epoch 4 | 5295/ 5600 batches | train loss 0.4482865 +| epoch 4 | 5299/ 5600 batches | train loss 0.4177467 +| epoch 4 | 5303/ 5600 batches | train loss 0.4226302 +| epoch 4 | 5307/ 5600 batches | train loss 0.4502456 +| epoch 4 | 5311/ 5600 batches | train loss 0.4517104 +| epoch 4 | 5315/ 5600 batches | train loss 0.4638633 +| epoch 4 | 5319/ 5600 batches | train loss 0.4242934 +| epoch 4 | 5323/ 5600 batches | train loss 0.3974489 +| epoch 4 | 5327/ 5600 batches | train loss 0.4528676 +| epoch 4 | 5331/ 5600 batches | train loss 0.3222722 +| epoch 4 | 5335/ 5600 batches | train loss 0.3623275 +| epoch 4 | 5339/ 5600 batches | train loss 0.4084020 +| epoch 4 | 5343/ 5600 batches | train loss 0.5078623 +| epoch 4 | 5347/ 5600 batches | train loss 0.3767483 +| epoch 4 | 5351/ 5600 batches | train loss 0.4216591 +| epoch 4 | 5355/ 5600 batches | train loss 0.4010594 +| epoch 4 | 5359/ 5600 batches | train loss 0.4705051 +| epoch 4 | 5363/ 5600 batches | train loss 0.4322278 +| epoch 4 | 5367/ 5600 batches | train loss 0.3908370 +| epoch 4 | 5371/ 5600 batches | train loss 0.3437360 +| epoch 4 | 5375/ 5600 batches | train loss 0.4409982 +| epoch 4 | 5379/ 5600 batches | train loss 0.3996776 +| epoch 4 | 5383/ 5600 batches | train loss 0.4667052 +| epoch 4 | 5387/ 5600 batches | train loss 0.3869478 +| epoch 4 | 5391/ 5600 batches | train loss 0.4275803 +| epoch 4 | 5395/ 5600 batches | train loss 0.4450536 +| epoch 4 | 5399/ 5600 batches | train loss 0.4506323 +| epoch 4 | 5403/ 5600 batches | train loss 0.3808880 +| epoch 4 | 5407/ 5600 batches | train loss 0.5324184 +| epoch 4 | 5411/ 5600 batches | train loss 0.4882976 +| epoch 4 | 5415/ 5600 batches | train loss 0.4094397 +| epoch 4 | 5419/ 5600 batches | train loss 0.4017540 +| epoch 4 | 5423/ 5600 batches | train loss 0.4612086 +| epoch 4 | 5427/ 5600 batches | train loss 0.4349952 +| epoch 4 | 5431/ 5600 batches | train loss 0.4153213 +| epoch 4 | 5435/ 5600 batches | train loss 0.4182113 +| epoch 4 | 5439/ 5600 batches | train loss 0.4019443 +| epoch 4 | 5443/ 5600 batches | train loss 0.4334211 +| epoch 4 | 5447/ 5600 batches | train loss 0.4161104 +| epoch 4 | 5451/ 5600 batches | train loss 0.4263108 +| epoch 4 | 5455/ 5600 batches | train loss 0.4733092 +| epoch 4 | 5459/ 5600 batches | train loss 0.3774169 +| epoch 4 | 5463/ 5600 batches | train loss 0.4070619 +| epoch 4 | 5467/ 5600 batches | train loss 0.3859914 +| epoch 4 | 5471/ 5600 batches | train loss 0.4984971 +| epoch 4 | 5475/ 5600 batches | train loss 0.4159802 +| epoch 4 | 5479/ 5600 batches | train loss 0.3472490 +| epoch 4 | 5483/ 5600 batches | train loss 0.2602067 +| epoch 4 | 5487/ 5600 batches | train loss 0.4227171 +| epoch 4 | 5491/ 5600 batches | train loss 0.3287062 +| epoch 4 | 5495/ 5600 batches | train loss 0.3112259 +| epoch 4 | 5499/ 5600 batches | train loss 0.4760274 +| epoch 4 | 5503/ 5600 batches | train loss 0.4426559 +| epoch 4 | 5507/ 5600 batches | train loss 0.3275038 +| epoch 4 | 5511/ 5600 batches | train loss 0.3895199 +| epoch 4 | 5515/ 5600 batches | train loss 0.4717566 +| epoch 4 | 5519/ 5600 batches | train loss 0.4883730 +| epoch 4 | 5523/ 5600 batches | train loss 0.3597309 +| epoch 4 | 5527/ 5600 batches | train loss 0.4684640 +| epoch 4 | 5531/ 5600 batches | train loss 0.4612840 +| epoch 4 | 5535/ 5600 batches | train loss 0.3543081 +| epoch 4 | 5539/ 5600 batches | train loss 0.4919890 +| epoch 4 | 5543/ 5600 batches | train loss 0.3611801 +| epoch 4 | 5547/ 5600 batches | train loss 0.4965935 +| epoch 4 | 5551/ 5600 batches | train loss 0.3438227 +| epoch 4 | 5555/ 5600 batches | train loss 0.4352090 +| epoch 4 | 5559/ 5600 batches | train loss 0.3790570 +| epoch 4 | 5563/ 5600 batches | train loss 0.4453685 +| epoch 4 | 5567/ 5600 batches | train loss 0.4092085 +| epoch 4 | 5571/ 5600 batches | train loss 0.4344825 +| epoch 4 | 5575/ 5600 batches | train loss 0.3753543 +| epoch 4 | 5579/ 5600 batches | train loss 0.3969894 +| epoch 4 | 5583/ 5600 batches | train loss 0.3942000 +| epoch 4 | 5587/ 5600 batches | train loss 0.3705792 +| epoch 4 | 5591/ 5600 batches | train loss 0.4277799 +| epoch 4 | 5595/ 5600 batches | train loss 0.4228869 +| epoch 4 | 5599/ 5600 batches | train loss 0.4254353 +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 5600 batches | test loss 0.5165914 +| epoch 4 | 7/ 5600 batches | test loss 0.4159205 +| epoch 4 | 11/ 5600 batches | test loss 0.4224656 +| epoch 4 | 15/ 5600 batches | test loss 0.3542475 +| epoch 4 | 19/ 5600 batches | test loss 0.3786939 +| epoch 4 | 23/ 5600 batches | test loss 0.4173783 +| epoch 4 | 27/ 5600 batches | test loss 0.4129690 +| epoch 4 | 31/ 5600 batches | test loss 0.4640586 +| epoch 4 | 35/ 5600 batches | test loss 0.4407847 +| epoch 4 | 39/ 5600 batches | test loss 0.4773883 +| epoch 4 | 43/ 5600 batches | test loss 0.4026064 +| epoch 4 | 47/ 5600 batches | test loss 0.3916979 +| epoch 4 | 51/ 5600 batches | test loss 0.3943694 +| epoch 4 | 55/ 5600 batches | test loss 0.3971351 +| epoch 4 | 59/ 5600 batches | test loss 0.5110446 +| epoch 4 | 63/ 5600 batches | test loss 0.4336498 +| epoch 4 | 67/ 5600 batches | test loss 0.4276029 +| epoch 4 | 71/ 5600 batches | test loss 0.3291350 +| epoch 4 | 75/ 5600 batches | test loss 0.4666152 +| epoch 4 | 79/ 5600 batches | test loss 0.4872098 +| epoch 4 | 83/ 5600 batches | test loss 0.5102315 +| epoch 4 | 87/ 5600 batches | test loss 0.4583023 +| epoch 4 | 91/ 5600 batches | test loss 0.5348284 +| epoch 4 | 95/ 5600 batches | test loss 0.3956492 +| epoch 4 | 99/ 5600 batches | test loss 0.3656137 +| epoch 4 | 103/ 5600 batches | test loss 0.3212606 +| epoch 4 | 107/ 5600 batches | test loss 0.3807884 +| epoch 4 | 111/ 5600 batches | test loss 0.4119689 +| epoch 4 | 115/ 5600 batches | test loss 0.4355831 +| epoch 4 | 119/ 5600 batches | test loss 0.3758251 +| epoch 4 | 123/ 5600 batches | test loss 0.4298605 +| epoch 4 | 127/ 5600 batches | test loss 0.4412967 +| epoch 4 | 131/ 5600 batches | test loss 0.4632340 +| epoch 4 | 135/ 5600 batches | test loss 0.5670024 +| epoch 4 | 139/ 5600 batches | test loss 0.3946738 +| epoch 4 | 143/ 5600 batches | test loss 0.3646989 +| epoch 4 | 147/ 5600 batches | test loss 0.4849436 +| epoch 4 | 151/ 5600 batches | test loss 0.4051852 +| epoch 4 | 155/ 5600 batches | test loss 0.5153507 +| epoch 4 | 159/ 5600 batches | test loss 0.3703719 +| epoch 4 | 163/ 5600 batches | test loss 0.3923273 +| epoch 4 | 167/ 5600 batches | test loss 0.4819953 +| epoch 4 | 171/ 5600 batches | test loss 0.4573792 +| epoch 4 | 175/ 5600 batches | test loss 0.4737141 +| epoch 4 | 179/ 5600 batches | test loss 0.3950037 +| epoch 4 | 183/ 5600 batches | test loss 0.4275668 +| epoch 4 | 187/ 5600 batches | test loss 0.3839754 +| epoch 4 | 191/ 5600 batches | test loss 0.3540651 +| epoch 4 | 195/ 5600 batches | test loss 0.4461639 +| epoch 4 | 199/ 5600 batches | test loss 0.3895285 +| epoch 4 | 203/ 5600 batches | test loss 0.3801166 +| epoch 4 | 207/ 5600 batches | test loss 0.3809010 +| epoch 4 | 211/ 5600 batches | test loss 0.4209154 +| epoch 4 | 215/ 5600 batches | test loss 0.4631692 +| epoch 4 | 219/ 5600 batches | test loss 0.4158039 +| epoch 4 | 223/ 5600 batches | test loss 0.4189997 +| epoch 4 | 227/ 5600 batches | test loss 0.4578183 +| epoch 4 | 231/ 5600 batches | test loss 0.3811213 +| epoch 4 | 235/ 5600 batches | test loss 0.4522924 +| epoch 4 | 239/ 5600 batches | test loss 0.4313683 +| epoch 4 | 243/ 5600 batches | test loss 0.4153805 +| epoch 4 | 247/ 5600 batches | test loss 0.4808919 +| epoch 4 | 251/ 5600 batches | test loss 0.4388263 +| epoch 4 | 255/ 5600 batches | test loss 0.3934895 +| epoch 4 | 259/ 5600 batches | test loss 0.4546852 +| epoch 4 | 263/ 5600 batches | test loss 0.3957095 +| epoch 4 | 267/ 5600 batches | test loss 0.3921653 +| epoch 4 | 271/ 5600 batches | test loss 0.3991177 +| epoch 4 | 275/ 5600 batches | test loss 0.4529243 +| epoch 4 | 279/ 5600 batches | test loss 0.4964595 +| epoch 4 | 283/ 5600 batches | test loss 0.4563612 +| epoch 4 | 287/ 5600 batches | test loss 0.3966023 +| epoch 4 | 291/ 5600 batches | test loss 0.3557989 +| epoch 4 | 295/ 5600 batches | test loss 0.4027722 +| epoch 4 | 299/ 5600 batches | test loss 0.4340894 +| epoch 4 | 303/ 5600 batches | test loss 0.4896688 +| epoch 4 | 307/ 5600 batches | test loss 0.4853403 +| epoch 4 | 311/ 5600 batches | test loss 0.4963675 +| epoch 4 | 315/ 5600 batches | test loss 0.5037805 +| epoch 4 | 319/ 5600 batches | test loss 0.4482979 +| epoch 4 | 323/ 5600 batches | test loss 0.3441829 +| epoch 4 | 327/ 5600 batches | test loss 0.3923096 +| epoch 4 | 331/ 5600 batches | test loss 0.4819134 +| epoch 4 | 335/ 5600 batches | test loss 0.4609858 +| epoch 4 | 339/ 5600 batches | test loss 0.5412454 +| epoch 4 | 343/ 5600 batches | test loss 0.4545707 +| epoch 4 | 347/ 5600 batches | test loss 0.4607913 +| epoch 4 | 351/ 5600 batches | test loss 0.4975950 +| epoch 4 | 355/ 5600 batches | test loss 0.3692733 +| epoch 4 | 359/ 5600 batches | test loss 0.4417816 +| epoch 4 | 363/ 5600 batches | test loss 0.5095773 +| epoch 4 | 367/ 5600 batches | test loss 0.3861354 +| epoch 4 | 371/ 5600 batches | test loss 0.5016660 +| epoch 4 | 375/ 5600 batches | test loss 0.4752990 +| epoch 4 | 379/ 5600 batches | test loss 0.4132770 +| epoch 4 | 383/ 5600 batches | test loss 0.5431440 +| epoch 4 | 387/ 5600 batches | test loss 0.6454766 +| epoch 4 | 391/ 5600 batches | test loss 0.5002429 +| epoch 4 | 395/ 5600 batches | test loss 0.5226164 +| epoch 4 | 399/ 5600 batches | test loss 0.4997888 +| epoch 4 | 403/ 5600 batches | test loss 0.3964275 +| epoch 4 | 407/ 5600 batches | test loss 0.5299270 +| epoch 4 | 411/ 5600 batches | test loss 0.4389933 +| epoch 4 | 415/ 5600 batches | test loss 0.5059285 +| epoch 4 | 419/ 5600 batches | test loss 0.4942662 +| epoch 4 | 423/ 5600 batches | test loss 0.4569088 +| epoch 4 | 427/ 5600 batches | test loss 0.4551115 +| epoch 4 | 431/ 5600 batches | test loss 0.3838636 +| epoch 4 | 435/ 5600 batches | test loss 0.4655537 +| epoch 4 | 439/ 5600 batches | test loss 0.5375592 +| epoch 4 | 443/ 5600 batches | test loss 0.3904420 +| epoch 4 | 447/ 5600 batches | test loss 0.3810907 +| epoch 4 | 451/ 5600 batches | test loss 0.3750379 +| epoch 4 | 455/ 5600 batches | test loss 0.4053518 +| epoch 4 | 459/ 5600 batches | test loss 0.5277765 +| epoch 4 | 463/ 5600 batches | test loss 0.3883867 +| epoch 4 | 467/ 5600 batches | test loss 0.4722278 +| epoch 4 | 471/ 5600 batches | test loss 0.3518978 +| epoch 4 | 475/ 5600 batches | test loss 0.4337136 +| epoch 4 | 479/ 5600 batches | test loss 0.3619617 +| epoch 4 | 483/ 5600 batches | test loss 0.3965790 +| epoch 4 | 487/ 5600 batches | test loss 0.5007990 +| epoch 4 | 491/ 5600 batches | test loss 0.4031104 +| epoch 4 | 495/ 5600 batches | test loss 0.3514317 +| epoch 4 | 499/ 5600 batches | test loss 0.4956316 +| epoch 4 | 503/ 5600 batches | test loss 0.3988275 +| epoch 4 | 507/ 5600 batches | test loss 0.4952898 +| epoch 4 | 511/ 5600 batches | test loss 0.4430682 +| epoch 4 | 515/ 5600 batches | test loss 0.4222052 +| epoch 4 | 519/ 5600 batches | test loss 0.4982325 +| epoch 4 | 523/ 5600 batches | test loss 0.3870565 +| epoch 4 | 527/ 5600 batches | test loss 0.4295225 +| epoch 4 | 531/ 5600 batches | test loss 0.4602397 +| epoch 4 | 535/ 5600 batches | test loss 0.4529338 +| epoch 4 | 539/ 5600 batches | test loss 0.2922752 +| epoch 4 | 543/ 5600 batches | test loss 0.4204811 +| epoch 4 | 547/ 5600 batches | test loss 0.3732824 +| epoch 4 | 551/ 5600 batches | test loss 0.3659911 +| epoch 4 | 555/ 5600 batches | test loss 0.3954372 +| epoch 4 | 559/ 5600 batches | test loss 0.4916757 +| epoch 4 | 563/ 5600 batches | test loss 0.4361146 +| epoch 4 | 567/ 5600 batches | test loss 0.4890849 +| epoch 4 | 571/ 5600 batches | test loss 0.3432298 +| epoch 4 | 575/ 5600 batches | test loss 0.4755968 +| epoch 4 | 579/ 5600 batches | test loss 0.4908455 +| epoch 4 | 583/ 5600 batches | test loss 0.4130588 +| epoch 4 | 587/ 5600 batches | test loss 0.3104971 +| epoch 4 | 591/ 5600 batches | test loss 0.3603351 +| epoch 4 | 595/ 5600 batches | test loss 0.3886356 +| epoch 4 | 599/ 5600 batches | test loss 0.3606012 +| epoch 4 | 603/ 5600 batches | test loss 0.4606803 +| epoch 4 | 607/ 5600 batches | test loss 0.4899252 +| epoch 4 | 611/ 5600 batches | test loss 0.3539377 +| epoch 4 | 615/ 5600 batches | test loss 0.3878108 +| epoch 4 | 619/ 5600 batches | test loss 0.3933648 +| epoch 4 | 623/ 5600 batches | test loss 0.4512950 +| epoch 4 | 627/ 5600 batches | test loss 0.3807774 +| epoch 4 | 631/ 5600 batches | test loss 0.2771091 +| epoch 4 | 635/ 5600 batches | test loss 0.3553968 +| epoch 4 | 639/ 5600 batches | test loss 0.4651661 +| epoch 4 | 643/ 5600 batches | test loss 0.4220745 +| epoch 4 | 647/ 5600 batches | test loss 0.4982687 +| epoch 4 | 651/ 5600 batches | test loss 0.4539423 +| epoch 4 | 655/ 5600 batches | test loss 0.4236872 +| epoch 4 | 659/ 5600 batches | test loss 0.4456952 +| epoch 4 | 663/ 5600 batches | test loss 0.4304309 +| epoch 4 | 667/ 5600 batches | test loss 0.7215745 +| epoch 4 | 671/ 5600 batches | test loss 0.4521405 +| epoch 4 | 675/ 5600 batches | test loss 0.3718376 +| epoch 4 | 679/ 5600 batches | test loss 0.4334990 +| epoch 4 | 683/ 5600 batches | test loss 0.4054602 +| epoch 4 | 687/ 5600 batches | test loss 0.3990330 +| epoch 4 | 691/ 5600 batches | test loss 0.4076822 +| epoch 4 | 695/ 5600 batches | test loss 0.4314246 +| epoch 4 | 699/ 5600 batches | test loss 0.3588203 +| epoch 4 | 703/ 5600 batches | test loss 0.3952065 +| epoch 4 | 707/ 5600 batches | test loss 0.5067146 +| epoch 4 | 711/ 5600 batches | test loss 0.3766012 +| epoch 4 | 715/ 5600 batches | test loss 0.4472233 +| epoch 4 | 719/ 5600 batches | test loss 0.3817550 +| epoch 4 | 723/ 5600 batches | test loss 0.5005305 +| epoch 4 | 727/ 5600 batches | test loss 0.5431545 +| epoch 4 | 731/ 5600 batches | test loss 0.5312911 +| epoch 4 | 735/ 5600 batches | test loss 0.3977045 +| epoch 4 | 739/ 5600 batches | test loss 0.3577456 +| epoch 4 | 743/ 5600 batches | test loss 0.4184462 +| epoch 4 | 747/ 5600 batches | test loss 0.5347546 +| epoch 4 | 751/ 5600 batches | test loss 0.4195858 +| epoch 4 | 755/ 5600 batches | test loss 0.4225383 +| epoch 4 | 759/ 5600 batches | test loss 0.4356490 +| epoch 4 | 763/ 5600 batches | test loss 0.3805966 +| epoch 4 | 767/ 5600 batches | test loss 0.4167029 +| epoch 4 | 771/ 5600 batches | test loss 0.4545458 +| epoch 4 | 775/ 5600 batches | test loss 0.4342594 +| epoch 4 | 779/ 5600 batches | test loss 0.4714155 +| epoch 4 | 783/ 5600 batches | test loss 0.3889319 +| epoch 4 | 787/ 5600 batches | test loss 0.4512052 +| epoch 4 | 791/ 5600 batches | test loss 0.4001508 +| epoch 4 | 795/ 5600 batches | test loss 0.4335694 +| epoch 4 | 799/ 5600 batches | test loss 0.4479841 +| epoch 4 | 803/ 5600 batches | test loss 0.3731875 +| epoch 4 | 807/ 5600 batches | test loss 0.4658583 +| epoch 4 | 811/ 5600 batches | test loss 0.3445699 +| epoch 4 | 815/ 5600 batches | test loss 0.4400841 +| epoch 4 | 819/ 5600 batches | test loss 0.3910393 +| epoch 4 | 823/ 5600 batches | test loss 0.4756531 +| epoch 4 | 827/ 5600 batches | test loss 0.5102243 +| epoch 4 | 831/ 5600 batches | test loss 0.4504604 +| epoch 4 | 835/ 5600 batches | test loss 0.5242466 +| epoch 4 | 839/ 5600 batches | test loss 0.4148566 +| epoch 4 | 843/ 5600 batches | test loss 0.4147806 +| epoch 4 | 847/ 5600 batches | test loss 0.4439313 +| epoch 4 | 851/ 5600 batches | test loss 0.1622443 +| epoch 4 | 855/ 5600 batches | test loss 0.4862636 +| epoch 4 | 859/ 5600 batches | test loss 0.4189391 +| epoch 4 | 863/ 5600 batches | test loss 0.4721814 +| epoch 4 | 867/ 5600 batches | test loss 0.4023208 +| epoch 4 | 871/ 5600 batches | test loss 0.5484167 +| epoch 4 | 875/ 5600 batches | test loss 0.4193547 +| epoch 4 | 879/ 5600 batches | test loss 0.4732963 +| epoch 4 | 883/ 5600 batches | test loss 0.3931126 +| epoch 4 | 887/ 5600 batches | test loss 0.5263203 +| epoch 4 | 891/ 5600 batches | test loss 0.5840617 +| epoch 4 | 895/ 5600 batches | test loss 0.4912588 +| epoch 4 | 899/ 5600 batches | test loss 0.3489895 +| epoch 4 | 903/ 5600 batches | test loss 0.4604193 +| epoch 4 | 907/ 5600 batches | test loss 0.4029127 +| epoch 4 | 911/ 5600 batches | test loss 0.4815272 +| epoch 4 | 915/ 5600 batches | test loss 0.4741418 +| epoch 4 | 919/ 5600 batches | test loss 0.6948498 +| epoch 4 | 923/ 5600 batches | test loss 0.4863395 +| epoch 4 | 927/ 5600 batches | test loss 0.4169327 +| epoch 4 | 931/ 5600 batches | test loss 0.4798383 +| epoch 4 | 935/ 5600 batches | test loss 0.3968399 +| epoch 4 | 939/ 5600 batches | test loss 0.4428348 +| epoch 4 | 943/ 5600 batches | test loss 0.3808122 +| epoch 4 | 947/ 5600 batches | test loss 0.4646429 +| epoch 4 | 951/ 5600 batches | test loss 0.3613649 +| epoch 4 | 955/ 5600 batches | test loss 0.4207139 +| epoch 4 | 959/ 5600 batches | test loss 0.4644191 +| epoch 4 | 963/ 5600 batches | test loss 0.4911074 +| epoch 4 | 967/ 5600 batches | test loss 0.4051770 +| epoch 4 | 971/ 5600 batches | test loss 0.4661658 +| epoch 4 | 975/ 5600 batches | test loss 0.4542540 +| epoch 4 | 979/ 5600 batches | test loss 0.4643370 +| epoch 4 | 983/ 5600 batches | test loss 0.4135166 +| epoch 4 | 987/ 5600 batches | test loss 0.5113339 +| epoch 4 | 991/ 5600 batches | test loss 0.4031994 +| epoch 4 | 995/ 5600 batches | test loss 0.4382612 +| epoch 4 | 999/ 5600 batches | test loss 0.4118186 +| epoch 4 | 1003/ 5600 batches | test loss 0.4786478 +| epoch 4 | 1007/ 5600 batches | test loss 0.5093501 +| epoch 4 | 1011/ 5600 batches | test loss 0.4210042 +| epoch 4 | 1015/ 5600 batches | test loss 0.4402992 +| epoch 4 | 1019/ 5600 batches | test loss 0.3767397 +| epoch 4 | 1023/ 5600 batches | test loss 0.4548357 +| epoch 4 | 1027/ 5600 batches | test loss 0.3341192 +| epoch 4 | 1031/ 5600 batches | test loss 0.4914901 +| epoch 4 | 1035/ 5600 batches | test loss 0.4573094 +| epoch 4 | 1039/ 5600 batches | test loss 0.4208323 +| epoch 4 | 1043/ 5600 batches | test loss 0.4026371 +| epoch 4 | 1047/ 5600 batches | test loss 0.4491553 +| epoch 4 | 1051/ 5600 batches | test loss 0.4978928 +| epoch 4 | 1055/ 5600 batches | test loss 0.4140821 +| epoch 4 | 1059/ 5600 batches | test loss 0.3787636 +| epoch 4 | 1063/ 5600 batches | test loss 0.4294561 +| epoch 4 | 1067/ 5600 batches | test loss 0.3928729 +| epoch 4 | 1071/ 5600 batches | test loss 0.3755994 +| epoch 4 | 1075/ 5600 batches | test loss 0.5235994 +| epoch 4 | 1079/ 5600 batches | test loss 0.4469902 +| epoch 4 | 1083/ 5600 batches | test loss 0.4391637 +| epoch 4 | 1087/ 5600 batches | test loss 0.4309894 +| epoch 4 | 1091/ 5600 batches | test loss 0.4724521 +| epoch 4 | 1095/ 5600 batches | test loss 0.3948251 +| epoch 4 | 1099/ 5600 batches | test loss 0.3017361 +| epoch 4 | 1103/ 5600 batches | test loss 0.5313438 +| epoch 4 | 1107/ 5600 batches | test loss 0.3530856 +| epoch 4 | 1111/ 5600 batches | test loss 0.4910898 +| epoch 4 | 1115/ 5600 batches | test loss 0.3992636 +| epoch 4 | 1119/ 5600 batches | test loss 0.4191832 +| epoch 4 | 1123/ 5600 batches | test loss 0.4148597 +| epoch 4 | 1127/ 5600 batches | test loss 0.3873601 +| epoch 4 | 1131/ 5600 batches | test loss 0.4654425 +| epoch 4 | 1135/ 5600 batches | test loss 0.5702920 +| epoch 4 | 1139/ 5600 batches | test loss 0.4249565 +| epoch 4 | 1143/ 5600 batches | test loss 0.4480814 +| epoch 4 | 1147/ 5600 batches | test loss 0.4027455 +| epoch 4 | 1151/ 5600 batches | test loss 0.5119308 +| epoch 4 | 1155/ 5600 batches | test loss 0.3622889 +| epoch 4 | 1159/ 5600 batches | test loss 0.4090759 +| epoch 4 | 1163/ 5600 batches | test loss 0.4354804 +| epoch 4 | 1167/ 5600 batches | test loss 0.5008045 +| epoch 4 | 1171/ 5600 batches | test loss 0.4076882 +| epoch 4 | 1175/ 5600 batches | test loss 0.4125363 +| epoch 4 | 1179/ 5600 batches | test loss 0.3746873 +| epoch 4 | 1183/ 5600 batches | test loss 0.4406612 +| epoch 4 | 1187/ 5600 batches | test loss 0.4155016 +| epoch 4 | 1191/ 5600 batches | test loss 0.4157431 +| epoch 4 | 1195/ 5600 batches | test loss 0.3824373 +| epoch 4 | 1199/ 5600 batches | test loss 0.4352043 +| epoch 4 | 1203/ 5600 batches | test loss 0.3576038 +| epoch 4 | 1207/ 5600 batches | test loss 0.3752018 +| epoch 4 | 1211/ 5600 batches | test loss 0.4084132 +| epoch 4 | 1215/ 5600 batches | test loss 0.3685874 +| epoch 4 | 1219/ 5600 batches | test loss 0.4122378 +| epoch 4 | 1223/ 5600 batches | test loss 0.4913263 +| epoch 4 | 1227/ 5600 batches | test loss 0.4748068 +| epoch 4 | 1231/ 5600 batches | test loss 0.4519202 +| epoch 4 | 1235/ 5600 batches | test loss 0.4412012 +| epoch 4 | 1239/ 5600 batches | test loss 0.4461904 +| epoch 4 | 1243/ 5600 batches | test loss 0.3740182 +| epoch 4 | 1247/ 5600 batches | test loss 0.4675402 +| epoch 4 | 1251/ 5600 batches | test loss 0.4089135 +| epoch 4 | 1255/ 5600 batches | test loss 0.4778261 +| epoch 4 | 1259/ 5600 batches | test loss 0.4325980 +| epoch 4 | 1263/ 5600 batches | test loss 0.4227557 +| epoch 4 | 1267/ 5600 batches | test loss 0.4288206 +| epoch 4 | 1271/ 5600 batches | test loss 0.4676401 +| epoch 4 | 1275/ 5600 batches | test loss 0.2880392 +| epoch 4 | 1279/ 5600 batches | test loss 0.4566912 +| epoch 4 | 1283/ 5600 batches | test loss 0.4148724 +| epoch 4 | 1287/ 5600 batches | test loss 0.4770549 +| epoch 4 | 1291/ 5600 batches | test loss 0.4729697 +| epoch 4 | 1295/ 5600 batches | test loss 0.4587139 +| epoch 4 | 1299/ 5600 batches | test loss 0.3860965 +| epoch 4 | 1303/ 5600 batches | test loss 0.4117003 +| epoch 4 | 1307/ 5600 batches | test loss 0.4606419 +| epoch 4 | 1311/ 5600 batches | test loss 0.3849129 +| epoch 4 | 1315/ 5600 batches | test loss 0.4106176 +| epoch 4 | 1319/ 5600 batches | test loss 0.4501970 +| epoch 4 | 1323/ 5600 batches | test loss 0.5292493 +| epoch 4 | 1327/ 5600 batches | test loss 0.3621894 +| epoch 4 | 1331/ 5600 batches | test loss 0.4110987 +| epoch 4 | 1335/ 5600 batches | test loss 0.3781797 +| epoch 4 | 1339/ 5600 batches | test loss 0.4823054 +| epoch 4 | 1343/ 5600 batches | test loss 0.4829046 +| epoch 4 | 1347/ 5600 batches | test loss 0.4385372 +| epoch 4 | 1351/ 5600 batches | test loss 0.4400028 +| epoch 4 | 1355/ 5600 batches | test loss 0.4029761 +| epoch 4 | 1359/ 5600 batches | test loss 0.4348590 +| epoch 4 | 1363/ 5600 batches | test loss 0.5035089 +| epoch 4 | 1367/ 5600 batches | test loss 0.4689191 +| epoch 4 | 1371/ 5600 batches | test loss 0.4272471 +| epoch 4 | 1375/ 5600 batches | test loss 0.5065456 +| epoch 4 | 1379/ 5600 batches | test loss 0.3614348 +| epoch 4 | 1383/ 5600 batches | test loss 0.8161749 +| epoch 4 | 1387/ 5600 batches | test loss 0.4822157 +| epoch 4 | 1391/ 5600 batches | test loss 0.4037264 +| epoch 4 | 1395/ 5600 batches | test loss 0.4050440 +| epoch 4 | 1399/ 5600 batches | test loss 0.4884870 +| epoch 4 | final test loss 0.4353, save model! +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 5600 batches | train loss 0.3411058 +| epoch 5 | 7/ 5600 batches | train loss 0.4326919 +| epoch 5 | 11/ 5600 batches | train loss 0.4530209 +| epoch 5 | 15/ 5600 batches | train loss 0.3913167 +| epoch 5 | 19/ 5600 batches | train loss 0.3939140 +| epoch 5 | 23/ 5600 batches | train loss 0.4560288 +| epoch 5 | 27/ 5600 batches | train loss 0.4465972 +| epoch 5 | 31/ 5600 batches | train loss 0.3867609 +| epoch 5 | 35/ 5600 batches | train loss 0.3675328 +| epoch 5 | 39/ 5600 batches | train loss 0.4281596 +| epoch 5 | 43/ 5600 batches | train loss 0.4766108 +| epoch 5 | 47/ 5600 batches | train loss 0.3955135 +| epoch 5 | 51/ 5600 batches | train loss 0.4092509 +| epoch 5 | 55/ 5600 batches | train loss 0.3650393 +| epoch 5 | 59/ 5600 batches | train loss 0.4185401 +| epoch 5 | 63/ 5600 batches | train loss 0.4407626 +| epoch 5 | 67/ 5600 batches | train loss 0.3235786 +| epoch 5 | 71/ 5600 batches | train loss 0.3672578 +| epoch 5 | 75/ 5600 batches | train loss 0.4486640 +| epoch 5 | 79/ 5600 batches | train loss 0.3871290 +| epoch 5 | 83/ 5600 batches | train loss 0.4083567 +| epoch 5 | 87/ 5600 batches | train loss 0.3375584 +| epoch 5 | 91/ 5600 batches | train loss 0.3907908 +| epoch 5 | 95/ 5600 batches | train loss 0.3942717 +| epoch 5 | 99/ 5600 batches | train loss 0.3983716 +| epoch 5 | 103/ 5600 batches | train loss 0.3516500 +| epoch 5 | 107/ 5600 batches | train loss 0.4330241 +| epoch 5 | 111/ 5600 batches | train loss 0.3763015 +| epoch 5 | 115/ 5600 batches | train loss 0.4145159 +| epoch 5 | 119/ 5600 batches | train loss 0.4083931 +| epoch 5 | 123/ 5600 batches | train loss 0.4515639 +| epoch 5 | 127/ 5600 batches | train loss 0.3862583 +| epoch 5 | 131/ 5600 batches | train loss 0.4876343 +| epoch 5 | 135/ 5600 batches | train loss 0.4049899 +| epoch 5 | 139/ 5600 batches | train loss 0.3848208 +| epoch 5 | 143/ 5600 batches | train loss 0.3952952 +| epoch 5 | 147/ 5600 batches | train loss 0.5001976 +| epoch 5 | 151/ 5600 batches | train loss 0.4049336 +| epoch 5 | 155/ 5600 batches | train loss 0.5694432 +| epoch 5 | 159/ 5600 batches | train loss 0.3992335 +| epoch 5 | 163/ 5600 batches | train loss 0.3672551 +| epoch 5 | 167/ 5600 batches | train loss 0.3734990 +| epoch 5 | 171/ 5600 batches | train loss 0.4303891 +| epoch 5 | 175/ 5600 batches | train loss 0.2698109 +| epoch 5 | 179/ 5600 batches | train loss 0.3047156 +| epoch 5 | 183/ 5600 batches | train loss 0.3199254 +| epoch 5 | 187/ 5600 batches | train loss 0.4467981 +| epoch 5 | 191/ 5600 batches | train loss 0.3631533 +| epoch 5 | 195/ 5600 batches | train loss 0.4392644 +| epoch 5 | 199/ 5600 batches | train loss 0.4251108 +| epoch 5 | 203/ 5600 batches | train loss 0.4567065 +| epoch 5 | 207/ 5600 batches | train loss 0.4651247 +| epoch 5 | 211/ 5600 batches | train loss 0.3779842 +| epoch 5 | 215/ 5600 batches | train loss 0.3767667 +| epoch 5 | 219/ 5600 batches | train loss 0.4093325 +| epoch 5 | 223/ 5600 batches | train loss 0.4207567 +| epoch 5 | 227/ 5600 batches | train loss 0.3828095 +| epoch 5 | 231/ 5600 batches | train loss 0.4517501 +| epoch 5 | 235/ 5600 batches | train loss 0.3751005 +| epoch 5 | 239/ 5600 batches | train loss 0.3765129 +| epoch 5 | 243/ 5600 batches | train loss 0.4550758 +| epoch 5 | 247/ 5600 batches | train loss 0.3704570 +| epoch 5 | 251/ 5600 batches | train loss 0.3856797 +| epoch 5 | 255/ 5600 batches | train loss 0.4415877 +| epoch 5 | 259/ 5600 batches | train loss 0.4452059 +| epoch 5 | 263/ 5600 batches | train loss 0.3946946 +| epoch 5 | 267/ 5600 batches | train loss 0.4424171 +| epoch 5 | 271/ 5600 batches | train loss 0.3635888 +| epoch 5 | 275/ 5600 batches | train loss 0.4304051 +| epoch 5 | 279/ 5600 batches | train loss 0.4239078 +| epoch 5 | 283/ 5600 batches | train loss 0.4159359 +| epoch 5 | 287/ 5600 batches | train loss 0.4926453 +| epoch 5 | 291/ 5600 batches | train loss 0.4729585 +| epoch 5 | 295/ 5600 batches | train loss 0.4290383 +| epoch 5 | 299/ 5600 batches | train loss 0.4771719 +| epoch 5 | 303/ 5600 batches | train loss 0.3788100 +| epoch 5 | 307/ 5600 batches | train loss 0.3768732 +| epoch 5 | 311/ 5600 batches | train loss 0.3849315 +| epoch 5 | 315/ 5600 batches | train loss 0.1723971 +| epoch 5 | 319/ 5600 batches | train loss 0.3883559 +| epoch 5 | 323/ 5600 batches | train loss 0.4213738 +| epoch 5 | 327/ 5600 batches | train loss 0.3550370 +| epoch 5 | 331/ 5600 batches | train loss 0.4367546 +| epoch 5 | 335/ 5600 batches | train loss 0.3926369 +| epoch 5 | 339/ 5600 batches | train loss 0.4458795 +| epoch 5 | 343/ 5600 batches | train loss 0.4216383 +| epoch 5 | 347/ 5600 batches | train loss 0.4121364 +| epoch 5 | 351/ 5600 batches | train loss 0.4572852 +| epoch 5 | 355/ 5600 batches | train loss 0.3873458 +| epoch 5 | 359/ 5600 batches | train loss 0.4419769 +| epoch 5 | 363/ 5600 batches | train loss 0.3835513 +| epoch 5 | 367/ 5600 batches | train loss 0.4683562 +| epoch 5 | 371/ 5600 batches | train loss 0.3513597 +| epoch 5 | 375/ 5600 batches | train loss 0.4993552 +| epoch 5 | 379/ 5600 batches | train loss 0.4252582 +| epoch 5 | 383/ 5600 batches | train loss 0.3802318 +| epoch 5 | 387/ 5600 batches | train loss 0.4110965 +| epoch 5 | 391/ 5600 batches | train loss 0.3551326 +| epoch 5 | 395/ 5600 batches | train loss 0.4324385 +| epoch 5 | 399/ 5600 batches | train loss 0.4194914 +| epoch 5 | 403/ 5600 batches | train loss 0.3903360 +| epoch 5 | 407/ 5600 batches | train loss 0.3286153 +| epoch 5 | 411/ 5600 batches | train loss 0.3986420 +| epoch 5 | 415/ 5600 batches | train loss 0.4135613 +| epoch 5 | 419/ 5600 batches | train loss 0.3857127 +| epoch 5 | 423/ 5600 batches | train loss 0.4349334 +| epoch 5 | 427/ 5600 batches | train loss 0.4680794 +| epoch 5 | 431/ 5600 batches | train loss 0.4194281 +| epoch 5 | 435/ 5600 batches | train loss 0.3093524 +| epoch 5 | 439/ 5600 batches | train loss 0.4274931 +| epoch 5 | 443/ 5600 batches | train loss 0.4365742 +| epoch 5 | 447/ 5600 batches | train loss 0.2646592 +| epoch 5 | 451/ 5600 batches | train loss 0.4407763 +| epoch 5 | 455/ 5600 batches | train loss 0.3609562 +| epoch 5 | 459/ 5600 batches | train loss 0.3886064 +| epoch 5 | 463/ 5600 batches | train loss 0.4157907 +| epoch 5 | 467/ 5600 batches | train loss 0.4403338 +| epoch 5 | 471/ 5600 batches | train loss 0.3947624 +| epoch 5 | 475/ 5600 batches | train loss 0.3941795 +| epoch 5 | 479/ 5600 batches | train loss 0.4068092 +| epoch 5 | 483/ 5600 batches | train loss 0.3832802 +| epoch 5 | 487/ 5600 batches | train loss 0.4013743 +| epoch 5 | 491/ 5600 batches | train loss 0.4058919 +| epoch 5 | 495/ 5600 batches | train loss 0.4481977 +| epoch 5 | 499/ 5600 batches | train loss 0.3806476 +| epoch 5 | 503/ 5600 batches | train loss 0.4159062 +| epoch 5 | 507/ 5600 batches | train loss 0.3044100 +| epoch 5 | 511/ 5600 batches | train loss 0.3449375 +| epoch 5 | 515/ 5600 batches | train loss 0.3339795 +| epoch 5 | 519/ 5600 batches | train loss 0.4281774 +| epoch 5 | 523/ 5600 batches | train loss 0.4057516 +| epoch 5 | 527/ 5600 batches | train loss 0.4874104 +| epoch 5 | 531/ 5600 batches | train loss 0.4260838 +| epoch 5 | 535/ 5600 batches | train loss 0.4217295 +| epoch 5 | 539/ 5600 batches | train loss 0.4410551 +| epoch 5 | 543/ 5600 batches | train loss 0.4106407 +| epoch 5 | 547/ 5600 batches | train loss 0.3594490 +| epoch 5 | 551/ 5600 batches | train loss 0.3370090 +| epoch 5 | 555/ 5600 batches | train loss 0.2985260 +| epoch 5 | 559/ 5600 batches | train loss 0.4190496 +| epoch 5 | 563/ 5600 batches | train loss 0.4604910 +| epoch 5 | 567/ 5600 batches | train loss 0.4113881 +| epoch 5 | 571/ 5600 batches | train loss 0.3624223 +| epoch 5 | 575/ 5600 batches | train loss 0.3954264 +| epoch 5 | 579/ 5600 batches | train loss 0.4666665 +| epoch 5 | 583/ 5600 batches | train loss 0.3831019 +| epoch 5 | 587/ 5600 batches | train loss 0.3945519 +| epoch 5 | 591/ 5600 batches | train loss 0.4456799 +| epoch 5 | 595/ 5600 batches | train loss 0.3831083 +| epoch 5 | 599/ 5600 batches | train loss 0.4087479 +| epoch 5 | 603/ 5600 batches | train loss 0.3701448 +| epoch 5 | 607/ 5600 batches | train loss 0.4030147 +| epoch 5 | 611/ 5600 batches | train loss 0.3486134 +| epoch 5 | 615/ 5600 batches | train loss 0.2991245 +| epoch 5 | 619/ 5600 batches | train loss 0.4451053 +| epoch 5 | 623/ 5600 batches | train loss 0.3937935 +| epoch 5 | 627/ 5600 batches | train loss 0.4857625 +| epoch 5 | 631/ 5600 batches | train loss 0.4463212 +| epoch 5 | 635/ 5600 batches | train loss 0.4172293 +| epoch 5 | 639/ 5600 batches | train loss 0.4526889 +| epoch 5 | 643/ 5600 batches | train loss 0.4482545 +| epoch 5 | 647/ 5600 batches | train loss 0.4098550 +| epoch 5 | 651/ 5600 batches | train loss 0.3642333 +| epoch 5 | 655/ 5600 batches | train loss 0.3896029 +| epoch 5 | 659/ 5600 batches | train loss 0.3959943 +| epoch 5 | 663/ 5600 batches | train loss 0.3891084 +| epoch 5 | 667/ 5600 batches | train loss 0.4497218 +| epoch 5 | 671/ 5600 batches | train loss 0.4114555 +| epoch 5 | 675/ 5600 batches | train loss 0.4348220 +| epoch 5 | 679/ 5600 batches | train loss 0.3673522 +| epoch 5 | 683/ 5600 batches | train loss 0.4171907 +| epoch 5 | 687/ 5600 batches | train loss 0.4470983 +| epoch 5 | 691/ 5600 batches | train loss 0.3711269 +| epoch 5 | 695/ 5600 batches | train loss 0.4017535 +| epoch 5 | 699/ 5600 batches | train loss 0.4846093 +| epoch 5 | 703/ 5600 batches | train loss 0.3808216 +| epoch 5 | 707/ 5600 batches | train loss 0.3619691 +| epoch 5 | 711/ 5600 batches | train loss 0.4238171 +| epoch 5 | 715/ 5600 batches | train loss 0.4040017 +| epoch 5 | 719/ 5600 batches | train loss 0.3895079 +| epoch 5 | 723/ 5600 batches | train loss 0.4386477 +| epoch 5 | 727/ 5600 batches | train loss 0.4794372 +| epoch 5 | 731/ 5600 batches | train loss 0.4821036 +| epoch 5 | 735/ 5600 batches | train loss 0.4139282 +| epoch 5 | 739/ 5600 batches | train loss 0.2724159 +| epoch 5 | 743/ 5600 batches | train loss 0.3614248 +| epoch 5 | 747/ 5600 batches | train loss 0.3777978 +| epoch 5 | 751/ 5600 batches | train loss 0.3777370 +| epoch 5 | 755/ 5600 batches | train loss 0.4018234 +| epoch 5 | 759/ 5600 batches | train loss 0.3564084 +| epoch 5 | 763/ 5600 batches | train loss 0.3990660 +| epoch 5 | 767/ 5600 batches | train loss 0.4767892 +| epoch 5 | 771/ 5600 batches | train loss 0.3878613 +| epoch 5 | 775/ 5600 batches | train loss 0.4522514 +| epoch 5 | 779/ 5600 batches | train loss 0.3864959 +| epoch 5 | 783/ 5600 batches | train loss 0.3968417 +| epoch 5 | 787/ 5600 batches | train loss 0.4105825 +| epoch 5 | 791/ 5600 batches | train loss 0.4317043 +| epoch 5 | 795/ 5600 batches | train loss 0.4185238 +| epoch 5 | 799/ 5600 batches | train loss 0.3458968 +| epoch 5 | 803/ 5600 batches | train loss 0.3584731 +| epoch 5 | 807/ 5600 batches | train loss 0.4765987 +| epoch 5 | 811/ 5600 batches | train loss 0.3571626 +| epoch 5 | 815/ 5600 batches | train loss 0.4538044 +| epoch 5 | 819/ 5600 batches | train loss 0.4530061 +| epoch 5 | 823/ 5600 batches | train loss 0.3938100 +| epoch 5 | 827/ 5600 batches | train loss 0.4348868 +| epoch 5 | 831/ 5600 batches | train loss 0.4084420 +| epoch 5 | 835/ 5600 batches | train loss 0.4438716 +| epoch 5 | 839/ 5600 batches | train loss 0.4428880 +| epoch 5 | 843/ 5600 batches | train loss 0.3525322 +| epoch 5 | 847/ 5600 batches | train loss 0.3724953 +| epoch 5 | 851/ 5600 batches | train loss 0.4300970 +| epoch 5 | 855/ 5600 batches | train loss 0.4896208 +| epoch 5 | 859/ 5600 batches | train loss 0.4113186 +| epoch 5 | 863/ 5600 batches | train loss 0.3502387 +| epoch 5 | 867/ 5600 batches | train loss 0.4038669 +| epoch 5 | 871/ 5600 batches | train loss 0.3853960 +| epoch 5 | 875/ 5600 batches | train loss 0.3763081 +| epoch 5 | 879/ 5600 batches | train loss 0.3677503 +| epoch 5 | 883/ 5600 batches | train loss 0.3576190 +| epoch 5 | 887/ 5600 batches | train loss 0.4157259 +| epoch 5 | 891/ 5600 batches | train loss 0.3749955 +| epoch 5 | 895/ 5600 batches | train loss 0.5175426 +| epoch 5 | 899/ 5600 batches | train loss 0.4443761 +| epoch 5 | 903/ 5600 batches | train loss 0.4414574 +| epoch 5 | 907/ 5600 batches | train loss 0.3964587 +| epoch 5 | 911/ 5600 batches | train loss 0.4120746 +| epoch 5 | 915/ 5600 batches | train loss 0.4568124 +| epoch 5 | 919/ 5600 batches | train loss 0.4475729 +| epoch 5 | 923/ 5600 batches | train loss 0.4124520 +| epoch 5 | 927/ 5600 batches | train loss 0.3782449 +| epoch 5 | 931/ 5600 batches | train loss 0.4068227 +| epoch 5 | 935/ 5600 batches | train loss 0.3721696 +| epoch 5 | 939/ 5600 batches | train loss 0.4496021 +| epoch 5 | 943/ 5600 batches | train loss 0.4193950 +| epoch 5 | 947/ 5600 batches | train loss 0.4388447 +| epoch 5 | 951/ 5600 batches | train loss 0.4497439 +| epoch 5 | 955/ 5600 batches | train loss 0.4291103 +| epoch 5 | 959/ 5600 batches | train loss 0.3744928 +| epoch 5 | 963/ 5600 batches | train loss 0.3630421 +| epoch 5 | 967/ 5600 batches | train loss 0.3820678 +| epoch 5 | 971/ 5600 batches | train loss 0.3685765 +| epoch 5 | 975/ 5600 batches | train loss 0.4468272 +| epoch 5 | 979/ 5600 batches | train loss 0.4797371 +| epoch 5 | 983/ 5600 batches | train loss 0.5895106 +| epoch 5 | 987/ 5600 batches | train loss 0.3876774 +| epoch 5 | 991/ 5600 batches | train loss 0.4291293 +| epoch 5 | 995/ 5600 batches | train loss 0.2799615 +| epoch 5 | 999/ 5600 batches | train loss 0.3760346 +| epoch 5 | 1003/ 5600 batches | train loss 0.4741940 +| epoch 5 | 1007/ 5600 batches | train loss 0.3552478 +| epoch 5 | 1011/ 5600 batches | train loss 0.4769943 +| epoch 5 | 1015/ 5600 batches | train loss 0.3484951 +| epoch 5 | 1019/ 5600 batches | train loss 0.3502919 +| epoch 5 | 1023/ 5600 batches | train loss 0.3951086 +| epoch 5 | 1027/ 5600 batches | train loss 0.3966168 +| epoch 5 | 1031/ 5600 batches | train loss 0.4332156 +| epoch 5 | 1035/ 5600 batches | train loss 0.4282743 +| epoch 5 | 1039/ 5600 batches | train loss 0.4784549 +| epoch 5 | 1043/ 5600 batches | train loss 0.3919470 +| epoch 5 | 1047/ 5600 batches | train loss 0.3716994 +| epoch 5 | 1051/ 5600 batches | train loss 0.4013596 +| epoch 5 | 1055/ 5600 batches | train loss 0.3781747 +| epoch 5 | 1059/ 5600 batches | train loss 0.3775417 +| epoch 5 | 1063/ 5600 batches | train loss 0.3836130 +| epoch 5 | 1067/ 5600 batches | train loss 0.4016766 +| epoch 5 | 1071/ 5600 batches | train loss 0.2749446 +| epoch 5 | 1075/ 5600 batches | train loss 0.3459521 +| epoch 5 | 1079/ 5600 batches | train loss 0.3662789 +| epoch 5 | 1083/ 5600 batches | train loss 0.3302070 +| epoch 5 | 1087/ 5600 batches | train loss 0.3992337 +| epoch 5 | 1091/ 5600 batches | train loss 0.3851577 +| epoch 5 | 1095/ 5600 batches | train loss 0.4286867 +| epoch 5 | 1099/ 5600 batches | train loss 0.3394938 +| epoch 5 | 1103/ 5600 batches | train loss 0.3920196 +| epoch 5 | 1107/ 5600 batches | train loss 0.3966570 +| epoch 5 | 1111/ 5600 batches | train loss 0.4684117 +| epoch 5 | 1115/ 5600 batches | train loss 0.3506678 +| epoch 5 | 1119/ 5600 batches | train loss 0.4235337 +| epoch 5 | 1123/ 5600 batches | train loss 0.4303332 +| epoch 5 | 1127/ 5600 batches | train loss 0.4125157 +| epoch 5 | 1131/ 5600 batches | train loss 0.4184465 +| epoch 5 | 1135/ 5600 batches | train loss 0.3277321 +| epoch 5 | 1139/ 5600 batches | train loss 0.4871824 +| epoch 5 | 1143/ 5600 batches | train loss 0.3354822 +| epoch 5 | 1147/ 5600 batches | train loss 0.3874094 +| epoch 5 | 1151/ 5600 batches | train loss 0.4086443 +| epoch 5 | 1155/ 5600 batches | train loss 0.3343791 +| epoch 5 | 1159/ 5600 batches | train loss 0.3939289 +| epoch 5 | 1163/ 5600 batches | train loss 0.3813680 +| epoch 5 | 1167/ 5600 batches | train loss 0.3570928 +| epoch 5 | 1171/ 5600 batches | train loss 0.4182974 +| epoch 5 | 1175/ 5600 batches | train loss 0.4025173 +| epoch 5 | 1179/ 5600 batches | train loss 0.4800919 +| epoch 5 | 1183/ 5600 batches | train loss 0.4267294 +| epoch 5 | 1187/ 5600 batches | train loss 0.4060492 +| epoch 5 | 1191/ 5600 batches | train loss 0.4640225 +| epoch 5 | 1195/ 5600 batches | train loss 0.3821486 +| epoch 5 | 1199/ 5600 batches | train loss 0.4231885 +| epoch 5 | 1203/ 5600 batches | train loss 0.2543474 +| epoch 5 | 1207/ 5600 batches | train loss 0.4189571 +| epoch 5 | 1211/ 5600 batches | train loss 0.3622048 +| epoch 5 | 1215/ 5600 batches | train loss 0.4442247 +| epoch 5 | 1219/ 5600 batches | train loss 0.3564566 +| epoch 5 | 1223/ 5600 batches | train loss 0.3896369 +| epoch 5 | 1227/ 5600 batches | train loss 0.3850195 +| epoch 5 | 1231/ 5600 batches | train loss 0.4098979 +| epoch 5 | 1235/ 5600 batches | train loss 0.4050332 +| epoch 5 | 1239/ 5600 batches | train loss 0.4324482 +| epoch 5 | 1243/ 5600 batches | train loss 0.3634661 +| epoch 5 | 1247/ 5600 batches | train loss 0.3652944 +| epoch 5 | 1251/ 5600 batches | train loss 0.4371646 +| epoch 5 | 1255/ 5600 batches | train loss 0.4448023 +| epoch 5 | 1259/ 5600 batches | train loss 0.4176426 +| epoch 5 | 1263/ 5600 batches | train loss 0.4316921 +| epoch 5 | 1267/ 5600 batches | train loss 0.4536296 +| epoch 5 | 1271/ 5600 batches | train loss 0.4346302 +| epoch 5 | 1275/ 5600 batches | train loss 0.4764590 +| epoch 5 | 1279/ 5600 batches | train loss 0.2664301 +| epoch 5 | 1283/ 5600 batches | train loss 0.3418834 +| epoch 5 | 1287/ 5600 batches | train loss 0.4355266 +| epoch 5 | 1291/ 5600 batches | train loss 0.3774579 +| epoch 5 | 1295/ 5600 batches | train loss 0.4293307 +| epoch 5 | 1299/ 5600 batches | train loss 0.3601022 +| epoch 5 | 1303/ 5600 batches | train loss 0.3272857 +| epoch 5 | 1307/ 5600 batches | train loss 0.3983170 +| epoch 5 | 1311/ 5600 batches | train loss 0.3013976 +| epoch 5 | 1315/ 5600 batches | train loss 0.3481716 +| epoch 5 | 1319/ 5600 batches | train loss 0.4056509 +| epoch 5 | 1323/ 5600 batches | train loss 0.4510488 +| epoch 5 | 1327/ 5600 batches | train loss 0.3814865 +| epoch 5 | 1331/ 5600 batches | train loss 0.4246854 +| epoch 5 | 1335/ 5600 batches | train loss 0.3977668 +| epoch 5 | 1339/ 5600 batches | train loss 0.4096206 +| epoch 5 | 1343/ 5600 batches | train loss 0.4741561 +| epoch 5 | 1347/ 5600 batches | train loss 0.3775133 +| epoch 5 | 1351/ 5600 batches | train loss 0.4320548 +| epoch 5 | 1355/ 5600 batches | train loss 0.4020686 +| epoch 5 | 1359/ 5600 batches | train loss 0.4569656 +| epoch 5 | 1363/ 5600 batches | train loss 0.4728242 +| epoch 5 | 1367/ 5600 batches | train loss 0.4059303 +| epoch 5 | 1371/ 5600 batches | train loss 0.3929508 +| epoch 5 | 1375/ 5600 batches | train loss 0.4597599 +| epoch 5 | 1379/ 5600 batches | train loss 0.3400686 +| epoch 5 | 1383/ 5600 batches | train loss 0.4074851 +| epoch 5 | 1387/ 5600 batches | train loss 0.3540133 +| epoch 5 | 1391/ 5600 batches | train loss 0.5487020 +| epoch 5 | 1395/ 5600 batches | train loss 0.3989661 +| epoch 5 | 1399/ 5600 batches | train loss 0.4397373 +| epoch 5 | 1403/ 5600 batches | train loss 0.4356916 +| epoch 5 | 1407/ 5600 batches | train loss 0.4688933 +| epoch 5 | 1411/ 5600 batches | train loss 0.3867390 +| epoch 5 | 1415/ 5600 batches | train loss 0.4109483 +| epoch 5 | 1419/ 5600 batches | train loss 0.4778702 +| epoch 5 | 1423/ 5600 batches | train loss 0.4533072 +| epoch 5 | 1427/ 5600 batches | train loss 0.4342321 +| epoch 5 | 1431/ 5600 batches | train loss 0.3969408 +| epoch 5 | 1435/ 5600 batches | train loss 0.3911913 +| epoch 5 | 1439/ 5600 batches | train loss 0.3866718 +| epoch 5 | 1443/ 5600 batches | train loss 0.4186448 +| epoch 5 | 1447/ 5600 batches | train loss 0.3892167 +| epoch 5 | 1451/ 5600 batches | train loss 0.2693633 +| epoch 5 | 1455/ 5600 batches | train loss 0.4113796 +| epoch 5 | 1459/ 5600 batches | train loss 0.4193120 +| epoch 5 | 1463/ 5600 batches | train loss 0.3995654 +| epoch 5 | 1467/ 5600 batches | train loss 0.4741508 +| epoch 5 | 1471/ 5600 batches | train loss 0.1967289 +| epoch 5 | 1475/ 5600 batches | train loss 0.3363634 +| epoch 5 | 1479/ 5600 batches | train loss 0.3469898 +| epoch 5 | 1483/ 5600 batches | train loss 0.4764392 +| epoch 5 | 1487/ 5600 batches | train loss 0.4137711 +| epoch 5 | 1491/ 5600 batches | train loss 0.3965282 +| epoch 5 | 1495/ 5600 batches | train loss 0.3326804 +| epoch 5 | 1499/ 5600 batches | train loss 0.4649350 +| epoch 5 | 1503/ 5600 batches | train loss 0.4127308 +| epoch 5 | 1507/ 5600 batches | train loss 0.4230626 +| epoch 5 | 1511/ 5600 batches | train loss 0.3546954 +| epoch 5 | 1515/ 5600 batches | train loss 0.4165514 +| epoch 5 | 1519/ 5600 batches | train loss 0.4273743 +| epoch 5 | 1523/ 5600 batches | train loss 0.3104471 +| epoch 5 | 1527/ 5600 batches | train loss 0.4205411 +| epoch 5 | 1531/ 5600 batches | train loss 0.3750587 +| epoch 5 | 1535/ 5600 batches | train loss 0.2803087 +| epoch 5 | 1539/ 5600 batches | train loss 0.4572703 +| epoch 5 | 1543/ 5600 batches | train loss 0.3813863 +| epoch 5 | 1547/ 5600 batches | train loss 0.3552047 +| epoch 5 | 1551/ 5600 batches | train loss 0.3979231 +| epoch 5 | 1555/ 5600 batches | train loss 0.4505487 +| epoch 5 | 1559/ 5600 batches | train loss 0.4963823 +| epoch 5 | 1563/ 5600 batches | train loss 0.4859746 +| epoch 5 | 1567/ 5600 batches | train loss 0.4370102 +| epoch 5 | 1571/ 5600 batches | train loss 0.3968593 +| epoch 5 | 1575/ 5600 batches | train loss 0.4320061 +| epoch 5 | 1579/ 5600 batches | train loss 0.4234946 +| epoch 5 | 1583/ 5600 batches | train loss 0.4883697 +| epoch 5 | 1587/ 5600 batches | train loss 0.4305307 +| epoch 5 | 1591/ 5600 batches | train loss 0.3626848 +| epoch 5 | 1595/ 5600 batches | train loss 0.4307228 +| epoch 5 | 1599/ 5600 batches | train loss 0.4100840 +| epoch 5 | 1603/ 5600 batches | train loss 0.4884954 +| epoch 5 | 1607/ 5600 batches | train loss 0.3977579 +| epoch 5 | 1611/ 5600 batches | train loss 0.4861161 +| epoch 5 | 1615/ 5600 batches | train loss 0.4630761 +| epoch 5 | 1619/ 5600 batches | train loss 0.4278083 +| epoch 5 | 1623/ 5600 batches | train loss 0.4009815 +| epoch 5 | 1627/ 5600 batches | train loss 0.3665503 +| epoch 5 | 1631/ 5600 batches | train loss 0.3514051 +| epoch 5 | 1635/ 5600 batches | train loss 0.4389475 +| epoch 5 | 1639/ 5600 batches | train loss 0.3641625 +| epoch 5 | 1643/ 5600 batches | train loss 0.5076944 +| epoch 5 | 1647/ 5600 batches | train loss 0.3780777 +| epoch 5 | 1651/ 5600 batches | train loss 0.4726089 +| epoch 5 | 1655/ 5600 batches | train loss 0.3922763 +| epoch 5 | 1659/ 5600 batches | train loss 0.4044952 +| epoch 5 | 1663/ 5600 batches | train loss 0.3753282 +| epoch 5 | 1667/ 5600 batches | train loss 0.3172511 +| epoch 5 | 1671/ 5600 batches | train loss 0.4085439 +| epoch 5 | 1675/ 5600 batches | train loss 0.4207487 +| epoch 5 | 1679/ 5600 batches | train loss 0.4468489 +| epoch 5 | 1683/ 5600 batches | train loss 0.4666132 +| epoch 5 | 1687/ 5600 batches | train loss 0.4391217 +| epoch 5 | 1691/ 5600 batches | train loss 0.4488404 +| epoch 5 | 1695/ 5600 batches | train loss 0.4409213 +| epoch 5 | 1699/ 5600 batches | train loss 0.3016910 +| epoch 5 | 1703/ 5600 batches | train loss 0.4212556 +| epoch 5 | 1707/ 5600 batches | train loss 0.4071504 +| epoch 5 | 1711/ 5600 batches | train loss 0.4318374 +| epoch 5 | 1715/ 5600 batches | train loss 0.4493612 +| epoch 5 | 1719/ 5600 batches | train loss 0.3172228 +| epoch 5 | 1723/ 5600 batches | train loss 0.4192930 +| epoch 5 | 1727/ 5600 batches | train loss 0.4620127 +| epoch 5 | 1731/ 5600 batches | train loss 0.4684275 +| epoch 5 | 1735/ 5600 batches | train loss 0.4313438 +| epoch 5 | 1739/ 5600 batches | train loss 0.4022835 +| epoch 5 | 1743/ 5600 batches | train loss 0.4271660 +| epoch 5 | 1747/ 5600 batches | train loss 0.4132372 +| epoch 5 | 1751/ 5600 batches | train loss 0.3582207 +| epoch 5 | 1755/ 5600 batches | train loss 0.4324376 +| epoch 5 | 1759/ 5600 batches | train loss 0.4256481 +| epoch 5 | 1763/ 5600 batches | train loss 0.2882514 +| epoch 5 | 1767/ 5600 batches | train loss 0.4705499 +| epoch 5 | 1771/ 5600 batches | train loss 0.3812790 +| epoch 5 | 1775/ 5600 batches | train loss 0.3588856 +| epoch 5 | 1779/ 5600 batches | train loss 0.4463655 +| epoch 5 | 1783/ 5600 batches | train loss 0.2994968 +| epoch 5 | 1787/ 5600 batches | train loss 0.4088489 +| epoch 5 | 1791/ 5600 batches | train loss 0.5143441 +| epoch 5 | 1795/ 5600 batches | train loss 0.3616367 +| epoch 5 | 1799/ 5600 batches | train loss 0.4951855 +| epoch 5 | 1803/ 5600 batches | train loss 0.3906326 +| epoch 5 | 1807/ 5600 batches | train loss 0.3487894 +| epoch 5 | 1811/ 5600 batches | train loss 0.4139088 +| epoch 5 | 1815/ 5600 batches | train loss 0.4644521 +| epoch 5 | 1819/ 5600 batches | train loss 0.4421403 +| epoch 5 | 1823/ 5600 batches | train loss 0.4651771 +| epoch 5 | 1827/ 5600 batches | train loss 0.4309269 +| epoch 5 | 1831/ 5600 batches | train loss 0.3467928 +| epoch 5 | 1835/ 5600 batches | train loss 0.3197643 +| epoch 5 | 1839/ 5600 batches | train loss 0.4349464 +| epoch 5 | 1843/ 5600 batches | train loss 0.4032341 +| epoch 5 | 1847/ 5600 batches | train loss 0.3250517 +| epoch 5 | 1851/ 5600 batches | train loss 0.4098588 +| epoch 5 | 1855/ 5600 batches | train loss 0.3707766 +| epoch 5 | 1859/ 5600 batches | train loss 0.3959368 +| epoch 5 | 1863/ 5600 batches | train loss 0.3514537 +| epoch 5 | 1867/ 5600 batches | train loss 0.4304263 +| epoch 5 | 1871/ 5600 batches | train loss 0.4124777 +| epoch 5 | 1875/ 5600 batches | train loss 0.3304315 +| epoch 5 | 1879/ 5600 batches | train loss 0.3864124 +| epoch 5 | 1883/ 5600 batches | train loss 0.3979194 +| epoch 5 | 1887/ 5600 batches | train loss 0.4143429 +| epoch 5 | 1891/ 5600 batches | train loss 0.4721273 +| epoch 5 | 1895/ 5600 batches | train loss 0.4635291 +| epoch 5 | 1899/ 5600 batches | train loss 0.4572772 +| epoch 5 | 1903/ 5600 batches | train loss 0.4109108 +| epoch 5 | 1907/ 5600 batches | train loss 0.3934740 +| epoch 5 | 1911/ 5600 batches | train loss 0.4445327 +| epoch 5 | 1915/ 5600 batches | train loss 0.4658482 +| epoch 5 | 1919/ 5600 batches | train loss 0.3032516 +| epoch 5 | 1923/ 5600 batches | train loss 0.4073519 +| epoch 5 | 1927/ 5600 batches | train loss 0.3763528 +| epoch 5 | 1931/ 5600 batches | train loss 0.4204956 +| epoch 5 | 1935/ 5600 batches | train loss 0.3729182 +| epoch 5 | 1939/ 5600 batches | train loss 0.4885648 +| epoch 5 | 1943/ 5600 batches | train loss 0.3926075 +| epoch 5 | 1947/ 5600 batches | train loss 0.4138378 +| epoch 5 | 1951/ 5600 batches | train loss 0.4482541 +| epoch 5 | 1955/ 5600 batches | train loss 0.4441047 +| epoch 5 | 1959/ 5600 batches | train loss 0.4212933 +| epoch 5 | 1963/ 5600 batches | train loss 0.4150563 +| epoch 5 | 1967/ 5600 batches | train loss 0.4373586 +| epoch 5 | 1971/ 5600 batches | train loss 0.4197664 +| epoch 5 | 1975/ 5600 batches | train loss 0.3581865 +| epoch 5 | 1979/ 5600 batches | train loss 0.3397400 +| epoch 5 | 1983/ 5600 batches | train loss 0.3526217 +| epoch 5 | 1987/ 5600 batches | train loss 0.3311622 +| epoch 5 | 1991/ 5600 batches | train loss 0.3601820 +| epoch 5 | 1995/ 5600 batches | train loss 0.4409364 +| epoch 5 | 1999/ 5600 batches | train loss 0.4200487 +| epoch 5 | 2003/ 5600 batches | train loss 0.4363135 +| epoch 5 | 2007/ 5600 batches | train loss 0.3831336 +| epoch 5 | 2011/ 5600 batches | train loss 0.4139173 +| epoch 5 | 2015/ 5600 batches | train loss 0.5127423 +| epoch 5 | 2019/ 5600 batches | train loss 0.4057097 +| epoch 5 | 2023/ 5600 batches | train loss 0.3982266 +| epoch 5 | 2027/ 5600 batches | train loss 0.4164492 +| epoch 5 | 2031/ 5600 batches | train loss 0.4134164 +| epoch 5 | 2035/ 5600 batches | train loss 0.3644682 +| epoch 5 | 2039/ 5600 batches | train loss 0.4649186 +| epoch 5 | 2043/ 5600 batches | train loss 0.4406613 +| epoch 5 | 2047/ 5600 batches | train loss 0.3915240 +| epoch 5 | 2051/ 5600 batches | train loss 0.3850904 +| epoch 5 | 2055/ 5600 batches | train loss 0.3932000 +| epoch 5 | 2059/ 5600 batches | train loss 0.4275969 +| epoch 5 | 2063/ 5600 batches | train loss 0.3999730 +| epoch 5 | 2067/ 5600 batches | train loss 0.4167553 +| epoch 5 | 2071/ 5600 batches | train loss 0.3982851 +| epoch 5 | 2075/ 5600 batches | train loss 0.3768663 +| epoch 5 | 2079/ 5600 batches | train loss 0.3625524 +| epoch 5 | 2083/ 5600 batches | train loss 0.3712704 +| epoch 5 | 2087/ 5600 batches | train loss 0.4196059 +| epoch 5 | 2091/ 5600 batches | train loss 0.4152757 +| epoch 5 | 2095/ 5600 batches | train loss 0.4250370 +| epoch 5 | 2099/ 5600 batches | train loss 0.4390840 +| epoch 5 | 2103/ 5600 batches | train loss 0.4578848 +| epoch 5 | 2107/ 5600 batches | train loss 0.5152552 +| epoch 5 | 2111/ 5600 batches | train loss 0.4161862 +| epoch 5 | 2115/ 5600 batches | train loss 0.4200449 +| epoch 5 | 2119/ 5600 batches | train loss 0.3730514 +| epoch 5 | 2123/ 5600 batches | train loss 0.4178234 +| epoch 5 | 2127/ 5600 batches | train loss 0.4206420 +| epoch 5 | 2131/ 5600 batches | train loss 0.2651747 +| epoch 5 | 2135/ 5600 batches | train loss 0.4406803 +| epoch 5 | 2139/ 5600 batches | train loss 0.4373360 +| epoch 5 | 2143/ 5600 batches | train loss 0.4063446 +| epoch 5 | 2147/ 5600 batches | train loss 0.3299341 +| epoch 5 | 2151/ 5600 batches | train loss 0.3541061 +| epoch 5 | 2155/ 5600 batches | train loss 0.5041294 +| epoch 5 | 2159/ 5600 batches | train loss 0.4338687 +| epoch 5 | 2163/ 5600 batches | train loss 0.3478106 +| epoch 5 | 2167/ 5600 batches | train loss 0.4178231 +| epoch 5 | 2171/ 5600 batches | train loss 0.3803111 +| epoch 5 | 2175/ 5600 batches | train loss 0.4326356 +| epoch 5 | 2179/ 5600 batches | train loss 0.4719440 +| epoch 5 | 2183/ 5600 batches | train loss 0.3723662 +| epoch 5 | 2187/ 5600 batches | train loss 0.4908772 +| epoch 5 | 2191/ 5600 batches | train loss 0.3544555 +| epoch 5 | 2195/ 5600 batches | train loss 0.4064020 +| epoch 5 | 2199/ 5600 batches | train loss 0.3749656 +| epoch 5 | 2203/ 5600 batches | train loss 0.4472953 +| epoch 5 | 2207/ 5600 batches | train loss 0.4436875 +| epoch 5 | 2211/ 5600 batches | train loss 0.4103999 +| epoch 5 | 2215/ 5600 batches | train loss 0.4090129 +| epoch 5 | 2219/ 5600 batches | train loss 0.3367967 +| epoch 5 | 2223/ 5600 batches | train loss 0.3956817 +| epoch 5 | 2227/ 5600 batches | train loss 0.4929005 +| epoch 5 | 2231/ 5600 batches | train loss 0.4353406 +| epoch 5 | 2235/ 5600 batches | train loss 0.3287374 +| epoch 5 | 2239/ 5600 batches | train loss 0.3829067 +| epoch 5 | 2243/ 5600 batches | train loss 0.4176732 +| epoch 5 | 2247/ 5600 batches | train loss 0.3560771 +| epoch 5 | 2251/ 5600 batches | train loss 0.3509584 +| epoch 5 | 2255/ 5600 batches | train loss 0.3640254 +| epoch 5 | 2259/ 5600 batches | train loss 0.3852134 +| epoch 5 | 2263/ 5600 batches | train loss 0.4418078 +| epoch 5 | 2267/ 5600 batches | train loss 0.4854421 +| epoch 5 | 2271/ 5600 batches | train loss 0.4385169 +| epoch 5 | 2275/ 5600 batches | train loss 0.4321848 +| epoch 5 | 2279/ 5600 batches | train loss 0.4232548 +| epoch 5 | 2283/ 5600 batches | train loss 0.3945078 +| epoch 5 | 2287/ 5600 batches | train loss 0.5313915 +| epoch 5 | 2291/ 5600 batches | train loss 0.3370507 +| epoch 5 | 2295/ 5600 batches | train loss 0.4203051 +| epoch 5 | 2299/ 5600 batches | train loss 0.3961583 +| epoch 5 | 2303/ 5600 batches | train loss 0.4771632 +| epoch 5 | 2307/ 5600 batches | train loss 0.4236051 +| epoch 5 | 2311/ 5600 batches | train loss 0.3913599 +| epoch 5 | 2315/ 5600 batches | train loss 0.3782441 +| epoch 5 | 2319/ 5600 batches | train loss 0.5119284 +| epoch 5 | 2323/ 5600 batches | train loss 0.3903652 +| epoch 5 | 2327/ 5600 batches | train loss 0.3610037 +| epoch 5 | 2331/ 5600 batches | train loss 0.4056456 +| epoch 5 | 2335/ 5600 batches | train loss 0.3927806 +| epoch 5 | 2339/ 5600 batches | train loss 0.3093491 +| epoch 5 | 2343/ 5600 batches | train loss 0.3669588 +| epoch 5 | 2347/ 5600 batches | train loss 0.3905455 +| epoch 5 | 2351/ 5600 batches | train loss 0.4131843 +| epoch 5 | 2355/ 5600 batches | train loss 0.3880118 +| epoch 5 | 2359/ 5600 batches | train loss 0.3975314 +| epoch 5 | 2363/ 5600 batches | train loss 0.4502348 +| epoch 5 | 2367/ 5600 batches | train loss 0.4242801 +| epoch 5 | 2371/ 5600 batches | train loss 0.3974159 +| epoch 5 | 2375/ 5600 batches | train loss 0.4153086 +| epoch 5 | 2379/ 5600 batches | train loss 0.3599528 +| epoch 5 | 2383/ 5600 batches | train loss 0.4460790 +| epoch 5 | 2387/ 5600 batches | train loss 0.3936884 +| epoch 5 | 2391/ 5600 batches | train loss 0.4774971 +| epoch 5 | 2395/ 5600 batches | train loss 0.3709367 +| epoch 5 | 2399/ 5600 batches | train loss 0.3576757 +| epoch 5 | 2403/ 5600 batches | train loss 0.4247681 +| epoch 5 | 2407/ 5600 batches | train loss 0.3814337 +| epoch 5 | 2411/ 5600 batches | train loss 0.3538832 +| epoch 5 | 2415/ 5600 batches | train loss 0.3625879 +| epoch 5 | 2419/ 5600 batches | train loss 0.4472924 +| epoch 5 | 2423/ 5600 batches | train loss 0.3848779 +| epoch 5 | 2427/ 5600 batches | train loss 0.4596173 +| epoch 5 | 2431/ 5600 batches | train loss 0.3716809 +| epoch 5 | 2435/ 5600 batches | train loss 0.3880624 +| epoch 5 | 2439/ 5600 batches | train loss 0.3656678 +| epoch 5 | 2443/ 5600 batches | train loss 0.4258277 +| epoch 5 | 2447/ 5600 batches | train loss 0.3543265 +| epoch 5 | 2451/ 5600 batches | train loss 0.3009067 +| epoch 5 | 2455/ 5600 batches | train loss 0.4266927 +| epoch 5 | 2459/ 5600 batches | train loss 0.4073999 +| epoch 5 | 2463/ 5600 batches | train loss 0.3994828 +| epoch 5 | 2467/ 5600 batches | train loss 0.4372954 +| epoch 5 | 2471/ 5600 batches | train loss 0.3699921 +| epoch 5 | 2475/ 5600 batches | train loss 0.3642131 +| epoch 5 | 2479/ 5600 batches | train loss 0.3666811 +| epoch 5 | 2483/ 5600 batches | train loss 0.3646295 +| epoch 5 | 2487/ 5600 batches | train loss 0.3363156 +| epoch 5 | 2491/ 5600 batches | train loss 0.3461874 +| epoch 5 | 2495/ 5600 batches | train loss 0.3785628 +| epoch 5 | 2499/ 5600 batches | train loss 0.5192702 +| epoch 5 | 2503/ 5600 batches | train loss 0.4252015 +| epoch 5 | 2507/ 5600 batches | train loss 0.4659673 +| epoch 5 | 2511/ 5600 batches | train loss 0.5041249 +| epoch 5 | 2515/ 5600 batches | train loss 0.4437584 +| epoch 5 | 2519/ 5600 batches | train loss 0.4196193 +| epoch 5 | 2523/ 5600 batches | train loss 0.3621418 +| epoch 5 | 2527/ 5600 batches | train loss 0.3890052 +| epoch 5 | 2531/ 5600 batches | train loss 0.3199419 +| epoch 5 | 2535/ 5600 batches | train loss 0.4444305 +| epoch 5 | 2539/ 5600 batches | train loss 0.4327628 +| epoch 5 | 2543/ 5600 batches | train loss 0.4220521 +| epoch 5 | 2547/ 5600 batches | train loss 0.3801272 +| epoch 5 | 2551/ 5600 batches | train loss 0.3401888 +| epoch 5 | 2555/ 5600 batches | train loss 0.3604995 +| epoch 5 | 2559/ 5600 batches | train loss 0.3716740 +| epoch 5 | 2563/ 5600 batches | train loss 0.4467859 +| epoch 5 | 2567/ 5600 batches | train loss 0.4221876 +| epoch 5 | 2571/ 5600 batches | train loss 0.3773948 +| epoch 5 | 2575/ 5600 batches | train loss 0.3862653 +| epoch 5 | 2579/ 5600 batches | train loss 0.4076518 +| epoch 5 | 2583/ 5600 batches | train loss 0.5099723 +| epoch 5 | 2587/ 5600 batches | train loss 0.4317898 +| epoch 5 | 2591/ 5600 batches | train loss 0.4132745 +| epoch 5 | 2595/ 5600 batches | train loss 0.3737058 +| epoch 5 | 2599/ 5600 batches | train loss 0.4519691 +| epoch 5 | 2603/ 5600 batches | train loss 0.4079821 +| epoch 5 | 2607/ 5600 batches | train loss 0.3236723 +| epoch 5 | 2611/ 5600 batches | train loss 0.4741401 +| epoch 5 | 2615/ 5600 batches | train loss 0.4470648 +| epoch 5 | 2619/ 5600 batches | train loss 0.3777303 +| epoch 5 | 2623/ 5600 batches | train loss 0.5156943 +| epoch 5 | 2627/ 5600 batches | train loss 0.4458476 +| epoch 5 | 2631/ 5600 batches | train loss 0.2818886 +| epoch 5 | 2635/ 5600 batches | train loss 0.3969744 +| epoch 5 | 2639/ 5600 batches | train loss 0.4428534 +| epoch 5 | 2643/ 5600 batches | train loss 0.3765661 +| epoch 5 | 2647/ 5600 batches | train loss 0.3580884 +| epoch 5 | 2651/ 5600 batches | train loss 0.4819404 +| epoch 5 | 2655/ 5600 batches | train loss 0.4424523 +| epoch 5 | 2659/ 5600 batches | train loss 0.3692765 +| epoch 5 | 2663/ 5600 batches | train loss 0.2717208 +| epoch 5 | 2667/ 5600 batches | train loss 0.3875486 +| epoch 5 | 2671/ 5600 batches | train loss 0.3461932 +| epoch 5 | 2675/ 5600 batches | train loss 0.3889132 +| epoch 5 | 2679/ 5600 batches | train loss 0.4070293 +| epoch 5 | 2683/ 5600 batches | train loss 0.3604149 +| epoch 5 | 2687/ 5600 batches | train loss 0.4669590 +| epoch 5 | 2691/ 5600 batches | train loss 0.3202342 +| epoch 5 | 2695/ 5600 batches | train loss 0.3571705 +| epoch 5 | 2699/ 5600 batches | train loss 0.3518244 +| epoch 5 | 2703/ 5600 batches | train loss 0.4311681 +| epoch 5 | 2707/ 5600 batches | train loss 0.4028895 +| epoch 5 | 2711/ 5600 batches | train loss 0.3787057 +| epoch 5 | 2715/ 5600 batches | train loss 0.3818301 +| epoch 5 | 2719/ 5600 batches | train loss 0.4628466 +| epoch 5 | 2723/ 5600 batches | train loss 0.3778018 +| epoch 5 | 2727/ 5600 batches | train loss 0.4109366 +| epoch 5 | 2731/ 5600 batches | train loss 0.3643371 +| epoch 5 | 2735/ 5600 batches | train loss 0.3925066 +| epoch 5 | 2739/ 5600 batches | train loss 0.4215344 +| epoch 5 | 2743/ 5600 batches | train loss 0.3445748 +| epoch 5 | 2747/ 5600 batches | train loss 0.3998750 +| epoch 5 | 2751/ 5600 batches | train loss 0.3816661 +| epoch 5 | 2755/ 5600 batches | train loss 0.4487268 +| epoch 5 | 2759/ 5600 batches | train loss 0.3979070 +| epoch 5 | 2763/ 5600 batches | train loss 0.4607270 +| epoch 5 | 2767/ 5600 batches | train loss 0.3646814 +| epoch 5 | 2771/ 5600 batches | train loss 0.4013268 +| epoch 5 | 2775/ 5600 batches | train loss 0.3594294 +| epoch 5 | 2779/ 5600 batches | train loss 0.4410711 +| epoch 5 | 2783/ 5600 batches | train loss 0.4106854 +| epoch 5 | 2787/ 5600 batches | train loss 0.4368390 +| epoch 5 | 2791/ 5600 batches | train loss 0.3528161 +| epoch 5 | 2795/ 5600 batches | train loss 0.4523391 +| epoch 5 | 2799/ 5600 batches | train loss 0.4737731 +| epoch 5 | 2803/ 5600 batches | train loss 0.4712523 +| epoch 5 | 2807/ 5600 batches | train loss 0.4124445 +| epoch 5 | 2811/ 5600 batches | train loss 0.4097062 +| epoch 5 | 2815/ 5600 batches | train loss 0.4418505 +| epoch 5 | 2819/ 5600 batches | train loss 0.3462210 +| epoch 5 | 2823/ 5600 batches | train loss 0.3661205 +| epoch 5 | 2827/ 5600 batches | train loss 0.4284842 +| epoch 5 | 2831/ 5600 batches | train loss 0.3598289 +| epoch 5 | 2835/ 5600 batches | train loss 0.3987197 +| epoch 5 | 2839/ 5600 batches | train loss 0.3186769 +| epoch 5 | 2843/ 5600 batches | train loss 0.4368117 +| epoch 5 | 2847/ 5600 batches | train loss 0.4907520 +| epoch 5 | 2851/ 5600 batches | train loss 0.4343181 +| epoch 5 | 2855/ 5600 batches | train loss 0.3583328 +| epoch 5 | 2859/ 5600 batches | train loss 0.3859316 +| epoch 5 | 2863/ 5600 batches | train loss 0.4183696 +| epoch 5 | 2867/ 5600 batches | train loss 0.4188651 +| epoch 5 | 2871/ 5600 batches | train loss 0.3645195 +| epoch 5 | 2875/ 5600 batches | train loss 0.4108850 +| epoch 5 | 2879/ 5600 batches | train loss 0.4087144 +| epoch 5 | 2883/ 5600 batches | train loss 0.4392985 +| epoch 5 | 2887/ 5600 batches | train loss 0.3988331 +| epoch 5 | 2891/ 5600 batches | train loss 0.3384879 +| epoch 5 | 2895/ 5600 batches | train loss 0.4279339 +| epoch 5 | 2899/ 5600 batches | train loss 0.4290459 +| epoch 5 | 2903/ 5600 batches | train loss 0.3706440 +| epoch 5 | 2907/ 5600 batches | train loss 0.4176234 +| epoch 5 | 2911/ 5600 batches | train loss 0.4185366 +| epoch 5 | 2915/ 5600 batches | train loss 0.3692193 +| epoch 5 | 2919/ 5600 batches | train loss 0.2820593 +| epoch 5 | 2923/ 5600 batches | train loss 0.3978669 +| epoch 5 | 2927/ 5600 batches | train loss 0.4190925 +| epoch 5 | 2931/ 5600 batches | train loss 0.3937828 +| epoch 5 | 2935/ 5600 batches | train loss 0.4218309 +| epoch 5 | 2939/ 5600 batches | train loss 0.4027180 +| epoch 5 | 2943/ 5600 batches | train loss 0.3414652 +| epoch 5 | 2947/ 5600 batches | train loss 0.4898506 +| epoch 5 | 2951/ 5600 batches | train loss 0.2939050 +| epoch 5 | 2955/ 5600 batches | train loss 0.4250093 +| epoch 5 | 2959/ 5600 batches | train loss 0.4480984 +| epoch 5 | 2963/ 5600 batches | train loss 0.4593950 +| epoch 5 | 2967/ 5600 batches | train loss 0.2568720 +| epoch 5 | 2971/ 5600 batches | train loss 0.5034393 +| epoch 5 | 2975/ 5600 batches | train loss 0.3436363 +| epoch 5 | 2979/ 5600 batches | train loss 0.4429945 +| epoch 5 | 2983/ 5600 batches | train loss 0.3487622 +| epoch 5 | 2987/ 5600 batches | train loss 0.3864700 +| epoch 5 | 2991/ 5600 batches | train loss 0.4946561 +| epoch 5 | 2995/ 5600 batches | train loss 0.2890489 +| epoch 5 | 2999/ 5600 batches | train loss 0.4431843 +| epoch 5 | 3003/ 5600 batches | train loss 0.3447766 +| epoch 5 | 3007/ 5600 batches | train loss 0.3852956 +| epoch 5 | 3011/ 5600 batches | train loss 0.4284237 +| epoch 5 | 3015/ 5600 batches | train loss 0.3742028 +| epoch 5 | 3019/ 5600 batches | train loss 0.3999131 +| epoch 5 | 3023/ 5600 batches | train loss 0.3745416 +| epoch 5 | 3027/ 5600 batches | train loss 0.3371769 +| epoch 5 | 3031/ 5600 batches | train loss 0.3730939 +| epoch 5 | 3035/ 5600 batches | train loss 0.4413402 +| epoch 5 | 3039/ 5600 batches | train loss 0.4319625 +| epoch 5 | 3043/ 5600 batches | train loss 0.3388525 +| epoch 5 | 3047/ 5600 batches | train loss 0.3693028 +| epoch 5 | 3051/ 5600 batches | train loss 0.4345703 +| epoch 5 | 3055/ 5600 batches | train loss 0.4805470 +| epoch 5 | 3059/ 5600 batches | train loss 0.4020655 +| epoch 5 | 3063/ 5600 batches | train loss 0.4538076 +| epoch 5 | 3067/ 5600 batches | train loss 0.4214198 +| epoch 5 | 3071/ 5600 batches | train loss 0.4240026 +| epoch 5 | 3075/ 5600 batches | train loss 0.4486533 +| epoch 5 | 3079/ 5600 batches | train loss 0.4355807 +| epoch 5 | 3083/ 5600 batches | train loss 0.4620323 +| epoch 5 | 3087/ 5600 batches | train loss 0.3862217 +| epoch 5 | 3091/ 5600 batches | train loss 0.4114304 +| epoch 5 | 3095/ 5600 batches | train loss 0.3644676 +| epoch 5 | 3099/ 5600 batches | train loss 0.3509520 +| epoch 5 | 3103/ 5600 batches | train loss 0.4001437 +| epoch 5 | 3107/ 5600 batches | train loss 0.5039659 +| epoch 5 | 3111/ 5600 batches | train loss 0.4203389 +| epoch 5 | 3115/ 5600 batches | train loss 0.3211434 +| epoch 5 | 3119/ 5600 batches | train loss 0.4605350 +| epoch 5 | 3123/ 5600 batches | train loss 0.3929696 +| epoch 5 | 3127/ 5600 batches | train loss 0.4303751 +| epoch 5 | 3131/ 5600 batches | train loss 0.3411419 +| epoch 5 | 3135/ 5600 batches | train loss 0.4037852 +| epoch 5 | 3139/ 5600 batches | train loss 0.3431037 +| epoch 5 | 3143/ 5600 batches | train loss 0.3651608 +| epoch 5 | 3147/ 5600 batches | train loss 0.4806314 +| epoch 5 | 3151/ 5600 batches | train loss 0.4310724 +| epoch 5 | 3155/ 5600 batches | train loss 0.3675121 +| epoch 5 | 3159/ 5600 batches | train loss 0.3912933 +| epoch 5 | 3163/ 5600 batches | train loss 0.4480474 +| epoch 5 | 3167/ 5600 batches | train loss 0.3669893 +| epoch 5 | 3171/ 5600 batches | train loss 0.3426446 +| epoch 5 | 3175/ 5600 batches | train loss 0.3874495 +| epoch 5 | 3179/ 5600 batches | train loss 0.4162645 +| epoch 5 | 3183/ 5600 batches | train loss 0.4003229 +| epoch 5 | 3187/ 5600 batches | train loss 0.3875748 +| epoch 5 | 3191/ 5600 batches | train loss 0.3302568 +| epoch 5 | 3195/ 5600 batches | train loss 0.2932867 +| epoch 5 | 3199/ 5600 batches | train loss 0.4974753 +| epoch 5 | 3203/ 5600 batches | train loss 0.4123492 +| epoch 5 | 3207/ 5600 batches | train loss 0.3728120 +| epoch 5 | 3211/ 5600 batches | train loss 0.3975264 +| epoch 5 | 3215/ 5600 batches | train loss 0.4034437 +| epoch 5 | 3219/ 5600 batches | train loss 0.3961556 +| epoch 5 | 3223/ 5600 batches | train loss 0.3503155 +| epoch 5 | 3227/ 5600 batches | train loss 0.3641205 +| epoch 5 | 3231/ 5600 batches | train loss 0.4227423 +| epoch 5 | 3235/ 5600 batches | train loss 0.4646737 +| epoch 5 | 3239/ 5600 batches | train loss 0.3871239 +| epoch 5 | 3243/ 5600 batches | train loss 0.4375709 +| epoch 5 | 3247/ 5600 batches | train loss 0.3822836 +| epoch 5 | 3251/ 5600 batches | train loss 0.4196137 +| epoch 5 | 3255/ 5600 batches | train loss 0.4982413 +| epoch 5 | 3259/ 5600 batches | train loss 0.4024758 +| epoch 5 | 3263/ 5600 batches | train loss 0.2975321 +| epoch 5 | 3267/ 5600 batches | train loss 0.4029534 +| epoch 5 | 3271/ 5600 batches | train loss 0.4596384 +| epoch 5 | 3275/ 5600 batches | train loss 0.4895987 +| epoch 5 | 3279/ 5600 batches | train loss 0.3366352 +| epoch 5 | 3283/ 5600 batches | train loss 0.3779491 +| epoch 5 | 3287/ 5600 batches | train loss 0.3782439 +| epoch 5 | 3291/ 5600 batches | train loss 0.4142316 +| epoch 5 | 3295/ 5600 batches | train loss 0.3925037 +| epoch 5 | 3299/ 5600 batches | train loss 0.4903001 +| epoch 5 | 3303/ 5600 batches | train loss 0.4414620 +| epoch 5 | 3307/ 5600 batches | train loss 0.3839121 +| epoch 5 | 3311/ 5600 batches | train loss 0.3843100 +| epoch 5 | 3315/ 5600 batches | train loss 0.4008868 +| epoch 5 | 3319/ 5600 batches | train loss 0.5085219 +| epoch 5 | 3323/ 5600 batches | train loss 0.3493096 +| epoch 5 | 3327/ 5600 batches | train loss 0.4287654 +| epoch 5 | 3331/ 5600 batches | train loss 0.4895580 +| epoch 5 | 3335/ 5600 batches | train loss 0.4084852 +| epoch 5 | 3339/ 5600 batches | train loss 0.3958015 +| epoch 5 | 3343/ 5600 batches | train loss 0.3606810 +| epoch 5 | 3347/ 5600 batches | train loss 0.5086181 +| epoch 5 | 3351/ 5600 batches | train loss 0.3495188 +| epoch 5 | 3355/ 5600 batches | train loss 0.4217989 +| epoch 5 | 3359/ 5600 batches | train loss 0.4142329 +| epoch 5 | 3363/ 5600 batches | train loss 0.3642596 +| epoch 5 | 3367/ 5600 batches | train loss 0.3684009 +| epoch 5 | 3371/ 5600 batches | train loss 0.2927301 +| epoch 5 | 3375/ 5600 batches | train loss 0.2435421 +| epoch 5 | 3379/ 5600 batches | train loss 0.3430209 +| epoch 5 | 3383/ 5600 batches | train loss 0.3483547 +| epoch 5 | 3387/ 5600 batches | train loss 0.4510356 +| epoch 5 | 3391/ 5600 batches | train loss 0.3657870 +| epoch 5 | 3395/ 5600 batches | train loss 0.3830267 +| epoch 5 | 3399/ 5600 batches | train loss 0.4500595 +| epoch 5 | 3403/ 5600 batches | train loss 0.3914089 +| epoch 5 | 3407/ 5600 batches | train loss 0.4023532 +| epoch 5 | 3411/ 5600 batches | train loss 0.4283233 +| epoch 5 | 3415/ 5600 batches | train loss 0.4874038 +| epoch 5 | 3419/ 5600 batches | train loss 0.3430122 +| epoch 5 | 3423/ 5600 batches | train loss 0.4268644 +| epoch 5 | 3427/ 5600 batches | train loss 0.4040093 +| epoch 5 | 3431/ 5600 batches | train loss 0.4875421 +| epoch 5 | 3435/ 5600 batches | train loss 0.3117219 +| epoch 5 | 3439/ 5600 batches | train loss 0.2435260 +| epoch 5 | 3443/ 5600 batches | train loss 0.4270151 +| epoch 5 | 3447/ 5600 batches | train loss 0.4562992 +| epoch 5 | 3451/ 5600 batches | train loss 0.4109564 +| epoch 5 | 3455/ 5600 batches | train loss 0.3503899 +| epoch 5 | 3459/ 5600 batches | train loss 0.4942577 +| epoch 5 | 3463/ 5600 batches | train loss 0.3010217 +| epoch 5 | 3467/ 5600 batches | train loss 0.3714008 +| epoch 5 | 3471/ 5600 batches | train loss 0.2998185 +| epoch 5 | 3475/ 5600 batches | train loss 0.3674868 +| epoch 5 | 3479/ 5600 batches | train loss 0.4851695 +| epoch 5 | 3483/ 5600 batches | train loss 0.3891537 +| epoch 5 | 3487/ 5600 batches | train loss 0.3569679 +| epoch 5 | 3491/ 5600 batches | train loss 0.3730339 +| epoch 5 | 3495/ 5600 batches | train loss 0.4189663 +| epoch 5 | 3499/ 5600 batches | train loss 0.3843239 +| epoch 5 | 3503/ 5600 batches | train loss 0.3631622 +| epoch 5 | 3507/ 5600 batches | train loss 0.4153264 +| epoch 5 | 3511/ 5600 batches | train loss 0.4124777 +| epoch 5 | 3515/ 5600 batches | train loss 0.3861082 +| epoch 5 | 3519/ 5600 batches | train loss 0.5519315 +| epoch 5 | 3523/ 5600 batches | train loss 0.3715976 +| epoch 5 | 3527/ 5600 batches | train loss 0.4080188 +| epoch 5 | 3531/ 5600 batches | train loss 0.4219568 +| epoch 5 | 3535/ 5600 batches | train loss 0.3603206 +| epoch 5 | 3539/ 5600 batches | train loss 0.3906668 +| epoch 5 | 3543/ 5600 batches | train loss 0.3995685 +| epoch 5 | 3547/ 5600 batches | train loss 0.4053664 +| epoch 5 | 3551/ 5600 batches | train loss 0.4372297 +| epoch 5 | 3555/ 5600 batches | train loss 0.4438821 +| epoch 5 | 3559/ 5600 batches | train loss 0.3254327 +| epoch 5 | 3563/ 5600 batches | train loss 0.5066835 +| epoch 5 | 3567/ 5600 batches | train loss 0.4818383 +| epoch 5 | 3571/ 5600 batches | train loss 0.4432073 +| epoch 5 | 3575/ 5600 batches | train loss 0.4602668 +| epoch 5 | 3579/ 5600 batches | train loss 0.5200540 +| epoch 5 | 3583/ 5600 batches | train loss 0.4321365 +| epoch 5 | 3587/ 5600 batches | train loss 0.4345533 +| epoch 5 | 3591/ 5600 batches | train loss 0.3950805 +| epoch 5 | 3595/ 5600 batches | train loss 0.4353358 +| epoch 5 | 3599/ 5600 batches | train loss 0.4309358 +| epoch 5 | 3603/ 5600 batches | train loss 0.3959295 +| epoch 5 | 3607/ 5600 batches | train loss 0.3252144 +| epoch 5 | 3611/ 5600 batches | train loss 0.3558565 +| epoch 5 | 3615/ 5600 batches | train loss 0.3792644 +| epoch 5 | 3619/ 5600 batches | train loss 0.4222867 +| epoch 5 | 3623/ 5600 batches | train loss 0.4076714 +| epoch 5 | 3627/ 5600 batches | train loss 0.4025814 +| epoch 5 | 3631/ 5600 batches | train loss 0.4745141 +| epoch 5 | 3635/ 5600 batches | train loss 0.4271095 +| epoch 5 | 3639/ 5600 batches | train loss 0.3838814 +| epoch 5 | 3643/ 5600 batches | train loss 0.4052449 +| epoch 5 | 3647/ 5600 batches | train loss 0.4452847 +| epoch 5 | 3651/ 5600 batches | train loss 0.5796214 +| epoch 5 | 3655/ 5600 batches | train loss 0.3639262 +| epoch 5 | 3659/ 5600 batches | train loss 0.4297726 +| epoch 5 | 3663/ 5600 batches | train loss 0.3649068 +| epoch 5 | 3667/ 5600 batches | train loss 0.3747578 +| epoch 5 | 3671/ 5600 batches | train loss 0.3409030 +| epoch 5 | 3675/ 5600 batches | train loss 0.4732766 +| epoch 5 | 3679/ 5600 batches | train loss 0.3501813 +| epoch 5 | 3683/ 5600 batches | train loss 0.4396224 +| epoch 5 | 3687/ 5600 batches | train loss 0.3956520 +| epoch 5 | 3691/ 5600 batches | train loss 0.4367276 +| epoch 5 | 3695/ 5600 batches | train loss 0.4074655 +| epoch 5 | 3699/ 5600 batches | train loss 0.3948174 +| epoch 5 | 3703/ 5600 batches | train loss 0.4006713 +| epoch 5 | 3707/ 5600 batches | train loss 0.5027149 +| epoch 5 | 3711/ 5600 batches | train loss 0.3573551 +| epoch 5 | 3715/ 5600 batches | train loss 0.3466609 +| epoch 5 | 3719/ 5600 batches | train loss 0.4113220 +| epoch 5 | 3723/ 5600 batches | train loss 0.3866808 +| epoch 5 | 3727/ 5600 batches | train loss 0.4264271 +| epoch 5 | 3731/ 5600 batches | train loss 0.4429643 +| epoch 5 | 3735/ 5600 batches | train loss 0.4005559 +| epoch 5 | 3739/ 5600 batches | train loss 0.4422987 +| epoch 5 | 3743/ 5600 batches | train loss 0.4328012 +| epoch 5 | 3747/ 5600 batches | train loss 0.4175429 +| epoch 5 | 3751/ 5600 batches | train loss 0.5294991 +| epoch 5 | 3755/ 5600 batches | train loss 0.4396057 +| epoch 5 | 3759/ 5600 batches | train loss 0.3912457 +| epoch 5 | 3763/ 5600 batches | train loss 0.4985239 +| epoch 5 | 3767/ 5600 batches | train loss 0.4655557 +| epoch 5 | 3771/ 5600 batches | train loss 0.4059870 +| epoch 5 | 3775/ 5600 batches | train loss 0.4465766 +| epoch 5 | 3779/ 5600 batches | train loss 0.3430952 +| epoch 5 | 3783/ 5600 batches | train loss 0.3380115 +| epoch 5 | 3787/ 5600 batches | train loss 0.4092800 +| epoch 5 | 3791/ 5600 batches | train loss 0.4685862 +| epoch 5 | 3795/ 5600 batches | train loss 0.4572991 +| epoch 5 | 3799/ 5600 batches | train loss 0.3979335 +| epoch 5 | 3803/ 5600 batches | train loss 0.4640843 +| epoch 5 | 3807/ 5600 batches | train loss 0.4750535 +| epoch 5 | 3811/ 5600 batches | train loss 0.3745434 +| epoch 5 | 3815/ 5600 batches | train loss 0.4321778 +| epoch 5 | 3819/ 5600 batches | train loss 0.3338776 +| epoch 5 | 3823/ 5600 batches | train loss 0.3729335 +| epoch 5 | 3827/ 5600 batches | train loss 0.4224644 +| epoch 5 | 3831/ 5600 batches | train loss 0.3975369 +| epoch 5 | 3835/ 5600 batches | train loss 0.3919666 +| epoch 5 | 3839/ 5600 batches | train loss 0.3451565 +| epoch 5 | 3843/ 5600 batches | train loss 0.4229205 +| epoch 5 | 3847/ 5600 batches | train loss 0.3734874 +| epoch 5 | 3851/ 5600 batches | train loss 0.3668522 +| epoch 5 | 3855/ 5600 batches | train loss 0.3784024 +| epoch 5 | 3859/ 5600 batches | train loss 0.4701189 +| epoch 5 | 3863/ 5600 batches | train loss 0.4031012 +| epoch 5 | 3867/ 5600 batches | train loss 0.3563350 +| epoch 5 | 3871/ 5600 batches | train loss 0.3789651 +| epoch 5 | 3875/ 5600 batches | train loss 0.4816778 +| epoch 5 | 3879/ 5600 batches | train loss 0.3663206 +| epoch 5 | 3883/ 5600 batches | train loss 0.4262880 +| epoch 5 | 3887/ 5600 batches | train loss 0.3175142 +| epoch 5 | 3891/ 5600 batches | train loss 0.4319512 +| epoch 5 | 3895/ 5600 batches | train loss 0.3970982 +| epoch 5 | 3899/ 5600 batches | train loss 0.4265662 +| epoch 5 | 3903/ 5600 batches | train loss 0.4268808 +| epoch 5 | 3907/ 5600 batches | train loss 0.4440963 +| epoch 5 | 3911/ 5600 batches | train loss 0.3990066 +| epoch 5 | 3915/ 5600 batches | train loss 0.3967474 +| epoch 5 | 3919/ 5600 batches | train loss 0.4251218 +| epoch 5 | 3923/ 5600 batches | train loss 0.3553416 +| epoch 5 | 3927/ 5600 batches | train loss 0.4077298 +| epoch 5 | 3931/ 5600 batches | train loss 0.3486617 +| epoch 5 | 3935/ 5600 batches | train loss 0.3467055 +| epoch 5 | 3939/ 5600 batches | train loss 0.4163709 +| epoch 5 | 3943/ 5600 batches | train loss 0.4314971 +| epoch 5 | 3947/ 5600 batches | train loss 0.3662487 +| epoch 5 | 3951/ 5600 batches | train loss 0.4413233 +| epoch 5 | 3955/ 5600 batches | train loss 0.3871396 +| epoch 5 | 3959/ 5600 batches | train loss 0.3763283 +| epoch 5 | 3963/ 5600 batches | train loss 0.4155524 +| epoch 5 | 3967/ 5600 batches | train loss 0.4322300 +| epoch 5 | 3971/ 5600 batches | train loss 0.3492406 +| epoch 5 | 3975/ 5600 batches | train loss 0.4312798 +| epoch 5 | 3979/ 5600 batches | train loss 0.4491672 +| epoch 5 | 3983/ 5600 batches | train loss 0.4681535 +| epoch 5 | 3987/ 5600 batches | train loss 0.4693971 +| epoch 5 | 3991/ 5600 batches | train loss 0.3720872 +| epoch 5 | 3995/ 5600 batches | train loss 0.4270951 +| epoch 5 | 3999/ 5600 batches | train loss 0.4307244 +| epoch 5 | 4003/ 5600 batches | train loss 0.4212169 +| epoch 5 | 4007/ 5600 batches | train loss 0.4306666 +| epoch 5 | 4011/ 5600 batches | train loss 0.4189974 +| epoch 5 | 4015/ 5600 batches | train loss 0.4482657 +| epoch 5 | 4019/ 5600 batches | train loss 0.4409280 +| epoch 5 | 4023/ 5600 batches | train loss 0.4323777 +| epoch 5 | 4027/ 5600 batches | train loss 0.3804430 +| epoch 5 | 4031/ 5600 batches | train loss 0.2618883 +| epoch 5 | 4035/ 5600 batches | train loss 0.4598830 +| epoch 5 | 4039/ 5600 batches | train loss 0.3582243 +| epoch 5 | 4043/ 5600 batches | train loss 0.4062666 +| epoch 5 | 4047/ 5600 batches | train loss 0.4426318 +| epoch 5 | 4051/ 5600 batches | train loss 0.4194590 +| epoch 5 | 4055/ 5600 batches | train loss 0.4256653 +| epoch 5 | 4059/ 5600 batches | train loss 0.4574412 +| epoch 5 | 4063/ 5600 batches | train loss 0.4202892 +| epoch 5 | 4067/ 5600 batches | train loss 0.4291391 +| epoch 5 | 4071/ 5600 batches | train loss 0.4628448 +| epoch 5 | 4075/ 5600 batches | train loss 0.4663666 +| epoch 5 | 4079/ 5600 batches | train loss 0.3877717 +| epoch 5 | 4083/ 5600 batches | train loss 0.3936290 +| epoch 5 | 4087/ 5600 batches | train loss 0.3751458 +| epoch 5 | 4091/ 5600 batches | train loss 0.3882039 +| epoch 5 | 4095/ 5600 batches | train loss 0.3748017 +| epoch 5 | 4099/ 5600 batches | train loss 0.4148719 +| epoch 5 | 4103/ 5600 batches | train loss 0.4742035 +| epoch 5 | 4107/ 5600 batches | train loss 0.3663216 +| epoch 5 | 4111/ 5600 batches | train loss 0.3455961 +| epoch 5 | 4115/ 5600 batches | train loss 0.4240587 +| epoch 5 | 4119/ 5600 batches | train loss 0.4148373 +| epoch 5 | 4123/ 5600 batches | train loss 0.3900077 +| epoch 5 | 4127/ 5600 batches | train loss 0.4723663 +| epoch 5 | 4131/ 5600 batches | train loss 0.4505244 +| epoch 5 | 4135/ 5600 batches | train loss 0.3584650 +| epoch 5 | 4139/ 5600 batches | train loss 0.4647496 +| epoch 5 | 4143/ 5600 batches | train loss 0.4178470 +| epoch 5 | 4147/ 5600 batches | train loss 0.4412447 +| epoch 5 | 4151/ 5600 batches | train loss 0.4335308 +| epoch 5 | 4155/ 5600 batches | train loss 0.3182932 +| epoch 5 | 4159/ 5600 batches | train loss 0.4252596 +| epoch 5 | 4163/ 5600 batches | train loss 0.4539947 +| epoch 5 | 4167/ 5600 batches | train loss 0.3905048 +| epoch 5 | 4171/ 5600 batches | train loss 0.4318273 +| epoch 5 | 4175/ 5600 batches | train loss 0.3635013 +| epoch 5 | 4179/ 5600 batches | train loss 0.4191256 +| epoch 5 | 4183/ 5600 batches | train loss 0.4336796 +| epoch 5 | 4187/ 5600 batches | train loss 0.3560216 +| epoch 5 | 4191/ 5600 batches | train loss 0.4305187 +| epoch 5 | 4195/ 5600 batches | train loss 0.4262841 +| epoch 5 | 4199/ 5600 batches | train loss 0.3664718 +| epoch 5 | 4203/ 5600 batches | train loss 0.3758126 +| epoch 5 | 4207/ 5600 batches | train loss 0.3885164 +| epoch 5 | 4211/ 5600 batches | train loss 0.5008419 +| epoch 5 | 4215/ 5600 batches | train loss 0.4110990 +| epoch 5 | 4219/ 5600 batches | train loss 0.4005957 +| epoch 5 | 4223/ 5600 batches | train loss 0.3734746 +| epoch 5 | 4227/ 5600 batches | train loss 0.3985171 +| epoch 5 | 4231/ 5600 batches | train loss 0.3580991 +| epoch 5 | 4235/ 5600 batches | train loss 0.3191206 +| epoch 5 | 4239/ 5600 batches | train loss 0.4566743 +| epoch 5 | 4243/ 5600 batches | train loss 0.4270457 +| epoch 5 | 4247/ 5600 batches | train loss 0.2762101 +| epoch 5 | 4251/ 5600 batches | train loss 0.3735115 +| epoch 5 | 4255/ 5600 batches | train loss 0.3421429 +| epoch 5 | 4259/ 5600 batches | train loss 0.3602599 +| epoch 5 | 4263/ 5600 batches | train loss 0.4911030 +| epoch 5 | 4267/ 5600 batches | train loss 0.4149601 +| epoch 5 | 4271/ 5600 batches | train loss 0.4147593 +| epoch 5 | 4275/ 5600 batches | train loss 0.4124918 +| epoch 5 | 4279/ 5600 batches | train loss 0.4618594 +| epoch 5 | 4283/ 5600 batches | train loss 0.3018243 +| epoch 5 | 4287/ 5600 batches | train loss 0.4237682 +| epoch 5 | 4291/ 5600 batches | train loss 0.2985360 +| epoch 5 | 4295/ 5600 batches | train loss 0.3199488 +| epoch 5 | 4299/ 5600 batches | train loss 0.4119964 +| epoch 5 | 4303/ 5600 batches | train loss 0.3849190 +| epoch 5 | 4307/ 5600 batches | train loss 0.3739875 +| epoch 5 | 4311/ 5600 batches | train loss 0.4644340 +| epoch 5 | 4315/ 5600 batches | train loss 0.3518107 +| epoch 5 | 4319/ 5600 batches | train loss 0.4520190 +| epoch 5 | 4323/ 5600 batches | train loss 0.3859925 +| epoch 5 | 4327/ 5600 batches | train loss 0.4175513 +| epoch 5 | 4331/ 5600 batches | train loss 0.4137145 +| epoch 5 | 4335/ 5600 batches | train loss 0.4618097 +| epoch 5 | 4339/ 5600 batches | train loss 0.3999432 +| epoch 5 | 4343/ 5600 batches | train loss 0.3781620 +| epoch 5 | 4347/ 5600 batches | train loss 0.5567399 +| epoch 5 | 4351/ 5600 batches | train loss 0.3915339 +| epoch 5 | 4355/ 5600 batches | train loss 0.4404350 +| epoch 5 | 4359/ 5600 batches | train loss 0.3997626 +| epoch 5 | 4363/ 5600 batches | train loss 0.3443040 +| epoch 5 | 4367/ 5600 batches | train loss 0.3622687 +| epoch 5 | 4371/ 5600 batches | train loss 0.3395389 +| epoch 5 | 4375/ 5600 batches | train loss 0.4590431 +| epoch 5 | 4379/ 5600 batches | train loss 0.4292094 +| epoch 5 | 4383/ 5600 batches | train loss 0.4166185 +| epoch 5 | 4387/ 5600 batches | train loss 0.4183922 +| epoch 5 | 4391/ 5600 batches | train loss 0.3653168 +| epoch 5 | 4395/ 5600 batches | train loss 0.3789923 +| epoch 5 | 4399/ 5600 batches | train loss 0.3917901 +| epoch 5 | 4403/ 5600 batches | train loss 0.3931262 +| epoch 5 | 4407/ 5600 batches | train loss 0.4432445 +| epoch 5 | 4411/ 5600 batches | train loss 0.4284260 +| epoch 5 | 4415/ 5600 batches | train loss 0.2458725 +| epoch 5 | 4419/ 5600 batches | train loss 0.4745649 +| epoch 5 | 4423/ 5600 batches | train loss 0.3859183 +| epoch 5 | 4427/ 5600 batches | train loss 0.4898606 +| epoch 5 | 4431/ 5600 batches | train loss 0.3415553 +| epoch 5 | 4435/ 5600 batches | train loss 0.3502685 +| epoch 5 | 4439/ 5600 batches | train loss 0.3781552 +| epoch 5 | 4443/ 5600 batches | train loss 0.3753451 +| epoch 5 | 4447/ 5600 batches | train loss 0.3992399 +| epoch 5 | 4451/ 5600 batches | train loss 0.4330401 +| epoch 5 | 4455/ 5600 batches | train loss 0.4498930 +| epoch 5 | 4459/ 5600 batches | train loss 0.4317441 +| epoch 5 | 4463/ 5600 batches | train loss 0.4319917 +| epoch 5 | 4467/ 5600 batches | train loss 0.4560234 +| epoch 5 | 4471/ 5600 batches | train loss 0.4536148 +| epoch 5 | 4475/ 5600 batches | train loss 0.4807912 +| epoch 5 | 4479/ 5600 batches | train loss 0.4479743 +| epoch 5 | 4483/ 5600 batches | train loss 0.4839141 +| epoch 5 | 4487/ 5600 batches | train loss 0.5066655 +| epoch 5 | 4491/ 5600 batches | train loss 0.3618230 +| epoch 5 | 4495/ 5600 batches | train loss 0.3913568 +| epoch 5 | 4499/ 5600 batches | train loss 0.3544449 +| epoch 5 | 4503/ 5600 batches | train loss 0.4540203 +| epoch 5 | 4507/ 5600 batches | train loss 0.4778987 +| epoch 5 | 4511/ 5600 batches | train loss 0.3752991 +| epoch 5 | 4515/ 5600 batches | train loss 0.3957250 +| epoch 5 | 4519/ 5600 batches | train loss 0.3415571 +| epoch 5 | 4523/ 5600 batches | train loss 0.4013412 +| epoch 5 | 4527/ 5600 batches | train loss 0.4531462 +| epoch 5 | 4531/ 5600 batches | train loss 0.4051844 +| epoch 5 | 4535/ 5600 batches | train loss 0.4157075 +| epoch 5 | 4539/ 5600 batches | train loss 0.3739883 +| epoch 5 | 4543/ 5600 batches | train loss 0.4442345 +| epoch 5 | 4547/ 5600 batches | train loss 0.3289426 +| epoch 5 | 4551/ 5600 batches | train loss 0.4802457 +| epoch 5 | 4555/ 5600 batches | train loss 0.4436684 +| epoch 5 | 4559/ 5600 batches | train loss 0.4016636 +| epoch 5 | 4563/ 5600 batches | train loss 0.3635359 +| epoch 5 | 4567/ 5600 batches | train loss 0.4514985 +| epoch 5 | 4571/ 5600 batches | train loss 0.3235832 +| epoch 5 | 4575/ 5600 batches | train loss 0.3872522 +| epoch 5 | 4579/ 5600 batches | train loss 0.3449534 +| epoch 5 | 4583/ 5600 batches | train loss 0.4581093 +| epoch 5 | 4587/ 5600 batches | train loss 0.4344341 +| epoch 5 | 4591/ 5600 batches | train loss 0.3638220 +| epoch 5 | 4595/ 5600 batches | train loss 0.4180321 +| epoch 5 | 4599/ 5600 batches | train loss 0.4450125 +| epoch 5 | 4603/ 5600 batches | train loss 0.3586738 +| epoch 5 | 4607/ 5600 batches | train loss 0.1777628 +| epoch 5 | 4611/ 5600 batches | train loss 0.3672785 +| epoch 5 | 4615/ 5600 batches | train loss 0.4195063 +| epoch 5 | 4619/ 5600 batches | train loss 0.3609804 +| epoch 5 | 4623/ 5600 batches | train loss 0.4378451 +| epoch 5 | 4627/ 5600 batches | train loss 0.3416705 +| epoch 5 | 4631/ 5600 batches | train loss 0.3711146 +| epoch 5 | 4635/ 5600 batches | train loss 0.4892686 +| epoch 5 | 4639/ 5600 batches | train loss 0.3772697 +| epoch 5 | 4643/ 5600 batches | train loss 0.4847832 +| epoch 5 | 4647/ 5600 batches | train loss 0.4569342 +| epoch 5 | 4651/ 5600 batches | train loss 0.4570268 +| epoch 5 | 4655/ 5600 batches | train loss 0.3631148 +| epoch 5 | 4659/ 5600 batches | train loss 0.3548067 +| epoch 5 | 4663/ 5600 batches | train loss 0.3935089 +| epoch 5 | 4667/ 5600 batches | train loss 0.3803970 +| epoch 5 | 4671/ 5600 batches | train loss 0.3647881 +| epoch 5 | 4675/ 5600 batches | train loss 0.3210243 +| epoch 5 | 4679/ 5600 batches | train loss 0.3773164 +| epoch 5 | 4683/ 5600 batches | train loss 0.4294024 +| epoch 5 | 4687/ 5600 batches | train loss 0.4420986 +| epoch 5 | 4691/ 5600 batches | train loss 0.4203168 +| epoch 5 | 4695/ 5600 batches | train loss 0.4833891 +| epoch 5 | 4699/ 5600 batches | train loss 0.3521162 +| epoch 5 | 4703/ 5600 batches | train loss 0.3532777 +| epoch 5 | 4707/ 5600 batches | train loss 0.3652443 +| epoch 5 | 4711/ 5600 batches | train loss 0.4190297 +| epoch 5 | 4715/ 5600 batches | train loss 0.4233959 +| epoch 5 | 4719/ 5600 batches | train loss 0.3506646 +| epoch 5 | 4723/ 5600 batches | train loss 0.5233269 +| epoch 5 | 4727/ 5600 batches | train loss 0.3827632 +| epoch 5 | 4731/ 5600 batches | train loss 0.4152889 +| epoch 5 | 4735/ 5600 batches | train loss 0.4785720 +| epoch 5 | 4739/ 5600 batches | train loss 0.4090633 +| epoch 5 | 4743/ 5600 batches | train loss 0.3991261 +| epoch 5 | 4747/ 5600 batches | train loss 0.3602359 +| epoch 5 | 4751/ 5600 batches | train loss 0.3923275 +| epoch 5 | 4755/ 5600 batches | train loss 0.3508242 +| epoch 5 | 4759/ 5600 batches | train loss 0.4190050 +| epoch 5 | 4763/ 5600 batches | train loss 0.4933704 +| epoch 5 | 4767/ 5600 batches | train loss 0.4617238 +| epoch 5 | 4771/ 5600 batches | train loss 0.4877042 +| epoch 5 | 4775/ 5600 batches | train loss 0.4424095 +| epoch 5 | 4779/ 5600 batches | train loss 0.6194534 +| epoch 5 | 4783/ 5600 batches | train loss 0.4167311 +| epoch 5 | 4787/ 5600 batches | train loss 0.4429117 +| epoch 5 | 4791/ 5600 batches | train loss 0.3500462 +| epoch 5 | 4795/ 5600 batches | train loss 0.3650589 +| epoch 5 | 4799/ 5600 batches | train loss 0.3605974 +| epoch 5 | 4803/ 5600 batches | train loss 0.3758684 +| epoch 5 | 4807/ 5600 batches | train loss 0.4497193 +| epoch 5 | 4811/ 5600 batches | train loss 0.3443411 +| epoch 5 | 4815/ 5600 batches | train loss 0.4909810 +| epoch 5 | 4819/ 5600 batches | train loss 0.4200161 +| epoch 5 | 4823/ 5600 batches | train loss 0.4374691 +| epoch 5 | 4827/ 5600 batches | train loss 0.3233902 +| epoch 5 | 4831/ 5600 batches | train loss 0.2834609 +| epoch 5 | 4835/ 5600 batches | train loss 0.4417960 +| epoch 5 | 4839/ 5600 batches | train loss 0.4035555 +| epoch 5 | 4843/ 5600 batches | train loss 0.4037605 +| epoch 5 | 4847/ 5600 batches | train loss 0.4641404 +| epoch 5 | 4851/ 5600 batches | train loss 0.4061343 +| epoch 5 | 4855/ 5600 batches | train loss 0.3963208 +| epoch 5 | 4859/ 5600 batches | train loss 0.4502293 +| epoch 5 | 4863/ 5600 batches | train loss 0.3647863 +| epoch 5 | 4867/ 5600 batches | train loss 0.2608912 +| epoch 5 | 4871/ 5600 batches | train loss 0.3237317 +| epoch 5 | 4875/ 5600 batches | train loss 0.4523878 +| epoch 5 | 4879/ 5600 batches | train loss 0.3489870 +| epoch 5 | 4883/ 5600 batches | train loss 0.4137342 +| epoch 5 | 4887/ 5600 batches | train loss 0.4083328 +| epoch 5 | 4891/ 5600 batches | train loss 0.3578626 +| epoch 5 | 4895/ 5600 batches | train loss 0.4356092 +| epoch 5 | 4899/ 5600 batches | train loss 0.3313566 +| epoch 5 | 4903/ 5600 batches | train loss 0.4802046 +| epoch 5 | 4907/ 5600 batches | train loss 0.4622641 +| epoch 5 | 4911/ 5600 batches | train loss 0.4740868 +| epoch 5 | 4915/ 5600 batches | train loss 0.4074982 +| epoch 5 | 4919/ 5600 batches | train loss 0.4151604 +| epoch 5 | 4923/ 5600 batches | train loss 0.4557606 +| epoch 5 | 4927/ 5600 batches | train loss 0.4329925 +| epoch 5 | 4931/ 5600 batches | train loss 0.4233088 +| epoch 5 | 4935/ 5600 batches | train loss 0.3801692 +| epoch 5 | 4939/ 5600 batches | train loss 0.4488247 +| epoch 5 | 4943/ 5600 batches | train loss 0.4468641 +| epoch 5 | 4947/ 5600 batches | train loss 0.3510494 +| epoch 5 | 4951/ 5600 batches | train loss 0.4226193 +| epoch 5 | 4955/ 5600 batches | train loss 0.4203379 +| epoch 5 | 4959/ 5600 batches | train loss 0.4048633 +| epoch 5 | 4963/ 5600 batches | train loss 0.4029583 +| epoch 5 | 4967/ 5600 batches | train loss 0.3504611 +| epoch 5 | 4971/ 5600 batches | train loss 0.4101164 +| epoch 5 | 4975/ 5600 batches | train loss 0.4030915 +| epoch 5 | 4979/ 5600 batches | train loss 0.3965014 +| epoch 5 | 4983/ 5600 batches | train loss 0.3757106 +| epoch 5 | 4987/ 5600 batches | train loss 0.4268297 +| epoch 5 | 4991/ 5600 batches | train loss 0.4626031 +| epoch 5 | 4995/ 5600 batches | train loss 0.3133020 +| epoch 5 | 4999/ 5600 batches | train loss 0.4324920 +| epoch 5 | 5003/ 5600 batches | train loss 0.4427729 +| epoch 5 | 5007/ 5600 batches | train loss 0.3946295 +| epoch 5 | 5011/ 5600 batches | train loss 0.3302808 +| epoch 5 | 5015/ 5600 batches | train loss 0.3628010 +| epoch 5 | 5019/ 5600 batches | train loss 0.3877997 +| epoch 5 | 5023/ 5600 batches | train loss 0.5432795 +| epoch 5 | 5027/ 5600 batches | train loss 0.4120201 +| epoch 5 | 5031/ 5600 batches | train loss 0.4569543 +| epoch 5 | 5035/ 5600 batches | train loss 0.4052421 +| epoch 5 | 5039/ 5600 batches | train loss 0.3973602 +| epoch 5 | 5043/ 5600 batches | train loss 0.3474305 +| epoch 5 | 5047/ 5600 batches | train loss 0.3855099 +| epoch 5 | 5051/ 5600 batches | train loss 0.4534392 +| epoch 5 | 5055/ 5600 batches | train loss 0.3621066 +| epoch 5 | 5059/ 5600 batches | train loss 0.4252454 +| epoch 5 | 5063/ 5600 batches | train loss 0.3928393 +| epoch 5 | 5067/ 5600 batches | train loss 0.4218256 +| epoch 5 | 5071/ 5600 batches | train loss 0.4142657 +| epoch 5 | 5075/ 5600 batches | train loss 0.3576438 +| epoch 5 | 5079/ 5600 batches | train loss 0.3970523 +| epoch 5 | 5083/ 5600 batches | train loss 0.3646595 +| epoch 5 | 5087/ 5600 batches | train loss 0.4022223 +| epoch 5 | 5091/ 5600 batches | train loss 0.5687393 +| epoch 5 | 5095/ 5600 batches | train loss 0.5313184 +| epoch 5 | 5099/ 5600 batches | train loss 0.3506922 +| epoch 5 | 5103/ 5600 batches | train loss 0.4104181 +| epoch 5 | 5107/ 5600 batches | train loss 0.5073805 +| epoch 5 | 5111/ 5600 batches | train loss 0.4181912 +| epoch 5 | 5115/ 5600 batches | train loss 0.4661075 +| epoch 5 | 5119/ 5600 batches | train loss 0.3869303 +| epoch 5 | 5123/ 5600 batches | train loss 0.4368499 +| epoch 5 | 5127/ 5600 batches | train loss 0.4489423 +| epoch 5 | 5131/ 5600 batches | train loss 0.3849232 +| epoch 5 | 5135/ 5600 batches | train loss 0.4475038 +| epoch 5 | 5139/ 5600 batches | train loss 0.3595614 +| epoch 5 | 5143/ 5600 batches | train loss 0.3808618 +| epoch 5 | 5147/ 5600 batches | train loss 0.4327034 +| epoch 5 | 5151/ 5600 batches | train loss 0.4282235 +| epoch 5 | 5155/ 5600 batches | train loss 0.4800825 +| epoch 5 | 5159/ 5600 batches | train loss 0.3462336 +| epoch 5 | 5163/ 5600 batches | train loss 0.3609058 +| epoch 5 | 5167/ 5600 batches | train loss 0.3914267 +| epoch 5 | 5171/ 5600 batches | train loss 0.4395360 +| epoch 5 | 5175/ 5600 batches | train loss 0.4191061 +| epoch 5 | 5179/ 5600 batches | train loss 0.4159812 +| epoch 5 | 5183/ 5600 batches | train loss 0.2780103 +| epoch 5 | 5187/ 5600 batches | train loss 0.3801448 +| epoch 5 | 5191/ 5600 batches | train loss 0.4046586 +| epoch 5 | 5195/ 5600 batches | train loss 0.4360606 +| epoch 5 | 5199/ 5600 batches | train loss 0.3967138 +| epoch 5 | 5203/ 5600 batches | train loss 0.4677828 +| epoch 5 | 5207/ 5600 batches | train loss 0.3823145 +| epoch 5 | 5211/ 5600 batches | train loss 0.2953049 +| epoch 5 | 5215/ 5600 batches | train loss 0.4230182 +| epoch 5 | 5219/ 5600 batches | train loss 0.3411337 +| epoch 5 | 5223/ 5600 batches | train loss 0.3736681 +| epoch 5 | 5227/ 5600 batches | train loss 0.3937262 +| epoch 5 | 5231/ 5600 batches | train loss 0.4610882 +| epoch 5 | 5235/ 5600 batches | train loss 0.3616575 +| epoch 5 | 5239/ 5600 batches | train loss 0.3754704 +| epoch 5 | 5243/ 5600 batches | train loss 0.3746427 +| epoch 5 | 5247/ 5600 batches | train loss 0.4332339 +| epoch 5 | 5251/ 5600 batches | train loss 0.4206653 +| epoch 5 | 5255/ 5600 batches | train loss 0.4700766 +| epoch 5 | 5259/ 5600 batches | train loss 0.4039773 +| epoch 5 | 5263/ 5600 batches | train loss 0.3689705 +| epoch 5 | 5267/ 5600 batches | train loss 0.4249083 +| epoch 5 | 5271/ 5600 batches | train loss 0.4372979 +| epoch 5 | 5275/ 5600 batches | train loss 0.3784921 +| epoch 5 | 5279/ 5600 batches | train loss 0.3571348 +| epoch 5 | 5283/ 5600 batches | train loss 0.4537799 +| epoch 5 | 5287/ 5600 batches | train loss 0.5014463 +| epoch 5 | 5291/ 5600 batches | train loss 0.4709598 +| epoch 5 | 5295/ 5600 batches | train loss 0.4667156 +| epoch 5 | 5299/ 5600 batches | train loss 0.4365855 +| epoch 5 | 5303/ 5600 batches | train loss 0.3969734 +| epoch 5 | 5307/ 5600 batches | train loss 0.4640183 +| epoch 5 | 5311/ 5600 batches | train loss 0.4416482 +| epoch 5 | 5315/ 5600 batches | train loss 0.4536287 +| epoch 5 | 5319/ 5600 batches | train loss 0.4347040 +| epoch 5 | 5323/ 5600 batches | train loss 0.4320343 +| epoch 5 | 5327/ 5600 batches | train loss 0.4286297 +| epoch 5 | 5331/ 5600 batches | train loss 0.4299128 +| epoch 5 | 5335/ 5600 batches | train loss 0.4260914 +| epoch 5 | 5339/ 5600 batches | train loss 0.4438254 +| epoch 5 | 5343/ 5600 batches | train loss 0.4066535 +| epoch 5 | 5347/ 5600 batches | train loss 0.3760509 +| epoch 5 | 5351/ 5600 batches | train loss 0.4044950 +| epoch 5 | 5355/ 5600 batches | train loss 0.3637353 +| epoch 5 | 5359/ 5600 batches | train loss 0.4165192 +| epoch 5 | 5363/ 5600 batches | train loss 0.4853666 +| epoch 5 | 5367/ 5600 batches | train loss 0.3916457 +| epoch 5 | 5371/ 5600 batches | train loss 0.3966654 +| epoch 5 | 5375/ 5600 batches | train loss 0.4727576 +| epoch 5 | 5379/ 5600 batches | train loss 0.4217305 +| epoch 5 | 5383/ 5600 batches | train loss 0.3884830 +| epoch 5 | 5387/ 5600 batches | train loss 0.3283076 +| epoch 5 | 5391/ 5600 batches | train loss 0.4403205 +| epoch 5 | 5395/ 5600 batches | train loss 0.3342223 +| epoch 5 | 5399/ 5600 batches | train loss 0.4222731 +| epoch 5 | 5403/ 5600 batches | train loss 0.4340626 +| epoch 5 | 5407/ 5600 batches | train loss 0.4460039 +| epoch 5 | 5411/ 5600 batches | train loss 0.4433497 +| epoch 5 | 5415/ 5600 batches | train loss 0.2842449 +| epoch 5 | 5419/ 5600 batches | train loss 0.4097349 +| epoch 5 | 5423/ 5600 batches | train loss 0.4213354 +| epoch 5 | 5427/ 5600 batches | train loss 0.3923736 +| epoch 5 | 5431/ 5600 batches | train loss 0.3735822 +| epoch 5 | 5435/ 5600 batches | train loss 0.4179581 +| epoch 5 | 5439/ 5600 batches | train loss 0.4096219 +| epoch 5 | 5443/ 5600 batches | train loss 0.3665157 +| epoch 5 | 5447/ 5600 batches | train loss 0.5734377 +| epoch 5 | 5451/ 5600 batches | train loss 0.3526048 +| epoch 5 | 5455/ 5600 batches | train loss 0.4293562 +| epoch 5 | 5459/ 5600 batches | train loss 0.3730814 +| epoch 5 | 5463/ 5600 batches | train loss 0.3616325 +| epoch 5 | 5467/ 5600 batches | train loss 0.3598926 +| epoch 5 | 5471/ 5600 batches | train loss 0.4297817 +| epoch 5 | 5475/ 5600 batches | train loss 0.3817736 +| epoch 5 | 5479/ 5600 batches | train loss 0.3776091 +| epoch 5 | 5483/ 5600 batches | train loss 0.4580569 +| epoch 5 | 5487/ 5600 batches | train loss 0.4065092 +| epoch 5 | 5491/ 5600 batches | train loss 0.4467166 +| epoch 5 | 5495/ 5600 batches | train loss 0.4043116 +| epoch 5 | 5499/ 5600 batches | train loss 0.4529622 +| epoch 5 | 5503/ 5600 batches | train loss 0.4255563 +| epoch 5 | 5507/ 5600 batches | train loss 0.3602701 +| epoch 5 | 5511/ 5600 batches | train loss 0.3764421 +| epoch 5 | 5515/ 5600 batches | train loss 0.3331721 +| epoch 5 | 5519/ 5600 batches | train loss 0.4195624 +| epoch 5 | 5523/ 5600 batches | train loss 0.4905879 +| epoch 5 | 5527/ 5600 batches | train loss 0.4444108 +| epoch 5 | 5531/ 5600 batches | train loss 0.4125120 +| epoch 5 | 5535/ 5600 batches | train loss 0.4786322 +| epoch 5 | 5539/ 5600 batches | train loss 0.4125224 +| epoch 5 | 5543/ 5600 batches | train loss 0.4594447 +| epoch 5 | 5547/ 5600 batches | train loss 0.3788217 +| epoch 5 | 5551/ 5600 batches | train loss 0.3813305 +| epoch 5 | 5555/ 5600 batches | train loss 0.2967345 +| epoch 5 | 5559/ 5600 batches | train loss 0.4760116 +| epoch 5 | 5563/ 5600 batches | train loss 0.4251717 +| epoch 5 | 5567/ 5600 batches | train loss 0.3925876 +| epoch 5 | 5571/ 5600 batches | train loss 0.3671256 +| epoch 5 | 5575/ 5600 batches | train loss 0.3494489 +| epoch 5 | 5579/ 5600 batches | train loss 0.3971699 +| epoch 5 | 5583/ 5600 batches | train loss 0.3730756 +| epoch 5 | 5587/ 5600 batches | train loss 0.4169326 +| epoch 5 | 5591/ 5600 batches | train loss 0.3493193 +| epoch 5 | 5595/ 5600 batches | train loss 0.4290546 +| epoch 5 | 5599/ 5600 batches | train loss 0.3725748 +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 5600 batches | test loss 0.3689570 +| epoch 5 | 7/ 5600 batches | test loss 0.4512259 +| epoch 5 | 11/ 5600 batches | test loss 0.4349349 +| epoch 5 | 15/ 5600 batches | test loss 0.4244192 +| epoch 5 | 19/ 5600 batches | test loss 0.3738138 +| epoch 5 | 23/ 5600 batches | test loss 0.4536447 +| epoch 5 | 27/ 5600 batches | test loss 0.5198684 +| epoch 5 | 31/ 5600 batches | test loss 0.4129699 +| epoch 5 | 35/ 5600 batches | test loss 0.3660258 +| epoch 5 | 39/ 5600 batches | test loss 0.4451992 +| epoch 5 | 43/ 5600 batches | test loss 0.3608958 +| epoch 5 | 47/ 5600 batches | test loss 0.5209014 +| epoch 5 | 51/ 5600 batches | test loss 0.4028679 +| epoch 5 | 55/ 5600 batches | test loss 0.5652009 +| epoch 5 | 59/ 5600 batches | test loss 0.4203297 +| epoch 5 | 63/ 5600 batches | test loss 0.4066471 +| epoch 5 | 67/ 5600 batches | test loss 0.3643109 +| epoch 5 | 71/ 5600 batches | test loss 0.4491889 +| epoch 5 | 75/ 5600 batches | test loss 0.4040341 +| epoch 5 | 79/ 5600 batches | test loss 0.4175054 +| epoch 5 | 83/ 5600 batches | test loss 0.3623804 +| epoch 5 | 87/ 5600 batches | test loss 0.4256064 +| epoch 5 | 91/ 5600 batches | test loss 0.4917649 +| epoch 5 | 95/ 5600 batches | test loss 0.5246614 +| epoch 5 | 99/ 5600 batches | test loss 0.3789513 +| epoch 5 | 103/ 5600 batches | test loss 0.4660529 +| epoch 5 | 107/ 5600 batches | test loss 0.4065495 +| epoch 5 | 111/ 5600 batches | test loss 0.3932706 +| epoch 5 | 115/ 5600 batches | test loss 0.4500448 +| epoch 5 | 119/ 5600 batches | test loss 0.4730209 +| epoch 5 | 123/ 5600 batches | test loss 0.3792565 +| epoch 5 | 127/ 5600 batches | test loss 0.4087681 +| epoch 5 | 131/ 5600 batches | test loss 0.3745052 +| epoch 5 | 135/ 5600 batches | test loss 0.4897315 +| epoch 5 | 139/ 5600 batches | test loss 0.5152631 +| epoch 5 | 143/ 5600 batches | test loss 0.3727681 +| epoch 5 | 147/ 5600 batches | test loss 0.4735818 +| epoch 5 | 151/ 5600 batches | test loss 0.4149802 +| epoch 5 | 155/ 5600 batches | test loss 0.4732175 +| epoch 5 | 159/ 5600 batches | test loss 0.4448599 +| epoch 5 | 163/ 5600 batches | test loss 0.3624100 +| epoch 5 | 167/ 5600 batches | test loss 0.3920551 +| epoch 5 | 171/ 5600 batches | test loss 0.4930863 +| epoch 5 | 175/ 5600 batches | test loss 0.3706427 +| epoch 5 | 179/ 5600 batches | test loss 0.4051405 +| epoch 5 | 183/ 5600 batches | test loss 0.4448221 +| epoch 5 | 187/ 5600 batches | test loss 0.4186238 +| epoch 5 | 191/ 5600 batches | test loss 0.3397996 +| epoch 5 | 195/ 5600 batches | test loss 0.4823382 +| epoch 5 | 199/ 5600 batches | test loss 0.3878592 +| epoch 5 | 203/ 5600 batches | test loss 0.4850677 +| epoch 5 | 207/ 5600 batches | test loss 0.3658620 +| epoch 5 | 211/ 5600 batches | test loss 0.3361293 +| epoch 5 | 215/ 5600 batches | test loss 0.4810662 +| epoch 5 | 219/ 5600 batches | test loss 0.4275258 +| epoch 5 | 223/ 5600 batches | test loss 0.4339406 +| epoch 5 | 227/ 5600 batches | test loss 0.4894040 +| epoch 5 | 231/ 5600 batches | test loss 0.5002899 +| epoch 5 | 235/ 5600 batches | test loss 0.4187146 +| epoch 5 | 239/ 5600 batches | test loss 0.4600401 +| epoch 5 | 243/ 5600 batches | test loss 0.4337175 +| epoch 5 | 247/ 5600 batches | test loss 0.3918026 +| epoch 5 | 251/ 5600 batches | test loss 0.4504704 +| epoch 5 | 255/ 5600 batches | test loss 0.4963732 +| epoch 5 | 259/ 5600 batches | test loss 0.4242139 +| epoch 5 | 263/ 5600 batches | test loss 0.4743435 +| epoch 5 | 267/ 5600 batches | test loss 0.4106200 +| epoch 5 | 271/ 5600 batches | test loss 0.4276989 +| epoch 5 | 275/ 5600 batches | test loss 0.3545985 +| epoch 5 | 279/ 5600 batches | test loss 0.4966325 +| epoch 5 | 283/ 5600 batches | test loss 0.6051002 +| epoch 5 | 287/ 5600 batches | test loss 0.3664069 +| epoch 5 | 291/ 5600 batches | test loss 0.4624887 +| epoch 5 | 295/ 5600 batches | test loss 0.4571924 +| epoch 5 | 299/ 5600 batches | test loss 0.4223025 +| epoch 5 | 303/ 5600 batches | test loss 0.4358616 +| epoch 5 | 307/ 5600 batches | test loss 0.3995770 +| epoch 5 | 311/ 5600 batches | test loss 0.4468054 +| epoch 5 | 315/ 5600 batches | test loss 0.4155628 +| epoch 5 | 319/ 5600 batches | test loss 0.4368556 +| epoch 5 | 323/ 5600 batches | test loss 0.4139362 +| epoch 5 | 327/ 5600 batches | test loss 0.5606782 +| epoch 5 | 331/ 5600 batches | test loss 0.3886312 +| epoch 5 | 335/ 5600 batches | test loss 0.4064584 +| epoch 5 | 339/ 5600 batches | test loss 0.3708195 +| epoch 5 | 343/ 5600 batches | test loss 0.4147019 +| epoch 5 | 347/ 5600 batches | test loss 0.4947011 +| epoch 5 | 351/ 5600 batches | test loss 0.3441454 +| epoch 5 | 355/ 5600 batches | test loss 0.4227423 +| epoch 5 | 359/ 5600 batches | test loss 0.4169545 +| epoch 5 | 363/ 5600 batches | test loss 0.3432196 +| epoch 5 | 367/ 5600 batches | test loss 0.4558545 +| epoch 5 | 371/ 5600 batches | test loss 0.4599900 +| epoch 5 | 375/ 5600 batches | test loss 0.3980892 +| epoch 5 | 379/ 5600 batches | test loss 0.4563307 +| epoch 5 | 383/ 5600 batches | test loss 0.4278896 +| epoch 5 | 387/ 5600 batches | test loss 0.3837062 +| epoch 5 | 391/ 5600 batches | test loss 0.4145026 +| epoch 5 | 395/ 5600 batches | test loss 0.4061421 +| epoch 5 | 399/ 5600 batches | test loss 0.5286212 +| epoch 5 | 403/ 5600 batches | test loss 0.5580668 +| epoch 5 | 407/ 5600 batches | test loss 0.3120936 +| epoch 5 | 411/ 5600 batches | test loss 0.4417042 +| epoch 5 | 415/ 5600 batches | test loss 0.3995584 +| epoch 5 | 419/ 5600 batches | test loss 0.3554662 +| epoch 5 | 423/ 5600 batches | test loss 0.3939916 +| epoch 5 | 427/ 5600 batches | test loss 0.4152537 +| epoch 5 | 431/ 5600 batches | test loss 0.4580677 +| epoch 5 | 435/ 5600 batches | test loss 0.3564485 +| epoch 5 | 439/ 5600 batches | test loss 0.4399509 +| epoch 5 | 443/ 5600 batches | test loss 0.4724635 +| epoch 5 | 447/ 5600 batches | test loss 0.4578151 +| epoch 5 | 451/ 5600 batches | test loss 0.4530690 +| epoch 5 | 455/ 5600 batches | test loss 0.5359700 +| epoch 5 | 459/ 5600 batches | test loss 0.4596974 +| epoch 5 | 463/ 5600 batches | test loss 0.4265253 +| epoch 5 | 467/ 5600 batches | test loss 0.2882422 +| epoch 5 | 471/ 5600 batches | test loss 0.3961086 +| epoch 5 | 475/ 5600 batches | test loss 0.4426219 +| epoch 5 | 479/ 5600 batches | test loss 0.4652894 +| epoch 5 | 483/ 5600 batches | test loss 0.3566923 +| epoch 5 | 487/ 5600 batches | test loss 0.3475550 +| epoch 5 | 491/ 5600 batches | test loss 0.4291379 +| epoch 5 | 495/ 5600 batches | test loss 0.3659766 +| epoch 5 | 499/ 5600 batches | test loss 0.5088285 +| epoch 5 | 503/ 5600 batches | test loss 0.5136436 +| epoch 5 | 507/ 5600 batches | test loss 0.4040512 +| epoch 5 | 511/ 5600 batches | test loss 0.3645351 +| epoch 5 | 515/ 5600 batches | test loss 0.4577189 +| epoch 5 | 519/ 5600 batches | test loss 0.4364005 +| epoch 5 | 523/ 5600 batches | test loss 0.4159076 +| epoch 5 | 527/ 5600 batches | test loss 0.4931741 +| epoch 5 | 531/ 5600 batches | test loss 0.4057918 +| epoch 5 | 535/ 5600 batches | test loss 0.2877041 +| epoch 5 | 539/ 5600 batches | test loss 0.4694653 +| epoch 5 | 543/ 5600 batches | test loss 0.5408408 +| epoch 5 | 547/ 5600 batches | test loss 0.4128378 +| epoch 5 | 551/ 5600 batches | test loss 0.4770301 +| epoch 5 | 555/ 5600 batches | test loss 0.1663433 +| epoch 5 | 559/ 5600 batches | test loss 0.4895798 +| epoch 5 | 563/ 5600 batches | test loss 0.4308831 +| epoch 5 | 567/ 5600 batches | test loss 0.4415000 +| epoch 5 | 571/ 5600 batches | test loss 0.4422673 +| epoch 5 | 575/ 5600 batches | test loss 0.3601894 +| epoch 5 | 579/ 5600 batches | test loss 0.4487756 +| epoch 5 | 583/ 5600 batches | test loss 0.3872101 +| epoch 5 | 587/ 5600 batches | test loss 0.4192527 +| epoch 5 | 591/ 5600 batches | test loss 0.3465961 +| epoch 5 | 595/ 5600 batches | test loss 0.4731351 +| epoch 5 | 599/ 5600 batches | test loss 0.4360987 +| epoch 5 | 603/ 5600 batches | test loss 0.3573496 +| epoch 5 | 607/ 5600 batches | test loss 0.4543169 +| epoch 5 | 611/ 5600 batches | test loss 0.3915879 +| epoch 5 | 615/ 5600 batches | test loss 0.4017861 +| epoch 5 | 619/ 5600 batches | test loss 0.4493183 +| epoch 5 | 623/ 5600 batches | test loss 0.4603925 +| epoch 5 | 627/ 5600 batches | test loss 0.3734788 +| epoch 5 | 631/ 5600 batches | test loss 0.4034488 +| epoch 5 | 635/ 5600 batches | test loss 0.4255802 +| epoch 5 | 639/ 5600 batches | test loss 0.4680310 +| epoch 5 | 643/ 5600 batches | test loss 0.4504498 +| epoch 5 | 647/ 5600 batches | test loss 0.4395382 +| epoch 5 | 651/ 5600 batches | test loss 0.5158397 +| epoch 5 | 655/ 5600 batches | test loss 0.4654530 +| epoch 5 | 659/ 5600 batches | test loss 0.5037616 +| epoch 5 | 663/ 5600 batches | test loss 0.4428444 +| epoch 5 | 667/ 5600 batches | test loss 0.4802816 +| epoch 5 | 671/ 5600 batches | test loss 0.3468114 +| epoch 5 | 675/ 5600 batches | test loss 0.4933598 +| epoch 5 | 679/ 5600 batches | test loss 0.4129284 +| epoch 5 | 683/ 5600 batches | test loss 0.4984621 +| epoch 5 | 687/ 5600 batches | test loss 0.4229008 +| epoch 5 | 691/ 5600 batches | test loss 0.4021144 +| epoch 5 | 695/ 5600 batches | test loss 0.4342326 +| epoch 5 | 699/ 5600 batches | test loss 0.4106691 +| epoch 5 | 703/ 5600 batches | test loss 0.5075251 +| epoch 5 | 707/ 5600 batches | test loss 0.5006577 +| epoch 5 | 711/ 5600 batches | test loss 0.4658189 +| epoch 5 | 715/ 5600 batches | test loss 0.5027335 +| epoch 5 | 719/ 5600 batches | test loss 0.4960717 +| epoch 5 | 723/ 5600 batches | test loss 0.4231354 +| epoch 5 | 727/ 5600 batches | test loss 0.3936213 +| epoch 5 | 731/ 5600 batches | test loss 0.4090811 +| epoch 5 | 735/ 5600 batches | test loss 0.7520743 +| epoch 5 | 739/ 5600 batches | test loss 0.3696815 +| epoch 5 | 743/ 5600 batches | test loss 0.3571427 +| epoch 5 | 747/ 5600 batches | test loss 0.4762009 +| epoch 5 | 751/ 5600 batches | test loss 0.3710055 +| epoch 5 | 755/ 5600 batches | test loss 0.4133495 +| epoch 5 | 759/ 5600 batches | test loss 0.3975253 +| epoch 5 | 763/ 5600 batches | test loss 0.3411023 +| epoch 5 | 767/ 5600 batches | test loss 0.4501663 +| epoch 5 | 771/ 5600 batches | test loss 0.3907761 +| epoch 5 | 775/ 5600 batches | test loss 0.4212821 +| epoch 5 | 779/ 5600 batches | test loss 0.4647480 +| epoch 5 | 783/ 5600 batches | test loss 0.5075072 +| epoch 5 | 787/ 5600 batches | test loss 0.4942481 +| epoch 5 | 791/ 5600 batches | test loss 0.4281486 +| epoch 5 | 795/ 5600 batches | test loss 0.4084731 +| epoch 5 | 799/ 5600 batches | test loss 0.4724599 +| epoch 5 | 803/ 5600 batches | test loss 0.4984832 +| epoch 5 | 807/ 5600 batches | test loss 0.4999532 +| epoch 5 | 811/ 5600 batches | test loss 0.3539804 +| epoch 5 | 815/ 5600 batches | test loss 0.3935294 +| epoch 5 | 819/ 5600 batches | test loss 0.3684913 +| epoch 5 | 823/ 5600 batches | test loss 0.4345988 +| epoch 5 | 827/ 5600 batches | test loss 0.3798161 +| epoch 5 | 831/ 5600 batches | test loss 0.3614841 +| epoch 5 | 835/ 5600 batches | test loss 0.3457825 +| epoch 5 | 839/ 5600 batches | test loss 0.4383759 +| epoch 5 | 843/ 5600 batches | test loss 0.4454883 +| epoch 5 | 847/ 5600 batches | test loss 0.4184856 +| epoch 5 | 851/ 5600 batches | test loss 0.3213665 +| epoch 5 | 855/ 5600 batches | test loss 0.4713098 +| epoch 5 | 859/ 5600 batches | test loss 0.3898750 +| epoch 5 | 863/ 5600 batches | test loss 0.4268534 +| epoch 5 | 867/ 5600 batches | test loss 0.5239422 +| epoch 5 | 871/ 5600 batches | test loss 0.3923626 +| epoch 5 | 875/ 5600 batches | test loss 0.4209269 +| epoch 5 | 879/ 5600 batches | test loss 0.4219227 +| epoch 5 | 883/ 5600 batches | test loss 0.3654838 +| epoch 5 | 887/ 5600 batches | test loss 0.4504934 +| epoch 5 | 891/ 5600 batches | test loss 0.4166380 +| epoch 5 | 895/ 5600 batches | test loss 0.3474753 +| epoch 5 | 899/ 5600 batches | test loss 0.4047734 +| epoch 5 | 903/ 5600 batches | test loss 0.4041077 +| epoch 5 | 907/ 5600 batches | test loss 0.3929742 +| epoch 5 | 911/ 5600 batches | test loss 0.3395898 +| epoch 5 | 915/ 5600 batches | test loss 0.4022650 +| epoch 5 | 919/ 5600 batches | test loss 0.4681832 +| epoch 5 | 923/ 5600 batches | test loss 0.4805526 +| epoch 5 | 927/ 5600 batches | test loss 0.4396124 +| epoch 5 | 931/ 5600 batches | test loss 0.3353044 +| epoch 5 | 935/ 5600 batches | test loss 0.4855708 +| epoch 5 | 939/ 5600 batches | test loss 0.4388098 +| epoch 5 | 943/ 5600 batches | test loss 0.4101469 +| epoch 5 | 947/ 5600 batches | test loss 0.3985168 +| epoch 5 | 951/ 5600 batches | test loss 0.4537066 +| epoch 5 | 955/ 5600 batches | test loss 0.3841612 +| epoch 5 | 959/ 5600 batches | test loss 0.4492305 +| epoch 5 | 963/ 5600 batches | test loss 0.3867095 +| epoch 5 | 967/ 5600 batches | test loss 0.4432675 +| epoch 5 | 971/ 5600 batches | test loss 0.4391317 +| epoch 5 | 975/ 5600 batches | test loss 0.5329745 +| epoch 5 | 979/ 5600 batches | test loss 0.4178655 +| epoch 5 | 983/ 5600 batches | test loss 0.3975604 +| epoch 5 | 987/ 5600 batches | test loss 0.4157867 +| epoch 5 | 991/ 5600 batches | test loss 0.4308296 +| epoch 5 | 995/ 5600 batches | test loss 0.3337757 +| epoch 5 | 999/ 5600 batches | test loss 0.4258027 +| epoch 5 | 1003/ 5600 batches | test loss 0.4559831 +| epoch 5 | 1007/ 5600 batches | test loss 0.4387062 +| epoch 5 | 1011/ 5600 batches | test loss 0.4128515 +| epoch 5 | 1015/ 5600 batches | test loss 0.3887491 +| epoch 5 | 1019/ 5600 batches | test loss 0.4856378 +| epoch 5 | 1023/ 5600 batches | test loss 0.4731106 +| epoch 5 | 1027/ 5600 batches | test loss 0.3475404 +| epoch 5 | 1031/ 5600 batches | test loss 0.4942232 +| epoch 5 | 1035/ 5600 batches | test loss 0.4140222 +| epoch 5 | 1039/ 5600 batches | test loss 0.4413294 +| epoch 5 | 1043/ 5600 batches | test loss 0.5242087 +| epoch 5 | 1047/ 5600 batches | test loss 0.5031160 +| epoch 5 | 1051/ 5600 batches | test loss 0.4560868 +| epoch 5 | 1055/ 5600 batches | test loss 0.2704020 +| epoch 5 | 1059/ 5600 batches | test loss 0.3783350 +| epoch 5 | 1063/ 5600 batches | test loss 0.4905600 +| epoch 5 | 1067/ 5600 batches | test loss 0.4182202 +| epoch 5 | 1071/ 5600 batches | test loss 0.4016751 +| epoch 5 | 1075/ 5600 batches | test loss 0.5204610 +| epoch 5 | 1079/ 5600 batches | test loss 0.4610468 +| epoch 5 | 1083/ 5600 batches | test loss 0.4512166 +| epoch 5 | 1087/ 5600 batches | test loss 0.4726901 +| epoch 5 | 1091/ 5600 batches | test loss 0.2145012 +| epoch 5 | 1095/ 5600 batches | test loss 0.4708194 +| epoch 5 | 1099/ 5600 batches | test loss 0.3804359 +| epoch 5 | 1103/ 5600 batches | test loss 0.5322043 +| epoch 5 | 1107/ 5600 batches | test loss 0.4776164 +| epoch 5 | 1111/ 5600 batches | test loss 0.4152372 +| epoch 5 | 1115/ 5600 batches | test loss 0.3630280 +| epoch 5 | 1119/ 5600 batches | test loss 0.4385124 +| epoch 5 | 1123/ 5600 batches | test loss 0.1864759 +| epoch 5 | 1127/ 5600 batches | test loss 0.4165710 +| epoch 5 | 1131/ 5600 batches | test loss 0.4051459 +| epoch 5 | 1135/ 5600 batches | test loss 0.3955809 +| epoch 5 | 1139/ 5600 batches | test loss 0.3682980 +| epoch 5 | 1143/ 5600 batches | test loss 0.3817949 +| epoch 5 | 1147/ 5600 batches | test loss 0.5503624 +| epoch 5 | 1151/ 5600 batches | test loss 0.3522958 +| epoch 5 | 1155/ 5600 batches | test loss 0.5556246 +| epoch 5 | 1159/ 5600 batches | test loss 0.4367905 +| epoch 5 | 1163/ 5600 batches | test loss 0.4719030 +| epoch 5 | 1167/ 5600 batches | test loss 0.4934591 +| epoch 5 | 1171/ 5600 batches | test loss 0.4100297 +| epoch 5 | 1175/ 5600 batches | test loss 0.4484338 +| epoch 5 | 1179/ 5600 batches | test loss 0.4236366 +| epoch 5 | 1183/ 5600 batches | test loss 0.3756547 +| epoch 5 | 1187/ 5600 batches | test loss 0.4083785 +| epoch 5 | 1191/ 5600 batches | test loss 0.4839619 +| epoch 5 | 1195/ 5600 batches | test loss 0.3259977 +| epoch 5 | 1199/ 5600 batches | test loss 0.5049109 +| epoch 5 | 1203/ 5600 batches | test loss 0.4543632 +| epoch 5 | 1207/ 5600 batches | test loss 0.4296085 +| epoch 5 | 1211/ 5600 batches | test loss 0.3681513 +| epoch 5 | 1215/ 5600 batches | test loss 0.4633802 +| epoch 5 | 1219/ 5600 batches | test loss 0.3501811 +| epoch 5 | 1223/ 5600 batches | test loss 0.3236622 +| epoch 5 | 1227/ 5600 batches | test loss 0.4465825 +| epoch 5 | 1231/ 5600 batches | test loss 0.3550308 +| epoch 5 | 1235/ 5600 batches | test loss 0.4187926 +| epoch 5 | 1239/ 5600 batches | test loss 0.4777430 +| epoch 5 | 1243/ 5600 batches | test loss 0.6479402 +| epoch 5 | 1247/ 5600 batches | test loss 0.3239281 +| epoch 5 | 1251/ 5600 batches | test loss 0.3579199 +| epoch 5 | 1255/ 5600 batches | test loss 0.4454825 +| epoch 5 | 1259/ 5600 batches | test loss 0.5224768 +| epoch 5 | 1263/ 5600 batches | test loss 0.4199997 +| epoch 5 | 1267/ 5600 batches | test loss 0.4025059 +| epoch 5 | 1271/ 5600 batches | test loss 0.3521645 +| epoch 5 | 1275/ 5600 batches | test loss 0.5334033 +| epoch 5 | 1279/ 5600 batches | test loss 0.1667665 +| epoch 5 | 1283/ 5600 batches | test loss 0.4834578 +| epoch 5 | 1287/ 5600 batches | test loss 0.6401087 +| epoch 5 | 1291/ 5600 batches | test loss 0.4024698 +| epoch 5 | 1295/ 5600 batches | test loss 0.4159910 +| epoch 5 | 1299/ 5600 batches | test loss 0.3711209 +| epoch 5 | 1303/ 5600 batches | test loss 0.3973256 +| epoch 5 | 1307/ 5600 batches | test loss 0.4178764 +| epoch 5 | 1311/ 5600 batches | test loss 0.4317400 +| epoch 5 | 1315/ 5600 batches | test loss 0.4250206 +| epoch 5 | 1319/ 5600 batches | test loss 0.5007309 +| epoch 5 | 1323/ 5600 batches | test loss 0.4568453 +| epoch 5 | 1327/ 5600 batches | test loss 0.5183586 +| epoch 5 | 1331/ 5600 batches | test loss 0.3782868 +| epoch 5 | 1335/ 5600 batches | test loss 0.3633727 +| epoch 5 | 1339/ 5600 batches | test loss 0.4959695 +| epoch 5 | 1343/ 5600 batches | test loss 0.4100403 +| epoch 5 | 1347/ 5600 batches | test loss 0.4060287 +| epoch 5 | 1351/ 5600 batches | test loss 0.4881102 +| epoch 5 | 1355/ 5600 batches | test loss 0.4611826 +| epoch 5 | 1359/ 5600 batches | test loss 0.3596517 +| epoch 5 | 1363/ 5600 batches | test loss 0.4748805 +| epoch 5 | 1367/ 5600 batches | test loss 0.4293925 +| epoch 5 | 1371/ 5600 batches | test loss 0.4232463 +| epoch 5 | 1375/ 5600 batches | test loss 0.5052311 +| epoch 5 | 1379/ 5600 batches | test loss 0.4304383 +| epoch 5 | 1383/ 5600 batches | test loss 0.2991152 +| epoch 5 | 1387/ 5600 batches | test loss 0.3893431 +| epoch 5 | 1391/ 5600 batches | test loss 0.4060446 +| epoch 5 | 1395/ 5600 batches | test loss 0.4571560 +| epoch 5 | 1399/ 5600 batches | test loss 0.4404915 +| epoch 5 | final test loss 0.4321, save model! +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 5600 batches | train loss 0.3232036 +| epoch 6 | 7/ 5600 batches | train loss 0.3270247 +| epoch 6 | 11/ 5600 batches | train loss 0.3647864 +| epoch 6 | 15/ 5600 batches | train loss 0.4186231 +| epoch 6 | 19/ 5600 batches | train loss 0.3638395 +| epoch 6 | 23/ 5600 batches | train loss 0.4112357 +| epoch 6 | 27/ 5600 batches | train loss 0.4106248 +| epoch 6 | 31/ 5600 batches | train loss 0.4574719 +| epoch 6 | 35/ 5600 batches | train loss 0.2510920 +| epoch 6 | 39/ 5600 batches | train loss 0.3940569 +| epoch 6 | 43/ 5600 batches | train loss 0.3863373 +| epoch 6 | 47/ 5600 batches | train loss 0.4404146 +| epoch 6 | 51/ 5600 batches | train loss 0.4129805 +| epoch 6 | 55/ 5600 batches | train loss 0.3495730 +| epoch 6 | 59/ 5600 batches | train loss 0.3738448 +| epoch 6 | 63/ 5600 batches | train loss 0.4224747 +| epoch 6 | 67/ 5600 batches | train loss 0.3581990 +| epoch 6 | 71/ 5600 batches | train loss 0.4008843 +| epoch 6 | 75/ 5600 batches | train loss 0.4252041 +| epoch 6 | 79/ 5600 batches | train loss 0.4140705 +| epoch 6 | 83/ 5600 batches | train loss 0.4500617 +| epoch 6 | 87/ 5600 batches | train loss 0.3379859 +| epoch 6 | 91/ 5600 batches | train loss 0.3776900 +| epoch 6 | 95/ 5600 batches | train loss 0.3143407 +| epoch 6 | 99/ 5600 batches | train loss 0.3286626 +| epoch 6 | 103/ 5600 batches | train loss 0.4733856 +| epoch 6 | 107/ 5600 batches | train loss 0.3342017 +| epoch 6 | 111/ 5600 batches | train loss 0.3626714 +| epoch 6 | 115/ 5600 batches | train loss 0.4061559 +| epoch 6 | 119/ 5600 batches | train loss 0.4229802 +| epoch 6 | 123/ 5600 batches | train loss 0.3467297 +| epoch 6 | 127/ 5600 batches | train loss 0.3911031 +| epoch 6 | 131/ 5600 batches | train loss 0.4014050 +| epoch 6 | 135/ 5600 batches | train loss 0.3363125 +| epoch 6 | 139/ 5600 batches | train loss 0.3561754 +| epoch 6 | 143/ 5600 batches | train loss 0.4479951 +| epoch 6 | 147/ 5600 batches | train loss 0.3478334 +| epoch 6 | 151/ 5600 batches | train loss 0.3569427 +| epoch 6 | 155/ 5600 batches | train loss 0.4385589 +| epoch 6 | 159/ 5600 batches | train loss 0.3895899 +| epoch 6 | 163/ 5600 batches | train loss 0.3966790 +| epoch 6 | 167/ 5600 batches | train loss 0.3806143 +| epoch 6 | 171/ 5600 batches | train loss 0.3649663 +| epoch 6 | 175/ 5600 batches | train loss 0.3879836 +| epoch 6 | 179/ 5600 batches | train loss 0.4412057 +| epoch 6 | 183/ 5600 batches | train loss 0.3434464 +| epoch 6 | 187/ 5600 batches | train loss 0.3840152 +| epoch 6 | 191/ 5600 batches | train loss 0.3633503 +| epoch 6 | 195/ 5600 batches | train loss 0.3837941 +| epoch 6 | 199/ 5600 batches | train loss 0.3187811 +| epoch 6 | 203/ 5600 batches | train loss 0.3842685 +| epoch 6 | 207/ 5600 batches | train loss 0.4057103 +| epoch 6 | 211/ 5600 batches | train loss 0.3432241 +| epoch 6 | 215/ 5600 batches | train loss 0.3107156 +| epoch 6 | 219/ 5600 batches | train loss 0.4097081 +| epoch 6 | 223/ 5600 batches | train loss 0.4453284 +| epoch 6 | 227/ 5600 batches | train loss 0.3768070 +| epoch 6 | 231/ 5600 batches | train loss 0.3252652 +| epoch 6 | 235/ 5600 batches | train loss 0.4070016 +| epoch 6 | 239/ 5600 batches | train loss 0.3857098 +| epoch 6 | 243/ 5600 batches | train loss 0.3998513 +| epoch 6 | 247/ 5600 batches | train loss 0.4728460 +| epoch 6 | 251/ 5600 batches | train loss 0.3531942 +| epoch 6 | 255/ 5600 batches | train loss 0.4034077 +| epoch 6 | 259/ 5600 batches | train loss 0.3739231 +| epoch 6 | 263/ 5600 batches | train loss 0.4062814 +| epoch 6 | 267/ 5600 batches | train loss 0.3894770 +| epoch 6 | 271/ 5600 batches | train loss 0.4079762 +| epoch 6 | 275/ 5600 batches | train loss 0.4353336 +| epoch 6 | 279/ 5600 batches | train loss 0.3449706 +| epoch 6 | 283/ 5600 batches | train loss 0.4144973 +| epoch 6 | 287/ 5600 batches | train loss 0.3492729 +| epoch 6 | 291/ 5600 batches | train loss 0.3508941 +| epoch 6 | 295/ 5600 batches | train loss 0.3951106 +| epoch 6 | 299/ 5600 batches | train loss 0.3859638 +| epoch 6 | 303/ 5600 batches | train loss 0.3752259 +| epoch 6 | 307/ 5600 batches | train loss 0.4016215 +| epoch 6 | 311/ 5600 batches | train loss 0.4273471 +| epoch 6 | 315/ 5600 batches | train loss 0.3852930 +| epoch 6 | 319/ 5600 batches | train loss 0.3676698 +| epoch 6 | 323/ 5600 batches | train loss 0.3319322 +| epoch 6 | 327/ 5600 batches | train loss 0.3526459 +| epoch 6 | 331/ 5600 batches | train loss 0.4174361 +| epoch 6 | 335/ 5600 batches | train loss 0.3766438 +| epoch 6 | 339/ 5600 batches | train loss 0.4090820 +| epoch 6 | 343/ 5600 batches | train loss 0.4224954 +| epoch 6 | 347/ 5600 batches | train loss 0.4237244 +| epoch 6 | 351/ 5600 batches | train loss 0.4011912 +| epoch 6 | 355/ 5600 batches | train loss 0.2796975 +| epoch 6 | 359/ 5600 batches | train loss 0.3397644 +| epoch 6 | 363/ 5600 batches | train loss 0.3126338 +| epoch 6 | 367/ 5600 batches | train loss 0.3511983 +| epoch 6 | 371/ 5600 batches | train loss 0.3742005 +| epoch 6 | 375/ 5600 batches | train loss 0.4809584 +| epoch 6 | 379/ 5600 batches | train loss 0.3095811 +| epoch 6 | 383/ 5600 batches | train loss 0.3524841 +| epoch 6 | 387/ 5600 batches | train loss 0.3140589 +| epoch 6 | 391/ 5600 batches | train loss 0.4081736 +| epoch 6 | 395/ 5600 batches | train loss 0.3799177 +| epoch 6 | 399/ 5600 batches | train loss 0.4264156 +| epoch 6 | 403/ 5600 batches | train loss 0.3931733 +| epoch 6 | 407/ 5600 batches | train loss 0.3625249 +| epoch 6 | 411/ 5600 batches | train loss 0.4019867 +| epoch 6 | 415/ 5600 batches | train loss 0.2990233 +| epoch 6 | 419/ 5600 batches | train loss 0.3790292 +| epoch 6 | 423/ 5600 batches | train loss 0.4091600 +| epoch 6 | 427/ 5600 batches | train loss 0.3500327 +| epoch 6 | 431/ 5600 batches | train loss 0.4559293 +| epoch 6 | 435/ 5600 batches | train loss 0.4514709 +| epoch 6 | 439/ 5600 batches | train loss 0.3622116 +| epoch 6 | 443/ 5600 batches | train loss 0.4082762 +| epoch 6 | 447/ 5600 batches | train loss 0.3519127 +| epoch 6 | 451/ 5600 batches | train loss 0.3744155 +| epoch 6 | 455/ 5600 batches | train loss 0.3651093 +| epoch 6 | 459/ 5600 batches | train loss 0.3717067 +| epoch 6 | 463/ 5600 batches | train loss 0.3860195 +| epoch 6 | 467/ 5600 batches | train loss 0.3665530 +| epoch 6 | 471/ 5600 batches | train loss 0.3826524 +| epoch 6 | 475/ 5600 batches | train loss 0.3415653 +| epoch 6 | 479/ 5600 batches | train loss 0.3941586 +| epoch 6 | 483/ 5600 batches | train loss 0.3624393 +| epoch 6 | 487/ 5600 batches | train loss 0.4006034 +| epoch 6 | 491/ 5600 batches | train loss 0.3532748 +| epoch 6 | 495/ 5600 batches | train loss 0.3334838 +| epoch 6 | 499/ 5600 batches | train loss 0.3716389 +| epoch 6 | 503/ 5600 batches | train loss 0.3916160 +| epoch 6 | 507/ 5600 batches | train loss 0.3587625 +| epoch 6 | 511/ 5600 batches | train loss 0.3984569 +| epoch 6 | 515/ 5600 batches | train loss 0.4216271 +| epoch 6 | 519/ 5600 batches | train loss 0.4020286 +| epoch 6 | 523/ 5600 batches | train loss 0.3739561 +| epoch 6 | 527/ 5600 batches | train loss 0.3615899 +| epoch 6 | 531/ 5600 batches | train loss 0.3420352 +| epoch 6 | 535/ 5600 batches | train loss 0.3748094 +| epoch 6 | 539/ 5600 batches | train loss 0.4142705 +| epoch 6 | 543/ 5600 batches | train loss 0.4032844 +| epoch 6 | 547/ 5600 batches | train loss 0.3875235 +| epoch 6 | 551/ 5600 batches | train loss 0.4192739 +| epoch 6 | 555/ 5600 batches | train loss 0.3302968 +| epoch 6 | 559/ 5600 batches | train loss 0.4752101 +| epoch 6 | 563/ 5600 batches | train loss 0.3285763 +| epoch 6 | 567/ 5600 batches | train loss 0.3836772 +| epoch 6 | 571/ 5600 batches | train loss 0.3946120 +| epoch 6 | 575/ 5600 batches | train loss 0.4163927 +| epoch 6 | 579/ 5600 batches | train loss 0.3839303 +| epoch 6 | 583/ 5600 batches | train loss 0.3583331 +| epoch 6 | 587/ 5600 batches | train loss 0.2437424 +| epoch 6 | 591/ 5600 batches | train loss 0.4301120 +| epoch 6 | 595/ 5600 batches | train loss 0.3645816 +| epoch 6 | 599/ 5600 batches | train loss 0.3838271 +| epoch 6 | 603/ 5600 batches | train loss 0.3658571 +| epoch 6 | 607/ 5600 batches | train loss 0.3021927 +| epoch 6 | 611/ 5600 batches | train loss 0.3681527 +| epoch 6 | 615/ 5600 batches | train loss 0.3637969 +| epoch 6 | 619/ 5600 batches | train loss 0.3572501 +| epoch 6 | 623/ 5600 batches | train loss 0.4151229 +| epoch 6 | 627/ 5600 batches | train loss 0.3391696 +| epoch 6 | 631/ 5600 batches | train loss 0.3838969 +| epoch 6 | 635/ 5600 batches | train loss 0.4418935 +| epoch 6 | 639/ 5600 batches | train loss 0.3585470 +| epoch 6 | 643/ 5600 batches | train loss 0.4391394 +| epoch 6 | 647/ 5600 batches | train loss 0.3837515 +| epoch 6 | 651/ 5600 batches | train loss 0.3524522 +| epoch 6 | 655/ 5600 batches | train loss 0.3421972 +| epoch 6 | 659/ 5600 batches | train loss 0.4015621 +| epoch 6 | 663/ 5600 batches | train loss 0.4520378 +| epoch 6 | 667/ 5600 batches | train loss 0.4379396 +| epoch 6 | 671/ 5600 batches | train loss 0.3210585 +| epoch 6 | 675/ 5600 batches | train loss 0.4395045 +| epoch 6 | 679/ 5600 batches | train loss 0.3141456 +| epoch 6 | 683/ 5600 batches | train loss 0.1875941 +| epoch 6 | 687/ 5600 batches | train loss 0.3283863 +| epoch 6 | 691/ 5600 batches | train loss 0.3920528 +| epoch 6 | 695/ 5600 batches | train loss 0.3650817 +| epoch 6 | 699/ 5600 batches | train loss 0.3291473 +| epoch 6 | 703/ 5600 batches | train loss 0.3960318 +| epoch 6 | 707/ 5600 batches | train loss 0.3291047 +| epoch 6 | 711/ 5600 batches | train loss 0.4040922 +| epoch 6 | 715/ 5600 batches | train loss 0.3363737 +| epoch 6 | 719/ 5600 batches | train loss 0.4171801 +| epoch 6 | 723/ 5600 batches | train loss 0.3325736 +| epoch 6 | 727/ 5600 batches | train loss 0.3393897 +| epoch 6 | 731/ 5600 batches | train loss 0.3875498 +| epoch 6 | 735/ 5600 batches | train loss 0.3325271 +| epoch 6 | 739/ 5600 batches | train loss 0.3279513 +| epoch 6 | 743/ 5600 batches | train loss 0.3677976 +| epoch 6 | 747/ 5600 batches | train loss 0.3864163 +| epoch 6 | 751/ 5600 batches | train loss 0.3897223 +| epoch 6 | 755/ 5600 batches | train loss 0.5024320 +| epoch 6 | 759/ 5600 batches | train loss 0.4110387 +| epoch 6 | 763/ 5600 batches | train loss 0.3994540 +| epoch 6 | 767/ 5600 batches | train loss 0.3553213 +| epoch 6 | 771/ 5600 batches | train loss 0.3693831 +| epoch 6 | 775/ 5600 batches | train loss 0.3459608 +| epoch 6 | 779/ 5600 batches | train loss 0.4778358 +| epoch 6 | 783/ 5600 batches | train loss 0.4023807 +| epoch 6 | 787/ 5600 batches | train loss 0.4833766 +| epoch 6 | 791/ 5600 batches | train loss 0.4046915 +| epoch 6 | 795/ 5600 batches | train loss 0.3832313 +| epoch 6 | 799/ 5600 batches | train loss 0.4266893 +| epoch 6 | 803/ 5600 batches | train loss 0.3915057 +| epoch 6 | 807/ 5600 batches | train loss 0.3513551 +| epoch 6 | 811/ 5600 batches | train loss 0.4051485 +| epoch 6 | 815/ 5600 batches | train loss 0.4550240 +| epoch 6 | 819/ 5600 batches | train loss 0.3188364 +| epoch 6 | 823/ 5600 batches | train loss 0.3926781 +| epoch 6 | 827/ 5600 batches | train loss 0.4109510 +| epoch 6 | 831/ 5600 batches | train loss 0.4116862 +| epoch 6 | 835/ 5600 batches | train loss 0.3205615 +| epoch 6 | 839/ 5600 batches | train loss 0.4840069 +| epoch 6 | 843/ 5600 batches | train loss 0.3207687 +| epoch 6 | 847/ 5600 batches | train loss 0.3618652 +| epoch 6 | 851/ 5600 batches | train loss 0.3480560 +| epoch 6 | 855/ 5600 batches | train loss 0.4097078 +| epoch 6 | 859/ 5600 batches | train loss 0.4319345 +| epoch 6 | 863/ 5600 batches | train loss 0.3879349 +| epoch 6 | 867/ 5600 batches | train loss 0.3224410 +| epoch 6 | 871/ 5600 batches | train loss 0.4116647 +| epoch 6 | 875/ 5600 batches | train loss 0.3458960 +| epoch 6 | 879/ 5600 batches | train loss 0.4074676 +| epoch 6 | 883/ 5600 batches | train loss 0.4231678 +| epoch 6 | 887/ 5600 batches | train loss 0.4259369 +| epoch 6 | 891/ 5600 batches | train loss 0.3566475 +| epoch 6 | 895/ 5600 batches | train loss 0.1543114 +| epoch 6 | 899/ 5600 batches | train loss 0.4514413 +| epoch 6 | 903/ 5600 batches | train loss 0.3622901 +| epoch 6 | 907/ 5600 batches | train loss 0.4252608 +| epoch 6 | 911/ 5600 batches | train loss 0.4326707 +| epoch 6 | 915/ 5600 batches | train loss 0.4370075 +| epoch 6 | 919/ 5600 batches | train loss 0.3788691 +| epoch 6 | 923/ 5600 batches | train loss 0.3208897 +| epoch 6 | 927/ 5600 batches | train loss 0.3434991 +| epoch 6 | 931/ 5600 batches | train loss 0.4436933 +| epoch 6 | 935/ 5600 batches | train loss 0.3641716 +| epoch 6 | 939/ 5600 batches | train loss 0.3511337 +| epoch 6 | 943/ 5600 batches | train loss 0.3904622 +| epoch 6 | 947/ 5600 batches | train loss 0.3675384 +| epoch 6 | 951/ 5600 batches | train loss 0.3421926 +| epoch 6 | 955/ 5600 batches | train loss 0.3883262 +| epoch 6 | 959/ 5600 batches | train loss 0.3013642 +| epoch 6 | 963/ 5600 batches | train loss 0.3408728 +| epoch 6 | 967/ 5600 batches | train loss 0.3573128 +| epoch 6 | 971/ 5600 batches | train loss 0.4449192 +| epoch 6 | 975/ 5600 batches | train loss 0.4219003 +| epoch 6 | 979/ 5600 batches | train loss 0.3641447 +| epoch 6 | 983/ 5600 batches | train loss 0.3643998 +| epoch 6 | 987/ 5600 batches | train loss 0.3053450 +| epoch 6 | 991/ 5600 batches | train loss 0.3802582 +| epoch 6 | 995/ 5600 batches | train loss 0.4031162 +| epoch 6 | 999/ 5600 batches | train loss 0.3810331 +| epoch 6 | 1003/ 5600 batches | train loss 0.3757188 +| epoch 6 | 1007/ 5600 batches | train loss 0.4669299 +| epoch 6 | 1011/ 5600 batches | train loss 0.3555448 +| epoch 6 | 1015/ 5600 batches | train loss 0.3646764 +| epoch 6 | 1019/ 5600 batches | train loss 0.3390593 +| epoch 6 | 1023/ 5600 batches | train loss 0.4857724 +| epoch 6 | 1027/ 5600 batches | train loss 0.4003369 +| epoch 6 | 1031/ 5600 batches | train loss 0.4629046 +| epoch 6 | 1035/ 5600 batches | train loss 0.3268340 +| epoch 6 | 1039/ 5600 batches | train loss 0.3903390 +| epoch 6 | 1043/ 5600 batches | train loss 0.4423463 +| epoch 6 | 1047/ 5600 batches | train loss 0.3544737 +| epoch 6 | 1051/ 5600 batches | train loss 0.4205398 +| epoch 6 | 1055/ 5600 batches | train loss 0.3890072 +| epoch 6 | 1059/ 5600 batches | train loss 0.3609558 +| epoch 6 | 1063/ 5600 batches | train loss 0.4115100 +| epoch 6 | 1067/ 5600 batches | train loss 0.4234082 +| epoch 6 | 1071/ 5600 batches | train loss 0.3761908 +| epoch 6 | 1075/ 5600 batches | train loss 0.4275818 +| epoch 6 | 1079/ 5600 batches | train loss 0.4016193 +| epoch 6 | 1083/ 5600 batches | train loss 0.3572917 +| epoch 6 | 1087/ 5600 batches | train loss 0.3941911 +| epoch 6 | 1091/ 5600 batches | train loss 0.3924628 +| epoch 6 | 1095/ 5600 batches | train loss 0.3799946 +| epoch 6 | 1099/ 5600 batches | train loss 0.4240927 +| epoch 6 | 1103/ 5600 batches | train loss 0.3661605 +| epoch 6 | 1107/ 5600 batches | train loss 0.3827843 +| epoch 6 | 1111/ 5600 batches | train loss 0.3429717 +| epoch 6 | 1115/ 5600 batches | train loss 0.3690465 +| epoch 6 | 1119/ 5600 batches | train loss 0.3753892 +| epoch 6 | 1123/ 5600 batches | train loss 0.3774691 +| epoch 6 | 1127/ 5600 batches | train loss 0.3616892 +| epoch 6 | 1131/ 5600 batches | train loss 0.3260658 +| epoch 6 | 1135/ 5600 batches | train loss 0.3666798 +| epoch 6 | 1139/ 5600 batches | train loss 0.3632072 +| epoch 6 | 1143/ 5600 batches | train loss 0.3529151 +| epoch 6 | 1147/ 5600 batches | train loss 0.4115067 +| epoch 6 | 1151/ 5600 batches | train loss 0.3935221 +| epoch 6 | 1155/ 5600 batches | train loss 0.3961173 +| epoch 6 | 1159/ 5600 batches | train loss 0.2677079 +| epoch 6 | 1163/ 5600 batches | train loss 0.3947071 +| epoch 6 | 1167/ 5600 batches | train loss 0.3553298 +| epoch 6 | 1171/ 5600 batches | train loss 0.2586291 +| epoch 6 | 1175/ 5600 batches | train loss 0.3715394 +| epoch 6 | 1179/ 5600 batches | train loss 0.4637242 +| epoch 6 | 1183/ 5600 batches | train loss 0.3621413 +| epoch 6 | 1187/ 5600 batches | train loss 0.4261153 +| epoch 6 | 1191/ 5600 batches | train loss 0.3265839 +| epoch 6 | 1195/ 5600 batches | train loss 0.4139960 +| epoch 6 | 1199/ 5600 batches | train loss 0.3956214 +| epoch 6 | 1203/ 5600 batches | train loss 0.4111803 +| epoch 6 | 1207/ 5600 batches | train loss 0.4256203 +| epoch 6 | 1211/ 5600 batches | train loss 0.4299678 +| epoch 6 | 1215/ 5600 batches | train loss 0.4287248 +| epoch 6 | 1219/ 5600 batches | train loss 0.4110371 +| epoch 6 | 1223/ 5600 batches | train loss 0.4095948 +| epoch 6 | 1227/ 5600 batches | train loss 0.4500106 +| epoch 6 | 1231/ 5600 batches | train loss 0.4250858 +| epoch 6 | 1235/ 5600 batches | train loss 0.3864195 +| epoch 6 | 1239/ 5600 batches | train loss 0.3720411 +| epoch 6 | 1243/ 5600 batches | train loss 0.4278772 +| epoch 6 | 1247/ 5600 batches | train loss 0.4595116 +| epoch 6 | 1251/ 5600 batches | train loss 0.4402825 +| epoch 6 | 1255/ 5600 batches | train loss 0.3107880 +| epoch 6 | 1259/ 5600 batches | train loss 0.4162623 +| epoch 6 | 1263/ 5600 batches | train loss 0.4566033 +| epoch 6 | 1267/ 5600 batches | train loss 0.4263456 +| epoch 6 | 1271/ 5600 batches | train loss 0.4623662 +| epoch 6 | 1275/ 5600 batches | train loss 0.3266874 +| epoch 6 | 1279/ 5600 batches | train loss 0.3337741 +| epoch 6 | 1283/ 5600 batches | train loss 0.4144777 +| epoch 6 | 1287/ 5600 batches | train loss 0.3233317 +| epoch 6 | 1291/ 5600 batches | train loss 0.3656178 +| epoch 6 | 1295/ 5600 batches | train loss 0.3866877 +| epoch 6 | 1299/ 5600 batches | train loss 0.3269746 +| epoch 6 | 1303/ 5600 batches | train loss 0.3795058 +| epoch 6 | 1307/ 5600 batches | train loss 0.4356442 +| epoch 6 | 1311/ 5600 batches | train loss 0.3003878 +| epoch 6 | 1315/ 5600 batches | train loss 0.3797918 +| epoch 6 | 1319/ 5600 batches | train loss 0.4291786 +| epoch 6 | 1323/ 5600 batches | train loss 0.3536443 +| epoch 6 | 1327/ 5600 batches | train loss 0.3406273 +| epoch 6 | 1331/ 5600 batches | train loss 0.3395122 +| epoch 6 | 1335/ 5600 batches | train loss 0.4314838 +| epoch 6 | 1339/ 5600 batches | train loss 0.3459388 +| epoch 6 | 1343/ 5600 batches | train loss 0.4365608 +| epoch 6 | 1347/ 5600 batches | train loss 0.3228804 +| epoch 6 | 1351/ 5600 batches | train loss 0.4570275 +| epoch 6 | 1355/ 5600 batches | train loss 0.3596882 +| epoch 6 | 1359/ 5600 batches | train loss 0.3811866 +| epoch 6 | 1363/ 5600 batches | train loss 0.4536469 +| epoch 6 | 1367/ 5600 batches | train loss 0.4342372 +| epoch 6 | 1371/ 5600 batches | train loss 0.3507962 +| epoch 6 | 1375/ 5600 batches | train loss 0.4409050 +| epoch 6 | 1379/ 5600 batches | train loss 0.4186163 +| epoch 6 | 1383/ 5600 batches | train loss 0.4365916 +| epoch 6 | 1387/ 5600 batches | train loss 0.3467951 +| epoch 6 | 1391/ 5600 batches | train loss 0.3870044 +| epoch 6 | 1395/ 5600 batches | train loss 0.3768343 +| epoch 6 | 1399/ 5600 batches | train loss 0.3553482 +| epoch 6 | 1403/ 5600 batches | train loss 0.3824369 +| epoch 6 | 1407/ 5600 batches | train loss 0.3815194 +| epoch 6 | 1411/ 5600 batches | train loss 0.3537730 +| epoch 6 | 1415/ 5600 batches | train loss 0.3561472 +| epoch 6 | 1419/ 5600 batches | train loss 0.3993020 +| epoch 6 | 1423/ 5600 batches | train loss 0.3804115 +| epoch 6 | 1427/ 5600 batches | train loss 0.4026434 +| epoch 6 | 1431/ 5600 batches | train loss 0.3308264 +| epoch 6 | 1435/ 5600 batches | train loss 0.3587986 +| epoch 6 | 1439/ 5600 batches | train loss 0.4010092 +| epoch 6 | 1443/ 5600 batches | train loss 0.3901817 +| epoch 6 | 1447/ 5600 batches | train loss 0.3993845 +| epoch 6 | 1451/ 5600 batches | train loss 0.4331348 +| epoch 6 | 1455/ 5600 batches | train loss 0.3409318 +| epoch 6 | 1459/ 5600 batches | train loss 0.3979833 +| epoch 6 | 1463/ 5600 batches | train loss 0.3891812 +| epoch 6 | 1467/ 5600 batches | train loss 0.3664177 +| epoch 6 | 1471/ 5600 batches | train loss 0.3544920 +| epoch 6 | 1475/ 5600 batches | train loss 0.3817963 +| epoch 6 | 1479/ 5600 batches | train loss 0.2883561 +| epoch 6 | 1483/ 5600 batches | train loss 0.4030584 +| epoch 6 | 1487/ 5600 batches | train loss 0.4248528 +| epoch 6 | 1491/ 5600 batches | train loss 0.3425981 +| epoch 6 | 1495/ 5600 batches | train loss 0.4145095 +| epoch 6 | 1499/ 5600 batches | train loss 0.3555498 +| epoch 6 | 1503/ 5600 batches | train loss 0.4385926 +| epoch 6 | 1507/ 5600 batches | train loss 0.4062625 +| epoch 6 | 1511/ 5600 batches | train loss 0.4033113 +| epoch 6 | 1515/ 5600 batches | train loss 0.3772608 +| epoch 6 | 1519/ 5600 batches | train loss 0.3346276 +| epoch 6 | 1523/ 5600 batches | train loss 0.3942744 +| epoch 6 | 1527/ 5600 batches | train loss 0.3827056 +| epoch 6 | 1531/ 5600 batches | train loss 0.3732437 +| epoch 6 | 1535/ 5600 batches | train loss 0.4184679 +| epoch 6 | 1539/ 5600 batches | train loss 0.3602795 +| epoch 6 | 1543/ 5600 batches | train loss 0.3750393 +| epoch 6 | 1547/ 5600 batches | train loss 0.2717814 +| epoch 6 | 1551/ 5600 batches | train loss 0.3777708 +| epoch 6 | 1555/ 5600 batches | train loss 0.3779503 +| epoch 6 | 1559/ 5600 batches | train loss 0.3830808 +| epoch 6 | 1563/ 5600 batches | train loss 0.3829536 +| epoch 6 | 1567/ 5600 batches | train loss 0.4134593 +| epoch 6 | 1571/ 5600 batches | train loss 0.2866248 +| epoch 6 | 1575/ 5600 batches | train loss 0.4479280 +| epoch 6 | 1579/ 5600 batches | train loss 0.4037207 +| epoch 6 | 1583/ 5600 batches | train loss 0.3807793 +| epoch 6 | 1587/ 5600 batches | train loss 0.3244069 +| epoch 6 | 1591/ 5600 batches | train loss 0.4230616 +| epoch 6 | 1595/ 5600 batches | train loss 0.3774981 +| epoch 6 | 1599/ 5600 batches | train loss 0.3956177 +| epoch 6 | 1603/ 5600 batches | train loss 0.3068516 +| epoch 6 | 1607/ 5600 batches | train loss 0.4056920 +| epoch 6 | 1611/ 5600 batches | train loss 0.3577086 +| epoch 6 | 1615/ 5600 batches | train loss 0.3653726 +| epoch 6 | 1619/ 5600 batches | train loss 0.3395414 +| epoch 6 | 1623/ 5600 batches | train loss 0.5057271 +| epoch 6 | 1627/ 5600 batches | train loss 0.3910666 +| epoch 6 | 1631/ 5600 batches | train loss 0.3956643 +| epoch 6 | 1635/ 5600 batches | train loss 0.3619909 +| epoch 6 | 1639/ 5600 batches | train loss 0.3960209 +| epoch 6 | 1643/ 5600 batches | train loss 0.3285136 +| epoch 6 | 1647/ 5600 batches | train loss 0.4669570 +| epoch 6 | 1651/ 5600 batches | train loss 0.4125247 +| epoch 6 | 1655/ 5600 batches | train loss 0.3755808 +| epoch 6 | 1659/ 5600 batches | train loss 0.3952712 +| epoch 6 | 1663/ 5600 batches | train loss 0.3660952 +| epoch 6 | 1667/ 5600 batches | train loss 0.3619744 +| epoch 6 | 1671/ 5600 batches | train loss 0.3615545 +| epoch 6 | 1675/ 5600 batches | train loss 0.2647253 +| epoch 6 | 1679/ 5600 batches | train loss 0.4749057 +| epoch 6 | 1683/ 5600 batches | train loss 0.3864293 +| epoch 6 | 1687/ 5600 batches | train loss 0.4416680 +| epoch 6 | 1691/ 5600 batches | train loss 0.4109437 +| epoch 6 | 1695/ 5600 batches | train loss 0.3706599 +| epoch 6 | 1699/ 5600 batches | train loss 0.3695927 +| epoch 6 | 1703/ 5600 batches | train loss 0.2714524 +| epoch 6 | 1707/ 5600 batches | train loss 0.3742360 +| epoch 6 | 1711/ 5600 batches | train loss 0.3883736 +| epoch 6 | 1715/ 5600 batches | train loss 0.3113286 +| epoch 6 | 1719/ 5600 batches | train loss 0.3971580 +| epoch 6 | 1723/ 5600 batches | train loss 0.3557328 +| epoch 6 | 1727/ 5600 batches | train loss 0.3844728 +| epoch 6 | 1731/ 5600 batches | train loss 0.3812397 +| epoch 6 | 1735/ 5600 batches | train loss 0.4637578 +| epoch 6 | 1739/ 5600 batches | train loss 0.3596951 +| epoch 6 | 1743/ 5600 batches | train loss 0.3830588 +| epoch 6 | 1747/ 5600 batches | train loss 0.3919730 +| epoch 6 | 1751/ 5600 batches | train loss 0.3496964 +| epoch 6 | 1755/ 5600 batches | train loss 0.3863337 +| epoch 6 | 1759/ 5600 batches | train loss 0.3404906 +| epoch 6 | 1763/ 5600 batches | train loss 0.4437232 +| epoch 6 | 1767/ 5600 batches | train loss 0.4160726 +| epoch 6 | 1771/ 5600 batches | train loss 0.4015303 +| epoch 6 | 1775/ 5600 batches | train loss 0.3782914 +| epoch 6 | 1779/ 5600 batches | train loss 0.3335993 +| epoch 6 | 1783/ 5600 batches | train loss 0.4470053 +| epoch 6 | 1787/ 5600 batches | train loss 0.4159166 +| epoch 6 | 1791/ 5600 batches | train loss 0.4091040 +| epoch 6 | 1795/ 5600 batches | train loss 0.4237423 +| epoch 6 | 1799/ 5600 batches | train loss 0.3766921 +| epoch 6 | 1803/ 5600 batches | train loss 0.2328910 +| epoch 6 | 1807/ 5600 batches | train loss 0.4570592 +| epoch 6 | 1811/ 5600 batches | train loss 0.3572305 +| epoch 6 | 1815/ 5600 batches | train loss 0.3441454 +| epoch 6 | 1819/ 5600 batches | train loss 0.3790576 +| epoch 6 | 1823/ 5600 batches | train loss 0.3640878 +| epoch 6 | 1827/ 5600 batches | train loss 0.3598055 +| epoch 6 | 1831/ 5600 batches | train loss 0.4712842 +| epoch 6 | 1835/ 5600 batches | train loss 0.4321035 +| epoch 6 | 1839/ 5600 batches | train loss 0.3727055 +| epoch 6 | 1843/ 5600 batches | train loss 0.3497470 +| epoch 6 | 1847/ 5600 batches | train loss 0.4617800 +| epoch 6 | 1851/ 5600 batches | train loss 0.4769056 +| epoch 6 | 1855/ 5600 batches | train loss 0.3829644 +| epoch 6 | 1859/ 5600 batches | train loss 0.3557362 +| epoch 6 | 1863/ 5600 batches | train loss 0.3659156 +| epoch 6 | 1867/ 5600 batches | train loss 0.4281256 +| epoch 6 | 1871/ 5600 batches | train loss 0.4112476 +| epoch 6 | 1875/ 5600 batches | train loss 0.3879294 +| epoch 6 | 1879/ 5600 batches | train loss 0.4018568 +| epoch 6 | 1883/ 5600 batches | train loss 0.3353821 +| epoch 6 | 1887/ 5600 batches | train loss 0.3897833 +| epoch 6 | 1891/ 5600 batches | train loss 0.3736308 +| epoch 6 | 1895/ 5600 batches | train loss 0.3971175 +| epoch 6 | 1899/ 5600 batches | train loss 0.3791764 +| epoch 6 | 1903/ 5600 batches | train loss 0.4040211 +| epoch 6 | 1907/ 5600 batches | train loss 0.3339220 +| epoch 6 | 1911/ 5600 batches | train loss 0.4471700 +| epoch 6 | 1915/ 5600 batches | train loss 0.3915960 +| epoch 6 | 1919/ 5600 batches | train loss 0.3839913 +| epoch 6 | 1923/ 5600 batches | train loss 0.4108185 +| epoch 6 | 1927/ 5600 batches | train loss 0.4019943 +| epoch 6 | 1931/ 5600 batches | train loss 0.4175911 +| epoch 6 | 1935/ 5600 batches | train loss 0.4137878 +| epoch 6 | 1939/ 5600 batches | train loss 0.3819169 +| epoch 6 | 1943/ 5600 batches | train loss 0.4324534 +| epoch 6 | 1947/ 5600 batches | train loss 0.3373102 +| epoch 6 | 1951/ 5600 batches | train loss 0.3283500 +| epoch 6 | 1955/ 5600 batches | train loss 0.3241991 +| epoch 6 | 1959/ 5600 batches | train loss 0.3787411 +| epoch 6 | 1963/ 5600 batches | train loss 0.4817252 +| epoch 6 | 1967/ 5600 batches | train loss 0.3853911 +| epoch 6 | 1971/ 5600 batches | train loss 0.4390245 +| epoch 6 | 1975/ 5600 batches | train loss 0.2945471 +| epoch 6 | 1979/ 5600 batches | train loss 0.3705654 +| epoch 6 | 1983/ 5600 batches | train loss 0.3521125 +| epoch 6 | 1987/ 5600 batches | train loss 0.3995128 +| epoch 6 | 1991/ 5600 batches | train loss 0.3719435 +| epoch 6 | 1995/ 5600 batches | train loss 0.3969143 +| epoch 6 | 1999/ 5600 batches | train loss 0.4298608 +| epoch 6 | 2003/ 5600 batches | train loss 0.4268900 +| epoch 6 | 2007/ 5600 batches | train loss 0.4351255 +| epoch 6 | 2011/ 5600 batches | train loss 0.4145369 +| epoch 6 | 2015/ 5600 batches | train loss 0.3472306 +| epoch 6 | 2019/ 5600 batches | train loss 0.4882370 +| epoch 6 | 2023/ 5600 batches | train loss 0.4244349 +| epoch 6 | 2027/ 5600 batches | train loss 0.3910726 +| epoch 6 | 2031/ 5600 batches | train loss 0.3504492 +| epoch 6 | 2035/ 5600 batches | train loss 0.3437391 +| epoch 6 | 2039/ 5600 batches | train loss 0.3707539 +| epoch 6 | 2043/ 5600 batches | train loss 0.4077221 +| epoch 6 | 2047/ 5600 batches | train loss 0.4348607 +| epoch 6 | 2051/ 5600 batches | train loss 0.3691572 +| epoch 6 | 2055/ 5600 batches | train loss 0.3499407 +| epoch 6 | 2059/ 5600 batches | train loss 0.3107470 +| epoch 6 | 2063/ 5600 batches | train loss 0.4007818 +| epoch 6 | 2067/ 5600 batches | train loss 0.4229204 +| epoch 6 | 2071/ 5600 batches | train loss 0.3447670 +| epoch 6 | 2075/ 5600 batches | train loss 0.3456506 +| epoch 6 | 2079/ 5600 batches | train loss 0.3990471 +| epoch 6 | 2083/ 5600 batches | train loss 0.3323064 +| epoch 6 | 2087/ 5600 batches | train loss 0.4906120 +| epoch 6 | 2091/ 5600 batches | train loss 0.4638446 +| epoch 6 | 2095/ 5600 batches | train loss 0.3634951 +| epoch 6 | 2099/ 5600 batches | train loss 0.3856550 +| epoch 6 | 2103/ 5600 batches | train loss 0.4985286 +| epoch 6 | 2107/ 5600 batches | train loss 0.3637496 +| epoch 6 | 2111/ 5600 batches | train loss 0.3451290 +| epoch 6 | 2115/ 5600 batches | train loss 0.4622186 +| epoch 6 | 2119/ 5600 batches | train loss 0.4542955 +| epoch 6 | 2123/ 5600 batches | train loss 0.4393857 +| epoch 6 | 2127/ 5600 batches | train loss 0.4149391 +| epoch 6 | 2131/ 5600 batches | train loss 0.3767154 +| epoch 6 | 2135/ 5600 batches | train loss 0.3624484 +| epoch 6 | 2139/ 5600 batches | train loss 0.3739524 +| epoch 6 | 2143/ 5600 batches | train loss 0.4648224 +| epoch 6 | 2147/ 5600 batches | train loss 0.3709633 +| epoch 6 | 2151/ 5600 batches | train loss 0.3879694 +| epoch 6 | 2155/ 5600 batches | train loss 0.3742251 +| epoch 6 | 2159/ 5600 batches | train loss 0.3316922 +| epoch 6 | 2163/ 5600 batches | train loss 0.2697736 +| epoch 6 | 2167/ 5600 batches | train loss 0.4963516 +| epoch 6 | 2171/ 5600 batches | train loss 0.3851125 +| epoch 6 | 2175/ 5600 batches | train loss 0.3562143 +| epoch 6 | 2179/ 5600 batches | train loss 0.3262368 +| epoch 6 | 2183/ 5600 batches | train loss 0.3329551 +| epoch 6 | 2187/ 5600 batches | train loss 0.3623140 +| epoch 6 | 2191/ 5600 batches | train loss 0.4059641 +| epoch 6 | 2195/ 5600 batches | train loss 0.3298006 +| epoch 6 | 2199/ 5600 batches | train loss 0.4061503 +| epoch 6 | 2203/ 5600 batches | train loss 0.3704548 +| epoch 6 | 2207/ 5600 batches | train loss 0.3775839 +| epoch 6 | 2211/ 5600 batches | train loss 0.3882235 +| epoch 6 | 2215/ 5600 batches | train loss 0.3237803 +| epoch 6 | 2219/ 5600 batches | train loss 0.3798815 +| epoch 6 | 2223/ 5600 batches | train loss 0.3619015 +| epoch 6 | 2227/ 5600 batches | train loss 0.3541780 +| epoch 6 | 2231/ 5600 batches | train loss 0.4193416 +| epoch 6 | 2235/ 5600 batches | train loss 0.4025411 +| epoch 6 | 2239/ 5600 batches | train loss 0.3909834 +| epoch 6 | 2243/ 5600 batches | train loss 0.3602908 +| epoch 6 | 2247/ 5600 batches | train loss 0.3213763 +| epoch 6 | 2251/ 5600 batches | train loss 0.4037289 +| epoch 6 | 2255/ 5600 batches | train loss 0.4635155 +| epoch 6 | 2259/ 5600 batches | train loss 0.3578416 +| epoch 6 | 2263/ 5600 batches | train loss 0.4087595 +| epoch 6 | 2267/ 5600 batches | train loss 0.3462942 +| epoch 6 | 2271/ 5600 batches | train loss 0.3506926 +| epoch 6 | 2275/ 5600 batches | train loss 0.3560208 +| epoch 6 | 2279/ 5600 batches | train loss 0.3296048 +| epoch 6 | 2283/ 5600 batches | train loss 0.4651265 +| epoch 6 | 2287/ 5600 batches | train loss 0.3338660 +| epoch 6 | 2291/ 5600 batches | train loss 0.3770758 +| epoch 6 | 2295/ 5600 batches | train loss 0.4243033 +| epoch 6 | 2299/ 5600 batches | train loss 0.3799335 +| epoch 6 | 2303/ 5600 batches | train loss 0.3499944 +| epoch 6 | 2307/ 5600 batches | train loss 0.4702511 +| epoch 6 | 2311/ 5600 batches | train loss 0.3388932 +| epoch 6 | 2315/ 5600 batches | train loss 0.3494261 +| epoch 6 | 2319/ 5600 batches | train loss 0.4196199 +| epoch 6 | 2323/ 5600 batches | train loss 0.3956807 +| epoch 6 | 2327/ 5600 batches | train loss 0.3966301 +| epoch 6 | 2331/ 5600 batches | train loss 0.4469053 +| epoch 6 | 2335/ 5600 batches | train loss 0.4010284 +| epoch 6 | 2339/ 5600 batches | train loss 0.3906516 +| epoch 6 | 2343/ 5600 batches | train loss 0.3022535 +| epoch 6 | 2347/ 5600 batches | train loss 0.3423238 +| epoch 6 | 2351/ 5600 batches | train loss 0.3657976 +| epoch 6 | 2355/ 5600 batches | train loss 0.4044679 +| epoch 6 | 2359/ 5600 batches | train loss 0.4424733 +| epoch 6 | 2363/ 5600 batches | train loss 0.3487149 +| epoch 6 | 2367/ 5600 batches | train loss 0.3715604 +| epoch 6 | 2371/ 5600 batches | train loss 0.3810912 +| epoch 6 | 2375/ 5600 batches | train loss 0.4517024 +| epoch 6 | 2379/ 5600 batches | train loss 0.4296961 +| epoch 6 | 2383/ 5600 batches | train loss 0.3581773 +| epoch 6 | 2387/ 5600 batches | train loss 0.3978848 +| epoch 6 | 2391/ 5600 batches | train loss 0.3721335 +| epoch 6 | 2395/ 5600 batches | train loss 0.4127583 +| epoch 6 | 2399/ 5600 batches | train loss 0.4084839 +| epoch 6 | 2403/ 5600 batches | train loss 0.4112647 +| epoch 6 | 2407/ 5600 batches | train loss 0.3777542 +| epoch 6 | 2411/ 5600 batches | train loss 0.3951669 +| epoch 6 | 2415/ 5600 batches | train loss 0.3570449 +| epoch 6 | 2419/ 5600 batches | train loss 0.4167174 +| epoch 6 | 2423/ 5600 batches | train loss 0.3233625 +| epoch 6 | 2427/ 5600 batches | train loss 0.3755260 +| epoch 6 | 2431/ 5600 batches | train loss 0.4510536 +| epoch 6 | 2435/ 5600 batches | train loss 0.3649136 +| epoch 6 | 2439/ 5600 batches | train loss 0.4495109 +| epoch 6 | 2443/ 5600 batches | train loss 0.3741223 +| epoch 6 | 2447/ 5600 batches | train loss 0.4104854 +| epoch 6 | 2451/ 5600 batches | train loss 0.4403087 +| epoch 6 | 2455/ 5600 batches | train loss 0.3590029 +| epoch 6 | 2459/ 5600 batches | train loss 0.4028516 +| epoch 6 | 2463/ 5600 batches | train loss 0.3328510 +| epoch 6 | 2467/ 5600 batches | train loss 0.3861865 +| epoch 6 | 2471/ 5600 batches | train loss 0.3742564 +| epoch 6 | 2475/ 5600 batches | train loss 0.3209628 +| epoch 6 | 2479/ 5600 batches | train loss 0.3578580 +| epoch 6 | 2483/ 5600 batches | train loss 0.3673344 +| epoch 6 | 2487/ 5600 batches | train loss 0.4179123 +| epoch 6 | 2491/ 5600 batches | train loss 0.4248741 +| epoch 6 | 2495/ 5600 batches | train loss 0.3700159 +| epoch 6 | 2499/ 5600 batches | train loss 0.3465958 +| epoch 6 | 2503/ 5600 batches | train loss 0.3650787 +| epoch 6 | 2507/ 5600 batches | train loss 0.3764862 +| epoch 6 | 2511/ 5600 batches | train loss 0.4181517 +| epoch 6 | 2515/ 5600 batches | train loss 0.3472230 +| epoch 6 | 2519/ 5600 batches | train loss 0.3680573 +| epoch 6 | 2523/ 5600 batches | train loss 0.3248619 +| epoch 6 | 2527/ 5600 batches | train loss 0.4091613 +| epoch 6 | 2531/ 5600 batches | train loss 0.3553311 +| epoch 6 | 2535/ 5600 batches | train loss 0.3998989 +| epoch 6 | 2539/ 5600 batches | train loss 0.3529078 +| epoch 6 | 2543/ 5600 batches | train loss 0.3859054 +| epoch 6 | 2547/ 5600 batches | train loss 0.4194156 +| epoch 6 | 2551/ 5600 batches | train loss 0.3462007 +| epoch 6 | 2555/ 5600 batches | train loss 0.3509153 +| epoch 6 | 2559/ 5600 batches | train loss 0.3944485 +| epoch 6 | 2563/ 5600 batches | train loss 0.4271670 +| epoch 6 | 2567/ 5600 batches | train loss 0.3255569 +| epoch 6 | 2571/ 5600 batches | train loss 0.3700499 +| epoch 6 | 2575/ 5600 batches | train loss 0.4141531 +| epoch 6 | 2579/ 5600 batches | train loss 0.4045900 +| epoch 6 | 2583/ 5600 batches | train loss 0.3527729 +| epoch 6 | 2587/ 5600 batches | train loss 0.3690454 +| epoch 6 | 2591/ 5600 batches | train loss 0.3248892 +| epoch 6 | 2595/ 5600 batches | train loss 0.3200892 +| epoch 6 | 2599/ 5600 batches | train loss 0.4075758 +| epoch 6 | 2603/ 5600 batches | train loss 0.4125582 +| epoch 6 | 2607/ 5600 batches | train loss 0.3509894 +| epoch 6 | 2611/ 5600 batches | train loss 0.4014338 +| epoch 6 | 2615/ 5600 batches | train loss 0.3658253 +| epoch 6 | 2619/ 5600 batches | train loss 0.3987334 +| epoch 6 | 2623/ 5600 batches | train loss 0.3816328 +| epoch 6 | 2627/ 5600 batches | train loss 0.4067183 +| epoch 6 | 2631/ 5600 batches | train loss 0.3408021 +| epoch 6 | 2635/ 5600 batches | train loss 0.3787934 +| epoch 6 | 2639/ 5600 batches | train loss 0.4044815 +| epoch 6 | 2643/ 5600 batches | train loss 0.3804654 +| epoch 6 | 2647/ 5600 batches | train loss 0.3976814 +| epoch 6 | 2651/ 5600 batches | train loss 0.3899817 +| epoch 6 | 2655/ 5600 batches | train loss 0.3806134 +| epoch 6 | 2659/ 5600 batches | train loss 0.3799089 +| epoch 6 | 2663/ 5600 batches | train loss 0.4553675 +| epoch 6 | 2667/ 5600 batches | train loss 0.3906156 +| epoch 6 | 2671/ 5600 batches | train loss 0.3433642 +| epoch 6 | 2675/ 5600 batches | train loss 0.3737532 +| epoch 6 | 2679/ 5600 batches | train loss 0.3886042 +| epoch 6 | 2683/ 5600 batches | train loss 0.3606949 +| epoch 6 | 2687/ 5600 batches | train loss 0.3835171 +| epoch 6 | 2691/ 5600 batches | train loss 0.3729008 +| epoch 6 | 2695/ 5600 batches | train loss 0.3843284 +| epoch 6 | 2699/ 5600 batches | train loss 0.4332281 +| epoch 6 | 2703/ 5600 batches | train loss 0.4248981 +| epoch 6 | 2707/ 5600 batches | train loss 0.3495623 +| epoch 6 | 2711/ 5600 batches | train loss 0.3649971 +| epoch 6 | 2715/ 5600 batches | train loss 0.3825066 +| epoch 6 | 2719/ 5600 batches | train loss 0.3366583 +| epoch 6 | 2723/ 5600 batches | train loss 0.3957779 +| epoch 6 | 2727/ 5600 batches | train loss 0.3669032 +| epoch 6 | 2731/ 5600 batches | train loss 0.4496066 +| epoch 6 | 2735/ 5600 batches | train loss 0.3273359 +| epoch 6 | 2739/ 5600 batches | train loss 0.4609107 +| epoch 6 | 2743/ 5600 batches | train loss 0.3479718 +| epoch 6 | 2747/ 5600 batches | train loss 0.3847364 +| epoch 6 | 2751/ 5600 batches | train loss 0.2794140 +| epoch 6 | 2755/ 5600 batches | train loss 0.4054590 +| epoch 6 | 2759/ 5600 batches | train loss 0.3442180 +| epoch 6 | 2763/ 5600 batches | train loss 0.3637273 +| epoch 6 | 2767/ 5600 batches | train loss 0.4001241 +| epoch 6 | 2771/ 5600 batches | train loss 0.4303739 +| epoch 6 | 2775/ 5600 batches | train loss 0.3909297 +| epoch 6 | 2779/ 5600 batches | train loss 0.3735114 +| epoch 6 | 2783/ 5600 batches | train loss 0.4226731 +| epoch 6 | 2787/ 5600 batches | train loss 0.4248931 +| epoch 6 | 2791/ 5600 batches | train loss 0.4287811 +| epoch 6 | 2795/ 5600 batches | train loss 0.3673404 +| epoch 6 | 2799/ 5600 batches | train loss 0.4225342 +| epoch 6 | 2803/ 5600 batches | train loss 0.3298754 +| epoch 6 | 2807/ 5600 batches | train loss 0.3281871 +| epoch 6 | 2811/ 5600 batches | train loss 0.4327646 +| epoch 6 | 2815/ 5600 batches | train loss 0.3936199 +| epoch 6 | 2819/ 5600 batches | train loss 0.4025458 +| epoch 6 | 2823/ 5600 batches | train loss 0.3276729 +| epoch 6 | 2827/ 5600 batches | train loss 0.3759376 +| epoch 6 | 2831/ 5600 batches | train loss 0.3488327 +| epoch 6 | 2835/ 5600 batches | train loss 0.3165125 +| epoch 6 | 2839/ 5600 batches | train loss 0.4270481 +| epoch 6 | 2843/ 5600 batches | train loss 0.3283986 +| epoch 6 | 2847/ 5600 batches | train loss 0.3542063 +| epoch 6 | 2851/ 5600 batches | train loss 0.3349155 +| epoch 6 | 2855/ 5600 batches | train loss 0.3782128 +| epoch 6 | 2859/ 5600 batches | train loss 0.3961869 +| epoch 6 | 2863/ 5600 batches | train loss 0.3846183 +| epoch 6 | 2867/ 5600 batches | train loss 0.3443821 +| epoch 6 | 2871/ 5600 batches | train loss 0.3487223 +| epoch 6 | 2875/ 5600 batches | train loss 0.3317609 +| epoch 6 | 2879/ 5600 batches | train loss 0.3445125 +| epoch 6 | 2883/ 5600 batches | train loss 0.4218395 +| epoch 6 | 2887/ 5600 batches | train loss 0.3868184 +| epoch 6 | 2891/ 5600 batches | train loss 0.3813094 +| epoch 6 | 2895/ 5600 batches | train loss 0.4575971 +| epoch 6 | 2899/ 5600 batches | train loss 0.3853854 +| epoch 6 | 2903/ 5600 batches | train loss 0.2481171 +| epoch 6 | 2907/ 5600 batches | train loss 0.4048234 +| epoch 6 | 2911/ 5600 batches | train loss 0.3957756 +| epoch 6 | 2915/ 5600 batches | train loss 0.3895610 +| epoch 6 | 2919/ 5600 batches | train loss 0.4012699 +| epoch 6 | 2923/ 5600 batches | train loss 0.4024870 +| epoch 6 | 2927/ 5600 batches | train loss 0.3918457 +| epoch 6 | 2931/ 5600 batches | train loss 0.4423247 +| epoch 6 | 2935/ 5600 batches | train loss 0.3552238 +| epoch 6 | 2939/ 5600 batches | train loss 0.4614619 +| epoch 6 | 2943/ 5600 batches | train loss 0.4003253 +| epoch 6 | 2947/ 5600 batches | train loss 0.4581125 +| epoch 6 | 2951/ 5600 batches | train loss 0.3963027 +| epoch 6 | 2955/ 5600 batches | train loss 0.4157732 +| epoch 6 | 2959/ 5600 batches | train loss 0.3686948 +| epoch 6 | 2963/ 5600 batches | train loss 0.4393210 +| epoch 6 | 2967/ 5600 batches | train loss 0.3998064 +| epoch 6 | 2971/ 5600 batches | train loss 0.4360218 +| epoch 6 | 2975/ 5600 batches | train loss 0.4328663 +| epoch 6 | 2979/ 5600 batches | train loss 0.4041378 +| epoch 6 | 2983/ 5600 batches | train loss 0.4559683 +| epoch 6 | 2987/ 5600 batches | train loss 0.4695827 +| epoch 6 | 2991/ 5600 batches | train loss 0.5717826 +| epoch 6 | 2995/ 5600 batches | train loss 0.4652863 +| epoch 6 | 2999/ 5600 batches | train loss 0.3863440 +| epoch 6 | 3003/ 5600 batches | train loss 0.3596556 +| epoch 6 | 3007/ 5600 batches | train loss 0.3276235 +| epoch 6 | 3011/ 5600 batches | train loss 0.3545840 +| epoch 6 | 3015/ 5600 batches | train loss 0.4087029 +| epoch 6 | 3019/ 5600 batches | train loss 0.3709369 +| epoch 6 | 3023/ 5600 batches | train loss 0.3804147 +| epoch 6 | 3027/ 5600 batches | train loss 0.3871572 +| epoch 6 | 3031/ 5600 batches | train loss 0.3454890 +| epoch 6 | 3035/ 5600 batches | train loss 0.3794114 +| epoch 6 | 3039/ 5600 batches | train loss 0.3903146 +| epoch 6 | 3043/ 5600 batches | train loss 0.3522211 +| epoch 6 | 3047/ 5600 batches | train loss 0.3764337 +| epoch 6 | 3051/ 5600 batches | train loss 0.3848980 +| epoch 6 | 3055/ 5600 batches | train loss 0.3785529 +| epoch 6 | 3059/ 5600 batches | train loss 0.3683945 +| epoch 6 | 3063/ 5600 batches | train loss 0.3289411 +| epoch 6 | 3067/ 5600 batches | train loss 0.4291548 +| epoch 6 | 3071/ 5600 batches | train loss 0.3662133 +| epoch 6 | 3075/ 5600 batches | train loss 0.3591113 +| epoch 6 | 3079/ 5600 batches | train loss 0.4411954 +| epoch 6 | 3083/ 5600 batches | train loss 0.3420126 +| epoch 6 | 3087/ 5600 batches | train loss 0.3688117 +| epoch 6 | 3091/ 5600 batches | train loss 0.3468133 +| epoch 6 | 3095/ 5600 batches | train loss 0.3614171 +| epoch 6 | 3099/ 5600 batches | train loss 0.4936479 +| epoch 6 | 3103/ 5600 batches | train loss 0.3415499 +| epoch 6 | 3107/ 5600 batches | train loss 0.3646118 +| epoch 6 | 3111/ 5600 batches | train loss 0.4314435 +| epoch 6 | 3115/ 5600 batches | train loss 0.3941127 +| epoch 6 | 3119/ 5600 batches | train loss 0.3463428 +| epoch 6 | 3123/ 5600 batches | train loss 0.4457977 +| epoch 6 | 3127/ 5600 batches | train loss 0.4285369 +| epoch 6 | 3131/ 5600 batches | train loss 0.3801717 +| epoch 6 | 3135/ 5600 batches | train loss 0.4425213 +| epoch 6 | 3139/ 5600 batches | train loss 0.3673213 +| epoch 6 | 3143/ 5600 batches | train loss 0.3444255 +| epoch 6 | 3147/ 5600 batches | train loss 0.3922343 +| epoch 6 | 3151/ 5600 batches | train loss 0.3436334 +| epoch 6 | 3155/ 5600 batches | train loss 0.4155101 +| epoch 6 | 3159/ 5600 batches | train loss 0.4663597 +| epoch 6 | 3163/ 5600 batches | train loss 0.4397556 +| epoch 6 | 3167/ 5600 batches | train loss 0.4013898 +| epoch 6 | 3171/ 5600 batches | train loss 0.3642040 +| epoch 6 | 3175/ 5600 batches | train loss 0.3083259 +| epoch 6 | 3179/ 5600 batches | train loss 0.4071921 +| epoch 6 | 3183/ 5600 batches | train loss 0.4337640 +| epoch 6 | 3187/ 5600 batches | train loss 0.3539367 +| epoch 6 | 3191/ 5600 batches | train loss 0.4590494 +| epoch 6 | 3195/ 5600 batches | train loss 0.4017346 +| epoch 6 | 3199/ 5600 batches | train loss 0.3093974 +| epoch 6 | 3203/ 5600 batches | train loss 0.4393912 +| epoch 6 | 3207/ 5600 batches | train loss 0.4927078 +| epoch 6 | 3211/ 5600 batches | train loss 0.4436426 +| epoch 6 | 3215/ 5600 batches | train loss 0.3294515 +| epoch 6 | 3219/ 5600 batches | train loss 0.4171813 +| epoch 6 | 3223/ 5600 batches | train loss 0.4106847 +| epoch 6 | 3227/ 5600 batches | train loss 0.3780299 +| epoch 6 | 3231/ 5600 batches | train loss 0.4173594 +| epoch 6 | 3235/ 5600 batches | train loss 0.3320012 +| epoch 6 | 3239/ 5600 batches | train loss 0.3667626 +| epoch 6 | 3243/ 5600 batches | train loss 0.4162662 +| epoch 6 | 3247/ 5600 batches | train loss 0.4092216 +| epoch 6 | 3251/ 5600 batches | train loss 0.3959639 +| epoch 6 | 3255/ 5600 batches | train loss 0.3967237 +| epoch 6 | 3259/ 5600 batches | train loss 0.3810193 +| epoch 6 | 3263/ 5600 batches | train loss 0.4307969 +| epoch 6 | 3267/ 5600 batches | train loss 0.3911186 +| epoch 6 | 3271/ 5600 batches | train loss 0.4016792 +| epoch 6 | 3275/ 5600 batches | train loss 0.3808204 +| epoch 6 | 3279/ 5600 batches | train loss 0.3213734 +| epoch 6 | 3283/ 5600 batches | train loss 0.3885639 +| epoch 6 | 3287/ 5600 batches | train loss 0.3465927 +| epoch 6 | 3291/ 5600 batches | train loss 0.3880258 +| epoch 6 | 3295/ 5600 batches | train loss 0.3983099 +| epoch 6 | 3299/ 5600 batches | train loss 0.4036579 +| epoch 6 | 3303/ 5600 batches | train loss 0.4509494 +| epoch 6 | 3307/ 5600 batches | train loss 0.3695861 +| epoch 6 | 3311/ 5600 batches | train loss 0.3829549 +| epoch 6 | 3315/ 5600 batches | train loss 0.4058403 +| epoch 6 | 3319/ 5600 batches | train loss 0.3434327 +| epoch 6 | 3323/ 5600 batches | train loss 0.4168024 +| epoch 6 | 3327/ 5600 batches | train loss 0.3874881 +| epoch 6 | 3331/ 5600 batches | train loss 0.3499065 +| epoch 6 | 3335/ 5600 batches | train loss 0.4045995 +| epoch 6 | 3339/ 5600 batches | train loss 0.3623829 +| epoch 6 | 3343/ 5600 batches | train loss 0.4704268 +| epoch 6 | 3347/ 5600 batches | train loss 0.4436743 +| epoch 6 | 3351/ 5600 batches | train loss 0.3539946 +| epoch 6 | 3355/ 5600 batches | train loss 0.3798212 +| epoch 6 | 3359/ 5600 batches | train loss 0.4094017 +| epoch 6 | 3363/ 5600 batches | train loss 0.1786311 +| epoch 6 | 3367/ 5600 batches | train loss 0.3511190 +| epoch 6 | 3371/ 5600 batches | train loss 0.4424053 +| epoch 6 | 3375/ 5600 batches | train loss 0.3610557 +| epoch 6 | 3379/ 5600 batches | train loss 0.4509474 +| epoch 6 | 3383/ 5600 batches | train loss 0.3452448 +| epoch 6 | 3387/ 5600 batches | train loss 0.3586329 +| epoch 6 | 3391/ 5600 batches | train loss 0.4138047 +| epoch 6 | 3395/ 5600 batches | train loss 0.4587036 +| epoch 6 | 3399/ 5600 batches | train loss 0.4075101 +| epoch 6 | 3403/ 5600 batches | train loss 0.3824095 +| epoch 6 | 3407/ 5600 batches | train loss 0.3839220 +| epoch 6 | 3411/ 5600 batches | train loss 0.3998443 +| epoch 6 | 3415/ 5600 batches | train loss 0.3063335 +| epoch 6 | 3419/ 5600 batches | train loss 0.3544044 +| epoch 6 | 3423/ 5600 batches | train loss 0.3970526 +| epoch 6 | 3427/ 5600 batches | train loss 0.3942555 +| epoch 6 | 3431/ 5600 batches | train loss 0.3689852 +| epoch 6 | 3435/ 5600 batches | train loss 0.4182491 +| epoch 6 | 3439/ 5600 batches | train loss 0.3673226 +| epoch 6 | 3443/ 5600 batches | train loss 0.3540029 +| epoch 6 | 3447/ 5600 batches | train loss 0.3918916 +| epoch 6 | 3451/ 5600 batches | train loss 0.3535313 +| epoch 6 | 3455/ 5600 batches | train loss 0.4047277 +| epoch 6 | 3459/ 5600 batches | train loss 0.4641986 +| epoch 6 | 3463/ 5600 batches | train loss 0.4223397 +| epoch 6 | 3467/ 5600 batches | train loss 0.3814699 +| epoch 6 | 3471/ 5600 batches | train loss 0.3582460 +| epoch 6 | 3475/ 5600 batches | train loss 0.3749708 +| epoch 6 | 3479/ 5600 batches | train loss 0.3651488 +| epoch 6 | 3483/ 5600 batches | train loss 0.4061717 +| epoch 6 | 3487/ 5600 batches | train loss 0.4047391 +| epoch 6 | 3491/ 5600 batches | train loss 0.3466284 +| epoch 6 | 3495/ 5600 batches | train loss 0.3236502 +| epoch 6 | 3499/ 5600 batches | train loss 0.4080828 +| epoch 6 | 3503/ 5600 batches | train loss 0.4210251 +| epoch 6 | 3507/ 5600 batches | train loss 0.4056979 +| epoch 6 | 3511/ 5600 batches | train loss 0.4311496 +| epoch 6 | 3515/ 5600 batches | train loss 0.3425919 +| epoch 6 | 3519/ 5600 batches | train loss 0.4246244 +| epoch 6 | 3523/ 5600 batches | train loss 0.3305463 +| epoch 6 | 3527/ 5600 batches | train loss 0.3894323 +| epoch 6 | 3531/ 5600 batches | train loss 0.3244922 +| epoch 6 | 3535/ 5600 batches | train loss 0.3630915 +| epoch 6 | 3539/ 5600 batches | train loss 0.3610634 +| epoch 6 | 3543/ 5600 batches | train loss 0.3490862 +| epoch 6 | 3547/ 5600 batches | train loss 0.3549169 +| epoch 6 | 3551/ 5600 batches | train loss 0.2728106 +| epoch 6 | 3555/ 5600 batches | train loss 0.3928564 +| epoch 6 | 3559/ 5600 batches | train loss 0.4152620 +| epoch 6 | 3563/ 5600 batches | train loss 0.4894206 +| epoch 6 | 3567/ 5600 batches | train loss 0.3462759 +| epoch 6 | 3571/ 5600 batches | train loss 0.3886257 +| epoch 6 | 3575/ 5600 batches | train loss 0.3241536 +| epoch 6 | 3579/ 5600 batches | train loss 0.4237803 +| epoch 6 | 3583/ 5600 batches | train loss 0.4405451 +| epoch 6 | 3587/ 5600 batches | train loss 0.4503090 +| epoch 6 | 3591/ 5600 batches | train loss 0.4659988 +| epoch 6 | 3595/ 5600 batches | train loss 0.2883987 +| epoch 6 | 3599/ 5600 batches | train loss 0.3657436 +| epoch 6 | 3603/ 5600 batches | train loss 0.4395716 +| epoch 6 | 3607/ 5600 batches | train loss 0.3756020 +| epoch 6 | 3611/ 5600 batches | train loss 0.4107354 +| epoch 6 | 3615/ 5600 batches | train loss 0.3620929 +| epoch 6 | 3619/ 5600 batches | train loss 0.2606227 +| epoch 6 | 3623/ 5600 batches | train loss 0.4965605 +| epoch 6 | 3627/ 5600 batches | train loss 0.3226745 +| epoch 6 | 3631/ 5600 batches | train loss 0.4291996 +| epoch 6 | 3635/ 5600 batches | train loss 0.3907433 +| epoch 6 | 3639/ 5600 batches | train loss 0.3035423 +| epoch 6 | 3643/ 5600 batches | train loss 0.4196887 +| epoch 6 | 3647/ 5600 batches | train loss 0.3867598 +| epoch 6 | 3651/ 5600 batches | train loss 0.3632960 +| epoch 6 | 3655/ 5600 batches | train loss 0.3889858 +| epoch 6 | 3659/ 5600 batches | train loss 0.3414380 +| epoch 6 | 3663/ 5600 batches | train loss 0.3175684 +| epoch 6 | 3667/ 5600 batches | train loss 0.2526065 +| epoch 6 | 3671/ 5600 batches | train loss 0.3700950 +| epoch 6 | 3675/ 5600 batches | train loss 0.3732004 +| epoch 6 | 3679/ 5600 batches | train loss 0.4381504 +| epoch 6 | 3683/ 5600 batches | train loss 0.3562047 +| epoch 6 | 3687/ 5600 batches | train loss 0.3449490 +| epoch 6 | 3691/ 5600 batches | train loss 0.3965841 +| epoch 6 | 3695/ 5600 batches | train loss 0.3634435 +| epoch 6 | 3699/ 5600 batches | train loss 0.4207131 +| epoch 6 | 3703/ 5600 batches | train loss 0.3625991 +| epoch 6 | 3707/ 5600 batches | train loss 0.3977010 +| epoch 6 | 3711/ 5600 batches | train loss 0.4389139 +| epoch 6 | 3715/ 5600 batches | train loss 0.4105289 +| epoch 6 | 3719/ 5600 batches | train loss 0.3821503 +| epoch 6 | 3723/ 5600 batches | train loss 0.3873565 +| epoch 6 | 3727/ 5600 batches | train loss 0.3658602 +| epoch 6 | 3731/ 5600 batches | train loss 0.4061052 +| epoch 6 | 3735/ 5600 batches | train loss 0.4714027 +| epoch 6 | 3739/ 5600 batches | train loss 0.3692966 +| epoch 6 | 3743/ 5600 batches | train loss 0.3616753 +| epoch 6 | 3747/ 5600 batches | train loss 0.4075038 +| epoch 6 | 3751/ 5600 batches | train loss 0.3734357 +| epoch 6 | 3755/ 5600 batches | train loss 0.3448306 +| epoch 6 | 3759/ 5600 batches | train loss 0.3926441 +| epoch 6 | 3763/ 5600 batches | train loss 0.1629146 +| epoch 6 | 3767/ 5600 batches | train loss 0.3437028 +| epoch 6 | 3771/ 5600 batches | train loss 0.4403107 +| epoch 6 | 3775/ 5600 batches | train loss 0.3441612 +| epoch 6 | 3779/ 5600 batches | train loss 0.4298399 +| epoch 6 | 3783/ 5600 batches | train loss 0.4420428 +| epoch 6 | 3787/ 5600 batches | train loss 0.4587845 +| epoch 6 | 3791/ 5600 batches | train loss 0.3363393 +| epoch 6 | 3795/ 5600 batches | train loss 0.3150060 +| epoch 6 | 3799/ 5600 batches | train loss 0.3648370 +| epoch 6 | 3803/ 5600 batches | train loss 0.3342663 +| epoch 6 | 3807/ 5600 batches | train loss 0.3916843 +| epoch 6 | 3811/ 5600 batches | train loss 0.4307706 +| epoch 6 | 3815/ 5600 batches | train loss 0.4430417 +| epoch 6 | 3819/ 5600 batches | train loss 0.3497540 +| epoch 6 | 3823/ 5600 batches | train loss 0.3884680 +| epoch 6 | 3827/ 5600 batches | train loss 0.4028049 +| epoch 6 | 3831/ 5600 batches | train loss 0.3611620 +| epoch 6 | 3835/ 5600 batches | train loss 0.3554847 +| epoch 6 | 3839/ 5600 batches | train loss 0.3604911 +| epoch 6 | 3843/ 5600 batches | train loss 0.4052170 +| epoch 6 | 3847/ 5600 batches | train loss 0.3541604 +| epoch 6 | 3851/ 5600 batches | train loss 0.2682524 +| epoch 6 | 3855/ 5600 batches | train loss 0.3675485 +| epoch 6 | 3859/ 5600 batches | train loss 0.4498077 +| epoch 6 | 3863/ 5600 batches | train loss 0.4811049 +| epoch 6 | 3867/ 5600 batches | train loss 0.4815711 +| epoch 6 | 3871/ 5600 batches | train loss 0.3242750 +| epoch 6 | 3875/ 5600 batches | train loss 0.4322422 +| epoch 6 | 3879/ 5600 batches | train loss 0.3892058 +| epoch 6 | 3883/ 5600 batches | train loss 0.4199039 +| epoch 6 | 3887/ 5600 batches | train loss 0.4186867 +| epoch 6 | 3891/ 5600 batches | train loss 0.3456946 +| epoch 6 | 3895/ 5600 batches | train loss 0.3434306 +| epoch 6 | 3899/ 5600 batches | train loss 0.3797760 +| epoch 6 | 3903/ 5600 batches | train loss 0.4739512 +| epoch 6 | 3907/ 5600 batches | train loss 0.4012861 +| epoch 6 | 3911/ 5600 batches | train loss 0.3769537 +| epoch 6 | 3915/ 5600 batches | train loss 0.4088356 +| epoch 6 | 3919/ 5600 batches | train loss 0.4378719 +| epoch 6 | 3923/ 5600 batches | train loss 0.3138204 +| epoch 6 | 3927/ 5600 batches | train loss 0.3572871 +| epoch 6 | 3931/ 5600 batches | train loss 0.4168061 +| epoch 6 | 3935/ 5600 batches | train loss 0.4476162 +| epoch 6 | 3939/ 5600 batches | train loss 0.3576057 +| epoch 6 | 3943/ 5600 batches | train loss 0.4695879 +| epoch 6 | 3947/ 5600 batches | train loss 0.3589495 +| epoch 6 | 3951/ 5600 batches | train loss 0.3684458 +| epoch 6 | 3955/ 5600 batches | train loss 0.4277328 +| epoch 6 | 3959/ 5600 batches | train loss 0.4139260 +| epoch 6 | 3963/ 5600 batches | train loss 0.3451324 +| epoch 6 | 3967/ 5600 batches | train loss 0.4776049 +| epoch 6 | 3971/ 5600 batches | train loss 0.3280041 +| epoch 6 | 3975/ 5600 batches | train loss 0.4371988 +| epoch 6 | 3979/ 5600 batches | train loss 0.2615159 +| epoch 6 | 3983/ 5600 batches | train loss 0.4283553 +| epoch 6 | 3987/ 5600 batches | train loss 0.4229078 +| epoch 6 | 3991/ 5600 batches | train loss 0.4056076 +| epoch 6 | 3995/ 5600 batches | train loss 0.3583746 +| epoch 6 | 3999/ 5600 batches | train loss 0.2934188 +| epoch 6 | 4003/ 5600 batches | train loss 0.4067859 +| epoch 6 | 4007/ 5600 batches | train loss 0.3794967 +| epoch 6 | 4011/ 5600 batches | train loss 0.4061914 +| epoch 6 | 4015/ 5600 batches | train loss 0.3989335 +| epoch 6 | 4019/ 5600 batches | train loss 0.3629868 +| epoch 6 | 4023/ 5600 batches | train loss 0.3910154 +| epoch 6 | 4027/ 5600 batches | train loss 0.3777607 +| epoch 6 | 4031/ 5600 batches | train loss 0.4337906 +| epoch 6 | 4035/ 5600 batches | train loss 0.4715412 +| epoch 6 | 4039/ 5600 batches | train loss 0.4555861 +| epoch 6 | 4043/ 5600 batches | train loss 0.3761802 +| epoch 6 | 4047/ 5600 batches | train loss 0.4167395 +| epoch 6 | 4051/ 5600 batches | train loss 0.4111976 +| epoch 6 | 4055/ 5600 batches | train loss 0.4002318 +| epoch 6 | 4059/ 5600 batches | train loss 0.3861968 +| epoch 6 | 4063/ 5600 batches | train loss 0.3944116 +| epoch 6 | 4067/ 5600 batches | train loss 0.3873861 +| epoch 6 | 4071/ 5600 batches | train loss 0.4642729 +| epoch 6 | 4075/ 5600 batches | train loss 0.3509178 +| epoch 6 | 4079/ 5600 batches | train loss 0.4369466 +| epoch 6 | 4083/ 5600 batches | train loss 0.3706335 +| epoch 6 | 4087/ 5600 batches | train loss 0.3674862 +| epoch 6 | 4091/ 5600 batches | train loss 0.3910318 +| epoch 6 | 4095/ 5600 batches | train loss 0.4632994 +| epoch 6 | 4099/ 5600 batches | train loss 0.4424514 +| epoch 6 | 4103/ 5600 batches | train loss 0.3470619 +| epoch 6 | 4107/ 5600 batches | train loss 0.3354409 +| epoch 6 | 4111/ 5600 batches | train loss 0.3786862 +| epoch 6 | 4115/ 5600 batches | train loss 0.3249421 +| epoch 6 | 4119/ 5600 batches | train loss 0.4104844 +| epoch 6 | 4123/ 5600 batches | train loss 0.3619310 +| epoch 6 | 4127/ 5600 batches | train loss 0.3435011 +| epoch 6 | 4131/ 5600 batches | train loss 0.4251045 +| epoch 6 | 4135/ 5600 batches | train loss 0.3133131 +| epoch 6 | 4139/ 5600 batches | train loss 0.3818258 +| epoch 6 | 4143/ 5600 batches | train loss 0.3757447 +| epoch 6 | 4147/ 5600 batches | train loss 0.4707288 +| epoch 6 | 4151/ 5600 batches | train loss 0.3633183 +| epoch 6 | 4155/ 5600 batches | train loss 0.3283329 +| epoch 6 | 4159/ 5600 batches | train loss 0.4263942 +| epoch 6 | 4163/ 5600 batches | train loss 0.3808244 +| epoch 6 | 4167/ 5600 batches | train loss 0.3547177 +| epoch 6 | 4171/ 5600 batches | train loss 0.3314912 +| epoch 6 | 4175/ 5600 batches | train loss 0.4230242 +| epoch 6 | 4179/ 5600 batches | train loss 0.4183179 +| epoch 6 | 4183/ 5600 batches | train loss 0.3648415 +| epoch 6 | 4187/ 5600 batches | train loss 0.3631618 +| epoch 6 | 4191/ 5600 batches | train loss 0.3707300 +| epoch 6 | 4195/ 5600 batches | train loss 0.3925188 +| epoch 6 | 4199/ 5600 batches | train loss 0.3790566 +| epoch 6 | 4203/ 5600 batches | train loss 0.3895926 +| epoch 6 | 4207/ 5600 batches | train loss 0.3785151 +| epoch 6 | 4211/ 5600 batches | train loss 0.3577356 +| epoch 6 | 4215/ 5600 batches | train loss 0.4146350 +| epoch 6 | 4219/ 5600 batches | train loss 0.4711449 +| epoch 6 | 4223/ 5600 batches | train loss 0.4463338 +| epoch 6 | 4227/ 5600 batches | train loss 0.4079145 +| epoch 6 | 4231/ 5600 batches | train loss 0.3203906 +| epoch 6 | 4235/ 5600 batches | train loss 0.3435737 +| epoch 6 | 4239/ 5600 batches | train loss 0.3813654 +| epoch 6 | 4243/ 5600 batches | train loss 0.3595105 +| epoch 6 | 4247/ 5600 batches | train loss 0.3948522 +| epoch 6 | 4251/ 5600 batches | train loss 0.4576764 +| epoch 6 | 4255/ 5600 batches | train loss 0.3457398 +| epoch 6 | 4259/ 5600 batches | train loss 0.3110205 +| epoch 6 | 4263/ 5600 batches | train loss 0.3562723 +| epoch 6 | 4267/ 5600 batches | train loss 0.3620979 +| epoch 6 | 4271/ 5600 batches | train loss 0.3627553 +| epoch 6 | 4275/ 5600 batches | train loss 0.3123973 +| epoch 6 | 4279/ 5600 batches | train loss 0.3605493 +| epoch 6 | 4283/ 5600 batches | train loss 0.2702991 +| epoch 6 | 4287/ 5600 batches | train loss 0.4054016 +| epoch 6 | 4291/ 5600 batches | train loss 0.4088599 +| epoch 6 | 4295/ 5600 batches | train loss 0.3572065 +| epoch 6 | 4299/ 5600 batches | train loss 0.3430614 +| epoch 6 | 4303/ 5600 batches | train loss 0.3645428 +| epoch 6 | 4307/ 5600 batches | train loss 0.4531926 +| epoch 6 | 4311/ 5600 batches | train loss 0.4166545 +| epoch 6 | 4315/ 5600 batches | train loss 0.4535524 +| epoch 6 | 4319/ 5600 batches | train loss 0.4071257 +| epoch 6 | 4323/ 5600 batches | train loss 0.4155126 +| epoch 6 | 4327/ 5600 batches | train loss 0.2365575 +| epoch 6 | 4331/ 5600 batches | train loss 0.3529501 +| epoch 6 | 4335/ 5600 batches | train loss 0.3753062 +| epoch 6 | 4339/ 5600 batches | train loss 0.4335318 +| epoch 6 | 4343/ 5600 batches | train loss 0.3222667 +| epoch 6 | 4347/ 5600 batches | train loss 0.3978062 +| epoch 6 | 4351/ 5600 batches | train loss 0.3707820 +| epoch 6 | 4355/ 5600 batches | train loss 0.3596121 +| epoch 6 | 4359/ 5600 batches | train loss 0.4135615 +| epoch 6 | 4363/ 5600 batches | train loss 0.3183228 +| epoch 6 | 4367/ 5600 batches | train loss 0.4319327 +| epoch 6 | 4371/ 5600 batches | train loss 0.3393519 +| epoch 6 | 4375/ 5600 batches | train loss 0.4353173 +| epoch 6 | 4379/ 5600 batches | train loss 0.4461423 +| epoch 6 | 4383/ 5600 batches | train loss 0.4124848 +| epoch 6 | 4387/ 5600 batches | train loss 0.3838828 +| epoch 6 | 4391/ 5600 batches | train loss 0.3900368 +| epoch 6 | 4395/ 5600 batches | train loss 0.4074965 +| epoch 6 | 4399/ 5600 batches | train loss 0.4022453 +| epoch 6 | 4403/ 5600 batches | train loss 0.2426844 +| epoch 6 | 4407/ 5600 batches | train loss 0.4131160 +| epoch 6 | 4411/ 5600 batches | train loss 0.4212482 +| epoch 6 | 4415/ 5600 batches | train loss 0.3579131 +| epoch 6 | 4419/ 5600 batches | train loss 0.3778140 +| epoch 6 | 4423/ 5600 batches | train loss 0.4015070 +| epoch 6 | 4427/ 5600 batches | train loss 0.3456198 +| epoch 6 | 4431/ 5600 batches | train loss 0.3745520 +| epoch 6 | 4435/ 5600 batches | train loss 0.3778664 +| epoch 6 | 4439/ 5600 batches | train loss 0.3347087 +| epoch 6 | 4443/ 5600 batches | train loss 0.4147978 +| epoch 6 | 4447/ 5600 batches | train loss 0.3751961 +| epoch 6 | 4451/ 5600 batches | train loss 0.3544753 +| epoch 6 | 4455/ 5600 batches | train loss 0.4360531 +| epoch 6 | 4459/ 5600 batches | train loss 0.3864582 +| epoch 6 | 4463/ 5600 batches | train loss 0.4382212 +| epoch 6 | 4467/ 5600 batches | train loss 0.3999063 +| epoch 6 | 4471/ 5600 batches | train loss 0.3374558 +| epoch 6 | 4475/ 5600 batches | train loss 0.4376301 +| epoch 6 | 4479/ 5600 batches | train loss 0.3803498 +| epoch 6 | 4483/ 5600 batches | train loss 0.3524978 +| epoch 6 | 4487/ 5600 batches | train loss 0.4285623 +| epoch 6 | 4491/ 5600 batches | train loss 0.4452940 +| epoch 6 | 4495/ 5600 batches | train loss 0.4186505 +| epoch 6 | 4499/ 5600 batches | train loss 0.3889924 +| epoch 6 | 4503/ 5600 batches | train loss 0.3785976 +| epoch 6 | 4507/ 5600 batches | train loss 0.3803702 +| epoch 6 | 4511/ 5600 batches | train loss 0.4182502 +| epoch 6 | 4515/ 5600 batches | train loss 0.4127614 +| epoch 6 | 4519/ 5600 batches | train loss 0.4052494 +| epoch 6 | 4523/ 5600 batches | train loss 0.4035742 +| epoch 6 | 4527/ 5600 batches | train loss 0.3183585 +| epoch 6 | 4531/ 5600 batches | train loss 0.3152898 +| epoch 6 | 4535/ 5600 batches | train loss 0.3883017 +| epoch 6 | 4539/ 5600 batches | train loss 0.2368997 +| epoch 6 | 4543/ 5600 batches | train loss 0.3866609 +| epoch 6 | 4547/ 5600 batches | train loss 0.3510467 +| epoch 6 | 4551/ 5600 batches | train loss 0.3757698 +| epoch 6 | 4555/ 5600 batches | train loss 0.3978922 +| epoch 6 | 4559/ 5600 batches | train loss 0.3971674 +| epoch 6 | 4563/ 5600 batches | train loss 0.3753682 +| epoch 6 | 4567/ 5600 batches | train loss 0.3857504 +| epoch 6 | 4571/ 5600 batches | train loss 0.3648739 +| epoch 6 | 4575/ 5600 batches | train loss 0.4507649 +| epoch 6 | 4579/ 5600 batches | train loss 0.3998391 +| epoch 6 | 4583/ 5600 batches | train loss 0.4683705 +| epoch 6 | 4587/ 5600 batches | train loss 0.3655298 +| epoch 6 | 4591/ 5600 batches | train loss 0.3862708 +| epoch 6 | 4595/ 5600 batches | train loss 0.3996046 +| epoch 6 | 4599/ 5600 batches | train loss 0.3659449 +| epoch 6 | 4603/ 5600 batches | train loss 0.3958466 +| epoch 6 | 4607/ 5600 batches | train loss 0.3226326 +| epoch 6 | 4611/ 5600 batches | train loss 0.3210680 +| epoch 6 | 4615/ 5600 batches | train loss 0.4009230 +| epoch 6 | 4619/ 5600 batches | train loss 0.3477730 +| epoch 6 | 4623/ 5600 batches | train loss 0.3518984 +| epoch 6 | 4627/ 5600 batches | train loss 0.3931780 +| epoch 6 | 4631/ 5600 batches | train loss 0.3738842 +| epoch 6 | 4635/ 5600 batches | train loss 0.3836021 +| epoch 6 | 4639/ 5600 batches | train loss 0.4231903 +| epoch 6 | 4643/ 5600 batches | train loss 0.3911550 +| epoch 6 | 4647/ 5600 batches | train loss 0.3559051 +| epoch 6 | 4651/ 5600 batches | train loss 0.3707041 +| epoch 6 | 4655/ 5600 batches | train loss 0.2862494 +| epoch 6 | 4659/ 5600 batches | train loss 0.3951985 +| epoch 6 | 4663/ 5600 batches | train loss 0.3870600 +| epoch 6 | 4667/ 5600 batches | train loss 0.4109052 +| epoch 6 | 4671/ 5600 batches | train loss 0.3285594 +| epoch 6 | 4675/ 5600 batches | train loss 0.4166854 +| epoch 6 | 4679/ 5600 batches | train loss 0.3880826 +| epoch 6 | 4683/ 5600 batches | train loss 0.3596929 +| epoch 6 | 4687/ 5600 batches | train loss 0.3536000 +| epoch 6 | 4691/ 5600 batches | train loss 0.4027423 +| epoch 6 | 4695/ 5600 batches | train loss 0.3627227 +| epoch 6 | 4699/ 5600 batches | train loss 0.3971487 +| epoch 6 | 4703/ 5600 batches | train loss 0.4114263 +| epoch 6 | 4707/ 5600 batches | train loss 0.3984499 +| epoch 6 | 4711/ 5600 batches | train loss 0.3813447 +| epoch 6 | 4715/ 5600 batches | train loss 0.3907489 +| epoch 6 | 4719/ 5600 batches | train loss 0.4510961 +| epoch 6 | 4723/ 5600 batches | train loss 0.4073455 +| epoch 6 | 4727/ 5600 batches | train loss 0.3653516 +| epoch 6 | 4731/ 5600 batches | train loss 0.4182029 +| epoch 6 | 4735/ 5600 batches | train loss 0.3769964 +| epoch 6 | 4739/ 5600 batches | train loss 0.4104856 +| epoch 6 | 4743/ 5600 batches | train loss 0.4483479 +| epoch 6 | 4747/ 5600 batches | train loss 0.4662901 +| epoch 6 | 4751/ 5600 batches | train loss 0.3427802 +| epoch 6 | 4755/ 5600 batches | train loss 0.3783588 +| epoch 6 | 4759/ 5600 batches | train loss 0.3900844 +| epoch 6 | 4763/ 5600 batches | train loss 0.3697047 +| epoch 6 | 4767/ 5600 batches | train loss 0.4829871 +| epoch 6 | 4771/ 5600 batches | train loss 0.2445436 +| epoch 6 | 4775/ 5600 batches | train loss 0.3714974 +| epoch 6 | 4779/ 5600 batches | train loss 0.3775330 +| epoch 6 | 4783/ 5600 batches | train loss 0.3466691 +| epoch 6 | 4787/ 5600 batches | train loss 0.3539462 +| epoch 6 | 4791/ 5600 batches | train loss 0.3767698 +| epoch 6 | 4795/ 5600 batches | train loss 0.4419737 +| epoch 6 | 4799/ 5600 batches | train loss 0.4241264 +| epoch 6 | 4803/ 5600 batches | train loss 0.3315256 +| epoch 6 | 4807/ 5600 batches | train loss 0.3625317 +| epoch 6 | 4811/ 5600 batches | train loss 0.4255548 +| epoch 6 | 4815/ 5600 batches | train loss 0.3404604 +| epoch 6 | 4819/ 5600 batches | train loss 0.4610407 +| epoch 6 | 4823/ 5600 batches | train loss 0.3583063 +| epoch 6 | 4827/ 5600 batches | train loss 0.4250138 +| epoch 6 | 4831/ 5600 batches | train loss 0.3310032 +| epoch 6 | 4835/ 5600 batches | train loss 0.4173672 +| epoch 6 | 4839/ 5600 batches | train loss 0.3941284 +| epoch 6 | 4843/ 5600 batches | train loss 0.3885147 +| epoch 6 | 4847/ 5600 batches | train loss 0.4794479 +| epoch 6 | 4851/ 5600 batches | train loss 0.4418163 +| epoch 6 | 4855/ 5600 batches | train loss 0.3875105 +| epoch 6 | 4859/ 5600 batches | train loss 0.3686275 +| epoch 6 | 4863/ 5600 batches | train loss 0.4342138 +| epoch 6 | 4867/ 5600 batches | train loss 0.3432363 +| epoch 6 | 4871/ 5600 batches | train loss 0.4134070 +| epoch 6 | 4875/ 5600 batches | train loss 0.3448461 +| epoch 6 | 4879/ 5600 batches | train loss 0.3972375 +| epoch 6 | 4883/ 5600 batches | train loss 0.4154665 +| epoch 6 | 4887/ 5600 batches | train loss 0.2852226 +| epoch 6 | 4891/ 5600 batches | train loss 0.3841837 +| epoch 6 | 4895/ 5600 batches | train loss 0.4070731 +| epoch 6 | 4899/ 5600 batches | train loss 0.3083292 +| epoch 6 | 4903/ 5600 batches | train loss 0.4097580 +| epoch 6 | 4907/ 5600 batches | train loss 0.4050516 +| epoch 6 | 4911/ 5600 batches | train loss 0.3321096 +| epoch 6 | 4915/ 5600 batches | train loss 0.3933943 +| epoch 6 | 4919/ 5600 batches | train loss 0.3677518 +| epoch 6 | 4923/ 5600 batches | train loss 0.3973115 +| epoch 6 | 4927/ 5600 batches | train loss 0.4495035 +| epoch 6 | 4931/ 5600 batches | train loss 0.4094352 +| epoch 6 | 4935/ 5600 batches | train loss 0.3760419 +| epoch 6 | 4939/ 5600 batches | train loss 0.3710720 +| epoch 6 | 4943/ 5600 batches | train loss 0.3949412 +| epoch 6 | 4947/ 5600 batches | train loss 0.3408561 +| epoch 6 | 4951/ 5600 batches | train loss 0.4105094 +| epoch 6 | 4955/ 5600 batches | train loss 0.2510068 +| epoch 6 | 4959/ 5600 batches | train loss 0.4851436 +| epoch 6 | 4963/ 5600 batches | train loss 0.4087948 +| epoch 6 | 4967/ 5600 batches | train loss 0.3914952 +| epoch 6 | 4971/ 5600 batches | train loss 0.2724829 +| epoch 6 | 4975/ 5600 batches | train loss 0.3984528 +| epoch 6 | 4979/ 5600 batches | train loss 0.3771991 +| epoch 6 | 4983/ 5600 batches | train loss 0.4628368 +| epoch 6 | 4987/ 5600 batches | train loss 0.3702704 +| epoch 6 | 4991/ 5600 batches | train loss 0.3899584 +| epoch 6 | 4995/ 5600 batches | train loss 0.3318210 +| epoch 6 | 4999/ 5600 batches | train loss 0.4396452 +| epoch 6 | 5003/ 5600 batches | train loss 0.3339359 +| epoch 6 | 5007/ 5600 batches | train loss 0.3948959 +| epoch 6 | 5011/ 5600 batches | train loss 0.5231713 +| epoch 6 | 5015/ 5600 batches | train loss 0.4589714 +| epoch 6 | 5019/ 5600 batches | train loss 0.3085595 +| epoch 6 | 5023/ 5600 batches | train loss 0.3384213 +| epoch 6 | 5027/ 5600 batches | train loss 0.3689829 +| epoch 6 | 5031/ 5600 batches | train loss 0.3888531 +| epoch 6 | 5035/ 5600 batches | train loss 0.2959192 +| epoch 6 | 5039/ 5600 batches | train loss 0.3686201 +| epoch 6 | 5043/ 5600 batches | train loss 0.1651746 +| epoch 6 | 5047/ 5600 batches | train loss 0.3753059 +| epoch 6 | 5051/ 5600 batches | train loss 0.3827589 +| epoch 6 | 5055/ 5600 batches | train loss 0.4053668 +| epoch 6 | 5059/ 5600 batches | train loss 0.4371988 +| epoch 6 | 5063/ 5600 batches | train loss 0.4383250 +| epoch 6 | 5067/ 5600 batches | train loss 0.3986424 +| epoch 6 | 5071/ 5600 batches | train loss 0.4227171 +| epoch 6 | 5075/ 5600 batches | train loss 0.3399647 +| epoch 6 | 5079/ 5600 batches | train loss 0.3961062 +| epoch 6 | 5083/ 5600 batches | train loss 0.3538097 +| epoch 6 | 5087/ 5600 batches | train loss 0.4324370 +| epoch 6 | 5091/ 5600 batches | train loss 0.3852383 +| epoch 6 | 5095/ 5600 batches | train loss 0.3672618 +| epoch 6 | 5099/ 5600 batches | train loss 0.2825603 +| epoch 6 | 5103/ 5600 batches | train loss 0.3408064 +| epoch 6 | 5107/ 5600 batches | train loss 0.3245589 +| epoch 6 | 5111/ 5600 batches | train loss 0.3927332 +| epoch 6 | 5115/ 5600 batches | train loss 0.4287481 +| epoch 6 | 5119/ 5600 batches | train loss 0.4032788 +| epoch 6 | 5123/ 5600 batches | train loss 0.4251512 +| epoch 6 | 5127/ 5600 batches | train loss 0.3766441 +| epoch 6 | 5131/ 5600 batches | train loss 0.3715802 +| epoch 6 | 5135/ 5600 batches | train loss 0.4426603 +| epoch 6 | 5139/ 5600 batches | train loss 0.3466134 +| epoch 6 | 5143/ 5600 batches | train loss 0.3964273 +| epoch 6 | 5147/ 5600 batches | train loss 0.4157145 +| epoch 6 | 5151/ 5600 batches | train loss 0.4712653 +| epoch 6 | 5155/ 5600 batches | train loss 0.4090536 +| epoch 6 | 5159/ 5600 batches | train loss 0.3262013 +| epoch 6 | 5163/ 5600 batches | train loss 0.5071117 +| epoch 6 | 5167/ 5600 batches | train loss 0.4433094 +| epoch 6 | 5171/ 5600 batches | train loss 0.3907193 +| epoch 6 | 5175/ 5600 batches | train loss 0.3399377 +| epoch 6 | 5179/ 5600 batches | train loss 0.4483833 +| epoch 6 | 5183/ 5600 batches | train loss 0.3580848 +| epoch 6 | 5187/ 5600 batches | train loss 0.3241394 +| epoch 6 | 5191/ 5600 batches | train loss 0.3693008 +| epoch 6 | 5195/ 5600 batches | train loss 0.4011411 +| epoch 6 | 5199/ 5600 batches | train loss 0.3942015 +| epoch 6 | 5203/ 5600 batches | train loss 0.3553572 +| epoch 6 | 5207/ 5600 batches | train loss 0.4009576 +| epoch 6 | 5211/ 5600 batches | train loss 0.4038531 +| epoch 6 | 5215/ 5600 batches | train loss 0.3377155 +| epoch 6 | 5219/ 5600 batches | train loss 0.4711297 +| epoch 6 | 5223/ 5600 batches | train loss 0.3709847 +| epoch 6 | 5227/ 5600 batches | train loss 0.3542925 +| epoch 6 | 5231/ 5600 batches | train loss 0.3656061 +| epoch 6 | 5235/ 5600 batches | train loss 0.4065434 +| epoch 6 | 5239/ 5600 batches | train loss 0.4036916 +| epoch 6 | 5243/ 5600 batches | train loss 0.4934713 +| epoch 6 | 5247/ 5600 batches | train loss 0.4082603 +| epoch 6 | 5251/ 5600 batches | train loss 0.4115708 +| epoch 6 | 5255/ 5600 batches | train loss 0.4454677 +| epoch 6 | 5259/ 5600 batches | train loss 0.4734234 +| epoch 6 | 5263/ 5600 batches | train loss 0.3472853 +| epoch 6 | 5267/ 5600 batches | train loss 0.3759326 +| epoch 6 | 5271/ 5600 batches | train loss 0.4014879 +| epoch 6 | 5275/ 5600 batches | train loss 0.4543015 +| epoch 6 | 5279/ 5600 batches | train loss 0.3353862 +| epoch 6 | 5283/ 5600 batches | train loss 0.2945226 +| epoch 6 | 5287/ 5600 batches | train loss 0.3397053 +| epoch 6 | 5291/ 5600 batches | train loss 0.3698370 +| epoch 6 | 5295/ 5600 batches | train loss 0.3911367 +| epoch 6 | 5299/ 5600 batches | train loss 0.4081149 +| epoch 6 | 5303/ 5600 batches | train loss 0.4162547 +| epoch 6 | 5307/ 5600 batches | train loss 0.3114139 +| epoch 6 | 5311/ 5600 batches | train loss 0.4480568 +| epoch 6 | 5315/ 5600 batches | train loss 0.3960301 +| epoch 6 | 5319/ 5600 batches | train loss 0.3322543 +| epoch 6 | 5323/ 5600 batches | train loss 0.3741826 +| epoch 6 | 5327/ 5600 batches | train loss 0.3322775 +| epoch 6 | 5331/ 5600 batches | train loss 0.4583483 +| epoch 6 | 5335/ 5600 batches | train loss 0.4366198 +| epoch 6 | 5339/ 5600 batches | train loss 0.3533985 +| epoch 6 | 5343/ 5600 batches | train loss 0.3770083 +| epoch 6 | 5347/ 5600 batches | train loss 0.3678035 +| epoch 6 | 5351/ 5600 batches | train loss 0.3968531 +| epoch 6 | 5355/ 5600 batches | train loss 0.3639940 +| epoch 6 | 5359/ 5600 batches | train loss 0.3902107 +| epoch 6 | 5363/ 5600 batches | train loss 0.4356470 +| epoch 6 | 5367/ 5600 batches | train loss 0.4765531 +| epoch 6 | 5371/ 5600 batches | train loss 0.4005078 +| epoch 6 | 5375/ 5600 batches | train loss 0.3476183 +| epoch 6 | 5379/ 5600 batches | train loss 0.3511004 +| epoch 6 | 5383/ 5600 batches | train loss 0.3644282 +| epoch 6 | 5387/ 5600 batches | train loss 0.4548525 +| epoch 6 | 5391/ 5600 batches | train loss 0.3430833 +| epoch 6 | 5395/ 5600 batches | train loss 0.2995445 +| epoch 6 | 5399/ 5600 batches | train loss 0.3505396 +| epoch 6 | 5403/ 5600 batches | train loss 0.3679093 +| epoch 6 | 5407/ 5600 batches | train loss 0.3467836 +| epoch 6 | 5411/ 5600 batches | train loss 0.3864928 +| epoch 6 | 5415/ 5600 batches | train loss 0.3306838 +| epoch 6 | 5419/ 5600 batches | train loss 0.3920439 +| epoch 6 | 5423/ 5600 batches | train loss 0.4374300 +| epoch 6 | 5427/ 5600 batches | train loss 0.3361704 +| epoch 6 | 5431/ 5600 batches | train loss 0.4168930 +| epoch 6 | 5435/ 5600 batches | train loss 0.4199919 +| epoch 6 | 5439/ 5600 batches | train loss 0.3858464 +| epoch 6 | 5443/ 5600 batches | train loss 0.3187853 +| epoch 6 | 5447/ 5600 batches | train loss 0.3998044 +| epoch 6 | 5451/ 5600 batches | train loss 0.3433589 +| epoch 6 | 5455/ 5600 batches | train loss 0.3883138 +| epoch 6 | 5459/ 5600 batches | train loss 0.4157273 +| epoch 6 | 5463/ 5600 batches | train loss 0.4416899 +| epoch 6 | 5467/ 5600 batches | train loss 0.4401953 +| epoch 6 | 5471/ 5600 batches | train loss 0.3616734 +| epoch 6 | 5475/ 5600 batches | train loss 0.3113140 +| epoch 6 | 5479/ 5600 batches | train loss 0.3448038 +| epoch 6 | 5483/ 5600 batches | train loss 0.3128497 +| epoch 6 | 5487/ 5600 batches | train loss 0.3301814 +| epoch 6 | 5491/ 5600 batches | train loss 0.3550997 +| epoch 6 | 5495/ 5600 batches | train loss 0.3290747 +| epoch 6 | 5499/ 5600 batches | train loss 0.3816155 +| epoch 6 | 5503/ 5600 batches | train loss 0.3635746 +| epoch 6 | 5507/ 5600 batches | train loss 0.4204251 +| epoch 6 | 5511/ 5600 batches | train loss 0.3716087 +| epoch 6 | 5515/ 5600 batches | train loss 0.3975386 +| epoch 6 | 5519/ 5600 batches | train loss 0.4067867 +| epoch 6 | 5523/ 5600 batches | train loss 0.4228932 +| epoch 6 | 5527/ 5600 batches | train loss 0.3671919 +| epoch 6 | 5531/ 5600 batches | train loss 0.3812105 +| epoch 6 | 5535/ 5600 batches | train loss 0.4175417 +| epoch 6 | 5539/ 5600 batches | train loss 0.3218566 +| epoch 6 | 5543/ 5600 batches | train loss 0.4008225 +| epoch 6 | 5547/ 5600 batches | train loss 0.4088737 +| epoch 6 | 5551/ 5600 batches | train loss 0.5340939 +| epoch 6 | 5555/ 5600 batches | train loss 0.4074777 +| epoch 6 | 5559/ 5600 batches | train loss 0.3905498 +| epoch 6 | 5563/ 5600 batches | train loss 0.3529570 +| epoch 6 | 5567/ 5600 batches | train loss 0.3881623 +| epoch 6 | 5571/ 5600 batches | train loss 0.3496367 +| epoch 6 | 5575/ 5600 batches | train loss 0.4043606 +| epoch 6 | 5579/ 5600 batches | train loss 0.4327621 +| epoch 6 | 5583/ 5600 batches | train loss 0.4168770 +| epoch 6 | 5587/ 5600 batches | train loss 0.4362165 +| epoch 6 | 5591/ 5600 batches | train loss 0.3808844 +| epoch 6 | 5595/ 5600 batches | train loss 0.4545479 +| epoch 6 | 5599/ 5600 batches | train loss 0.3887545 +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 5600 batches | test loss 0.5713301 +| epoch 6 | 7/ 5600 batches | test loss 0.4205488 +| epoch 6 | 11/ 5600 batches | test loss 0.4065319 +| epoch 6 | 15/ 5600 batches | test loss 0.3818929 +| epoch 6 | 19/ 5600 batches | test loss 0.4266025 +| epoch 6 | 23/ 5600 batches | test loss 0.3726388 +| epoch 6 | 27/ 5600 batches | test loss 0.4436261 +| epoch 6 | 31/ 5600 batches | test loss 0.3933489 +| epoch 6 | 35/ 5600 batches | test loss 0.3687487 +| epoch 6 | 39/ 5600 batches | test loss 0.4475300 +| epoch 6 | 43/ 5600 batches | test loss 0.5323439 +| epoch 6 | 47/ 5600 batches | test loss 0.4949720 +| epoch 6 | 51/ 5600 batches | test loss 0.4522509 +| epoch 6 | 55/ 5600 batches | test loss 0.3648933 +| epoch 6 | 59/ 5600 batches | test loss 0.3733106 +| epoch 6 | 63/ 5600 batches | test loss 0.3942081 +| epoch 6 | 67/ 5600 batches | test loss 0.4495793 +| epoch 6 | 71/ 5600 batches | test loss 0.3423414 +| epoch 6 | 75/ 5600 batches | test loss 0.5126935 +| epoch 6 | 79/ 5600 batches | test loss 0.3912065 +| epoch 6 | 83/ 5600 batches | test loss 0.4222692 +| epoch 6 | 87/ 5600 batches | test loss 0.3877051 +| epoch 6 | 91/ 5600 batches | test loss 0.5869164 +| epoch 6 | 95/ 5600 batches | test loss 0.5069173 +| epoch 6 | 99/ 5600 batches | test loss 0.4032622 +| epoch 6 | 103/ 5600 batches | test loss 0.4279872 +| epoch 6 | 107/ 5600 batches | test loss 0.3640227 +| epoch 6 | 111/ 5600 batches | test loss 0.3355851 +| epoch 6 | 115/ 5600 batches | test loss 0.5485618 +| epoch 6 | 119/ 5600 batches | test loss 0.4221033 +| epoch 6 | 123/ 5600 batches | test loss 0.3840526 +| epoch 6 | 127/ 5600 batches | test loss 0.4656247 +| epoch 6 | 131/ 5600 batches | test loss 0.4344574 +| epoch 6 | 135/ 5600 batches | test loss 0.3732605 +| epoch 6 | 139/ 5600 batches | test loss 0.3612422 +| epoch 6 | 143/ 5600 batches | test loss 0.4794904 +| epoch 6 | 147/ 5600 batches | test loss 0.4112386 +| epoch 6 | 151/ 5600 batches | test loss 0.4390368 +| epoch 6 | 155/ 5600 batches | test loss 0.4952450 +| epoch 6 | 159/ 5600 batches | test loss 0.3702903 +| epoch 6 | 163/ 5600 batches | test loss 0.3278443 +| epoch 6 | 167/ 5600 batches | test loss 0.4323946 +| epoch 6 | 171/ 5600 batches | test loss 0.4543426 +| epoch 6 | 175/ 5600 batches | test loss 0.4519573 +| epoch 6 | 179/ 5600 batches | test loss 0.5097466 +| epoch 6 | 183/ 5600 batches | test loss 0.2533635 +| epoch 6 | 187/ 5600 batches | test loss 0.4260086 +| epoch 6 | 191/ 5600 batches | test loss 0.5035862 +| epoch 6 | 195/ 5600 batches | test loss 0.3465856 +| epoch 6 | 199/ 5600 batches | test loss 0.3096690 +| epoch 6 | 203/ 5600 batches | test loss 0.4142320 +| epoch 6 | 207/ 5600 batches | test loss 0.5230489 +| epoch 6 | 211/ 5600 batches | test loss 0.4258565 +| epoch 6 | 215/ 5600 batches | test loss 0.5680479 +| epoch 6 | 219/ 5600 batches | test loss 0.4396412 +| epoch 6 | 223/ 5600 batches | test loss 0.4642163 +| epoch 6 | 227/ 5600 batches | test loss 0.3682358 +| epoch 6 | 231/ 5600 batches | test loss 0.4249226 +| epoch 6 | 235/ 5600 batches | test loss 0.7792694 +| epoch 6 | 239/ 5600 batches | test loss 0.3133043 +| epoch 6 | 243/ 5600 batches | test loss 0.4097568 +| epoch 6 | 247/ 5600 batches | test loss 0.4843791 +| epoch 6 | 251/ 5600 batches | test loss 0.5290043 +| epoch 6 | 255/ 5600 batches | test loss 0.4214340 +| epoch 6 | 259/ 5600 batches | test loss 0.4087069 +| epoch 6 | 263/ 5600 batches | test loss 0.4389758 +| epoch 6 | 267/ 5600 batches | test loss 0.5255643 +| epoch 6 | 271/ 5600 batches | test loss 0.4490436 +| epoch 6 | 275/ 5600 batches | test loss 0.4450287 +| epoch 6 | 279/ 5600 batches | test loss 0.7365693 +| epoch 6 | 283/ 5600 batches | test loss 0.3584074 +| epoch 6 | 287/ 5600 batches | test loss 0.5033855 +| epoch 6 | 291/ 5600 batches | test loss 0.4555417 +| epoch 6 | 295/ 5600 batches | test loss 0.4203081 +| epoch 6 | 299/ 5600 batches | test loss 0.4543789 +| epoch 6 | 303/ 5600 batches | test loss 0.3484140 +| epoch 6 | 307/ 5600 batches | test loss 0.4757976 +| epoch 6 | 311/ 5600 batches | test loss 0.4259902 +| epoch 6 | 315/ 5600 batches | test loss 0.4940683 +| epoch 6 | 319/ 5600 batches | test loss 0.4754379 +| epoch 6 | 323/ 5600 batches | test loss 0.4088086 +| epoch 6 | 327/ 5600 batches | test loss 0.5533078 +| epoch 6 | 331/ 5600 batches | test loss 0.4600616 +| epoch 6 | 335/ 5600 batches | test loss 0.3798587 +| epoch 6 | 339/ 5600 batches | test loss 0.3778185 +| epoch 6 | 343/ 5600 batches | test loss 0.3547695 +| epoch 6 | 347/ 5600 batches | test loss 0.4133903 +| epoch 6 | 351/ 5600 batches | test loss 0.3845864 +| epoch 6 | 355/ 5600 batches | test loss 0.3918107 +| epoch 6 | 359/ 5600 batches | test loss 0.5641643 +| epoch 6 | 363/ 5600 batches | test loss 0.3662972 +| epoch 6 | 367/ 5600 batches | test loss 0.3875509 +| epoch 6 | 371/ 5600 batches | test loss 0.4557511 +| epoch 6 | 375/ 5600 batches | test loss 0.3591876 +| epoch 6 | 379/ 5600 batches | test loss 0.4609136 +| epoch 6 | 383/ 5600 batches | test loss 0.3762074 +| epoch 6 | 387/ 5600 batches | test loss 0.3857228 +| epoch 6 | 391/ 5600 batches | test loss 0.5770628 +| epoch 6 | 395/ 5600 batches | test loss 0.4193670 +| epoch 6 | 399/ 5600 batches | test loss 0.5693172 +| epoch 6 | 403/ 5600 batches | test loss 0.4927593 +| epoch 6 | 407/ 5600 batches | test loss 0.4075885 +| epoch 6 | 411/ 5600 batches | test loss 0.4602399 +| epoch 6 | 415/ 5600 batches | test loss 0.3558906 +| epoch 6 | 419/ 5600 batches | test loss 0.3746073 +| epoch 6 | 423/ 5600 batches | test loss 0.4566874 +| epoch 6 | 427/ 5600 batches | test loss 0.3641562 +| epoch 6 | 431/ 5600 batches | test loss 0.4287514 +| epoch 6 | 435/ 5600 batches | test loss 0.3949273 +| epoch 6 | 439/ 5600 batches | test loss 0.3278698 +| epoch 6 | 443/ 5600 batches | test loss 0.4977384 +| epoch 6 | 447/ 5600 batches | test loss 0.4244635 +| epoch 6 | 451/ 5600 batches | test loss 0.4625461 +| epoch 6 | 455/ 5600 batches | test loss 0.3732206 +| epoch 6 | 459/ 5600 batches | test loss 0.4683851 +| epoch 6 | 463/ 5600 batches | test loss 0.3762906 +| epoch 6 | 467/ 5600 batches | test loss 0.6276365 +| epoch 6 | 471/ 5600 batches | test loss 0.4050351 +| epoch 6 | 475/ 5600 batches | test loss 0.5281774 +| epoch 6 | 479/ 5600 batches | test loss 0.3755966 +| epoch 6 | 483/ 5600 batches | test loss 0.4625473 +| epoch 6 | 487/ 5600 batches | test loss 0.5189131 +| epoch 6 | 491/ 5600 batches | test loss 0.6024100 +| epoch 6 | 495/ 5600 batches | test loss 0.4385146 +| epoch 6 | 499/ 5600 batches | test loss 0.2680140 +| epoch 6 | 503/ 5600 batches | test loss 0.4735565 +| epoch 6 | 507/ 5600 batches | test loss 0.4334787 +| epoch 6 | 511/ 5600 batches | test loss 0.4089220 +| epoch 6 | 515/ 5600 batches | test loss 0.3714560 +| epoch 6 | 519/ 5600 batches | test loss 0.4150835 +| epoch 6 | 523/ 5600 batches | test loss 0.4054166 +| epoch 6 | 527/ 5600 batches | test loss 0.3851709 +| epoch 6 | 531/ 5600 batches | test loss 0.3708333 +| epoch 6 | 535/ 5600 batches | test loss 0.4301459 +| epoch 6 | 539/ 5600 batches | test loss 0.3615800 +| epoch 6 | 543/ 5600 batches | test loss 0.4022730 +| epoch 6 | 547/ 5600 batches | test loss 0.4986478 +| epoch 6 | 551/ 5600 batches | test loss 0.3460838 +| epoch 6 | 555/ 5600 batches | test loss 0.4631384 +| epoch 6 | 559/ 5600 batches | test loss 0.4242628 +| epoch 6 | 563/ 5600 batches | test loss 0.4386239 +| epoch 6 | 567/ 5600 batches | test loss 0.4245320 +| epoch 6 | 571/ 5600 batches | test loss 0.4685546 +| epoch 6 | 575/ 5600 batches | test loss 0.3947085 +| epoch 6 | 579/ 5600 batches | test loss 0.5187738 +| epoch 6 | 583/ 5600 batches | test loss 0.5951660 +| epoch 6 | 587/ 5600 batches | test loss 0.3819821 +| epoch 6 | 591/ 5600 batches | test loss 0.3789398 +| epoch 6 | 595/ 5600 batches | test loss 0.4516410 +| epoch 6 | 599/ 5600 batches | test loss 0.4895064 +| epoch 6 | 603/ 5600 batches | test loss 0.4831415 +| epoch 6 | 607/ 5600 batches | test loss 0.3670613 +| epoch 6 | 611/ 5600 batches | test loss 0.4553062 +| epoch 6 | 615/ 5600 batches | test loss 0.4693238 +| epoch 6 | 619/ 5600 batches | test loss 0.4406644 +| epoch 6 | 623/ 5600 batches | test loss 0.3989846 +| epoch 6 | 627/ 5600 batches | test loss 0.4405128 +| epoch 6 | 631/ 5600 batches | test loss 0.4064671 +| epoch 6 | 635/ 5600 batches | test loss 0.4174131 +| epoch 6 | 639/ 5600 batches | test loss 0.5078725 +| epoch 6 | 643/ 5600 batches | test loss 0.5221562 +| epoch 6 | 647/ 5600 batches | test loss 0.4722983 +| epoch 6 | 651/ 5600 batches | test loss 0.4047545 +| epoch 6 | 655/ 5600 batches | test loss 0.4542636 +| epoch 6 | 659/ 5600 batches | test loss 0.5268749 +| epoch 6 | 663/ 5600 batches | test loss 0.4292303 +| epoch 6 | 667/ 5600 batches | test loss 0.3466858 +| epoch 6 | 671/ 5600 batches | test loss 0.4540960 +| epoch 6 | 675/ 5600 batches | test loss 0.4531575 +| epoch 6 | 679/ 5600 batches | test loss 0.3459027 +| epoch 6 | 683/ 5600 batches | test loss 0.4248195 +| epoch 6 | 687/ 5600 batches | test loss 0.4750601 +| epoch 6 | 691/ 5600 batches | test loss 0.4439562 +| epoch 6 | 695/ 5600 batches | test loss 0.5482878 +| epoch 6 | 699/ 5600 batches | test loss 0.4027829 +| epoch 6 | 703/ 5600 batches | test loss 0.3933365 +| epoch 6 | 707/ 5600 batches | test loss 0.4456241 +| epoch 6 | 711/ 5600 batches | test loss 0.3543352 +| epoch 6 | 715/ 5600 batches | test loss 0.3991043 +| epoch 6 | 719/ 5600 batches | test loss 0.4105702 +| epoch 6 | 723/ 5600 batches | test loss 0.3993134 +| epoch 6 | 727/ 5600 batches | test loss 0.5793365 +| epoch 6 | 731/ 5600 batches | test loss 0.3822085 +| epoch 6 | 735/ 5600 batches | test loss 0.3390785 +| epoch 6 | 739/ 5600 batches | test loss 0.3353548 +| epoch 6 | 743/ 5600 batches | test loss 0.4597970 +| epoch 6 | 747/ 5600 batches | test loss 0.4565762 +| epoch 6 | 751/ 5600 batches | test loss 0.4124696 +| epoch 6 | 755/ 5600 batches | test loss 0.4137999 +| epoch 6 | 759/ 5600 batches | test loss 0.3849355 +| epoch 6 | 763/ 5600 batches | test loss 0.4384921 +| epoch 6 | 767/ 5600 batches | test loss 0.4184594 +| epoch 6 | 771/ 5600 batches | test loss 0.5922965 +| epoch 6 | 775/ 5600 batches | test loss 0.3073056 +| epoch 6 | 779/ 5600 batches | test loss 0.4530602 +| epoch 6 | 783/ 5600 batches | test loss 0.5030207 +| epoch 6 | 787/ 5600 batches | test loss 0.4882652 +| epoch 6 | 791/ 5600 batches | test loss 0.5514378 +| epoch 6 | 795/ 5600 batches | test loss 0.3995771 +| epoch 6 | 799/ 5600 batches | test loss 0.3924105 +| epoch 6 | 803/ 5600 batches | test loss 0.4376487 +| epoch 6 | 807/ 5600 batches | test loss 0.4465621 +| epoch 6 | 811/ 5600 batches | test loss 0.3983080 +| epoch 6 | 815/ 5600 batches | test loss 0.4099376 +| epoch 6 | 819/ 5600 batches | test loss 0.5222448 +| epoch 6 | 823/ 5600 batches | test loss 0.3822681 +| epoch 6 | 827/ 5600 batches | test loss 0.4569032 +| epoch 6 | 831/ 5600 batches | test loss 0.3762555 +| epoch 6 | 835/ 5600 batches | test loss 0.4446243 +| epoch 6 | 839/ 5600 batches | test loss 0.5411198 +| epoch 6 | 843/ 5600 batches | test loss 0.5232720 +| epoch 6 | 847/ 5600 batches | test loss 0.4510848 +| epoch 6 | 851/ 5600 batches | test loss 0.4531543 +| epoch 6 | 855/ 5600 batches | test loss 0.4696055 +| epoch 6 | 859/ 5600 batches | test loss 0.4450031 +| epoch 6 | 863/ 5600 batches | test loss 0.3898995 +| epoch 6 | 867/ 5600 batches | test loss 0.4402865 +| epoch 6 | 871/ 5600 batches | test loss 0.4075681 +| epoch 6 | 875/ 5600 batches | test loss 0.3630821 +| epoch 6 | 879/ 5600 batches | test loss 0.4392995 +| epoch 6 | 883/ 5600 batches | test loss 0.5846858 +| epoch 6 | 887/ 5600 batches | test loss 0.4807314 +| epoch 6 | 891/ 5600 batches | test loss 0.5282139 +| epoch 6 | 895/ 5600 batches | test loss 0.4047440 +| epoch 6 | 899/ 5600 batches | test loss 0.4240907 +| epoch 6 | 903/ 5600 batches | test loss 0.3908826 +| epoch 6 | 907/ 5600 batches | test loss 0.4103806 +| epoch 6 | 911/ 5600 batches | test loss 0.4235157 +| epoch 6 | 915/ 5600 batches | test loss 0.4712790 +| epoch 6 | 919/ 5600 batches | test loss 0.3724395 +| epoch 6 | 923/ 5600 batches | test loss 0.3492599 +| epoch 6 | 927/ 5600 batches | test loss 0.3707136 +| epoch 6 | 931/ 5600 batches | test loss 0.4034841 +| epoch 6 | 935/ 5600 batches | test loss 0.3504967 +| epoch 6 | 939/ 5600 batches | test loss 0.3683912 +| epoch 6 | 943/ 5600 batches | test loss 0.4223586 +| epoch 6 | 947/ 5600 batches | test loss 0.4966191 +| epoch 6 | 951/ 5600 batches | test loss 0.3568083 +| epoch 6 | 955/ 5600 batches | test loss 0.3498601 +| epoch 6 | 959/ 5600 batches | test loss 0.3505235 +| epoch 6 | 963/ 5600 batches | test loss 0.3889662 +| epoch 6 | 967/ 5600 batches | test loss 0.4074347 +| epoch 6 | 971/ 5600 batches | test loss 0.4405819 +| epoch 6 | 975/ 5600 batches | test loss 0.3458694 +| epoch 6 | 979/ 5600 batches | test loss 0.3788115 +| epoch 6 | 983/ 5600 batches | test loss 0.4177710 +| epoch 6 | 987/ 5600 batches | test loss 0.4632050 +| epoch 6 | 991/ 5600 batches | test loss 0.4585871 +| epoch 6 | 995/ 5600 batches | test loss 0.3987726 +| epoch 6 | 999/ 5600 batches | test loss 0.4614401 +| epoch 6 | 1003/ 5600 batches | test loss 0.4331605 +| epoch 6 | 1007/ 5600 batches | test loss 0.4235538 +| epoch 6 | 1011/ 5600 batches | test loss 0.4467860 +| epoch 6 | 1015/ 5600 batches | test loss 0.3534872 +| epoch 6 | 1019/ 5600 batches | test loss 0.5406252 +| epoch 6 | 1023/ 5600 batches | test loss 0.3589812 +| epoch 6 | 1027/ 5600 batches | test loss 0.3932078 +| epoch 6 | 1031/ 5600 batches | test loss 0.4391658 +| epoch 6 | 1035/ 5600 batches | test loss 0.4518059 +| epoch 6 | 1039/ 5600 batches | test loss 0.3658863 +| epoch 6 | 1043/ 5600 batches | test loss 0.5423290 +| epoch 6 | 1047/ 5600 batches | test loss 0.4373402 +| epoch 6 | 1051/ 5600 batches | test loss 0.4562292 +| epoch 6 | 1055/ 5600 batches | test loss 0.1733891 +| epoch 6 | 1059/ 5600 batches | test loss 0.5235506 +| epoch 6 | 1063/ 5600 batches | test loss 0.4221325 +| epoch 6 | 1067/ 5600 batches | test loss 0.4341989 +| epoch 6 | 1071/ 5600 batches | test loss 0.3600768 +| epoch 6 | 1075/ 5600 batches | test loss 0.4318892 +| epoch 6 | 1079/ 5600 batches | test loss 0.4989479 +| epoch 6 | 1083/ 5600 batches | test loss 0.3978496 +| epoch 6 | 1087/ 5600 batches | test loss 0.4331107 +| epoch 6 | 1091/ 5600 batches | test loss 0.5031347 +| epoch 6 | 1095/ 5600 batches | test loss 0.5367534 +| epoch 6 | 1099/ 5600 batches | test loss 0.3468849 +| epoch 6 | 1103/ 5600 batches | test loss 0.4506291 +| epoch 6 | 1107/ 5600 batches | test loss 0.3590400 +| epoch 6 | 1111/ 5600 batches | test loss 0.4202861 +| epoch 6 | 1115/ 5600 batches | test loss 0.4272025 +| epoch 6 | 1119/ 5600 batches | test loss 0.3722188 +| epoch 6 | 1123/ 5600 batches | test loss 0.3962528 +| epoch 6 | 1127/ 5600 batches | test loss 0.4593857 +| epoch 6 | 1131/ 5600 batches | test loss 0.4057800 +| epoch 6 | 1135/ 5600 batches | test loss 0.4419583 +| epoch 6 | 1139/ 5600 batches | test loss 0.4178444 +| epoch 6 | 1143/ 5600 batches | test loss 0.4497389 +| epoch 6 | 1147/ 5600 batches | test loss 0.4711676 +| epoch 6 | 1151/ 5600 batches | test loss 0.5448053 +| epoch 6 | 1155/ 5600 batches | test loss 0.4050003 +| epoch 6 | 1159/ 5600 batches | test loss 0.4640679 +| epoch 6 | 1163/ 5600 batches | test loss 0.5111645 +| epoch 6 | 1167/ 5600 batches | test loss 0.4690706 +| epoch 6 | 1171/ 5600 batches | test loss 0.4058626 +| epoch 6 | 1175/ 5600 batches | test loss 0.4234385 +| epoch 6 | 1179/ 5600 batches | test loss 0.5633989 +| epoch 6 | 1183/ 5600 batches | test loss 0.5561283 +| epoch 6 | 1187/ 5600 batches | test loss 0.4625098 +| epoch 6 | 1191/ 5600 batches | test loss 0.3644576 +| epoch 6 | 1195/ 5600 batches | test loss 0.3976952 +| epoch 6 | 1199/ 5600 batches | test loss 0.3360362 +| epoch 6 | 1203/ 5600 batches | test loss 0.4654944 +| epoch 6 | 1207/ 5600 batches | test loss 0.4572174 +| epoch 6 | 1211/ 5600 batches | test loss 0.4436215 +| epoch 6 | 1215/ 5600 batches | test loss 0.4643284 +| epoch 6 | 1219/ 5600 batches | test loss 0.4714868 +| epoch 6 | 1223/ 5600 batches | test loss 0.3851394 +| epoch 6 | 1227/ 5600 batches | test loss 0.5217911 +| epoch 6 | 1231/ 5600 batches | test loss 0.5006942 +| epoch 6 | 1235/ 5600 batches | test loss 0.4745706 +| epoch 6 | 1239/ 5600 batches | test loss 0.5775602 +| epoch 6 | 1243/ 5600 batches | test loss 0.4033204 +| epoch 6 | 1247/ 5600 batches | test loss 0.5224355 +| epoch 6 | 1251/ 5600 batches | test loss 0.3705242 +| epoch 6 | 1255/ 5600 batches | test loss 0.4512295 +| epoch 6 | 1259/ 5600 batches | test loss 0.4988549 +| epoch 6 | 1263/ 5600 batches | test loss 0.3840638 +| epoch 6 | 1267/ 5600 batches | test loss 0.3723950 +| epoch 6 | 1271/ 5600 batches | test loss 0.3045458 +| epoch 6 | 1275/ 5600 batches | test loss 0.5891744 +| epoch 6 | 1279/ 5600 batches | test loss 0.4438691 +| epoch 6 | 1283/ 5600 batches | test loss 0.3526550 +| epoch 6 | 1287/ 5600 batches | test loss 0.3683164 +| epoch 6 | 1291/ 5600 batches | test loss 0.4076372 +| epoch 6 | 1295/ 5600 batches | test loss 0.4205759 +| epoch 6 | 1299/ 5600 batches | test loss 0.3949097 +| epoch 6 | 1303/ 5600 batches | test loss 0.4460977 +| epoch 6 | 1307/ 5600 batches | test loss 0.3706240 +| epoch 6 | 1311/ 5600 batches | test loss 0.4157636 +| epoch 6 | 1315/ 5600 batches | test loss 0.4533214 +| epoch 6 | 1319/ 5600 batches | test loss 0.3793227 +| epoch 6 | 1323/ 5600 batches | test loss 0.4351527 +| epoch 6 | 1327/ 5600 batches | test loss 0.5317363 +| epoch 6 | 1331/ 5600 batches | test loss 0.3837689 +| epoch 6 | 1335/ 5600 batches | test loss 0.4290867 +| epoch 6 | 1339/ 5600 batches | test loss 0.4330129 +| epoch 6 | 1343/ 5600 batches | test loss 0.4498514 +| epoch 6 | 1347/ 5600 batches | test loss 0.4286865 +| epoch 6 | 1351/ 5600 batches | test loss 0.3710323 +| epoch 6 | 1355/ 5600 batches | test loss 0.5241143 +| epoch 6 | 1359/ 5600 batches | test loss 0.5339111 +| epoch 6 | 1363/ 5600 batches | test loss 0.4437978 +| epoch 6 | 1367/ 5600 batches | test loss 0.5583199 +| epoch 6 | 1371/ 5600 batches | test loss 0.4189225 +| epoch 6 | 1375/ 5600 batches | test loss 0.4439711 +| epoch 6 | 1379/ 5600 batches | test loss 0.4303761 +| epoch 6 | 1383/ 5600 batches | test loss 0.4563999 +| epoch 6 | 1387/ 5600 batches | test loss 0.5627440 +| epoch 6 | 1391/ 5600 batches | test loss 0.3129975 +| epoch 6 | 1395/ 5600 batches | test loss 0.4841843 +| epoch 6 | 1399/ 5600 batches | test loss 0.3061938 +| epoch 6 | final test loss 0.4340, do not save model! +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 5600 batches | train loss 0.3798218 +| epoch 7 | 7/ 5600 batches | train loss 0.3710312 +| epoch 7 | 11/ 5600 batches | train loss 0.3805399 +| epoch 7 | 15/ 5600 batches | train loss 0.3449294 +| epoch 7 | 19/ 5600 batches | train loss 0.3673289 +| epoch 7 | 23/ 5600 batches | train loss 0.3881922 +| epoch 7 | 27/ 5600 batches | train loss 0.3177544 +| epoch 7 | 31/ 5600 batches | train loss 0.3947239 +| epoch 7 | 35/ 5600 batches | train loss 0.3216993 +| epoch 7 | 39/ 5600 batches | train loss 0.2985334 +| epoch 7 | 43/ 5600 batches | train loss 0.3196392 +| epoch 7 | 47/ 5600 batches | train loss 0.3296324 +| epoch 7 | 51/ 5600 batches | train loss 0.3087975 +| epoch 7 | 55/ 5600 batches | train loss 0.3588435 +| epoch 7 | 59/ 5600 batches | train loss 0.2999719 +| epoch 7 | 63/ 5600 batches | train loss 0.3299994 +| epoch 7 | 67/ 5600 batches | train loss 0.3798639 +| epoch 7 | 71/ 5600 batches | train loss 0.3408972 +| epoch 7 | 75/ 5600 batches | train loss 0.3310894 +| epoch 7 | 79/ 5600 batches | train loss 0.4412684 +| epoch 7 | 83/ 5600 batches | train loss 0.4054542 +| epoch 7 | 87/ 5600 batches | train loss 0.3508663 +| epoch 7 | 91/ 5600 batches | train loss 0.3216106 +| epoch 7 | 95/ 5600 batches | train loss 0.3976052 +| epoch 7 | 99/ 5600 batches | train loss 0.3056669 +| epoch 7 | 103/ 5600 batches | train loss 0.4031635 +| epoch 7 | 107/ 5600 batches | train loss 0.3455536 +| epoch 7 | 111/ 5600 batches | train loss 0.4069460 +| epoch 7 | 115/ 5600 batches | train loss 0.3118597 +| epoch 7 | 119/ 5600 batches | train loss 0.3159899 +| epoch 7 | 123/ 5600 batches | train loss 0.3790030 +| epoch 7 | 127/ 5600 batches | train loss 0.3745882 +| epoch 7 | 131/ 5600 batches | train loss 0.3160907 +| epoch 7 | 135/ 5600 batches | train loss 0.3461195 +| epoch 7 | 139/ 5600 batches | train loss 0.3854528 +| epoch 7 | 143/ 5600 batches | train loss 0.3891348 +| epoch 7 | 147/ 5600 batches | train loss 0.3452109 +| epoch 7 | 151/ 5600 batches | train loss 0.3438918 +| epoch 7 | 155/ 5600 batches | train loss 0.4007979 +| epoch 7 | 159/ 5600 batches | train loss 0.3756372 +| epoch 7 | 163/ 5600 batches | train loss 0.3892130 +| epoch 7 | 167/ 5600 batches | train loss 0.4326418 +| epoch 7 | 171/ 5600 batches | train loss 0.3120615 +| epoch 7 | 175/ 5600 batches | train loss 0.2284075 +| epoch 7 | 179/ 5600 batches | train loss 0.4062995 +| epoch 7 | 183/ 5600 batches | train loss 0.3794113 +| epoch 7 | 187/ 5600 batches | train loss 0.2293842 +| epoch 7 | 191/ 5600 batches | train loss 0.3655073 +| epoch 7 | 195/ 5600 batches | train loss 0.3973576 +| epoch 7 | 199/ 5600 batches | train loss 0.4087239 +| epoch 7 | 203/ 5600 batches | train loss 0.3990852 +| epoch 7 | 207/ 5600 batches | train loss 0.3615873 +| epoch 7 | 211/ 5600 batches | train loss 0.3405606 +| epoch 7 | 215/ 5600 batches | train loss 0.6131058 +| epoch 7 | 219/ 5600 batches | train loss 0.3311125 +| epoch 7 | 223/ 5600 batches | train loss 0.2948667 +| epoch 7 | 227/ 5600 batches | train loss 0.3728504 +| epoch 7 | 231/ 5600 batches | train loss 0.3931623 +| epoch 7 | 235/ 5600 batches | train loss 0.3657297 +| epoch 7 | 239/ 5600 batches | train loss 0.3847347 +| epoch 7 | 243/ 5600 batches | train loss 0.3149512 +| epoch 7 | 247/ 5600 batches | train loss 0.3560084 +| epoch 7 | 251/ 5600 batches | train loss 0.3670344 +| epoch 7 | 255/ 5600 batches | train loss 0.3818887 +| epoch 7 | 259/ 5600 batches | train loss 0.2939555 +| epoch 7 | 263/ 5600 batches | train loss 0.3807286 +| epoch 7 | 267/ 5600 batches | train loss 0.3101012 +| epoch 7 | 271/ 5600 batches | train loss 0.3762618 +| epoch 7 | 275/ 5600 batches | train loss 0.2998237 +| epoch 7 | 279/ 5600 batches | train loss 0.3311070 +| epoch 7 | 283/ 5600 batches | train loss 0.3682010 +| epoch 7 | 287/ 5600 batches | train loss 0.3441929 +| epoch 7 | 291/ 5600 batches | train loss 0.3330147 +| epoch 7 | 295/ 5600 batches | train loss 0.3578375 +| epoch 7 | 299/ 5600 batches | train loss 0.3446835 +| epoch 7 | 303/ 5600 batches | train loss 0.3248277 +| epoch 7 | 307/ 5600 batches | train loss 0.3961928 +| epoch 7 | 311/ 5600 batches | train loss 0.3429849 +| epoch 7 | 315/ 5600 batches | train loss 0.3198189 +| epoch 7 | 319/ 5600 batches | train loss 0.3717171 +| epoch 7 | 323/ 5600 batches | train loss 0.3808895 +| epoch 7 | 327/ 5600 batches | train loss 0.4155475 +| epoch 7 | 331/ 5600 batches | train loss 0.3197032 +| epoch 7 | 335/ 5600 batches | train loss 0.3211089 +| epoch 7 | 339/ 5600 batches | train loss 0.3269510 +| epoch 7 | 343/ 5600 batches | train loss 0.3138486 +| epoch 7 | 347/ 5600 batches | train loss 0.3830560 +| epoch 7 | 351/ 5600 batches | train loss 0.3716278 +| epoch 7 | 355/ 5600 batches | train loss 0.3062187 +| epoch 7 | 359/ 5600 batches | train loss 0.3248183 +| epoch 7 | 363/ 5600 batches | train loss 0.3452333 +| epoch 7 | 367/ 5600 batches | train loss 0.3435119 +| epoch 7 | 371/ 5600 batches | train loss 0.2345775 +| epoch 7 | 375/ 5600 batches | train loss 0.3202905 +| epoch 7 | 379/ 5600 batches | train loss 0.3527414 +| epoch 7 | 383/ 5600 batches | train loss 0.3997169 +| epoch 7 | 387/ 5600 batches | train loss 0.3881030 +| epoch 7 | 391/ 5600 batches | train loss 0.2184233 +| epoch 7 | 395/ 5600 batches | train loss 0.3143967 +| epoch 7 | 399/ 5600 batches | train loss 0.3811534 +| epoch 7 | 403/ 5600 batches | train loss 0.3517945 +| epoch 7 | 407/ 5600 batches | train loss 0.2088085 +| epoch 7 | 411/ 5600 batches | train loss 0.4012696 +| epoch 7 | 415/ 5600 batches | train loss 0.3802398 +| epoch 7 | 419/ 5600 batches | train loss 0.2822238 +| epoch 7 | 423/ 5600 batches | train loss 0.3248720 +| epoch 7 | 427/ 5600 batches | train loss 0.3496965 +| epoch 7 | 431/ 5600 batches | train loss 0.4273408 +| epoch 7 | 435/ 5600 batches | train loss 0.3483808 +| epoch 7 | 439/ 5600 batches | train loss 0.3453030 +| epoch 7 | 443/ 5600 batches | train loss 0.3580146 +| epoch 7 | 447/ 5600 batches | train loss 0.3145580 +| epoch 7 | 451/ 5600 batches | train loss 0.3535969 +| epoch 7 | 455/ 5600 batches | train loss 0.3958487 +| epoch 7 | 459/ 5600 batches | train loss 0.3363472 +| epoch 7 | 463/ 5600 batches | train loss 0.4072789 +| epoch 7 | 467/ 5600 batches | train loss 0.3357854 +| epoch 7 | 471/ 5600 batches | train loss 0.2708200 +| epoch 7 | 475/ 5600 batches | train loss 0.3987702 +| epoch 7 | 479/ 5600 batches | train loss 0.3655213 +| epoch 7 | 483/ 5600 batches | train loss 0.3392455 +| epoch 7 | 487/ 5600 batches | train loss 0.3610160 +| epoch 7 | 491/ 5600 batches | train loss 0.3516818 +| epoch 7 | 495/ 5600 batches | train loss 0.3445675 +| epoch 7 | 499/ 5600 batches | train loss 0.3686696 +| epoch 7 | 503/ 5600 batches | train loss 0.3749498 +| epoch 7 | 507/ 5600 batches | train loss 0.3227157 +| epoch 7 | 511/ 5600 batches | train loss 0.3563260 +| epoch 7 | 515/ 5600 batches | train loss 0.3647448 +| epoch 7 | 519/ 5600 batches | train loss 0.3569434 +| epoch 7 | 523/ 5600 batches | train loss 0.3631392 +| epoch 7 | 527/ 5600 batches | train loss 0.3838897 +| epoch 7 | 531/ 5600 batches | train loss 0.2937073 +| epoch 7 | 535/ 5600 batches | train loss 0.3163955 +| epoch 7 | 539/ 5600 batches | train loss 0.3723306 +| epoch 7 | 543/ 5600 batches | train loss 0.2813900 +| epoch 7 | 547/ 5600 batches | train loss 0.3371507 +| epoch 7 | 551/ 5600 batches | train loss 0.3755899 +| epoch 7 | 555/ 5600 batches | train loss 0.3044538 +| epoch 7 | 559/ 5600 batches | train loss 0.3644139 +| epoch 7 | 563/ 5600 batches | train loss 0.3264651 +| epoch 7 | 567/ 5600 batches | train loss 0.3460935 +| epoch 7 | 571/ 5600 batches | train loss 0.4290945 +| epoch 7 | 575/ 5600 batches | train loss 0.3259887 +| epoch 7 | 579/ 5600 batches | train loss 0.4012014 +| epoch 7 | 583/ 5600 batches | train loss 0.3417118 +| epoch 7 | 587/ 5600 batches | train loss 0.3636648 +| epoch 7 | 591/ 5600 batches | train loss 0.3579464 +| epoch 7 | 595/ 5600 batches | train loss 0.3264872 +| epoch 7 | 599/ 5600 batches | train loss 0.3589103 +| epoch 7 | 603/ 5600 batches | train loss 0.5248557 +| epoch 7 | 607/ 5600 batches | train loss 0.4266950 +| epoch 7 | 611/ 5600 batches | train loss 0.3513062 +| epoch 7 | 615/ 5600 batches | train loss 0.2576464 +| epoch 7 | 619/ 5600 batches | train loss 0.3921043 +| epoch 7 | 623/ 5600 batches | train loss 0.3953820 +| epoch 7 | 627/ 5600 batches | train loss 0.3317220 +| epoch 7 | 631/ 5600 batches | train loss 0.3490211 +| epoch 7 | 635/ 5600 batches | train loss 0.4201835 +| epoch 7 | 639/ 5600 batches | train loss 0.3781067 +| epoch 7 | 643/ 5600 batches | train loss 0.3475286 +| epoch 7 | 647/ 5600 batches | train loss 0.4090754 +| epoch 7 | 651/ 5600 batches | train loss 0.2280652 +| epoch 7 | 655/ 5600 batches | train loss 0.3614908 +| epoch 7 | 659/ 5600 batches | train loss 0.3613388 +| epoch 7 | 663/ 5600 batches | train loss 0.3607749 +| epoch 7 | 667/ 5600 batches | train loss 0.3549542 +| epoch 7 | 671/ 5600 batches | train loss 0.2945138 +| epoch 7 | 675/ 5600 batches | train loss 0.3200674 +| epoch 7 | 679/ 5600 batches | train loss 0.3127454 +| epoch 7 | 683/ 5600 batches | train loss 0.3503911 +| epoch 7 | 687/ 5600 batches | train loss 0.3307318 +| epoch 7 | 691/ 5600 batches | train loss 0.3481465 +| epoch 7 | 695/ 5600 batches | train loss 0.3503089 +| epoch 7 | 699/ 5600 batches | train loss 0.2864535 +| epoch 7 | 703/ 5600 batches | train loss 0.3859771 +| epoch 7 | 707/ 5600 batches | train loss 0.3998002 +| epoch 7 | 711/ 5600 batches | train loss 0.3250452 +| epoch 7 | 715/ 5600 batches | train loss 0.3084204 +| epoch 7 | 719/ 5600 batches | train loss 0.3512650 +| epoch 7 | 723/ 5600 batches | train loss 0.3763521 +| epoch 7 | 727/ 5600 batches | train loss 0.3299755 +| epoch 7 | 731/ 5600 batches | train loss 0.4001281 +| epoch 7 | 735/ 5600 batches | train loss 0.3781473 +| epoch 7 | 739/ 5600 batches | train loss 0.3166140 +| epoch 7 | 743/ 5600 batches | train loss 0.3538930 +| epoch 7 | 747/ 5600 batches | train loss 0.4040507 +| epoch 7 | 751/ 5600 batches | train loss 0.3115017 +| epoch 7 | 755/ 5600 batches | train loss 0.3603176 +| epoch 7 | 759/ 5600 batches | train loss 0.4296440 +| epoch 7 | 763/ 5600 batches | train loss 0.4080726 +| epoch 7 | 767/ 5600 batches | train loss 0.3390526 +| epoch 7 | 771/ 5600 batches | train loss 0.3983387 +| epoch 7 | 775/ 5600 batches | train loss 0.3879288 +| epoch 7 | 779/ 5600 batches | train loss 0.4224267 +| epoch 7 | 783/ 5600 batches | train loss 0.3429390 +| epoch 7 | 787/ 5600 batches | train loss 0.3272989 +| epoch 7 | 791/ 5600 batches | train loss 0.3225263 +| epoch 7 | 795/ 5600 batches | train loss 0.3791001 +| epoch 7 | 799/ 5600 batches | train loss 0.3623854 +| epoch 7 | 803/ 5600 batches | train loss 0.3282293 +| epoch 7 | 807/ 5600 batches | train loss 0.4004054 +| epoch 7 | 811/ 5600 batches | train loss 0.3593290 +| epoch 7 | 815/ 5600 batches | train loss 0.3639892 +| epoch 7 | 819/ 5600 batches | train loss 0.3850442 +| epoch 7 | 823/ 5600 batches | train loss 0.4561617 +| epoch 7 | 827/ 5600 batches | train loss 0.3423256 +| epoch 7 | 831/ 5600 batches | train loss 0.4189956 +| epoch 7 | 835/ 5600 batches | train loss 0.3427562 +| epoch 7 | 839/ 5600 batches | train loss 0.3607877 +| epoch 7 | 843/ 5600 batches | train loss 0.3191163 +| epoch 7 | 847/ 5600 batches | train loss 0.2889326 +| epoch 7 | 851/ 5600 batches | train loss 0.3351181 +| epoch 7 | 855/ 5600 batches | train loss 0.3805388 +| epoch 7 | 859/ 5600 batches | train loss 0.3810301 +| epoch 7 | 863/ 5600 batches | train loss 0.3255839 +| epoch 7 | 867/ 5600 batches | train loss 0.3901498 +| epoch 7 | 871/ 5600 batches | train loss 0.3563491 +| epoch 7 | 875/ 5600 batches | train loss 0.3486037 +| epoch 7 | 879/ 5600 batches | train loss 0.3942122 +| epoch 7 | 883/ 5600 batches | train loss 0.3476620 +| epoch 7 | 887/ 5600 batches | train loss 0.3808523 +| epoch 7 | 891/ 5600 batches | train loss 0.3791468 +| epoch 7 | 895/ 5600 batches | train loss 0.3282275 +| epoch 7 | 899/ 5600 batches | train loss 0.3398289 +| epoch 7 | 903/ 5600 batches | train loss 0.2805861 +| epoch 7 | 907/ 5600 batches | train loss 0.3984888 +| epoch 7 | 911/ 5600 batches | train loss 0.3623309 +| epoch 7 | 915/ 5600 batches | train loss 0.3694217 +| epoch 7 | 919/ 5600 batches | train loss 0.3054900 +| epoch 7 | 923/ 5600 batches | train loss 0.3225561 +| epoch 7 | 927/ 5600 batches | train loss 0.2816625 +| epoch 7 | 931/ 5600 batches | train loss 0.3747888 +| epoch 7 | 935/ 5600 batches | train loss 0.3578992 +| epoch 7 | 939/ 5600 batches | train loss 0.3792423 +| epoch 7 | 943/ 5600 batches | train loss 0.3225954 +| epoch 7 | 947/ 5600 batches | train loss 0.3848881 +| epoch 7 | 951/ 5600 batches | train loss 0.3500653 +| epoch 7 | 955/ 5600 batches | train loss 0.3308333 +| epoch 7 | 959/ 5600 batches | train loss 0.3703266 +| epoch 7 | 963/ 5600 batches | train loss 0.3799930 +| epoch 7 | 967/ 5600 batches | train loss 0.3700685 +| epoch 7 | 971/ 5600 batches | train loss 0.4648659 +| epoch 7 | 975/ 5600 batches | train loss 0.4271050 +| epoch 7 | 979/ 5600 batches | train loss 0.3620064 +| epoch 7 | 983/ 5600 batches | train loss 0.3943470 +| epoch 7 | 987/ 5600 batches | train loss 0.3972909 +| epoch 7 | 991/ 5600 batches | train loss 0.4456323 +| epoch 7 | 995/ 5600 batches | train loss 0.3986980 +| epoch 7 | 999/ 5600 batches | train loss 0.3761246 +| epoch 7 | 1003/ 5600 batches | train loss 0.3869472 +| epoch 7 | 1007/ 5600 batches | train loss 0.3292799 +| epoch 7 | 1011/ 5600 batches | train loss 0.3525722 +| epoch 7 | 1015/ 5600 batches | train loss 0.3737213 +| epoch 7 | 1019/ 5600 batches | train loss 0.3205732 +| epoch 7 | 1023/ 5600 batches | train loss 0.4191205 +| epoch 7 | 1027/ 5600 batches | train loss 0.3947059 +| epoch 7 | 1031/ 5600 batches | train loss 0.3899478 +| epoch 7 | 1035/ 5600 batches | train loss 0.3945630 +| epoch 7 | 1039/ 5600 batches | train loss 0.3683453 +| epoch 7 | 1043/ 5600 batches | train loss 0.3339600 +| epoch 7 | 1047/ 5600 batches | train loss 0.3747563 +| epoch 7 | 1051/ 5600 batches | train loss 0.3191889 +| epoch 7 | 1055/ 5600 batches | train loss 0.2886470 +| epoch 7 | 1059/ 5600 batches | train loss 0.4016195 +| epoch 7 | 1063/ 5600 batches | train loss 0.3550926 +| epoch 7 | 1067/ 5600 batches | train loss 0.3457449 +| epoch 7 | 1071/ 5600 batches | train loss 0.3808427 +| epoch 7 | 1075/ 5600 batches | train loss 0.3219182 +| epoch 7 | 1079/ 5600 batches | train loss 0.4176623 +| epoch 7 | 1083/ 5600 batches | train loss 0.3516080 +| epoch 7 | 1087/ 5600 batches | train loss 0.3896746 +| epoch 7 | 1091/ 5600 batches | train loss 0.3704385 +| epoch 7 | 1095/ 5600 batches | train loss 0.3481055 +| epoch 7 | 1099/ 5600 batches | train loss 0.3594208 +| epoch 7 | 1103/ 5600 batches | train loss 0.3235983 +| epoch 7 | 1107/ 5600 batches | train loss 0.3937524 +| epoch 7 | 1111/ 5600 batches | train loss 0.3680418 +| epoch 7 | 1115/ 5600 batches | train loss 0.3518983 +| epoch 7 | 1119/ 5600 batches | train loss 0.3642238 +| epoch 7 | 1123/ 5600 batches | train loss 0.3016014 +| epoch 7 | 1127/ 5600 batches | train loss 0.4233084 +| epoch 7 | 1131/ 5600 batches | train loss 0.3474646 +| epoch 7 | 1135/ 5600 batches | train loss 0.3958638 +| epoch 7 | 1139/ 5600 batches | train loss 0.3357771 +| epoch 7 | 1143/ 5600 batches | train loss 0.3999870 +| epoch 7 | 1147/ 5600 batches | train loss 0.2650829 +| epoch 7 | 1151/ 5600 batches | train loss 0.3304247 +| epoch 7 | 1155/ 5600 batches | train loss 0.3993581 +| epoch 7 | 1159/ 5600 batches | train loss 0.3609429 +| epoch 7 | 1163/ 5600 batches | train loss 0.3391161 +| epoch 7 | 1167/ 5600 batches | train loss 0.3486751 +| epoch 7 | 1171/ 5600 batches | train loss 0.2960920 +| epoch 7 | 1175/ 5600 batches | train loss 0.3627476 +| epoch 7 | 1179/ 5600 batches | train loss 0.3595681 +| epoch 7 | 1183/ 5600 batches | train loss 0.3880181 +| epoch 7 | 1187/ 5600 batches | train loss 0.4685865 +| epoch 7 | 1191/ 5600 batches | train loss 0.2944505 +| epoch 7 | 1195/ 5600 batches | train loss 0.3710188 +| epoch 7 | 1199/ 5600 batches | train loss 0.3235930 +| epoch 7 | 1203/ 5600 batches | train loss 0.3498827 +| epoch 7 | 1207/ 5600 batches | train loss 0.3451591 +| epoch 7 | 1211/ 5600 batches | train loss 0.3889152 +| epoch 7 | 1215/ 5600 batches | train loss 0.2615285 +| epoch 7 | 1219/ 5600 batches | train loss 0.4081853 +| epoch 7 | 1223/ 5600 batches | train loss 0.3140066 +| epoch 7 | 1227/ 5600 batches | train loss 0.4464009 +| epoch 7 | 1231/ 5600 batches | train loss 0.3634585 +| epoch 7 | 1235/ 5600 batches | train loss 0.3246909 +| epoch 7 | 1239/ 5600 batches | train loss 0.3492933 +| epoch 7 | 1243/ 5600 batches | train loss 0.4034397 +| epoch 7 | 1247/ 5600 batches | train loss 0.3735537 +| epoch 7 | 1251/ 5600 batches | train loss 0.3884350 +| epoch 7 | 1255/ 5600 batches | train loss 0.3477129 +| epoch 7 | 1259/ 5600 batches | train loss 0.3029653 +| epoch 7 | 1263/ 5600 batches | train loss 0.4049770 +| epoch 7 | 1267/ 5600 batches | train loss 0.2799144 +| epoch 7 | 1271/ 5600 batches | train loss 0.3689893 +| epoch 7 | 1275/ 5600 batches | train loss 0.3852066 +| epoch 7 | 1279/ 5600 batches | train loss 0.4383182 +| epoch 7 | 1283/ 5600 batches | train loss 0.3977272 +| epoch 7 | 1287/ 5600 batches | train loss 0.3697901 +| epoch 7 | 1291/ 5600 batches | train loss 0.2962867 +| epoch 7 | 1295/ 5600 batches | train loss 0.4282291 +| epoch 7 | 1299/ 5600 batches | train loss 0.4041703 +| epoch 7 | 1303/ 5600 batches | train loss 0.3948724 +| epoch 7 | 1307/ 5600 batches | train loss 0.3675626 +| epoch 7 | 1311/ 5600 batches | train loss 0.3690764 +| epoch 7 | 1315/ 5600 batches | train loss 0.4770145 +| epoch 7 | 1319/ 5600 batches | train loss 0.3822067 +| epoch 7 | 1323/ 5600 batches | train loss 0.3554614 +| epoch 7 | 1327/ 5600 batches | train loss 0.4283112 +| epoch 7 | 1331/ 5600 batches | train loss 0.3227317 +| epoch 7 | 1335/ 5600 batches | train loss 0.3542202 +| epoch 7 | 1339/ 5600 batches | train loss 0.3097669 +| epoch 7 | 1343/ 5600 batches | train loss 0.3437741 +| epoch 7 | 1347/ 5600 batches | train loss 0.3294210 +| epoch 7 | 1351/ 5600 batches | train loss 0.3399207 +| epoch 7 | 1355/ 5600 batches | train loss 0.4064094 +| epoch 7 | 1359/ 5600 batches | train loss 0.2954514 +| epoch 7 | 1363/ 5600 batches | train loss 0.2757590 +| epoch 7 | 1367/ 5600 batches | train loss 0.3800390 +| epoch 7 | 1371/ 5600 batches | train loss 0.3466175 +| epoch 7 | 1375/ 5600 batches | train loss 0.3224465 +| epoch 7 | 1379/ 5600 batches | train loss 0.3284124 +| epoch 7 | 1383/ 5600 batches | train loss 0.3603751 +| epoch 7 | 1387/ 5600 batches | train loss 0.3961910 +| epoch 7 | 1391/ 5600 batches | train loss 0.3380843 +| epoch 7 | 1395/ 5600 batches | train loss 0.3508087 +| epoch 7 | 1399/ 5600 batches | train loss 0.3633412 +| epoch 7 | 1403/ 5600 batches | train loss 0.3496904 +| epoch 7 | 1407/ 5600 batches | train loss 0.4338592 +| epoch 7 | 1411/ 5600 batches | train loss 0.3200448 +| epoch 7 | 1415/ 5600 batches | train loss 0.3744169 +| epoch 7 | 1419/ 5600 batches | train loss 0.3917451 +| epoch 7 | 1423/ 5600 batches | train loss 0.3838486 +| epoch 7 | 1427/ 5600 batches | train loss 0.3791751 +| epoch 7 | 1431/ 5600 batches | train loss 0.3773197 +| epoch 7 | 1435/ 5600 batches | train loss 0.3702320 +| epoch 7 | 1439/ 5600 batches | train loss 0.4294638 +| epoch 7 | 1443/ 5600 batches | train loss 0.3622470 +| epoch 7 | 1447/ 5600 batches | train loss 0.3596215 +| epoch 7 | 1451/ 5600 batches | train loss 0.3936998 +| epoch 7 | 1455/ 5600 batches | train loss 0.3230585 +| epoch 7 | 1459/ 5600 batches | train loss 0.3759902 +| epoch 7 | 1463/ 5600 batches | train loss 0.4028053 +| epoch 7 | 1467/ 5600 batches | train loss 0.2776452 +| epoch 7 | 1471/ 5600 batches | train loss 0.3380168 +| epoch 7 | 1475/ 5600 batches | train loss 0.3694157 +| epoch 7 | 1479/ 5600 batches | train loss 0.3103528 +| epoch 7 | 1483/ 5600 batches | train loss 0.2907880 +| epoch 7 | 1487/ 5600 batches | train loss 0.4022927 +| epoch 7 | 1491/ 5600 batches | train loss 0.3251594 +| epoch 7 | 1495/ 5600 batches | train loss 0.3502756 +| epoch 7 | 1499/ 5600 batches | train loss 0.4252092 +| epoch 7 | 1503/ 5600 batches | train loss 0.3504858 +| epoch 7 | 1507/ 5600 batches | train loss 0.3300512 +| epoch 7 | 1511/ 5600 batches | train loss 0.3364040 +| epoch 7 | 1515/ 5600 batches | train loss 0.4228219 +| epoch 7 | 1519/ 5600 batches | train loss 0.3815285 +| epoch 7 | 1523/ 5600 batches | train loss 0.3377075 +| epoch 7 | 1527/ 5600 batches | train loss 0.3436928 +| epoch 7 | 1531/ 5600 batches | train loss 0.3594787 +| epoch 7 | 1535/ 5600 batches | train loss 0.4638129 +| epoch 7 | 1539/ 5600 batches | train loss 0.3729807 +| epoch 7 | 1543/ 5600 batches | train loss 0.3968808 +| epoch 7 | 1547/ 5600 batches | train loss 0.3973090 +| epoch 7 | 1551/ 5600 batches | train loss 0.3611054 +| epoch 7 | 1555/ 5600 batches | train loss 0.4736817 +| epoch 7 | 1559/ 5600 batches | train loss 0.3447977 +| epoch 7 | 1563/ 5600 batches | train loss 0.3106293 +| epoch 7 | 1567/ 5600 batches | train loss 0.4172729 +| epoch 7 | 1571/ 5600 batches | train loss 0.3304316 +| epoch 7 | 1575/ 5600 batches | train loss 0.3728814 +| epoch 7 | 1579/ 5600 batches | train loss 0.3818964 +| epoch 7 | 1583/ 5600 batches | train loss 0.3724939 +| epoch 7 | 1587/ 5600 batches | train loss 0.3081345 +| epoch 7 | 1591/ 5600 batches | train loss 0.3625309 +| epoch 7 | 1595/ 5600 batches | train loss 0.3760918 +| epoch 7 | 1599/ 5600 batches | train loss 0.4413611 +| epoch 7 | 1603/ 5600 batches | train loss 0.3749083 +| epoch 7 | 1607/ 5600 batches | train loss 0.4318720 +| epoch 7 | 1611/ 5600 batches | train loss 0.3377196 +| epoch 7 | 1615/ 5600 batches | train loss 0.4164523 +| epoch 7 | 1619/ 5600 batches | train loss 0.3699374 +| epoch 7 | 1623/ 5600 batches | train loss 0.3036602 +| epoch 7 | 1627/ 5600 batches | train loss 0.3816787 +| epoch 7 | 1631/ 5600 batches | train loss 0.3323665 +| epoch 7 | 1635/ 5600 batches | train loss 0.3544948 +| epoch 7 | 1639/ 5600 batches | train loss 0.3390267 +| epoch 7 | 1643/ 5600 batches | train loss 0.3424470 +| epoch 7 | 1647/ 5600 batches | train loss 0.3707214 +| epoch 7 | 1651/ 5600 batches | train loss 0.3980373 +| epoch 7 | 1655/ 5600 batches | train loss 0.3890884 +| epoch 7 | 1659/ 5600 batches | train loss 0.3880449 +| epoch 7 | 1663/ 5600 batches | train loss 0.3837868 +| epoch 7 | 1667/ 5600 batches | train loss 0.4140757 +| epoch 7 | 1671/ 5600 batches | train loss 0.3378894 +| epoch 7 | 1675/ 5600 batches | train loss 0.3728433 +| epoch 7 | 1679/ 5600 batches | train loss 0.3283749 +| epoch 7 | 1683/ 5600 batches | train loss 0.4587907 +| epoch 7 | 1687/ 5600 batches | train loss 0.3772395 +| epoch 7 | 1691/ 5600 batches | train loss 0.3871920 +| epoch 7 | 1695/ 5600 batches | train loss 0.3254653 +| epoch 7 | 1699/ 5600 batches | train loss 0.3757655 +| epoch 7 | 1703/ 5600 batches | train loss 0.3975247 +| epoch 7 | 1707/ 5600 batches | train loss 0.3979177 +| epoch 7 | 1711/ 5600 batches | train loss 0.3484620 +| epoch 7 | 1715/ 5600 batches | train loss 0.3183006 +| epoch 7 | 1719/ 5600 batches | train loss 0.3687188 +| epoch 7 | 1723/ 5600 batches | train loss 0.3529710 +| epoch 7 | 1727/ 5600 batches | train loss 0.3367813 +| epoch 7 | 1731/ 5600 batches | train loss 0.3797911 +| epoch 7 | 1735/ 5600 batches | train loss 0.3800875 +| epoch 7 | 1739/ 5600 batches | train loss 0.4086838 +| epoch 7 | 1743/ 5600 batches | train loss 0.4169617 +| epoch 7 | 1747/ 5600 batches | train loss 0.3176126 +| epoch 7 | 1751/ 5600 batches | train loss 0.3861463 +| epoch 7 | 1755/ 5600 batches | train loss 0.3573555 +| epoch 7 | 1759/ 5600 batches | train loss 0.3491541 +| epoch 7 | 1763/ 5600 batches | train loss 0.4435475 +| epoch 7 | 1767/ 5600 batches | train loss 0.3698097 +| epoch 7 | 1771/ 5600 batches | train loss 0.3830068 +| epoch 7 | 1775/ 5600 batches | train loss 0.3996128 +| epoch 7 | 1779/ 5600 batches | train loss 0.3953532 +| epoch 7 | 1783/ 5600 batches | train loss 0.3282332 +| epoch 7 | 1787/ 5600 batches | train loss 0.3537034 +| epoch 7 | 1791/ 5600 batches | train loss 0.3385341 +| epoch 7 | 1795/ 5600 batches | train loss 0.3998396 +| epoch 7 | 1799/ 5600 batches | train loss 0.3617684 +| epoch 7 | 1803/ 5600 batches | train loss 0.3758903 +| epoch 7 | 1807/ 5600 batches | train loss 0.3898857 +| epoch 7 | 1811/ 5600 batches | train loss 0.3753788 +| epoch 7 | 1815/ 5600 batches | train loss 0.3618194 +| epoch 7 | 1819/ 5600 batches | train loss 0.1582938 +| epoch 7 | 1823/ 5600 batches | train loss 0.3817112 +| epoch 7 | 1827/ 5600 batches | train loss 0.3322735 +| epoch 7 | 1831/ 5600 batches | train loss 0.3772861 +| epoch 7 | 1835/ 5600 batches | train loss 0.3119984 +| epoch 7 | 1839/ 5600 batches | train loss 0.2977102 +| epoch 7 | 1843/ 5600 batches | train loss 0.3084126 +| epoch 7 | 1847/ 5600 batches | train loss 0.2689360 +| epoch 7 | 1851/ 5600 batches | train loss 0.4626104 +| epoch 7 | 1855/ 5600 batches | train loss 0.3587872 +| epoch 7 | 1859/ 5600 batches | train loss 0.3187260 +| epoch 7 | 1863/ 5600 batches | train loss 0.3391295 +| epoch 7 | 1867/ 5600 batches | train loss 0.4255129 +| epoch 7 | 1871/ 5600 batches | train loss 0.3471626 +| epoch 7 | 1875/ 5600 batches | train loss 0.3101928 +| epoch 7 | 1879/ 5600 batches | train loss 0.3966274 +| epoch 7 | 1883/ 5600 batches | train loss 0.3782007 +| epoch 7 | 1887/ 5600 batches | train loss 0.3257924 +| epoch 7 | 1891/ 5600 batches | train loss 0.3896876 +| epoch 7 | 1895/ 5600 batches | train loss 0.3545425 +| epoch 7 | 1899/ 5600 batches | train loss 0.3530034 +| epoch 7 | 1903/ 5600 batches | train loss 0.4053207 +| epoch 7 | 1907/ 5600 batches | train loss 0.4349620 +| epoch 7 | 1911/ 5600 batches | train loss 0.3410339 +| epoch 7 | 1915/ 5600 batches | train loss 0.4242496 +| epoch 7 | 1919/ 5600 batches | train loss 0.3070936 +| epoch 7 | 1923/ 5600 batches | train loss 0.3098877 +| epoch 7 | 1927/ 5600 batches | train loss 0.2983586 +| epoch 7 | 1931/ 5600 batches | train loss 0.3983135 +| epoch 7 | 1935/ 5600 batches | train loss 0.3547169 +| epoch 7 | 1939/ 5600 batches | train loss 0.4021269 +| epoch 7 | 1943/ 5600 batches | train loss 0.3326407 +| epoch 7 | 1947/ 5600 batches | train loss 0.4051176 +| epoch 7 | 1951/ 5600 batches | train loss 0.4254290 +| epoch 7 | 1955/ 5600 batches | train loss 0.3350149 +| epoch 7 | 1959/ 5600 batches | train loss 0.3616478 +| epoch 7 | 1963/ 5600 batches | train loss 0.3363124 +| epoch 7 | 1967/ 5600 batches | train loss 0.4172139 +| epoch 7 | 1971/ 5600 batches | train loss 0.3616925 +| epoch 7 | 1975/ 5600 batches | train loss 0.4013717 +| epoch 7 | 1979/ 5600 batches | train loss 0.3358626 +| epoch 7 | 1983/ 5600 batches | train loss 0.2642040 +| epoch 7 | 1987/ 5600 batches | train loss 0.3493009 +| epoch 7 | 1991/ 5600 batches | train loss 0.3534759 +| epoch 7 | 1995/ 5600 batches | train loss 0.3419700 +| epoch 7 | 1999/ 5600 batches | train loss 0.3601245 +| epoch 7 | 2003/ 5600 batches | train loss 0.3053215 +| epoch 7 | 2007/ 5600 batches | train loss 0.3505720 +| epoch 7 | 2011/ 5600 batches | train loss 0.3772628 +| epoch 7 | 2015/ 5600 batches | train loss 0.4178088 +| epoch 7 | 2019/ 5600 batches | train loss 0.3947151 +| epoch 7 | 2023/ 5600 batches | train loss 0.3806649 +| epoch 7 | 2027/ 5600 batches | train loss 0.4500149 +| epoch 7 | 2031/ 5600 batches | train loss 0.3975638 +| epoch 7 | 2035/ 5600 batches | train loss 0.3817714 +| epoch 7 | 2039/ 5600 batches | train loss 0.3374895 +| epoch 7 | 2043/ 5600 batches | train loss 0.3658916 +| epoch 7 | 2047/ 5600 batches | train loss 0.3204574 +| epoch 7 | 2051/ 5600 batches | train loss 0.2514661 +| epoch 7 | 2055/ 5600 batches | train loss 0.4011945 +| epoch 7 | 2059/ 5600 batches | train loss 0.4097601 +| epoch 7 | 2063/ 5600 batches | train loss 0.3860956 +| epoch 7 | 2067/ 5600 batches | train loss 0.3887995 +| epoch 7 | 2071/ 5600 batches | train loss 0.3681743 +| epoch 7 | 2075/ 5600 batches | train loss 0.3370784 +| epoch 7 | 2079/ 5600 batches | train loss 0.4292312 +| epoch 7 | 2083/ 5600 batches | train loss 0.3132139 +| epoch 7 | 2087/ 5600 batches | train loss 0.3587619 +| epoch 7 | 2091/ 5600 batches | train loss 0.3862745 +| epoch 7 | 2095/ 5600 batches | train loss 0.3367041 +| epoch 7 | 2099/ 5600 batches | train loss 0.3298003 +| epoch 7 | 2103/ 5600 batches | train loss 0.3426686 +| epoch 7 | 2107/ 5600 batches | train loss 0.3467578 +| epoch 7 | 2111/ 5600 batches | train loss 0.3134555 +| epoch 7 | 2115/ 5600 batches | train loss 0.3560583 +| epoch 7 | 2119/ 5600 batches | train loss 0.3933892 +| epoch 7 | 2123/ 5600 batches | train loss 0.4638380 +| epoch 7 | 2127/ 5600 batches | train loss 0.4295749 +| epoch 7 | 2131/ 5600 batches | train loss 0.3760760 +| epoch 7 | 2135/ 5600 batches | train loss 0.3356154 +| epoch 7 | 2139/ 5600 batches | train loss 0.3055792 +| epoch 7 | 2143/ 5600 batches | train loss 0.3145841 +| epoch 7 | 2147/ 5600 batches | train loss 0.3273913 +| epoch 7 | 2151/ 5600 batches | train loss 0.4093455 +| epoch 7 | 2155/ 5600 batches | train loss 0.3442487 +| epoch 7 | 2159/ 5600 batches | train loss 0.2988575 +| epoch 7 | 2163/ 5600 batches | train loss 0.3263364 +| epoch 7 | 2167/ 5600 batches | train loss 0.2942994 +| epoch 7 | 2171/ 5600 batches | train loss 0.3688925 +| epoch 7 | 2175/ 5600 batches | train loss 0.3258205 +| epoch 7 | 2179/ 5600 batches | train loss 0.3180186 +| epoch 7 | 2183/ 5600 batches | train loss 0.3849535 +| epoch 7 | 2187/ 5600 batches | train loss 0.3639499 +| epoch 7 | 2191/ 5600 batches | train loss 0.3149883 +| epoch 7 | 2195/ 5600 batches | train loss 0.3693011 +| epoch 7 | 2199/ 5600 batches | train loss 0.3657732 +| epoch 7 | 2203/ 5600 batches | train loss 0.4575629 +| epoch 7 | 2207/ 5600 batches | train loss 0.3519102 +| epoch 7 | 2211/ 5600 batches | train loss 0.3400835 +| epoch 7 | 2215/ 5600 batches | train loss 0.3602769 +| epoch 7 | 2219/ 5600 batches | train loss 0.3989040 +| epoch 7 | 2223/ 5600 batches | train loss 0.3921748 +| epoch 7 | 2227/ 5600 batches | train loss 0.3865263 +| epoch 7 | 2231/ 5600 batches | train loss 0.3692254 +| epoch 7 | 2235/ 5600 batches | train loss 0.4056093 +| epoch 7 | 2239/ 5600 batches | train loss 0.3304071 +| epoch 7 | 2243/ 5600 batches | train loss 0.4072131 +| epoch 7 | 2247/ 5600 batches | train loss 0.3882078 +| epoch 7 | 2251/ 5600 batches | train loss 0.2939232 +| epoch 7 | 2255/ 5600 batches | train loss 0.3919340 +| epoch 7 | 2259/ 5600 batches | train loss 0.3722231 +| epoch 7 | 2263/ 5600 batches | train loss 0.3564523 +| epoch 7 | 2267/ 5600 batches | train loss 0.3295663 +| epoch 7 | 2271/ 5600 batches | train loss 0.3337642 +| epoch 7 | 2275/ 5600 batches | train loss 0.3841198 +| epoch 7 | 2279/ 5600 batches | train loss 0.3827585 +| epoch 7 | 2283/ 5600 batches | train loss 0.3510538 +| epoch 7 | 2287/ 5600 batches | train loss 0.4107121 +| epoch 7 | 2291/ 5600 batches | train loss 0.3515142 +| epoch 7 | 2295/ 5600 batches | train loss 0.4125363 +| epoch 7 | 2299/ 5600 batches | train loss 0.4266727 +| epoch 7 | 2303/ 5600 batches | train loss 0.3465042 +| epoch 7 | 2307/ 5600 batches | train loss 0.3896065 +| epoch 7 | 2311/ 5600 batches | train loss 0.4257241 +| epoch 7 | 2315/ 5600 batches | train loss 0.3920979 +| epoch 7 | 2319/ 5600 batches | train loss 0.3646353 +| epoch 7 | 2323/ 5600 batches | train loss 0.3927700 +| epoch 7 | 2327/ 5600 batches | train loss 0.4343513 +| epoch 7 | 2331/ 5600 batches | train loss 0.3543964 +| epoch 7 | 2335/ 5600 batches | train loss 0.3813593 +| epoch 7 | 2339/ 5600 batches | train loss 0.2233486 +| epoch 7 | 2343/ 5600 batches | train loss 0.3730931 +| epoch 7 | 2347/ 5600 batches | train loss 0.3352090 +| epoch 7 | 2351/ 5600 batches | train loss 0.3968633 +| epoch 7 | 2355/ 5600 batches | train loss 0.3827097 +| epoch 7 | 2359/ 5600 batches | train loss 0.3498462 +| epoch 7 | 2363/ 5600 batches | train loss 0.3896739 +| epoch 7 | 2367/ 5600 batches | train loss 0.2244891 +| epoch 7 | 2371/ 5600 batches | train loss 0.3929422 +| epoch 7 | 2375/ 5600 batches | train loss 0.3582230 +| epoch 7 | 2379/ 5600 batches | train loss 0.3196792 +| epoch 7 | 2383/ 5600 batches | train loss 0.3742830 +| epoch 7 | 2387/ 5600 batches | train loss 0.3503360 +| epoch 7 | 2391/ 5600 batches | train loss 0.3163521 +| epoch 7 | 2395/ 5600 batches | train loss 0.4013378 +| epoch 7 | 2399/ 5600 batches | train loss 0.4641612 +| epoch 7 | 2403/ 5600 batches | train loss 0.3477672 +| epoch 7 | 2407/ 5600 batches | train loss 0.3674400 +| epoch 7 | 2411/ 5600 batches | train loss 0.4135500 +| epoch 7 | 2415/ 5600 batches | train loss 0.4659345 +| epoch 7 | 2419/ 5600 batches | train loss 0.4015913 +| epoch 7 | 2423/ 5600 batches | train loss 0.3700240 +| epoch 7 | 2427/ 5600 batches | train loss 0.3538069 +| epoch 7 | 2431/ 5600 batches | train loss 0.3453432 +| epoch 7 | 2435/ 5600 batches | train loss 0.3179401 +| epoch 7 | 2439/ 5600 batches | train loss 0.3454084 +| epoch 7 | 2443/ 5600 batches | train loss 0.4285882 +| epoch 7 | 2447/ 5600 batches | train loss 0.3339403 +| epoch 7 | 2451/ 5600 batches | train loss 0.2966202 +| epoch 7 | 2455/ 5600 batches | train loss 0.4032634 +| epoch 7 | 2459/ 5600 batches | train loss 0.3460627 +| epoch 7 | 2463/ 5600 batches | train loss 0.3582556 +| epoch 7 | 2467/ 5600 batches | train loss 0.3541931 +| epoch 7 | 2471/ 5600 batches | train loss 0.3293594 +| epoch 7 | 2475/ 5600 batches | train loss 0.3806922 +| epoch 7 | 2479/ 5600 batches | train loss 0.3862029 +| epoch 7 | 2483/ 5600 batches | train loss 0.3697444 +| epoch 7 | 2487/ 5600 batches | train loss 0.3641821 +| epoch 7 | 2491/ 5600 batches | train loss 0.3474795 +| epoch 7 | 2495/ 5600 batches | train loss 0.3572176 +| epoch 7 | 2499/ 5600 batches | train loss 0.3385579 +| epoch 7 | 2503/ 5600 batches | train loss 0.3441785 +| epoch 7 | 2507/ 5600 batches | train loss 0.3731752 +| epoch 7 | 2511/ 5600 batches | train loss 0.3587777 +| epoch 7 | 2515/ 5600 batches | train loss 0.3585876 +| epoch 7 | 2519/ 5600 batches | train loss 0.3368003 +| epoch 7 | 2523/ 5600 batches | train loss 0.3761552 +| epoch 7 | 2527/ 5600 batches | train loss 0.2695804 +| epoch 7 | 2531/ 5600 batches | train loss 0.4074376 +| epoch 7 | 2535/ 5600 batches | train loss 0.3556970 +| epoch 7 | 2539/ 5600 batches | train loss 0.3717761 +| epoch 7 | 2543/ 5600 batches | train loss 0.3610491 +| epoch 7 | 2547/ 5600 batches | train loss 0.3719895 +| epoch 7 | 2551/ 5600 batches | train loss 0.4490156 +| epoch 7 | 2555/ 5600 batches | train loss 0.3301997 +| epoch 7 | 2559/ 5600 batches | train loss 0.3763639 +| epoch 7 | 2563/ 5600 batches | train loss 0.3252097 +| epoch 7 | 2567/ 5600 batches | train loss 0.2963702 +| epoch 7 | 2571/ 5600 batches | train loss 0.3902196 +| epoch 7 | 2575/ 5600 batches | train loss 0.4218673 +| epoch 7 | 2579/ 5600 batches | train loss 0.3264910 +| epoch 7 | 2583/ 5600 batches | train loss 0.3766838 +| epoch 7 | 2587/ 5600 batches | train loss 0.5064059 +| epoch 7 | 2591/ 5600 batches | train loss 0.2265940 +| epoch 7 | 2595/ 5600 batches | train loss 0.3877200 +| epoch 7 | 2599/ 5600 batches | train loss 0.1530899 +| epoch 7 | 2603/ 5600 batches | train loss 0.3231120 +| epoch 7 | 2607/ 5600 batches | train loss 0.3953522 +| epoch 7 | 2611/ 5600 batches | train loss 0.3559062 +| epoch 7 | 2615/ 5600 batches | train loss 0.4280798 +| epoch 7 | 2619/ 5600 batches | train loss 0.3893204 +| epoch 7 | 2623/ 5600 batches | train loss 0.1447378 +| epoch 7 | 2627/ 5600 batches | train loss 0.4212418 +| epoch 7 | 2631/ 5600 batches | train loss 0.3505094 +| epoch 7 | 2635/ 5600 batches | train loss 0.3900852 +| epoch 7 | 2639/ 5600 batches | train loss 0.3127699 +| epoch 7 | 2643/ 5600 batches | train loss 0.2899297 +| epoch 7 | 2647/ 5600 batches | train loss 0.4065120 +| epoch 7 | 2651/ 5600 batches | train loss 0.4070588 +| epoch 7 | 2655/ 5600 batches | train loss 0.4488233 +| epoch 7 | 2659/ 5600 batches | train loss 0.3269068 +| epoch 7 | 2663/ 5600 batches | train loss 0.3661777 +| epoch 7 | 2667/ 5600 batches | train loss 0.3980086 +| epoch 7 | 2671/ 5600 batches | train loss 0.4403266 +| epoch 7 | 2675/ 5600 batches | train loss 0.3774536 +| epoch 7 | 2679/ 5600 batches | train loss 0.3236020 +| epoch 7 | 2683/ 5600 batches | train loss 0.4022632 +| epoch 7 | 2687/ 5600 batches | train loss 0.2996895 +| epoch 7 | 2691/ 5600 batches | train loss 0.3798024 +| epoch 7 | 2695/ 5600 batches | train loss 0.3997206 +| epoch 7 | 2699/ 5600 batches | train loss 0.4185466 +| epoch 7 | 2703/ 5600 batches | train loss 0.4389154 +| epoch 7 | 2707/ 5600 batches | train loss 0.3234009 +| epoch 7 | 2711/ 5600 batches | train loss 0.3766250 +| epoch 7 | 2715/ 5600 batches | train loss 0.3287916 +| epoch 7 | 2719/ 5600 batches | train loss 0.3631596 +| epoch 7 | 2723/ 5600 batches | train loss 0.3571529 +| epoch 7 | 2727/ 5600 batches | train loss 0.3084655 +| epoch 7 | 2731/ 5600 batches | train loss 0.3632497 +| epoch 7 | 2735/ 5600 batches | train loss 0.3883570 +| epoch 7 | 2739/ 5600 batches | train loss 0.3303583 +| epoch 7 | 2743/ 5600 batches | train loss 0.4118210 +| epoch 7 | 2747/ 5600 batches | train loss 0.3249537 +| epoch 7 | 2751/ 5600 batches | train loss 0.3988177 +| epoch 7 | 2755/ 5600 batches | train loss 0.3131796 +| epoch 7 | 2759/ 5600 batches | train loss 0.3716325 +| epoch 7 | 2763/ 5600 batches | train loss 0.4263219 +| epoch 7 | 2767/ 5600 batches | train loss 0.3479083 +| epoch 7 | 2771/ 5600 batches | train loss 0.3263557 +| epoch 7 | 2775/ 5600 batches | train loss 0.4074754 +| epoch 7 | 2779/ 5600 batches | train loss 0.3620218 +| epoch 7 | 2783/ 5600 batches | train loss 0.4507663 +| epoch 7 | 2787/ 5600 batches | train loss 0.3692418 +| epoch 7 | 2791/ 5600 batches | train loss 0.4476629 +| epoch 7 | 2795/ 5600 batches | train loss 0.3319157 +| epoch 7 | 2799/ 5600 batches | train loss 0.3559164 +| epoch 7 | 2803/ 5600 batches | train loss 0.4039791 +| epoch 7 | 2807/ 5600 batches | train loss 0.3965240 +| epoch 7 | 2811/ 5600 batches | train loss 0.2993107 +| epoch 7 | 2815/ 5600 batches | train loss 0.3723554 +| epoch 7 | 2819/ 5600 batches | train loss 0.3449105 +| epoch 7 | 2823/ 5600 batches | train loss 0.4062642 +| epoch 7 | 2827/ 5600 batches | train loss 0.4184245 +| epoch 7 | 2831/ 5600 batches | train loss 0.4043120 +| epoch 7 | 2835/ 5600 batches | train loss 0.3920214 +| epoch 7 | 2839/ 5600 batches | train loss 0.3993170 +| epoch 7 | 2843/ 5600 batches | train loss 0.3312981 +| epoch 7 | 2847/ 5600 batches | train loss 0.3519157 +| epoch 7 | 2851/ 5600 batches | train loss 0.3942223 +| epoch 7 | 2855/ 5600 batches | train loss 0.3559400 +| epoch 7 | 2859/ 5600 batches | train loss 0.3957900 +| epoch 7 | 2863/ 5600 batches | train loss 0.3217160 +| epoch 7 | 2867/ 5600 batches | train loss 0.3774768 +| epoch 7 | 2871/ 5600 batches | train loss 0.3442461 +| epoch 7 | 2875/ 5600 batches | train loss 0.4265715 +| epoch 7 | 2879/ 5600 batches | train loss 0.3384388 +| epoch 7 | 2883/ 5600 batches | train loss 0.4044955 +| epoch 7 | 2887/ 5600 batches | train loss 0.3829598 +| epoch 7 | 2891/ 5600 batches | train loss 0.3705110 +| epoch 7 | 2895/ 5600 batches | train loss 0.4190437 +| epoch 7 | 2899/ 5600 batches | train loss 0.3051036 +| epoch 7 | 2903/ 5600 batches | train loss 0.4060140 +| epoch 7 | 2907/ 5600 batches | train loss 0.2960389 +| epoch 7 | 2911/ 5600 batches | train loss 0.3638312 +| epoch 7 | 2915/ 5600 batches | train loss 0.3086621 +| epoch 7 | 2919/ 5600 batches | train loss 0.4032967 +| epoch 7 | 2923/ 5600 batches | train loss 0.3776143 +| epoch 7 | 2927/ 5600 batches | train loss 0.3152539 +| epoch 7 | 2931/ 5600 batches | train loss 0.4069483 +| epoch 7 | 2935/ 5600 batches | train loss 0.3988419 +| epoch 7 | 2939/ 5600 batches | train loss 0.3877664 +| epoch 7 | 2943/ 5600 batches | train loss 0.3750573 +| epoch 7 | 2947/ 5600 batches | train loss 0.3903504 +| epoch 7 | 2951/ 5600 batches | train loss 0.2900920 +| epoch 7 | 2955/ 5600 batches | train loss 0.3766688 +| epoch 7 | 2959/ 5600 batches | train loss 0.3295359 +| epoch 7 | 2963/ 5600 batches | train loss 0.3731878 +| epoch 7 | 2967/ 5600 batches | train loss 0.3842866 +| epoch 7 | 2971/ 5600 batches | train loss 0.3670168 +| epoch 7 | 2975/ 5600 batches | train loss 0.3287801 +| epoch 7 | 2979/ 5600 batches | train loss 0.4298797 +| epoch 7 | 2983/ 5600 batches | train loss 0.2797155 +| epoch 7 | 2987/ 5600 batches | train loss 0.3819615 +| epoch 7 | 2991/ 5600 batches | train loss 0.3928762 +| epoch 7 | 2995/ 5600 batches | train loss 0.3384882 +| epoch 7 | 2999/ 5600 batches | train loss 0.3344153 +| epoch 7 | 3003/ 5600 batches | train loss 0.3630025 +| epoch 7 | 3007/ 5600 batches | train loss 0.3782385 +| epoch 7 | 3011/ 5600 batches | train loss 0.3957369 +| epoch 7 | 3015/ 5600 batches | train loss 0.3250741 +| epoch 7 | 3019/ 5600 batches | train loss 0.3158153 +| epoch 7 | 3023/ 5600 batches | train loss 0.3167529 +| epoch 7 | 3027/ 5600 batches | train loss 0.3642332 +| epoch 7 | 3031/ 5600 batches | train loss 0.3819239 +| epoch 7 | 3035/ 5600 batches | train loss 0.3208717 +| epoch 7 | 3039/ 5600 batches | train loss 0.3888033 +| epoch 7 | 3043/ 5600 batches | train loss 0.3185430 +| epoch 7 | 3047/ 5600 batches | train loss 0.3731714 +| epoch 7 | 3051/ 5600 batches | train loss 0.3487977 +| epoch 7 | 3055/ 5600 batches | train loss 0.3085143 +| epoch 7 | 3059/ 5600 batches | train loss 0.5080837 +| epoch 7 | 3063/ 5600 batches | train loss 0.3346415 +| epoch 7 | 3067/ 5600 batches | train loss 0.4453439 +| epoch 7 | 3071/ 5600 batches | train loss 0.4388638 +| epoch 7 | 3075/ 5600 batches | train loss 0.4211206 +| epoch 7 | 3079/ 5600 batches | train loss 0.3314319 +| epoch 7 | 3083/ 5600 batches | train loss 0.4079592 +| epoch 7 | 3087/ 5600 batches | train loss 0.3929865 +| epoch 7 | 3091/ 5600 batches | train loss 0.3946631 +| epoch 7 | 3095/ 5600 batches | train loss 0.2976230 +| epoch 7 | 3099/ 5600 batches | train loss 0.3476691 +| epoch 7 | 3103/ 5600 batches | train loss 0.4214115 +| epoch 7 | 3107/ 5600 batches | train loss 0.3115883 +| epoch 7 | 3111/ 5600 batches | train loss 0.3331457 +| epoch 7 | 3115/ 5600 batches | train loss 0.3521779 +| epoch 7 | 3119/ 5600 batches | train loss 0.4114543 +| epoch 7 | 3123/ 5600 batches | train loss 0.2853449 +| epoch 7 | 3127/ 5600 batches | train loss 0.3750599 +| epoch 7 | 3131/ 5600 batches | train loss 0.3970345 +| epoch 7 | 3135/ 5600 batches | train loss 0.3320181 +| epoch 7 | 3139/ 5600 batches | train loss 0.3058093 +| epoch 7 | 3143/ 5600 batches | train loss 0.4374416 +| epoch 7 | 3147/ 5600 batches | train loss 0.3223256 +| epoch 7 | 3151/ 5600 batches | train loss 0.4963108 +| epoch 7 | 3155/ 5600 batches | train loss 0.3478552 +| epoch 7 | 3159/ 5600 batches | train loss 0.2958732 +| epoch 7 | 3163/ 5600 batches | train loss 0.3564344 +| epoch 7 | 3167/ 5600 batches | train loss 0.3939994 +| epoch 7 | 3171/ 5600 batches | train loss 0.3254806 +| epoch 7 | 3175/ 5600 batches | train loss 0.3512392 +| epoch 7 | 3179/ 5600 batches | train loss 0.3241795 +| epoch 7 | 3183/ 5600 batches | train loss 0.3771039 +| epoch 7 | 3187/ 5600 batches | train loss 0.3618251 +| epoch 7 | 3191/ 5600 batches | train loss 0.3343026 +| epoch 7 | 3195/ 5600 batches | train loss 0.3340674 +| epoch 7 | 3199/ 5600 batches | train loss 0.4253417 +| epoch 7 | 3203/ 5600 batches | train loss 0.3726780 +| epoch 7 | 3207/ 5600 batches | train loss 0.4071167 +| epoch 7 | 3211/ 5600 batches | train loss 0.3775944 +| epoch 7 | 3215/ 5600 batches | train loss 0.3765408 +| epoch 7 | 3219/ 5600 batches | train loss 0.3441973 +| epoch 7 | 3223/ 5600 batches | train loss 0.3704745 +| epoch 7 | 3227/ 5600 batches | train loss 0.4036021 +| epoch 7 | 3231/ 5600 batches | train loss 0.3373375 +| epoch 7 | 3235/ 5600 batches | train loss 0.4040949 +| epoch 7 | 3239/ 5600 batches | train loss 0.3803591 +| epoch 7 | 3243/ 5600 batches | train loss 0.2737394 +| epoch 7 | 3247/ 5600 batches | train loss 0.3440819 +| epoch 7 | 3251/ 5600 batches | train loss 0.3072575 +| epoch 7 | 3255/ 5600 batches | train loss 0.4278274 +| epoch 7 | 3259/ 5600 batches | train loss 0.3381793 +| epoch 7 | 3263/ 5600 batches | train loss 0.4304376 +| epoch 7 | 3267/ 5600 batches | train loss 0.4289470 +| epoch 7 | 3271/ 5600 batches | train loss 0.3195000 +| epoch 7 | 3275/ 5600 batches | train loss 0.4267603 +| epoch 7 | 3279/ 5600 batches | train loss 0.3686592 +| epoch 7 | 3283/ 5600 batches | train loss 0.3193227 +| epoch 7 | 3287/ 5600 batches | train loss 0.3451076 +| epoch 7 | 3291/ 5600 batches | train loss 0.4115611 +| epoch 7 | 3295/ 5600 batches | train loss 0.4197855 +| epoch 7 | 3299/ 5600 batches | train loss 0.3591546 +| epoch 7 | 3303/ 5600 batches | train loss 0.4022085 +| epoch 7 | 3307/ 5600 batches | train loss 0.4353703 +| epoch 7 | 3311/ 5600 batches | train loss 0.4044995 +| epoch 7 | 3315/ 5600 batches | train loss 0.3262778 +| epoch 7 | 3319/ 5600 batches | train loss 0.4549189 +| epoch 7 | 3323/ 5600 batches | train loss 0.3941024 +| epoch 7 | 3327/ 5600 batches | train loss 0.3925832 +| epoch 7 | 3331/ 5600 batches | train loss 0.3478754 +| epoch 7 | 3335/ 5600 batches | train loss 0.3681801 +| epoch 7 | 3339/ 5600 batches | train loss 0.3902815 +| epoch 7 | 3343/ 5600 batches | train loss 0.3677428 +| epoch 7 | 3347/ 5600 batches | train loss 0.4140655 +| epoch 7 | 3351/ 5600 batches | train loss 0.4238776 +| epoch 7 | 3355/ 5600 batches | train loss 0.3001807 +| epoch 7 | 3359/ 5600 batches | train loss 0.3471435 +| epoch 7 | 3363/ 5600 batches | train loss 0.3301239 +| epoch 7 | 3367/ 5600 batches | train loss 0.4272403 +| epoch 7 | 3371/ 5600 batches | train loss 0.3347012 +| epoch 7 | 3375/ 5600 batches | train loss 0.3964234 +| epoch 7 | 3379/ 5600 batches | train loss 0.3821155 +| epoch 7 | 3383/ 5600 batches | train loss 0.4150163 +| epoch 7 | 3387/ 5600 batches | train loss 0.3038481 +| epoch 7 | 3391/ 5600 batches | train loss 0.3328271 +| epoch 7 | 3395/ 5600 batches | train loss 0.3216702 +| epoch 7 | 3399/ 5600 batches | train loss 0.4357110 +| epoch 7 | 3403/ 5600 batches | train loss 0.3306093 +| epoch 7 | 3407/ 5600 batches | train loss 0.3420442 +| epoch 7 | 3411/ 5600 batches | train loss 0.2543226 +| epoch 7 | 3415/ 5600 batches | train loss 0.3830685 +| epoch 7 | 3419/ 5600 batches | train loss 0.3080612 +| epoch 7 | 3423/ 5600 batches | train loss 0.3619899 +| epoch 7 | 3427/ 5600 batches | train loss 0.3483329 +| epoch 7 | 3431/ 5600 batches | train loss 0.4562770 +| epoch 7 | 3435/ 5600 batches | train loss 0.2741742 +| epoch 7 | 3439/ 5600 batches | train loss 0.3752425 +| epoch 7 | 3443/ 5600 batches | train loss 0.3864602 +| epoch 7 | 3447/ 5600 batches | train loss 0.3293013 +| epoch 7 | 3451/ 5600 batches | train loss 0.2864607 +| epoch 7 | 3455/ 5600 batches | train loss 0.3622101 +| epoch 7 | 3459/ 5600 batches | train loss 0.3966978 +| epoch 7 | 3463/ 5600 batches | train loss 0.3068688 +| epoch 7 | 3467/ 5600 batches | train loss 0.3574683 +| epoch 7 | 3471/ 5600 batches | train loss 0.3823946 +| epoch 7 | 3475/ 5600 batches | train loss 0.4134005 +| epoch 7 | 3479/ 5600 batches | train loss 0.4334331 +| epoch 7 | 3483/ 5600 batches | train loss 0.3596958 +| epoch 7 | 3487/ 5600 batches | train loss 0.3415749 +| epoch 7 | 3491/ 5600 batches | train loss 0.3750720 +| epoch 7 | 3495/ 5600 batches | train loss 0.4035328 +| epoch 7 | 3499/ 5600 batches | train loss 0.3257259 +| epoch 7 | 3503/ 5600 batches | train loss 0.3126040 +| epoch 7 | 3507/ 5600 batches | train loss 0.3790288 +| epoch 7 | 3511/ 5600 batches | train loss 0.3614972 +| epoch 7 | 3515/ 5600 batches | train loss 0.4004404 +| epoch 7 | 3519/ 5600 batches | train loss 0.3907767 +| epoch 7 | 3523/ 5600 batches | train loss 0.3225398 +| epoch 7 | 3527/ 5600 batches | train loss 0.3747180 +| epoch 7 | 3531/ 5600 batches | train loss 0.2902644 +| epoch 7 | 3535/ 5600 batches | train loss 0.3317167 +| epoch 7 | 3539/ 5600 batches | train loss 0.3293781 +| epoch 7 | 3543/ 5600 batches | train loss 0.3301692 +| epoch 7 | 3547/ 5600 batches | train loss 0.3118712 +| epoch 7 | 3551/ 5600 batches | train loss 0.3410537 +| epoch 7 | 3555/ 5600 batches | train loss 0.2709452 +| epoch 7 | 3559/ 5600 batches | train loss 0.3187461 +| epoch 7 | 3563/ 5600 batches | train loss 0.3121371 +| epoch 7 | 3567/ 5600 batches | train loss 0.3530865 +| epoch 7 | 3571/ 5600 batches | train loss 0.3434601 +| epoch 7 | 3575/ 5600 batches | train loss 0.3869313 +| epoch 7 | 3579/ 5600 batches | train loss 0.3478989 +| epoch 7 | 3583/ 5600 batches | train loss 0.3400837 +| epoch 7 | 3587/ 5600 batches | train loss 0.3990645 +| epoch 7 | 3591/ 5600 batches | train loss 0.3846141 +| epoch 7 | 3595/ 5600 batches | train loss 0.4493566 +| epoch 7 | 3599/ 5600 batches | train loss 0.4042313 +| epoch 7 | 3603/ 5600 batches | train loss 0.3716297 +| epoch 7 | 3607/ 5600 batches | train loss 0.4218374 +| epoch 7 | 3611/ 5600 batches | train loss 0.3653999 +| epoch 7 | 3615/ 5600 batches | train loss 0.3359350 +| epoch 7 | 3619/ 5600 batches | train loss 0.3934460 +| epoch 7 | 3623/ 5600 batches | train loss 0.3975276 +| epoch 7 | 3627/ 5600 batches | train loss 0.3733368 +| epoch 7 | 3631/ 5600 batches | train loss 0.3657297 +| epoch 7 | 3635/ 5600 batches | train loss 0.3942970 +| epoch 7 | 3639/ 5600 batches | train loss 0.2389514 +| epoch 7 | 3643/ 5600 batches | train loss 0.3601274 +| epoch 7 | 3647/ 5600 batches | train loss 0.3707501 +| epoch 7 | 3651/ 5600 batches | train loss 0.3673203 +| epoch 7 | 3655/ 5600 batches | train loss 0.4123546 +| epoch 7 | 3659/ 5600 batches | train loss 0.3751628 +| epoch 7 | 3663/ 5600 batches | train loss 0.3619009 +| epoch 7 | 3667/ 5600 batches | train loss 0.4379621 +| epoch 7 | 3671/ 5600 batches | train loss 0.4041360 +| epoch 7 | 3675/ 5600 batches | train loss 0.3314725 +| epoch 7 | 3679/ 5600 batches | train loss 0.3668480 +| epoch 7 | 3683/ 5600 batches | train loss 0.4376091 +| epoch 7 | 3687/ 5600 batches | train loss 0.4124215 +| epoch 7 | 3691/ 5600 batches | train loss 0.3789969 +| epoch 7 | 3695/ 5600 batches | train loss 0.3993448 +| epoch 7 | 3699/ 5600 batches | train loss 0.3444914 +| epoch 7 | 3703/ 5600 batches | train loss 0.4014666 +| epoch 7 | 3707/ 5600 batches | train loss 0.3454164 +| epoch 7 | 3711/ 5600 batches | train loss 0.3681195 +| epoch 7 | 3715/ 5600 batches | train loss 0.4078866 +| epoch 7 | 3719/ 5600 batches | train loss 0.3715014 +| epoch 7 | 3723/ 5600 batches | train loss 0.4115951 +| epoch 7 | 3727/ 5600 batches | train loss 0.3835365 +| epoch 7 | 3731/ 5600 batches | train loss 0.3504558 +| epoch 7 | 3735/ 5600 batches | train loss 0.3740431 +| epoch 7 | 3739/ 5600 batches | train loss 0.3252836 +| epoch 7 | 3743/ 5600 batches | train loss 0.4523325 +| epoch 7 | 3747/ 5600 batches | train loss 0.3332651 +| epoch 7 | 3751/ 5600 batches | train loss 0.3313698 +| epoch 7 | 3755/ 5600 batches | train loss 0.3490389 +| epoch 7 | 3759/ 5600 batches | train loss 0.3909349 +| epoch 7 | 3763/ 5600 batches | train loss 0.3775702 +| epoch 7 | 3767/ 5600 batches | train loss 0.4316275 +| epoch 7 | 3771/ 5600 batches | train loss 0.3526677 +| epoch 7 | 3775/ 5600 batches | train loss 0.3807690 +| epoch 7 | 3779/ 5600 batches | train loss 0.3723354 +| epoch 7 | 3783/ 5600 batches | train loss 0.3878576 +| epoch 7 | 3787/ 5600 batches | train loss 0.3577209 +| epoch 7 | 3791/ 5600 batches | train loss 0.3659027 +| epoch 7 | 3795/ 5600 batches | train loss 0.3552018 +| epoch 7 | 3799/ 5600 batches | train loss 0.3584316 +| epoch 7 | 3803/ 5600 batches | train loss 0.3566456 +| epoch 7 | 3807/ 5600 batches | train loss 0.2979764 +| epoch 7 | 3811/ 5600 batches | train loss 0.2888528 +| epoch 7 | 3815/ 5600 batches | train loss 0.3469992 +| epoch 7 | 3819/ 5600 batches | train loss 0.3762889 +| epoch 7 | 3823/ 5600 batches | train loss 0.3287112 +| epoch 7 | 3827/ 5600 batches | train loss 0.4034931 +| epoch 7 | 3831/ 5600 batches | train loss 0.3619204 +| epoch 7 | 3835/ 5600 batches | train loss 0.3741652 +| epoch 7 | 3839/ 5600 batches | train loss 0.3273528 +| epoch 7 | 3843/ 5600 batches | train loss 0.3251796 +| epoch 7 | 3847/ 5600 batches | train loss 0.3991456 +| epoch 7 | 3851/ 5600 batches | train loss 0.3598291 +| epoch 7 | 3855/ 5600 batches | train loss 0.3999461 +| epoch 7 | 3859/ 5600 batches | train loss 0.3136750 +| epoch 7 | 3863/ 5600 batches | train loss 0.3680629 +| epoch 7 | 3867/ 5600 batches | train loss 0.3271933 +| epoch 7 | 3871/ 5600 batches | train loss 0.4796227 +| epoch 7 | 3875/ 5600 batches | train loss 0.3774919 +| epoch 7 | 3879/ 5600 batches | train loss 0.3425743 +| epoch 7 | 3883/ 5600 batches | train loss 0.3489757 +| epoch 7 | 3887/ 5600 batches | train loss 0.3697324 +| epoch 7 | 3891/ 5600 batches | train loss 0.1532958 +| epoch 7 | 3895/ 5600 batches | train loss 0.3698679 +| epoch 7 | 3899/ 5600 batches | train loss 0.3568532 +| epoch 7 | 3903/ 5600 batches | train loss 0.3223392 +| epoch 7 | 3907/ 5600 batches | train loss 0.3582861 +| epoch 7 | 3911/ 5600 batches | train loss 0.3664973 +| epoch 7 | 3915/ 5600 batches | train loss 0.3052773 +| epoch 7 | 3919/ 5600 batches | train loss 0.3437206 +| epoch 7 | 3923/ 5600 batches | train loss 0.3058589 +| epoch 7 | 3927/ 5600 batches | train loss 0.3537097 +| epoch 7 | 3931/ 5600 batches | train loss 0.3844618 +| epoch 7 | 3935/ 5600 batches | train loss 0.4793621 +| epoch 7 | 3939/ 5600 batches | train loss 0.3403532 +| epoch 7 | 3943/ 5600 batches | train loss 0.3141718 +| epoch 7 | 3947/ 5600 batches | train loss 0.3308414 +| epoch 7 | 3951/ 5600 batches | train loss 0.3278376 +| epoch 7 | 3955/ 5600 batches | train loss 0.3792039 +| epoch 7 | 3959/ 5600 batches | train loss 0.4212637 +| epoch 7 | 3963/ 5600 batches | train loss 0.4193444 +| epoch 7 | 3967/ 5600 batches | train loss 0.4282285 +| epoch 7 | 3971/ 5600 batches | train loss 0.3448161 +| epoch 7 | 3975/ 5600 batches | train loss 0.4048837 +| epoch 7 | 3979/ 5600 batches | train loss 0.3414211 +| epoch 7 | 3983/ 5600 batches | train loss 0.2946754 +| epoch 7 | 3987/ 5600 batches | train loss 0.3772852 +| epoch 7 | 3991/ 5600 batches | train loss 0.4144873 +| epoch 7 | 3995/ 5600 batches | train loss 0.3418437 +| epoch 7 | 3999/ 5600 batches | train loss 0.3839809 +| epoch 7 | 4003/ 5600 batches | train loss 0.3423828 +| epoch 7 | 4007/ 5600 batches | train loss 0.3643737 +| epoch 7 | 4011/ 5600 batches | train loss 0.4390621 +| epoch 7 | 4015/ 5600 batches | train loss 0.4024265 +| epoch 7 | 4019/ 5600 batches | train loss 0.3619258 +| epoch 7 | 4023/ 5600 batches | train loss 0.3224169 +| epoch 7 | 4027/ 5600 batches | train loss 0.3608035 +| epoch 7 | 4031/ 5600 batches | train loss 0.3699998 +| epoch 7 | 4035/ 5600 batches | train loss 0.3807027 +| epoch 7 | 4039/ 5600 batches | train loss 0.3890967 +| epoch 7 | 4043/ 5600 batches | train loss 0.3114386 +| epoch 7 | 4047/ 5600 batches | train loss 0.4368076 +| epoch 7 | 4051/ 5600 batches | train loss 0.3535400 +| epoch 7 | 4055/ 5600 batches | train loss 0.2668675 +| epoch 7 | 4059/ 5600 batches | train loss 0.3760540 +| epoch 7 | 4063/ 5600 batches | train loss 0.3857319 +| epoch 7 | 4067/ 5600 batches | train loss 0.3589288 +| epoch 7 | 4071/ 5600 batches | train loss 0.3293813 +| epoch 7 | 4075/ 5600 batches | train loss 0.3669062 +| epoch 7 | 4079/ 5600 batches | train loss 0.3059916 +| epoch 7 | 4083/ 5600 batches | train loss 0.4422761 +| epoch 7 | 4087/ 5600 batches | train loss 0.3311498 +| epoch 7 | 4091/ 5600 batches | train loss 0.3820842 +| epoch 7 | 4095/ 5600 batches | train loss 0.4244103 +| epoch 7 | 4099/ 5600 batches | train loss 0.3721237 +| epoch 7 | 4103/ 5600 batches | train loss 0.4244064 +| epoch 7 | 4107/ 5600 batches | train loss 0.3372143 +| epoch 7 | 4111/ 5600 batches | train loss 0.3531752 +| epoch 7 | 4115/ 5600 batches | train loss 0.3754731 +| epoch 7 | 4119/ 5600 batches | train loss 0.3664154 +| epoch 7 | 4123/ 5600 batches | train loss 0.3842334 +| epoch 7 | 4127/ 5600 batches | train loss 0.4075088 +| epoch 7 | 4131/ 5600 batches | train loss 0.3231305 +| epoch 7 | 4135/ 5600 batches | train loss 0.2958675 +| epoch 7 | 4139/ 5600 batches | train loss 0.3644325 +| epoch 7 | 4143/ 5600 batches | train loss 0.3487632 +| epoch 7 | 4147/ 5600 batches | train loss 0.4389895 +| epoch 7 | 4151/ 5600 batches | train loss 0.4271184 +| epoch 7 | 4155/ 5600 batches | train loss 0.3917606 +| epoch 7 | 4159/ 5600 batches | train loss 0.3698272 +| epoch 7 | 4163/ 5600 batches | train loss 0.2906897 +| epoch 7 | 4167/ 5600 batches | train loss 0.3712409 +| epoch 7 | 4171/ 5600 batches | train loss 0.3854022 +| epoch 7 | 4175/ 5600 batches | train loss 0.4020880 +| epoch 7 | 4179/ 5600 batches | train loss 0.3241695 +| epoch 7 | 4183/ 5600 batches | train loss 0.2958977 +| epoch 7 | 4187/ 5600 batches | train loss 0.3851790 +| epoch 7 | 4191/ 5600 batches | train loss 0.4380616 +| epoch 7 | 4195/ 5600 batches | train loss 0.3837644 +| epoch 7 | 4199/ 5600 batches | train loss 0.3034794 +| epoch 7 | 4203/ 5600 batches | train loss 0.3439667 +| epoch 7 | 4207/ 5600 batches | train loss 0.3726407 +| epoch 7 | 4211/ 5600 batches | train loss 0.4400756 +| epoch 7 | 4215/ 5600 batches | train loss 0.3941613 +| epoch 7 | 4219/ 5600 batches | train loss 0.3928339 +| epoch 7 | 4223/ 5600 batches | train loss 0.3333919 +| epoch 7 | 4227/ 5600 batches | train loss 0.3560197 +| epoch 7 | 4231/ 5600 batches | train loss 0.3394011 +| epoch 7 | 4235/ 5600 batches | train loss 0.3514519 +| epoch 7 | 4239/ 5600 batches | train loss 0.3709727 +| epoch 7 | 4243/ 5600 batches | train loss 0.3624235 +| epoch 7 | 4247/ 5600 batches | train loss 0.2953925 +| epoch 7 | 4251/ 5600 batches | train loss 0.4043874 +| epoch 7 | 4255/ 5600 batches | train loss 0.3709005 +| epoch 7 | 4259/ 5600 batches | train loss 0.3253832 +| epoch 7 | 4263/ 5600 batches | train loss 0.4852529 +| epoch 7 | 4267/ 5600 batches | train loss 0.3733390 +| epoch 7 | 4271/ 5600 batches | train loss 0.3539384 +| epoch 7 | 4275/ 5600 batches | train loss 0.2940099 +| epoch 7 | 4279/ 5600 batches | train loss 0.4287925 +| epoch 7 | 4283/ 5600 batches | train loss 0.3476571 +| epoch 7 | 4287/ 5600 batches | train loss 0.3513953 +| epoch 7 | 4291/ 5600 batches | train loss 0.3325490 +| epoch 7 | 4295/ 5600 batches | train loss 0.3585374 +| epoch 7 | 4299/ 5600 batches | train loss 0.3672813 +| epoch 7 | 4303/ 5600 batches | train loss 0.3492117 +| epoch 7 | 4307/ 5600 batches | train loss 0.3519979 +| epoch 7 | 4311/ 5600 batches | train loss 0.3786675 +| epoch 7 | 4315/ 5600 batches | train loss 0.3803312 +| epoch 7 | 4319/ 5600 batches | train loss 0.3517375 +| epoch 7 | 4323/ 5600 batches | train loss 0.3402452 +| epoch 7 | 4327/ 5600 batches | train loss 0.4199897 +| epoch 7 | 4331/ 5600 batches | train loss 0.4327534 +| epoch 7 | 4335/ 5600 batches | train loss 0.3964812 +| epoch 7 | 4339/ 5600 batches | train loss 0.4099700 +| epoch 7 | 4343/ 5600 batches | train loss 0.3289209 +| epoch 7 | 4347/ 5600 batches | train loss 0.3457398 +| epoch 7 | 4351/ 5600 batches | train loss 0.3550020 +| epoch 7 | 4355/ 5600 batches | train loss 0.3662687 +| epoch 7 | 4359/ 5600 batches | train loss 0.4053635 +| epoch 7 | 4363/ 5600 batches | train loss 0.3261747 +| epoch 7 | 4367/ 5600 batches | train loss 0.2975666 +| epoch 7 | 4371/ 5600 batches | train loss 0.3454966 +| epoch 7 | 4375/ 5600 batches | train loss 0.3409908 +| epoch 7 | 4379/ 5600 batches | train loss 0.3717931 +| epoch 7 | 4383/ 5600 batches | train loss 0.4072971 +| epoch 7 | 4387/ 5600 batches | train loss 0.3822512 +| epoch 7 | 4391/ 5600 batches | train loss 0.4256257 +| epoch 7 | 4395/ 5600 batches | train loss 0.3048463 +| epoch 7 | 4399/ 5600 batches | train loss 0.3265667 +| epoch 7 | 4403/ 5600 batches | train loss 0.3279166 +| epoch 7 | 4407/ 5600 batches | train loss 0.3806429 +| epoch 7 | 4411/ 5600 batches | train loss 0.3496099 +| epoch 7 | 4415/ 5600 batches | train loss 0.4016476 +| epoch 7 | 4419/ 5600 batches | train loss 0.3122447 +| epoch 7 | 4423/ 5600 batches | train loss 0.3546565 +| epoch 7 | 4427/ 5600 batches | train loss 0.4837362 +| epoch 7 | 4431/ 5600 batches | train loss 0.3440855 +| epoch 7 | 4435/ 5600 batches | train loss 0.3994294 +| epoch 7 | 4439/ 5600 batches | train loss 0.3369879 +| epoch 7 | 4443/ 5600 batches | train loss 0.3136098 +| epoch 7 | 4447/ 5600 batches | train loss 0.3656525 +| epoch 7 | 4451/ 5600 batches | train loss 0.3689362 +| epoch 7 | 4455/ 5600 batches | train loss 0.3890718 +| epoch 7 | 4459/ 5600 batches | train loss 0.4027385 +| epoch 7 | 4463/ 5600 batches | train loss 0.3620404 +| epoch 7 | 4467/ 5600 batches | train loss 0.3658581 +| epoch 7 | 4471/ 5600 batches | train loss 0.4084075 +| epoch 7 | 4475/ 5600 batches | train loss 0.2744490 +| epoch 7 | 4479/ 5600 batches | train loss 0.3763809 +| epoch 7 | 4483/ 5600 batches | train loss 0.3804619 +| epoch 7 | 4487/ 5600 batches | train loss 0.2688623 +| epoch 7 | 4491/ 5600 batches | train loss 0.4196535 +| epoch 7 | 4495/ 5600 batches | train loss 0.4047986 +| epoch 7 | 4499/ 5600 batches | train loss 0.3480242 +| epoch 7 | 4503/ 5600 batches | train loss 0.3697165 +| epoch 7 | 4507/ 5600 batches | train loss 0.3864174 +| epoch 7 | 4511/ 5600 batches | train loss 0.3514751 +| epoch 7 | 4515/ 5600 batches | train loss 0.3107545 +| epoch 7 | 4519/ 5600 batches | train loss 0.3482776 +| epoch 7 | 4523/ 5600 batches | train loss 0.3971699 +| epoch 7 | 4527/ 5600 batches | train loss 0.3912734 +| epoch 7 | 4531/ 5600 batches | train loss 0.4069653 +| epoch 7 | 4535/ 5600 batches | train loss 0.3654131 +| epoch 7 | 4539/ 5600 batches | train loss 0.3794017 +| epoch 7 | 4543/ 5600 batches | train loss 0.3671488 +| epoch 7 | 4547/ 5600 batches | train loss 0.3254857 +| epoch 7 | 4551/ 5600 batches | train loss 0.3265458 +| epoch 7 | 4555/ 5600 batches | train loss 0.3997027 +| epoch 7 | 4559/ 5600 batches | train loss 0.3738633 +| epoch 7 | 4563/ 5600 batches | train loss 0.3663437 +| epoch 7 | 4567/ 5600 batches | train loss 0.3164651 +| epoch 7 | 4571/ 5600 batches | train loss 0.3968532 +| epoch 7 | 4575/ 5600 batches | train loss 0.4049284 +| epoch 7 | 4579/ 5600 batches | train loss 0.3542764 +| epoch 7 | 4583/ 5600 batches | train loss 0.4565318 +| epoch 7 | 4587/ 5600 batches | train loss 0.3382928 +| epoch 7 | 4591/ 5600 batches | train loss 0.3738329 +| epoch 7 | 4595/ 5600 batches | train loss 0.3827996 +| epoch 7 | 4599/ 5600 batches | train loss 0.2866267 +| epoch 7 | 4603/ 5600 batches | train loss 0.4133790 +| epoch 7 | 4607/ 5600 batches | train loss 0.3702852 +| epoch 7 | 4611/ 5600 batches | train loss 0.3624306 +| epoch 7 | 4615/ 5600 batches | train loss 0.3704006 +| epoch 7 | 4619/ 5600 batches | train loss 0.3528361 +| epoch 7 | 4623/ 5600 batches | train loss 0.3562880 +| epoch 7 | 4627/ 5600 batches | train loss 0.3168266 +| epoch 7 | 4631/ 5600 batches | train loss 0.3755783 +| epoch 7 | 4635/ 5600 batches | train loss 0.3603783 +| epoch 7 | 4639/ 5600 batches | train loss 0.4259102 +| epoch 7 | 4643/ 5600 batches | train loss 0.3442049 +| epoch 7 | 4647/ 5600 batches | train loss 0.3070104 +| epoch 7 | 4651/ 5600 batches | train loss 0.3858943 +| epoch 7 | 4655/ 5600 batches | train loss 0.3712057 +| epoch 7 | 4659/ 5600 batches | train loss 0.3501685 +| epoch 7 | 4663/ 5600 batches | train loss 0.3598264 +| epoch 7 | 4667/ 5600 batches | train loss 0.3538877 +| epoch 7 | 4671/ 5600 batches | train loss 0.3865339 +| epoch 7 | 4675/ 5600 batches | train loss 0.3259123 +| epoch 7 | 4679/ 5600 batches | train loss 0.3098983 +| epoch 7 | 4683/ 5600 batches | train loss 0.3714803 +| epoch 7 | 4687/ 5600 batches | train loss 0.3878531 +| epoch 7 | 4691/ 5600 batches | train loss 0.3138556 +| epoch 7 | 4695/ 5600 batches | train loss 0.4083402 +| epoch 7 | 4699/ 5600 batches | train loss 0.3905092 +| epoch 7 | 4703/ 5600 batches | train loss 0.3520303 +| epoch 7 | 4707/ 5600 batches | train loss 0.3898782 +| epoch 7 | 4711/ 5600 batches | train loss 0.3349216 +| epoch 7 | 4715/ 5600 batches | train loss 0.3168666 +| epoch 7 | 4719/ 5600 batches | train loss 0.4198385 +| epoch 7 | 4723/ 5600 batches | train loss 0.2934297 +| epoch 7 | 4727/ 5600 batches | train loss 0.4013329 +| epoch 7 | 4731/ 5600 batches | train loss 0.3756261 +| epoch 7 | 4735/ 5600 batches | train loss 0.3312735 +| epoch 7 | 4739/ 5600 batches | train loss 0.3671378 +| epoch 7 | 4743/ 5600 batches | train loss 0.3388008 +| epoch 7 | 4747/ 5600 batches | train loss 0.3580351 +| epoch 7 | 4751/ 5600 batches | train loss 0.3783742 +| epoch 7 | 4755/ 5600 batches | train loss 0.3698269 +| epoch 7 | 4759/ 5600 batches | train loss 0.3309051 +| epoch 7 | 4763/ 5600 batches | train loss 0.3147547 +| epoch 7 | 4767/ 5600 batches | train loss 0.3458685 +| epoch 7 | 4771/ 5600 batches | train loss 0.3759437 +| epoch 7 | 4775/ 5600 batches | train loss 0.2741101 +| epoch 7 | 4779/ 5600 batches | train loss 0.3768317 +| epoch 7 | 4783/ 5600 batches | train loss 0.3385756 +| epoch 7 | 4787/ 5600 batches | train loss 0.3915114 +| epoch 7 | 4791/ 5600 batches | train loss 0.3460960 +| epoch 7 | 4795/ 5600 batches | train loss 0.3266622 +| epoch 7 | 4799/ 5600 batches | train loss 0.4239551 +| epoch 7 | 4803/ 5600 batches | train loss 0.3504243 +| epoch 7 | 4807/ 5600 batches | train loss 0.3941480 +| epoch 7 | 4811/ 5600 batches | train loss 0.3599657 +| epoch 7 | 4815/ 5600 batches | train loss 0.2331692 +| epoch 7 | 4819/ 5600 batches | train loss 0.3478120 +| epoch 7 | 4823/ 5600 batches | train loss 0.3068535 +| epoch 7 | 4827/ 5600 batches | train loss 0.2997368 +| epoch 7 | 4831/ 5600 batches | train loss 0.4134023 +| epoch 7 | 4835/ 5600 batches | train loss 0.3419688 +| epoch 7 | 4839/ 5600 batches | train loss 0.4079792 +| epoch 7 | 4843/ 5600 batches | train loss 0.3171169 +| epoch 7 | 4847/ 5600 batches | train loss 0.3448904 +| epoch 7 | 4851/ 5600 batches | train loss 0.3568817 +| epoch 7 | 4855/ 5600 batches | train loss 0.3215223 +| epoch 7 | 4859/ 5600 batches | train loss 0.3296716 +| epoch 7 | 4863/ 5600 batches | train loss 0.3885417 +| epoch 7 | 4867/ 5600 batches | train loss 0.3559411 +| epoch 7 | 4871/ 5600 batches | train loss 0.3465304 +| epoch 7 | 4875/ 5600 batches | train loss 0.3342044 +| epoch 7 | 4879/ 5600 batches | train loss 0.4266913 +| epoch 7 | 4883/ 5600 batches | train loss 0.2692789 +| epoch 7 | 4887/ 5600 batches | train loss 0.3306186 +| epoch 7 | 4891/ 5600 batches | train loss 0.2573868 +| epoch 7 | 4895/ 5600 batches | train loss 0.3714173 +| epoch 7 | 4899/ 5600 batches | train loss 0.3652193 +| epoch 7 | 4903/ 5600 batches | train loss 0.3966473 +| epoch 7 | 4907/ 5600 batches | train loss 0.3573028 +| epoch 7 | 4911/ 5600 batches | train loss 0.3666875 +| epoch 7 | 4915/ 5600 batches | train loss 0.3515203 +| epoch 7 | 4919/ 5600 batches | train loss 0.4423320 +| epoch 7 | 4923/ 5600 batches | train loss 0.3804112 +| epoch 7 | 4927/ 5600 batches | train loss 0.4141653 +| epoch 7 | 4931/ 5600 batches | train loss 0.3530263 +| epoch 7 | 4935/ 5600 batches | train loss 0.4037373 +| epoch 7 | 4939/ 5600 batches | train loss 0.3829144 +| epoch 7 | 4943/ 5600 batches | train loss 0.3386458 +| epoch 7 | 4947/ 5600 batches | train loss 0.4238270 +| epoch 7 | 4951/ 5600 batches | train loss 0.3989666 +| epoch 7 | 4955/ 5600 batches | train loss 0.3380323 +| epoch 7 | 4959/ 5600 batches | train loss 0.4240031 +| epoch 7 | 4963/ 5600 batches | train loss 0.3330913 +| epoch 7 | 4967/ 5600 batches | train loss 0.3135147 +| epoch 7 | 4971/ 5600 batches | train loss 0.3617606 +| epoch 7 | 4975/ 5600 batches | train loss 0.3318056 +| epoch 7 | 4979/ 5600 batches | train loss 0.2964537 +| epoch 7 | 4983/ 5600 batches | train loss 0.2854705 +| epoch 7 | 4987/ 5600 batches | train loss 0.3622912 +| epoch 7 | 4991/ 5600 batches | train loss 0.3341281 +| epoch 7 | 4995/ 5600 batches | train loss 0.3372753 +| epoch 7 | 4999/ 5600 batches | train loss 0.4141275 +| epoch 7 | 5003/ 5600 batches | train loss 0.4330789 +| epoch 7 | 5007/ 5600 batches | train loss 0.4456747 +| epoch 7 | 5011/ 5600 batches | train loss 0.4192620 +| epoch 7 | 5015/ 5600 batches | train loss 0.3538684 +| epoch 7 | 5019/ 5600 batches | train loss 0.4208732 +| epoch 7 | 5023/ 5600 batches | train loss 0.3717121 +| epoch 7 | 5027/ 5600 batches | train loss 0.3407405 +| epoch 7 | 5031/ 5600 batches | train loss 0.3237889 +| epoch 7 | 5035/ 5600 batches | train loss 0.3983510 +| epoch 7 | 5039/ 5600 batches | train loss 0.3665727 +| epoch 7 | 5043/ 5600 batches | train loss 0.4689091 +| epoch 7 | 5047/ 5600 batches | train loss 0.3571251 +| epoch 7 | 5051/ 5600 batches | train loss 0.2964870 +| epoch 7 | 5055/ 5600 batches | train loss 0.3278569 +| epoch 7 | 5059/ 5600 batches | train loss 0.3206981 +| epoch 7 | 5063/ 5600 batches | train loss 0.4038587 +| epoch 7 | 5067/ 5600 batches | train loss 0.3635063 +| epoch 7 | 5071/ 5600 batches | train loss 0.4750449 +| epoch 7 | 5075/ 5600 batches | train loss 0.4800353 +| epoch 7 | 5079/ 5600 batches | train loss 0.3876327 +| epoch 7 | 5083/ 5600 batches | train loss 0.3031937 +| epoch 7 | 5087/ 5600 batches | train loss 0.3620266 +| epoch 7 | 5091/ 5600 batches | train loss 0.3742712 +| epoch 7 | 5095/ 5600 batches | train loss 0.3179033 +| epoch 7 | 5099/ 5600 batches | train loss 0.3642395 +| epoch 7 | 5103/ 5600 batches | train loss 0.3437746 +| epoch 7 | 5107/ 5600 batches | train loss 0.3630451 +| epoch 7 | 5111/ 5600 batches | train loss 0.4184654 +| epoch 7 | 5115/ 5600 batches | train loss 0.3836629 +| epoch 7 | 5119/ 5600 batches | train loss 0.4284786 +| epoch 7 | 5123/ 5600 batches | train loss 0.3720022 +| epoch 7 | 5127/ 5600 batches | train loss 0.4274744 +| epoch 7 | 5131/ 5600 batches | train loss 0.3400490 +| epoch 7 | 5135/ 5600 batches | train loss 0.3571571 +| epoch 7 | 5139/ 5600 batches | train loss 0.3448060 +| epoch 7 | 5143/ 5600 batches | train loss 0.3013924 +| epoch 7 | 5147/ 5600 batches | train loss 0.3150592 +| epoch 7 | 5151/ 5600 batches | train loss 0.4062684 +| epoch 7 | 5155/ 5600 batches | train loss 0.3785580 +| epoch 7 | 5159/ 5600 batches | train loss 0.3676255 +| epoch 7 | 5163/ 5600 batches | train loss 0.3127647 +| epoch 7 | 5167/ 5600 batches | train loss 0.3756167 +| epoch 7 | 5171/ 5600 batches | train loss 0.3429555 +| epoch 7 | 5175/ 5600 batches | train loss 0.3852892 +| epoch 7 | 5179/ 5600 batches | train loss 0.3706406 +| epoch 7 | 5183/ 5600 batches | train loss 0.3133330 +| epoch 7 | 5187/ 5600 batches | train loss 0.3543795 +| epoch 7 | 5191/ 5600 batches | train loss 0.3685435 +| epoch 7 | 5195/ 5600 batches | train loss 0.3595223 +| epoch 7 | 5199/ 5600 batches | train loss 0.3930303 +| epoch 7 | 5203/ 5600 batches | train loss 0.3860106 +| epoch 7 | 5207/ 5600 batches | train loss 0.4372041 +| epoch 7 | 5211/ 5600 batches | train loss 0.4228487 +| epoch 7 | 5215/ 5600 batches | train loss 0.4207877 +| epoch 7 | 5219/ 5600 batches | train loss 0.3310518 +| epoch 7 | 5223/ 5600 batches | train loss 0.4122768 +| epoch 7 | 5227/ 5600 batches | train loss 0.4626753 +| epoch 7 | 5231/ 5600 batches | train loss 0.4209533 +| epoch 7 | 5235/ 5600 batches | train loss 0.4052703 +| epoch 7 | 5239/ 5600 batches | train loss 0.3823765 +| epoch 7 | 5243/ 5600 batches | train loss 0.3731243 +| epoch 7 | 5247/ 5600 batches | train loss 0.3623120 +| epoch 7 | 5251/ 5600 batches | train loss 0.2989025 +| epoch 7 | 5255/ 5600 batches | train loss 0.4217753 +| epoch 7 | 5259/ 5600 batches | train loss 0.3631293 +| epoch 7 | 5263/ 5600 batches | train loss 0.3993724 +| epoch 7 | 5267/ 5600 batches | train loss 0.3272315 +| epoch 7 | 5271/ 5600 batches | train loss 0.3425088 +| epoch 7 | 5275/ 5600 batches | train loss 0.4113412 +| epoch 7 | 5279/ 5600 batches | train loss 0.3548412 +| epoch 7 | 5283/ 5600 batches | train loss 0.3994446 +| epoch 7 | 5287/ 5600 batches | train loss 0.3434342 +| epoch 7 | 5291/ 5600 batches | train loss 0.3614258 +| epoch 7 | 5295/ 5600 batches | train loss 0.3562315 +| epoch 7 | 5299/ 5600 batches | train loss 0.3770593 +| epoch 7 | 5303/ 5600 batches | train loss 0.4288051 +| epoch 7 | 5307/ 5600 batches | train loss 0.3624801 +| epoch 7 | 5311/ 5600 batches | train loss 0.4260164 +| epoch 7 | 5315/ 5600 batches | train loss 0.3425888 +| epoch 7 | 5319/ 5600 batches | train loss 0.4107388 +| epoch 7 | 5323/ 5600 batches | train loss 0.3750735 +| epoch 7 | 5327/ 5600 batches | train loss 0.3988749 +| epoch 7 | 5331/ 5600 batches | train loss 0.3551593 +| epoch 7 | 5335/ 5600 batches | train loss 0.3655941 +| epoch 7 | 5339/ 5600 batches | train loss 0.3390696 +| epoch 7 | 5343/ 5600 batches | train loss 0.4198051 +| epoch 7 | 5347/ 5600 batches | train loss 0.3454055 +| epoch 7 | 5351/ 5600 batches | train loss 0.4016388 +| epoch 7 | 5355/ 5600 batches | train loss 0.3852459 +| epoch 7 | 5359/ 5600 batches | train loss 0.2964959 +| epoch 7 | 5363/ 5600 batches | train loss 0.3815329 +| epoch 7 | 5367/ 5600 batches | train loss 0.4242339 +| epoch 7 | 5371/ 5600 batches | train loss 0.3816109 +| epoch 7 | 5375/ 5600 batches | train loss 0.4095868 +| epoch 7 | 5379/ 5600 batches | train loss 0.3449107 +| epoch 7 | 5383/ 5600 batches | train loss 0.3120971 +| epoch 7 | 5387/ 5600 batches | train loss 0.4141132 +| epoch 7 | 5391/ 5600 batches | train loss 0.3275947 +| epoch 7 | 5395/ 5600 batches | train loss 0.3827668 +| epoch 7 | 5399/ 5600 batches | train loss 0.3465819 +| epoch 7 | 5403/ 5600 batches | train loss 0.3488092 +| epoch 7 | 5407/ 5600 batches | train loss 0.3383834 +| epoch 7 | 5411/ 5600 batches | train loss 0.2318966 +| epoch 7 | 5415/ 5600 batches | train loss 0.3932070 +| epoch 7 | 5419/ 5600 batches | train loss 0.3871162 +| epoch 7 | 5423/ 5600 batches | train loss 0.3053839 +| epoch 7 | 5427/ 5600 batches | train loss 0.3025867 +| epoch 7 | 5431/ 5600 batches | train loss 0.3345284 +| epoch 7 | 5435/ 5600 batches | train loss 0.3851432 +| epoch 7 | 5439/ 5600 batches | train loss 0.3857974 +| epoch 7 | 5443/ 5600 batches | train loss 0.3565851 +| epoch 7 | 5447/ 5600 batches | train loss 0.3440719 +| epoch 7 | 5451/ 5600 batches | train loss 0.4575363 +| epoch 7 | 5455/ 5600 batches | train loss 0.3673710 +| epoch 7 | 5459/ 5600 batches | train loss 0.3418648 +| epoch 7 | 5463/ 5600 batches | train loss 0.3625382 +| epoch 7 | 5467/ 5600 batches | train loss 0.3860305 +| epoch 7 | 5471/ 5600 batches | train loss 0.3495005 +| epoch 7 | 5475/ 5600 batches | train loss 0.3614908 +| epoch 7 | 5479/ 5600 batches | train loss 0.3317600 +| epoch 7 | 5483/ 5600 batches | train loss 0.3599211 +| epoch 7 | 5487/ 5600 batches | train loss 0.4169068 +| epoch 7 | 5491/ 5600 batches | train loss 0.3612831 +| epoch 7 | 5495/ 5600 batches | train loss 0.3322980 +| epoch 7 | 5499/ 5600 batches | train loss 0.3991771 +| epoch 7 | 5503/ 5600 batches | train loss 0.3806864 +| epoch 7 | 5507/ 5600 batches | train loss 0.3864875 +| epoch 7 | 5511/ 5600 batches | train loss 0.3853951 +| epoch 7 | 5515/ 5600 batches | train loss 0.2720986 +| epoch 7 | 5519/ 5600 batches | train loss 0.3985563 +| epoch 7 | 5523/ 5600 batches | train loss 0.3326197 +| epoch 7 | 5527/ 5600 batches | train loss 0.3611766 +| epoch 7 | 5531/ 5600 batches | train loss 0.4300251 +| epoch 7 | 5535/ 5600 batches | train loss 0.4276548 +| epoch 7 | 5539/ 5600 batches | train loss 0.4015020 +| epoch 7 | 5543/ 5600 batches | train loss 0.3595819 +| epoch 7 | 5547/ 5600 batches | train loss 0.3411389 +| epoch 7 | 5551/ 5600 batches | train loss 0.3120985 +| epoch 7 | 5555/ 5600 batches | train loss 0.4252935 +| epoch 7 | 5559/ 5600 batches | train loss 0.3510090 +| epoch 7 | 5563/ 5600 batches | train loss 0.3890011 +| epoch 7 | 5567/ 5600 batches | train loss 0.3713323 +| epoch 7 | 5571/ 5600 batches | train loss 0.3218763 +| epoch 7 | 5575/ 5600 batches | train loss 0.3347811 +| epoch 7 | 5579/ 5600 batches | train loss 0.3205755 +| epoch 7 | 5583/ 5600 batches | train loss 0.3940929 +| epoch 7 | 5587/ 5600 batches | train loss 0.3041630 +| epoch 7 | 5591/ 5600 batches | train loss 0.3618823 +| epoch 7 | 5595/ 5600 batches | train loss 0.3816472 +| epoch 7 | 5599/ 5600 batches | train loss 0.3552527 +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 5600 batches | test loss 0.2674392 +| epoch 7 | 7/ 5600 batches | test loss 0.5096645 +| epoch 7 | 11/ 5600 batches | test loss 0.3502081 +| epoch 7 | 15/ 5600 batches | test loss 0.4297400 +| epoch 7 | 19/ 5600 batches | test loss 0.4167552 +| epoch 7 | 23/ 5600 batches | test loss 0.4213536 +| epoch 7 | 27/ 5600 batches | test loss 0.4133711 +| epoch 7 | 31/ 5600 batches | test loss 0.3704889 +| epoch 7 | 35/ 5600 batches | test loss 0.3951021 +| epoch 7 | 39/ 5600 batches | test loss 0.2904349 +| epoch 7 | 43/ 5600 batches | test loss 0.3848451 +| epoch 7 | 47/ 5600 batches | test loss 0.4411631 +| epoch 7 | 51/ 5600 batches | test loss 0.4117308 +| epoch 7 | 55/ 5600 batches | test loss 0.4035662 +| epoch 7 | 59/ 5600 batches | test loss 0.4957126 +| epoch 7 | 63/ 5600 batches | test loss 0.4383678 +| epoch 7 | 67/ 5600 batches | test loss 0.3974837 +| epoch 7 | 71/ 5600 batches | test loss 0.4082799 +| epoch 7 | 75/ 5600 batches | test loss 0.5006529 +| epoch 7 | 79/ 5600 batches | test loss 0.5609400 +| epoch 7 | 83/ 5600 batches | test loss 0.4426861 +| epoch 7 | 87/ 5600 batches | test loss 0.4236435 +| epoch 7 | 91/ 5600 batches | test loss 0.5123557 +| epoch 7 | 95/ 5600 batches | test loss 0.4938147 +| epoch 7 | 99/ 5600 batches | test loss 0.4363582 +| epoch 7 | 103/ 5600 batches | test loss 0.3876861 +| epoch 7 | 107/ 5600 batches | test loss 0.4296447 +| epoch 7 | 111/ 5600 batches | test loss 0.3575787 +| epoch 7 | 115/ 5600 batches | test loss 0.4224958 +| epoch 7 | 119/ 5600 batches | test loss 0.4048120 +| epoch 7 | 123/ 5600 batches | test loss 0.5913450 +| epoch 7 | 127/ 5600 batches | test loss 0.3759714 +| epoch 7 | 131/ 5600 batches | test loss 0.5137652 +| epoch 7 | 135/ 5600 batches | test loss 0.6210003 +| epoch 7 | 139/ 5600 batches | test loss 0.4776622 +| epoch 7 | 143/ 5600 batches | test loss 0.3907542 +| epoch 7 | 147/ 5600 batches | test loss 0.5205200 +| epoch 7 | 151/ 5600 batches | test loss 0.5603797 +| epoch 7 | 155/ 5600 batches | test loss 0.3992116 +| epoch 7 | 159/ 5600 batches | test loss 0.5284784 +| epoch 7 | 163/ 5600 batches | test loss 0.5156729 +| epoch 7 | 167/ 5600 batches | test loss 0.6213816 +| epoch 7 | 171/ 5600 batches | test loss 0.3889196 +| epoch 7 | 175/ 5600 batches | test loss 0.5595047 +| epoch 7 | 179/ 5600 batches | test loss 0.6439887 +| epoch 7 | 183/ 5600 batches | test loss 0.4413513 +| epoch 7 | 187/ 5600 batches | test loss 0.4060145 +| epoch 7 | 191/ 5600 batches | test loss 0.4873697 +| epoch 7 | 195/ 5600 batches | test loss 0.4783280 +| epoch 7 | 199/ 5600 batches | test loss 0.5771719 +| epoch 7 | 203/ 5600 batches | test loss 0.5074749 +| epoch 7 | 207/ 5600 batches | test loss 0.4091080 +| epoch 7 | 211/ 5600 batches | test loss 0.3680736 +| epoch 7 | 215/ 5600 batches | test loss 0.3621833 +| epoch 7 | 219/ 5600 batches | test loss 0.5575434 +| epoch 7 | 223/ 5600 batches | test loss 0.3615476 +| epoch 7 | 227/ 5600 batches | test loss 0.4559550 +| epoch 7 | 231/ 5600 batches | test loss 0.4271458 +| epoch 7 | 235/ 5600 batches | test loss 0.4298497 +| epoch 7 | 239/ 5600 batches | test loss 0.6194575 +| epoch 7 | 243/ 5600 batches | test loss 0.4104618 +| epoch 7 | 247/ 5600 batches | test loss 0.3405485 +| epoch 7 | 251/ 5600 batches | test loss 0.4034944 +| epoch 7 | 255/ 5600 batches | test loss 0.5555831 +| epoch 7 | 259/ 5600 batches | test loss 0.4918572 +| epoch 7 | 263/ 5600 batches | test loss 0.5521544 +| epoch 7 | 267/ 5600 batches | test loss 0.4602120 +| epoch 7 | 271/ 5600 batches | test loss 0.1865063 +| epoch 7 | 275/ 5600 batches | test loss 0.4401283 +| epoch 7 | 279/ 5600 batches | test loss 0.4683782 +| epoch 7 | 283/ 5600 batches | test loss 0.4056431 +| epoch 7 | 287/ 5600 batches | test loss 0.4330165 +| epoch 7 | 291/ 5600 batches | test loss 0.4000739 +| epoch 7 | 295/ 5600 batches | test loss 0.4687335 +| epoch 7 | 299/ 5600 batches | test loss 0.4987853 +| epoch 7 | 303/ 5600 batches | test loss 0.5071757 +| epoch 7 | 307/ 5600 batches | test loss 0.1890496 +| epoch 7 | 311/ 5600 batches | test loss 0.4873807 +| epoch 7 | 315/ 5600 batches | test loss 0.4091078 +| epoch 7 | 319/ 5600 batches | test loss 0.4856501 +| epoch 7 | 323/ 5600 batches | test loss 0.6117583 +| epoch 7 | 327/ 5600 batches | test loss 0.4214865 +| epoch 7 | 331/ 5600 batches | test loss 0.4263225 +| epoch 7 | 335/ 5600 batches | test loss 0.4904646 +| epoch 7 | 339/ 5600 batches | test loss 0.5578744 +| epoch 7 | 343/ 5600 batches | test loss 0.5103401 +| epoch 7 | 347/ 5600 batches | test loss 0.5803134 +| epoch 7 | 351/ 5600 batches | test loss 0.3811325 +| epoch 7 | 355/ 5600 batches | test loss 0.5627018 +| epoch 7 | 359/ 5600 batches | test loss 0.3829505 +| epoch 7 | 363/ 5600 batches | test loss 0.4831275 +| epoch 7 | 367/ 5600 batches | test loss 0.4557947 +| epoch 7 | 371/ 5600 batches | test loss 0.5390171 +| epoch 7 | 375/ 5600 batches | test loss 0.5477086 +| epoch 7 | 379/ 5600 batches | test loss 0.4368988 +| epoch 7 | 383/ 5600 batches | test loss 0.3703659 +| epoch 7 | 387/ 5600 batches | test loss 0.4904882 +| epoch 7 | 391/ 5600 batches | test loss 0.4322475 +| epoch 7 | 395/ 5600 batches | test loss 0.3475383 +| epoch 7 | 399/ 5600 batches | test loss 0.3907019 +| epoch 7 | 403/ 5600 batches | test loss 0.3539054 +| epoch 7 | 407/ 5600 batches | test loss 0.4575938 +| epoch 7 | 411/ 5600 batches | test loss 0.4035650 +| epoch 7 | 415/ 5600 batches | test loss 0.4073544 +| epoch 7 | 419/ 5600 batches | test loss 0.5259873 +| epoch 7 | 423/ 5600 batches | test loss 0.3321860 +| epoch 7 | 427/ 5600 batches | test loss 0.4475265 +| epoch 7 | 431/ 5600 batches | test loss 0.4851117 +| epoch 7 | 435/ 5600 batches | test loss 0.3496810 +| epoch 7 | 439/ 5600 batches | test loss 0.5951307 +| epoch 7 | 443/ 5600 batches | test loss 0.5294599 +| epoch 7 | 447/ 5600 batches | test loss 0.5373700 +| epoch 7 | 451/ 5600 batches | test loss 0.5144621 +| epoch 7 | 455/ 5600 batches | test loss 0.3935514 +| epoch 7 | 459/ 5600 batches | test loss 0.5182166 +| epoch 7 | 463/ 5600 batches | test loss 0.4222845 +| epoch 7 | 467/ 5600 batches | test loss 0.4500262 +| epoch 7 | 471/ 5600 batches | test loss 0.4426215 +| epoch 7 | 475/ 5600 batches | test loss 0.3534575 +| epoch 7 | 479/ 5600 batches | test loss 0.3652025 +| epoch 7 | 483/ 5600 batches | test loss 0.5536402 +| epoch 7 | 487/ 5600 batches | test loss 0.5229829 +| epoch 7 | 491/ 5600 batches | test loss 0.4116872 +| epoch 7 | 495/ 5600 batches | test loss 0.5417826 +| epoch 7 | 499/ 5600 batches | test loss 0.4488905 +| epoch 7 | 503/ 5600 batches | test loss 0.4924822 +| epoch 7 | 507/ 5600 batches | test loss 0.1679920 +| epoch 7 | 511/ 5600 batches | test loss 0.5337379 +| epoch 7 | 515/ 5600 batches | test loss 0.5049958 +| epoch 7 | 519/ 5600 batches | test loss 0.5086781 +| epoch 7 | 523/ 5600 batches | test loss 0.4678108 +| epoch 7 | 527/ 5600 batches | test loss 0.4183245 +| epoch 7 | 531/ 5600 batches | test loss 0.5298743 +| epoch 7 | 535/ 5600 batches | test loss 0.5812727 +| epoch 7 | 539/ 5600 batches | test loss 0.4011384 +| epoch 7 | 543/ 5600 batches | test loss 0.4887976 +| epoch 7 | 547/ 5600 batches | test loss 0.3214997 +| epoch 7 | 551/ 5600 batches | test loss 0.5004472 +| epoch 7 | 555/ 5600 batches | test loss 0.3905383 +| epoch 7 | 559/ 5600 batches | test loss 0.5549646 +| epoch 7 | 563/ 5600 batches | test loss 0.3956830 +| epoch 7 | 567/ 5600 batches | test loss 0.4750978 +| epoch 7 | 571/ 5600 batches | test loss 0.4324193 +| epoch 7 | 575/ 5600 batches | test loss 0.5168709 +| epoch 7 | 579/ 5600 batches | test loss 0.3674298 +| epoch 7 | 583/ 5600 batches | test loss 0.5088329 +| epoch 7 | 587/ 5600 batches | test loss 0.4806914 +| epoch 7 | 591/ 5600 batches | test loss 0.4933861 +| epoch 7 | 595/ 5600 batches | test loss 0.4164901 +| epoch 7 | 599/ 5600 batches | test loss 0.3753519 +| epoch 7 | 603/ 5600 batches | test loss 0.5299703 +| epoch 7 | 607/ 5600 batches | test loss 0.5153397 +| epoch 7 | 611/ 5600 batches | test loss 0.4066209 +| epoch 7 | 615/ 5600 batches | test loss 0.4219855 +| epoch 7 | 619/ 5600 batches | test loss 0.5072021 +| epoch 7 | 623/ 5600 batches | test loss 0.5499468 +| epoch 7 | 627/ 5600 batches | test loss 0.4771428 +| epoch 7 | 631/ 5600 batches | test loss 0.4537890 +| epoch 7 | 635/ 5600 batches | test loss 0.5032134 +| epoch 7 | 639/ 5600 batches | test loss 0.5011777 +| epoch 7 | 643/ 5600 batches | test loss 0.4590364 +| epoch 7 | 647/ 5600 batches | test loss 0.4266618 +| epoch 7 | 651/ 5600 batches | test loss 0.4915410 +| epoch 7 | 655/ 5600 batches | test loss 0.4296392 +| epoch 7 | 659/ 5600 batches | test loss 0.4194912 +| epoch 7 | 663/ 5600 batches | test loss 0.5815366 +| epoch 7 | 667/ 5600 batches | test loss 0.4621366 +| epoch 7 | 671/ 5600 batches | test loss 0.4058239 +| epoch 7 | 675/ 5600 batches | test loss 0.4624378 +| epoch 7 | 679/ 5600 batches | test loss 0.3638861 +| epoch 7 | 683/ 5600 batches | test loss 0.4228907 +| epoch 7 | 687/ 5600 batches | test loss 0.3577819 +| epoch 7 | 691/ 5600 batches | test loss 0.4905627 +| epoch 7 | 695/ 5600 batches | test loss 0.4334370 +| epoch 7 | 699/ 5600 batches | test loss 0.4145270 +| epoch 7 | 703/ 5600 batches | test loss 0.3851823 +| epoch 7 | 707/ 5600 batches | test loss 0.4370703 +| epoch 7 | 711/ 5600 batches | test loss 0.3815271 +| epoch 7 | 715/ 5600 batches | test loss 0.4270048 +| epoch 7 | 719/ 5600 batches | test loss 0.4174519 +| epoch 7 | 723/ 5600 batches | test loss 0.5258253 +| epoch 7 | 727/ 5600 batches | test loss 0.4327844 +| epoch 7 | 731/ 5600 batches | test loss 0.3503478 +| epoch 7 | 735/ 5600 batches | test loss 0.4052894 +| epoch 7 | 739/ 5600 batches | test loss 0.4143857 +| epoch 7 | 743/ 5600 batches | test loss 0.4323263 +| epoch 7 | 747/ 5600 batches | test loss 0.3865187 +| epoch 7 | 751/ 5600 batches | test loss 0.4772825 +| epoch 7 | 755/ 5600 batches | test loss 0.4065146 +| epoch 7 | 759/ 5600 batches | test loss 0.3748127 +| epoch 7 | 763/ 5600 batches | test loss 0.4994879 +| epoch 7 | 767/ 5600 batches | test loss 0.5273325 +| epoch 7 | 771/ 5600 batches | test loss 0.4225448 +| epoch 7 | 775/ 5600 batches | test loss 0.4833368 +| epoch 7 | 779/ 5600 batches | test loss 0.4696631 +| epoch 7 | 783/ 5600 batches | test loss 0.3277721 +| epoch 7 | 787/ 5600 batches | test loss 0.4170444 +| epoch 7 | 791/ 5600 batches | test loss 0.5815184 +| epoch 7 | 795/ 5600 batches | test loss 0.4271780 +| epoch 7 | 799/ 5600 batches | test loss 0.4685348 +| epoch 7 | 803/ 5600 batches | test loss 0.4284503 +| epoch 7 | 807/ 5600 batches | test loss 0.5139668 +| epoch 7 | 811/ 5600 batches | test loss 0.5209523 +| epoch 7 | 815/ 5600 batches | test loss 0.4697208 +| epoch 7 | 819/ 5600 batches | test loss 0.4708857 +| epoch 7 | 823/ 5600 batches | test loss 0.4018201 +| epoch 7 | 827/ 5600 batches | test loss 0.4240555 +| epoch 7 | 831/ 5600 batches | test loss 0.2688226 +| epoch 7 | 835/ 5600 batches | test loss 0.4624257 +| epoch 7 | 839/ 5600 batches | test loss 0.7174083 +| epoch 7 | 843/ 5600 batches | test loss 0.4630782 +| epoch 7 | 847/ 5600 batches | test loss 0.3734345 +| epoch 7 | 851/ 5600 batches | test loss 0.4191005 +| epoch 7 | 855/ 5600 batches | test loss 0.4929672 +| epoch 7 | 859/ 5600 batches | test loss 0.4333619 +| epoch 7 | 863/ 5600 batches | test loss 0.4494729 +| epoch 7 | 867/ 5600 batches | test loss 0.4725176 +| epoch 7 | 871/ 5600 batches | test loss 0.4114605 +| epoch 7 | 875/ 5600 batches | test loss 0.4494794 +| epoch 7 | 879/ 5600 batches | test loss 0.4658256 +| epoch 7 | 883/ 5600 batches | test loss 0.4558638 +| epoch 7 | 887/ 5600 batches | test loss 0.4345186 +| epoch 7 | 891/ 5600 batches | test loss 0.4104713 +| epoch 7 | 895/ 5600 batches | test loss 0.4032819 +| epoch 7 | 899/ 5600 batches | test loss 0.3654996 +| epoch 7 | 903/ 5600 batches | test loss 0.4077891 +| epoch 7 | 907/ 5600 batches | test loss 0.4269856 +| epoch 7 | 911/ 5600 batches | test loss 0.4205776 +| epoch 7 | 915/ 5600 batches | test loss 0.3299578 +| epoch 7 | 919/ 5600 batches | test loss 0.3678746 +| epoch 7 | 923/ 5600 batches | test loss 0.5615237 +| epoch 7 | 927/ 5600 batches | test loss 0.3627370 +| epoch 7 | 931/ 5600 batches | test loss 0.4147730 +| epoch 7 | 935/ 5600 batches | test loss 0.3058930 +| epoch 7 | 939/ 5600 batches | test loss 0.4539574 +| epoch 7 | 943/ 5600 batches | test loss 0.4660502 +| epoch 7 | 947/ 5600 batches | test loss 0.5066665 +| epoch 7 | 951/ 5600 batches | test loss 0.4087137 +| epoch 7 | 955/ 5600 batches | test loss 0.4589054 +| epoch 7 | 959/ 5600 batches | test loss 0.3939249 +| epoch 7 | 963/ 5600 batches | test loss 0.4101729 +| epoch 7 | 967/ 5600 batches | test loss 0.3611759 +| epoch 7 | 971/ 5600 batches | test loss 0.4351833 +| epoch 7 | 975/ 5600 batches | test loss 0.5005041 +| epoch 7 | 979/ 5600 batches | test loss 0.3760374 +| epoch 7 | 983/ 5600 batches | test loss 0.4402596 +| epoch 7 | 987/ 5600 batches | test loss 0.3612986 +| epoch 7 | 991/ 5600 batches | test loss 0.3129779 +| epoch 7 | 995/ 5600 batches | test loss 0.5008198 +| epoch 7 | 999/ 5600 batches | test loss 0.4250024 +| epoch 7 | 1003/ 5600 batches | test loss 0.4541423 +| epoch 7 | 1007/ 5600 batches | test loss 0.3803438 +| epoch 7 | 1011/ 5600 batches | test loss 0.5003999 +| epoch 7 | 1015/ 5600 batches | test loss 0.4906226 +| epoch 7 | 1019/ 5600 batches | test loss 0.4772899 +| epoch 7 | 1023/ 5600 batches | test loss 0.3596117 +| epoch 7 | 1027/ 5600 batches | test loss 0.3751697 +| epoch 7 | 1031/ 5600 batches | test loss 0.4075192 +| epoch 7 | 1035/ 5600 batches | test loss 0.4189073 +| epoch 7 | 1039/ 5600 batches | test loss 0.5286028 +| epoch 7 | 1043/ 5600 batches | test loss 0.5355282 +| epoch 7 | 1047/ 5600 batches | test loss 0.4277368 +| epoch 7 | 1051/ 5600 batches | test loss 0.4771443 +| epoch 7 | 1055/ 5600 batches | test loss 0.4424289 +| epoch 7 | 1059/ 5600 batches | test loss 0.6348633 +| epoch 7 | 1063/ 5600 batches | test loss 0.4240436 +| epoch 7 | 1067/ 5600 batches | test loss 0.2446402 +| epoch 7 | 1071/ 5600 batches | test loss 0.4305464 +| epoch 7 | 1075/ 5600 batches | test loss 0.5429804 +| epoch 7 | 1079/ 5600 batches | test loss 0.5910538 +| epoch 7 | 1083/ 5600 batches | test loss 0.4398173 +| epoch 7 | 1087/ 5600 batches | test loss 0.6788689 +| epoch 7 | 1091/ 5600 batches | test loss 0.6683148 +| epoch 7 | 1095/ 5600 batches | test loss 0.1609269 +| epoch 7 | 1099/ 5600 batches | test loss 0.3803263 +| epoch 7 | 1103/ 5600 batches | test loss 0.4058471 +| epoch 7 | 1107/ 5600 batches | test loss 0.5070033 +| epoch 7 | 1111/ 5600 batches | test loss 0.3799312 +| epoch 7 | 1115/ 5600 batches | test loss 0.4047473 +| epoch 7 | 1119/ 5600 batches | test loss 0.4833829 +| epoch 7 | 1123/ 5600 batches | test loss 0.4578959 +| epoch 7 | 1127/ 5600 batches | test loss 0.2443636 +| epoch 7 | 1131/ 5600 batches | test loss 0.4994241 +| epoch 7 | 1135/ 5600 batches | test loss 0.5310193 +| epoch 7 | 1139/ 5600 batches | test loss 0.3653148 +| epoch 7 | 1143/ 5600 batches | test loss 0.3565158 +| epoch 7 | 1147/ 5600 batches | test loss 0.3360680 +| epoch 7 | 1151/ 5600 batches | test loss 0.3869046 +| epoch 7 | 1155/ 5600 batches | test loss 0.4025314 +| epoch 7 | 1159/ 5600 batches | test loss 0.4708924 +| epoch 7 | 1163/ 5600 batches | test loss 0.4661596 +| epoch 7 | 1167/ 5600 batches | test loss 0.4463121 +| epoch 7 | 1171/ 5600 batches | test loss 0.4177462 +| epoch 7 | 1175/ 5600 batches | test loss 0.3820272 +| epoch 7 | 1179/ 5600 batches | test loss 0.7352188 +| epoch 7 | 1183/ 5600 batches | test loss 0.4645779 +| epoch 7 | 1187/ 5600 batches | test loss 0.3088219 +| epoch 7 | 1191/ 5600 batches | test loss 0.4529220 +| epoch 7 | 1195/ 5600 batches | test loss 0.4084511 +| epoch 7 | 1199/ 5600 batches | test loss 0.4098402 +| epoch 7 | 1203/ 5600 batches | test loss 0.4151251 +| epoch 7 | 1207/ 5600 batches | test loss 0.5142222 +| epoch 7 | 1211/ 5600 batches | test loss 0.5949044 +| epoch 7 | 1215/ 5600 batches | test loss 0.4588511 +| epoch 7 | 1219/ 5600 batches | test loss 0.4124649 +| epoch 7 | 1223/ 5600 batches | test loss 0.5469480 +| epoch 7 | 1227/ 5600 batches | test loss 0.4087461 +| epoch 7 | 1231/ 5600 batches | test loss 0.5023429 +| epoch 7 | 1235/ 5600 batches | test loss 0.5465928 +| epoch 7 | 1239/ 5600 batches | test loss 0.4409040 +| epoch 7 | 1243/ 5600 batches | test loss 0.5552790 +| epoch 7 | 1247/ 5600 batches | test loss 0.3747019 +| epoch 7 | 1251/ 5600 batches | test loss 0.4969746 +| epoch 7 | 1255/ 5600 batches | test loss 0.3864072 +| epoch 7 | 1259/ 5600 batches | test loss 0.5428637 +| epoch 7 | 1263/ 5600 batches | test loss 0.4056703 +| epoch 7 | 1267/ 5600 batches | test loss 0.4947244 +| epoch 7 | 1271/ 5600 batches | test loss 0.5584592 +| epoch 7 | 1275/ 5600 batches | test loss 0.4905079 +| epoch 7 | 1279/ 5600 batches | test loss 0.4744240 +| epoch 7 | 1283/ 5600 batches | test loss 0.5994415 +| epoch 7 | 1287/ 5600 batches | test loss 0.4117478 +| epoch 7 | 1291/ 5600 batches | test loss 0.4008980 +| epoch 7 | 1295/ 5600 batches | test loss 0.4324664 +| epoch 7 | 1299/ 5600 batches | test loss 0.4314346 +| epoch 7 | 1303/ 5600 batches | test loss 0.4844548 +| epoch 7 | 1307/ 5600 batches | test loss 0.4177569 +| epoch 7 | 1311/ 5600 batches | test loss 0.3911540 +| epoch 7 | 1315/ 5600 batches | test loss 0.4466575 +| epoch 7 | 1319/ 5600 batches | test loss 0.4727727 +| epoch 7 | 1323/ 5600 batches | test loss 0.4735475 +| epoch 7 | 1327/ 5600 batches | test loss 0.5951850 +| epoch 7 | 1331/ 5600 batches | test loss 0.4802375 +| epoch 7 | 1335/ 5600 batches | test loss 0.4287408 +| epoch 7 | 1339/ 5600 batches | test loss 0.3979257 +| epoch 7 | 1343/ 5600 batches | test loss 0.3551773 +| epoch 7 | 1347/ 5600 batches | test loss 0.5819354 +| epoch 7 | 1351/ 5600 batches | test loss 0.3471105 +| epoch 7 | 1355/ 5600 batches | test loss 0.4492252 +| epoch 7 | 1359/ 5600 batches | test loss 0.4999479 +| epoch 7 | 1363/ 5600 batches | test loss 0.4849414 +| epoch 7 | 1367/ 5600 batches | test loss 0.3930507 +| epoch 7 | 1371/ 5600 batches | test loss 0.3621569 +| epoch 7 | 1375/ 5600 batches | test loss 0.5779542 +| epoch 7 | 1379/ 5600 batches | test loss 0.4727539 +| epoch 7 | 1383/ 5600 batches | test loss 0.4797548 +| epoch 7 | 1387/ 5600 batches | test loss 0.5703753 +| epoch 7 | 1391/ 5600 batches | test loss 0.4164716 +| epoch 7 | 1395/ 5600 batches | test loss 0.3730397 +| epoch 7 | 1399/ 5600 batches | test loss 0.4200890 +| epoch 7 | final test loss 0.4525, do not save model! +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 5600 batches | train loss 0.3861510 +| epoch 8 | 7/ 5600 batches | train loss 0.3332696 +| epoch 8 | 11/ 5600 batches | train loss 0.2680299 +| epoch 8 | 15/ 5600 batches | train loss 0.3691643 +| epoch 8 | 19/ 5600 batches | train loss 0.3485478 +| epoch 8 | 23/ 5600 batches | train loss 0.3787405 +| epoch 8 | 27/ 5600 batches | train loss 0.3604912 +| epoch 8 | 31/ 5600 batches | train loss 0.3484747 +| epoch 8 | 35/ 5600 batches | train loss 0.3368217 +| epoch 8 | 39/ 5600 batches | train loss 0.4037504 +| epoch 8 | 43/ 5600 batches | train loss 0.3261488 +| epoch 8 | 47/ 5600 batches | train loss 0.2795743 +| epoch 8 | 51/ 5600 batches | train loss 0.3382113 +| epoch 8 | 55/ 5600 batches | train loss 0.3721017 +| epoch 8 | 59/ 5600 batches | train loss 0.3117198 +| epoch 8 | 63/ 5600 batches | train loss 0.3153484 +| epoch 8 | 67/ 5600 batches | train loss 0.2783931 +| epoch 8 | 71/ 5600 batches | train loss 0.3544771 +| epoch 8 | 75/ 5600 batches | train loss 0.3196339 +| epoch 8 | 79/ 5600 batches | train loss 0.3507812 +| epoch 8 | 83/ 5600 batches | train loss 0.3192264 +| epoch 8 | 87/ 5600 batches | train loss 0.3584012 +| epoch 8 | 91/ 5600 batches | train loss 0.3658526 +| epoch 8 | 95/ 5600 batches | train loss 0.3527432 +| epoch 8 | 99/ 5600 batches | train loss 0.3483713 +| epoch 8 | 103/ 5600 batches | train loss 0.3459195 +| epoch 8 | 107/ 5600 batches | train loss 0.4102384 +| epoch 8 | 111/ 5600 batches | train loss 0.2973159 +| epoch 8 | 115/ 5600 batches | train loss 0.3706188 +| epoch 8 | 119/ 5600 batches | train loss 0.3496822 +| epoch 8 | 123/ 5600 batches | train loss 0.3538101 +| epoch 8 | 127/ 5600 batches | train loss 0.2493549 +| epoch 8 | 131/ 5600 batches | train loss 0.3803381 +| epoch 8 | 135/ 5600 batches | train loss 0.3375125 +| epoch 8 | 139/ 5600 batches | train loss 0.3011388 +| epoch 8 | 143/ 5600 batches | train loss 0.4190898 +| epoch 8 | 147/ 5600 batches | train loss 0.3703198 +| epoch 8 | 151/ 5600 batches | train loss 0.3397805 +| epoch 8 | 155/ 5600 batches | train loss 0.2917307 +| epoch 8 | 159/ 5600 batches | train loss 0.3033199 +| epoch 8 | 163/ 5600 batches | train loss 0.2594198 +| epoch 8 | 167/ 5600 batches | train loss 0.3857547 +| epoch 8 | 171/ 5600 batches | train loss 0.3268124 +| epoch 8 | 175/ 5600 batches | train loss 0.3081831 +| epoch 8 | 179/ 5600 batches | train loss 0.3407412 +| epoch 8 | 183/ 5600 batches | train loss 0.3361838 +| epoch 8 | 187/ 5600 batches | train loss 0.3853892 +| epoch 8 | 191/ 5600 batches | train loss 0.3278522 +| epoch 8 | 195/ 5600 batches | train loss 0.3474318 +| epoch 8 | 199/ 5600 batches | train loss 0.2985783 +| epoch 8 | 203/ 5600 batches | train loss 0.4214235 +| epoch 8 | 207/ 5600 batches | train loss 0.3662437 +| epoch 8 | 211/ 5600 batches | train loss 0.3541013 +| epoch 8 | 215/ 5600 batches | train loss 0.3499680 +| epoch 8 | 219/ 5600 batches | train loss 0.3252631 +| epoch 8 | 223/ 5600 batches | train loss 0.2109458 +| epoch 8 | 227/ 5600 batches | train loss 0.3235381 +| epoch 8 | 231/ 5600 batches | train loss 0.3351656 +| epoch 8 | 235/ 5600 batches | train loss 0.2088089 +| epoch 8 | 239/ 5600 batches | train loss 0.2511523 +| epoch 8 | 243/ 5600 batches | train loss 0.2794889 +| epoch 8 | 247/ 5600 batches | train loss 0.3321348 +| epoch 8 | 251/ 5600 batches | train loss 0.3350232 +| epoch 8 | 255/ 5600 batches | train loss 0.2598208 +| epoch 8 | 259/ 5600 batches | train loss 0.2847679 +| epoch 8 | 263/ 5600 batches | train loss 0.3348344 +| epoch 8 | 267/ 5600 batches | train loss 0.3261913 +| epoch 8 | 271/ 5600 batches | train loss 0.2679940 +| epoch 8 | 275/ 5600 batches | train loss 0.3179649 +| epoch 8 | 279/ 5600 batches | train loss 0.3798193 +| epoch 8 | 283/ 5600 batches | train loss 0.3877871 +| epoch 8 | 287/ 5600 batches | train loss 0.3500317 +| epoch 8 | 291/ 5600 batches | train loss 0.3444471 +| epoch 8 | 295/ 5600 batches | train loss 0.3103265 +| epoch 8 | 299/ 5600 batches | train loss 0.3660493 +| epoch 8 | 303/ 5600 batches | train loss 0.3478726 +| epoch 8 | 307/ 5600 batches | train loss 0.4433932 +| epoch 8 | 311/ 5600 batches | train loss 0.3934807 +| epoch 8 | 315/ 5600 batches | train loss 0.2574773 +| epoch 8 | 319/ 5600 batches | train loss 0.3508615 +| epoch 8 | 323/ 5600 batches | train loss 0.3104557 +| epoch 8 | 327/ 5600 batches | train loss 0.3426817 +| epoch 8 | 331/ 5600 batches | train loss 0.3511088 +| epoch 8 | 335/ 5600 batches | train loss 0.3096153 +| epoch 8 | 339/ 5600 batches | train loss 0.3447618 +| epoch 8 | 343/ 5600 batches | train loss 0.3348807 +| epoch 8 | 347/ 5600 batches | train loss 0.3248811 +| epoch 8 | 351/ 5600 batches | train loss 0.3427816 +| epoch 8 | 355/ 5600 batches | train loss 0.3312359 +| epoch 8 | 359/ 5600 batches | train loss 0.4155759 +| epoch 8 | 363/ 5600 batches | train loss 0.3085504 +| epoch 8 | 367/ 5600 batches | train loss 0.3510858 +| epoch 8 | 371/ 5600 batches | train loss 0.3376038 +| epoch 8 | 375/ 5600 batches | train loss 0.3155870 +| epoch 8 | 379/ 5600 batches | train loss 0.3192030 +| epoch 8 | 383/ 5600 batches | train loss 0.2924689 +| epoch 8 | 387/ 5600 batches | train loss 0.2636130 +| epoch 8 | 391/ 5600 batches | train loss 0.3738128 +| epoch 8 | 395/ 5600 batches | train loss 0.3349260 +| epoch 8 | 399/ 5600 batches | train loss 0.3777665 +| epoch 8 | 403/ 5600 batches | train loss 0.2868204 +| epoch 8 | 407/ 5600 batches | train loss 0.3507370 +| epoch 8 | 411/ 5600 batches | train loss 0.3471167 +| epoch 8 | 415/ 5600 batches | train loss 0.3461077 +| epoch 8 | 419/ 5600 batches | train loss 0.3846878 +| epoch 8 | 423/ 5600 batches | train loss 0.3425412 +| epoch 8 | 427/ 5600 batches | train loss 0.3397389 +| epoch 8 | 431/ 5600 batches | train loss 0.3706934 +| epoch 8 | 435/ 5600 batches | train loss 0.3507302 +| epoch 8 | 439/ 5600 batches | train loss 0.3314782 +| epoch 8 | 443/ 5600 batches | train loss 0.3484102 +| epoch 8 | 447/ 5600 batches | train loss 0.3660222 +| epoch 8 | 451/ 5600 batches | train loss 0.3107955 +| epoch 8 | 455/ 5600 batches | train loss 0.3667602 +| epoch 8 | 459/ 5600 batches | train loss 0.3963065 +| epoch 8 | 463/ 5600 batches | train loss 0.3917379 +| epoch 8 | 467/ 5600 batches | train loss 0.3564326 +| epoch 8 | 471/ 5600 batches | train loss 0.3600094 +| epoch 8 | 475/ 5600 batches | train loss 0.3156933 +| epoch 8 | 479/ 5600 batches | train loss 0.3751578 +| epoch 8 | 483/ 5600 batches | train loss 0.3320203 +| epoch 8 | 487/ 5600 batches | train loss 0.3180770 +| epoch 8 | 491/ 5600 batches | train loss 0.3767901 +| epoch 8 | 495/ 5600 batches | train loss 0.3064540 +| epoch 8 | 499/ 5600 batches | train loss 0.3336858 +| epoch 8 | 503/ 5600 batches | train loss 0.3513218 +| epoch 8 | 507/ 5600 batches | train loss 0.3615786 +| epoch 8 | 511/ 5600 batches | train loss 0.3776307 +| epoch 8 | 515/ 5600 batches | train loss 0.2796935 +| epoch 8 | 519/ 5600 batches | train loss 0.2563264 +| epoch 8 | 523/ 5600 batches | train loss 0.3803493 +| epoch 8 | 527/ 5600 batches | train loss 0.4069352 +| epoch 8 | 531/ 5600 batches | train loss 0.3232819 +| epoch 8 | 535/ 5600 batches | train loss 0.3678272 +| epoch 8 | 539/ 5600 batches | train loss 0.3271521 +| epoch 8 | 543/ 5600 batches | train loss 0.3271340 +| epoch 8 | 547/ 5600 batches | train loss 0.3109473 +| epoch 8 | 551/ 5600 batches | train loss 0.3535618 +| epoch 8 | 555/ 5600 batches | train loss 0.3789313 +| epoch 8 | 559/ 5600 batches | train loss 0.3368988 +| epoch 8 | 563/ 5600 batches | train loss 0.3397363 +| epoch 8 | 567/ 5600 batches | train loss 0.3106105 +| epoch 8 | 571/ 5600 batches | train loss 0.2915238 +| epoch 8 | 575/ 5600 batches | train loss 0.3684956 +| epoch 8 | 579/ 5600 batches | train loss 0.2880780 +| epoch 8 | 583/ 5600 batches | train loss 0.3120158 +| epoch 8 | 587/ 5600 batches | train loss 0.3760278 +| epoch 8 | 591/ 5600 batches | train loss 0.2777358 +| epoch 8 | 595/ 5600 batches | train loss 0.3250918 +| epoch 8 | 599/ 5600 batches | train loss 0.3134278 +| epoch 8 | 603/ 5600 batches | train loss 0.3299555 +| epoch 8 | 607/ 5600 batches | train loss 0.3394734 +| epoch 8 | 611/ 5600 batches | train loss 0.3307417 +| epoch 8 | 615/ 5600 batches | train loss 0.3155245 +| epoch 8 | 619/ 5600 batches | train loss 0.3611274 +| epoch 8 | 623/ 5600 batches | train loss 0.3163669 +| epoch 8 | 627/ 5600 batches | train loss 0.2977071 +| epoch 8 | 631/ 5600 batches | train loss 0.4101557 +| epoch 8 | 635/ 5600 batches | train loss 0.3397335 +| epoch 8 | 639/ 5600 batches | train loss 0.3138640 +| epoch 8 | 643/ 5600 batches | train loss 0.2837386 +| epoch 8 | 647/ 5600 batches | train loss 0.3466798 +| epoch 8 | 651/ 5600 batches | train loss 0.3587206 +| epoch 8 | 655/ 5600 batches | train loss 0.3730535 +| epoch 8 | 659/ 5600 batches | train loss 0.3452280 +| epoch 8 | 663/ 5600 batches | train loss 0.3200543 +| epoch 8 | 667/ 5600 batches | train loss 0.3241104 +| epoch 8 | 671/ 5600 batches | train loss 0.4052823 +| epoch 8 | 675/ 5600 batches | train loss 0.3354432 +| epoch 8 | 679/ 5600 batches | train loss 0.3290974 +| epoch 8 | 683/ 5600 batches | train loss 0.3063194 +| epoch 8 | 687/ 5600 batches | train loss 0.3014311 +| epoch 8 | 691/ 5600 batches | train loss 0.3684154 +| epoch 8 | 695/ 5600 batches | train loss 0.3634254 +| epoch 8 | 699/ 5600 batches | train loss 0.3751187 +| epoch 8 | 703/ 5600 batches | train loss 0.3407827 +| epoch 8 | 707/ 5600 batches | train loss 0.3386593 +| epoch 8 | 711/ 5600 batches | train loss 0.3718106 +| epoch 8 | 715/ 5600 batches | train loss 0.2804125 +| epoch 8 | 719/ 5600 batches | train loss 0.3512594 +| epoch 8 | 723/ 5600 batches | train loss 0.3227938 +| epoch 8 | 727/ 5600 batches | train loss 0.3230645 +| epoch 8 | 731/ 5600 batches | train loss 0.3093651 +| epoch 8 | 735/ 5600 batches | train loss 0.3721673 +| epoch 8 | 739/ 5600 batches | train loss 0.3408215 +| epoch 8 | 743/ 5600 batches | train loss 0.3434048 +| epoch 8 | 747/ 5600 batches | train loss 0.4015569 +| epoch 8 | 751/ 5600 batches | train loss 0.2828220 +| epoch 8 | 755/ 5600 batches | train loss 0.3894437 +| epoch 8 | 759/ 5600 batches | train loss 0.4235369 +| epoch 8 | 763/ 5600 batches | train loss 0.3021898 +| epoch 8 | 767/ 5600 batches | train loss 0.3160705 +| epoch 8 | 771/ 5600 batches | train loss 0.3278006 +| epoch 8 | 775/ 5600 batches | train loss 0.3992954 +| epoch 8 | 779/ 5600 batches | train loss 0.4061787 +| epoch 8 | 783/ 5600 batches | train loss 0.2480228 +| epoch 8 | 787/ 5600 batches | train loss 0.3841136 +| epoch 8 | 791/ 5600 batches | train loss 0.3474336 +| epoch 8 | 795/ 5600 batches | train loss 0.4020179 +| epoch 8 | 799/ 5600 batches | train loss 0.3820939 +| epoch 8 | 803/ 5600 batches | train loss 0.3559113 +| epoch 8 | 807/ 5600 batches | train loss 0.4490862 +| epoch 8 | 811/ 5600 batches | train loss 0.3375989 +| epoch 8 | 815/ 5600 batches | train loss 0.4068770 +| epoch 8 | 819/ 5600 batches | train loss 0.3447293 +| epoch 8 | 823/ 5600 batches | train loss 0.4383588 +| epoch 8 | 827/ 5600 batches | train loss 0.3298914 +| epoch 8 | 831/ 5600 batches | train loss 0.3479182 +| epoch 8 | 835/ 5600 batches | train loss 0.2928331 +| epoch 8 | 839/ 5600 batches | train loss 0.3436370 +| epoch 8 | 843/ 5600 batches | train loss 0.3429193 +| epoch 8 | 847/ 5600 batches | train loss 0.3238676 +| epoch 8 | 851/ 5600 batches | train loss 0.3355542 +| epoch 8 | 855/ 5600 batches | train loss 0.3349479 +| epoch 8 | 859/ 5600 batches | train loss 0.3367418 +| epoch 8 | 863/ 5600 batches | train loss 0.2776495 +| epoch 8 | 867/ 5600 batches | train loss 0.3028154 +| epoch 8 | 871/ 5600 batches | train loss 0.3483754 +| epoch 8 | 875/ 5600 batches | train loss 0.3146592 +| epoch 8 | 879/ 5600 batches | train loss 0.3804214 +| epoch 8 | 883/ 5600 batches | train loss 0.3015469 +| epoch 8 | 887/ 5600 batches | train loss 0.3643866 +| epoch 8 | 891/ 5600 batches | train loss 0.3852479 +| epoch 8 | 895/ 5600 batches | train loss 0.3424123 +| epoch 8 | 899/ 5600 batches | train loss 0.3386922 +| epoch 8 | 903/ 5600 batches | train loss 0.3503659 +| epoch 8 | 907/ 5600 batches | train loss 0.2813747 +| epoch 8 | 911/ 5600 batches | train loss 0.3230943 +| epoch 8 | 915/ 5600 batches | train loss 0.3248756 +| epoch 8 | 919/ 5600 batches | train loss 0.3197145 +| epoch 8 | 923/ 5600 batches | train loss 0.3080358 +| epoch 8 | 927/ 5600 batches | train loss 0.3365749 +| epoch 8 | 931/ 5600 batches | train loss 0.2973079 +| epoch 8 | 935/ 5600 batches | train loss 0.2948822 +| epoch 8 | 939/ 5600 batches | train loss 0.3739066 +| epoch 8 | 943/ 5600 batches | train loss 0.2949198 +| epoch 8 | 947/ 5600 batches | train loss 0.3701159 +| epoch 8 | 951/ 5600 batches | train loss 0.3592696 +| epoch 8 | 955/ 5600 batches | train loss 0.3441331 +| epoch 8 | 959/ 5600 batches | train loss 0.3321974 +| epoch 8 | 963/ 5600 batches | train loss 0.5024141 +| epoch 8 | 967/ 5600 batches | train loss 0.3053435 +| epoch 8 | 971/ 5600 batches | train loss 0.3686571 +| epoch 8 | 975/ 5600 batches | train loss 0.3524730 +| epoch 8 | 979/ 5600 batches | train loss 0.3100932 +| epoch 8 | 983/ 5600 batches | train loss 0.3592799 +| epoch 8 | 987/ 5600 batches | train loss 0.3324360 +| epoch 8 | 991/ 5600 batches | train loss 0.3023373 +| epoch 8 | 995/ 5600 batches | train loss 0.3325625 +| epoch 8 | 999/ 5600 batches | train loss 0.4005513 +| epoch 8 | 1003/ 5600 batches | train loss 0.3690229 +| epoch 8 | 1007/ 5600 batches | train loss 0.3063856 +| epoch 8 | 1011/ 5600 batches | train loss 0.3317271 +| epoch 8 | 1015/ 5600 batches | train loss 0.3190710 +| epoch 8 | 1019/ 5600 batches | train loss 0.3134838 +| epoch 8 | 1023/ 5600 batches | train loss 0.3298602 +| epoch 8 | 1027/ 5600 batches | train loss 0.3303351 +| epoch 8 | 1031/ 5600 batches | train loss 0.3775457 +| epoch 8 | 1035/ 5600 batches | train loss 0.2956886 +| epoch 8 | 1039/ 5600 batches | train loss 0.3401444 +| epoch 8 | 1043/ 5600 batches | train loss 0.2853038 +| epoch 8 | 1047/ 5600 batches | train loss 0.3274586 +| epoch 8 | 1051/ 5600 batches | train loss 0.3894234 +| epoch 8 | 1055/ 5600 batches | train loss 0.3709869 +| epoch 8 | 1059/ 5600 batches | train loss 0.2247945 +| epoch 8 | 1063/ 5600 batches | train loss 0.3364836 +| epoch 8 | 1067/ 5600 batches | train loss 0.2965167 +| epoch 8 | 1071/ 5600 batches | train loss 0.3843174 +| epoch 8 | 1075/ 5600 batches | train loss 0.3794962 +| epoch 8 | 1079/ 5600 batches | train loss 0.2639347 +| epoch 8 | 1083/ 5600 batches | train loss 0.3392587 +| epoch 8 | 1087/ 5600 batches | train loss 0.3447935 +| epoch 8 | 1091/ 5600 batches | train loss 0.3007425 +| epoch 8 | 1095/ 5600 batches | train loss 0.3907243 +| epoch 8 | 1099/ 5600 batches | train loss 0.2597866 +| epoch 8 | 1103/ 5600 batches | train loss 0.3382928 +| epoch 8 | 1107/ 5600 batches | train loss 0.3244467 +| epoch 8 | 1111/ 5600 batches | train loss 0.3654827 +| epoch 8 | 1115/ 5600 batches | train loss 0.2859149 +| epoch 8 | 1119/ 5600 batches | train loss 0.2947476 +| epoch 8 | 1123/ 5600 batches | train loss 0.3314071 +| epoch 8 | 1127/ 5600 batches | train loss 0.3813163 +| epoch 8 | 1131/ 5600 batches | train loss 0.3907315 +| epoch 8 | 1135/ 5600 batches | train loss 0.3998267 +| epoch 8 | 1139/ 5600 batches | train loss 0.3924835 +| epoch 8 | 1143/ 5600 batches | train loss 0.3553485 +| epoch 8 | 1147/ 5600 batches | train loss 0.3340747 +| epoch 8 | 1151/ 5600 batches | train loss 0.3600229 +| epoch 8 | 1155/ 5600 batches | train loss 0.3642265 +| epoch 8 | 1159/ 5600 batches | train loss 0.2642542 +| epoch 8 | 1163/ 5600 batches | train loss 0.3531231 +| epoch 8 | 1167/ 5600 batches | train loss 0.3799978 +| epoch 8 | 1171/ 5600 batches | train loss 0.3986092 +| epoch 8 | 1175/ 5600 batches | train loss 0.3412079 +| epoch 8 | 1179/ 5600 batches | train loss 0.2928051 +| epoch 8 | 1183/ 5600 batches | train loss 0.3352918 +| epoch 8 | 1187/ 5600 batches | train loss 0.3046450 +| epoch 8 | 1191/ 5600 batches | train loss 0.3242991 +| epoch 8 | 1195/ 5600 batches | train loss 0.3950443 +| epoch 8 | 1199/ 5600 batches | train loss 0.4386494 +| epoch 8 | 1203/ 5600 batches | train loss 0.3739759 +| epoch 8 | 1207/ 5600 batches | train loss 0.2681244 +| epoch 8 | 1211/ 5600 batches | train loss 0.3370742 +| epoch 8 | 1215/ 5600 batches | train loss 0.3518817 +| epoch 8 | 1219/ 5600 batches | train loss 0.3189395 +| epoch 8 | 1223/ 5600 batches | train loss 0.3452458 +| epoch 8 | 1227/ 5600 batches | train loss 0.3107553 +| epoch 8 | 1231/ 5600 batches | train loss 0.3704604 +| epoch 8 | 1235/ 5600 batches | train loss 0.3514892 +| epoch 8 | 1239/ 5600 batches | train loss 0.3938047 +| epoch 8 | 1243/ 5600 batches | train loss 0.3815361 +| epoch 8 | 1247/ 5600 batches | train loss 0.3658848 +| epoch 8 | 1251/ 5600 batches | train loss 0.3991558 +| epoch 8 | 1255/ 5600 batches | train loss 0.3754398 +| epoch 8 | 1259/ 5600 batches | train loss 0.3683835 +| epoch 8 | 1263/ 5600 batches | train loss 0.3789843 +| epoch 8 | 1267/ 5600 batches | train loss 0.3466173 +| epoch 8 | 1271/ 5600 batches | train loss 0.3314805 +| epoch 8 | 1275/ 5600 batches | train loss 0.3858958 +| epoch 8 | 1279/ 5600 batches | train loss 0.3422950 +| epoch 8 | 1283/ 5600 batches | train loss 0.3616329 +| epoch 8 | 1287/ 5600 batches | train loss 0.3041406 +| epoch 8 | 1291/ 5600 batches | train loss 0.2302290 +| epoch 8 | 1295/ 5600 batches | train loss 0.4021288 +| epoch 8 | 1299/ 5600 batches | train loss 0.3669085 +| epoch 8 | 1303/ 5600 batches | train loss 0.3495010 +| epoch 8 | 1307/ 5600 batches | train loss 0.3972805 +| epoch 8 | 1311/ 5600 batches | train loss 0.3006479 +| epoch 8 | 1315/ 5600 batches | train loss 0.2873538 +| epoch 8 | 1319/ 5600 batches | train loss 0.3720084 +| epoch 8 | 1323/ 5600 batches | train loss 0.2647995 +| epoch 8 | 1327/ 5600 batches | train loss 0.3183298 +| epoch 8 | 1331/ 5600 batches | train loss 0.3654912 +| epoch 8 | 1335/ 5600 batches | train loss 0.3835357 +| epoch 8 | 1339/ 5600 batches | train loss 0.3530750 +| epoch 8 | 1343/ 5600 batches | train loss 0.3287506 +| epoch 8 | 1347/ 5600 batches | train loss 0.2796928 +| epoch 8 | 1351/ 5600 batches | train loss 0.3431056 +| epoch 8 | 1355/ 5600 batches | train loss 0.3591286 +| epoch 8 | 1359/ 5600 batches | train loss 0.2923186 +| epoch 8 | 1363/ 5600 batches | train loss 0.3230333 +| epoch 8 | 1367/ 5600 batches | train loss 0.3034724 +| epoch 8 | 1371/ 5600 batches | train loss 0.3006858 +| epoch 8 | 1375/ 5600 batches | train loss 0.4365951 +| epoch 8 | 1379/ 5600 batches | train loss 0.3503355 +| epoch 8 | 1383/ 5600 batches | train loss 0.4000545 +| epoch 8 | 1387/ 5600 batches | train loss 0.3453254 +| epoch 8 | 1391/ 5600 batches | train loss 0.3807777 +| epoch 8 | 1395/ 5600 batches | train loss 0.2953867 +| epoch 8 | 1399/ 5600 batches | train loss 0.3424905 +| epoch 8 | 1403/ 5600 batches | train loss 0.3449166 +| epoch 8 | 1407/ 5600 batches | train loss 0.3616651 +| epoch 8 | 1411/ 5600 batches | train loss 0.3396426 +| epoch 8 | 1415/ 5600 batches | train loss 0.3709276 +| epoch 8 | 1419/ 5600 batches | train loss 0.3714107 +| epoch 8 | 1423/ 5600 batches | train loss 0.3610411 +| epoch 8 | 1427/ 5600 batches | train loss 0.3090662 +| epoch 8 | 1431/ 5600 batches | train loss 0.3325675 +| epoch 8 | 1435/ 5600 batches | train loss 0.3248069 +| epoch 8 | 1439/ 5600 batches | train loss 0.3699616 +| epoch 8 | 1443/ 5600 batches | train loss 0.3433938 +| epoch 8 | 1447/ 5600 batches | train loss 0.4054709 +| epoch 8 | 1451/ 5600 batches | train loss 0.3319656 +| epoch 8 | 1455/ 5600 batches | train loss 0.3122863 +| epoch 8 | 1459/ 5600 batches | train loss 0.3526269 +| epoch 8 | 1463/ 5600 batches | train loss 0.3351245 +| epoch 8 | 1467/ 5600 batches | train loss 0.3906637 +| epoch 8 | 1471/ 5600 batches | train loss 0.2878377 +| epoch 8 | 1475/ 5600 batches | train loss 0.3552547 +| epoch 8 | 1479/ 5600 batches | train loss 0.3481762 +| epoch 8 | 1483/ 5600 batches | train loss 0.3767126 +| epoch 8 | 1487/ 5600 batches | train loss 0.3737366 +| epoch 8 | 1491/ 5600 batches | train loss 0.3468518 +| epoch 8 | 1495/ 5600 batches | train loss 0.3551652 +| epoch 8 | 1499/ 5600 batches | train loss 0.3007714 +| epoch 8 | 1503/ 5600 batches | train loss 0.3125328 +| epoch 8 | 1507/ 5600 batches | train loss 0.3593098 +| epoch 8 | 1511/ 5600 batches | train loss 0.3398504 +| epoch 8 | 1515/ 5600 batches | train loss 0.3277375 +| epoch 8 | 1519/ 5600 batches | train loss 0.3412002 +| epoch 8 | 1523/ 5600 batches | train loss 0.2758876 +| epoch 8 | 1527/ 5600 batches | train loss 0.2647007 +| epoch 8 | 1531/ 5600 batches | train loss 0.3385656 +| epoch 8 | 1535/ 5600 batches | train loss 0.3702461 +| epoch 8 | 1539/ 5600 batches | train loss 0.3212258 +| epoch 8 | 1543/ 5600 batches | train loss 0.3533196 +| epoch 8 | 1547/ 5600 batches | train loss 0.2829661 +| epoch 8 | 1551/ 5600 batches | train loss 0.3652468 +| epoch 8 | 1555/ 5600 batches | train loss 0.3196694 +| epoch 8 | 1559/ 5600 batches | train loss 0.3672935 +| epoch 8 | 1563/ 5600 batches | train loss 0.3482124 +| epoch 8 | 1567/ 5600 batches | train loss 0.3273954 +| epoch 8 | 1571/ 5600 batches | train loss 0.4132898 +| epoch 8 | 1575/ 5600 batches | train loss 0.3153211 +| epoch 8 | 1579/ 5600 batches | train loss 0.3367902 +| epoch 8 | 1583/ 5600 batches | train loss 0.3311233 +| epoch 8 | 1587/ 5600 batches | train loss 0.3523148 +| epoch 8 | 1591/ 5600 batches | train loss 0.4062091 +| epoch 8 | 1595/ 5600 batches | train loss 0.3560616 +| epoch 8 | 1599/ 5600 batches | train loss 0.3451688 +| epoch 8 | 1603/ 5600 batches | train loss 0.3575626 +| epoch 8 | 1607/ 5600 batches | train loss 0.4077178 +| epoch 8 | 1611/ 5600 batches | train loss 0.3277647 +| epoch 8 | 1615/ 5600 batches | train loss 0.3505892 +| epoch 8 | 1619/ 5600 batches | train loss 0.3290851 +| epoch 8 | 1623/ 5600 batches | train loss 0.3185705 +| epoch 8 | 1627/ 5600 batches | train loss 0.3057471 +| epoch 8 | 1631/ 5600 batches | train loss 0.3656015 +| epoch 8 | 1635/ 5600 batches | train loss 0.3309277 +| epoch 8 | 1639/ 5600 batches | train loss 0.3395807 +| epoch 8 | 1643/ 5600 batches | train loss 0.4170523 +| epoch 8 | 1647/ 5600 batches | train loss 0.3694836 +| epoch 8 | 1651/ 5600 batches | train loss 0.3321508 +| epoch 8 | 1655/ 5600 batches | train loss 0.3588961 +| epoch 8 | 1659/ 5600 batches | train loss 0.3211149 +| epoch 8 | 1663/ 5600 batches | train loss 0.3276271 +| epoch 8 | 1667/ 5600 batches | train loss 0.3666669 +| epoch 8 | 1671/ 5600 batches | train loss 0.3355741 +| epoch 8 | 1675/ 5600 batches | train loss 0.3730561 +| epoch 8 | 1679/ 5600 batches | train loss 0.2158214 +| epoch 8 | 1683/ 5600 batches | train loss 0.3943869 +| epoch 8 | 1687/ 5600 batches | train loss 0.3699329 +| epoch 8 | 1691/ 5600 batches | train loss 0.3193425 +| epoch 8 | 1695/ 5600 batches | train loss 0.3463993 +| epoch 8 | 1699/ 5600 batches | train loss 0.3431269 +| epoch 8 | 1703/ 5600 batches | train loss 0.4042632 +| epoch 8 | 1707/ 5600 batches | train loss 0.3471324 +| epoch 8 | 1711/ 5600 batches | train loss 0.3467402 +| epoch 8 | 1715/ 5600 batches | train loss 0.3620532 +| epoch 8 | 1719/ 5600 batches | train loss 0.3328682 +| epoch 8 | 1723/ 5600 batches | train loss 0.3416916 +| epoch 8 | 1727/ 5600 batches | train loss 0.3499469 +| epoch 8 | 1731/ 5600 batches | train loss 0.2654240 +| epoch 8 | 1735/ 5600 batches | train loss 0.3535671 +| epoch 8 | 1739/ 5600 batches | train loss 0.3299022 +| epoch 8 | 1743/ 5600 batches | train loss 0.3566330 +| epoch 8 | 1747/ 5600 batches | train loss 0.3524698 +| epoch 8 | 1751/ 5600 batches | train loss 0.3095868 +| epoch 8 | 1755/ 5600 batches | train loss 0.3423107 +| epoch 8 | 1759/ 5600 batches | train loss 0.3578556 +| epoch 8 | 1763/ 5600 batches | train loss 0.3437311 +| epoch 8 | 1767/ 5600 batches | train loss 0.3413169 +| epoch 8 | 1771/ 5600 batches | train loss 0.3155956 +| epoch 8 | 1775/ 5600 batches | train loss 0.3293069 +| epoch 8 | 1779/ 5600 batches | train loss 0.3613112 +| epoch 8 | 1783/ 5600 batches | train loss 0.2637310 +| epoch 8 | 1787/ 5600 batches | train loss 0.3405797 +| epoch 8 | 1791/ 5600 batches | train loss 0.3377875 +| epoch 8 | 1795/ 5600 batches | train loss 0.3255509 +| epoch 8 | 1799/ 5600 batches | train loss 0.3755895 +| epoch 8 | 1803/ 5600 batches | train loss 0.3447680 +| epoch 8 | 1807/ 5600 batches | train loss 0.3160657 +| epoch 8 | 1811/ 5600 batches | train loss 0.2922375 +| epoch 8 | 1815/ 5600 batches | train loss 0.3511875 +| epoch 8 | 1819/ 5600 batches | train loss 0.3775945 +| epoch 8 | 1823/ 5600 batches | train loss 0.3709391 +| epoch 8 | 1827/ 5600 batches | train loss 0.3764784 +| epoch 8 | 1831/ 5600 batches | train loss 0.2997312 +| epoch 8 | 1835/ 5600 batches | train loss 0.3779641 +| epoch 8 | 1839/ 5600 batches | train loss 0.3553575 +| epoch 8 | 1843/ 5600 batches | train loss 0.2877474 +| epoch 8 | 1847/ 5600 batches | train loss 0.4146487 +| epoch 8 | 1851/ 5600 batches | train loss 0.3470776 +| epoch 8 | 1855/ 5600 batches | train loss 0.3587486 +| epoch 8 | 1859/ 5600 batches | train loss 0.3616955 +| epoch 8 | 1863/ 5600 batches | train loss 0.3412499 +| epoch 8 | 1867/ 5600 batches | train loss 0.4017749 +| epoch 8 | 1871/ 5600 batches | train loss 0.3811377 +| epoch 8 | 1875/ 5600 batches | train loss 0.3299825 +| epoch 8 | 1879/ 5600 batches | train loss 0.3568985 +| epoch 8 | 1883/ 5600 batches | train loss 0.3698508 +| epoch 8 | 1887/ 5600 batches | train loss 0.3483359 +| epoch 8 | 1891/ 5600 batches | train loss 0.3247322 +| epoch 8 | 1895/ 5600 batches | train loss 0.3339739 +| epoch 8 | 1899/ 5600 batches | train loss 0.3955256 +| epoch 8 | 1903/ 5600 batches | train loss 0.3198237 +| epoch 8 | 1907/ 5600 batches | train loss 0.3833555 +| epoch 8 | 1911/ 5600 batches | train loss 0.3038768 +| epoch 8 | 1915/ 5600 batches | train loss 0.3546026 +| epoch 8 | 1919/ 5600 batches | train loss 0.2703145 +| epoch 8 | 1923/ 5600 batches | train loss 0.3514019 +| epoch 8 | 1927/ 5600 batches | train loss 0.3777183 +| epoch 8 | 1931/ 5600 batches | train loss 0.3245613 +| epoch 8 | 1935/ 5600 batches | train loss 0.4094346 +| epoch 8 | 1939/ 5600 batches | train loss 0.3257208 +| epoch 8 | 1943/ 5600 batches | train loss 0.2965727 +| epoch 8 | 1947/ 5600 batches | train loss 0.3775460 +| epoch 8 | 1951/ 5600 batches | train loss 0.3116305 +| epoch 8 | 1955/ 5600 batches | train loss 0.3426353 +| epoch 8 | 1959/ 5600 batches | train loss 0.3451118 +| epoch 8 | 1963/ 5600 batches | train loss 0.2935395 +| epoch 8 | 1967/ 5600 batches | train loss 0.3164767 +| epoch 8 | 1971/ 5600 batches | train loss 0.3573726 +| epoch 8 | 1975/ 5600 batches | train loss 0.3302254 +| epoch 8 | 1979/ 5600 batches | train loss 0.3652448 +| epoch 8 | 1983/ 5600 batches | train loss 0.3529298 +| epoch 8 | 1987/ 5600 batches | train loss 0.3358935 +| epoch 8 | 1991/ 5600 batches | train loss 0.3955723 +| epoch 8 | 1995/ 5600 batches | train loss 0.3851609 +| epoch 8 | 1999/ 5600 batches | train loss 0.2543357 +| epoch 8 | 2003/ 5600 batches | train loss 0.3319944 +| epoch 8 | 2007/ 5600 batches | train loss 0.3758630 +| epoch 8 | 2011/ 5600 batches | train loss 0.3335706 +| epoch 8 | 2015/ 5600 batches | train loss 0.3247141 +| epoch 8 | 2019/ 5600 batches | train loss 0.4262286 +| epoch 8 | 2023/ 5600 batches | train loss 0.3458534 +| epoch 8 | 2027/ 5600 batches | train loss 0.3902855 +| epoch 8 | 2031/ 5600 batches | train loss 0.3152140 +| epoch 8 | 2035/ 5600 batches | train loss 0.3275016 +| epoch 8 | 2039/ 5600 batches | train loss 0.3375022 +| epoch 8 | 2043/ 5600 batches | train loss 0.3434073 +| epoch 8 | 2047/ 5600 batches | train loss 0.3719751 +| epoch 8 | 2051/ 5600 batches | train loss 0.3291988 +| epoch 8 | 2055/ 5600 batches | train loss 0.3640106 +| epoch 8 | 2059/ 5600 batches | train loss 0.3880698 +| epoch 8 | 2063/ 5600 batches | train loss 0.3022651 +| epoch 8 | 2067/ 5600 batches | train loss 0.3163539 +| epoch 8 | 2071/ 5600 batches | train loss 0.3417676 +| epoch 8 | 2075/ 5600 batches | train loss 0.3641621 +| epoch 8 | 2079/ 5600 batches | train loss 0.3854052 +| epoch 8 | 2083/ 5600 batches | train loss 0.3297538 +| epoch 8 | 2087/ 5600 batches | train loss 0.3663327 +| epoch 8 | 2091/ 5600 batches | train loss 0.3848658 +| epoch 8 | 2095/ 5600 batches | train loss 0.3146459 +| epoch 8 | 2099/ 5600 batches | train loss 0.3934509 +| epoch 8 | 2103/ 5600 batches | train loss 0.4258889 +| epoch 8 | 2107/ 5600 batches | train loss 0.3873667 +| epoch 8 | 2111/ 5600 batches | train loss 0.3146850 +| epoch 8 | 2115/ 5600 batches | train loss 0.3591382 +| epoch 8 | 2119/ 5600 batches | train loss 0.4533686 +| epoch 8 | 2123/ 5600 batches | train loss 0.3910704 +| epoch 8 | 2127/ 5600 batches | train loss 0.3194213 +| epoch 8 | 2131/ 5600 batches | train loss 0.3232890 +| epoch 8 | 2135/ 5600 batches | train loss 0.3777102 +| epoch 8 | 2139/ 5600 batches | train loss 0.4334581 +| epoch 8 | 2143/ 5600 batches | train loss 0.3121890 +| epoch 8 | 2147/ 5600 batches | train loss 0.4085623 +| epoch 8 | 2151/ 5600 batches | train loss 0.3825567 +| epoch 8 | 2155/ 5600 batches | train loss 0.3359711 +| epoch 8 | 2159/ 5600 batches | train loss 0.3951405 +| epoch 8 | 2163/ 5600 batches | train loss 0.3416860 +| epoch 8 | 2167/ 5600 batches | train loss 0.2712353 +| epoch 8 | 2171/ 5600 batches | train loss 0.3456111 +| epoch 8 | 2175/ 5600 batches | train loss 0.3774639 +| epoch 8 | 2179/ 5600 batches | train loss 0.2962033 +| epoch 8 | 2183/ 5600 batches | train loss 0.2458480 +| epoch 8 | 2187/ 5600 batches | train loss 0.3329026 +| epoch 8 | 2191/ 5600 batches | train loss 0.4203088 +| epoch 8 | 2195/ 5600 batches | train loss 0.3877231 +| epoch 8 | 2199/ 5600 batches | train loss 0.3754102 +| epoch 8 | 2203/ 5600 batches | train loss 0.3691493 +| epoch 8 | 2207/ 5600 batches | train loss 0.3334372 +| epoch 8 | 2211/ 5600 batches | train loss 0.3996028 +| epoch 8 | 2215/ 5600 batches | train loss 0.4375055 +| epoch 8 | 2219/ 5600 batches | train loss 0.3618586 +| epoch 8 | 2223/ 5600 batches | train loss 0.3794975 +| epoch 8 | 2227/ 5600 batches | train loss 0.3729252 +| epoch 8 | 2231/ 5600 batches | train loss 0.4061884 +| epoch 8 | 2235/ 5600 batches | train loss 0.3783416 +| epoch 8 | 2239/ 5600 batches | train loss 0.3281772 +| epoch 8 | 2243/ 5600 batches | train loss 0.3413420 +| epoch 8 | 2247/ 5600 batches | train loss 0.3470812 +| epoch 8 | 2251/ 5600 batches | train loss 0.3485375 +| epoch 8 | 2255/ 5600 batches | train loss 0.3532264 +| epoch 8 | 2259/ 5600 batches | train loss 0.3086235 +| epoch 8 | 2263/ 5600 batches | train loss 0.3351492 +| epoch 8 | 2267/ 5600 batches | train loss 0.3783345 +| epoch 8 | 2271/ 5600 batches | train loss 0.4365450 +| epoch 8 | 2275/ 5600 batches | train loss 0.3370606 +| epoch 8 | 2279/ 5600 batches | train loss 0.3270941 +| epoch 8 | 2283/ 5600 batches | train loss 0.3342254 +| epoch 8 | 2287/ 5600 batches | train loss 0.3888133 +| epoch 8 | 2291/ 5600 batches | train loss 0.3542823 +| epoch 8 | 2295/ 5600 batches | train loss 0.3155090 +| epoch 8 | 2299/ 5600 batches | train loss 0.3732846 +| epoch 8 | 2303/ 5600 batches | train loss 0.4128982 +| epoch 8 | 2307/ 5600 batches | train loss 0.4042597 +| epoch 8 | 2311/ 5600 batches | train loss 0.3342049 +| epoch 8 | 2315/ 5600 batches | train loss 0.3550937 +| epoch 8 | 2319/ 5600 batches | train loss 0.3188670 +| epoch 8 | 2323/ 5600 batches | train loss 0.4002334 +| epoch 8 | 2327/ 5600 batches | train loss 0.3795821 +| epoch 8 | 2331/ 5600 batches | train loss 0.3180647 +| epoch 8 | 2335/ 5600 batches | train loss 0.3516755 +| epoch 8 | 2339/ 5600 batches | train loss 0.4075138 +| epoch 8 | 2343/ 5600 batches | train loss 0.3383826 +| epoch 8 | 2347/ 5600 batches | train loss 0.3624750 +| epoch 8 | 2351/ 5600 batches | train loss 0.4014958 +| epoch 8 | 2355/ 5600 batches | train loss 0.3804178 +| epoch 8 | 2359/ 5600 batches | train loss 0.3550897 +| epoch 8 | 2363/ 5600 batches | train loss 0.3409372 +| epoch 8 | 2367/ 5600 batches | train loss 0.3201012 +| epoch 8 | 2371/ 5600 batches | train loss 0.3124856 +| epoch 8 | 2375/ 5600 batches | train loss 0.2830439 +| epoch 8 | 2379/ 5600 batches | train loss 0.3173652 +| epoch 8 | 2383/ 5600 batches | train loss 0.3425596 +| epoch 8 | 2387/ 5600 batches | train loss 0.3238038 +| epoch 8 | 2391/ 5600 batches | train loss 0.2997563 +| epoch 8 | 2395/ 5600 batches | train loss 0.4232106 +| epoch 8 | 2399/ 5600 batches | train loss 0.3674501 +| epoch 8 | 2403/ 5600 batches | train loss 0.3395944 +| epoch 8 | 2407/ 5600 batches | train loss 0.2688308 +| epoch 8 | 2411/ 5600 batches | train loss 0.2398922 +| epoch 8 | 2415/ 5600 batches | train loss 0.3588321 +| epoch 8 | 2419/ 5600 batches | train loss 0.3322935 +| epoch 8 | 2423/ 5600 batches | train loss 0.2792899 +| epoch 8 | 2427/ 5600 batches | train loss 0.3294291 +| epoch 8 | 2431/ 5600 batches | train loss 0.3373683 +| epoch 8 | 2435/ 5600 batches | train loss 0.3161229 +| epoch 8 | 2439/ 5600 batches | train loss 0.2957664 +| epoch 8 | 2443/ 5600 batches | train loss 0.3580084 +| epoch 8 | 2447/ 5600 batches | train loss 0.4155740 +| epoch 8 | 2451/ 5600 batches | train loss 0.3982071 +| epoch 8 | 2455/ 5600 batches | train loss 0.3743539 +| epoch 8 | 2459/ 5600 batches | train loss 0.3459399 +| epoch 8 | 2463/ 5600 batches | train loss 0.3879523 +| epoch 8 | 2467/ 5600 batches | train loss 0.3741347 +| epoch 8 | 2471/ 5600 batches | train loss 0.3397437 +| epoch 8 | 2475/ 5600 batches | train loss 0.3947105 +| epoch 8 | 2479/ 5600 batches | train loss 0.3259179 +| epoch 8 | 2483/ 5600 batches | train loss 0.3186331 +| epoch 8 | 2487/ 5600 batches | train loss 0.3217941 +| epoch 8 | 2491/ 5600 batches | train loss 0.3534662 +| epoch 8 | 2495/ 5600 batches | train loss 0.3403933 +| epoch 8 | 2499/ 5600 batches | train loss 0.3347774 +| epoch 8 | 2503/ 5600 batches | train loss 0.3778470 +| epoch 8 | 2507/ 5600 batches | train loss 0.3679260 +| epoch 8 | 2511/ 5600 batches | train loss 0.3390734 +| epoch 8 | 2515/ 5600 batches | train loss 0.3668888 +| epoch 8 | 2519/ 5600 batches | train loss 0.3276548 +| epoch 8 | 2523/ 5600 batches | train loss 0.3667926 +| epoch 8 | 2527/ 5600 batches | train loss 0.3325288 +| epoch 8 | 2531/ 5600 batches | train loss 0.4369392 +| epoch 8 | 2535/ 5600 batches | train loss 0.3118485 +| epoch 8 | 2539/ 5600 batches | train loss 0.4523866 +| epoch 8 | 2543/ 5600 batches | train loss 0.2099734 +| epoch 8 | 2547/ 5600 batches | train loss 0.3669854 +| epoch 8 | 2551/ 5600 batches | train loss 0.2953312 +| epoch 8 | 2555/ 5600 batches | train loss 0.3755158 +| epoch 8 | 2559/ 5600 batches | train loss 0.3215444 +| epoch 8 | 2563/ 5600 batches | train loss 0.3431503 +| epoch 8 | 2567/ 5600 batches | train loss 0.3561061 +| epoch 8 | 2571/ 5600 batches | train loss 0.3178464 +| epoch 8 | 2575/ 5600 batches | train loss 0.3515291 +| epoch 8 | 2579/ 5600 batches | train loss 0.2166456 +| epoch 8 | 2583/ 5600 batches | train loss 0.2837171 +| epoch 8 | 2587/ 5600 batches | train loss 0.4048408 +| epoch 8 | 2591/ 5600 batches | train loss 0.3503533 +| epoch 8 | 2595/ 5600 batches | train loss 0.4063611 +| epoch 8 | 2599/ 5600 batches | train loss 0.3398089 +| epoch 8 | 2603/ 5600 batches | train loss 0.3216599 +| epoch 8 | 2607/ 5600 batches | train loss 0.3204815 +| epoch 8 | 2611/ 5600 batches | train loss 0.3064049 +| epoch 8 | 2615/ 5600 batches | train loss 0.3517927 +| epoch 8 | 2619/ 5600 batches | train loss 0.3697749 +| epoch 8 | 2623/ 5600 batches | train loss 0.3836304 +| epoch 8 | 2627/ 5600 batches | train loss 0.3895268 +| epoch 8 | 2631/ 5600 batches | train loss 0.3857326 +| epoch 8 | 2635/ 5600 batches | train loss 0.3313888 +| epoch 8 | 2639/ 5600 batches | train loss 0.3417430 +| epoch 8 | 2643/ 5600 batches | train loss 0.3956648 +| epoch 8 | 2647/ 5600 batches | train loss 0.3677720 +| epoch 8 | 2651/ 5600 batches | train loss 0.3437261 +| epoch 8 | 2655/ 5600 batches | train loss 0.3337677 +| epoch 8 | 2659/ 5600 batches | train loss 0.4083827 +| epoch 8 | 2663/ 5600 batches | train loss 0.3109805 +| epoch 8 | 2667/ 5600 batches | train loss 0.4172810 +| epoch 8 | 2671/ 5600 batches | train loss 0.3009850 +| epoch 8 | 2675/ 5600 batches | train loss 0.3898995 +| epoch 8 | 2679/ 5600 batches | train loss 0.3125055 +| epoch 8 | 2683/ 5600 batches | train loss 0.3541363 +| epoch 8 | 2687/ 5600 batches | train loss 0.3390229 +| epoch 8 | 2691/ 5600 batches | train loss 0.3090237 +| epoch 8 | 2695/ 5600 batches | train loss 0.3649026 +| epoch 8 | 2699/ 5600 batches | train loss 0.3548975 +| epoch 8 | 2703/ 5600 batches | train loss 0.3645888 +| epoch 8 | 2707/ 5600 batches | train loss 0.3665122 +| epoch 8 | 2711/ 5600 batches | train loss 0.3460089 +| epoch 8 | 2715/ 5600 batches | train loss 0.3039702 +| epoch 8 | 2719/ 5600 batches | train loss 0.3548181 +| epoch 8 | 2723/ 5600 batches | train loss 0.3681338 +| epoch 8 | 2727/ 5600 batches | train loss 0.3850247 +| epoch 8 | 2731/ 5600 batches | train loss 0.3229258 +| epoch 8 | 2735/ 5600 batches | train loss 0.3702604 +| epoch 8 | 2739/ 5600 batches | train loss 0.3812917 +| epoch 8 | 2743/ 5600 batches | train loss 0.3507991 +| epoch 8 | 2747/ 5600 batches | train loss 0.3433825 +| epoch 8 | 2751/ 5600 batches | train loss 0.3691505 +| epoch 8 | 2755/ 5600 batches | train loss 0.4573630 +| epoch 8 | 2759/ 5600 batches | train loss 0.3523946 +| epoch 8 | 2763/ 5600 batches | train loss 0.3021964 +| epoch 8 | 2767/ 5600 batches | train loss 0.3373331 +| epoch 8 | 2771/ 5600 batches | train loss 0.3356165 +| epoch 8 | 2775/ 5600 batches | train loss 0.2633072 +| epoch 8 | 2779/ 5600 batches | train loss 0.3579253 +| epoch 8 | 2783/ 5600 batches | train loss 0.3302417 +| epoch 8 | 2787/ 5600 batches | train loss 0.3090740 +| epoch 8 | 2791/ 5600 batches | train loss 0.3506760 +| epoch 8 | 2795/ 5600 batches | train loss 0.2738613 +| epoch 8 | 2799/ 5600 batches | train loss 0.3730503 +| epoch 8 | 2803/ 5600 batches | train loss 0.3068840 +| epoch 8 | 2807/ 5600 batches | train loss 0.2252794 +| epoch 8 | 2811/ 5600 batches | train loss 0.4753630 +| epoch 8 | 2815/ 5600 batches | train loss 0.3331362 +| epoch 8 | 2819/ 5600 batches | train loss 0.3215451 +| epoch 8 | 2823/ 5600 batches | train loss 0.3132563 +| epoch 8 | 2827/ 5600 batches | train loss 0.3862973 +| epoch 8 | 2831/ 5600 batches | train loss 0.3222541 +| epoch 8 | 2835/ 5600 batches | train loss 0.3861114 +| epoch 8 | 2839/ 5600 batches | train loss 0.3256001 +| epoch 8 | 2843/ 5600 batches | train loss 0.4517073 +| epoch 8 | 2847/ 5600 batches | train loss 0.2775864 +| epoch 8 | 2851/ 5600 batches | train loss 0.3551657 +| epoch 8 | 2855/ 5600 batches | train loss 0.4036292 +| epoch 8 | 2859/ 5600 batches | train loss 0.3166123 +| epoch 8 | 2863/ 5600 batches | train loss 0.3034925 +| epoch 8 | 2867/ 5600 batches | train loss 0.2908567 +| epoch 8 | 2871/ 5600 batches | train loss 0.3796188 +| epoch 8 | 2875/ 5600 batches | train loss 0.2999584 +| epoch 8 | 2879/ 5600 batches | train loss 0.3493407 +| epoch 8 | 2883/ 5600 batches | train loss 0.2979370 +| epoch 8 | 2887/ 5600 batches | train loss 0.3558350 +| epoch 8 | 2891/ 5600 batches | train loss 0.3132796 +| epoch 8 | 2895/ 5600 batches | train loss 0.4089882 +| epoch 8 | 2899/ 5600 batches | train loss 0.3304482 +| epoch 8 | 2903/ 5600 batches | train loss 0.3400822 +| epoch 8 | 2907/ 5600 batches | train loss 0.3908193 +| epoch 8 | 2911/ 5600 batches | train loss 0.3237690 +| epoch 8 | 2915/ 5600 batches | train loss 0.3670986 +| epoch 8 | 2919/ 5600 batches | train loss 0.3138241 +| epoch 8 | 2923/ 5600 batches | train loss 0.4315542 +| epoch 8 | 2927/ 5600 batches | train loss 0.3540519 +| epoch 8 | 2931/ 5600 batches | train loss 0.2896301 +| epoch 8 | 2935/ 5600 batches | train loss 0.3001415 +| epoch 8 | 2939/ 5600 batches | train loss 0.3278053 +| epoch 8 | 2943/ 5600 batches | train loss 0.3615806 +| epoch 8 | 2947/ 5600 batches | train loss 0.3602934 +| epoch 8 | 2951/ 5600 batches | train loss 0.3380552 +| epoch 8 | 2955/ 5600 batches | train loss 0.3461793 +| epoch 8 | 2959/ 5600 batches | train loss 0.3991258 +| epoch 8 | 2963/ 5600 batches | train loss 0.3633433 +| epoch 8 | 2967/ 5600 batches | train loss 0.4227996 +| epoch 8 | 2971/ 5600 batches | train loss 0.3249204 +| epoch 8 | 2975/ 5600 batches | train loss 0.3263921 +| epoch 8 | 2979/ 5600 batches | train loss 0.3170900 +| epoch 8 | 2983/ 5600 batches | train loss 0.3342592 +| epoch 8 | 2987/ 5600 batches | train loss 0.3332052 +| epoch 8 | 2991/ 5600 batches | train loss 0.3353825 +| epoch 8 | 2995/ 5600 batches | train loss 0.3850167 +| epoch 8 | 2999/ 5600 batches | train loss 0.3893570 +| epoch 8 | 3003/ 5600 batches | train loss 0.3193003 +| epoch 8 | 3007/ 5600 batches | train loss 0.3236987 +| epoch 8 | 3011/ 5600 batches | train loss 0.2923441 +| epoch 8 | 3015/ 5600 batches | train loss 0.4120891 +| epoch 8 | 3019/ 5600 batches | train loss 0.3417971 +| epoch 8 | 3023/ 5600 batches | train loss 0.3185809 +| epoch 8 | 3027/ 5600 batches | train loss 0.3384395 +| epoch 8 | 3031/ 5600 batches | train loss 0.3653126 +| epoch 8 | 3035/ 5600 batches | train loss 0.3898303 +| epoch 8 | 3039/ 5600 batches | train loss 0.3159540 +| epoch 8 | 3043/ 5600 batches | train loss 0.3309414 +| epoch 8 | 3047/ 5600 batches | train loss 0.3407973 +| epoch 8 | 3051/ 5600 batches | train loss 0.3304997 +| epoch 8 | 3055/ 5600 batches | train loss 0.3944275 +| epoch 8 | 3059/ 5600 batches | train loss 0.3228630 +| epoch 8 | 3063/ 5600 batches | train loss 0.3872485 +| epoch 8 | 3067/ 5600 batches | train loss 0.3668491 +| epoch 8 | 3071/ 5600 batches | train loss 0.3727110 +| epoch 8 | 3075/ 5600 batches | train loss 0.3298720 +| epoch 8 | 3079/ 5600 batches | train loss 0.3417992 +| epoch 8 | 3083/ 5600 batches | train loss 0.3383968 +| epoch 8 | 3087/ 5600 batches | train loss 0.3669727 +| epoch 8 | 3091/ 5600 batches | train loss 0.3307358 +| epoch 8 | 3095/ 5600 batches | train loss 0.3724085 +| epoch 8 | 3099/ 5600 batches | train loss 0.3031510 +| epoch 8 | 3103/ 5600 batches | train loss 0.3692116 +| epoch 8 | 3107/ 5600 batches | train loss 0.3272256 +| epoch 8 | 3111/ 5600 batches | train loss 0.3308234 +| epoch 8 | 3115/ 5600 batches | train loss 0.3591089 +| epoch 8 | 3119/ 5600 batches | train loss 0.4407795 +| epoch 8 | 3123/ 5600 batches | train loss 0.4585930 +| epoch 8 | 3127/ 5600 batches | train loss 0.2968753 +| epoch 8 | 3131/ 5600 batches | train loss 0.4235828 +| epoch 8 | 3135/ 5600 batches | train loss 0.3374595 +| epoch 8 | 3139/ 5600 batches | train loss 0.3567488 +| epoch 8 | 3143/ 5600 batches | train loss 0.4770285 +| epoch 8 | 3147/ 5600 batches | train loss 0.3250223 +| epoch 8 | 3151/ 5600 batches | train loss 0.2871263 +| epoch 8 | 3155/ 5600 batches | train loss 0.3469388 +| epoch 8 | 3159/ 5600 batches | train loss 0.3561026 +| epoch 8 | 3163/ 5600 batches | train loss 0.3011540 +| epoch 8 | 3167/ 5600 batches | train loss 0.3123939 +| epoch 8 | 3171/ 5600 batches | train loss 0.3754793 +| epoch 8 | 3175/ 5600 batches | train loss 0.3445449 +| epoch 8 | 3179/ 5600 batches | train loss 0.3327436 +| epoch 8 | 3183/ 5600 batches | train loss 0.2502331 +| epoch 8 | 3187/ 5600 batches | train loss 0.3789908 +| epoch 8 | 3191/ 5600 batches | train loss 0.3619000 +| epoch 8 | 3195/ 5600 batches | train loss 0.4560224 +| epoch 8 | 3199/ 5600 batches | train loss 0.3697336 +| epoch 8 | 3203/ 5600 batches | train loss 0.3254073 +| epoch 8 | 3207/ 5600 batches | train loss 0.3769773 +| epoch 8 | 3211/ 5600 batches | train loss 0.3480548 +| epoch 8 | 3215/ 5600 batches | train loss 0.3244764 +| epoch 8 | 3219/ 5600 batches | train loss 0.3221501 +| epoch 8 | 3223/ 5600 batches | train loss 0.4060114 +| epoch 8 | 3227/ 5600 batches | train loss 0.3374300 +| epoch 8 | 3231/ 5600 batches | train loss 0.3838191 +| epoch 8 | 3235/ 5600 batches | train loss 0.3658464 +| epoch 8 | 3239/ 5600 batches | train loss 0.3387128 +| epoch 8 | 3243/ 5600 batches | train loss 0.2941683 +| epoch 8 | 3247/ 5600 batches | train loss 0.3725139 +| epoch 8 | 3251/ 5600 batches | train loss 0.3431405 +| epoch 8 | 3255/ 5600 batches | train loss 0.2998764 +| epoch 8 | 3259/ 5600 batches | train loss 0.3637199 +| epoch 8 | 3263/ 5600 batches | train loss 0.3811292 +| epoch 8 | 3267/ 5600 batches | train loss 0.4052551 +| epoch 8 | 3271/ 5600 batches | train loss 0.3780178 +| epoch 8 | 3275/ 5600 batches | train loss 0.3687416 +| epoch 8 | 3279/ 5600 batches | train loss 0.3998206 +| epoch 8 | 3283/ 5600 batches | train loss 0.3633361 +| epoch 8 | 3287/ 5600 batches | train loss 0.3296084 +| epoch 8 | 3291/ 5600 batches | train loss 0.3496264 +| epoch 8 | 3295/ 5600 batches | train loss 0.3895026 +| epoch 8 | 3299/ 5600 batches | train loss 0.3679961 +| epoch 8 | 3303/ 5600 batches | train loss 0.2825218 +| epoch 8 | 3307/ 5600 batches | train loss 0.2994327 +| epoch 8 | 3311/ 5600 batches | train loss 0.3517397 +| epoch 8 | 3315/ 5600 batches | train loss 0.2260707 +| epoch 8 | 3319/ 5600 batches | train loss 0.3376395 +| epoch 8 | 3323/ 5600 batches | train loss 0.4004233 +| epoch 8 | 3327/ 5600 batches | train loss 0.3190268 +| epoch 8 | 3331/ 5600 batches | train loss 0.4194438 +| epoch 8 | 3335/ 5600 batches | train loss 0.2968770 +| epoch 8 | 3339/ 5600 batches | train loss 0.3961808 +| epoch 8 | 3343/ 5600 batches | train loss 0.3219279 +| epoch 8 | 3347/ 5600 batches | train loss 0.3735985 +| epoch 8 | 3351/ 5600 batches | train loss 0.4484934 +| epoch 8 | 3355/ 5600 batches | train loss 0.3392619 +| epoch 8 | 3359/ 5600 batches | train loss 0.3460740 +| epoch 8 | 3363/ 5600 batches | train loss 0.3442410 +| epoch 8 | 3367/ 5600 batches | train loss 0.3141035 +| epoch 8 | 3371/ 5600 batches | train loss 0.3682155 +| epoch 8 | 3375/ 5600 batches | train loss 0.3101156 +| epoch 8 | 3379/ 5600 batches | train loss 0.4278786 +| epoch 8 | 3383/ 5600 batches | train loss 0.2914357 +| epoch 8 | 3387/ 5600 batches | train loss 0.3511961 +| epoch 8 | 3391/ 5600 batches | train loss 0.2688393 +| epoch 8 | 3395/ 5600 batches | train loss 0.3220528 +| epoch 8 | 3399/ 5600 batches | train loss 0.2894909 +| epoch 8 | 3403/ 5600 batches | train loss 0.3744872 +| epoch 8 | 3407/ 5600 batches | train loss 0.3808175 +| epoch 8 | 3411/ 5600 batches | train loss 0.3565167 +| epoch 8 | 3415/ 5600 batches | train loss 0.3768206 +| epoch 8 | 3419/ 5600 batches | train loss 0.3510269 +| epoch 8 | 3423/ 5600 batches | train loss 0.3559255 +| epoch 8 | 3427/ 5600 batches | train loss 0.3479547 +| epoch 8 | 3431/ 5600 batches | train loss 0.3966819 +| epoch 8 | 3435/ 5600 batches | train loss 0.3686974 +| epoch 8 | 3439/ 5600 batches | train loss 0.3626385 +| epoch 8 | 3443/ 5600 batches | train loss 0.3730421 +| epoch 8 | 3447/ 5600 batches | train loss 0.3212143 +| epoch 8 | 3451/ 5600 batches | train loss 0.3776993 +| epoch 8 | 3455/ 5600 batches | train loss 0.3749448 +| epoch 8 | 3459/ 5600 batches | train loss 0.4417913 +| epoch 8 | 3463/ 5600 batches | train loss 0.3562756 +| epoch 8 | 3467/ 5600 batches | train loss 0.3696118 +| epoch 8 | 3471/ 5600 batches | train loss 0.3232201 +| epoch 8 | 3475/ 5600 batches | train loss 0.4388078 +| epoch 8 | 3479/ 5600 batches | train loss 0.3637015 +| epoch 8 | 3483/ 5600 batches | train loss 0.3411171 +| epoch 8 | 3487/ 5600 batches | train loss 0.3822238 +| epoch 8 | 3491/ 5600 batches | train loss 0.4265127 +| epoch 8 | 3495/ 5600 batches | train loss 0.2937031 +| epoch 8 | 3499/ 5600 batches | train loss 0.3785724 +| epoch 8 | 3503/ 5600 batches | train loss 0.3131010 +| epoch 8 | 3507/ 5600 batches | train loss 0.3390577 +| epoch 8 | 3511/ 5600 batches | train loss 0.3509695 +| epoch 8 | 3515/ 5600 batches | train loss 0.3485587 +| epoch 8 | 3519/ 5600 batches | train loss 0.3484511 +| epoch 8 | 3523/ 5600 batches | train loss 0.3320968 +| epoch 8 | 3527/ 5600 batches | train loss 0.3115597 +| epoch 8 | 3531/ 5600 batches | train loss 0.3772006 +| epoch 8 | 3535/ 5600 batches | train loss 0.3286084 +| epoch 8 | 3539/ 5600 batches | train loss 0.3314056 +| epoch 8 | 3543/ 5600 batches | train loss 0.4156208 +| epoch 8 | 3547/ 5600 batches | train loss 0.3325690 +| epoch 8 | 3551/ 5600 batches | train loss 0.3217108 +| epoch 8 | 3555/ 5600 batches | train loss 0.3282281 +| epoch 8 | 3559/ 5600 batches | train loss 0.3176884 +| epoch 8 | 3563/ 5600 batches | train loss 0.3492224 +| epoch 8 | 3567/ 5600 batches | train loss 0.3041193 +| epoch 8 | 3571/ 5600 batches | train loss 0.3335324 +| epoch 8 | 3575/ 5600 batches | train loss 0.3651988 +| epoch 8 | 3579/ 5600 batches | train loss 0.4133594 +| epoch 8 | 3583/ 5600 batches | train loss 0.3422770 +| epoch 8 | 3587/ 5600 batches | train loss 0.3289266 +| epoch 8 | 3591/ 5600 batches | train loss 0.3563580 +| epoch 8 | 3595/ 5600 batches | train loss 0.3356191 +| epoch 8 | 3599/ 5600 batches | train loss 0.3449802 +| epoch 8 | 3603/ 5600 batches | train loss 0.3830374 +| epoch 8 | 3607/ 5600 batches | train loss 0.2811761 +| epoch 8 | 3611/ 5600 batches | train loss 0.3567701 +| epoch 8 | 3615/ 5600 batches | train loss 0.4259652 +| epoch 8 | 3619/ 5600 batches | train loss 0.3755444 +| epoch 8 | 3623/ 5600 batches | train loss 0.3426752 +| epoch 8 | 3627/ 5600 batches | train loss 0.3481170 +| epoch 8 | 3631/ 5600 batches | train loss 0.3642135 +| epoch 8 | 3635/ 5600 batches | train loss 0.3243414 +| epoch 8 | 3639/ 5600 batches | train loss 0.3235333 +| epoch 8 | 3643/ 5600 batches | train loss 0.2975058 +| epoch 8 | 3647/ 5600 batches | train loss 0.3773004 +| epoch 8 | 3651/ 5600 batches | train loss 0.3319033 +| epoch 8 | 3655/ 5600 batches | train loss 0.3873119 +| epoch 8 | 3659/ 5600 batches | train loss 0.3451003 +| epoch 8 | 3663/ 5600 batches | train loss 0.3550518 +| epoch 8 | 3667/ 5600 batches | train loss 0.3109687 +| epoch 8 | 3671/ 5600 batches | train loss 0.3295439 +| epoch 8 | 3675/ 5600 batches | train loss 0.3140360 +| epoch 8 | 3679/ 5600 batches | train loss 0.3492830 +| epoch 8 | 3683/ 5600 batches | train loss 0.4232488 +| epoch 8 | 3687/ 5600 batches | train loss 0.2920081 +| epoch 8 | 3691/ 5600 batches | train loss 0.3281443 +| epoch 8 | 3695/ 5600 batches | train loss 0.3532651 +| epoch 8 | 3699/ 5600 batches | train loss 0.3826680 +| epoch 8 | 3703/ 5600 batches | train loss 0.3312410 +| epoch 8 | 3707/ 5600 batches | train loss 0.3150530 +| epoch 8 | 3711/ 5600 batches | train loss 0.3456237 +| epoch 8 | 3715/ 5600 batches | train loss 0.3641961 +| epoch 8 | 3719/ 5600 batches | train loss 0.3237756 +| epoch 8 | 3723/ 5600 batches | train loss 0.3475912 +| epoch 8 | 3727/ 5600 batches | train loss 0.3423748 +| epoch 8 | 3731/ 5600 batches | train loss 0.3529656 +| epoch 8 | 3735/ 5600 batches | train loss 0.3090967 +| epoch 8 | 3739/ 5600 batches | train loss 0.4142061 +| epoch 8 | 3743/ 5600 batches | train loss 0.3078562 +| epoch 8 | 3747/ 5600 batches | train loss 0.3512374 +| epoch 8 | 3751/ 5600 batches | train loss 0.3329345 +| epoch 8 | 3755/ 5600 batches | train loss 0.3545897 +| epoch 8 | 3759/ 5600 batches | train loss 0.3340772 +| epoch 8 | 3763/ 5600 batches | train loss 0.3266129 +| epoch 8 | 3767/ 5600 batches | train loss 0.2811553 +| epoch 8 | 3771/ 5600 batches | train loss 0.3693978 +| epoch 8 | 3775/ 5600 batches | train loss 0.3686687 +| epoch 8 | 3779/ 5600 batches | train loss 0.2890024 +| epoch 8 | 3783/ 5600 batches | train loss 0.3624594 +| epoch 8 | 3787/ 5600 batches | train loss 0.4012014 +| epoch 8 | 3791/ 5600 batches | train loss 0.3079855 +| epoch 8 | 3795/ 5600 batches | train loss 0.3550216 +| epoch 8 | 3799/ 5600 batches | train loss 0.3015243 +| epoch 8 | 3803/ 5600 batches | train loss 0.3370915 +| epoch 8 | 3807/ 5600 batches | train loss 0.3102927 +| epoch 8 | 3811/ 5600 batches | train loss 0.3480705 +| epoch 8 | 3815/ 5600 batches | train loss 0.3428533 +| epoch 8 | 3819/ 5600 batches | train loss 0.3704146 +| epoch 8 | 3823/ 5600 batches | train loss 0.3557712 +| epoch 8 | 3827/ 5600 batches | train loss 0.3842923 +| epoch 8 | 3831/ 5600 batches | train loss 0.3056630 +| epoch 8 | 3835/ 5600 batches | train loss 0.3333551 +| epoch 8 | 3839/ 5600 batches | train loss 0.3091437 +| epoch 8 | 3843/ 5600 batches | train loss 0.3737761 +| epoch 8 | 3847/ 5600 batches | train loss 0.2829327 +| epoch 8 | 3851/ 5600 batches | train loss 0.4056454 +| epoch 8 | 3855/ 5600 batches | train loss 0.3764995 +| epoch 8 | 3859/ 5600 batches | train loss 0.3510673 +| epoch 8 | 3863/ 5600 batches | train loss 0.3812419 +| epoch 8 | 3867/ 5600 batches | train loss 0.3497425 +| epoch 8 | 3871/ 5600 batches | train loss 0.3673264 +| epoch 8 | 3875/ 5600 batches | train loss 0.3511194 +| epoch 8 | 3879/ 5600 batches | train loss 0.3719861 +| epoch 8 | 3883/ 5600 batches | train loss 0.3771294 +| epoch 8 | 3887/ 5600 batches | train loss 0.3425441 +| epoch 8 | 3891/ 5600 batches | train loss 0.3269100 +| epoch 8 | 3895/ 5600 batches | train loss 0.3225534 +| epoch 8 | 3899/ 5600 batches | train loss 0.3479109 +| epoch 8 | 3903/ 5600 batches | train loss 0.3577648 +| epoch 8 | 3907/ 5600 batches | train loss 0.3499870 +| epoch 8 | 3911/ 5600 batches | train loss 0.3835933 +| epoch 8 | 3915/ 5600 batches | train loss 0.3767914 +| epoch 8 | 3919/ 5600 batches | train loss 0.4488533 +| epoch 8 | 3923/ 5600 batches | train loss 0.3606747 +| epoch 8 | 3927/ 5600 batches | train loss 0.3984536 +| epoch 8 | 3931/ 5600 batches | train loss 0.3385131 +| epoch 8 | 3935/ 5600 batches | train loss 0.3439686 +| epoch 8 | 3939/ 5600 batches | train loss 0.3574586 +| epoch 8 | 3943/ 5600 batches | train loss 0.3329778 +| epoch 8 | 3947/ 5600 batches | train loss 0.2821759 +| epoch 8 | 3951/ 5600 batches | train loss 0.3436277 +| epoch 8 | 3955/ 5600 batches | train loss 0.4169033 +| epoch 8 | 3959/ 5600 batches | train loss 0.3816245 +| epoch 8 | 3963/ 5600 batches | train loss 0.3356374 +| epoch 8 | 3967/ 5600 batches | train loss 0.3528453 +| epoch 8 | 3971/ 5600 batches | train loss 0.3334706 +| epoch 8 | 3975/ 5600 batches | train loss 0.3600785 +| epoch 8 | 3979/ 5600 batches | train loss 0.4067553 +| epoch 8 | 3983/ 5600 batches | train loss 0.3796853 +| epoch 8 | 3987/ 5600 batches | train loss 0.3331394 +| epoch 8 | 3991/ 5600 batches | train loss 0.4259502 +| epoch 8 | 3995/ 5600 batches | train loss 0.3026867 +| epoch 8 | 3999/ 5600 batches | train loss 0.3422333 +| epoch 8 | 4003/ 5600 batches | train loss 0.3210621 +| epoch 8 | 4007/ 5600 batches | train loss 0.3833211 +| epoch 8 | 4011/ 5600 batches | train loss 0.3722623 +| epoch 8 | 4015/ 5600 batches | train loss 0.4218141 +| epoch 8 | 4019/ 5600 batches | train loss 0.3479679 +| epoch 8 | 4023/ 5600 batches | train loss 0.3192444 +| epoch 8 | 4027/ 5600 batches | train loss 0.3650165 +| epoch 8 | 4031/ 5600 batches | train loss 0.3689080 +| epoch 8 | 4035/ 5600 batches | train loss 0.3453739 +| epoch 8 | 4039/ 5600 batches | train loss 0.3500668 +| epoch 8 | 4043/ 5600 batches | train loss 0.4124717 +| epoch 8 | 4047/ 5600 batches | train loss 0.3439058 +| epoch 8 | 4051/ 5600 batches | train loss 0.3308716 +| epoch 8 | 4055/ 5600 batches | train loss 0.3684211 +| epoch 8 | 4059/ 5600 batches | train loss 0.3404914 +| epoch 8 | 4063/ 5600 batches | train loss 0.3116687 +| epoch 8 | 4067/ 5600 batches | train loss 0.3263351 +| epoch 8 | 4071/ 5600 batches | train loss 0.3738816 +| epoch 8 | 4075/ 5600 batches | train loss 0.4144620 +| epoch 8 | 4079/ 5600 batches | train loss 0.2570811 +| epoch 8 | 4083/ 5600 batches | train loss 0.3609521 +| epoch 8 | 4087/ 5600 batches | train loss 0.3471200 +| epoch 8 | 4091/ 5600 batches | train loss 0.3562564 +| epoch 8 | 4095/ 5600 batches | train loss 0.3842279 +| epoch 8 | 4099/ 5600 batches | train loss 0.3067971 +| epoch 8 | 4103/ 5600 batches | train loss 0.3462693 +| epoch 8 | 4107/ 5600 batches | train loss 0.3929632 +| epoch 8 | 4111/ 5600 batches | train loss 0.2859584 +| epoch 8 | 4115/ 5600 batches | train loss 0.3158763 +| epoch 8 | 4119/ 5600 batches | train loss 0.3441809 +| epoch 8 | 4123/ 5600 batches | train loss 0.2972893 +| epoch 8 | 4127/ 5600 batches | train loss 0.3327449 +| epoch 8 | 4131/ 5600 batches | train loss 0.4173377 +| epoch 8 | 4135/ 5600 batches | train loss 0.2331268 +| epoch 8 | 4139/ 5600 batches | train loss 0.2978200 +| epoch 8 | 4143/ 5600 batches | train loss 0.3720307 +| epoch 8 | 4147/ 5600 batches | train loss 0.3937272 +| epoch 8 | 4151/ 5600 batches | train loss 0.3292419 +| epoch 8 | 4155/ 5600 batches | train loss 0.3489922 +| epoch 8 | 4159/ 5600 batches | train loss 0.4092543 +| epoch 8 | 4163/ 5600 batches | train loss 0.3244359 +| epoch 8 | 4167/ 5600 batches | train loss 0.3260838 +| epoch 8 | 4171/ 5600 batches | train loss 0.3685410 +| epoch 8 | 4175/ 5600 batches | train loss 0.3181490 +| epoch 8 | 4179/ 5600 batches | train loss 0.3612379 +| epoch 8 | 4183/ 5600 batches | train loss 0.2878973 +| epoch 8 | 4187/ 5600 batches | train loss 0.3527525 +| epoch 8 | 4191/ 5600 batches | train loss 0.2946925 +| epoch 8 | 4195/ 5600 batches | train loss 0.2928987 +| epoch 8 | 4199/ 5600 batches | train loss 0.3588949 +| epoch 8 | 4203/ 5600 batches | train loss 0.3146937 +| epoch 8 | 4207/ 5600 batches | train loss 0.3150012 +| epoch 8 | 4211/ 5600 batches | train loss 0.3446786 +| epoch 8 | 4215/ 5600 batches | train loss 0.3768963 +| epoch 8 | 4219/ 5600 batches | train loss 0.3920797 +| epoch 8 | 4223/ 5600 batches | train loss 0.3581339 +| epoch 8 | 4227/ 5600 batches | train loss 0.3719835 +| epoch 8 | 4231/ 5600 batches | train loss 0.3466119 +| epoch 8 | 4235/ 5600 batches | train loss 0.2924424 +| epoch 8 | 4239/ 5600 batches | train loss 0.3719926 +| epoch 8 | 4243/ 5600 batches | train loss 0.3292026 +| epoch 8 | 4247/ 5600 batches | train loss 0.3642904 +| epoch 8 | 4251/ 5600 batches | train loss 0.3242735 +| epoch 8 | 4255/ 5600 batches | train loss 0.3786264 +| epoch 8 | 4259/ 5600 batches | train loss 0.3448424 +| epoch 8 | 4263/ 5600 batches | train loss 0.3530076 +| epoch 8 | 4267/ 5600 batches | train loss 0.3228444 +| epoch 8 | 4271/ 5600 batches | train loss 0.3286686 +| epoch 8 | 4275/ 5600 batches | train loss 0.3862036 +| epoch 8 | 4279/ 5600 batches | train loss 0.2705849 +| epoch 8 | 4283/ 5600 batches | train loss 0.3144168 +| epoch 8 | 4287/ 5600 batches | train loss 0.3332759 +| epoch 8 | 4291/ 5600 batches | train loss 0.3900702 +| epoch 8 | 4295/ 5600 batches | train loss 0.3826387 +| epoch 8 | 4299/ 5600 batches | train loss 0.3121100 +| epoch 8 | 4303/ 5600 batches | train loss 0.3676581 +| epoch 8 | 4307/ 5600 batches | train loss 0.2010131 +| epoch 8 | 4311/ 5600 batches | train loss 0.3325714 +| epoch 8 | 4315/ 5600 batches | train loss 0.3087584 +| epoch 8 | 4319/ 5600 batches | train loss 0.4032777 +| epoch 8 | 4323/ 5600 batches | train loss 0.3718846 +| epoch 8 | 4327/ 5600 batches | train loss 0.3929206 +| epoch 8 | 4331/ 5600 batches | train loss 0.2759389 +| epoch 8 | 4335/ 5600 batches | train loss 0.3280182 +| epoch 8 | 4339/ 5600 batches | train loss 0.3681084 +| epoch 8 | 4343/ 5600 batches | train loss 0.3168678 +| epoch 8 | 4347/ 5600 batches | train loss 0.3572648 +| epoch 8 | 4351/ 5600 batches | train loss 0.3734245 +| epoch 8 | 4355/ 5600 batches | train loss 0.3138745 +| epoch 8 | 4359/ 5600 batches | train loss 0.3254492 +| epoch 8 | 4363/ 5600 batches | train loss 0.3698724 +| epoch 8 | 4367/ 5600 batches | train loss 0.3400300 +| epoch 8 | 4371/ 5600 batches | train loss 0.3536539 +| epoch 8 | 4375/ 5600 batches | train loss 0.3616342 +| epoch 8 | 4379/ 5600 batches | train loss 0.2961201 +| epoch 8 | 4383/ 5600 batches | train loss 0.3503606 +| epoch 8 | 4387/ 5600 batches | train loss 0.3859326 +| epoch 8 | 4391/ 5600 batches | train loss 0.2971314 +| epoch 8 | 4395/ 5600 batches | train loss 0.3098270 +| epoch 8 | 4399/ 5600 batches | train loss 0.3546820 +| epoch 8 | 4403/ 5600 batches | train loss 0.2913584 +| epoch 8 | 4407/ 5600 batches | train loss 0.4082670 +| epoch 8 | 4411/ 5600 batches | train loss 0.3234899 +| epoch 8 | 4415/ 5600 batches | train loss 0.3413060 +| epoch 8 | 4419/ 5600 batches | train loss 0.3504664 +| epoch 8 | 4423/ 5600 batches | train loss 0.2650971 +| epoch 8 | 4427/ 5600 batches | train loss 0.3136337 +| epoch 8 | 4431/ 5600 batches | train loss 0.2677084 +| epoch 8 | 4435/ 5600 batches | train loss 0.3870346 +| epoch 8 | 4439/ 5600 batches | train loss 0.3663051 +| epoch 8 | 4443/ 5600 batches | train loss 0.3336428 +| epoch 8 | 4447/ 5600 batches | train loss 0.3756761 +| epoch 8 | 4451/ 5600 batches | train loss 0.3500215 +| epoch 8 | 4455/ 5600 batches | train loss 0.4012411 +| epoch 8 | 4459/ 5600 batches | train loss 0.3485675 +| epoch 8 | 4463/ 5600 batches | train loss 0.3707553 +| epoch 8 | 4467/ 5600 batches | train loss 0.3637795 +| epoch 8 | 4471/ 5600 batches | train loss 0.3898550 +| epoch 8 | 4475/ 5600 batches | train loss 0.3903395 +| epoch 8 | 4479/ 5600 batches | train loss 0.3241454 +| epoch 8 | 4483/ 5600 batches | train loss 0.3436964 +| epoch 8 | 4487/ 5600 batches | train loss 0.3553465 +| epoch 8 | 4491/ 5600 batches | train loss 0.3935685 +| epoch 8 | 4495/ 5600 batches | train loss 0.3346425 +| epoch 8 | 4499/ 5600 batches | train loss 0.3259483 +| epoch 8 | 4503/ 5600 batches | train loss 0.3360519 +| epoch 8 | 4507/ 5600 batches | train loss 0.2839260 +| epoch 8 | 4511/ 5600 batches | train loss 0.3139934 +| epoch 8 | 4515/ 5600 batches | train loss 0.3303615 +| epoch 8 | 4519/ 5600 batches | train loss 0.3305010 +| epoch 8 | 4523/ 5600 batches | train loss 0.3881808 +| epoch 8 | 4527/ 5600 batches | train loss 0.3580593 +| epoch 8 | 4531/ 5600 batches | train loss 0.3432578 +| epoch 8 | 4535/ 5600 batches | train loss 0.3530363 +| epoch 8 | 4539/ 5600 batches | train loss 0.3515610 +| epoch 8 | 4543/ 5600 batches | train loss 0.4136574 +| epoch 8 | 4547/ 5600 batches | train loss 0.3630876 +| epoch 8 | 4551/ 5600 batches | train loss 0.3394401 +| epoch 8 | 4555/ 5600 batches | train loss 0.3431388 +| epoch 8 | 4559/ 5600 batches | train loss 0.3234268 +| epoch 8 | 4563/ 5600 batches | train loss 0.4025089 +| epoch 8 | 4567/ 5600 batches | train loss 0.3550507 +| epoch 8 | 4571/ 5600 batches | train loss 0.3522251 +| epoch 8 | 4575/ 5600 batches | train loss 0.3652306 +| epoch 8 | 4579/ 5600 batches | train loss 0.3127237 +| epoch 8 | 4583/ 5600 batches | train loss 0.3532553 +| epoch 8 | 4587/ 5600 batches | train loss 0.3584576 +| epoch 8 | 4591/ 5600 batches | train loss 0.3853280 +| epoch 8 | 4595/ 5600 batches | train loss 0.3299572 +| epoch 8 | 4599/ 5600 batches | train loss 0.3150872 +| epoch 8 | 4603/ 5600 batches | train loss 0.3826267 +| epoch 8 | 4607/ 5600 batches | train loss 0.3955613 +| epoch 8 | 4611/ 5600 batches | train loss 0.3236666 +| epoch 8 | 4615/ 5600 batches | train loss 0.3171956 +| epoch 8 | 4619/ 5600 batches | train loss 0.3474676 +| epoch 8 | 4623/ 5600 batches | train loss 0.3508629 +| epoch 8 | 4627/ 5600 batches | train loss 0.4145809 +| epoch 8 | 4631/ 5600 batches | train loss 0.3824740 +| epoch 8 | 4635/ 5600 batches | train loss 0.3132081 +| epoch 8 | 4639/ 5600 batches | train loss 0.3676400 +| epoch 8 | 4643/ 5600 batches | train loss 0.3641130 +| epoch 8 | 4647/ 5600 batches | train loss 0.3159288 +| epoch 8 | 4651/ 5600 batches | train loss 0.3649838 +| epoch 8 | 4655/ 5600 batches | train loss 0.3376710 +| epoch 8 | 4659/ 5600 batches | train loss 0.3176859 +| epoch 8 | 4663/ 5600 batches | train loss 0.3310208 +| epoch 8 | 4667/ 5600 batches | train loss 0.3498044 +| epoch 8 | 4671/ 5600 batches | train loss 0.3612591 +| epoch 8 | 4675/ 5600 batches | train loss 0.3422916 +| epoch 8 | 4679/ 5600 batches | train loss 0.3957741 +| epoch 8 | 4683/ 5600 batches | train loss 0.3987328 +| epoch 8 | 4687/ 5600 batches | train loss 0.2471949 +| epoch 8 | 4691/ 5600 batches | train loss 0.3657398 +| epoch 8 | 4695/ 5600 batches | train loss 0.3351736 +| epoch 8 | 4699/ 5600 batches | train loss 0.2786321 +| epoch 8 | 4703/ 5600 batches | train loss 0.3562191 +| epoch 8 | 4707/ 5600 batches | train loss 0.2889407 +| epoch 8 | 4711/ 5600 batches | train loss 0.3890505 +| epoch 8 | 4715/ 5600 batches | train loss 0.3174438 +| epoch 8 | 4719/ 5600 batches | train loss 0.3888835 +| epoch 8 | 4723/ 5600 batches | train loss 0.3586978 +| epoch 8 | 4727/ 5600 batches | train loss 0.4503573 +| epoch 8 | 4731/ 5600 batches | train loss 0.2179627 +| epoch 8 | 4735/ 5600 batches | train loss 0.3576068 +| epoch 8 | 4739/ 5600 batches | train loss 0.3102462 +| epoch 8 | 4743/ 5600 batches | train loss 0.3890409 +| epoch 8 | 4747/ 5600 batches | train loss 0.3922750 +| epoch 8 | 4751/ 5600 batches | train loss 0.3710638 +| epoch 8 | 4755/ 5600 batches | train loss 0.3559209 +| epoch 8 | 4759/ 5600 batches | train loss 0.3291644 +| epoch 8 | 4763/ 5600 batches | train loss 0.3585994 +| epoch 8 | 4767/ 5600 batches | train loss 0.3359195 +| epoch 8 | 4771/ 5600 batches | train loss 0.2919022 +| epoch 8 | 4775/ 5600 batches | train loss 0.2998709 +| epoch 8 | 4779/ 5600 batches | train loss 0.3614404 +| epoch 8 | 4783/ 5600 batches | train loss 0.3440903 +| epoch 8 | 4787/ 5600 batches | train loss 0.2896852 +| epoch 8 | 4791/ 5600 batches | train loss 0.3285445 +| epoch 8 | 4795/ 5600 batches | train loss 0.3484817 +| epoch 8 | 4799/ 5600 batches | train loss 0.3550860 +| epoch 8 | 4803/ 5600 batches | train loss 0.2695768 +| epoch 8 | 4807/ 5600 batches | train loss 0.3498489 +| epoch 8 | 4811/ 5600 batches | train loss 0.3008820 +| epoch 8 | 4815/ 5600 batches | train loss 0.3822088 +| epoch 8 | 4819/ 5600 batches | train loss 0.4020758 +| epoch 8 | 4823/ 5600 batches | train loss 0.3097131 +| epoch 8 | 4827/ 5600 batches | train loss 0.3436475 +| epoch 8 | 4831/ 5600 batches | train loss 0.3446995 +| epoch 8 | 4835/ 5600 batches | train loss 0.3972546 +| epoch 8 | 4839/ 5600 batches | train loss 0.3757545 +| epoch 8 | 4843/ 5600 batches | train loss 0.3191923 +| epoch 8 | 4847/ 5600 batches | train loss 0.3013672 +| epoch 8 | 4851/ 5600 batches | train loss 0.3011397 +| epoch 8 | 4855/ 5600 batches | train loss 0.2002983 +| epoch 8 | 4859/ 5600 batches | train loss 0.3670360 +| epoch 8 | 4863/ 5600 batches | train loss 0.3608828 +| epoch 8 | 4867/ 5600 batches | train loss 0.3534175 +| epoch 8 | 4871/ 5600 batches | train loss 0.4050441 +| epoch 8 | 4875/ 5600 batches | train loss 0.3565027 +| epoch 8 | 4879/ 5600 batches | train loss 0.3626381 +| epoch 8 | 4883/ 5600 batches | train loss 0.2870679 +| epoch 8 | 4887/ 5600 batches | train loss 0.3369991 +| epoch 8 | 4891/ 5600 batches | train loss 0.3330669 +| epoch 8 | 4895/ 5600 batches | train loss 0.3168057 +| epoch 8 | 4899/ 5600 batches | train loss 0.3554310 +| epoch 8 | 4903/ 5600 batches | train loss 0.3167742 +| epoch 8 | 4907/ 5600 batches | train loss 0.4140144 +| epoch 8 | 4911/ 5600 batches | train loss 0.4031419 +| epoch 8 | 4915/ 5600 batches | train loss 0.3507979 +| epoch 8 | 4919/ 5600 batches | train loss 0.3436790 +| epoch 8 | 4923/ 5600 batches | train loss 0.3648468 +| epoch 8 | 4927/ 5600 batches | train loss 0.3147230 +| epoch 8 | 4931/ 5600 batches | train loss 0.3306178 +| epoch 8 | 4935/ 5600 batches | train loss 0.3395375 +| epoch 8 | 4939/ 5600 batches | train loss 0.3563065 +| epoch 8 | 4943/ 5600 batches | train loss 0.4028107 +| epoch 8 | 4947/ 5600 batches | train loss 0.4448166 +| epoch 8 | 4951/ 5600 batches | train loss 0.3865221 +| epoch 8 | 4955/ 5600 batches | train loss 0.3740520 +| epoch 8 | 4959/ 5600 batches | train loss 0.3931072 +| epoch 8 | 4963/ 5600 batches | train loss 0.3205328 +| epoch 8 | 4967/ 5600 batches | train loss 0.3232087 +| epoch 8 | 4971/ 5600 batches | train loss 0.3565584 +| epoch 8 | 4975/ 5600 batches | train loss 0.3863223 +| epoch 8 | 4979/ 5600 batches | train loss 0.3207275 +| epoch 8 | 4983/ 5600 batches | train loss 0.3533298 +| epoch 8 | 4987/ 5600 batches | train loss 0.4164182 +| epoch 8 | 4991/ 5600 batches | train loss 0.3568799 +| epoch 8 | 4995/ 5600 batches | train loss 0.3254080 +| epoch 8 | 4999/ 5600 batches | train loss 0.3648157 +| epoch 8 | 5003/ 5600 batches | train loss 0.4345091 +| epoch 8 | 5007/ 5600 batches | train loss 0.3602681 +| epoch 8 | 5011/ 5600 batches | train loss 0.2507918 +| epoch 8 | 5015/ 5600 batches | train loss 0.3484392 +| epoch 8 | 5019/ 5600 batches | train loss 0.3498060 +| epoch 8 | 5023/ 5600 batches | train loss 0.5366669 +| epoch 8 | 5027/ 5600 batches | train loss 0.3567517 +| epoch 8 | 5031/ 5600 batches | train loss 0.3172371 +| epoch 8 | 5035/ 5600 batches | train loss 0.3899505 +| epoch 8 | 5039/ 5600 batches | train loss 0.3601547 +| epoch 8 | 5043/ 5600 batches | train loss 0.3124524 +| epoch 8 | 5047/ 5600 batches | train loss 0.3306696 +| epoch 8 | 5051/ 5600 batches | train loss 0.3791631 +| epoch 8 | 5055/ 5600 batches | train loss 0.3766177 +| epoch 8 | 5059/ 5600 batches | train loss 0.3996766 +| epoch 8 | 5063/ 5600 batches | train loss 0.3625798 +| epoch 8 | 5067/ 5600 batches | train loss 0.3625976 +| epoch 8 | 5071/ 5600 batches | train loss 0.4088442 +| epoch 8 | 5075/ 5600 batches | train loss 0.3296905 +| epoch 8 | 5079/ 5600 batches | train loss 0.3161125 +| epoch 8 | 5083/ 5600 batches | train loss 0.4144306 +| epoch 8 | 5087/ 5600 batches | train loss 0.3087889 +| epoch 8 | 5091/ 5600 batches | train loss 0.3503603 +| epoch 8 | 5095/ 5600 batches | train loss 0.3355461 +| epoch 8 | 5099/ 5600 batches | train loss 0.3613198 +| epoch 8 | 5103/ 5600 batches | train loss 0.3175090 +| epoch 8 | 5107/ 5600 batches | train loss 0.3312807 +| epoch 8 | 5111/ 5600 batches | train loss 0.3229442 +| epoch 8 | 5115/ 5600 batches | train loss 0.1600000 +| epoch 8 | 5119/ 5600 batches | train loss 0.3023971 +| epoch 8 | 5123/ 5600 batches | train loss 0.3322175 +| epoch 8 | 5127/ 5600 batches | train loss 0.3185825 +| epoch 8 | 5131/ 5600 batches | train loss 0.3341226 +| epoch 8 | 5135/ 5600 batches | train loss 0.3499335 +| epoch 8 | 5139/ 5600 batches | train loss 0.2999797 +| epoch 8 | 5143/ 5600 batches | train loss 0.3797531 +| epoch 8 | 5147/ 5600 batches | train loss 0.4341193 +| epoch 8 | 5151/ 5600 batches | train loss 0.3406635 +| epoch 8 | 5155/ 5600 batches | train loss 0.1318022 +| epoch 8 | 5159/ 5600 batches | train loss 0.3623864 +| epoch 8 | 5163/ 5600 batches | train loss 0.3104598 +| epoch 8 | 5167/ 5600 batches | train loss 0.3354074 +| epoch 8 | 5171/ 5600 batches | train loss 0.3671602 +| epoch 8 | 5175/ 5600 batches | train loss 0.3771645 +| epoch 8 | 5179/ 5600 batches | train loss 0.3026818 +| epoch 8 | 5183/ 5600 batches | train loss 0.3881733 +| epoch 8 | 5187/ 5600 batches | train loss 0.3679128 +| epoch 8 | 5191/ 5600 batches | train loss 0.3602356 +| epoch 8 | 5195/ 5600 batches | train loss 0.3899654 +| epoch 8 | 5199/ 5600 batches | train loss 0.3228931 +| epoch 8 | 5203/ 5600 batches | train loss 0.3719589 +| epoch 8 | 5207/ 5600 batches | train loss 0.3389568 +| epoch 8 | 5211/ 5600 batches | train loss 0.3544973 +| epoch 8 | 5215/ 5600 batches | train loss 0.3058676 +| epoch 8 | 5219/ 5600 batches | train loss 0.2664188 +| epoch 8 | 5223/ 5600 batches | train loss 0.3881652 +| epoch 8 | 5227/ 5600 batches | train loss 0.3416128 +| epoch 8 | 5231/ 5600 batches | train loss 0.3395747 +| epoch 8 | 5235/ 5600 batches | train loss 0.4675682 +| epoch 8 | 5239/ 5600 batches | train loss 0.2509477 +| epoch 8 | 5243/ 5600 batches | train loss 0.3055280 +| epoch 8 | 5247/ 5600 batches | train loss 0.3601232 +| epoch 8 | 5251/ 5600 batches | train loss 0.3246045 +| epoch 8 | 5255/ 5600 batches | train loss 0.3062423 +| epoch 8 | 5259/ 5600 batches | train loss 0.3788618 +| epoch 8 | 5263/ 5600 batches | train loss 0.3755323 +| epoch 8 | 5267/ 5600 batches | train loss 0.3760100 +| epoch 8 | 5271/ 5600 batches | train loss 0.4287206 +| epoch 8 | 5275/ 5600 batches | train loss 0.3588147 +| epoch 8 | 5279/ 5600 batches | train loss 0.3110119 +| epoch 8 | 5283/ 5600 batches | train loss 0.3782118 +| epoch 8 | 5287/ 5600 batches | train loss 0.3197269 +| epoch 8 | 5291/ 5600 batches | train loss 0.3788348 +| epoch 8 | 5295/ 5600 batches | train loss 0.3108158 +| epoch 8 | 5299/ 5600 batches | train loss 0.3371249 +| epoch 8 | 5303/ 5600 batches | train loss 0.3548847 +| epoch 8 | 5307/ 5600 batches | train loss 0.3340855 +| epoch 8 | 5311/ 5600 batches | train loss 0.3121325 +| epoch 8 | 5315/ 5600 batches | train loss 0.3613242 +| epoch 8 | 5319/ 5600 batches | train loss 0.3528026 +| epoch 8 | 5323/ 5600 batches | train loss 0.3202920 +| epoch 8 | 5327/ 5600 batches | train loss 0.3429265 +| epoch 8 | 5331/ 5600 batches | train loss 0.4035762 +| epoch 8 | 5335/ 5600 batches | train loss 0.3142990 +| epoch 8 | 5339/ 5600 batches | train loss 0.4173018 +| epoch 8 | 5343/ 5600 batches | train loss 0.3122800 +| epoch 8 | 5347/ 5600 batches | train loss 0.2318624 +| epoch 8 | 5351/ 5600 batches | train loss 0.1403981 +| epoch 8 | 5355/ 5600 batches | train loss 0.3480194 +| epoch 8 | 5359/ 5600 batches | train loss 0.4177293 +| epoch 8 | 5363/ 5600 batches | train loss 0.3554933 +| epoch 8 | 5367/ 5600 batches | train loss 0.3605511 +| epoch 8 | 5371/ 5600 batches | train loss 0.3639603 +| epoch 8 | 5375/ 5600 batches | train loss 0.3247240 +| epoch 8 | 5379/ 5600 batches | train loss 0.3114980 +| epoch 8 | 5383/ 5600 batches | train loss 0.3729033 +| epoch 8 | 5387/ 5600 batches | train loss 0.3622758 +| epoch 8 | 5391/ 5600 batches | train loss 0.3878396 +| epoch 8 | 5395/ 5600 batches | train loss 0.3089718 +| epoch 8 | 5399/ 5600 batches | train loss 0.3405511 +| epoch 8 | 5403/ 5600 batches | train loss 0.3767036 +| epoch 8 | 5407/ 5600 batches | train loss 0.3827467 +| epoch 8 | 5411/ 5600 batches | train loss 0.3664219 +| epoch 8 | 5415/ 5600 batches | train loss 0.3437253 +| epoch 8 | 5419/ 5600 batches | train loss 0.3355505 +| epoch 8 | 5423/ 5600 batches | train loss 0.3598986 +| epoch 8 | 5427/ 5600 batches | train loss 0.3795163 +| epoch 8 | 5431/ 5600 batches | train loss 0.4458801 +| epoch 8 | 5435/ 5600 batches | train loss 0.4199897 +| epoch 8 | 5439/ 5600 batches | train loss 0.2992018 +| epoch 8 | 5443/ 5600 batches | train loss 0.3056199 +| epoch 8 | 5447/ 5600 batches | train loss 0.3394970 +| epoch 8 | 5451/ 5600 batches | train loss 0.3804145 +| epoch 8 | 5455/ 5600 batches | train loss 0.3788992 +| epoch 8 | 5459/ 5600 batches | train loss 0.3370027 +| epoch 8 | 5463/ 5600 batches | train loss 0.3639311 +| epoch 8 | 5467/ 5600 batches | train loss 0.3747722 +| epoch 8 | 5471/ 5600 batches | train loss 0.3993748 +| epoch 8 | 5475/ 5600 batches | train loss 0.4338253 +| epoch 8 | 5479/ 5600 batches | train loss 0.3732860 +| epoch 8 | 5483/ 5600 batches | train loss 0.3308078 +| epoch 8 | 5487/ 5600 batches | train loss 0.2677662 +| epoch 8 | 5491/ 5600 batches | train loss 0.3149048 +| epoch 8 | 5495/ 5600 batches | train loss 0.3697459 +| epoch 8 | 5499/ 5600 batches | train loss 0.2984600 +| epoch 8 | 5503/ 5600 batches | train loss 0.3875765 +| epoch 8 | 5507/ 5600 batches | train loss 0.3473547 +| epoch 8 | 5511/ 5600 batches | train loss 0.3012950 +| epoch 8 | 5515/ 5600 batches | train loss 0.3593571 +| epoch 8 | 5519/ 5600 batches | train loss 0.3808149 +| epoch 8 | 5523/ 5600 batches | train loss 0.3257868 +| epoch 8 | 5527/ 5600 batches | train loss 0.3778277 +| epoch 8 | 5531/ 5600 batches | train loss 0.3550111 +| epoch 8 | 5535/ 5600 batches | train loss 0.3647035 +| epoch 8 | 5539/ 5600 batches | train loss 0.3242074 +| epoch 8 | 5543/ 5600 batches | train loss 0.4223629 +| epoch 8 | 5547/ 5600 batches | train loss 0.3500216 +| epoch 8 | 5551/ 5600 batches | train loss 0.3800749 +| epoch 8 | 5555/ 5600 batches | train loss 0.4119556 +| epoch 8 | 5559/ 5600 batches | train loss 0.3454456 +| epoch 8 | 5563/ 5600 batches | train loss 0.4378194 +| epoch 8 | 5567/ 5600 batches | train loss 0.3971756 +| epoch 8 | 5571/ 5600 batches | train loss 0.3345729 +| epoch 8 | 5575/ 5600 batches | train loss 0.3276696 +| epoch 8 | 5579/ 5600 batches | train loss 0.4757021 +| epoch 8 | 5583/ 5600 batches | train loss 0.3016744 +| epoch 8 | 5587/ 5600 batches | train loss 0.4448321 +| epoch 8 | 5591/ 5600 batches | train loss 0.3212240 +| epoch 8 | 5595/ 5600 batches | train loss 0.3502517 +| epoch 8 | 5599/ 5600 batches | train loss 0.3628647 +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 5600 batches | test loss 0.8155001 +| epoch 8 | 7/ 5600 batches | test loss 0.5118679 +| epoch 8 | 11/ 5600 batches | test loss 0.5753068 +| epoch 8 | 15/ 5600 batches | test loss 0.3978291 +| epoch 8 | 19/ 5600 batches | test loss 0.3995497 +| epoch 8 | 23/ 5600 batches | test loss 0.4576994 +| epoch 8 | 27/ 5600 batches | test loss 0.4666059 +| epoch 8 | 31/ 5600 batches | test loss 0.4156116 +| epoch 8 | 35/ 5600 batches | test loss 0.5059621 +| epoch 8 | 39/ 5600 batches | test loss 0.3484126 +| epoch 8 | 43/ 5600 batches | test loss 0.5009824 +| epoch 8 | 47/ 5600 batches | test loss 0.3480539 +| epoch 8 | 51/ 5600 batches | test loss 0.4525701 +| epoch 8 | 55/ 5600 batches | test loss 0.5073486 +| epoch 8 | 59/ 5600 batches | test loss 0.4548960 +| epoch 8 | 63/ 5600 batches | test loss 0.4994117 +| epoch 8 | 67/ 5600 batches | test loss 0.5492553 +| epoch 8 | 71/ 5600 batches | test loss 0.5317128 +| epoch 8 | 75/ 5600 batches | test loss 0.5375015 +| epoch 8 | 79/ 5600 batches | test loss 0.5020065 +| epoch 8 | 83/ 5600 batches | test loss 0.4924242 +| epoch 8 | 87/ 5600 batches | test loss 0.5161424 +| epoch 8 | 91/ 5600 batches | test loss 0.3923422 +| epoch 8 | 95/ 5600 batches | test loss 0.5174986 +| epoch 8 | 99/ 5600 batches | test loss 0.4182751 +| epoch 8 | 103/ 5600 batches | test loss 0.4378581 +| epoch 8 | 107/ 5600 batches | test loss 0.5033285 +| epoch 8 | 111/ 5600 batches | test loss 0.4136499 +| epoch 8 | 115/ 5600 batches | test loss 0.4797137 +| epoch 8 | 119/ 5600 batches | test loss 0.4525801 +| epoch 8 | 123/ 5600 batches | test loss 0.4296678 +| epoch 8 | 127/ 5600 batches | test loss 0.4139411 +| epoch 8 | 131/ 5600 batches | test loss 0.4899730 +| epoch 8 | 135/ 5600 batches | test loss 0.5091659 +| epoch 8 | 139/ 5600 batches | test loss 0.4582369 +| epoch 8 | 143/ 5600 batches | test loss 0.5308833 +| epoch 8 | 147/ 5600 batches | test loss 0.4444375 +| epoch 8 | 151/ 5600 batches | test loss 0.5041118 +| epoch 8 | 155/ 5600 batches | test loss 0.5088735 +| epoch 8 | 159/ 5600 batches | test loss 0.4381444 +| epoch 8 | 163/ 5600 batches | test loss 0.5649511 +| epoch 8 | 167/ 5600 batches | test loss 0.3935533 +| epoch 8 | 171/ 5600 batches | test loss 0.4269573 +| epoch 8 | 175/ 5600 batches | test loss 0.4765199 +| epoch 8 | 179/ 5600 batches | test loss 0.5413552 +| epoch 8 | 183/ 5600 batches | test loss 0.3922444 +| epoch 8 | 187/ 5600 batches | test loss 0.5280877 +| epoch 8 | 191/ 5600 batches | test loss 0.4482881 +| epoch 8 | 195/ 5600 batches | test loss 0.4767596 +| epoch 8 | 199/ 5600 batches | test loss 0.4533480 +| epoch 8 | 203/ 5600 batches | test loss 0.4543202 +| epoch 8 | 207/ 5600 batches | test loss 0.4347732 +| epoch 8 | 211/ 5600 batches | test loss 0.4527209 +| epoch 8 | 215/ 5600 batches | test loss 0.4949161 +| epoch 8 | 219/ 5600 batches | test loss 0.4381621 +| epoch 8 | 223/ 5600 batches | test loss 0.4115185 +| epoch 8 | 227/ 5600 batches | test loss 0.5111544 +| epoch 8 | 231/ 5600 batches | test loss 0.5824664 +| epoch 8 | 235/ 5600 batches | test loss 0.3866407 +| epoch 8 | 239/ 5600 batches | test loss 0.4878818 +| epoch 8 | 243/ 5600 batches | test loss 0.4999233 +| epoch 8 | 247/ 5600 batches | test loss 0.5403351 +| epoch 8 | 251/ 5600 batches | test loss 0.4225327 +| epoch 8 | 255/ 5600 batches | test loss 0.5727866 +| epoch 8 | 259/ 5600 batches | test loss 0.4485462 +| epoch 8 | 263/ 5600 batches | test loss 0.3711137 +| epoch 8 | 267/ 5600 batches | test loss 0.4217095 +| epoch 8 | 271/ 5600 batches | test loss 0.4145762 +| epoch 8 | 275/ 5600 batches | test loss 0.6088268 +| epoch 8 | 279/ 5600 batches | test loss 0.5743814 +| epoch 8 | 283/ 5600 batches | test loss 0.4654493 +| epoch 8 | 287/ 5600 batches | test loss 0.4564859 +| epoch 8 | 291/ 5600 batches | test loss 0.6316832 +| epoch 8 | 295/ 5600 batches | test loss 0.4674409 +| epoch 8 | 299/ 5600 batches | test loss 0.3962662 +| epoch 8 | 303/ 5600 batches | test loss 0.4166759 +| epoch 8 | 307/ 5600 batches | test loss 0.5290974 +| epoch 8 | 311/ 5600 batches | test loss 0.5244735 +| epoch 8 | 315/ 5600 batches | test loss 0.4158872 +| epoch 8 | 319/ 5600 batches | test loss 0.3944561 +| epoch 8 | 323/ 5600 batches | test loss 0.4185434 +| epoch 8 | 327/ 5600 batches | test loss 0.4405200 +| epoch 8 | 331/ 5600 batches | test loss 0.4278219 +| epoch 8 | 335/ 5600 batches | test loss 0.5536464 +| epoch 8 | 339/ 5600 batches | test loss 0.4454333 +| epoch 8 | 343/ 5600 batches | test loss 0.4486563 +| epoch 8 | 347/ 5600 batches | test loss 0.6091284 +| epoch 8 | 351/ 5600 batches | test loss 0.3752504 +| epoch 8 | 355/ 5600 batches | test loss 0.3391003 +| epoch 8 | 359/ 5600 batches | test loss 0.4203769 +| epoch 8 | 363/ 5600 batches | test loss 0.6202341 +| epoch 8 | 367/ 5600 batches | test loss 0.4516973 +| epoch 8 | 371/ 5600 batches | test loss 0.5371926 +| epoch 8 | 375/ 5600 batches | test loss 0.5766556 +| epoch 8 | 379/ 5600 batches | test loss 0.4597018 +| epoch 8 | 383/ 5600 batches | test loss 0.4951049 +| epoch 8 | 387/ 5600 batches | test loss 0.4411910 +| epoch 8 | 391/ 5600 batches | test loss 0.4759211 +| epoch 8 | 395/ 5600 batches | test loss 0.7315667 +| epoch 8 | 399/ 5600 batches | test loss 0.4399176 +| epoch 8 | 403/ 5600 batches | test loss 0.6241858 +| epoch 8 | 407/ 5600 batches | test loss 0.5917625 +| epoch 8 | 411/ 5600 batches | test loss 0.5024999 +| epoch 8 | 415/ 5600 batches | test loss 0.5735400 +| epoch 8 | 419/ 5600 batches | test loss 0.6008509 +| epoch 8 | 423/ 5600 batches | test loss 0.5286384 +| epoch 8 | 427/ 5600 batches | test loss 0.5645028 +| epoch 8 | 431/ 5600 batches | test loss 0.6211950 +| epoch 8 | 435/ 5600 batches | test loss 0.4868619 +| epoch 8 | 439/ 5600 batches | test loss 0.5197508 +| epoch 8 | 443/ 5600 batches | test loss 0.3094001 +| epoch 8 | 447/ 5600 batches | test loss 0.6095039 +| epoch 8 | 451/ 5600 batches | test loss 0.3571909 +| epoch 8 | 455/ 5600 batches | test loss 0.3416347 +| epoch 8 | 459/ 5600 batches | test loss 0.4647300 +| epoch 8 | 463/ 5600 batches | test loss 0.3533697 +| epoch 8 | 467/ 5600 batches | test loss 0.5773082 +| epoch 8 | 471/ 5600 batches | test loss 0.4760485 +| epoch 8 | 475/ 5600 batches | test loss 0.3944389 +| epoch 8 | 479/ 5600 batches | test loss 0.4701765 +| epoch 8 | 483/ 5600 batches | test loss 0.4453197 +| epoch 8 | 487/ 5600 batches | test loss 0.3689876 +| epoch 8 | 491/ 5600 batches | test loss 0.4617849 +| epoch 8 | 495/ 5600 batches | test loss 0.4439872 +| epoch 8 | 499/ 5600 batches | test loss 0.4373086 +| epoch 8 | 503/ 5600 batches | test loss 0.4140435 +| epoch 8 | 507/ 5600 batches | test loss 0.4238383 +| epoch 8 | 511/ 5600 batches | test loss 0.5847775 +| epoch 8 | 515/ 5600 batches | test loss 0.4614895 +| epoch 8 | 519/ 5600 batches | test loss 0.3806181 +| epoch 8 | 523/ 5600 batches | test loss 0.3732979 +| epoch 8 | 527/ 5600 batches | test loss 0.5512088 +| epoch 8 | 531/ 5600 batches | test loss 0.2916003 +| epoch 8 | 535/ 5600 batches | test loss 0.4137351 +| epoch 8 | 539/ 5600 batches | test loss 0.7832545 +| epoch 8 | 543/ 5600 batches | test loss 0.3811292 +| epoch 8 | 547/ 5600 batches | test loss 0.3947469 +| epoch 8 | 551/ 5600 batches | test loss 0.4604751 +| epoch 8 | 555/ 5600 batches | test loss 0.4725238 +| epoch 8 | 559/ 5600 batches | test loss 0.5853027 +| epoch 8 | 563/ 5600 batches | test loss 0.3932883 +| epoch 8 | 567/ 5600 batches | test loss 0.4256647 +| epoch 8 | 571/ 5600 batches | test loss 0.5299887 +| epoch 8 | 575/ 5600 batches | test loss 0.4360913 +| epoch 8 | 579/ 5600 batches | test loss 0.5182172 +| epoch 8 | 583/ 5600 batches | test loss 0.6110268 +| epoch 8 | 587/ 5600 batches | test loss 0.3785308 +| epoch 8 | 591/ 5600 batches | test loss 0.4558692 +| epoch 8 | 595/ 5600 batches | test loss 0.4209820 +| epoch 8 | 599/ 5600 batches | test loss 0.5257516 +| epoch 8 | 603/ 5600 batches | test loss 0.5482669 +| epoch 8 | 607/ 5600 batches | test loss 0.3883338 +| epoch 8 | 611/ 5600 batches | test loss 0.4073327 +| epoch 8 | 615/ 5600 batches | test loss 0.4961870 +| epoch 8 | 619/ 5600 batches | test loss 0.4890245 +| epoch 8 | 623/ 5600 batches | test loss 0.4390401 +| epoch 8 | 627/ 5600 batches | test loss 0.4159203 +| epoch 8 | 631/ 5600 batches | test loss 0.6459690 +| epoch 8 | 635/ 5600 batches | test loss 0.6738645 +| epoch 8 | 639/ 5600 batches | test loss 0.4722593 +| epoch 8 | 643/ 5600 batches | test loss 0.4764409 +| epoch 8 | 647/ 5600 batches | test loss 0.4033211 +| epoch 8 | 651/ 5600 batches | test loss 0.4982555 +| epoch 8 | 655/ 5600 batches | test loss 0.5342548 +| epoch 8 | 659/ 5600 batches | test loss 0.4023921 +| epoch 8 | 663/ 5600 batches | test loss 0.9457165 +| epoch 8 | 667/ 5600 batches | test loss 0.3615488 +| epoch 8 | 671/ 5600 batches | test loss 0.4688474 +| epoch 8 | 675/ 5600 batches | test loss 0.4942060 +| epoch 8 | 679/ 5600 batches | test loss 0.4482988 +| epoch 8 | 683/ 5600 batches | test loss 0.5245259 +| epoch 8 | 687/ 5600 batches | test loss 0.6229415 +| epoch 8 | 691/ 5600 batches | test loss 0.3586450 +| epoch 8 | 695/ 5600 batches | test loss 0.4972872 +| epoch 8 | 699/ 5600 batches | test loss 0.4166590 +| epoch 8 | 703/ 5600 batches | test loss 0.6661309 +| epoch 8 | 707/ 5600 batches | test loss 0.3471390 +| epoch 8 | 711/ 5600 batches | test loss 0.4306397 +| epoch 8 | 715/ 5600 batches | test loss 0.4532980 +| epoch 8 | 719/ 5600 batches | test loss 0.5336645 +| epoch 8 | 723/ 5600 batches | test loss 0.4263779 +| epoch 8 | 727/ 5600 batches | test loss 0.4943349 +| epoch 8 | 731/ 5600 batches | test loss 0.4769097 +| epoch 8 | 735/ 5600 batches | test loss 0.6874628 +| epoch 8 | 739/ 5600 batches | test loss 0.3635826 +| epoch 8 | 743/ 5600 batches | test loss 0.5065243 +| epoch 8 | 747/ 5600 batches | test loss 0.4103527 +| epoch 8 | 751/ 5600 batches | test loss 0.4185215 +| epoch 8 | 755/ 5600 batches | test loss 0.6583078 +| epoch 8 | 759/ 5600 batches | test loss 0.3415132 +| epoch 8 | 763/ 5600 batches | test loss 0.3681245 +| epoch 8 | 767/ 5600 batches | test loss 0.4682482 +| epoch 8 | 771/ 5600 batches | test loss 0.3769445 +| epoch 8 | 775/ 5600 batches | test loss 0.5961925 +| epoch 8 | 779/ 5600 batches | test loss 0.5393896 +| epoch 8 | 783/ 5600 batches | test loss 0.4185183 +| epoch 8 | 787/ 5600 batches | test loss 0.4033843 +| epoch 8 | 791/ 5600 batches | test loss 0.5240734 +| epoch 8 | 795/ 5600 batches | test loss 0.4058029 +| epoch 8 | 799/ 5600 batches | test loss 0.4204691 +| epoch 8 | 803/ 5600 batches | test loss 0.4650340 +| epoch 8 | 807/ 5600 batches | test loss 0.4678605 +| epoch 8 | 811/ 5600 batches | test loss 0.5073730 +| epoch 8 | 815/ 5600 batches | test loss 0.4854437 +| epoch 8 | 819/ 5600 batches | test loss 0.4683256 +| epoch 8 | 823/ 5600 batches | test loss 0.5259261 +| epoch 8 | 827/ 5600 batches | test loss 0.5000851 +| epoch 8 | 831/ 5600 batches | test loss 0.5542766 +| epoch 8 | 835/ 5600 batches | test loss 0.5097396 +| epoch 8 | 839/ 5600 batches | test loss 0.4946296 +| epoch 8 | 843/ 5600 batches | test loss 0.7876534 +| epoch 8 | 847/ 5600 batches | test loss 0.3618265 +| epoch 8 | 851/ 5600 batches | test loss 0.4355271 +| epoch 8 | 855/ 5600 batches | test loss 0.3570453 +| epoch 8 | 859/ 5600 batches | test loss 0.3985807 +| epoch 8 | 863/ 5600 batches | test loss 0.4675902 +| epoch 8 | 867/ 5600 batches | test loss 0.6429695 +| epoch 8 | 871/ 5600 batches | test loss 0.5404259 +| epoch 8 | 875/ 5600 batches | test loss 0.4962125 +| epoch 8 | 879/ 5600 batches | test loss 0.4800731 +| epoch 8 | 883/ 5600 batches | test loss 0.3904207 +| epoch 8 | 887/ 5600 batches | test loss 0.4727381 +| epoch 8 | 891/ 5600 batches | test loss 0.4041343 +| epoch 8 | 895/ 5600 batches | test loss 0.4288887 +| epoch 8 | 899/ 5600 batches | test loss 0.4581542 +| epoch 8 | 903/ 5600 batches | test loss 0.4216091 +| epoch 8 | 907/ 5600 batches | test loss 0.4919260 +| epoch 8 | 911/ 5600 batches | test loss 0.4655179 +| epoch 8 | 915/ 5600 batches | test loss 0.4117859 +| epoch 8 | 919/ 5600 batches | test loss 0.5852405 +| epoch 8 | 923/ 5600 batches | test loss 0.5042924 +| epoch 8 | 927/ 5600 batches | test loss 0.4569785 +| epoch 8 | 931/ 5600 batches | test loss 0.5211592 +| epoch 8 | 935/ 5600 batches | test loss 0.7602488 +| epoch 8 | 939/ 5600 batches | test loss 0.6438050 +| epoch 8 | 943/ 5600 batches | test loss 0.4318506 +| epoch 8 | 947/ 5600 batches | test loss 0.5239496 +| epoch 8 | 951/ 5600 batches | test loss 0.5156538 +| epoch 8 | 955/ 5600 batches | test loss 0.5110782 +| epoch 8 | 959/ 5600 batches | test loss 0.4221132 +| epoch 8 | 963/ 5600 batches | test loss 0.4599077 +| epoch 8 | 967/ 5600 batches | test loss 0.6121481 +| epoch 8 | 971/ 5600 batches | test loss 0.5175256 +| epoch 8 | 975/ 5600 batches | test loss 0.5073287 +| epoch 8 | 979/ 5600 batches | test loss 0.3854020 +| epoch 8 | 983/ 5600 batches | test loss 0.4315609 +| epoch 8 | 987/ 5600 batches | test loss 0.4384531 +| epoch 8 | 991/ 5600 batches | test loss 0.4535165 +| epoch 8 | 995/ 5600 batches | test loss 0.6177590 +| epoch 8 | 999/ 5600 batches | test loss 0.4203151 +| epoch 8 | 1003/ 5600 batches | test loss 0.3893221 +| epoch 8 | 1007/ 5600 batches | test loss 0.3615662 +| epoch 8 | 1011/ 5600 batches | test loss 0.4884071 +| epoch 8 | 1015/ 5600 batches | test loss 0.3634397 +| epoch 8 | 1019/ 5600 batches | test loss 0.4612924 +| epoch 8 | 1023/ 5600 batches | test loss 0.4093497 +| epoch 8 | 1027/ 5600 batches | test loss 0.4421073 +| epoch 8 | 1031/ 5600 batches | test loss 0.4503515 +| epoch 8 | 1035/ 5600 batches | test loss 0.4800242 +| epoch 8 | 1039/ 5600 batches | test loss 0.5560877 +| epoch 8 | 1043/ 5600 batches | test loss 0.4973833 +| epoch 8 | 1047/ 5600 batches | test loss 0.4891831 +| epoch 8 | 1051/ 5600 batches | test loss 0.5238581 +| epoch 8 | 1055/ 5600 batches | test loss 0.3884557 +| epoch 8 | 1059/ 5600 batches | test loss 0.5095104 +| epoch 8 | 1063/ 5600 batches | test loss 0.4022690 +| epoch 8 | 1067/ 5600 batches | test loss 0.5127088 +| epoch 8 | 1071/ 5600 batches | test loss 0.5741086 +| epoch 8 | 1075/ 5600 batches | test loss 0.4033577 +| epoch 8 | 1079/ 5600 batches | test loss 0.5427022 +| epoch 8 | 1083/ 5600 batches | test loss 0.4510872 +| epoch 8 | 1087/ 5600 batches | test loss 0.3687682 +| epoch 8 | 1091/ 5600 batches | test loss 0.4030103 +| epoch 8 | 1095/ 5600 batches | test loss 0.4123599 +| epoch 8 | 1099/ 5600 batches | test loss 0.5162838 +| epoch 8 | 1103/ 5600 batches | test loss 0.4831079 +| epoch 8 | 1107/ 5600 batches | test loss 0.3209594 +| epoch 8 | 1111/ 5600 batches | test loss 0.5387792 +| epoch 8 | 1115/ 5600 batches | test loss 0.5009142 +| epoch 8 | 1119/ 5600 batches | test loss 0.3641607 +| epoch 8 | 1123/ 5600 batches | test loss 0.3721962 +| epoch 8 | 1127/ 5600 batches | test loss 0.5237511 +| epoch 8 | 1131/ 5600 batches | test loss 0.3518991 +| epoch 8 | 1135/ 5600 batches | test loss 0.6017357 +| epoch 8 | 1139/ 5600 batches | test loss 0.3693878 +| epoch 8 | 1143/ 5600 batches | test loss 0.4290098 +| epoch 8 | 1147/ 5600 batches | test loss 0.4383445 +| epoch 8 | 1151/ 5600 batches | test loss 0.3860128 +| epoch 8 | 1155/ 5600 batches | test loss 0.5440873 +| epoch 8 | 1159/ 5600 batches | test loss 0.5834723 +| epoch 8 | 1163/ 5600 batches | test loss 0.7181247 +| epoch 8 | 1167/ 5600 batches | test loss 0.5178399 +| epoch 8 | 1171/ 5600 batches | test loss 0.4137630 +| epoch 8 | 1175/ 5600 batches | test loss 0.4361188 +| epoch 8 | 1179/ 5600 batches | test loss 0.4693588 +| epoch 8 | 1183/ 5600 batches | test loss 0.3365539 +| epoch 8 | 1187/ 5600 batches | test loss 0.4392531 +| epoch 8 | 1191/ 5600 batches | test loss 0.4411500 +| epoch 8 | 1195/ 5600 batches | test loss 0.5944989 +| epoch 8 | 1199/ 5600 batches | test loss 0.4617465 +| epoch 8 | 1203/ 5600 batches | test loss 0.5544735 +| epoch 8 | 1207/ 5600 batches | test loss 0.5066615 +| epoch 8 | 1211/ 5600 batches | test loss 0.5074066 +| epoch 8 | 1215/ 5600 batches | test loss 0.5178703 +| epoch 8 | 1219/ 5600 batches | test loss 0.3692033 +| epoch 8 | 1223/ 5600 batches | test loss 0.4621377 +| epoch 8 | 1227/ 5600 batches | test loss 0.4803683 +| epoch 8 | 1231/ 5600 batches | test loss 0.4483117 +| epoch 8 | 1235/ 5600 batches | test loss 0.5389941 +| epoch 8 | 1239/ 5600 batches | test loss 0.4467821 +| epoch 8 | 1243/ 5600 batches | test loss 0.6545697 +| epoch 8 | 1247/ 5600 batches | test loss 0.4097495 +| epoch 8 | 1251/ 5600 batches | test loss 0.4182364 +| epoch 8 | 1255/ 5600 batches | test loss 0.4689486 +| epoch 8 | 1259/ 5600 batches | test loss 0.4215384 +| epoch 8 | 1263/ 5600 batches | test loss 0.4481996 +| epoch 8 | 1267/ 5600 batches | test loss 0.4336186 +| epoch 8 | 1271/ 5600 batches | test loss 0.4371884 +| epoch 8 | 1275/ 5600 batches | test loss 0.4176440 +| epoch 8 | 1279/ 5600 batches | test loss 0.4772014 +| epoch 8 | 1283/ 5600 batches | test loss 0.4098208 +| epoch 8 | 1287/ 5600 batches | test loss 0.4073554 +| epoch 8 | 1291/ 5600 batches | test loss 0.6155517 +| epoch 8 | 1295/ 5600 batches | test loss 0.4889581 +| epoch 8 | 1299/ 5600 batches | test loss 0.5685477 +| epoch 8 | 1303/ 5600 batches | test loss 0.4869760 +| epoch 8 | 1307/ 5600 batches | test loss 0.3914011 +| epoch 8 | 1311/ 5600 batches | test loss 0.4954372 +| epoch 8 | 1315/ 5600 batches | test loss 0.4468696 +| epoch 8 | 1319/ 5600 batches | test loss 0.4083686 +| epoch 8 | 1323/ 5600 batches | test loss 0.4124774 +| epoch 8 | 1327/ 5600 batches | test loss 0.4529722 +| epoch 8 | 1331/ 5600 batches | test loss 0.4063492 +| epoch 8 | 1335/ 5600 batches | test loss 0.5030908 +| epoch 8 | 1339/ 5600 batches | test loss 0.4955518 +| epoch 8 | 1343/ 5600 batches | test loss 0.5334306 +| epoch 8 | 1347/ 5600 batches | test loss 0.4241045 +| epoch 8 | 1351/ 5600 batches | test loss 0.3791631 +| epoch 8 | 1355/ 5600 batches | test loss 0.4330341 +| epoch 8 | 1359/ 5600 batches | test loss 0.4149908 +| epoch 8 | 1363/ 5600 batches | test loss 0.4734121 +| epoch 8 | 1367/ 5600 batches | test loss 0.4522957 +| epoch 8 | 1371/ 5600 batches | test loss 0.5045053 +| epoch 8 | 1375/ 5600 batches | test loss 0.4959020 +| epoch 8 | 1379/ 5600 batches | test loss 0.5321754 +| epoch 8 | 1383/ 5600 batches | test loss 0.4518772 +| epoch 8 | 1387/ 5600 batches | test loss 0.5908294 +| epoch 8 | 1391/ 5600 batches | test loss 0.5032821 +| epoch 8 | 1395/ 5600 batches | test loss 0.5252677 +| epoch 8 | 1399/ 5600 batches | test loss 0.5318443 +| epoch 8 | final test loss 0.4765, do not save model! +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 5600 batches | train loss 0.3224073 +| epoch 9 | 7/ 5600 batches | train loss 0.3539136 +| epoch 9 | 11/ 5600 batches | train loss 0.2651339 +| epoch 9 | 15/ 5600 batches | train loss 0.3244807 +| epoch 9 | 19/ 5600 batches | train loss 0.3134781 +| epoch 9 | 23/ 5600 batches | train loss 0.2992616 +| epoch 9 | 27/ 5600 batches | train loss 0.3447966 +| epoch 9 | 31/ 5600 batches | train loss 0.3159708 +| epoch 9 | 35/ 5600 batches | train loss 0.2867176 +| epoch 9 | 39/ 5600 batches | train loss 0.2763264 +| epoch 9 | 43/ 5600 batches | train loss 0.3262234 +| epoch 9 | 47/ 5600 batches | train loss 0.3442491 +| epoch 9 | 51/ 5600 batches | train loss 0.3378169 +| epoch 9 | 55/ 5600 batches | train loss 0.3129609 +| epoch 9 | 59/ 5600 batches | train loss 0.3796815 +| epoch 9 | 63/ 5600 batches | train loss 0.3008885 +| epoch 9 | 67/ 5600 batches | train loss 0.2971105 +| epoch 9 | 71/ 5600 batches | train loss 0.3171912 +| epoch 9 | 75/ 5600 batches | train loss 0.3643093 +| epoch 9 | 79/ 5600 batches | train loss 0.3660975 +| epoch 9 | 83/ 5600 batches | train loss 0.3079808 +| epoch 9 | 87/ 5600 batches | train loss 0.3482924 +| epoch 9 | 91/ 5600 batches | train loss 0.3148487 +| epoch 9 | 95/ 5600 batches | train loss 0.2078075 +| epoch 9 | 99/ 5600 batches | train loss 0.3086965 +| epoch 9 | 103/ 5600 batches | train loss 0.3381038 +| epoch 9 | 107/ 5600 batches | train loss 0.2756829 +| epoch 9 | 111/ 5600 batches | train loss 0.3402312 +| epoch 9 | 115/ 5600 batches | train loss 0.3834518 +| epoch 9 | 119/ 5600 batches | train loss 0.2942560 +| epoch 9 | 123/ 5600 batches | train loss 0.2908018 +| epoch 9 | 127/ 5600 batches | train loss 0.3528835 +| epoch 9 | 131/ 5600 batches | train loss 0.2853286 +| epoch 9 | 135/ 5600 batches | train loss 0.3176819 +| epoch 9 | 139/ 5600 batches | train loss 0.2826737 +| epoch 9 | 143/ 5600 batches | train loss 0.2874153 +| epoch 9 | 147/ 5600 batches | train loss 0.3645938 +| epoch 9 | 151/ 5600 batches | train loss 0.3268251 +| epoch 9 | 155/ 5600 batches | train loss 0.3425757 +| epoch 9 | 159/ 5600 batches | train loss 0.3030465 +| epoch 9 | 163/ 5600 batches | train loss 0.2968220 +| epoch 9 | 167/ 5600 batches | train loss 0.3409872 +| epoch 9 | 171/ 5600 batches | train loss 0.2738270 +| epoch 9 | 175/ 5600 batches | train loss 0.3010312 +| epoch 9 | 179/ 5600 batches | train loss 0.3357231 +| epoch 9 | 183/ 5600 batches | train loss 0.2695421 +| epoch 9 | 187/ 5600 batches | train loss 0.2970232 +| epoch 9 | 191/ 5600 batches | train loss 0.3134083 +| epoch 9 | 195/ 5600 batches | train loss 0.2956690 +| epoch 9 | 199/ 5600 batches | train loss 0.3518713 +| epoch 9 | 203/ 5600 batches | train loss 0.3557991 +| epoch 9 | 207/ 5600 batches | train loss 0.3766423 +| epoch 9 | 211/ 5600 batches | train loss 0.2927819 +| epoch 9 | 215/ 5600 batches | train loss 0.2927963 +| epoch 9 | 219/ 5600 batches | train loss 0.2143328 +| epoch 9 | 223/ 5600 batches | train loss 0.3302043 +| epoch 9 | 227/ 5600 batches | train loss 0.2668540 +| epoch 9 | 231/ 5600 batches | train loss 0.3180728 +| epoch 9 | 235/ 5600 batches | train loss 0.3532130 +| epoch 9 | 239/ 5600 batches | train loss 0.3416699 +| epoch 9 | 243/ 5600 batches | train loss 0.3164676 +| epoch 9 | 247/ 5600 batches | train loss 0.2883566 +| epoch 9 | 251/ 5600 batches | train loss 0.3048261 +| epoch 9 | 255/ 5600 batches | train loss 0.3730193 +| epoch 9 | 259/ 5600 batches | train loss 0.2928632 +| epoch 9 | 263/ 5600 batches | train loss 0.2911773 +| epoch 9 | 267/ 5600 batches | train loss 0.3694951 +| epoch 9 | 271/ 5600 batches | train loss 0.3091271 +| epoch 9 | 275/ 5600 batches | train loss 0.4171556 +| epoch 9 | 279/ 5600 batches | train loss 0.3017429 +| epoch 9 | 283/ 5600 batches | train loss 0.2810507 +| epoch 9 | 287/ 5600 batches | train loss 0.2702826 +| epoch 9 | 291/ 5600 batches | train loss 0.2956890 +| epoch 9 | 295/ 5600 batches | train loss 0.3255891 +| epoch 9 | 299/ 5600 batches | train loss 0.3232971 +| epoch 9 | 303/ 5600 batches | train loss 0.2683067 +| epoch 9 | 307/ 5600 batches | train loss 0.2588875 +| epoch 9 | 311/ 5600 batches | train loss 0.3531860 +| epoch 9 | 315/ 5600 batches | train loss 0.3358567 +| epoch 9 | 319/ 5600 batches | train loss 0.1209001 +| epoch 9 | 323/ 5600 batches | train loss 0.3373340 +| epoch 9 | 327/ 5600 batches | train loss 0.2976502 +| epoch 9 | 331/ 5600 batches | train loss 0.3075675 +| epoch 9 | 335/ 5600 batches | train loss 0.2251837 +| epoch 9 | 339/ 5600 batches | train loss 0.3593700 +| epoch 9 | 343/ 5600 batches | train loss 0.3530955 +| epoch 9 | 347/ 5600 batches | train loss 0.3343591 +| epoch 9 | 351/ 5600 batches | train loss 0.3232732 +| epoch 9 | 355/ 5600 batches | train loss 0.3113911 +| epoch 9 | 359/ 5600 batches | train loss 0.3446828 +| epoch 9 | 363/ 5600 batches | train loss 0.3766814 +| epoch 9 | 367/ 5600 batches | train loss 0.3716940 +| epoch 9 | 371/ 5600 batches | train loss 0.3134839 +| epoch 9 | 375/ 5600 batches | train loss 0.3024810 +| epoch 9 | 379/ 5600 batches | train loss 0.3083712 +| epoch 9 | 383/ 5600 batches | train loss 0.3004048 +| epoch 9 | 387/ 5600 batches | train loss 0.3260446 +| epoch 9 | 391/ 5600 batches | train loss 0.3172876 +| epoch 9 | 395/ 5600 batches | train loss 0.3522205 +| epoch 9 | 399/ 5600 batches | train loss 0.2526715 +| epoch 9 | 403/ 5600 batches | train loss 0.3175510 +| epoch 9 | 407/ 5600 batches | train loss 0.3031429 +| epoch 9 | 411/ 5600 batches | train loss 0.3369632 +| epoch 9 | 415/ 5600 batches | train loss 0.3345251 +| epoch 9 | 419/ 5600 batches | train loss 0.3591897 +| epoch 9 | 423/ 5600 batches | train loss 0.3185704 +| epoch 9 | 427/ 5600 batches | train loss 0.3133601 +| epoch 9 | 431/ 5600 batches | train loss 0.2909146 +| epoch 9 | 435/ 5600 batches | train loss 0.3458189 +| epoch 9 | 439/ 5600 batches | train loss 0.3469749 +| epoch 9 | 443/ 5600 batches | train loss 0.2720821 +| epoch 9 | 447/ 5600 batches | train loss 0.3422890 +| epoch 9 | 451/ 5600 batches | train loss 0.3162446 +| epoch 9 | 455/ 5600 batches | train loss 0.2805953 +| epoch 9 | 459/ 5600 batches | train loss 0.3206969 +| epoch 9 | 463/ 5600 batches | train loss 0.3747120 +| epoch 9 | 467/ 5600 batches | train loss 0.3097060 +| epoch 9 | 471/ 5600 batches | train loss 0.2867795 +| epoch 9 | 475/ 5600 batches | train loss 0.3224728 +| epoch 9 | 479/ 5600 batches | train loss 0.3015409 +| epoch 9 | 483/ 5600 batches | train loss 0.2553578 +| epoch 9 | 487/ 5600 batches | train loss 0.2909576 +| epoch 9 | 491/ 5600 batches | train loss 0.2887393 +| epoch 9 | 495/ 5600 batches | train loss 0.2529614 +| epoch 9 | 499/ 5600 batches | train loss 0.2605805 +| epoch 9 | 503/ 5600 batches | train loss 0.3677430 +| epoch 9 | 507/ 5600 batches | train loss 0.3364409 +| epoch 9 | 511/ 5600 batches | train loss 0.2813172 +| epoch 9 | 515/ 5600 batches | train loss 0.3444040 +| epoch 9 | 519/ 5600 batches | train loss 0.3302706 +| epoch 9 | 523/ 5600 batches | train loss 0.3085703 +| epoch 9 | 527/ 5600 batches | train loss 0.3119187 +| epoch 9 | 531/ 5600 batches | train loss 0.3353208 +| epoch 9 | 535/ 5600 batches | train loss 0.3660923 +| epoch 9 | 539/ 5600 batches | train loss 0.3650684 +| epoch 9 | 543/ 5600 batches | train loss 0.4264088 +| epoch 9 | 547/ 5600 batches | train loss 0.3331890 +| epoch 9 | 551/ 5600 batches | train loss 0.3149039 +| epoch 9 | 555/ 5600 batches | train loss 0.3058975 +| epoch 9 | 559/ 5600 batches | train loss 0.2737411 +| epoch 9 | 563/ 5600 batches | train loss 0.3658751 +| epoch 9 | 567/ 5600 batches | train loss 0.3182514 +| epoch 9 | 571/ 5600 batches | train loss 0.3258460 +| epoch 9 | 575/ 5600 batches | train loss 0.3687884 +| epoch 9 | 579/ 5600 batches | train loss 0.3848682 +| epoch 9 | 583/ 5600 batches | train loss 0.3121125 +| epoch 9 | 587/ 5600 batches | train loss 0.3482188 +| epoch 9 | 591/ 5600 batches | train loss 0.3119443 +| epoch 9 | 595/ 5600 batches | train loss 0.3399451 +| epoch 9 | 599/ 5600 batches | train loss 0.3862764 +| epoch 9 | 603/ 5600 batches | train loss 0.3993090 +| epoch 9 | 607/ 5600 batches | train loss 0.3254933 +| epoch 9 | 611/ 5600 batches | train loss 0.3046209 +| epoch 9 | 615/ 5600 batches | train loss 0.3004264 +| epoch 9 | 619/ 5600 batches | train loss 0.2898968 +| epoch 9 | 623/ 5600 batches | train loss 0.3150002 +| epoch 9 | 627/ 5600 batches | train loss 0.2892067 +| epoch 9 | 631/ 5600 batches | train loss 0.3884126 +| epoch 9 | 635/ 5600 batches | train loss 0.2965067 +| epoch 9 | 639/ 5600 batches | train loss 0.3304995 +| epoch 9 | 643/ 5600 batches | train loss 0.3178809 +| epoch 9 | 647/ 5600 batches | train loss 0.2663165 +| epoch 9 | 651/ 5600 batches | train loss 0.2979486 +| epoch 9 | 655/ 5600 batches | train loss 0.3389803 +| epoch 9 | 659/ 5600 batches | train loss 0.2887779 +| epoch 9 | 663/ 5600 batches | train loss 0.3932915 +| epoch 9 | 667/ 5600 batches | train loss 0.3256816 +| epoch 9 | 671/ 5600 batches | train loss 0.3199478 +| epoch 9 | 675/ 5600 batches | train loss 0.4118375 +| epoch 9 | 679/ 5600 batches | train loss 0.3320980 +| epoch 9 | 683/ 5600 batches | train loss 0.2792288 +| epoch 9 | 687/ 5600 batches | train loss 0.2866445 +| epoch 9 | 691/ 5600 batches | train loss 0.3666166 +| epoch 9 | 695/ 5600 batches | train loss 0.3040659 +| epoch 9 | 699/ 5600 batches | train loss 0.2929340 +| epoch 9 | 703/ 5600 batches | train loss 0.2827529 +| epoch 9 | 707/ 5600 batches | train loss 0.3013121 +| epoch 9 | 711/ 5600 batches | train loss 0.2918714 +| epoch 9 | 715/ 5600 batches | train loss 0.3148385 +| epoch 9 | 719/ 5600 batches | train loss 0.3223335 +| epoch 9 | 723/ 5600 batches | train loss 0.3451925 +| epoch 9 | 727/ 5600 batches | train loss 0.3104663 +| epoch 9 | 731/ 5600 batches | train loss 0.3511493 +| epoch 9 | 735/ 5600 batches | train loss 0.3513810 +| epoch 9 | 739/ 5600 batches | train loss 0.3420185 +| epoch 9 | 743/ 5600 batches | train loss 0.3008537 +| epoch 9 | 747/ 5600 batches | train loss 0.2724018 +| epoch 9 | 751/ 5600 batches | train loss 0.3563239 +| epoch 9 | 755/ 5600 batches | train loss 0.3218461 +| epoch 9 | 759/ 5600 batches | train loss 0.3679155 +| epoch 9 | 763/ 5600 batches | train loss 0.3357013 +| epoch 9 | 767/ 5600 batches | train loss 0.3864009 +| epoch 9 | 771/ 5600 batches | train loss 0.2871188 +| epoch 9 | 775/ 5600 batches | train loss 0.3202240 +| epoch 9 | 779/ 5600 batches | train loss 0.2734635 +| epoch 9 | 783/ 5600 batches | train loss 0.3473097 +| epoch 9 | 787/ 5600 batches | train loss 0.2742811 +| epoch 9 | 791/ 5600 batches | train loss 0.2945873 +| epoch 9 | 795/ 5600 batches | train loss 0.2448978 +| epoch 9 | 799/ 5600 batches | train loss 0.3030464 +| epoch 9 | 803/ 5600 batches | train loss 0.2570336 +| epoch 9 | 807/ 5600 batches | train loss 0.2940389 +| epoch 9 | 811/ 5600 batches | train loss 0.2938303 +| epoch 9 | 815/ 5600 batches | train loss 0.3371719 +| epoch 9 | 819/ 5600 batches | train loss 0.3198889 +| epoch 9 | 823/ 5600 batches | train loss 0.3057878 +| epoch 9 | 827/ 5600 batches | train loss 0.2841206 +| epoch 9 | 831/ 5600 batches | train loss 0.3352171 +| epoch 9 | 835/ 5600 batches | train loss 0.3638444 +| epoch 9 | 839/ 5600 batches | train loss 0.3402513 +| epoch 9 | 843/ 5600 batches | train loss 0.3458559 +| epoch 9 | 847/ 5600 batches | train loss 0.2239698 +| epoch 9 | 851/ 5600 batches | train loss 0.2976106 +| epoch 9 | 855/ 5600 batches | train loss 0.2839161 +| epoch 9 | 859/ 5600 batches | train loss 0.2862966 +| epoch 9 | 863/ 5600 batches | train loss 0.3133704 +| epoch 9 | 867/ 5600 batches | train loss 0.3820743 +| epoch 9 | 871/ 5600 batches | train loss 0.3068911 +| epoch 9 | 875/ 5600 batches | train loss 0.3637791 +| epoch 9 | 879/ 5600 batches | train loss 0.3152395 +| epoch 9 | 883/ 5600 batches | train loss 0.3196005 +| epoch 9 | 887/ 5600 batches | train loss 0.3025410 +| epoch 9 | 891/ 5600 batches | train loss 0.3495920 +| epoch 9 | 895/ 5600 batches | train loss 0.3767382 +| epoch 9 | 899/ 5600 batches | train loss 0.1968316 +| epoch 9 | 903/ 5600 batches | train loss 0.2704825 +| epoch 9 | 907/ 5600 batches | train loss 0.3290342 +| epoch 9 | 911/ 5600 batches | train loss 0.3543867 +| epoch 9 | 915/ 5600 batches | train loss 0.2913724 +| epoch 9 | 919/ 5600 batches | train loss 0.3818784 +| epoch 9 | 923/ 5600 batches | train loss 0.3549425 +| epoch 9 | 927/ 5600 batches | train loss 0.3049187 +| epoch 9 | 931/ 5600 batches | train loss 0.3854162 +| epoch 9 | 935/ 5600 batches | train loss 0.3720821 +| epoch 9 | 939/ 5600 batches | train loss 0.2717785 +| epoch 9 | 943/ 5600 batches | train loss 0.3467670 +| epoch 9 | 947/ 5600 batches | train loss 0.3301917 +| epoch 9 | 951/ 5600 batches | train loss 0.2576771 +| epoch 9 | 955/ 5600 batches | train loss 0.3044130 +| epoch 9 | 959/ 5600 batches | train loss 0.3149593 +| epoch 9 | 963/ 5600 batches | train loss 0.3480394 +| epoch 9 | 967/ 5600 batches | train loss 0.3769803 +| epoch 9 | 971/ 5600 batches | train loss 0.2991476 +| epoch 9 | 975/ 5600 batches | train loss 0.2875429 +| epoch 9 | 979/ 5600 batches | train loss 0.2833401 +| epoch 9 | 983/ 5600 batches | train loss 0.2928244 +| epoch 9 | 987/ 5600 batches | train loss 0.3864549 +| epoch 9 | 991/ 5600 batches | train loss 0.3299436 +| epoch 9 | 995/ 5600 batches | train loss 0.3532720 +| epoch 9 | 999/ 5600 batches | train loss 0.2690882 +| epoch 9 | 1003/ 5600 batches | train loss 0.3633071 +| epoch 9 | 1007/ 5600 batches | train loss 0.3155398 +| epoch 9 | 1011/ 5600 batches | train loss 0.3027126 +| epoch 9 | 1015/ 5600 batches | train loss 0.3016029 +| epoch 9 | 1019/ 5600 batches | train loss 0.3689988 +| epoch 9 | 1023/ 5600 batches | train loss 0.4065875 +| epoch 9 | 1027/ 5600 batches | train loss 0.3444796 +| epoch 9 | 1031/ 5600 batches | train loss 0.3048804 +| epoch 9 | 1035/ 5600 batches | train loss 0.2538548 +| epoch 9 | 1039/ 5600 batches | train loss 0.3271396 +| epoch 9 | 1043/ 5600 batches | train loss 0.4546384 +| epoch 9 | 1047/ 5600 batches | train loss 0.3511012 +| epoch 9 | 1051/ 5600 batches | train loss 0.3045674 +| epoch 9 | 1055/ 5600 batches | train loss 0.2743705 +| epoch 9 | 1059/ 5600 batches | train loss 0.3414782 +| epoch 9 | 1063/ 5600 batches | train loss 0.4250059 +| epoch 9 | 1067/ 5600 batches | train loss 0.3220565 +| epoch 9 | 1071/ 5600 batches | train loss 0.2985232 +| epoch 9 | 1075/ 5600 batches | train loss 0.2932431 +| epoch 9 | 1079/ 5600 batches | train loss 0.2966466 +| epoch 9 | 1083/ 5600 batches | train loss 0.2737601 +| epoch 9 | 1087/ 5600 batches | train loss 0.3615997 +| epoch 9 | 1091/ 5600 batches | train loss 0.3232655 +| epoch 9 | 1095/ 5600 batches | train loss 0.2836140 +| epoch 9 | 1099/ 5600 batches | train loss 0.3547480 +| epoch 9 | 1103/ 5600 batches | train loss 0.2791709 +| epoch 9 | 1107/ 5600 batches | train loss 0.2984318 +| epoch 9 | 1111/ 5600 batches | train loss 0.3409930 +| epoch 9 | 1115/ 5600 batches | train loss 0.3030198 +| epoch 9 | 1119/ 5600 batches | train loss 0.3250059 +| epoch 9 | 1123/ 5600 batches | train loss 0.3154606 +| epoch 9 | 1127/ 5600 batches | train loss 0.3366941 +| epoch 9 | 1131/ 5600 batches | train loss 0.3266838 +| epoch 9 | 1135/ 5600 batches | train loss 0.2976072 +| epoch 9 | 1139/ 5600 batches | train loss 0.3280196 +| epoch 9 | 1143/ 5600 batches | train loss 0.3810973 +| epoch 9 | 1147/ 5600 batches | train loss 0.3240350 +| epoch 9 | 1151/ 5600 batches | train loss 0.3137412 +| epoch 9 | 1155/ 5600 batches | train loss 0.2959986 +| epoch 9 | 1159/ 5600 batches | train loss 0.3142489 +| epoch 9 | 1163/ 5600 batches | train loss 0.3393006 +| epoch 9 | 1167/ 5600 batches | train loss 0.4510977 +| epoch 9 | 1171/ 5600 batches | train loss 0.3462667 +| epoch 9 | 1175/ 5600 batches | train loss 0.3562336 +| epoch 9 | 1179/ 5600 batches | train loss 0.3532408 +| epoch 9 | 1183/ 5600 batches | train loss 0.2669888 +| epoch 9 | 1187/ 5600 batches | train loss 0.2944959 +| epoch 9 | 1191/ 5600 batches | train loss 0.2914887 +| epoch 9 | 1195/ 5600 batches | train loss 0.2953830 +| epoch 9 | 1199/ 5600 batches | train loss 0.3209534 +| epoch 9 | 1203/ 5600 batches | train loss 0.2937320 +| epoch 9 | 1207/ 5600 batches | train loss 0.3553422 +| epoch 9 | 1211/ 5600 batches | train loss 0.3142883 +| epoch 9 | 1215/ 5600 batches | train loss 0.3493223 +| epoch 9 | 1219/ 5600 batches | train loss 0.3678781 +| epoch 9 | 1223/ 5600 batches | train loss 0.3389414 +| epoch 9 | 1227/ 5600 batches | train loss 0.3155416 +| epoch 9 | 1231/ 5600 batches | train loss 0.2728042 +| epoch 9 | 1235/ 5600 batches | train loss 0.2886572 +| epoch 9 | 1239/ 5600 batches | train loss 0.3141691 +| epoch 9 | 1243/ 5600 batches | train loss 0.3179171 +| epoch 9 | 1247/ 5600 batches | train loss 0.3414201 +| epoch 9 | 1251/ 5600 batches | train loss 0.2801152 +| epoch 9 | 1255/ 5600 batches | train loss 0.3427341 +| epoch 9 | 1259/ 5600 batches | train loss 0.3610694 +| epoch 9 | 1263/ 5600 batches | train loss 0.3519307 +| epoch 9 | 1267/ 5600 batches | train loss 0.3331849 +| epoch 9 | 1271/ 5600 batches | train loss 0.3582763 +| epoch 9 | 1275/ 5600 batches | train loss 0.3322063 +| epoch 9 | 1279/ 5600 batches | train loss 0.3754655 +| epoch 9 | 1283/ 5600 batches | train loss 0.3322281 +| epoch 9 | 1287/ 5600 batches | train loss 0.3095782 +| epoch 9 | 1291/ 5600 batches | train loss 0.2987713 +| epoch 9 | 1295/ 5600 batches | train loss 0.3584269 +| epoch 9 | 1299/ 5600 batches | train loss 0.3078437 +| epoch 9 | 1303/ 5600 batches | train loss 0.3192729 +| epoch 9 | 1307/ 5600 batches | train loss 0.3436902 +| epoch 9 | 1311/ 5600 batches | train loss 0.2671549 +| epoch 9 | 1315/ 5600 batches | train loss 0.3619539 +| epoch 9 | 1319/ 5600 batches | train loss 0.3478267 +| epoch 9 | 1323/ 5600 batches | train loss 0.2941352 +| epoch 9 | 1327/ 5600 batches | train loss 0.3416928 +| epoch 9 | 1331/ 5600 batches | train loss 0.3372578 +| epoch 9 | 1335/ 5600 batches | train loss 0.3588993 +| epoch 9 | 1339/ 5600 batches | train loss 0.3831519 +| epoch 9 | 1343/ 5600 batches | train loss 0.3138674 +| epoch 9 | 1347/ 5600 batches | train loss 0.3243504 +| epoch 9 | 1351/ 5600 batches | train loss 0.3161854 +| epoch 9 | 1355/ 5600 batches | train loss 0.3398878 +| epoch 9 | 1359/ 5600 batches | train loss 0.2939234 +| epoch 9 | 1363/ 5600 batches | train loss 0.3532652 +| epoch 9 | 1367/ 5600 batches | train loss 0.3063837 +| epoch 9 | 1371/ 5600 batches | train loss 0.3086813 +| epoch 9 | 1375/ 5600 batches | train loss 0.3375325 +| epoch 9 | 1379/ 5600 batches | train loss 0.3429393 +| epoch 9 | 1383/ 5600 batches | train loss 0.3018379 +| epoch 9 | 1387/ 5600 batches | train loss 0.3876030 +| epoch 9 | 1391/ 5600 batches | train loss 0.3529511 +| epoch 9 | 1395/ 5600 batches | train loss 0.3051412 +| epoch 9 | 1399/ 5600 batches | train loss 0.3178262 +| epoch 9 | 1403/ 5600 batches | train loss 0.3025773 +| epoch 9 | 1407/ 5600 batches | train loss 0.3159699 +| epoch 9 | 1411/ 5600 batches | train loss 0.2910501 +| epoch 9 | 1415/ 5600 batches | train loss 0.2827661 +| epoch 9 | 1419/ 5600 batches | train loss 0.3131646 +| epoch 9 | 1423/ 5600 batches | train loss 0.2888274 +| epoch 9 | 1427/ 5600 batches | train loss 0.2990172 +| epoch 9 | 1431/ 5600 batches | train loss 0.3486590 +| epoch 9 | 1435/ 5600 batches | train loss 0.3043860 +| epoch 9 | 1439/ 5600 batches | train loss 0.3009662 +| epoch 9 | 1443/ 5600 batches | train loss 0.3354518 +| epoch 9 | 1447/ 5600 batches | train loss 0.3627250 +| epoch 9 | 1451/ 5600 batches | train loss 0.3306391 +| epoch 9 | 1455/ 5600 batches | train loss 0.4346260 +| epoch 9 | 1459/ 5600 batches | train loss 0.2541421 +| epoch 9 | 1463/ 5600 batches | train loss 0.3102112 +| epoch 9 | 1467/ 5600 batches | train loss 0.3024556 +| epoch 9 | 1471/ 5600 batches | train loss 0.2881683 +| epoch 9 | 1475/ 5600 batches | train loss 0.3096967 +| epoch 9 | 1479/ 5600 batches | train loss 0.3138175 +| epoch 9 | 1483/ 5600 batches | train loss 0.3307906 +| epoch 9 | 1487/ 5600 batches | train loss 0.3765906 +| epoch 9 | 1491/ 5600 batches | train loss 0.3660220 +| epoch 9 | 1495/ 5600 batches | train loss 0.2551199 +| epoch 9 | 1499/ 5600 batches | train loss 0.3533293 +| epoch 9 | 1503/ 5600 batches | train loss 0.3254783 +| epoch 9 | 1507/ 5600 batches | train loss 0.3090576 +| epoch 9 | 1511/ 5600 batches | train loss 0.3093320 +| epoch 9 | 1515/ 5600 batches | train loss 0.3344957 +| epoch 9 | 1519/ 5600 batches | train loss 0.3269057 +| epoch 9 | 1523/ 5600 batches | train loss 0.2650664 +| epoch 9 | 1527/ 5600 batches | train loss 0.3706415 +| epoch 9 | 1531/ 5600 batches | train loss 0.3521177 +| epoch 9 | 1535/ 5600 batches | train loss 0.3620042 +| epoch 9 | 1539/ 5600 batches | train loss 0.3560218 +| epoch 9 | 1543/ 5600 batches | train loss 0.3684179 +| epoch 9 | 1547/ 5600 batches | train loss 0.3796582 +| epoch 9 | 1551/ 5600 batches | train loss 0.2800512 +| epoch 9 | 1555/ 5600 batches | train loss 0.3493375 +| epoch 9 | 1559/ 5600 batches | train loss 0.3670663 +| epoch 9 | 1563/ 5600 batches | train loss 0.2817592 +| epoch 9 | 1567/ 5600 batches | train loss 0.3813276 +| epoch 9 | 1571/ 5600 batches | train loss 0.3436809 +| epoch 9 | 1575/ 5600 batches | train loss 0.3458656 +| epoch 9 | 1579/ 5600 batches | train loss 0.2853540 +| epoch 9 | 1583/ 5600 batches | train loss 0.3467058 +| epoch 9 | 1587/ 5600 batches | train loss 0.3121402 +| epoch 9 | 1591/ 5600 batches | train loss 0.2345906 +| epoch 9 | 1595/ 5600 batches | train loss 0.2846365 +| epoch 9 | 1599/ 5600 batches | train loss 0.3041565 +| epoch 9 | 1603/ 5600 batches | train loss 0.3788110 +| epoch 9 | 1607/ 5600 batches | train loss 0.3495009 +| epoch 9 | 1611/ 5600 batches | train loss 0.3518888 +| epoch 9 | 1615/ 5600 batches | train loss 0.1988104 +| epoch 9 | 1619/ 5600 batches | train loss 0.2768461 +| epoch 9 | 1623/ 5600 batches | train loss 0.1315524 +| epoch 9 | 1627/ 5600 batches | train loss 0.3135018 +| epoch 9 | 1631/ 5600 batches | train loss 0.3004191 +| epoch 9 | 1635/ 5600 batches | train loss 0.3288514 +| epoch 9 | 1639/ 5600 batches | train loss 0.3497587 +| epoch 9 | 1643/ 5600 batches | train loss 0.3325537 +| epoch 9 | 1647/ 5600 batches | train loss 0.3782929 +| epoch 9 | 1651/ 5600 batches | train loss 0.2093253 +| epoch 9 | 1655/ 5600 batches | train loss 0.3524166 +| epoch 9 | 1659/ 5600 batches | train loss 0.3501457 +| epoch 9 | 1663/ 5600 batches | train loss 0.3397842 +| epoch 9 | 1667/ 5600 batches | train loss 0.3594096 +| epoch 9 | 1671/ 5600 batches | train loss 0.2834750 +| epoch 9 | 1675/ 5600 batches | train loss 0.2874123 +| epoch 9 | 1679/ 5600 batches | train loss 0.3804836 +| epoch 9 | 1683/ 5600 batches | train loss 0.3290518 +| epoch 9 | 1687/ 5600 batches | train loss 0.3493166 +| epoch 9 | 1691/ 5600 batches | train loss 0.3120740 +| epoch 9 | 1695/ 5600 batches | train loss 0.3030585 +| epoch 9 | 1699/ 5600 batches | train loss 0.3059649 +| epoch 9 | 1703/ 5600 batches | train loss 0.3189443 +| epoch 9 | 1707/ 5600 batches | train loss 0.3534761 +| epoch 9 | 1711/ 5600 batches | train loss 0.2773864 +| epoch 9 | 1715/ 5600 batches | train loss 0.2880497 +| epoch 9 | 1719/ 5600 batches | train loss 0.2767045 +| epoch 9 | 1723/ 5600 batches | train loss 0.3381887 +| epoch 9 | 1727/ 5600 batches | train loss 0.3391928 +| epoch 9 | 1731/ 5600 batches | train loss 0.3385423 +| epoch 9 | 1735/ 5600 batches | train loss 0.3488616 +| epoch 9 | 1739/ 5600 batches | train loss 0.3185178 +| epoch 9 | 1743/ 5600 batches | train loss 0.3455017 +| epoch 9 | 1747/ 5600 batches | train loss 0.2678069 +| epoch 9 | 1751/ 5600 batches | train loss 0.4155274 +| epoch 9 | 1755/ 5600 batches | train loss 0.3798214 +| epoch 9 | 1759/ 5600 batches | train loss 0.3327967 +| epoch 9 | 1763/ 5600 batches | train loss 0.2803564 +| epoch 9 | 1767/ 5600 batches | train loss 0.3098152 +| epoch 9 | 1771/ 5600 batches | train loss 0.3371703 +| epoch 9 | 1775/ 5600 batches | train loss 0.2706151 +| epoch 9 | 1779/ 5600 batches | train loss 0.3783254 +| epoch 9 | 1783/ 5600 batches | train loss 0.3038239 +| epoch 9 | 1787/ 5600 batches | train loss 0.2970695 +| epoch 9 | 1791/ 5600 batches | train loss 0.2994881 +| epoch 9 | 1795/ 5600 batches | train loss 0.2935089 +| epoch 9 | 1799/ 5600 batches | train loss 0.3079864 +| epoch 9 | 1803/ 5600 batches | train loss 0.2808648 +| epoch 9 | 1807/ 5600 batches | train loss 0.3357168 +| epoch 9 | 1811/ 5600 batches | train loss 0.2569528 +| epoch 9 | 1815/ 5600 batches | train loss 0.2871163 +| epoch 9 | 1819/ 5600 batches | train loss 0.4221993 +| epoch 9 | 1823/ 5600 batches | train loss 0.3604267 +| epoch 9 | 1827/ 5600 batches | train loss 0.3132070 +| epoch 9 | 1831/ 5600 batches | train loss 0.3251875 +| epoch 9 | 1835/ 5600 batches | train loss 0.3281431 +| epoch 9 | 1839/ 5600 batches | train loss 0.2993532 +| epoch 9 | 1843/ 5600 batches | train loss 0.3388974 +| epoch 9 | 1847/ 5600 batches | train loss 0.3616238 +| epoch 9 | 1851/ 5600 batches | train loss 0.3826268 +| epoch 9 | 1855/ 5600 batches | train loss 0.3828478 +| epoch 9 | 1859/ 5600 batches | train loss 0.3434340 +| epoch 9 | 1863/ 5600 batches | train loss 0.1212730 +| epoch 9 | 1867/ 5600 batches | train loss 0.3261423 +| epoch 9 | 1871/ 5600 batches | train loss 0.3100168 +| epoch 9 | 1875/ 5600 batches | train loss 0.3669432 +| epoch 9 | 1879/ 5600 batches | train loss 0.3376651 +| epoch 9 | 1883/ 5600 batches | train loss 0.3324943 +| epoch 9 | 1887/ 5600 batches | train loss 0.2860563 +| epoch 9 | 1891/ 5600 batches | train loss 0.3588785 +| epoch 9 | 1895/ 5600 batches | train loss 0.3707109 +| epoch 9 | 1899/ 5600 batches | train loss 0.3459298 +| epoch 9 | 1903/ 5600 batches | train loss 0.2755008 +| epoch 9 | 1907/ 5600 batches | train loss 0.3306165 +| epoch 9 | 1911/ 5600 batches | train loss 0.3167152 +| epoch 9 | 1915/ 5600 batches | train loss 0.3081672 +| epoch 9 | 1919/ 5600 batches | train loss 0.3267494 +| epoch 9 | 1923/ 5600 batches | train loss 0.3120219 +| epoch 9 | 1927/ 5600 batches | train loss 0.3572531 +| epoch 9 | 1931/ 5600 batches | train loss 0.4002879 +| epoch 9 | 1935/ 5600 batches | train loss 0.3764484 +| epoch 9 | 1939/ 5600 batches | train loss 0.3650812 +| epoch 9 | 1943/ 5600 batches | train loss 0.2957951 +| epoch 9 | 1947/ 5600 batches | train loss 0.3000349 +| epoch 9 | 1951/ 5600 batches | train loss 0.3113640 +| epoch 9 | 1955/ 5600 batches | train loss 0.3255199 +| epoch 9 | 1959/ 5600 batches | train loss 0.3039668 +| epoch 9 | 1963/ 5600 batches | train loss 0.3188964 +| epoch 9 | 1967/ 5600 batches | train loss 0.2923089 +| epoch 9 | 1971/ 5600 batches | train loss 0.3368305 +| epoch 9 | 1975/ 5600 batches | train loss 0.4044273 +| epoch 9 | 1979/ 5600 batches | train loss 0.3576229 +| epoch 9 | 1983/ 5600 batches | train loss 0.2666937 +| epoch 9 | 1987/ 5600 batches | train loss 0.3405904 +| epoch 9 | 1991/ 5600 batches | train loss 0.3156333 +| epoch 9 | 1995/ 5600 batches | train loss 0.3506510 +| epoch 9 | 1999/ 5600 batches | train loss 0.3512698 +| epoch 9 | 2003/ 5600 batches | train loss 0.3561626 +| epoch 9 | 2007/ 5600 batches | train loss 0.3035457 +| epoch 9 | 2011/ 5600 batches | train loss 0.2791997 +| epoch 9 | 2015/ 5600 batches | train loss 0.2983647 +| epoch 9 | 2019/ 5600 batches | train loss 0.3205884 +| epoch 9 | 2023/ 5600 batches | train loss 0.3292676 +| epoch 9 | 2027/ 5600 batches | train loss 0.3499308 +| epoch 9 | 2031/ 5600 batches | train loss 0.2986674 +| epoch 9 | 2035/ 5600 batches | train loss 0.3002340 +| epoch 9 | 2039/ 5600 batches | train loss 0.3266706 +| epoch 9 | 2043/ 5600 batches | train loss 0.3327226 +| epoch 9 | 2047/ 5600 batches | train loss 0.3451597 +| epoch 9 | 2051/ 5600 batches | train loss 0.2669722 +| epoch 9 | 2055/ 5600 batches | train loss 0.4268912 +| epoch 9 | 2059/ 5600 batches | train loss 0.3293576 +| epoch 9 | 2063/ 5600 batches | train loss 0.2998044 +| epoch 9 | 2067/ 5600 batches | train loss 0.3666211 +| epoch 9 | 2071/ 5600 batches | train loss 0.2857266 +| epoch 9 | 2075/ 5600 batches | train loss 0.3108551 +| epoch 9 | 2079/ 5600 batches | train loss 0.4108582 +| epoch 9 | 2083/ 5600 batches | train loss 0.3237097 +| epoch 9 | 2087/ 5600 batches | train loss 0.3148007 +| epoch 9 | 2091/ 5600 batches | train loss 0.2739584 +| epoch 9 | 2095/ 5600 batches | train loss 0.3440971 +| epoch 9 | 2099/ 5600 batches | train loss 0.2922480 +| epoch 9 | 2103/ 5600 batches | train loss 0.2015371 +| epoch 9 | 2107/ 5600 batches | train loss 0.2125700 +| epoch 9 | 2111/ 5600 batches | train loss 0.4300115 +| epoch 9 | 2115/ 5600 batches | train loss 0.3780490 +| epoch 9 | 2119/ 5600 batches | train loss 0.2899592 +| epoch 9 | 2123/ 5600 batches | train loss 0.3564112 +| epoch 9 | 2127/ 5600 batches | train loss 0.3598121 +| epoch 9 | 2131/ 5600 batches | train loss 0.3430077 +| epoch 9 | 2135/ 5600 batches | train loss 0.3327355 +| epoch 9 | 2139/ 5600 batches | train loss 0.3441357 +| epoch 9 | 2143/ 5600 batches | train loss 0.3385275 +| epoch 9 | 2147/ 5600 batches | train loss 0.3117687 +| epoch 9 | 2151/ 5600 batches | train loss 0.3097364 +| epoch 9 | 2155/ 5600 batches | train loss 0.3619967 +| epoch 9 | 2159/ 5600 batches | train loss 0.3470102 +| epoch 9 | 2163/ 5600 batches | train loss 0.3077668 +| epoch 9 | 2167/ 5600 batches | train loss 0.3467935 +| epoch 9 | 2171/ 5600 batches | train loss 0.2917215 +| epoch 9 | 2175/ 5600 batches | train loss 0.2886207 +| epoch 9 | 2179/ 5600 batches | train loss 0.3256977 +| epoch 9 | 2183/ 5600 batches | train loss 0.3187882 +| epoch 9 | 2187/ 5600 batches | train loss 0.3398994 +| epoch 9 | 2191/ 5600 batches | train loss 0.3330989 +| epoch 9 | 2195/ 5600 batches | train loss 0.3701692 +| epoch 9 | 2199/ 5600 batches | train loss 0.3828028 +| epoch 9 | 2203/ 5600 batches | train loss 0.3148080 +| epoch 9 | 2207/ 5600 batches | train loss 0.3485246 +| epoch 9 | 2211/ 5600 batches | train loss 0.2858042 +| epoch 9 | 2215/ 5600 batches | train loss 0.3761954 +| epoch 9 | 2219/ 5600 batches | train loss 0.3551480 +| epoch 9 | 2223/ 5600 batches | train loss 0.3545835 +| epoch 9 | 2227/ 5600 batches | train loss 0.2870193 +| epoch 9 | 2231/ 5600 batches | train loss 0.3797397 +| epoch 9 | 2235/ 5600 batches | train loss 0.3356714 +| epoch 9 | 2239/ 5600 batches | train loss 0.3875178 +| epoch 9 | 2243/ 5600 batches | train loss 0.2962725 +| epoch 9 | 2247/ 5600 batches | train loss 0.3543327 +| epoch 9 | 2251/ 5600 batches | train loss 0.2823483 +| epoch 9 | 2255/ 5600 batches | train loss 0.3716500 +| epoch 9 | 2259/ 5600 batches | train loss 0.4021669 +| epoch 9 | 2263/ 5600 batches | train loss 0.3761887 +| epoch 9 | 2267/ 5600 batches | train loss 0.3222798 +| epoch 9 | 2271/ 5600 batches | train loss 0.3831311 +| epoch 9 | 2275/ 5600 batches | train loss 0.3005726 +| epoch 9 | 2279/ 5600 batches | train loss 0.3156550 +| epoch 9 | 2283/ 5600 batches | train loss 0.4142488 +| epoch 9 | 2287/ 5600 batches | train loss 0.3546899 +| epoch 9 | 2291/ 5600 batches | train loss 0.4239038 +| epoch 9 | 2295/ 5600 batches | train loss 0.2847771 +| epoch 9 | 2299/ 5600 batches | train loss 0.3634731 +| epoch 9 | 2303/ 5600 batches | train loss 0.3463818 +| epoch 9 | 2307/ 5600 batches | train loss 0.3026763 +| epoch 9 | 2311/ 5600 batches | train loss 0.3150249 +| epoch 9 | 2315/ 5600 batches | train loss 0.3611204 +| epoch 9 | 2319/ 5600 batches | train loss 0.3836036 +| epoch 9 | 2323/ 5600 batches | train loss 0.3104803 +| epoch 9 | 2327/ 5600 batches | train loss 0.3410807 +| epoch 9 | 2331/ 5600 batches | train loss 0.3181572 +| epoch 9 | 2335/ 5600 batches | train loss 0.3734416 +| epoch 9 | 2339/ 5600 batches | train loss 0.2782107 +| epoch 9 | 2343/ 5600 batches | train loss 0.3314481 +| epoch 9 | 2347/ 5600 batches | train loss 0.3609470 +| epoch 9 | 2351/ 5600 batches | train loss 0.3513271 +| epoch 9 | 2355/ 5600 batches | train loss 0.3209839 +| epoch 9 | 2359/ 5600 batches | train loss 0.2871468 +| epoch 9 | 2363/ 5600 batches | train loss 0.2243347 +| epoch 9 | 2367/ 5600 batches | train loss 0.3911344 +| epoch 9 | 2371/ 5600 batches | train loss 0.3312451 +| epoch 9 | 2375/ 5600 batches | train loss 0.3448865 +| epoch 9 | 2379/ 5600 batches | train loss 0.2930061 +| epoch 9 | 2383/ 5600 batches | train loss 0.3565534 +| epoch 9 | 2387/ 5600 batches | train loss 0.3735789 +| epoch 9 | 2391/ 5600 batches | train loss 0.3064306 +| epoch 9 | 2395/ 5600 batches | train loss 0.3617215 +| epoch 9 | 2399/ 5600 batches | train loss 0.2825661 +| epoch 9 | 2403/ 5600 batches | train loss 0.3851060 +| epoch 9 | 2407/ 5600 batches | train loss 0.2883430 +| epoch 9 | 2411/ 5600 batches | train loss 0.3421208 +| epoch 9 | 2415/ 5600 batches | train loss 0.3047441 +| epoch 9 | 2419/ 5600 batches | train loss 0.3243250 +| epoch 9 | 2423/ 5600 batches | train loss 0.3047935 +| epoch 9 | 2427/ 5600 batches | train loss 0.2812810 +| epoch 9 | 2431/ 5600 batches | train loss 0.2271398 +| epoch 9 | 2435/ 5600 batches | train loss 0.2789391 +| epoch 9 | 2439/ 5600 batches | train loss 0.2903920 +| epoch 9 | 2443/ 5600 batches | train loss 0.2255776 +| epoch 9 | 2447/ 5600 batches | train loss 0.2521108 +| epoch 9 | 2451/ 5600 batches | train loss 0.3259942 +| epoch 9 | 2455/ 5600 batches | train loss 0.4020195 +| epoch 9 | 2459/ 5600 batches | train loss 0.3568041 +| epoch 9 | 2463/ 5600 batches | train loss 0.3682039 +| epoch 9 | 2467/ 5600 batches | train loss 0.3633599 +| epoch 9 | 2471/ 5600 batches | train loss 0.3706679 +| epoch 9 | 2475/ 5600 batches | train loss 0.2896433 +| epoch 9 | 2479/ 5600 batches | train loss 0.3381807 +| epoch 9 | 2483/ 5600 batches | train loss 0.3387662 +| epoch 9 | 2487/ 5600 batches | train loss 0.3380497 +| epoch 9 | 2491/ 5600 batches | train loss 0.2628399 +| epoch 9 | 2495/ 5600 batches | train loss 0.2964748 +| epoch 9 | 2499/ 5600 batches | train loss 0.3609758 +| epoch 9 | 2503/ 5600 batches | train loss 0.3134420 +| epoch 9 | 2507/ 5600 batches | train loss 0.3121718 +| epoch 9 | 2511/ 5600 batches | train loss 0.2677107 +| epoch 9 | 2515/ 5600 batches | train loss 0.3016092 +| epoch 9 | 2519/ 5600 batches | train loss 0.4249183 +| epoch 9 | 2523/ 5600 batches | train loss 0.3051020 +| epoch 9 | 2527/ 5600 batches | train loss 0.2976694 +| epoch 9 | 2531/ 5600 batches | train loss 0.3226926 +| epoch 9 | 2535/ 5600 batches | train loss 0.3669642 +| epoch 9 | 2539/ 5600 batches | train loss 0.4222284 +| epoch 9 | 2543/ 5600 batches | train loss 0.3968765 +| epoch 9 | 2547/ 5600 batches | train loss 0.3983710 +| epoch 9 | 2551/ 5600 batches | train loss 0.3457198 +| epoch 9 | 2555/ 5600 batches | train loss 0.3389138 +| epoch 9 | 2559/ 5600 batches | train loss 0.3387328 +| epoch 9 | 2563/ 5600 batches | train loss 0.3477766 +| epoch 9 | 2567/ 5600 batches | train loss 0.3204333 +| epoch 9 | 2571/ 5600 batches | train loss 0.3416800 +| epoch 9 | 2575/ 5600 batches | train loss 0.2715188 +| epoch 9 | 2579/ 5600 batches | train loss 0.3218521 +| epoch 9 | 2583/ 5600 batches | train loss 0.2897281 +| epoch 9 | 2587/ 5600 batches | train loss 0.3734349 +| epoch 9 | 2591/ 5600 batches | train loss 0.3224354 +| epoch 9 | 2595/ 5600 batches | train loss 0.3616454 +| epoch 9 | 2599/ 5600 batches | train loss 0.3855026 +| epoch 9 | 2603/ 5600 batches | train loss 0.3147061 +| epoch 9 | 2607/ 5600 batches | train loss 0.3491151 +| epoch 9 | 2611/ 5600 batches | train loss 0.3566896 +| epoch 9 | 2615/ 5600 batches | train loss 0.3597835 +| epoch 9 | 2619/ 5600 batches | train loss 0.2766804 +| epoch 9 | 2623/ 5600 batches | train loss 0.2990104 +| epoch 9 | 2627/ 5600 batches | train loss 0.2856198 +| epoch 9 | 2631/ 5600 batches | train loss 0.3309915 +| epoch 9 | 2635/ 5600 batches | train loss 0.3023023 +| epoch 9 | 2639/ 5600 batches | train loss 0.3341531 +| epoch 9 | 2643/ 5600 batches | train loss 0.2899700 +| epoch 9 | 2647/ 5600 batches | train loss 0.3857945 +| epoch 9 | 2651/ 5600 batches | train loss 0.2619346 +| epoch 9 | 2655/ 5600 batches | train loss 0.3349115 +| epoch 9 | 2659/ 5600 batches | train loss 0.3036161 +| epoch 9 | 2663/ 5600 batches | train loss 0.2693498 +| epoch 9 | 2667/ 5600 batches | train loss 0.3061653 +| epoch 9 | 2671/ 5600 batches | train loss 0.3667143 +| epoch 9 | 2675/ 5600 batches | train loss 0.2947219 +| epoch 9 | 2679/ 5600 batches | train loss 0.3981021 +| epoch 9 | 2683/ 5600 batches | train loss 0.3479650 +| epoch 9 | 2687/ 5600 batches | train loss 0.3906662 +| epoch 9 | 2691/ 5600 batches | train loss 0.3053505 +| epoch 9 | 2695/ 5600 batches | train loss 0.3553010 +| epoch 9 | 2699/ 5600 batches | train loss 0.3569977 +| epoch 9 | 2703/ 5600 batches | train loss 0.3074421 +| epoch 9 | 2707/ 5600 batches | train loss 0.4086628 +| epoch 9 | 2711/ 5600 batches | train loss 0.3109680 +| epoch 9 | 2715/ 5600 batches | train loss 0.3051893 +| epoch 9 | 2719/ 5600 batches | train loss 0.4308295 +| epoch 9 | 2723/ 5600 batches | train loss 0.3233649 +| epoch 9 | 2727/ 5600 batches | train loss 0.3301926 +| epoch 9 | 2731/ 5600 batches | train loss 0.3647480 +| epoch 9 | 2735/ 5600 batches | train loss 0.4034983 +| epoch 9 | 2739/ 5600 batches | train loss 0.3221387 +| epoch 9 | 2743/ 5600 batches | train loss 0.4547183 +| epoch 9 | 2747/ 5600 batches | train loss 0.3448085 +| epoch 9 | 2751/ 5600 batches | train loss 0.3032429 +| epoch 9 | 2755/ 5600 batches | train loss 0.3259704 +| epoch 9 | 2759/ 5600 batches | train loss 0.2669554 +| epoch 9 | 2763/ 5600 batches | train loss 0.3199467 +| epoch 9 | 2767/ 5600 batches | train loss 0.2964135 +| epoch 9 | 2771/ 5600 batches | train loss 0.3727386 +| epoch 9 | 2775/ 5600 batches | train loss 0.3236814 +| epoch 9 | 2779/ 5600 batches | train loss 0.3412409 +| epoch 9 | 2783/ 5600 batches | train loss 0.3895520 +| epoch 9 | 2787/ 5600 batches | train loss 0.3520893 +| epoch 9 | 2791/ 5600 batches | train loss 0.3587624 +| epoch 9 | 2795/ 5600 batches | train loss 0.3644806 +| epoch 9 | 2799/ 5600 batches | train loss 0.3039767 +| epoch 9 | 2803/ 5600 batches | train loss 0.3197315 +| epoch 9 | 2807/ 5600 batches | train loss 0.2428093 +| epoch 9 | 2811/ 5600 batches | train loss 0.3144313 +| epoch 9 | 2815/ 5600 batches | train loss 0.3231686 +| epoch 9 | 2819/ 5600 batches | train loss 0.3265153 +| epoch 9 | 2823/ 5600 batches | train loss 0.3463747 +| epoch 9 | 2827/ 5600 batches | train loss 0.3255771 +| epoch 9 | 2831/ 5600 batches | train loss 0.3970120 +| epoch 9 | 2835/ 5600 batches | train loss 0.3510712 +| epoch 9 | 2839/ 5600 batches | train loss 0.3283331 +| epoch 9 | 2843/ 5600 batches | train loss 0.3498718 +| epoch 9 | 2847/ 5600 batches | train loss 0.3555770 +| epoch 9 | 2851/ 5600 batches | train loss 0.2881464 +| epoch 9 | 2855/ 5600 batches | train loss 0.3772974 +| epoch 9 | 2859/ 5600 batches | train loss 0.3512183 +| epoch 9 | 2863/ 5600 batches | train loss 0.3142726 +| epoch 9 | 2867/ 5600 batches | train loss 0.2528169 +| epoch 9 | 2871/ 5600 batches | train loss 0.3479534 +| epoch 9 | 2875/ 5600 batches | train loss 0.3013876 +| epoch 9 | 2879/ 5600 batches | train loss 0.3605343 +| epoch 9 | 2883/ 5600 batches | train loss 0.3679743 +| epoch 9 | 2887/ 5600 batches | train loss 0.2998605 +| epoch 9 | 2891/ 5600 batches | train loss 0.3604092 +| epoch 9 | 2895/ 5600 batches | train loss 0.2911650 +| epoch 9 | 2899/ 5600 batches | train loss 0.3289759 +| epoch 9 | 2903/ 5600 batches | train loss 0.3498136 +| epoch 9 | 2907/ 5600 batches | train loss 0.3023515 +| epoch 9 | 2911/ 5600 batches | train loss 0.2283284 +| epoch 9 | 2915/ 5600 batches | train loss 0.3453681 +| epoch 9 | 2919/ 5600 batches | train loss 0.3359903 +| epoch 9 | 2923/ 5600 batches | train loss 0.3556746 +| epoch 9 | 2927/ 5600 batches | train loss 0.3316397 +| epoch 9 | 2931/ 5600 batches | train loss 0.3812804 +| epoch 9 | 2935/ 5600 batches | train loss 0.2830154 +| epoch 9 | 2939/ 5600 batches | train loss 0.3473583 +| epoch 9 | 2943/ 5600 batches | train loss 0.3246308 +| epoch 9 | 2947/ 5600 batches | train loss 0.3252536 +| epoch 9 | 2951/ 5600 batches | train loss 0.4191951 +| epoch 9 | 2955/ 5600 batches | train loss 0.3102604 +| epoch 9 | 2959/ 5600 batches | train loss 0.3289648 +| epoch 9 | 2963/ 5600 batches | train loss 0.2916816 +| epoch 9 | 2967/ 5600 batches | train loss 0.2962937 +| epoch 9 | 2971/ 5600 batches | train loss 0.3995720 +| epoch 9 | 2975/ 5600 batches | train loss 0.2996801 +| epoch 9 | 2979/ 5600 batches | train loss 0.3299195 +| epoch 9 | 2983/ 5600 batches | train loss 0.3500234 +| epoch 9 | 2987/ 5600 batches | train loss 0.3024992 +| epoch 9 | 2991/ 5600 batches | train loss 0.3764867 +| epoch 9 | 2995/ 5600 batches | train loss 0.3536707 +| epoch 9 | 2999/ 5600 batches | train loss 0.3161893 +| epoch 9 | 3003/ 5600 batches | train loss 0.2948624 +| epoch 9 | 3007/ 5600 batches | train loss 0.3058361 +| epoch 9 | 3011/ 5600 batches | train loss 0.3318023 +| epoch 9 | 3015/ 5600 batches | train loss 0.3203090 +| epoch 9 | 3019/ 5600 batches | train loss 0.2766230 +| epoch 9 | 3023/ 5600 batches | train loss 0.2949856 +| epoch 9 | 3027/ 5600 batches | train loss 0.3394767 +| epoch 9 | 3031/ 5600 batches | train loss 0.3265748 +| epoch 9 | 3035/ 5600 batches | train loss 0.3108493 +| epoch 9 | 3039/ 5600 batches | train loss 0.3229325 +| epoch 9 | 3043/ 5600 batches | train loss 0.3467481 +| epoch 9 | 3047/ 5600 batches | train loss 0.3016417 +| epoch 9 | 3051/ 5600 batches | train loss 0.3331338 +| epoch 9 | 3055/ 5600 batches | train loss 0.3435110 +| epoch 9 | 3059/ 5600 batches | train loss 0.3249735 +| epoch 9 | 3063/ 5600 batches | train loss 0.3069448 +| epoch 9 | 3067/ 5600 batches | train loss 0.3284013 +| epoch 9 | 3071/ 5600 batches | train loss 0.3485045 +| epoch 9 | 3075/ 5600 batches | train loss 0.3618137 +| epoch 9 | 3079/ 5600 batches | train loss 0.3732206 +| epoch 9 | 3083/ 5600 batches | train loss 0.3601348 +| epoch 9 | 3087/ 5600 batches | train loss 0.2943171 +| epoch 9 | 3091/ 5600 batches | train loss 0.3223400 +| epoch 9 | 3095/ 5600 batches | train loss 0.3883628 +| epoch 9 | 3099/ 5600 batches | train loss 0.3266716 +| epoch 9 | 3103/ 5600 batches | train loss 0.2164381 +| epoch 9 | 3107/ 5600 batches | train loss 0.3902771 +| epoch 9 | 3111/ 5600 batches | train loss 0.3303488 +| epoch 9 | 3115/ 5600 batches | train loss 0.3075449 +| epoch 9 | 3119/ 5600 batches | train loss 0.3358309 +| epoch 9 | 3123/ 5600 batches | train loss 0.3326752 +| epoch 9 | 3127/ 5600 batches | train loss 0.3408799 +| epoch 9 | 3131/ 5600 batches | train loss 0.3505549 +| epoch 9 | 3135/ 5600 batches | train loss 0.3055322 +| epoch 9 | 3139/ 5600 batches | train loss 0.2957137 +| epoch 9 | 3143/ 5600 batches | train loss 0.3165540 +| epoch 9 | 3147/ 5600 batches | train loss 0.3258956 +| epoch 9 | 3151/ 5600 batches | train loss 0.3557937 +| epoch 9 | 3155/ 5600 batches | train loss 0.3204941 +| epoch 9 | 3159/ 5600 batches | train loss 0.3334832 +| epoch 9 | 3163/ 5600 batches | train loss 0.3462703 +| epoch 9 | 3167/ 5600 batches | train loss 0.3580607 +| epoch 9 | 3171/ 5600 batches | train loss 0.3523631 +| epoch 9 | 3175/ 5600 batches | train loss 0.2955204 +| epoch 9 | 3179/ 5600 batches | train loss 0.2846467 +| epoch 9 | 3183/ 5600 batches | train loss 0.2955249 +| epoch 9 | 3187/ 5600 batches | train loss 0.4023671 +| epoch 9 | 3191/ 5600 batches | train loss 0.3104836 +| epoch 9 | 3195/ 5600 batches | train loss 0.3118837 +| epoch 9 | 3199/ 5600 batches | train loss 0.2848345 +| epoch 9 | 3203/ 5600 batches | train loss 0.3024964 +| epoch 9 | 3207/ 5600 batches | train loss 0.3307720 +| epoch 9 | 3211/ 5600 batches | train loss 0.3359566 +| epoch 9 | 3215/ 5600 batches | train loss 0.3819272 +| epoch 9 | 3219/ 5600 batches | train loss 0.3779661 +| epoch 9 | 3223/ 5600 batches | train loss 0.3088683 +| epoch 9 | 3227/ 5600 batches | train loss 0.2016048 +| epoch 9 | 3231/ 5600 batches | train loss 0.3228936 +| epoch 9 | 3235/ 5600 batches | train loss 0.2120472 +| epoch 9 | 3239/ 5600 batches | train loss 0.3390135 +| epoch 9 | 3243/ 5600 batches | train loss 0.4236031 +| epoch 9 | 3247/ 5600 batches | train loss 0.2914422 +| epoch 9 | 3251/ 5600 batches | train loss 0.3358555 +| epoch 9 | 3255/ 5600 batches | train loss 0.3213844 +| epoch 9 | 3259/ 5600 batches | train loss 0.3282295 +| epoch 9 | 3263/ 5600 batches | train loss 0.2947818 +| epoch 9 | 3267/ 5600 batches | train loss 0.3111744 +| epoch 9 | 3271/ 5600 batches | train loss 0.3500712 +| epoch 9 | 3275/ 5600 batches | train loss 0.3951646 +| epoch 9 | 3279/ 5600 batches | train loss 0.3919164 +| epoch 9 | 3283/ 5600 batches | train loss 0.3683479 +| epoch 9 | 3287/ 5600 batches | train loss 0.2949663 +| epoch 9 | 3291/ 5600 batches | train loss 0.3430558 +| epoch 9 | 3295/ 5600 batches | train loss 0.3098714 +| epoch 9 | 3299/ 5600 batches | train loss 0.2709913 +| epoch 9 | 3303/ 5600 batches | train loss 0.3175011 +| epoch 9 | 3307/ 5600 batches | train loss 0.3052333 +| epoch 9 | 3311/ 5600 batches | train loss 0.3002281 +| epoch 9 | 3315/ 5600 batches | train loss 0.2686329 +| epoch 9 | 3319/ 5600 batches | train loss 0.3600798 +| epoch 9 | 3323/ 5600 batches | train loss 0.3297856 +| epoch 9 | 3327/ 5600 batches | train loss 0.3266592 +| epoch 9 | 3331/ 5600 batches | train loss 0.3234272 +| epoch 9 | 3335/ 5600 batches | train loss 0.3194685 +| epoch 9 | 3339/ 5600 batches | train loss 0.3187897 +| epoch 9 | 3343/ 5600 batches | train loss 0.2977017 +| epoch 9 | 3347/ 5600 batches | train loss 0.3404546 +| epoch 9 | 3351/ 5600 batches | train loss 0.3482590 +| epoch 9 | 3355/ 5600 batches | train loss 0.3207871 +| epoch 9 | 3359/ 5600 batches | train loss 0.3284355 +| epoch 9 | 3363/ 5600 batches | train loss 0.3212059 +| epoch 9 | 3367/ 5600 batches | train loss 0.4188711 +| epoch 9 | 3371/ 5600 batches | train loss 0.3043289 +| epoch 9 | 3375/ 5600 batches | train loss 0.3718771 +| epoch 9 | 3379/ 5600 batches | train loss 0.2972998 +| epoch 9 | 3383/ 5600 batches | train loss 0.3520865 +| epoch 9 | 3387/ 5600 batches | train loss 0.3295074 +| epoch 9 | 3391/ 5600 batches | train loss 0.2952595 +| epoch 9 | 3395/ 5600 batches | train loss 0.3680271 +| epoch 9 | 3399/ 5600 batches | train loss 0.3039085 +| epoch 9 | 3403/ 5600 batches | train loss 0.3120912 +| epoch 9 | 3407/ 5600 batches | train loss 0.3908212 +| epoch 9 | 3411/ 5600 batches | train loss 0.2969066 +| epoch 9 | 3415/ 5600 batches | train loss 0.3773415 +| epoch 9 | 3419/ 5600 batches | train loss 0.3341067 +| epoch 9 | 3423/ 5600 batches | train loss 0.3259876 +| epoch 9 | 3427/ 5600 batches | train loss 0.3128340 +| epoch 9 | 3431/ 5600 batches | train loss 0.2836716 +| epoch 9 | 3435/ 5600 batches | train loss 0.3816442 +| epoch 9 | 3439/ 5600 batches | train loss 0.2869917 +| epoch 9 | 3443/ 5600 batches | train loss 0.3534082 +| epoch 9 | 3447/ 5600 batches | train loss 0.4054446 +| epoch 9 | 3451/ 5600 batches | train loss 0.3474324 +| epoch 9 | 3455/ 5600 batches | train loss 0.4071586 +| epoch 9 | 3459/ 5600 batches | train loss 0.3138171 +| epoch 9 | 3463/ 5600 batches | train loss 0.4306966 +| epoch 9 | 3467/ 5600 batches | train loss 0.3066659 +| epoch 9 | 3471/ 5600 batches | train loss 0.3186942 +| epoch 9 | 3475/ 5600 batches | train loss 0.3239553 +| epoch 9 | 3479/ 5600 batches | train loss 0.3326752 +| epoch 9 | 3483/ 5600 batches | train loss 0.3432155 +| epoch 9 | 3487/ 5600 batches | train loss 0.3941549 +| epoch 9 | 3491/ 5600 batches | train loss 0.4667031 +| epoch 9 | 3495/ 5600 batches | train loss 0.3157648 +| epoch 9 | 3499/ 5600 batches | train loss 0.3567935 +| epoch 9 | 3503/ 5600 batches | train loss 0.3067665 +| epoch 9 | 3507/ 5600 batches | train loss 0.3419068 +| epoch 9 | 3511/ 5600 batches | train loss 0.3486569 +| epoch 9 | 3515/ 5600 batches | train loss 0.3233769 +| epoch 9 | 3519/ 5600 batches | train loss 0.3321324 +| epoch 9 | 3523/ 5600 batches | train loss 0.3138723 +| epoch 9 | 3527/ 5600 batches | train loss 0.3435111 +| epoch 9 | 3531/ 5600 batches | train loss 0.3286329 +| epoch 9 | 3535/ 5600 batches | train loss 0.4210521 +| epoch 9 | 3539/ 5600 batches | train loss 0.3974243 +| epoch 9 | 3543/ 5600 batches | train loss 0.3702298 +| epoch 9 | 3547/ 5600 batches | train loss 0.2595677 +| epoch 9 | 3551/ 5600 batches | train loss 0.3146636 +| epoch 9 | 3555/ 5600 batches | train loss 0.3830172 +| epoch 9 | 3559/ 5600 batches | train loss 0.3258537 +| epoch 9 | 3563/ 5600 batches | train loss 0.3774777 +| epoch 9 | 3567/ 5600 batches | train loss 0.3087561 +| epoch 9 | 3571/ 5600 batches | train loss 0.2857806 +| epoch 9 | 3575/ 5600 batches | train loss 0.2734258 +| epoch 9 | 3579/ 5600 batches | train loss 0.2888203 +| epoch 9 | 3583/ 5600 batches | train loss 0.3718317 +| epoch 9 | 3587/ 5600 batches | train loss 0.3510194 +| epoch 9 | 3591/ 5600 batches | train loss 0.3397564 +| epoch 9 | 3595/ 5600 batches | train loss 0.3028871 +| epoch 9 | 3599/ 5600 batches | train loss 0.3488344 +| epoch 9 | 3603/ 5600 batches | train loss 0.3538756 +| epoch 9 | 3607/ 5600 batches | train loss 0.3010153 +| epoch 9 | 3611/ 5600 batches | train loss 0.2727295 +| epoch 9 | 3615/ 5600 batches | train loss 0.3297522 +| epoch 9 | 3619/ 5600 batches | train loss 0.3086512 +| epoch 9 | 3623/ 5600 batches | train loss 0.3059818 +| epoch 9 | 3627/ 5600 batches | train loss 0.4047391 +| epoch 9 | 3631/ 5600 batches | train loss 0.3417720 +| epoch 9 | 3635/ 5600 batches | train loss 0.3363605 +| epoch 9 | 3639/ 5600 batches | train loss 0.3287390 +| epoch 9 | 3643/ 5600 batches | train loss 0.3240627 +| epoch 9 | 3647/ 5600 batches | train loss 0.3412554 +| epoch 9 | 3651/ 5600 batches | train loss 0.3132873 +| epoch 9 | 3655/ 5600 batches | train loss 0.3329066 +| epoch 9 | 3659/ 5600 batches | train loss 0.3047917 +| epoch 9 | 3663/ 5600 batches | train loss 0.3669673 +| epoch 9 | 3667/ 5600 batches | train loss 0.3364867 +| epoch 9 | 3671/ 5600 batches | train loss 0.3370683 +| epoch 9 | 3675/ 5600 batches | train loss 0.3853757 +| epoch 9 | 3679/ 5600 batches | train loss 0.3542864 +| epoch 9 | 3683/ 5600 batches | train loss 0.3454396 +| epoch 9 | 3687/ 5600 batches | train loss 0.2957543 +| epoch 9 | 3691/ 5600 batches | train loss 0.3626733 +| epoch 9 | 3695/ 5600 batches | train loss 0.3392436 +| epoch 9 | 3699/ 5600 batches | train loss 0.3917693 +| epoch 9 | 3703/ 5600 batches | train loss 0.3241791 +| epoch 9 | 3707/ 5600 batches | train loss 0.2760090 +| epoch 9 | 3711/ 5600 batches | train loss 0.3314572 +| epoch 9 | 3715/ 5600 batches | train loss 0.3338669 +| epoch 9 | 3719/ 5600 batches | train loss 0.3395089 +| epoch 9 | 3723/ 5600 batches | train loss 0.2166020 +| epoch 9 | 3727/ 5600 batches | train loss 0.3775554 +| epoch 9 | 3731/ 5600 batches | train loss 0.3627342 +| epoch 9 | 3735/ 5600 batches | train loss 0.3165976 +| epoch 9 | 3739/ 5600 batches | train loss 0.3440972 +| epoch 9 | 3743/ 5600 batches | train loss 0.2791128 +| epoch 9 | 3747/ 5600 batches | train loss 0.3389126 +| epoch 9 | 3751/ 5600 batches | train loss 0.3657742 +| epoch 9 | 3755/ 5600 batches | train loss 0.3281523 +| epoch 9 | 3759/ 5600 batches | train loss 0.3772044 +| epoch 9 | 3763/ 5600 batches | train loss 0.3322845 +| epoch 9 | 3767/ 5600 batches | train loss 0.3361581 +| epoch 9 | 3771/ 5600 batches | train loss 0.3274120 +| epoch 9 | 3775/ 5600 batches | train loss 0.3003690 +| epoch 9 | 3779/ 5600 batches | train loss 0.3098999 +| epoch 9 | 3783/ 5600 batches | train loss 0.3123960 +| epoch 9 | 3787/ 5600 batches | train loss 0.3207964 +| epoch 9 | 3791/ 5600 batches | train loss 0.3452506 +| epoch 9 | 3795/ 5600 batches | train loss 0.3226565 +| epoch 9 | 3799/ 5600 batches | train loss 0.2774521 +| epoch 9 | 3803/ 5600 batches | train loss 0.2974872 +| epoch 9 | 3807/ 5600 batches | train loss 0.3299160 +| epoch 9 | 3811/ 5600 batches | train loss 0.3201866 +| epoch 9 | 3815/ 5600 batches | train loss 0.2920985 +| epoch 9 | 3819/ 5600 batches | train loss 0.2961496 +| epoch 9 | 3823/ 5600 batches | train loss 0.3157450 +| epoch 9 | 3827/ 5600 batches | train loss 0.3846012 +| epoch 9 | 3831/ 5600 batches | train loss 0.3091317 +| epoch 9 | 3835/ 5600 batches | train loss 0.3369344 +| epoch 9 | 3839/ 5600 batches | train loss 0.3148677 +| epoch 9 | 3843/ 5600 batches | train loss 0.3419945 +| epoch 9 | 3847/ 5600 batches | train loss 0.2701206 +| epoch 9 | 3851/ 5600 batches | train loss 0.2525344 +| epoch 9 | 3855/ 5600 batches | train loss 0.3129537 +| epoch 9 | 3859/ 5600 batches | train loss 0.3396670 +| epoch 9 | 3863/ 5600 batches | train loss 0.2883749 +| epoch 9 | 3867/ 5600 batches | train loss 0.3094720 +| epoch 9 | 3871/ 5600 batches | train loss 0.3841922 +| epoch 9 | 3875/ 5600 batches | train loss 0.2915620 +| epoch 9 | 3879/ 5600 batches | train loss 0.3827666 +| epoch 9 | 3883/ 5600 batches | train loss 0.3344770 +| epoch 9 | 3887/ 5600 batches | train loss 0.3316664 +| epoch 9 | 3891/ 5600 batches | train loss 0.3982396 +| epoch 9 | 3895/ 5600 batches | train loss 0.2787249 +| epoch 9 | 3899/ 5600 batches | train loss 0.2909371 +| epoch 9 | 3903/ 5600 batches | train loss 0.2878937 +| epoch 9 | 3907/ 5600 batches | train loss 0.2866733 +| epoch 9 | 3911/ 5600 batches | train loss 0.2720941 +| epoch 9 | 3915/ 5600 batches | train loss 0.3476726 +| epoch 9 | 3919/ 5600 batches | train loss 0.3431253 +| epoch 9 | 3923/ 5600 batches | train loss 0.3475327 +| epoch 9 | 3927/ 5600 batches | train loss 0.3531080 +| epoch 9 | 3931/ 5600 batches | train loss 0.2111856 +| epoch 9 | 3935/ 5600 batches | train loss 0.3132364 +| epoch 9 | 3939/ 5600 batches | train loss 0.3165207 +| epoch 9 | 3943/ 5600 batches | train loss 0.3550408 +| epoch 9 | 3947/ 5600 batches | train loss 0.3641646 +| epoch 9 | 3951/ 5600 batches | train loss 0.3241143 +| epoch 9 | 3955/ 5600 batches | train loss 0.2967004 +| epoch 9 | 3959/ 5600 batches | train loss 0.3044518 +| epoch 9 | 3963/ 5600 batches | train loss 0.3371722 +| epoch 9 | 3967/ 5600 batches | train loss 0.3904058 +| epoch 9 | 3971/ 5600 batches | train loss 0.3316332 +| epoch 9 | 3975/ 5600 batches | train loss 0.3437453 +| epoch 9 | 3979/ 5600 batches | train loss 0.2850310 +| epoch 9 | 3983/ 5600 batches | train loss 0.2896672 +| epoch 9 | 3987/ 5600 batches | train loss 0.2920605 +| epoch 9 | 3991/ 5600 batches | train loss 0.3259482 +| epoch 9 | 3995/ 5600 batches | train loss 0.3199022 +| epoch 9 | 3999/ 5600 batches | train loss 0.2987677 +| epoch 9 | 4003/ 5600 batches | train loss 0.2977154 +| epoch 9 | 4007/ 5600 batches | train loss 0.2750321 +| epoch 9 | 4011/ 5600 batches | train loss 0.3158389 +| epoch 9 | 4015/ 5600 batches | train loss 0.3506395 +| epoch 9 | 4019/ 5600 batches | train loss 0.2821165 +| epoch 9 | 4023/ 5600 batches | train loss 0.3497882 +| epoch 9 | 4027/ 5600 batches | train loss 0.2818040 +| epoch 9 | 4031/ 5600 batches | train loss 0.3716486 +| epoch 9 | 4035/ 5600 batches | train loss 0.3380334 +| epoch 9 | 4039/ 5600 batches | train loss 0.3055975 +| epoch 9 | 4043/ 5600 batches | train loss 0.3147933 +| epoch 9 | 4047/ 5600 batches | train loss 0.3181896 +| epoch 9 | 4051/ 5600 batches | train loss 0.2965401 +| epoch 9 | 4055/ 5600 batches | train loss 0.3885281 +| epoch 9 | 4059/ 5600 batches | train loss 0.3296877 +| epoch 9 | 4063/ 5600 batches | train loss 0.3150590 +| epoch 9 | 4067/ 5600 batches | train loss 0.3940981 +| epoch 9 | 4071/ 5600 batches | train loss 0.2879879 +| epoch 9 | 4075/ 5600 batches | train loss 0.3192173 +| epoch 9 | 4079/ 5600 batches | train loss 0.3437995 +| epoch 9 | 4083/ 5600 batches | train loss 0.2797643 +| epoch 9 | 4087/ 5600 batches | train loss 0.2958347 +| epoch 9 | 4091/ 5600 batches | train loss 0.3269819 +| epoch 9 | 4095/ 5600 batches | train loss 0.3417767 +| epoch 9 | 4099/ 5600 batches | train loss 0.3314028 +| epoch 9 | 4103/ 5600 batches | train loss 0.2729079 +| epoch 9 | 4107/ 5600 batches | train loss 0.3071689 +| epoch 9 | 4111/ 5600 batches | train loss 0.3557109 +| epoch 9 | 4115/ 5600 batches | train loss 0.3320298 +| epoch 9 | 4119/ 5600 batches | train loss 0.3180135 +| epoch 9 | 4123/ 5600 batches | train loss 0.2759015 +| epoch 9 | 4127/ 5600 batches | train loss 0.3126533 +| epoch 9 | 4131/ 5600 batches | train loss 0.3197661 +| epoch 9 | 4135/ 5600 batches | train loss 0.2950937 +| epoch 9 | 4139/ 5600 batches | train loss 0.3236501 +| epoch 9 | 4143/ 5600 batches | train loss 0.3220993 +| epoch 9 | 4147/ 5600 batches | train loss 0.3416956 +| epoch 9 | 4151/ 5600 batches | train loss 0.3312122 +| epoch 9 | 4155/ 5600 batches | train loss 0.3727408 +| epoch 9 | 4159/ 5600 batches | train loss 0.2975741 +| epoch 9 | 4163/ 5600 batches | train loss 0.3010169 +| epoch 9 | 4167/ 5600 batches | train loss 0.3399770 +| epoch 9 | 4171/ 5600 batches | train loss 0.2792986 +| epoch 9 | 4175/ 5600 batches | train loss 0.2738520 +| epoch 9 | 4179/ 5600 batches | train loss 0.3450878 +| epoch 9 | 4183/ 5600 batches | train loss 0.3175778 +| epoch 9 | 4187/ 5600 batches | train loss 0.3073113 +| epoch 9 | 4191/ 5600 batches | train loss 0.3610817 +| epoch 9 | 4195/ 5600 batches | train loss 0.3579378 +| epoch 9 | 4199/ 5600 batches | train loss 0.2981077 +| epoch 9 | 4203/ 5600 batches | train loss 0.2441527 +| epoch 9 | 4207/ 5600 batches | train loss 0.3828390 +| epoch 9 | 4211/ 5600 batches | train loss 0.3014568 +| epoch 9 | 4215/ 5600 batches | train loss 0.3376835 +| epoch 9 | 4219/ 5600 batches | train loss 0.3237476 +| epoch 9 | 4223/ 5600 batches | train loss 0.3309041 +| epoch 9 | 4227/ 5600 batches | train loss 0.3332168 +| epoch 9 | 4231/ 5600 batches | train loss 0.3103395 +| epoch 9 | 4235/ 5600 batches | train loss 0.3239634 +| epoch 9 | 4239/ 5600 batches | train loss 0.3242548 +| epoch 9 | 4243/ 5600 batches | train loss 0.3457834 +| epoch 9 | 4247/ 5600 batches | train loss 0.3425575 +| epoch 9 | 4251/ 5600 batches | train loss 0.3160098 +| epoch 9 | 4255/ 5600 batches | train loss 0.3228479 +| epoch 9 | 4259/ 5600 batches | train loss 0.3249317 +| epoch 9 | 4263/ 5600 batches | train loss 0.3449650 +| epoch 9 | 4267/ 5600 batches | train loss 0.3066087 +| epoch 9 | 4271/ 5600 batches | train loss 0.3241156 +| epoch 9 | 4275/ 5600 batches | train loss 0.3031964 +| epoch 9 | 4279/ 5600 batches | train loss 0.3482698 +| epoch 9 | 4283/ 5600 batches | train loss 0.2845834 +| epoch 9 | 4287/ 5600 batches | train loss 0.2835161 +| epoch 9 | 4291/ 5600 batches | train loss 0.3434556 +| epoch 9 | 4295/ 5600 batches | train loss 0.2922624 +| epoch 9 | 4299/ 5600 batches | train loss 0.3671990 +| epoch 9 | 4303/ 5600 batches | train loss 0.4601110 +| epoch 9 | 4307/ 5600 batches | train loss 0.2977436 +| epoch 9 | 4311/ 5600 batches | train loss 0.2846648 +| epoch 9 | 4315/ 5600 batches | train loss 0.2824990 +| epoch 9 | 4319/ 5600 batches | train loss 0.3265041 +| epoch 9 | 4323/ 5600 batches | train loss 0.3057008 +| epoch 9 | 4327/ 5600 batches | train loss 0.3497362 +| epoch 9 | 4331/ 5600 batches | train loss 0.3022789 +| epoch 9 | 4335/ 5600 batches | train loss 0.3465089 +| epoch 9 | 4339/ 5600 batches | train loss 0.3122613 +| epoch 9 | 4343/ 5600 batches | train loss 0.3637337 +| epoch 9 | 4347/ 5600 batches | train loss 0.2932324 +| epoch 9 | 4351/ 5600 batches | train loss 0.3478986 +| epoch 9 | 4355/ 5600 batches | train loss 0.3189896 +| epoch 9 | 4359/ 5600 batches | train loss 0.3049094 +| epoch 9 | 4363/ 5600 batches | train loss 0.3761244 +| epoch 9 | 4367/ 5600 batches | train loss 0.3336942 +| epoch 9 | 4371/ 5600 batches | train loss 0.3653258 +| epoch 9 | 4375/ 5600 batches | train loss 0.3113562 +| epoch 9 | 4379/ 5600 batches | train loss 0.3388142 +| epoch 9 | 4383/ 5600 batches | train loss 0.3328767 +| epoch 9 | 4387/ 5600 batches | train loss 0.3528279 +| epoch 9 | 4391/ 5600 batches | train loss 0.2870510 +| epoch 9 | 4395/ 5600 batches | train loss 0.3084973 +| epoch 9 | 4399/ 5600 batches | train loss 0.3644424 +| epoch 9 | 4403/ 5600 batches | train loss 0.2946916 +| epoch 9 | 4407/ 5600 batches | train loss 0.3172247 +| epoch 9 | 4411/ 5600 batches | train loss 0.3691543 +| epoch 9 | 4415/ 5600 batches | train loss 0.3089358 +| epoch 9 | 4419/ 5600 batches | train loss 0.3301391 +| epoch 9 | 4423/ 5600 batches | train loss 0.3166687 +| epoch 9 | 4427/ 5600 batches | train loss 0.3465905 +| epoch 9 | 4431/ 5600 batches | train loss 0.3231139 +| epoch 9 | 4435/ 5600 batches | train loss 0.3224941 +| epoch 9 | 4439/ 5600 batches | train loss 0.3360788 +| epoch 9 | 4443/ 5600 batches | train loss 0.3003087 +| epoch 9 | 4447/ 5600 batches | train loss 0.2991272 +| epoch 9 | 4451/ 5600 batches | train loss 0.3060647 +| epoch 9 | 4455/ 5600 batches | train loss 0.2995933 +| epoch 9 | 4459/ 5600 batches | train loss 0.3322386 +| epoch 9 | 4463/ 5600 batches | train loss 0.4046226 +| epoch 9 | 4467/ 5600 batches | train loss 0.2880375 +| epoch 9 | 4471/ 5600 batches | train loss 0.3273200 +| epoch 9 | 4475/ 5600 batches | train loss 0.2758064 +| epoch 9 | 4479/ 5600 batches | train loss 0.3454599 +| epoch 9 | 4483/ 5600 batches | train loss 0.3526713 +| epoch 9 | 4487/ 5600 batches | train loss 0.3065384 +| epoch 9 | 4491/ 5600 batches | train loss 0.3200732 +| epoch 9 | 4495/ 5600 batches | train loss 0.2820603 +| epoch 9 | 4499/ 5600 batches | train loss 0.2980291 +| epoch 9 | 4503/ 5600 batches | train loss 0.3357330 +| epoch 9 | 4507/ 5600 batches | train loss 0.3625714 +| epoch 9 | 4511/ 5600 batches | train loss 0.3087776 +| epoch 9 | 4515/ 5600 batches | train loss 0.3760454 +| epoch 9 | 4519/ 5600 batches | train loss 0.3520465 +| epoch 9 | 4523/ 5600 batches | train loss 0.3413619 +| epoch 9 | 4527/ 5600 batches | train loss 0.3308818 +| epoch 9 | 4531/ 5600 batches | train loss 0.4460540 +| epoch 9 | 4535/ 5600 batches | train loss 0.3851519 +| epoch 9 | 4539/ 5600 batches | train loss 0.3274069 +| epoch 9 | 4543/ 5600 batches | train loss 0.3852856 +| epoch 9 | 4547/ 5600 batches | train loss 0.3346678 +| epoch 9 | 4551/ 5600 batches | train loss 0.4414951 +| epoch 9 | 4555/ 5600 batches | train loss 0.3829132 +| epoch 9 | 4559/ 5600 batches | train loss 0.2554446 +| epoch 9 | 4563/ 5600 batches | train loss 0.3480254 +| epoch 9 | 4567/ 5600 batches | train loss 0.3253938 +| epoch 9 | 4571/ 5600 batches | train loss 0.3392214 +| epoch 9 | 4575/ 5600 batches | train loss 0.3242637 +| epoch 9 | 4579/ 5600 batches | train loss 0.3306509 +| epoch 9 | 4583/ 5600 batches | train loss 0.3530917 +| epoch 9 | 4587/ 5600 batches | train loss 0.3518945 +| epoch 9 | 4591/ 5600 batches | train loss 0.2915429 +| epoch 9 | 4595/ 5600 batches | train loss 0.3257408 +| epoch 9 | 4599/ 5600 batches | train loss 0.3610113 +| epoch 9 | 4603/ 5600 batches | train loss 0.3030157 +| epoch 9 | 4607/ 5600 batches | train loss 0.2886183 +| epoch 9 | 4611/ 5600 batches | train loss 0.3007910 +| epoch 9 | 4615/ 5600 batches | train loss 0.3258038 +| epoch 9 | 4619/ 5600 batches | train loss 0.3065246 +| epoch 9 | 4623/ 5600 batches | train loss 0.3117072 +| epoch 9 | 4627/ 5600 batches | train loss 0.3505569 +| epoch 9 | 4631/ 5600 batches | train loss 0.3350190 +| epoch 9 | 4635/ 5600 batches | train loss 0.3565465 +| epoch 9 | 4639/ 5600 batches | train loss 0.3086585 +| epoch 9 | 4643/ 5600 batches | train loss 0.3411821 +| epoch 9 | 4647/ 5600 batches | train loss 0.3420150 +| epoch 9 | 4651/ 5600 batches | train loss 0.3220481 +| epoch 9 | 4655/ 5600 batches | train loss 0.3486391 +| epoch 9 | 4659/ 5600 batches | train loss 0.3565316 +| epoch 9 | 4663/ 5600 batches | train loss 0.3222915 +| epoch 9 | 4667/ 5600 batches | train loss 0.3209874 +| epoch 9 | 4671/ 5600 batches | train loss 0.3235368 +| epoch 9 | 4675/ 5600 batches | train loss 0.3584311 +| epoch 9 | 4679/ 5600 batches | train loss 0.3845605 +| epoch 9 | 4683/ 5600 batches | train loss 0.3196755 +| epoch 9 | 4687/ 5600 batches | train loss 0.2906299 +| epoch 9 | 4691/ 5600 batches | train loss 0.3538878 +| epoch 9 | 4695/ 5600 batches | train loss 0.2970775 +| epoch 9 | 4699/ 5600 batches | train loss 0.3726867 +| epoch 9 | 4703/ 5600 batches | train loss 0.3579080 +| epoch 9 | 4707/ 5600 batches | train loss 0.3391295 +| epoch 9 | 4711/ 5600 batches | train loss 0.4559695 +| epoch 9 | 4715/ 5600 batches | train loss 0.3705645 +| epoch 9 | 4719/ 5600 batches | train loss 0.2750903 +| epoch 9 | 4723/ 5600 batches | train loss 0.3243460 +| epoch 9 | 4727/ 5600 batches | train loss 0.2138934 +| epoch 9 | 4731/ 5600 batches | train loss 0.3241253 +| epoch 9 | 4735/ 5600 batches | train loss 0.3237049 +| epoch 9 | 4739/ 5600 batches | train loss 0.3029550 +| epoch 9 | 4743/ 5600 batches | train loss 0.3302003 +| epoch 9 | 4747/ 5600 batches | train loss 0.3103490 +| epoch 9 | 4751/ 5600 batches | train loss 0.3356411 +| epoch 9 | 4755/ 5600 batches | train loss 0.3667802 +| epoch 9 | 4759/ 5600 batches | train loss 0.2994777 +| epoch 9 | 4763/ 5600 batches | train loss 0.3805918 +| epoch 9 | 4767/ 5600 batches | train loss 0.2993538 +| epoch 9 | 4771/ 5600 batches | train loss 0.3439234 +| epoch 9 | 4775/ 5600 batches | train loss 0.3297832 +| epoch 9 | 4779/ 5600 batches | train loss 0.3306727 +| epoch 9 | 4783/ 5600 batches | train loss 0.3407427 +| epoch 9 | 4787/ 5600 batches | train loss 0.3177446 +| epoch 9 | 4791/ 5600 batches | train loss 0.2616434 +| epoch 9 | 4795/ 5600 batches | train loss 0.3176656 +| epoch 9 | 4799/ 5600 batches | train loss 0.4360693 +| epoch 9 | 4803/ 5600 batches | train loss 0.3045599 +| epoch 9 | 4807/ 5600 batches | train loss 0.3592964 +| epoch 9 | 4811/ 5600 batches | train loss 0.3841150 +| epoch 9 | 4815/ 5600 batches | train loss 0.3297287 +| epoch 9 | 4819/ 5600 batches | train loss 0.3862092 +| epoch 9 | 4823/ 5600 batches | train loss 0.3290065 +| epoch 9 | 4827/ 5600 batches | train loss 0.3315158 +| epoch 9 | 4831/ 5600 batches | train loss 0.3172660 +| epoch 9 | 4835/ 5600 batches | train loss 0.2993275 +| epoch 9 | 4839/ 5600 batches | train loss 0.3915400 +| epoch 9 | 4843/ 5600 batches | train loss 0.3475725 +| epoch 9 | 4847/ 5600 batches | train loss 0.3030707 +| epoch 9 | 4851/ 5600 batches | train loss 0.3265861 +| epoch 9 | 4855/ 5600 batches | train loss 0.3396782 +| epoch 9 | 4859/ 5600 batches | train loss 0.2721505 +| epoch 9 | 4863/ 5600 batches | train loss 0.3448935 +| epoch 9 | 4867/ 5600 batches | train loss 0.3850551 +| epoch 9 | 4871/ 5600 batches | train loss 0.3133486 +| epoch 9 | 4875/ 5600 batches | train loss 0.2863214 +| epoch 9 | 4879/ 5600 batches | train loss 0.2484305 +| epoch 9 | 4883/ 5600 batches | train loss 0.3347967 +| epoch 9 | 4887/ 5600 batches | train loss 0.3789181 +| epoch 9 | 4891/ 5600 batches | train loss 0.3063028 +| epoch 9 | 4895/ 5600 batches | train loss 0.4324688 +| epoch 9 | 4899/ 5600 batches | train loss 0.3630472 +| epoch 9 | 4903/ 5600 batches | train loss 0.3031195 +| epoch 9 | 4907/ 5600 batches | train loss 0.3418953 +| epoch 9 | 4911/ 5600 batches | train loss 0.3723571 +| epoch 9 | 4915/ 5600 batches | train loss 0.3191301 +| epoch 9 | 4919/ 5600 batches | train loss 0.3235968 +| epoch 9 | 4923/ 5600 batches | train loss 0.3733742 +| epoch 9 | 4927/ 5600 batches | train loss 0.3858477 +| epoch 9 | 4931/ 5600 batches | train loss 0.3152288 +| epoch 9 | 4935/ 5600 batches | train loss 0.3332540 +| epoch 9 | 4939/ 5600 batches | train loss 0.3832130 +| epoch 9 | 4943/ 5600 batches | train loss 0.2684195 +| epoch 9 | 4947/ 5600 batches | train loss 0.3365546 +| epoch 9 | 4951/ 5600 batches | train loss 0.2905185 +| epoch 9 | 4955/ 5600 batches | train loss 0.3494201 +| epoch 9 | 4959/ 5600 batches | train loss 0.2974440 +| epoch 9 | 4963/ 5600 batches | train loss 0.3743006 +| epoch 9 | 4967/ 5600 batches | train loss 0.3646712 +| epoch 9 | 4971/ 5600 batches | train loss 0.2908451 +| epoch 9 | 4975/ 5600 batches | train loss 0.2855065 +| epoch 9 | 4979/ 5600 batches | train loss 0.3175706 +| epoch 9 | 4983/ 5600 batches | train loss 0.2908698 +| epoch 9 | 4987/ 5600 batches | train loss 0.3541332 +| epoch 9 | 4991/ 5600 batches | train loss 0.3373737 +| epoch 9 | 4995/ 5600 batches | train loss 0.3553635 +| epoch 9 | 4999/ 5600 batches | train loss 0.3642682 +| epoch 9 | 5003/ 5600 batches | train loss 0.3616741 +| epoch 9 | 5007/ 5600 batches | train loss 0.3953162 +| epoch 9 | 5011/ 5600 batches | train loss 0.3751399 +| epoch 9 | 5015/ 5600 batches | train loss 0.3280495 +| epoch 9 | 5019/ 5600 batches | train loss 0.3613163 +| epoch 9 | 5023/ 5600 batches | train loss 0.2885473 +| epoch 9 | 5027/ 5600 batches | train loss 0.3456758 +| epoch 9 | 5031/ 5600 batches | train loss 0.3430904 +| epoch 9 | 5035/ 5600 batches | train loss 0.2904784 +| epoch 9 | 5039/ 5600 batches | train loss 0.3366355 +| epoch 9 | 5043/ 5600 batches | train loss 0.2838410 +| epoch 9 | 5047/ 5600 batches | train loss 0.3390523 +| epoch 9 | 5051/ 5600 batches | train loss 0.3268248 +| epoch 9 | 5055/ 5600 batches | train loss 0.3676066 +| epoch 9 | 5059/ 5600 batches | train loss 0.3008089 +| epoch 9 | 5063/ 5600 batches | train loss 0.4050298 +| epoch 9 | 5067/ 5600 batches | train loss 0.2931160 +| epoch 9 | 5071/ 5600 batches | train loss 0.3452343 +| epoch 9 | 5075/ 5600 batches | train loss 0.3035080 +| epoch 9 | 5079/ 5600 batches | train loss 0.3375718 +| epoch 9 | 5083/ 5600 batches | train loss 0.3317946 +| epoch 9 | 5087/ 5600 batches | train loss 0.3267793 +| epoch 9 | 5091/ 5600 batches | train loss 0.2983876 +| epoch 9 | 5095/ 5600 batches | train loss 0.2936046 +| epoch 9 | 5099/ 5600 batches | train loss 0.3140972 +| epoch 9 | 5103/ 5600 batches | train loss 0.3639950 +| epoch 9 | 5107/ 5600 batches | train loss 0.2652886 +| epoch 9 | 5111/ 5600 batches | train loss 0.3400671 +| epoch 9 | 5115/ 5600 batches | train loss 0.3348877 +| epoch 9 | 5119/ 5600 batches | train loss 0.3108631 +| epoch 9 | 5123/ 5600 batches | train loss 0.3275304 +| epoch 9 | 5127/ 5600 batches | train loss 0.3209010 +| epoch 9 | 5131/ 5600 batches | train loss 0.3277054 +| epoch 9 | 5135/ 5600 batches | train loss 0.3366281 +| epoch 9 | 5139/ 5600 batches | train loss 0.2807584 +| epoch 9 | 5143/ 5600 batches | train loss 0.3218501 +| epoch 9 | 5147/ 5600 batches | train loss 0.3716165 +| epoch 9 | 5151/ 5600 batches | train loss 0.2485518 +| epoch 9 | 5155/ 5600 batches | train loss 0.3468097 +| epoch 9 | 5159/ 5600 batches | train loss 0.3792513 +| epoch 9 | 5163/ 5600 batches | train loss 0.2770201 +| epoch 9 | 5167/ 5600 batches | train loss 0.3525392 +| epoch 9 | 5171/ 5600 batches | train loss 0.3286361 +| epoch 9 | 5175/ 5600 batches | train loss 0.3312157 +| epoch 9 | 5179/ 5600 batches | train loss 0.3531361 +| epoch 9 | 5183/ 5600 batches | train loss 0.2565746 +| epoch 9 | 5187/ 5600 batches | train loss 0.3088233 +| epoch 9 | 5191/ 5600 batches | train loss 0.3780075 +| epoch 9 | 5195/ 5600 batches | train loss 0.3373103 +| epoch 9 | 5199/ 5600 batches | train loss 0.3174288 +| epoch 9 | 5203/ 5600 batches | train loss 0.2933409 +| epoch 9 | 5207/ 5600 batches | train loss 0.3470063 +| epoch 9 | 5211/ 5600 batches | train loss 0.3725102 +| epoch 9 | 5215/ 5600 batches | train loss 0.3378513 +| epoch 9 | 5219/ 5600 batches | train loss 0.3180286 +| epoch 9 | 5223/ 5600 batches | train loss 0.3303530 +| epoch 9 | 5227/ 5600 batches | train loss 0.4279588 +| epoch 9 | 5231/ 5600 batches | train loss 0.3039989 +| epoch 9 | 5235/ 5600 batches | train loss 0.3443394 +| epoch 9 | 5239/ 5600 batches | train loss 0.3824621 +| epoch 9 | 5243/ 5600 batches | train loss 0.4054101 +| epoch 9 | 5247/ 5600 batches | train loss 0.3236289 +| epoch 9 | 5251/ 5600 batches | train loss 0.3711631 +| epoch 9 | 5255/ 5600 batches | train loss 0.3761195 +| epoch 9 | 5259/ 5600 batches | train loss 0.3474683 +| epoch 9 | 5263/ 5600 batches | train loss 0.3086074 +| epoch 9 | 5267/ 5600 batches | train loss 0.3008513 +| epoch 9 | 5271/ 5600 batches | train loss 0.3258665 +| epoch 9 | 5275/ 5600 batches | train loss 0.3050209 +| epoch 9 | 5279/ 5600 batches | train loss 0.3553363 +| epoch 9 | 5283/ 5600 batches | train loss 0.3572441 +| epoch 9 | 5287/ 5600 batches | train loss 0.3581072 +| epoch 9 | 5291/ 5600 batches | train loss 0.2947434 +| epoch 9 | 5295/ 5600 batches | train loss 0.3092681 +| epoch 9 | 5299/ 5600 batches | train loss 0.3385769 +| epoch 9 | 5303/ 5600 batches | train loss 0.2932279 +| epoch 9 | 5307/ 5600 batches | train loss 0.2661516 +| epoch 9 | 5311/ 5600 batches | train loss 0.2959409 +| epoch 9 | 5315/ 5600 batches | train loss 0.3044648 +| epoch 9 | 5319/ 5600 batches | train loss 0.4075511 +| epoch 9 | 5323/ 5600 batches | train loss 0.3617909 +| epoch 9 | 5327/ 5600 batches | train loss 0.3429106 +| epoch 9 | 5331/ 5600 batches | train loss 0.3158651 +| epoch 9 | 5335/ 5600 batches | train loss 0.2987241 +| epoch 9 | 5339/ 5600 batches | train loss 0.3951049 +| epoch 9 | 5343/ 5600 batches | train loss 0.3568354 +| epoch 9 | 5347/ 5600 batches | train loss 0.2970605 +| epoch 9 | 5351/ 5600 batches | train loss 0.3033123 +| epoch 9 | 5355/ 5600 batches | train loss 0.3592325 +| epoch 9 | 5359/ 5600 batches | train loss 0.2780588 +| epoch 9 | 5363/ 5600 batches | train loss 0.3316317 +| epoch 9 | 5367/ 5600 batches | train loss 0.3691844 +| epoch 9 | 5371/ 5600 batches | train loss 0.3383172 +| epoch 9 | 5375/ 5600 batches | train loss 0.3316521 +| epoch 9 | 5379/ 5600 batches | train loss 0.3733601 +| epoch 9 | 5383/ 5600 batches | train loss 0.3629373 +| epoch 9 | 5387/ 5600 batches | train loss 0.3125201 +| epoch 9 | 5391/ 5600 batches | train loss 0.2958196 +| epoch 9 | 5395/ 5600 batches | train loss 0.3402901 +| epoch 9 | 5399/ 5600 batches | train loss 0.3316789 +| epoch 9 | 5403/ 5600 batches | train loss 0.3268764 +| epoch 9 | 5407/ 5600 batches | train loss 0.3477388 +| epoch 9 | 5411/ 5600 batches | train loss 0.3695286 +| epoch 9 | 5415/ 5600 batches | train loss 0.2835694 +| epoch 9 | 5419/ 5600 batches | train loss 0.3361816 +| epoch 9 | 5423/ 5600 batches | train loss 0.2941847 +| epoch 9 | 5427/ 5600 batches | train loss 0.3239444 +| epoch 9 | 5431/ 5600 batches | train loss 0.3251675 +| epoch 9 | 5435/ 5600 batches | train loss 0.3209187 +| epoch 9 | 5439/ 5600 batches | train loss 0.3447919 +| epoch 9 | 5443/ 5600 batches | train loss 0.3397914 +| epoch 9 | 5447/ 5600 batches | train loss 0.3800880 +| epoch 9 | 5451/ 5600 batches | train loss 0.3411205 +| epoch 9 | 5455/ 5600 batches | train loss 0.3814847 +| epoch 9 | 5459/ 5600 batches | train loss 0.3637569 +| epoch 9 | 5463/ 5600 batches | train loss 0.3347782 +| epoch 9 | 5467/ 5600 batches | train loss 0.3450862 +| epoch 9 | 5471/ 5600 batches | train loss 0.3376910 +| epoch 9 | 5475/ 5600 batches | train loss 0.3534324 +| epoch 9 | 5479/ 5600 batches | train loss 0.3546697 +| epoch 9 | 5483/ 5600 batches | train loss 0.3952838 +| epoch 9 | 5487/ 5600 batches | train loss 0.3282903 +| epoch 9 | 5491/ 5600 batches | train loss 0.2903863 +| epoch 9 | 5495/ 5600 batches | train loss 0.3567582 +| epoch 9 | 5499/ 5600 batches | train loss 0.3166868 +| epoch 9 | 5503/ 5600 batches | train loss 0.3794969 +| epoch 9 | 5507/ 5600 batches | train loss 0.2935147 +| epoch 9 | 5511/ 5600 batches | train loss 0.3673822 +| epoch 9 | 5515/ 5600 batches | train loss 0.3224820 +| epoch 9 | 5519/ 5600 batches | train loss 0.3483322 +| epoch 9 | 5523/ 5600 batches | train loss 0.3408781 +| epoch 9 | 5527/ 5600 batches | train loss 0.3008234 +| epoch 9 | 5531/ 5600 batches | train loss 0.3857479 +| epoch 9 | 5535/ 5600 batches | train loss 0.2869741 +| epoch 9 | 5539/ 5600 batches | train loss 0.3359959 +| epoch 9 | 5543/ 5600 batches | train loss 0.3271914 +| epoch 9 | 5547/ 5600 batches | train loss 0.2914419 +| epoch 9 | 5551/ 5600 batches | train loss 0.3025111 +| epoch 9 | 5555/ 5600 batches | train loss 0.4146928 +| epoch 9 | 5559/ 5600 batches | train loss 0.3568379 +| epoch 9 | 5563/ 5600 batches | train loss 0.3664250 +| epoch 9 | 5567/ 5600 batches | train loss 0.3244205 +| epoch 9 | 5571/ 5600 batches | train loss 0.3192602 +| epoch 9 | 5575/ 5600 batches | train loss 0.2698470 +| epoch 9 | 5579/ 5600 batches | train loss 0.2883549 +| epoch 9 | 5583/ 5600 batches | train loss 0.3125293 +| epoch 9 | 5587/ 5600 batches | train loss 0.3429106 +| epoch 9 | 5591/ 5600 batches | train loss 0.3164818 +| epoch 9 | 5595/ 5600 batches | train loss 0.2874424 +| epoch 9 | 5599/ 5600 batches | train loss 0.3531969 +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 5600 batches | test loss 0.4102776 +| epoch 9 | 7/ 5600 batches | test loss 0.5608594 +| epoch 9 | 11/ 5600 batches | test loss 0.4142999 +| epoch 9 | 15/ 5600 batches | test loss 0.5086458 +| epoch 9 | 19/ 5600 batches | test loss 0.5109013 +| epoch 9 | 23/ 5600 batches | test loss 0.4971206 +| epoch 9 | 27/ 5600 batches | test loss 0.4335437 +| epoch 9 | 31/ 5600 batches | test loss 0.3986371 +| epoch 9 | 35/ 5600 batches | test loss 0.5546825 +| epoch 9 | 39/ 5600 batches | test loss 0.4897359 +| epoch 9 | 43/ 5600 batches | test loss 0.3016306 +| epoch 9 | 47/ 5600 batches | test loss 0.3564081 +| epoch 9 | 51/ 5600 batches | test loss 0.4951559 +| epoch 9 | 55/ 5600 batches | test loss 0.5289870 +| epoch 9 | 59/ 5600 batches | test loss 0.4113869 +| epoch 9 | 63/ 5600 batches | test loss 0.4878671 +| epoch 9 | 67/ 5600 batches | test loss 0.6571071 +| epoch 9 | 71/ 5600 batches | test loss 0.5621938 +| epoch 9 | 75/ 5600 batches | test loss 0.4314635 +| epoch 9 | 79/ 5600 batches | test loss 0.6010895 +| epoch 9 | 83/ 5600 batches | test loss 0.4154790 +| epoch 9 | 87/ 5600 batches | test loss 0.3702061 +| epoch 9 | 91/ 5600 batches | test loss 0.4205513 +| epoch 9 | 95/ 5600 batches | test loss 0.6401476 +| epoch 9 | 99/ 5600 batches | test loss 0.6186177 +| epoch 9 | 103/ 5600 batches | test loss 0.5342494 +| epoch 9 | 107/ 5600 batches | test loss 0.5294924 +| epoch 9 | 111/ 5600 batches | test loss 0.4389595 +| epoch 9 | 115/ 5600 batches | test loss 0.5345454 +| epoch 9 | 119/ 5600 batches | test loss 0.4259169 +| epoch 9 | 123/ 5600 batches | test loss 0.5460346 +| epoch 9 | 127/ 5600 batches | test loss 0.4365442 +| epoch 9 | 131/ 5600 batches | test loss 0.3378321 +| epoch 9 | 135/ 5600 batches | test loss 0.5806455 +| epoch 9 | 139/ 5600 batches | test loss 0.3966823 +| epoch 9 | 143/ 5600 batches | test loss 0.5537632 +| epoch 9 | 147/ 5600 batches | test loss 0.4757431 +| epoch 9 | 151/ 5600 batches | test loss 0.3728489 +| epoch 9 | 155/ 5600 batches | test loss 0.4304737 +| epoch 9 | 159/ 5600 batches | test loss 0.4759910 +| epoch 9 | 163/ 5600 batches | test loss 0.5580440 +| epoch 9 | 167/ 5600 batches | test loss 0.4990386 +| epoch 9 | 171/ 5600 batches | test loss 0.4994659 +| epoch 9 | 175/ 5600 batches | test loss 0.5049891 +| epoch 9 | 179/ 5600 batches | test loss 0.6538826 +| epoch 9 | 183/ 5600 batches | test loss 0.4945371 +| epoch 9 | 187/ 5600 batches | test loss 0.5625325 +| epoch 9 | 191/ 5600 batches | test loss 0.7508577 +| epoch 9 | 195/ 5600 batches | test loss 0.5879827 +| epoch 9 | 199/ 5600 batches | test loss 0.4184368 +| epoch 9 | 203/ 5600 batches | test loss 0.5901479 +| epoch 9 | 207/ 5600 batches | test loss 0.4980239 +| epoch 9 | 211/ 5600 batches | test loss 0.4148401 +| epoch 9 | 215/ 5600 batches | test loss 0.3587280 +| epoch 9 | 219/ 5600 batches | test loss 0.3411234 +| epoch 9 | 223/ 5600 batches | test loss 0.5263515 +| epoch 9 | 227/ 5600 batches | test loss 0.5170181 +| epoch 9 | 231/ 5600 batches | test loss 0.6125486 +| epoch 9 | 235/ 5600 batches | test loss 0.5084212 +| epoch 9 | 239/ 5600 batches | test loss 0.4718993 +| epoch 9 | 243/ 5600 batches | test loss 0.5959854 +| epoch 9 | 247/ 5600 batches | test loss 0.6212813 +| epoch 9 | 251/ 5600 batches | test loss 0.4267126 +| epoch 9 | 255/ 5600 batches | test loss 0.5035329 +| epoch 9 | 259/ 5600 batches | test loss 0.4744330 +| epoch 9 | 263/ 5600 batches | test loss 0.5409668 +| epoch 9 | 267/ 5600 batches | test loss 0.5323756 +| epoch 9 | 271/ 5600 batches | test loss 0.5601332 +| epoch 9 | 275/ 5600 batches | test loss 0.4986450 +| epoch 9 | 279/ 5600 batches | test loss 0.4971890 +| epoch 9 | 283/ 5600 batches | test loss 0.4777661 +| epoch 9 | 287/ 5600 batches | test loss 0.4491914 +| epoch 9 | 291/ 5600 batches | test loss 0.5796357 +| epoch 9 | 295/ 5600 batches | test loss 0.4906626 +| epoch 9 | 299/ 5600 batches | test loss 0.3880326 +| epoch 9 | 303/ 5600 batches | test loss 0.6108595 +| epoch 9 | 307/ 5600 batches | test loss 0.7030048 +| epoch 9 | 311/ 5600 batches | test loss 0.5201672 +| epoch 9 | 315/ 5600 batches | test loss 0.4971379 +| epoch 9 | 319/ 5600 batches | test loss 0.4553623 +| epoch 9 | 323/ 5600 batches | test loss 0.5822986 +| epoch 9 | 327/ 5600 batches | test loss 0.3825220 +| epoch 9 | 331/ 5600 batches | test loss 0.4826259 +| epoch 9 | 335/ 5600 batches | test loss 0.6453448 +| epoch 9 | 339/ 5600 batches | test loss 0.4894572 +| epoch 9 | 343/ 5600 batches | test loss 0.2458837 +| epoch 9 | 347/ 5600 batches | test loss 0.6248262 +| epoch 9 | 351/ 5600 batches | test loss 0.3949414 +| epoch 9 | 355/ 5600 batches | test loss 0.6913618 +| epoch 9 | 359/ 5600 batches | test loss 0.3900443 +| epoch 9 | 363/ 5600 batches | test loss 0.5895009 +| epoch 9 | 367/ 5600 batches | test loss 0.6965958 +| epoch 9 | 371/ 5600 batches | test loss 0.3645308 +| epoch 9 | 375/ 5600 batches | test loss 0.4501103 +| epoch 9 | 379/ 5600 batches | test loss 0.5698304 +| epoch 9 | 383/ 5600 batches | test loss 0.7001534 +| epoch 9 | 387/ 5600 batches | test loss 0.4286712 +| epoch 9 | 391/ 5600 batches | test loss 0.3630496 +| epoch 9 | 395/ 5600 batches | test loss 0.7484605 +| epoch 9 | 399/ 5600 batches | test loss 0.4286330 +| epoch 9 | 403/ 5600 batches | test loss 0.5049569 +| epoch 9 | 407/ 5600 batches | test loss 0.5138278 +| epoch 9 | 411/ 5600 batches | test loss 0.4151874 +| epoch 9 | 415/ 5600 batches | test loss 0.6330306 +| epoch 9 | 419/ 5600 batches | test loss 0.4434909 +| epoch 9 | 423/ 5600 batches | test loss 0.4108200 +| epoch 9 | 427/ 5600 batches | test loss 0.4864677 +| epoch 9 | 431/ 5600 batches | test loss 0.5049138 +| epoch 9 | 435/ 5600 batches | test loss 0.4798045 +| epoch 9 | 439/ 5600 batches | test loss 0.5523229 +| epoch 9 | 443/ 5600 batches | test loss 0.4777312 +| epoch 9 | 447/ 5600 batches | test loss 0.6857151 +| epoch 9 | 451/ 5600 batches | test loss 0.4008249 +| epoch 9 | 455/ 5600 batches | test loss 0.6055963 +| epoch 9 | 459/ 5600 batches | test loss 0.4591282 +| epoch 9 | 463/ 5600 batches | test loss 0.4159902 +| epoch 9 | 467/ 5600 batches | test loss 0.3451754 +| epoch 9 | 471/ 5600 batches | test loss 0.3934759 +| epoch 9 | 475/ 5600 batches | test loss 0.6327618 +| epoch 9 | 479/ 5600 batches | test loss 0.4085187 +| epoch 9 | 483/ 5600 batches | test loss 0.6771944 +| epoch 9 | 487/ 5600 batches | test loss 0.4241236 +| epoch 9 | 491/ 5600 batches | test loss 0.4351977 +| epoch 9 | 495/ 5600 batches | test loss 0.5741215 +| epoch 9 | 499/ 5600 batches | test loss 0.3905349 +| epoch 9 | 503/ 5600 batches | test loss 0.7193973 +| epoch 9 | 507/ 5600 batches | test loss 0.5866895 +| epoch 9 | 511/ 5600 batches | test loss 0.6677071 +| epoch 9 | 515/ 5600 batches | test loss 0.4927178 +| epoch 9 | 519/ 5600 batches | test loss 0.4261321 +| epoch 9 | 523/ 5600 batches | test loss 0.5607373 +| epoch 9 | 527/ 5600 batches | test loss 0.3752123 +| epoch 9 | 531/ 5600 batches | test loss 0.4758590 +| epoch 9 | 535/ 5600 batches | test loss 0.3458664 +| epoch 9 | 539/ 5600 batches | test loss 0.4550291 +| epoch 9 | 543/ 5600 batches | test loss 0.5332184 +| epoch 9 | 547/ 5600 batches | test loss 0.4617413 +| epoch 9 | 551/ 5600 batches | test loss 0.4726718 +| epoch 9 | 555/ 5600 batches | test loss 0.3874741 +| epoch 9 | 559/ 5600 batches | test loss 0.5288229 +| epoch 9 | 563/ 5600 batches | test loss 0.5603932 +| epoch 9 | 567/ 5600 batches | test loss 0.4541591 +| epoch 9 | 571/ 5600 batches | test loss 0.6504101 +| epoch 9 | 575/ 5600 batches | test loss 0.7177545 +| epoch 9 | 579/ 5600 batches | test loss 0.4909278 +| epoch 9 | 583/ 5600 batches | test loss 0.3822554 +| epoch 9 | 587/ 5600 batches | test loss 0.3796628 +| epoch 9 | 591/ 5600 batches | test loss 0.6370814 +| epoch 9 | 595/ 5600 batches | test loss 0.4145440 +| epoch 9 | 599/ 5600 batches | test loss 0.7083300 +| epoch 9 | 603/ 5600 batches | test loss 0.4270872 +| epoch 9 | 607/ 5600 batches | test loss 0.8266611 +| epoch 9 | 611/ 5600 batches | test loss 0.3930416 +| epoch 9 | 615/ 5600 batches | test loss 0.4825460 +| epoch 9 | 619/ 5600 batches | test loss 0.3854278 +| epoch 9 | 623/ 5600 batches | test loss 0.8855392 +| epoch 9 | 627/ 5600 batches | test loss 0.4078043 +| epoch 9 | 631/ 5600 batches | test loss 0.4582297 +| epoch 9 | 635/ 5600 batches | test loss 0.4989447 +| epoch 9 | 639/ 5600 batches | test loss 0.5189086 +| epoch 9 | 643/ 5600 batches | test loss 0.6825832 +| epoch 9 | 647/ 5600 batches | test loss 0.4515240 +| epoch 9 | 651/ 5600 batches | test loss 0.4080063 +| epoch 9 | 655/ 5600 batches | test loss 0.4023762 +| epoch 9 | 659/ 5600 batches | test loss 0.4827416 +| epoch 9 | 663/ 5600 batches | test loss 0.6325387 +| epoch 9 | 667/ 5600 batches | test loss 0.4764455 +| epoch 9 | 671/ 5600 batches | test loss 0.6900256 +| epoch 9 | 675/ 5600 batches | test loss 0.4290188 +| epoch 9 | 679/ 5600 batches | test loss 0.6823516 +| epoch 9 | 683/ 5600 batches | test loss 0.6212335 +| epoch 9 | 687/ 5600 batches | test loss 0.3780771 +| epoch 9 | 691/ 5600 batches | test loss 0.4535635 +| epoch 9 | 695/ 5600 batches | test loss 0.4577473 +| epoch 9 | 699/ 5600 batches | test loss 0.4026445 +| epoch 9 | 703/ 5600 batches | test loss 0.4099808 +| epoch 9 | 707/ 5600 batches | test loss 0.4225627 +| epoch 9 | 711/ 5600 batches | test loss 0.6020204 +| epoch 9 | 715/ 5600 batches | test loss 0.4305828 +| epoch 9 | 719/ 5600 batches | test loss 0.4606628 +| epoch 9 | 723/ 5600 batches | test loss 0.4926965 +| epoch 9 | 727/ 5600 batches | test loss 0.5878866 +| epoch 9 | 731/ 5600 batches | test loss 0.5285408 +| epoch 9 | 735/ 5600 batches | test loss 0.4566832 +| epoch 9 | 739/ 5600 batches | test loss 0.5656086 +| epoch 9 | 743/ 5600 batches | test loss 0.5032148 +| epoch 9 | 747/ 5600 batches | test loss 0.6247814 +| epoch 9 | 751/ 5600 batches | test loss 0.3736306 +| epoch 9 | 755/ 5600 batches | test loss 0.5943812 +| epoch 9 | 759/ 5600 batches | test loss 0.4689747 +| epoch 9 | 763/ 5600 batches | test loss 0.4871783 +| epoch 9 | 767/ 5600 batches | test loss 0.5047410 +| epoch 9 | 771/ 5600 batches | test loss 0.3891901 +| epoch 9 | 775/ 5600 batches | test loss 0.6001971 +| epoch 9 | 779/ 5600 batches | test loss 0.4935851 +| epoch 9 | 783/ 5600 batches | test loss 0.5510080 +| epoch 9 | 787/ 5600 batches | test loss 0.3589813 +| epoch 9 | 791/ 5600 batches | test loss 0.4100907 +| epoch 9 | 795/ 5600 batches | test loss 0.6698813 +| epoch 9 | 799/ 5600 batches | test loss 0.4502784 +| epoch 9 | 803/ 5600 batches | test loss 0.3929670 +| epoch 9 | 807/ 5600 batches | test loss 0.3778989 +| epoch 9 | 811/ 5600 batches | test loss 0.4172352 +| epoch 9 | 815/ 5600 batches | test loss 0.5053053 +| epoch 9 | 819/ 5600 batches | test loss 0.5081228 +| epoch 9 | 823/ 5600 batches | test loss 0.4697189 +| epoch 9 | 827/ 5600 batches | test loss 0.5066367 +| epoch 9 | 831/ 5600 batches | test loss 0.7793553 +| epoch 9 | 835/ 5600 batches | test loss 0.6889290 +| epoch 9 | 839/ 5600 batches | test loss 0.4115919 +| epoch 9 | 843/ 5600 batches | test loss 0.5763052 +| epoch 9 | 847/ 5600 batches | test loss 0.6885290 +| epoch 9 | 851/ 5600 batches | test loss 0.4675331 +| epoch 9 | 855/ 5600 batches | test loss 0.4559010 +| epoch 9 | 859/ 5600 batches | test loss 0.4706248 +| epoch 9 | 863/ 5600 batches | test loss 0.3325512 +| epoch 9 | 867/ 5600 batches | test loss 0.4215095 +| epoch 9 | 871/ 5600 batches | test loss 0.5167933 +| epoch 9 | 875/ 5600 batches | test loss 0.4861976 +| epoch 9 | 879/ 5600 batches | test loss 0.4040060 +| epoch 9 | 883/ 5600 batches | test loss 0.4512880 +| epoch 9 | 887/ 5600 batches | test loss 0.6215503 +| epoch 9 | 891/ 5600 batches | test loss 0.4323012 +| epoch 9 | 895/ 5600 batches | test loss 0.4903710 +| epoch 9 | 899/ 5600 batches | test loss 0.3653383 +| epoch 9 | 903/ 5600 batches | test loss 0.6572495 +| epoch 9 | 907/ 5600 batches | test loss 0.7375832 +| epoch 9 | 911/ 5600 batches | test loss 0.4739136 +| epoch 9 | 915/ 5600 batches | test loss 0.5786575 +| epoch 9 | 919/ 5600 batches | test loss 0.4316429 +| epoch 9 | 923/ 5600 batches | test loss 0.5573278 +| epoch 9 | 927/ 5600 batches | test loss 0.6186532 +| epoch 9 | 931/ 5600 batches | test loss 0.3623714 +| epoch 9 | 935/ 5600 batches | test loss 0.4242792 +| epoch 9 | 939/ 5600 batches | test loss 0.4504746 +| epoch 9 | 943/ 5600 batches | test loss 0.4582370 +| epoch 9 | 947/ 5600 batches | test loss 0.5414200 +| epoch 9 | 951/ 5600 batches | test loss 0.4221646 +| epoch 9 | 955/ 5600 batches | test loss 0.3726912 +| epoch 9 | 959/ 5600 batches | test loss 0.5572668 +| epoch 9 | 963/ 5600 batches | test loss 0.5449033 +| epoch 9 | 967/ 5600 batches | test loss 0.6001182 +| epoch 9 | 971/ 5600 batches | test loss 0.6011997 +| epoch 9 | 975/ 5600 batches | test loss 0.6601402 +| epoch 9 | 979/ 5600 batches | test loss 0.4159060 +| epoch 9 | 983/ 5600 batches | test loss 0.4617563 +| epoch 9 | 987/ 5600 batches | test loss 0.8056982 +| epoch 9 | 991/ 5600 batches | test loss 0.5349848 +| epoch 9 | 995/ 5600 batches | test loss 0.4472469 +| epoch 9 | 999/ 5600 batches | test loss 0.5712886 +| epoch 9 | 1003/ 5600 batches | test loss 0.4731938 +| epoch 9 | 1007/ 5600 batches | test loss 0.3909540 +| epoch 9 | 1011/ 5600 batches | test loss 0.4079033 +| epoch 9 | 1015/ 5600 batches | test loss 0.4659227 +| epoch 9 | 1019/ 5600 batches | test loss 0.6500721 +| epoch 9 | 1023/ 5600 batches | test loss 0.4573654 +| epoch 9 | 1027/ 5600 batches | test loss 0.6812928 +| epoch 9 | 1031/ 5600 batches | test loss 0.4258277 +| epoch 9 | 1035/ 5600 batches | test loss 0.5721861 +| epoch 9 | 1039/ 5600 batches | test loss 0.3964512 +| epoch 9 | 1043/ 5600 batches | test loss 0.6234503 +| epoch 9 | 1047/ 5600 batches | test loss 0.5223240 +| epoch 9 | 1051/ 5600 batches | test loss 0.4027610 +| epoch 9 | 1055/ 5600 batches | test loss 0.4115627 +| epoch 9 | 1059/ 5600 batches | test loss 0.4746599 +| epoch 9 | 1063/ 5600 batches | test loss 0.4448875 +| epoch 9 | 1067/ 5600 batches | test loss 0.4860539 +| epoch 9 | 1071/ 5600 batches | test loss 0.5550565 +| epoch 9 | 1075/ 5600 batches | test loss 0.8117471 +| epoch 9 | 1079/ 5600 batches | test loss 0.5655835 +| epoch 9 | 1083/ 5600 batches | test loss 0.5285953 +| epoch 9 | 1087/ 5600 batches | test loss 0.4698516 +| epoch 9 | 1091/ 5600 batches | test loss 0.4441230 +| epoch 9 | 1095/ 5600 batches | test loss 0.4927966 +| epoch 9 | 1099/ 5600 batches | test loss 0.4773707 +| epoch 9 | 1103/ 5600 batches | test loss 0.5633736 +| epoch 9 | 1107/ 5600 batches | test loss 0.4372927 +| epoch 9 | 1111/ 5600 batches | test loss 0.3950700 +| epoch 9 | 1115/ 5600 batches | test loss 0.7465422 +| epoch 9 | 1119/ 5600 batches | test loss 0.4526308 +| epoch 9 | 1123/ 5600 batches | test loss 0.5338484 +| epoch 9 | 1127/ 5600 batches | test loss 0.4953883 +| epoch 9 | 1131/ 5600 batches | test loss 0.4341956 +| epoch 9 | 1135/ 5600 batches | test loss 0.4612700 +| epoch 9 | 1139/ 5600 batches | test loss 0.4543807 +| epoch 9 | 1143/ 5600 batches | test loss 0.4139947 +| epoch 9 | 1147/ 5600 batches | test loss 0.4738177 +| epoch 9 | 1151/ 5600 batches | test loss 0.5185370 +| epoch 9 | 1155/ 5600 batches | test loss 0.4176318 +| epoch 9 | 1159/ 5600 batches | test loss 0.6449996 +| epoch 9 | 1163/ 5600 batches | test loss 0.5247030 +| epoch 9 | 1167/ 5600 batches | test loss 0.4731703 +| epoch 9 | 1171/ 5600 batches | test loss 0.4071061 +| epoch 9 | 1175/ 5600 batches | test loss 0.6337885 +| epoch 9 | 1179/ 5600 batches | test loss 0.4024637 +| epoch 9 | 1183/ 5600 batches | test loss 0.4520804 +| epoch 9 | 1187/ 5600 batches | test loss 0.6904827 +| epoch 9 | 1191/ 5600 batches | test loss 0.3991888 +| epoch 9 | 1195/ 5600 batches | test loss 0.4007472 +| epoch 9 | 1199/ 5600 batches | test loss 0.7714438 +| epoch 9 | 1203/ 5600 batches | test loss 0.4781584 +| epoch 9 | 1207/ 5600 batches | test loss 0.4660073 +| epoch 9 | 1211/ 5600 batches | test loss 0.3660966 +| epoch 9 | 1215/ 5600 batches | test loss 0.4666747 +| epoch 9 | 1219/ 5600 batches | test loss 0.4654106 +| epoch 9 | 1223/ 5600 batches | test loss 0.4333050 +| epoch 9 | 1227/ 5600 batches | test loss 0.3814960 +| epoch 9 | 1231/ 5600 batches | test loss 0.5231497 +| epoch 9 | 1235/ 5600 batches | test loss 0.4403071 +| epoch 9 | 1239/ 5600 batches | test loss 0.4788707 +| epoch 9 | 1243/ 5600 batches | test loss 0.5645831 +| epoch 9 | 1247/ 5600 batches | test loss 0.5982699 +| epoch 9 | 1251/ 5600 batches | test loss 0.4596859 +| epoch 9 | 1255/ 5600 batches | test loss 0.4398360 +| epoch 9 | 1259/ 5600 batches | test loss 0.5608889 +| epoch 9 | 1263/ 5600 batches | test loss 0.7895414 +| epoch 9 | 1267/ 5600 batches | test loss 0.7115152 +| epoch 9 | 1271/ 5600 batches | test loss 0.4744168 +| epoch 9 | 1275/ 5600 batches | test loss 0.5234591 +| epoch 9 | 1279/ 5600 batches | test loss 0.4573501 +| epoch 9 | 1283/ 5600 batches | test loss 0.7687256 +| epoch 9 | 1287/ 5600 batches | test loss 0.6155058 +| epoch 9 | 1291/ 5600 batches | test loss 0.3921627 +| epoch 9 | 1295/ 5600 batches | test loss 0.5140307 +| epoch 9 | 1299/ 5600 batches | test loss 0.4984782 +| epoch 9 | 1303/ 5600 batches | test loss 0.5160344 +| epoch 9 | 1307/ 5600 batches | test loss 0.5144832 +| epoch 9 | 1311/ 5600 batches | test loss 0.2209859 +| epoch 9 | 1315/ 5600 batches | test loss 0.6339917 +| epoch 9 | 1319/ 5600 batches | test loss 0.5170159 +| epoch 9 | 1323/ 5600 batches | test loss 0.6863111 +| epoch 9 | 1327/ 5600 batches | test loss 0.5854964 +| epoch 9 | 1331/ 5600 batches | test loss 0.4541199 +| epoch 9 | 1335/ 5600 batches | test loss 0.5829254 +| epoch 9 | 1339/ 5600 batches | test loss 0.4995770 +| epoch 9 | 1343/ 5600 batches | test loss 0.5621570 +| epoch 9 | 1347/ 5600 batches | test loss 0.5550196 +| epoch 9 | 1351/ 5600 batches | test loss 0.3707661 +| epoch 9 | 1355/ 5600 batches | test loss 0.3460341 +| epoch 9 | 1359/ 5600 batches | test loss 0.4427615 +| epoch 9 | 1363/ 5600 batches | test loss 0.3969762 +| epoch 9 | 1367/ 5600 batches | test loss 0.4902728 +| epoch 9 | 1371/ 5600 batches | test loss 0.4593545 +| epoch 9 | 1375/ 5600 batches | test loss 0.4367189 +| epoch 9 | 1379/ 5600 batches | test loss 0.6675612 +| epoch 9 | 1383/ 5600 batches | test loss 0.4189436 +| epoch 9 | 1387/ 5600 batches | test loss 0.4392490 +| epoch 9 | 1391/ 5600 batches | test loss 0.4160161 +| epoch 9 | 1395/ 5600 batches | test loss 0.4218568 +| epoch 9 | 1399/ 5600 batches | test loss 0.3916390 +| epoch 9 | final test loss 0.5087, do not save model! +-------------------------------------------------------------------------------- +| epoch 10 | 3/ 5600 batches | train loss 0.2378400 +| epoch 10 | 7/ 5600 batches | train loss 0.3397065 +| epoch 10 | 11/ 5600 batches | train loss 0.3067684 +| epoch 10 | 15/ 5600 batches | train loss 0.3432362 +| epoch 10 | 19/ 5600 batches | train loss 0.3374801 +| epoch 10 | 23/ 5600 batches | train loss 0.3016942 +| epoch 10 | 27/ 5600 batches | train loss 0.3172780 +| epoch 10 | 31/ 5600 batches | train loss 0.3529003 +| epoch 10 | 35/ 5600 batches | train loss 0.2960760 +| epoch 10 | 39/ 5600 batches | train loss 0.4268445 +| epoch 10 | 43/ 5600 batches | train loss 0.3235424 +| epoch 10 | 47/ 5600 batches | train loss 0.2650790 +| epoch 10 | 51/ 5600 batches | train loss 0.2722858 +| epoch 10 | 55/ 5600 batches | train loss 0.3513751 +| epoch 10 | 59/ 5600 batches | train loss 0.3472887 +| epoch 10 | 63/ 5600 batches | train loss 0.2915915 +| epoch 10 | 67/ 5600 batches | train loss 0.2880120 +| epoch 10 | 71/ 5600 batches | train loss 0.3004074 +| epoch 10 | 75/ 5600 batches | train loss 0.3300104 +| epoch 10 | 79/ 5600 batches | train loss 0.3824116 +| epoch 10 | 83/ 5600 batches | train loss 0.2568333 +| epoch 10 | 87/ 5600 batches | train loss 0.2764418 +| epoch 10 | 91/ 5600 batches | train loss 0.2792591 +| epoch 10 | 95/ 5600 batches | train loss 0.3987127 +| epoch 10 | 99/ 5600 batches | train loss 0.2735272 +| epoch 10 | 103/ 5600 batches | train loss 0.2971532 +| epoch 10 | 107/ 5600 batches | train loss 0.3444145 +| epoch 10 | 111/ 5600 batches | train loss 0.3055981 +| epoch 10 | 115/ 5600 batches | train loss 0.3117211 +| epoch 10 | 119/ 5600 batches | train loss 0.3253250 +| epoch 10 | 123/ 5600 batches | train loss 0.3028522 +| epoch 10 | 127/ 5600 batches | train loss 0.3757589 +| epoch 10 | 131/ 5600 batches | train loss 0.4048266 +| epoch 10 | 135/ 5600 batches | train loss 0.2981919 +| epoch 10 | 139/ 5600 batches | train loss 0.3561558 +| epoch 10 | 143/ 5600 batches | train loss 0.4198082 +| epoch 10 | 147/ 5600 batches | train loss 0.3421735 +| epoch 10 | 151/ 5600 batches | train loss 0.2740601 +| epoch 10 | 155/ 5600 batches | train loss 0.2445662 +| epoch 10 | 159/ 5600 batches | train loss 0.3087945 +| epoch 10 | 163/ 5600 batches | train loss 0.2556441 +| epoch 10 | 167/ 5600 batches | train loss 0.2910531 +| epoch 10 | 171/ 5600 batches | train loss 0.3102990 +| epoch 10 | 175/ 5600 batches | train loss 0.3035253 +| epoch 10 | 179/ 5600 batches | train loss 0.2895166 +| epoch 10 | 183/ 5600 batches | train loss 0.2666757 +| epoch 10 | 187/ 5600 batches | train loss 0.2841354 +| epoch 10 | 191/ 5600 batches | train loss 0.2869339 +| epoch 10 | 195/ 5600 batches | train loss 0.2783355 +| epoch 10 | 199/ 5600 batches | train loss 0.2803345 +| epoch 10 | 203/ 5600 batches | train loss 0.3017874 +| epoch 10 | 207/ 5600 batches | train loss 0.3503695 +| epoch 10 | 211/ 5600 batches | train loss 0.2720737 +| epoch 10 | 215/ 5600 batches | train loss 0.2637262 +| epoch 10 | 219/ 5600 batches | train loss 0.2907046 +| epoch 10 | 223/ 5600 batches | train loss 0.2996843 +| epoch 10 | 227/ 5600 batches | train loss 0.3735182 +| epoch 10 | 231/ 5600 batches | train loss 0.2733565 +| epoch 10 | 235/ 5600 batches | train loss 0.3465618 +| epoch 10 | 239/ 5600 batches | train loss 0.2723368 +| epoch 10 | 243/ 5600 batches | train loss 0.2622873 +| epoch 10 | 247/ 5600 batches | train loss 0.2425316 +| epoch 10 | 251/ 5600 batches | train loss 0.3446805 +| epoch 10 | 255/ 5600 batches | train loss 0.1941132 +| epoch 10 | 259/ 5600 batches | train loss 0.2953744 +| epoch 10 | 263/ 5600 batches | train loss 0.2614373 +| epoch 10 | 267/ 5600 batches | train loss 0.2594421 +| epoch 10 | 271/ 5600 batches | train loss 0.2861881 +| epoch 10 | 275/ 5600 batches | train loss 0.3365091 +| epoch 10 | 279/ 5600 batches | train loss 0.2608497 +| epoch 10 | 283/ 5600 batches | train loss 0.2973556 +| epoch 10 | 287/ 5600 batches | train loss 0.3234226 +| epoch 10 | 291/ 5600 batches | train loss 0.3268187 +| epoch 10 | 295/ 5600 batches | train loss 0.3845186 +| epoch 10 | 299/ 5600 batches | train loss 0.3430806 +| epoch 10 | 303/ 5600 batches | train loss 0.2783457 +| epoch 10 | 307/ 5600 batches | train loss 0.2888211 +| epoch 10 | 311/ 5600 batches | train loss 0.3329060 +| epoch 10 | 315/ 5600 batches | train loss 0.2965265 +| epoch 10 | 319/ 5600 batches | train loss 0.2709716 +| epoch 10 | 323/ 5600 batches | train loss 0.3117214 +| epoch 10 | 327/ 5600 batches | train loss 0.3000054 +| epoch 10 | 331/ 5600 batches | train loss 0.2967328 +| epoch 10 | 335/ 5600 batches | train loss 0.3376976 +| epoch 10 | 339/ 5600 batches | train loss 0.2811907 +| epoch 10 | 343/ 5600 batches | train loss 0.2901212 +| epoch 10 | 347/ 5600 batches | train loss 0.3090023 +| epoch 10 | 351/ 5600 batches | train loss 0.3675506 +| epoch 10 | 355/ 5600 batches | train loss 0.2657050 +| epoch 10 | 359/ 5600 batches | train loss 0.3428491 +| epoch 10 | 363/ 5600 batches | train loss 0.2660722 +| epoch 10 | 367/ 5600 batches | train loss 0.3290256 +| epoch 10 | 371/ 5600 batches | train loss 0.3675454 +| epoch 10 | 375/ 5600 batches | train loss 0.3343460 +| epoch 10 | 379/ 5600 batches | train loss 0.2851047 +| epoch 10 | 383/ 5600 batches | train loss 0.3205857 +| epoch 10 | 387/ 5600 batches | train loss 0.3127350 +| epoch 10 | 391/ 5600 batches | train loss 0.2864885 +| epoch 10 | 395/ 5600 batches | train loss 0.2729477 +| epoch 10 | 399/ 5600 batches | train loss 0.3429899 +| epoch 10 | 403/ 5600 batches | train loss 0.2926024 +| epoch 10 | 407/ 5600 batches | train loss 0.2828042 +| epoch 10 | 411/ 5600 batches | train loss 0.2922170 +| epoch 10 | 415/ 5600 batches | train loss 0.3298895 +| epoch 10 | 419/ 5600 batches | train loss 0.3074905 +| epoch 10 | 423/ 5600 batches | train loss 0.3000697 +| epoch 10 | 427/ 5600 batches | train loss 0.2944630 +| epoch 10 | 431/ 5600 batches | train loss 0.2831009 +| epoch 10 | 435/ 5600 batches | train loss 0.4066800 +| epoch 10 | 439/ 5600 batches | train loss 0.2777399 +| epoch 10 | 443/ 5600 batches | train loss 0.2859278 +| epoch 10 | 447/ 5600 batches | train loss 0.3048206 +| epoch 10 | 451/ 5600 batches | train loss 0.3131291 +| epoch 10 | 455/ 5600 batches | train loss 0.3251400 +| epoch 10 | 459/ 5600 batches | train loss 0.2651798 +| epoch 10 | 463/ 5600 batches | train loss 0.3556278 +| epoch 10 | 467/ 5600 batches | train loss 0.2898911 +| epoch 10 | 471/ 5600 batches | train loss 0.2948165 +| epoch 10 | 475/ 5600 batches | train loss 0.2865292 +| epoch 10 | 479/ 5600 batches | train loss 0.3355203 +| epoch 10 | 483/ 5600 batches | train loss 0.2670013 +| epoch 10 | 487/ 5600 batches | train loss 0.3662878 +| epoch 10 | 491/ 5600 batches | train loss 0.3106523 +| epoch 10 | 495/ 5600 batches | train loss 0.3201371 +| epoch 10 | 499/ 5600 batches | train loss 0.2792746 +| epoch 10 | 503/ 5600 batches | train loss 0.3605193 +| epoch 10 | 507/ 5600 batches | train loss 0.3027650 +| epoch 10 | 511/ 5600 batches | train loss 0.3045689 +| epoch 10 | 515/ 5600 batches | train loss 0.3373697 +| epoch 10 | 519/ 5600 batches | train loss 0.3462345 +| epoch 10 | 523/ 5600 batches | train loss 0.3434067 +| epoch 10 | 527/ 5600 batches | train loss 0.2582214 +| epoch 10 | 531/ 5600 batches | train loss 0.4457778 +| epoch 10 | 535/ 5600 batches | train loss 0.3065375 +| epoch 10 | 539/ 5600 batches | train loss 0.3263736 +| epoch 10 | 543/ 5600 batches | train loss 0.3437163 +| epoch 10 | 547/ 5600 batches | train loss 0.2675967 +| epoch 10 | 551/ 5600 batches | train loss 0.2793936 +| epoch 10 | 555/ 5600 batches | train loss 0.2785636 +| epoch 10 | 559/ 5600 batches | train loss 0.2946037 +| epoch 10 | 563/ 5600 batches | train loss 0.3180106 +| epoch 10 | 567/ 5600 batches | train loss 0.3048380 +| epoch 10 | 571/ 5600 batches | train loss 0.3344923 +| epoch 10 | 575/ 5600 batches | train loss 0.3602706 +| epoch 10 | 579/ 5600 batches | train loss 0.3422219 +| epoch 10 | 583/ 5600 batches | train loss 0.3548823 +| epoch 10 | 587/ 5600 batches | train loss 0.2801377 +| epoch 10 | 591/ 5600 batches | train loss 0.3241021 +| epoch 10 | 595/ 5600 batches | train loss 0.2856116 +| epoch 10 | 599/ 5600 batches | train loss 0.2817634 +| epoch 10 | 603/ 5600 batches | train loss 0.3078517 +| epoch 10 | 607/ 5600 batches | train loss 0.2805228 +| epoch 10 | 611/ 5600 batches | train loss 0.3119335 +| epoch 10 | 615/ 5600 batches | train loss 0.2763322 +| epoch 10 | 619/ 5600 batches | train loss 0.2930092 +| epoch 10 | 623/ 5600 batches | train loss 0.2146865 +| epoch 10 | 627/ 5600 batches | train loss 0.2986816 +| epoch 10 | 631/ 5600 batches | train loss 0.3133144 +| epoch 10 | 635/ 5600 batches | train loss 0.3253734 +| epoch 10 | 639/ 5600 batches | train loss 0.3397827 +| epoch 10 | 643/ 5600 batches | train loss 0.3111627 +| epoch 10 | 647/ 5600 batches | train loss 0.3481828 +| epoch 10 | 651/ 5600 batches | train loss 0.2782193 +| epoch 10 | 655/ 5600 batches | train loss 0.3177850 +| epoch 10 | 659/ 5600 batches | train loss 0.2664421 +| epoch 10 | 663/ 5600 batches | train loss 0.2936017 +| epoch 10 | 667/ 5600 batches | train loss 0.3055382 +| epoch 10 | 671/ 5600 batches | train loss 0.2643490 +| epoch 10 | 675/ 5600 batches | train loss 0.2364197 +| epoch 10 | 679/ 5600 batches | train loss 0.3228366 +| epoch 10 | 683/ 5600 batches | train loss 0.2220409 +| epoch 10 | 687/ 5600 batches | train loss 0.2813463 +| epoch 10 | 691/ 5600 batches | train loss 0.3104202 +| epoch 10 | 695/ 5600 batches | train loss 0.2452096 +| epoch 10 | 699/ 5600 batches | train loss 0.3412775 +| epoch 10 | 703/ 5600 batches | train loss 0.3374442 +| epoch 10 | 707/ 5600 batches | train loss 0.3378114 +| epoch 10 | 711/ 5600 batches | train loss 0.2315709 +| epoch 10 | 715/ 5600 batches | train loss 0.3884484 +| epoch 10 | 719/ 5600 batches | train loss 0.3486165 +| epoch 10 | 723/ 5600 batches | train loss 0.3977190 +| epoch 10 | 727/ 5600 batches | train loss 0.2858300 +| epoch 10 | 731/ 5600 batches | train loss 0.2670974 +| epoch 10 | 735/ 5600 batches | train loss 0.3727766 +| epoch 10 | 739/ 5600 batches | train loss 0.2654995 +| epoch 10 | 743/ 5600 batches | train loss 0.4014828 +| epoch 10 | 747/ 5600 batches | train loss 0.2879643 +| epoch 10 | 751/ 5600 batches | train loss 0.2893939 +| epoch 10 | 755/ 5600 batches | train loss 0.2858460 +| epoch 10 | 759/ 5600 batches | train loss 0.3262328 +| epoch 10 | 763/ 5600 batches | train loss 0.3180853 +| epoch 10 | 767/ 5600 batches | train loss 0.2671615 +| epoch 10 | 771/ 5600 batches | train loss 0.3076166 +| epoch 10 | 775/ 5600 batches | train loss 0.2996268 +| epoch 10 | 779/ 5600 batches | train loss 0.3028590 +| epoch 10 | 783/ 5600 batches | train loss 0.2859168 +| epoch 10 | 787/ 5600 batches | train loss 0.3227696 +| epoch 10 | 791/ 5600 batches | train loss 0.2717193 +| epoch 10 | 795/ 5600 batches | train loss 0.3128572 +| epoch 10 | 799/ 5600 batches | train loss 0.3124841 +| epoch 10 | 803/ 5600 batches | train loss 0.2974127 +| epoch 10 | 807/ 5600 batches | train loss 0.2634871 +| epoch 10 | 811/ 5600 batches | train loss 0.2506207 +| epoch 10 | 815/ 5600 batches | train loss 0.2855106 +| epoch 10 | 819/ 5600 batches | train loss 0.2971735 +| epoch 10 | 823/ 5600 batches | train loss 0.3339637 +| epoch 10 | 827/ 5600 batches | train loss 0.2816591 +| epoch 10 | 831/ 5600 batches | train loss 0.3098933 +| epoch 10 | 835/ 5600 batches | train loss 0.3131968 +| epoch 10 | 839/ 5600 batches | train loss 0.2585506 +| epoch 10 | 843/ 5600 batches | train loss 0.2823246 +| epoch 10 | 847/ 5600 batches | train loss 0.3161618 +| epoch 10 | 851/ 5600 batches | train loss 0.3313845 +| epoch 10 | 855/ 5600 batches | train loss 0.2695227 +| epoch 10 | 859/ 5600 batches | train loss 0.2829753 +| epoch 10 | 863/ 5600 batches | train loss 0.2814976 +| epoch 10 | 867/ 5600 batches | train loss 0.3359851 +| epoch 10 | 871/ 5600 batches | train loss 0.2682383 +| epoch 10 | 875/ 5600 batches | train loss 0.3773758 +| epoch 10 | 879/ 5600 batches | train loss 0.3923600 +| epoch 10 | 883/ 5600 batches | train loss 0.3132041 +| epoch 10 | 887/ 5600 batches | train loss 0.2876774 +| epoch 10 | 891/ 5600 batches | train loss 0.2845477 +| epoch 10 | 895/ 5600 batches | train loss 0.3670734 +| epoch 10 | 899/ 5600 batches | train loss 0.3666703 +| epoch 10 | 903/ 5600 batches | train loss 0.3226694 +| epoch 10 | 907/ 5600 batches | train loss 0.2968142 +| epoch 10 | 911/ 5600 batches | train loss 0.3174070 +| epoch 10 | 915/ 5600 batches | train loss 0.3059771 +| epoch 10 | 919/ 5600 batches | train loss 0.3406624 +| epoch 10 | 923/ 5600 batches | train loss 0.3834429 +| epoch 10 | 927/ 5600 batches | train loss 0.3130780 +| epoch 10 | 931/ 5600 batches | train loss 0.2807207 +| epoch 10 | 935/ 5600 batches | train loss 0.2187654 +| epoch 10 | 939/ 5600 batches | train loss 0.2951280 +| epoch 10 | 943/ 5600 batches | train loss 0.3146122 +| epoch 10 | 947/ 5600 batches | train loss 0.3755471 +| epoch 10 | 951/ 5600 batches | train loss 0.3236375 +| epoch 10 | 955/ 5600 batches | train loss 0.3319987 +| epoch 10 | 959/ 5600 batches | train loss 0.3101697 +| epoch 10 | 963/ 5600 batches | train loss 0.3543957 +| epoch 10 | 967/ 5600 batches | train loss 0.2599398 +| epoch 10 | 971/ 5600 batches | train loss 0.3164899 +| epoch 10 | 975/ 5600 batches | train loss 0.2781878 +| epoch 10 | 979/ 5600 batches | train loss 0.2737257 +| epoch 10 | 983/ 5600 batches | train loss 0.3981021 +| epoch 10 | 987/ 5600 batches | train loss 0.2981014 +| epoch 10 | 991/ 5600 batches | train loss 0.3263362 +| epoch 10 | 995/ 5600 batches | train loss 0.3584439 +| epoch 10 | 999/ 5600 batches | train loss 0.3207537 +| epoch 10 | 1003/ 5600 batches | train loss 0.2752758 +| epoch 10 | 1007/ 5600 batches | train loss 0.2085030 +| epoch 10 | 1011/ 5600 batches | train loss 0.3000411 +| epoch 10 | 1015/ 5600 batches | train loss 0.3563967 +| epoch 10 | 1019/ 5600 batches | train loss 0.3261529 +| epoch 10 | 1023/ 5600 batches | train loss 0.3036525 +| epoch 10 | 1027/ 5600 batches | train loss 0.4099192 +| epoch 10 | 1031/ 5600 batches | train loss 0.3186782 +| epoch 10 | 1035/ 5600 batches | train loss 0.3240446 +| epoch 10 | 1039/ 5600 batches | train loss 0.4074234 +| epoch 10 | 1043/ 5600 batches | train loss 0.2720849 +| epoch 10 | 1047/ 5600 batches | train loss 0.2692652 +| epoch 10 | 1051/ 5600 batches | train loss 0.3688692 +| epoch 10 | 1055/ 5600 batches | train loss 0.2461733 +| epoch 10 | 1059/ 5600 batches | train loss 0.3356514 +| epoch 10 | 1063/ 5600 batches | train loss 0.3264827 +| epoch 10 | 1067/ 5600 batches | train loss 0.3080884 +| epoch 10 | 1071/ 5600 batches | train loss 0.2759882 +| epoch 10 | 1075/ 5600 batches | train loss 0.3307679 +| epoch 10 | 1079/ 5600 batches | train loss 0.2445806 +| epoch 10 | 1083/ 5600 batches | train loss 0.2591631 +| epoch 10 | 1087/ 5600 batches | train loss 0.3286585 +| epoch 10 | 1091/ 5600 batches | train loss 0.3123249 +| epoch 10 | 1095/ 5600 batches | train loss 0.3389665 +| epoch 10 | 1099/ 5600 batches | train loss 0.3361816 +| epoch 10 | 1103/ 5600 batches | train loss 0.3113275 +| epoch 10 | 1107/ 5600 batches | train loss 0.3422534 +| epoch 10 | 1111/ 5600 batches | train loss 0.2956813 +| epoch 10 | 1115/ 5600 batches | train loss 0.2528650 +| epoch 10 | 1119/ 5600 batches | train loss 0.3049962 +| epoch 10 | 1123/ 5600 batches | train loss 0.2531661 +| epoch 10 | 1127/ 5600 batches | train loss 0.3037584 +| epoch 10 | 1131/ 5600 batches | train loss 0.2974014 +| epoch 10 | 1135/ 5600 batches | train loss 0.3212688 +| epoch 10 | 1139/ 5600 batches | train loss 0.2755168 +| epoch 10 | 1143/ 5600 batches | train loss 0.2055205 +| epoch 10 | 1147/ 5600 batches | train loss 0.3738363 +| epoch 10 | 1151/ 5600 batches | train loss 0.3138657 +| epoch 10 | 1155/ 5600 batches | train loss 0.2748176 +| epoch 10 | 1159/ 5600 batches | train loss 0.4018179 +| epoch 10 | 1163/ 5600 batches | train loss 0.2632888 +| epoch 10 | 1167/ 5600 batches | train loss 0.3257337 +| epoch 10 | 1171/ 5600 batches | train loss 0.3238160 +| epoch 10 | 1175/ 5600 batches | train loss 0.3104730 +| epoch 10 | 1179/ 5600 batches | train loss 0.3392663 +| epoch 10 | 1183/ 5600 batches | train loss 0.3064248 +| epoch 10 | 1187/ 5600 batches | train loss 0.2683277 +| epoch 10 | 1191/ 5600 batches | train loss 0.3336470 +| epoch 10 | 1195/ 5600 batches | train loss 0.2570260 +| epoch 10 | 1199/ 5600 batches | train loss 0.2722235 +| epoch 10 | 1203/ 5600 batches | train loss 0.2907575 +| epoch 10 | 1207/ 5600 batches | train loss 0.3486227 +| epoch 10 | 1211/ 5600 batches | train loss 0.3525791 +| epoch 10 | 1215/ 5600 batches | train loss 0.2760192 +| epoch 10 | 1219/ 5600 batches | train loss 0.2187133 +| epoch 10 | 1223/ 5600 batches | train loss 0.2978113 +| epoch 10 | 1227/ 5600 batches | train loss 0.2921869 +| epoch 10 | 1231/ 5600 batches | train loss 0.3100305 +| epoch 10 | 1235/ 5600 batches | train loss 0.3414024 +| epoch 10 | 1239/ 5600 batches | train loss 0.2086149 +| epoch 10 | 1243/ 5600 batches | train loss 0.3077401 +| epoch 10 | 1247/ 5600 batches | train loss 0.3398440 +| epoch 10 | 1251/ 5600 batches | train loss 0.3679080 +| epoch 10 | 1255/ 5600 batches | train loss 0.3336040 +| epoch 10 | 1259/ 5600 batches | train loss 0.3412030 +| epoch 10 | 1263/ 5600 batches | train loss 0.3244410 +| epoch 10 | 1267/ 5600 batches | train loss 0.3087515 +| epoch 10 | 1271/ 5600 batches | train loss 0.3073624 +| epoch 10 | 1275/ 5600 batches | train loss 0.3005473 +| epoch 10 | 1279/ 5600 batches | train loss 0.2968301 +| epoch 10 | 1283/ 5600 batches | train loss 0.2619462 +| epoch 10 | 1287/ 5600 batches | train loss 0.3275239 +| epoch 10 | 1291/ 5600 batches | train loss 0.2676658 +| epoch 10 | 1295/ 5600 batches | train loss 0.3118685 +| epoch 10 | 1299/ 5600 batches | train loss 0.3222568 +| epoch 10 | 1303/ 5600 batches | train loss 0.3275602 +| epoch 10 | 1307/ 5600 batches | train loss 0.3567061 +| epoch 10 | 1311/ 5600 batches | train loss 0.3515285 +| epoch 10 | 1315/ 5600 batches | train loss 0.3151629 +| epoch 10 | 1319/ 5600 batches | train loss 0.3222276 +| epoch 10 | 1323/ 5600 batches | train loss 0.3234113 +| epoch 10 | 1327/ 5600 batches | train loss 0.4050065 +| epoch 10 | 1331/ 5600 batches | train loss 0.2790964 +| epoch 10 | 1335/ 5600 batches | train loss 0.3388597 +| epoch 10 | 1339/ 5600 batches | train loss 0.3234500 +| epoch 10 | 1343/ 5600 batches | train loss 0.2898818 +| epoch 10 | 1347/ 5600 batches | train loss 0.3181681 +| epoch 10 | 1351/ 5600 batches | train loss 0.2894737 +| epoch 10 | 1355/ 5600 batches | train loss 0.3056962 +| epoch 10 | 1359/ 5600 batches | train loss 0.4084914 +| epoch 10 | 1363/ 5600 batches | train loss 0.3813853 +| epoch 10 | 1367/ 5600 batches | train loss 0.3280958 +| epoch 10 | 1371/ 5600 batches | train loss 0.3528019 +| epoch 10 | 1375/ 5600 batches | train loss 0.3337238 +| epoch 10 | 1379/ 5600 batches | train loss 0.2909625 +| epoch 10 | 1383/ 5600 batches | train loss 0.3089172 +| epoch 10 | 1387/ 5600 batches | train loss 0.2824474 +| epoch 10 | 1391/ 5600 batches | train loss 0.3149739 +| epoch 10 | 1395/ 5600 batches | train loss 0.3371516 +| epoch 10 | 1399/ 5600 batches | train loss 0.3373370 +| epoch 10 | 1403/ 5600 batches | train loss 0.3370386 +| epoch 10 | 1407/ 5600 batches | train loss 0.3188760 +| epoch 10 | 1411/ 5600 batches | train loss 0.2654336 +| epoch 10 | 1415/ 5600 batches | train loss 0.2893181 +| epoch 10 | 1419/ 5600 batches | train loss 0.3033449 +| epoch 10 | 1423/ 5600 batches | train loss 0.3307236 +| epoch 10 | 1427/ 5600 batches | train loss 0.3183122 +| epoch 10 | 1431/ 5600 batches | train loss 0.2227625 +| epoch 10 | 1435/ 5600 batches | train loss 0.3324463 +| epoch 10 | 1439/ 5600 batches | train loss 0.3407235 +| epoch 10 | 1443/ 5600 batches | train loss 0.2973205 +| epoch 10 | 1447/ 5600 batches | train loss 0.2874933 +| epoch 10 | 1451/ 5600 batches | train loss 0.3136025 +| epoch 10 | 1455/ 5600 batches | train loss 0.2900890 +| epoch 10 | 1459/ 5600 batches | train loss 0.2919103 +| epoch 10 | 1463/ 5600 batches | train loss 0.3588769 +| epoch 10 | 1467/ 5600 batches | train loss 0.3472552 +| epoch 10 | 1471/ 5600 batches | train loss 0.3007592 +| epoch 10 | 1475/ 5600 batches | train loss 0.2586398 +| epoch 10 | 1479/ 5600 batches | train loss 0.3138478 +| epoch 10 | 1483/ 5600 batches | train loss 0.3173352 +| epoch 10 | 1487/ 5600 batches | train loss 0.2739725 +| epoch 10 | 1491/ 5600 batches | train loss 0.2602276 +| epoch 10 | 1495/ 5600 batches | train loss 0.3044183 +| epoch 10 | 1499/ 5600 batches | train loss 0.3156910 +| epoch 10 | 1503/ 5600 batches | train loss 0.2778501 +| epoch 10 | 1507/ 5600 batches | train loss 0.3080355 +| epoch 10 | 1511/ 5600 batches | train loss 0.3464761 +| epoch 10 | 1515/ 5600 batches | train loss 0.2498649 +| epoch 10 | 1519/ 5600 batches | train loss 0.2718076 +| epoch 10 | 1523/ 5600 batches | train loss 0.3024672 +| epoch 10 | 1527/ 5600 batches | train loss 0.2710165 +| epoch 10 | 1531/ 5600 batches | train loss 0.3358989 +| epoch 10 | 1535/ 5600 batches | train loss 0.3571826 +| epoch 10 | 1539/ 5600 batches | train loss 0.2734669 +| epoch 10 | 1543/ 5600 batches | train loss 0.2970909 +| epoch 10 | 1547/ 5600 batches | train loss 0.3319638 +| epoch 10 | 1551/ 5600 batches | train loss 0.1953603 +| epoch 10 | 1555/ 5600 batches | train loss 0.3032435 +| epoch 10 | 1559/ 5600 batches | train loss 0.2988441 +| epoch 10 | 1563/ 5600 batches | train loss 0.2041882 +| epoch 10 | 1567/ 5600 batches | train loss 0.4214413 +| epoch 10 | 1571/ 5600 batches | train loss 0.2815655 +| epoch 10 | 1575/ 5600 batches | train loss 0.3809017 +| epoch 10 | 1579/ 5600 batches | train loss 0.2902609 +| epoch 10 | 1583/ 5600 batches | train loss 0.3506245 +| epoch 10 | 1587/ 5600 batches | train loss 0.3365829 +| epoch 10 | 1591/ 5600 batches | train loss 0.2966820 +| epoch 10 | 1595/ 5600 batches | train loss 0.3345074 +| epoch 10 | 1599/ 5600 batches | train loss 0.2741196 +| epoch 10 | 1603/ 5600 batches | train loss 0.3420714 +| epoch 10 | 1607/ 5600 batches | train loss 0.3398457 +| epoch 10 | 1611/ 5600 batches | train loss 0.2767025 +| epoch 10 | 1615/ 5600 batches | train loss 0.3238928 +| epoch 10 | 1619/ 5600 batches | train loss 0.3365164 +| epoch 10 | 1623/ 5600 batches | train loss 0.3052951 +| epoch 10 | 1627/ 5600 batches | train loss 0.3322856 +| epoch 10 | 1631/ 5600 batches | train loss 0.2905580 +| epoch 10 | 1635/ 5600 batches | train loss 0.1997450 +| epoch 10 | 1639/ 5600 batches | train loss 0.3188699 +| epoch 10 | 1643/ 5600 batches | train loss 0.3464962 +| epoch 10 | 1647/ 5600 batches | train loss 0.3025086 +| epoch 10 | 1651/ 5600 batches | train loss 0.3095609 +| epoch 10 | 1655/ 5600 batches | train loss 0.3229186 +| epoch 10 | 1659/ 5600 batches | train loss 0.2754916 +| epoch 10 | 1663/ 5600 batches | train loss 0.3022259 +| epoch 10 | 1667/ 5600 batches | train loss 0.3060918 +| epoch 10 | 1671/ 5600 batches | train loss 0.3279894 +| epoch 10 | 1675/ 5600 batches | train loss 0.2957807 +| epoch 10 | 1679/ 5600 batches | train loss 0.2956363 +| epoch 10 | 1683/ 5600 batches | train loss 0.2895433 +| epoch 10 | 1687/ 5600 batches | train loss 0.2969447 +| epoch 10 | 1691/ 5600 batches | train loss 0.3392294 +| epoch 10 | 1695/ 5600 batches | train loss 0.3051691 +| epoch 10 | 1699/ 5600 batches | train loss 0.2911144 +| epoch 10 | 1703/ 5600 batches | train loss 0.3246649 +| epoch 10 | 1707/ 5600 batches | train loss 0.3773484 +| epoch 10 | 1711/ 5600 batches | train loss 0.2053043 +| epoch 10 | 1715/ 5600 batches | train loss 0.3110982 +| epoch 10 | 1719/ 5600 batches | train loss 0.3329455 +| epoch 10 | 1723/ 5600 batches | train loss 0.2728792 +| epoch 10 | 1727/ 5600 batches | train loss 0.3138047 +| epoch 10 | 1731/ 5600 batches | train loss 0.2854644 +| epoch 10 | 1735/ 5600 batches | train loss 0.3640039 +| epoch 10 | 1739/ 5600 batches | train loss 0.2652688 +| epoch 10 | 1743/ 5600 batches | train loss 0.2913547 +| epoch 10 | 1747/ 5600 batches | train loss 0.2754109 +| epoch 10 | 1751/ 5600 batches | train loss 0.3464498 +| epoch 10 | 1755/ 5600 batches | train loss 0.2658572 +| epoch 10 | 1759/ 5600 batches | train loss 0.2858093 +| epoch 10 | 1763/ 5600 batches | train loss 0.2598305 +| epoch 10 | 1767/ 5600 batches | train loss 0.3695834 +| epoch 10 | 1771/ 5600 batches | train loss 0.2210595 +| epoch 10 | 1775/ 5600 batches | train loss 0.2908285 +| epoch 10 | 1779/ 5600 batches | train loss 0.3201103 +| epoch 10 | 1783/ 5600 batches | train loss 0.3090689 +| epoch 10 | 1787/ 5600 batches | train loss 0.3220775 +| epoch 10 | 1791/ 5600 batches | train loss 0.2863144 +| epoch 10 | 1795/ 5600 batches | train loss 0.3080067 +| epoch 10 | 1799/ 5600 batches | train loss 0.3436896 +| epoch 10 | 1803/ 5600 batches | train loss 0.2694975 +| epoch 10 | 1807/ 5600 batches | train loss 0.3136636 +| epoch 10 | 1811/ 5600 batches | train loss 0.2869307 +| epoch 10 | 1815/ 5600 batches | train loss 0.3168345 +| epoch 10 | 1819/ 5600 batches | train loss 0.3555728 +| epoch 10 | 1823/ 5600 batches | train loss 0.3417972 +| epoch 10 | 1827/ 5600 batches | train loss 0.3498337 +| epoch 10 | 1831/ 5600 batches | train loss 0.2705545 +| epoch 10 | 1835/ 5600 batches | train loss 0.2635260 +| epoch 10 | 1839/ 5600 batches | train loss 0.2681521 +| epoch 10 | 1843/ 5600 batches | train loss 0.3411428 +| epoch 10 | 1847/ 5600 batches | train loss 0.3433580 +| epoch 10 | 1851/ 5600 batches | train loss 0.3251092 +| epoch 10 | 1855/ 5600 batches | train loss 0.3207682 +| epoch 10 | 1859/ 5600 batches | train loss 0.4041991 +| epoch 10 | 1863/ 5600 batches | train loss 0.2985148 +| epoch 10 | 1867/ 5600 batches | train loss 0.3199724 +| epoch 10 | 1871/ 5600 batches | train loss 0.2923584 +| epoch 10 | 1875/ 5600 batches | train loss 0.3298718 +| epoch 10 | 1879/ 5600 batches | train loss 0.3613380 +| epoch 10 | 1883/ 5600 batches | train loss 0.2540658 +| epoch 10 | 1887/ 5600 batches | train loss 0.3403518 +| epoch 10 | 1891/ 5600 batches | train loss 0.2732794 +| epoch 10 | 1895/ 5600 batches | train loss 0.3083057 +| epoch 10 | 1899/ 5600 batches | train loss 0.3393360 +| epoch 10 | 1903/ 5600 batches | train loss 0.3445317 +| epoch 10 | 1907/ 5600 batches | train loss 0.3288095 +| epoch 10 | 1911/ 5600 batches | train loss 0.3054060 +| epoch 10 | 1915/ 5600 batches | train loss 0.3760018 +| epoch 10 | 1919/ 5600 batches | train loss 0.2784305 +| epoch 10 | 1923/ 5600 batches | train loss 0.2911924 +| epoch 10 | 1927/ 5600 batches | train loss 0.3544079 +| epoch 10 | 1931/ 5600 batches | train loss 0.3016616 +| epoch 10 | 1935/ 5600 batches | train loss 0.2752866 +| epoch 10 | 1939/ 5600 batches | train loss 0.2765911 +| epoch 10 | 1943/ 5600 batches | train loss 0.3950471 +| epoch 10 | 1947/ 5600 batches | train loss 0.2942735 +| epoch 10 | 1951/ 5600 batches | train loss 0.2418407 +| epoch 10 | 1955/ 5600 batches | train loss 0.2979925 +| epoch 10 | 1959/ 5600 batches | train loss 0.2708561 +| epoch 10 | 1963/ 5600 batches | train loss 0.3357588 +| epoch 10 | 1967/ 5600 batches | train loss 0.3065155 +| epoch 10 | 1971/ 5600 batches | train loss 0.3994361 +| epoch 10 | 1975/ 5600 batches | train loss 0.2926959 +| epoch 10 | 1979/ 5600 batches | train loss 0.3345121 +| epoch 10 | 1983/ 5600 batches | train loss 0.3111459 +| epoch 10 | 1987/ 5600 batches | train loss 0.2568990 +| epoch 10 | 1991/ 5600 batches | train loss 0.2585257 +| epoch 10 | 1995/ 5600 batches | train loss 0.3024037 +| epoch 10 | 1999/ 5600 batches | train loss 0.2895074 +| epoch 10 | 2003/ 5600 batches | train loss 0.3478640 +| epoch 10 | 2007/ 5600 batches | train loss 0.3690517 +| epoch 10 | 2011/ 5600 batches | train loss 0.2987564 +| epoch 10 | 2015/ 5600 batches | train loss 0.3063065 +| epoch 10 | 2019/ 5600 batches | train loss 0.2910497 +| epoch 10 | 2023/ 5600 batches | train loss 0.3103346 +| epoch 10 | 2027/ 5600 batches | train loss 0.3221285 +| epoch 10 | 2031/ 5600 batches | train loss 0.3147730 +| epoch 10 | 2035/ 5600 batches | train loss 0.3391950 +| epoch 10 | 2039/ 5600 batches | train loss 0.3334373 +| epoch 10 | 2043/ 5600 batches | train loss 0.3057625 +| epoch 10 | 2047/ 5600 batches | train loss 0.3061262 +| epoch 10 | 2051/ 5600 batches | train loss 0.3260123 +| epoch 10 | 2055/ 5600 batches | train loss 0.3271772 +| epoch 10 | 2059/ 5600 batches | train loss 0.2927352 +| epoch 10 | 2063/ 5600 batches | train loss 0.2804473 +| epoch 10 | 2067/ 5600 batches | train loss 0.3262571 +| epoch 10 | 2071/ 5600 batches | train loss 0.3230378 +| epoch 10 | 2075/ 5600 batches | train loss 0.3030937 +| epoch 10 | 2079/ 5600 batches | train loss 0.2599157 +| epoch 10 | 2083/ 5600 batches | train loss 0.3500582 +| epoch 10 | 2087/ 5600 batches | train loss 0.3311166 +| epoch 10 | 2091/ 5600 batches | train loss 0.2790633 +| epoch 10 | 2095/ 5600 batches | train loss 0.3190032 +| epoch 10 | 2099/ 5600 batches | train loss 0.3203100 +| epoch 10 | 2103/ 5600 batches | train loss 0.3154393 +| epoch 10 | 2107/ 5600 batches | train loss 0.2993318 +| epoch 10 | 2111/ 5600 batches | train loss 0.1312117 +| epoch 10 | 2115/ 5600 batches | train loss 0.3328628 +| epoch 10 | 2119/ 5600 batches | train loss 0.3560029 +| epoch 10 | 2123/ 5600 batches | train loss 0.2923379 +| epoch 10 | 2127/ 5600 batches | train loss 0.2906690 +| epoch 10 | 2131/ 5600 batches | train loss 0.2980629 +| epoch 10 | 2135/ 5600 batches | train loss 0.3333487 +| epoch 10 | 2139/ 5600 batches | train loss 0.3290865 +| epoch 10 | 2143/ 5600 batches | train loss 0.3172275 +| epoch 10 | 2147/ 5600 batches | train loss 0.2959843 +| epoch 10 | 2151/ 5600 batches | train loss 0.2785387 +| epoch 10 | 2155/ 5600 batches | train loss 0.3965253 +| epoch 10 | 2159/ 5600 batches | train loss 0.3044537 +| epoch 10 | 2163/ 5600 batches | train loss 0.3025416 +| epoch 10 | 2167/ 5600 batches | train loss 0.3250064 +| epoch 10 | 2171/ 5600 batches | train loss 0.3271866 +| epoch 10 | 2175/ 5600 batches | train loss 0.2775526 +| epoch 10 | 2179/ 5600 batches | train loss 0.2909922 +| epoch 10 | 2183/ 5600 batches | train loss 0.2911788 +| epoch 10 | 2187/ 5600 batches | train loss 0.3402041 +| epoch 10 | 2191/ 5600 batches | train loss 0.3682185 +| epoch 10 | 2195/ 5600 batches | train loss 0.3359013 +| epoch 10 | 2199/ 5600 batches | train loss 0.3356743 +| epoch 10 | 2203/ 5600 batches | train loss 0.2685342 +| epoch 10 | 2207/ 5600 batches | train loss 0.3285192 +| epoch 10 | 2211/ 5600 batches | train loss 0.2907922 +| epoch 10 | 2215/ 5600 batches | train loss 0.3001098 +| epoch 10 | 2219/ 5600 batches | train loss 0.2984049 +| epoch 10 | 2223/ 5600 batches | train loss 0.2787472 +| epoch 10 | 2227/ 5600 batches | train loss 0.3079602 +| epoch 10 | 2231/ 5600 batches | train loss 0.2408235 +| epoch 10 | 2235/ 5600 batches | train loss 0.3451998 +| epoch 10 | 2239/ 5600 batches | train loss 0.3542273 +| epoch 10 | 2243/ 5600 batches | train loss 0.2922916 +| epoch 10 | 2247/ 5600 batches | train loss 0.2396129 +| epoch 10 | 2251/ 5600 batches | train loss 0.3018926 +| epoch 10 | 2255/ 5600 batches | train loss 0.2628441 +| epoch 10 | 2259/ 5600 batches | train loss 0.2708268 +| epoch 10 | 2263/ 5600 batches | train loss 0.3687176 +| epoch 10 | 2267/ 5600 batches | train loss 0.3709381 +| epoch 10 | 2271/ 5600 batches | train loss 0.2608633 +| epoch 10 | 2275/ 5600 batches | train loss 0.2760819 +| epoch 10 | 2279/ 5600 batches | train loss 0.3165329 +| epoch 10 | 2283/ 5600 batches | train loss 0.3645828 +| epoch 10 | 2287/ 5600 batches | train loss 0.3698687 +| epoch 10 | 2291/ 5600 batches | train loss 0.1234804 +| epoch 10 | 2295/ 5600 batches | train loss 0.3418251 +| epoch 10 | 2299/ 5600 batches | train loss 0.3637352 +| epoch 10 | 2303/ 5600 batches | train loss 0.2546741 +| epoch 10 | 2307/ 5600 batches | train loss 0.2671186 +| epoch 10 | 2311/ 5600 batches | train loss 0.3572165 +| epoch 10 | 2315/ 5600 batches | train loss 0.3051847 +| epoch 10 | 2319/ 5600 batches | train loss 0.2792710 +| epoch 10 | 2323/ 5600 batches | train loss 0.3181608 +| epoch 10 | 2327/ 5600 batches | train loss 0.3012964 +| epoch 10 | 2331/ 5600 batches | train loss 0.2622758 +| epoch 10 | 2335/ 5600 batches | train loss 0.3441433 +| epoch 10 | 2339/ 5600 batches | train loss 0.2813475 +| epoch 10 | 2343/ 5600 batches | train loss 0.1934881 +| epoch 10 | 2347/ 5600 batches | train loss 0.2835271 +| epoch 10 | 2351/ 5600 batches | train loss 0.3455550 +| epoch 10 | 2355/ 5600 batches | train loss 0.2965217 +| epoch 10 | 2359/ 5600 batches | train loss 0.2741801 +| epoch 10 | 2363/ 5600 batches | train loss 0.2975656 +| epoch 10 | 2367/ 5600 batches | train loss 0.2802471 +| epoch 10 | 2371/ 5600 batches | train loss 0.3434950 +| epoch 10 | 2375/ 5600 batches | train loss 0.3471551 +| epoch 10 | 2379/ 5600 batches | train loss 0.3381610 +| epoch 10 | 2383/ 5600 batches | train loss 0.3430779 +| epoch 10 | 2387/ 5600 batches | train loss 0.3454193 +| epoch 10 | 2391/ 5600 batches | train loss 0.3672718 +| epoch 10 | 2395/ 5600 batches | train loss 0.3013923 +| epoch 10 | 2399/ 5600 batches | train loss 0.2767980 +| epoch 10 | 2403/ 5600 batches | train loss 0.3083082 +| epoch 10 | 2407/ 5600 batches | train loss 0.3405567 +| epoch 10 | 2411/ 5600 batches | train loss 0.2960456 +| epoch 10 | 2415/ 5600 batches | train loss 0.2552305 +| epoch 10 | 2419/ 5600 batches | train loss 0.3006383 +| epoch 10 | 2423/ 5600 batches | train loss 0.2759455 +| epoch 10 | 2427/ 5600 batches | train loss 0.2925997 +| epoch 10 | 2431/ 5600 batches | train loss 0.3202208 +| epoch 10 | 2435/ 5600 batches | train loss 0.3072230 +| epoch 10 | 2439/ 5600 batches | train loss 0.3320827 +| epoch 10 | 2443/ 5600 batches | train loss 0.2898130 +| epoch 10 | 2447/ 5600 batches | train loss 0.3207472 +| epoch 10 | 2451/ 5600 batches | train loss 0.3167204 +| epoch 10 | 2455/ 5600 batches | train loss 0.2963911 +| epoch 10 | 2459/ 5600 batches | train loss 0.3078875 +| epoch 10 | 2463/ 5600 batches | train loss 0.2705429 +| epoch 10 | 2467/ 5600 batches | train loss 0.3705532 +| epoch 10 | 2471/ 5600 batches | train loss 0.3551176 +| epoch 10 | 2475/ 5600 batches | train loss 0.3596293 +| epoch 10 | 2479/ 5600 batches | train loss 0.3055102 +| epoch 10 | 2483/ 5600 batches | train loss 0.3256726 +| epoch 10 | 2487/ 5600 batches | train loss 0.2725546 +| epoch 10 | 2491/ 5600 batches | train loss 0.2987858 +| epoch 10 | 2495/ 5600 batches | train loss 0.2798346 +| epoch 10 | 2499/ 5600 batches | train loss 0.2875132 +| epoch 10 | 2503/ 5600 batches | train loss 0.4104258 +| epoch 10 | 2507/ 5600 batches | train loss 0.3572114 +| epoch 10 | 2511/ 5600 batches | train loss 0.2883162 +| epoch 10 | 2515/ 5600 batches | train loss 0.3702506 +| epoch 10 | 2519/ 5600 batches | train loss 0.3259897 +| epoch 10 | 2523/ 5600 batches | train loss 0.3314089 +| epoch 10 | 2527/ 5600 batches | train loss 0.3233702 +| epoch 10 | 2531/ 5600 batches | train loss 0.3502609 +| epoch 10 | 2535/ 5600 batches | train loss 0.2759821 +| epoch 10 | 2539/ 5600 batches | train loss 0.2651948 +| epoch 10 | 2543/ 5600 batches | train loss 0.3221775 +| epoch 10 | 2547/ 5600 batches | train loss 0.2798617 +| epoch 10 | 2551/ 5600 batches | train loss 0.3590522 +| epoch 10 | 2555/ 5600 batches | train loss 0.3131625 +| epoch 10 | 2559/ 5600 batches | train loss 0.2608924 +| epoch 10 | 2563/ 5600 batches | train loss 0.3416368 +| epoch 10 | 2567/ 5600 batches | train loss 0.3494634 +| epoch 10 | 2571/ 5600 batches | train loss 0.2943419 +| epoch 10 | 2575/ 5600 batches | train loss 0.3172254 +| epoch 10 | 2579/ 5600 batches | train loss 0.3635831 +| epoch 10 | 2583/ 5600 batches | train loss 0.3079424 +| epoch 10 | 2587/ 5600 batches | train loss 0.3447042 +| epoch 10 | 2591/ 5600 batches | train loss 0.3267096 +| epoch 10 | 2595/ 5600 batches | train loss 0.3467166 +| epoch 10 | 2599/ 5600 batches | train loss 0.3197247 +| epoch 10 | 2603/ 5600 batches | train loss 0.2809806 +| epoch 10 | 2607/ 5600 batches | train loss 0.3558241 +| epoch 10 | 2611/ 5600 batches | train loss 0.3300363 +| epoch 10 | 2615/ 5600 batches | train loss 0.2954871 +| epoch 10 | 2619/ 5600 batches | train loss 0.3441375 +| epoch 10 | 2623/ 5600 batches | train loss 0.2540244 +| epoch 10 | 2627/ 5600 batches | train loss 0.3267609 +| epoch 10 | 2631/ 5600 batches | train loss 0.3102556 +| epoch 10 | 2635/ 5600 batches | train loss 0.3860470 +| epoch 10 | 2639/ 5600 batches | train loss 0.2833729 +| epoch 10 | 2643/ 5600 batches | train loss 0.3152637 +| epoch 10 | 2647/ 5600 batches | train loss 0.2793021 +| epoch 10 | 2651/ 5600 batches | train loss 0.3224562 +| epoch 10 | 2655/ 5600 batches | train loss 0.3382747 +| epoch 10 | 2659/ 5600 batches | train loss 0.2817898 +| epoch 10 | 2663/ 5600 batches | train loss 0.3644149 +| epoch 10 | 2667/ 5600 batches | train loss 0.3809057 +| epoch 10 | 2671/ 5600 batches | train loss 0.2810422 +| epoch 10 | 2675/ 5600 batches | train loss 0.3510412 +| epoch 10 | 2679/ 5600 batches | train loss 0.3194992 +| epoch 10 | 2683/ 5600 batches | train loss 0.3410212 +| epoch 10 | 2687/ 5600 batches | train loss 0.3143065 +| epoch 10 | 2691/ 5600 batches | train loss 0.3266568 +| epoch 10 | 2695/ 5600 batches | train loss 0.2949364 +| epoch 10 | 2699/ 5600 batches | train loss 0.2973215 +| epoch 10 | 2703/ 5600 batches | train loss 0.3403857 +| epoch 10 | 2707/ 5600 batches | train loss 0.3208663 +| epoch 10 | 2711/ 5600 batches | train loss 0.2822933 +| epoch 10 | 2715/ 5600 batches | train loss 0.2699164 +| epoch 10 | 2719/ 5600 batches | train loss 0.2619407 +| epoch 10 | 2723/ 5600 batches | train loss 0.3185284 +| epoch 10 | 2727/ 5600 batches | train loss 0.3563688 +| epoch 10 | 2731/ 5600 batches | train loss 0.2564986 +| epoch 10 | 2735/ 5600 batches | train loss 0.3198374 +| epoch 10 | 2739/ 5600 batches | train loss 0.2769142 +| epoch 10 | 2743/ 5600 batches | train loss 0.3281114 +| epoch 10 | 2747/ 5600 batches | train loss 0.3543299 +| epoch 10 | 2751/ 5600 batches | train loss 0.3532682 +| epoch 10 | 2755/ 5600 batches | train loss 0.3084057 +| epoch 10 | 2759/ 5600 batches | train loss 0.2975326 +| epoch 10 | 2763/ 5600 batches | train loss 0.2657912 +| epoch 10 | 2767/ 5600 batches | train loss 0.3789235 +| epoch 10 | 2771/ 5600 batches | train loss 0.2772292 +| epoch 10 | 2775/ 5600 batches | train loss 0.3079145 +| epoch 10 | 2779/ 5600 batches | train loss 0.2757718 +| epoch 10 | 2783/ 5600 batches | train loss 0.3784497 +| epoch 10 | 2787/ 5600 batches | train loss 0.3222501 +| epoch 10 | 2791/ 5600 batches | train loss 0.2889574 +| epoch 10 | 2795/ 5600 batches | train loss 0.3405929 +| epoch 10 | 2799/ 5600 batches | train loss 0.3419313 +| epoch 10 | 2803/ 5600 batches | train loss 0.3189213 +| epoch 10 | 2807/ 5600 batches | train loss 0.2434939 +| epoch 10 | 2811/ 5600 batches | train loss 0.3051106 +| epoch 10 | 2815/ 5600 batches | train loss 0.3381487 +| epoch 10 | 2819/ 5600 batches | train loss 0.3400270 +| epoch 10 | 2823/ 5600 batches | train loss 0.3189971 +| epoch 10 | 2827/ 5600 batches | train loss 0.3616310 +| epoch 10 | 2831/ 5600 batches | train loss 0.2760911 +| epoch 10 | 2835/ 5600 batches | train loss 0.3424720 +| epoch 10 | 2839/ 5600 batches | train loss 0.3386048 +| epoch 10 | 2843/ 5600 batches | train loss 0.3256475 +| epoch 10 | 2847/ 5600 batches | train loss 0.3506769 +| epoch 10 | 2851/ 5600 batches | train loss 0.3310975 +| epoch 10 | 2855/ 5600 batches | train loss 0.3024243 +| epoch 10 | 2859/ 5600 batches | train loss 0.3237729 +| epoch 10 | 2863/ 5600 batches | train loss 0.3325866 +| epoch 10 | 2867/ 5600 batches | train loss 0.3214731 +| epoch 10 | 2871/ 5600 batches | train loss 0.2828761 +| epoch 10 | 2875/ 5600 batches | train loss 0.3259478 +| epoch 10 | 2879/ 5600 batches | train loss 0.2785921 +| epoch 10 | 2883/ 5600 batches | train loss 0.3338617 +| epoch 10 | 2887/ 5600 batches | train loss 0.3436512 +| epoch 10 | 2891/ 5600 batches | train loss 0.2956316 +| epoch 10 | 2895/ 5600 batches | train loss 0.3267211 +| epoch 10 | 2899/ 5600 batches | train loss 0.2698742 +| epoch 10 | 2903/ 5600 batches | train loss 0.2694038 +| epoch 10 | 2907/ 5600 batches | train loss 0.3244310 +| epoch 10 | 2911/ 5600 batches | train loss 0.2901547 +| epoch 10 | 2915/ 5600 batches | train loss 0.3342708 +| epoch 10 | 2919/ 5600 batches | train loss 0.2764438 +| epoch 10 | 2923/ 5600 batches | train loss 0.3303125 +| epoch 10 | 2927/ 5600 batches | train loss 0.2968974 +| epoch 10 | 2931/ 5600 batches | train loss 0.3105932 +| epoch 10 | 2935/ 5600 batches | train loss 0.2817708 +| epoch 10 | 2939/ 5600 batches | train loss 0.2935009 +| epoch 10 | 2943/ 5600 batches | train loss 0.3051860 +| epoch 10 | 2947/ 5600 batches | train loss 0.3982915 +| epoch 10 | 2951/ 5600 batches | train loss 0.3830520 +| epoch 10 | 2955/ 5600 batches | train loss 0.2691206 +| epoch 10 | 2959/ 5600 batches | train loss 0.3226614 +| epoch 10 | 2963/ 5600 batches | train loss 0.3017375 +| epoch 10 | 2967/ 5600 batches | train loss 0.2631527 +| epoch 10 | 2971/ 5600 batches | train loss 0.2744299 +| epoch 10 | 2975/ 5600 batches | train loss 0.3804856 +| epoch 10 | 2979/ 5600 batches | train loss 0.3100441 +| epoch 10 | 2983/ 5600 batches | train loss 0.3236460 +| epoch 10 | 2987/ 5600 batches | train loss 0.3006011 +| epoch 10 | 2991/ 5600 batches | train loss 0.3670548 +| epoch 10 | 2995/ 5600 batches | train loss 0.2736015 +| epoch 10 | 2999/ 5600 batches | train loss 0.3557203 +| epoch 10 | 3003/ 5600 batches | train loss 0.2750204 +| epoch 10 | 3007/ 5600 batches | train loss 0.2631627 +| epoch 10 | 3011/ 5600 batches | train loss 0.2875522 +| epoch 10 | 3015/ 5600 batches | train loss 0.3147594 +| epoch 10 | 3019/ 5600 batches | train loss 0.3523787 +| epoch 10 | 3023/ 5600 batches | train loss 0.3351927 +| epoch 10 | 3027/ 5600 batches | train loss 0.3050801 +| epoch 10 | 3031/ 5600 batches | train loss 0.2952471 +| epoch 10 | 3035/ 5600 batches | train loss 0.3182528 +| epoch 10 | 3039/ 5600 batches | train loss 0.3623152 +| epoch 10 | 3043/ 5600 batches | train loss 0.3299494 +| epoch 10 | 3047/ 5600 batches | train loss 0.2865980 +| epoch 10 | 3051/ 5600 batches | train loss 0.4104228 +| epoch 10 | 3055/ 5600 batches | train loss 0.3726645 +| epoch 10 | 3059/ 5600 batches | train loss 0.3426777 +| epoch 10 | 3063/ 5600 batches | train loss 0.3058411 +| epoch 10 | 3067/ 5600 batches | train loss 0.3214279 +| epoch 10 | 3071/ 5600 batches | train loss 0.1981719 +| epoch 10 | 3075/ 5600 batches | train loss 0.3096253 +| epoch 10 | 3079/ 5600 batches | train loss 0.2952479 +| epoch 10 | 3083/ 5600 batches | train loss 0.3036693 +| epoch 10 | 3087/ 5600 batches | train loss 0.2719876 +| epoch 10 | 3091/ 5600 batches | train loss 0.3398945 +| epoch 10 | 3095/ 5600 batches | train loss 0.3456850 +| epoch 10 | 3099/ 5600 batches | train loss 0.3461891 +| epoch 10 | 3103/ 5600 batches | train loss 0.3666238 +| epoch 10 | 3107/ 5600 batches | train loss 0.3109182 +| epoch 10 | 3111/ 5600 batches | train loss 0.3316648 +| epoch 10 | 3115/ 5600 batches | train loss 0.2957920 +| epoch 10 | 3119/ 5600 batches | train loss 0.3068527 +| epoch 10 | 3123/ 5600 batches | train loss 0.3446544 +| epoch 10 | 3127/ 5600 batches | train loss 0.3339809 +| epoch 10 | 3131/ 5600 batches | train loss 0.3420767 +| epoch 10 | 3135/ 5600 batches | train loss 0.3409847 +| epoch 10 | 3139/ 5600 batches | train loss 0.2440019 +| epoch 10 | 3143/ 5600 batches | train loss 0.3193098 +| epoch 10 | 3147/ 5600 batches | train loss 0.3374562 +| epoch 10 | 3151/ 5600 batches | train loss 0.3072423 +| epoch 10 | 3155/ 5600 batches | train loss 0.3344853 +| epoch 10 | 3159/ 5600 batches | train loss 0.2900292 +| epoch 10 | 3163/ 5600 batches | train loss 0.2952017 +| epoch 10 | 3167/ 5600 batches | train loss 0.2619732 +| epoch 10 | 3171/ 5600 batches | train loss 0.3191715 +| epoch 10 | 3175/ 5600 batches | train loss 0.2901334 +| epoch 10 | 3179/ 5600 batches | train loss 0.3090912 +| epoch 10 | 3183/ 5600 batches | train loss 0.2898481 +| epoch 10 | 3187/ 5600 batches | train loss 0.2748495 +| epoch 10 | 3191/ 5600 batches | train loss 0.3079139 +| epoch 10 | 3195/ 5600 batches | train loss 0.3854851 +| epoch 10 | 3199/ 5600 batches | train loss 0.2608379 +| epoch 10 | 3203/ 5600 batches | train loss 0.3063354 +| epoch 10 | 3207/ 5600 batches | train loss 0.3134129 +| epoch 10 | 3211/ 5600 batches | train loss 0.2878901 +| epoch 10 | 3215/ 5600 batches | train loss 0.3439746 +| epoch 10 | 3219/ 5600 batches | train loss 0.2956325 +| epoch 10 | 3223/ 5600 batches | train loss 0.3514757 +| epoch 10 | 3227/ 5600 batches | train loss 0.3595343 +| epoch 10 | 3231/ 5600 batches | train loss 0.3429114 +| epoch 10 | 3235/ 5600 batches | train loss 0.2645700 +| epoch 10 | 3239/ 5600 batches | train loss 0.2547386 +| epoch 10 | 3243/ 5600 batches | train loss 0.3050367 +| epoch 10 | 3247/ 5600 batches | train loss 0.3499412 +| epoch 10 | 3251/ 5600 batches | train loss 0.3062236 +| epoch 10 | 3255/ 5600 batches | train loss 0.3141401 +| epoch 10 | 3259/ 5600 batches | train loss 0.3052295 +| epoch 10 | 3263/ 5600 batches | train loss 0.2682973 +| epoch 10 | 3267/ 5600 batches | train loss 0.3200594 +| epoch 10 | 3271/ 5600 batches | train loss 0.3494347 +| epoch 10 | 3275/ 5600 batches | train loss 0.3286164 +| epoch 10 | 3279/ 5600 batches | train loss 0.2534019 +| epoch 10 | 3283/ 5600 batches | train loss 0.2786432 +| epoch 10 | 3287/ 5600 batches | train loss 0.2939847 +| epoch 10 | 3291/ 5600 batches | train loss 0.2956228 +| epoch 10 | 3295/ 5600 batches | train loss 0.3463294 +| epoch 10 | 3299/ 5600 batches | train loss 0.3416547 +| epoch 10 | 3303/ 5600 batches | train loss 0.3105881 +| epoch 10 | 3307/ 5600 batches | train loss 0.3277465 +| epoch 10 | 3311/ 5600 batches | train loss 0.2951348 +| epoch 10 | 3315/ 5600 batches | train loss 0.2876981 +| epoch 10 | 3319/ 5600 batches | train loss 0.2647120 +| epoch 10 | 3323/ 5600 batches | train loss 0.3119753 +| epoch 10 | 3327/ 5600 batches | train loss 0.2963843 +| epoch 10 | 3331/ 5600 batches | train loss 0.3179044 +| epoch 10 | 3335/ 5600 batches | train loss 0.2724929 +| epoch 10 | 3339/ 5600 batches | train loss 0.3263400 +| epoch 10 | 3343/ 5600 batches | train loss 0.3457631 +| epoch 10 | 3347/ 5600 batches | train loss 0.2808431 +| epoch 10 | 3351/ 5600 batches | train loss 0.3150891 +| epoch 10 | 3355/ 5600 batches | train loss 0.3477886 +| epoch 10 | 3359/ 5600 batches | train loss 0.2915127 +| epoch 10 | 3363/ 5600 batches | train loss 0.2722787 +| epoch 10 | 3367/ 5600 batches | train loss 0.3287411 +| epoch 10 | 3371/ 5600 batches | train loss 0.3324768 +| epoch 10 | 3375/ 5600 batches | train loss 0.3538915 +| epoch 10 | 3379/ 5600 batches | train loss 0.2774565 +| epoch 10 | 3383/ 5600 batches | train loss 0.3578228 +| epoch 10 | 3387/ 5600 batches | train loss 0.2965472 +| epoch 10 | 3391/ 5600 batches | train loss 0.2834440 +| epoch 10 | 3395/ 5600 batches | train loss 0.3127819 +| epoch 10 | 3399/ 5600 batches | train loss 0.2958563 +| epoch 10 | 3403/ 5600 batches | train loss 0.3051925 +| epoch 10 | 3407/ 5600 batches | train loss 0.3079699 +| epoch 10 | 3411/ 5600 batches | train loss 0.2719737 +| epoch 10 | 3415/ 5600 batches | train loss 0.3216035 +| epoch 10 | 3419/ 5600 batches | train loss 0.2864692 +| epoch 10 | 3423/ 5600 batches | train loss 0.3465822 +| epoch 10 | 3427/ 5600 batches | train loss 0.2794322 +| epoch 10 | 3431/ 5600 batches | train loss 0.2604943 +| epoch 10 | 3435/ 5600 batches | train loss 0.3105999 +| epoch 10 | 3439/ 5600 batches | train loss 0.2851579 +| epoch 10 | 3443/ 5600 batches | train loss 0.3515099 +| epoch 10 | 3447/ 5600 batches | train loss 0.2840644 +| epoch 10 | 3451/ 5600 batches | train loss 0.3402964 +| epoch 10 | 3455/ 5600 batches | train loss 0.2702043 +| epoch 10 | 3459/ 5600 batches | train loss 0.2845862 +| epoch 10 | 3463/ 5600 batches | train loss 0.3080357 +| epoch 10 | 3467/ 5600 batches | train loss 0.2616270 +| epoch 10 | 3471/ 5600 batches | train loss 0.3983693 +| epoch 10 | 3475/ 5600 batches | train loss 0.3184378 +| epoch 10 | 3479/ 5600 batches | train loss 0.3268411 +| epoch 10 | 3483/ 5600 batches | train loss 0.2860650 +| epoch 10 | 3487/ 5600 batches | train loss 0.3436074 +| epoch 10 | 3491/ 5600 batches | train loss 0.2773803 +| epoch 10 | 3495/ 5600 batches | train loss 0.3048878 +| epoch 10 | 3499/ 5600 batches | train loss 0.3356683 +| epoch 10 | 3503/ 5600 batches | train loss 0.3011691 +| epoch 10 | 3507/ 5600 batches | train loss 0.3258379 +| epoch 10 | 3511/ 5600 batches | train loss 0.3437331 +| epoch 10 | 3515/ 5600 batches | train loss 0.2912241 +| epoch 10 | 3519/ 5600 batches | train loss 0.3521536 +| epoch 10 | 3523/ 5600 batches | train loss 0.3089233 +| epoch 10 | 3527/ 5600 batches | train loss 0.3766944 +| epoch 10 | 3531/ 5600 batches | train loss 0.3314056 +| epoch 10 | 3535/ 5600 batches | train loss 0.3203550 +| epoch 10 | 3539/ 5600 batches | train loss 0.3313056 +| epoch 10 | 3543/ 5600 batches | train loss 0.3095746 +| epoch 10 | 3547/ 5600 batches | train loss 0.3298703 +| epoch 10 | 3551/ 5600 batches | train loss 0.4017809 +| epoch 10 | 3555/ 5600 batches | train loss 0.3172810 +| epoch 10 | 3559/ 5600 batches | train loss 0.2852749 +| epoch 10 | 3563/ 5600 batches | train loss 0.2845725 +| epoch 10 | 3567/ 5600 batches | train loss 0.2898206 +| epoch 10 | 3571/ 5600 batches | train loss 0.4042812 +| epoch 10 | 3575/ 5600 batches | train loss 0.3314964 +| epoch 10 | 3579/ 5600 batches | train loss 0.2969216 +| epoch 10 | 3583/ 5600 batches | train loss 0.3439918 +| epoch 10 | 3587/ 5600 batches | train loss 0.2750243 +| epoch 10 | 3591/ 5600 batches | train loss 0.3181689 +| epoch 10 | 3595/ 5600 batches | train loss 0.2875724 +| epoch 10 | 3599/ 5600 batches | train loss 0.2932538 +| epoch 10 | 3603/ 5600 batches | train loss 0.3042957 +| epoch 10 | 3607/ 5600 batches | train loss 0.3858877 +| epoch 10 | 3611/ 5600 batches | train loss 0.3139938 +| epoch 10 | 3615/ 5600 batches | train loss 0.3081255 +| epoch 10 | 3619/ 5600 batches | train loss 0.2978117 +| epoch 10 | 3623/ 5600 batches | train loss 0.2859749 +| epoch 10 | 3627/ 5600 batches | train loss 0.2699304 +| epoch 10 | 3631/ 5600 batches | train loss 0.2965057 +| epoch 10 | 3635/ 5600 batches | train loss 0.3391671 +| epoch 10 | 3639/ 5600 batches | train loss 0.3183763 +| epoch 10 | 3643/ 5600 batches | train loss 0.2775425 +| epoch 10 | 3647/ 5600 batches | train loss 0.3211472 +| epoch 10 | 3651/ 5600 batches | train loss 0.2863312 +| epoch 10 | 3655/ 5600 batches | train loss 0.3013276 +| epoch 10 | 3659/ 5600 batches | train loss 0.3173192 +| epoch 10 | 3663/ 5600 batches | train loss 0.3628300 +| epoch 10 | 3667/ 5600 batches | train loss 0.2434768 +| epoch 10 | 3671/ 5600 batches | train loss 0.2764850 +| epoch 10 | 3675/ 5600 batches | train loss 0.2641454 +| epoch 10 | 3679/ 5600 batches | train loss 0.3317026 +| epoch 10 | 3683/ 5600 batches | train loss 0.2964315 +| epoch 10 | 3687/ 5600 batches | train loss 0.3345379 +| epoch 10 | 3691/ 5600 batches | train loss 0.3520898 +| epoch 10 | 3695/ 5600 batches | train loss 0.3120224 +| epoch 10 | 3699/ 5600 batches | train loss 0.2842479 +| epoch 10 | 3703/ 5600 batches | train loss 0.3039628 +| epoch 10 | 3707/ 5600 batches | train loss 0.2832176 +| epoch 10 | 3711/ 5600 batches | train loss 0.2788686 +| epoch 10 | 3715/ 5600 batches | train loss 0.3664577 +| epoch 10 | 3719/ 5600 batches | train loss 0.2536777 +| epoch 10 | 3723/ 5600 batches | train loss 0.3505946 +| epoch 10 | 3727/ 5600 batches | train loss 0.2668677 +| epoch 10 | 3731/ 5600 batches | train loss 0.3679816 +| epoch 10 | 3735/ 5600 batches | train loss 0.3339612 +| epoch 10 | 3739/ 5600 batches | train loss 0.2860390 +| epoch 10 | 3743/ 5600 batches | train loss 0.3080414 +| epoch 10 | 3747/ 5600 batches | train loss 0.3320458 +| epoch 10 | 3751/ 5600 batches | train loss 0.3422507 +| epoch 10 | 3755/ 5600 batches | train loss 0.3441171 +| epoch 10 | 3759/ 5600 batches | train loss 0.3173047 +| epoch 10 | 3763/ 5600 batches | train loss 0.3042133 +| epoch 10 | 3767/ 5600 batches | train loss 0.3379929 +| epoch 10 | 3771/ 5600 batches | train loss 0.3118900 +| epoch 10 | 3775/ 5600 batches | train loss 0.3258238 +| epoch 10 | 3779/ 5600 batches | train loss 0.3545789 +| epoch 10 | 3783/ 5600 batches | train loss 0.2824892 +| epoch 10 | 3787/ 5600 batches | train loss 0.3266639 +| epoch 10 | 3791/ 5600 batches | train loss 0.3446977 +| epoch 10 | 3795/ 5600 batches | train loss 0.3407322 +| epoch 10 | 3799/ 5600 batches | train loss 0.2880044 +| epoch 10 | 3803/ 5600 batches | train loss 0.2893397 +| epoch 10 | 3807/ 5600 batches | train loss 0.3421893 +| epoch 10 | 3811/ 5600 batches | train loss 0.2056449 +| epoch 10 | 3815/ 5600 batches | train loss 0.3428841 +| epoch 10 | 3819/ 5600 batches | train loss 0.3128551 +| epoch 10 | 3823/ 5600 batches | train loss 0.2422410 +| epoch 10 | 3827/ 5600 batches | train loss 0.3330428 +| epoch 10 | 3831/ 5600 batches | train loss 0.3279857 +| epoch 10 | 3835/ 5600 batches | train loss 0.3039463 +| epoch 10 | 3839/ 5600 batches | train loss 0.3520946 +| epoch 10 | 3843/ 5600 batches | train loss 0.2846726 +| epoch 10 | 3847/ 5600 batches | train loss 0.3513573 +| epoch 10 | 3851/ 5600 batches | train loss 0.3483149 +| epoch 10 | 3855/ 5600 batches | train loss 0.3147214 +| epoch 10 | 3859/ 5600 batches | train loss 0.1224084 +| epoch 10 | 3863/ 5600 batches | train loss 0.3322031 +| epoch 10 | 3867/ 5600 batches | train loss 0.2851300 +| epoch 10 | 3871/ 5600 batches | train loss 0.3020408 +| epoch 10 | 3875/ 5600 batches | train loss 0.2880321 +| epoch 10 | 3879/ 5600 batches | train loss 0.2876822 +| epoch 10 | 3883/ 5600 batches | train loss 0.3317865 +| epoch 10 | 3887/ 5600 batches | train loss 0.3583404 +| epoch 10 | 3891/ 5600 batches | train loss 0.3201112 +| epoch 10 | 3895/ 5600 batches | train loss 0.3049401 +| epoch 10 | 3899/ 5600 batches | train loss 0.3937098 +| epoch 10 | 3903/ 5600 batches | train loss 0.3213709 +| epoch 10 | 3907/ 5600 batches | train loss 0.2905191 +| epoch 10 | 3911/ 5600 batches | train loss 0.2972543 +| epoch 10 | 3915/ 5600 batches | train loss 0.2843281 +| epoch 10 | 3919/ 5600 batches | train loss 0.3004788 +| epoch 10 | 3923/ 5600 batches | train loss 0.2660767 +| epoch 10 | 3927/ 5600 batches | train loss 0.3357798 +| epoch 10 | 3931/ 5600 batches | train loss 0.3317565 +| epoch 10 | 3935/ 5600 batches | train loss 0.2936274 +| epoch 10 | 3939/ 5600 batches | train loss 0.2676083 +| epoch 10 | 3943/ 5600 batches | train loss 0.3308575 +| epoch 10 | 3947/ 5600 batches | train loss 0.2872431 +| epoch 10 | 3951/ 5600 batches | train loss 0.2853923 +| epoch 10 | 3955/ 5600 batches | train loss 0.2963695 +| epoch 10 | 3959/ 5600 batches | train loss 0.2963105 +| epoch 10 | 3963/ 5600 batches | train loss 0.3226492 +| epoch 10 | 3967/ 5600 batches | train loss 0.3107569 +| epoch 10 | 3971/ 5600 batches | train loss 0.3095905 +| epoch 10 | 3975/ 5600 batches | train loss 0.3129099 +| epoch 10 | 3979/ 5600 batches | train loss 0.2771667 +| epoch 10 | 3983/ 5600 batches | train loss 0.3080773 +| epoch 10 | 3987/ 5600 batches | train loss 0.3344752 +| epoch 10 | 3991/ 5600 batches | train loss 0.2783554 +| epoch 10 | 3995/ 5600 batches | train loss 0.2998624 +| epoch 10 | 3999/ 5600 batches | train loss 0.3175220 +| epoch 10 | 4003/ 5600 batches | train loss 0.3455241 +| epoch 10 | 4007/ 5600 batches | train loss 0.1972259 +| epoch 10 | 4011/ 5600 batches | train loss 0.2620879 +| epoch 10 | 4015/ 5600 batches | train loss 0.3202425 +| epoch 10 | 4019/ 5600 batches | train loss 0.2659591 +| epoch 10 | 4023/ 5600 batches | train loss 0.3120783 +| epoch 10 | 4027/ 5600 batches | train loss 0.2599078 +| epoch 10 | 4031/ 5600 batches | train loss 0.2728034 +| epoch 10 | 4035/ 5600 batches | train loss 0.3391724 +| epoch 10 | 4039/ 5600 batches | train loss 0.3340599 +| epoch 10 | 4043/ 5600 batches | train loss 0.3083452 +| epoch 10 | 4047/ 5600 batches | train loss 0.3336741 +| epoch 10 | 4051/ 5600 batches | train loss 0.3148596 +| epoch 10 | 4055/ 5600 batches | train loss 0.3763805 +| epoch 10 | 4059/ 5600 batches | train loss 0.2809576 +| epoch 10 | 4063/ 5600 batches | train loss 0.2589113 +| epoch 10 | 4067/ 5600 batches | train loss 0.3320417 +| epoch 10 | 4071/ 5600 batches | train loss 0.2994035 +| epoch 10 | 4075/ 5600 batches | train loss 0.3672637 +| epoch 10 | 4079/ 5600 batches | train loss 0.3346259 +| epoch 10 | 4083/ 5600 batches | train loss 0.3086470 +| epoch 10 | 4087/ 5600 batches | train loss 0.3314407 +| epoch 10 | 4091/ 5600 batches | train loss 0.2982857 +| epoch 10 | 4095/ 5600 batches | train loss 0.2051139 +| epoch 10 | 4099/ 5600 batches | train loss 0.2514234 +| epoch 10 | 4103/ 5600 batches | train loss 0.2775619 +| epoch 10 | 4107/ 5600 batches | train loss 0.2989178 +| epoch 10 | 4111/ 5600 batches | train loss 0.3371931 +| epoch 10 | 4115/ 5600 batches | train loss 0.2846061 +| epoch 10 | 4119/ 5600 batches | train loss 0.3748302 +| epoch 10 | 4123/ 5600 batches | train loss 0.2693058 +| epoch 10 | 4127/ 5600 batches | train loss 0.3520193 +| epoch 10 | 4131/ 5600 batches | train loss 0.3426249 +| epoch 10 | 4135/ 5600 batches | train loss 0.2692705 +| epoch 10 | 4139/ 5600 batches | train loss 0.2845411 +| epoch 10 | 4143/ 5600 batches | train loss 0.3283216 +| epoch 10 | 4147/ 5600 batches | train loss 0.2988833 +| epoch 10 | 4151/ 5600 batches | train loss 0.3459619 +| epoch 10 | 4155/ 5600 batches | train loss 0.3635748 +| epoch 10 | 4159/ 5600 batches | train loss 0.3308373 +| epoch 10 | 4163/ 5600 batches | train loss 0.3348523 +| epoch 10 | 4167/ 5600 batches | train loss 0.2228286 +| epoch 10 | 4171/ 5600 batches | train loss 0.3487652 +| epoch 10 | 4175/ 5600 batches | train loss 0.3226373 +| epoch 10 | 4179/ 5600 batches | train loss 0.3452891 +| epoch 10 | 4183/ 5600 batches | train loss 0.3272287 +| epoch 10 | 4187/ 5600 batches | train loss 0.3538638 +| epoch 10 | 4191/ 5600 batches | train loss 0.2741356 +| epoch 10 | 4195/ 5600 batches | train loss 0.2888254 +| epoch 10 | 4199/ 5600 batches | train loss 0.2967018 +| epoch 10 | 4203/ 5600 batches | train loss 0.3184299 +| epoch 10 | 4207/ 5600 batches | train loss 0.2545179 +| epoch 10 | 4211/ 5600 batches | train loss 0.2740650 +| epoch 10 | 4215/ 5600 batches | train loss 0.3752025 +| epoch 10 | 4219/ 5600 batches | train loss 0.3244512 +| epoch 10 | 4223/ 5600 batches | train loss 0.3047429 +| epoch 10 | 4227/ 5600 batches | train loss 0.3413028 +| epoch 10 | 4231/ 5600 batches | train loss 0.2683551 +| epoch 10 | 4235/ 5600 batches | train loss 0.2873908 +| epoch 10 | 4239/ 5600 batches | train loss 0.3306086 +| epoch 10 | 4243/ 5600 batches | train loss 0.2700206 +| epoch 10 | 4247/ 5600 batches | train loss 0.3107603 +| epoch 10 | 4251/ 5600 batches | train loss 0.3004086 +| epoch 10 | 4255/ 5600 batches | train loss 0.3137395 +| epoch 10 | 4259/ 5600 batches | train loss 0.2923975 +| epoch 10 | 4263/ 5600 batches | train loss 0.3183025 +| epoch 10 | 4267/ 5600 batches | train loss 0.3075284 +| epoch 10 | 4271/ 5600 batches | train loss 0.2606648 +| epoch 10 | 4275/ 5600 batches | train loss 0.3285374 +| epoch 10 | 4279/ 5600 batches | train loss 0.2826564 +| epoch 10 | 4283/ 5600 batches | train loss 0.2543892 +| epoch 10 | 4287/ 5600 batches | train loss 0.3093181 +| epoch 10 | 4291/ 5600 batches | train loss 0.3769677 +| epoch 10 | 4295/ 5600 batches | train loss 0.3379362 +| epoch 10 | 4299/ 5600 batches | train loss 0.2619740 +| epoch 10 | 4303/ 5600 batches | train loss 0.2693145 +| epoch 10 | 4307/ 5600 batches | train loss 0.3389376 +| epoch 10 | 4311/ 5600 batches | train loss 0.3034020 +| epoch 10 | 4315/ 5600 batches | train loss 0.2781863 +| epoch 10 | 4319/ 5600 batches | train loss 0.3154366 +| epoch 10 | 4323/ 5600 batches | train loss 0.3358952 +| epoch 10 | 4327/ 5600 batches | train loss 0.3275204 +| epoch 10 | 4331/ 5600 batches | train loss 0.2730125 +| epoch 10 | 4335/ 5600 batches | train loss 0.3054405 +| epoch 10 | 4339/ 5600 batches | train loss 0.3183505 +| epoch 10 | 4343/ 5600 batches | train loss 0.3508405 +| epoch 10 | 4347/ 5600 batches | train loss 0.3137334 +| epoch 10 | 4351/ 5600 batches | train loss 0.3655868 +| epoch 10 | 4355/ 5600 batches | train loss 0.2092039 +| epoch 10 | 4359/ 5600 batches | train loss 0.3448507 +| epoch 10 | 4363/ 5600 batches | train loss 0.2892735 +| epoch 10 | 4367/ 5600 batches | train loss 0.3406394 +| epoch 10 | 4371/ 5600 batches | train loss 0.3345255 +| epoch 10 | 4375/ 5600 batches | train loss 0.3325253 +| epoch 10 | 4379/ 5600 batches | train loss 0.4004395 +| epoch 10 | 4383/ 5600 batches | train loss 0.2832998 +| epoch 10 | 4387/ 5600 batches | train loss 0.3432854 +| epoch 10 | 4391/ 5600 batches | train loss 0.3516141 +| epoch 10 | 4395/ 5600 batches | train loss 0.3024324 +| epoch 10 | 4399/ 5600 batches | train loss 0.3514172 +| epoch 10 | 4403/ 5600 batches | train loss 0.3886325 +| epoch 10 | 4407/ 5600 batches | train loss 0.3048163 +| epoch 10 | 4411/ 5600 batches | train loss 0.2896242 +| epoch 10 | 4415/ 5600 batches | train loss 0.2908752 +| epoch 10 | 4419/ 5600 batches | train loss 0.3119787 +| epoch 10 | 4423/ 5600 batches | train loss 0.2541279 +| epoch 10 | 4427/ 5600 batches | train loss 0.2788972 +| epoch 10 | 4431/ 5600 batches | train loss 0.3580005 +| epoch 10 | 4435/ 5600 batches | train loss 0.3127009 +| epoch 10 | 4439/ 5600 batches | train loss 0.3170959 +| epoch 10 | 4443/ 5600 batches | train loss 0.3536564 +| epoch 10 | 4447/ 5600 batches | train loss 0.3281442 +| epoch 10 | 4451/ 5600 batches | train loss 0.3510675 +| epoch 10 | 4455/ 5600 batches | train loss 0.2044684 +| epoch 10 | 4459/ 5600 batches | train loss 0.2812148 +| epoch 10 | 4463/ 5600 batches | train loss 0.3433163 +| epoch 10 | 4467/ 5600 batches | train loss 0.3366145 +| epoch 10 | 4471/ 5600 batches | train loss 0.3296795 +| epoch 10 | 4475/ 5600 batches | train loss 0.3209137 +| epoch 10 | 4479/ 5600 batches | train loss 0.2651725 +| epoch 10 | 4483/ 5600 batches | train loss 0.3651402 +| epoch 10 | 4487/ 5600 batches | train loss 0.3542839 +| epoch 10 | 4491/ 5600 batches | train loss 0.3277037 +| epoch 10 | 4495/ 5600 batches | train loss 0.2738660 +| epoch 10 | 4499/ 5600 batches | train loss 0.2756468 +| epoch 10 | 4503/ 5600 batches | train loss 0.3193144 +| epoch 10 | 4507/ 5600 batches | train loss 0.3282871 +| epoch 10 | 4511/ 5600 batches | train loss 0.2918375 +| epoch 10 | 4515/ 5600 batches | train loss 0.3513823 +| epoch 10 | 4519/ 5600 batches | train loss 0.3357485 +| epoch 10 | 4523/ 5600 batches | train loss 0.3199729 +| epoch 10 | 4527/ 5600 batches | train loss 0.2832163 +| epoch 10 | 4531/ 5600 batches | train loss 0.2604715 +| epoch 10 | 4535/ 5600 batches | train loss 0.3358647 +| epoch 10 | 4539/ 5600 batches | train loss 0.2754146 +| epoch 10 | 4543/ 5600 batches | train loss 0.2683591 +| epoch 10 | 4547/ 5600 batches | train loss 0.2830894 +| epoch 10 | 4551/ 5600 batches | train loss 0.3195298 +| epoch 10 | 4555/ 5600 batches | train loss 0.3334469 +| epoch 10 | 4559/ 5600 batches | train loss 0.4022177 +| epoch 10 | 4563/ 5600 batches | train loss 0.3233726 +| epoch 10 | 4567/ 5600 batches | train loss 0.2938215 +| epoch 10 | 4571/ 5600 batches | train loss 0.3705221 +| epoch 10 | 4575/ 5600 batches | train loss 0.3375356 +| epoch 10 | 4579/ 5600 batches | train loss 0.3770103 +| epoch 10 | 4583/ 5600 batches | train loss 0.2826312 +| epoch 10 | 4587/ 5600 batches | train loss 0.3129408 +| epoch 10 | 4591/ 5600 batches | train loss 0.2888286 +| epoch 10 | 4595/ 5600 batches | train loss 0.3700129 +| epoch 10 | 4599/ 5600 batches | train loss 0.2700943 +| epoch 10 | 4603/ 5600 batches | train loss 0.2851936 +| epoch 10 | 4607/ 5600 batches | train loss 0.2872156 +| epoch 10 | 4611/ 5600 batches | train loss 0.2646059 +| epoch 10 | 4615/ 5600 batches | train loss 0.3056064 +| epoch 10 | 4619/ 5600 batches | train loss 0.3750265 +| epoch 10 | 4623/ 5600 batches | train loss 0.2938563 +| epoch 10 | 4627/ 5600 batches | train loss 0.2780730 +| epoch 10 | 4631/ 5600 batches | train loss 0.3132148 +| epoch 10 | 4635/ 5600 batches | train loss 0.2856190 +| epoch 10 | 4639/ 5600 batches | train loss 0.2620364 +| epoch 10 | 4643/ 5600 batches | train loss 0.3430446 +| epoch 10 | 4647/ 5600 batches | train loss 0.2918970 +| epoch 10 | 4651/ 5600 batches | train loss 0.3182831 +| epoch 10 | 4655/ 5600 batches | train loss 0.2704770 +| epoch 10 | 4659/ 5600 batches | train loss 0.2897577 +| epoch 10 | 4663/ 5600 batches | train loss 0.2771356 +| epoch 10 | 4667/ 5600 batches | train loss 0.2924555 +| epoch 10 | 4671/ 5600 batches | train loss 0.4292431 +| epoch 10 | 4675/ 5600 batches | train loss 0.3437488 +| epoch 10 | 4679/ 5600 batches | train loss 0.3610199 +| epoch 10 | 4683/ 5600 batches | train loss 0.3716629 +| epoch 10 | 4687/ 5600 batches | train loss 0.3568152 +| epoch 10 | 4691/ 5600 batches | train loss 0.3019893 +| epoch 10 | 4695/ 5600 batches | train loss 0.3249794 +| epoch 10 | 4699/ 5600 batches | train loss 0.3405356 +| epoch 10 | 4703/ 5600 batches | train loss 0.3483683 +| epoch 10 | 4707/ 5600 batches | train loss 0.3679806 +| epoch 10 | 4711/ 5600 batches | train loss 0.4126782 +| epoch 10 | 4715/ 5600 batches | train loss 0.2947879 +| epoch 10 | 4719/ 5600 batches | train loss 0.3220912 +| epoch 10 | 4723/ 5600 batches | train loss 0.2707700 +| epoch 10 | 4727/ 5600 batches | train loss 0.3009861 +| epoch 10 | 4731/ 5600 batches | train loss 0.3085009 +| epoch 10 | 4735/ 5600 batches | train loss 0.2751698 +| epoch 10 | 4739/ 5600 batches | train loss 0.3441478 +| epoch 10 | 4743/ 5600 batches | train loss 0.3201413 +| epoch 10 | 4747/ 5600 batches | train loss 0.2872288 +| epoch 10 | 4751/ 5600 batches | train loss 0.3832070 +| epoch 10 | 4755/ 5600 batches | train loss 0.2962041 +| epoch 10 | 4759/ 5600 batches | train loss 0.2722533 +| epoch 10 | 4763/ 5600 batches | train loss 0.4099252 +| epoch 10 | 4767/ 5600 batches | train loss 0.2865685 +| epoch 10 | 4771/ 5600 batches | train loss 0.3039767 +| epoch 10 | 4775/ 5600 batches | train loss 0.3092154 +| epoch 10 | 4779/ 5600 batches | train loss 0.2934449 +| epoch 10 | 4783/ 5600 batches | train loss 0.3556784 +| epoch 10 | 4787/ 5600 batches | train loss 0.3166421 +| epoch 10 | 4791/ 5600 batches | train loss 0.2935864 +| epoch 10 | 4795/ 5600 batches | train loss 0.2668639 +| epoch 10 | 4799/ 5600 batches | train loss 0.3516954 +| epoch 10 | 4803/ 5600 batches | train loss 0.3182179 +| epoch 10 | 4807/ 5600 batches | train loss 0.3416685 +| epoch 10 | 4811/ 5600 batches | train loss 0.3364742 +| epoch 10 | 4815/ 5600 batches | train loss 0.2917608 +| epoch 10 | 4819/ 5600 batches | train loss 0.3786066 +| epoch 10 | 4823/ 5600 batches | train loss 0.2676912 +| epoch 10 | 4827/ 5600 batches | train loss 0.2652797 +| epoch 10 | 4831/ 5600 batches | train loss 0.2903552 +| epoch 10 | 4835/ 5600 batches | train loss 0.3680395 +| epoch 10 | 4839/ 5600 batches | train loss 0.3052137 +| epoch 10 | 4843/ 5600 batches | train loss 0.3639662 +| epoch 10 | 4847/ 5600 batches | train loss 0.3413863 +| epoch 10 | 4851/ 5600 batches | train loss 0.3485472 +| epoch 10 | 4855/ 5600 batches | train loss 0.3353548 +| epoch 10 | 4859/ 5600 batches | train loss 0.3053469 +| epoch 10 | 4863/ 5600 batches | train loss 0.3470344 +| epoch 10 | 4867/ 5600 batches | train loss 0.3205793 +| epoch 10 | 4871/ 5600 batches | train loss 0.4231577 +| epoch 10 | 4875/ 5600 batches | train loss 0.2835574 +| epoch 10 | 4879/ 5600 batches | train loss 0.2689282 +| epoch 10 | 4883/ 5600 batches | train loss 0.2812828 +| epoch 10 | 4887/ 5600 batches | train loss 0.2799109 +| epoch 10 | 4891/ 5600 batches | train loss 0.3134969 +| epoch 10 | 4895/ 5600 batches | train loss 0.3021784 +| epoch 10 | 4899/ 5600 batches | train loss 0.3002139 +| epoch 10 | 4903/ 5600 batches | train loss 0.2756273 +| epoch 10 | 4907/ 5600 batches | train loss 0.4087642 +| epoch 10 | 4911/ 5600 batches | train loss 0.3141756 +| epoch 10 | 4915/ 5600 batches | train loss 0.2907149 +| epoch 10 | 4919/ 5600 batches | train loss 0.2687030 +| epoch 10 | 4923/ 5600 batches | train loss 0.2438712 +| epoch 10 | 4927/ 5600 batches | train loss 0.2945180 +| epoch 10 | 4931/ 5600 batches | train loss 0.2906902 +| epoch 10 | 4935/ 5600 batches | train loss 0.2995231 +| epoch 10 | 4939/ 5600 batches | train loss 0.3070385 +| epoch 10 | 4943/ 5600 batches | train loss 0.3038825 +| epoch 10 | 4947/ 5600 batches | train loss 0.3279744 +| epoch 10 | 4951/ 5600 batches | train loss 0.3108385 +| epoch 10 | 4955/ 5600 batches | train loss 0.3202813 +| epoch 10 | 4959/ 5600 batches | train loss 0.2297594 +| epoch 10 | 4963/ 5600 batches | train loss 0.3279868 +| epoch 10 | 4967/ 5600 batches | train loss 0.3296885 +| epoch 10 | 4971/ 5600 batches | train loss 0.3134313 +| epoch 10 | 4975/ 5600 batches | train loss 0.3505904 +| epoch 10 | 4979/ 5600 batches | train loss 0.3311644 +| epoch 10 | 4983/ 5600 batches | train loss 0.3373832 +| epoch 10 | 4987/ 5600 batches | train loss 0.3514952 +| epoch 10 | 4991/ 5600 batches | train loss 0.2750347 +| epoch 10 | 4995/ 5600 batches | train loss 0.3448526 +| epoch 10 | 4999/ 5600 batches | train loss 0.3564920 +| epoch 10 | 5003/ 5600 batches | train loss 0.3682474 +| epoch 10 | 5007/ 5600 batches | train loss 0.2778021 +| epoch 10 | 5011/ 5600 batches | train loss 0.3437339 +| epoch 10 | 5015/ 5600 batches | train loss 0.3419338 +| epoch 10 | 5019/ 5600 batches | train loss 0.3087480 +| epoch 10 | 5023/ 5600 batches | train loss 0.3401236 +| epoch 10 | 5027/ 5600 batches | train loss 0.3401925 +| epoch 10 | 5031/ 5600 batches | train loss 0.3148796 +| epoch 10 | 5035/ 5600 batches | train loss 0.2587870 +| epoch 10 | 5039/ 5600 batches | train loss 0.1271835 +| epoch 10 | 5043/ 5600 batches | train loss 0.2809111 +| epoch 10 | 5047/ 5600 batches | train loss 0.3314084 +| epoch 10 | 5051/ 5600 batches | train loss 0.2948437 +| epoch 10 | 5055/ 5600 batches | train loss 0.3303866 +| epoch 10 | 5059/ 5600 batches | train loss 0.2833391 +| epoch 10 | 5063/ 5600 batches | train loss 0.3298908 +| epoch 10 | 5067/ 5600 batches | train loss 0.3368958 +| epoch 10 | 5071/ 5600 batches | train loss 0.3353882 +| epoch 10 | 5075/ 5600 batches | train loss 0.3437438 +| epoch 10 | 5079/ 5600 batches | train loss 0.3043924 +| epoch 10 | 5083/ 5600 batches | train loss 0.3913486 +| epoch 10 | 5087/ 5600 batches | train loss 0.3321606 +| epoch 10 | 5091/ 5600 batches | train loss 0.3053534 +| epoch 10 | 5095/ 5600 batches | train loss 0.2638527 +| epoch 10 | 5099/ 5600 batches | train loss 0.3292925 +| epoch 10 | 5103/ 5600 batches | train loss 0.3350325 +| epoch 10 | 5107/ 5600 batches | train loss 0.2830067 +| epoch 10 | 5111/ 5600 batches | train loss 0.3006036 +| epoch 10 | 5115/ 5600 batches | train loss 0.2794651 +| epoch 10 | 5119/ 5600 batches | train loss 0.3656254 +| epoch 10 | 5123/ 5600 batches | train loss 0.2883622 +| epoch 10 | 5127/ 5600 batches | train loss 0.3746692 +| epoch 10 | 5131/ 5600 batches | train loss 0.2953440 +| epoch 10 | 5135/ 5600 batches | train loss 0.3352548 +| epoch 10 | 5139/ 5600 batches | train loss 0.3206786 +| epoch 10 | 5143/ 5600 batches | train loss 0.3624165 +| epoch 10 | 5147/ 5600 batches | train loss 0.3295426 +| epoch 10 | 5151/ 5600 batches | train loss 0.2271138 +| epoch 10 | 5155/ 5600 batches | train loss 0.3207562 +| epoch 10 | 5159/ 5600 batches | train loss 0.3218697 +| epoch 10 | 5163/ 5600 batches | train loss 0.3395572 +| epoch 10 | 5167/ 5600 batches | train loss 0.2936158 +| epoch 10 | 5171/ 5600 batches | train loss 0.2735169 +| epoch 10 | 5175/ 5600 batches | train loss 0.3372921 +| epoch 10 | 5179/ 5600 batches | train loss 0.2983940 +| epoch 10 | 5183/ 5600 batches | train loss 0.2552047 +| epoch 10 | 5187/ 5600 batches | train loss 0.3345978 +| epoch 10 | 5191/ 5600 batches | train loss 0.2743803 +| epoch 10 | 5195/ 5600 batches | train loss 0.3084817 +| epoch 10 | 5199/ 5600 batches | train loss 0.2891074 +| epoch 10 | 5203/ 5600 batches | train loss 0.2791609 +| epoch 10 | 5207/ 5600 batches | train loss 0.3044064 +| epoch 10 | 5211/ 5600 batches | train loss 0.2447547 +| epoch 10 | 5215/ 5600 batches | train loss 0.3835853 +| epoch 10 | 5219/ 5600 batches | train loss 0.3016854 +| epoch 10 | 5223/ 5600 batches | train loss 0.3879364 +| epoch 10 | 5227/ 5600 batches | train loss 0.3048955 +| epoch 10 | 5231/ 5600 batches | train loss 0.3554663 +| epoch 10 | 5235/ 5600 batches | train loss 0.2959746 +| epoch 10 | 5239/ 5600 batches | train loss 0.3433813 +| epoch 10 | 5243/ 5600 batches | train loss 0.3344372 +| epoch 10 | 5247/ 5600 batches | train loss 0.3496529 +| epoch 10 | 5251/ 5600 batches | train loss 0.3108636 +| epoch 10 | 5255/ 5600 batches | train loss 0.2937451 +| epoch 10 | 5259/ 5600 batches | train loss 0.3280938 +| epoch 10 | 5263/ 5600 batches | train loss 0.3302952 +| epoch 10 | 5267/ 5600 batches | train loss 0.2696542 +| epoch 10 | 5271/ 5600 batches | train loss 0.3156997 +| epoch 10 | 5275/ 5600 batches | train loss 0.3675094 +| epoch 10 | 5279/ 5600 batches | train loss 0.2706309 +| epoch 10 | 5283/ 5600 batches | train loss 0.3465190 +| epoch 10 | 5287/ 5600 batches | train loss 0.3471585 +| epoch 10 | 5291/ 5600 batches | train loss 0.2961559 +| epoch 10 | 5295/ 5600 batches | train loss 0.3409360 +| epoch 10 | 5299/ 5600 batches | train loss 0.2769964 +| epoch 10 | 5303/ 5600 batches | train loss 0.3065387 +| epoch 10 | 5307/ 5600 batches | train loss 0.3352538 +| epoch 10 | 5311/ 5600 batches | train loss 0.2540433 +| epoch 10 | 5315/ 5600 batches | train loss 0.3417606 +| epoch 10 | 5319/ 5600 batches | train loss 0.3942818 +| epoch 10 | 5323/ 5600 batches | train loss 0.3505368 +| epoch 10 | 5327/ 5600 batches | train loss 0.2889375 +| epoch 10 | 5331/ 5600 batches | train loss 0.3416071 +| epoch 10 | 5335/ 5600 batches | train loss 0.2517787 +| epoch 10 | 5339/ 5600 batches | train loss 0.3145381 +| epoch 10 | 5343/ 5600 batches | train loss 0.2902899 +| epoch 10 | 5347/ 5600 batches | train loss 0.3187864 +| epoch 10 | 5351/ 5600 batches | train loss 0.3221239 +| epoch 10 | 5355/ 5600 batches | train loss 0.2777734 +| epoch 10 | 5359/ 5600 batches | train loss 0.3257538 +| epoch 10 | 5363/ 5600 batches | train loss 0.3095858 +| epoch 10 | 5367/ 5600 batches | train loss 0.3355663 +| epoch 10 | 5371/ 5600 batches | train loss 0.2671872 +| epoch 10 | 5375/ 5600 batches | train loss 0.2761231 +| epoch 10 | 5379/ 5600 batches | train loss 0.3840265 +| epoch 10 | 5383/ 5600 batches | train loss 0.3444292 +| epoch 10 | 5387/ 5600 batches | train loss 0.3320933 +| epoch 10 | 5391/ 5600 batches | train loss 0.3080861 +| epoch 10 | 5395/ 5600 batches | train loss 0.2794418 +| epoch 10 | 5399/ 5600 batches | train loss 0.2707005 +| epoch 10 | 5403/ 5600 batches | train loss 0.2838101 +| epoch 10 | 5407/ 5600 batches | train loss 0.3709069 +| epoch 10 | 5411/ 5600 batches | train loss 0.2994435 +| epoch 10 | 5415/ 5600 batches | train loss 0.2900752 +| epoch 10 | 5419/ 5600 batches | train loss 0.3273441 +| epoch 10 | 5423/ 5600 batches | train loss 0.2777225 +| epoch 10 | 5427/ 5600 batches | train loss 0.2918175 +| epoch 10 | 5431/ 5600 batches | train loss 0.3238176 +| epoch 10 | 5435/ 5600 batches | train loss 0.3011226 +| epoch 10 | 5439/ 5600 batches | train loss 0.2879033 +| epoch 10 | 5443/ 5600 batches | train loss 0.3227445 +| epoch 10 | 5447/ 5600 batches | train loss 0.3162402 +| epoch 10 | 5451/ 5600 batches | train loss 0.3622175 +| epoch 10 | 5455/ 5600 batches | train loss 0.3144366 +| epoch 10 | 5459/ 5600 batches | train loss 0.3583051 +| epoch 10 | 5463/ 5600 batches | train loss 0.2957687 +| epoch 10 | 5467/ 5600 batches | train loss 0.3308629 +| epoch 10 | 5471/ 5600 batches | train loss 0.3144660 +| epoch 10 | 5475/ 5600 batches | train loss 0.3020606 +| epoch 10 | 5479/ 5600 batches | train loss 0.3088648 +| epoch 10 | 5483/ 5600 batches | train loss 0.2546737 +| epoch 10 | 5487/ 5600 batches | train loss 0.3053345 +| epoch 10 | 5491/ 5600 batches | train loss 0.2976086 +| epoch 10 | 5495/ 5600 batches | train loss 0.3373578 +| epoch 10 | 5499/ 5600 batches | train loss 0.2836606 +| epoch 10 | 5503/ 5600 batches | train loss 0.3126161 +| epoch 10 | 5507/ 5600 batches | train loss 0.3190904 +| epoch 10 | 5511/ 5600 batches | train loss 0.3038084 +| epoch 10 | 5515/ 5600 batches | train loss 0.2849883 +| epoch 10 | 5519/ 5600 batches | train loss 0.3281127 +| epoch 10 | 5523/ 5600 batches | train loss 0.3358923 +| epoch 10 | 5527/ 5600 batches | train loss 0.2800915 +| epoch 10 | 5531/ 5600 batches | train loss 0.3169013 +| epoch 10 | 5535/ 5600 batches | train loss 0.2728322 +| epoch 10 | 5539/ 5600 batches | train loss 0.3174403 +| epoch 10 | 5543/ 5600 batches | train loss 0.2631128 +| epoch 10 | 5547/ 5600 batches | train loss 0.2452913 +| epoch 10 | 5551/ 5600 batches | train loss 0.2583205 +| epoch 10 | 5555/ 5600 batches | train loss 0.3412922 +| epoch 10 | 5559/ 5600 batches | train loss 0.3260619 +| epoch 10 | 5563/ 5600 batches | train loss 0.3229829 +| epoch 10 | 5567/ 5600 batches | train loss 0.3040984 +| epoch 10 | 5571/ 5600 batches | train loss 0.2778691 +| epoch 10 | 5575/ 5600 batches | train loss 0.3177022 +| epoch 10 | 5579/ 5600 batches | train loss 0.3914069 +| epoch 10 | 5583/ 5600 batches | train loss 0.2123493 +| epoch 10 | 5587/ 5600 batches | train loss 0.3137556 +| epoch 10 | 5591/ 5600 batches | train loss 0.3220212 +| epoch 10 | 5595/ 5600 batches | train loss 0.2938178 +| epoch 10 | 5599/ 5600 batches | train loss 0.3431606 +-------------------------------------------------------------------------------- +| epoch 10 | 3/ 5600 batches | test loss 0.5697415 +| epoch 10 | 7/ 5600 batches | test loss 0.5467549 +| epoch 10 | 11/ 5600 batches | test loss 0.6259751 +| epoch 10 | 15/ 5600 batches | test loss 0.5333130 +| epoch 10 | 19/ 5600 batches | test loss 0.5761265 +| epoch 10 | 23/ 5600 batches | test loss 0.5073118 +| epoch 10 | 27/ 5600 batches | test loss 0.7227620 +| epoch 10 | 31/ 5600 batches | test loss 0.6153011 +| epoch 10 | 35/ 5600 batches | test loss 0.5052023 +| epoch 10 | 39/ 5600 batches | test loss 0.4342486 +| epoch 10 | 43/ 5600 batches | test loss 0.5141374 +| epoch 10 | 47/ 5600 batches | test loss 0.5035620 +| epoch 10 | 51/ 5600 batches | test loss 0.7213354 +| epoch 10 | 55/ 5600 batches | test loss 0.5238614 +| epoch 10 | 59/ 5600 batches | test loss 0.3445600 +| epoch 10 | 63/ 5600 batches | test loss 0.6107059 +| epoch 10 | 67/ 5600 batches | test loss 0.5784305 +| epoch 10 | 71/ 5600 batches | test loss 0.4746626 +| epoch 10 | 75/ 5600 batches | test loss 0.5253242 +| epoch 10 | 79/ 5600 batches | test loss 0.5435150 +| epoch 10 | 83/ 5600 batches | test loss 0.4972471 +| epoch 10 | 87/ 5600 batches | test loss 0.7617126 +| epoch 10 | 91/ 5600 batches | test loss 0.5245584 +| epoch 10 | 95/ 5600 batches | test loss 0.6691545 +| epoch 10 | 99/ 5600 batches | test loss 0.4234307 +| epoch 10 | 103/ 5600 batches | test loss 0.5035585 +| epoch 10 | 107/ 5600 batches | test loss 0.5453871 +| epoch 10 | 111/ 5600 batches | test loss 0.5630271 +| epoch 10 | 115/ 5600 batches | test loss 0.5711015 +| epoch 10 | 119/ 5600 batches | test loss 0.9206492 +| epoch 10 | 123/ 5600 batches | test loss 0.5935793 +| epoch 10 | 127/ 5600 batches | test loss 0.7874171 +| epoch 10 | 131/ 5600 batches | test loss 0.4571570 +| epoch 10 | 135/ 5600 batches | test loss 0.4788617 +| epoch 10 | 139/ 5600 batches | test loss 0.3970309 +| epoch 10 | 143/ 5600 batches | test loss 0.4428449 +| epoch 10 | 147/ 5600 batches | test loss 0.4971041 +| epoch 10 | 151/ 5600 batches | test loss 0.8595065 +| epoch 10 | 155/ 5600 batches | test loss 0.4028838 +| epoch 10 | 159/ 5600 batches | test loss 0.5816450 +| epoch 10 | 163/ 5600 batches | test loss 0.4008449 +| epoch 10 | 167/ 5600 batches | test loss 0.3906282 +| epoch 10 | 171/ 5600 batches | test loss 0.4793206 +| epoch 10 | 175/ 5600 batches | test loss 0.5429308 +| epoch 10 | 179/ 5600 batches | test loss 0.4208504 +| epoch 10 | 183/ 5600 batches | test loss 0.3813484 +| epoch 10 | 187/ 5600 batches | test loss 0.5511096 +| epoch 10 | 191/ 5600 batches | test loss 0.6092809 +| epoch 10 | 195/ 5600 batches | test loss 0.5206006 +| epoch 10 | 199/ 5600 batches | test loss 0.5782830 +| epoch 10 | 203/ 5600 batches | test loss 0.5072056 +| epoch 10 | 207/ 5600 batches | test loss 0.4942901 +| epoch 10 | 211/ 5600 batches | test loss 0.7290793 +| epoch 10 | 215/ 5600 batches | test loss 0.7211875 +| epoch 10 | 219/ 5600 batches | test loss 0.5416843 +| epoch 10 | 223/ 5600 batches | test loss 0.5030115 +| epoch 10 | 227/ 5600 batches | test loss 0.4743460 +| epoch 10 | 231/ 5600 batches | test loss 0.6266342 +| epoch 10 | 235/ 5600 batches | test loss 0.5034771 +| epoch 10 | 239/ 5600 batches | test loss 0.5764868 +| epoch 10 | 243/ 5600 batches | test loss 0.4285588 +| epoch 10 | 247/ 5600 batches | test loss 0.4935328 +| epoch 10 | 251/ 5600 batches | test loss 0.4267447 +| epoch 10 | 255/ 5600 batches | test loss 0.5844629 +| epoch 10 | 259/ 5600 batches | test loss 0.4321941 +| epoch 10 | 263/ 5600 batches | test loss 0.4768504 +| epoch 10 | 267/ 5600 batches | test loss 0.4330503 +| epoch 10 | 271/ 5600 batches | test loss 0.5000019 +| epoch 10 | 275/ 5600 batches | test loss 0.6041878 +| epoch 10 | 279/ 5600 batches | test loss 0.7440864 +| epoch 10 | 283/ 5600 batches | test loss 0.5356047 +| epoch 10 | 287/ 5600 batches | test loss 0.5478595 +| epoch 10 | 291/ 5600 batches | test loss 0.8171029 +| epoch 10 | 295/ 5600 batches | test loss 0.4138760 +| epoch 10 | 299/ 5600 batches | test loss 0.5087965 +| epoch 10 | 303/ 5600 batches | test loss 0.6140543 +| epoch 10 | 307/ 5600 batches | test loss 0.6533428 +| epoch 10 | 311/ 5600 batches | test loss 0.6445075 +| epoch 10 | 315/ 5600 batches | test loss 0.4865736 +| epoch 10 | 319/ 5600 batches | test loss 0.4683161 +| epoch 10 | 323/ 5600 batches | test loss 0.5585153 +| epoch 10 | 327/ 5600 batches | test loss 0.5444334 +| epoch 10 | 331/ 5600 batches | test loss 0.6048137 +| epoch 10 | 335/ 5600 batches | test loss 0.5785416 +| epoch 10 | 339/ 5600 batches | test loss 0.7189196 +| epoch 10 | 343/ 5600 batches | test loss 0.4173158 +| epoch 10 | 347/ 5600 batches | test loss 0.5027251 +| epoch 10 | 351/ 5600 batches | test loss 0.4943096 +| epoch 10 | 355/ 5600 batches | test loss 0.5860929 +| epoch 10 | 359/ 5600 batches | test loss 0.5737345 +| epoch 10 | 363/ 5600 batches | test loss 0.5965680 +| epoch 10 | 367/ 5600 batches | test loss 0.4178753 +| epoch 10 | 371/ 5600 batches | test loss 0.7184504 +| epoch 10 | 375/ 5600 batches | test loss 0.4123038 +| epoch 10 | 379/ 5600 batches | test loss 0.4048444 +| epoch 10 | 383/ 5600 batches | test loss 0.5202695 +| epoch 10 | 387/ 5600 batches | test loss 0.4115656 +| epoch 10 | 391/ 5600 batches | test loss 0.5546821 +| epoch 10 | 395/ 5600 batches | test loss 0.5512727 +| epoch 10 | 399/ 5600 batches | test loss 0.6211736 +| epoch 10 | 403/ 5600 batches | test loss 0.5414140 +| epoch 10 | 407/ 5600 batches | test loss 0.5777485 +| epoch 10 | 411/ 5600 batches | test loss 0.6323020 +| epoch 10 | 415/ 5600 batches | test loss 0.3560139 +| epoch 10 | 419/ 5600 batches | test loss 0.4739322 +| epoch 10 | 423/ 5600 batches | test loss 0.5333376 +| epoch 10 | 427/ 5600 batches | test loss 0.6731847 +| epoch 10 | 431/ 5600 batches | test loss 0.5412437 +| epoch 10 | 435/ 5600 batches | test loss 0.5498387 +| epoch 10 | 439/ 5600 batches | test loss 0.6268644 +| epoch 10 | 443/ 5600 batches | test loss 0.3540244 +| epoch 10 | 447/ 5600 batches | test loss 0.6659602 +| epoch 10 | 451/ 5600 batches | test loss 0.5054981 +| epoch 10 | 455/ 5600 batches | test loss 0.5411239 +| epoch 10 | 459/ 5600 batches | test loss 0.4600367 +| epoch 10 | 463/ 5600 batches | test loss 0.6897670 +| epoch 10 | 467/ 5600 batches | test loss 0.4909961 +| epoch 10 | 471/ 5600 batches | test loss 0.6172958 +| epoch 10 | 475/ 5600 batches | test loss 0.4278260 +| epoch 10 | 479/ 5600 batches | test loss 0.5877194 +| epoch 10 | 483/ 5600 batches | test loss 0.4525743 +| epoch 10 | 487/ 5600 batches | test loss 0.6442379 +| epoch 10 | 491/ 5600 batches | test loss 0.6316346 +| epoch 10 | 495/ 5600 batches | test loss 0.5663019 +| epoch 10 | 499/ 5600 batches | test loss 0.6354134 +| epoch 10 | 503/ 5600 batches | test loss 0.6869807 +| epoch 10 | 507/ 5600 batches | test loss 0.6243418 +| epoch 10 | 511/ 5600 batches | test loss 0.5526336 +| epoch 10 | 515/ 5600 batches | test loss 0.5247828 +| epoch 10 | 519/ 5600 batches | test loss 0.5506618 +| epoch 10 | 523/ 5600 batches | test loss 0.6702718 +| epoch 10 | 527/ 5600 batches | test loss 0.6114601 +| epoch 10 | 531/ 5600 batches | test loss 0.4602808 +| epoch 10 | 535/ 5600 batches | test loss 0.4744338 +| epoch 10 | 539/ 5600 batches | test loss 0.8916039 +| epoch 10 | 543/ 5600 batches | test loss 0.4311931 +| epoch 10 | 547/ 5600 batches | test loss 0.5266583 +| epoch 10 | 551/ 5600 batches | test loss 0.5962101 +| epoch 10 | 555/ 5600 batches | test loss 1.0464073 +| epoch 10 | 559/ 5600 batches | test loss 0.6245705 +| epoch 10 | 563/ 5600 batches | test loss 0.6321813 +| epoch 10 | 567/ 5600 batches | test loss 0.6006538 +| epoch 10 | 571/ 5600 batches | test loss 0.4843861 +| epoch 10 | 575/ 5600 batches | test loss 0.4761470 +| epoch 10 | 579/ 5600 batches | test loss 0.6192003 +| epoch 10 | 583/ 5600 batches | test loss 0.5563742 +| epoch 10 | 587/ 5600 batches | test loss 0.5229918 +| epoch 10 | 591/ 5600 batches | test loss 0.4248256 +| epoch 10 | 595/ 5600 batches | test loss 0.6492908 +| epoch 10 | 599/ 5600 batches | test loss 0.6680497 +| epoch 10 | 603/ 5600 batches | test loss 0.7513142 +| epoch 10 | 607/ 5600 batches | test loss 0.4537091 +| epoch 10 | 611/ 5600 batches | test loss 0.4096944 +| epoch 10 | 615/ 5600 batches | test loss 0.6320465 +| epoch 10 | 619/ 5600 batches | test loss 0.6786351 +| epoch 10 | 623/ 5600 batches | test loss 0.5532112 +| epoch 10 | 627/ 5600 batches | test loss 0.5878905 +| epoch 10 | 631/ 5600 batches | test loss 0.4540731 +| epoch 10 | 635/ 5600 batches | test loss 0.7765746 +| epoch 10 | 639/ 5600 batches | test loss 0.6205115 +| epoch 10 | 643/ 5600 batches | test loss 0.4565594 +| epoch 10 | 647/ 5600 batches | test loss 0.5637800 +| epoch 10 | 651/ 5600 batches | test loss 0.6482105 +| epoch 10 | 655/ 5600 batches | test loss 0.7064399 +| epoch 10 | 659/ 5600 batches | test loss 0.5547761 +| epoch 10 | 663/ 5600 batches | test loss 0.5214928 +| epoch 10 | 667/ 5600 batches | test loss 0.5467290 +| epoch 10 | 671/ 5600 batches | test loss 0.3892682 +| epoch 10 | 675/ 5600 batches | test loss 0.6663030 +| epoch 10 | 679/ 5600 batches | test loss 0.5463169 +| epoch 10 | 683/ 5600 batches | test loss 0.5864457 +| epoch 10 | 687/ 5600 batches | test loss 0.4361963 +| epoch 10 | 691/ 5600 batches | test loss 0.4020282 +| epoch 10 | 695/ 5600 batches | test loss 0.3680944 +| epoch 10 | 699/ 5600 batches | test loss 0.5725592 +| epoch 10 | 703/ 5600 batches | test loss 0.5592270 +| epoch 10 | 707/ 5600 batches | test loss 0.6716445 +| epoch 10 | 711/ 5600 batches | test loss 0.4709266 +| epoch 10 | 715/ 5600 batches | test loss 0.4696338 +| epoch 10 | 719/ 5600 batches | test loss 0.4984657 +| epoch 10 | 723/ 5600 batches | test loss 0.4395120 +| epoch 10 | 727/ 5600 batches | test loss 0.3992856 +| epoch 10 | 731/ 5600 batches | test loss 0.4307006 +| epoch 10 | 735/ 5600 batches | test loss 0.5337930 +| epoch 10 | 739/ 5600 batches | test loss 0.7052689 +| epoch 10 | 743/ 5600 batches | test loss 0.4393975 +| epoch 10 | 747/ 5600 batches | test loss 0.4319934 +| epoch 10 | 751/ 5600 batches | test loss 0.6246532 +| epoch 10 | 755/ 5600 batches | test loss 0.6759992 +| epoch 10 | 759/ 5600 batches | test loss 0.6336237 +| epoch 10 | 763/ 5600 batches | test loss 0.5099584 +| epoch 10 | 767/ 5600 batches | test loss 0.7058490 +| epoch 10 | 771/ 5600 batches | test loss 0.7174932 +| epoch 10 | 775/ 5600 batches | test loss 0.4064707 +| epoch 10 | 779/ 5600 batches | test loss 0.5183006 +| epoch 10 | 783/ 5600 batches | test loss 0.5954165 +| epoch 10 | 787/ 5600 batches | test loss 0.5094186 +| epoch 10 | 791/ 5600 batches | test loss 0.5779842 +| epoch 10 | 795/ 5600 batches | test loss 0.5414040 +| epoch 10 | 799/ 5600 batches | test loss 0.5456430 +| epoch 10 | 803/ 5600 batches | test loss 0.7481964 +| epoch 10 | 807/ 5600 batches | test loss 0.5291324 +| epoch 10 | 811/ 5600 batches | test loss 0.5377951 +| epoch 10 | 815/ 5600 batches | test loss 0.6971188 +| epoch 10 | 819/ 5600 batches | test loss 0.7209499 +| epoch 10 | 823/ 5600 batches | test loss 0.4765195 +| epoch 10 | 827/ 5600 batches | test loss 0.4378389 +| epoch 10 | 831/ 5600 batches | test loss 0.8631907 +| epoch 10 | 835/ 5600 batches | test loss 0.6531966 +| epoch 10 | 839/ 5600 batches | test loss 0.4693613 +| epoch 10 | 843/ 5600 batches | test loss 0.6423891 +| epoch 10 | 847/ 5600 batches | test loss 0.5031864 +| epoch 10 | 851/ 5600 batches | test loss 0.5409194 +| epoch 10 | 855/ 5600 batches | test loss 0.5456429 +| epoch 10 | 859/ 5600 batches | test loss 0.4853880 +| epoch 10 | 863/ 5600 batches | test loss 0.6471543 +| epoch 10 | 867/ 5600 batches | test loss 0.8181823 +| epoch 10 | 871/ 5600 batches | test loss 0.5412736 +| epoch 10 | 875/ 5600 batches | test loss 0.6169982 +| epoch 10 | 879/ 5600 batches | test loss 0.8375596 +| epoch 10 | 883/ 5600 batches | test loss 0.5391655 +| epoch 10 | 887/ 5600 batches | test loss 0.5071127 +| epoch 10 | 891/ 5600 batches | test loss 0.6672420 +| epoch 10 | 895/ 5600 batches | test loss 0.5835660 +| epoch 10 | 899/ 5600 batches | test loss 0.5136799 +| epoch 10 | 903/ 5600 batches | test loss 0.5201617 +| epoch 10 | 907/ 5600 batches | test loss 0.6619372 +| epoch 10 | 911/ 5600 batches | test loss 0.7334630 +| epoch 10 | 915/ 5600 batches | test loss 0.5414548 +| epoch 10 | 919/ 5600 batches | test loss 0.5432984 +| epoch 10 | 923/ 5600 batches | test loss 0.5783800 +| epoch 10 | 927/ 5600 batches | test loss 0.4313186 +| epoch 10 | 931/ 5600 batches | test loss 0.4467949 +| epoch 10 | 935/ 5600 batches | test loss 0.7029302 +| epoch 10 | 939/ 5600 batches | test loss 0.6598303 +| epoch 10 | 943/ 5600 batches | test loss 0.5134739 +| epoch 10 | 947/ 5600 batches | test loss 0.4787187 +| epoch 10 | 951/ 5600 batches | test loss 0.6111485 +| epoch 10 | 955/ 5600 batches | test loss 0.4381752 +| epoch 10 | 959/ 5600 batches | test loss 0.6451178 +| epoch 10 | 963/ 5600 batches | test loss 0.4938774 +| epoch 10 | 967/ 5600 batches | test loss 0.4120709 +| epoch 10 | 971/ 5600 batches | test loss 0.3750663 +| epoch 10 | 975/ 5600 batches | test loss 0.6852923 +| epoch 10 | 979/ 5600 batches | test loss 0.5459830 +| epoch 10 | 983/ 5600 batches | test loss 0.5863101 +| epoch 10 | 987/ 5600 batches | test loss 0.5539916 +| epoch 10 | 991/ 5600 batches | test loss 0.4514762 +| epoch 10 | 995/ 5600 batches | test loss 0.4697759 +| epoch 10 | 999/ 5600 batches | test loss 0.5681531 +| epoch 10 | 1003/ 5600 batches | test loss 0.5565183 +| epoch 10 | 1007/ 5600 batches | test loss 0.3937781 +| epoch 10 | 1011/ 5600 batches | test loss 0.3819634 +| epoch 10 | 1015/ 5600 batches | test loss 0.5986008 +| epoch 10 | 1019/ 5600 batches | test loss 0.5234973 +| epoch 10 | 1023/ 5600 batches | test loss 0.5371686 +| epoch 10 | 1027/ 5600 batches | test loss 0.7397139 +| epoch 10 | 1031/ 5600 batches | test loss 0.4686288 +| epoch 10 | 1035/ 5600 batches | test loss 0.6324346 +| epoch 10 | 1039/ 5600 batches | test loss 0.4188577 +| epoch 10 | 1043/ 5600 batches | test loss 0.6628989 +| epoch 10 | 1047/ 5600 batches | test loss 0.6633856 +| epoch 10 | 1051/ 5600 batches | test loss 0.5354588 +| epoch 10 | 1055/ 5600 batches | test loss 0.5637197 +| epoch 10 | 1059/ 5600 batches | test loss 0.5950651 +| epoch 10 | 1063/ 5600 batches | test loss 0.8790601 +| epoch 10 | 1067/ 5600 batches | test loss 0.7089550 +| epoch 10 | 1071/ 5600 batches | test loss 0.7399232 +| epoch 10 | 1075/ 5600 batches | test loss 0.4070057 +| epoch 10 | 1079/ 5600 batches | test loss 0.4985086 +| epoch 10 | 1083/ 5600 batches | test loss 0.6417575 +| epoch 10 | 1087/ 5600 batches | test loss 0.6012184 +| epoch 10 | 1091/ 5600 batches | test loss 0.7304728 +| epoch 10 | 1095/ 5600 batches | test loss 0.4538470 +| epoch 10 | 1099/ 5600 batches | test loss 0.7504523 +| epoch 10 | 1103/ 5600 batches | test loss 0.8310511 +| epoch 10 | 1107/ 5600 batches | test loss 0.5131363 +| epoch 10 | 1111/ 5600 batches | test loss 0.5594270 +| epoch 10 | 1115/ 5600 batches | test loss 0.4458249 +| epoch 10 | 1119/ 5600 batches | test loss 0.4701741 +| epoch 10 | 1123/ 5600 batches | test loss 0.5274150 +| epoch 10 | 1127/ 5600 batches | test loss 0.5388539 +| epoch 10 | 1131/ 5600 batches | test loss 0.5271366 +| epoch 10 | 1135/ 5600 batches | test loss 0.6045997 +| epoch 10 | 1139/ 5600 batches | test loss 0.5589761 +| epoch 10 | 1143/ 5600 batches | test loss 0.5077927 +| epoch 10 | 1147/ 5600 batches | test loss 0.7024704 +| epoch 10 | 1151/ 5600 batches | test loss 0.7777929 +| epoch 10 | 1155/ 5600 batches | test loss 0.4797007 +| epoch 10 | 1159/ 5600 batches | test loss 0.6067771 +| epoch 10 | 1163/ 5600 batches | test loss 0.5352779 +| epoch 10 | 1167/ 5600 batches | test loss 0.5721296 +| epoch 10 | 1171/ 5600 batches | test loss 0.7571355 +| epoch 10 | 1175/ 5600 batches | test loss 0.5265462 +| epoch 10 | 1179/ 5600 batches | test loss 0.4941742 +| epoch 10 | 1183/ 5600 batches | test loss 0.4224874 +| epoch 10 | 1187/ 5600 batches | test loss 0.5682942 +| epoch 10 | 1191/ 5600 batches | test loss 0.4751699 +| epoch 10 | 1195/ 5600 batches | test loss 0.5211759 +| epoch 10 | 1199/ 5600 batches | test loss 0.4696579 +| epoch 10 | 1203/ 5600 batches | test loss 0.7356982 +| epoch 10 | 1207/ 5600 batches | test loss 0.4621479 +| epoch 10 | 1211/ 5600 batches | test loss 0.5615298 +| epoch 10 | 1215/ 5600 batches | test loss 0.4282125 +| epoch 10 | 1219/ 5600 batches | test loss 0.7428482 +| epoch 10 | 1223/ 5600 batches | test loss 0.4337233 +| epoch 10 | 1227/ 5600 batches | test loss 0.5517817 +| epoch 10 | 1231/ 5600 batches | test loss 0.5220206 +| epoch 10 | 1235/ 5600 batches | test loss 0.4185677 +| epoch 10 | 1239/ 5600 batches | test loss 0.5692506 +| epoch 10 | 1243/ 5600 batches | test loss 0.6085081 +| epoch 10 | 1247/ 5600 batches | test loss 0.6449954 +| epoch 10 | 1251/ 5600 batches | test loss 0.7118561 +| epoch 10 | 1255/ 5600 batches | test loss 0.6609719 +| epoch 10 | 1259/ 5600 batches | test loss 0.4153113 +| epoch 10 | 1263/ 5600 batches | test loss 0.3692271 +| epoch 10 | 1267/ 5600 batches | test loss 0.4627494 +| epoch 10 | 1271/ 5600 batches | test loss 0.3958597 +| epoch 10 | 1275/ 5600 batches | test loss 0.5946511 +| epoch 10 | 1279/ 5600 batches | test loss 0.5559334 +| epoch 10 | 1283/ 5600 batches | test loss 0.5454604 +| epoch 10 | 1287/ 5600 batches | test loss 0.4568748 +| epoch 10 | 1291/ 5600 batches | test loss 0.4107009 +| epoch 10 | 1295/ 5600 batches | test loss 0.4811569 +| epoch 10 | 1299/ 5600 batches | test loss 0.5696704 +| epoch 10 | 1303/ 5600 batches | test loss 0.4861489 +| epoch 10 | 1307/ 5600 batches | test loss 0.4355905 +| epoch 10 | 1311/ 5600 batches | test loss 0.7292378 +| epoch 10 | 1315/ 5600 batches | test loss 0.5731171 +| epoch 10 | 1319/ 5600 batches | test loss 0.6677155 +| epoch 10 | 1323/ 5600 batches | test loss 0.7112948 +| epoch 10 | 1327/ 5600 batches | test loss 0.4559108 +| epoch 10 | 1331/ 5600 batches | test loss 0.4203083 +| epoch 10 | 1335/ 5600 batches | test loss 0.6325558 +| epoch 10 | 1339/ 5600 batches | test loss 0.4693563 +| epoch 10 | 1343/ 5600 batches | test loss 0.6280764 +| epoch 10 | 1347/ 5600 batches | test loss 0.5001562 +| epoch 10 | 1351/ 5600 batches | test loss 0.7763321 +| epoch 10 | 1355/ 5600 batches | test loss 0.8078918 +| epoch 10 | 1359/ 5600 batches | test loss 0.5490643 +| epoch 10 | 1363/ 5600 batches | test loss 0.6809755 +| epoch 10 | 1367/ 5600 batches | test loss 0.5119761 +| epoch 10 | 1371/ 5600 batches | test loss 0.5530428 +| epoch 10 | 1375/ 5600 batches | test loss 0.5016624 +| epoch 10 | 1379/ 5600 batches | test loss 0.4134596 +| epoch 10 | 1383/ 5600 batches | test loss 0.5468147 +| epoch 10 | 1387/ 5600 batches | test loss 0.8259095 +| epoch 10 | 1391/ 5600 batches | test loss 0.2226267 +| epoch 10 | 1395/ 5600 batches | test loss 0.5667493 +| epoch 10 | 1399/ 5600 batches | test loss 0.5180029 +| epoch 10 | final test loss 0.5547, do not save model! diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/partial_model_weights.pth b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/partial_model_weights.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3ee0649f3798626cd1ddd544a630304335db1f4 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/partial_model_weights.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3386a8e754c0aad746d44a1046ee21909841de1c636636942db8a3e53f5b5f +size 1975288322 diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_config.json b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e311b234c928bacdfe38f27771727ebe85acd62 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_config.json @@ -0,0 +1,29 @@ +{ + "stage": "stage2", + "lr": 3e-05, + "epochs": 10, + "log_interval": 4, + "gradient_clip": 1.0, + "tr_batch_size": 4, + "te_batch_size": 4, + "gradient_accumulation_steps": 1, + "update_params": [ + "all" + ], + "corpus": "math_derivation", + "num_of_sents": [ + 12, + 12 + ], + "encoder": "bert-base-cased", + "repeat": 1, + "max_num_each_cat": 500, + "fb_mode": 0.0, + "set_loss_mask": false, + "use_label_dec": true, + "use_label_enc": false, + "decoder": "Qwen/Qwen2.5-0.5B", + "pretrained_path": null, + "device": "cuda", + "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500" +} \ No newline at end of file diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_log.log b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_log.log new file mode 100644 index 0000000000000000000000000000000000000000..2aca444437383a48cbd73a7529c9ab859151a615 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_500/train_log.log @@ -0,0 +1,34 @@ +* training corpus: math_derivation +* total num: 3500 +* epochs: 10 +* batch size: 4 +* gradient_accumulation_steps: 1 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 700 batches | train loss 2.2737865 +| epoch 1 | 7/ 700 batches | train loss 1.8909767 +| epoch 1 | 11/ 700 batches | train loss 2.2367806 +| epoch 1 | 15/ 700 batches | train loss 1.4953537 +| epoch 1 | 19/ 700 batches | train loss 1.0802276 +| epoch 1 | 23/ 700 batches | train loss 1.0777125 +| epoch 1 | 27/ 700 batches | train loss 0.9398872 +| epoch 1 | 31/ 700 batches | train loss 0.7538413 +| epoch 1 | 35/ 700 batches | train loss 0.7569203 +| epoch 1 | 39/ 700 batches | train loss 0.7991652 +| epoch 1 | 43/ 700 batches | train loss 0.6284156 +| epoch 1 | 47/ 700 batches | train loss 0.5409176 +| epoch 1 | 51/ 700 batches | train loss 0.6084326 +| epoch 1 | 55/ 700 batches | train loss 0.6365047 +| epoch 1 | 59/ 700 batches | train loss 0.6892266 +| epoch 1 | 63/ 700 batches | train loss 0.5623323 +| epoch 1 | 67/ 700 batches | train loss 0.5938894 +| epoch 1 | 71/ 700 batches | train loss 0.6103931 +| epoch 1 | 75/ 700 batches | train loss 0.6170006 +| epoch 1 | 79/ 700 batches | train loss 0.5508593 +| epoch 1 | 83/ 700 batches | train loss 0.5028061 +| epoch 1 | 87/ 700 batches | train loss 0.8172022 +| epoch 1 | 91/ 700 batches | train loss 0.5984362 +| epoch 1 | 95/ 700 batches | train loss 0.7497207 +| epoch 1 | 99/ 700 batches | train loss 0.6599419 +| epoch 1 | 103/ 700 batches | train loss 0.5675180 +| epoch 1 | 107/ 700 batches | train loss 0.4801010 +| epoch 1 | 111/ 700 batches | train loss 0.4890854 diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/partial_model_weights.pth b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/partial_model_weights.pth new file mode 100644 index 0000000000000000000000000000000000000000..a759c21b429b4ab12e194fb976837b747c723ff8 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/partial_model_weights.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016f6a274e4b488d730f635259f2c68cbd867459fd7430cdb02e05ca18eeb186 +size 1975288322 diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_config.json b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f797f4a106afc433f092e8797529012fff0a5cd --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_config.json @@ -0,0 +1,29 @@ +{ + "stage": "stage2", + "lr": 3e-05, + "epochs": 10, + "log_interval": 4, + "gradient_clip": 1.0, + "tr_batch_size": 4, + "te_batch_size": 4, + "gradient_accumulation_steps": 1, + "update_params": [ + "all" + ], + "corpus": "math_derivation", + "num_of_sents": [ + 12, + 12 + ], + "encoder": "bert-base-cased", + "repeat": 1, + "max_num_each_cat": 6000, + "fb_mode": 0.0, + "set_loss_mask": false, + "use_label_dec": true, + "use_label_enc": false, + "decoder": "Qwen/Qwen2.5-0.5B", + "pretrained_path": null, + "device": "cuda", + "save_dir": "checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000" +} \ No newline at end of file diff --git a/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_log.log b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_log.log new file mode 100644 index 0000000000000000000000000000000000000000..ff11e34165b2858d0c96a275ee23dbbf0efe3669 --- /dev/null +++ b/checkpoint_dec_Qwen-Qwen2.5-0.5B_epochs_10_corpus_math_derivation_stage_stage2_num_12_use_label_dec_True_max_num_each_cat_6000/train_log.log @@ -0,0 +1,23906 @@ +* training corpus: math_derivation +* total num: 42000 +* epochs: 10 +* batch size: 4 +* gradient_accumulation_steps: 1 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 8400 batches | train loss 2.8348670 +| epoch 1 | 7/ 8400 batches | train loss 2.1561289 +| epoch 1 | 11/ 8400 batches | train loss 1.3624101 +| epoch 1 | 15/ 8400 batches | train loss 1.8648926 +| epoch 1 | 19/ 8400 batches | train loss 1.4431213 +| epoch 1 | 23/ 8400 batches | train loss 1.0360153 +| epoch 1 | 27/ 8400 batches | train loss 0.8824399 +| epoch 1 | 31/ 8400 batches | train loss 0.8643878 +| epoch 1 | 35/ 8400 batches | train loss 0.8157232 +| epoch 1 | 39/ 8400 batches | train loss 0.6915992 +| epoch 1 | 43/ 8400 batches | train loss 0.6959984 +| epoch 1 | 47/ 8400 batches | train loss 1.5480396 +| epoch 1 | 51/ 8400 batches | train loss 0.8179662 +| epoch 1 | 55/ 8400 batches | train loss 0.7664320 +| epoch 1 | 59/ 8400 batches | train loss 0.7240226 +| epoch 1 | 63/ 8400 batches | train loss 0.8738055 +| epoch 1 | 67/ 8400 batches | train loss 0.7119042 +| epoch 1 | 71/ 8400 batches | train loss 0.6739055 +| epoch 1 | 75/ 8400 batches | train loss 0.5782098 +| epoch 1 | 79/ 8400 batches | train loss 0.7513783 +| epoch 1 | 83/ 8400 batches | train loss 0.6091135 +| epoch 1 | 87/ 8400 batches | train loss 0.6140165 +| epoch 1 | 91/ 8400 batches | train loss 0.6785100 +| epoch 1 | 95/ 8400 batches | train loss 0.5801656 +| epoch 1 | 99/ 8400 batches | train loss 0.6953827 +| epoch 1 | 103/ 8400 batches | train loss 0.5196676 +| epoch 1 | 107/ 8400 batches | train loss 0.5991812 +| epoch 1 | 111/ 8400 batches | train loss 0.5430156 +| epoch 1 | 115/ 8400 batches | train loss 0.5481973 +| epoch 1 | 119/ 8400 batches | train loss 0.5944074 +| epoch 1 | 123/ 8400 batches | train loss 0.7460161 +| epoch 1 | 127/ 8400 batches | train loss 0.5673018 +| epoch 1 | 131/ 8400 batches | train loss 0.5549649 +| epoch 1 | 135/ 8400 batches | train loss 0.6303680 +| epoch 1 | 139/ 8400 batches | train loss 0.6180280 +| epoch 1 | 143/ 8400 batches | train loss 0.8412059 +| epoch 1 | 147/ 8400 batches | train loss 0.5469096 +| epoch 1 | 151/ 8400 batches | train loss 0.6884788 +| epoch 1 | 155/ 8400 batches | train loss 0.6581720 +| epoch 1 | 159/ 8400 batches | train loss 0.6513360 +| epoch 1 | 163/ 8400 batches | train loss 0.5392073 +| epoch 1 | 167/ 8400 batches | train loss 0.5638557 +| epoch 1 | 171/ 8400 batches | train loss 0.5905545 +| epoch 1 | 175/ 8400 batches | train loss 0.5461158 +| epoch 1 | 179/ 8400 batches | train loss 0.4498467 +| epoch 1 | 183/ 8400 batches | train loss 0.5136673 +| epoch 1 | 187/ 8400 batches | train loss 0.5378350 +| epoch 1 | 191/ 8400 batches | train loss 0.5966012 +| epoch 1 | 195/ 8400 batches | train loss 0.5155822 +| epoch 1 | 199/ 8400 batches | train loss 0.6023891 +| epoch 1 | 203/ 8400 batches | train loss 0.5727266 +| epoch 1 | 207/ 8400 batches | train loss 0.5202058 +| epoch 1 | 211/ 8400 batches | train loss 0.6473382 +| epoch 1 | 215/ 8400 batches | train loss 0.5168438 +| epoch 1 | 219/ 8400 batches | train loss 0.5661523 +| epoch 1 | 223/ 8400 batches | train loss 0.4749469 +| epoch 1 | 227/ 8400 batches | train loss 0.6696224 +| epoch 1 | 231/ 8400 batches | train loss 0.4739528 +| epoch 1 | 235/ 8400 batches | train loss 0.5823541 +| epoch 1 | 239/ 8400 batches | train loss 0.6497830 +| epoch 1 | 243/ 8400 batches | train loss 0.6400050 +| epoch 1 | 247/ 8400 batches | train loss 0.6480783 +| epoch 1 | 251/ 8400 batches | train loss 0.5023649 +| epoch 1 | 255/ 8400 batches | train loss 0.5212962 +| epoch 1 | 259/ 8400 batches | train loss 0.5707268 +| epoch 1 | 263/ 8400 batches | train loss 0.5578195 +| epoch 1 | 267/ 8400 batches | train loss 0.5323702 +| epoch 1 | 271/ 8400 batches | train loss 0.4728314 +| epoch 1 | 275/ 8400 batches | train loss 0.5269189 +| epoch 1 | 279/ 8400 batches | train loss 0.4991135 +| epoch 1 | 283/ 8400 batches | train loss 0.4376600 +| epoch 1 | 287/ 8400 batches | train loss 0.5270133 +| epoch 1 | 291/ 8400 batches | train loss 0.5917993 +| epoch 1 | 295/ 8400 batches | train loss 0.7104085 +| epoch 1 | 299/ 8400 batches | train loss 0.4816014 +| epoch 1 | 303/ 8400 batches | train loss 0.6519003 +| epoch 1 | 307/ 8400 batches | train loss 0.7364781 +| epoch 1 | 311/ 8400 batches | train loss 0.5146891 +| epoch 1 | 315/ 8400 batches | train loss 0.5581195 +| epoch 1 | 319/ 8400 batches | train loss 0.4607789 +| epoch 1 | 323/ 8400 batches | train loss 0.5394907 +| epoch 1 | 327/ 8400 batches | train loss 0.4827071 +| epoch 1 | 331/ 8400 batches | train loss 0.4803745 +| epoch 1 | 335/ 8400 batches | train loss 0.5945003 +| epoch 1 | 339/ 8400 batches | train loss 0.4461344 +| epoch 1 | 343/ 8400 batches | train loss 0.5242629 +| epoch 1 | 347/ 8400 batches | train loss 0.5929149 +| epoch 1 | 351/ 8400 batches | train loss 0.4355369 +| epoch 1 | 355/ 8400 batches | train loss 0.4597317 +| epoch 1 | 359/ 8400 batches | train loss 0.4400523 +| epoch 1 | 363/ 8400 batches | train loss 0.7035968 +| epoch 1 | 367/ 8400 batches | train loss 0.6464527 +| epoch 1 | 371/ 8400 batches | train loss 0.3949035 +| epoch 1 | 375/ 8400 batches | train loss 0.4771312 +| epoch 1 | 379/ 8400 batches | train loss 0.4655873 +| epoch 1 | 383/ 8400 batches | train loss 0.6886926 +| epoch 1 | 387/ 8400 batches | train loss 0.5050402 +| epoch 1 | 391/ 8400 batches | train loss 0.5723824 +| epoch 1 | 395/ 8400 batches | train loss 0.4811799 +| epoch 1 | 399/ 8400 batches | train loss 0.6605055 +| epoch 1 | 403/ 8400 batches | train loss 0.5691826 +| epoch 1 | 407/ 8400 batches | train loss 0.5184615 +| epoch 1 | 411/ 8400 batches | train loss 0.6394609 +| epoch 1 | 415/ 8400 batches | train loss 0.4652843 +| epoch 1 | 419/ 8400 batches | train loss 0.4814568 +| epoch 1 | 423/ 8400 batches | train loss 0.4923127 +| epoch 1 | 427/ 8400 batches | train loss 0.4340072 +| epoch 1 | 431/ 8400 batches | train loss 0.4454949 +| epoch 1 | 435/ 8400 batches | train loss 0.6225419 +| epoch 1 | 439/ 8400 batches | train loss 0.7556426 +| epoch 1 | 443/ 8400 batches | train loss 0.4293548 +| epoch 1 | 447/ 8400 batches | train loss 0.4959866 +| epoch 1 | 451/ 8400 batches | train loss 0.3786446 +| epoch 1 | 455/ 8400 batches | train loss 0.6037788 +| epoch 1 | 459/ 8400 batches | train loss 0.5504303 +| epoch 1 | 463/ 8400 batches | train loss 0.6862895 +| epoch 1 | 467/ 8400 batches | train loss 0.4945371 +| epoch 1 | 471/ 8400 batches | train loss 0.4839419 +| epoch 1 | 475/ 8400 batches | train loss 0.5033423 +| epoch 1 | 479/ 8400 batches | train loss 0.4706589 +| epoch 1 | 483/ 8400 batches | train loss 0.5473455 +| epoch 1 | 487/ 8400 batches | train loss 0.5308775 +| epoch 1 | 491/ 8400 batches | train loss 0.5052984 +| epoch 1 | 495/ 8400 batches | train loss 0.4342923 +| epoch 1 | 499/ 8400 batches | train loss 0.4179955 +| epoch 1 | 503/ 8400 batches | train loss 0.5153195 +| epoch 1 | 507/ 8400 batches | train loss 0.4675868 +| epoch 1 | 511/ 8400 batches | train loss 0.4594129 +| epoch 1 | 515/ 8400 batches | train loss 0.5091655 +| epoch 1 | 519/ 8400 batches | train loss 0.4749674 +| epoch 1 | 523/ 8400 batches | train loss 0.5127326 +| epoch 1 | 527/ 8400 batches | train loss 0.4122645 +| epoch 1 | 531/ 8400 batches | train loss 0.5050131 +| epoch 1 | 535/ 8400 batches | train loss 0.3907290 +| epoch 1 | 539/ 8400 batches | train loss 0.4778705 +| epoch 1 | 543/ 8400 batches | train loss 0.5208706 +| epoch 1 | 547/ 8400 batches | train loss 0.5002071 +| epoch 1 | 551/ 8400 batches | train loss 0.5630188 +| epoch 1 | 555/ 8400 batches | train loss 0.6023023 +| epoch 1 | 559/ 8400 batches | train loss 0.5076882 +| epoch 1 | 563/ 8400 batches | train loss 0.4992066 +| epoch 1 | 567/ 8400 batches | train loss 0.6336138 +| epoch 1 | 571/ 8400 batches | train loss 0.4794300 +| epoch 1 | 575/ 8400 batches | train loss 0.4523470 +| epoch 1 | 579/ 8400 batches | train loss 0.7017160 +| epoch 1 | 583/ 8400 batches | train loss 0.4860001 +| epoch 1 | 587/ 8400 batches | train loss 0.5522963 +| epoch 1 | 591/ 8400 batches | train loss 0.5425975 +| epoch 1 | 595/ 8400 batches | train loss 0.6037617 +| epoch 1 | 599/ 8400 batches | train loss 0.3906718 +| epoch 1 | 603/ 8400 batches | train loss 0.5068169 +| epoch 1 | 607/ 8400 batches | train loss 0.6758207 +| epoch 1 | 611/ 8400 batches | train loss 0.6127616 +| epoch 1 | 615/ 8400 batches | train loss 0.4399989 +| epoch 1 | 619/ 8400 batches | train loss 0.4504962 +| epoch 1 | 623/ 8400 batches | train loss 0.4958326 +| epoch 1 | 627/ 8400 batches | train loss 0.5594358 +| epoch 1 | 631/ 8400 batches | train loss 0.6127410 +| epoch 1 | 635/ 8400 batches | train loss 0.6246509 +| epoch 1 | 639/ 8400 batches | train loss 0.4570420 +| epoch 1 | 643/ 8400 batches | train loss 0.5018520 +| epoch 1 | 647/ 8400 batches | train loss 0.5431493 +| epoch 1 | 651/ 8400 batches | train loss 0.5236524 +| epoch 1 | 655/ 8400 batches | train loss 0.6084287 +| epoch 1 | 659/ 8400 batches | train loss 0.5558151 +| epoch 1 | 663/ 8400 batches | train loss 0.5977161 +| epoch 1 | 667/ 8400 batches | train loss 0.4738268 +| epoch 1 | 671/ 8400 batches | train loss 0.6785357 +| epoch 1 | 675/ 8400 batches | train loss 0.6204220 +| epoch 1 | 679/ 8400 batches | train loss 0.5087056 +| epoch 1 | 683/ 8400 batches | train loss 0.4591680 +| epoch 1 | 687/ 8400 batches | train loss 0.4998854 +| epoch 1 | 691/ 8400 batches | train loss 0.6738973 +| epoch 1 | 695/ 8400 batches | train loss 0.4436811 +| epoch 1 | 699/ 8400 batches | train loss 0.4646572 +| epoch 1 | 703/ 8400 batches | train loss 0.3841950 +| epoch 1 | 707/ 8400 batches | train loss 0.5728858 +| epoch 1 | 711/ 8400 batches | train loss 0.5647426 +| epoch 1 | 715/ 8400 batches | train loss 0.4046674 +| epoch 1 | 719/ 8400 batches | train loss 0.4885874 +| epoch 1 | 723/ 8400 batches | train loss 0.5026253 +| epoch 1 | 727/ 8400 batches | train loss 0.4327128 +| epoch 1 | 731/ 8400 batches | train loss 0.4486595 +| epoch 1 | 735/ 8400 batches | train loss 0.4167534 +| epoch 1 | 739/ 8400 batches | train loss 0.4376974 +| epoch 1 | 743/ 8400 batches | train loss 0.5324795 +| epoch 1 | 747/ 8400 batches | train loss 0.4108735 +| epoch 1 | 751/ 8400 batches | train loss 0.5275553 +| epoch 1 | 755/ 8400 batches | train loss 0.4712777 +| epoch 1 | 759/ 8400 batches | train loss 0.5816347 +| epoch 1 | 763/ 8400 batches | train loss 0.7628016 +| epoch 1 | 767/ 8400 batches | train loss 0.5017246 +| epoch 1 | 771/ 8400 batches | train loss 0.4198081 +| epoch 1 | 775/ 8400 batches | train loss 0.4047119 +| epoch 1 | 779/ 8400 batches | train loss 0.5218989 +| epoch 1 | 783/ 8400 batches | train loss 0.4447055 +| epoch 1 | 787/ 8400 batches | train loss 0.4939714 +| epoch 1 | 791/ 8400 batches | train loss 0.5201145 +| epoch 1 | 795/ 8400 batches | train loss 0.5542184 +| epoch 1 | 799/ 8400 batches | train loss 0.5776969 +| epoch 1 | 803/ 8400 batches | train loss 0.4484904 +| epoch 1 | 807/ 8400 batches | train loss 0.5192049 +| epoch 1 | 811/ 8400 batches | train loss 0.4689089 +| epoch 1 | 815/ 8400 batches | train loss 0.4622979 +| epoch 1 | 819/ 8400 batches | train loss 0.4669020 +| epoch 1 | 823/ 8400 batches | train loss 0.4229442 +| epoch 1 | 827/ 8400 batches | train loss 0.6125703 +| epoch 1 | 831/ 8400 batches | train loss 0.5191013 +| epoch 1 | 835/ 8400 batches | train loss 0.5058039 +| epoch 1 | 839/ 8400 batches | train loss 0.6801134 +| epoch 1 | 843/ 8400 batches | train loss 0.4709873 +| epoch 1 | 847/ 8400 batches | train loss 0.5353100 +| epoch 1 | 851/ 8400 batches | train loss 0.5153005 +| epoch 1 | 855/ 8400 batches | train loss 0.6613600 +| epoch 1 | 859/ 8400 batches | train loss 0.4239082 +| epoch 1 | 863/ 8400 batches | train loss 0.4807929 +| epoch 1 | 867/ 8400 batches | train loss 0.4420418 +| epoch 1 | 871/ 8400 batches | train loss 0.4562135 +| epoch 1 | 875/ 8400 batches | train loss 0.5476409 +| epoch 1 | 879/ 8400 batches | train loss 0.4666115 +| epoch 1 | 883/ 8400 batches | train loss 0.4069519 +| epoch 1 | 887/ 8400 batches | train loss 0.4584903 +| epoch 1 | 891/ 8400 batches | train loss 0.4897568 +| epoch 1 | 895/ 8400 batches | train loss 0.5159776 +| epoch 1 | 899/ 8400 batches | train loss 0.4693528 +| epoch 1 | 903/ 8400 batches | train loss 0.5025438 +| epoch 1 | 907/ 8400 batches | train loss 0.5372993 +| epoch 1 | 911/ 8400 batches | train loss 0.3763155 +| epoch 1 | 915/ 8400 batches | train loss 0.5480602 +| epoch 1 | 919/ 8400 batches | train loss 0.4635254 +| epoch 1 | 923/ 8400 batches | train loss 1.2997055 +| epoch 1 | 927/ 8400 batches | train loss 0.4598735 +| epoch 1 | 931/ 8400 batches | train loss 0.5369542 +| epoch 1 | 935/ 8400 batches | train loss 0.5336775 +| epoch 1 | 939/ 8400 batches | train loss 0.5199360 +| epoch 1 | 943/ 8400 batches | train loss 0.5498801 +| epoch 1 | 947/ 8400 batches | train loss 0.4062214 +| epoch 1 | 951/ 8400 batches | train loss 0.5427445 +| epoch 1 | 955/ 8400 batches | train loss 0.5562474 +| epoch 1 | 959/ 8400 batches | train loss 0.5369027 +| epoch 1 | 963/ 8400 batches | train loss 0.4391282 +| epoch 1 | 967/ 8400 batches | train loss 0.4241639 +| epoch 1 | 971/ 8400 batches | train loss 0.4433671 +| epoch 1 | 975/ 8400 batches | train loss 0.6155868 +| epoch 1 | 979/ 8400 batches | train loss 0.5291118 +| epoch 1 | 983/ 8400 batches | train loss 0.4567558 +| epoch 1 | 987/ 8400 batches | train loss 0.5476027 +| epoch 1 | 991/ 8400 batches | train loss 0.4750987 +| epoch 1 | 995/ 8400 batches | train loss 0.3757713 +| epoch 1 | 999/ 8400 batches | train loss 0.5029117 +| epoch 1 | 1003/ 8400 batches | train loss 0.4615450 +| epoch 1 | 1007/ 8400 batches | train loss 0.4638601 +| epoch 1 | 1011/ 8400 batches | train loss 0.4874531 +| epoch 1 | 1015/ 8400 batches | train loss 0.5881128 +| epoch 1 | 1019/ 8400 batches | train loss 0.4348742 +| epoch 1 | 1023/ 8400 batches | train loss 0.4247116 +| epoch 1 | 1027/ 8400 batches | train loss 0.4591154 +| epoch 1 | 1031/ 8400 batches | train loss 0.3796273 +| epoch 1 | 1035/ 8400 batches | train loss 0.5114118 +| epoch 1 | 1039/ 8400 batches | train loss 0.4343333 +| epoch 1 | 1043/ 8400 batches | train loss 0.5753639 +| epoch 1 | 1047/ 8400 batches | train loss 0.4766276 +| epoch 1 | 1051/ 8400 batches | train loss 0.4223703 +| epoch 1 | 1055/ 8400 batches | train loss 0.4760230 +| epoch 1 | 1059/ 8400 batches | train loss 0.4652491 +| epoch 1 | 1063/ 8400 batches | train loss 0.6706804 +| epoch 1 | 1067/ 8400 batches | train loss 0.4212830 +| epoch 1 | 1071/ 8400 batches | train loss 0.3948683 +| epoch 1 | 1075/ 8400 batches | train loss 0.3543612 +| epoch 1 | 1079/ 8400 batches | train loss 0.4271425 +| epoch 1 | 1083/ 8400 batches | train loss 0.4404871 +| epoch 1 | 1087/ 8400 batches | train loss 0.4459472 +| epoch 1 | 1091/ 8400 batches | train loss 0.5283827 +| epoch 1 | 1095/ 8400 batches | train loss 0.4828486 +| epoch 1 | 1099/ 8400 batches | train loss 0.5062754 +| epoch 1 | 1103/ 8400 batches | train loss 0.3937590 +| epoch 1 | 1107/ 8400 batches | train loss 0.4667453 +| epoch 1 | 1111/ 8400 batches | train loss 0.5030137 +| epoch 1 | 1115/ 8400 batches | train loss 0.5397768 +| epoch 1 | 1119/ 8400 batches | train loss 0.4823809 +| epoch 1 | 1123/ 8400 batches | train loss 0.5391484 +| epoch 1 | 1127/ 8400 batches | train loss 0.5155483 +| epoch 1 | 1131/ 8400 batches | train loss 0.4993476 +| epoch 1 | 1135/ 8400 batches | train loss 0.4054698 +| epoch 1 | 1139/ 8400 batches | train loss 0.5259321 +| epoch 1 | 1143/ 8400 batches | train loss 0.4940061 +| epoch 1 | 1147/ 8400 batches | train loss 0.4131316 +| epoch 1 | 1151/ 8400 batches | train loss 0.4533411 +| epoch 1 | 1155/ 8400 batches | train loss 0.4089728 +| epoch 1 | 1159/ 8400 batches | train loss 0.4389162 +| epoch 1 | 1163/ 8400 batches | train loss 0.5010150 +| epoch 1 | 1167/ 8400 batches | train loss 0.5254467 +| epoch 1 | 1171/ 8400 batches | train loss 0.4139773 +| epoch 1 | 1175/ 8400 batches | train loss 0.4432475 +| epoch 1 | 1179/ 8400 batches | train loss 0.6038272 +| epoch 1 | 1183/ 8400 batches | train loss 0.4178661 +| epoch 1 | 1187/ 8400 batches | train loss 0.5127678 +| epoch 1 | 1191/ 8400 batches | train loss 0.5075577 +| epoch 1 | 1195/ 8400 batches | train loss 0.5742291 +| epoch 1 | 1199/ 8400 batches | train loss 0.3315779 +| epoch 1 | 1203/ 8400 batches | train loss 0.4831104 +| epoch 1 | 1207/ 8400 batches | train loss 0.5256877 +| epoch 1 | 1211/ 8400 batches | train loss 0.5322437 +| epoch 1 | 1215/ 8400 batches | train loss 0.3713521 +| epoch 1 | 1219/ 8400 batches | train loss 0.5057644 +| epoch 1 | 1223/ 8400 batches | train loss 0.4581373 +| epoch 1 | 1227/ 8400 batches | train loss 0.4812832 +| epoch 1 | 1231/ 8400 batches | train loss 0.5067742 +| epoch 1 | 1235/ 8400 batches | train loss 0.4071133 +| epoch 1 | 1239/ 8400 batches | train loss 0.5079869 +| epoch 1 | 1243/ 8400 batches | train loss 0.5826868 +| epoch 1 | 1247/ 8400 batches | train loss 0.4743686 +| epoch 1 | 1251/ 8400 batches | train loss 0.4519351 +| epoch 1 | 1255/ 8400 batches | train loss 0.5141941 +| epoch 1 | 1259/ 8400 batches | train loss 0.5380761 +| epoch 1 | 1263/ 8400 batches | train loss 0.4589195 +| epoch 1 | 1267/ 8400 batches | train loss 0.3995742 +| epoch 1 | 1271/ 8400 batches | train loss 0.5118737 +| epoch 1 | 1275/ 8400 batches | train loss 0.4818394 +| epoch 1 | 1279/ 8400 batches | train loss 0.5442652 +| epoch 1 | 1283/ 8400 batches | train loss 0.5195299 +| epoch 1 | 1287/ 8400 batches | train loss 0.4604600 +| epoch 1 | 1291/ 8400 batches | train loss 0.4848218 +| epoch 1 | 1295/ 8400 batches | train loss 0.5601112 +| epoch 1 | 1299/ 8400 batches | train loss 0.4456598 +| epoch 1 | 1303/ 8400 batches | train loss 0.4988294 +| epoch 1 | 1307/ 8400 batches | train loss 0.4625114 +| epoch 1 | 1311/ 8400 batches | train loss 0.4405787 +| epoch 1 | 1315/ 8400 batches | train loss 0.4999397 +| epoch 1 | 1319/ 8400 batches | train loss 0.5582439 +| epoch 1 | 1323/ 8400 batches | train loss 0.4323127 +| epoch 1 | 1327/ 8400 batches | train loss 0.4246412 +| epoch 1 | 1331/ 8400 batches | train loss 0.4414960 +| epoch 1 | 1335/ 8400 batches | train loss 0.4159201 +| epoch 1 | 1339/ 8400 batches | train loss 0.4440080 +| epoch 1 | 1343/ 8400 batches | train loss 0.3955644 +| epoch 1 | 1347/ 8400 batches | train loss 0.5958145 +| epoch 1 | 1351/ 8400 batches | train loss 0.4682791 +| epoch 1 | 1355/ 8400 batches | train loss 0.4187239 +| epoch 1 | 1359/ 8400 batches | train loss 0.6154199 +| epoch 1 | 1363/ 8400 batches | train loss 0.3930652 +| epoch 1 | 1367/ 8400 batches | train loss 0.5014683 +| epoch 1 | 1371/ 8400 batches | train loss 0.5796260 +| epoch 1 | 1375/ 8400 batches | train loss 0.4625108 +| epoch 1 | 1379/ 8400 batches | train loss 0.5252565 +| epoch 1 | 1383/ 8400 batches | train loss 0.5663783 +| epoch 1 | 1387/ 8400 batches | train loss 0.5086679 +| epoch 1 | 1391/ 8400 batches | train loss 0.5231189 +| epoch 1 | 1395/ 8400 batches | train loss 0.5059478 +| epoch 1 | 1399/ 8400 batches | train loss 0.4465108 +| epoch 1 | 1403/ 8400 batches | train loss 0.5893552 +| epoch 1 | 1407/ 8400 batches | train loss 0.4173902 +| epoch 1 | 1411/ 8400 batches | train loss 0.4931629 +| epoch 1 | 1415/ 8400 batches | train loss 0.4569989 +| epoch 1 | 1419/ 8400 batches | train loss 0.4545261 +| epoch 1 | 1423/ 8400 batches | train loss 0.6251441 +| epoch 1 | 1427/ 8400 batches | train loss 0.5073937 +| epoch 1 | 1431/ 8400 batches | train loss 0.4587542 +| epoch 1 | 1435/ 8400 batches | train loss 0.4717516 +| epoch 1 | 1439/ 8400 batches | train loss 0.4614590 +| epoch 1 | 1443/ 8400 batches | train loss 0.5702676 +| epoch 1 | 1447/ 8400 batches | train loss 0.4296377 +| epoch 1 | 1451/ 8400 batches | train loss 0.4239523 +| epoch 1 | 1455/ 8400 batches | train loss 0.5110662 +| epoch 1 | 1459/ 8400 batches | train loss 0.4067464 +| epoch 1 | 1463/ 8400 batches | train loss 0.4992125 +| epoch 1 | 1467/ 8400 batches | train loss 0.5758699 +| epoch 1 | 1471/ 8400 batches | train loss 0.5038174 +| epoch 1 | 1475/ 8400 batches | train loss 0.5451483 +| epoch 1 | 1479/ 8400 batches | train loss 0.4868764 +| epoch 1 | 1483/ 8400 batches | train loss 0.6778464 +| epoch 1 | 1487/ 8400 batches | train loss 0.5146085 +| epoch 1 | 1491/ 8400 batches | train loss 0.4179553 +| epoch 1 | 1495/ 8400 batches | train loss 0.4567263 +| epoch 1 | 1499/ 8400 batches | train loss 0.4793751 +| epoch 1 | 1503/ 8400 batches | train loss 0.5694216 +| epoch 1 | 1507/ 8400 batches | train loss 0.4715543 +| epoch 1 | 1511/ 8400 batches | train loss 0.4861838 +| epoch 1 | 1515/ 8400 batches | train loss 0.4134265 +| epoch 1 | 1519/ 8400 batches | train loss 0.4635226 +| epoch 1 | 1523/ 8400 batches | train loss 0.4644060 +| epoch 1 | 1527/ 8400 batches | train loss 0.4661228 +| epoch 1 | 1531/ 8400 batches | train loss 0.5324447 +| epoch 1 | 1535/ 8400 batches | train loss 0.4238834 +| epoch 1 | 1539/ 8400 batches | train loss 0.5712193 +| epoch 1 | 1543/ 8400 batches | train loss 0.6255234 +| epoch 1 | 1547/ 8400 batches | train loss 0.4242026 +| epoch 1 | 1551/ 8400 batches | train loss 0.4157371 +| epoch 1 | 1555/ 8400 batches | train loss 0.5218512 +| epoch 1 | 1559/ 8400 batches | train loss 0.5380740 +| epoch 1 | 1563/ 8400 batches | train loss 0.4673009 +| epoch 1 | 1567/ 8400 batches | train loss 0.5483854 +| epoch 1 | 1571/ 8400 batches | train loss 0.4351531 +| epoch 1 | 1575/ 8400 batches | train loss 0.5484557 +| epoch 1 | 1579/ 8400 batches | train loss 0.5118064 +| epoch 1 | 1583/ 8400 batches | train loss 0.5261369 +| epoch 1 | 1587/ 8400 batches | train loss 0.4452060 +| epoch 1 | 1591/ 8400 batches | train loss 0.4946305 +| epoch 1 | 1595/ 8400 batches | train loss 0.3979831 +| epoch 1 | 1599/ 8400 batches | train loss 0.4834591 +| epoch 1 | 1603/ 8400 batches | train loss 0.4802205 +| epoch 1 | 1607/ 8400 batches | train loss 0.4699123 +| epoch 1 | 1611/ 8400 batches | train loss 0.4887771 +| epoch 1 | 1615/ 8400 batches | train loss 0.5065143 +| epoch 1 | 1619/ 8400 batches | train loss 0.4590871 +| epoch 1 | 1623/ 8400 batches | train loss 0.5605370 +| epoch 1 | 1627/ 8400 batches | train loss 0.4349377 +| epoch 1 | 1631/ 8400 batches | train loss 0.4842966 +| epoch 1 | 1635/ 8400 batches | train loss 0.4834331 +| epoch 1 | 1639/ 8400 batches | train loss 0.5232232 +| epoch 1 | 1643/ 8400 batches | train loss 0.4058701 +| epoch 1 | 1647/ 8400 batches | train loss 0.4126841 +| epoch 1 | 1651/ 8400 batches | train loss 0.4104966 +| epoch 1 | 1655/ 8400 batches | train loss 0.3999504 +| epoch 1 | 1659/ 8400 batches | train loss 0.4843015 +| epoch 1 | 1663/ 8400 batches | train loss 0.4342133 +| epoch 1 | 1667/ 8400 batches | train loss 0.4295389 +| epoch 1 | 1671/ 8400 batches | train loss 0.5935305 +| epoch 1 | 1675/ 8400 batches | train loss 0.4796898 +| epoch 1 | 1679/ 8400 batches | train loss 0.6094528 +| epoch 1 | 1683/ 8400 batches | train loss 0.4356109 +| epoch 1 | 1687/ 8400 batches | train loss 0.4997219 +| epoch 1 | 1691/ 8400 batches | train loss 0.5505395 +| epoch 1 | 1695/ 8400 batches | train loss 0.3984496 +| epoch 1 | 1699/ 8400 batches | train loss 0.3874319 +| epoch 1 | 1703/ 8400 batches | train loss 0.5642443 +| epoch 1 | 1707/ 8400 batches | train loss 0.4584670 +| epoch 1 | 1711/ 8400 batches | train loss 0.5341571 +| epoch 1 | 1715/ 8400 batches | train loss 0.4216025 +| epoch 1 | 1719/ 8400 batches | train loss 0.5450044 +| epoch 1 | 1723/ 8400 batches | train loss 0.5268986 +| epoch 1 | 1727/ 8400 batches | train loss 0.5394237 +| epoch 1 | 1731/ 8400 batches | train loss 0.5310385 +| epoch 1 | 1735/ 8400 batches | train loss 0.4931579 +| epoch 1 | 1739/ 8400 batches | train loss 0.6320820 +| epoch 1 | 1743/ 8400 batches | train loss 0.5010269 +| epoch 1 | 1747/ 8400 batches | train loss 0.5331625 +| epoch 1 | 1751/ 8400 batches | train loss 0.4785112 +| epoch 1 | 1755/ 8400 batches | train loss 0.5302502 +| epoch 1 | 1759/ 8400 batches | train loss 0.4475164 +| epoch 1 | 1763/ 8400 batches | train loss 0.4858260 +| epoch 1 | 1767/ 8400 batches | train loss 0.5473044 +| epoch 1 | 1771/ 8400 batches | train loss 0.4868209 +| epoch 1 | 1775/ 8400 batches | train loss 0.4304379 +| epoch 1 | 1779/ 8400 batches | train loss 0.4550492 +| epoch 1 | 1783/ 8400 batches | train loss 0.4752303 +| epoch 1 | 1787/ 8400 batches | train loss 0.4453951 +| epoch 1 | 1791/ 8400 batches | train loss 0.5146375 +| epoch 1 | 1795/ 8400 batches | train loss 0.4695857 +| epoch 1 | 1799/ 8400 batches | train loss 0.4828249 +| epoch 1 | 1803/ 8400 batches | train loss 0.4759398 +| epoch 1 | 1807/ 8400 batches | train loss 0.4383268 +| epoch 1 | 1811/ 8400 batches | train loss 0.4447736 +| epoch 1 | 1815/ 8400 batches | train loss 0.5570235 +| epoch 1 | 1819/ 8400 batches | train loss 0.5122305 +| epoch 1 | 1823/ 8400 batches | train loss 0.5331872 +| epoch 1 | 1827/ 8400 batches | train loss 0.5290679 +| epoch 1 | 1831/ 8400 batches | train loss 0.6019375 +| epoch 1 | 1835/ 8400 batches | train loss 0.5128634 +| epoch 1 | 1839/ 8400 batches | train loss 0.4038592 +| epoch 1 | 1843/ 8400 batches | train loss 0.4586186 +| epoch 1 | 1847/ 8400 batches | train loss 0.4015092 +| epoch 1 | 1851/ 8400 batches | train loss 0.5609667 +| epoch 1 | 1855/ 8400 batches | train loss 0.5413109 +| epoch 1 | 1859/ 8400 batches | train loss 0.4655048 +| epoch 1 | 1863/ 8400 batches | train loss 0.3993076 +| epoch 1 | 1867/ 8400 batches | train loss 0.5303800 +| epoch 1 | 1871/ 8400 batches | train loss 0.4988930 +| epoch 1 | 1875/ 8400 batches | train loss 0.4045311 +| epoch 1 | 1879/ 8400 batches | train loss 0.3855459 +| epoch 1 | 1883/ 8400 batches | train loss 0.5074718 +| epoch 1 | 1887/ 8400 batches | train loss 0.4443617 +| epoch 1 | 1891/ 8400 batches | train loss 0.4366227 +| epoch 1 | 1895/ 8400 batches | train loss 0.4539221 +| epoch 1 | 1899/ 8400 batches | train loss 0.4254987 +| epoch 1 | 1903/ 8400 batches | train loss 0.5298894 +| epoch 1 | 1907/ 8400 batches | train loss 0.4619650 +| epoch 1 | 1911/ 8400 batches | train loss 0.4365228 +| epoch 1 | 1915/ 8400 batches | train loss 0.3747184 +| epoch 1 | 1919/ 8400 batches | train loss 0.5487698 +| epoch 1 | 1923/ 8400 batches | train loss 0.3444690 +| epoch 1 | 1927/ 8400 batches | train loss 0.5078244 +| epoch 1 | 1931/ 8400 batches | train loss 0.5248298 +| epoch 1 | 1935/ 8400 batches | train loss 0.5446256 +| epoch 1 | 1939/ 8400 batches | train loss 0.4725804 +| epoch 1 | 1943/ 8400 batches | train loss 0.5543899 +| epoch 1 | 1947/ 8400 batches | train loss 0.4564849 +| epoch 1 | 1951/ 8400 batches | train loss 0.5193324 +| epoch 1 | 1955/ 8400 batches | train loss 0.5061120 +| epoch 1 | 1959/ 8400 batches | train loss 0.4344026 +| epoch 1 | 1963/ 8400 batches | train loss 0.4106280 +| epoch 1 | 1967/ 8400 batches | train loss 0.4087219 +| epoch 1 | 1971/ 8400 batches | train loss 0.4116226 +| epoch 1 | 1975/ 8400 batches | train loss 0.5206676 +| epoch 1 | 1979/ 8400 batches | train loss 0.4641925 +| epoch 1 | 1983/ 8400 batches | train loss 0.4934621 +| epoch 1 | 1987/ 8400 batches | train loss 0.4678968 +| epoch 1 | 1991/ 8400 batches | train loss 0.5412699 +| epoch 1 | 1995/ 8400 batches | train loss 0.4314542 +| epoch 1 | 1999/ 8400 batches | train loss 0.5187028 +| epoch 1 | 2003/ 8400 batches | train loss 0.5314489 +| epoch 1 | 2007/ 8400 batches | train loss 0.5331967 +| epoch 1 | 2011/ 8400 batches | train loss 0.4619139 +| epoch 1 | 2015/ 8400 batches | train loss 0.5589293 +| epoch 1 | 2019/ 8400 batches | train loss 0.4923715 +| epoch 1 | 2023/ 8400 batches | train loss 0.4435226 +| epoch 1 | 2027/ 8400 batches | train loss 0.4507110 +| epoch 1 | 2031/ 8400 batches | train loss 0.4027385 +| epoch 1 | 2035/ 8400 batches | train loss 0.4169093 +| epoch 1 | 2039/ 8400 batches | train loss 0.4881256 +| epoch 1 | 2043/ 8400 batches | train loss 0.4593969 +| epoch 1 | 2047/ 8400 batches | train loss 0.4694004 +| epoch 1 | 2051/ 8400 batches | train loss 0.4887583 +| epoch 1 | 2055/ 8400 batches | train loss 0.5094551 +| epoch 1 | 2059/ 8400 batches | train loss 0.4303480 +| epoch 1 | 2063/ 8400 batches | train loss 0.4381226 +| epoch 1 | 2067/ 8400 batches | train loss 0.4532444 +| epoch 1 | 2071/ 8400 batches | train loss 0.3799425 +| epoch 1 | 2075/ 8400 batches | train loss 0.3840509 +| epoch 1 | 2079/ 8400 batches | train loss 0.5092447 +| epoch 1 | 2083/ 8400 batches | train loss 0.4482068 +| epoch 1 | 2087/ 8400 batches | train loss 0.4462107 +| epoch 1 | 2091/ 8400 batches | train loss 0.4860476 +| epoch 1 | 2095/ 8400 batches | train loss 0.4533170 +| epoch 1 | 2099/ 8400 batches | train loss 0.5110875 +| epoch 1 | 2103/ 8400 batches | train loss 0.5463783 +| epoch 1 | 2107/ 8400 batches | train loss 0.4372922 +| epoch 1 | 2111/ 8400 batches | train loss 0.4388956 +| epoch 1 | 2115/ 8400 batches | train loss 0.4318286 +| epoch 1 | 2119/ 8400 batches | train loss 0.4039283 +| epoch 1 | 2123/ 8400 batches | train loss 0.4887948 +| epoch 1 | 2127/ 8400 batches | train loss 0.4915105 +| epoch 1 | 2131/ 8400 batches | train loss 0.4801521 +| epoch 1 | 2135/ 8400 batches | train loss 0.3673021 +| epoch 1 | 2139/ 8400 batches | train loss 0.5004351 +| epoch 1 | 2143/ 8400 batches | train loss 0.3366217 +| epoch 1 | 2147/ 8400 batches | train loss 0.4009190 +| epoch 1 | 2151/ 8400 batches | train loss 0.3405905 +| epoch 1 | 2155/ 8400 batches | train loss 0.5331359 +| epoch 1 | 2159/ 8400 batches | train loss 0.4168792 +| epoch 1 | 2163/ 8400 batches | train loss 0.5374700 +| epoch 1 | 2167/ 8400 batches | train loss 0.4855629 +| epoch 1 | 2171/ 8400 batches | train loss 0.4475416 +| epoch 1 | 2175/ 8400 batches | train loss 0.4887510 +| epoch 1 | 2179/ 8400 batches | train loss 0.4182971 +| epoch 1 | 2183/ 8400 batches | train loss 0.3958714 +| epoch 1 | 2187/ 8400 batches | train loss 0.5094783 +| epoch 1 | 2191/ 8400 batches | train loss 0.3669929 +| epoch 1 | 2195/ 8400 batches | train loss 0.4792700 +| epoch 1 | 2199/ 8400 batches | train loss 0.4648196 +| epoch 1 | 2203/ 8400 batches | train loss 0.5843452 +| epoch 1 | 2207/ 8400 batches | train loss 0.5199471 +| epoch 1 | 2211/ 8400 batches | train loss 0.5829877 +| epoch 1 | 2215/ 8400 batches | train loss 0.5360056 +| epoch 1 | 2219/ 8400 batches | train loss 0.5794097 +| epoch 1 | 2223/ 8400 batches | train loss 0.5560645 +| epoch 1 | 2227/ 8400 batches | train loss 0.4322286 +| epoch 1 | 2231/ 8400 batches | train loss 0.3837535 +| epoch 1 | 2235/ 8400 batches | train loss 0.4803597 +| epoch 1 | 2239/ 8400 batches | train loss 0.5326291 +| epoch 1 | 2243/ 8400 batches | train loss 0.4565249 +| epoch 1 | 2247/ 8400 batches | train loss 0.3786719 +| epoch 1 | 2251/ 8400 batches | train loss 0.5077039 +| epoch 1 | 2255/ 8400 batches | train loss 0.4236307 +| epoch 1 | 2259/ 8400 batches | train loss 0.4459350 +| epoch 1 | 2263/ 8400 batches | train loss 0.6673908 +| epoch 1 | 2267/ 8400 batches | train loss 0.3800707 +| epoch 1 | 2271/ 8400 batches | train loss 0.4540854 +| epoch 1 | 2275/ 8400 batches | train loss 0.5192260 +| epoch 1 | 2279/ 8400 batches | train loss 0.5073168 +| epoch 1 | 2283/ 8400 batches | train loss 0.5281285 +| epoch 1 | 2287/ 8400 batches | train loss 0.6025686 +| epoch 1 | 2291/ 8400 batches | train loss 0.3829712 +| epoch 1 | 2295/ 8400 batches | train loss 0.5192502 +| epoch 1 | 2299/ 8400 batches | train loss 0.5425026 +| epoch 1 | 2303/ 8400 batches | train loss 0.4553349 +| epoch 1 | 2307/ 8400 batches | train loss 0.3808696 +| epoch 1 | 2311/ 8400 batches | train loss 0.4534289 +| epoch 1 | 2315/ 8400 batches | train loss 0.4792393 +| epoch 1 | 2319/ 8400 batches | train loss 0.5928901 +| epoch 1 | 2323/ 8400 batches | train loss 0.6268672 +| epoch 1 | 2327/ 8400 batches | train loss 0.5003667 +| epoch 1 | 2331/ 8400 batches | train loss 0.3979225 +| epoch 1 | 2335/ 8400 batches | train loss 0.5134337 +| epoch 1 | 2339/ 8400 batches | train loss 0.3566006 +| epoch 1 | 2343/ 8400 batches | train loss 0.4931333 +| epoch 1 | 2347/ 8400 batches | train loss 0.4801966 +| epoch 1 | 2351/ 8400 batches | train loss 0.4465779 +| epoch 1 | 2355/ 8400 batches | train loss 0.4717734 +| epoch 1 | 2359/ 8400 batches | train loss 0.5175045 +| epoch 1 | 2363/ 8400 batches | train loss 0.4852707 +| epoch 1 | 2367/ 8400 batches | train loss 0.4707251 +| epoch 1 | 2371/ 8400 batches | train loss 0.5217452 +| epoch 1 | 2375/ 8400 batches | train loss 0.4413706 +| epoch 1 | 2379/ 8400 batches | train loss 0.4952630 +| epoch 1 | 2383/ 8400 batches | train loss 0.5452590 +| epoch 1 | 2387/ 8400 batches | train loss 0.4217747 +| epoch 1 | 2391/ 8400 batches | train loss 0.4253627 +| epoch 1 | 2395/ 8400 batches | train loss 0.4494906 +| epoch 1 | 2399/ 8400 batches | train loss 0.4248031 +| epoch 1 | 2403/ 8400 batches | train loss 0.3982193 +| epoch 1 | 2407/ 8400 batches | train loss 0.4390229 +| epoch 1 | 2411/ 8400 batches | train loss 0.4413102 +| epoch 1 | 2415/ 8400 batches | train loss 0.5203318 +| epoch 1 | 2419/ 8400 batches | train loss 0.5127721 +| epoch 1 | 2423/ 8400 batches | train loss 0.4293912 +| epoch 1 | 2427/ 8400 batches | train loss 0.4896052 +| epoch 1 | 2431/ 8400 batches | train loss 0.4693177 +| epoch 1 | 2435/ 8400 batches | train loss 0.4512358 +| epoch 1 | 2439/ 8400 batches | train loss 0.4098472 +| epoch 1 | 2443/ 8400 batches | train loss 0.3895623 +| epoch 1 | 2447/ 8400 batches | train loss 0.5216615 +| epoch 1 | 2451/ 8400 batches | train loss 0.4860425 +| epoch 1 | 2455/ 8400 batches | train loss 0.4950501 +| epoch 1 | 2459/ 8400 batches | train loss 0.4745768 +| epoch 1 | 2463/ 8400 batches | train loss 0.4782299 +| epoch 1 | 2467/ 8400 batches | train loss 0.4696954 +| epoch 1 | 2471/ 8400 batches | train loss 0.3778338 +| epoch 1 | 2475/ 8400 batches | train loss 0.4310060 +| epoch 1 | 2479/ 8400 batches | train loss 0.5575221 +| epoch 1 | 2483/ 8400 batches | train loss 0.4006772 +| epoch 1 | 2487/ 8400 batches | train loss 0.5273267 +| epoch 1 | 2491/ 8400 batches | train loss 0.4365161 +| epoch 1 | 2495/ 8400 batches | train loss 0.4668377 +| epoch 1 | 2499/ 8400 batches | train loss 0.4427919 +| epoch 1 | 2503/ 8400 batches | train loss 0.4755383 +| epoch 1 | 2507/ 8400 batches | train loss 0.5241213 +| epoch 1 | 2511/ 8400 batches | train loss 0.4512603 +| epoch 1 | 2515/ 8400 batches | train loss 0.4563733 +| epoch 1 | 2519/ 8400 batches | train loss 0.4700425 +| epoch 1 | 2523/ 8400 batches | train loss 0.4331175 +| epoch 1 | 2527/ 8400 batches | train loss 0.4980647 +| epoch 1 | 2531/ 8400 batches | train loss 0.4750025 +| epoch 1 | 2535/ 8400 batches | train loss 0.4500745 +| epoch 1 | 2539/ 8400 batches | train loss 0.4591545 +| epoch 1 | 2543/ 8400 batches | train loss 0.5660534 +| epoch 1 | 2547/ 8400 batches | train loss 0.4553673 +| epoch 1 | 2551/ 8400 batches | train loss 0.5229729 +| epoch 1 | 2555/ 8400 batches | train loss 0.5013870 +| epoch 1 | 2559/ 8400 batches | train loss 0.5531310 +| epoch 1 | 2563/ 8400 batches | train loss 0.4858274 +| epoch 1 | 2567/ 8400 batches | train loss 0.4671652 +| epoch 1 | 2571/ 8400 batches | train loss 0.4439363 +| epoch 1 | 2575/ 8400 batches | train loss 0.5124964 +| epoch 1 | 2579/ 8400 batches | train loss 0.3663715 +| epoch 1 | 2583/ 8400 batches | train loss 0.4344051 +| epoch 1 | 2587/ 8400 batches | train loss 0.5193256 +| epoch 1 | 2591/ 8400 batches | train loss 0.4314154 +| epoch 1 | 2595/ 8400 batches | train loss 0.4623845 +| epoch 1 | 2599/ 8400 batches | train loss 0.4213686 +| epoch 1 | 2603/ 8400 batches | train loss 0.5212638 +| epoch 1 | 2607/ 8400 batches | train loss 0.4925595 +| epoch 1 | 2611/ 8400 batches | train loss 0.4473316 +| epoch 1 | 2615/ 8400 batches | train loss 0.5586755 +| epoch 1 | 2619/ 8400 batches | train loss 0.4454672 +| epoch 1 | 2623/ 8400 batches | train loss 0.4570045 +| epoch 1 | 2627/ 8400 batches | train loss 0.3569335 +| epoch 1 | 2631/ 8400 batches | train loss 0.4709453 +| epoch 1 | 2635/ 8400 batches | train loss 0.5194436 +| epoch 1 | 2639/ 8400 batches | train loss 0.3959841 +| epoch 1 | 2643/ 8400 batches | train loss 0.4005876 +| epoch 1 | 2647/ 8400 batches | train loss 0.5167257 +| epoch 1 | 2651/ 8400 batches | train loss 0.4146396 +| epoch 1 | 2655/ 8400 batches | train loss 0.4904941 +| epoch 1 | 2659/ 8400 batches | train loss 0.5411339 +| epoch 1 | 2663/ 8400 batches | train loss 0.6085088 +| epoch 1 | 2667/ 8400 batches | train loss 0.4288318 +| epoch 1 | 2671/ 8400 batches | train loss 0.4867658 +| epoch 1 | 2675/ 8400 batches | train loss 0.4118645 +| epoch 1 | 2679/ 8400 batches | train loss 0.3737400 +| epoch 1 | 2683/ 8400 batches | train loss 0.4875038 +| epoch 1 | 2687/ 8400 batches | train loss 0.4798143 +| epoch 1 | 2691/ 8400 batches | train loss 0.4232144 +| epoch 1 | 2695/ 8400 batches | train loss 0.5757588 +| epoch 1 | 2699/ 8400 batches | train loss 0.4095692 +| epoch 1 | 2703/ 8400 batches | train loss 0.3077687 +| epoch 1 | 2707/ 8400 batches | train loss 0.4754208 +| epoch 1 | 2711/ 8400 batches | train loss 0.4924580 +| epoch 1 | 2715/ 8400 batches | train loss 0.4292903 +| epoch 1 | 2719/ 8400 batches | train loss 0.3985056 +| epoch 1 | 2723/ 8400 batches | train loss 0.4325708 +| epoch 1 | 2727/ 8400 batches | train loss 0.4776702 +| epoch 1 | 2731/ 8400 batches | train loss 0.4872853 +| epoch 1 | 2735/ 8400 batches | train loss 0.4570769 +| epoch 1 | 2739/ 8400 batches | train loss 0.4173493 +| epoch 1 | 2743/ 8400 batches | train loss 0.5267879 +| epoch 1 | 2747/ 8400 batches | train loss 0.6482832 +| epoch 1 | 2751/ 8400 batches | train loss 0.4931255 +| epoch 1 | 2755/ 8400 batches | train loss 0.4514264 +| epoch 1 | 2759/ 8400 batches | train loss 0.4096484 +| epoch 1 | 2763/ 8400 batches | train loss 0.4686541 +| epoch 1 | 2767/ 8400 batches | train loss 0.4076715 +| epoch 1 | 2771/ 8400 batches | train loss 0.5297057 +| epoch 1 | 2775/ 8400 batches | train loss 0.4649181 +| epoch 1 | 2779/ 8400 batches | train loss 0.5482666 +| epoch 1 | 2783/ 8400 batches | train loss 0.4569433 +| epoch 1 | 2787/ 8400 batches | train loss 0.3935727 +| epoch 1 | 2791/ 8400 batches | train loss 0.4272811 +| epoch 1 | 2795/ 8400 batches | train loss 0.5519127 +| epoch 1 | 2799/ 8400 batches | train loss 0.4706860 +| epoch 1 | 2803/ 8400 batches | train loss 0.4246119 +| epoch 1 | 2807/ 8400 batches | train loss 0.5366342 +| epoch 1 | 2811/ 8400 batches | train loss 0.4305761 +| epoch 1 | 2815/ 8400 batches | train loss 0.4376768 +| epoch 1 | 2819/ 8400 batches | train loss 0.3295829 +| epoch 1 | 2823/ 8400 batches | train loss 0.4846880 +| epoch 1 | 2827/ 8400 batches | train loss 0.4371893 +| epoch 1 | 2831/ 8400 batches | train loss 0.5289081 +| epoch 1 | 2835/ 8400 batches | train loss 0.5316500 +| epoch 1 | 2839/ 8400 batches | train loss 0.5352180 +| epoch 1 | 2843/ 8400 batches | train loss 0.4685819 +| epoch 1 | 2847/ 8400 batches | train loss 0.4561074 +| epoch 1 | 2851/ 8400 batches | train loss 0.5204905 +| epoch 1 | 2855/ 8400 batches | train loss 0.4684165 +| epoch 1 | 2859/ 8400 batches | train loss 0.4019229 +| epoch 1 | 2863/ 8400 batches | train loss 0.5971267 +| epoch 1 | 2867/ 8400 batches | train loss 0.4275982 +| epoch 1 | 2871/ 8400 batches | train loss 0.4697947 +| epoch 1 | 2875/ 8400 batches | train loss 0.4271600 +| epoch 1 | 2879/ 8400 batches | train loss 0.4779359 +| epoch 1 | 2883/ 8400 batches | train loss 0.3940181 +| epoch 1 | 2887/ 8400 batches | train loss 0.4469556 +| epoch 1 | 2891/ 8400 batches | train loss 0.3857205 +| epoch 1 | 2895/ 8400 batches | train loss 0.5381334 +| epoch 1 | 2899/ 8400 batches | train loss 0.6333778 +| epoch 1 | 2903/ 8400 batches | train loss 0.4805236 +| epoch 1 | 2907/ 8400 batches | train loss 0.4275120 +| epoch 1 | 2911/ 8400 batches | train loss 0.5249666 +| epoch 1 | 2915/ 8400 batches | train loss 0.4533424 +| epoch 1 | 2919/ 8400 batches | train loss 0.4615773 +| epoch 1 | 2923/ 8400 batches | train loss 0.4359019 +| epoch 1 | 2927/ 8400 batches | train loss 0.4564256 +| epoch 1 | 2931/ 8400 batches | train loss 0.4340006 +| epoch 1 | 2935/ 8400 batches | train loss 0.4222519 +| epoch 1 | 2939/ 8400 batches | train loss 0.5318657 +| epoch 1 | 2943/ 8400 batches | train loss 0.5060735 +| epoch 1 | 2947/ 8400 batches | train loss 0.5949175 +| epoch 1 | 2951/ 8400 batches | train loss 0.4306748 +| epoch 1 | 2955/ 8400 batches | train loss 0.4611114 +| epoch 1 | 2959/ 8400 batches | train loss 0.4533704 +| epoch 1 | 2963/ 8400 batches | train loss 0.3755487 +| epoch 1 | 2967/ 8400 batches | train loss 0.3476767 +| epoch 1 | 2971/ 8400 batches | train loss 0.4940438 +| epoch 1 | 2975/ 8400 batches | train loss 0.5045413 +| epoch 1 | 2979/ 8400 batches | train loss 0.4998296 +| epoch 1 | 2983/ 8400 batches | train loss 0.4419029 +| epoch 1 | 2987/ 8400 batches | train loss 0.3950601 +| epoch 1 | 2991/ 8400 batches | train loss 0.4846585 +| epoch 1 | 2995/ 8400 batches | train loss 0.4783605 +| epoch 1 | 2999/ 8400 batches | train loss 0.3438113 +| epoch 1 | 3003/ 8400 batches | train loss 0.3897695 +| epoch 1 | 3007/ 8400 batches | train loss 0.4197978 +| epoch 1 | 3011/ 8400 batches | train loss 0.4950505 +| epoch 1 | 3015/ 8400 batches | train loss 0.4947243 +| epoch 1 | 3019/ 8400 batches | train loss 0.4555619 +| epoch 1 | 3023/ 8400 batches | train loss 0.6248285 +| epoch 1 | 3027/ 8400 batches | train loss 0.4295513 +| epoch 1 | 3031/ 8400 batches | train loss 0.4578935 +| epoch 1 | 3035/ 8400 batches | train loss 0.4448893 +| epoch 1 | 3039/ 8400 batches | train loss 0.4931153 +| epoch 1 | 3043/ 8400 batches | train loss 0.3774017 +| epoch 1 | 3047/ 8400 batches | train loss 0.4992823 +| epoch 1 | 3051/ 8400 batches | train loss 0.3891652 +| epoch 1 | 3055/ 8400 batches | train loss 0.4682777 +| epoch 1 | 3059/ 8400 batches | train loss 0.5558448 +| epoch 1 | 3063/ 8400 batches | train loss 0.4177560 +| epoch 1 | 3067/ 8400 batches | train loss 0.4422688 +| epoch 1 | 3071/ 8400 batches | train loss 0.4401431 +| epoch 1 | 3075/ 8400 batches | train loss 0.4150284 +| epoch 1 | 3079/ 8400 batches | train loss 0.4813645 +| epoch 1 | 3083/ 8400 batches | train loss 0.4903327 +| epoch 1 | 3087/ 8400 batches | train loss 0.5278116 +| epoch 1 | 3091/ 8400 batches | train loss 0.3955366 +| epoch 1 | 3095/ 8400 batches | train loss 0.4663893 +| epoch 1 | 3099/ 8400 batches | train loss 0.4728978 +| epoch 1 | 3103/ 8400 batches | train loss 0.5027226 +| epoch 1 | 3107/ 8400 batches | train loss 0.5516735 +| epoch 1 | 3111/ 8400 batches | train loss 0.4737409 +| epoch 1 | 3115/ 8400 batches | train loss 0.5281570 +| epoch 1 | 3119/ 8400 batches | train loss 0.4857950 +| epoch 1 | 3123/ 8400 batches | train loss 0.6850352 +| epoch 1 | 3127/ 8400 batches | train loss 0.4788167 +| epoch 1 | 3131/ 8400 batches | train loss 0.5210661 +| epoch 1 | 3135/ 8400 batches | train loss 0.4228644 +| epoch 1 | 3139/ 8400 batches | train loss 0.4371581 +| epoch 1 | 3143/ 8400 batches | train loss 0.4008451 +| epoch 1 | 3147/ 8400 batches | train loss 0.4881171 +| epoch 1 | 3151/ 8400 batches | train loss 0.4224374 +| epoch 1 | 3155/ 8400 batches | train loss 0.4764612 +| epoch 1 | 3159/ 8400 batches | train loss 0.4439674 +| epoch 1 | 3163/ 8400 batches | train loss 0.3753345 +| epoch 1 | 3167/ 8400 batches | train loss 0.4331880 +| epoch 1 | 3171/ 8400 batches | train loss 0.4324213 +| epoch 1 | 3175/ 8400 batches | train loss 0.5046923 +| epoch 1 | 3179/ 8400 batches | train loss 0.5036235 +| epoch 1 | 3183/ 8400 batches | train loss 0.5531301 +| epoch 1 | 3187/ 8400 batches | train loss 0.4335307 +| epoch 1 | 3191/ 8400 batches | train loss 0.5080183 +| epoch 1 | 3195/ 8400 batches | train loss 0.6498583 +| epoch 1 | 3199/ 8400 batches | train loss 0.5672983 +| epoch 1 | 3203/ 8400 batches | train loss 0.5003181 +| epoch 1 | 3207/ 8400 batches | train loss 0.4296838 +| epoch 1 | 3211/ 8400 batches | train loss 0.5885836 +| epoch 1 | 3215/ 8400 batches | train loss 0.4382722 +| epoch 1 | 3219/ 8400 batches | train loss 0.4893799 +| epoch 1 | 3223/ 8400 batches | train loss 0.4549341 +| epoch 1 | 3227/ 8400 batches | train loss 0.4676116 +| epoch 1 | 3231/ 8400 batches | train loss 0.3921227 +| epoch 1 | 3235/ 8400 batches | train loss 0.4497069 +| epoch 1 | 3239/ 8400 batches | train loss 0.4395756 +| epoch 1 | 3243/ 8400 batches | train loss 0.4811155 +| epoch 1 | 3247/ 8400 batches | train loss 0.4273973 +| epoch 1 | 3251/ 8400 batches | train loss 0.5027413 +| epoch 1 | 3255/ 8400 batches | train loss 0.5377620 +| epoch 1 | 3259/ 8400 batches | train loss 0.4455375 +| epoch 1 | 3263/ 8400 batches | train loss 0.5052333 +| epoch 1 | 3267/ 8400 batches | train loss 0.5284002 +| epoch 1 | 3271/ 8400 batches | train loss 0.4235224 +| epoch 1 | 3275/ 8400 batches | train loss 0.4665731 +| epoch 1 | 3279/ 8400 batches | train loss 0.5100963 +| epoch 1 | 3283/ 8400 batches | train loss 0.4335001 +| epoch 1 | 3287/ 8400 batches | train loss 0.4352352 +| epoch 1 | 3291/ 8400 batches | train loss 0.4746643 +| epoch 1 | 3295/ 8400 batches | train loss 0.4337890 +| epoch 1 | 3299/ 8400 batches | train loss 0.4466141 +| epoch 1 | 3303/ 8400 batches | train loss 0.4173665 +| epoch 1 | 3307/ 8400 batches | train loss 0.4726455 +| epoch 1 | 3311/ 8400 batches | train loss 0.4634709 +| epoch 1 | 3315/ 8400 batches | train loss 0.4686878 +| epoch 1 | 3319/ 8400 batches | train loss 0.4520655 +| epoch 1 | 3323/ 8400 batches | train loss 0.4149457 +| epoch 1 | 3327/ 8400 batches | train loss 0.5613573 +| epoch 1 | 3331/ 8400 batches | train loss 0.4529266 +| epoch 1 | 3335/ 8400 batches | train loss 0.6327540 +| epoch 1 | 3339/ 8400 batches | train loss 0.4767963 +| epoch 1 | 3343/ 8400 batches | train loss 0.4391828 +| epoch 1 | 3347/ 8400 batches | train loss 0.5020807 +| epoch 1 | 3351/ 8400 batches | train loss 0.4468263 +| epoch 1 | 3355/ 8400 batches | train loss 0.4671915 +| epoch 1 | 3359/ 8400 batches | train loss 0.4271003 +| epoch 1 | 3363/ 8400 batches | train loss 0.4386903 +| epoch 1 | 3367/ 8400 batches | train loss 0.3709576 +| epoch 1 | 3371/ 8400 batches | train loss 0.4321863 +| epoch 1 | 3375/ 8400 batches | train loss 0.5319624 +| epoch 1 | 3379/ 8400 batches | train loss 0.4310501 +| epoch 1 | 3383/ 8400 batches | train loss 0.3375785 +| epoch 1 | 3387/ 8400 batches | train loss 0.4291532 +| epoch 1 | 3391/ 8400 batches | train loss 0.4540001 +| epoch 1 | 3395/ 8400 batches | train loss 0.4161726 +| epoch 1 | 3399/ 8400 batches | train loss 0.4122780 +| epoch 1 | 3403/ 8400 batches | train loss 0.5243733 +| epoch 1 | 3407/ 8400 batches | train loss 0.4291028 +| epoch 1 | 3411/ 8400 batches | train loss 0.5318494 +| epoch 1 | 3415/ 8400 batches | train loss 0.5301236 +| epoch 1 | 3419/ 8400 batches | train loss 0.4923901 +| epoch 1 | 3423/ 8400 batches | train loss 0.4485946 +| epoch 1 | 3427/ 8400 batches | train loss 0.4864904 +| epoch 1 | 3431/ 8400 batches | train loss 0.3918409 +| epoch 1 | 3435/ 8400 batches | train loss 0.4498369 +| epoch 1 | 3439/ 8400 batches | train loss 0.5340620 +| epoch 1 | 3443/ 8400 batches | train loss 0.4038934 +| epoch 1 | 3447/ 8400 batches | train loss 0.4256935 +| epoch 1 | 3451/ 8400 batches | train loss 0.4996988 +| epoch 1 | 3455/ 8400 batches | train loss 0.5272170 +| epoch 1 | 3459/ 8400 batches | train loss 0.4989794 +| epoch 1 | 3463/ 8400 batches | train loss 0.5687295 +| epoch 1 | 3467/ 8400 batches | train loss 0.3928550 +| epoch 1 | 3471/ 8400 batches | train loss 0.4181352 +| epoch 1 | 3475/ 8400 batches | train loss 0.4919440 +| epoch 1 | 3479/ 8400 batches | train loss 0.4478684 +| epoch 1 | 3483/ 8400 batches | train loss 0.4176112 +| epoch 1 | 3487/ 8400 batches | train loss 0.4020319 +| epoch 1 | 3491/ 8400 batches | train loss 0.4440651 +| epoch 1 | 3495/ 8400 batches | train loss 0.3898396 +| epoch 1 | 3499/ 8400 batches | train loss 0.3535887 +| epoch 1 | 3503/ 8400 batches | train loss 0.4099774 +| epoch 1 | 3507/ 8400 batches | train loss 0.5302254 +| epoch 1 | 3511/ 8400 batches | train loss 0.4122704 +| epoch 1 | 3515/ 8400 batches | train loss 0.4751813 +| epoch 1 | 3519/ 8400 batches | train loss 0.4595699 +| epoch 1 | 3523/ 8400 batches | train loss 0.4321557 +| epoch 1 | 3527/ 8400 batches | train loss 0.5839898 +| epoch 1 | 3531/ 8400 batches | train loss 0.4300243 +| epoch 1 | 3535/ 8400 batches | train loss 0.3914635 +| epoch 1 | 3539/ 8400 batches | train loss 0.5323279 +| epoch 1 | 3543/ 8400 batches | train loss 0.4379695 +| epoch 1 | 3547/ 8400 batches | train loss 0.4837177 +| epoch 1 | 3551/ 8400 batches | train loss 0.5526346 +| epoch 1 | 3555/ 8400 batches | train loss 0.5269722 +| epoch 1 | 3559/ 8400 batches | train loss 0.4503760 +| epoch 1 | 3563/ 8400 batches | train loss 0.4297287 +| epoch 1 | 3567/ 8400 batches | train loss 0.4994527 +| epoch 1 | 3571/ 8400 batches | train loss 0.4642969 +| epoch 1 | 3575/ 8400 batches | train loss 0.4186631 +| epoch 1 | 3579/ 8400 batches | train loss 0.4126139 +| epoch 1 | 3583/ 8400 batches | train loss 0.4866368 +| epoch 1 | 3587/ 8400 batches | train loss 0.6292088 +| epoch 1 | 3591/ 8400 batches | train loss 0.4636467 +| epoch 1 | 3595/ 8400 batches | train loss 0.5392548 +| epoch 1 | 3599/ 8400 batches | train loss 0.4707645 +| epoch 1 | 3603/ 8400 batches | train loss 0.4424062 +| epoch 1 | 3607/ 8400 batches | train loss 0.5380495 +| epoch 1 | 3611/ 8400 batches | train loss 0.4625890 +| epoch 1 | 3615/ 8400 batches | train loss 0.3821084 +| epoch 1 | 3619/ 8400 batches | train loss 0.4440797 +| epoch 1 | 3623/ 8400 batches | train loss 0.4821735 +| epoch 1 | 3627/ 8400 batches | train loss 0.4100466 +| epoch 1 | 3631/ 8400 batches | train loss 0.5217409 +| epoch 1 | 3635/ 8400 batches | train loss 0.6181822 +| epoch 1 | 3639/ 8400 batches | train loss 0.5488065 +| epoch 1 | 3643/ 8400 batches | train loss 0.4899714 +| epoch 1 | 3647/ 8400 batches | train loss 0.5041562 +| epoch 1 | 3651/ 8400 batches | train loss 0.4697085 +| epoch 1 | 3655/ 8400 batches | train loss 0.4661799 +| epoch 1 | 3659/ 8400 batches | train loss 0.4267007 +| epoch 1 | 3663/ 8400 batches | train loss 0.4228353 +| epoch 1 | 3667/ 8400 batches | train loss 0.5675063 +| epoch 1 | 3671/ 8400 batches | train loss 0.4993844 +| epoch 1 | 3675/ 8400 batches | train loss 0.4834579 +| epoch 1 | 3679/ 8400 batches | train loss 0.5311853 +| epoch 1 | 3683/ 8400 batches | train loss 0.4228169 +| epoch 1 | 3687/ 8400 batches | train loss 0.6851869 +| epoch 1 | 3691/ 8400 batches | train loss 0.4702440 +| epoch 1 | 3695/ 8400 batches | train loss 0.5677972 +| epoch 1 | 3699/ 8400 batches | train loss 0.3703484 +| epoch 1 | 3703/ 8400 batches | train loss 0.4969082 +| epoch 1 | 3707/ 8400 batches | train loss 0.3519745 +| epoch 1 | 3711/ 8400 batches | train loss 0.5565885 +| epoch 1 | 3715/ 8400 batches | train loss 0.4269442 +| epoch 1 | 3719/ 8400 batches | train loss 0.4513823 +| epoch 1 | 3723/ 8400 batches | train loss 0.4894631 +| epoch 1 | 3727/ 8400 batches | train loss 0.6430709 +| epoch 1 | 3731/ 8400 batches | train loss 0.4974879 +| epoch 1 | 3735/ 8400 batches | train loss 0.4465950 +| epoch 1 | 3739/ 8400 batches | train loss 0.4844095 +| epoch 1 | 3743/ 8400 batches | train loss 0.4654510 +| epoch 1 | 3747/ 8400 batches | train loss 0.4461128 +| epoch 1 | 3751/ 8400 batches | train loss 0.4656913 +| epoch 1 | 3755/ 8400 batches | train loss 0.4360227 +| epoch 1 | 3759/ 8400 batches | train loss 0.4384456 +| epoch 1 | 3763/ 8400 batches | train loss 0.4422907 +| epoch 1 | 3767/ 8400 batches | train loss 0.5881330 +| epoch 1 | 3771/ 8400 batches | train loss 0.2965437 +| epoch 1 | 3775/ 8400 batches | train loss 0.4422194 +| epoch 1 | 3779/ 8400 batches | train loss 0.4588023 +| epoch 1 | 3783/ 8400 batches | train loss 0.4823559 +| epoch 1 | 3787/ 8400 batches | train loss 0.4980883 +| epoch 1 | 3791/ 8400 batches | train loss 0.4480159 +| epoch 1 | 3795/ 8400 batches | train loss 0.3702132 +| epoch 1 | 3799/ 8400 batches | train loss 0.5390050 +| epoch 1 | 3803/ 8400 batches | train loss 0.5175337 +| epoch 1 | 3807/ 8400 batches | train loss 0.4889920 +| epoch 1 | 3811/ 8400 batches | train loss 0.4229550 +| epoch 1 | 3815/ 8400 batches | train loss 0.4534792 +| epoch 1 | 3819/ 8400 batches | train loss 0.4176222 +| epoch 1 | 3823/ 8400 batches | train loss 0.5189573 +| epoch 1 | 3827/ 8400 batches | train loss 0.3927983 +| epoch 1 | 3831/ 8400 batches | train loss 0.4419657 +| epoch 1 | 3835/ 8400 batches | train loss 0.4149140 +| epoch 1 | 3839/ 8400 batches | train loss 0.4669841 +| epoch 1 | 3843/ 8400 batches | train loss 0.3240227 +| epoch 1 | 3847/ 8400 batches | train loss 0.3829850 +| epoch 1 | 3851/ 8400 batches | train loss 0.4429113 +| epoch 1 | 3855/ 8400 batches | train loss 0.4736525 +| epoch 1 | 3859/ 8400 batches | train loss 0.5174173 +| epoch 1 | 3863/ 8400 batches | train loss 0.4635435 +| epoch 1 | 3867/ 8400 batches | train loss 0.4155853 +| epoch 1 | 3871/ 8400 batches | train loss 0.4582602 +| epoch 1 | 3875/ 8400 batches | train loss 0.4536901 +| epoch 1 | 3879/ 8400 batches | train loss 0.4535761 +| epoch 1 | 3883/ 8400 batches | train loss 0.5535332 +| epoch 1 | 3887/ 8400 batches | train loss 0.4581785 +| epoch 1 | 3891/ 8400 batches | train loss 0.5356876 +| epoch 1 | 3895/ 8400 batches | train loss 0.4519454 +| epoch 1 | 3899/ 8400 batches | train loss 0.4175943 +| epoch 1 | 3903/ 8400 batches | train loss 0.4173368 +| epoch 1 | 3907/ 8400 batches | train loss 0.4102967 +| epoch 1 | 3911/ 8400 batches | train loss 0.3883930 +| epoch 1 | 3915/ 8400 batches | train loss 0.5743866 +| epoch 1 | 3919/ 8400 batches | train loss 0.4853033 +| epoch 1 | 3923/ 8400 batches | train loss 0.5274987 +| epoch 1 | 3927/ 8400 batches | train loss 0.4540801 +| epoch 1 | 3931/ 8400 batches | train loss 0.3951076 +| epoch 1 | 3935/ 8400 batches | train loss 0.4624143 +| epoch 1 | 3939/ 8400 batches | train loss 0.5035746 +| epoch 1 | 3943/ 8400 batches | train loss 0.5659352 +| epoch 1 | 3947/ 8400 batches | train loss 0.4925632 +| epoch 1 | 3951/ 8400 batches | train loss 0.4047046 +| epoch 1 | 3955/ 8400 batches | train loss 0.4833160 +| epoch 1 | 3959/ 8400 batches | train loss 0.5370294 +| epoch 1 | 3963/ 8400 batches | train loss 0.4772096 +| epoch 1 | 3967/ 8400 batches | train loss 0.3877584 +| epoch 1 | 3971/ 8400 batches | train loss 0.4489952 +| epoch 1 | 3975/ 8400 batches | train loss 0.3649499 +| epoch 1 | 3979/ 8400 batches | train loss 0.4540021 +| epoch 1 | 3983/ 8400 batches | train loss 0.3534635 +| epoch 1 | 3987/ 8400 batches | train loss 0.3931195 +| epoch 1 | 3991/ 8400 batches | train loss 0.4710087 +| epoch 1 | 3995/ 8400 batches | train loss 0.5821055 +| epoch 1 | 3999/ 8400 batches | train loss 0.4080825 +| epoch 1 | 4003/ 8400 batches | train loss 0.3915387 +| epoch 1 | 4007/ 8400 batches | train loss 0.6043605 +| epoch 1 | 4011/ 8400 batches | train loss 0.4368620 +| epoch 1 | 4015/ 8400 batches | train loss 0.4518819 +| epoch 1 | 4019/ 8400 batches | train loss 0.4476095 +| epoch 1 | 4023/ 8400 batches | train loss 0.3521400 +| epoch 1 | 4027/ 8400 batches | train loss 0.5206389 +| epoch 1 | 4031/ 8400 batches | train loss 0.4539345 +| epoch 1 | 4035/ 8400 batches | train loss 0.4402478 +| epoch 1 | 4039/ 8400 batches | train loss 0.5160983 +| epoch 1 | 4043/ 8400 batches | train loss 0.4155818 +| epoch 1 | 4047/ 8400 batches | train loss 0.4385846 +| epoch 1 | 4051/ 8400 batches | train loss 0.4506103 +| epoch 1 | 4055/ 8400 batches | train loss 0.4576629 +| epoch 1 | 4059/ 8400 batches | train loss 0.3679560 +| epoch 1 | 4063/ 8400 batches | train loss 0.4212937 +| epoch 1 | 4067/ 8400 batches | train loss 0.5112837 +| epoch 1 | 4071/ 8400 batches | train loss 0.4602692 +| epoch 1 | 4075/ 8400 batches | train loss 0.4158560 +| epoch 1 | 4079/ 8400 batches | train loss 0.4549901 +| epoch 1 | 4083/ 8400 batches | train loss 0.3871723 +| epoch 1 | 4087/ 8400 batches | train loss 0.4453989 +| epoch 1 | 4091/ 8400 batches | train loss 0.4129519 +| epoch 1 | 4095/ 8400 batches | train loss 0.3591287 +| epoch 1 | 4099/ 8400 batches | train loss 0.3960942 +| epoch 1 | 4103/ 8400 batches | train loss 0.4008719 +| epoch 1 | 4107/ 8400 batches | train loss 0.4839704 +| epoch 1 | 4111/ 8400 batches | train loss 0.4393922 +| epoch 1 | 4115/ 8400 batches | train loss 0.4555488 +| epoch 1 | 4119/ 8400 batches | train loss 0.5554579 +| epoch 1 | 4123/ 8400 batches | train loss 0.4838043 +| epoch 1 | 4127/ 8400 batches | train loss 0.5028013 +| epoch 1 | 4131/ 8400 batches | train loss 0.3456749 +| epoch 1 | 4135/ 8400 batches | train loss 0.4919472 +| epoch 1 | 4139/ 8400 batches | train loss 0.5202356 +| epoch 1 | 4143/ 8400 batches | train loss 0.4730837 +| epoch 1 | 4147/ 8400 batches | train loss 0.4028480 +| epoch 1 | 4151/ 8400 batches | train loss 0.4826548 +| epoch 1 | 4155/ 8400 batches | train loss 0.6167243 +| epoch 1 | 4159/ 8400 batches | train loss 0.4969844 +| epoch 1 | 4163/ 8400 batches | train loss 0.4830416 +| epoch 1 | 4167/ 8400 batches | train loss 0.4816702 +| epoch 1 | 4171/ 8400 batches | train loss 0.4942016 +| epoch 1 | 4175/ 8400 batches | train loss 0.3610814 +| epoch 1 | 4179/ 8400 batches | train loss 0.2851793 +| epoch 1 | 4183/ 8400 batches | train loss 0.4681388 +| epoch 1 | 4187/ 8400 batches | train loss 0.4059138 +| epoch 1 | 4191/ 8400 batches | train loss 0.3592871 +| epoch 1 | 4195/ 8400 batches | train loss 0.4967627 +| epoch 1 | 4199/ 8400 batches | train loss 0.4714197 +| epoch 1 | 4203/ 8400 batches | train loss 0.3523360 +| epoch 1 | 4207/ 8400 batches | train loss 0.4165481 +| epoch 1 | 4211/ 8400 batches | train loss 0.5289390 +| epoch 1 | 4215/ 8400 batches | train loss 0.4132230 +| epoch 1 | 4219/ 8400 batches | train loss 0.4111775 +| epoch 1 | 4223/ 8400 batches | train loss 0.5780215 +| epoch 1 | 4227/ 8400 batches | train loss 0.4574243 +| epoch 1 | 4231/ 8400 batches | train loss 0.5036793 +| epoch 1 | 4235/ 8400 batches | train loss 0.5595343 +| epoch 1 | 4239/ 8400 batches | train loss 0.4860485 +| epoch 1 | 4243/ 8400 batches | train loss 0.4308729 +| epoch 1 | 4247/ 8400 batches | train loss 0.4615154 +| epoch 1 | 4251/ 8400 batches | train loss 0.4566841 +| epoch 1 | 4255/ 8400 batches | train loss 0.4229079 +| epoch 1 | 4259/ 8400 batches | train loss 0.4641912 +| epoch 1 | 4263/ 8400 batches | train loss 0.4612553 +| epoch 1 | 4267/ 8400 batches | train loss 0.5028556 +| epoch 1 | 4271/ 8400 batches | train loss 0.4380419 +| epoch 1 | 4275/ 8400 batches | train loss 0.4436646 +| epoch 1 | 4279/ 8400 batches | train loss 0.4787061 +| epoch 1 | 4283/ 8400 batches | train loss 0.4643944 +| epoch 1 | 4287/ 8400 batches | train loss 0.4959460 +| epoch 1 | 4291/ 8400 batches | train loss 0.4867705 +| epoch 1 | 4295/ 8400 batches | train loss 0.4686978 +| epoch 1 | 4299/ 8400 batches | train loss 0.4998558 +| epoch 1 | 4303/ 8400 batches | train loss 0.5451553 +| epoch 1 | 4307/ 8400 batches | train loss 0.4503559 +| epoch 1 | 4311/ 8400 batches | train loss 0.4310428 +| epoch 1 | 4315/ 8400 batches | train loss 0.4132578 +| epoch 1 | 4319/ 8400 batches | train loss 0.6492323 +| epoch 1 | 4323/ 8400 batches | train loss 0.3928486 +| epoch 1 | 4327/ 8400 batches | train loss 0.4148323 +| epoch 1 | 4331/ 8400 batches | train loss 0.4723005 +| epoch 1 | 4335/ 8400 batches | train loss 0.5770390 +| epoch 1 | 4339/ 8400 batches | train loss 0.4237177 +| epoch 1 | 4343/ 8400 batches | train loss 0.3611140 +| epoch 1 | 4347/ 8400 batches | train loss 0.4961610 +| epoch 1 | 4351/ 8400 batches | train loss 0.5759903 +| epoch 1 | 4355/ 8400 batches | train loss 0.4722711 +| epoch 1 | 4359/ 8400 batches | train loss 0.5264374 +| epoch 1 | 4363/ 8400 batches | train loss 0.4965889 +| epoch 1 | 4367/ 8400 batches | train loss 0.5315381 +| epoch 1 | 4371/ 8400 batches | train loss 0.4485883 +| epoch 1 | 4375/ 8400 batches | train loss 0.3718261 +| epoch 1 | 4379/ 8400 batches | train loss 0.5104246 +| epoch 1 | 4383/ 8400 batches | train loss 0.5582067 +| epoch 1 | 4387/ 8400 batches | train loss 0.4398937 +| epoch 1 | 4391/ 8400 batches | train loss 0.4151503 +| epoch 1 | 4395/ 8400 batches | train loss 0.4630395 +| epoch 1 | 4399/ 8400 batches | train loss 0.3929265 +| epoch 1 | 4403/ 8400 batches | train loss 0.4101958 +| epoch 1 | 4407/ 8400 batches | train loss 0.4283748 +| epoch 1 | 4411/ 8400 batches | train loss 0.4475626 +| epoch 1 | 4415/ 8400 batches | train loss 0.4664940 +| epoch 1 | 4419/ 8400 batches | train loss 0.4571611 +| epoch 1 | 4423/ 8400 batches | train loss 0.4222949 +| epoch 1 | 4427/ 8400 batches | train loss 0.4399598 +| epoch 1 | 4431/ 8400 batches | train loss 0.2346141 +| epoch 1 | 4435/ 8400 batches | train loss 0.4012939 +| epoch 1 | 4439/ 8400 batches | train loss 0.4269967 +| epoch 1 | 4443/ 8400 batches | train loss 0.4173667 +| epoch 1 | 4447/ 8400 batches | train loss 0.5354575 +| epoch 1 | 4451/ 8400 batches | train loss 0.4863866 +| epoch 1 | 4455/ 8400 batches | train loss 0.4117604 +| epoch 1 | 4459/ 8400 batches | train loss 0.5638326 +| epoch 1 | 4463/ 8400 batches | train loss 0.4339952 +| epoch 1 | 4467/ 8400 batches | train loss 0.5192439 +| epoch 1 | 4471/ 8400 batches | train loss 0.4261860 +| epoch 1 | 4475/ 8400 batches | train loss 0.4813199 +| epoch 1 | 4479/ 8400 batches | train loss 0.4737626 +| epoch 1 | 4483/ 8400 batches | train loss 0.5686092 +| epoch 1 | 4487/ 8400 batches | train loss 0.5016989 +| epoch 1 | 4491/ 8400 batches | train loss 0.3937934 +| epoch 1 | 4495/ 8400 batches | train loss 0.4576007 +| epoch 1 | 4499/ 8400 batches | train loss 0.4494596 +| epoch 1 | 4503/ 8400 batches | train loss 0.4542733 +| epoch 1 | 4507/ 8400 batches | train loss 0.5359360 +| epoch 1 | 4511/ 8400 batches | train loss 0.4396224 +| epoch 1 | 4515/ 8400 batches | train loss 0.5078263 +| epoch 1 | 4519/ 8400 batches | train loss 0.4083197 +| epoch 1 | 4523/ 8400 batches | train loss 0.3742101 +| epoch 1 | 4527/ 8400 batches | train loss 0.4867108 +| epoch 1 | 4531/ 8400 batches | train loss 0.5259712 +| epoch 1 | 4535/ 8400 batches | train loss 0.5222769 +| epoch 1 | 4539/ 8400 batches | train loss 0.4362297 +| epoch 1 | 4543/ 8400 batches | train loss 0.4484811 +| epoch 1 | 4547/ 8400 batches | train loss 0.5169899 +| epoch 1 | 4551/ 8400 batches | train loss 0.4640504 +| epoch 1 | 4555/ 8400 batches | train loss 0.4975965 +| epoch 1 | 4559/ 8400 batches | train loss 0.4822308 +| epoch 1 | 4563/ 8400 batches | train loss 0.4747865 +| epoch 1 | 4567/ 8400 batches | train loss 0.4090668 +| epoch 1 | 4571/ 8400 batches | train loss 0.4499899 +| epoch 1 | 4575/ 8400 batches | train loss 0.5435891 +| epoch 1 | 4579/ 8400 batches | train loss 0.4618234 +| epoch 1 | 4583/ 8400 batches | train loss 0.4495347 +| epoch 1 | 4587/ 8400 batches | train loss 0.4127429 +| epoch 1 | 4591/ 8400 batches | train loss 0.4053066 +| epoch 1 | 4595/ 8400 batches | train loss 0.3969120 +| epoch 1 | 4599/ 8400 batches | train loss 0.5239414 +| epoch 1 | 4603/ 8400 batches | train loss 0.4955971 +| epoch 1 | 4607/ 8400 batches | train loss 0.4015740 +| epoch 1 | 4611/ 8400 batches | train loss 0.5422531 +| epoch 1 | 4615/ 8400 batches | train loss 0.5122561 +| epoch 1 | 4619/ 8400 batches | train loss 0.5278379 +| epoch 1 | 4623/ 8400 batches | train loss 0.5130987 +| epoch 1 | 4627/ 8400 batches | train loss 0.4892618 +| epoch 1 | 4631/ 8400 batches | train loss 0.5005882 +| epoch 1 | 4635/ 8400 batches | train loss 0.3809222 +| epoch 1 | 4639/ 8400 batches | train loss 0.5059679 +| epoch 1 | 4643/ 8400 batches | train loss 0.4417415 +| epoch 1 | 4647/ 8400 batches | train loss 0.4607267 +| epoch 1 | 4651/ 8400 batches | train loss 0.4569930 +| epoch 1 | 4655/ 8400 batches | train loss 0.4196057 +| epoch 1 | 4659/ 8400 batches | train loss 0.3876089 +| epoch 1 | 4663/ 8400 batches | train loss 0.5240624 +| epoch 1 | 4667/ 8400 batches | train loss 0.4361203 +| epoch 1 | 4671/ 8400 batches | train loss 0.4661240 +| epoch 1 | 4675/ 8400 batches | train loss 0.3675870 +| epoch 1 | 4679/ 8400 batches | train loss 0.4601222 +| epoch 1 | 4683/ 8400 batches | train loss 0.4450244 +| epoch 1 | 4687/ 8400 batches | train loss 0.4952810 +| epoch 1 | 4691/ 8400 batches | train loss 0.5716170 +| epoch 1 | 4695/ 8400 batches | train loss 0.3885003 +| epoch 1 | 4699/ 8400 batches | train loss 0.5368896 +| epoch 1 | 4703/ 8400 batches | train loss 0.4940868 +| epoch 1 | 4707/ 8400 batches | train loss 0.5112643 +| epoch 1 | 4711/ 8400 batches | train loss 0.5735253 +| epoch 1 | 4715/ 8400 batches | train loss 0.4660548 +| epoch 1 | 4719/ 8400 batches | train loss 0.4630025 +| epoch 1 | 4723/ 8400 batches | train loss 0.4967850 +| epoch 1 | 4727/ 8400 batches | train loss 0.4068274 +| epoch 1 | 4731/ 8400 batches | train loss 0.3849896 +| epoch 1 | 4735/ 8400 batches | train loss 0.3478838 +| epoch 1 | 4739/ 8400 batches | train loss 0.5172981 +| epoch 1 | 4743/ 8400 batches | train loss 0.4164623 +| epoch 1 | 4747/ 8400 batches | train loss 0.4670901 +| epoch 1 | 4751/ 8400 batches | train loss 0.3794330 +| epoch 1 | 4755/ 8400 batches | train loss 0.4165812 +| epoch 1 | 4759/ 8400 batches | train loss 0.4429307 +| epoch 1 | 4763/ 8400 batches | train loss 0.4841943 +| epoch 1 | 4767/ 8400 batches | train loss 0.2640255 +| epoch 1 | 4771/ 8400 batches | train loss 0.4706765 +| epoch 1 | 4775/ 8400 batches | train loss 0.5079953 +| epoch 1 | 4779/ 8400 batches | train loss 0.4814208 +| epoch 1 | 4783/ 8400 batches | train loss 0.4571616 +| epoch 1 | 4787/ 8400 batches | train loss 0.4358271 +| epoch 1 | 4791/ 8400 batches | train loss 0.4164110 +| epoch 1 | 4795/ 8400 batches | train loss 0.4646850 +| epoch 1 | 4799/ 8400 batches | train loss 0.4299250 +| epoch 1 | 4803/ 8400 batches | train loss 0.4385393 +| epoch 1 | 4807/ 8400 batches | train loss 0.3962986 +| epoch 1 | 4811/ 8400 batches | train loss 0.3985940 +| epoch 1 | 4815/ 8400 batches | train loss 0.5697372 +| epoch 1 | 4819/ 8400 batches | train loss 0.4048959 +| epoch 1 | 4823/ 8400 batches | train loss 0.4198234 +| epoch 1 | 4827/ 8400 batches | train loss 0.4023820 +| epoch 1 | 4831/ 8400 batches | train loss 0.5399418 +| epoch 1 | 4835/ 8400 batches | train loss 0.3960025 +| epoch 1 | 4839/ 8400 batches | train loss 0.4531578 +| epoch 1 | 4843/ 8400 batches | train loss 0.4030633 +| epoch 1 | 4847/ 8400 batches | train loss 0.5357120 +| epoch 1 | 4851/ 8400 batches | train loss 0.3936762 +| epoch 1 | 4855/ 8400 batches | train loss 0.4982370 +| epoch 1 | 4859/ 8400 batches | train loss 0.4830291 +| epoch 1 | 4863/ 8400 batches | train loss 0.5718059 +| epoch 1 | 4867/ 8400 batches | train loss 0.4247956 +| epoch 1 | 4871/ 8400 batches | train loss 0.4431778 +| epoch 1 | 4875/ 8400 batches | train loss 0.4916151 +| epoch 1 | 4879/ 8400 batches | train loss 0.4277654 +| epoch 1 | 4883/ 8400 batches | train loss 0.4089339 +| epoch 1 | 4887/ 8400 batches | train loss 0.4805062 +| epoch 1 | 4891/ 8400 batches | train loss 0.4395393 +| epoch 1 | 4895/ 8400 batches | train loss 0.3913045 +| epoch 1 | 4899/ 8400 batches | train loss 0.4257303 +| epoch 1 | 4903/ 8400 batches | train loss 0.4848942 +| epoch 1 | 4907/ 8400 batches | train loss 0.5051036 +| epoch 1 | 4911/ 8400 batches | train loss 0.4437916 +| epoch 1 | 4915/ 8400 batches | train loss 0.5361956 +| epoch 1 | 4919/ 8400 batches | train loss 0.4925901 +| epoch 1 | 4923/ 8400 batches | train loss 0.4879173 +| epoch 1 | 4927/ 8400 batches | train loss 0.4241241 +| epoch 1 | 4931/ 8400 batches | train loss 0.4480663 +| epoch 1 | 4935/ 8400 batches | train loss 0.5468057 +| epoch 1 | 4939/ 8400 batches | train loss 0.4361264 +| epoch 1 | 4943/ 8400 batches | train loss 0.5699326 +| epoch 1 | 4947/ 8400 batches | train loss 0.4502523 +| epoch 1 | 4951/ 8400 batches | train loss 0.4836714 +| epoch 1 | 4955/ 8400 batches | train loss 0.4579236 +| epoch 1 | 4959/ 8400 batches | train loss 0.4145251 +| epoch 1 | 4963/ 8400 batches | train loss 0.4440338 +| epoch 1 | 4967/ 8400 batches | train loss 0.5021396 +| epoch 1 | 4971/ 8400 batches | train loss 0.3952087 +| epoch 1 | 4975/ 8400 batches | train loss 0.4777143 +| epoch 1 | 4979/ 8400 batches | train loss 0.4639593 +| epoch 1 | 4983/ 8400 batches | train loss 0.3849176 +| epoch 1 | 4987/ 8400 batches | train loss 0.4182923 +| epoch 1 | 4991/ 8400 batches | train loss 0.3437408 +| epoch 1 | 4995/ 8400 batches | train loss 0.5460736 +| epoch 1 | 4999/ 8400 batches | train loss 0.4541635 +| epoch 1 | 5003/ 8400 batches | train loss 0.3774453 +| epoch 1 | 5007/ 8400 batches | train loss 0.5449822 +| epoch 1 | 5011/ 8400 batches | train loss 0.4563451 +| epoch 1 | 5015/ 8400 batches | train loss 0.4835477 +| epoch 1 | 5019/ 8400 batches | train loss 0.4601206 +| epoch 1 | 5023/ 8400 batches | train loss 0.4173809 +| epoch 1 | 5027/ 8400 batches | train loss 0.3119854 +| epoch 1 | 5031/ 8400 batches | train loss 0.4100838 +| epoch 1 | 5035/ 8400 batches | train loss 0.3579430 +| epoch 1 | 5039/ 8400 batches | train loss 0.4806013 +| epoch 1 | 5043/ 8400 batches | train loss 0.5086969 +| epoch 1 | 5047/ 8400 batches | train loss 0.3990260 +| epoch 1 | 5051/ 8400 batches | train loss 0.5125597 +| epoch 1 | 5055/ 8400 batches | train loss 0.5398020 +| epoch 1 | 5059/ 8400 batches | train loss 0.4419197 +| epoch 1 | 5063/ 8400 batches | train loss 0.4302752 +| epoch 1 | 5067/ 8400 batches | train loss 0.4878927 +| epoch 1 | 5071/ 8400 batches | train loss 0.5046854 +| epoch 1 | 5075/ 8400 batches | train loss 0.5017772 +| epoch 1 | 5079/ 8400 batches | train loss 0.4849594 +| epoch 1 | 5083/ 8400 batches | train loss 0.3264785 +| epoch 1 | 5087/ 8400 batches | train loss 0.4232963 +| epoch 1 | 5091/ 8400 batches | train loss 0.4549705 +| epoch 1 | 5095/ 8400 batches | train loss 0.4957809 +| epoch 1 | 5099/ 8400 batches | train loss 0.3998663 +| epoch 1 | 5103/ 8400 batches | train loss 0.4362739 +| epoch 1 | 5107/ 8400 batches | train loss 0.4776471 +| epoch 1 | 5111/ 8400 batches | train loss 0.4403028 +| epoch 1 | 5115/ 8400 batches | train loss 0.4441399 +| epoch 1 | 5119/ 8400 batches | train loss 0.4613044 +| epoch 1 | 5123/ 8400 batches | train loss 0.4316032 +| epoch 1 | 5127/ 8400 batches | train loss 0.5434553 +| epoch 1 | 5131/ 8400 batches | train loss 0.5534775 +| epoch 1 | 5135/ 8400 batches | train loss 0.4343161 +| epoch 1 | 5139/ 8400 batches | train loss 0.4143225 +| epoch 1 | 5143/ 8400 batches | train loss 0.4256569 +| epoch 1 | 5147/ 8400 batches | train loss 0.5070990 +| epoch 1 | 5151/ 8400 batches | train loss 0.4322377 +| epoch 1 | 5155/ 8400 batches | train loss 0.4199427 +| epoch 1 | 5159/ 8400 batches | train loss 0.4036249 +| epoch 1 | 5163/ 8400 batches | train loss 0.6403461 +| epoch 1 | 5167/ 8400 batches | train loss 0.4926958 +| epoch 1 | 5171/ 8400 batches | train loss 0.4408721 +| epoch 1 | 5175/ 8400 batches | train loss 0.6617436 +| epoch 1 | 5179/ 8400 batches | train loss 0.4816056 +| epoch 1 | 5183/ 8400 batches | train loss 0.5081497 +| epoch 1 | 5187/ 8400 batches | train loss 0.4213230 +| epoch 1 | 5191/ 8400 batches | train loss 0.4578210 +| epoch 1 | 5195/ 8400 batches | train loss 0.4599907 +| epoch 1 | 5199/ 8400 batches | train loss 0.5689315 +| epoch 1 | 5203/ 8400 batches | train loss 0.5957100 +| epoch 1 | 5207/ 8400 batches | train loss 0.4839979 +| epoch 1 | 5211/ 8400 batches | train loss 0.4528016 +| epoch 1 | 5215/ 8400 batches | train loss 0.5132201 +| epoch 1 | 5219/ 8400 batches | train loss 0.4075799 +| epoch 1 | 5223/ 8400 batches | train loss 0.4744291 +| epoch 1 | 5227/ 8400 batches | train loss 0.4255094 +| epoch 1 | 5231/ 8400 batches | train loss 0.5031350 +| epoch 1 | 5235/ 8400 batches | train loss 0.5073076 +| epoch 1 | 5239/ 8400 batches | train loss 0.4419422 +| epoch 1 | 5243/ 8400 batches | train loss 0.4784935 +| epoch 1 | 5247/ 8400 batches | train loss 0.3763874 +| epoch 1 | 5251/ 8400 batches | train loss 0.5185248 +| epoch 1 | 5255/ 8400 batches | train loss 0.4189822 +| epoch 1 | 5259/ 8400 batches | train loss 0.5017205 +| epoch 1 | 5263/ 8400 batches | train loss 0.3900886 +| epoch 1 | 5267/ 8400 batches | train loss 0.4482870 +| epoch 1 | 5271/ 8400 batches | train loss 0.4994083 +| epoch 1 | 5275/ 8400 batches | train loss 0.4363535 +| epoch 1 | 5279/ 8400 batches | train loss 0.3997292 +| epoch 1 | 5283/ 8400 batches | train loss 0.4117195 +| epoch 1 | 5287/ 8400 batches | train loss 0.4322150 +| epoch 1 | 5291/ 8400 batches | train loss 0.4760928 +| epoch 1 | 5295/ 8400 batches | train loss 0.4652102 +| epoch 1 | 5299/ 8400 batches | train loss 0.4476700 +| epoch 1 | 5303/ 8400 batches | train loss 0.4329471 +| epoch 1 | 5307/ 8400 batches | train loss 0.4982991 +| epoch 1 | 5311/ 8400 batches | train loss 0.4760826 +| epoch 1 | 5315/ 8400 batches | train loss 0.5955977 +| epoch 1 | 5319/ 8400 batches | train loss 0.4220162 +| epoch 1 | 5323/ 8400 batches | train loss 0.2406518 +| epoch 1 | 5327/ 8400 batches | train loss 0.4094754 +| epoch 1 | 5331/ 8400 batches | train loss 0.6187380 +| epoch 1 | 5335/ 8400 batches | train loss 0.3314783 +| epoch 1 | 5339/ 8400 batches | train loss 0.3688113 +| epoch 1 | 5343/ 8400 batches | train loss 0.5616584 +| epoch 1 | 5347/ 8400 batches | train loss 0.4322603 +| epoch 1 | 5351/ 8400 batches | train loss 0.4366239 +| epoch 1 | 5355/ 8400 batches | train loss 0.4128718 +| epoch 1 | 5359/ 8400 batches | train loss 0.4243735 +| epoch 1 | 5363/ 8400 batches | train loss 0.4641290 +| epoch 1 | 5367/ 8400 batches | train loss 0.4019992 +| epoch 1 | 5371/ 8400 batches | train loss 0.3881454 +| epoch 1 | 5375/ 8400 batches | train loss 0.4106930 +| epoch 1 | 5379/ 8400 batches | train loss 0.4608458 +| epoch 1 | 5383/ 8400 batches | train loss 0.5777044 +| epoch 1 | 5387/ 8400 batches | train loss 0.4854195 +| epoch 1 | 5391/ 8400 batches | train loss 0.4901913 +| epoch 1 | 5395/ 8400 batches | train loss 0.4213533 +| epoch 1 | 5399/ 8400 batches | train loss 0.4409133 +| epoch 1 | 5403/ 8400 batches | train loss 0.4644766 +| epoch 1 | 5407/ 8400 batches | train loss 0.4554972 +| epoch 1 | 5411/ 8400 batches | train loss 0.5076947 +| epoch 1 | 5415/ 8400 batches | train loss 0.4490297 +| epoch 1 | 5419/ 8400 batches | train loss 0.4001392 +| epoch 1 | 5423/ 8400 batches | train loss 0.3835702 +| epoch 1 | 5427/ 8400 batches | train loss 0.4030038 +| epoch 1 | 5431/ 8400 batches | train loss 0.5447080 +| epoch 1 | 5435/ 8400 batches | train loss 0.4609839 +| epoch 1 | 5439/ 8400 batches | train loss 0.4215803 +| epoch 1 | 5443/ 8400 batches | train loss 0.4037109 +| epoch 1 | 5447/ 8400 batches | train loss 0.4929057 +| epoch 1 | 5451/ 8400 batches | train loss 0.4814074 +| epoch 1 | 5455/ 8400 batches | train loss 0.4380785 +| epoch 1 | 5459/ 8400 batches | train loss 0.4033659 +| epoch 1 | 5463/ 8400 batches | train loss 0.4144758 +| epoch 1 | 5467/ 8400 batches | train loss 0.5113521 +| epoch 1 | 5471/ 8400 batches | train loss 0.5059935 +| epoch 1 | 5475/ 8400 batches | train loss 0.4106542 +| epoch 1 | 5479/ 8400 batches | train loss 0.4321354 +| epoch 1 | 5483/ 8400 batches | train loss 0.4861564 +| epoch 1 | 5487/ 8400 batches | train loss 0.4742067 +| epoch 1 | 5491/ 8400 batches | train loss 0.3797887 +| epoch 1 | 5495/ 8400 batches | train loss 0.6960149 +| epoch 1 | 5499/ 8400 batches | train loss 0.5840983 +| epoch 1 | 5503/ 8400 batches | train loss 0.4108457 +| epoch 1 | 5507/ 8400 batches | train loss 0.5729977 +| epoch 1 | 5511/ 8400 batches | train loss 0.4204562 +| epoch 1 | 5515/ 8400 batches | train loss 0.3644505 +| epoch 1 | 5519/ 8400 batches | train loss 0.5583460 +| epoch 1 | 5523/ 8400 batches | train loss 0.4383581 +| epoch 1 | 5527/ 8400 batches | train loss 0.5369483 +| epoch 1 | 5531/ 8400 batches | train loss 0.4394140 +| epoch 1 | 5535/ 8400 batches | train loss 0.5109779 +| epoch 1 | 5539/ 8400 batches | train loss 0.4668306 +| epoch 1 | 5543/ 8400 batches | train loss 0.5340828 +| epoch 1 | 5547/ 8400 batches | train loss 0.4493226 +| epoch 1 | 5551/ 8400 batches | train loss 0.4509347 +| epoch 1 | 5555/ 8400 batches | train loss 0.4211503 +| epoch 1 | 5559/ 8400 batches | train loss 0.4905556 +| epoch 1 | 5563/ 8400 batches | train loss 0.4322278 +| epoch 1 | 5567/ 8400 batches | train loss 0.4556690 +| epoch 1 | 5571/ 8400 batches | train loss 0.5490724 +| epoch 1 | 5575/ 8400 batches | train loss 0.3616202 +| epoch 1 | 5579/ 8400 batches | train loss 0.4440911 +| epoch 1 | 5583/ 8400 batches | train loss 0.4272527 +| epoch 1 | 5587/ 8400 batches | train loss 0.3782153 +| epoch 1 | 5591/ 8400 batches | train loss 0.4852195 +| epoch 1 | 5595/ 8400 batches | train loss 0.4622221 +| epoch 1 | 5599/ 8400 batches | train loss 0.5490187 +| epoch 1 | 5603/ 8400 batches | train loss 0.4724306 +| epoch 1 | 5607/ 8400 batches | train loss 0.4742559 +| epoch 1 | 5611/ 8400 batches | train loss 0.4528478 +| epoch 1 | 5615/ 8400 batches | train loss 0.4405753 +| epoch 1 | 5619/ 8400 batches | train loss 0.3804006 +| epoch 1 | 5623/ 8400 batches | train loss 0.4393954 +| epoch 1 | 5627/ 8400 batches | train loss 0.5024697 +| epoch 1 | 5631/ 8400 batches | train loss 0.4287186 +| epoch 1 | 5635/ 8400 batches | train loss 0.5387098 +| epoch 1 | 5639/ 8400 batches | train loss 0.4273130 +| epoch 1 | 5643/ 8400 batches | train loss 0.4819769 +| epoch 1 | 5647/ 8400 batches | train loss 0.4446811 +| epoch 1 | 5651/ 8400 batches | train loss 0.4286402 +| epoch 1 | 5655/ 8400 batches | train loss 0.4292313 +| epoch 1 | 5659/ 8400 batches | train loss 0.4823375 +| epoch 1 | 5663/ 8400 batches | train loss 0.4923330 +| epoch 1 | 5667/ 8400 batches | train loss 0.4760778 +| epoch 1 | 5671/ 8400 batches | train loss 0.5828444 +| epoch 1 | 5675/ 8400 batches | train loss 0.4248469 +| epoch 1 | 5679/ 8400 batches | train loss 0.3300245 +| epoch 1 | 5683/ 8400 batches | train loss 0.4936039 +| epoch 1 | 5687/ 8400 batches | train loss 0.4081075 +| epoch 1 | 5691/ 8400 batches | train loss 0.5580660 +| epoch 1 | 5695/ 8400 batches | train loss 0.5119557 +| epoch 1 | 5699/ 8400 batches | train loss 0.4901704 +| epoch 1 | 5703/ 8400 batches | train loss 0.4534339 +| epoch 1 | 5707/ 8400 batches | train loss 0.4555465 +| epoch 1 | 5711/ 8400 batches | train loss 0.3339635 +| epoch 1 | 5715/ 8400 batches | train loss 0.5477612 +| epoch 1 | 5719/ 8400 batches | train loss 0.4573631 +| epoch 1 | 5723/ 8400 batches | train loss 0.4864308 +| epoch 1 | 5727/ 8400 batches | train loss 0.4990651 +| epoch 1 | 5731/ 8400 batches | train loss 0.5417656 +| epoch 1 | 5735/ 8400 batches | train loss 0.3825528 +| epoch 1 | 5739/ 8400 batches | train loss 0.4104937 +| epoch 1 | 5743/ 8400 batches | train loss 0.4560360 +| epoch 1 | 5747/ 8400 batches | train loss 0.5920348 +| epoch 1 | 5751/ 8400 batches | train loss 0.3580627 +| epoch 1 | 5755/ 8400 batches | train loss 0.3944694 +| epoch 1 | 5759/ 8400 batches | train loss 0.4069353 +| epoch 1 | 5763/ 8400 batches | train loss 0.5469341 +| epoch 1 | 5767/ 8400 batches | train loss 0.5407596 +| epoch 1 | 5771/ 8400 batches | train loss 0.4734764 +| epoch 1 | 5775/ 8400 batches | train loss 0.4493439 +| epoch 1 | 5779/ 8400 batches | train loss 0.3590726 +| epoch 1 | 5783/ 8400 batches | train loss 0.5680460 +| epoch 1 | 5787/ 8400 batches | train loss 0.5490460 +| epoch 1 | 5791/ 8400 batches | train loss 0.5681163 +| epoch 1 | 5795/ 8400 batches | train loss 0.4465439 +| epoch 1 | 5799/ 8400 batches | train loss 0.5068512 +| epoch 1 | 5803/ 8400 batches | train loss 0.4143619 +| epoch 1 | 5807/ 8400 batches | train loss 0.4665732 +| epoch 1 | 5811/ 8400 batches | train loss 0.4162282 +| epoch 1 | 5815/ 8400 batches | train loss 0.4257488 +| epoch 1 | 5819/ 8400 batches | train loss 0.4445371 +| epoch 1 | 5823/ 8400 batches | train loss 0.4464843 +| epoch 1 | 5827/ 8400 batches | train loss 0.4159898 +| epoch 1 | 5831/ 8400 batches | train loss 0.4858613 +| epoch 1 | 5835/ 8400 batches | train loss 0.7124503 +| epoch 1 | 5839/ 8400 batches | train loss 0.4424960 +| epoch 1 | 5843/ 8400 batches | train loss 0.4509666 +| epoch 1 | 5847/ 8400 batches | train loss 0.4214000 +| epoch 1 | 5851/ 8400 batches | train loss 0.4351177 +| epoch 1 | 5855/ 8400 batches | train loss 0.4504441 +| epoch 1 | 5859/ 8400 batches | train loss 0.5472522 +| epoch 1 | 5863/ 8400 batches | train loss 0.4394401 +| epoch 1 | 5867/ 8400 batches | train loss 0.3472915 +| epoch 1 | 5871/ 8400 batches | train loss 0.3938393 +| epoch 1 | 5875/ 8400 batches | train loss 0.3231494 +| epoch 1 | 5879/ 8400 batches | train loss 0.5054159 +| epoch 1 | 5883/ 8400 batches | train loss 0.4425475 +| epoch 1 | 5887/ 8400 batches | train loss 0.6205095 +| epoch 1 | 5891/ 8400 batches | train loss 0.4739063 +| epoch 1 | 5895/ 8400 batches | train loss 0.4915608 +| epoch 1 | 5899/ 8400 batches | train loss 0.4437454 +| epoch 1 | 5903/ 8400 batches | train loss 0.4476246 +| epoch 1 | 5907/ 8400 batches | train loss 0.3989193 +| epoch 1 | 5911/ 8400 batches | train loss 0.4711012 +| epoch 1 | 5915/ 8400 batches | train loss 0.5263524 +| epoch 1 | 5919/ 8400 batches | train loss 0.4360041 +| epoch 1 | 5923/ 8400 batches | train loss 0.4570351 +| epoch 1 | 5927/ 8400 batches | train loss 0.3396485 +| epoch 1 | 5931/ 8400 batches | train loss 0.4035500 +| epoch 1 | 5935/ 8400 batches | train loss 0.3609304 +| epoch 1 | 5939/ 8400 batches | train loss 0.4793381 +| epoch 1 | 5943/ 8400 batches | train loss 0.5278510 +| epoch 1 | 5947/ 8400 batches | train loss 0.3138922 +| epoch 1 | 5951/ 8400 batches | train loss 0.3579847 +| epoch 1 | 5955/ 8400 batches | train loss 0.4171969 +| epoch 1 | 5959/ 8400 batches | train loss 0.4241441 +| epoch 1 | 5963/ 8400 batches | train loss 0.4470515 +| epoch 1 | 5967/ 8400 batches | train loss 0.4362715 +| epoch 1 | 5971/ 8400 batches | train loss 0.4731538 +| epoch 1 | 5975/ 8400 batches | train loss 0.4412346 +| epoch 1 | 5979/ 8400 batches | train loss 0.5547622 +| epoch 1 | 5983/ 8400 batches | train loss 0.5383446 +| epoch 1 | 5987/ 8400 batches | train loss 0.4244602 +| epoch 1 | 5991/ 8400 batches | train loss 0.4646491 +| epoch 1 | 5995/ 8400 batches | train loss 0.4677846 +| epoch 1 | 5999/ 8400 batches | train loss 0.4187533 +| epoch 1 | 6003/ 8400 batches | train loss 0.4725454 +| epoch 1 | 6007/ 8400 batches | train loss 0.3890645 +| epoch 1 | 6011/ 8400 batches | train loss 0.4900838 +| epoch 1 | 6015/ 8400 batches | train loss 0.3979109 +| epoch 1 | 6019/ 8400 batches | train loss 0.5245646 +| epoch 1 | 6023/ 8400 batches | train loss 0.5013219 +| epoch 1 | 6027/ 8400 batches | train loss 0.4702868 +| epoch 1 | 6031/ 8400 batches | train loss 0.4715732 +| epoch 1 | 6035/ 8400 batches | train loss 0.4559702 +| epoch 1 | 6039/ 8400 batches | train loss 0.4194896 +| epoch 1 | 6043/ 8400 batches | train loss 0.5142412 +| epoch 1 | 6047/ 8400 batches | train loss 0.4037248 +| epoch 1 | 6051/ 8400 batches | train loss 0.4394931 +| epoch 1 | 6055/ 8400 batches | train loss 0.4875420 +| epoch 1 | 6059/ 8400 batches | train loss 0.4623716 +| epoch 1 | 6063/ 8400 batches | train loss 0.4617178 +| epoch 1 | 6067/ 8400 batches | train loss 0.4589711 +| epoch 1 | 6071/ 8400 batches | train loss 0.4266883 +| epoch 1 | 6075/ 8400 batches | train loss 0.3926526 +| epoch 1 | 6079/ 8400 batches | train loss 0.4384911 +| epoch 1 | 6083/ 8400 batches | train loss 0.3364648 +| epoch 1 | 6087/ 8400 batches | train loss 0.5646486 +| epoch 1 | 6091/ 8400 batches | train loss 0.4822341 +| epoch 1 | 6095/ 8400 batches | train loss 0.4002067 +| epoch 1 | 6099/ 8400 batches | train loss 0.4227333 +| epoch 1 | 6103/ 8400 batches | train loss 0.4776207 +| epoch 1 | 6107/ 8400 batches | train loss 0.5070716 +| epoch 1 | 6111/ 8400 batches | train loss 0.4136301 +| epoch 1 | 6115/ 8400 batches | train loss 0.4662896 +| epoch 1 | 6119/ 8400 batches | train loss 0.3724954 +| epoch 1 | 6123/ 8400 batches | train loss 0.3866970 +| epoch 1 | 6127/ 8400 batches | train loss 0.4848824 +| epoch 1 | 6131/ 8400 batches | train loss 0.4740038 +| epoch 1 | 6135/ 8400 batches | train loss 0.4493187 +| epoch 1 | 6139/ 8400 batches | train loss 0.4147261 +| epoch 1 | 6143/ 8400 batches | train loss 0.3788425 +| epoch 1 | 6147/ 8400 batches | train loss 0.4406363 +| epoch 1 | 6151/ 8400 batches | train loss 0.4034477 +| epoch 1 | 6155/ 8400 batches | train loss 0.4015492 +| epoch 1 | 6159/ 8400 batches | train loss 0.4153521 +| epoch 1 | 6163/ 8400 batches | train loss 0.3643582 +| epoch 1 | 6167/ 8400 batches | train loss 0.4214190 +| epoch 1 | 6171/ 8400 batches | train loss 0.4768691 +| epoch 1 | 6175/ 8400 batches | train loss 0.3602531 +| epoch 1 | 6179/ 8400 batches | train loss 0.3702382 +| epoch 1 | 6183/ 8400 batches | train loss 0.4641663 +| epoch 1 | 6187/ 8400 batches | train loss 0.4468603 +| epoch 1 | 6191/ 8400 batches | train loss 0.5785909 +| epoch 1 | 6195/ 8400 batches | train loss 0.4657433 +| epoch 1 | 6199/ 8400 batches | train loss 0.4667211 +| epoch 1 | 6203/ 8400 batches | train loss 0.3873185 +| epoch 1 | 6207/ 8400 batches | train loss 0.4533547 +| epoch 1 | 6211/ 8400 batches | train loss 0.4860702 +| epoch 1 | 6215/ 8400 batches | train loss 0.5903468 +| epoch 1 | 6219/ 8400 batches | train loss 0.5165917 +| epoch 1 | 6223/ 8400 batches | train loss 0.4014688 +| epoch 1 | 6227/ 8400 batches | train loss 0.4130311 +| epoch 1 | 6231/ 8400 batches | train loss 0.4393286 +| epoch 1 | 6235/ 8400 batches | train loss 0.4236084 +| epoch 1 | 6239/ 8400 batches | train loss 0.4300303 +| epoch 1 | 6243/ 8400 batches | train loss 0.4001791 +| epoch 1 | 6247/ 8400 batches | train loss 0.3863479 +| epoch 1 | 6251/ 8400 batches | train loss 0.4702919 +| epoch 1 | 6255/ 8400 batches | train loss 0.4377944 +| epoch 1 | 6259/ 8400 batches | train loss 0.4804646 +| epoch 1 | 6263/ 8400 batches | train loss 0.4249431 +| epoch 1 | 6267/ 8400 batches | train loss 0.4594179 +| epoch 1 | 6271/ 8400 batches | train loss 0.5897194 +| epoch 1 | 6275/ 8400 batches | train loss 0.5074353 +| epoch 1 | 6279/ 8400 batches | train loss 0.4434788 +| epoch 1 | 6283/ 8400 batches | train loss 0.5362121 +| epoch 1 | 6287/ 8400 batches | train loss 0.4100794 +| epoch 1 | 6291/ 8400 batches | train loss 0.3825542 +| epoch 1 | 6295/ 8400 batches | train loss 0.5408248 +| epoch 1 | 6299/ 8400 batches | train loss 0.4162740 +| epoch 1 | 6303/ 8400 batches | train loss 0.4687123 +| epoch 1 | 6307/ 8400 batches | train loss 0.3862134 +| epoch 1 | 6311/ 8400 batches | train loss 0.3879633 +| epoch 1 | 6315/ 8400 batches | train loss 0.6484554 +| epoch 1 | 6319/ 8400 batches | train loss 0.5790472 +| epoch 1 | 6323/ 8400 batches | train loss 0.4662556 +| epoch 1 | 6327/ 8400 batches | train loss 0.4192286 +| epoch 1 | 6331/ 8400 batches | train loss 0.4394665 +| epoch 1 | 6335/ 8400 batches | train loss 0.4131377 +| epoch 1 | 6339/ 8400 batches | train loss 0.4070277 +| epoch 1 | 6343/ 8400 batches | train loss 0.5132586 +| epoch 1 | 6347/ 8400 batches | train loss 0.4116104 +| epoch 1 | 6351/ 8400 batches | train loss 0.4226942 +| epoch 1 | 6355/ 8400 batches | train loss 0.4831133 +| epoch 1 | 6359/ 8400 batches | train loss 0.4471815 +| epoch 1 | 6363/ 8400 batches | train loss 0.4895718 +| epoch 1 | 6367/ 8400 batches | train loss 0.5088201 +| epoch 1 | 6371/ 8400 batches | train loss 0.4077353 +| epoch 1 | 6375/ 8400 batches | train loss 0.4467371 +| epoch 1 | 6379/ 8400 batches | train loss 0.4996677 +| epoch 1 | 6383/ 8400 batches | train loss 0.4118871 +| epoch 1 | 6387/ 8400 batches | train loss 0.3817917 +| epoch 1 | 6391/ 8400 batches | train loss 0.4416345 +| epoch 1 | 6395/ 8400 batches | train loss 0.4471321 +| epoch 1 | 6399/ 8400 batches | train loss 0.4178205 +| epoch 1 | 6403/ 8400 batches | train loss 0.4819595 +| epoch 1 | 6407/ 8400 batches | train loss 0.4065506 +| epoch 1 | 6411/ 8400 batches | train loss 0.4351918 +| epoch 1 | 6415/ 8400 batches | train loss 0.4539956 +| epoch 1 | 6419/ 8400 batches | train loss 0.4603924 +| epoch 1 | 6423/ 8400 batches | train loss 0.5276653 +| epoch 1 | 6427/ 8400 batches | train loss 0.4048013 +| epoch 1 | 6431/ 8400 batches | train loss 0.4738208 +| epoch 1 | 6435/ 8400 batches | train loss 0.3804659 +| epoch 1 | 6439/ 8400 batches | train loss 0.4688347 +| epoch 1 | 6443/ 8400 batches | train loss 0.4725940 +| epoch 1 | 6447/ 8400 batches | train loss 0.3399228 +| epoch 1 | 6451/ 8400 batches | train loss 0.4535197 +| epoch 1 | 6455/ 8400 batches | train loss 0.5349520 +| epoch 1 | 6459/ 8400 batches | train loss 0.4442473 +| epoch 1 | 6463/ 8400 batches | train loss 0.4750932 +| epoch 1 | 6467/ 8400 batches | train loss 0.4662931 +| epoch 1 | 6471/ 8400 batches | train loss 0.3601659 +| epoch 1 | 6475/ 8400 batches | train loss 0.4759006 +| epoch 1 | 6479/ 8400 batches | train loss 0.4320066 +| epoch 1 | 6483/ 8400 batches | train loss 0.4504402 +| epoch 1 | 6487/ 8400 batches | train loss 0.4510559 +| epoch 1 | 6491/ 8400 batches | train loss 0.3892722 +| epoch 1 | 6495/ 8400 batches | train loss 0.4951057 +| epoch 1 | 6499/ 8400 batches | train loss 0.4333978 +| epoch 1 | 6503/ 8400 batches | train loss 0.4071911 +| epoch 1 | 6507/ 8400 batches | train loss 0.4238477 +| epoch 1 | 6511/ 8400 batches | train loss 0.4519554 +| epoch 1 | 6515/ 8400 batches | train loss 0.4772965 +| epoch 1 | 6519/ 8400 batches | train loss 0.4441162 +| epoch 1 | 6523/ 8400 batches | train loss 0.4300187 +| epoch 1 | 6527/ 8400 batches | train loss 0.3946513 +| epoch 1 | 6531/ 8400 batches | train loss 0.4721371 +| epoch 1 | 6535/ 8400 batches | train loss 0.4675421 +| epoch 1 | 6539/ 8400 batches | train loss 0.4147860 +| epoch 1 | 6543/ 8400 batches | train loss 0.5291048 +| epoch 1 | 6547/ 8400 batches | train loss 0.4585257 +| epoch 1 | 6551/ 8400 batches | train loss 0.4899148 +| epoch 1 | 6555/ 8400 batches | train loss 0.4412758 +| epoch 1 | 6559/ 8400 batches | train loss 0.4633411 +| epoch 1 | 6563/ 8400 batches | train loss 0.4763827 +| epoch 1 | 6567/ 8400 batches | train loss 0.4619993 +| epoch 1 | 6571/ 8400 batches | train loss 0.4587455 +| epoch 1 | 6575/ 8400 batches | train loss 0.4927588 +| epoch 1 | 6579/ 8400 batches | train loss 0.5493081 +| epoch 1 | 6583/ 8400 batches | train loss 0.3479237 +| epoch 1 | 6587/ 8400 batches | train loss 0.5100950 +| epoch 1 | 6591/ 8400 batches | train loss 0.4020259 +| epoch 1 | 6595/ 8400 batches | train loss 0.5382004 +| epoch 1 | 6599/ 8400 batches | train loss 0.4121324 +| epoch 1 | 6603/ 8400 batches | train loss 0.4764659 +| epoch 1 | 6607/ 8400 batches | train loss 0.4100456 +| epoch 1 | 6611/ 8400 batches | train loss 0.4687536 +| epoch 1 | 6615/ 8400 batches | train loss 0.4745485 +| epoch 1 | 6619/ 8400 batches | train loss 0.5694058 +| epoch 1 | 6623/ 8400 batches | train loss 0.5172293 +| epoch 1 | 6627/ 8400 batches | train loss 0.4311709 +| epoch 1 | 6631/ 8400 batches | train loss 0.4502977 +| epoch 1 | 6635/ 8400 batches | train loss 0.4929428 +| epoch 1 | 6639/ 8400 batches | train loss 0.3788619 +| epoch 1 | 6643/ 8400 batches | train loss 0.3915944 +| epoch 1 | 6647/ 8400 batches | train loss 0.4061496 +| epoch 1 | 6651/ 8400 batches | train loss 0.4450553 +| epoch 1 | 6655/ 8400 batches | train loss 0.4530658 +| epoch 1 | 6659/ 8400 batches | train loss 0.5095595 +| epoch 1 | 6663/ 8400 batches | train loss 0.4523582 +| epoch 1 | 6667/ 8400 batches | train loss 0.4139188 +| epoch 1 | 6671/ 8400 batches | train loss 0.3512051 +| epoch 1 | 6675/ 8400 batches | train loss 0.4147882 +| epoch 1 | 6679/ 8400 batches | train loss 0.4917488 +| epoch 1 | 6683/ 8400 batches | train loss 0.4552802 +| epoch 1 | 6687/ 8400 batches | train loss 0.6225734 +| epoch 1 | 6691/ 8400 batches | train loss 0.3656864 +| epoch 1 | 6695/ 8400 batches | train loss 0.5280483 +| epoch 1 | 6699/ 8400 batches | train loss 0.5187976 +| epoch 1 | 6703/ 8400 batches | train loss 0.4363151 +| epoch 1 | 6707/ 8400 batches | train loss 0.6152350 +| epoch 1 | 6711/ 8400 batches | train loss 0.4889978 +| epoch 1 | 6715/ 8400 batches | train loss 0.4268501 +| epoch 1 | 6719/ 8400 batches | train loss 0.4209687 +| epoch 1 | 6723/ 8400 batches | train loss 0.4842423 +| epoch 1 | 6727/ 8400 batches | train loss 0.4924573 +| epoch 1 | 6731/ 8400 batches | train loss 0.4563768 +| epoch 1 | 6735/ 8400 batches | train loss 0.4597422 +| epoch 1 | 6739/ 8400 batches | train loss 0.5243496 +| epoch 1 | 6743/ 8400 batches | train loss 0.3844787 +| epoch 1 | 6747/ 8400 batches | train loss 0.4872409 +| epoch 1 | 6751/ 8400 batches | train loss 0.4325805 +| epoch 1 | 6755/ 8400 batches | train loss 0.3610150 +| epoch 1 | 6759/ 8400 batches | train loss 0.5207784 +| epoch 1 | 6763/ 8400 batches | train loss 0.4329050 +| epoch 1 | 6767/ 8400 batches | train loss 0.5699141 +| epoch 1 | 6771/ 8400 batches | train loss 0.5057760 +| epoch 1 | 6775/ 8400 batches | train loss 0.5416384 +| epoch 1 | 6779/ 8400 batches | train loss 0.3987611 +| epoch 1 | 6783/ 8400 batches | train loss 0.4172726 +| epoch 1 | 6787/ 8400 batches | train loss 0.4347487 +| epoch 1 | 6791/ 8400 batches | train loss 0.4536734 +| epoch 1 | 6795/ 8400 batches | train loss 0.4144299 +| epoch 1 | 6799/ 8400 batches | train loss 0.3638328 +| epoch 1 | 6803/ 8400 batches | train loss 0.4657812 +| epoch 1 | 6807/ 8400 batches | train loss 0.4772257 +| epoch 1 | 6811/ 8400 batches | train loss 0.5364709 +| epoch 1 | 6815/ 8400 batches | train loss 0.4246632 +| epoch 1 | 6819/ 8400 batches | train loss 0.4996827 +| epoch 1 | 6823/ 8400 batches | train loss 0.3864933 +| epoch 1 | 6827/ 8400 batches | train loss 0.4680707 +| epoch 1 | 6831/ 8400 batches | train loss 0.4002380 +| epoch 1 | 6835/ 8400 batches | train loss 0.4581874 +| epoch 1 | 6839/ 8400 batches | train loss 0.4073775 +| epoch 1 | 6843/ 8400 batches | train loss 0.5527627 +| epoch 1 | 6847/ 8400 batches | train loss 0.5319881 +| epoch 1 | 6851/ 8400 batches | train loss 0.3945013 +| epoch 1 | 6855/ 8400 batches | train loss 0.3709638 +| epoch 1 | 6859/ 8400 batches | train loss 0.3467074 +| epoch 1 | 6863/ 8400 batches | train loss 0.4874997 +| epoch 1 | 6867/ 8400 batches | train loss 0.4279041 +| epoch 1 | 6871/ 8400 batches | train loss 0.5738242 +| epoch 1 | 6875/ 8400 batches | train loss 0.4690358 +| epoch 1 | 6879/ 8400 batches | train loss 0.4096854 +| epoch 1 | 6883/ 8400 batches | train loss 0.4271510 +| epoch 1 | 6887/ 8400 batches | train loss 0.4447379 +| epoch 1 | 6891/ 8400 batches | train loss 0.4631678 +| epoch 1 | 6895/ 8400 batches | train loss 0.4321140 +| epoch 1 | 6899/ 8400 batches | train loss 0.4156134 +| epoch 1 | 6903/ 8400 batches | train loss 0.4249282 +| epoch 1 | 6907/ 8400 batches | train loss 0.3474240 +| epoch 1 | 6911/ 8400 batches | train loss 0.3746853 +| epoch 1 | 6915/ 8400 batches | train loss 0.3783233 +| epoch 1 | 6919/ 8400 batches | train loss 0.5121001 +| epoch 1 | 6923/ 8400 batches | train loss 0.3987966 +| epoch 1 | 6927/ 8400 batches | train loss 0.4853851 +| epoch 1 | 6931/ 8400 batches | train loss 0.3787100 +| epoch 1 | 6935/ 8400 batches | train loss 0.4427187 +| epoch 1 | 6939/ 8400 batches | train loss 0.4999230 +| epoch 1 | 6943/ 8400 batches | train loss 0.3583921 +| epoch 1 | 6947/ 8400 batches | train loss 0.3855801 +| epoch 1 | 6951/ 8400 batches | train loss 0.4117479 +| epoch 1 | 6955/ 8400 batches | train loss 0.4479932 +| epoch 1 | 6959/ 8400 batches | train loss 0.5975602 +| epoch 1 | 6963/ 8400 batches | train loss 0.4501027 +| epoch 1 | 6967/ 8400 batches | train loss 0.4818839 +| epoch 1 | 6971/ 8400 batches | train loss 0.4427651 +| epoch 1 | 6975/ 8400 batches | train loss 0.4178280 +| epoch 1 | 6979/ 8400 batches | train loss 0.4133306 +| epoch 1 | 6983/ 8400 batches | train loss 0.4481505 +| epoch 1 | 6987/ 8400 batches | train loss 0.4813995 +| epoch 1 | 6991/ 8400 batches | train loss 0.4195528 +| epoch 1 | 6995/ 8400 batches | train loss 0.3465980 +| epoch 1 | 6999/ 8400 batches | train loss 0.3786083 +| epoch 1 | 7003/ 8400 batches | train loss 0.5209238 +| epoch 1 | 7007/ 8400 batches | train loss 0.4729561 +| epoch 1 | 7011/ 8400 batches | train loss 0.4561760 +| epoch 1 | 7015/ 8400 batches | train loss 0.4193572 +| epoch 1 | 7019/ 8400 batches | train loss 0.5189550 +| epoch 1 | 7023/ 8400 batches | train loss 0.4350259 +| epoch 1 | 7027/ 8400 batches | train loss 0.3933221 +| epoch 1 | 7031/ 8400 batches | train loss 0.3791689 +| epoch 1 | 7035/ 8400 batches | train loss 0.4870338 +| epoch 1 | 7039/ 8400 batches | train loss 0.4222757 +| epoch 1 | 7043/ 8400 batches | train loss 0.3973576 +| epoch 1 | 7047/ 8400 batches | train loss 0.4595805 +| epoch 1 | 7051/ 8400 batches | train loss 0.4656561 +| epoch 1 | 7055/ 8400 batches | train loss 0.4176304 +| epoch 1 | 7059/ 8400 batches | train loss 0.4611502 +| epoch 1 | 7063/ 8400 batches | train loss 0.4001074 +| epoch 1 | 7067/ 8400 batches | train loss 0.4664544 +| epoch 1 | 7071/ 8400 batches | train loss 0.3084026 +| epoch 1 | 7075/ 8400 batches | train loss 0.4614359 +| epoch 1 | 7079/ 8400 batches | train loss 0.4518164 +| epoch 1 | 7083/ 8400 batches | train loss 0.3923690 +| epoch 1 | 7087/ 8400 batches | train loss 0.4460535 +| epoch 1 | 7091/ 8400 batches | train loss 0.4601390 +| epoch 1 | 7095/ 8400 batches | train loss 0.4934961 +| epoch 1 | 7099/ 8400 batches | train loss 0.4441712 +| epoch 1 | 7103/ 8400 batches | train loss 0.4811309 +| epoch 1 | 7107/ 8400 batches | train loss 0.4484901 +| epoch 1 | 7111/ 8400 batches | train loss 0.3244506 +| epoch 1 | 7115/ 8400 batches | train loss 0.4914928 +| epoch 1 | 7119/ 8400 batches | train loss 0.4441727 +| epoch 1 | 7123/ 8400 batches | train loss 0.4177953 +| epoch 1 | 7127/ 8400 batches | train loss 0.4488258 +| epoch 1 | 7131/ 8400 batches | train loss 0.3869311 +| epoch 1 | 7135/ 8400 batches | train loss 0.2828896 +| epoch 1 | 7139/ 8400 batches | train loss 0.4726456 +| epoch 1 | 7143/ 8400 batches | train loss 0.4994661 +| epoch 1 | 7147/ 8400 batches | train loss 0.3834285 +| epoch 1 | 7151/ 8400 batches | train loss 0.3564325 +| epoch 1 | 7155/ 8400 batches | train loss 0.4280598 +| epoch 1 | 7159/ 8400 batches | train loss 0.4958749 +| epoch 1 | 7163/ 8400 batches | train loss 0.4085928 +| epoch 1 | 7167/ 8400 batches | train loss 0.4167763 +| epoch 1 | 7171/ 8400 batches | train loss 0.4705018 +| epoch 1 | 7175/ 8400 batches | train loss 0.4508343 +| epoch 1 | 7179/ 8400 batches | train loss 0.4826366 +| epoch 1 | 7183/ 8400 batches | train loss 0.3373193 +| epoch 1 | 7187/ 8400 batches | train loss 0.4649849 +| epoch 1 | 7191/ 8400 batches | train loss 0.4260714 +| epoch 1 | 7195/ 8400 batches | train loss 0.3833188 +| epoch 1 | 7199/ 8400 batches | train loss 0.4797786 +| epoch 1 | 7203/ 8400 batches | train loss 0.4231750 +| epoch 1 | 7207/ 8400 batches | train loss 0.4410708 +| epoch 1 | 7211/ 8400 batches | train loss 0.5511764 +| epoch 1 | 7215/ 8400 batches | train loss 0.4317606 +| epoch 1 | 7219/ 8400 batches | train loss 0.3700871 +| epoch 1 | 7223/ 8400 batches | train loss 0.3858262 +| epoch 1 | 7227/ 8400 batches | train loss 0.3748515 +| epoch 1 | 7231/ 8400 batches | train loss 0.5622088 +| epoch 1 | 7235/ 8400 batches | train loss 0.4072186 +| epoch 1 | 7239/ 8400 batches | train loss 0.5250236 +| epoch 1 | 7243/ 8400 batches | train loss 0.3897208 +| epoch 1 | 7247/ 8400 batches | train loss 0.4258752 +| epoch 1 | 7251/ 8400 batches | train loss 0.4185781 +| epoch 1 | 7255/ 8400 batches | train loss 0.4595487 +| epoch 1 | 7259/ 8400 batches | train loss 0.4146007 +| epoch 1 | 7263/ 8400 batches | train loss 0.4359249 +| epoch 1 | 7267/ 8400 batches | train loss 0.4846158 +| epoch 1 | 7271/ 8400 batches | train loss 0.4483172 +| epoch 1 | 7275/ 8400 batches | train loss 0.3735496 +| epoch 1 | 7279/ 8400 batches | train loss 0.4480788 +| epoch 1 | 7283/ 8400 batches | train loss 0.4058948 +| epoch 1 | 7287/ 8400 batches | train loss 0.3740267 +| epoch 1 | 7291/ 8400 batches | train loss 0.5232893 +| epoch 1 | 7295/ 8400 batches | train loss 0.5147496 +| epoch 1 | 7299/ 8400 batches | train loss 0.3686317 +| epoch 1 | 7303/ 8400 batches | train loss 0.4375692 +| epoch 1 | 7307/ 8400 batches | train loss 0.3658991 +| epoch 1 | 7311/ 8400 batches | train loss 0.4622201 +| epoch 1 | 7315/ 8400 batches | train loss 0.4542083 +| epoch 1 | 7319/ 8400 batches | train loss 0.4285823 +| epoch 1 | 7323/ 8400 batches | train loss 0.4292324 +| epoch 1 | 7327/ 8400 batches | train loss 0.4668564 +| epoch 1 | 7331/ 8400 batches | train loss 0.5429322 +| epoch 1 | 7335/ 8400 batches | train loss 0.4330196 +| epoch 1 | 7339/ 8400 batches | train loss 0.3920352 +| epoch 1 | 7343/ 8400 batches | train loss 0.3852217 +| epoch 1 | 7347/ 8400 batches | train loss 0.4453635 +| epoch 1 | 7351/ 8400 batches | train loss 0.4757722 +| epoch 1 | 7355/ 8400 batches | train loss 0.5891298 +| epoch 1 | 7359/ 8400 batches | train loss 0.4718651 +| epoch 1 | 7363/ 8400 batches | train loss 0.4504113 +| epoch 1 | 7367/ 8400 batches | train loss 0.4723446 +| epoch 1 | 7371/ 8400 batches | train loss 0.4507155 +| epoch 1 | 7375/ 8400 batches | train loss 0.4803711 +| epoch 1 | 7379/ 8400 batches | train loss 0.4267612 +| epoch 1 | 7383/ 8400 batches | train loss 0.4689665 +| epoch 1 | 7387/ 8400 batches | train loss 0.4537187 +| epoch 1 | 7391/ 8400 batches | train loss 0.4469180 +| epoch 1 | 7395/ 8400 batches | train loss 0.4833167 +| epoch 1 | 7399/ 8400 batches | train loss 0.4385665 +| epoch 1 | 7403/ 8400 batches | train loss 0.4140771 +| epoch 1 | 7407/ 8400 batches | train loss 0.5013804 +| epoch 1 | 7411/ 8400 batches | train loss 0.5149891 +| epoch 1 | 7415/ 8400 batches | train loss 0.4968609 +| epoch 1 | 7419/ 8400 batches | train loss 0.5674980 +| epoch 1 | 7423/ 8400 batches | train loss 0.3908171 +| epoch 1 | 7427/ 8400 batches | train loss 0.6025736 +| epoch 1 | 7431/ 8400 batches | train loss 0.4973876 +| epoch 1 | 7435/ 8400 batches | train loss 0.4433455 +| epoch 1 | 7439/ 8400 batches | train loss 0.4763227 +| epoch 1 | 7443/ 8400 batches | train loss 0.3497389 +| epoch 1 | 7447/ 8400 batches | train loss 0.4892035 +| epoch 1 | 7451/ 8400 batches | train loss 0.4276944 +| epoch 1 | 7455/ 8400 batches | train loss 0.4587146 +| epoch 1 | 7459/ 8400 batches | train loss 0.4551089 +| epoch 1 | 7463/ 8400 batches | train loss 0.4765542 +| epoch 1 | 7467/ 8400 batches | train loss 0.4795183 +| epoch 1 | 7471/ 8400 batches | train loss 0.4306113 +| epoch 1 | 7475/ 8400 batches | train loss 0.4165007 +| epoch 1 | 7479/ 8400 batches | train loss 0.4859285 +| epoch 1 | 7483/ 8400 batches | train loss 0.3705177 +| epoch 1 | 7487/ 8400 batches | train loss 0.4562034 +| epoch 1 | 7491/ 8400 batches | train loss 0.5210373 +| epoch 1 | 7495/ 8400 batches | train loss 0.4689500 +| epoch 1 | 7499/ 8400 batches | train loss 0.3926866 +| epoch 1 | 7503/ 8400 batches | train loss 0.4369030 +| epoch 1 | 7507/ 8400 batches | train loss 0.4149488 +| epoch 1 | 7511/ 8400 batches | train loss 0.3794411 +| epoch 1 | 7515/ 8400 batches | train loss 0.4628645 +| epoch 1 | 7519/ 8400 batches | train loss 0.5167254 +| epoch 1 | 7523/ 8400 batches | train loss 0.4660283 +| epoch 1 | 7527/ 8400 batches | train loss 0.4531442 +| epoch 1 | 7531/ 8400 batches | train loss 0.3802334 +| epoch 1 | 7535/ 8400 batches | train loss 0.3905609 +| epoch 1 | 7539/ 8400 batches | train loss 0.4001489 +| epoch 1 | 7543/ 8400 batches | train loss 0.5342294 +| epoch 1 | 7547/ 8400 batches | train loss 0.4177360 +| epoch 1 | 7551/ 8400 batches | train loss 0.4595823 +| epoch 1 | 7555/ 8400 batches | train loss 0.3805891 +| epoch 1 | 7559/ 8400 batches | train loss 0.4874983 +| epoch 1 | 7563/ 8400 batches | train loss 0.3738230 +| epoch 1 | 7567/ 8400 batches | train loss 0.5222951 +| epoch 1 | 7571/ 8400 batches | train loss 0.4517268 +| epoch 1 | 7575/ 8400 batches | train loss 0.4878256 +| epoch 1 | 7579/ 8400 batches | train loss 0.5124147 +| epoch 1 | 7583/ 8400 batches | train loss 0.4799433 +| epoch 1 | 7587/ 8400 batches | train loss 0.4313918 +| epoch 1 | 7591/ 8400 batches | train loss 0.3704206 +| epoch 1 | 7595/ 8400 batches | train loss 0.5546154 +| epoch 1 | 7599/ 8400 batches | train loss 0.4408769 +| epoch 1 | 7603/ 8400 batches | train loss 0.5090649 +| epoch 1 | 7607/ 8400 batches | train loss 0.3603784 +| epoch 1 | 7611/ 8400 batches | train loss 0.3701495 +| epoch 1 | 7615/ 8400 batches | train loss 0.5243537 +| epoch 1 | 7619/ 8400 batches | train loss 0.4008598 +| epoch 1 | 7623/ 8400 batches | train loss 0.4410078 +| epoch 1 | 7627/ 8400 batches | train loss 0.5019155 +| epoch 1 | 7631/ 8400 batches | train loss 0.4153562 +| epoch 1 | 7635/ 8400 batches | train loss 0.5265166 +| epoch 1 | 7639/ 8400 batches | train loss 0.4331118 +| epoch 1 | 7643/ 8400 batches | train loss 0.5185388 +| epoch 1 | 7647/ 8400 batches | train loss 0.4282379 +| epoch 1 | 7651/ 8400 batches | train loss 0.4562352 +| epoch 1 | 7655/ 8400 batches | train loss 0.3896827 +| epoch 1 | 7659/ 8400 batches | train loss 0.5321616 +| epoch 1 | 7663/ 8400 batches | train loss 0.3753114 +| epoch 1 | 7667/ 8400 batches | train loss 0.5779969 +| epoch 1 | 7671/ 8400 batches | train loss 0.4479160 +| epoch 1 | 7675/ 8400 batches | train loss 0.4880095 +| epoch 1 | 7679/ 8400 batches | train loss 0.4364680 +| epoch 1 | 7683/ 8400 batches | train loss 0.4659341 +| epoch 1 | 7687/ 8400 batches | train loss 0.3484539 +| epoch 1 | 7691/ 8400 batches | train loss 0.4322085 +| epoch 1 | 7695/ 8400 batches | train loss 0.4874034 +| epoch 1 | 7699/ 8400 batches | train loss 0.5667990 +| epoch 1 | 7703/ 8400 batches | train loss 0.3894998 +| epoch 1 | 7707/ 8400 batches | train loss 0.5352186 +| epoch 1 | 7711/ 8400 batches | train loss 0.5111098 +| epoch 1 | 7715/ 8400 batches | train loss 0.3983191 +| epoch 1 | 7719/ 8400 batches | train loss 0.4322805 +| epoch 1 | 7723/ 8400 batches | train loss 0.5313352 +| epoch 1 | 7727/ 8400 batches | train loss 0.4300873 +| epoch 1 | 7731/ 8400 batches | train loss 0.4933465 +| epoch 1 | 7735/ 8400 batches | train loss 0.4922213 +| epoch 1 | 7739/ 8400 batches | train loss 0.4650442 +| epoch 1 | 7743/ 8400 batches | train loss 0.4728348 +| epoch 1 | 7747/ 8400 batches | train loss 0.4044047 +| epoch 1 | 7751/ 8400 batches | train loss 0.4500594 +| epoch 1 | 7755/ 8400 batches | train loss 0.3327878 +| epoch 1 | 7759/ 8400 batches | train loss 0.4000096 +| epoch 1 | 7763/ 8400 batches | train loss 0.4148018 +| epoch 1 | 7767/ 8400 batches | train loss 0.4053963 +| epoch 1 | 7771/ 8400 batches | train loss 0.4506460 +| epoch 1 | 7775/ 8400 batches | train loss 0.4373361 +| epoch 1 | 7779/ 8400 batches | train loss 0.5277637 +| epoch 1 | 7783/ 8400 batches | train loss 0.4552751 +| epoch 1 | 7787/ 8400 batches | train loss 0.4480605 +| epoch 1 | 7791/ 8400 batches | train loss 0.4683036 +| epoch 1 | 7795/ 8400 batches | train loss 0.4032449 +| epoch 1 | 7799/ 8400 batches | train loss 0.4956932 +| epoch 1 | 7803/ 8400 batches | train loss 0.4594486 +| epoch 1 | 7807/ 8400 batches | train loss 0.3576079 +| epoch 1 | 7811/ 8400 batches | train loss 0.4736848 +| epoch 1 | 7815/ 8400 batches | train loss 0.4844656 +| epoch 1 | 7819/ 8400 batches | train loss 0.4484360 +| epoch 1 | 7823/ 8400 batches | train loss 0.4859839 +| epoch 1 | 7827/ 8400 batches | train loss 0.5024896 +| epoch 1 | 7831/ 8400 batches | train loss 0.5522319 +| epoch 1 | 7835/ 8400 batches | train loss 0.5191646 +| epoch 1 | 7839/ 8400 batches | train loss 0.4749673 +| epoch 1 | 7843/ 8400 batches | train loss 0.4495926 +| epoch 1 | 7847/ 8400 batches | train loss 0.4993116 +| epoch 1 | 7851/ 8400 batches | train loss 0.3601722 +| epoch 1 | 7855/ 8400 batches | train loss 0.4335921 +| epoch 1 | 7859/ 8400 batches | train loss 0.4472336 +| epoch 1 | 7863/ 8400 batches | train loss 0.4913398 +| epoch 1 | 7867/ 8400 batches | train loss 0.4319037 +| epoch 1 | 7871/ 8400 batches | train loss 0.4829372 +| epoch 1 | 7875/ 8400 batches | train loss 0.3522757 +| epoch 1 | 7879/ 8400 batches | train loss 0.5306671 +| epoch 1 | 7883/ 8400 batches | train loss 0.3861583 +| epoch 1 | 7887/ 8400 batches | train loss 0.4099780 +| epoch 1 | 7891/ 8400 batches | train loss 0.3868110 +| epoch 1 | 7895/ 8400 batches | train loss 0.4660000 +| epoch 1 | 7899/ 8400 batches | train loss 0.4998291 +| epoch 1 | 7903/ 8400 batches | train loss 0.3750406 +| epoch 1 | 7907/ 8400 batches | train loss 0.3926627 +| epoch 1 | 7911/ 8400 batches | train loss 0.5133125 +| epoch 1 | 7915/ 8400 batches | train loss 0.4782287 +| epoch 1 | 7919/ 8400 batches | train loss 0.3808875 +| epoch 1 | 7923/ 8400 batches | train loss 0.4735874 +| epoch 1 | 7927/ 8400 batches | train loss 0.3996385 +| epoch 1 | 7931/ 8400 batches | train loss 0.3660167 +| epoch 1 | 7935/ 8400 batches | train loss 0.5268406 +| epoch 1 | 7939/ 8400 batches | train loss 0.4431172 +| epoch 1 | 7943/ 8400 batches | train loss 0.3877064 +| epoch 1 | 7947/ 8400 batches | train loss 0.4678858 +| epoch 1 | 7951/ 8400 batches | train loss 0.4751356 +| epoch 1 | 7955/ 8400 batches | train loss 0.5129949 +| epoch 1 | 7959/ 8400 batches | train loss 0.4516087 +| epoch 1 | 7963/ 8400 batches | train loss 0.4866477 +| epoch 1 | 7967/ 8400 batches | train loss 0.4543255 +| epoch 1 | 7971/ 8400 batches | train loss 0.4486284 +| epoch 1 | 7975/ 8400 batches | train loss 0.4324662 +| epoch 1 | 7979/ 8400 batches | train loss 0.4097517 +| epoch 1 | 7983/ 8400 batches | train loss 0.3423865 +| epoch 1 | 7987/ 8400 batches | train loss 0.3991675 +| epoch 1 | 7991/ 8400 batches | train loss 0.3603950 +| epoch 1 | 7995/ 8400 batches | train loss 0.4477684 +| epoch 1 | 7999/ 8400 batches | train loss 0.4880701 +| epoch 1 | 8003/ 8400 batches | train loss 0.5287133 +| epoch 1 | 8007/ 8400 batches | train loss 0.3818289 +| epoch 1 | 8011/ 8400 batches | train loss 0.3360689 +| epoch 1 | 8015/ 8400 batches | train loss 0.4769565 +| epoch 1 | 8019/ 8400 batches | train loss 0.5418832 +| epoch 1 | 8023/ 8400 batches | train loss 0.4533626 +| epoch 1 | 8027/ 8400 batches | train loss 0.4932953 +| epoch 1 | 8031/ 8400 batches | train loss 0.4769718 +| epoch 1 | 8035/ 8400 batches | train loss 0.4680414 +| epoch 1 | 8039/ 8400 batches | train loss 0.5007267 +| epoch 1 | 8043/ 8400 batches | train loss 0.4445797 +| epoch 1 | 8047/ 8400 batches | train loss 0.5199655 +| epoch 1 | 8051/ 8400 batches | train loss 0.4056185 +| epoch 1 | 8055/ 8400 batches | train loss 0.3908507 +| epoch 1 | 8059/ 8400 batches | train loss 0.4819883 +| epoch 1 | 8063/ 8400 batches | train loss 0.4366879 +| epoch 1 | 8067/ 8400 batches | train loss 0.3591675 +| epoch 1 | 8071/ 8400 batches | train loss 0.4275354 +| epoch 1 | 8075/ 8400 batches | train loss 0.3583116 +| epoch 1 | 8079/ 8400 batches | train loss 0.5050251 +| epoch 1 | 8083/ 8400 batches | train loss 0.5527714 +| epoch 1 | 8087/ 8400 batches | train loss 0.3840241 +| epoch 1 | 8091/ 8400 batches | train loss 0.4537689 +| epoch 1 | 8095/ 8400 batches | train loss 0.4507077 +| epoch 1 | 8099/ 8400 batches | train loss 0.5142478 +| epoch 1 | 8103/ 8400 batches | train loss 0.4012267 +| epoch 1 | 8107/ 8400 batches | train loss 0.4150855 +| epoch 1 | 8111/ 8400 batches | train loss 0.4353622 +| epoch 1 | 8115/ 8400 batches | train loss 0.4758431 +| epoch 1 | 8119/ 8400 batches | train loss 0.5167227 +| epoch 1 | 8123/ 8400 batches | train loss 0.4691521 +| epoch 1 | 8127/ 8400 batches | train loss 0.3723203 +| epoch 1 | 8131/ 8400 batches | train loss 0.4296868 +| epoch 1 | 8135/ 8400 batches | train loss 0.4220769 +| epoch 1 | 8139/ 8400 batches | train loss 0.3882474 +| epoch 1 | 8143/ 8400 batches | train loss 0.2836920 +| epoch 1 | 8147/ 8400 batches | train loss 0.4453808 +| epoch 1 | 8151/ 8400 batches | train loss 0.3898237 +| epoch 1 | 8155/ 8400 batches | train loss 0.4779664 +| epoch 1 | 8159/ 8400 batches | train loss 0.4865519 +| epoch 1 | 8163/ 8400 batches | train loss 0.4350986 +| epoch 1 | 8167/ 8400 batches | train loss 0.4451621 +| epoch 1 | 8171/ 8400 batches | train loss 0.4832058 +| epoch 1 | 8175/ 8400 batches | train loss 0.3531625 +| epoch 1 | 8179/ 8400 batches | train loss 0.4985535 +| epoch 1 | 8183/ 8400 batches | train loss 0.4608809 +| epoch 1 | 8187/ 8400 batches | train loss 0.4868069 +| epoch 1 | 8191/ 8400 batches | train loss 0.4462755 +| epoch 1 | 8195/ 8400 batches | train loss 0.4345226 +| epoch 1 | 8199/ 8400 batches | train loss 0.4773512 +| epoch 1 | 8203/ 8400 batches | train loss 0.3902143 +| epoch 1 | 8207/ 8400 batches | train loss 0.4697193 +| epoch 1 | 8211/ 8400 batches | train loss 0.4326152 +| epoch 1 | 8215/ 8400 batches | train loss 0.4625081 +| epoch 1 | 8219/ 8400 batches | train loss 0.3508705 +| epoch 1 | 8223/ 8400 batches | train loss 0.3874056 +| epoch 1 | 8227/ 8400 batches | train loss 0.5449859 +| epoch 1 | 8231/ 8400 batches | train loss 0.4324108 +| epoch 1 | 8235/ 8400 batches | train loss 0.4734832 +| epoch 1 | 8239/ 8400 batches | train loss 0.5034553 +| epoch 1 | 8243/ 8400 batches | train loss 0.3607929 +| epoch 1 | 8247/ 8400 batches | train loss 0.4763999 +| epoch 1 | 8251/ 8400 batches | train loss 0.3859798 +| epoch 1 | 8255/ 8400 batches | train loss 0.4544117 +| epoch 1 | 8259/ 8400 batches | train loss 0.4959441 +| epoch 1 | 8263/ 8400 batches | train loss 0.4970060 +| epoch 1 | 8267/ 8400 batches | train loss 0.4176994 +| epoch 1 | 8271/ 8400 batches | train loss 0.6589104 +| epoch 1 | 8275/ 8400 batches | train loss 0.4736611 +| epoch 1 | 8279/ 8400 batches | train loss 0.4492478 +| epoch 1 | 8283/ 8400 batches | train loss 0.4864790 +| epoch 1 | 8287/ 8400 batches | train loss 0.4272135 +| epoch 1 | 8291/ 8400 batches | train loss 0.4843382 +| epoch 1 | 8295/ 8400 batches | train loss 0.5195031 +| epoch 1 | 8299/ 8400 batches | train loss 0.4909213 +| epoch 1 | 8303/ 8400 batches | train loss 0.4845319 +| epoch 1 | 8307/ 8400 batches | train loss 0.5074868 +| epoch 1 | 8311/ 8400 batches | train loss 0.4715491 +| epoch 1 | 8315/ 8400 batches | train loss 0.5284193 +| epoch 1 | 8319/ 8400 batches | train loss 0.4199622 +| epoch 1 | 8323/ 8400 batches | train loss 0.4678216 +| epoch 1 | 8327/ 8400 batches | train loss 0.5096623 +| epoch 1 | 8331/ 8400 batches | train loss 0.4499186 +| epoch 1 | 8335/ 8400 batches | train loss 0.4908878 +| epoch 1 | 8339/ 8400 batches | train loss 0.4055032 +| epoch 1 | 8343/ 8400 batches | train loss 0.4754381 +| epoch 1 | 8347/ 8400 batches | train loss 0.4540998 +| epoch 1 | 8351/ 8400 batches | train loss 0.3579943 +| epoch 1 | 8355/ 8400 batches | train loss 0.4712056 +| epoch 1 | 8359/ 8400 batches | train loss 0.4465890 +| epoch 1 | 8363/ 8400 batches | train loss 0.4174885 +| epoch 1 | 8367/ 8400 batches | train loss 0.4545445 +| epoch 1 | 8371/ 8400 batches | train loss 0.4829674 +| epoch 1 | 8375/ 8400 batches | train loss 0.4380612 +| epoch 1 | 8379/ 8400 batches | train loss 0.4987023 +| epoch 1 | 8383/ 8400 batches | train loss 0.4698458 +| epoch 1 | 8387/ 8400 batches | train loss 0.3493424 +| epoch 1 | 8391/ 8400 batches | train loss 0.4615623 +| epoch 1 | 8395/ 8400 batches | train loss 0.4845327 +| epoch 1 | 8399/ 8400 batches | train loss 0.4973143 +-------------------------------------------------------------------------------- +| epoch 1 | 3/ 8400 batches | test loss 0.4845752 +| epoch 1 | 7/ 8400 batches | test loss 0.4666330 +| epoch 1 | 11/ 8400 batches | test loss 0.4023624 +| epoch 1 | 15/ 8400 batches | test loss 0.4311722 +| epoch 1 | 19/ 8400 batches | test loss 0.4401434 +| epoch 1 | 23/ 8400 batches | test loss 0.5492075 +| epoch 1 | 27/ 8400 batches | test loss 0.4528476 +| epoch 1 | 31/ 8400 batches | test loss 0.4697376 +| epoch 1 | 35/ 8400 batches | test loss 0.3869589 +| epoch 1 | 39/ 8400 batches | test loss 0.4517528 +| epoch 1 | 43/ 8400 batches | test loss 0.4666867 +| epoch 1 | 47/ 8400 batches | test loss 0.4554989 +| epoch 1 | 51/ 8400 batches | test loss 0.4835966 +| epoch 1 | 55/ 8400 batches | test loss 0.4412486 +| epoch 1 | 59/ 8400 batches | test loss 0.5511144 +| epoch 1 | 63/ 8400 batches | test loss 0.4472940 +| epoch 1 | 67/ 8400 batches | test loss 0.4822085 +| epoch 1 | 71/ 8400 batches | test loss 0.4133731 +| epoch 1 | 75/ 8400 batches | test loss 0.4455756 +| epoch 1 | 79/ 8400 batches | test loss 0.5012200 +| epoch 1 | 83/ 8400 batches | test loss 0.4480742 +| epoch 1 | 87/ 8400 batches | test loss 0.4225108 +| epoch 1 | 91/ 8400 batches | test loss 0.3754160 +| epoch 1 | 95/ 8400 batches | test loss 0.3856375 +| epoch 1 | 99/ 8400 batches | test loss 0.3955852 +| epoch 1 | 103/ 8400 batches | test loss 0.4912597 +| epoch 1 | 107/ 8400 batches | test loss 0.4420191 +| epoch 1 | 111/ 8400 batches | test loss 0.5266329 +| epoch 1 | 115/ 8400 batches | test loss 0.4449713 +| epoch 1 | 119/ 8400 batches | test loss 0.5293062 +| epoch 1 | 123/ 8400 batches | test loss 0.4547258 +| epoch 1 | 127/ 8400 batches | test loss 0.5170597 +| epoch 1 | 131/ 8400 batches | test loss 0.4623421 +| epoch 1 | 135/ 8400 batches | test loss 0.4688426 +| epoch 1 | 139/ 8400 batches | test loss 0.3450702 +| epoch 1 | 143/ 8400 batches | test loss 0.4009253 +| epoch 1 | 147/ 8400 batches | test loss 0.4532825 +| epoch 1 | 151/ 8400 batches | test loss 0.4594821 +| epoch 1 | 155/ 8400 batches | test loss 0.4534600 +| epoch 1 | 159/ 8400 batches | test loss 0.3692493 +| epoch 1 | 163/ 8400 batches | test loss 0.5189956 +| epoch 1 | 167/ 8400 batches | test loss 0.4229672 +| epoch 1 | 171/ 8400 batches | test loss 0.5113473 +| epoch 1 | 175/ 8400 batches | test loss 0.5138022 +| epoch 1 | 179/ 8400 batches | test loss 0.3847191 +| epoch 1 | 183/ 8400 batches | test loss 0.5060949 +| epoch 1 | 187/ 8400 batches | test loss 0.4452364 +| epoch 1 | 191/ 8400 batches | test loss 0.5685749 +| epoch 1 | 195/ 8400 batches | test loss 0.4548888 +| epoch 1 | 199/ 8400 batches | test loss 0.4681808 +| epoch 1 | 203/ 8400 batches | test loss 0.4912786 +| epoch 1 | 207/ 8400 batches | test loss 0.3796635 +| epoch 1 | 211/ 8400 batches | test loss 0.5049571 +| epoch 1 | 215/ 8400 batches | test loss 0.5058566 +| epoch 1 | 219/ 8400 batches | test loss 0.4191142 +| epoch 1 | 223/ 8400 batches | test loss 0.3757533 +| epoch 1 | 227/ 8400 batches | test loss 0.5693258 +| epoch 1 | 231/ 8400 batches | test loss 0.3372322 +| epoch 1 | 235/ 8400 batches | test loss 0.3838400 +| epoch 1 | 239/ 8400 batches | test loss 0.4949597 +| epoch 1 | 243/ 8400 batches | test loss 0.4306051 +| epoch 1 | 247/ 8400 batches | test loss 0.5236332 +| epoch 1 | 251/ 8400 batches | test loss 0.3690772 +| epoch 1 | 255/ 8400 batches | test loss 0.4662110 +| epoch 1 | 259/ 8400 batches | test loss 0.4086520 +| epoch 1 | 263/ 8400 batches | test loss 0.4612037 +| epoch 1 | 267/ 8400 batches | test loss 0.4668890 +| epoch 1 | 271/ 8400 batches | test loss 0.5444191 +| epoch 1 | 275/ 8400 batches | test loss 0.4519202 +| epoch 1 | 279/ 8400 batches | test loss 0.3622862 +| epoch 1 | 283/ 8400 batches | test loss 0.4795954 +| epoch 1 | 287/ 8400 batches | test loss 0.5043060 +| epoch 1 | 291/ 8400 batches | test loss 0.4440261 +| epoch 1 | 295/ 8400 batches | test loss 0.4477115 +| epoch 1 | 299/ 8400 batches | test loss 0.4299544 +| epoch 1 | 303/ 8400 batches | test loss 0.4963352 +| epoch 1 | 307/ 8400 batches | test loss 0.3148108 +| epoch 1 | 311/ 8400 batches | test loss 0.4618638 +| epoch 1 | 315/ 8400 batches | test loss 0.4509111 +| epoch 1 | 319/ 8400 batches | test loss 0.4299390 +| epoch 1 | 323/ 8400 batches | test loss 0.5152553 +| epoch 1 | 327/ 8400 batches | test loss 0.4776394 +| epoch 1 | 331/ 8400 batches | test loss 0.4708060 +| epoch 1 | 335/ 8400 batches | test loss 0.4567349 +| epoch 1 | 339/ 8400 batches | test loss 0.4330883 +| epoch 1 | 343/ 8400 batches | test loss 0.4919583 +| epoch 1 | 347/ 8400 batches | test loss 0.4243405 +| epoch 1 | 351/ 8400 batches | test loss 0.5496492 +| epoch 1 | 355/ 8400 batches | test loss 0.4004130 +| epoch 1 | 359/ 8400 batches | test loss 0.3618106 +| epoch 1 | 363/ 8400 batches | test loss 0.4453814 +| epoch 1 | 367/ 8400 batches | test loss 0.4875859 +| epoch 1 | 371/ 8400 batches | test loss 0.4063095 +| epoch 1 | 375/ 8400 batches | test loss 0.4379969 +| epoch 1 | 379/ 8400 batches | test loss 0.3797381 +| epoch 1 | 383/ 8400 batches | test loss 0.5329942 +| epoch 1 | 387/ 8400 batches | test loss 0.4169841 +| epoch 1 | 391/ 8400 batches | test loss 0.5268421 +| epoch 1 | 395/ 8400 batches | test loss 0.4400463 +| epoch 1 | 399/ 8400 batches | test loss 0.4658635 +| epoch 1 | 403/ 8400 batches | test loss 0.3930450 +| epoch 1 | 407/ 8400 batches | test loss 0.3424956 +| epoch 1 | 411/ 8400 batches | test loss 0.5062326 +| epoch 1 | 415/ 8400 batches | test loss 0.4367536 +| epoch 1 | 419/ 8400 batches | test loss 0.4568787 +| epoch 1 | 423/ 8400 batches | test loss 0.3899209 +| epoch 1 | 427/ 8400 batches | test loss 0.4590544 +| epoch 1 | 431/ 8400 batches | test loss 0.4620076 +| epoch 1 | 435/ 8400 batches | test loss 0.3624958 +| epoch 1 | 439/ 8400 batches | test loss 0.5141168 +| epoch 1 | 443/ 8400 batches | test loss 0.4579664 +| epoch 1 | 447/ 8400 batches | test loss 0.4712674 +| epoch 1 | 451/ 8400 batches | test loss 0.3964437 +| epoch 1 | 455/ 8400 batches | test loss 0.4772193 +| epoch 1 | 459/ 8400 batches | test loss 0.5971327 +| epoch 1 | 463/ 8400 batches | test loss 0.4161285 +| epoch 1 | 467/ 8400 batches | test loss 0.4383878 +| epoch 1 | 471/ 8400 batches | test loss 0.4479539 +| epoch 1 | 475/ 8400 batches | test loss 0.4675711 +| epoch 1 | 479/ 8400 batches | test loss 0.4657266 +| epoch 1 | 483/ 8400 batches | test loss 0.4327401 +| epoch 1 | 487/ 8400 batches | test loss 0.4666840 +| epoch 1 | 491/ 8400 batches | test loss 0.5360242 +| epoch 1 | 495/ 8400 batches | test loss 0.4721980 +| epoch 1 | 499/ 8400 batches | test loss 0.5014333 +| epoch 1 | 503/ 8400 batches | test loss 0.4588664 +| epoch 1 | 507/ 8400 batches | test loss 0.3953328 +| epoch 1 | 511/ 8400 batches | test loss 0.4858208 +| epoch 1 | 515/ 8400 batches | test loss 0.4672185 +| epoch 1 | 519/ 8400 batches | test loss 0.4248876 +| epoch 1 | 523/ 8400 batches | test loss 0.4985615 +| epoch 1 | 527/ 8400 batches | test loss 0.4629726 +| epoch 1 | 531/ 8400 batches | test loss 0.4933813 +| epoch 1 | 535/ 8400 batches | test loss 0.4283009 +| epoch 1 | 539/ 8400 batches | test loss 0.4744437 +| epoch 1 | 543/ 8400 batches | test loss 0.5306602 +| epoch 1 | 547/ 8400 batches | test loss 0.5391214 +| epoch 1 | 551/ 8400 batches | test loss 0.3664412 +| epoch 1 | 555/ 8400 batches | test loss 0.4518022 +| epoch 1 | 559/ 8400 batches | test loss 0.4048437 +| epoch 1 | 563/ 8400 batches | test loss 0.3799441 +| epoch 1 | 567/ 8400 batches | test loss 0.4505452 +| epoch 1 | 571/ 8400 batches | test loss 0.4102078 +| epoch 1 | 575/ 8400 batches | test loss 0.3717684 +| epoch 1 | 579/ 8400 batches | test loss 0.5476040 +| epoch 1 | 583/ 8400 batches | test loss 0.4308985 +| epoch 1 | 587/ 8400 batches | test loss 0.5599775 +| epoch 1 | 591/ 8400 batches | test loss 0.5348423 +| epoch 1 | 595/ 8400 batches | test loss 0.4083425 +| epoch 1 | 599/ 8400 batches | test loss 0.4089369 +| epoch 1 | 603/ 8400 batches | test loss 0.5648326 +| epoch 1 | 607/ 8400 batches | test loss 0.4724484 +| epoch 1 | 611/ 8400 batches | test loss 0.4194367 +| epoch 1 | 615/ 8400 batches | test loss 0.5352859 +| epoch 1 | 619/ 8400 batches | test loss 0.5195720 +| epoch 1 | 623/ 8400 batches | test loss 0.5262487 +| epoch 1 | 627/ 8400 batches | test loss 0.4231822 +| epoch 1 | 631/ 8400 batches | test loss 0.5322317 +| epoch 1 | 635/ 8400 batches | test loss 0.4995640 +| epoch 1 | 639/ 8400 batches | test loss 0.4954254 +| epoch 1 | 643/ 8400 batches | test loss 0.3826660 +| epoch 1 | 647/ 8400 batches | test loss 0.5229102 +| epoch 1 | 651/ 8400 batches | test loss 0.4701002 +| epoch 1 | 655/ 8400 batches | test loss 0.3712321 +| epoch 1 | 659/ 8400 batches | test loss 0.3994287 +| epoch 1 | 663/ 8400 batches | test loss 0.4706066 +| epoch 1 | 667/ 8400 batches | test loss 0.4927226 +| epoch 1 | 671/ 8400 batches | test loss 0.4643642 +| epoch 1 | 675/ 8400 batches | test loss 0.4802347 +| epoch 1 | 679/ 8400 batches | test loss 0.4527424 +| epoch 1 | 683/ 8400 batches | test loss 0.3919549 +| epoch 1 | 687/ 8400 batches | test loss 0.4877246 +| epoch 1 | 691/ 8400 batches | test loss 0.5104257 +| epoch 1 | 695/ 8400 batches | test loss 0.4422556 +| epoch 1 | 699/ 8400 batches | test loss 0.3868988 +| epoch 1 | 703/ 8400 batches | test loss 0.4715886 +| epoch 1 | 707/ 8400 batches | test loss 0.3706725 +| epoch 1 | 711/ 8400 batches | test loss 0.5318186 +| epoch 1 | 715/ 8400 batches | test loss 0.5381652 +| epoch 1 | 719/ 8400 batches | test loss 0.4342519 +| epoch 1 | 723/ 8400 batches | test loss 0.4304276 +| epoch 1 | 727/ 8400 batches | test loss 0.3750035 +| epoch 1 | 731/ 8400 batches | test loss 0.4353570 +| epoch 1 | 735/ 8400 batches | test loss 0.5236803 +| epoch 1 | 739/ 8400 batches | test loss 0.4535510 +| epoch 1 | 743/ 8400 batches | test loss 0.5047444 +| epoch 1 | 747/ 8400 batches | test loss 0.5171596 +| epoch 1 | 751/ 8400 batches | test loss 0.4381567 +| epoch 1 | 755/ 8400 batches | test loss 0.3813946 +| epoch 1 | 759/ 8400 batches | test loss 0.4753743 +| epoch 1 | 763/ 8400 batches | test loss 0.4394167 +| epoch 1 | 767/ 8400 batches | test loss 0.4406914 +| epoch 1 | 771/ 8400 batches | test loss 0.4620419 +| epoch 1 | 775/ 8400 batches | test loss 0.3822076 +| epoch 1 | 779/ 8400 batches | test loss 0.4720675 +| epoch 1 | 783/ 8400 batches | test loss 0.4072510 +| epoch 1 | 787/ 8400 batches | test loss 0.4232411 +| epoch 1 | 791/ 8400 batches | test loss 0.4913095 +| epoch 1 | 795/ 8400 batches | test loss 0.3822433 +| epoch 1 | 799/ 8400 batches | test loss 0.5884612 +| epoch 1 | 803/ 8400 batches | test loss 0.4885244 +| epoch 1 | 807/ 8400 batches | test loss 0.4599206 +| epoch 1 | 811/ 8400 batches | test loss 0.4139744 +| epoch 1 | 815/ 8400 batches | test loss 0.5302335 +| epoch 1 | 819/ 8400 batches | test loss 0.5524957 +| epoch 1 | 823/ 8400 batches | test loss 0.4184450 +| epoch 1 | 827/ 8400 batches | test loss 0.4727046 +| epoch 1 | 831/ 8400 batches | test loss 0.5517532 +| epoch 1 | 835/ 8400 batches | test loss 0.4393912 +| epoch 1 | 839/ 8400 batches | test loss 0.4563227 +| epoch 1 | 843/ 8400 batches | test loss 0.4956488 +| epoch 1 | 847/ 8400 batches | test loss 0.4975647 +| epoch 1 | 851/ 8400 batches | test loss 0.4951011 +| epoch 1 | 855/ 8400 batches | test loss 0.4520372 +| epoch 1 | 859/ 8400 batches | test loss 0.3960093 +| epoch 1 | 863/ 8400 batches | test loss 0.4231710 +| epoch 1 | 867/ 8400 batches | test loss 0.4618198 +| epoch 1 | 871/ 8400 batches | test loss 0.2923812 +| epoch 1 | 875/ 8400 batches | test loss 0.4877982 +| epoch 1 | 879/ 8400 batches | test loss 0.4040570 +| epoch 1 | 883/ 8400 batches | test loss 0.4244812 +| epoch 1 | 887/ 8400 batches | test loss 0.4942348 +| epoch 1 | 891/ 8400 batches | test loss 0.4216093 +| epoch 1 | 895/ 8400 batches | test loss 0.3329456 +| epoch 1 | 899/ 8400 batches | test loss 0.3529499 +| epoch 1 | 903/ 8400 batches | test loss 0.4215306 +| epoch 1 | 907/ 8400 batches | test loss 0.4144971 +| epoch 1 | 911/ 8400 batches | test loss 0.4248720 +| epoch 1 | 915/ 8400 batches | test loss 0.4919313 +| epoch 1 | 919/ 8400 batches | test loss 0.4951285 +| epoch 1 | 923/ 8400 batches | test loss 0.5340431 +| epoch 1 | 927/ 8400 batches | test loss 0.3108014 +| epoch 1 | 931/ 8400 batches | test loss 0.4430392 +| epoch 1 | 935/ 8400 batches | test loss 0.4650415 +| epoch 1 | 939/ 8400 batches | test loss 0.5623775 +| epoch 1 | 943/ 8400 batches | test loss 0.3766240 +| epoch 1 | 947/ 8400 batches | test loss 0.4177792 +| epoch 1 | 951/ 8400 batches | test loss 0.4271037 +| epoch 1 | 955/ 8400 batches | test loss 0.4958090 +| epoch 1 | 959/ 8400 batches | test loss 0.4735877 +| epoch 1 | 963/ 8400 batches | test loss 0.4646524 +| epoch 1 | 967/ 8400 batches | test loss 0.4647980 +| epoch 1 | 971/ 8400 batches | test loss 0.4230893 +| epoch 1 | 975/ 8400 batches | test loss 0.4428640 +| epoch 1 | 979/ 8400 batches | test loss 0.4192488 +| epoch 1 | 983/ 8400 batches | test loss 0.4622158 +| epoch 1 | 987/ 8400 batches | test loss 0.5911229 +| epoch 1 | 991/ 8400 batches | test loss 0.4599948 +| epoch 1 | 995/ 8400 batches | test loss 0.4115921 +| epoch 1 | 999/ 8400 batches | test loss 0.5268232 +| epoch 1 | 1003/ 8400 batches | test loss 0.4758074 +| epoch 1 | 1007/ 8400 batches | test loss 0.4642780 +| epoch 1 | 1011/ 8400 batches | test loss 0.4708663 +| epoch 1 | 1015/ 8400 batches | test loss 0.4799657 +| epoch 1 | 1019/ 8400 batches | test loss 0.5374113 +| epoch 1 | 1023/ 8400 batches | test loss 0.4305863 +| epoch 1 | 1027/ 8400 batches | test loss 0.4048328 +| epoch 1 | 1031/ 8400 batches | test loss 0.5325899 +| epoch 1 | 1035/ 8400 batches | test loss 0.4046249 +| epoch 1 | 1039/ 8400 batches | test loss 0.3414882 +| epoch 1 | 1043/ 8400 batches | test loss 0.5303318 +| epoch 1 | 1047/ 8400 batches | test loss 0.4305142 +| epoch 1 | 1051/ 8400 batches | test loss 0.4665684 +| epoch 1 | 1055/ 8400 batches | test loss 0.6274371 +| epoch 1 | 1059/ 8400 batches | test loss 0.5482272 +| epoch 1 | 1063/ 8400 batches | test loss 0.4030278 +| epoch 1 | 1067/ 8400 batches | test loss 0.4688913 +| epoch 1 | 1071/ 8400 batches | test loss 0.5006101 +| epoch 1 | 1075/ 8400 batches | test loss 0.4960381 +| epoch 1 | 1079/ 8400 batches | test loss 0.4140407 +| epoch 1 | 1083/ 8400 batches | test loss 0.4986109 +| epoch 1 | 1087/ 8400 batches | test loss 0.4234306 +| epoch 1 | 1091/ 8400 batches | test loss 0.5189595 +| epoch 1 | 1095/ 8400 batches | test loss 0.5023900 +| epoch 1 | 1099/ 8400 batches | test loss 0.4321250 +| epoch 1 | 1103/ 8400 batches | test loss 0.5383199 +| epoch 1 | 1107/ 8400 batches | test loss 0.4799256 +| epoch 1 | 1111/ 8400 batches | test loss 0.4384090 +| epoch 1 | 1115/ 8400 batches | test loss 0.4480876 +| epoch 1 | 1119/ 8400 batches | test loss 0.4711540 +| epoch 1 | 1123/ 8400 batches | test loss 0.4338166 +| epoch 1 | 1127/ 8400 batches | test loss 0.3995886 +| epoch 1 | 1131/ 8400 batches | test loss 0.4809743 +| epoch 1 | 1135/ 8400 batches | test loss 0.6613185 +| epoch 1 | 1139/ 8400 batches | test loss 0.5084187 +| epoch 1 | 1143/ 8400 batches | test loss 0.3990450 +| epoch 1 | 1147/ 8400 batches | test loss 0.4403231 +| epoch 1 | 1151/ 8400 batches | test loss 0.4134086 +| epoch 1 | 1155/ 8400 batches | test loss 0.4863535 +| epoch 1 | 1159/ 8400 batches | test loss 0.4159606 +| epoch 1 | 1163/ 8400 batches | test loss 0.4623645 +| epoch 1 | 1167/ 8400 batches | test loss 0.4772031 +| epoch 1 | 1171/ 8400 batches | test loss 0.4249135 +| epoch 1 | 1175/ 8400 batches | test loss 0.4756000 +| epoch 1 | 1179/ 8400 batches | test loss 0.4896102 +| epoch 1 | 1183/ 8400 batches | test loss 0.4460623 +| epoch 1 | 1187/ 8400 batches | test loss 0.3894854 +| epoch 1 | 1191/ 8400 batches | test loss 0.5340658 +| epoch 1 | 1195/ 8400 batches | test loss 0.4887779 +| epoch 1 | 1199/ 8400 batches | test loss 0.3967286 +| epoch 1 | 1203/ 8400 batches | test loss 0.4026426 +| epoch 1 | 1207/ 8400 batches | test loss 0.3606330 +| epoch 1 | 1211/ 8400 batches | test loss 0.4354813 +| epoch 1 | 1215/ 8400 batches | test loss 0.4733442 +| epoch 1 | 1219/ 8400 batches | test loss 0.5526273 +| epoch 1 | 1223/ 8400 batches | test loss 0.4212220 +| epoch 1 | 1227/ 8400 batches | test loss 0.3963684 +| epoch 1 | 1231/ 8400 batches | test loss 0.6001258 +| epoch 1 | 1235/ 8400 batches | test loss 0.4460683 +| epoch 1 | 1239/ 8400 batches | test loss 0.3700052 +| epoch 1 | 1243/ 8400 batches | test loss 0.4656563 +| epoch 1 | 1247/ 8400 batches | test loss 0.3897285 +| epoch 1 | 1251/ 8400 batches | test loss 0.4581584 +| epoch 1 | 1255/ 8400 batches | test loss 0.4932940 +| epoch 1 | 1259/ 8400 batches | test loss 0.4230370 +| epoch 1 | 1263/ 8400 batches | test loss 0.4735628 +| epoch 1 | 1267/ 8400 batches | test loss 0.3947591 +| epoch 1 | 1271/ 8400 batches | test loss 0.5150638 +| epoch 1 | 1275/ 8400 batches | test loss 0.4269164 +| epoch 1 | 1279/ 8400 batches | test loss 0.4421611 +| epoch 1 | 1283/ 8400 batches | test loss 0.4651600 +| epoch 1 | 1287/ 8400 batches | test loss 0.4205368 +| epoch 1 | 1291/ 8400 batches | test loss 0.4562033 +| epoch 1 | 1295/ 8400 batches | test loss 0.4597589 +| epoch 1 | 1299/ 8400 batches | test loss 0.4678962 +| epoch 1 | 1303/ 8400 batches | test loss 0.1983033 +| epoch 1 | 1307/ 8400 batches | test loss 0.4096379 +| epoch 1 | 1311/ 8400 batches | test loss 0.3856024 +| epoch 1 | 1315/ 8400 batches | test loss 0.3557659 +| epoch 1 | 1319/ 8400 batches | test loss 0.4260173 +| epoch 1 | 1323/ 8400 batches | test loss 0.4738616 +| epoch 1 | 1327/ 8400 batches | test loss 0.5676590 +| epoch 1 | 1331/ 8400 batches | test loss 0.5010931 +| epoch 1 | 1335/ 8400 batches | test loss 0.4359570 +| epoch 1 | 1339/ 8400 batches | test loss 0.4174522 +| epoch 1 | 1343/ 8400 batches | test loss 0.4574829 +| epoch 1 | 1347/ 8400 batches | test loss 0.4282363 +| epoch 1 | 1351/ 8400 batches | test loss 0.5892631 +| epoch 1 | 1355/ 8400 batches | test loss 0.4542820 +| epoch 1 | 1359/ 8400 batches | test loss 0.3840214 +| epoch 1 | 1363/ 8400 batches | test loss 0.4007796 +| epoch 1 | 1367/ 8400 batches | test loss 0.3430136 +| epoch 1 | 1371/ 8400 batches | test loss 0.4707622 +| epoch 1 | 1375/ 8400 batches | test loss 0.5022620 +| epoch 1 | 1379/ 8400 batches | test loss 0.4010439 +| epoch 1 | 1383/ 8400 batches | test loss 0.3565501 +| epoch 1 | 1387/ 8400 batches | test loss 0.4153140 +| epoch 1 | 1391/ 8400 batches | test loss 0.4505191 +| epoch 1 | 1395/ 8400 batches | test loss 0.4957787 +| epoch 1 | 1399/ 8400 batches | test loss 0.4776486 +| epoch 1 | 1403/ 8400 batches | test loss 0.4446848 +| epoch 1 | 1407/ 8400 batches | test loss 0.4020022 +| epoch 1 | 1411/ 8400 batches | test loss 0.4907399 +| epoch 1 | 1415/ 8400 batches | test loss 0.3688603 +| epoch 1 | 1419/ 8400 batches | test loss 0.4718755 +| epoch 1 | 1423/ 8400 batches | test loss 0.4679716 +| epoch 1 | 1427/ 8400 batches | test loss 0.3609892 +| epoch 1 | 1431/ 8400 batches | test loss 0.4259540 +| epoch 1 | 1435/ 8400 batches | test loss 0.5110041 +| epoch 1 | 1439/ 8400 batches | test loss 0.5519876 +| epoch 1 | 1443/ 8400 batches | test loss 0.4121604 +| epoch 1 | 1447/ 8400 batches | test loss 0.5156707 +| epoch 1 | 1451/ 8400 batches | test loss 0.3918325 +| epoch 1 | 1455/ 8400 batches | test loss 0.4654611 +| epoch 1 | 1459/ 8400 batches | test loss 0.4584038 +| epoch 1 | 1463/ 8400 batches | test loss 0.4970371 +| epoch 1 | 1467/ 8400 batches | test loss 0.3708941 +| epoch 1 | 1471/ 8400 batches | test loss 0.3804434 +| epoch 1 | 1475/ 8400 batches | test loss 0.4692941 +| epoch 1 | 1479/ 8400 batches | test loss 0.4304524 +| epoch 1 | 1483/ 8400 batches | test loss 0.4096193 +| epoch 1 | 1487/ 8400 batches | test loss 0.4069160 +| epoch 1 | 1491/ 8400 batches | test loss 0.4589111 +| epoch 1 | 1495/ 8400 batches | test loss 0.4429241 +| epoch 1 | 1499/ 8400 batches | test loss 0.4047341 +| epoch 1 | 1503/ 8400 batches | test loss 0.5124310 +| epoch 1 | 1507/ 8400 batches | test loss 0.5217559 +| epoch 1 | 1511/ 8400 batches | test loss 0.4257099 +| epoch 1 | 1515/ 8400 batches | test loss 0.4145415 +| epoch 1 | 1519/ 8400 batches | test loss 0.3607253 +| epoch 1 | 1523/ 8400 batches | test loss 0.4634516 +| epoch 1 | 1527/ 8400 batches | test loss 0.4480868 +| epoch 1 | 1531/ 8400 batches | test loss 0.5191026 +| epoch 1 | 1535/ 8400 batches | test loss 0.4798175 +| epoch 1 | 1539/ 8400 batches | test loss 0.4689468 +| epoch 1 | 1543/ 8400 batches | test loss 0.4965034 +| epoch 1 | 1547/ 8400 batches | test loss 0.3568276 +| epoch 1 | 1551/ 8400 batches | test loss 0.5176904 +| epoch 1 | 1555/ 8400 batches | test loss 0.4338638 +| epoch 1 | 1559/ 8400 batches | test loss 0.4477507 +| epoch 1 | 1563/ 8400 batches | test loss 0.4414919 +| epoch 1 | 1567/ 8400 batches | test loss 0.4246183 +| epoch 1 | 1571/ 8400 batches | test loss 0.3660983 +| epoch 1 | 1575/ 8400 batches | test loss 0.4845321 +| epoch 1 | 1579/ 8400 batches | test loss 0.5941580 +| epoch 1 | 1583/ 8400 batches | test loss 0.4749005 +| epoch 1 | 1587/ 8400 batches | test loss 0.4014343 +| epoch 1 | 1591/ 8400 batches | test loss 0.4868852 +| epoch 1 | 1595/ 8400 batches | test loss 0.3563897 +| epoch 1 | 1599/ 8400 batches | test loss 0.5253631 +| epoch 1 | 1603/ 8400 batches | test loss 0.5007486 +| epoch 1 | 1607/ 8400 batches | test loss 0.5612938 +| epoch 1 | 1611/ 8400 batches | test loss 0.4290932 +| epoch 1 | 1615/ 8400 batches | test loss 0.4448155 +| epoch 1 | 1619/ 8400 batches | test loss 0.4432333 +| epoch 1 | 1623/ 8400 batches | test loss 0.5247781 +| epoch 1 | 1627/ 8400 batches | test loss 0.4292353 +| epoch 1 | 1631/ 8400 batches | test loss 0.3892553 +| epoch 1 | 1635/ 8400 batches | test loss 0.4031084 +| epoch 1 | 1639/ 8400 batches | test loss 0.5464520 +| epoch 1 | 1643/ 8400 batches | test loss 0.3932608 +| epoch 1 | 1647/ 8400 batches | test loss 0.4232788 +| epoch 1 | 1651/ 8400 batches | test loss 0.4414482 +| epoch 1 | 1655/ 8400 batches | test loss 0.3262916 +| epoch 1 | 1659/ 8400 batches | test loss 0.4306225 +| epoch 1 | 1663/ 8400 batches | test loss 0.4909362 +| epoch 1 | 1667/ 8400 batches | test loss 0.4035166 +| epoch 1 | 1671/ 8400 batches | test loss 0.5434358 +| epoch 1 | 1675/ 8400 batches | test loss 0.4524393 +| epoch 1 | 1679/ 8400 batches | test loss 0.4453534 +| epoch 1 | 1683/ 8400 batches | test loss 0.4567491 +| epoch 1 | 1687/ 8400 batches | test loss 0.4694165 +| epoch 1 | 1691/ 8400 batches | test loss 0.5027049 +| epoch 1 | 1695/ 8400 batches | test loss 0.4237241 +| epoch 1 | 1699/ 8400 batches | test loss 0.4585148 +| epoch 1 | 1703/ 8400 batches | test loss 0.5261600 +| epoch 1 | 1707/ 8400 batches | test loss 0.4330146 +| epoch 1 | 1711/ 8400 batches | test loss 0.5298969 +| epoch 1 | 1715/ 8400 batches | test loss 0.5040521 +| epoch 1 | 1719/ 8400 batches | test loss 0.4831351 +| epoch 1 | 1723/ 8400 batches | test loss 0.5143384 +| epoch 1 | 1727/ 8400 batches | test loss 0.4476981 +| epoch 1 | 1731/ 8400 batches | test loss 0.5555434 +| epoch 1 | 1735/ 8400 batches | test loss 0.4726646 +| epoch 1 | 1739/ 8400 batches | test loss 0.5570796 +| epoch 1 | 1743/ 8400 batches | test loss 0.4513525 +| epoch 1 | 1747/ 8400 batches | test loss 0.4784547 +| epoch 1 | 1751/ 8400 batches | test loss 0.4361179 +| epoch 1 | 1755/ 8400 batches | test loss 0.3934455 +| epoch 1 | 1759/ 8400 batches | test loss 0.4639776 +| epoch 1 | 1763/ 8400 batches | test loss 0.4036835 +| epoch 1 | 1767/ 8400 batches | test loss 0.4483024 +| epoch 1 | 1771/ 8400 batches | test loss 0.4464906 +| epoch 1 | 1775/ 8400 batches | test loss 0.4229128 +| epoch 1 | 1779/ 8400 batches | test loss 0.4513218 +| epoch 1 | 1783/ 8400 batches | test loss 0.6400272 +| epoch 1 | 1787/ 8400 batches | test loss 0.4415784 +| epoch 1 | 1791/ 8400 batches | test loss 0.4287355 +| epoch 1 | 1795/ 8400 batches | test loss 0.4727877 +| epoch 1 | 1799/ 8400 batches | test loss 0.4074533 +| epoch 1 | 1803/ 8400 batches | test loss 0.3977016 +| epoch 1 | 1807/ 8400 batches | test loss 0.4544980 +| epoch 1 | 1811/ 8400 batches | test loss 0.3354878 +| epoch 1 | 1815/ 8400 batches | test loss 0.5359824 +| epoch 1 | 1819/ 8400 batches | test loss 0.5442101 +| epoch 1 | 1823/ 8400 batches | test loss 0.4411875 +| epoch 1 | 1827/ 8400 batches | test loss 0.4427463 +| epoch 1 | 1831/ 8400 batches | test loss 0.3796676 +| epoch 1 | 1835/ 8400 batches | test loss 0.3956306 +| epoch 1 | 1839/ 8400 batches | test loss 0.4100318 +| epoch 1 | 1843/ 8400 batches | test loss 0.4869255 +| epoch 1 | 1847/ 8400 batches | test loss 0.4636285 +| epoch 1 | 1851/ 8400 batches | test loss 0.4111032 +| epoch 1 | 1855/ 8400 batches | test loss 0.4087952 +| epoch 1 | 1859/ 8400 batches | test loss 0.5062739 +| epoch 1 | 1863/ 8400 batches | test loss 0.4648289 +| epoch 1 | 1867/ 8400 batches | test loss 0.5082198 +| epoch 1 | 1871/ 8400 batches | test loss 0.4248443 +| epoch 1 | 1875/ 8400 batches | test loss 0.3611386 +| epoch 1 | 1879/ 8400 batches | test loss 0.5697460 +| epoch 1 | 1883/ 8400 batches | test loss 0.4413712 +| epoch 1 | 1887/ 8400 batches | test loss 0.3126330 +| epoch 1 | 1891/ 8400 batches | test loss 0.4402237 +| epoch 1 | 1895/ 8400 batches | test loss 0.1827473 +| epoch 1 | 1899/ 8400 batches | test loss 0.4264142 +| epoch 1 | 1903/ 8400 batches | test loss 0.4667901 +| epoch 1 | 1907/ 8400 batches | test loss 0.2947589 +| epoch 1 | 1911/ 8400 batches | test loss 0.3866301 +| epoch 1 | 1915/ 8400 batches | test loss 0.4270242 +| epoch 1 | 1919/ 8400 batches | test loss 0.5189491 +| epoch 1 | 1923/ 8400 batches | test loss 0.5231279 +| epoch 1 | 1927/ 8400 batches | test loss 0.4719541 +| epoch 1 | 1931/ 8400 batches | test loss 0.5571570 +| epoch 1 | 1935/ 8400 batches | test loss 0.3449174 +| epoch 1 | 1939/ 8400 batches | test loss 0.5179442 +| epoch 1 | 1943/ 8400 batches | test loss 0.4705661 +| epoch 1 | 1947/ 8400 batches | test loss 0.4117459 +| epoch 1 | 1951/ 8400 batches | test loss 0.3691620 +| epoch 1 | 1955/ 8400 batches | test loss 0.4379798 +| epoch 1 | 1959/ 8400 batches | test loss 0.4554309 +| epoch 1 | 1963/ 8400 batches | test loss 0.3885041 +| epoch 1 | 1967/ 8400 batches | test loss 0.5197167 +| epoch 1 | 1971/ 8400 batches | test loss 0.4613439 +| epoch 1 | 1975/ 8400 batches | test loss 0.4763847 +| epoch 1 | 1979/ 8400 batches | test loss 0.4227102 +| epoch 1 | 1983/ 8400 batches | test loss 0.4525887 +| epoch 1 | 1987/ 8400 batches | test loss 0.4399880 +| epoch 1 | 1991/ 8400 batches | test loss 0.4977020 +| epoch 1 | 1995/ 8400 batches | test loss 0.3068158 +| epoch 1 | 1999/ 8400 batches | test loss 0.5433106 +| epoch 1 | 2003/ 8400 batches | test loss 0.4324536 +| epoch 1 | 2007/ 8400 batches | test loss 0.4555369 +| epoch 1 | 2011/ 8400 batches | test loss 0.4519553 +| epoch 1 | 2015/ 8400 batches | test loss 0.4147701 +| epoch 1 | 2019/ 8400 batches | test loss 0.4105814 +| epoch 1 | 2023/ 8400 batches | test loss 0.3955501 +| epoch 1 | 2027/ 8400 batches | test loss 0.4583668 +| epoch 1 | 2031/ 8400 batches | test loss 0.4537685 +| epoch 1 | 2035/ 8400 batches | test loss 0.4283880 +| epoch 1 | 2039/ 8400 batches | test loss 0.3518522 +| epoch 1 | 2043/ 8400 batches | test loss 0.3885002 +| epoch 1 | 2047/ 8400 batches | test loss 0.4195690 +| epoch 1 | 2051/ 8400 batches | test loss 0.4046348 +| epoch 1 | 2055/ 8400 batches | test loss 0.4287353 +| epoch 1 | 2059/ 8400 batches | test loss 0.4446228 +| epoch 1 | 2063/ 8400 batches | test loss 0.4761956 +| epoch 1 | 2067/ 8400 batches | test loss 0.3786660 +| epoch 1 | 2071/ 8400 batches | test loss 0.3825476 +| epoch 1 | 2075/ 8400 batches | test loss 0.6125473 +| epoch 1 | 2079/ 8400 batches | test loss 0.3796505 +| epoch 1 | 2083/ 8400 batches | test loss 0.4012651 +| epoch 1 | 2087/ 8400 batches | test loss 0.4864103 +| epoch 1 | 2091/ 8400 batches | test loss 0.4209836 +| epoch 1 | 2095/ 8400 batches | test loss 0.4962504 +| epoch 1 | 2099/ 8400 batches | test loss 0.4715029 +| epoch 1 | final test loss 0.4524, save model! +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 8400 batches | train loss 0.3549469 +| epoch 2 | 7/ 8400 batches | train loss 0.5086287 +| epoch 2 | 11/ 8400 batches | train loss 0.4047836 +| epoch 2 | 15/ 8400 batches | train loss 0.3846101 +| epoch 2 | 19/ 8400 batches | train loss 0.5224471 +| epoch 2 | 23/ 8400 batches | train loss 0.4018563 +| epoch 2 | 27/ 8400 batches | train loss 0.4059101 +| epoch 2 | 31/ 8400 batches | train loss 0.4844435 +| epoch 2 | 35/ 8400 batches | train loss 0.3976895 +| epoch 2 | 39/ 8400 batches | train loss 0.4489666 +| epoch 2 | 43/ 8400 batches | train loss 0.4007483 +| epoch 2 | 47/ 8400 batches | train loss 0.4425266 +| epoch 2 | 51/ 8400 batches | train loss 0.4670369 +| epoch 2 | 55/ 8400 batches | train loss 0.4733784 +| epoch 2 | 59/ 8400 batches | train loss 0.4077660 +| epoch 2 | 63/ 8400 batches | train loss 0.5061529 +| epoch 2 | 67/ 8400 batches | train loss 0.4347228 +| epoch 2 | 71/ 8400 batches | train loss 0.4834900 +| epoch 2 | 75/ 8400 batches | train loss 0.4405404 +| epoch 2 | 79/ 8400 batches | train loss 0.2974406 +| epoch 2 | 83/ 8400 batches | train loss 0.4307724 +| epoch 2 | 87/ 8400 batches | train loss 0.4172544 +| epoch 2 | 91/ 8400 batches | train loss 0.4533684 +| epoch 2 | 95/ 8400 batches | train loss 0.3779663 +| epoch 2 | 99/ 8400 batches | train loss 0.4700480 +| epoch 2 | 103/ 8400 batches | train loss 0.4922052 +| epoch 2 | 107/ 8400 batches | train loss 0.5229263 +| epoch 2 | 111/ 8400 batches | train loss 0.3987178 +| epoch 2 | 115/ 8400 batches | train loss 0.4401959 +| epoch 2 | 119/ 8400 batches | train loss 0.4209328 +| epoch 2 | 123/ 8400 batches | train loss 0.4475148 +| epoch 2 | 127/ 8400 batches | train loss 0.4498393 +| epoch 2 | 131/ 8400 batches | train loss 0.4141487 +| epoch 2 | 135/ 8400 batches | train loss 0.4401546 +| epoch 2 | 139/ 8400 batches | train loss 0.4494334 +| epoch 2 | 143/ 8400 batches | train loss 0.4468524 +| epoch 2 | 147/ 8400 batches | train loss 0.4552121 +| epoch 2 | 151/ 8400 batches | train loss 0.4706215 +| epoch 2 | 155/ 8400 batches | train loss 0.4284720 +| epoch 2 | 159/ 8400 batches | train loss 0.4354578 +| epoch 2 | 163/ 8400 batches | train loss 0.5191020 +| epoch 2 | 167/ 8400 batches | train loss 0.3869337 +| epoch 2 | 171/ 8400 batches | train loss 0.4565397 +| epoch 2 | 175/ 8400 batches | train loss 0.4680445 +| epoch 2 | 179/ 8400 batches | train loss 0.4076710 +| epoch 2 | 183/ 8400 batches | train loss 0.4855702 +| epoch 2 | 187/ 8400 batches | train loss 0.4153292 +| epoch 2 | 191/ 8400 batches | train loss 0.3703338 +| epoch 2 | 195/ 8400 batches | train loss 0.4244801 +| epoch 2 | 199/ 8400 batches | train loss 0.4367605 +| epoch 2 | 203/ 8400 batches | train loss 0.4212423 +| epoch 2 | 207/ 8400 batches | train loss 0.3877829 +| epoch 2 | 211/ 8400 batches | train loss 0.4339630 +| epoch 2 | 215/ 8400 batches | train loss 0.3858964 +| epoch 2 | 219/ 8400 batches | train loss 0.5055417 +| epoch 2 | 223/ 8400 batches | train loss 0.5541636 +| epoch 2 | 227/ 8400 batches | train loss 0.3791716 +| epoch 2 | 231/ 8400 batches | train loss 0.3965286 +| epoch 2 | 235/ 8400 batches | train loss 0.4277290 +| epoch 2 | 239/ 8400 batches | train loss 0.4440581 +| epoch 2 | 243/ 8400 batches | train loss 0.3563277 +| epoch 2 | 247/ 8400 batches | train loss 0.4018391 +| epoch 2 | 251/ 8400 batches | train loss 0.4567090 +| epoch 2 | 255/ 8400 batches | train loss 0.4743567 +| epoch 2 | 259/ 8400 batches | train loss 0.3740380 +| epoch 2 | 263/ 8400 batches | train loss 0.4623136 +| epoch 2 | 267/ 8400 batches | train loss 0.3885236 +| epoch 2 | 271/ 8400 batches | train loss 0.5711173 +| epoch 2 | 275/ 8400 batches | train loss 0.4421874 +| epoch 2 | 279/ 8400 batches | train loss 0.4258931 +| epoch 2 | 283/ 8400 batches | train loss 0.4193090 +| epoch 2 | 287/ 8400 batches | train loss 0.4857340 +| epoch 2 | 291/ 8400 batches | train loss 0.3706613 +| epoch 2 | 295/ 8400 batches | train loss 0.4757236 +| epoch 2 | 299/ 8400 batches | train loss 0.4122065 +| epoch 2 | 303/ 8400 batches | train loss 0.4125523 +| epoch 2 | 307/ 8400 batches | train loss 0.4544037 +| epoch 2 | 311/ 8400 batches | train loss 0.4723305 +| epoch 2 | 315/ 8400 batches | train loss 0.4673147 +| epoch 2 | 319/ 8400 batches | train loss 0.4006814 +| epoch 2 | 323/ 8400 batches | train loss 0.5093193 +| epoch 2 | 327/ 8400 batches | train loss 0.5205083 +| epoch 2 | 331/ 8400 batches | train loss 0.4175323 +| epoch 2 | 335/ 8400 batches | train loss 0.3459617 +| epoch 2 | 339/ 8400 batches | train loss 0.2796101 +| epoch 2 | 343/ 8400 batches | train loss 0.4293883 +| epoch 2 | 347/ 8400 batches | train loss 0.5044138 +| epoch 2 | 351/ 8400 batches | train loss 0.4403045 +| epoch 2 | 355/ 8400 batches | train loss 0.4715262 +| epoch 2 | 359/ 8400 batches | train loss 0.4474765 +| epoch 2 | 363/ 8400 batches | train loss 0.4890863 +| epoch 2 | 367/ 8400 batches | train loss 0.4695824 +| epoch 2 | 371/ 8400 batches | train loss 0.3643613 +| epoch 2 | 375/ 8400 batches | train loss 0.5755135 +| epoch 2 | 379/ 8400 batches | train loss 0.4875843 +| epoch 2 | 383/ 8400 batches | train loss 0.5258268 +| epoch 2 | 387/ 8400 batches | train loss 0.4832115 +| epoch 2 | 391/ 8400 batches | train loss 0.5096210 +| epoch 2 | 395/ 8400 batches | train loss 0.4702960 +| epoch 2 | 399/ 8400 batches | train loss 0.4322843 +| epoch 2 | 403/ 8400 batches | train loss 0.3354091 +| epoch 2 | 407/ 8400 batches | train loss 0.3199840 +| epoch 2 | 411/ 8400 batches | train loss 0.4968084 +| epoch 2 | 415/ 8400 batches | train loss 0.4451516 +| epoch 2 | 419/ 8400 batches | train loss 0.3942807 +| epoch 2 | 423/ 8400 batches | train loss 0.4724409 +| epoch 2 | 427/ 8400 batches | train loss 0.4593329 +| epoch 2 | 431/ 8400 batches | train loss 0.4166244 +| epoch 2 | 435/ 8400 batches | train loss 0.3622004 +| epoch 2 | 439/ 8400 batches | train loss 0.5583043 +| epoch 2 | 443/ 8400 batches | train loss 0.4856534 +| epoch 2 | 447/ 8400 batches | train loss 0.5273816 +| epoch 2 | 451/ 8400 batches | train loss 0.4521937 +| epoch 2 | 455/ 8400 batches | train loss 0.4592214 +| epoch 2 | 459/ 8400 batches | train loss 0.5267496 +| epoch 2 | 463/ 8400 batches | train loss 0.4165621 +| epoch 2 | 467/ 8400 batches | train loss 0.4188018 +| epoch 2 | 471/ 8400 batches | train loss 0.4450165 +| epoch 2 | 475/ 8400 batches | train loss 0.4698082 +| epoch 2 | 479/ 8400 batches | train loss 0.4498207 +| epoch 2 | 483/ 8400 batches | train loss 0.4758225 +| epoch 2 | 487/ 8400 batches | train loss 0.3829396 +| epoch 2 | 491/ 8400 batches | train loss 0.5241634 +| epoch 2 | 495/ 8400 batches | train loss 0.5131896 +| epoch 2 | 499/ 8400 batches | train loss 0.4527582 +| epoch 2 | 503/ 8400 batches | train loss 0.4136620 +| epoch 2 | 507/ 8400 batches | train loss 0.3779118 +| epoch 2 | 511/ 8400 batches | train loss 0.4414360 +| epoch 2 | 515/ 8400 batches | train loss 0.4390899 +| epoch 2 | 519/ 8400 batches | train loss 0.4934747 +| epoch 2 | 523/ 8400 batches | train loss 0.4330505 +| epoch 2 | 527/ 8400 batches | train loss 0.4755440 +| epoch 2 | 531/ 8400 batches | train loss 0.3561915 +| epoch 2 | 535/ 8400 batches | train loss 0.4856571 +| epoch 2 | 539/ 8400 batches | train loss 0.4660541 +| epoch 2 | 543/ 8400 batches | train loss 0.5016396 +| epoch 2 | 547/ 8400 batches | train loss 0.4189340 +| epoch 2 | 551/ 8400 batches | train loss 0.4819037 +| epoch 2 | 555/ 8400 batches | train loss 0.3746706 +| epoch 2 | 559/ 8400 batches | train loss 0.3368754 +| epoch 2 | 563/ 8400 batches | train loss 0.4056488 +| epoch 2 | 567/ 8400 batches | train loss 0.4569638 +| epoch 2 | 571/ 8400 batches | train loss 0.4599513 +| epoch 2 | 575/ 8400 batches | train loss 0.5036764 +| epoch 2 | 579/ 8400 batches | train loss 0.4586242 +| epoch 2 | 583/ 8400 batches | train loss 0.4942990 +| epoch 2 | 587/ 8400 batches | train loss 0.3704243 +| epoch 2 | 591/ 8400 batches | train loss 0.4551626 +| epoch 2 | 595/ 8400 batches | train loss 0.4570685 +| epoch 2 | 599/ 8400 batches | train loss 0.4252838 +| epoch 2 | 603/ 8400 batches | train loss 0.3640384 +| epoch 2 | 607/ 8400 batches | train loss 0.4411083 +| epoch 2 | 611/ 8400 batches | train loss 0.4535077 +| epoch 2 | 615/ 8400 batches | train loss 0.4061242 +| epoch 2 | 619/ 8400 batches | train loss 0.4912764 +| epoch 2 | 623/ 8400 batches | train loss 0.4231436 +| epoch 2 | 627/ 8400 batches | train loss 0.5616248 +| epoch 2 | 631/ 8400 batches | train loss 0.5228828 +| epoch 2 | 635/ 8400 batches | train loss 0.5127071 +| epoch 2 | 639/ 8400 batches | train loss 0.4537179 +| epoch 2 | 643/ 8400 batches | train loss 0.4704853 +| epoch 2 | 647/ 8400 batches | train loss 0.4877584 +| epoch 2 | 651/ 8400 batches | train loss 0.4645955 +| epoch 2 | 655/ 8400 batches | train loss 0.4553886 +| epoch 2 | 659/ 8400 batches | train loss 0.4452620 +| epoch 2 | 663/ 8400 batches | train loss 0.3859414 +| epoch 2 | 667/ 8400 batches | train loss 0.4110618 +| epoch 2 | 671/ 8400 batches | train loss 0.4642339 +| epoch 2 | 675/ 8400 batches | train loss 0.4575922 +| epoch 2 | 679/ 8400 batches | train loss 0.4462625 +| epoch 2 | 683/ 8400 batches | train loss 0.3868822 +| epoch 2 | 687/ 8400 batches | train loss 0.3939513 +| epoch 2 | 691/ 8400 batches | train loss 0.4701375 +| epoch 2 | 695/ 8400 batches | train loss 0.5159695 +| epoch 2 | 699/ 8400 batches | train loss 0.4054651 +| epoch 2 | 703/ 8400 batches | train loss 0.4355730 +| epoch 2 | 707/ 8400 batches | train loss 0.4378069 +| epoch 2 | 711/ 8400 batches | train loss 0.5056107 +| epoch 2 | 715/ 8400 batches | train loss 0.4434671 +| epoch 2 | 719/ 8400 batches | train loss 0.3764086 +| epoch 2 | 723/ 8400 batches | train loss 0.3624806 +| epoch 2 | 727/ 8400 batches | train loss 0.5256577 +| epoch 2 | 731/ 8400 batches | train loss 0.4095260 +| epoch 2 | 735/ 8400 batches | train loss 0.4241731 +| epoch 2 | 739/ 8400 batches | train loss 0.4306552 +| epoch 2 | 743/ 8400 batches | train loss 0.4634493 +| epoch 2 | 747/ 8400 batches | train loss 0.4394921 +| epoch 2 | 751/ 8400 batches | train loss 0.4841711 +| epoch 2 | 755/ 8400 batches | train loss 0.4520099 +| epoch 2 | 759/ 8400 batches | train loss 0.4961116 +| epoch 2 | 763/ 8400 batches | train loss 0.5035068 +| epoch 2 | 767/ 8400 batches | train loss 0.3896690 +| epoch 2 | 771/ 8400 batches | train loss 0.5244162 +| epoch 2 | 775/ 8400 batches | train loss 0.4976510 +| epoch 2 | 779/ 8400 batches | train loss 0.3850992 +| epoch 2 | 783/ 8400 batches | train loss 0.4175592 +| epoch 2 | 787/ 8400 batches | train loss 0.5143175 +| epoch 2 | 791/ 8400 batches | train loss 0.4495890 +| epoch 2 | 795/ 8400 batches | train loss 0.3350192 +| epoch 2 | 799/ 8400 batches | train loss 0.3820169 +| epoch 2 | 803/ 8400 batches | train loss 0.5417631 +| epoch 2 | 807/ 8400 batches | train loss 0.3975849 +| epoch 2 | 811/ 8400 batches | train loss 0.4062762 +| epoch 2 | 815/ 8400 batches | train loss 0.4981766 +| epoch 2 | 819/ 8400 batches | train loss 0.4568678 +| epoch 2 | 823/ 8400 batches | train loss 0.4945430 +| epoch 2 | 827/ 8400 batches | train loss 0.4470640 +| epoch 2 | 831/ 8400 batches | train loss 0.4472088 +| epoch 2 | 835/ 8400 batches | train loss 0.4400122 +| epoch 2 | 839/ 8400 batches | train loss 0.4885024 +| epoch 2 | 843/ 8400 batches | train loss 0.3910902 +| epoch 2 | 847/ 8400 batches | train loss 0.4006884 +| epoch 2 | 851/ 8400 batches | train loss 0.3935348 +| epoch 2 | 855/ 8400 batches | train loss 0.4313129 +| epoch 2 | 859/ 8400 batches | train loss 0.5189260 +| epoch 2 | 863/ 8400 batches | train loss 0.4530605 +| epoch 2 | 867/ 8400 batches | train loss 0.4748930 +| epoch 2 | 871/ 8400 batches | train loss 0.3330108 +| epoch 2 | 875/ 8400 batches | train loss 0.4629524 +| epoch 2 | 879/ 8400 batches | train loss 0.4972742 +| epoch 2 | 883/ 8400 batches | train loss 0.4032412 +| epoch 2 | 887/ 8400 batches | train loss 0.4361287 +| epoch 2 | 891/ 8400 batches | train loss 0.3788672 +| epoch 2 | 895/ 8400 batches | train loss 0.4201782 +| epoch 2 | 899/ 8400 batches | train loss 0.4161823 +| epoch 2 | 903/ 8400 batches | train loss 0.4960063 +| epoch 2 | 907/ 8400 batches | train loss 0.4711348 +| epoch 2 | 911/ 8400 batches | train loss 0.3141282 +| epoch 2 | 915/ 8400 batches | train loss 0.4512557 +| epoch 2 | 919/ 8400 batches | train loss 0.6609651 +| epoch 2 | 923/ 8400 batches | train loss 0.4854915 +| epoch 2 | 927/ 8400 batches | train loss 0.4232553 +| epoch 2 | 931/ 8400 batches | train loss 0.4213045 +| epoch 2 | 935/ 8400 batches | train loss 0.4165171 +| epoch 2 | 939/ 8400 batches | train loss 0.4107958 +| epoch 2 | 943/ 8400 batches | train loss 0.5332727 +| epoch 2 | 947/ 8400 batches | train loss 0.4772452 +| epoch 2 | 951/ 8400 batches | train loss 0.3599927 +| epoch 2 | 955/ 8400 batches | train loss 0.4504910 +| epoch 2 | 959/ 8400 batches | train loss 0.4437870 +| epoch 2 | 963/ 8400 batches | train loss 0.4514819 +| epoch 2 | 967/ 8400 batches | train loss 0.4134361 +| epoch 2 | 971/ 8400 batches | train loss 0.3881742 +| epoch 2 | 975/ 8400 batches | train loss 0.4148107 +| epoch 2 | 979/ 8400 batches | train loss 0.5129687 +| epoch 2 | 983/ 8400 batches | train loss 0.5502145 +| epoch 2 | 987/ 8400 batches | train loss 0.5351189 +| epoch 2 | 991/ 8400 batches | train loss 0.4271681 +| epoch 2 | 995/ 8400 batches | train loss 0.4584963 +| epoch 2 | 999/ 8400 batches | train loss 0.4732869 +| epoch 2 | 1003/ 8400 batches | train loss 0.6129426 +| epoch 2 | 1007/ 8400 batches | train loss 0.3935167 +| epoch 2 | 1011/ 8400 batches | train loss 0.4793575 +| epoch 2 | 1015/ 8400 batches | train loss 0.4966649 +| epoch 2 | 1019/ 8400 batches | train loss 0.4390317 +| epoch 2 | 1023/ 8400 batches | train loss 0.5078625 +| epoch 2 | 1027/ 8400 batches | train loss 0.4046281 +| epoch 2 | 1031/ 8400 batches | train loss 0.4722218 +| epoch 2 | 1035/ 8400 batches | train loss 0.4532225 +| epoch 2 | 1039/ 8400 batches | train loss 0.4526558 +| epoch 2 | 1043/ 8400 batches | train loss 0.5147419 +| epoch 2 | 1047/ 8400 batches | train loss 0.4937532 +| epoch 2 | 1051/ 8400 batches | train loss 0.4234896 +| epoch 2 | 1055/ 8400 batches | train loss 0.4278473 +| epoch 2 | 1059/ 8400 batches | train loss 0.4146670 +| epoch 2 | 1063/ 8400 batches | train loss 0.4405251 +| epoch 2 | 1067/ 8400 batches | train loss 0.4508784 +| epoch 2 | 1071/ 8400 batches | train loss 0.5669281 +| epoch 2 | 1075/ 8400 batches | train loss 0.4383154 +| epoch 2 | 1079/ 8400 batches | train loss 0.5160360 +| epoch 2 | 1083/ 8400 batches | train loss 0.5479550 +| epoch 2 | 1087/ 8400 batches | train loss 0.3865346 +| epoch 2 | 1091/ 8400 batches | train loss 0.3824195 +| epoch 2 | 1095/ 8400 batches | train loss 0.4449508 +| epoch 2 | 1099/ 8400 batches | train loss 0.4635983 +| epoch 2 | 1103/ 8400 batches | train loss 0.4448748 +| epoch 2 | 1107/ 8400 batches | train loss 0.4347069 +| epoch 2 | 1111/ 8400 batches | train loss 0.5441306 +| epoch 2 | 1115/ 8400 batches | train loss 0.3964961 +| epoch 2 | 1119/ 8400 batches | train loss 0.4937775 +| epoch 2 | 1123/ 8400 batches | train loss 0.5554973 +| epoch 2 | 1127/ 8400 batches | train loss 0.4161828 +| epoch 2 | 1131/ 8400 batches | train loss 0.3905937 +| epoch 2 | 1135/ 8400 batches | train loss 0.4991624 +| epoch 2 | 1139/ 8400 batches | train loss 0.4428989 +| epoch 2 | 1143/ 8400 batches | train loss 0.5101801 +| epoch 2 | 1147/ 8400 batches | train loss 0.4456008 +| epoch 2 | 1151/ 8400 batches | train loss 0.4835624 +| epoch 2 | 1155/ 8400 batches | train loss 0.5858563 +| epoch 2 | 1159/ 8400 batches | train loss 0.5008320 +| epoch 2 | 1163/ 8400 batches | train loss 0.5098367 +| epoch 2 | 1167/ 8400 batches | train loss 0.4056199 +| epoch 2 | 1171/ 8400 batches | train loss 0.4040895 +| epoch 2 | 1175/ 8400 batches | train loss 0.4292546 +| epoch 2 | 1179/ 8400 batches | train loss 0.4593519 +| epoch 2 | 1183/ 8400 batches | train loss 0.4795738 +| epoch 2 | 1187/ 8400 batches | train loss 0.4078250 +| epoch 2 | 1191/ 8400 batches | train loss 0.4215099 +| epoch 2 | 1195/ 8400 batches | train loss 0.3962407 +| epoch 2 | 1199/ 8400 batches | train loss 0.4705251 +| epoch 2 | 1203/ 8400 batches | train loss 0.4627412 +| epoch 2 | 1207/ 8400 batches | train loss 0.3512225 +| epoch 2 | 1211/ 8400 batches | train loss 0.4330665 +| epoch 2 | 1215/ 8400 batches | train loss 0.4359043 +| epoch 2 | 1219/ 8400 batches | train loss 0.4376964 +| epoch 2 | 1223/ 8400 batches | train loss 0.4349837 +| epoch 2 | 1227/ 8400 batches | train loss 0.4838627 +| epoch 2 | 1231/ 8400 batches | train loss 0.3723041 +| epoch 2 | 1235/ 8400 batches | train loss 0.4821777 +| epoch 2 | 1239/ 8400 batches | train loss 0.4269943 +| epoch 2 | 1243/ 8400 batches | train loss 0.3354375 +| epoch 2 | 1247/ 8400 batches | train loss 0.5865080 +| epoch 2 | 1251/ 8400 batches | train loss 0.4188648 +| epoch 2 | 1255/ 8400 batches | train loss 0.5247092 +| epoch 2 | 1259/ 8400 batches | train loss 0.4072980 +| epoch 2 | 1263/ 8400 batches | train loss 0.4020610 +| epoch 2 | 1267/ 8400 batches | train loss 0.4816508 +| epoch 2 | 1271/ 8400 batches | train loss 0.5470686 +| epoch 2 | 1275/ 8400 batches | train loss 0.4755210 +| epoch 2 | 1279/ 8400 batches | train loss 0.3887359 +| epoch 2 | 1283/ 8400 batches | train loss 0.4710392 +| epoch 2 | 1287/ 8400 batches | train loss 0.4064912 +| epoch 2 | 1291/ 8400 batches | train loss 0.3751634 +| epoch 2 | 1295/ 8400 batches | train loss 0.4036083 +| epoch 2 | 1299/ 8400 batches | train loss 0.5535057 +| epoch 2 | 1303/ 8400 batches | train loss 0.5433820 +| epoch 2 | 1307/ 8400 batches | train loss 0.4638101 +| epoch 2 | 1311/ 8400 batches | train loss 0.4883563 +| epoch 2 | 1315/ 8400 batches | train loss 0.4992250 +| epoch 2 | 1319/ 8400 batches | train loss 0.4162630 +| epoch 2 | 1323/ 8400 batches | train loss 0.3456980 +| epoch 2 | 1327/ 8400 batches | train loss 0.3674986 +| epoch 2 | 1331/ 8400 batches | train loss 0.4538458 +| epoch 2 | 1335/ 8400 batches | train loss 0.5336376 +| epoch 2 | 1339/ 8400 batches | train loss 0.4020261 +| epoch 2 | 1343/ 8400 batches | train loss 0.5185934 +| epoch 2 | 1347/ 8400 batches | train loss 0.4959936 +| epoch 2 | 1351/ 8400 batches | train loss 0.4674609 +| epoch 2 | 1355/ 8400 batches | train loss 0.4312194 +| epoch 2 | 1359/ 8400 batches | train loss 0.4632466 +| epoch 2 | 1363/ 8400 batches | train loss 0.3606120 +| epoch 2 | 1367/ 8400 batches | train loss 0.4044445 +| epoch 2 | 1371/ 8400 batches | train loss 0.4706821 +| epoch 2 | 1375/ 8400 batches | train loss 0.4531625 +| epoch 2 | 1379/ 8400 batches | train loss 0.4192907 +| epoch 2 | 1383/ 8400 batches | train loss 0.4669491 +| epoch 2 | 1387/ 8400 batches | train loss 0.3758189 +| epoch 2 | 1391/ 8400 batches | train loss 0.3944284 +| epoch 2 | 1395/ 8400 batches | train loss 0.3629398 +| epoch 2 | 1399/ 8400 batches | train loss 0.5239087 +| epoch 2 | 1403/ 8400 batches | train loss 0.5012264 +| epoch 2 | 1407/ 8400 batches | train loss 0.3139382 +| epoch 2 | 1411/ 8400 batches | train loss 0.4362197 +| epoch 2 | 1415/ 8400 batches | train loss 0.4281553 +| epoch 2 | 1419/ 8400 batches | train loss 0.4235840 +| epoch 2 | 1423/ 8400 batches | train loss 0.5063881 +| epoch 2 | 1427/ 8400 batches | train loss 0.5164365 +| epoch 2 | 1431/ 8400 batches | train loss 0.4249440 +| epoch 2 | 1435/ 8400 batches | train loss 0.3371598 +| epoch 2 | 1439/ 8400 batches | train loss 0.3170366 +| epoch 2 | 1443/ 8400 batches | train loss 0.4539896 +| epoch 2 | 1447/ 8400 batches | train loss 0.3813105 +| epoch 2 | 1451/ 8400 batches | train loss 0.4089200 +| epoch 2 | 1455/ 8400 batches | train loss 0.4746923 +| epoch 2 | 1459/ 8400 batches | train loss 0.4093438 +| epoch 2 | 1463/ 8400 batches | train loss 0.4642181 +| epoch 2 | 1467/ 8400 batches | train loss 0.4700014 +| epoch 2 | 1471/ 8400 batches | train loss 0.4702843 +| epoch 2 | 1475/ 8400 batches | train loss 0.4755914 +| epoch 2 | 1479/ 8400 batches | train loss 0.4406857 +| epoch 2 | 1483/ 8400 batches | train loss 0.4450873 +| epoch 2 | 1487/ 8400 batches | train loss 0.4600547 +| epoch 2 | 1491/ 8400 batches | train loss 0.2616019 +| epoch 2 | 1495/ 8400 batches | train loss 0.4282423 +| epoch 2 | 1499/ 8400 batches | train loss 0.5593114 +| epoch 2 | 1503/ 8400 batches | train loss 0.4653451 +| epoch 2 | 1507/ 8400 batches | train loss 0.4496661 +| epoch 2 | 1511/ 8400 batches | train loss 0.3718447 +| epoch 2 | 1515/ 8400 batches | train loss 0.4210826 +| epoch 2 | 1519/ 8400 batches | train loss 0.5320799 +| epoch 2 | 1523/ 8400 batches | train loss 0.3963529 +| epoch 2 | 1527/ 8400 batches | train loss 0.5148510 +| epoch 2 | 1531/ 8400 batches | train loss 0.4118388 +| epoch 2 | 1535/ 8400 batches | train loss 0.4368929 +| epoch 2 | 1539/ 8400 batches | train loss 0.3124413 +| epoch 2 | 1543/ 8400 batches | train loss 0.4582497 +| epoch 2 | 1547/ 8400 batches | train loss 0.4761248 +| epoch 2 | 1551/ 8400 batches | train loss 0.4712870 +| epoch 2 | 1555/ 8400 batches | train loss 0.4306403 +| epoch 2 | 1559/ 8400 batches | train loss 0.4593154 +| epoch 2 | 1563/ 8400 batches | train loss 0.3824673 +| epoch 2 | 1567/ 8400 batches | train loss 0.4240760 +| epoch 2 | 1571/ 8400 batches | train loss 0.4112353 +| epoch 2 | 1575/ 8400 batches | train loss 0.4728423 +| epoch 2 | 1579/ 8400 batches | train loss 0.4714324 +| epoch 2 | 1583/ 8400 batches | train loss 0.4374827 +| epoch 2 | 1587/ 8400 batches | train loss 0.4118562 +| epoch 2 | 1591/ 8400 batches | train loss 0.3619718 +| epoch 2 | 1595/ 8400 batches | train loss 0.4193226 +| epoch 2 | 1599/ 8400 batches | train loss 0.4660319 +| epoch 2 | 1603/ 8400 batches | train loss 0.5293878 +| epoch 2 | 1607/ 8400 batches | train loss 0.3741421 +| epoch 2 | 1611/ 8400 batches | train loss 0.4473907 +| epoch 2 | 1615/ 8400 batches | train loss 0.4633519 +| epoch 2 | 1619/ 8400 batches | train loss 0.3626811 +| epoch 2 | 1623/ 8400 batches | train loss 0.4729465 +| epoch 2 | 1627/ 8400 batches | train loss 0.5050905 +| epoch 2 | 1631/ 8400 batches | train loss 0.3673269 +| epoch 2 | 1635/ 8400 batches | train loss 0.4978782 +| epoch 2 | 1639/ 8400 batches | train loss 0.5704988 +| epoch 2 | 1643/ 8400 batches | train loss 0.3561943 +| epoch 2 | 1647/ 8400 batches | train loss 0.5390673 +| epoch 2 | 1651/ 8400 batches | train loss 0.4449534 +| epoch 2 | 1655/ 8400 batches | train loss 0.4111512 +| epoch 2 | 1659/ 8400 batches | train loss 0.4718194 +| epoch 2 | 1663/ 8400 batches | train loss 0.4653298 +| epoch 2 | 1667/ 8400 batches | train loss 0.3664204 +| epoch 2 | 1671/ 8400 batches | train loss 0.5526690 +| epoch 2 | 1675/ 8400 batches | train loss 0.4579446 +| epoch 2 | 1679/ 8400 batches | train loss 0.4521244 +| epoch 2 | 1683/ 8400 batches | train loss 0.5004650 +| epoch 2 | 1687/ 8400 batches | train loss 0.5103920 +| epoch 2 | 1691/ 8400 batches | train loss 0.5000495 +| epoch 2 | 1695/ 8400 batches | train loss 0.4161584 +| epoch 2 | 1699/ 8400 batches | train loss 0.3901863 +| epoch 2 | 1703/ 8400 batches | train loss 0.4161804 +| epoch 2 | 1707/ 8400 batches | train loss 0.4820658 +| epoch 2 | 1711/ 8400 batches | train loss 0.4582626 +| epoch 2 | 1715/ 8400 batches | train loss 0.4028066 +| epoch 2 | 1719/ 8400 batches | train loss 0.2729694 +| epoch 2 | 1723/ 8400 batches | train loss 0.3686676 +| epoch 2 | 1727/ 8400 batches | train loss 0.5002515 +| epoch 2 | 1731/ 8400 batches | train loss 0.4763167 +| epoch 2 | 1735/ 8400 batches | train loss 0.3915549 +| epoch 2 | 1739/ 8400 batches | train loss 0.3859152 +| epoch 2 | 1743/ 8400 batches | train loss 0.4531714 +| epoch 2 | 1747/ 8400 batches | train loss 0.4608514 +| epoch 2 | 1751/ 8400 batches | train loss 0.4503944 +| epoch 2 | 1755/ 8400 batches | train loss 0.4380647 +| epoch 2 | 1759/ 8400 batches | train loss 0.3780472 +| epoch 2 | 1763/ 8400 batches | train loss 0.4305112 +| epoch 2 | 1767/ 8400 batches | train loss 0.4119217 +| epoch 2 | 1771/ 8400 batches | train loss 0.4534126 +| epoch 2 | 1775/ 8400 batches | train loss 0.4305370 +| epoch 2 | 1779/ 8400 batches | train loss 0.4757932 +| epoch 2 | 1783/ 8400 batches | train loss 0.4342138 +| epoch 2 | 1787/ 8400 batches | train loss 0.3815806 +| epoch 2 | 1791/ 8400 batches | train loss 0.4257295 +| epoch 2 | 1795/ 8400 batches | train loss 0.4549410 +| epoch 2 | 1799/ 8400 batches | train loss 0.4237005 +| epoch 2 | 1803/ 8400 batches | train loss 0.4638951 +| epoch 2 | 1807/ 8400 batches | train loss 0.4939986 +| epoch 2 | 1811/ 8400 batches | train loss 0.4806557 +| epoch 2 | 1815/ 8400 batches | train loss 0.4224730 +| epoch 2 | 1819/ 8400 batches | train loss 0.3995570 +| epoch 2 | 1823/ 8400 batches | train loss 0.4545873 +| epoch 2 | 1827/ 8400 batches | train loss 0.4383115 +| epoch 2 | 1831/ 8400 batches | train loss 0.4643232 +| epoch 2 | 1835/ 8400 batches | train loss 0.4903177 +| epoch 2 | 1839/ 8400 batches | train loss 0.4448816 +| epoch 2 | 1843/ 8400 batches | train loss 0.3891506 +| epoch 2 | 1847/ 8400 batches | train loss 0.4546967 +| epoch 2 | 1851/ 8400 batches | train loss 0.3940042 +| epoch 2 | 1855/ 8400 batches | train loss 0.3997847 +| epoch 2 | 1859/ 8400 batches | train loss 0.4980407 +| epoch 2 | 1863/ 8400 batches | train loss 0.3927789 +| epoch 2 | 1867/ 8400 batches | train loss 0.4443839 +| epoch 2 | 1871/ 8400 batches | train loss 0.3809623 +| epoch 2 | 1875/ 8400 batches | train loss 0.5873744 +| epoch 2 | 1879/ 8400 batches | train loss 0.4582808 +| epoch 2 | 1883/ 8400 batches | train loss 0.5191520 +| epoch 2 | 1887/ 8400 batches | train loss 0.3847778 +| epoch 2 | 1891/ 8400 batches | train loss 0.4070959 +| epoch 2 | 1895/ 8400 batches | train loss 0.6131593 +| epoch 2 | 1899/ 8400 batches | train loss 0.3944704 +| epoch 2 | 1903/ 8400 batches | train loss 0.5252680 +| epoch 2 | 1907/ 8400 batches | train loss 0.4706497 +| epoch 2 | 1911/ 8400 batches | train loss 0.4717390 +| epoch 2 | 1915/ 8400 batches | train loss 0.4497449 +| epoch 2 | 1919/ 8400 batches | train loss 0.4651391 +| epoch 2 | 1923/ 8400 batches | train loss 0.4246832 +| epoch 2 | 1927/ 8400 batches | train loss 0.4311874 +| epoch 2 | 1931/ 8400 batches | train loss 0.4685150 +| epoch 2 | 1935/ 8400 batches | train loss 0.4255286 +| epoch 2 | 1939/ 8400 batches | train loss 0.5086305 +| epoch 2 | 1943/ 8400 batches | train loss 0.5912720 +| epoch 2 | 1947/ 8400 batches | train loss 0.4346604 +| epoch 2 | 1951/ 8400 batches | train loss 0.5346561 +| epoch 2 | 1955/ 8400 batches | train loss 0.5503963 +| epoch 2 | 1959/ 8400 batches | train loss 0.4287931 +| epoch 2 | 1963/ 8400 batches | train loss 0.5419626 +| epoch 2 | 1967/ 8400 batches | train loss 0.4463885 +| epoch 2 | 1971/ 8400 batches | train loss 0.4626538 +| epoch 2 | 1975/ 8400 batches | train loss 0.4146925 +| epoch 2 | 1979/ 8400 batches | train loss 0.4161415 +| epoch 2 | 1983/ 8400 batches | train loss 0.4822129 +| epoch 2 | 1987/ 8400 batches | train loss 0.4448012 +| epoch 2 | 1991/ 8400 batches | train loss 0.4413242 +| epoch 2 | 1995/ 8400 batches | train loss 0.4917670 +| epoch 2 | 1999/ 8400 batches | train loss 0.4742963 +| epoch 2 | 2003/ 8400 batches | train loss 0.4139876 +| epoch 2 | 2007/ 8400 batches | train loss 0.5582055 +| epoch 2 | 2011/ 8400 batches | train loss 0.4431755 +| epoch 2 | 2015/ 8400 batches | train loss 0.4207576 +| epoch 2 | 2019/ 8400 batches | train loss 0.3958949 +| epoch 2 | 2023/ 8400 batches | train loss 0.4760965 +| epoch 2 | 2027/ 8400 batches | train loss 0.5027477 +| epoch 2 | 2031/ 8400 batches | train loss 0.4149530 +| epoch 2 | 2035/ 8400 batches | train loss 0.4750271 +| epoch 2 | 2039/ 8400 batches | train loss 0.5292153 +| epoch 2 | 2043/ 8400 batches | train loss 0.4966862 +| epoch 2 | 2047/ 8400 batches | train loss 0.5698121 +| epoch 2 | 2051/ 8400 batches | train loss 0.3889751 +| epoch 2 | 2055/ 8400 batches | train loss 0.4397038 +| epoch 2 | 2059/ 8400 batches | train loss 0.4470300 +| epoch 2 | 2063/ 8400 batches | train loss 0.3727342 +| epoch 2 | 2067/ 8400 batches | train loss 0.3188586 +| epoch 2 | 2071/ 8400 batches | train loss 0.5607458 +| epoch 2 | 2075/ 8400 batches | train loss 0.4731276 +| epoch 2 | 2079/ 8400 batches | train loss 0.3887605 +| epoch 2 | 2083/ 8400 batches | train loss 0.5339062 +| epoch 2 | 2087/ 8400 batches | train loss 0.5356481 +| epoch 2 | 2091/ 8400 batches | train loss 0.4814438 +| epoch 2 | 2095/ 8400 batches | train loss 0.4101045 +| epoch 2 | 2099/ 8400 batches | train loss 0.4701767 +| epoch 2 | 2103/ 8400 batches | train loss 0.4122107 +| epoch 2 | 2107/ 8400 batches | train loss 0.4508008 +| epoch 2 | 2111/ 8400 batches | train loss 0.4667869 +| epoch 2 | 2115/ 8400 batches | train loss 0.2143324 +| epoch 2 | 2119/ 8400 batches | train loss 0.3947688 +| epoch 2 | 2123/ 8400 batches | train loss 0.4373552 +| epoch 2 | 2127/ 8400 batches | train loss 0.4537326 +| epoch 2 | 2131/ 8400 batches | train loss 0.4559444 +| epoch 2 | 2135/ 8400 batches | train loss 0.4616030 +| epoch 2 | 2139/ 8400 batches | train loss 0.3662320 +| epoch 2 | 2143/ 8400 batches | train loss 0.5719053 +| epoch 2 | 2147/ 8400 batches | train loss 0.4585846 +| epoch 2 | 2151/ 8400 batches | train loss 0.4105836 +| epoch 2 | 2155/ 8400 batches | train loss 0.5092403 +| epoch 2 | 2159/ 8400 batches | train loss 0.4731986 +| epoch 2 | 2163/ 8400 batches | train loss 0.4637911 +| epoch 2 | 2167/ 8400 batches | train loss 0.4475716 +| epoch 2 | 2171/ 8400 batches | train loss 0.4768647 +| epoch 2 | 2175/ 8400 batches | train loss 0.4862597 +| epoch 2 | 2179/ 8400 batches | train loss 0.4596297 +| epoch 2 | 2183/ 8400 batches | train loss 0.4336993 +| epoch 2 | 2187/ 8400 batches | train loss 0.3734425 +| epoch 2 | 2191/ 8400 batches | train loss 0.4670311 +| epoch 2 | 2195/ 8400 batches | train loss 0.4298537 +| epoch 2 | 2199/ 8400 batches | train loss 0.4723849 +| epoch 2 | 2203/ 8400 batches | train loss 0.5162256 +| epoch 2 | 2207/ 8400 batches | train loss 0.4313271 +| epoch 2 | 2211/ 8400 batches | train loss 0.4430626 +| epoch 2 | 2215/ 8400 batches | train loss 0.4497205 +| epoch 2 | 2219/ 8400 batches | train loss 0.4169797 +| epoch 2 | 2223/ 8400 batches | train loss 0.3818238 +| epoch 2 | 2227/ 8400 batches | train loss 0.4504856 +| epoch 2 | 2231/ 8400 batches | train loss 0.3964670 +| epoch 2 | 2235/ 8400 batches | train loss 0.3951694 +| epoch 2 | 2239/ 8400 batches | train loss 0.4513684 +| epoch 2 | 2243/ 8400 batches | train loss 0.4928649 +| epoch 2 | 2247/ 8400 batches | train loss 0.4825702 +| epoch 2 | 2251/ 8400 batches | train loss 0.4574444 +| epoch 2 | 2255/ 8400 batches | train loss 0.4090469 +| epoch 2 | 2259/ 8400 batches | train loss 0.4060106 +| epoch 2 | 2263/ 8400 batches | train loss 0.3959404 +| epoch 2 | 2267/ 8400 batches | train loss 0.5127560 +| epoch 2 | 2271/ 8400 batches | train loss 0.3728122 +| epoch 2 | 2275/ 8400 batches | train loss 0.4344850 +| epoch 2 | 2279/ 8400 batches | train loss 0.4515443 +| epoch 2 | 2283/ 8400 batches | train loss 0.4058890 +| epoch 2 | 2287/ 8400 batches | train loss 0.5123663 +| epoch 2 | 2291/ 8400 batches | train loss 0.5560341 +| epoch 2 | 2295/ 8400 batches | train loss 0.3791836 +| epoch 2 | 2299/ 8400 batches | train loss 0.3373597 +| epoch 2 | 2303/ 8400 batches | train loss 0.5236833 +| epoch 2 | 2307/ 8400 batches | train loss 0.4813696 +| epoch 2 | 2311/ 8400 batches | train loss 0.3737037 +| epoch 2 | 2315/ 8400 batches | train loss 0.3725401 +| epoch 2 | 2319/ 8400 batches | train loss 0.4034603 +| epoch 2 | 2323/ 8400 batches | train loss 0.4832204 +| epoch 2 | 2327/ 8400 batches | train loss 0.5333899 +| epoch 2 | 2331/ 8400 batches | train loss 0.4723452 +| epoch 2 | 2335/ 8400 batches | train loss 0.4705627 +| epoch 2 | 2339/ 8400 batches | train loss 0.3329767 +| epoch 2 | 2343/ 8400 batches | train loss 0.4100243 +| epoch 2 | 2347/ 8400 batches | train loss 0.3725949 +| epoch 2 | 2351/ 8400 batches | train loss 0.4263190 +| epoch 2 | 2355/ 8400 batches | train loss 0.5240631 +| epoch 2 | 2359/ 8400 batches | train loss 0.4374815 +| epoch 2 | 2363/ 8400 batches | train loss 0.4712237 +| epoch 2 | 2367/ 8400 batches | train loss 0.5331589 +| epoch 2 | 2371/ 8400 batches | train loss 0.5267990 +| epoch 2 | 2375/ 8400 batches | train loss 0.4918198 +| epoch 2 | 2379/ 8400 batches | train loss 0.4619983 +| epoch 2 | 2383/ 8400 batches | train loss 0.3785875 +| epoch 2 | 2387/ 8400 batches | train loss 0.4411524 +| epoch 2 | 2391/ 8400 batches | train loss 0.4256923 +| epoch 2 | 2395/ 8400 batches | train loss 0.3076473 +| epoch 2 | 2399/ 8400 batches | train loss 0.5009260 +| epoch 2 | 2403/ 8400 batches | train loss 0.4876156 +| epoch 2 | 2407/ 8400 batches | train loss 0.4308974 +| epoch 2 | 2411/ 8400 batches | train loss 0.5318623 +| epoch 2 | 2415/ 8400 batches | train loss 0.3626277 +| epoch 2 | 2419/ 8400 batches | train loss 0.4231309 +| epoch 2 | 2423/ 8400 batches | train loss 0.3579985 +| epoch 2 | 2427/ 8400 batches | train loss 0.4682678 +| epoch 2 | 2431/ 8400 batches | train loss 0.4244424 +| epoch 2 | 2435/ 8400 batches | train loss 0.4564338 +| epoch 2 | 2439/ 8400 batches | train loss 0.3757017 +| epoch 2 | 2443/ 8400 batches | train loss 0.3983319 +| epoch 2 | 2447/ 8400 batches | train loss 0.4779848 +| epoch 2 | 2451/ 8400 batches | train loss 0.3960901 +| epoch 2 | 2455/ 8400 batches | train loss 0.5562279 +| epoch 2 | 2459/ 8400 batches | train loss 0.4597537 +| epoch 2 | 2463/ 8400 batches | train loss 0.4376361 +| epoch 2 | 2467/ 8400 batches | train loss 0.4283309 +| epoch 2 | 2471/ 8400 batches | train loss 0.3831564 +| epoch 2 | 2475/ 8400 batches | train loss 0.4103609 +| epoch 2 | 2479/ 8400 batches | train loss 0.3543527 +| epoch 2 | 2483/ 8400 batches | train loss 0.5691264 +| epoch 2 | 2487/ 8400 batches | train loss 0.4151757 +| epoch 2 | 2491/ 8400 batches | train loss 0.4558417 +| epoch 2 | 2495/ 8400 batches | train loss 0.4616710 +| epoch 2 | 2499/ 8400 batches | train loss 0.5183980 +| epoch 2 | 2503/ 8400 batches | train loss 0.4379488 +| epoch 2 | 2507/ 8400 batches | train loss 0.4138105 +| epoch 2 | 2511/ 8400 batches | train loss 0.3361149 +| epoch 2 | 2515/ 8400 batches | train loss 0.4300939 +| epoch 2 | 2519/ 8400 batches | train loss 0.4751780 +| epoch 2 | 2523/ 8400 batches | train loss 0.4211682 +| epoch 2 | 2527/ 8400 batches | train loss 0.4434839 +| epoch 2 | 2531/ 8400 batches | train loss 0.4416737 +| epoch 2 | 2535/ 8400 batches | train loss 0.4451517 +| epoch 2 | 2539/ 8400 batches | train loss 0.4702888 +| epoch 2 | 2543/ 8400 batches | train loss 0.3904930 +| epoch 2 | 2547/ 8400 batches | train loss 0.4303445 +| epoch 2 | 2551/ 8400 batches | train loss 0.3927836 +| epoch 2 | 2555/ 8400 batches | train loss 0.4349579 +| epoch 2 | 2559/ 8400 batches | train loss 0.3610560 +| epoch 2 | 2563/ 8400 batches | train loss 0.4330505 +| epoch 2 | 2567/ 8400 batches | train loss 0.4858794 +| epoch 2 | 2571/ 8400 batches | train loss 0.4886055 +| epoch 2 | 2575/ 8400 batches | train loss 0.5184703 +| epoch 2 | 2579/ 8400 batches | train loss 0.4556911 +| epoch 2 | 2583/ 8400 batches | train loss 0.5403132 +| epoch 2 | 2587/ 8400 batches | train loss 0.4723220 +| epoch 2 | 2591/ 8400 batches | train loss 0.4997066 +| epoch 2 | 2595/ 8400 batches | train loss 0.5000489 +| epoch 2 | 2599/ 8400 batches | train loss 0.5090556 +| epoch 2 | 2603/ 8400 batches | train loss 0.5045449 +| epoch 2 | 2607/ 8400 batches | train loss 0.5235513 +| epoch 2 | 2611/ 8400 batches | train loss 0.3957551 +| epoch 2 | 2615/ 8400 batches | train loss 0.4708627 +| epoch 2 | 2619/ 8400 batches | train loss 0.4572501 +| epoch 2 | 2623/ 8400 batches | train loss 0.4785057 +| epoch 2 | 2627/ 8400 batches | train loss 0.5041785 +| epoch 2 | 2631/ 8400 batches | train loss 0.5193830 +| epoch 2 | 2635/ 8400 batches | train loss 0.4390087 +| epoch 2 | 2639/ 8400 batches | train loss 0.5831690 +| epoch 2 | 2643/ 8400 batches | train loss 0.5178361 +| epoch 2 | 2647/ 8400 batches | train loss 0.3740042 +| epoch 2 | 2651/ 8400 batches | train loss 0.5135870 +| epoch 2 | 2655/ 8400 batches | train loss 0.3942801 +| epoch 2 | 2659/ 8400 batches | train loss 0.4371707 +| epoch 2 | 2663/ 8400 batches | train loss 0.4285258 +| epoch 2 | 2667/ 8400 batches | train loss 0.4941616 +| epoch 2 | 2671/ 8400 batches | train loss 0.3867606 +| epoch 2 | 2675/ 8400 batches | train loss 0.4475248 +| epoch 2 | 2679/ 8400 batches | train loss 0.3320869 +| epoch 2 | 2683/ 8400 batches | train loss 0.4807770 +| epoch 2 | 2687/ 8400 batches | train loss 0.5180917 +| epoch 2 | 2691/ 8400 batches | train loss 0.4300506 +| epoch 2 | 2695/ 8400 batches | train loss 0.4317939 +| epoch 2 | 2699/ 8400 batches | train loss 0.4443393 +| epoch 2 | 2703/ 8400 batches | train loss 0.5357560 +| epoch 2 | 2707/ 8400 batches | train loss 0.4358030 +| epoch 2 | 2711/ 8400 batches | train loss 0.4751561 +| epoch 2 | 2715/ 8400 batches | train loss 0.4277520 +| epoch 2 | 2719/ 8400 batches | train loss 0.4164885 +| epoch 2 | 2723/ 8400 batches | train loss 0.4506117 +| epoch 2 | 2727/ 8400 batches | train loss 0.4321839 +| epoch 2 | 2731/ 8400 batches | train loss 0.4312314 +| epoch 2 | 2735/ 8400 batches | train loss 0.3950862 +| epoch 2 | 2739/ 8400 batches | train loss 0.4306426 +| epoch 2 | 2743/ 8400 batches | train loss 0.4674922 +| epoch 2 | 2747/ 8400 batches | train loss 0.4805770 +| epoch 2 | 2751/ 8400 batches | train loss 0.4831257 +| epoch 2 | 2755/ 8400 batches | train loss 0.4817277 +| epoch 2 | 2759/ 8400 batches | train loss 0.4310803 +| epoch 2 | 2763/ 8400 batches | train loss 0.4508791 +| epoch 2 | 2767/ 8400 batches | train loss 0.4712352 +| epoch 2 | 2771/ 8400 batches | train loss 0.5582775 +| epoch 2 | 2775/ 8400 batches | train loss 0.5508716 +| epoch 2 | 2779/ 8400 batches | train loss 0.5180107 +| epoch 2 | 2783/ 8400 batches | train loss 0.4264150 +| epoch 2 | 2787/ 8400 batches | train loss 0.4656515 +| epoch 2 | 2791/ 8400 batches | train loss 0.3497076 +| epoch 2 | 2795/ 8400 batches | train loss 0.4485473 +| epoch 2 | 2799/ 8400 batches | train loss 0.4530905 +| epoch 2 | 2803/ 8400 batches | train loss 0.4325117 +| epoch 2 | 2807/ 8400 batches | train loss 0.4886319 +| epoch 2 | 2811/ 8400 batches | train loss 0.4344096 +| epoch 2 | 2815/ 8400 batches | train loss 0.4928180 +| epoch 2 | 2819/ 8400 batches | train loss 0.4483256 +| epoch 2 | 2823/ 8400 batches | train loss 0.4081751 +| epoch 2 | 2827/ 8400 batches | train loss 0.3725102 +| epoch 2 | 2831/ 8400 batches | train loss 0.5219651 +| epoch 2 | 2835/ 8400 batches | train loss 0.5058998 +| epoch 2 | 2839/ 8400 batches | train loss 0.5843010 +| epoch 2 | 2843/ 8400 batches | train loss 0.4584443 +| epoch 2 | 2847/ 8400 batches | train loss 0.3840663 +| epoch 2 | 2851/ 8400 batches | train loss 0.4877275 +| epoch 2 | 2855/ 8400 batches | train loss 0.4117020 +| epoch 2 | 2859/ 8400 batches | train loss 0.4641625 +| epoch 2 | 2863/ 8400 batches | train loss 0.4184980 +| epoch 2 | 2867/ 8400 batches | train loss 0.4959276 +| epoch 2 | 2871/ 8400 batches | train loss 0.4092624 +| epoch 2 | 2875/ 8400 batches | train loss 0.4982584 +| epoch 2 | 2879/ 8400 batches | train loss 0.5247439 +| epoch 2 | 2883/ 8400 batches | train loss 0.4652785 +| epoch 2 | 2887/ 8400 batches | train loss 0.4150716 +| epoch 2 | 2891/ 8400 batches | train loss 0.4075310 +| epoch 2 | 2895/ 8400 batches | train loss 0.4404382 +| epoch 2 | 2899/ 8400 batches | train loss 0.4278298 +| epoch 2 | 2903/ 8400 batches | train loss 0.4484103 +| epoch 2 | 2907/ 8400 batches | train loss 0.3707625 +| epoch 2 | 2911/ 8400 batches | train loss 0.4907174 +| epoch 2 | 2915/ 8400 batches | train loss 0.4946946 +| epoch 2 | 2919/ 8400 batches | train loss 0.4390634 +| epoch 2 | 2923/ 8400 batches | train loss 0.3483346 +| epoch 2 | 2927/ 8400 batches | train loss 0.5077058 +| epoch 2 | 2931/ 8400 batches | train loss 0.4573272 +| epoch 2 | 2935/ 8400 batches | train loss 0.3673048 +| epoch 2 | 2939/ 8400 batches | train loss 0.4479364 +| epoch 2 | 2943/ 8400 batches | train loss 0.4130884 +| epoch 2 | 2947/ 8400 batches | train loss 0.3733097 +| epoch 2 | 2951/ 8400 batches | train loss 0.4798008 +| epoch 2 | 2955/ 8400 batches | train loss 0.3855757 +| epoch 2 | 2959/ 8400 batches | train loss 0.4902949 +| epoch 2 | 2963/ 8400 batches | train loss 0.4441907 +| epoch 2 | 2967/ 8400 batches | train loss 0.4096851 +| epoch 2 | 2971/ 8400 batches | train loss 0.5144622 +| epoch 2 | 2975/ 8400 batches | train loss 0.4115358 +| epoch 2 | 2979/ 8400 batches | train loss 0.6748941 +| epoch 2 | 2983/ 8400 batches | train loss 0.5024633 +| epoch 2 | 2987/ 8400 batches | train loss 0.4045162 +| epoch 2 | 2991/ 8400 batches | train loss 0.3709346 +| epoch 2 | 2995/ 8400 batches | train loss 0.5052501 +| epoch 2 | 2999/ 8400 batches | train loss 0.4035032 +| epoch 2 | 3003/ 8400 batches | train loss 0.3622878 +| epoch 2 | 3007/ 8400 batches | train loss 0.4929671 +| epoch 2 | 3011/ 8400 batches | train loss 0.3963973 +| epoch 2 | 3015/ 8400 batches | train loss 0.5657131 +| epoch 2 | 3019/ 8400 batches | train loss 0.4157056 +| epoch 2 | 3023/ 8400 batches | train loss 0.3882734 +| epoch 2 | 3027/ 8400 batches | train loss 0.4478904 +| epoch 2 | 3031/ 8400 batches | train loss 0.3227888 +| epoch 2 | 3035/ 8400 batches | train loss 0.5369349 +| epoch 2 | 3039/ 8400 batches | train loss 0.4924745 +| epoch 2 | 3043/ 8400 batches | train loss 0.4228534 +| epoch 2 | 3047/ 8400 batches | train loss 0.3716927 +| epoch 2 | 3051/ 8400 batches | train loss 0.4104903 +| epoch 2 | 3055/ 8400 batches | train loss 0.3956451 +| epoch 2 | 3059/ 8400 batches | train loss 0.3932646 +| epoch 2 | 3063/ 8400 batches | train loss 0.4750467 +| epoch 2 | 3067/ 8400 batches | train loss 0.3873146 +| epoch 2 | 3071/ 8400 batches | train loss 0.6478589 +| epoch 2 | 3075/ 8400 batches | train loss 0.4285935 +| epoch 2 | 3079/ 8400 batches | train loss 0.2781633 +| epoch 2 | 3083/ 8400 batches | train loss 0.4934765 +| epoch 2 | 3087/ 8400 batches | train loss 0.3701271 +| epoch 2 | 3091/ 8400 batches | train loss 0.4716410 +| epoch 2 | 3095/ 8400 batches | train loss 0.3254571 +| epoch 2 | 3099/ 8400 batches | train loss 0.4492360 +| epoch 2 | 3103/ 8400 batches | train loss 0.4457809 +| epoch 2 | 3107/ 8400 batches | train loss 0.4130289 +| epoch 2 | 3111/ 8400 batches | train loss 0.4635864 +| epoch 2 | 3115/ 8400 batches | train loss 0.4598036 +| epoch 2 | 3119/ 8400 batches | train loss 0.5011390 +| epoch 2 | 3123/ 8400 batches | train loss 0.3297785 +| epoch 2 | 3127/ 8400 batches | train loss 0.4577759 +| epoch 2 | 3131/ 8400 batches | train loss 0.4635099 +| epoch 2 | 3135/ 8400 batches | train loss 0.5237479 +| epoch 2 | 3139/ 8400 batches | train loss 0.4745194 +| epoch 2 | 3143/ 8400 batches | train loss 0.3641829 +| epoch 2 | 3147/ 8400 batches | train loss 0.4883734 +| epoch 2 | 3151/ 8400 batches | train loss 0.4471112 +| epoch 2 | 3155/ 8400 batches | train loss 0.4749020 +| epoch 2 | 3159/ 8400 batches | train loss 0.4880615 +| epoch 2 | 3163/ 8400 batches | train loss 0.3498767 +| epoch 2 | 3167/ 8400 batches | train loss 0.4509004 +| epoch 2 | 3171/ 8400 batches | train loss 0.4799891 +| epoch 2 | 3175/ 8400 batches | train loss 0.3928769 +| epoch 2 | 3179/ 8400 batches | train loss 0.5041875 +| epoch 2 | 3183/ 8400 batches | train loss 0.3665187 +| epoch 2 | 3187/ 8400 batches | train loss 0.3920051 +| epoch 2 | 3191/ 8400 batches | train loss 0.4790118 +| epoch 2 | 3195/ 8400 batches | train loss 0.4413741 +| epoch 2 | 3199/ 8400 batches | train loss 0.4690083 +| epoch 2 | 3203/ 8400 batches | train loss 0.4320871 +| epoch 2 | 3207/ 8400 batches | train loss 0.4530165 +| epoch 2 | 3211/ 8400 batches | train loss 0.3950751 +| epoch 2 | 3215/ 8400 batches | train loss 0.3630276 +| epoch 2 | 3219/ 8400 batches | train loss 0.3517985 +| epoch 2 | 3223/ 8400 batches | train loss 0.5181034 +| epoch 2 | 3227/ 8400 batches | train loss 0.4291772 +| epoch 2 | 3231/ 8400 batches | train loss 0.4054488 +| epoch 2 | 3235/ 8400 batches | train loss 0.4083020 +| epoch 2 | 3239/ 8400 batches | train loss 0.4151283 +| epoch 2 | 3243/ 8400 batches | train loss 0.4032918 +| epoch 2 | 3247/ 8400 batches | train loss 0.4174827 +| epoch 2 | 3251/ 8400 batches | train loss 0.5101228 +| epoch 2 | 3255/ 8400 batches | train loss 0.5237022 +| epoch 2 | 3259/ 8400 batches | train loss 0.4688475 +| epoch 2 | 3263/ 8400 batches | train loss 0.4632887 +| epoch 2 | 3267/ 8400 batches | train loss 0.4627576 +| epoch 2 | 3271/ 8400 batches | train loss 0.5243457 +| epoch 2 | 3275/ 8400 batches | train loss 0.4495178 +| epoch 2 | 3279/ 8400 batches | train loss 0.4859238 +| epoch 2 | 3283/ 8400 batches | train loss 0.4362285 +| epoch 2 | 3287/ 8400 batches | train loss 0.4094796 +| epoch 2 | 3291/ 8400 batches | train loss 0.4326444 +| epoch 2 | 3295/ 8400 batches | train loss 0.4216893 +| epoch 2 | 3299/ 8400 batches | train loss 0.5119387 +| epoch 2 | 3303/ 8400 batches | train loss 0.4498487 +| epoch 2 | 3307/ 8400 batches | train loss 0.4516041 +| epoch 2 | 3311/ 8400 batches | train loss 0.4594470 +| epoch 2 | 3315/ 8400 batches | train loss 0.3816240 +| epoch 2 | 3319/ 8400 batches | train loss 0.6720239 +| epoch 2 | 3323/ 8400 batches | train loss 0.5823525 +| epoch 2 | 3327/ 8400 batches | train loss 0.4298376 +| epoch 2 | 3331/ 8400 batches | train loss 0.4877788 +| epoch 2 | 3335/ 8400 batches | train loss 0.4392535 +| epoch 2 | 3339/ 8400 batches | train loss 0.3096930 +| epoch 2 | 3343/ 8400 batches | train loss 0.3424310 +| epoch 2 | 3347/ 8400 batches | train loss 0.4668173 +| epoch 2 | 3351/ 8400 batches | train loss 0.4585240 +| epoch 2 | 3355/ 8400 batches | train loss 0.4213493 +| epoch 2 | 3359/ 8400 batches | train loss 0.4939837 +| epoch 2 | 3363/ 8400 batches | train loss 0.4092460 +| epoch 2 | 3367/ 8400 batches | train loss 0.4064318 +| epoch 2 | 3371/ 8400 batches | train loss 0.4846633 +| epoch 2 | 3375/ 8400 batches | train loss 0.5560588 +| epoch 2 | 3379/ 8400 batches | train loss 0.4454869 +| epoch 2 | 3383/ 8400 batches | train loss 0.4457274 +| epoch 2 | 3387/ 8400 batches | train loss 0.5469162 +| epoch 2 | 3391/ 8400 batches | train loss 0.4458497 +| epoch 2 | 3395/ 8400 batches | train loss 0.4730256 +| epoch 2 | 3399/ 8400 batches | train loss 0.3663318 +| epoch 2 | 3403/ 8400 batches | train loss 0.4726309 +| epoch 2 | 3407/ 8400 batches | train loss 0.4763103 +| epoch 2 | 3411/ 8400 batches | train loss 0.4753350 +| epoch 2 | 3415/ 8400 batches | train loss 0.5300300 +| epoch 2 | 3419/ 8400 batches | train loss 0.4333923 +| epoch 2 | 3423/ 8400 batches | train loss 0.3762836 +| epoch 2 | 3427/ 8400 batches | train loss 0.4090953 +| epoch 2 | 3431/ 8400 batches | train loss 0.4668026 +| epoch 2 | 3435/ 8400 batches | train loss 0.4507367 +| epoch 2 | 3439/ 8400 batches | train loss 0.4178969 +| epoch 2 | 3443/ 8400 batches | train loss 0.5605090 +| epoch 2 | 3447/ 8400 batches | train loss 0.4105872 +| epoch 2 | 3451/ 8400 batches | train loss 0.4486718 +| epoch 2 | 3455/ 8400 batches | train loss 0.3615437 +| epoch 2 | 3459/ 8400 batches | train loss 0.4541378 +| epoch 2 | 3463/ 8400 batches | train loss 0.6108741 +| epoch 2 | 3467/ 8400 batches | train loss 0.4152213 +| epoch 2 | 3471/ 8400 batches | train loss 0.4427556 +| epoch 2 | 3475/ 8400 batches | train loss 0.4267649 +| epoch 2 | 3479/ 8400 batches | train loss 0.4859678 +| epoch 2 | 3483/ 8400 batches | train loss 0.3897852 +| epoch 2 | 3487/ 8400 batches | train loss 0.4281293 +| epoch 2 | 3491/ 8400 batches | train loss 0.4909867 +| epoch 2 | 3495/ 8400 batches | train loss 0.3842173 +| epoch 2 | 3499/ 8400 batches | train loss 0.3881365 +| epoch 2 | 3503/ 8400 batches | train loss 0.4284868 +| epoch 2 | 3507/ 8400 batches | train loss 0.4450807 +| epoch 2 | 3511/ 8400 batches | train loss 0.4542865 +| epoch 2 | 3515/ 8400 batches | train loss 0.4495303 +| epoch 2 | 3519/ 8400 batches | train loss 0.3948452 +| epoch 2 | 3523/ 8400 batches | train loss 0.4570103 +| epoch 2 | 3527/ 8400 batches | train loss 0.4923691 +| epoch 2 | 3531/ 8400 batches | train loss 0.4131570 +| epoch 2 | 3535/ 8400 batches | train loss 0.4356120 +| epoch 2 | 3539/ 8400 batches | train loss 0.3828278 +| epoch 2 | 3543/ 8400 batches | train loss 0.2731416 +| epoch 2 | 3547/ 8400 batches | train loss 0.4387490 +| epoch 2 | 3551/ 8400 batches | train loss 0.4444008 +| epoch 2 | 3555/ 8400 batches | train loss 0.4158837 +| epoch 2 | 3559/ 8400 batches | train loss 0.4416153 +| epoch 2 | 3563/ 8400 batches | train loss 0.3753605 +| epoch 2 | 3567/ 8400 batches | train loss 0.4611140 +| epoch 2 | 3571/ 8400 batches | train loss 0.4466306 +| epoch 2 | 3575/ 8400 batches | train loss 0.3497829 +| epoch 2 | 3579/ 8400 batches | train loss 0.4624124 +| epoch 2 | 3583/ 8400 batches | train loss 0.4191659 +| epoch 2 | 3587/ 8400 batches | train loss 0.4304573 +| epoch 2 | 3591/ 8400 batches | train loss 0.4592211 +| epoch 2 | 3595/ 8400 batches | train loss 0.4174759 +| epoch 2 | 3599/ 8400 batches | train loss 0.4355614 +| epoch 2 | 3603/ 8400 batches | train loss 0.4510855 +| epoch 2 | 3607/ 8400 batches | train loss 0.3966278 +| epoch 2 | 3611/ 8400 batches | train loss 0.3538517 +| epoch 2 | 3615/ 8400 batches | train loss 0.4143755 +| epoch 2 | 3619/ 8400 batches | train loss 0.4548391 +| epoch 2 | 3623/ 8400 batches | train loss 0.4764610 +| epoch 2 | 3627/ 8400 batches | train loss 0.4943201 +| epoch 2 | 3631/ 8400 batches | train loss 0.4136826 +| epoch 2 | 3635/ 8400 batches | train loss 0.3514799 +| epoch 2 | 3639/ 8400 batches | train loss 0.5122958 +| epoch 2 | 3643/ 8400 batches | train loss 0.5039941 +| epoch 2 | 3647/ 8400 batches | train loss 0.4597276 +| epoch 2 | 3651/ 8400 batches | train loss 0.4132062 +| epoch 2 | 3655/ 8400 batches | train loss 0.4264012 +| epoch 2 | 3659/ 8400 batches | train loss 0.4049883 +| epoch 2 | 3663/ 8400 batches | train loss 0.4844509 +| epoch 2 | 3667/ 8400 batches | train loss 0.4997740 +| epoch 2 | 3671/ 8400 batches | train loss 0.3916408 +| epoch 2 | 3675/ 8400 batches | train loss 0.3800414 +| epoch 2 | 3679/ 8400 batches | train loss 0.4604605 +| epoch 2 | 3683/ 8400 batches | train loss 0.5078024 +| epoch 2 | 3687/ 8400 batches | train loss 0.4328043 +| epoch 2 | 3691/ 8400 batches | train loss 0.4842319 +| epoch 2 | 3695/ 8400 batches | train loss 0.3957960 +| epoch 2 | 3699/ 8400 batches | train loss 0.4826290 +| epoch 2 | 3703/ 8400 batches | train loss 0.5271987 +| epoch 2 | 3707/ 8400 batches | train loss 0.3513002 +| epoch 2 | 3711/ 8400 batches | train loss 0.4567431 +| epoch 2 | 3715/ 8400 batches | train loss 0.4405571 +| epoch 2 | 3719/ 8400 batches | train loss 0.4542456 +| epoch 2 | 3723/ 8400 batches | train loss 0.4638853 +| epoch 2 | 3727/ 8400 batches | train loss 0.3562478 +| epoch 2 | 3731/ 8400 batches | train loss 0.4424441 +| epoch 2 | 3735/ 8400 batches | train loss 0.4576424 +| epoch 2 | 3739/ 8400 batches | train loss 0.4608973 +| epoch 2 | 3743/ 8400 batches | train loss 0.5259956 +| epoch 2 | 3747/ 8400 batches | train loss 0.4336258 +| epoch 2 | 3751/ 8400 batches | train loss 0.4974649 +| epoch 2 | 3755/ 8400 batches | train loss 0.4551476 +| epoch 2 | 3759/ 8400 batches | train loss 0.3465658 +| epoch 2 | 3763/ 8400 batches | train loss 0.4783941 +| epoch 2 | 3767/ 8400 batches | train loss 0.4451891 +| epoch 2 | 3771/ 8400 batches | train loss 0.4195932 +| epoch 2 | 3775/ 8400 batches | train loss 0.3321321 +| epoch 2 | 3779/ 8400 batches | train loss 0.3584206 +| epoch 2 | 3783/ 8400 batches | train loss 0.2934348 +| epoch 2 | 3787/ 8400 batches | train loss 0.5631465 +| epoch 2 | 3791/ 8400 batches | train loss 0.5880545 +| epoch 2 | 3795/ 8400 batches | train loss 0.4873764 +| epoch 2 | 3799/ 8400 batches | train loss 0.2857625 +| epoch 2 | 3803/ 8400 batches | train loss 0.4494474 +| epoch 2 | 3807/ 8400 batches | train loss 0.3925178 +| epoch 2 | 3811/ 8400 batches | train loss 0.4056070 +| epoch 2 | 3815/ 8400 batches | train loss 0.4242555 +| epoch 2 | 3819/ 8400 batches | train loss 0.3568439 +| epoch 2 | 3823/ 8400 batches | train loss 0.4495752 +| epoch 2 | 3827/ 8400 batches | train loss 0.4508602 +| epoch 2 | 3831/ 8400 batches | train loss 0.4202166 +| epoch 2 | 3835/ 8400 batches | train loss 0.5061828 +| epoch 2 | 3839/ 8400 batches | train loss 0.4097368 +| epoch 2 | 3843/ 8400 batches | train loss 0.4518554 +| epoch 2 | 3847/ 8400 batches | train loss 0.4297361 +| epoch 2 | 3851/ 8400 batches | train loss 0.4123600 +| epoch 2 | 3855/ 8400 batches | train loss 0.4716157 +| epoch 2 | 3859/ 8400 batches | train loss 0.4702070 +| epoch 2 | 3863/ 8400 batches | train loss 0.3169357 +| epoch 2 | 3867/ 8400 batches | train loss 0.3685609 +| epoch 2 | 3871/ 8400 batches | train loss 0.3813704 +| epoch 2 | 3875/ 8400 batches | train loss 0.4449620 +| epoch 2 | 3879/ 8400 batches | train loss 0.4681274 +| epoch 2 | 3883/ 8400 batches | train loss 0.4676408 +| epoch 2 | 3887/ 8400 batches | train loss 0.4984494 +| epoch 2 | 3891/ 8400 batches | train loss 0.4147111 +| epoch 2 | 3895/ 8400 batches | train loss 0.4158050 +| epoch 2 | 3899/ 8400 batches | train loss 0.4665437 +| epoch 2 | 3903/ 8400 batches | train loss 0.2914380 +| epoch 2 | 3907/ 8400 batches | train loss 0.3823145 +| epoch 2 | 3911/ 8400 batches | train loss 0.4568275 +| epoch 2 | 3915/ 8400 batches | train loss 0.2052893 +| epoch 2 | 3919/ 8400 batches | train loss 0.3834457 +| epoch 2 | 3923/ 8400 batches | train loss 0.4367307 +| epoch 2 | 3927/ 8400 batches | train loss 0.4613521 +| epoch 2 | 3931/ 8400 batches | train loss 0.4219735 +| epoch 2 | 3935/ 8400 batches | train loss 0.3788032 +| epoch 2 | 3939/ 8400 batches | train loss 0.4323767 +| epoch 2 | 3943/ 8400 batches | train loss 0.3782243 +| epoch 2 | 3947/ 8400 batches | train loss 0.4887464 +| epoch 2 | 3951/ 8400 batches | train loss 0.4247877 +| epoch 2 | 3955/ 8400 batches | train loss 0.3538231 +| epoch 2 | 3959/ 8400 batches | train loss 0.5198207 +| epoch 2 | 3963/ 8400 batches | train loss 0.3616979 +| epoch 2 | 3967/ 8400 batches | train loss 0.3897806 +| epoch 2 | 3971/ 8400 batches | train loss 0.5073389 +| epoch 2 | 3975/ 8400 batches | train loss 0.4793053 +| epoch 2 | 3979/ 8400 batches | train loss 0.3894845 +| epoch 2 | 3983/ 8400 batches | train loss 0.5038897 +| epoch 2 | 3987/ 8400 batches | train loss 0.4355246 +| epoch 2 | 3991/ 8400 batches | train loss 0.5165173 +| epoch 2 | 3995/ 8400 batches | train loss 0.3724920 +| epoch 2 | 3999/ 8400 batches | train loss 0.4693987 +| epoch 2 | 4003/ 8400 batches | train loss 0.5037565 +| epoch 2 | 4007/ 8400 batches | train loss 0.4298415 +| epoch 2 | 4011/ 8400 batches | train loss 0.4409519 +| epoch 2 | 4015/ 8400 batches | train loss 0.4405621 +| epoch 2 | 4019/ 8400 batches | train loss 0.4286501 +| epoch 2 | 4023/ 8400 batches | train loss 0.4797127 +| epoch 2 | 4027/ 8400 batches | train loss 0.5281890 +| epoch 2 | 4031/ 8400 batches | train loss 0.4223961 +| epoch 2 | 4035/ 8400 batches | train loss 0.5242138 +| epoch 2 | 4039/ 8400 batches | train loss 0.4453453 +| epoch 2 | 4043/ 8400 batches | train loss 0.4742182 +| epoch 2 | 4047/ 8400 batches | train loss 0.4396483 +| epoch 2 | 4051/ 8400 batches | train loss 0.2824690 +| epoch 2 | 4055/ 8400 batches | train loss 0.4819787 +| epoch 2 | 4059/ 8400 batches | train loss 0.4077154 +| epoch 2 | 4063/ 8400 batches | train loss 0.5979187 +| epoch 2 | 4067/ 8400 batches | train loss 0.4224109 +| epoch 2 | 4071/ 8400 batches | train loss 0.4808698 +| epoch 2 | 4075/ 8400 batches | train loss 0.4757785 +| epoch 2 | 4079/ 8400 batches | train loss 0.4229229 +| epoch 2 | 4083/ 8400 batches | train loss 0.3676037 +| epoch 2 | 4087/ 8400 batches | train loss 0.4149249 +| epoch 2 | 4091/ 8400 batches | train loss 0.4777817 +| epoch 2 | 4095/ 8400 batches | train loss 0.4585907 +| epoch 2 | 4099/ 8400 batches | train loss 0.4928219 +| epoch 2 | 4103/ 8400 batches | train loss 0.4687359 +| epoch 2 | 4107/ 8400 batches | train loss 0.5654104 +| epoch 2 | 4111/ 8400 batches | train loss 0.5018489 +| epoch 2 | 4115/ 8400 batches | train loss 0.4586946 +| epoch 2 | 4119/ 8400 batches | train loss 0.5179210 +| epoch 2 | 4123/ 8400 batches | train loss 0.4352705 +| epoch 2 | 4127/ 8400 batches | train loss 0.4522666 +| epoch 2 | 4131/ 8400 batches | train loss 0.4216726 +| epoch 2 | 4135/ 8400 batches | train loss 0.4838217 +| epoch 2 | 4139/ 8400 batches | train loss 0.5090454 +| epoch 2 | 4143/ 8400 batches | train loss 0.4033250 +| epoch 2 | 4147/ 8400 batches | train loss 0.4689070 +| epoch 2 | 4151/ 8400 batches | train loss 0.4580154 +| epoch 2 | 4155/ 8400 batches | train loss 0.4176174 +| epoch 2 | 4159/ 8400 batches | train loss 0.4715901 +| epoch 2 | 4163/ 8400 batches | train loss 0.4118267 +| epoch 2 | 4167/ 8400 batches | train loss 0.4898815 +| epoch 2 | 4171/ 8400 batches | train loss 0.4051224 +| epoch 2 | 4175/ 8400 batches | train loss 0.5731826 +| epoch 2 | 4179/ 8400 batches | train loss 0.4977944 +| epoch 2 | 4183/ 8400 batches | train loss 0.4867007 +| epoch 2 | 4187/ 8400 batches | train loss 0.4413628 +| epoch 2 | 4191/ 8400 batches | train loss 0.3697398 +| epoch 2 | 4195/ 8400 batches | train loss 0.4264026 +| epoch 2 | 4199/ 8400 batches | train loss 0.4891559 +| epoch 2 | 4203/ 8400 batches | train loss 0.4201857 +| epoch 2 | 4207/ 8400 batches | train loss 0.4635115 +| epoch 2 | 4211/ 8400 batches | train loss 0.4013534 +| epoch 2 | 4215/ 8400 batches | train loss 0.4175850 +| epoch 2 | 4219/ 8400 batches | train loss 0.4032617 +| epoch 2 | 4223/ 8400 batches | train loss 0.5017274 +| epoch 2 | 4227/ 8400 batches | train loss 0.4707118 +| epoch 2 | 4231/ 8400 batches | train loss 0.4468066 +| epoch 2 | 4235/ 8400 batches | train loss 0.4755288 +| epoch 2 | 4239/ 8400 batches | train loss 0.4034959 +| epoch 2 | 4243/ 8400 batches | train loss 0.4521331 +| epoch 2 | 4247/ 8400 batches | train loss 0.4159199 +| epoch 2 | 4251/ 8400 batches | train loss 0.4405027 +| epoch 2 | 4255/ 8400 batches | train loss 0.5123083 +| epoch 2 | 4259/ 8400 batches | train loss 0.4184789 +| epoch 2 | 4263/ 8400 batches | train loss 0.5022866 +| epoch 2 | 4267/ 8400 batches | train loss 0.4900973 +| epoch 2 | 4271/ 8400 batches | train loss 0.3619776 +| epoch 2 | 4275/ 8400 batches | train loss 0.4358205 +| epoch 2 | 4279/ 8400 batches | train loss 0.4518040 +| epoch 2 | 4283/ 8400 batches | train loss 0.4932006 +| epoch 2 | 4287/ 8400 batches | train loss 0.4868020 +| epoch 2 | 4291/ 8400 batches | train loss 0.3980803 +| epoch 2 | 4295/ 8400 batches | train loss 0.3349135 +| epoch 2 | 4299/ 8400 batches | train loss 0.4130476 +| epoch 2 | 4303/ 8400 batches | train loss 0.4234064 +| epoch 2 | 4307/ 8400 batches | train loss 0.5288845 +| epoch 2 | 4311/ 8400 batches | train loss 0.3442973 +| epoch 2 | 4315/ 8400 batches | train loss 0.4717241 +| epoch 2 | 4319/ 8400 batches | train loss 0.5306035 +| epoch 2 | 4323/ 8400 batches | train loss 0.5770416 +| epoch 2 | 4327/ 8400 batches | train loss 0.4982584 +| epoch 2 | 4331/ 8400 batches | train loss 0.5173823 +| epoch 2 | 4335/ 8400 batches | train loss 0.5079656 +| epoch 2 | 4339/ 8400 batches | train loss 0.4235011 +| epoch 2 | 4343/ 8400 batches | train loss 0.4633228 +| epoch 2 | 4347/ 8400 batches | train loss 0.4779363 +| epoch 2 | 4351/ 8400 batches | train loss 0.5207191 +| epoch 2 | 4355/ 8400 batches | train loss 0.4554666 +| epoch 2 | 4359/ 8400 batches | train loss 0.4203696 +| epoch 2 | 4363/ 8400 batches | train loss 0.5157273 +| epoch 2 | 4367/ 8400 batches | train loss 0.4625092 +| epoch 2 | 4371/ 8400 batches | train loss 0.5067518 +| epoch 2 | 4375/ 8400 batches | train loss 0.4392094 +| epoch 2 | 4379/ 8400 batches | train loss 0.4593611 +| epoch 2 | 4383/ 8400 batches | train loss 0.3948419 +| epoch 2 | 4387/ 8400 batches | train loss 0.4961026 +| epoch 2 | 4391/ 8400 batches | train loss 0.4719104 +| epoch 2 | 4395/ 8400 batches | train loss 0.4470201 +| epoch 2 | 4399/ 8400 batches | train loss 0.3737329 +| epoch 2 | 4403/ 8400 batches | train loss 0.4733876 +| epoch 2 | 4407/ 8400 batches | train loss 0.4629700 +| epoch 2 | 4411/ 8400 batches | train loss 0.4253825 +| epoch 2 | 4415/ 8400 batches | train loss 0.4659115 +| epoch 2 | 4419/ 8400 batches | train loss 0.5342636 +| epoch 2 | 4423/ 8400 batches | train loss 0.3494947 +| epoch 2 | 4427/ 8400 batches | train loss 0.4246862 +| epoch 2 | 4431/ 8400 batches | train loss 0.3369654 +| epoch 2 | 4435/ 8400 batches | train loss 0.4302664 +| epoch 2 | 4439/ 8400 batches | train loss 0.4640154 +| epoch 2 | 4443/ 8400 batches | train loss 0.4793456 +| epoch 2 | 4447/ 8400 batches | train loss 0.4482301 +| epoch 2 | 4451/ 8400 batches | train loss 0.4535396 +| epoch 2 | 4455/ 8400 batches | train loss 0.4549586 +| epoch 2 | 4459/ 8400 batches | train loss 0.4165044 +| epoch 2 | 4463/ 8400 batches | train loss 0.3384400 +| epoch 2 | 4467/ 8400 batches | train loss 0.4745012 +| epoch 2 | 4471/ 8400 batches | train loss 0.4737861 +| epoch 2 | 4475/ 8400 batches | train loss 0.5024695 +| epoch 2 | 4479/ 8400 batches | train loss 0.4498761 +| epoch 2 | 4483/ 8400 batches | train loss 0.4372377 +| epoch 2 | 4487/ 8400 batches | train loss 0.5273066 +| epoch 2 | 4491/ 8400 batches | train loss 0.3490345 +| epoch 2 | 4495/ 8400 batches | train loss 0.4519284 +| epoch 2 | 4499/ 8400 batches | train loss 0.3425007 +| epoch 2 | 4503/ 8400 batches | train loss 0.4677718 +| epoch 2 | 4507/ 8400 batches | train loss 0.5130325 +| epoch 2 | 4511/ 8400 batches | train loss 0.5572674 +| epoch 2 | 4515/ 8400 batches | train loss 0.5265259 +| epoch 2 | 4519/ 8400 batches | train loss 0.5046316 +| epoch 2 | 4523/ 8400 batches | train loss 0.4336430 +| epoch 2 | 4527/ 8400 batches | train loss 0.3987387 +| epoch 2 | 4531/ 8400 batches | train loss 0.3679455 +| epoch 2 | 4535/ 8400 batches | train loss 0.4852018 +| epoch 2 | 4539/ 8400 batches | train loss 0.4468511 +| epoch 2 | 4543/ 8400 batches | train loss 0.4402687 +| epoch 2 | 4547/ 8400 batches | train loss 0.4494547 +| epoch 2 | 4551/ 8400 batches | train loss 0.4752423 +| epoch 2 | 4555/ 8400 batches | train loss 0.3389487 +| epoch 2 | 4559/ 8400 batches | train loss 0.3967522 +| epoch 2 | 4563/ 8400 batches | train loss 0.4436269 +| epoch 2 | 4567/ 8400 batches | train loss 0.4019537 +| epoch 2 | 4571/ 8400 batches | train loss 0.4756634 +| epoch 2 | 4575/ 8400 batches | train loss 0.4204965 +| epoch 2 | 4579/ 8400 batches | train loss 0.3721949 +| epoch 2 | 4583/ 8400 batches | train loss 0.4690742 +| epoch 2 | 4587/ 8400 batches | train loss 0.4522732 +| epoch 2 | 4591/ 8400 batches | train loss 0.4136244 +| epoch 2 | 4595/ 8400 batches | train loss 0.5237478 +| epoch 2 | 4599/ 8400 batches | train loss 0.3130177 +| epoch 2 | 4603/ 8400 batches | train loss 0.4057680 +| epoch 2 | 4607/ 8400 batches | train loss 0.4666670 +| epoch 2 | 4611/ 8400 batches | train loss 0.4257603 +| epoch 2 | 4615/ 8400 batches | train loss 0.5191524 +| epoch 2 | 4619/ 8400 batches | train loss 0.5405591 +| epoch 2 | 4623/ 8400 batches | train loss 0.4528822 +| epoch 2 | 4627/ 8400 batches | train loss 0.4413022 +| epoch 2 | 4631/ 8400 batches | train loss 0.4229748 +| epoch 2 | 4635/ 8400 batches | train loss 0.2715589 +| epoch 2 | 4639/ 8400 batches | train loss 0.4986756 +| epoch 2 | 4643/ 8400 batches | train loss 0.4436684 +| epoch 2 | 4647/ 8400 batches | train loss 0.4586142 +| epoch 2 | 4651/ 8400 batches | train loss 0.4263945 +| epoch 2 | 4655/ 8400 batches | train loss 0.3911613 +| epoch 2 | 4659/ 8400 batches | train loss 0.4323280 +| epoch 2 | 4663/ 8400 batches | train loss 0.4273608 +| epoch 2 | 4667/ 8400 batches | train loss 0.4003348 +| epoch 2 | 4671/ 8400 batches | train loss 0.3629189 +| epoch 2 | 4675/ 8400 batches | train loss 0.4647191 +| epoch 2 | 4679/ 8400 batches | train loss 0.3774797 +| epoch 2 | 4683/ 8400 batches | train loss 0.4102374 +| epoch 2 | 4687/ 8400 batches | train loss 0.3999949 +| epoch 2 | 4691/ 8400 batches | train loss 0.3906066 +| epoch 2 | 4695/ 8400 batches | train loss 0.4007494 +| epoch 2 | 4699/ 8400 batches | train loss 0.3601081 +| epoch 2 | 4703/ 8400 batches | train loss 0.5203819 +| epoch 2 | 4707/ 8400 batches | train loss 0.5052013 +| epoch 2 | 4711/ 8400 batches | train loss 0.4553866 +| epoch 2 | 4715/ 8400 batches | train loss 0.4924981 +| epoch 2 | 4719/ 8400 batches | train loss 0.3901910 +| epoch 2 | 4723/ 8400 batches | train loss 0.4522327 +| epoch 2 | 4727/ 8400 batches | train loss 0.4197174 +| epoch 2 | 4731/ 8400 batches | train loss 0.4251105 +| epoch 2 | 4735/ 8400 batches | train loss 0.4390133 +| epoch 2 | 4739/ 8400 batches | train loss 0.4507254 +| epoch 2 | 4743/ 8400 batches | train loss 0.4770001 +| epoch 2 | 4747/ 8400 batches | train loss 0.4119799 +| epoch 2 | 4751/ 8400 batches | train loss 0.4476267 +| epoch 2 | 4755/ 8400 batches | train loss 0.4298598 +| epoch 2 | 4759/ 8400 batches | train loss 0.3829804 +| epoch 2 | 4763/ 8400 batches | train loss 0.4213821 +| epoch 2 | 4767/ 8400 batches | train loss 0.4789958 +| epoch 2 | 4771/ 8400 batches | train loss 0.4373519 +| epoch 2 | 4775/ 8400 batches | train loss 0.4241046 +| epoch 2 | 4779/ 8400 batches | train loss 0.3830805 +| epoch 2 | 4783/ 8400 batches | train loss 0.4717733 +| epoch 2 | 4787/ 8400 batches | train loss 0.4370974 +| epoch 2 | 4791/ 8400 batches | train loss 0.4195940 +| epoch 2 | 4795/ 8400 batches | train loss 0.3457147 +| epoch 2 | 4799/ 8400 batches | train loss 0.4884820 +| epoch 2 | 4803/ 8400 batches | train loss 0.4273989 +| epoch 2 | 4807/ 8400 batches | train loss 0.4593000 +| epoch 2 | 4811/ 8400 batches | train loss 0.4332321 +| epoch 2 | 4815/ 8400 batches | train loss 0.4461325 +| epoch 2 | 4819/ 8400 batches | train loss 0.2932912 +| epoch 2 | 4823/ 8400 batches | train loss 0.4459486 +| epoch 2 | 4827/ 8400 batches | train loss 0.3659839 +| epoch 2 | 4831/ 8400 batches | train loss 0.3439506 +| epoch 2 | 4835/ 8400 batches | train loss 0.4448695 +| epoch 2 | 4839/ 8400 batches | train loss 0.4276548 +| epoch 2 | 4843/ 8400 batches | train loss 0.3420947 +| epoch 2 | 4847/ 8400 batches | train loss 0.4210758 +| epoch 2 | 4851/ 8400 batches | train loss 0.3601100 +| epoch 2 | 4855/ 8400 batches | train loss 0.3975823 +| epoch 2 | 4859/ 8400 batches | train loss 0.4257277 +| epoch 2 | 4863/ 8400 batches | train loss 0.4112902 +| epoch 2 | 4867/ 8400 batches | train loss 0.4377019 +| epoch 2 | 4871/ 8400 batches | train loss 0.3895246 +| epoch 2 | 4875/ 8400 batches | train loss 0.3733320 +| epoch 2 | 4879/ 8400 batches | train loss 0.5135479 +| epoch 2 | 4883/ 8400 batches | train loss 0.4686452 +| epoch 2 | 4887/ 8400 batches | train loss 0.6057597 +| epoch 2 | 4891/ 8400 batches | train loss 0.4535134 +| epoch 2 | 4895/ 8400 batches | train loss 0.4038141 +| epoch 2 | 4899/ 8400 batches | train loss 0.4940837 +| epoch 2 | 4903/ 8400 batches | train loss 0.4189467 +| epoch 2 | 4907/ 8400 batches | train loss 0.3813121 +| epoch 2 | 4911/ 8400 batches | train loss 0.4158691 +| epoch 2 | 4915/ 8400 batches | train loss 0.4390335 +| epoch 2 | 4919/ 8400 batches | train loss 0.4077520 +| epoch 2 | 4923/ 8400 batches | train loss 0.4852702 +| epoch 2 | 4927/ 8400 batches | train loss 0.4611952 +| epoch 2 | 4931/ 8400 batches | train loss 0.5406942 +| epoch 2 | 4935/ 8400 batches | train loss 0.4435103 +| epoch 2 | 4939/ 8400 batches | train loss 0.3571073 +| epoch 2 | 4943/ 8400 batches | train loss 0.4068410 +| epoch 2 | 4947/ 8400 batches | train loss 0.3692700 +| epoch 2 | 4951/ 8400 batches | train loss 0.5286578 +| epoch 2 | 4955/ 8400 batches | train loss 0.4544321 +| epoch 2 | 4959/ 8400 batches | train loss 0.4666892 +| epoch 2 | 4963/ 8400 batches | train loss 0.4163034 +| epoch 2 | 4967/ 8400 batches | train loss 0.4540769 +| epoch 2 | 4971/ 8400 batches | train loss 0.3535104 +| epoch 2 | 4975/ 8400 batches | train loss 0.4757343 +| epoch 2 | 4979/ 8400 batches | train loss 0.3289132 +| epoch 2 | 4983/ 8400 batches | train loss 0.4296528 +| epoch 2 | 4987/ 8400 batches | train loss 0.4502786 +| epoch 2 | 4991/ 8400 batches | train loss 0.4289553 +| epoch 2 | 4995/ 8400 batches | train loss 0.4431494 +| epoch 2 | 4999/ 8400 batches | train loss 0.4816975 +| epoch 2 | 5003/ 8400 batches | train loss 0.4250085 +| epoch 2 | 5007/ 8400 batches | train loss 0.4517630 +| epoch 2 | 5011/ 8400 batches | train loss 0.6679970 +| epoch 2 | 5015/ 8400 batches | train loss 0.4571989 +| epoch 2 | 5019/ 8400 batches | train loss 0.4289168 +| epoch 2 | 5023/ 8400 batches | train loss 0.4631388 +| epoch 2 | 5027/ 8400 batches | train loss 0.3267680 +| epoch 2 | 5031/ 8400 batches | train loss 0.3579931 +| epoch 2 | 5035/ 8400 batches | train loss 0.4214066 +| epoch 2 | 5039/ 8400 batches | train loss 0.4296350 +| epoch 2 | 5043/ 8400 batches | train loss 0.4610147 +| epoch 2 | 5047/ 8400 batches | train loss 0.3884968 +| epoch 2 | 5051/ 8400 batches | train loss 0.5723854 +| epoch 2 | 5055/ 8400 batches | train loss 0.3938479 +| epoch 2 | 5059/ 8400 batches | train loss 0.4168795 +| epoch 2 | 5063/ 8400 batches | train loss 0.4936973 +| epoch 2 | 5067/ 8400 batches | train loss 0.4393615 +| epoch 2 | 5071/ 8400 batches | train loss 0.4602354 +| epoch 2 | 5075/ 8400 batches | train loss 0.4670639 +| epoch 2 | 5079/ 8400 batches | train loss 0.4178157 +| epoch 2 | 5083/ 8400 batches | train loss 0.4572926 +| epoch 2 | 5087/ 8400 batches | train loss 0.4167809 +| epoch 2 | 5091/ 8400 batches | train loss 0.4860008 +| epoch 2 | 5095/ 8400 batches | train loss 0.4527799 +| epoch 2 | 5099/ 8400 batches | train loss 0.4565629 +| epoch 2 | 5103/ 8400 batches | train loss 0.4287761 +| epoch 2 | 5107/ 8400 batches | train loss 0.3731605 +| epoch 2 | 5111/ 8400 batches | train loss 0.4577453 +| epoch 2 | 5115/ 8400 batches | train loss 0.3704037 +| epoch 2 | 5119/ 8400 batches | train loss 0.4900154 +| epoch 2 | 5123/ 8400 batches | train loss 0.4649192 +| epoch 2 | 5127/ 8400 batches | train loss 0.5160613 +| epoch 2 | 5131/ 8400 batches | train loss 0.4576980 +| epoch 2 | 5135/ 8400 batches | train loss 0.6188892 +| epoch 2 | 5139/ 8400 batches | train loss 0.4480585 +| epoch 2 | 5143/ 8400 batches | train loss 0.4388678 +| epoch 2 | 5147/ 8400 batches | train loss 0.4713314 +| epoch 2 | 5151/ 8400 batches | train loss 0.4323123 +| epoch 2 | 5155/ 8400 batches | train loss 0.4130487 +| epoch 2 | 5159/ 8400 batches | train loss 0.4659117 +| epoch 2 | 5163/ 8400 batches | train loss 0.5016197 +| epoch 2 | 5167/ 8400 batches | train loss 0.4900702 +| epoch 2 | 5171/ 8400 batches | train loss 0.4520428 +| epoch 2 | 5175/ 8400 batches | train loss 0.4704682 +| epoch 2 | 5179/ 8400 batches | train loss 0.3698763 +| epoch 2 | 5183/ 8400 batches | train loss 0.3656346 +| epoch 2 | 5187/ 8400 batches | train loss 0.5065871 +| epoch 2 | 5191/ 8400 batches | train loss 0.4641804 +| epoch 2 | 5195/ 8400 batches | train loss 0.3827268 +| epoch 2 | 5199/ 8400 batches | train loss 0.3565964 +| epoch 2 | 5203/ 8400 batches | train loss 0.4733826 +| epoch 2 | 5207/ 8400 batches | train loss 0.4310458 +| epoch 2 | 5211/ 8400 batches | train loss 0.5240417 +| epoch 2 | 5215/ 8400 batches | train loss 0.4136763 +| epoch 2 | 5219/ 8400 batches | train loss 0.4443954 +| epoch 2 | 5223/ 8400 batches | train loss 0.4907642 +| epoch 2 | 5227/ 8400 batches | train loss 0.3919527 +| epoch 2 | 5231/ 8400 batches | train loss 0.4784292 +| epoch 2 | 5235/ 8400 batches | train loss 0.4831288 +| epoch 2 | 5239/ 8400 batches | train loss 0.4110172 +| epoch 2 | 5243/ 8400 batches | train loss 0.3627878 +| epoch 2 | 5247/ 8400 batches | train loss 0.4943017 +| epoch 2 | 5251/ 8400 batches | train loss 0.3513798 +| epoch 2 | 5255/ 8400 batches | train loss 0.3702073 +| epoch 2 | 5259/ 8400 batches | train loss 0.4830094 +| epoch 2 | 5263/ 8400 batches | train loss 0.4795652 +| epoch 2 | 5267/ 8400 batches | train loss 0.5150526 +| epoch 2 | 5271/ 8400 batches | train loss 0.5271742 +| epoch 2 | 5275/ 8400 batches | train loss 0.4013223 +| epoch 2 | 5279/ 8400 batches | train loss 0.6017971 +| epoch 2 | 5283/ 8400 batches | train loss 0.4017674 +| epoch 2 | 5287/ 8400 batches | train loss 0.5477511 +| epoch 2 | 5291/ 8400 batches | train loss 0.3973626 +| epoch 2 | 5295/ 8400 batches | train loss 0.5292565 +| epoch 2 | 5299/ 8400 batches | train loss 0.3891146 +| epoch 2 | 5303/ 8400 batches | train loss 0.4776950 +| epoch 2 | 5307/ 8400 batches | train loss 0.4513564 +| epoch 2 | 5311/ 8400 batches | train loss 0.3856159 +| epoch 2 | 5315/ 8400 batches | train loss 0.4461243 +| epoch 2 | 5319/ 8400 batches | train loss 0.4517093 +| epoch 2 | 5323/ 8400 batches | train loss 0.4475248 +| epoch 2 | 5327/ 8400 batches | train loss 0.4230461 +| epoch 2 | 5331/ 8400 batches | train loss 0.4274843 +| epoch 2 | 5335/ 8400 batches | train loss 0.4302093 +| epoch 2 | 5339/ 8400 batches | train loss 0.4991142 +| epoch 2 | 5343/ 8400 batches | train loss 0.4063677 +| epoch 2 | 5347/ 8400 batches | train loss 0.3898658 +| epoch 2 | 5351/ 8400 batches | train loss 0.3836152 +| epoch 2 | 5355/ 8400 batches | train loss 0.4737834 +| epoch 2 | 5359/ 8400 batches | train loss 0.4688455 +| epoch 2 | 5363/ 8400 batches | train loss 0.4671781 +| epoch 2 | 5367/ 8400 batches | train loss 0.4923837 +| epoch 2 | 5371/ 8400 batches | train loss 0.4965537 +| epoch 2 | 5375/ 8400 batches | train loss 0.3411033 +| epoch 2 | 5379/ 8400 batches | train loss 0.3792409 +| epoch 2 | 5383/ 8400 batches | train loss 0.4961007 +| epoch 2 | 5387/ 8400 batches | train loss 0.4415293 +| epoch 2 | 5391/ 8400 batches | train loss 0.3978679 +| epoch 2 | 5395/ 8400 batches | train loss 0.6709688 +| epoch 2 | 5399/ 8400 batches | train loss 0.4429738 +| epoch 2 | 5403/ 8400 batches | train loss 0.4407045 +| epoch 2 | 5407/ 8400 batches | train loss 0.4511384 +| epoch 2 | 5411/ 8400 batches | train loss 0.5189138 +| epoch 2 | 5415/ 8400 batches | train loss 0.4103686 +| epoch 2 | 5419/ 8400 batches | train loss 0.4977941 +| epoch 2 | 5423/ 8400 batches | train loss 0.4318696 +| epoch 2 | 5427/ 8400 batches | train loss 0.4259337 +| epoch 2 | 5431/ 8400 batches | train loss 0.4898166 +| epoch 2 | 5435/ 8400 batches | train loss 0.4972975 +| epoch 2 | 5439/ 8400 batches | train loss 0.4044670 +| epoch 2 | 5443/ 8400 batches | train loss 0.4345393 +| epoch 2 | 5447/ 8400 batches | train loss 0.4924439 +| epoch 2 | 5451/ 8400 batches | train loss 0.4478414 +| epoch 2 | 5455/ 8400 batches | train loss 0.3785090 +| epoch 2 | 5459/ 8400 batches | train loss 0.3761014 +| epoch 2 | 5463/ 8400 batches | train loss 0.4062059 +| epoch 2 | 5467/ 8400 batches | train loss 0.4635542 +| epoch 2 | 5471/ 8400 batches | train loss 0.3900869 +| epoch 2 | 5475/ 8400 batches | train loss 0.4139702 +| epoch 2 | 5479/ 8400 batches | train loss 0.4253767 +| epoch 2 | 5483/ 8400 batches | train loss 0.3464870 +| epoch 2 | 5487/ 8400 batches | train loss 0.4374321 +| epoch 2 | 5491/ 8400 batches | train loss 0.4621257 +| epoch 2 | 5495/ 8400 batches | train loss 0.4356217 +| epoch 2 | 5499/ 8400 batches | train loss 0.4938242 +| epoch 2 | 5503/ 8400 batches | train loss 0.5328792 +| epoch 2 | 5507/ 8400 batches | train loss 0.4730778 +| epoch 2 | 5511/ 8400 batches | train loss 0.3722488 +| epoch 2 | 5515/ 8400 batches | train loss 0.3916929 +| epoch 2 | 5519/ 8400 batches | train loss 0.5875559 +| epoch 2 | 5523/ 8400 batches | train loss 0.4400026 +| epoch 2 | 5527/ 8400 batches | train loss 0.4765582 +| epoch 2 | 5531/ 8400 batches | train loss 0.4266437 +| epoch 2 | 5535/ 8400 batches | train loss 0.4247587 +| epoch 2 | 5539/ 8400 batches | train loss 0.4247500 +| epoch 2 | 5543/ 8400 batches | train loss 0.4660529 +| epoch 2 | 5547/ 8400 batches | train loss 0.4523095 +| epoch 2 | 5551/ 8400 batches | train loss 0.4526484 +| epoch 2 | 5555/ 8400 batches | train loss 0.4078234 +| epoch 2 | 5559/ 8400 batches | train loss 0.4599583 +| epoch 2 | 5563/ 8400 batches | train loss 0.3402905 +| epoch 2 | 5567/ 8400 batches | train loss 0.3590713 +| epoch 2 | 5571/ 8400 batches | train loss 0.4400434 +| epoch 2 | 5575/ 8400 batches | train loss 0.4514377 +| epoch 2 | 5579/ 8400 batches | train loss 0.3678736 +| epoch 2 | 5583/ 8400 batches | train loss 0.3489721 +| epoch 2 | 5587/ 8400 batches | train loss 0.5266989 +| epoch 2 | 5591/ 8400 batches | train loss 0.4718564 +| epoch 2 | 5595/ 8400 batches | train loss 0.3732422 +| epoch 2 | 5599/ 8400 batches | train loss 0.4454523 +| epoch 2 | 5603/ 8400 batches | train loss 0.4972327 +| epoch 2 | 5607/ 8400 batches | train loss 0.4166094 +| epoch 2 | 5611/ 8400 batches | train loss 0.4778103 +| epoch 2 | 5615/ 8400 batches | train loss 0.3387973 +| epoch 2 | 5619/ 8400 batches | train loss 0.4244777 +| epoch 2 | 5623/ 8400 batches | train loss 0.3014991 +| epoch 2 | 5627/ 8400 batches | train loss 0.3348698 +| epoch 2 | 5631/ 8400 batches | train loss 0.4553482 +| epoch 2 | 5635/ 8400 batches | train loss 0.4336443 +| epoch 2 | 5639/ 8400 batches | train loss 0.4299666 +| epoch 2 | 5643/ 8400 batches | train loss 0.3376874 +| epoch 2 | 5647/ 8400 batches | train loss 0.4389117 +| epoch 2 | 5651/ 8400 batches | train loss 0.4256823 +| epoch 2 | 5655/ 8400 batches | train loss 0.4237088 +| epoch 2 | 5659/ 8400 batches | train loss 0.4086992 +| epoch 2 | 5663/ 8400 batches | train loss 0.4407201 +| epoch 2 | 5667/ 8400 batches | train loss 0.4667877 +| epoch 2 | 5671/ 8400 batches | train loss 0.3954513 +| epoch 2 | 5675/ 8400 batches | train loss 0.4214205 +| epoch 2 | 5679/ 8400 batches | train loss 0.4248317 +| epoch 2 | 5683/ 8400 batches | train loss 0.4146324 +| epoch 2 | 5687/ 8400 batches | train loss 0.4452596 +| epoch 2 | 5691/ 8400 batches | train loss 0.4505024 +| epoch 2 | 5695/ 8400 batches | train loss 0.4183476 +| epoch 2 | 5699/ 8400 batches | train loss 0.4361239 +| epoch 2 | 5703/ 8400 batches | train loss 0.4715668 +| epoch 2 | 5707/ 8400 batches | train loss 0.5465855 +| epoch 2 | 5711/ 8400 batches | train loss 0.3822646 +| epoch 2 | 5715/ 8400 batches | train loss 0.4601559 +| epoch 2 | 5719/ 8400 batches | train loss 0.4571685 +| epoch 2 | 5723/ 8400 batches | train loss 0.3860587 +| epoch 2 | 5727/ 8400 batches | train loss 0.4035996 +| epoch 2 | 5731/ 8400 batches | train loss 0.4779065 +| epoch 2 | 5735/ 8400 batches | train loss 0.4393293 +| epoch 2 | 5739/ 8400 batches | train loss 0.4747052 +| epoch 2 | 5743/ 8400 batches | train loss 0.4260898 +| epoch 2 | 5747/ 8400 batches | train loss 0.4137464 +| epoch 2 | 5751/ 8400 batches | train loss 0.4138323 +| epoch 2 | 5755/ 8400 batches | train loss 0.4472353 +| epoch 2 | 5759/ 8400 batches | train loss 0.4663386 +| epoch 2 | 5763/ 8400 batches | train loss 0.3874534 +| epoch 2 | 5767/ 8400 batches | train loss 0.5038992 +| epoch 2 | 5771/ 8400 batches | train loss 0.4931749 +| epoch 2 | 5775/ 8400 batches | train loss 0.3852224 +| epoch 2 | 5779/ 8400 batches | train loss 0.4614046 +| epoch 2 | 5783/ 8400 batches | train loss 0.5900890 +| epoch 2 | 5787/ 8400 batches | train loss 0.4919593 +| epoch 2 | 5791/ 8400 batches | train loss 0.4743038 +| epoch 2 | 5795/ 8400 batches | train loss 0.4206160 +| epoch 2 | 5799/ 8400 batches | train loss 0.3816971 +| epoch 2 | 5803/ 8400 batches | train loss 0.4087888 +| epoch 2 | 5807/ 8400 batches | train loss 0.3932996 +| epoch 2 | 5811/ 8400 batches | train loss 0.4217987 +| epoch 2 | 5815/ 8400 batches | train loss 0.4748740 +| epoch 2 | 5819/ 8400 batches | train loss 0.5216265 +| epoch 2 | 5823/ 8400 batches | train loss 0.5436091 +| epoch 2 | 5827/ 8400 batches | train loss 0.4403720 +| epoch 2 | 5831/ 8400 batches | train loss 0.4536466 +| epoch 2 | 5835/ 8400 batches | train loss 0.4279452 +| epoch 2 | 5839/ 8400 batches | train loss 0.4629200 +| epoch 2 | 5843/ 8400 batches | train loss 0.4248276 +| epoch 2 | 5847/ 8400 batches | train loss 0.4662665 +| epoch 2 | 5851/ 8400 batches | train loss 0.4960387 +| epoch 2 | 5855/ 8400 batches | train loss 0.4444839 +| epoch 2 | 5859/ 8400 batches | train loss 0.5162193 +| epoch 2 | 5863/ 8400 batches | train loss 0.3770461 +| epoch 2 | 5867/ 8400 batches | train loss 0.4870250 +| epoch 2 | 5871/ 8400 batches | train loss 0.3675534 +| epoch 2 | 5875/ 8400 batches | train loss 0.4836399 +| epoch 2 | 5879/ 8400 batches | train loss 0.4253018 +| epoch 2 | 5883/ 8400 batches | train loss 0.4353305 +| epoch 2 | 5887/ 8400 batches | train loss 0.4282180 +| epoch 2 | 5891/ 8400 batches | train loss 0.4321259 +| epoch 2 | 5895/ 8400 batches | train loss 0.5051517 +| epoch 2 | 5899/ 8400 batches | train loss 0.4511924 +| epoch 2 | 5903/ 8400 batches | train loss 0.5506500 +| epoch 2 | 5907/ 8400 batches | train loss 0.4335531 +| epoch 2 | 5911/ 8400 batches | train loss 0.3460438 +| epoch 2 | 5915/ 8400 batches | train loss 0.4791885 +| epoch 2 | 5919/ 8400 batches | train loss 0.4865044 +| epoch 2 | 5923/ 8400 batches | train loss 0.4497942 +| epoch 2 | 5927/ 8400 batches | train loss 0.4725554 +| epoch 2 | 5931/ 8400 batches | train loss 0.3644533 +| epoch 2 | 5935/ 8400 batches | train loss 0.4826759 +| epoch 2 | 5939/ 8400 batches | train loss 0.4651472 +| epoch 2 | 5943/ 8400 batches | train loss 0.3932249 +| epoch 2 | 5947/ 8400 batches | train loss 0.4903775 +| epoch 2 | 5951/ 8400 batches | train loss 0.2806215 +| epoch 2 | 5955/ 8400 batches | train loss 0.3748435 +| epoch 2 | 5959/ 8400 batches | train loss 0.4948159 +| epoch 2 | 5963/ 8400 batches | train loss 0.4051238 +| epoch 2 | 5967/ 8400 batches | train loss 0.4338093 +| epoch 2 | 5971/ 8400 batches | train loss 0.4549556 +| epoch 2 | 5975/ 8400 batches | train loss 0.3639056 +| epoch 2 | 5979/ 8400 batches | train loss 0.4628993 +| epoch 2 | 5983/ 8400 batches | train loss 0.4880576 +| epoch 2 | 5987/ 8400 batches | train loss 0.4626300 +| epoch 2 | 5991/ 8400 batches | train loss 0.4428907 +| epoch 2 | 5995/ 8400 batches | train loss 0.4071947 +| epoch 2 | 5999/ 8400 batches | train loss 0.4701550 +| epoch 2 | 6003/ 8400 batches | train loss 0.3485566 +| epoch 2 | 6007/ 8400 batches | train loss 0.3968373 +| epoch 2 | 6011/ 8400 batches | train loss 0.3870945 +| epoch 2 | 6015/ 8400 batches | train loss 0.5206247 +| epoch 2 | 6019/ 8400 batches | train loss 0.3492263 +| epoch 2 | 6023/ 8400 batches | train loss 0.4262955 +| epoch 2 | 6027/ 8400 batches | train loss 0.3589195 +| epoch 2 | 6031/ 8400 batches | train loss 0.4231276 +| epoch 2 | 6035/ 8400 batches | train loss 0.4763454 +| epoch 2 | 6039/ 8400 batches | train loss 0.3843059 +| epoch 2 | 6043/ 8400 batches | train loss 0.4509572 +| epoch 2 | 6047/ 8400 batches | train loss 0.4303443 +| epoch 2 | 6051/ 8400 batches | train loss 0.4583592 +| epoch 2 | 6055/ 8400 batches | train loss 0.4228874 +| epoch 2 | 6059/ 8400 batches | train loss 0.4113712 +| epoch 2 | 6063/ 8400 batches | train loss 0.4423866 +| epoch 2 | 6067/ 8400 batches | train loss 0.4189864 +| epoch 2 | 6071/ 8400 batches | train loss 0.4821180 +| epoch 2 | 6075/ 8400 batches | train loss 0.3897539 +| epoch 2 | 6079/ 8400 batches | train loss 0.4605778 +| epoch 2 | 6083/ 8400 batches | train loss 0.2121822 +| epoch 2 | 6087/ 8400 batches | train loss 0.3455574 +| epoch 2 | 6091/ 8400 batches | train loss 0.4716724 +| epoch 2 | 6095/ 8400 batches | train loss 0.3962925 +| epoch 2 | 6099/ 8400 batches | train loss 0.4244396 +| epoch 2 | 6103/ 8400 batches | train loss 0.4354520 +| epoch 2 | 6107/ 8400 batches | train loss 0.4555062 +| epoch 2 | 6111/ 8400 batches | train loss 0.4449597 +| epoch 2 | 6115/ 8400 batches | train loss 0.4747838 +| epoch 2 | 6119/ 8400 batches | train loss 0.4346871 +| epoch 2 | 6123/ 8400 batches | train loss 0.3152105 +| epoch 2 | 6127/ 8400 batches | train loss 0.3385125 +| epoch 2 | 6131/ 8400 batches | train loss 0.4398076 +| epoch 2 | 6135/ 8400 batches | train loss 0.4289637 +| epoch 2 | 6139/ 8400 batches | train loss 0.3880827 +| epoch 2 | 6143/ 8400 batches | train loss 0.4929458 +| epoch 2 | 6147/ 8400 batches | train loss 0.4193972 +| epoch 2 | 6151/ 8400 batches | train loss 0.3653185 +| epoch 2 | 6155/ 8400 batches | train loss 0.4505122 +| epoch 2 | 6159/ 8400 batches | train loss 0.4348943 +| epoch 2 | 6163/ 8400 batches | train loss 0.5361242 +| epoch 2 | 6167/ 8400 batches | train loss 0.4383077 +| epoch 2 | 6171/ 8400 batches | train loss 0.5047956 +| epoch 2 | 6175/ 8400 batches | train loss 0.4684241 +| epoch 2 | 6179/ 8400 batches | train loss 0.4787382 +| epoch 2 | 6183/ 8400 batches | train loss 0.3511985 +| epoch 2 | 6187/ 8400 batches | train loss 0.3809880 +| epoch 2 | 6191/ 8400 batches | train loss 0.4177775 +| epoch 2 | 6195/ 8400 batches | train loss 0.5367352 +| epoch 2 | 6199/ 8400 batches | train loss 0.3916747 +| epoch 2 | 6203/ 8400 batches | train loss 0.4398057 +| epoch 2 | 6207/ 8400 batches | train loss 0.4625123 +| epoch 2 | 6211/ 8400 batches | train loss 0.4802647 +| epoch 2 | 6215/ 8400 batches | train loss 0.5039583 +| epoch 2 | 6219/ 8400 batches | train loss 0.3516344 +| epoch 2 | 6223/ 8400 batches | train loss 0.4659211 +| epoch 2 | 6227/ 8400 batches | train loss 0.4486580 +| epoch 2 | 6231/ 8400 batches | train loss 0.4278354 +| epoch 2 | 6235/ 8400 batches | train loss 0.3603746 +| epoch 2 | 6239/ 8400 batches | train loss 0.4367176 +| epoch 2 | 6243/ 8400 batches | train loss 0.4666632 +| epoch 2 | 6247/ 8400 batches | train loss 0.3999297 +| epoch 2 | 6251/ 8400 batches | train loss 0.5466688 +| epoch 2 | 6255/ 8400 batches | train loss 0.4514007 +| epoch 2 | 6259/ 8400 batches | train loss 0.4252974 +| epoch 2 | 6263/ 8400 batches | train loss 0.4530908 +| epoch 2 | 6267/ 8400 batches | train loss 0.3919120 +| epoch 2 | 6271/ 8400 batches | train loss 0.4012725 +| epoch 2 | 6275/ 8400 batches | train loss 0.4721314 +| epoch 2 | 6279/ 8400 batches | train loss 0.4240447 +| epoch 2 | 6283/ 8400 batches | train loss 0.3511238 +| epoch 2 | 6287/ 8400 batches | train loss 0.4446785 +| epoch 2 | 6291/ 8400 batches | train loss 0.4158169 +| epoch 2 | 6295/ 8400 batches | train loss 0.4411711 +| epoch 2 | 6299/ 8400 batches | train loss 0.3835754 +| epoch 2 | 6303/ 8400 batches | train loss 0.4159355 +| epoch 2 | 6307/ 8400 batches | train loss 0.4803889 +| epoch 2 | 6311/ 8400 batches | train loss 0.4324310 +| epoch 2 | 6315/ 8400 batches | train loss 0.4930360 +| epoch 2 | 6319/ 8400 batches | train loss 0.4090301 +| epoch 2 | 6323/ 8400 batches | train loss 0.4532090 +| epoch 2 | 6327/ 8400 batches | train loss 0.4968748 +| epoch 2 | 6331/ 8400 batches | train loss 0.3874412 +| epoch 2 | 6335/ 8400 batches | train loss 0.4770561 +| epoch 2 | 6339/ 8400 batches | train loss 0.5030010 +| epoch 2 | 6343/ 8400 batches | train loss 0.4592317 +| epoch 2 | 6347/ 8400 batches | train loss 0.4117239 +| epoch 2 | 6351/ 8400 batches | train loss 0.4648583 +| epoch 2 | 6355/ 8400 batches | train loss 0.4417576 +| epoch 2 | 6359/ 8400 batches | train loss 0.3607935 +| epoch 2 | 6363/ 8400 batches | train loss 0.3733722 +| epoch 2 | 6367/ 8400 batches | train loss 0.5122381 +| epoch 2 | 6371/ 8400 batches | train loss 0.5571411 +| epoch 2 | 6375/ 8400 batches | train loss 0.4331376 +| epoch 2 | 6379/ 8400 batches | train loss 0.4141774 +| epoch 2 | 6383/ 8400 batches | train loss 0.2858114 +| epoch 2 | 6387/ 8400 batches | train loss 0.4552208 +| epoch 2 | 6391/ 8400 batches | train loss 0.4389137 +| epoch 2 | 6395/ 8400 batches | train loss 0.5283384 +| epoch 2 | 6399/ 8400 batches | train loss 0.5491645 +| epoch 2 | 6403/ 8400 batches | train loss 0.4123265 +| epoch 2 | 6407/ 8400 batches | train loss 0.4721980 +| epoch 2 | 6411/ 8400 batches | train loss 0.5284635 +| epoch 2 | 6415/ 8400 batches | train loss 0.5265663 +| epoch 2 | 6419/ 8400 batches | train loss 0.4079227 +| epoch 2 | 6423/ 8400 batches | train loss 0.4323049 +| epoch 2 | 6427/ 8400 batches | train loss 0.4429526 +| epoch 2 | 6431/ 8400 batches | train loss 0.3634422 +| epoch 2 | 6435/ 8400 batches | train loss 0.3322742 +| epoch 2 | 6439/ 8400 batches | train loss 0.3451969 +| epoch 2 | 6443/ 8400 batches | train loss 0.5045781 +| epoch 2 | 6447/ 8400 batches | train loss 0.4643139 +| epoch 2 | 6451/ 8400 batches | train loss 0.5714149 +| epoch 2 | 6455/ 8400 batches | train loss 0.3463938 +| epoch 2 | 6459/ 8400 batches | train loss 0.5491889 +| epoch 2 | 6463/ 8400 batches | train loss 0.3687078 +| epoch 2 | 6467/ 8400 batches | train loss 0.4477635 +| epoch 2 | 6471/ 8400 batches | train loss 0.4782694 +| epoch 2 | 6475/ 8400 batches | train loss 0.4222267 +| epoch 2 | 6479/ 8400 batches | train loss 0.4242595 +| epoch 2 | 6483/ 8400 batches | train loss 0.5322825 +| epoch 2 | 6487/ 8400 batches | train loss 0.4220080 +| epoch 2 | 6491/ 8400 batches | train loss 0.4537518 +| epoch 2 | 6495/ 8400 batches | train loss 0.3744826 +| epoch 2 | 6499/ 8400 batches | train loss 0.4836514 +| epoch 2 | 6503/ 8400 batches | train loss 0.4362672 +| epoch 2 | 6507/ 8400 batches | train loss 0.4683970 +| epoch 2 | 6511/ 8400 batches | train loss 0.4622715 +| epoch 2 | 6515/ 8400 batches | train loss 0.2953345 +| epoch 2 | 6519/ 8400 batches | train loss 0.4338967 +| epoch 2 | 6523/ 8400 batches | train loss 0.4115453 +| epoch 2 | 6527/ 8400 batches | train loss 0.4665183 +| epoch 2 | 6531/ 8400 batches | train loss 0.4756924 +| epoch 2 | 6535/ 8400 batches | train loss 0.4449427 +| epoch 2 | 6539/ 8400 batches | train loss 0.4599101 +| epoch 2 | 6543/ 8400 batches | train loss 0.4573697 +| epoch 2 | 6547/ 8400 batches | train loss 0.4775953 +| epoch 2 | 6551/ 8400 batches | train loss 0.4710222 +| epoch 2 | 6555/ 8400 batches | train loss 0.4669006 +| epoch 2 | 6559/ 8400 batches | train loss 0.4085632 +| epoch 2 | 6563/ 8400 batches | train loss 0.4657560 +| epoch 2 | 6567/ 8400 batches | train loss 0.4231465 +| epoch 2 | 6571/ 8400 batches | train loss 0.3868263 +| epoch 2 | 6575/ 8400 batches | train loss 0.4593869 +| epoch 2 | 6579/ 8400 batches | train loss 0.5126526 +| epoch 2 | 6583/ 8400 batches | train loss 0.3422080 +| epoch 2 | 6587/ 8400 batches | train loss 0.4800712 +| epoch 2 | 6591/ 8400 batches | train loss 0.5370846 +| epoch 2 | 6595/ 8400 batches | train loss 0.4559625 +| epoch 2 | 6599/ 8400 batches | train loss 0.4116556 +| epoch 2 | 6603/ 8400 batches | train loss 0.3421775 +| epoch 2 | 6607/ 8400 batches | train loss 0.4128254 +| epoch 2 | 6611/ 8400 batches | train loss 0.4193307 +| epoch 2 | 6615/ 8400 batches | train loss 0.4535975 +| epoch 2 | 6619/ 8400 batches | train loss 0.5119385 +| epoch 2 | 6623/ 8400 batches | train loss 0.3990396 +| epoch 2 | 6627/ 8400 batches | train loss 0.5136555 +| epoch 2 | 6631/ 8400 batches | train loss 0.4459628 +| epoch 2 | 6635/ 8400 batches | train loss 0.3940718 +| epoch 2 | 6639/ 8400 batches | train loss 0.3652437 +| epoch 2 | 6643/ 8400 batches | train loss 0.4542153 +| epoch 2 | 6647/ 8400 batches | train loss 0.3924188 +| epoch 2 | 6651/ 8400 batches | train loss 0.4826248 +| epoch 2 | 6655/ 8400 batches | train loss 0.4082083 +| epoch 2 | 6659/ 8400 batches | train loss 0.5111703 +| epoch 2 | 6663/ 8400 batches | train loss 0.3579967 +| epoch 2 | 6667/ 8400 batches | train loss 0.3979007 +| epoch 2 | 6671/ 8400 batches | train loss 0.4297503 +| epoch 2 | 6675/ 8400 batches | train loss 0.4076157 +| epoch 2 | 6679/ 8400 batches | train loss 0.4810241 +| epoch 2 | 6683/ 8400 batches | train loss 0.3466300 +| epoch 2 | 6687/ 8400 batches | train loss 0.4985346 +| epoch 2 | 6691/ 8400 batches | train loss 0.3297727 +| epoch 2 | 6695/ 8400 batches | train loss 0.4385538 +| epoch 2 | 6699/ 8400 batches | train loss 0.3609181 +| epoch 2 | 6703/ 8400 batches | train loss 0.3964626 +| epoch 2 | 6707/ 8400 batches | train loss 0.4460052 +| epoch 2 | 6711/ 8400 batches | train loss 0.4066135 +| epoch 2 | 6715/ 8400 batches | train loss 0.3828098 +| epoch 2 | 6719/ 8400 batches | train loss 0.5310951 +| epoch 2 | 6723/ 8400 batches | train loss 0.4153032 +| epoch 2 | 6727/ 8400 batches | train loss 0.4725240 +| epoch 2 | 6731/ 8400 batches | train loss 0.4904701 +| epoch 2 | 6735/ 8400 batches | train loss 0.4599610 +| epoch 2 | 6739/ 8400 batches | train loss 0.4378680 +| epoch 2 | 6743/ 8400 batches | train loss 0.3877022 +| epoch 2 | 6747/ 8400 batches | train loss 0.3650270 +| epoch 2 | 6751/ 8400 batches | train loss 0.3551787 +| epoch 2 | 6755/ 8400 batches | train loss 0.4496259 +| epoch 2 | 6759/ 8400 batches | train loss 0.4570082 +| epoch 2 | 6763/ 8400 batches | train loss 0.4232652 +| epoch 2 | 6767/ 8400 batches | train loss 0.3872012 +| epoch 2 | 6771/ 8400 batches | train loss 0.5287418 +| epoch 2 | 6775/ 8400 batches | train loss 0.4385306 +| epoch 2 | 6779/ 8400 batches | train loss 0.4307219 +| epoch 2 | 6783/ 8400 batches | train loss 0.4825148 +| epoch 2 | 6787/ 8400 batches | train loss 0.4756497 +| epoch 2 | 6791/ 8400 batches | train loss 0.5241013 +| epoch 2 | 6795/ 8400 batches | train loss 0.4585631 +| epoch 2 | 6799/ 8400 batches | train loss 0.4741077 +| epoch 2 | 6803/ 8400 batches | train loss 0.3632014 +| epoch 2 | 6807/ 8400 batches | train loss 0.3360876 +| epoch 2 | 6811/ 8400 batches | train loss 0.4705692 +| epoch 2 | 6815/ 8400 batches | train loss 0.4891838 +| epoch 2 | 6819/ 8400 batches | train loss 0.4295855 +| epoch 2 | 6823/ 8400 batches | train loss 0.4768055 +| epoch 2 | 6827/ 8400 batches | train loss 0.3811417 +| epoch 2 | 6831/ 8400 batches | train loss 0.5124766 +| epoch 2 | 6835/ 8400 batches | train loss 0.4237407 +| epoch 2 | 6839/ 8400 batches | train loss 0.4520408 +| epoch 2 | 6843/ 8400 batches | train loss 0.3972434 +| epoch 2 | 6847/ 8400 batches | train loss 0.5191001 +| epoch 2 | 6851/ 8400 batches | train loss 0.3852624 +| epoch 2 | 6855/ 8400 batches | train loss 0.4284990 +| epoch 2 | 6859/ 8400 batches | train loss 0.4996524 +| epoch 2 | 6863/ 8400 batches | train loss 0.4075267 +| epoch 2 | 6867/ 8400 batches | train loss 0.4013815 +| epoch 2 | 6871/ 8400 batches | train loss 0.3914483 +| epoch 2 | 6875/ 8400 batches | train loss 0.4134668 +| epoch 2 | 6879/ 8400 batches | train loss 0.4261007 +| epoch 2 | 6883/ 8400 batches | train loss 0.3241802 +| epoch 2 | 6887/ 8400 batches | train loss 0.4303287 +| epoch 2 | 6891/ 8400 batches | train loss 0.6668986 +| epoch 2 | 6895/ 8400 batches | train loss 0.5185063 +| epoch 2 | 6899/ 8400 batches | train loss 0.5274370 +| epoch 2 | 6903/ 8400 batches | train loss 0.4471864 +| epoch 2 | 6907/ 8400 batches | train loss 0.4818925 +| epoch 2 | 6911/ 8400 batches | train loss 0.4049334 +| epoch 2 | 6915/ 8400 batches | train loss 0.3712785 +| epoch 2 | 6919/ 8400 batches | train loss 0.3171133 +| epoch 2 | 6923/ 8400 batches | train loss 0.4088188 +| epoch 2 | 6927/ 8400 batches | train loss 0.3787465 +| epoch 2 | 6931/ 8400 batches | train loss 0.4218076 +| epoch 2 | 6935/ 8400 batches | train loss 0.4414844 +| epoch 2 | 6939/ 8400 batches | train loss 0.4202459 +| epoch 2 | 6943/ 8400 batches | train loss 0.4423471 +| epoch 2 | 6947/ 8400 batches | train loss 0.4978554 +| epoch 2 | 6951/ 8400 batches | train loss 0.4793210 +| epoch 2 | 6955/ 8400 batches | train loss 0.4084034 +| epoch 2 | 6959/ 8400 batches | train loss 0.4442290 +| epoch 2 | 6963/ 8400 batches | train loss 0.5867443 +| epoch 2 | 6967/ 8400 batches | train loss 0.4725111 +| epoch 2 | 6971/ 8400 batches | train loss 0.4128178 +| epoch 2 | 6975/ 8400 batches | train loss 0.4974557 +| epoch 2 | 6979/ 8400 batches | train loss 0.4791341 +| epoch 2 | 6983/ 8400 batches | train loss 0.3803162 +| epoch 2 | 6987/ 8400 batches | train loss 0.4643596 +| epoch 2 | 6991/ 8400 batches | train loss 0.4625736 +| epoch 2 | 6995/ 8400 batches | train loss 0.4555046 +| epoch 2 | 6999/ 8400 batches | train loss 0.4267131 +| epoch 2 | 7003/ 8400 batches | train loss 0.4304585 +| epoch 2 | 7007/ 8400 batches | train loss 0.3941030 +| epoch 2 | 7011/ 8400 batches | train loss 0.4482403 +| epoch 2 | 7015/ 8400 batches | train loss 0.3847388 +| epoch 2 | 7019/ 8400 batches | train loss 0.3666987 +| epoch 2 | 7023/ 8400 batches | train loss 0.3915593 +| epoch 2 | 7027/ 8400 batches | train loss 0.4398469 +| epoch 2 | 7031/ 8400 batches | train loss 0.4590636 +| epoch 2 | 7035/ 8400 batches | train loss 0.4612427 +| epoch 2 | 7039/ 8400 batches | train loss 0.4308408 +| epoch 2 | 7043/ 8400 batches | train loss 0.4091649 +| epoch 2 | 7047/ 8400 batches | train loss 0.4285401 +| epoch 2 | 7051/ 8400 batches | train loss 0.4012319 +| epoch 2 | 7055/ 8400 batches | train loss 0.5171798 +| epoch 2 | 7059/ 8400 batches | train loss 0.3635127 +| epoch 2 | 7063/ 8400 batches | train loss 0.3890100 +| epoch 2 | 7067/ 8400 batches | train loss 0.4276980 +| epoch 2 | 7071/ 8400 batches | train loss 0.4183370 +| epoch 2 | 7075/ 8400 batches | train loss 0.3928909 +| epoch 2 | 7079/ 8400 batches | train loss 0.4129559 +| epoch 2 | 7083/ 8400 batches | train loss 0.4193084 +| epoch 2 | 7087/ 8400 batches | train loss 0.4728698 +| epoch 2 | 7091/ 8400 batches | train loss 0.4015151 +| epoch 2 | 7095/ 8400 batches | train loss 0.3719270 +| epoch 2 | 7099/ 8400 batches | train loss 0.4460162 +| epoch 2 | 7103/ 8400 batches | train loss 0.4107914 +| epoch 2 | 7107/ 8400 batches | train loss 0.4480567 +| epoch 2 | 7111/ 8400 batches | train loss 0.4373131 +| epoch 2 | 7115/ 8400 batches | train loss 0.4593980 +| epoch 2 | 7119/ 8400 batches | train loss 0.5046185 +| epoch 2 | 7123/ 8400 batches | train loss 0.4994151 +| epoch 2 | 7127/ 8400 batches | train loss 0.4247710 +| epoch 2 | 7131/ 8400 batches | train loss 0.4699095 +| epoch 2 | 7135/ 8400 batches | train loss 0.3662687 +| epoch 2 | 7139/ 8400 batches | train loss 0.4386852 +| epoch 2 | 7143/ 8400 batches | train loss 0.3783248 +| epoch 2 | 7147/ 8400 batches | train loss 0.4926661 +| epoch 2 | 7151/ 8400 batches | train loss 0.4643103 +| epoch 2 | 7155/ 8400 batches | train loss 0.5037116 +| epoch 2 | 7159/ 8400 batches | train loss 0.4396274 +| epoch 2 | 7163/ 8400 batches | train loss 0.2921853 +| epoch 2 | 7167/ 8400 batches | train loss 0.4048859 +| epoch 2 | 7171/ 8400 batches | train loss 0.4542110 +| epoch 2 | 7175/ 8400 batches | train loss 0.5130676 +| epoch 2 | 7179/ 8400 batches | train loss 0.4874495 +| epoch 2 | 7183/ 8400 batches | train loss 0.4917848 +| epoch 2 | 7187/ 8400 batches | train loss 0.3938615 +| epoch 2 | 7191/ 8400 batches | train loss 0.3736802 +| epoch 2 | 7195/ 8400 batches | train loss 0.4121909 +| epoch 2 | 7199/ 8400 batches | train loss 0.3850673 +| epoch 2 | 7203/ 8400 batches | train loss 0.4286849 +| epoch 2 | 7207/ 8400 batches | train loss 0.4546300 +| epoch 2 | 7211/ 8400 batches | train loss 0.4019743 +| epoch 2 | 7215/ 8400 batches | train loss 0.4680196 +| epoch 2 | 7219/ 8400 batches | train loss 0.4200147 +| epoch 2 | 7223/ 8400 batches | train loss 0.5243983 +| epoch 2 | 7227/ 8400 batches | train loss 0.4174751 +| epoch 2 | 7231/ 8400 batches | train loss 0.3605863 +| epoch 2 | 7235/ 8400 batches | train loss 0.5106763 +| epoch 2 | 7239/ 8400 batches | train loss 0.4425638 +| epoch 2 | 7243/ 8400 batches | train loss 0.4232593 +| epoch 2 | 7247/ 8400 batches | train loss 0.3907430 +| epoch 2 | 7251/ 8400 batches | train loss 0.4255274 +| epoch 2 | 7255/ 8400 batches | train loss 0.4405202 +| epoch 2 | 7259/ 8400 batches | train loss 0.4017684 +| epoch 2 | 7263/ 8400 batches | train loss 0.4822419 +| epoch 2 | 7267/ 8400 batches | train loss 0.4709960 +| epoch 2 | 7271/ 8400 batches | train loss 0.5454205 +| epoch 2 | 7275/ 8400 batches | train loss 0.4563844 +| epoch 2 | 7279/ 8400 batches | train loss 0.2562997 +| epoch 2 | 7283/ 8400 batches | train loss 0.4877234 +| epoch 2 | 7287/ 8400 batches | train loss 0.4042388 +| epoch 2 | 7291/ 8400 batches | train loss 0.5638799 +| epoch 2 | 7295/ 8400 batches | train loss 0.4739606 +| epoch 2 | 7299/ 8400 batches | train loss 0.3743873 +| epoch 2 | 7303/ 8400 batches | train loss 0.5175897 +| epoch 2 | 7307/ 8400 batches | train loss 0.4369468 +| epoch 2 | 7311/ 8400 batches | train loss 0.4606816 +| epoch 2 | 7315/ 8400 batches | train loss 0.4336842 +| epoch 2 | 7319/ 8400 batches | train loss 0.4115458 +| epoch 2 | 7323/ 8400 batches | train loss 0.3866030 +| epoch 2 | 7327/ 8400 batches | train loss 0.4185141 +| epoch 2 | 7331/ 8400 batches | train loss 0.5273749 +| epoch 2 | 7335/ 8400 batches | train loss 0.4544299 +| epoch 2 | 7339/ 8400 batches | train loss 0.4647723 +| epoch 2 | 7343/ 8400 batches | train loss 0.3924540 +| epoch 2 | 7347/ 8400 batches | train loss 0.3636943 +| epoch 2 | 7351/ 8400 batches | train loss 0.4250437 +| epoch 2 | 7355/ 8400 batches | train loss 0.4711125 +| epoch 2 | 7359/ 8400 batches | train loss 0.3620801 +| epoch 2 | 7363/ 8400 batches | train loss 0.4529548 +| epoch 2 | 7367/ 8400 batches | train loss 0.3746335 +| epoch 2 | 7371/ 8400 batches | train loss 0.4058015 +| epoch 2 | 7375/ 8400 batches | train loss 0.3384687 +| epoch 2 | 7379/ 8400 batches | train loss 0.4773164 +| epoch 2 | 7383/ 8400 batches | train loss 0.4552816 +| epoch 2 | 7387/ 8400 batches | train loss 0.4443709 +| epoch 2 | 7391/ 8400 batches | train loss 0.4368008 +| epoch 2 | 7395/ 8400 batches | train loss 0.4024506 +| epoch 2 | 7399/ 8400 batches | train loss 0.3819963 +| epoch 2 | 7403/ 8400 batches | train loss 0.4913772 +| epoch 2 | 7407/ 8400 batches | train loss 0.4913197 +| epoch 2 | 7411/ 8400 batches | train loss 0.4297435 +| epoch 2 | 7415/ 8400 batches | train loss 0.5736839 +| epoch 2 | 7419/ 8400 batches | train loss 0.4384253 +| epoch 2 | 7423/ 8400 batches | train loss 0.4143513 +| epoch 2 | 7427/ 8400 batches | train loss 0.4351333 +| epoch 2 | 7431/ 8400 batches | train loss 0.4212862 +| epoch 2 | 7435/ 8400 batches | train loss 0.4093168 +| epoch 2 | 7439/ 8400 batches | train loss 0.4954488 +| epoch 2 | 7443/ 8400 batches | train loss 0.4288616 +| epoch 2 | 7447/ 8400 batches | train loss 0.3414128 +| epoch 2 | 7451/ 8400 batches | train loss 0.2922701 +| epoch 2 | 7455/ 8400 batches | train loss 0.4513274 +| epoch 2 | 7459/ 8400 batches | train loss 0.4398248 +| epoch 2 | 7463/ 8400 batches | train loss 0.3442312 +| epoch 2 | 7467/ 8400 batches | train loss 0.4640820 +| epoch 2 | 7471/ 8400 batches | train loss 0.4181828 +| epoch 2 | 7475/ 8400 batches | train loss 0.5049767 +| epoch 2 | 7479/ 8400 batches | train loss 0.5190731 +| epoch 2 | 7483/ 8400 batches | train loss 0.4892808 +| epoch 2 | 7487/ 8400 batches | train loss 0.4751779 +| epoch 2 | 7491/ 8400 batches | train loss 0.4247923 +| epoch 2 | 7495/ 8400 batches | train loss 0.4094692 +| epoch 2 | 7499/ 8400 batches | train loss 0.4555722 +| epoch 2 | 7503/ 8400 batches | train loss 0.4847822 +| epoch 2 | 7507/ 8400 batches | train loss 0.4076359 +| epoch 2 | 7511/ 8400 batches | train loss 0.5205165 +| epoch 2 | 7515/ 8400 batches | train loss 0.3262413 +| epoch 2 | 7519/ 8400 batches | train loss 0.4158912 +| epoch 2 | 7523/ 8400 batches | train loss 0.4610707 +| epoch 2 | 7527/ 8400 batches | train loss 0.4720271 +| epoch 2 | 7531/ 8400 batches | train loss 0.4529773 +| epoch 2 | 7535/ 8400 batches | train loss 0.4492773 +| epoch 2 | 7539/ 8400 batches | train loss 0.4957269 +| epoch 2 | 7543/ 8400 batches | train loss 0.3855027 +| epoch 2 | 7547/ 8400 batches | train loss 0.3098511 +| epoch 2 | 7551/ 8400 batches | train loss 0.5082740 +| epoch 2 | 7555/ 8400 batches | train loss 0.5054609 +| epoch 2 | 7559/ 8400 batches | train loss 0.3731389 +| epoch 2 | 7563/ 8400 batches | train loss 0.2713947 +| epoch 2 | 7567/ 8400 batches | train loss 0.4605697 +| epoch 2 | 7571/ 8400 batches | train loss 0.4592023 +| epoch 2 | 7575/ 8400 batches | train loss 0.4494402 +| epoch 2 | 7579/ 8400 batches | train loss 0.4328233 +| epoch 2 | 7583/ 8400 batches | train loss 0.3087216 +| epoch 2 | 7587/ 8400 batches | train loss 0.5018719 +| epoch 2 | 7591/ 8400 batches | train loss 0.4821679 +| epoch 2 | 7595/ 8400 batches | train loss 0.4985678 +| epoch 2 | 7599/ 8400 batches | train loss 0.5800334 +| epoch 2 | 7603/ 8400 batches | train loss 0.3588495 +| epoch 2 | 7607/ 8400 batches | train loss 0.4252089 +| epoch 2 | 7611/ 8400 batches | train loss 0.3809231 +| epoch 2 | 7615/ 8400 batches | train loss 0.4785965 +| epoch 2 | 7619/ 8400 batches | train loss 0.4535257 +| epoch 2 | 7623/ 8400 batches | train loss 0.4373708 +| epoch 2 | 7627/ 8400 batches | train loss 0.4222895 +| epoch 2 | 7631/ 8400 batches | train loss 0.4069597 +| epoch 2 | 7635/ 8400 batches | train loss 0.5601982 +| epoch 2 | 7639/ 8400 batches | train loss 0.4648648 +| epoch 2 | 7643/ 8400 batches | train loss 0.3683262 +| epoch 2 | 7647/ 8400 batches | train loss 0.3782251 +| epoch 2 | 7651/ 8400 batches | train loss 0.4531475 +| epoch 2 | 7655/ 8400 batches | train loss 0.4757580 +| epoch 2 | 7659/ 8400 batches | train loss 0.4016430 +| epoch 2 | 7663/ 8400 batches | train loss 0.4088833 +| epoch 2 | 7667/ 8400 batches | train loss 0.4091849 +| epoch 2 | 7671/ 8400 batches | train loss 0.4058450 +| epoch 2 | 7675/ 8400 batches | train loss 0.4230076 +| epoch 2 | 7679/ 8400 batches | train loss 0.4309586 +| epoch 2 | 7683/ 8400 batches | train loss 0.4301428 +| epoch 2 | 7687/ 8400 batches | train loss 0.5059662 +| epoch 2 | 7691/ 8400 batches | train loss 0.4723625 +| epoch 2 | 7695/ 8400 batches | train loss 0.4345090 +| epoch 2 | 7699/ 8400 batches | train loss 0.4502392 +| epoch 2 | 7703/ 8400 batches | train loss 0.4025121 +| epoch 2 | 7707/ 8400 batches | train loss 0.4112363 +| epoch 2 | 7711/ 8400 batches | train loss 0.4545785 +| epoch 2 | 7715/ 8400 batches | train loss 0.3264987 +| epoch 2 | 7719/ 8400 batches | train loss 0.4646623 +| epoch 2 | 7723/ 8400 batches | train loss 0.4302696 +| epoch 2 | 7727/ 8400 batches | train loss 0.4420041 +| epoch 2 | 7731/ 8400 batches | train loss 0.3493756 +| epoch 2 | 7735/ 8400 batches | train loss 0.4532015 +| epoch 2 | 7739/ 8400 batches | train loss 0.4172077 +| epoch 2 | 7743/ 8400 batches | train loss 0.4733063 +| epoch 2 | 7747/ 8400 batches | train loss 0.4315346 +| epoch 2 | 7751/ 8400 batches | train loss 0.4780881 +| epoch 2 | 7755/ 8400 batches | train loss 0.4072280 +| epoch 2 | 7759/ 8400 batches | train loss 0.4099395 +| epoch 2 | 7763/ 8400 batches | train loss 0.4373848 +| epoch 2 | 7767/ 8400 batches | train loss 0.4830437 +| epoch 2 | 7771/ 8400 batches | train loss 0.5525598 +| epoch 2 | 7775/ 8400 batches | train loss 0.3653696 +| epoch 2 | 7779/ 8400 batches | train loss 0.4109278 +| epoch 2 | 7783/ 8400 batches | train loss 0.3732586 +| epoch 2 | 7787/ 8400 batches | train loss 0.4646640 +| epoch 2 | 7791/ 8400 batches | train loss 0.4906887 +| epoch 2 | 7795/ 8400 batches | train loss 0.4518847 +| epoch 2 | 7799/ 8400 batches | train loss 0.4317854 +| epoch 2 | 7803/ 8400 batches | train loss 0.5443358 +| epoch 2 | 7807/ 8400 batches | train loss 0.5149982 +| epoch 2 | 7811/ 8400 batches | train loss 0.4555129 +| epoch 2 | 7815/ 8400 batches | train loss 0.5974020 +| epoch 2 | 7819/ 8400 batches | train loss 0.4126021 +| epoch 2 | 7823/ 8400 batches | train loss 0.4199046 +| epoch 2 | 7827/ 8400 batches | train loss 0.4396512 +| epoch 2 | 7831/ 8400 batches | train loss 0.4285427 +| epoch 2 | 7835/ 8400 batches | train loss 0.4276830 +| epoch 2 | 7839/ 8400 batches | train loss 0.3524906 +| epoch 2 | 7843/ 8400 batches | train loss 0.4047725 +| epoch 2 | 7847/ 8400 batches | train loss 0.2236762 +| epoch 2 | 7851/ 8400 batches | train loss 0.4660423 +| epoch 2 | 7855/ 8400 batches | train loss 0.4508879 +| epoch 2 | 7859/ 8400 batches | train loss 0.4449871 +| epoch 2 | 7863/ 8400 batches | train loss 0.5057083 +| epoch 2 | 7867/ 8400 batches | train loss 0.3627775 +| epoch 2 | 7871/ 8400 batches | train loss 0.3727787 +| epoch 2 | 7875/ 8400 batches | train loss 0.4192485 +| epoch 2 | 7879/ 8400 batches | train loss 0.4133629 +| epoch 2 | 7883/ 8400 batches | train loss 0.4204447 +| epoch 2 | 7887/ 8400 batches | train loss 0.3438849 +| epoch 2 | 7891/ 8400 batches | train loss 0.5811048 +| epoch 2 | 7895/ 8400 batches | train loss 0.5527893 +| epoch 2 | 7899/ 8400 batches | train loss 0.4648438 +| epoch 2 | 7903/ 8400 batches | train loss 0.4002014 +| epoch 2 | 7907/ 8400 batches | train loss 0.3939990 +| epoch 2 | 7911/ 8400 batches | train loss 0.4063644 +| epoch 2 | 7915/ 8400 batches | train loss 0.4101642 +| epoch 2 | 7919/ 8400 batches | train loss 0.4135553 +| epoch 2 | 7923/ 8400 batches | train loss 0.5272847 +| epoch 2 | 7927/ 8400 batches | train loss 0.3969342 +| epoch 2 | 7931/ 8400 batches | train loss 0.4822612 +| epoch 2 | 7935/ 8400 batches | train loss 0.3837664 +| epoch 2 | 7939/ 8400 batches | train loss 0.3632128 +| epoch 2 | 7943/ 8400 batches | train loss 0.4391234 +| epoch 2 | 7947/ 8400 batches | train loss 0.3430632 +| epoch 2 | 7951/ 8400 batches | train loss 0.3890565 +| epoch 2 | 7955/ 8400 batches | train loss 0.5298819 +| epoch 2 | 7959/ 8400 batches | train loss 0.4882834 +| epoch 2 | 7963/ 8400 batches | train loss 0.3924131 +| epoch 2 | 7967/ 8400 batches | train loss 0.4750591 +| epoch 2 | 7971/ 8400 batches | train loss 0.4734011 +| epoch 2 | 7975/ 8400 batches | train loss 0.5389587 +| epoch 2 | 7979/ 8400 batches | train loss 0.4546551 +| epoch 2 | 7983/ 8400 batches | train loss 0.5174361 +| epoch 2 | 7987/ 8400 batches | train loss 0.4958332 +| epoch 2 | 7991/ 8400 batches | train loss 0.4559325 +| epoch 2 | 7995/ 8400 batches | train loss 0.3952898 +| epoch 2 | 7999/ 8400 batches | train loss 0.4833924 +| epoch 2 | 8003/ 8400 batches | train loss 0.4855407 +| epoch 2 | 8007/ 8400 batches | train loss 0.4102639 +| epoch 2 | 8011/ 8400 batches | train loss 0.3824531 +| epoch 2 | 8015/ 8400 batches | train loss 0.4570545 +| epoch 2 | 8019/ 8400 batches | train loss 0.4582804 +| epoch 2 | 8023/ 8400 batches | train loss 0.4836689 +| epoch 2 | 8027/ 8400 batches | train loss 0.4194646 +| epoch 2 | 8031/ 8400 batches | train loss 0.4221246 +| epoch 2 | 8035/ 8400 batches | train loss 0.4892279 +| epoch 2 | 8039/ 8400 batches | train loss 0.4252645 +| epoch 2 | 8043/ 8400 batches | train loss 0.4529178 +| epoch 2 | 8047/ 8400 batches | train loss 0.4376356 +| epoch 2 | 8051/ 8400 batches | train loss 0.5080457 +| epoch 2 | 8055/ 8400 batches | train loss 0.3796359 +| epoch 2 | 8059/ 8400 batches | train loss 0.3932885 +| epoch 2 | 8063/ 8400 batches | train loss 0.3656267 +| epoch 2 | 8067/ 8400 batches | train loss 0.4664707 +| epoch 2 | 8071/ 8400 batches | train loss 0.4626297 +| epoch 2 | 8075/ 8400 batches | train loss 0.4754076 +| epoch 2 | 8079/ 8400 batches | train loss 0.4173518 +| epoch 2 | 8083/ 8400 batches | train loss 0.3944663 +| epoch 2 | 8087/ 8400 batches | train loss 0.4873048 +| epoch 2 | 8091/ 8400 batches | train loss 0.4304654 +| epoch 2 | 8095/ 8400 batches | train loss 0.5082889 +| epoch 2 | 8099/ 8400 batches | train loss 0.3933712 +| epoch 2 | 8103/ 8400 batches | train loss 0.4592372 +| epoch 2 | 8107/ 8400 batches | train loss 0.3783329 +| epoch 2 | 8111/ 8400 batches | train loss 0.3940658 +| epoch 2 | 8115/ 8400 batches | train loss 0.5138921 +| epoch 2 | 8119/ 8400 batches | train loss 0.4126198 +| epoch 2 | 8123/ 8400 batches | train loss 0.4360863 +| epoch 2 | 8127/ 8400 batches | train loss 0.4016995 +| epoch 2 | 8131/ 8400 batches | train loss 0.3609138 +| epoch 2 | 8135/ 8400 batches | train loss 0.5206388 +| epoch 2 | 8139/ 8400 batches | train loss 0.4335110 +| epoch 2 | 8143/ 8400 batches | train loss 0.4519268 +| epoch 2 | 8147/ 8400 batches | train loss 0.4058818 +| epoch 2 | 8151/ 8400 batches | train loss 0.4460497 +| epoch 2 | 8155/ 8400 batches | train loss 0.4280242 +| epoch 2 | 8159/ 8400 batches | train loss 0.4440992 +| epoch 2 | 8163/ 8400 batches | train loss 0.5024058 +| epoch 2 | 8167/ 8400 batches | train loss 0.4687991 +| epoch 2 | 8171/ 8400 batches | train loss 0.4452318 +| epoch 2 | 8175/ 8400 batches | train loss 0.4323971 +| epoch 2 | 8179/ 8400 batches | train loss 0.3925655 +| epoch 2 | 8183/ 8400 batches | train loss 0.3829882 +| epoch 2 | 8187/ 8400 batches | train loss 0.4271670 +| epoch 2 | 8191/ 8400 batches | train loss 0.4277080 +| epoch 2 | 8195/ 8400 batches | train loss 0.5036961 +| epoch 2 | 8199/ 8400 batches | train loss 0.3855321 +| epoch 2 | 8203/ 8400 batches | train loss 0.6859992 +| epoch 2 | 8207/ 8400 batches | train loss 0.4891511 +| epoch 2 | 8211/ 8400 batches | train loss 0.4604536 +| epoch 2 | 8215/ 8400 batches | train loss 0.5171624 +| epoch 2 | 8219/ 8400 batches | train loss 0.4180624 +| epoch 2 | 8223/ 8400 batches | train loss 0.5666121 +| epoch 2 | 8227/ 8400 batches | train loss 0.5120658 +| epoch 2 | 8231/ 8400 batches | train loss 0.4421765 +| epoch 2 | 8235/ 8400 batches | train loss 0.4574822 +| epoch 2 | 8239/ 8400 batches | train loss 0.5098551 +| epoch 2 | 8243/ 8400 batches | train loss 0.3701166 +| epoch 2 | 8247/ 8400 batches | train loss 0.3300191 +| epoch 2 | 8251/ 8400 batches | train loss 0.4826222 +| epoch 2 | 8255/ 8400 batches | train loss 0.3807100 +| epoch 2 | 8259/ 8400 batches | train loss 0.4190627 +| epoch 2 | 8263/ 8400 batches | train loss 0.3935230 +| epoch 2 | 8267/ 8400 batches | train loss 0.3695375 +| epoch 2 | 8271/ 8400 batches | train loss 0.4515898 +| epoch 2 | 8275/ 8400 batches | train loss 0.4960879 +| epoch 2 | 8279/ 8400 batches | train loss 0.3968720 +| epoch 2 | 8283/ 8400 batches | train loss 0.3766199 +| epoch 2 | 8287/ 8400 batches | train loss 0.4910249 +| epoch 2 | 8291/ 8400 batches | train loss 0.3947165 +| epoch 2 | 8295/ 8400 batches | train loss 0.3725588 +| epoch 2 | 8299/ 8400 batches | train loss 0.4272549 +| epoch 2 | 8303/ 8400 batches | train loss 0.4667849 +| epoch 2 | 8307/ 8400 batches | train loss 0.5021523 +| epoch 2 | 8311/ 8400 batches | train loss 0.5092399 +| epoch 2 | 8315/ 8400 batches | train loss 0.5729920 +| epoch 2 | 8319/ 8400 batches | train loss 0.3172703 +| epoch 2 | 8323/ 8400 batches | train loss 0.4878427 +| epoch 2 | 8327/ 8400 batches | train loss 0.4684072 +| epoch 2 | 8331/ 8400 batches | train loss 0.4283581 +| epoch 2 | 8335/ 8400 batches | train loss 0.5636411 +| epoch 2 | 8339/ 8400 batches | train loss 0.3906810 +| epoch 2 | 8343/ 8400 batches | train loss 0.5060986 +| epoch 2 | 8347/ 8400 batches | train loss 0.4087880 +| epoch 2 | 8351/ 8400 batches | train loss 0.3997197 +| epoch 2 | 8355/ 8400 batches | train loss 0.4348993 +| epoch 2 | 8359/ 8400 batches | train loss 0.5096422 +| epoch 2 | 8363/ 8400 batches | train loss 0.4212902 +| epoch 2 | 8367/ 8400 batches | train loss 0.4100126 +| epoch 2 | 8371/ 8400 batches | train loss 0.4153573 +| epoch 2 | 8375/ 8400 batches | train loss 0.4703512 +| epoch 2 | 8379/ 8400 batches | train loss 0.4345771 +| epoch 2 | 8383/ 8400 batches | train loss 0.3726317 +| epoch 2 | 8387/ 8400 batches | train loss 0.3949343 +| epoch 2 | 8391/ 8400 batches | train loss 0.5364680 +| epoch 2 | 8395/ 8400 batches | train loss 0.3587578 +| epoch 2 | 8399/ 8400 batches | train loss 0.5236561 +-------------------------------------------------------------------------------- +| epoch 2 | 3/ 8400 batches | test loss 0.4847508 +| epoch 2 | 7/ 8400 batches | test loss 0.3653224 +| epoch 2 | 11/ 8400 batches | test loss 0.1980260 +| epoch 2 | 15/ 8400 batches | test loss 0.4411439 +| epoch 2 | 19/ 8400 batches | test loss 0.4964648 +| epoch 2 | 23/ 8400 batches | test loss 0.3708342 +| epoch 2 | 27/ 8400 batches | test loss 0.4242280 +| epoch 2 | 31/ 8400 batches | test loss 0.4459499 +| epoch 2 | 35/ 8400 batches | test loss 0.4402525 +| epoch 2 | 39/ 8400 batches | test loss 0.3955732 +| epoch 2 | 43/ 8400 batches | test loss 0.4517928 +| epoch 2 | 47/ 8400 batches | test loss 0.3534632 +| epoch 2 | 51/ 8400 batches | test loss 0.4209988 +| epoch 2 | 55/ 8400 batches | test loss 0.3919137 +| epoch 2 | 59/ 8400 batches | test loss 0.5451552 +| epoch 2 | 63/ 8400 batches | test loss 0.4803168 +| epoch 2 | 67/ 8400 batches | test loss 0.4542006 +| epoch 2 | 71/ 8400 batches | test loss 0.4761653 +| epoch 2 | 75/ 8400 batches | test loss 0.3965576 +| epoch 2 | 79/ 8400 batches | test loss 0.4640288 +| epoch 2 | 83/ 8400 batches | test loss 0.4978254 +| epoch 2 | 87/ 8400 batches | test loss 0.3670468 +| epoch 2 | 91/ 8400 batches | test loss 0.3849856 +| epoch 2 | 95/ 8400 batches | test loss 0.4170167 +| epoch 2 | 99/ 8400 batches | test loss 0.4110425 +| epoch 2 | 103/ 8400 batches | test loss 0.1744907 +| epoch 2 | 107/ 8400 batches | test loss 0.5492438 +| epoch 2 | 111/ 8400 batches | test loss 0.3795059 +| epoch 2 | 115/ 8400 batches | test loss 0.4571554 +| epoch 2 | 119/ 8400 batches | test loss 0.4812487 +| epoch 2 | 123/ 8400 batches | test loss 0.4515166 +| epoch 2 | 127/ 8400 batches | test loss 0.4136388 +| epoch 2 | 131/ 8400 batches | test loss 0.5243822 +| epoch 2 | 135/ 8400 batches | test loss 0.4014806 +| epoch 2 | 139/ 8400 batches | test loss 0.4087412 +| epoch 2 | 143/ 8400 batches | test loss 0.4877242 +| epoch 2 | 147/ 8400 batches | test loss 0.4158931 +| epoch 2 | 151/ 8400 batches | test loss 0.4201264 +| epoch 2 | 155/ 8400 batches | test loss 0.3791307 +| epoch 2 | 159/ 8400 batches | test loss 0.4148852 +| epoch 2 | 163/ 8400 batches | test loss 0.4179410 +| epoch 2 | 167/ 8400 batches | test loss 0.5183903 +| epoch 2 | 171/ 8400 batches | test loss 0.4938429 +| epoch 2 | 175/ 8400 batches | test loss 0.4229059 +| epoch 2 | 179/ 8400 batches | test loss 0.4085885 +| epoch 2 | 183/ 8400 batches | test loss 0.4601756 +| epoch 2 | 187/ 8400 batches | test loss 0.4901630 +| epoch 2 | 191/ 8400 batches | test loss 0.4454888 +| epoch 2 | 195/ 8400 batches | test loss 0.5479761 +| epoch 2 | 199/ 8400 batches | test loss 0.4668353 +| epoch 2 | 203/ 8400 batches | test loss 0.4416052 +| epoch 2 | 207/ 8400 batches | test loss 0.4608553 +| epoch 2 | 211/ 8400 batches | test loss 0.4577955 +| epoch 2 | 215/ 8400 batches | test loss 0.4269148 +| epoch 2 | 219/ 8400 batches | test loss 0.4206091 +| epoch 2 | 223/ 8400 batches | test loss 0.4794655 +| epoch 2 | 227/ 8400 batches | test loss 0.4471678 +| epoch 2 | 231/ 8400 batches | test loss 0.4618847 +| epoch 2 | 235/ 8400 batches | test loss 0.4322526 +| epoch 2 | 239/ 8400 batches | test loss 0.4268472 +| epoch 2 | 243/ 8400 batches | test loss 0.4346126 +| epoch 2 | 247/ 8400 batches | test loss 0.4550204 +| epoch 2 | 251/ 8400 batches | test loss 0.3787837 +| epoch 2 | 255/ 8400 batches | test loss 0.4370987 +| epoch 2 | 259/ 8400 batches | test loss 0.3634155 +| epoch 2 | 263/ 8400 batches | test loss 0.3658380 +| epoch 2 | 267/ 8400 batches | test loss 0.4521217 +| epoch 2 | 271/ 8400 batches | test loss 0.3238748 +| epoch 2 | 275/ 8400 batches | test loss 0.4638857 +| epoch 2 | 279/ 8400 batches | test loss 0.4641461 +| epoch 2 | 283/ 8400 batches | test loss 0.5918404 +| epoch 2 | 287/ 8400 batches | test loss 0.4334638 +| epoch 2 | 291/ 8400 batches | test loss 0.3877937 +| epoch 2 | 295/ 8400 batches | test loss 0.4779786 +| epoch 2 | 299/ 8400 batches | test loss 0.5486596 +| epoch 2 | 303/ 8400 batches | test loss 0.4588679 +| epoch 2 | 307/ 8400 batches | test loss 0.4348928 +| epoch 2 | 311/ 8400 batches | test loss 0.4592320 +| epoch 2 | 315/ 8400 batches | test loss 0.4156356 +| epoch 2 | 319/ 8400 batches | test loss 0.4533148 +| epoch 2 | 323/ 8400 batches | test loss 0.4507282 +| epoch 2 | 327/ 8400 batches | test loss 0.4170481 +| epoch 2 | 331/ 8400 batches | test loss 0.2736739 +| epoch 2 | 335/ 8400 batches | test loss 0.4095398 +| epoch 2 | 339/ 8400 batches | test loss 0.3912836 +| epoch 2 | 343/ 8400 batches | test loss 0.4804849 +| epoch 2 | 347/ 8400 batches | test loss 0.3922172 +| epoch 2 | 351/ 8400 batches | test loss 0.4551689 +| epoch 2 | 355/ 8400 batches | test loss 0.5170163 +| epoch 2 | 359/ 8400 batches | test loss 0.4081990 +| epoch 2 | 363/ 8400 batches | test loss 0.4134353 +| epoch 2 | 367/ 8400 batches | test loss 0.2970905 +| epoch 2 | 371/ 8400 batches | test loss 0.3204580 +| epoch 2 | 375/ 8400 batches | test loss 0.3512537 +| epoch 2 | 379/ 8400 batches | test loss 0.5208926 +| epoch 2 | 383/ 8400 batches | test loss 0.4156558 +| epoch 2 | 387/ 8400 batches | test loss 0.3984537 +| epoch 2 | 391/ 8400 batches | test loss 0.3443584 +| epoch 2 | 395/ 8400 batches | test loss 0.4668775 +| epoch 2 | 399/ 8400 batches | test loss 0.3808827 +| epoch 2 | 403/ 8400 batches | test loss 0.5238194 +| epoch 2 | 407/ 8400 batches | test loss 0.4733370 +| epoch 2 | 411/ 8400 batches | test loss 0.4631205 +| epoch 2 | 415/ 8400 batches | test loss 0.4793800 +| epoch 2 | 419/ 8400 batches | test loss 0.3230661 +| epoch 2 | 423/ 8400 batches | test loss 0.4822882 +| epoch 2 | 427/ 8400 batches | test loss 0.4013682 +| epoch 2 | 431/ 8400 batches | test loss 0.3972542 +| epoch 2 | 435/ 8400 batches | test loss 0.4602132 +| epoch 2 | 439/ 8400 batches | test loss 0.3958151 +| epoch 2 | 443/ 8400 batches | test loss 0.4532790 +| epoch 2 | 447/ 8400 batches | test loss 0.5044289 +| epoch 2 | 451/ 8400 batches | test loss 0.3391435 +| epoch 2 | 455/ 8400 batches | test loss 0.4307181 +| epoch 2 | 459/ 8400 batches | test loss 0.4637157 +| epoch 2 | 463/ 8400 batches | test loss 0.3822381 +| epoch 2 | 467/ 8400 batches | test loss 0.4423759 +| epoch 2 | 471/ 8400 batches | test loss 0.4523017 +| epoch 2 | 475/ 8400 batches | test loss 0.3287528 +| epoch 2 | 479/ 8400 batches | test loss 0.4894750 +| epoch 2 | 483/ 8400 batches | test loss 0.4089992 +| epoch 2 | 487/ 8400 batches | test loss 0.5056423 +| epoch 2 | 491/ 8400 batches | test loss 0.5017703 +| epoch 2 | 495/ 8400 batches | test loss 0.4337994 +| epoch 2 | 499/ 8400 batches | test loss 0.4524754 +| epoch 2 | 503/ 8400 batches | test loss 0.3953378 +| epoch 2 | 507/ 8400 batches | test loss 0.5543277 +| epoch 2 | 511/ 8400 batches | test loss 0.4536319 +| epoch 2 | 515/ 8400 batches | test loss 0.5071777 +| epoch 2 | 519/ 8400 batches | test loss 0.4784953 +| epoch 2 | 523/ 8400 batches | test loss 0.4104493 +| epoch 2 | 527/ 8400 batches | test loss 0.3386659 +| epoch 2 | 531/ 8400 batches | test loss 0.5211295 +| epoch 2 | 535/ 8400 batches | test loss 0.4305915 +| epoch 2 | 539/ 8400 batches | test loss 0.4830220 +| epoch 2 | 543/ 8400 batches | test loss 0.4139406 +| epoch 2 | 547/ 8400 batches | test loss 0.4319276 +| epoch 2 | 551/ 8400 batches | test loss 0.4034017 +| epoch 2 | 555/ 8400 batches | test loss 0.5197914 +| epoch 2 | 559/ 8400 batches | test loss 0.4375202 +| epoch 2 | 563/ 8400 batches | test loss 0.4537227 +| epoch 2 | 567/ 8400 batches | test loss 0.4526184 +| epoch 2 | 571/ 8400 batches | test loss 0.4999759 +| epoch 2 | 575/ 8400 batches | test loss 0.5149099 +| epoch 2 | 579/ 8400 batches | test loss 0.3606366 +| epoch 2 | 583/ 8400 batches | test loss 0.3927469 +| epoch 2 | 587/ 8400 batches | test loss 0.3725702 +| epoch 2 | 591/ 8400 batches | test loss 0.4250820 +| epoch 2 | 595/ 8400 batches | test loss 0.4429562 +| epoch 2 | 599/ 8400 batches | test loss 0.3533447 +| epoch 2 | 603/ 8400 batches | test loss 0.3917531 +| epoch 2 | 607/ 8400 batches | test loss 0.5235780 +| epoch 2 | 611/ 8400 batches | test loss 0.4703154 +| epoch 2 | 615/ 8400 batches | test loss 0.4314028 +| epoch 2 | 619/ 8400 batches | test loss 0.5057480 +| epoch 2 | 623/ 8400 batches | test loss 0.4537682 +| epoch 2 | 627/ 8400 batches | test loss 0.4034238 +| epoch 2 | 631/ 8400 batches | test loss 0.4332489 +| epoch 2 | 635/ 8400 batches | test loss 0.4965527 +| epoch 2 | 639/ 8400 batches | test loss 0.3936609 +| epoch 2 | 643/ 8400 batches | test loss 0.3994475 +| epoch 2 | 647/ 8400 batches | test loss 0.3991082 +| epoch 2 | 651/ 8400 batches | test loss 0.4386994 +| epoch 2 | 655/ 8400 batches | test loss 0.4575856 +| epoch 2 | 659/ 8400 batches | test loss 0.4676037 +| epoch 2 | 663/ 8400 batches | test loss 0.4224727 +| epoch 2 | 667/ 8400 batches | test loss 0.4132534 +| epoch 2 | 671/ 8400 batches | test loss 0.5012492 +| epoch 2 | 675/ 8400 batches | test loss 0.4250906 +| epoch 2 | 679/ 8400 batches | test loss 0.4463664 +| epoch 2 | 683/ 8400 batches | test loss 0.4788580 +| epoch 2 | 687/ 8400 batches | test loss 0.4629843 +| epoch 2 | 691/ 8400 batches | test loss 0.4417134 +| epoch 2 | 695/ 8400 batches | test loss 0.4144459 +| epoch 2 | 699/ 8400 batches | test loss 0.4689002 +| epoch 2 | 703/ 8400 batches | test loss 0.4386952 +| epoch 2 | 707/ 8400 batches | test loss 0.4324512 +| epoch 2 | 711/ 8400 batches | test loss 0.4579524 +| epoch 2 | 715/ 8400 batches | test loss 0.4043927 +| epoch 2 | 719/ 8400 batches | test loss 0.3489857 +| epoch 2 | 723/ 8400 batches | test loss 0.4402595 +| epoch 2 | 727/ 8400 batches | test loss 0.4601274 +| epoch 2 | 731/ 8400 batches | test loss 0.4405244 +| epoch 2 | 735/ 8400 batches | test loss 0.4388878 +| epoch 2 | 739/ 8400 batches | test loss 0.3573432 +| epoch 2 | 743/ 8400 batches | test loss 0.3606683 +| epoch 2 | 747/ 8400 batches | test loss 0.4862258 +| epoch 2 | 751/ 8400 batches | test loss 0.5449278 +| epoch 2 | 755/ 8400 batches | test loss 0.3844610 +| epoch 2 | 759/ 8400 batches | test loss 0.4372624 +| epoch 2 | 763/ 8400 batches | test loss 0.4857170 +| epoch 2 | 767/ 8400 batches | test loss 0.4576598 +| epoch 2 | 771/ 8400 batches | test loss 0.5353319 +| epoch 2 | 775/ 8400 batches | test loss 0.3895023 +| epoch 2 | 779/ 8400 batches | test loss 0.4155867 +| epoch 2 | 783/ 8400 batches | test loss 0.4738109 +| epoch 2 | 787/ 8400 batches | test loss 0.4788624 +| epoch 2 | 791/ 8400 batches | test loss 0.5010348 +| epoch 2 | 795/ 8400 batches | test loss 0.4574017 +| epoch 2 | 799/ 8400 batches | test loss 0.4525063 +| epoch 2 | 803/ 8400 batches | test loss 0.3988197 +| epoch 2 | 807/ 8400 batches | test loss 0.3523614 +| epoch 2 | 811/ 8400 batches | test loss 0.4570324 +| epoch 2 | 815/ 8400 batches | test loss 0.3543860 +| epoch 2 | 819/ 8400 batches | test loss 0.4281558 +| epoch 2 | 823/ 8400 batches | test loss 0.4440421 +| epoch 2 | 827/ 8400 batches | test loss 0.4083322 +| epoch 2 | 831/ 8400 batches | test loss 0.4994066 +| epoch 2 | 835/ 8400 batches | test loss 0.4577960 +| epoch 2 | 839/ 8400 batches | test loss 0.4293430 +| epoch 2 | 843/ 8400 batches | test loss 0.4397605 +| epoch 2 | 847/ 8400 batches | test loss 0.3282270 +| epoch 2 | 851/ 8400 batches | test loss 0.5632775 +| epoch 2 | 855/ 8400 batches | test loss 0.4727374 +| epoch 2 | 859/ 8400 batches | test loss 0.5061570 +| epoch 2 | 863/ 8400 batches | test loss 0.4437107 +| epoch 2 | 867/ 8400 batches | test loss 0.3492931 +| epoch 2 | 871/ 8400 batches | test loss 0.4438059 +| epoch 2 | 875/ 8400 batches | test loss 0.5134820 +| epoch 2 | 879/ 8400 batches | test loss 0.4463922 +| epoch 2 | 883/ 8400 batches | test loss 0.4249644 +| epoch 2 | 887/ 8400 batches | test loss 0.4932758 +| epoch 2 | 891/ 8400 batches | test loss 0.3643948 +| epoch 2 | 895/ 8400 batches | test loss 0.4508461 +| epoch 2 | 899/ 8400 batches | test loss 0.4397899 +| epoch 2 | 903/ 8400 batches | test loss 0.4635890 +| epoch 2 | 907/ 8400 batches | test loss 0.5362753 +| epoch 2 | 911/ 8400 batches | test loss 0.4245804 +| epoch 2 | 915/ 8400 batches | test loss 0.3027481 +| epoch 2 | 919/ 8400 batches | test loss 0.4608878 +| epoch 2 | 923/ 8400 batches | test loss 0.7440290 +| epoch 2 | 927/ 8400 batches | test loss 0.5023031 +| epoch 2 | 931/ 8400 batches | test loss 0.5411845 +| epoch 2 | 935/ 8400 batches | test loss 0.4781693 +| epoch 2 | 939/ 8400 batches | test loss 0.4952441 +| epoch 2 | 943/ 8400 batches | test loss 0.4623036 +| epoch 2 | 947/ 8400 batches | test loss 0.2399192 +| epoch 2 | 951/ 8400 batches | test loss 0.4795153 +| epoch 2 | 955/ 8400 batches | test loss 0.3686490 +| epoch 2 | 959/ 8400 batches | test loss 0.4318544 +| epoch 2 | 963/ 8400 batches | test loss 0.5120963 +| epoch 2 | 967/ 8400 batches | test loss 0.3611973 +| epoch 2 | 971/ 8400 batches | test loss 0.4789494 +| epoch 2 | 975/ 8400 batches | test loss 0.4654132 +| epoch 2 | 979/ 8400 batches | test loss 0.4472801 +| epoch 2 | 983/ 8400 batches | test loss 0.4868614 +| epoch 2 | 987/ 8400 batches | test loss 0.4000688 +| epoch 2 | 991/ 8400 batches | test loss 0.4027922 +| epoch 2 | 995/ 8400 batches | test loss 0.3922831 +| epoch 2 | 999/ 8400 batches | test loss 0.4948677 +| epoch 2 | 1003/ 8400 batches | test loss 0.4577377 +| epoch 2 | 1007/ 8400 batches | test loss 0.4654907 +| epoch 2 | 1011/ 8400 batches | test loss 0.4398047 +| epoch 2 | 1015/ 8400 batches | test loss 0.4417304 +| epoch 2 | 1019/ 8400 batches | test loss 0.4541813 +| epoch 2 | 1023/ 8400 batches | test loss 0.4767755 +| epoch 2 | 1027/ 8400 batches | test loss 0.4226521 +| epoch 2 | 1031/ 8400 batches | test loss 0.2728993 +| epoch 2 | 1035/ 8400 batches | test loss 0.4228085 +| epoch 2 | 1039/ 8400 batches | test loss 0.3683025 +| epoch 2 | 1043/ 8400 batches | test loss 0.4621373 +| epoch 2 | 1047/ 8400 batches | test loss 0.3979003 +| epoch 2 | 1051/ 8400 batches | test loss 0.4160374 +| epoch 2 | 1055/ 8400 batches | test loss 0.4277775 +| epoch 2 | 1059/ 8400 batches | test loss 0.4781464 +| epoch 2 | 1063/ 8400 batches | test loss 0.2677032 +| epoch 2 | 1067/ 8400 batches | test loss 0.5634766 +| epoch 2 | 1071/ 8400 batches | test loss 0.4127424 +| epoch 2 | 1075/ 8400 batches | test loss 0.4155689 +| epoch 2 | 1079/ 8400 batches | test loss 0.4742403 +| epoch 2 | 1083/ 8400 batches | test loss 0.4677496 +| epoch 2 | 1087/ 8400 batches | test loss 0.4379556 +| epoch 2 | 1091/ 8400 batches | test loss 0.5121195 +| epoch 2 | 1095/ 8400 batches | test loss 0.4071822 +| epoch 2 | 1099/ 8400 batches | test loss 0.4590290 +| epoch 2 | 1103/ 8400 batches | test loss 0.3815541 +| epoch 2 | 1107/ 8400 batches | test loss 0.4580435 +| epoch 2 | 1111/ 8400 batches | test loss 0.4769096 +| epoch 2 | 1115/ 8400 batches | test loss 0.4961613 +| epoch 2 | 1119/ 8400 batches | test loss 0.4282195 +| epoch 2 | 1123/ 8400 batches | test loss 0.3849045 +| epoch 2 | 1127/ 8400 batches | test loss 0.4639594 +| epoch 2 | 1131/ 8400 batches | test loss 0.4519952 +| epoch 2 | 1135/ 8400 batches | test loss 0.5345026 +| epoch 2 | 1139/ 8400 batches | test loss 0.4869708 +| epoch 2 | 1143/ 8400 batches | test loss 0.4821147 +| epoch 2 | 1147/ 8400 batches | test loss 0.5193578 +| epoch 2 | 1151/ 8400 batches | test loss 0.4371494 +| epoch 2 | 1155/ 8400 batches | test loss 0.4971933 +| epoch 2 | 1159/ 8400 batches | test loss 0.4861835 +| epoch 2 | 1163/ 8400 batches | test loss 0.3903256 +| epoch 2 | 1167/ 8400 batches | test loss 0.4128802 +| epoch 2 | 1171/ 8400 batches | test loss 0.4605797 +| epoch 2 | 1175/ 8400 batches | test loss 0.4441528 +| epoch 2 | 1179/ 8400 batches | test loss 0.4604555 +| epoch 2 | 1183/ 8400 batches | test loss 0.4726912 +| epoch 2 | 1187/ 8400 batches | test loss 0.4287097 +| epoch 2 | 1191/ 8400 batches | test loss 0.4103812 +| epoch 2 | 1195/ 8400 batches | test loss 0.5248753 +| epoch 2 | 1199/ 8400 batches | test loss 0.4372061 +| epoch 2 | 1203/ 8400 batches | test loss 0.4486815 +| epoch 2 | 1207/ 8400 batches | test loss 0.3579667 +| epoch 2 | 1211/ 8400 batches | test loss 0.4892544 +| epoch 2 | 1215/ 8400 batches | test loss 0.4979021 +| epoch 2 | 1219/ 8400 batches | test loss 0.5061369 +| epoch 2 | 1223/ 8400 batches | test loss 0.5271738 +| epoch 2 | 1227/ 8400 batches | test loss 0.4497885 +| epoch 2 | 1231/ 8400 batches | test loss 0.3407068 +| epoch 2 | 1235/ 8400 batches | test loss 0.3676932 +| epoch 2 | 1239/ 8400 batches | test loss 0.4434826 +| epoch 2 | 1243/ 8400 batches | test loss 0.5225958 +| epoch 2 | 1247/ 8400 batches | test loss 0.4181336 +| epoch 2 | 1251/ 8400 batches | test loss 0.4513156 +| epoch 2 | 1255/ 8400 batches | test loss 0.4964257 +| epoch 2 | 1259/ 8400 batches | test loss 0.4050499 +| epoch 2 | 1263/ 8400 batches | test loss 0.4111292 +| epoch 2 | 1267/ 8400 batches | test loss 0.3888874 +| epoch 2 | 1271/ 8400 batches | test loss 0.5589917 +| epoch 2 | 1275/ 8400 batches | test loss 0.4505556 +| epoch 2 | 1279/ 8400 batches | test loss 0.3364292 +| epoch 2 | 1283/ 8400 batches | test loss 0.4602584 +| epoch 2 | 1287/ 8400 batches | test loss 0.4678950 +| epoch 2 | 1291/ 8400 batches | test loss 0.5189459 +| epoch 2 | 1295/ 8400 batches | test loss 0.5045341 +| epoch 2 | 1299/ 8400 batches | test loss 0.3761545 +| epoch 2 | 1303/ 8400 batches | test loss 0.4017737 +| epoch 2 | 1307/ 8400 batches | test loss 0.3880972 +| epoch 2 | 1311/ 8400 batches | test loss 0.4195657 +| epoch 2 | 1315/ 8400 batches | test loss 0.4442533 +| epoch 2 | 1319/ 8400 batches | test loss 0.4050108 +| epoch 2 | 1323/ 8400 batches | test loss 0.4552408 +| epoch 2 | 1327/ 8400 batches | test loss 0.4923564 +| epoch 2 | 1331/ 8400 batches | test loss 0.4592271 +| epoch 2 | 1335/ 8400 batches | test loss 0.3973995 +| epoch 2 | 1339/ 8400 batches | test loss 0.4536633 +| epoch 2 | 1343/ 8400 batches | test loss 0.4095670 +| epoch 2 | 1347/ 8400 batches | test loss 0.3585107 +| epoch 2 | 1351/ 8400 batches | test loss 0.4053585 +| epoch 2 | 1355/ 8400 batches | test loss 0.4670658 +| epoch 2 | 1359/ 8400 batches | test loss 0.4652103 +| epoch 2 | 1363/ 8400 batches | test loss 0.3997742 +| epoch 2 | 1367/ 8400 batches | test loss 0.4819061 +| epoch 2 | 1371/ 8400 batches | test loss 0.3756433 +| epoch 2 | 1375/ 8400 batches | test loss 0.4461043 +| epoch 2 | 1379/ 8400 batches | test loss 0.4428957 +| epoch 2 | 1383/ 8400 batches | test loss 0.4455402 +| epoch 2 | 1387/ 8400 batches | test loss 0.4311838 +| epoch 2 | 1391/ 8400 batches | test loss 0.4642040 +| epoch 2 | 1395/ 8400 batches | test loss 0.4434982 +| epoch 2 | 1399/ 8400 batches | test loss 0.3941928 +| epoch 2 | 1403/ 8400 batches | test loss 0.3880153 +| epoch 2 | 1407/ 8400 batches | test loss 0.4588466 +| epoch 2 | 1411/ 8400 batches | test loss 0.4180751 +| epoch 2 | 1415/ 8400 batches | test loss 0.4263860 +| epoch 2 | 1419/ 8400 batches | test loss 0.5034552 +| epoch 2 | 1423/ 8400 batches | test loss 0.5122498 +| epoch 2 | 1427/ 8400 batches | test loss 0.3499082 +| epoch 2 | 1431/ 8400 batches | test loss 0.4457626 +| epoch 2 | 1435/ 8400 batches | test loss 0.3982719 +| epoch 2 | 1439/ 8400 batches | test loss 0.4809520 +| epoch 2 | 1443/ 8400 batches | test loss 0.4176001 +| epoch 2 | 1447/ 8400 batches | test loss 0.3900818 +| epoch 2 | 1451/ 8400 batches | test loss 0.4725379 +| epoch 2 | 1455/ 8400 batches | test loss 0.4918854 +| epoch 2 | 1459/ 8400 batches | test loss 0.4489173 +| epoch 2 | 1463/ 8400 batches | test loss 0.4578925 +| epoch 2 | 1467/ 8400 batches | test loss 0.4067623 +| epoch 2 | 1471/ 8400 batches | test loss 0.3867198 +| epoch 2 | 1475/ 8400 batches | test loss 0.4578992 +| epoch 2 | 1479/ 8400 batches | test loss 0.3419496 +| epoch 2 | 1483/ 8400 batches | test loss 0.4580484 +| epoch 2 | 1487/ 8400 batches | test loss 0.5366290 +| epoch 2 | 1491/ 8400 batches | test loss 0.5130509 +| epoch 2 | 1495/ 8400 batches | test loss 0.5051578 +| epoch 2 | 1499/ 8400 batches | test loss 0.5286055 +| epoch 2 | 1503/ 8400 batches | test loss 0.4558894 +| epoch 2 | 1507/ 8400 batches | test loss 0.3852592 +| epoch 2 | 1511/ 8400 batches | test loss 0.5159892 +| epoch 2 | 1515/ 8400 batches | test loss 0.4635119 +| epoch 2 | 1519/ 8400 batches | test loss 0.4008371 +| epoch 2 | 1523/ 8400 batches | test loss 0.3785127 +| epoch 2 | 1527/ 8400 batches | test loss 0.4212450 +| epoch 2 | 1531/ 8400 batches | test loss 0.2943670 +| epoch 2 | 1535/ 8400 batches | test loss 0.5155550 +| epoch 2 | 1539/ 8400 batches | test loss 0.5000461 +| epoch 2 | 1543/ 8400 batches | test loss 0.3663696 +| epoch 2 | 1547/ 8400 batches | test loss 0.4186338 +| epoch 2 | 1551/ 8400 batches | test loss 0.4836901 +| epoch 2 | 1555/ 8400 batches | test loss 0.3596983 +| epoch 2 | 1559/ 8400 batches | test loss 0.3867439 +| epoch 2 | 1563/ 8400 batches | test loss 0.3901856 +| epoch 2 | 1567/ 8400 batches | test loss 0.4757872 +| epoch 2 | 1571/ 8400 batches | test loss 0.4401837 +| epoch 2 | 1575/ 8400 batches | test loss 0.4429006 +| epoch 2 | 1579/ 8400 batches | test loss 0.3622076 +| epoch 2 | 1583/ 8400 batches | test loss 0.4062871 +| epoch 2 | 1587/ 8400 batches | test loss 0.4470285 +| epoch 2 | 1591/ 8400 batches | test loss 0.4125087 +| epoch 2 | 1595/ 8400 batches | test loss 0.4928948 +| epoch 2 | 1599/ 8400 batches | test loss 0.4645247 +| epoch 2 | 1603/ 8400 batches | test loss 0.4816849 +| epoch 2 | 1607/ 8400 batches | test loss 0.4670760 +| epoch 2 | 1611/ 8400 batches | test loss 0.5282452 +| epoch 2 | 1615/ 8400 batches | test loss 0.4330555 +| epoch 2 | 1619/ 8400 batches | test loss 0.3656017 +| epoch 2 | 1623/ 8400 batches | test loss 0.3622838 +| epoch 2 | 1627/ 8400 batches | test loss 0.4545348 +| epoch 2 | 1631/ 8400 batches | test loss 0.3921432 +| epoch 2 | 1635/ 8400 batches | test loss 0.3738436 +| epoch 2 | 1639/ 8400 batches | test loss 0.5049952 +| epoch 2 | 1643/ 8400 batches | test loss 0.4867942 +| epoch 2 | 1647/ 8400 batches | test loss 0.4505732 +| epoch 2 | 1651/ 8400 batches | test loss 0.3829783 +| epoch 2 | 1655/ 8400 batches | test loss 0.4788327 +| epoch 2 | 1659/ 8400 batches | test loss 0.4648305 +| epoch 2 | 1663/ 8400 batches | test loss 0.3610248 +| epoch 2 | 1667/ 8400 batches | test loss 0.3875177 +| epoch 2 | 1671/ 8400 batches | test loss 0.4386576 +| epoch 2 | 1675/ 8400 batches | test loss 0.4332153 +| epoch 2 | 1679/ 8400 batches | test loss 0.4188477 +| epoch 2 | 1683/ 8400 batches | test loss 0.4174381 +| epoch 2 | 1687/ 8400 batches | test loss 0.4638430 +| epoch 2 | 1691/ 8400 batches | test loss 0.4801592 +| epoch 2 | 1695/ 8400 batches | test loss 0.4663343 +| epoch 2 | 1699/ 8400 batches | test loss 0.3286528 +| epoch 2 | 1703/ 8400 batches | test loss 0.5357174 +| epoch 2 | 1707/ 8400 batches | test loss 0.4558335 +| epoch 2 | 1711/ 8400 batches | test loss 0.3683590 +| epoch 2 | 1715/ 8400 batches | test loss 0.4773854 +| epoch 2 | 1719/ 8400 batches | test loss 0.4538065 +| epoch 2 | 1723/ 8400 batches | test loss 0.4352373 +| epoch 2 | 1727/ 8400 batches | test loss 0.3452027 +| epoch 2 | 1731/ 8400 batches | test loss 0.4556599 +| epoch 2 | 1735/ 8400 batches | test loss 0.3856039 +| epoch 2 | 1739/ 8400 batches | test loss 0.4292809 +| epoch 2 | 1743/ 8400 batches | test loss 0.5533949 +| epoch 2 | 1747/ 8400 batches | test loss 0.5176657 +| epoch 2 | 1751/ 8400 batches | test loss 0.5141821 +| epoch 2 | 1755/ 8400 batches | test loss 0.3844661 +| epoch 2 | 1759/ 8400 batches | test loss 0.3846410 +| epoch 2 | 1763/ 8400 batches | test loss 0.3970058 +| epoch 2 | 1767/ 8400 batches | test loss 0.4123623 +| epoch 2 | 1771/ 8400 batches | test loss 0.3911818 +| epoch 2 | 1775/ 8400 batches | test loss 0.5757114 +| epoch 2 | 1779/ 8400 batches | test loss 0.3952602 +| epoch 2 | 1783/ 8400 batches | test loss 0.5290379 +| epoch 2 | 1787/ 8400 batches | test loss 0.3721493 +| epoch 2 | 1791/ 8400 batches | test loss 0.4154287 +| epoch 2 | 1795/ 8400 batches | test loss 0.4514841 +| epoch 2 | 1799/ 8400 batches | test loss 0.4230259 +| epoch 2 | 1803/ 8400 batches | test loss 0.4370694 +| epoch 2 | 1807/ 8400 batches | test loss 0.4395003 +| epoch 2 | 1811/ 8400 batches | test loss 0.3668947 +| epoch 2 | 1815/ 8400 batches | test loss 0.4404554 +| epoch 2 | 1819/ 8400 batches | test loss 0.3049334 +| epoch 2 | 1823/ 8400 batches | test loss 0.3658231 +| epoch 2 | 1827/ 8400 batches | test loss 0.4846736 +| epoch 2 | 1831/ 8400 batches | test loss 0.3960795 +| epoch 2 | 1835/ 8400 batches | test loss 0.3733712 +| epoch 2 | 1839/ 8400 batches | test loss 0.4427110 +| epoch 2 | 1843/ 8400 batches | test loss 0.3563345 +| epoch 2 | 1847/ 8400 batches | test loss 0.3925528 +| epoch 2 | 1851/ 8400 batches | test loss 0.4581801 +| epoch 2 | 1855/ 8400 batches | test loss 0.5214881 +| epoch 2 | 1859/ 8400 batches | test loss 0.5145448 +| epoch 2 | 1863/ 8400 batches | test loss 0.3987499 +| epoch 2 | 1867/ 8400 batches | test loss 0.4012925 +| epoch 2 | 1871/ 8400 batches | test loss 0.4744430 +| epoch 2 | 1875/ 8400 batches | test loss 0.4071363 +| epoch 2 | 1879/ 8400 batches | test loss 0.4793403 +| epoch 2 | 1883/ 8400 batches | test loss 0.4699616 +| epoch 2 | 1887/ 8400 batches | test loss 0.5161666 +| epoch 2 | 1891/ 8400 batches | test loss 0.3716644 +| epoch 2 | 1895/ 8400 batches | test loss 0.4516080 +| epoch 2 | 1899/ 8400 batches | test loss 0.4897864 +| epoch 2 | 1903/ 8400 batches | test loss 0.3477816 +| epoch 2 | 1907/ 8400 batches | test loss 0.4853153 +| epoch 2 | 1911/ 8400 batches | test loss 0.4650584 +| epoch 2 | 1915/ 8400 batches | test loss 0.3969123 +| epoch 2 | 1919/ 8400 batches | test loss 0.4747820 +| epoch 2 | 1923/ 8400 batches | test loss 0.4119482 +| epoch 2 | 1927/ 8400 batches | test loss 0.4590876 +| epoch 2 | 1931/ 8400 batches | test loss 0.4577979 +| epoch 2 | 1935/ 8400 batches | test loss 0.4551982 +| epoch 2 | 1939/ 8400 batches | test loss 0.4480088 +| epoch 2 | 1943/ 8400 batches | test loss 0.4028287 +| epoch 2 | 1947/ 8400 batches | test loss 0.4671080 +| epoch 2 | 1951/ 8400 batches | test loss 0.4849714 +| epoch 2 | 1955/ 8400 batches | test loss 0.4315463 +| epoch 2 | 1959/ 8400 batches | test loss 0.3709581 +| epoch 2 | 1963/ 8400 batches | test loss 0.3769809 +| epoch 2 | 1967/ 8400 batches | test loss 0.4706839 +| epoch 2 | 1971/ 8400 batches | test loss 0.4328176 +| epoch 2 | 1975/ 8400 batches | test loss 0.4454737 +| epoch 2 | 1979/ 8400 batches | test loss 0.4229930 +| epoch 2 | 1983/ 8400 batches | test loss 0.4433021 +| epoch 2 | 1987/ 8400 batches | test loss 0.4283005 +| epoch 2 | 1991/ 8400 batches | test loss 0.3827750 +| epoch 2 | 1995/ 8400 batches | test loss 0.6237701 +| epoch 2 | 1999/ 8400 batches | test loss 0.4465379 +| epoch 2 | 2003/ 8400 batches | test loss 0.4380680 +| epoch 2 | 2007/ 8400 batches | test loss 0.3475432 +| epoch 2 | 2011/ 8400 batches | test loss 0.4629641 +| epoch 2 | 2015/ 8400 batches | test loss 0.5183942 +| epoch 2 | 2019/ 8400 batches | test loss 0.3654220 +| epoch 2 | 2023/ 8400 batches | test loss 0.4154999 +| epoch 2 | 2027/ 8400 batches | test loss 0.4811735 +| epoch 2 | 2031/ 8400 batches | test loss 0.4059358 +| epoch 2 | 2035/ 8400 batches | test loss 0.4856188 +| epoch 2 | 2039/ 8400 batches | test loss 0.4346380 +| epoch 2 | 2043/ 8400 batches | test loss 0.4008653 +| epoch 2 | 2047/ 8400 batches | test loss 0.4204349 +| epoch 2 | 2051/ 8400 batches | test loss 0.4212115 +| epoch 2 | 2055/ 8400 batches | test loss 0.4618091 +| epoch 2 | 2059/ 8400 batches | test loss 0.4839236 +| epoch 2 | 2063/ 8400 batches | test loss 0.4121138 +| epoch 2 | 2067/ 8400 batches | test loss 0.3678512 +| epoch 2 | 2071/ 8400 batches | test loss 0.4894736 +| epoch 2 | 2075/ 8400 batches | test loss 0.3511355 +| epoch 2 | 2079/ 8400 batches | test loss 0.5331839 +| epoch 2 | 2083/ 8400 batches | test loss 0.3399584 +| epoch 2 | 2087/ 8400 batches | test loss 0.5233765 +| epoch 2 | 2091/ 8400 batches | test loss 0.3795213 +| epoch 2 | 2095/ 8400 batches | test loss 0.3909431 +| epoch 2 | 2099/ 8400 batches | test loss 0.4431379 +| epoch 2 | final test loss 0.4390, save model! +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 8400 batches | train loss 0.5266088 +| epoch 3 | 7/ 8400 batches | train loss 0.5141653 +| epoch 3 | 11/ 8400 batches | train loss 0.3808904 +| epoch 3 | 15/ 8400 batches | train loss 0.4754821 +| epoch 3 | 19/ 8400 batches | train loss 0.4509925 +| epoch 3 | 23/ 8400 batches | train loss 0.3565230 +| epoch 3 | 27/ 8400 batches | train loss 0.3975750 +| epoch 3 | 31/ 8400 batches | train loss 0.3270814 +| epoch 3 | 35/ 8400 batches | train loss 0.3405224 +| epoch 3 | 39/ 8400 batches | train loss 0.4893207 +| epoch 3 | 43/ 8400 batches | train loss 0.4395478 +| epoch 3 | 47/ 8400 batches | train loss 0.4020339 +| epoch 3 | 51/ 8400 batches | train loss 0.4343916 +| epoch 3 | 55/ 8400 batches | train loss 0.4575898 +| epoch 3 | 59/ 8400 batches | train loss 0.2371777 +| epoch 3 | 63/ 8400 batches | train loss 0.4664087 +| epoch 3 | 67/ 8400 batches | train loss 0.4834961 +| epoch 3 | 71/ 8400 batches | train loss 0.3657712 +| epoch 3 | 75/ 8400 batches | train loss 0.5235486 +| epoch 3 | 79/ 8400 batches | train loss 0.4037463 +| epoch 3 | 83/ 8400 batches | train loss 0.4573112 +| epoch 3 | 87/ 8400 batches | train loss 0.3467742 +| epoch 3 | 91/ 8400 batches | train loss 0.4237788 +| epoch 3 | 95/ 8400 batches | train loss 0.4439739 +| epoch 3 | 99/ 8400 batches | train loss 0.3909307 +| epoch 3 | 103/ 8400 batches | train loss 0.3549192 +| epoch 3 | 107/ 8400 batches | train loss 0.4332950 +| epoch 3 | 111/ 8400 batches | train loss 0.4290445 +| epoch 3 | 115/ 8400 batches | train loss 0.4596804 +| epoch 3 | 119/ 8400 batches | train loss 0.4105406 +| epoch 3 | 123/ 8400 batches | train loss 0.4773267 +| epoch 3 | 127/ 8400 batches | train loss 0.4032593 +| epoch 3 | 131/ 8400 batches | train loss 0.3755908 +| epoch 3 | 135/ 8400 batches | train loss 0.4597463 +| epoch 3 | 139/ 8400 batches | train loss 0.4562933 +| epoch 3 | 143/ 8400 batches | train loss 0.4748263 +| epoch 3 | 147/ 8400 batches | train loss 0.4861655 +| epoch 3 | 151/ 8400 batches | train loss 0.4706086 +| epoch 3 | 155/ 8400 batches | train loss 0.4753865 +| epoch 3 | 159/ 8400 batches | train loss 0.4598814 +| epoch 3 | 163/ 8400 batches | train loss 0.4644274 +| epoch 3 | 167/ 8400 batches | train loss 0.5398164 +| epoch 3 | 171/ 8400 batches | train loss 0.6150272 +| epoch 3 | 175/ 8400 batches | train loss 0.3580773 +| epoch 3 | 179/ 8400 batches | train loss 0.3919752 +| epoch 3 | 183/ 8400 batches | train loss 0.4506699 +| epoch 3 | 187/ 8400 batches | train loss 0.4335768 +| epoch 3 | 191/ 8400 batches | train loss 0.3348045 +| epoch 3 | 195/ 8400 batches | train loss 0.5288998 +| epoch 3 | 199/ 8400 batches | train loss 0.4118608 +| epoch 3 | 203/ 8400 batches | train loss 0.4500732 +| epoch 3 | 207/ 8400 batches | train loss 0.5190308 +| epoch 3 | 211/ 8400 batches | train loss 0.3471100 +| epoch 3 | 215/ 8400 batches | train loss 0.4562981 +| epoch 3 | 219/ 8400 batches | train loss 0.4019199 +| epoch 3 | 223/ 8400 batches | train loss 0.3655698 +| epoch 3 | 227/ 8400 batches | train loss 0.3977305 +| epoch 3 | 231/ 8400 batches | train loss 0.4259061 +| epoch 3 | 235/ 8400 batches | train loss 0.3764123 +| epoch 3 | 239/ 8400 batches | train loss 0.4556634 +| epoch 3 | 243/ 8400 batches | train loss 0.4925920 +| epoch 3 | 247/ 8400 batches | train loss 0.4309837 +| epoch 3 | 251/ 8400 batches | train loss 0.4878662 +| epoch 3 | 255/ 8400 batches | train loss 0.3661897 +| epoch 3 | 259/ 8400 batches | train loss 0.4794484 +| epoch 3 | 263/ 8400 batches | train loss 0.5090583 +| epoch 3 | 267/ 8400 batches | train loss 0.4381827 +| epoch 3 | 271/ 8400 batches | train loss 0.4675661 +| epoch 3 | 275/ 8400 batches | train loss 0.3949683 +| epoch 3 | 279/ 8400 batches | train loss 0.3975734 +| epoch 3 | 283/ 8400 batches | train loss 0.3279961 +| epoch 3 | 287/ 8400 batches | train loss 0.3532674 +| epoch 3 | 291/ 8400 batches | train loss 0.3794149 +| epoch 3 | 295/ 8400 batches | train loss 0.3848442 +| epoch 3 | 299/ 8400 batches | train loss 0.3441837 +| epoch 3 | 303/ 8400 batches | train loss 0.4745951 +| epoch 3 | 307/ 8400 batches | train loss 0.4608744 +| epoch 3 | 311/ 8400 batches | train loss 0.4490008 +| epoch 3 | 315/ 8400 batches | train loss 0.4321205 +| epoch 3 | 319/ 8400 batches | train loss 0.4370024 +| epoch 3 | 323/ 8400 batches | train loss 0.4136338 +| epoch 3 | 327/ 8400 batches | train loss 0.3980531 +| epoch 3 | 331/ 8400 batches | train loss 0.4414089 +| epoch 3 | 335/ 8400 batches | train loss 0.4256063 +| epoch 3 | 339/ 8400 batches | train loss 0.3987657 +| epoch 3 | 343/ 8400 batches | train loss 0.4397002 +| epoch 3 | 347/ 8400 batches | train loss 0.4387649 +| epoch 3 | 351/ 8400 batches | train loss 0.4487634 +| epoch 3 | 355/ 8400 batches | train loss 0.4322521 +| epoch 3 | 359/ 8400 batches | train loss 0.4281003 +| epoch 3 | 363/ 8400 batches | train loss 0.3268384 +| epoch 3 | 367/ 8400 batches | train loss 0.4347104 +| epoch 3 | 371/ 8400 batches | train loss 0.4211422 +| epoch 3 | 375/ 8400 batches | train loss 0.4129105 +| epoch 3 | 379/ 8400 batches | train loss 0.4593430 +| epoch 3 | 383/ 8400 batches | train loss 0.4033826 +| epoch 3 | 387/ 8400 batches | train loss 0.3658641 +| epoch 3 | 391/ 8400 batches | train loss 0.4570914 +| epoch 3 | 395/ 8400 batches | train loss 0.4003596 +| epoch 3 | 399/ 8400 batches | train loss 0.3881803 +| epoch 3 | 403/ 8400 batches | train loss 0.5228763 +| epoch 3 | 407/ 8400 batches | train loss 0.4406921 +| epoch 3 | 411/ 8400 batches | train loss 0.4449171 +| epoch 3 | 415/ 8400 batches | train loss 0.4817840 +| epoch 3 | 419/ 8400 batches | train loss 0.4299684 +| epoch 3 | 423/ 8400 batches | train loss 0.5306319 +| epoch 3 | 427/ 8400 batches | train loss 0.4327064 +| epoch 3 | 431/ 8400 batches | train loss 0.4288846 +| epoch 3 | 435/ 8400 batches | train loss 0.3348979 +| epoch 3 | 439/ 8400 batches | train loss 0.4070536 +| epoch 3 | 443/ 8400 batches | train loss 0.4743996 +| epoch 3 | 447/ 8400 batches | train loss 0.3562035 +| epoch 3 | 451/ 8400 batches | train loss 0.3343857 +| epoch 3 | 455/ 8400 batches | train loss 0.4643523 +| epoch 3 | 459/ 8400 batches | train loss 0.4868014 +| epoch 3 | 463/ 8400 batches | train loss 0.4248430 +| epoch 3 | 467/ 8400 batches | train loss 0.4651257 +| epoch 3 | 471/ 8400 batches | train loss 0.4758041 +| epoch 3 | 475/ 8400 batches | train loss 0.3904888 +| epoch 3 | 479/ 8400 batches | train loss 0.4072209 +| epoch 3 | 483/ 8400 batches | train loss 0.3410019 +| epoch 3 | 487/ 8400 batches | train loss 0.4299957 +| epoch 3 | 491/ 8400 batches | train loss 0.4742444 +| epoch 3 | 495/ 8400 batches | train loss 0.4449332 +| epoch 3 | 499/ 8400 batches | train loss 0.4235253 +| epoch 3 | 503/ 8400 batches | train loss 0.3675724 +| epoch 3 | 507/ 8400 batches | train loss 0.4444369 +| epoch 3 | 511/ 8400 batches | train loss 0.4439870 +| epoch 3 | 515/ 8400 batches | train loss 0.4606795 +| epoch 3 | 519/ 8400 batches | train loss 0.4479080 +| epoch 3 | 523/ 8400 batches | train loss 0.4957029 +| epoch 3 | 527/ 8400 batches | train loss 0.4800192 +| epoch 3 | 531/ 8400 batches | train loss 0.3795331 +| epoch 3 | 535/ 8400 batches | train loss 0.3887766 +| epoch 3 | 539/ 8400 batches | train loss 0.4421644 +| epoch 3 | 543/ 8400 batches | train loss 0.4017465 +| epoch 3 | 547/ 8400 batches | train loss 0.4734821 +| epoch 3 | 551/ 8400 batches | train loss 0.3738557 +| epoch 3 | 555/ 8400 batches | train loss 0.3257600 +| epoch 3 | 559/ 8400 batches | train loss 0.4973025 +| epoch 3 | 563/ 8400 batches | train loss 0.3804446 +| epoch 3 | 567/ 8400 batches | train loss 0.4397126 +| epoch 3 | 571/ 8400 batches | train loss 0.4447466 +| epoch 3 | 575/ 8400 batches | train loss 0.5239960 +| epoch 3 | 579/ 8400 batches | train loss 0.4252162 +| epoch 3 | 583/ 8400 batches | train loss 0.3591631 +| epoch 3 | 587/ 8400 batches | train loss 0.4109759 +| epoch 3 | 591/ 8400 batches | train loss 0.3643194 +| epoch 3 | 595/ 8400 batches | train loss 0.3712477 +| epoch 3 | 599/ 8400 batches | train loss 0.3439764 +| epoch 3 | 603/ 8400 batches | train loss 0.3865064 +| epoch 3 | 607/ 8400 batches | train loss 0.3988606 +| epoch 3 | 611/ 8400 batches | train loss 0.4862214 +| epoch 3 | 615/ 8400 batches | train loss 0.4275459 +| epoch 3 | 619/ 8400 batches | train loss 0.4518201 +| epoch 3 | 623/ 8400 batches | train loss 0.5742526 +| epoch 3 | 627/ 8400 batches | train loss 0.3571631 +| epoch 3 | 631/ 8400 batches | train loss 0.5191307 +| epoch 3 | 635/ 8400 batches | train loss 0.3592481 +| epoch 3 | 639/ 8400 batches | train loss 0.4024444 +| epoch 3 | 643/ 8400 batches | train loss 0.3726444 +| epoch 3 | 647/ 8400 batches | train loss 0.4109451 +| epoch 3 | 651/ 8400 batches | train loss 0.3790370 +| epoch 3 | 655/ 8400 batches | train loss 0.4437168 +| epoch 3 | 659/ 8400 batches | train loss 0.5206413 +| epoch 3 | 663/ 8400 batches | train loss 0.4282896 +| epoch 3 | 667/ 8400 batches | train loss 0.5106225 +| epoch 3 | 671/ 8400 batches | train loss 0.3716421 +| epoch 3 | 675/ 8400 batches | train loss 0.3763329 +| epoch 3 | 679/ 8400 batches | train loss 0.5094829 +| epoch 3 | 683/ 8400 batches | train loss 0.5267140 +| epoch 3 | 687/ 8400 batches | train loss 0.4974388 +| epoch 3 | 691/ 8400 batches | train loss 0.3533645 +| epoch 3 | 695/ 8400 batches | train loss 0.4947777 +| epoch 3 | 699/ 8400 batches | train loss 0.5061567 +| epoch 3 | 703/ 8400 batches | train loss 0.4374468 +| epoch 3 | 707/ 8400 batches | train loss 0.4762437 +| epoch 3 | 711/ 8400 batches | train loss 0.4063326 +| epoch 3 | 715/ 8400 batches | train loss 0.3530846 +| epoch 3 | 719/ 8400 batches | train loss 0.4832169 +| epoch 3 | 723/ 8400 batches | train loss 0.4621591 +| epoch 3 | 727/ 8400 batches | train loss 0.4572730 +| epoch 3 | 731/ 8400 batches | train loss 0.4043415 +| epoch 3 | 735/ 8400 batches | train loss 0.4949858 +| epoch 3 | 739/ 8400 batches | train loss 0.3889059 +| epoch 3 | 743/ 8400 batches | train loss 0.3739050 +| epoch 3 | 747/ 8400 batches | train loss 0.4457471 +| epoch 3 | 751/ 8400 batches | train loss 0.3824790 +| epoch 3 | 755/ 8400 batches | train loss 0.3677245 +| epoch 3 | 759/ 8400 batches | train loss 0.4859343 +| epoch 3 | 763/ 8400 batches | train loss 0.3848416 +| epoch 3 | 767/ 8400 batches | train loss 0.4128228 +| epoch 3 | 771/ 8400 batches | train loss 0.4917466 +| epoch 3 | 775/ 8400 batches | train loss 0.4472509 +| epoch 3 | 779/ 8400 batches | train loss 0.4770509 +| epoch 3 | 783/ 8400 batches | train loss 0.4026613 +| epoch 3 | 787/ 8400 batches | train loss 0.3670279 +| epoch 3 | 791/ 8400 batches | train loss 0.4110168 +| epoch 3 | 795/ 8400 batches | train loss 0.3922935 +| epoch 3 | 799/ 8400 batches | train loss 0.4700782 +| epoch 3 | 803/ 8400 batches | train loss 0.4481570 +| epoch 3 | 807/ 8400 batches | train loss 0.3905619 +| epoch 3 | 811/ 8400 batches | train loss 0.5113567 +| epoch 3 | 815/ 8400 batches | train loss 0.4171913 +| epoch 3 | 819/ 8400 batches | train loss 0.4718825 +| epoch 3 | 823/ 8400 batches | train loss 0.4863798 +| epoch 3 | 827/ 8400 batches | train loss 0.4671879 +| epoch 3 | 831/ 8400 batches | train loss 0.5610505 +| epoch 3 | 835/ 8400 batches | train loss 0.4312384 +| epoch 3 | 839/ 8400 batches | train loss 0.4608877 +| epoch 3 | 843/ 8400 batches | train loss 0.4379047 +| epoch 3 | 847/ 8400 batches | train loss 0.3862952 +| epoch 3 | 851/ 8400 batches | train loss 0.3717958 +| epoch 3 | 855/ 8400 batches | train loss 0.4387842 +| epoch 3 | 859/ 8400 batches | train loss 0.4442978 +| epoch 3 | 863/ 8400 batches | train loss 0.4507936 +| epoch 3 | 867/ 8400 batches | train loss 0.3999727 +| epoch 3 | 871/ 8400 batches | train loss 0.4882951 +| epoch 3 | 875/ 8400 batches | train loss 0.5046577 +| epoch 3 | 879/ 8400 batches | train loss 0.3680181 +| epoch 3 | 883/ 8400 batches | train loss 0.4322930 +| epoch 3 | 887/ 8400 batches | train loss 0.3552269 +| epoch 3 | 891/ 8400 batches | train loss 0.4217541 +| epoch 3 | 895/ 8400 batches | train loss 0.4579093 +| epoch 3 | 899/ 8400 batches | train loss 0.4078638 +| epoch 3 | 903/ 8400 batches | train loss 0.4567189 +| epoch 3 | 907/ 8400 batches | train loss 0.4251177 +| epoch 3 | 911/ 8400 batches | train loss 0.4621971 +| epoch 3 | 915/ 8400 batches | train loss 0.4126184 +| epoch 3 | 919/ 8400 batches | train loss 0.4555515 +| epoch 3 | 923/ 8400 batches | train loss 0.4595240 +| epoch 3 | 927/ 8400 batches | train loss 0.5038929 +| epoch 3 | 931/ 8400 batches | train loss 0.4332921 +| epoch 3 | 935/ 8400 batches | train loss 0.3567894 +| epoch 3 | 939/ 8400 batches | train loss 0.4198496 +| epoch 3 | 943/ 8400 batches | train loss 0.4230591 +| epoch 3 | 947/ 8400 batches | train loss 0.4473351 +| epoch 3 | 951/ 8400 batches | train loss 0.3912458 +| epoch 3 | 955/ 8400 batches | train loss 0.3906778 +| epoch 3 | 959/ 8400 batches | train loss 0.3913414 +| epoch 3 | 963/ 8400 batches | train loss 0.4009998 +| epoch 3 | 967/ 8400 batches | train loss 0.3901873 +| epoch 3 | 971/ 8400 batches | train loss 0.4428384 +| epoch 3 | 975/ 8400 batches | train loss 0.4571300 +| epoch 3 | 979/ 8400 batches | train loss 0.5685481 +| epoch 3 | 983/ 8400 batches | train loss 0.3916225 +| epoch 3 | 987/ 8400 batches | train loss 0.4431968 +| epoch 3 | 991/ 8400 batches | train loss 0.4930515 +| epoch 3 | 995/ 8400 batches | train loss 0.3840739 +| epoch 3 | 999/ 8400 batches | train loss 0.3480630 +| epoch 3 | 1003/ 8400 batches | train loss 0.4118192 +| epoch 3 | 1007/ 8400 batches | train loss 0.2960898 +| epoch 3 | 1011/ 8400 batches | train loss 0.4145895 +| epoch 3 | 1015/ 8400 batches | train loss 0.3186538 +| epoch 3 | 1019/ 8400 batches | train loss 0.3836666 +| epoch 3 | 1023/ 8400 batches | train loss 0.3799645 +| epoch 3 | 1027/ 8400 batches | train loss 0.3710960 +| epoch 3 | 1031/ 8400 batches | train loss 0.5248891 +| epoch 3 | 1035/ 8400 batches | train loss 0.4151385 +| epoch 3 | 1039/ 8400 batches | train loss 0.4342157 +| epoch 3 | 1043/ 8400 batches | train loss 0.3896122 +| epoch 3 | 1047/ 8400 batches | train loss 0.3668627 +| epoch 3 | 1051/ 8400 batches | train loss 0.3408854 +| epoch 3 | 1055/ 8400 batches | train loss 0.5497681 +| epoch 3 | 1059/ 8400 batches | train loss 0.4067222 +| epoch 3 | 1063/ 8400 batches | train loss 0.4501638 +| epoch 3 | 1067/ 8400 batches | train loss 0.3996975 +| epoch 3 | 1071/ 8400 batches | train loss 0.5185586 +| epoch 3 | 1075/ 8400 batches | train loss 0.4853388 +| epoch 3 | 1079/ 8400 batches | train loss 0.3756125 +| epoch 3 | 1083/ 8400 batches | train loss 0.4156650 +| epoch 3 | 1087/ 8400 batches | train loss 0.4279482 +| epoch 3 | 1091/ 8400 batches | train loss 0.4620521 +| epoch 3 | 1095/ 8400 batches | train loss 0.4468697 +| epoch 3 | 1099/ 8400 batches | train loss 0.4491844 +| epoch 3 | 1103/ 8400 batches | train loss 0.4697672 +| epoch 3 | 1107/ 8400 batches | train loss 0.4528145 +| epoch 3 | 1111/ 8400 batches | train loss 0.3782363 +| epoch 3 | 1115/ 8400 batches | train loss 0.4053242 +| epoch 3 | 1119/ 8400 batches | train loss 0.4462736 +| epoch 3 | 1123/ 8400 batches | train loss 0.4144008 +| epoch 3 | 1127/ 8400 batches | train loss 0.4584015 +| epoch 3 | 1131/ 8400 batches | train loss 0.4571207 +| epoch 3 | 1135/ 8400 batches | train loss 0.4548543 +| epoch 3 | 1139/ 8400 batches | train loss 0.4945092 +| epoch 3 | 1143/ 8400 batches | train loss 0.3907819 +| epoch 3 | 1147/ 8400 batches | train loss 0.4293327 +| epoch 3 | 1151/ 8400 batches | train loss 0.4563267 +| epoch 3 | 1155/ 8400 batches | train loss 0.4486636 +| epoch 3 | 1159/ 8400 batches | train loss 0.3776086 +| epoch 3 | 1163/ 8400 batches | train loss 0.4648448 +| epoch 3 | 1167/ 8400 batches | train loss 0.3874705 +| epoch 3 | 1171/ 8400 batches | train loss 0.4673308 +| epoch 3 | 1175/ 8400 batches | train loss 0.3419921 +| epoch 3 | 1179/ 8400 batches | train loss 0.6514399 +| epoch 3 | 1183/ 8400 batches | train loss 0.4158537 +| epoch 3 | 1187/ 8400 batches | train loss 0.4574265 +| epoch 3 | 1191/ 8400 batches | train loss 0.3978761 +| epoch 3 | 1195/ 8400 batches | train loss 0.4987249 +| epoch 3 | 1199/ 8400 batches | train loss 0.3981693 +| epoch 3 | 1203/ 8400 batches | train loss 0.4492275 +| epoch 3 | 1207/ 8400 batches | train loss 0.4582723 +| epoch 3 | 1211/ 8400 batches | train loss 0.4249386 +| epoch 3 | 1215/ 8400 batches | train loss 0.4923331 +| epoch 3 | 1219/ 8400 batches | train loss 0.3953220 +| epoch 3 | 1223/ 8400 batches | train loss 0.4749864 +| epoch 3 | 1227/ 8400 batches | train loss 0.5160868 +| epoch 3 | 1231/ 8400 batches | train loss 0.4506809 +| epoch 3 | 1235/ 8400 batches | train loss 0.4389272 +| epoch 3 | 1239/ 8400 batches | train loss 0.3911368 +| epoch 3 | 1243/ 8400 batches | train loss 0.3667304 +| epoch 3 | 1247/ 8400 batches | train loss 0.4911478 +| epoch 3 | 1251/ 8400 batches | train loss 0.4523067 +| epoch 3 | 1255/ 8400 batches | train loss 0.3919758 +| epoch 3 | 1259/ 8400 batches | train loss 0.3821312 +| epoch 3 | 1263/ 8400 batches | train loss 0.4931073 +| epoch 3 | 1267/ 8400 batches | train loss 0.4857255 +| epoch 3 | 1271/ 8400 batches | train loss 0.3644981 +| epoch 3 | 1275/ 8400 batches | train loss 0.4697547 +| epoch 3 | 1279/ 8400 batches | train loss 0.4707624 +| epoch 3 | 1283/ 8400 batches | train loss 0.4489937 +| epoch 3 | 1287/ 8400 batches | train loss 0.4515426 +| epoch 3 | 1291/ 8400 batches | train loss 0.4482256 +| epoch 3 | 1295/ 8400 batches | train loss 0.3522373 +| epoch 3 | 1299/ 8400 batches | train loss 0.3993476 +| epoch 3 | 1303/ 8400 batches | train loss 0.4369199 +| epoch 3 | 1307/ 8400 batches | train loss 0.3779678 +| epoch 3 | 1311/ 8400 batches | train loss 0.4161374 +| epoch 3 | 1315/ 8400 batches | train loss 0.4552338 +| epoch 3 | 1319/ 8400 batches | train loss 0.3722150 +| epoch 3 | 1323/ 8400 batches | train loss 0.4310656 +| epoch 3 | 1327/ 8400 batches | train loss 0.4772426 +| epoch 3 | 1331/ 8400 batches | train loss 0.3923919 +| epoch 3 | 1335/ 8400 batches | train loss 0.4020606 +| epoch 3 | 1339/ 8400 batches | train loss 0.4748236 +| epoch 3 | 1343/ 8400 batches | train loss 0.3916169 +| epoch 3 | 1347/ 8400 batches | train loss 0.5070772 +| epoch 3 | 1351/ 8400 batches | train loss 0.4039076 +| epoch 3 | 1355/ 8400 batches | train loss 0.4138730 +| epoch 3 | 1359/ 8400 batches | train loss 0.3552102 +| epoch 3 | 1363/ 8400 batches | train loss 0.4481125 +| epoch 3 | 1367/ 8400 batches | train loss 0.3870820 +| epoch 3 | 1371/ 8400 batches | train loss 0.4906538 +| epoch 3 | 1375/ 8400 batches | train loss 0.4193415 +| epoch 3 | 1379/ 8400 batches | train loss 0.4329947 +| epoch 3 | 1383/ 8400 batches | train loss 0.4007332 +| epoch 3 | 1387/ 8400 batches | train loss 0.4547531 +| epoch 3 | 1391/ 8400 batches | train loss 0.4281619 +| epoch 3 | 1395/ 8400 batches | train loss 0.4664706 +| epoch 3 | 1399/ 8400 batches | train loss 0.4074656 +| epoch 3 | 1403/ 8400 batches | train loss 0.3433137 +| epoch 3 | 1407/ 8400 batches | train loss 0.5679003 +| epoch 3 | 1411/ 8400 batches | train loss 0.4404187 +| epoch 3 | 1415/ 8400 batches | train loss 0.4107728 +| epoch 3 | 1419/ 8400 batches | train loss 0.4563493 +| epoch 3 | 1423/ 8400 batches | train loss 0.5235581 +| epoch 3 | 1427/ 8400 batches | train loss 0.4096013 +| epoch 3 | 1431/ 8400 batches | train loss 0.4228953 +| epoch 3 | 1435/ 8400 batches | train loss 0.4268132 +| epoch 3 | 1439/ 8400 batches | train loss 0.3656965 +| epoch 3 | 1443/ 8400 batches | train loss 0.3770863 +| epoch 3 | 1447/ 8400 batches | train loss 0.2763744 +| epoch 3 | 1451/ 8400 batches | train loss 0.4416444 +| epoch 3 | 1455/ 8400 batches | train loss 0.5117407 +| epoch 3 | 1459/ 8400 batches | train loss 0.4473672 +| epoch 3 | 1463/ 8400 batches | train loss 0.3649708 +| epoch 3 | 1467/ 8400 batches | train loss 0.3995643 +| epoch 3 | 1471/ 8400 batches | train loss 0.4930925 +| epoch 3 | 1475/ 8400 batches | train loss 0.3414150 +| epoch 3 | 1479/ 8400 batches | train loss 0.4010139 +| epoch 3 | 1483/ 8400 batches | train loss 0.5063579 +| epoch 3 | 1487/ 8400 batches | train loss 0.4713919 +| epoch 3 | 1491/ 8400 batches | train loss 0.4710147 +| epoch 3 | 1495/ 8400 batches | train loss 0.4401147 +| epoch 3 | 1499/ 8400 batches | train loss 0.3938964 +| epoch 3 | 1503/ 8400 batches | train loss 0.4551312 +| epoch 3 | 1507/ 8400 batches | train loss 0.4935328 +| epoch 3 | 1511/ 8400 batches | train loss 0.4762626 +| epoch 3 | 1515/ 8400 batches | train loss 0.4095083 +| epoch 3 | 1519/ 8400 batches | train loss 0.5229273 +| epoch 3 | 1523/ 8400 batches | train loss 0.4693912 +| epoch 3 | 1527/ 8400 batches | train loss 0.3853660 +| epoch 3 | 1531/ 8400 batches | train loss 0.4129201 +| epoch 3 | 1535/ 8400 batches | train loss 0.4117990 +| epoch 3 | 1539/ 8400 batches | train loss 0.3542100 +| epoch 3 | 1543/ 8400 batches | train loss 0.4722235 +| epoch 3 | 1547/ 8400 batches | train loss 0.4520882 +| epoch 3 | 1551/ 8400 batches | train loss 0.3728220 +| epoch 3 | 1555/ 8400 batches | train loss 0.5399117 +| epoch 3 | 1559/ 8400 batches | train loss 0.4070334 +| epoch 3 | 1563/ 8400 batches | train loss 0.3563408 +| epoch 3 | 1567/ 8400 batches | train loss 0.3930161 +| epoch 3 | 1571/ 8400 batches | train loss 0.4071253 +| epoch 3 | 1575/ 8400 batches | train loss 0.4638795 +| epoch 3 | 1579/ 8400 batches | train loss 0.4757348 +| epoch 3 | 1583/ 8400 batches | train loss 0.4264151 +| epoch 3 | 1587/ 8400 batches | train loss 0.4867972 +| epoch 3 | 1591/ 8400 batches | train loss 0.4506883 +| epoch 3 | 1595/ 8400 batches | train loss 0.4170603 +| epoch 3 | 1599/ 8400 batches | train loss 0.4295425 +| epoch 3 | 1603/ 8400 batches | train loss 0.3459615 +| epoch 3 | 1607/ 8400 batches | train loss 0.4830684 +| epoch 3 | 1611/ 8400 batches | train loss 0.4242706 +| epoch 3 | 1615/ 8400 batches | train loss 0.5045322 +| epoch 3 | 1619/ 8400 batches | train loss 0.4759282 +| epoch 3 | 1623/ 8400 batches | train loss 0.4189850 +| epoch 3 | 1627/ 8400 batches | train loss 0.4872440 +| epoch 3 | 1631/ 8400 batches | train loss 0.4586699 +| epoch 3 | 1635/ 8400 batches | train loss 0.3337638 +| epoch 3 | 1639/ 8400 batches | train loss 0.4230505 +| epoch 3 | 1643/ 8400 batches | train loss 0.4633292 +| epoch 3 | 1647/ 8400 batches | train loss 0.3930857 +| epoch 3 | 1651/ 8400 batches | train loss 0.3846743 +| epoch 3 | 1655/ 8400 batches | train loss 0.4440183 +| epoch 3 | 1659/ 8400 batches | train loss 0.4914080 +| epoch 3 | 1663/ 8400 batches | train loss 0.4095325 +| epoch 3 | 1667/ 8400 batches | train loss 0.3647419 +| epoch 3 | 1671/ 8400 batches | train loss 0.4463159 +| epoch 3 | 1675/ 8400 batches | train loss 0.4638098 +| epoch 3 | 1679/ 8400 batches | train loss 0.4226719 +| epoch 3 | 1683/ 8400 batches | train loss 0.2808070 +| epoch 3 | 1687/ 8400 batches | train loss 0.3827966 +| epoch 3 | 1691/ 8400 batches | train loss 0.4047921 +| epoch 3 | 1695/ 8400 batches | train loss 0.4918575 +| epoch 3 | 1699/ 8400 batches | train loss 0.4017309 +| epoch 3 | 1703/ 8400 batches | train loss 0.4589006 +| epoch 3 | 1707/ 8400 batches | train loss 0.3990380 +| epoch 3 | 1711/ 8400 batches | train loss 0.3324176 +| epoch 3 | 1715/ 8400 batches | train loss 0.4361143 +| epoch 3 | 1719/ 8400 batches | train loss 0.4428921 +| epoch 3 | 1723/ 8400 batches | train loss 0.4652950 +| epoch 3 | 1727/ 8400 batches | train loss 0.4338964 +| epoch 3 | 1731/ 8400 batches | train loss 0.3788840 +| epoch 3 | 1735/ 8400 batches | train loss 0.4871460 +| epoch 3 | 1739/ 8400 batches | train loss 0.4085207 +| epoch 3 | 1743/ 8400 batches | train loss 0.3669608 +| epoch 3 | 1747/ 8400 batches | train loss 0.3255454 +| epoch 3 | 1751/ 8400 batches | train loss 0.3788321 +| epoch 3 | 1755/ 8400 batches | train loss 0.5253347 +| epoch 3 | 1759/ 8400 batches | train loss 0.4088221 +| epoch 3 | 1763/ 8400 batches | train loss 0.4951754 +| epoch 3 | 1767/ 8400 batches | train loss 0.3711764 +| epoch 3 | 1771/ 8400 batches | train loss 0.4027116 +| epoch 3 | 1775/ 8400 batches | train loss 0.4955865 +| epoch 3 | 1779/ 8400 batches | train loss 0.4198918 +| epoch 3 | 1783/ 8400 batches | train loss 0.4137354 +| epoch 3 | 1787/ 8400 batches | train loss 0.3670676 +| epoch 3 | 1791/ 8400 batches | train loss 0.4287485 +| epoch 3 | 1795/ 8400 batches | train loss 0.3682104 +| epoch 3 | 1799/ 8400 batches | train loss 0.3505275 +| epoch 3 | 1803/ 8400 batches | train loss 0.3433147 +| epoch 3 | 1807/ 8400 batches | train loss 0.4534953 +| epoch 3 | 1811/ 8400 batches | train loss 0.4224364 +| epoch 3 | 1815/ 8400 batches | train loss 0.4445131 +| epoch 3 | 1819/ 8400 batches | train loss 0.4169411 +| epoch 3 | 1823/ 8400 batches | train loss 0.4740881 +| epoch 3 | 1827/ 8400 batches | train loss 0.4004157 +| epoch 3 | 1831/ 8400 batches | train loss 0.4937741 +| epoch 3 | 1835/ 8400 batches | train loss 0.3082042 +| epoch 3 | 1839/ 8400 batches | train loss 0.3484361 +| epoch 3 | 1843/ 8400 batches | train loss 0.3566907 +| epoch 3 | 1847/ 8400 batches | train loss 0.3732946 +| epoch 3 | 1851/ 8400 batches | train loss 0.3430409 +| epoch 3 | 1855/ 8400 batches | train loss 0.5380676 +| epoch 3 | 1859/ 8400 batches | train loss 0.4305928 +| epoch 3 | 1863/ 8400 batches | train loss 0.3490774 +| epoch 3 | 1867/ 8400 batches | train loss 0.5535196 +| epoch 3 | 1871/ 8400 batches | train loss 0.4021472 +| epoch 3 | 1875/ 8400 batches | train loss 0.3707292 +| epoch 3 | 1879/ 8400 batches | train loss 0.5168012 +| epoch 3 | 1883/ 8400 batches | train loss 0.4035194 +| epoch 3 | 1887/ 8400 batches | train loss 0.2585700 +| epoch 3 | 1891/ 8400 batches | train loss 0.4838980 +| epoch 3 | 1895/ 8400 batches | train loss 0.5674012 +| epoch 3 | 1899/ 8400 batches | train loss 0.5570406 +| epoch 3 | 1903/ 8400 batches | train loss 0.4860680 +| epoch 3 | 1907/ 8400 batches | train loss 0.4464951 +| epoch 3 | 1911/ 8400 batches | train loss 0.5396271 +| epoch 3 | 1915/ 8400 batches | train loss 0.5271707 +| epoch 3 | 1919/ 8400 batches | train loss 0.3904230 +| epoch 3 | 1923/ 8400 batches | train loss 0.4602706 +| epoch 3 | 1927/ 8400 batches | train loss 0.4653879 +| epoch 3 | 1931/ 8400 batches | train loss 0.4571323 +| epoch 3 | 1935/ 8400 batches | train loss 0.4580870 +| epoch 3 | 1939/ 8400 batches | train loss 0.4088288 +| epoch 3 | 1943/ 8400 batches | train loss 0.4120399 +| epoch 3 | 1947/ 8400 batches | train loss 0.3920754 +| epoch 3 | 1951/ 8400 batches | train loss 0.4782687 +| epoch 3 | 1955/ 8400 batches | train loss 0.4459303 +| epoch 3 | 1959/ 8400 batches | train loss 0.4024484 +| epoch 3 | 1963/ 8400 batches | train loss 0.4124344 +| epoch 3 | 1967/ 8400 batches | train loss 0.4613847 +| epoch 3 | 1971/ 8400 batches | train loss 0.4990279 +| epoch 3 | 1975/ 8400 batches | train loss 0.4046359 +| epoch 3 | 1979/ 8400 batches | train loss 0.4533383 +| epoch 3 | 1983/ 8400 batches | train loss 0.3683685 +| epoch 3 | 1987/ 8400 batches | train loss 0.4646831 +| epoch 3 | 1991/ 8400 batches | train loss 0.4306962 +| epoch 3 | 1995/ 8400 batches | train loss 0.3846233 +| epoch 3 | 1999/ 8400 batches | train loss 0.3961951 +| epoch 3 | 2003/ 8400 batches | train loss 0.4260936 +| epoch 3 | 2007/ 8400 batches | train loss 0.4795524 +| epoch 3 | 2011/ 8400 batches | train loss 0.3850807 +| epoch 3 | 2015/ 8400 batches | train loss 0.4079966 +| epoch 3 | 2019/ 8400 batches | train loss 0.4615674 +| epoch 3 | 2023/ 8400 batches | train loss 0.3847578 +| epoch 3 | 2027/ 8400 batches | train loss 0.4381989 +| epoch 3 | 2031/ 8400 batches | train loss 0.3734722 +| epoch 3 | 2035/ 8400 batches | train loss 0.4357494 +| epoch 3 | 2039/ 8400 batches | train loss 0.4516912 +| epoch 3 | 2043/ 8400 batches | train loss 0.4137258 +| epoch 3 | 2047/ 8400 batches | train loss 0.4748888 +| epoch 3 | 2051/ 8400 batches | train loss 0.4158034 +| epoch 3 | 2055/ 8400 batches | train loss 0.4528368 +| epoch 3 | 2059/ 8400 batches | train loss 0.4232789 +| epoch 3 | 2063/ 8400 batches | train loss 0.4266667 +| epoch 3 | 2067/ 8400 batches | train loss 0.4475744 +| epoch 3 | 2071/ 8400 batches | train loss 0.4114550 +| epoch 3 | 2075/ 8400 batches | train loss 0.3947951 +| epoch 3 | 2079/ 8400 batches | train loss 0.3631019 +| epoch 3 | 2083/ 8400 batches | train loss 0.4177865 +| epoch 3 | 2087/ 8400 batches | train loss 0.4826335 +| epoch 3 | 2091/ 8400 batches | train loss 0.4101809 +| epoch 3 | 2095/ 8400 batches | train loss 0.5011265 +| epoch 3 | 2099/ 8400 batches | train loss 0.4273172 +| epoch 3 | 2103/ 8400 batches | train loss 0.3982399 +| epoch 3 | 2107/ 8400 batches | train loss 0.5165367 +| epoch 3 | 2111/ 8400 batches | train loss 0.3957874 +| epoch 3 | 2115/ 8400 batches | train loss 0.4250109 +| epoch 3 | 2119/ 8400 batches | train loss 0.4141327 +| epoch 3 | 2123/ 8400 batches | train loss 0.4523239 +| epoch 3 | 2127/ 8400 batches | train loss 0.3661790 +| epoch 3 | 2131/ 8400 batches | train loss 0.4161738 +| epoch 3 | 2135/ 8400 batches | train loss 0.4422050 +| epoch 3 | 2139/ 8400 batches | train loss 0.3334177 +| epoch 3 | 2143/ 8400 batches | train loss 0.4769242 +| epoch 3 | 2147/ 8400 batches | train loss 0.4784709 +| epoch 3 | 2151/ 8400 batches | train loss 0.4221960 +| epoch 3 | 2155/ 8400 batches | train loss 0.3199443 +| epoch 3 | 2159/ 8400 batches | train loss 0.4035449 +| epoch 3 | 2163/ 8400 batches | train loss 0.4090356 +| epoch 3 | 2167/ 8400 batches | train loss 0.4089789 +| epoch 3 | 2171/ 8400 batches | train loss 0.3528763 +| epoch 3 | 2175/ 8400 batches | train loss 0.3599340 +| epoch 3 | 2179/ 8400 batches | train loss 0.4162851 +| epoch 3 | 2183/ 8400 batches | train loss 0.4066601 +| epoch 3 | 2187/ 8400 batches | train loss 0.3946292 +| epoch 3 | 2191/ 8400 batches | train loss 0.4309688 +| epoch 3 | 2195/ 8400 batches | train loss 0.3903261 +| epoch 3 | 2199/ 8400 batches | train loss 0.3949214 +| epoch 3 | 2203/ 8400 batches | train loss 0.5173039 +| epoch 3 | 2207/ 8400 batches | train loss 0.3421411 +| epoch 3 | 2211/ 8400 batches | train loss 0.3871737 +| epoch 3 | 2215/ 8400 batches | train loss 0.4503427 +| epoch 3 | 2219/ 8400 batches | train loss 0.4490760 +| epoch 3 | 2223/ 8400 batches | train loss 0.4106784 +| epoch 3 | 2227/ 8400 batches | train loss 0.4048940 +| epoch 3 | 2231/ 8400 batches | train loss 0.3991695 +| epoch 3 | 2235/ 8400 batches | train loss 0.4471375 +| epoch 3 | 2239/ 8400 batches | train loss 0.3603232 +| epoch 3 | 2243/ 8400 batches | train loss 0.4122146 +| epoch 3 | 2247/ 8400 batches | train loss 0.4048666 +| epoch 3 | 2251/ 8400 batches | train loss 0.4473425 +| epoch 3 | 2255/ 8400 batches | train loss 0.4032618 +| epoch 3 | 2259/ 8400 batches | train loss 0.3479643 +| epoch 3 | 2263/ 8400 batches | train loss 0.4579791 +| epoch 3 | 2267/ 8400 batches | train loss 0.4988677 +| epoch 3 | 2271/ 8400 batches | train loss 0.3690899 +| epoch 3 | 2275/ 8400 batches | train loss 0.4557022 +| epoch 3 | 2279/ 8400 batches | train loss 0.4797689 +| epoch 3 | 2283/ 8400 batches | train loss 0.3675780 +| epoch 3 | 2287/ 8400 batches | train loss 0.3590749 +| epoch 3 | 2291/ 8400 batches | train loss 0.4194872 +| epoch 3 | 2295/ 8400 batches | train loss 0.3897243 +| epoch 3 | 2299/ 8400 batches | train loss 0.3937960 +| epoch 3 | 2303/ 8400 batches | train loss 0.3669200 +| epoch 3 | 2307/ 8400 batches | train loss 0.4927703 +| epoch 3 | 2311/ 8400 batches | train loss 0.4218509 +| epoch 3 | 2315/ 8400 batches | train loss 0.3753673 +| epoch 3 | 2319/ 8400 batches | train loss 0.4797855 +| epoch 3 | 2323/ 8400 batches | train loss 0.4186707 +| epoch 3 | 2327/ 8400 batches | train loss 0.3955817 +| epoch 3 | 2331/ 8400 batches | train loss 0.4978559 +| epoch 3 | 2335/ 8400 batches | train loss 0.3406259 +| epoch 3 | 2339/ 8400 batches | train loss 0.4278117 +| epoch 3 | 2343/ 8400 batches | train loss 0.4449250 +| epoch 3 | 2347/ 8400 batches | train loss 0.3320563 +| epoch 3 | 2351/ 8400 batches | train loss 0.4098447 +| epoch 3 | 2355/ 8400 batches | train loss 0.4750887 +| epoch 3 | 2359/ 8400 batches | train loss 0.4621130 +| epoch 3 | 2363/ 8400 batches | train loss 0.3950797 +| epoch 3 | 2367/ 8400 batches | train loss 0.4237540 +| epoch 3 | 2371/ 8400 batches | train loss 0.4919001 +| epoch 3 | 2375/ 8400 batches | train loss 0.4039922 +| epoch 3 | 2379/ 8400 batches | train loss 0.3729182 +| epoch 3 | 2383/ 8400 batches | train loss 0.4585421 +| epoch 3 | 2387/ 8400 batches | train loss 0.3965166 +| epoch 3 | 2391/ 8400 batches | train loss 0.4228804 +| epoch 3 | 2395/ 8400 batches | train loss 0.4230769 +| epoch 3 | 2399/ 8400 batches | train loss 0.5285318 +| epoch 3 | 2403/ 8400 batches | train loss 0.5006299 +| epoch 3 | 2407/ 8400 batches | train loss 0.4215284 +| epoch 3 | 2411/ 8400 batches | train loss 0.3627901 +| epoch 3 | 2415/ 8400 batches | train loss 0.4462233 +| epoch 3 | 2419/ 8400 batches | train loss 0.3814696 +| epoch 3 | 2423/ 8400 batches | train loss 0.4821917 +| epoch 3 | 2427/ 8400 batches | train loss 0.4502997 +| epoch 3 | 2431/ 8400 batches | train loss 0.3824692 +| epoch 3 | 2435/ 8400 batches | train loss 0.4927678 +| epoch 3 | 2439/ 8400 batches | train loss 0.4709520 +| epoch 3 | 2443/ 8400 batches | train loss 0.3915802 +| epoch 3 | 2447/ 8400 batches | train loss 0.3607139 +| epoch 3 | 2451/ 8400 batches | train loss 0.4283034 +| epoch 3 | 2455/ 8400 batches | train loss 0.3314071 +| epoch 3 | 2459/ 8400 batches | train loss 0.4172406 +| epoch 3 | 2463/ 8400 batches | train loss 0.4458120 +| epoch 3 | 2467/ 8400 batches | train loss 0.4162682 +| epoch 3 | 2471/ 8400 batches | train loss 0.3567355 +| epoch 3 | 2475/ 8400 batches | train loss 0.3388024 +| epoch 3 | 2479/ 8400 batches | train loss 0.3631730 +| epoch 3 | 2483/ 8400 batches | train loss 0.4178029 +| epoch 3 | 2487/ 8400 batches | train loss 0.4941802 +| epoch 3 | 2491/ 8400 batches | train loss 0.5384659 +| epoch 3 | 2495/ 8400 batches | train loss 0.4115818 +| epoch 3 | 2499/ 8400 batches | train loss 0.4209631 +| epoch 3 | 2503/ 8400 batches | train loss 0.4370303 +| epoch 3 | 2507/ 8400 batches | train loss 0.5950481 +| epoch 3 | 2511/ 8400 batches | train loss 0.4491742 +| epoch 3 | 2515/ 8400 batches | train loss 0.4742596 +| epoch 3 | 2519/ 8400 batches | train loss 0.4421910 +| epoch 3 | 2523/ 8400 batches | train loss 0.4406196 +| epoch 3 | 2527/ 8400 batches | train loss 0.4168361 +| epoch 3 | 2531/ 8400 batches | train loss 0.4623407 +| epoch 3 | 2535/ 8400 batches | train loss 0.4460619 +| epoch 3 | 2539/ 8400 batches | train loss 0.3274370 +| epoch 3 | 2543/ 8400 batches | train loss 0.4348443 +| epoch 3 | 2547/ 8400 batches | train loss 0.2675389 +| epoch 3 | 2551/ 8400 batches | train loss 0.3793083 +| epoch 3 | 2555/ 8400 batches | train loss 0.4341469 +| epoch 3 | 2559/ 8400 batches | train loss 0.4872911 +| epoch 3 | 2563/ 8400 batches | train loss 0.4336466 +| epoch 3 | 2567/ 8400 batches | train loss 0.4557014 +| epoch 3 | 2571/ 8400 batches | train loss 0.4768525 +| epoch 3 | 2575/ 8400 batches | train loss 0.4487462 +| epoch 3 | 2579/ 8400 batches | train loss 0.4666814 +| epoch 3 | 2583/ 8400 batches | train loss 0.4031867 +| epoch 3 | 2587/ 8400 batches | train loss 0.4929907 +| epoch 3 | 2591/ 8400 batches | train loss 0.4693709 +| epoch 3 | 2595/ 8400 batches | train loss 0.4534445 +| epoch 3 | 2599/ 8400 batches | train loss 0.4185665 +| epoch 3 | 2603/ 8400 batches | train loss 0.4087020 +| epoch 3 | 2607/ 8400 batches | train loss 0.4126162 +| epoch 3 | 2611/ 8400 batches | train loss 0.3974220 +| epoch 3 | 2615/ 8400 batches | train loss 0.5163597 +| epoch 3 | 2619/ 8400 batches | train loss 0.4346197 +| epoch 3 | 2623/ 8400 batches | train loss 0.3945452 +| epoch 3 | 2627/ 8400 batches | train loss 0.4170922 +| epoch 3 | 2631/ 8400 batches | train loss 0.4608517 +| epoch 3 | 2635/ 8400 batches | train loss 0.4281597 +| epoch 3 | 2639/ 8400 batches | train loss 0.5363036 +| epoch 3 | 2643/ 8400 batches | train loss 0.3773991 +| epoch 3 | 2647/ 8400 batches | train loss 0.3939678 +| epoch 3 | 2651/ 8400 batches | train loss 0.4039115 +| epoch 3 | 2655/ 8400 batches | train loss 0.4563212 +| epoch 3 | 2659/ 8400 batches | train loss 0.4501998 +| epoch 3 | 2663/ 8400 batches | train loss 0.4152021 +| epoch 3 | 2667/ 8400 batches | train loss 0.3312073 +| epoch 3 | 2671/ 8400 batches | train loss 0.4179820 +| epoch 3 | 2675/ 8400 batches | train loss 0.3838840 +| epoch 3 | 2679/ 8400 batches | train loss 0.3831441 +| epoch 3 | 2683/ 8400 batches | train loss 0.4097995 +| epoch 3 | 2687/ 8400 batches | train loss 0.4111747 +| epoch 3 | 2691/ 8400 batches | train loss 0.4846689 +| epoch 3 | 2695/ 8400 batches | train loss 0.4671797 +| epoch 3 | 2699/ 8400 batches | train loss 0.5327584 +| epoch 3 | 2703/ 8400 batches | train loss 0.3893409 +| epoch 3 | 2707/ 8400 batches | train loss 0.5074238 +| epoch 3 | 2711/ 8400 batches | train loss 0.4536905 +| epoch 3 | 2715/ 8400 batches | train loss 0.5098360 +| epoch 3 | 2719/ 8400 batches | train loss 0.5200508 +| epoch 3 | 2723/ 8400 batches | train loss 0.4360330 +| epoch 3 | 2727/ 8400 batches | train loss 0.3776845 +| epoch 3 | 2731/ 8400 batches | train loss 0.4654699 +| epoch 3 | 2735/ 8400 batches | train loss 0.3260231 +| epoch 3 | 2739/ 8400 batches | train loss 0.5547879 +| epoch 3 | 2743/ 8400 batches | train loss 0.4035779 +| epoch 3 | 2747/ 8400 batches | train loss 0.4318132 +| epoch 3 | 2751/ 8400 batches | train loss 0.4428507 +| epoch 3 | 2755/ 8400 batches | train loss 0.4689087 +| epoch 3 | 2759/ 8400 batches | train loss 0.3550460 +| epoch 3 | 2763/ 8400 batches | train loss 0.4386360 +| epoch 3 | 2767/ 8400 batches | train loss 0.4582905 +| epoch 3 | 2771/ 8400 batches | train loss 0.4880560 +| epoch 3 | 2775/ 8400 batches | train loss 0.5972806 +| epoch 3 | 2779/ 8400 batches | train loss 0.4764749 +| epoch 3 | 2783/ 8400 batches | train loss 0.4310450 +| epoch 3 | 2787/ 8400 batches | train loss 0.4389167 +| epoch 3 | 2791/ 8400 batches | train loss 0.4800881 +| epoch 3 | 2795/ 8400 batches | train loss 0.4707093 +| epoch 3 | 2799/ 8400 batches | train loss 0.3951652 +| epoch 3 | 2803/ 8400 batches | train loss 0.4186311 +| epoch 3 | 2807/ 8400 batches | train loss 0.3367092 +| epoch 3 | 2811/ 8400 batches | train loss 0.3807982 +| epoch 3 | 2815/ 8400 batches | train loss 0.3831452 +| epoch 3 | 2819/ 8400 batches | train loss 0.4624926 +| epoch 3 | 2823/ 8400 batches | train loss 0.4650319 +| epoch 3 | 2827/ 8400 batches | train loss 0.4557779 +| epoch 3 | 2831/ 8400 batches | train loss 0.3973930 +| epoch 3 | 2835/ 8400 batches | train loss 0.4522012 +| epoch 3 | 2839/ 8400 batches | train loss 0.3901882 +| epoch 3 | 2843/ 8400 batches | train loss 0.4798885 +| epoch 3 | 2847/ 8400 batches | train loss 0.4877118 +| epoch 3 | 2851/ 8400 batches | train loss 0.4249757 +| epoch 3 | 2855/ 8400 batches | train loss 0.4199596 +| epoch 3 | 2859/ 8400 batches | train loss 0.4245064 +| epoch 3 | 2863/ 8400 batches | train loss 0.4191217 +| epoch 3 | 2867/ 8400 batches | train loss 0.4922368 +| epoch 3 | 2871/ 8400 batches | train loss 0.4609888 +| epoch 3 | 2875/ 8400 batches | train loss 0.4173293 +| epoch 3 | 2879/ 8400 batches | train loss 0.3801951 +| epoch 3 | 2883/ 8400 batches | train loss 0.4913827 +| epoch 3 | 2887/ 8400 batches | train loss 0.4353384 +| epoch 3 | 2891/ 8400 batches | train loss 0.3978741 +| epoch 3 | 2895/ 8400 batches | train loss 0.3827975 +| epoch 3 | 2899/ 8400 batches | train loss 0.3926415 +| epoch 3 | 2903/ 8400 batches | train loss 0.4000759 +| epoch 3 | 2907/ 8400 batches | train loss 0.4464859 +| epoch 3 | 2911/ 8400 batches | train loss 0.2551497 +| epoch 3 | 2915/ 8400 batches | train loss 0.4684074 +| epoch 3 | 2919/ 8400 batches | train loss 0.4488123 +| epoch 3 | 2923/ 8400 batches | train loss 0.4054960 +| epoch 3 | 2927/ 8400 batches | train loss 0.4320938 +| epoch 3 | 2931/ 8400 batches | train loss 0.4416761 +| epoch 3 | 2935/ 8400 batches | train loss 0.3965000 +| epoch 3 | 2939/ 8400 batches | train loss 0.5472027 +| epoch 3 | 2943/ 8400 batches | train loss 0.4388516 +| epoch 3 | 2947/ 8400 batches | train loss 0.4370481 +| epoch 3 | 2951/ 8400 batches | train loss 0.4402036 +| epoch 3 | 2955/ 8400 batches | train loss 0.4400202 +| epoch 3 | 2959/ 8400 batches | train loss 0.5217542 +| epoch 3 | 2963/ 8400 batches | train loss 0.4415560 +| epoch 3 | 2967/ 8400 batches | train loss 0.4745510 +| epoch 3 | 2971/ 8400 batches | train loss 0.3626564 +| epoch 3 | 2975/ 8400 batches | train loss 0.4392950 +| epoch 3 | 2979/ 8400 batches | train loss 0.3849154 +| epoch 3 | 2983/ 8400 batches | train loss 0.4013920 +| epoch 3 | 2987/ 8400 batches | train loss 0.4153562 +| epoch 3 | 2991/ 8400 batches | train loss 0.3983147 +| epoch 3 | 2995/ 8400 batches | train loss 0.3676240 +| epoch 3 | 2999/ 8400 batches | train loss 0.4129642 +| epoch 3 | 3003/ 8400 batches | train loss 0.4845931 +| epoch 3 | 3007/ 8400 batches | train loss 0.3940961 +| epoch 3 | 3011/ 8400 batches | train loss 0.5094696 +| epoch 3 | 3015/ 8400 batches | train loss 0.3921476 +| epoch 3 | 3019/ 8400 batches | train loss 0.3784822 +| epoch 3 | 3023/ 8400 batches | train loss 0.4317381 +| epoch 3 | 3027/ 8400 batches | train loss 0.4461626 +| epoch 3 | 3031/ 8400 batches | train loss 0.3608190 +| epoch 3 | 3035/ 8400 batches | train loss 0.4621805 +| epoch 3 | 3039/ 8400 batches | train loss 0.4383783 +| epoch 3 | 3043/ 8400 batches | train loss 0.3237410 +| epoch 3 | 3047/ 8400 batches | train loss 0.3968583 +| epoch 3 | 3051/ 8400 batches | train loss 0.4855098 +| epoch 3 | 3055/ 8400 batches | train loss 0.4408745 +| epoch 3 | 3059/ 8400 batches | train loss 0.3779052 +| epoch 3 | 3063/ 8400 batches | train loss 0.4755213 +| epoch 3 | 3067/ 8400 batches | train loss 0.3659426 +| epoch 3 | 3071/ 8400 batches | train loss 0.3879477 +| epoch 3 | 3075/ 8400 batches | train loss 0.4814784 +| epoch 3 | 3079/ 8400 batches | train loss 0.4545875 +| epoch 3 | 3083/ 8400 batches | train loss 0.3605276 +| epoch 3 | 3087/ 8400 batches | train loss 0.4990504 +| epoch 3 | 3091/ 8400 batches | train loss 0.4303882 +| epoch 3 | 3095/ 8400 batches | train loss 0.4026994 +| epoch 3 | 3099/ 8400 batches | train loss 0.4509772 +| epoch 3 | 3103/ 8400 batches | train loss 0.3906162 +| epoch 3 | 3107/ 8400 batches | train loss 0.5047876 +| epoch 3 | 3111/ 8400 batches | train loss 0.4895236 +| epoch 3 | 3115/ 8400 batches | train loss 0.5279763 +| epoch 3 | 3119/ 8400 batches | train loss 0.5621075 +| epoch 3 | 3123/ 8400 batches | train loss 0.4202015 +| epoch 3 | 3127/ 8400 batches | train loss 0.4607244 +| epoch 3 | 3131/ 8400 batches | train loss 0.4061488 +| epoch 3 | 3135/ 8400 batches | train loss 0.5013621 +| epoch 3 | 3139/ 8400 batches | train loss 0.3642625 +| epoch 3 | 3143/ 8400 batches | train loss 0.4655082 +| epoch 3 | 3147/ 8400 batches | train loss 0.3723972 +| epoch 3 | 3151/ 8400 batches | train loss 0.3939061 +| epoch 3 | 3155/ 8400 batches | train loss 0.4303103 +| epoch 3 | 3159/ 8400 batches | train loss 0.4554691 +| epoch 3 | 3163/ 8400 batches | train loss 0.3945343 +| epoch 3 | 3167/ 8400 batches | train loss 0.3793403 +| epoch 3 | 3171/ 8400 batches | train loss 0.3873811 +| epoch 3 | 3175/ 8400 batches | train loss 0.5534453 +| epoch 3 | 3179/ 8400 batches | train loss 0.3786868 +| epoch 3 | 3183/ 8400 batches | train loss 0.3086480 +| epoch 3 | 3187/ 8400 batches | train loss 0.4444151 +| epoch 3 | 3191/ 8400 batches | train loss 0.5012313 +| epoch 3 | 3195/ 8400 batches | train loss 0.3930241 +| epoch 3 | 3199/ 8400 batches | train loss 0.4887090 +| epoch 3 | 3203/ 8400 batches | train loss 0.4326211 +| epoch 3 | 3207/ 8400 batches | train loss 0.4248502 +| epoch 3 | 3211/ 8400 batches | train loss 0.3953975 +| epoch 3 | 3215/ 8400 batches | train loss 0.4922213 +| epoch 3 | 3219/ 8400 batches | train loss 0.4199584 +| epoch 3 | 3223/ 8400 batches | train loss 0.4406448 +| epoch 3 | 3227/ 8400 batches | train loss 0.4454781 +| epoch 3 | 3231/ 8400 batches | train loss 0.4374293 +| epoch 3 | 3235/ 8400 batches | train loss 0.4852137 +| epoch 3 | 3239/ 8400 batches | train loss 0.3419514 +| epoch 3 | 3243/ 8400 batches | train loss 0.3299361 +| epoch 3 | 3247/ 8400 batches | train loss 0.4977032 +| epoch 3 | 3251/ 8400 batches | train loss 0.4221077 +| epoch 3 | 3255/ 8400 batches | train loss 0.3857624 +| epoch 3 | 3259/ 8400 batches | train loss 0.4163298 +| epoch 3 | 3263/ 8400 batches | train loss 0.4703280 +| epoch 3 | 3267/ 8400 batches | train loss 0.4496081 +| epoch 3 | 3271/ 8400 batches | train loss 0.4030460 +| epoch 3 | 3275/ 8400 batches | train loss 0.4246539 +| epoch 3 | 3279/ 8400 batches | train loss 0.3850116 +| epoch 3 | 3283/ 8400 batches | train loss 0.4967883 +| epoch 3 | 3287/ 8400 batches | train loss 0.2588152 +| epoch 3 | 3291/ 8400 batches | train loss 0.4652579 +| epoch 3 | 3295/ 8400 batches | train loss 0.4742422 +| epoch 3 | 3299/ 8400 batches | train loss 0.4542871 +| epoch 3 | 3303/ 8400 batches | train loss 0.4144868 +| epoch 3 | 3307/ 8400 batches | train loss 0.4200699 +| epoch 3 | 3311/ 8400 batches | train loss 0.4255943 +| epoch 3 | 3315/ 8400 batches | train loss 0.4679689 +| epoch 3 | 3319/ 8400 batches | train loss 0.4184220 +| epoch 3 | 3323/ 8400 batches | train loss 0.4850180 +| epoch 3 | 3327/ 8400 batches | train loss 0.3740230 +| epoch 3 | 3331/ 8400 batches | train loss 0.4569137 +| epoch 3 | 3335/ 8400 batches | train loss 0.4116026 +| epoch 3 | 3339/ 8400 batches | train loss 0.3280455 +| epoch 3 | 3343/ 8400 batches | train loss 0.4330699 +| epoch 3 | 3347/ 8400 batches | train loss 0.4864080 +| epoch 3 | 3351/ 8400 batches | train loss 0.5071261 +| epoch 3 | 3355/ 8400 batches | train loss 0.4537407 +| epoch 3 | 3359/ 8400 batches | train loss 0.4405000 +| epoch 3 | 3363/ 8400 batches | train loss 0.4054938 +| epoch 3 | 3367/ 8400 batches | train loss 0.4200187 +| epoch 3 | 3371/ 8400 batches | train loss 0.3574831 +| epoch 3 | 3375/ 8400 batches | train loss 0.4175464 +| epoch 3 | 3379/ 8400 batches | train loss 0.4474798 +| epoch 3 | 3383/ 8400 batches | train loss 0.4855259 +| epoch 3 | 3387/ 8400 batches | train loss 0.4029610 +| epoch 3 | 3391/ 8400 batches | train loss 0.3762630 +| epoch 3 | 3395/ 8400 batches | train loss 0.3633531 +| epoch 3 | 3399/ 8400 batches | train loss 0.4298416 +| epoch 3 | 3403/ 8400 batches | train loss 0.4282546 +| epoch 3 | 3407/ 8400 batches | train loss 0.3756180 +| epoch 3 | 3411/ 8400 batches | train loss 0.4444042 +| epoch 3 | 3415/ 8400 batches | train loss 0.4778789 +| epoch 3 | 3419/ 8400 batches | train loss 0.4508304 +| epoch 3 | 3423/ 8400 batches | train loss 0.4527548 +| epoch 3 | 3427/ 8400 batches | train loss 0.3875016 +| epoch 3 | 3431/ 8400 batches | train loss 0.3600854 +| epoch 3 | 3435/ 8400 batches | train loss 0.4358328 +| epoch 3 | 3439/ 8400 batches | train loss 0.4139833 +| epoch 3 | 3443/ 8400 batches | train loss 0.4727132 +| epoch 3 | 3447/ 8400 batches | train loss 0.3432477 +| epoch 3 | 3451/ 8400 batches | train loss 0.4213719 +| epoch 3 | 3455/ 8400 batches | train loss 0.4000669 +| epoch 3 | 3459/ 8400 batches | train loss 0.4322414 +| epoch 3 | 3463/ 8400 batches | train loss 0.4626339 +| epoch 3 | 3467/ 8400 batches | train loss 0.4148725 +| epoch 3 | 3471/ 8400 batches | train loss 0.4880468 +| epoch 3 | 3475/ 8400 batches | train loss 0.3682779 +| epoch 3 | 3479/ 8400 batches | train loss 0.4670420 +| epoch 3 | 3483/ 8400 batches | train loss 0.4860291 +| epoch 3 | 3487/ 8400 batches | train loss 0.3940412 +| epoch 3 | 3491/ 8400 batches | train loss 0.4076584 +| epoch 3 | 3495/ 8400 batches | train loss 0.4319222 +| epoch 3 | 3499/ 8400 batches | train loss 0.4363534 +| epoch 3 | 3503/ 8400 batches | train loss 0.3502017 +| epoch 3 | 3507/ 8400 batches | train loss 0.4190083 +| epoch 3 | 3511/ 8400 batches | train loss 0.4431495 +| epoch 3 | 3515/ 8400 batches | train loss 0.6339937 +| epoch 3 | 3519/ 8400 batches | train loss 0.4121779 +| epoch 3 | 3523/ 8400 batches | train loss 0.4198700 +| epoch 3 | 3527/ 8400 batches | train loss 0.3650917 +| epoch 3 | 3531/ 8400 batches | train loss 0.4139209 +| epoch 3 | 3535/ 8400 batches | train loss 0.5187604 +| epoch 3 | 3539/ 8400 batches | train loss 0.4427099 +| epoch 3 | 3543/ 8400 batches | train loss 0.5321088 +| epoch 3 | 3547/ 8400 batches | train loss 0.4898304 +| epoch 3 | 3551/ 8400 batches | train loss 0.4646242 +| epoch 3 | 3555/ 8400 batches | train loss 0.3933004 +| epoch 3 | 3559/ 8400 batches | train loss 0.4383591 +| epoch 3 | 3563/ 8400 batches | train loss 0.4364028 +| epoch 3 | 3567/ 8400 batches | train loss 0.4052286 +| epoch 3 | 3571/ 8400 batches | train loss 0.3697054 +| epoch 3 | 3575/ 8400 batches | train loss 0.4988063 +| epoch 3 | 3579/ 8400 batches | train loss 0.4281021 +| epoch 3 | 3583/ 8400 batches | train loss 0.4058370 +| epoch 3 | 3587/ 8400 batches | train loss 0.4233039 +| epoch 3 | 3591/ 8400 batches | train loss 0.3653759 +| epoch 3 | 3595/ 8400 batches | train loss 0.4844217 +| epoch 3 | 3599/ 8400 batches | train loss 0.3915169 +| epoch 3 | 3603/ 8400 batches | train loss 0.4310811 +| epoch 3 | 3607/ 8400 batches | train loss 0.4253882 +| epoch 3 | 3611/ 8400 batches | train loss 0.4006374 +| epoch 3 | 3615/ 8400 batches | train loss 0.3404672 +| epoch 3 | 3619/ 8400 batches | train loss 0.4027497 +| epoch 3 | 3623/ 8400 batches | train loss 0.4069284 +| epoch 3 | 3627/ 8400 batches | train loss 0.4441970 +| epoch 3 | 3631/ 8400 batches | train loss 0.5374571 +| epoch 3 | 3635/ 8400 batches | train loss 0.4702592 +| epoch 3 | 3639/ 8400 batches | train loss 0.4411353 +| epoch 3 | 3643/ 8400 batches | train loss 0.4853932 +| epoch 3 | 3647/ 8400 batches | train loss 0.3800962 +| epoch 3 | 3651/ 8400 batches | train loss 0.4117799 +| epoch 3 | 3655/ 8400 batches | train loss 0.4289967 +| epoch 3 | 3659/ 8400 batches | train loss 0.5730304 +| epoch 3 | 3663/ 8400 batches | train loss 0.4719873 +| epoch 3 | 3667/ 8400 batches | train loss 0.4276562 +| epoch 3 | 3671/ 8400 batches | train loss 0.3937479 +| epoch 3 | 3675/ 8400 batches | train loss 0.3652046 +| epoch 3 | 3679/ 8400 batches | train loss 0.4379597 +| epoch 3 | 3683/ 8400 batches | train loss 0.4134273 +| epoch 3 | 3687/ 8400 batches | train loss 0.3828732 +| epoch 3 | 3691/ 8400 batches | train loss 0.4154026 +| epoch 3 | 3695/ 8400 batches | train loss 0.4121521 +| epoch 3 | 3699/ 8400 batches | train loss 0.4738683 +| epoch 3 | 3703/ 8400 batches | train loss 0.3974714 +| epoch 3 | 3707/ 8400 batches | train loss 0.3824834 +| epoch 3 | 3711/ 8400 batches | train loss 0.4321530 +| epoch 3 | 3715/ 8400 batches | train loss 0.4419288 +| epoch 3 | 3719/ 8400 batches | train loss 0.4577477 +| epoch 3 | 3723/ 8400 batches | train loss 0.4520175 +| epoch 3 | 3727/ 8400 batches | train loss 0.4746246 +| epoch 3 | 3731/ 8400 batches | train loss 0.3777915 +| epoch 3 | 3735/ 8400 batches | train loss 0.4710211 +| epoch 3 | 3739/ 8400 batches | train loss 0.5220116 +| epoch 3 | 3743/ 8400 batches | train loss 0.4813192 +| epoch 3 | 3747/ 8400 batches | train loss 0.4602512 +| epoch 3 | 3751/ 8400 batches | train loss 0.4469970 +| epoch 3 | 3755/ 8400 batches | train loss 0.5049555 +| epoch 3 | 3759/ 8400 batches | train loss 0.3662537 +| epoch 3 | 3763/ 8400 batches | train loss 0.4447244 +| epoch 3 | 3767/ 8400 batches | train loss 0.4252273 +| epoch 3 | 3771/ 8400 batches | train loss 0.4328070 +| epoch 3 | 3775/ 8400 batches | train loss 0.4227177 +| epoch 3 | 3779/ 8400 batches | train loss 0.4175547 +| epoch 3 | 3783/ 8400 batches | train loss 0.3280789 +| epoch 3 | 3787/ 8400 batches | train loss 0.4595338 +| epoch 3 | 3791/ 8400 batches | train loss 0.3945566 +| epoch 3 | 3795/ 8400 batches | train loss 0.4270489 +| epoch 3 | 3799/ 8400 batches | train loss 0.4016593 +| epoch 3 | 3803/ 8400 batches | train loss 0.5073691 +| epoch 3 | 3807/ 8400 batches | train loss 0.4156829 +| epoch 3 | 3811/ 8400 batches | train loss 0.4141298 +| epoch 3 | 3815/ 8400 batches | train loss 0.4759144 +| epoch 3 | 3819/ 8400 batches | train loss 0.5116470 +| epoch 3 | 3823/ 8400 batches | train loss 0.4057332 +| epoch 3 | 3827/ 8400 batches | train loss 0.4251115 +| epoch 3 | 3831/ 8400 batches | train loss 0.5252941 +| epoch 3 | 3835/ 8400 batches | train loss 0.4076021 +| epoch 3 | 3839/ 8400 batches | train loss 0.2891548 +| epoch 3 | 3843/ 8400 batches | train loss 0.4939113 +| epoch 3 | 3847/ 8400 batches | train loss 0.3916595 +| epoch 3 | 3851/ 8400 batches | train loss 0.5708052 +| epoch 3 | 3855/ 8400 batches | train loss 0.4476310 +| epoch 3 | 3859/ 8400 batches | train loss 0.4320664 +| epoch 3 | 3863/ 8400 batches | train loss 0.4382252 +| epoch 3 | 3867/ 8400 batches | train loss 0.4991653 +| epoch 3 | 3871/ 8400 batches | train loss 0.4446437 +| epoch 3 | 3875/ 8400 batches | train loss 0.4777697 +| epoch 3 | 3879/ 8400 batches | train loss 0.3658875 +| epoch 3 | 3883/ 8400 batches | train loss 0.4553218 +| epoch 3 | 3887/ 8400 batches | train loss 0.4384131 +| epoch 3 | 3891/ 8400 batches | train loss 0.4065263 +| epoch 3 | 3895/ 8400 batches | train loss 0.4219631 +| epoch 3 | 3899/ 8400 batches | train loss 0.4662646 +| epoch 3 | 3903/ 8400 batches | train loss 0.4694676 +| epoch 3 | 3907/ 8400 batches | train loss 0.5730330 +| epoch 3 | 3911/ 8400 batches | train loss 0.4089900 +| epoch 3 | 3915/ 8400 batches | train loss 0.3940669 +| epoch 3 | 3919/ 8400 batches | train loss 0.4458036 +| epoch 3 | 3923/ 8400 batches | train loss 0.5194165 +| epoch 3 | 3927/ 8400 batches | train loss 0.4436199 +| epoch 3 | 3931/ 8400 batches | train loss 0.4282609 +| epoch 3 | 3935/ 8400 batches | train loss 0.5280269 +| epoch 3 | 3939/ 8400 batches | train loss 0.4588328 +| epoch 3 | 3943/ 8400 batches | train loss 0.4937447 +| epoch 3 | 3947/ 8400 batches | train loss 0.4819654 +| epoch 3 | 3951/ 8400 batches | train loss 0.5051444 +| epoch 3 | 3955/ 8400 batches | train loss 0.3659846 +| epoch 3 | 3959/ 8400 batches | train loss 0.4280655 +| epoch 3 | 3963/ 8400 batches | train loss 0.4237800 +| epoch 3 | 3967/ 8400 batches | train loss 0.4093893 +| epoch 3 | 3971/ 8400 batches | train loss 0.4428073 +| epoch 3 | 3975/ 8400 batches | train loss 0.5263920 +| epoch 3 | 3979/ 8400 batches | train loss 0.2622664 +| epoch 3 | 3983/ 8400 batches | train loss 0.4558114 +| epoch 3 | 3987/ 8400 batches | train loss 0.4930686 +| epoch 3 | 3991/ 8400 batches | train loss 0.3916312 +| epoch 3 | 3995/ 8400 batches | train loss 0.5043455 +| epoch 3 | 3999/ 8400 batches | train loss 0.4036118 +| epoch 3 | 4003/ 8400 batches | train loss 0.4339397 +| epoch 3 | 4007/ 8400 batches | train loss 0.4473183 +| epoch 3 | 4011/ 8400 batches | train loss 0.4882367 +| epoch 3 | 4015/ 8400 batches | train loss 0.4769215 +| epoch 3 | 4019/ 8400 batches | train loss 0.5171167 +| epoch 3 | 4023/ 8400 batches | train loss 0.4253546 +| epoch 3 | 4027/ 8400 batches | train loss 0.3587659 +| epoch 3 | 4031/ 8400 batches | train loss 0.5155165 +| epoch 3 | 4035/ 8400 batches | train loss 0.4388926 +| epoch 3 | 4039/ 8400 batches | train loss 0.4693834 +| epoch 3 | 4043/ 8400 batches | train loss 0.3399302 +| epoch 3 | 4047/ 8400 batches | train loss 0.4458982 +| epoch 3 | 4051/ 8400 batches | train loss 0.3922828 +| epoch 3 | 4055/ 8400 batches | train loss 0.4175358 +| epoch 3 | 4059/ 8400 batches | train loss 0.3693205 +| epoch 3 | 4063/ 8400 batches | train loss 0.4492429 +| epoch 3 | 4067/ 8400 batches | train loss 0.4736640 +| epoch 3 | 4071/ 8400 batches | train loss 0.4780795 +| epoch 3 | 4075/ 8400 batches | train loss 0.4659325 +| epoch 3 | 4079/ 8400 batches | train loss 0.3841155 +| epoch 3 | 4083/ 8400 batches | train loss 0.4558225 +| epoch 3 | 4087/ 8400 batches | train loss 0.4711018 +| epoch 3 | 4091/ 8400 batches | train loss 0.4554076 +| epoch 3 | 4095/ 8400 batches | train loss 0.4246551 +| epoch 3 | 4099/ 8400 batches | train loss 0.5252431 +| epoch 3 | 4103/ 8400 batches | train loss 0.3865695 +| epoch 3 | 4107/ 8400 batches | train loss 0.4942692 +| epoch 3 | 4111/ 8400 batches | train loss 0.4532787 +| epoch 3 | 4115/ 8400 batches | train loss 0.4739163 +| epoch 3 | 4119/ 8400 batches | train loss 0.4733874 +| epoch 3 | 4123/ 8400 batches | train loss 0.4100262 +| epoch 3 | 4127/ 8400 batches | train loss 0.3754988 +| epoch 3 | 4131/ 8400 batches | train loss 0.3816468 +| epoch 3 | 4135/ 8400 batches | train loss 0.3809307 +| epoch 3 | 4139/ 8400 batches | train loss 0.3888447 +| epoch 3 | 4143/ 8400 batches | train loss 0.4595124 +| epoch 3 | 4147/ 8400 batches | train loss 0.1553629 +| epoch 3 | 4151/ 8400 batches | train loss 0.4035947 +| epoch 3 | 4155/ 8400 batches | train loss 0.3949604 +| epoch 3 | 4159/ 8400 batches | train loss 0.4718992 +| epoch 3 | 4163/ 8400 batches | train loss 0.4065798 +| epoch 3 | 4167/ 8400 batches | train loss 0.3954883 +| epoch 3 | 4171/ 8400 batches | train loss 0.4743035 +| epoch 3 | 4175/ 8400 batches | train loss 0.4632674 +| epoch 3 | 4179/ 8400 batches | train loss 0.4302234 +| epoch 3 | 4183/ 8400 batches | train loss 0.3362600 +| epoch 3 | 4187/ 8400 batches | train loss 0.2831365 +| epoch 3 | 4191/ 8400 batches | train loss 0.4621974 +| epoch 3 | 4195/ 8400 batches | train loss 0.3813274 +| epoch 3 | 4199/ 8400 batches | train loss 0.4275331 +| epoch 3 | 4203/ 8400 batches | train loss 0.4507988 +| epoch 3 | 4207/ 8400 batches | train loss 0.4363708 +| epoch 3 | 4211/ 8400 batches | train loss 0.4720098 +| epoch 3 | 4215/ 8400 batches | train loss 0.3983980 +| epoch 3 | 4219/ 8400 batches | train loss 0.3774331 +| epoch 3 | 4223/ 8400 batches | train loss 0.4871075 +| epoch 3 | 4227/ 8400 batches | train loss 0.3548056 +| epoch 3 | 4231/ 8400 batches | train loss 0.4848293 +| epoch 3 | 4235/ 8400 batches | train loss 0.3884101 +| epoch 3 | 4239/ 8400 batches | train loss 0.4975212 +| epoch 3 | 4243/ 8400 batches | train loss 0.3496988 +| epoch 3 | 4247/ 8400 batches | train loss 0.4322661 +| epoch 3 | 4251/ 8400 batches | train loss 0.4563704 +| epoch 3 | 4255/ 8400 batches | train loss 0.4499002 +| epoch 3 | 4259/ 8400 batches | train loss 0.5055172 +| epoch 3 | 4263/ 8400 batches | train loss 0.4300251 +| epoch 3 | 4267/ 8400 batches | train loss 0.3974542 +| epoch 3 | 4271/ 8400 batches | train loss 0.5326705 +| epoch 3 | 4275/ 8400 batches | train loss 0.3185826 +| epoch 3 | 4279/ 8400 batches | train loss 0.5024976 +| epoch 3 | 4283/ 8400 batches | train loss 0.4925385 +| epoch 3 | 4287/ 8400 batches | train loss 0.4598651 +| epoch 3 | 4291/ 8400 batches | train loss 0.4854089 +| epoch 3 | 4295/ 8400 batches | train loss 0.3914947 +| epoch 3 | 4299/ 8400 batches | train loss 0.4311382 +| epoch 3 | 4303/ 8400 batches | train loss 0.3877803 +| epoch 3 | 4307/ 8400 batches | train loss 0.5294431 +| epoch 3 | 4311/ 8400 batches | train loss 0.4849593 +| epoch 3 | 4315/ 8400 batches | train loss 0.4491101 +| epoch 3 | 4319/ 8400 batches | train loss 0.5195226 +| epoch 3 | 4323/ 8400 batches | train loss 0.4143901 +| epoch 3 | 4327/ 8400 batches | train loss 0.4602073 +| epoch 3 | 4331/ 8400 batches | train loss 0.4216264 +| epoch 3 | 4335/ 8400 batches | train loss 0.4133300 +| epoch 3 | 4339/ 8400 batches | train loss 0.4700159 +| epoch 3 | 4343/ 8400 batches | train loss 0.4950285 +| epoch 3 | 4347/ 8400 batches | train loss 0.3973533 +| epoch 3 | 4351/ 8400 batches | train loss 0.4520872 +| epoch 3 | 4355/ 8400 batches | train loss 0.3525426 +| epoch 3 | 4359/ 8400 batches | train loss 0.5088789 +| epoch 3 | 4363/ 8400 batches | train loss 0.3844631 +| epoch 3 | 4367/ 8400 batches | train loss 0.3459055 +| epoch 3 | 4371/ 8400 batches | train loss 0.3451472 +| epoch 3 | 4375/ 8400 batches | train loss 0.5037439 +| epoch 3 | 4379/ 8400 batches | train loss 0.5235883 +| epoch 3 | 4383/ 8400 batches | train loss 0.4341855 +| epoch 3 | 4387/ 8400 batches | train loss 0.4521947 +| epoch 3 | 4391/ 8400 batches | train loss 0.4408861 +| epoch 3 | 4395/ 8400 batches | train loss 0.3745812 +| epoch 3 | 4399/ 8400 batches | train loss 0.4306205 +| epoch 3 | 4403/ 8400 batches | train loss 0.4352725 +| epoch 3 | 4407/ 8400 batches | train loss 0.3566893 +| epoch 3 | 4411/ 8400 batches | train loss 0.4745087 +| epoch 3 | 4415/ 8400 batches | train loss 0.4000953 +| epoch 3 | 4419/ 8400 batches | train loss 0.4075746 +| epoch 3 | 4423/ 8400 batches | train loss 0.4565172 +| epoch 3 | 4427/ 8400 batches | train loss 0.4323864 +| epoch 3 | 4431/ 8400 batches | train loss 0.3864323 +| epoch 3 | 4435/ 8400 batches | train loss 0.4587228 +| epoch 3 | 4439/ 8400 batches | train loss 0.4333168 +| epoch 3 | 4443/ 8400 batches | train loss 0.3856335 +| epoch 3 | 4447/ 8400 batches | train loss 0.3831725 +| epoch 3 | 4451/ 8400 batches | train loss 0.4504626 +| epoch 3 | 4455/ 8400 batches | train loss 0.4619350 +| epoch 3 | 4459/ 8400 batches | train loss 0.4946671 +| epoch 3 | 4463/ 8400 batches | train loss 0.3327388 +| epoch 3 | 4467/ 8400 batches | train loss 0.4187896 +| epoch 3 | 4471/ 8400 batches | train loss 0.3613147 +| epoch 3 | 4475/ 8400 batches | train loss 0.3947209 +| epoch 3 | 4479/ 8400 batches | train loss 0.5152670 +| epoch 3 | 4483/ 8400 batches | train loss 0.4997280 +| epoch 3 | 4487/ 8400 batches | train loss 0.3992711 +| epoch 3 | 4491/ 8400 batches | train loss 0.3164592 +| epoch 3 | 4495/ 8400 batches | train loss 0.4504139 +| epoch 3 | 4499/ 8400 batches | train loss 0.4860336 +| epoch 3 | 4503/ 8400 batches | train loss 0.4758912 +| epoch 3 | 4507/ 8400 batches | train loss 0.4542741 +| epoch 3 | 4511/ 8400 batches | train loss 0.4591966 +| epoch 3 | 4515/ 8400 batches | train loss 0.5080327 +| epoch 3 | 4519/ 8400 batches | train loss 0.4207844 +| epoch 3 | 4523/ 8400 batches | train loss 0.4688239 +| epoch 3 | 4527/ 8400 batches | train loss 0.4051251 +| epoch 3 | 4531/ 8400 batches | train loss 0.3678784 +| epoch 3 | 4535/ 8400 batches | train loss 0.5305932 +| epoch 3 | 4539/ 8400 batches | train loss 0.3052303 +| epoch 3 | 4543/ 8400 batches | train loss 0.4317425 +| epoch 3 | 4547/ 8400 batches | train loss 0.5399964 +| epoch 3 | 4551/ 8400 batches | train loss 0.5327598 +| epoch 3 | 4555/ 8400 batches | train loss 0.3687620 +| epoch 3 | 4559/ 8400 batches | train loss 0.4369283 +| epoch 3 | 4563/ 8400 batches | train loss 0.4460245 +| epoch 3 | 4567/ 8400 batches | train loss 0.4081407 +| epoch 3 | 4571/ 8400 batches | train loss 0.4610634 +| epoch 3 | 4575/ 8400 batches | train loss 0.3953694 +| epoch 3 | 4579/ 8400 batches | train loss 0.5245516 +| epoch 3 | 4583/ 8400 batches | train loss 0.4159179 +| epoch 3 | 4587/ 8400 batches | train loss 0.4229122 +| epoch 3 | 4591/ 8400 batches | train loss 0.4393918 +| epoch 3 | 4595/ 8400 batches | train loss 0.6808518 +| epoch 3 | 4599/ 8400 batches | train loss 0.3415989 +| epoch 3 | 4603/ 8400 batches | train loss 0.4782517 +| epoch 3 | 4607/ 8400 batches | train loss 0.5106678 +| epoch 3 | 4611/ 8400 batches | train loss 0.5149634 +| epoch 3 | 4615/ 8400 batches | train loss 0.4352367 +| epoch 3 | 4619/ 8400 batches | train loss 0.5136752 +| epoch 3 | 4623/ 8400 batches | train loss 0.4243073 +| epoch 3 | 4627/ 8400 batches | train loss 0.4499112 +| epoch 3 | 4631/ 8400 batches | train loss 0.4304440 +| epoch 3 | 4635/ 8400 batches | train loss 0.4351152 +| epoch 3 | 4639/ 8400 batches | train loss 0.4002710 +| epoch 3 | 4643/ 8400 batches | train loss 0.3989542 +| epoch 3 | 4647/ 8400 batches | train loss 0.4679184 +| epoch 3 | 4651/ 8400 batches | train loss 0.4594752 +| epoch 3 | 4655/ 8400 batches | train loss 0.3570824 +| epoch 3 | 4659/ 8400 batches | train loss 0.4526455 +| epoch 3 | 4663/ 8400 batches | train loss 0.4519161 +| epoch 3 | 4667/ 8400 batches | train loss 0.4622009 +| epoch 3 | 4671/ 8400 batches | train loss 0.4604788 +| epoch 3 | 4675/ 8400 batches | train loss 0.4742973 +| epoch 3 | 4679/ 8400 batches | train loss 0.3397990 +| epoch 3 | 4683/ 8400 batches | train loss 0.4773080 +| epoch 3 | 4687/ 8400 batches | train loss 0.4740769 +| epoch 3 | 4691/ 8400 batches | train loss 0.4676801 +| epoch 3 | 4695/ 8400 batches | train loss 0.4773059 +| epoch 3 | 4699/ 8400 batches | train loss 0.3714828 +| epoch 3 | 4703/ 8400 batches | train loss 0.4541089 +| epoch 3 | 4707/ 8400 batches | train loss 0.4016029 +| epoch 3 | 4711/ 8400 batches | train loss 0.3393990 +| epoch 3 | 4715/ 8400 batches | train loss 0.3830532 +| epoch 3 | 4719/ 8400 batches | train loss 0.4159781 +| epoch 3 | 4723/ 8400 batches | train loss 0.4434882 +| epoch 3 | 4727/ 8400 batches | train loss 0.3776328 +| epoch 3 | 4731/ 8400 batches | train loss 0.3603039 +| epoch 3 | 4735/ 8400 batches | train loss 0.5186175 +| epoch 3 | 4739/ 8400 batches | train loss 0.4685124 +| epoch 3 | 4743/ 8400 batches | train loss 0.4275787 +| epoch 3 | 4747/ 8400 batches | train loss 0.4485740 +| epoch 3 | 4751/ 8400 batches | train loss 0.3531971 +| epoch 3 | 4755/ 8400 batches | train loss 0.4654415 +| epoch 3 | 4759/ 8400 batches | train loss 0.4841647 +| epoch 3 | 4763/ 8400 batches | train loss 0.4097283 +| epoch 3 | 4767/ 8400 batches | train loss 0.4297427 +| epoch 3 | 4771/ 8400 batches | train loss 0.5093180 +| epoch 3 | 4775/ 8400 batches | train loss 0.5248168 +| epoch 3 | 4779/ 8400 batches | train loss 0.5906608 +| epoch 3 | 4783/ 8400 batches | train loss 0.4483946 +| epoch 3 | 4787/ 8400 batches | train loss 0.3565907 +| epoch 3 | 4791/ 8400 batches | train loss 0.4765308 +| epoch 3 | 4795/ 8400 batches | train loss 0.3645914 +| epoch 3 | 4799/ 8400 batches | train loss 0.4414135 +| epoch 3 | 4803/ 8400 batches | train loss 0.4554636 +| epoch 3 | 4807/ 8400 batches | train loss 0.5148290 +| epoch 3 | 4811/ 8400 batches | train loss 0.4777483 +| epoch 3 | 4815/ 8400 batches | train loss 0.4152004 +| epoch 3 | 4819/ 8400 batches | train loss 0.5489973 +| epoch 3 | 4823/ 8400 batches | train loss 0.4630741 +| epoch 3 | 4827/ 8400 batches | train loss 0.4938445 +| epoch 3 | 4831/ 8400 batches | train loss 0.4186473 +| epoch 3 | 4835/ 8400 batches | train loss 0.4121084 +| epoch 3 | 4839/ 8400 batches | train loss 0.4306587 +| epoch 3 | 4843/ 8400 batches | train loss 0.4465706 +| epoch 3 | 4847/ 8400 batches | train loss 0.4408892 +| epoch 3 | 4851/ 8400 batches | train loss 0.5751983 +| epoch 3 | 4855/ 8400 batches | train loss 0.4694500 +| epoch 3 | 4859/ 8400 batches | train loss 0.4555127 +| epoch 3 | 4863/ 8400 batches | train loss 0.4098268 +| epoch 3 | 4867/ 8400 batches | train loss 0.4299458 +| epoch 3 | 4871/ 8400 batches | train loss 0.4167025 +| epoch 3 | 4875/ 8400 batches | train loss 0.4354016 +| epoch 3 | 4879/ 8400 batches | train loss 0.3980429 +| epoch 3 | 4883/ 8400 batches | train loss 0.4293247 +| epoch 3 | 4887/ 8400 batches | train loss 0.4017226 +| epoch 3 | 4891/ 8400 batches | train loss 0.3842996 +| epoch 3 | 4895/ 8400 batches | train loss 0.4454483 +| epoch 3 | 4899/ 8400 batches | train loss 0.4504895 +| epoch 3 | 4903/ 8400 batches | train loss 0.4133086 +| epoch 3 | 4907/ 8400 batches | train loss 0.4145674 +| epoch 3 | 4911/ 8400 batches | train loss 0.3663338 +| epoch 3 | 4915/ 8400 batches | train loss 0.4097959 +| epoch 3 | 4919/ 8400 batches | train loss 0.4937093 +| epoch 3 | 4923/ 8400 batches | train loss 0.4816546 +| epoch 3 | 4927/ 8400 batches | train loss 0.3956395 +| epoch 3 | 4931/ 8400 batches | train loss 0.3947642 +| epoch 3 | 4935/ 8400 batches | train loss 0.4404728 +| epoch 3 | 4939/ 8400 batches | train loss 0.4118055 +| epoch 3 | 4943/ 8400 batches | train loss 0.5142845 +| epoch 3 | 4947/ 8400 batches | train loss 0.3370161 +| epoch 3 | 4951/ 8400 batches | train loss 0.4783406 +| epoch 3 | 4955/ 8400 batches | train loss 0.3571847 +| epoch 3 | 4959/ 8400 batches | train loss 0.3803309 +| epoch 3 | 4963/ 8400 batches | train loss 0.4540127 +| epoch 3 | 4967/ 8400 batches | train loss 0.3903714 +| epoch 3 | 4971/ 8400 batches | train loss 0.4296027 +| epoch 3 | 4975/ 8400 batches | train loss 0.3641831 +| epoch 3 | 4979/ 8400 batches | train loss 0.4134306 +| epoch 3 | 4983/ 8400 batches | train loss 0.4066682 +| epoch 3 | 4987/ 8400 batches | train loss 0.4991532 +| epoch 3 | 4991/ 8400 batches | train loss 0.4051840 +| epoch 3 | 4995/ 8400 batches | train loss 0.4531197 +| epoch 3 | 4999/ 8400 batches | train loss 0.3673588 +| epoch 3 | 5003/ 8400 batches | train loss 0.4710798 +| epoch 3 | 5007/ 8400 batches | train loss 0.4780102 +| epoch 3 | 5011/ 8400 batches | train loss 0.4662254 +| epoch 3 | 5015/ 8400 batches | train loss 0.5293398 +| epoch 3 | 5019/ 8400 batches | train loss 0.3996031 +| epoch 3 | 5023/ 8400 batches | train loss 0.4654372 +| epoch 3 | 5027/ 8400 batches | train loss 0.3617325 +| epoch 3 | 5031/ 8400 batches | train loss 0.4176438 +| epoch 3 | 5035/ 8400 batches | train loss 0.4362382 +| epoch 3 | 5039/ 8400 batches | train loss 0.4032398 +| epoch 3 | 5043/ 8400 batches | train loss 0.4459668 +| epoch 3 | 5047/ 8400 batches | train loss 0.4666676 +| epoch 3 | 5051/ 8400 batches | train loss 0.4748141 +| epoch 3 | 5055/ 8400 batches | train loss 0.4775225 +| epoch 3 | 5059/ 8400 batches | train loss 0.4664161 +| epoch 3 | 5063/ 8400 batches | train loss 0.4821625 +| epoch 3 | 5067/ 8400 batches | train loss 0.3590054 +| epoch 3 | 5071/ 8400 batches | train loss 0.4647713 +| epoch 3 | 5075/ 8400 batches | train loss 0.4687353 +| epoch 3 | 5079/ 8400 batches | train loss 0.5140166 +| epoch 3 | 5083/ 8400 batches | train loss 0.4534609 +| epoch 3 | 5087/ 8400 batches | train loss 0.3722368 +| epoch 3 | 5091/ 8400 batches | train loss 0.4602840 +| epoch 3 | 5095/ 8400 batches | train loss 0.5008442 +| epoch 3 | 5099/ 8400 batches | train loss 0.4556881 +| epoch 3 | 5103/ 8400 batches | train loss 0.4113770 +| epoch 3 | 5107/ 8400 batches | train loss 0.3652952 +| epoch 3 | 5111/ 8400 batches | train loss 0.4242270 +| epoch 3 | 5115/ 8400 batches | train loss 0.4295415 +| epoch 3 | 5119/ 8400 batches | train loss 0.3612444 +| epoch 3 | 5123/ 8400 batches | train loss 0.5024571 +| epoch 3 | 5127/ 8400 batches | train loss 0.4016355 +| epoch 3 | 5131/ 8400 batches | train loss 0.4323520 +| epoch 3 | 5135/ 8400 batches | train loss 0.4250819 +| epoch 3 | 5139/ 8400 batches | train loss 0.4617497 +| epoch 3 | 5143/ 8400 batches | train loss 0.4691481 +| epoch 3 | 5147/ 8400 batches | train loss 0.3156797 +| epoch 3 | 5151/ 8400 batches | train loss 0.4356196 +| epoch 3 | 5155/ 8400 batches | train loss 0.3329070 +| epoch 3 | 5159/ 8400 batches | train loss 0.4183389 +| epoch 3 | 5163/ 8400 batches | train loss 0.3937614 +| epoch 3 | 5167/ 8400 batches | train loss 0.4053788 +| epoch 3 | 5171/ 8400 batches | train loss 0.4409531 +| epoch 3 | 5175/ 8400 batches | train loss 0.5074399 +| epoch 3 | 5179/ 8400 batches | train loss 0.4186581 +| epoch 3 | 5183/ 8400 batches | train loss 0.4089649 +| epoch 3 | 5187/ 8400 batches | train loss 0.4570020 +| epoch 3 | 5191/ 8400 batches | train loss 0.4171641 +| epoch 3 | 5195/ 8400 batches | train loss 0.3990723 +| epoch 3 | 5199/ 8400 batches | train loss 0.4207669 +| epoch 3 | 5203/ 8400 batches | train loss 0.4516804 +| epoch 3 | 5207/ 8400 batches | train loss 0.4256197 +| epoch 3 | 5211/ 8400 batches | train loss 0.4641318 +| epoch 3 | 5215/ 8400 batches | train loss 0.4464850 +| epoch 3 | 5219/ 8400 batches | train loss 0.4770527 +| epoch 3 | 5223/ 8400 batches | train loss 0.4782286 +| epoch 3 | 5227/ 8400 batches | train loss 0.4234222 +| epoch 3 | 5231/ 8400 batches | train loss 0.5031089 +| epoch 3 | 5235/ 8400 batches | train loss 0.4006312 +| epoch 3 | 5239/ 8400 batches | train loss 0.4062267 +| epoch 3 | 5243/ 8400 batches | train loss 0.4401333 +| epoch 3 | 5247/ 8400 batches | train loss 0.4343511 +| epoch 3 | 5251/ 8400 batches | train loss 0.4476901 +| epoch 3 | 5255/ 8400 batches | train loss 0.4311011 +| epoch 3 | 5259/ 8400 batches | train loss 0.4189512 +| epoch 3 | 5263/ 8400 batches | train loss 0.3605996 +| epoch 3 | 5267/ 8400 batches | train loss 0.4693173 +| epoch 3 | 5271/ 8400 batches | train loss 0.4672505 +| epoch 3 | 5275/ 8400 batches | train loss 0.4099944 +| epoch 3 | 5279/ 8400 batches | train loss 0.3859398 +| epoch 3 | 5283/ 8400 batches | train loss 0.4776340 +| epoch 3 | 5287/ 8400 batches | train loss 0.4285035 +| epoch 3 | 5291/ 8400 batches | train loss 0.4409409 +| epoch 3 | 5295/ 8400 batches | train loss 0.3553640 +| epoch 3 | 5299/ 8400 batches | train loss 0.4218186 +| epoch 3 | 5303/ 8400 batches | train loss 0.4233144 +| epoch 3 | 5307/ 8400 batches | train loss 0.4143127 +| epoch 3 | 5311/ 8400 batches | train loss 0.4037282 +| epoch 3 | 5315/ 8400 batches | train loss 0.5166503 +| epoch 3 | 5319/ 8400 batches | train loss 0.4232835 +| epoch 3 | 5323/ 8400 batches | train loss 0.3962114 +| epoch 3 | 5327/ 8400 batches | train loss 0.3610356 +| epoch 3 | 5331/ 8400 batches | train loss 0.4950358 +| epoch 3 | 5335/ 8400 batches | train loss 0.4136183 +| epoch 3 | 5339/ 8400 batches | train loss 0.3729496 +| epoch 3 | 5343/ 8400 batches | train loss 0.5066221 +| epoch 3 | 5347/ 8400 batches | train loss 0.4264188 +| epoch 3 | 5351/ 8400 batches | train loss 0.4660183 +| epoch 3 | 5355/ 8400 batches | train loss 0.4615914 +| epoch 3 | 5359/ 8400 batches | train loss 0.4545941 +| epoch 3 | 5363/ 8400 batches | train loss 0.5018060 +| epoch 3 | 5367/ 8400 batches | train loss 0.3949551 +| epoch 3 | 5371/ 8400 batches | train loss 0.3932520 +| epoch 3 | 5375/ 8400 batches | train loss 0.3891376 +| epoch 3 | 5379/ 8400 batches | train loss 0.4372912 +| epoch 3 | 5383/ 8400 batches | train loss 0.4827383 +| epoch 3 | 5387/ 8400 batches | train loss 0.4595698 +| epoch 3 | 5391/ 8400 batches | train loss 0.3853126 +| epoch 3 | 5395/ 8400 batches | train loss 0.4529684 +| epoch 3 | 5399/ 8400 batches | train loss 0.4370916 +| epoch 3 | 5403/ 8400 batches | train loss 0.3984270 +| epoch 3 | 5407/ 8400 batches | train loss 0.3750512 +| epoch 3 | 5411/ 8400 batches | train loss 0.4410890 +| epoch 3 | 5415/ 8400 batches | train loss 0.4130443 +| epoch 3 | 5419/ 8400 batches | train loss 0.4579374 +| epoch 3 | 5423/ 8400 batches | train loss 0.4597472 +| epoch 3 | 5427/ 8400 batches | train loss 0.5068272 +| epoch 3 | 5431/ 8400 batches | train loss 0.4338464 +| epoch 3 | 5435/ 8400 batches | train loss 0.4928029 +| epoch 3 | 5439/ 8400 batches | train loss 0.3942690 +| epoch 3 | 5443/ 8400 batches | train loss 0.4620813 +| epoch 3 | 5447/ 8400 batches | train loss 0.4194825 +| epoch 3 | 5451/ 8400 batches | train loss 0.2840390 +| epoch 3 | 5455/ 8400 batches | train loss 0.4190543 +| epoch 3 | 5459/ 8400 batches | train loss 0.5339235 +| epoch 3 | 5463/ 8400 batches | train loss 0.4699958 +| epoch 3 | 5467/ 8400 batches | train loss 0.4369792 +| epoch 3 | 5471/ 8400 batches | train loss 0.4219821 +| epoch 3 | 5475/ 8400 batches | train loss 0.5136022 +| epoch 3 | 5479/ 8400 batches | train loss 0.3990676 +| epoch 3 | 5483/ 8400 batches | train loss 0.4844312 +| epoch 3 | 5487/ 8400 batches | train loss 0.3599533 +| epoch 3 | 5491/ 8400 batches | train loss 0.3750450 +| epoch 3 | 5495/ 8400 batches | train loss 0.4705094 +| epoch 3 | 5499/ 8400 batches | train loss 0.3974990 +| epoch 3 | 5503/ 8400 batches | train loss 0.3659017 +| epoch 3 | 5507/ 8400 batches | train loss 0.2652087 +| epoch 3 | 5511/ 8400 batches | train loss 0.4131779 +| epoch 3 | 5515/ 8400 batches | train loss 0.5141127 +| epoch 3 | 5519/ 8400 batches | train loss 0.4472099 +| epoch 3 | 5523/ 8400 batches | train loss 0.5049028 +| epoch 3 | 5527/ 8400 batches | train loss 0.4184373 +| epoch 3 | 5531/ 8400 batches | train loss 0.4285707 +| epoch 3 | 5535/ 8400 batches | train loss 0.3972914 +| epoch 3 | 5539/ 8400 batches | train loss 0.4401591 +| epoch 3 | 5543/ 8400 batches | train loss 0.4336876 +| epoch 3 | 5547/ 8400 batches | train loss 0.3566805 +| epoch 3 | 5551/ 8400 batches | train loss 0.4183176 +| epoch 3 | 5555/ 8400 batches | train loss 0.3444404 +| epoch 3 | 5559/ 8400 batches | train loss 0.3840351 +| epoch 3 | 5563/ 8400 batches | train loss 0.4190071 +| epoch 3 | 5567/ 8400 batches | train loss 0.4692533 +| epoch 3 | 5571/ 8400 batches | train loss 0.3594779 +| epoch 3 | 5575/ 8400 batches | train loss 0.4550391 +| epoch 3 | 5579/ 8400 batches | train loss 0.4250960 +| epoch 3 | 5583/ 8400 batches | train loss 0.5020220 +| epoch 3 | 5587/ 8400 batches | train loss 0.4648731 +| epoch 3 | 5591/ 8400 batches | train loss 0.3828313 +| epoch 3 | 5595/ 8400 batches | train loss 0.4406587 +| epoch 3 | 5599/ 8400 batches | train loss 0.4329984 +| epoch 3 | 5603/ 8400 batches | train loss 0.5569104 +| epoch 3 | 5607/ 8400 batches | train loss 0.4328119 +| epoch 3 | 5611/ 8400 batches | train loss 0.3925989 +| epoch 3 | 5615/ 8400 batches | train loss 0.4197273 +| epoch 3 | 5619/ 8400 batches | train loss 0.5034270 +| epoch 3 | 5623/ 8400 batches | train loss 0.3288835 +| epoch 3 | 5627/ 8400 batches | train loss 0.4836558 +| epoch 3 | 5631/ 8400 batches | train loss 0.4659472 +| epoch 3 | 5635/ 8400 batches | train loss 0.4657136 +| epoch 3 | 5639/ 8400 batches | train loss 0.4252695 +| epoch 3 | 5643/ 8400 batches | train loss 0.3963444 +| epoch 3 | 5647/ 8400 batches | train loss 0.4661562 +| epoch 3 | 5651/ 8400 batches | train loss 0.4364211 +| epoch 3 | 5655/ 8400 batches | train loss 0.4268318 +| epoch 3 | 5659/ 8400 batches | train loss 0.4972647 +| epoch 3 | 5663/ 8400 batches | train loss 0.4139843 +| epoch 3 | 5667/ 8400 batches | train loss 0.4159294 +| epoch 3 | 5671/ 8400 batches | train loss 0.4052324 +| epoch 3 | 5675/ 8400 batches | train loss 0.2429203 +| epoch 3 | 5679/ 8400 batches | train loss 0.4918456 +| epoch 3 | 5683/ 8400 batches | train loss 0.4129604 +| epoch 3 | 5687/ 8400 batches | train loss 0.4042357 +| epoch 3 | 5691/ 8400 batches | train loss 0.4548768 +| epoch 3 | 5695/ 8400 batches | train loss 0.4327132 +| epoch 3 | 5699/ 8400 batches | train loss 0.3256992 +| epoch 3 | 5703/ 8400 batches | train loss 0.4426100 +| epoch 3 | 5707/ 8400 batches | train loss 0.4257580 +| epoch 3 | 5711/ 8400 batches | train loss 0.4579666 +| epoch 3 | 5715/ 8400 batches | train loss 0.4475928 +| epoch 3 | 5719/ 8400 batches | train loss 0.3656399 +| epoch 3 | 5723/ 8400 batches | train loss 0.5720294 +| epoch 3 | 5727/ 8400 batches | train loss 0.4268656 +| epoch 3 | 5731/ 8400 batches | train loss 0.4449340 +| epoch 3 | 5735/ 8400 batches | train loss 0.4318892 +| epoch 3 | 5739/ 8400 batches | train loss 0.4405954 +| epoch 3 | 5743/ 8400 batches | train loss 0.6045535 +| epoch 3 | 5747/ 8400 batches | train loss 0.4587021 +| epoch 3 | 5751/ 8400 batches | train loss 0.4198572 +| epoch 3 | 5755/ 8400 batches | train loss 0.4064422 +| epoch 3 | 5759/ 8400 batches | train loss 0.5104040 +| epoch 3 | 5763/ 8400 batches | train loss 0.4111992 +| epoch 3 | 5767/ 8400 batches | train loss 0.4080591 +| epoch 3 | 5771/ 8400 batches | train loss 0.4127774 +| epoch 3 | 5775/ 8400 batches | train loss 0.4718962 +| epoch 3 | 5779/ 8400 batches | train loss 0.4771731 +| epoch 3 | 5783/ 8400 batches | train loss 0.4219110 +| epoch 3 | 5787/ 8400 batches | train loss 0.4699021 +| epoch 3 | 5791/ 8400 batches | train loss 0.4607189 +| epoch 3 | 5795/ 8400 batches | train loss 0.4687615 +| epoch 3 | 5799/ 8400 batches | train loss 0.4309178 +| epoch 3 | 5803/ 8400 batches | train loss 0.4529877 +| epoch 3 | 5807/ 8400 batches | train loss 0.3626259 +| epoch 3 | 5811/ 8400 batches | train loss 0.4046740 +| epoch 3 | 5815/ 8400 batches | train loss 0.4880724 +| epoch 3 | 5819/ 8400 batches | train loss 0.3485500 +| epoch 3 | 5823/ 8400 batches | train loss 0.4858816 +| epoch 3 | 5827/ 8400 batches | train loss 0.4304579 +| epoch 3 | 5831/ 8400 batches | train loss 0.4136354 +| epoch 3 | 5835/ 8400 batches | train loss 0.3782254 +| epoch 3 | 5839/ 8400 batches | train loss 0.4686147 +| epoch 3 | 5843/ 8400 batches | train loss 0.4275938 +| epoch 3 | 5847/ 8400 batches | train loss 0.4444302 +| epoch 3 | 5851/ 8400 batches | train loss 0.4295036 +| epoch 3 | 5855/ 8400 batches | train loss 0.3943246 +| epoch 3 | 5859/ 8400 batches | train loss 0.5099962 +| epoch 3 | 5863/ 8400 batches | train loss 0.4406220 +| epoch 3 | 5867/ 8400 batches | train loss 0.4384875 +| epoch 3 | 5871/ 8400 batches | train loss 0.4083177 +| epoch 3 | 5875/ 8400 batches | train loss 0.4378953 +| epoch 3 | 5879/ 8400 batches | train loss 0.4902639 +| epoch 3 | 5883/ 8400 batches | train loss 0.4210080 +| epoch 3 | 5887/ 8400 batches | train loss 0.4219649 +| epoch 3 | 5891/ 8400 batches | train loss 0.4135344 +| epoch 3 | 5895/ 8400 batches | train loss 0.4055356 +| epoch 3 | 5899/ 8400 batches | train loss 0.4898872 +| epoch 3 | 5903/ 8400 batches | train loss 0.5293130 +| epoch 3 | 5907/ 8400 batches | train loss 0.4715566 +| epoch 3 | 5911/ 8400 batches | train loss 0.3440272 +| epoch 3 | 5915/ 8400 batches | train loss 0.3605144 +| epoch 3 | 5919/ 8400 batches | train loss 0.3057417 +| epoch 3 | 5923/ 8400 batches | train loss 0.4469911 +| epoch 3 | 5927/ 8400 batches | train loss 0.4172246 +| epoch 3 | 5931/ 8400 batches | train loss 0.3849238 +| epoch 3 | 5935/ 8400 batches | train loss 0.4460186 +| epoch 3 | 5939/ 8400 batches | train loss 0.4143168 +| epoch 3 | 5943/ 8400 batches | train loss 0.4494727 +| epoch 3 | 5947/ 8400 batches | train loss 0.5054681 +| epoch 3 | 5951/ 8400 batches | train loss 0.3410047 +| epoch 3 | 5955/ 8400 batches | train loss 0.4311704 +| epoch 3 | 5959/ 8400 batches | train loss 0.4027765 +| epoch 3 | 5963/ 8400 batches | train loss 0.4258406 +| epoch 3 | 5967/ 8400 batches | train loss 0.4243905 +| epoch 3 | 5971/ 8400 batches | train loss 0.3891525 +| epoch 3 | 5975/ 8400 batches | train loss 0.4525754 +| epoch 3 | 5979/ 8400 batches | train loss 0.4968563 +| epoch 3 | 5983/ 8400 batches | train loss 0.4479370 +| epoch 3 | 5987/ 8400 batches | train loss 0.4648337 +| epoch 3 | 5991/ 8400 batches | train loss 0.4565050 +| epoch 3 | 5995/ 8400 batches | train loss 0.4106365 +| epoch 3 | 5999/ 8400 batches | train loss 0.4385476 +| epoch 3 | 6003/ 8400 batches | train loss 0.4479324 +| epoch 3 | 6007/ 8400 batches | train loss 0.5036008 +| epoch 3 | 6011/ 8400 batches | train loss 0.4562574 +| epoch 3 | 6015/ 8400 batches | train loss 0.3748294 +| epoch 3 | 6019/ 8400 batches | train loss 0.4096712 +| epoch 3 | 6023/ 8400 batches | train loss 0.4330110 +| epoch 3 | 6027/ 8400 batches | train loss 0.4324446 +| epoch 3 | 6031/ 8400 batches | train loss 0.4570785 +| epoch 3 | 6035/ 8400 batches | train loss 0.4117538 +| epoch 3 | 6039/ 8400 batches | train loss 0.3982836 +| epoch 3 | 6043/ 8400 batches | train loss 0.5239062 +| epoch 3 | 6047/ 8400 batches | train loss 0.3833320 +| epoch 3 | 6051/ 8400 batches | train loss 0.4459039 +| epoch 3 | 6055/ 8400 batches | train loss 0.4018120 +| epoch 3 | 6059/ 8400 batches | train loss 0.4472901 +| epoch 3 | 6063/ 8400 batches | train loss 0.4307657 +| epoch 3 | 6067/ 8400 batches | train loss 0.4237018 +| epoch 3 | 6071/ 8400 batches | train loss 0.4282608 +| epoch 3 | 6075/ 8400 batches | train loss 0.4333246 +| epoch 3 | 6079/ 8400 batches | train loss 0.4316451 +| epoch 3 | 6083/ 8400 batches | train loss 0.5543202 +| epoch 3 | 6087/ 8400 batches | train loss 0.4132926 +| epoch 3 | 6091/ 8400 batches | train loss 0.5258691 +| epoch 3 | 6095/ 8400 batches | train loss 0.4432510 +| epoch 3 | 6099/ 8400 batches | train loss 0.4619863 +| epoch 3 | 6103/ 8400 batches | train loss 0.4284809 +| epoch 3 | 6107/ 8400 batches | train loss 0.2908910 +| epoch 3 | 6111/ 8400 batches | train loss 0.3959228 +| epoch 3 | 6115/ 8400 batches | train loss 0.4487470 +| epoch 3 | 6119/ 8400 batches | train loss 0.4292397 +| epoch 3 | 6123/ 8400 batches | train loss 0.4518757 +| epoch 3 | 6127/ 8400 batches | train loss 0.4641860 +| epoch 3 | 6131/ 8400 batches | train loss 0.4315655 +| epoch 3 | 6135/ 8400 batches | train loss 0.4477816 +| epoch 3 | 6139/ 8400 batches | train loss 0.4104670 +| epoch 3 | 6143/ 8400 batches | train loss 0.4675618 +| epoch 3 | 6147/ 8400 batches | train loss 0.5127437 +| epoch 3 | 6151/ 8400 batches | train loss 0.4139062 +| epoch 3 | 6155/ 8400 batches | train loss 0.4602029 +| epoch 3 | 6159/ 8400 batches | train loss 0.4673343 +| epoch 3 | 6163/ 8400 batches | train loss 0.4269771 +| epoch 3 | 6167/ 8400 batches | train loss 0.3990184 +| epoch 3 | 6171/ 8400 batches | train loss 0.4596578 +| epoch 3 | 6175/ 8400 batches | train loss 0.3066605 +| epoch 3 | 6179/ 8400 batches | train loss 0.4500671 +| epoch 3 | 6183/ 8400 batches | train loss 0.3631921 +| epoch 3 | 6187/ 8400 batches | train loss 0.3263886 +| epoch 3 | 6191/ 8400 batches | train loss 0.4001731 +| epoch 3 | 6195/ 8400 batches | train loss 0.4391366 +| epoch 3 | 6199/ 8400 batches | train loss 0.3420657 +| epoch 3 | 6203/ 8400 batches | train loss 0.5155169 +| epoch 3 | 6207/ 8400 batches | train loss 0.3812865 +| epoch 3 | 6211/ 8400 batches | train loss 0.4504139 +| epoch 3 | 6215/ 8400 batches | train loss 0.4211414 +| epoch 3 | 6219/ 8400 batches | train loss 0.5141164 +| epoch 3 | 6223/ 8400 batches | train loss 0.5080925 +| epoch 3 | 6227/ 8400 batches | train loss 0.4591738 +| epoch 3 | 6231/ 8400 batches | train loss 0.5173648 +| epoch 3 | 6235/ 8400 batches | train loss 0.4473312 +| epoch 3 | 6239/ 8400 batches | train loss 0.4487575 +| epoch 3 | 6243/ 8400 batches | train loss 0.4415043 +| epoch 3 | 6247/ 8400 batches | train loss 0.3423852 +| epoch 3 | 6251/ 8400 batches | train loss 0.4381605 +| epoch 3 | 6255/ 8400 batches | train loss 0.4729115 +| epoch 3 | 6259/ 8400 batches | train loss 0.4190980 +| epoch 3 | 6263/ 8400 batches | train loss 0.3717624 +| epoch 3 | 6267/ 8400 batches | train loss 0.4675045 +| epoch 3 | 6271/ 8400 batches | train loss 0.4270280 +| epoch 3 | 6275/ 8400 batches | train loss 0.4480417 +| epoch 3 | 6279/ 8400 batches | train loss 0.4257110 +| epoch 3 | 6283/ 8400 batches | train loss 0.4122897 +| epoch 3 | 6287/ 8400 batches | train loss 0.4074734 +| epoch 3 | 6291/ 8400 batches | train loss 0.4430276 +| epoch 3 | 6295/ 8400 batches | train loss 0.4816563 +| epoch 3 | 6299/ 8400 batches | train loss 0.4129362 +| epoch 3 | 6303/ 8400 batches | train loss 0.4446423 +| epoch 3 | 6307/ 8400 batches | train loss 0.5213354 +| epoch 3 | 6311/ 8400 batches | train loss 0.2943779 +| epoch 3 | 6315/ 8400 batches | train loss 0.4309244 +| epoch 3 | 6319/ 8400 batches | train loss 0.4614321 +| epoch 3 | 6323/ 8400 batches | train loss 0.3830779 +| epoch 3 | 6327/ 8400 batches | train loss 0.4093908 +| epoch 3 | 6331/ 8400 batches | train loss 0.4897978 +| epoch 3 | 6335/ 8400 batches | train loss 0.4687956 +| epoch 3 | 6339/ 8400 batches | train loss 0.4487105 +| epoch 3 | 6343/ 8400 batches | train loss 0.4606756 +| epoch 3 | 6347/ 8400 batches | train loss 0.4475814 +| epoch 3 | 6351/ 8400 batches | train loss 0.4132512 +| epoch 3 | 6355/ 8400 batches | train loss 0.4711013 +| epoch 3 | 6359/ 8400 batches | train loss 0.3555175 +| epoch 3 | 6363/ 8400 batches | train loss 0.4077731 +| epoch 3 | 6367/ 8400 batches | train loss 0.3549019 +| epoch 3 | 6371/ 8400 batches | train loss 0.4640273 +| epoch 3 | 6375/ 8400 batches | train loss 0.3807636 +| epoch 3 | 6379/ 8400 batches | train loss 0.3233097 +| epoch 3 | 6383/ 8400 batches | train loss 0.4575552 +| epoch 3 | 6387/ 8400 batches | train loss 0.3993714 +| epoch 3 | 6391/ 8400 batches | train loss 0.4593830 +| epoch 3 | 6395/ 8400 batches | train loss 0.2682103 +| epoch 3 | 6399/ 8400 batches | train loss 0.4759565 +| epoch 3 | 6403/ 8400 batches | train loss 0.4392023 +| epoch 3 | 6407/ 8400 batches | train loss 0.4797287 +| epoch 3 | 6411/ 8400 batches | train loss 0.4941391 +| epoch 3 | 6415/ 8400 batches | train loss 0.4497768 +| epoch 3 | 6419/ 8400 batches | train loss 0.4763797 +| epoch 3 | 6423/ 8400 batches | train loss 0.4491511 +| epoch 3 | 6427/ 8400 batches | train loss 0.4645059 +| epoch 3 | 6431/ 8400 batches | train loss 0.5187026 +| epoch 3 | 6435/ 8400 batches | train loss 0.4283422 +| epoch 3 | 6439/ 8400 batches | train loss 0.5321351 +| epoch 3 | 6443/ 8400 batches | train loss 0.4318316 +| epoch 3 | 6447/ 8400 batches | train loss 0.4249646 +| epoch 3 | 6451/ 8400 batches | train loss 0.4704165 +| epoch 3 | 6455/ 8400 batches | train loss 0.5198953 +| epoch 3 | 6459/ 8400 batches | train loss 0.4684190 +| epoch 3 | 6463/ 8400 batches | train loss 0.4294080 +| epoch 3 | 6467/ 8400 batches | train loss 0.4823889 +| epoch 3 | 6471/ 8400 batches | train loss 0.4938041 +| epoch 3 | 6475/ 8400 batches | train loss 0.4099058 +| epoch 3 | 6479/ 8400 batches | train loss 0.3918010 +| epoch 3 | 6483/ 8400 batches | train loss 0.4665279 +| epoch 3 | 6487/ 8400 batches | train loss 0.4222373 +| epoch 3 | 6491/ 8400 batches | train loss 0.4578295 +| epoch 3 | 6495/ 8400 batches | train loss 0.4028731 +| epoch 3 | 6499/ 8400 batches | train loss 0.5121911 +| epoch 3 | 6503/ 8400 batches | train loss 0.1620902 +| epoch 3 | 6507/ 8400 batches | train loss 0.4915446 +| epoch 3 | 6511/ 8400 batches | train loss 0.4614177 +| epoch 3 | 6515/ 8400 batches | train loss 0.5195326 +| epoch 3 | 6519/ 8400 batches | train loss 0.5331775 +| epoch 3 | 6523/ 8400 batches | train loss 0.4227230 +| epoch 3 | 6527/ 8400 batches | train loss 0.4538719 +| epoch 3 | 6531/ 8400 batches | train loss 0.4895675 +| epoch 3 | 6535/ 8400 batches | train loss 0.4092363 +| epoch 3 | 6539/ 8400 batches | train loss 0.3611248 +| epoch 3 | 6543/ 8400 batches | train loss 0.4804867 +| epoch 3 | 6547/ 8400 batches | train loss 0.3748992 +| epoch 3 | 6551/ 8400 batches | train loss 0.4737132 +| epoch 3 | 6555/ 8400 batches | train loss 0.3804742 +| epoch 3 | 6559/ 8400 batches | train loss 0.3562172 +| epoch 3 | 6563/ 8400 batches | train loss 0.3654948 +| epoch 3 | 6567/ 8400 batches | train loss 0.3997095 +| epoch 3 | 6571/ 8400 batches | train loss 0.2459100 +| epoch 3 | 6575/ 8400 batches | train loss 0.2626163 +| epoch 3 | 6579/ 8400 batches | train loss 0.4293004 +| epoch 3 | 6583/ 8400 batches | train loss 0.3606281 +| epoch 3 | 6587/ 8400 batches | train loss 0.4683345 +| epoch 3 | 6591/ 8400 batches | train loss 0.4352907 +| epoch 3 | 6595/ 8400 batches | train loss 0.4047066 +| epoch 3 | 6599/ 8400 batches | train loss 0.3438637 +| epoch 3 | 6603/ 8400 batches | train loss 0.3570020 +| epoch 3 | 6607/ 8400 batches | train loss 0.4432876 +| epoch 3 | 6611/ 8400 batches | train loss 0.4342363 +| epoch 3 | 6615/ 8400 batches | train loss 0.3750467 +| epoch 3 | 6619/ 8400 batches | train loss 0.4855015 +| epoch 3 | 6623/ 8400 batches | train loss 0.4370949 +| epoch 3 | 6627/ 8400 batches | train loss 0.3882018 +| epoch 3 | 6631/ 8400 batches | train loss 0.4476438 +| epoch 3 | 6635/ 8400 batches | train loss 0.4002177 +| epoch 3 | 6639/ 8400 batches | train loss 0.4765047 +| epoch 3 | 6643/ 8400 batches | train loss 0.3770896 +| epoch 3 | 6647/ 8400 batches | train loss 0.3969764 +| epoch 3 | 6651/ 8400 batches | train loss 0.4223138 +| epoch 3 | 6655/ 8400 batches | train loss 0.4531165 +| epoch 3 | 6659/ 8400 batches | train loss 0.3618965 +| epoch 3 | 6663/ 8400 batches | train loss 0.4258112 +| epoch 3 | 6667/ 8400 batches | train loss 0.3709809 +| epoch 3 | 6671/ 8400 batches | train loss 0.3877477 +| epoch 3 | 6675/ 8400 batches | train loss 0.4646021 +| epoch 3 | 6679/ 8400 batches | train loss 0.4241462 +| epoch 3 | 6683/ 8400 batches | train loss 0.4159134 +| epoch 3 | 6687/ 8400 batches | train loss 0.4300078 +| epoch 3 | 6691/ 8400 batches | train loss 0.5626234 +| epoch 3 | 6695/ 8400 batches | train loss 0.3565681 +| epoch 3 | 6699/ 8400 batches | train loss 0.3786060 +| epoch 3 | 6703/ 8400 batches | train loss 0.4076037 +| epoch 3 | 6707/ 8400 batches | train loss 0.4650961 +| epoch 3 | 6711/ 8400 batches | train loss 0.5623261 +| epoch 3 | 6715/ 8400 batches | train loss 0.3264393 +| epoch 3 | 6719/ 8400 batches | train loss 0.5213079 +| epoch 3 | 6723/ 8400 batches | train loss 0.4762020 +| epoch 3 | 6727/ 8400 batches | train loss 0.4272085 +| epoch 3 | 6731/ 8400 batches | train loss 0.4378442 +| epoch 3 | 6735/ 8400 batches | train loss 0.4603831 +| epoch 3 | 6739/ 8400 batches | train loss 0.3606335 +| epoch 3 | 6743/ 8400 batches | train loss 0.4458018 +| epoch 3 | 6747/ 8400 batches | train loss 0.5127102 +| epoch 3 | 6751/ 8400 batches | train loss 0.4081091 +| epoch 3 | 6755/ 8400 batches | train loss 0.5214785 +| epoch 3 | 6759/ 8400 batches | train loss 0.3740554 +| epoch 3 | 6763/ 8400 batches | train loss 0.4096208 +| epoch 3 | 6767/ 8400 batches | train loss 0.4226797 +| epoch 3 | 6771/ 8400 batches | train loss 0.4546384 +| epoch 3 | 6775/ 8400 batches | train loss 0.4218344 +| epoch 3 | 6779/ 8400 batches | train loss 0.3487393 +| epoch 3 | 6783/ 8400 batches | train loss 0.4234390 +| epoch 3 | 6787/ 8400 batches | train loss 0.5331706 +| epoch 3 | 6791/ 8400 batches | train loss 0.3858456 +| epoch 3 | 6795/ 8400 batches | train loss 0.4479974 +| epoch 3 | 6799/ 8400 batches | train loss 0.3356864 +| epoch 3 | 6803/ 8400 batches | train loss 0.4230190 +| epoch 3 | 6807/ 8400 batches | train loss 0.3828920 +| epoch 3 | 6811/ 8400 batches | train loss 0.4555769 +| epoch 3 | 6815/ 8400 batches | train loss 0.4436959 +| epoch 3 | 6819/ 8400 batches | train loss 0.4191581 +| epoch 3 | 6823/ 8400 batches | train loss 0.4980814 +| epoch 3 | 6827/ 8400 batches | train loss 0.3901439 +| epoch 3 | 6831/ 8400 batches | train loss 0.4072435 +| epoch 3 | 6835/ 8400 batches | train loss 0.4535761 +| epoch 3 | 6839/ 8400 batches | train loss 0.4466425 +| epoch 3 | 6843/ 8400 batches | train loss 0.4692194 +| epoch 3 | 6847/ 8400 batches | train loss 0.4058687 +| epoch 3 | 6851/ 8400 batches | train loss 0.4365746 +| epoch 3 | 6855/ 8400 batches | train loss 0.4438800 +| epoch 3 | 6859/ 8400 batches | train loss 0.4934388 +| epoch 3 | 6863/ 8400 batches | train loss 0.3696390 +| epoch 3 | 6867/ 8400 batches | train loss 0.4299587 +| epoch 3 | 6871/ 8400 batches | train loss 0.3664676 +| epoch 3 | 6875/ 8400 batches | train loss 0.4082103 +| epoch 3 | 6879/ 8400 batches | train loss 0.5459920 +| epoch 3 | 6883/ 8400 batches | train loss 0.4506537 +| epoch 3 | 6887/ 8400 batches | train loss 0.3640574 +| epoch 3 | 6891/ 8400 batches | train loss 0.3882392 +| epoch 3 | 6895/ 8400 batches | train loss 0.3769008 +| epoch 3 | 6899/ 8400 batches | train loss 0.5174130 +| epoch 3 | 6903/ 8400 batches | train loss 0.4688942 +| epoch 3 | 6907/ 8400 batches | train loss 0.4952645 +| epoch 3 | 6911/ 8400 batches | train loss 0.3920260 +| epoch 3 | 6915/ 8400 batches | train loss 0.3287557 +| epoch 3 | 6919/ 8400 batches | train loss 0.4351383 +| epoch 3 | 6923/ 8400 batches | train loss 0.3962503 +| epoch 3 | 6927/ 8400 batches | train loss 0.4033481 +| epoch 3 | 6931/ 8400 batches | train loss 0.3603354 +| epoch 3 | 6935/ 8400 batches | train loss 0.4296096 +| epoch 3 | 6939/ 8400 batches | train loss 0.4167953 +| epoch 3 | 6943/ 8400 batches | train loss 0.3867882 +| epoch 3 | 6947/ 8400 batches | train loss 0.5616249 +| epoch 3 | 6951/ 8400 batches | train loss 0.4202159 +| epoch 3 | 6955/ 8400 batches | train loss 0.3311212 +| epoch 3 | 6959/ 8400 batches | train loss 0.3894750 +| epoch 3 | 6963/ 8400 batches | train loss 0.4086351 +| epoch 3 | 6967/ 8400 batches | train loss 0.3686200 +| epoch 3 | 6971/ 8400 batches | train loss 0.3665307 +| epoch 3 | 6975/ 8400 batches | train loss 0.4537463 +| epoch 3 | 6979/ 8400 batches | train loss 0.4286595 +| epoch 3 | 6983/ 8400 batches | train loss 0.4857438 +| epoch 3 | 6987/ 8400 batches | train loss 0.3608267 +| epoch 3 | 6991/ 8400 batches | train loss 0.4173560 +| epoch 3 | 6995/ 8400 batches | train loss 0.4940770 +| epoch 3 | 6999/ 8400 batches | train loss 0.4264865 +| epoch 3 | 7003/ 8400 batches | train loss 0.4718529 +| epoch 3 | 7007/ 8400 batches | train loss 0.4807313 +| epoch 3 | 7011/ 8400 batches | train loss 0.5184271 +| epoch 3 | 7015/ 8400 batches | train loss 0.4342225 +| epoch 3 | 7019/ 8400 batches | train loss 0.4175906 +| epoch 3 | 7023/ 8400 batches | train loss 0.4456964 +| epoch 3 | 7027/ 8400 batches | train loss 0.4350654 +| epoch 3 | 7031/ 8400 batches | train loss 0.3699411 +| epoch 3 | 7035/ 8400 batches | train loss 0.4443712 +| epoch 3 | 7039/ 8400 batches | train loss 0.3703433 +| epoch 3 | 7043/ 8400 batches | train loss 0.3879523 +| epoch 3 | 7047/ 8400 batches | train loss 0.3732703 +| epoch 3 | 7051/ 8400 batches | train loss 0.3714593 +| epoch 3 | 7055/ 8400 batches | train loss 0.3697941 +| epoch 3 | 7059/ 8400 batches | train loss 0.4443329 +| epoch 3 | 7063/ 8400 batches | train loss 0.4393403 +| epoch 3 | 7067/ 8400 batches | train loss 0.4072748 +| epoch 3 | 7071/ 8400 batches | train loss 0.3759170 +| epoch 3 | 7075/ 8400 batches | train loss 0.4137598 +| epoch 3 | 7079/ 8400 batches | train loss 0.4182557 +| epoch 3 | 7083/ 8400 batches | train loss 0.4089601 +| epoch 3 | 7087/ 8400 batches | train loss 0.4476534 +| epoch 3 | 7091/ 8400 batches | train loss 0.4168817 +| epoch 3 | 7095/ 8400 batches | train loss 0.4217684 +| epoch 3 | 7099/ 8400 batches | train loss 0.4751054 +| epoch 3 | 7103/ 8400 batches | train loss 0.4863848 +| epoch 3 | 7107/ 8400 batches | train loss 0.4150705 +| epoch 3 | 7111/ 8400 batches | train loss 0.4516417 +| epoch 3 | 7115/ 8400 batches | train loss 0.4665390 +| epoch 3 | 7119/ 8400 batches | train loss 0.4635664 +| epoch 3 | 7123/ 8400 batches | train loss 0.3572167 +| epoch 3 | 7127/ 8400 batches | train loss 0.3966381 +| epoch 3 | 7131/ 8400 batches | train loss 0.4602525 +| epoch 3 | 7135/ 8400 batches | train loss 0.4620552 +| epoch 3 | 7139/ 8400 batches | train loss 0.4934112 +| epoch 3 | 7143/ 8400 batches | train loss 0.4360074 +| epoch 3 | 7147/ 8400 batches | train loss 0.3873237 +| epoch 3 | 7151/ 8400 batches | train loss 0.4664789 +| epoch 3 | 7155/ 8400 batches | train loss 0.3958118 +| epoch 3 | 7159/ 8400 batches | train loss 0.4297009 +| epoch 3 | 7163/ 8400 batches | train loss 0.4395973 +| epoch 3 | 7167/ 8400 batches | train loss 0.4246394 +| epoch 3 | 7171/ 8400 batches | train loss 0.3816961 +| epoch 3 | 7175/ 8400 batches | train loss 0.4441813 +| epoch 3 | 7179/ 8400 batches | train loss 0.4346949 +| epoch 3 | 7183/ 8400 batches | train loss 0.3990414 +| epoch 3 | 7187/ 8400 batches | train loss 0.4484903 +| epoch 3 | 7191/ 8400 batches | train loss 0.3444474 +| epoch 3 | 7195/ 8400 batches | train loss 0.4468470 +| epoch 3 | 7199/ 8400 batches | train loss 0.4450274 +| epoch 3 | 7203/ 8400 batches | train loss 0.3638786 +| epoch 3 | 7207/ 8400 batches | train loss 0.3865011 +| epoch 3 | 7211/ 8400 batches | train loss 0.4284325 +| epoch 3 | 7215/ 8400 batches | train loss 0.4284943 +| epoch 3 | 7219/ 8400 batches | train loss 0.4775512 +| epoch 3 | 7223/ 8400 batches | train loss 0.3683963 +| epoch 3 | 7227/ 8400 batches | train loss 0.4262291 +| epoch 3 | 7231/ 8400 batches | train loss 0.3738466 +| epoch 3 | 7235/ 8400 batches | train loss 0.4628164 +| epoch 3 | 7239/ 8400 batches | train loss 0.3706955 +| epoch 3 | 7243/ 8400 batches | train loss 0.4237100 +| epoch 3 | 7247/ 8400 batches | train loss 0.4378777 +| epoch 3 | 7251/ 8400 batches | train loss 0.4338270 +| epoch 3 | 7255/ 8400 batches | train loss 0.4960875 +| epoch 3 | 7259/ 8400 batches | train loss 0.4585676 +| epoch 3 | 7263/ 8400 batches | train loss 0.4148426 +| epoch 3 | 7267/ 8400 batches | train loss 0.4047230 +| epoch 3 | 7271/ 8400 batches | train loss 0.4423625 +| epoch 3 | 7275/ 8400 batches | train loss 0.5611994 +| epoch 3 | 7279/ 8400 batches | train loss 0.3768969 +| epoch 3 | 7283/ 8400 batches | train loss 0.5568570 +| epoch 3 | 7287/ 8400 batches | train loss 0.4037203 +| epoch 3 | 7291/ 8400 batches | train loss 0.3886988 +| epoch 3 | 7295/ 8400 batches | train loss 0.4627195 +| epoch 3 | 7299/ 8400 batches | train loss 0.4676411 +| epoch 3 | 7303/ 8400 batches | train loss 0.4319911 +| epoch 3 | 7307/ 8400 batches | train loss 0.4650024 +| epoch 3 | 7311/ 8400 batches | train loss 0.4147055 +| epoch 3 | 7315/ 8400 batches | train loss 0.4744978 +| epoch 3 | 7319/ 8400 batches | train loss 0.4220588 +| epoch 3 | 7323/ 8400 batches | train loss 0.4702389 +| epoch 3 | 7327/ 8400 batches | train loss 0.4461277 +| epoch 3 | 7331/ 8400 batches | train loss 0.4312738 +| epoch 3 | 7335/ 8400 batches | train loss 0.4262456 +| epoch 3 | 7339/ 8400 batches | train loss 0.4528388 +| epoch 3 | 7343/ 8400 batches | train loss 0.4457605 +| epoch 3 | 7347/ 8400 batches | train loss 0.3994632 +| epoch 3 | 7351/ 8400 batches | train loss 0.4362895 +| epoch 3 | 7355/ 8400 batches | train loss 0.4105110 +| epoch 3 | 7359/ 8400 batches | train loss 0.3932723 +| epoch 3 | 7363/ 8400 batches | train loss 0.4598592 +| epoch 3 | 7367/ 8400 batches | train loss 0.4446262 +| epoch 3 | 7371/ 8400 batches | train loss 0.4950220 +| epoch 3 | 7375/ 8400 batches | train loss 0.4041478 +| epoch 3 | 7379/ 8400 batches | train loss 0.3633448 +| epoch 3 | 7383/ 8400 batches | train loss 0.4727444 +| epoch 3 | 7387/ 8400 batches | train loss 0.4223205 +| epoch 3 | 7391/ 8400 batches | train loss 0.4085889 +| epoch 3 | 7395/ 8400 batches | train loss 0.4338853 +| epoch 3 | 7399/ 8400 batches | train loss 0.3490850 +| epoch 3 | 7403/ 8400 batches | train loss 0.4383394 +| epoch 3 | 7407/ 8400 batches | train loss 0.4403861 +| epoch 3 | 7411/ 8400 batches | train loss 0.5456232 +| epoch 3 | 7415/ 8400 batches | train loss 0.4579963 +| epoch 3 | 7419/ 8400 batches | train loss 0.3703727 +| epoch 3 | 7423/ 8400 batches | train loss 0.4475587 +| epoch 3 | 7427/ 8400 batches | train loss 0.4241229 +| epoch 3 | 7431/ 8400 batches | train loss 0.4821792 +| epoch 3 | 7435/ 8400 batches | train loss 0.4101321 +| epoch 3 | 7439/ 8400 batches | train loss 0.4117743 +| epoch 3 | 7443/ 8400 batches | train loss 0.4242840 +| epoch 3 | 7447/ 8400 batches | train loss 0.4485003 +| epoch 3 | 7451/ 8400 batches | train loss 0.4237819 +| epoch 3 | 7455/ 8400 batches | train loss 0.4369905 +| epoch 3 | 7459/ 8400 batches | train loss 0.4600950 +| epoch 3 | 7463/ 8400 batches | train loss 0.4918848 +| epoch 3 | 7467/ 8400 batches | train loss 0.4238704 +| epoch 3 | 7471/ 8400 batches | train loss 0.4436992 +| epoch 3 | 7475/ 8400 batches | train loss 0.4090721 +| epoch 3 | 7479/ 8400 batches | train loss 0.4224177 +| epoch 3 | 7483/ 8400 batches | train loss 0.4015450 +| epoch 3 | 7487/ 8400 batches | train loss 0.5417443 +| epoch 3 | 7491/ 8400 batches | train loss 0.4570982 +| epoch 3 | 7495/ 8400 batches | train loss 0.3901959 +| epoch 3 | 7499/ 8400 batches | train loss 0.3621137 +| epoch 3 | 7503/ 8400 batches | train loss 0.4044093 +| epoch 3 | 7507/ 8400 batches | train loss 0.4474281 +| epoch 3 | 7511/ 8400 batches | train loss 0.5293497 +| epoch 3 | 7515/ 8400 batches | train loss 0.4201908 +| epoch 3 | 7519/ 8400 batches | train loss 0.4165291 +| epoch 3 | 7523/ 8400 batches | train loss 0.4027027 +| epoch 3 | 7527/ 8400 batches | train loss 0.4214918 +| epoch 3 | 7531/ 8400 batches | train loss 0.4156011 +| epoch 3 | 7535/ 8400 batches | train loss 0.4159203 +| epoch 3 | 7539/ 8400 batches | train loss 0.4062000 +| epoch 3 | 7543/ 8400 batches | train loss 0.5005726 +| epoch 3 | 7547/ 8400 batches | train loss 0.4203051 +| epoch 3 | 7551/ 8400 batches | train loss 0.1608863 +| epoch 3 | 7555/ 8400 batches | train loss 0.4274541 +| epoch 3 | 7559/ 8400 batches | train loss 0.4023918 +| epoch 3 | 7563/ 8400 batches | train loss 0.4779592 +| epoch 3 | 7567/ 8400 batches | train loss 0.4145129 +| epoch 3 | 7571/ 8400 batches | train loss 0.5094066 +| epoch 3 | 7575/ 8400 batches | train loss 0.3418823 +| epoch 3 | 7579/ 8400 batches | train loss 0.3912459 +| epoch 3 | 7583/ 8400 batches | train loss 0.4344123 +| epoch 3 | 7587/ 8400 batches | train loss 0.4306522 +| epoch 3 | 7591/ 8400 batches | train loss 0.4508746 +| epoch 3 | 7595/ 8400 batches | train loss 0.4159718 +| epoch 3 | 7599/ 8400 batches | train loss 0.3531137 +| epoch 3 | 7603/ 8400 batches | train loss 0.4767950 +| epoch 3 | 7607/ 8400 batches | train loss 0.3379324 +| epoch 3 | 7611/ 8400 batches | train loss 0.4822529 +| epoch 3 | 7615/ 8400 batches | train loss 0.3751157 +| epoch 3 | 7619/ 8400 batches | train loss 0.4988377 +| epoch 3 | 7623/ 8400 batches | train loss 0.4256356 +| epoch 3 | 7627/ 8400 batches | train loss 0.4892746 +| epoch 3 | 7631/ 8400 batches | train loss 0.4588138 +| epoch 3 | 7635/ 8400 batches | train loss 0.3459995 +| epoch 3 | 7639/ 8400 batches | train loss 0.4981956 +| epoch 3 | 7643/ 8400 batches | train loss 0.3834053 +| epoch 3 | 7647/ 8400 batches | train loss 0.4111272 +| epoch 3 | 7651/ 8400 batches | train loss 0.4474996 +| epoch 3 | 7655/ 8400 batches | train loss 0.3424296 +| epoch 3 | 7659/ 8400 batches | train loss 0.3994551 +| epoch 3 | 7663/ 8400 batches | train loss 0.4600495 +| epoch 3 | 7667/ 8400 batches | train loss 0.3419810 +| epoch 3 | 7671/ 8400 batches | train loss 0.4823226 +| epoch 3 | 7675/ 8400 batches | train loss 0.4625206 +| epoch 3 | 7679/ 8400 batches | train loss 0.4884385 +| epoch 3 | 7683/ 8400 batches | train loss 0.3090059 +| epoch 3 | 7687/ 8400 batches | train loss 0.4281349 +| epoch 3 | 7691/ 8400 batches | train loss 0.3438503 +| epoch 3 | 7695/ 8400 batches | train loss 0.3287656 +| epoch 3 | 7699/ 8400 batches | train loss 0.3582968 +| epoch 3 | 7703/ 8400 batches | train loss 0.4702791 +| epoch 3 | 7707/ 8400 batches | train loss 0.4332052 +| epoch 3 | 7711/ 8400 batches | train loss 0.4138442 +| epoch 3 | 7715/ 8400 batches | train loss 0.4288753 +| epoch 3 | 7719/ 8400 batches | train loss 0.3846974 +| epoch 3 | 7723/ 8400 batches | train loss 0.4391729 +| epoch 3 | 7727/ 8400 batches | train loss 0.3972563 +| epoch 3 | 7731/ 8400 batches | train loss 0.3975783 +| epoch 3 | 7735/ 8400 batches | train loss 0.4179385 +| epoch 3 | 7739/ 8400 batches | train loss 0.4744577 +| epoch 3 | 7743/ 8400 batches | train loss 0.3416997 +| epoch 3 | 7747/ 8400 batches | train loss 0.4997120 +| epoch 3 | 7751/ 8400 batches | train loss 0.4055551 +| epoch 3 | 7755/ 8400 batches | train loss 0.4784013 +| epoch 3 | 7759/ 8400 batches | train loss 0.3801760 +| epoch 3 | 7763/ 8400 batches | train loss 0.4644202 +| epoch 3 | 7767/ 8400 batches | train loss 0.4040116 +| epoch 3 | 7771/ 8400 batches | train loss 0.5777091 +| epoch 3 | 7775/ 8400 batches | train loss 0.4296241 +| epoch 3 | 7779/ 8400 batches | train loss 0.4185946 +| epoch 3 | 7783/ 8400 batches | train loss 0.3955782 +| epoch 3 | 7787/ 8400 batches | train loss 0.3853444 +| epoch 3 | 7791/ 8400 batches | train loss 0.4635808 +| epoch 3 | 7795/ 8400 batches | train loss 0.3931357 +| epoch 3 | 7799/ 8400 batches | train loss 0.4501830 +| epoch 3 | 7803/ 8400 batches | train loss 0.3904261 +| epoch 3 | 7807/ 8400 batches | train loss 0.4478346 +| epoch 3 | 7811/ 8400 batches | train loss 0.4219733 +| epoch 3 | 7815/ 8400 batches | train loss 0.4560606 +| epoch 3 | 7819/ 8400 batches | train loss 0.4553627 +| epoch 3 | 7823/ 8400 batches | train loss 0.3806101 +| epoch 3 | 7827/ 8400 batches | train loss 0.3291288 +| epoch 3 | 7831/ 8400 batches | train loss 0.4520940 +| epoch 3 | 7835/ 8400 batches | train loss 0.3699153 +| epoch 3 | 7839/ 8400 batches | train loss 0.3656266 +| epoch 3 | 7843/ 8400 batches | train loss 0.4716498 +| epoch 3 | 7847/ 8400 batches | train loss 0.4752025 +| epoch 3 | 7851/ 8400 batches | train loss 0.3882146 +| epoch 3 | 7855/ 8400 batches | train loss 0.4917844 +| epoch 3 | 7859/ 8400 batches | train loss 0.3999011 +| epoch 3 | 7863/ 8400 batches | train loss 0.4544956 +| epoch 3 | 7867/ 8400 batches | train loss 0.4148045 +| epoch 3 | 7871/ 8400 batches | train loss 0.4925250 +| epoch 3 | 7875/ 8400 batches | train loss 0.4873461 +| epoch 3 | 7879/ 8400 batches | train loss 0.4062786 +| epoch 3 | 7883/ 8400 batches | train loss 0.4468119 +| epoch 3 | 7887/ 8400 batches | train loss 0.4164544 +| epoch 3 | 7891/ 8400 batches | train loss 0.4185767 +| epoch 3 | 7895/ 8400 batches | train loss 0.4242486 +| epoch 3 | 7899/ 8400 batches | train loss 0.3802485 +| epoch 3 | 7903/ 8400 batches | train loss 0.5024635 +| epoch 3 | 7907/ 8400 batches | train loss 0.5333588 +| epoch 3 | 7911/ 8400 batches | train loss 0.4342026 +| epoch 3 | 7915/ 8400 batches | train loss 0.5504004 +| epoch 3 | 7919/ 8400 batches | train loss 0.5084795 +| epoch 3 | 7923/ 8400 batches | train loss 0.4442315 +| epoch 3 | 7927/ 8400 batches | train loss 0.4365079 +| epoch 3 | 7931/ 8400 batches | train loss 0.3844701 +| epoch 3 | 7935/ 8400 batches | train loss 0.3761178 +| epoch 3 | 7939/ 8400 batches | train loss 0.4303666 +| epoch 3 | 7943/ 8400 batches | train loss 0.3750190 +| epoch 3 | 7947/ 8400 batches | train loss 0.3809266 +| epoch 3 | 7951/ 8400 batches | train loss 0.4197474 +| epoch 3 | 7955/ 8400 batches | train loss 0.4528985 +| epoch 3 | 7959/ 8400 batches | train loss 0.4447426 +| epoch 3 | 7963/ 8400 batches | train loss 0.3543741 +| epoch 3 | 7967/ 8400 batches | train loss 0.3657716 +| epoch 3 | 7971/ 8400 batches | train loss 0.4220203 +| epoch 3 | 7975/ 8400 batches | train loss 0.4357258 +| epoch 3 | 7979/ 8400 batches | train loss 0.3357923 +| epoch 3 | 7983/ 8400 batches | train loss 0.5150501 +| epoch 3 | 7987/ 8400 batches | train loss 0.4303671 +| epoch 3 | 7991/ 8400 batches | train loss 0.4188787 +| epoch 3 | 7995/ 8400 batches | train loss 0.3799668 +| epoch 3 | 7999/ 8400 batches | train loss 0.4843409 +| epoch 3 | 8003/ 8400 batches | train loss 0.4223222 +| epoch 3 | 8007/ 8400 batches | train loss 0.4450576 +| epoch 3 | 8011/ 8400 batches | train loss 0.4351073 +| epoch 3 | 8015/ 8400 batches | train loss 0.3796844 +| epoch 3 | 8019/ 8400 batches | train loss 0.4697955 +| epoch 3 | 8023/ 8400 batches | train loss 0.3985496 +| epoch 3 | 8027/ 8400 batches | train loss 0.5318129 +| epoch 3 | 8031/ 8400 batches | train loss 0.3717822 +| epoch 3 | 8035/ 8400 batches | train loss 0.4747249 +| epoch 3 | 8039/ 8400 batches | train loss 0.4038470 +| epoch 3 | 8043/ 8400 batches | train loss 0.4454061 +| epoch 3 | 8047/ 8400 batches | train loss 0.4420899 +| epoch 3 | 8051/ 8400 batches | train loss 0.4395837 +| epoch 3 | 8055/ 8400 batches | train loss 0.4595437 +| epoch 3 | 8059/ 8400 batches | train loss 0.4538887 +| epoch 3 | 8063/ 8400 batches | train loss 0.4411908 +| epoch 3 | 8067/ 8400 batches | train loss 0.3822247 +| epoch 3 | 8071/ 8400 batches | train loss 0.5327168 +| epoch 3 | 8075/ 8400 batches | train loss 0.3974846 +| epoch 3 | 8079/ 8400 batches | train loss 0.4057727 +| epoch 3 | 8083/ 8400 batches | train loss 0.4823111 +| epoch 3 | 8087/ 8400 batches | train loss 0.4803174 +| epoch 3 | 8091/ 8400 batches | train loss 0.4321107 +| epoch 3 | 8095/ 8400 batches | train loss 0.4303929 +| epoch 3 | 8099/ 8400 batches | train loss 0.3915436 +| epoch 3 | 8103/ 8400 batches | train loss 0.4363292 +| epoch 3 | 8107/ 8400 batches | train loss 0.4045705 +| epoch 3 | 8111/ 8400 batches | train loss 0.3649825 +| epoch 3 | 8115/ 8400 batches | train loss 0.4577353 +| epoch 3 | 8119/ 8400 batches | train loss 0.4281182 +| epoch 3 | 8123/ 8400 batches | train loss 0.4105183 +| epoch 3 | 8127/ 8400 batches | train loss 0.5332845 +| epoch 3 | 8131/ 8400 batches | train loss 0.3846065 +| epoch 3 | 8135/ 8400 batches | train loss 0.4312183 +| epoch 3 | 8139/ 8400 batches | train loss 0.3979971 +| epoch 3 | 8143/ 8400 batches | train loss 0.4739240 +| epoch 3 | 8147/ 8400 batches | train loss 0.4454577 +| epoch 3 | 8151/ 8400 batches | train loss 0.4963498 +| epoch 3 | 8155/ 8400 batches | train loss 0.4098369 +| epoch 3 | 8159/ 8400 batches | train loss 0.4673172 +| epoch 3 | 8163/ 8400 batches | train loss 0.4731590 +| epoch 3 | 8167/ 8400 batches | train loss 0.4305042 +| epoch 3 | 8171/ 8400 batches | train loss 0.4710165 +| epoch 3 | 8175/ 8400 batches | train loss 0.4100938 +| epoch 3 | 8179/ 8400 batches | train loss 0.4189202 +| epoch 3 | 8183/ 8400 batches | train loss 0.4934852 +| epoch 3 | 8187/ 8400 batches | train loss 0.4391265 +| epoch 3 | 8191/ 8400 batches | train loss 0.4556954 +| epoch 3 | 8195/ 8400 batches | train loss 0.4024417 +| epoch 3 | 8199/ 8400 batches | train loss 0.4783481 +| epoch 3 | 8203/ 8400 batches | train loss 0.4331849 +| epoch 3 | 8207/ 8400 batches | train loss 0.5025400 +| epoch 3 | 8211/ 8400 batches | train loss 0.4351939 +| epoch 3 | 8215/ 8400 batches | train loss 0.4367085 +| epoch 3 | 8219/ 8400 batches | train loss 0.4897487 +| epoch 3 | 8223/ 8400 batches | train loss 0.3978041 +| epoch 3 | 8227/ 8400 batches | train loss 0.4487588 +| epoch 3 | 8231/ 8400 batches | train loss 0.3367834 +| epoch 3 | 8235/ 8400 batches | train loss 0.4205028 +| epoch 3 | 8239/ 8400 batches | train loss 0.4166207 +| epoch 3 | 8243/ 8400 batches | train loss 0.2703550 +| epoch 3 | 8247/ 8400 batches | train loss 0.3562218 +| epoch 3 | 8251/ 8400 batches | train loss 0.4234741 +| epoch 3 | 8255/ 8400 batches | train loss 0.5257254 +| epoch 3 | 8259/ 8400 batches | train loss 0.3830456 +| epoch 3 | 8263/ 8400 batches | train loss 0.4435726 +| epoch 3 | 8267/ 8400 batches | train loss 0.4047659 +| epoch 3 | 8271/ 8400 batches | train loss 0.5284123 +| epoch 3 | 8275/ 8400 batches | train loss 0.4367923 +| epoch 3 | 8279/ 8400 batches | train loss 0.4324991 +| epoch 3 | 8283/ 8400 batches | train loss 0.4132283 +| epoch 3 | 8287/ 8400 batches | train loss 0.3441537 +| epoch 3 | 8291/ 8400 batches | train loss 0.4024369 +| epoch 3 | 8295/ 8400 batches | train loss 0.4832375 +| epoch 3 | 8299/ 8400 batches | train loss 0.4467590 +| epoch 3 | 8303/ 8400 batches | train loss 0.3443162 +| epoch 3 | 8307/ 8400 batches | train loss 0.4491732 +| epoch 3 | 8311/ 8400 batches | train loss 0.3014880 +| epoch 3 | 8315/ 8400 batches | train loss 0.4695542 +| epoch 3 | 8319/ 8400 batches | train loss 0.4628708 +| epoch 3 | 8323/ 8400 batches | train loss 0.4847762 +| epoch 3 | 8327/ 8400 batches | train loss 0.3719006 +| epoch 3 | 8331/ 8400 batches | train loss 0.3497998 +| epoch 3 | 8335/ 8400 batches | train loss 0.4035925 +| epoch 3 | 8339/ 8400 batches | train loss 0.3610284 +| epoch 3 | 8343/ 8400 batches | train loss 0.4587111 +| epoch 3 | 8347/ 8400 batches | train loss 0.4497126 +| epoch 3 | 8351/ 8400 batches | train loss 0.4817817 +| epoch 3 | 8355/ 8400 batches | train loss 0.3398620 +| epoch 3 | 8359/ 8400 batches | train loss 0.5060595 +| epoch 3 | 8363/ 8400 batches | train loss 0.4720393 +| epoch 3 | 8367/ 8400 batches | train loss 0.3590117 +| epoch 3 | 8371/ 8400 batches | train loss 0.4690205 +| epoch 3 | 8375/ 8400 batches | train loss 0.5157302 +| epoch 3 | 8379/ 8400 batches | train loss 0.4579799 +| epoch 3 | 8383/ 8400 batches | train loss 0.3576038 +| epoch 3 | 8387/ 8400 batches | train loss 0.4308051 +| epoch 3 | 8391/ 8400 batches | train loss 0.4118090 +| epoch 3 | 8395/ 8400 batches | train loss 0.4803963 +| epoch 3 | 8399/ 8400 batches | train loss 0.3743087 +-------------------------------------------------------------------------------- +| epoch 3 | 3/ 8400 batches | test loss 0.3850584 +| epoch 3 | 7/ 8400 batches | test loss 0.4437746 +| epoch 3 | 11/ 8400 batches | test loss 0.3711387 +| epoch 3 | 15/ 8400 batches | test loss 0.4931355 +| epoch 3 | 19/ 8400 batches | test loss 0.5181259 +| epoch 3 | 23/ 8400 batches | test loss 0.4312963 +| epoch 3 | 27/ 8400 batches | test loss 0.4043555 +| epoch 3 | 31/ 8400 batches | test loss 0.2777260 +| epoch 3 | 35/ 8400 batches | test loss 0.5177177 +| epoch 3 | 39/ 8400 batches | test loss 0.4603204 +| epoch 3 | 43/ 8400 batches | test loss 0.5432979 +| epoch 3 | 47/ 8400 batches | test loss 0.4728153 +| epoch 3 | 51/ 8400 batches | test loss 0.4663382 +| epoch 3 | 55/ 8400 batches | test loss 0.5011431 +| epoch 3 | 59/ 8400 batches | test loss 0.4266507 +| epoch 3 | 63/ 8400 batches | test loss 0.3957227 +| epoch 3 | 67/ 8400 batches | test loss 0.4450160 +| epoch 3 | 71/ 8400 batches | test loss 0.4636395 +| epoch 3 | 75/ 8400 batches | test loss 0.3804512 +| epoch 3 | 79/ 8400 batches | test loss 0.4678305 +| epoch 3 | 83/ 8400 batches | test loss 0.4738797 +| epoch 3 | 87/ 8400 batches | test loss 0.3939104 +| epoch 3 | 91/ 8400 batches | test loss 0.5015042 +| epoch 3 | 95/ 8400 batches | test loss 0.7461697 +| epoch 3 | 99/ 8400 batches | test loss 0.7447691 +| epoch 3 | 103/ 8400 batches | test loss 0.4286060 +| epoch 3 | 107/ 8400 batches | test loss 0.4212580 +| epoch 3 | 111/ 8400 batches | test loss 0.5342635 +| epoch 3 | 115/ 8400 batches | test loss 0.4710214 +| epoch 3 | 119/ 8400 batches | test loss 0.4481270 +| epoch 3 | 123/ 8400 batches | test loss 0.4076562 +| epoch 3 | 127/ 8400 batches | test loss 0.4265643 +| epoch 3 | 131/ 8400 batches | test loss 0.4012260 +| epoch 3 | 135/ 8400 batches | test loss 0.3504434 +| epoch 3 | 139/ 8400 batches | test loss 0.4998649 +| epoch 3 | 143/ 8400 batches | test loss 0.3495451 +| epoch 3 | 147/ 8400 batches | test loss 0.3814742 +| epoch 3 | 151/ 8400 batches | test loss 0.4836997 +| epoch 3 | 155/ 8400 batches | test loss 0.4946436 +| epoch 3 | 159/ 8400 batches | test loss 0.4433946 +| epoch 3 | 163/ 8400 batches | test loss 0.5687431 +| epoch 3 | 167/ 8400 batches | test loss 0.4095218 +| epoch 3 | 171/ 8400 batches | test loss 0.3982504 +| epoch 3 | 175/ 8400 batches | test loss 0.3789432 +| epoch 3 | 179/ 8400 batches | test loss 0.3589029 +| epoch 3 | 183/ 8400 batches | test loss 0.4870785 +| epoch 3 | 187/ 8400 batches | test loss 0.5853536 +| epoch 3 | 191/ 8400 batches | test loss 0.3332348 +| epoch 3 | 195/ 8400 batches | test loss 0.5148263 +| epoch 3 | 199/ 8400 batches | test loss 0.3424349 +| epoch 3 | 203/ 8400 batches | test loss 0.4647318 +| epoch 3 | 207/ 8400 batches | test loss 0.3262487 +| epoch 3 | 211/ 8400 batches | test loss 0.3235424 +| epoch 3 | 215/ 8400 batches | test loss 0.4922245 +| epoch 3 | 219/ 8400 batches | test loss 0.4653788 +| epoch 3 | 223/ 8400 batches | test loss 0.4833926 +| epoch 3 | 227/ 8400 batches | test loss 0.4336649 +| epoch 3 | 231/ 8400 batches | test loss 0.4589989 +| epoch 3 | 235/ 8400 batches | test loss 0.4332812 +| epoch 3 | 239/ 8400 batches | test loss 0.4737257 +| epoch 3 | 243/ 8400 batches | test loss 0.5056362 +| epoch 3 | 247/ 8400 batches | test loss 0.4222236 +| epoch 3 | 251/ 8400 batches | test loss 0.4628175 +| epoch 3 | 255/ 8400 batches | test loss 0.4699325 +| epoch 3 | 259/ 8400 batches | test loss 0.4988278 +| epoch 3 | 263/ 8400 batches | test loss 0.3174432 +| epoch 3 | 267/ 8400 batches | test loss 0.4591985 +| epoch 3 | 271/ 8400 batches | test loss 0.3698381 +| epoch 3 | 275/ 8400 batches | test loss 0.4617032 +| epoch 3 | 279/ 8400 batches | test loss 0.4293756 +| epoch 3 | 283/ 8400 batches | test loss 0.3442499 +| epoch 3 | 287/ 8400 batches | test loss 0.4617188 +| epoch 3 | 291/ 8400 batches | test loss 0.4828478 +| epoch 3 | 295/ 8400 batches | test loss 0.4829343 +| epoch 3 | 299/ 8400 batches | test loss 0.5044162 +| epoch 3 | 303/ 8400 batches | test loss 0.4061273 +| epoch 3 | 307/ 8400 batches | test loss 0.4358173 +| epoch 3 | 311/ 8400 batches | test loss 0.3591819 +| epoch 3 | 315/ 8400 batches | test loss 0.4393628 +| epoch 3 | 319/ 8400 batches | test loss 0.4564434 +| epoch 3 | 323/ 8400 batches | test loss 0.4872884 +| epoch 3 | 327/ 8400 batches | test loss 0.4985391 +| epoch 3 | 331/ 8400 batches | test loss 0.4324168 +| epoch 3 | 335/ 8400 batches | test loss 0.4690919 +| epoch 3 | 339/ 8400 batches | test loss 0.3935370 +| epoch 3 | 343/ 8400 batches | test loss 0.4220051 +| epoch 3 | 347/ 8400 batches | test loss 0.4404744 +| epoch 3 | 351/ 8400 batches | test loss 0.4624093 +| epoch 3 | 355/ 8400 batches | test loss 0.4350482 +| epoch 3 | 359/ 8400 batches | test loss 0.4152234 +| epoch 3 | 363/ 8400 batches | test loss 0.4358140 +| epoch 3 | 367/ 8400 batches | test loss 0.4296740 +| epoch 3 | 371/ 8400 batches | test loss 0.4628097 +| epoch 3 | 375/ 8400 batches | test loss 0.4311179 +| epoch 3 | 379/ 8400 batches | test loss 0.4242420 +| epoch 3 | 383/ 8400 batches | test loss 0.3974122 +| epoch 3 | 387/ 8400 batches | test loss 0.4766045 +| epoch 3 | 391/ 8400 batches | test loss 0.4110498 +| epoch 3 | 395/ 8400 batches | test loss 0.4444460 +| epoch 3 | 399/ 8400 batches | test loss 0.4529037 +| epoch 3 | 403/ 8400 batches | test loss 0.3275049 +| epoch 3 | 407/ 8400 batches | test loss 0.4184555 +| epoch 3 | 411/ 8400 batches | test loss 0.3764310 +| epoch 3 | 415/ 8400 batches | test loss 0.5845703 +| epoch 3 | 419/ 8400 batches | test loss 0.4439876 +| epoch 3 | 423/ 8400 batches | test loss 0.3367836 +| epoch 3 | 427/ 8400 batches | test loss 0.4771059 +| epoch 3 | 431/ 8400 batches | test loss 0.4665538 +| epoch 3 | 435/ 8400 batches | test loss 0.3418829 +| epoch 3 | 439/ 8400 batches | test loss 0.4395111 +| epoch 3 | 443/ 8400 batches | test loss 0.3277825 +| epoch 3 | 447/ 8400 batches | test loss 0.4509568 +| epoch 3 | 451/ 8400 batches | test loss 0.4261014 +| epoch 3 | 455/ 8400 batches | test loss 0.4175816 +| epoch 3 | 459/ 8400 batches | test loss 0.4674577 +| epoch 3 | 463/ 8400 batches | test loss 0.4242951 +| epoch 3 | 467/ 8400 batches | test loss 0.4549266 +| epoch 3 | 471/ 8400 batches | test loss 0.4464211 +| epoch 3 | 475/ 8400 batches | test loss 0.4870688 +| epoch 3 | 479/ 8400 batches | test loss 0.8159227 +| epoch 3 | 483/ 8400 batches | test loss 0.4580130 +| epoch 3 | 487/ 8400 batches | test loss 0.4857047 +| epoch 3 | 491/ 8400 batches | test loss 0.4151202 +| epoch 3 | 495/ 8400 batches | test loss 0.4179236 +| epoch 3 | 499/ 8400 batches | test loss 0.4046321 +| epoch 3 | 503/ 8400 batches | test loss 0.5184138 +| epoch 3 | 507/ 8400 batches | test loss 0.3809329 +| epoch 3 | 511/ 8400 batches | test loss 0.3894362 +| epoch 3 | 515/ 8400 batches | test loss 0.4587376 +| epoch 3 | 519/ 8400 batches | test loss 0.4350227 +| epoch 3 | 523/ 8400 batches | test loss 0.4580413 +| epoch 3 | 527/ 8400 batches | test loss 0.4574622 +| epoch 3 | 531/ 8400 batches | test loss 0.4342276 +| epoch 3 | 535/ 8400 batches | test loss 0.4484676 +| epoch 3 | 539/ 8400 batches | test loss 0.4528328 +| epoch 3 | 543/ 8400 batches | test loss 0.5515661 +| epoch 3 | 547/ 8400 batches | test loss 0.4347336 +| epoch 3 | 551/ 8400 batches | test loss 0.3619815 +| epoch 3 | 555/ 8400 batches | test loss 0.4434858 +| epoch 3 | 559/ 8400 batches | test loss 0.4267585 +| epoch 3 | 563/ 8400 batches | test loss 0.4742047 +| epoch 3 | 567/ 8400 batches | test loss 0.4243808 +| epoch 3 | 571/ 8400 batches | test loss 0.4854069 +| epoch 3 | 575/ 8400 batches | test loss 0.4431820 +| epoch 3 | 579/ 8400 batches | test loss 0.4392012 +| epoch 3 | 583/ 8400 batches | test loss 0.5235468 +| epoch 3 | 587/ 8400 batches | test loss 0.5029095 +| epoch 3 | 591/ 8400 batches | test loss 0.4235566 +| epoch 3 | 595/ 8400 batches | test loss 0.5155430 +| epoch 3 | 599/ 8400 batches | test loss 0.4201111 +| epoch 3 | 603/ 8400 batches | test loss 0.3508898 +| epoch 3 | 607/ 8400 batches | test loss 0.5038165 +| epoch 3 | 611/ 8400 batches | test loss 0.4175433 +| epoch 3 | 615/ 8400 batches | test loss 0.3953252 +| epoch 3 | 619/ 8400 batches | test loss 0.5334197 +| epoch 3 | 623/ 8400 batches | test loss 0.4104906 +| epoch 3 | 627/ 8400 batches | test loss 0.3603241 +| epoch 3 | 631/ 8400 batches | test loss 0.4031740 +| epoch 3 | 635/ 8400 batches | test loss 0.3411608 +| epoch 3 | 639/ 8400 batches | test loss 0.4442202 +| epoch 3 | 643/ 8400 batches | test loss 0.3449458 +| epoch 3 | 647/ 8400 batches | test loss 0.4467297 +| epoch 3 | 651/ 8400 batches | test loss 0.4456836 +| epoch 3 | 655/ 8400 batches | test loss 0.3220762 +| epoch 3 | 659/ 8400 batches | test loss 0.3914291 +| epoch 3 | 663/ 8400 batches | test loss 0.4918157 +| epoch 3 | 667/ 8400 batches | test loss 0.3113674 +| epoch 3 | 671/ 8400 batches | test loss 0.4560226 +| epoch 3 | 675/ 8400 batches | test loss 0.4448277 +| epoch 3 | 679/ 8400 batches | test loss 0.3758442 +| epoch 3 | 683/ 8400 batches | test loss 0.4143198 +| epoch 3 | 687/ 8400 batches | test loss 0.5156416 +| epoch 3 | 691/ 8400 batches | test loss 0.3976277 +| epoch 3 | 695/ 8400 batches | test loss 0.4097215 +| epoch 3 | 699/ 8400 batches | test loss 0.3587192 +| epoch 3 | 703/ 8400 batches | test loss 0.4907787 +| epoch 3 | 707/ 8400 batches | test loss 0.4157919 +| epoch 3 | 711/ 8400 batches | test loss 0.4125029 +| epoch 3 | 715/ 8400 batches | test loss 0.4091676 +| epoch 3 | 719/ 8400 batches | test loss 0.4251233 +| epoch 3 | 723/ 8400 batches | test loss 0.5176902 +| epoch 3 | 727/ 8400 batches | test loss 0.4624525 +| epoch 3 | 731/ 8400 batches | test loss 0.3698114 +| epoch 3 | 735/ 8400 batches | test loss 0.3247986 +| epoch 3 | 739/ 8400 batches | test loss 0.4239787 +| epoch 3 | 743/ 8400 batches | test loss 0.4245450 +| epoch 3 | 747/ 8400 batches | test loss 0.4955562 +| epoch 3 | 751/ 8400 batches | test loss 0.4451991 +| epoch 3 | 755/ 8400 batches | test loss 0.4196579 +| epoch 3 | 759/ 8400 batches | test loss 0.4171224 +| epoch 3 | 763/ 8400 batches | test loss 0.3888274 +| epoch 3 | 767/ 8400 batches | test loss 0.4432109 +| epoch 3 | 771/ 8400 batches | test loss 0.4169728 +| epoch 3 | 775/ 8400 batches | test loss 0.3980957 +| epoch 3 | 779/ 8400 batches | test loss 0.5365942 +| epoch 3 | 783/ 8400 batches | test loss 0.4709612 +| epoch 3 | 787/ 8400 batches | test loss 0.3627712 +| epoch 3 | 791/ 8400 batches | test loss 0.5134009 +| epoch 3 | 795/ 8400 batches | test loss 0.4999067 +| epoch 3 | 799/ 8400 batches | test loss 0.4650210 +| epoch 3 | 803/ 8400 batches | test loss 0.4525042 +| epoch 3 | 807/ 8400 batches | test loss 0.3732867 +| epoch 3 | 811/ 8400 batches | test loss 0.4777485 +| epoch 3 | 815/ 8400 batches | test loss 0.5629523 +| epoch 3 | 819/ 8400 batches | test loss 0.4657155 +| epoch 3 | 823/ 8400 batches | test loss 0.3481107 +| epoch 3 | 827/ 8400 batches | test loss 0.4155894 +| epoch 3 | 831/ 8400 batches | test loss 0.4321612 +| epoch 3 | 835/ 8400 batches | test loss 0.5562972 +| epoch 3 | 839/ 8400 batches | test loss 0.4303788 +| epoch 3 | 843/ 8400 batches | test loss 0.5238076 +| epoch 3 | 847/ 8400 batches | test loss 0.3346123 +| epoch 3 | 851/ 8400 batches | test loss 0.3987032 +| epoch 3 | 855/ 8400 batches | test loss 0.4828783 +| epoch 3 | 859/ 8400 batches | test loss 0.3955371 +| epoch 3 | 863/ 8400 batches | test loss 0.4357063 +| epoch 3 | 867/ 8400 batches | test loss 0.3215549 +| epoch 3 | 871/ 8400 batches | test loss 0.4689271 +| epoch 3 | 875/ 8400 batches | test loss 0.3540668 +| epoch 3 | 879/ 8400 batches | test loss 0.4656070 +| epoch 3 | 883/ 8400 batches | test loss 0.3996015 +| epoch 3 | 887/ 8400 batches | test loss 0.4181491 +| epoch 3 | 891/ 8400 batches | test loss 0.4052426 +| epoch 3 | 895/ 8400 batches | test loss 0.4710146 +| epoch 3 | 899/ 8400 batches | test loss 0.3957723 +| epoch 3 | 903/ 8400 batches | test loss 0.5089312 +| epoch 3 | 907/ 8400 batches | test loss 0.4338226 +| epoch 3 | 911/ 8400 batches | test loss 0.4811297 +| epoch 3 | 915/ 8400 batches | test loss 0.4086583 +| epoch 3 | 919/ 8400 batches | test loss 0.4204842 +| epoch 3 | 923/ 8400 batches | test loss 0.4787765 +| epoch 3 | 927/ 8400 batches | test loss 0.4330913 +| epoch 3 | 931/ 8400 batches | test loss 0.4320439 +| epoch 3 | 935/ 8400 batches | test loss 0.4055167 +| epoch 3 | 939/ 8400 batches | test loss 0.4395393 +| epoch 3 | 943/ 8400 batches | test loss 0.4236817 +| epoch 3 | 947/ 8400 batches | test loss 0.3866429 +| epoch 3 | 951/ 8400 batches | test loss 0.4441852 +| epoch 3 | 955/ 8400 batches | test loss 0.3935457 +| epoch 3 | 959/ 8400 batches | test loss 0.3891091 +| epoch 3 | 963/ 8400 batches | test loss 0.4213308 +| epoch 3 | 967/ 8400 batches | test loss 0.4178141 +| epoch 3 | 971/ 8400 batches | test loss 0.5354933 +| epoch 3 | 975/ 8400 batches | test loss 0.4095215 +| epoch 3 | 979/ 8400 batches | test loss 0.4474334 +| epoch 3 | 983/ 8400 batches | test loss 0.5263347 +| epoch 3 | 987/ 8400 batches | test loss 0.3367236 +| epoch 3 | 991/ 8400 batches | test loss 0.4269985 +| epoch 3 | 995/ 8400 batches | test loss 0.4948099 +| epoch 3 | 999/ 8400 batches | test loss 0.4608654 +| epoch 3 | 1003/ 8400 batches | test loss 0.4496891 +| epoch 3 | 1007/ 8400 batches | test loss 0.5399866 +| epoch 3 | 1011/ 8400 batches | test loss 0.5212192 +| epoch 3 | 1015/ 8400 batches | test loss 0.4492822 +| epoch 3 | 1019/ 8400 batches | test loss 0.4580802 +| epoch 3 | 1023/ 8400 batches | test loss 0.4076844 +| epoch 3 | 1027/ 8400 batches | test loss 0.4250184 +| epoch 3 | 1031/ 8400 batches | test loss 0.5219426 +| epoch 3 | 1035/ 8400 batches | test loss 0.4305984 +| epoch 3 | 1039/ 8400 batches | test loss 0.4012036 +| epoch 3 | 1043/ 8400 batches | test loss 0.2756877 +| epoch 3 | 1047/ 8400 batches | test loss 0.3375657 +| epoch 3 | 1051/ 8400 batches | test loss 0.5499409 +| epoch 3 | 1055/ 8400 batches | test loss 0.4718187 +| epoch 3 | 1059/ 8400 batches | test loss 0.4314329 +| epoch 3 | 1063/ 8400 batches | test loss 0.4421697 +| epoch 3 | 1067/ 8400 batches | test loss 0.4522332 +| epoch 3 | 1071/ 8400 batches | test loss 0.4630193 +| epoch 3 | 1075/ 8400 batches | test loss 0.3884524 +| epoch 3 | 1079/ 8400 batches | test loss 0.4554074 +| epoch 3 | 1083/ 8400 batches | test loss 0.4123887 +| epoch 3 | 1087/ 8400 batches | test loss 0.3595396 +| epoch 3 | 1091/ 8400 batches | test loss 0.4344557 +| epoch 3 | 1095/ 8400 batches | test loss 0.5574350 +| epoch 3 | 1099/ 8400 batches | test loss 0.3030140 +| epoch 3 | 1103/ 8400 batches | test loss 0.4906140 +| epoch 3 | 1107/ 8400 batches | test loss 0.3883488 +| epoch 3 | 1111/ 8400 batches | test loss 0.5087063 +| epoch 3 | 1115/ 8400 batches | test loss 0.4620073 +| epoch 3 | 1119/ 8400 batches | test loss 0.4333790 +| epoch 3 | 1123/ 8400 batches | test loss 0.4631384 +| epoch 3 | 1127/ 8400 batches | test loss 0.4321982 +| epoch 3 | 1131/ 8400 batches | test loss 0.4069047 +| epoch 3 | 1135/ 8400 batches | test loss 0.3574060 +| epoch 3 | 1139/ 8400 batches | test loss 0.4951109 +| epoch 3 | 1143/ 8400 batches | test loss 0.4849675 +| epoch 3 | 1147/ 8400 batches | test loss 0.4609414 +| epoch 3 | 1151/ 8400 batches | test loss 0.5127585 +| epoch 3 | 1155/ 8400 batches | test loss 0.4028335 +| epoch 3 | 1159/ 8400 batches | test loss 0.4911841 +| epoch 3 | 1163/ 8400 batches | test loss 0.4001037 +| epoch 3 | 1167/ 8400 batches | test loss 0.3827985 +| epoch 3 | 1171/ 8400 batches | test loss 0.4182726 +| epoch 3 | 1175/ 8400 batches | test loss 0.4686057 +| epoch 3 | 1179/ 8400 batches | test loss 0.4714670 +| epoch 3 | 1183/ 8400 batches | test loss 0.5100766 +| epoch 3 | 1187/ 8400 batches | test loss 0.4027915 +| epoch 3 | 1191/ 8400 batches | test loss 0.5149035 +| epoch 3 | 1195/ 8400 batches | test loss 0.3833049 +| epoch 3 | 1199/ 8400 batches | test loss 0.3583508 +| epoch 3 | 1203/ 8400 batches | test loss 0.3721653 +| epoch 3 | 1207/ 8400 batches | test loss 0.4345831 +| epoch 3 | 1211/ 8400 batches | test loss 0.4403972 +| epoch 3 | 1215/ 8400 batches | test loss 0.6058078 +| epoch 3 | 1219/ 8400 batches | test loss 0.5423012 +| epoch 3 | 1223/ 8400 batches | test loss 0.4900721 +| epoch 3 | 1227/ 8400 batches | test loss 0.4480679 +| epoch 3 | 1231/ 8400 batches | test loss 0.3991582 +| epoch 3 | 1235/ 8400 batches | test loss 0.4368941 +| epoch 3 | 1239/ 8400 batches | test loss 0.4431729 +| epoch 3 | 1243/ 8400 batches | test loss 0.4232822 +| epoch 3 | 1247/ 8400 batches | test loss 0.3594466 +| epoch 3 | 1251/ 8400 batches | test loss 0.5979074 +| epoch 3 | 1255/ 8400 batches | test loss 0.3625790 +| epoch 3 | 1259/ 8400 batches | test loss 0.4141789 +| epoch 3 | 1263/ 8400 batches | test loss 0.3848102 +| epoch 3 | 1267/ 8400 batches | test loss 0.4148519 +| epoch 3 | 1271/ 8400 batches | test loss 0.4055929 +| epoch 3 | 1275/ 8400 batches | test loss 0.4274164 +| epoch 3 | 1279/ 8400 batches | test loss 0.4232998 +| epoch 3 | 1283/ 8400 batches | test loss 0.4355643 +| epoch 3 | 1287/ 8400 batches | test loss 0.2204675 +| epoch 3 | 1291/ 8400 batches | test loss 0.4036393 +| epoch 3 | 1295/ 8400 batches | test loss 0.4875364 +| epoch 3 | 1299/ 8400 batches | test loss 0.3887835 +| epoch 3 | 1303/ 8400 batches | test loss 0.4238321 +| epoch 3 | 1307/ 8400 batches | test loss 0.3358514 +| epoch 3 | 1311/ 8400 batches | test loss 0.3492254 +| epoch 3 | 1315/ 8400 batches | test loss 0.4104574 +| epoch 3 | 1319/ 8400 batches | test loss 0.3637696 +| epoch 3 | 1323/ 8400 batches | test loss 0.4223250 +| epoch 3 | 1327/ 8400 batches | test loss 0.6055881 +| epoch 3 | 1331/ 8400 batches | test loss 0.4889343 +| epoch 3 | 1335/ 8400 batches | test loss 0.5912943 +| epoch 3 | 1339/ 8400 batches | test loss 0.4007370 +| epoch 3 | 1343/ 8400 batches | test loss 0.4911010 +| epoch 3 | 1347/ 8400 batches | test loss 0.4457363 +| epoch 3 | 1351/ 8400 batches | test loss 0.4095991 +| epoch 3 | 1355/ 8400 batches | test loss 0.4115245 +| epoch 3 | 1359/ 8400 batches | test loss 0.4477249 +| epoch 3 | 1363/ 8400 batches | test loss 0.3863943 +| epoch 3 | 1367/ 8400 batches | test loss 0.4856851 +| epoch 3 | 1371/ 8400 batches | test loss 0.5234917 +| epoch 3 | 1375/ 8400 batches | test loss 0.3319527 +| epoch 3 | 1379/ 8400 batches | test loss 0.4517803 +| epoch 3 | 1383/ 8400 batches | test loss 0.4937870 +| epoch 3 | 1387/ 8400 batches | test loss 0.4828520 +| epoch 3 | 1391/ 8400 batches | test loss 0.4801977 +| epoch 3 | 1395/ 8400 batches | test loss 0.4254601 +| epoch 3 | 1399/ 8400 batches | test loss 0.4255303 +| epoch 3 | 1403/ 8400 batches | test loss 0.3897628 +| epoch 3 | 1407/ 8400 batches | test loss 0.4438547 +| epoch 3 | 1411/ 8400 batches | test loss 0.4592370 +| epoch 3 | 1415/ 8400 batches | test loss 0.5206337 +| epoch 3 | 1419/ 8400 batches | test loss 0.4771311 +| epoch 3 | 1423/ 8400 batches | test loss 0.4397853 +| epoch 3 | 1427/ 8400 batches | test loss 0.4725531 +| epoch 3 | 1431/ 8400 batches | test loss 0.8031738 +| epoch 3 | 1435/ 8400 batches | test loss 0.4837093 +| epoch 3 | 1439/ 8400 batches | test loss 0.3984298 +| epoch 3 | 1443/ 8400 batches | test loss 0.4012194 +| epoch 3 | 1447/ 8400 batches | test loss 0.3938259 +| epoch 3 | 1451/ 8400 batches | test loss 0.4550920 +| epoch 3 | 1455/ 8400 batches | test loss 0.4135309 +| epoch 3 | 1459/ 8400 batches | test loss 0.5327232 +| epoch 3 | 1463/ 8400 batches | test loss 0.4476941 +| epoch 3 | 1467/ 8400 batches | test loss 0.4812007 +| epoch 3 | 1471/ 8400 batches | test loss 0.4437889 +| epoch 3 | 1475/ 8400 batches | test loss 0.4106262 +| epoch 3 | 1479/ 8400 batches | test loss 0.4819678 +| epoch 3 | 1483/ 8400 batches | test loss 0.4216598 +| epoch 3 | 1487/ 8400 batches | test loss 0.5097629 +| epoch 3 | 1491/ 8400 batches | test loss 0.4141223 +| epoch 3 | 1495/ 8400 batches | test loss 0.5388214 +| epoch 3 | 1499/ 8400 batches | test loss 0.4968316 +| epoch 3 | 1503/ 8400 batches | test loss 0.2573400 +| epoch 3 | 1507/ 8400 batches | test loss 0.3845157 +| epoch 3 | 1511/ 8400 batches | test loss 0.3739434 +| epoch 3 | 1515/ 8400 batches | test loss 0.3721260 +| epoch 3 | 1519/ 8400 batches | test loss 0.4331816 +| epoch 3 | 1523/ 8400 batches | test loss 0.4108021 +| epoch 3 | 1527/ 8400 batches | test loss 0.3945033 +| epoch 3 | 1531/ 8400 batches | test loss 0.4322984 +| epoch 3 | 1535/ 8400 batches | test loss 0.5649664 +| epoch 3 | 1539/ 8400 batches | test loss 0.4190437 +| epoch 3 | 1543/ 8400 batches | test loss 0.4638847 +| epoch 3 | 1547/ 8400 batches | test loss 0.5175039 +| epoch 3 | 1551/ 8400 batches | test loss 0.4998062 +| epoch 3 | 1555/ 8400 batches | test loss 0.4913690 +| epoch 3 | 1559/ 8400 batches | test loss 0.3796181 +| epoch 3 | 1563/ 8400 batches | test loss 0.4015239 +| epoch 3 | 1567/ 8400 batches | test loss 0.3109151 +| epoch 3 | 1571/ 8400 batches | test loss 0.5318912 +| epoch 3 | 1575/ 8400 batches | test loss 0.4010166 +| epoch 3 | 1579/ 8400 batches | test loss 0.4462517 +| epoch 3 | 1583/ 8400 batches | test loss 0.4935222 +| epoch 3 | 1587/ 8400 batches | test loss 0.5154783 +| epoch 3 | 1591/ 8400 batches | test loss 0.4874318 +| epoch 3 | 1595/ 8400 batches | test loss 0.4550159 +| epoch 3 | 1599/ 8400 batches | test loss 0.4490488 +| epoch 3 | 1603/ 8400 batches | test loss 0.4197554 +| epoch 3 | 1607/ 8400 batches | test loss 0.4238251 +| epoch 3 | 1611/ 8400 batches | test loss 0.4606456 +| epoch 3 | 1615/ 8400 batches | test loss 0.4374822 +| epoch 3 | 1619/ 8400 batches | test loss 0.5164673 +| epoch 3 | 1623/ 8400 batches | test loss 0.3956852 +| epoch 3 | 1627/ 8400 batches | test loss 0.4191281 +| epoch 3 | 1631/ 8400 batches | test loss 0.3529484 +| epoch 3 | 1635/ 8400 batches | test loss 0.3902121 +| epoch 3 | 1639/ 8400 batches | test loss 0.4109558 +| epoch 3 | 1643/ 8400 batches | test loss 0.3611627 +| epoch 3 | 1647/ 8400 batches | test loss 0.4395056 +| epoch 3 | 1651/ 8400 batches | test loss 0.4350206 +| epoch 3 | 1655/ 8400 batches | test loss 0.4465186 +| epoch 3 | 1659/ 8400 batches | test loss 0.4365839 +| epoch 3 | 1663/ 8400 batches | test loss 0.4135522 +| epoch 3 | 1667/ 8400 batches | test loss 0.4062838 +| epoch 3 | 1671/ 8400 batches | test loss 0.4991411 +| epoch 3 | 1675/ 8400 batches | test loss 0.4673760 +| epoch 3 | 1679/ 8400 batches | test loss 0.3593608 +| epoch 3 | 1683/ 8400 batches | test loss 0.3711451 +| epoch 3 | 1687/ 8400 batches | test loss 0.4251539 +| epoch 3 | 1691/ 8400 batches | test loss 0.4765893 +| epoch 3 | 1695/ 8400 batches | test loss 0.4390208 +| epoch 3 | 1699/ 8400 batches | test loss 0.4625039 +| epoch 3 | 1703/ 8400 batches | test loss 0.4823137 +| epoch 3 | 1707/ 8400 batches | test loss 0.4664665 +| epoch 3 | 1711/ 8400 batches | test loss 0.5128556 +| epoch 3 | 1715/ 8400 batches | test loss 0.3440912 +| epoch 3 | 1719/ 8400 batches | test loss 0.4485716 +| epoch 3 | 1723/ 8400 batches | test loss 0.3979516 +| epoch 3 | 1727/ 8400 batches | test loss 0.4325103 +| epoch 3 | 1731/ 8400 batches | test loss 0.4494621 +| epoch 3 | 1735/ 8400 batches | test loss 0.3396366 +| epoch 3 | 1739/ 8400 batches | test loss 0.5132455 +| epoch 3 | 1743/ 8400 batches | test loss 0.3992328 +| epoch 3 | 1747/ 8400 batches | test loss 0.4832273 +| epoch 3 | 1751/ 8400 batches | test loss 0.4462531 +| epoch 3 | 1755/ 8400 batches | test loss 0.4302915 +| epoch 3 | 1759/ 8400 batches | test loss 0.4228421 +| epoch 3 | 1763/ 8400 batches | test loss 0.3639389 +| epoch 3 | 1767/ 8400 batches | test loss 0.3701602 +| epoch 3 | 1771/ 8400 batches | test loss 0.4877107 +| epoch 3 | 1775/ 8400 batches | test loss 0.3437000 +| epoch 3 | 1779/ 8400 batches | test loss 0.3795846 +| epoch 3 | 1783/ 8400 batches | test loss 0.4336126 +| epoch 3 | 1787/ 8400 batches | test loss 0.4646284 +| epoch 3 | 1791/ 8400 batches | test loss 0.4333327 +| epoch 3 | 1795/ 8400 batches | test loss 0.5012006 +| epoch 3 | 1799/ 8400 batches | test loss 0.4589463 +| epoch 3 | 1803/ 8400 batches | test loss 0.4524797 +| epoch 3 | 1807/ 8400 batches | test loss 0.3902830 +| epoch 3 | 1811/ 8400 batches | test loss 0.3877945 +| epoch 3 | 1815/ 8400 batches | test loss 0.4209875 +| epoch 3 | 1819/ 8400 batches | test loss 0.4224578 +| epoch 3 | 1823/ 8400 batches | test loss 0.5991511 +| epoch 3 | 1827/ 8400 batches | test loss 0.4340576 +| epoch 3 | 1831/ 8400 batches | test loss 0.4451561 +| epoch 3 | 1835/ 8400 batches | test loss 0.3861086 +| epoch 3 | 1839/ 8400 batches | test loss 0.5100093 +| epoch 3 | 1843/ 8400 batches | test loss 0.4350497 +| epoch 3 | 1847/ 8400 batches | test loss 0.4694964 +| epoch 3 | 1851/ 8400 batches | test loss 0.3840744 +| epoch 3 | 1855/ 8400 batches | test loss 0.4413583 +| epoch 3 | 1859/ 8400 batches | test loss 0.4515982 +| epoch 3 | 1863/ 8400 batches | test loss 0.4132004 +| epoch 3 | 1867/ 8400 batches | test loss 0.5269619 +| epoch 3 | 1871/ 8400 batches | test loss 0.4402150 +| epoch 3 | 1875/ 8400 batches | test loss 0.3840286 +| epoch 3 | 1879/ 8400 batches | test loss 0.4780537 +| epoch 3 | 1883/ 8400 batches | test loss 0.4064520 +| epoch 3 | 1887/ 8400 batches | test loss 0.3719921 +| epoch 3 | 1891/ 8400 batches | test loss 0.4524623 +| epoch 3 | 1895/ 8400 batches | test loss 0.4864617 +| epoch 3 | 1899/ 8400 batches | test loss 0.4537681 +| epoch 3 | 1903/ 8400 batches | test loss 0.3933229 +| epoch 3 | 1907/ 8400 batches | test loss 0.4681114 +| epoch 3 | 1911/ 8400 batches | test loss 0.4238593 +| epoch 3 | 1915/ 8400 batches | test loss 0.4205866 +| epoch 3 | 1919/ 8400 batches | test loss 0.4097142 +| epoch 3 | 1923/ 8400 batches | test loss 0.3639503 +| epoch 3 | 1927/ 8400 batches | test loss 0.4203579 +| epoch 3 | 1931/ 8400 batches | test loss 0.4255848 +| epoch 3 | 1935/ 8400 batches | test loss 0.4327266 +| epoch 3 | 1939/ 8400 batches | test loss 0.4301507 +| epoch 3 | 1943/ 8400 batches | test loss 0.4546994 +| epoch 3 | 1947/ 8400 batches | test loss 0.5092846 +| epoch 3 | 1951/ 8400 batches | test loss 0.4262589 +| epoch 3 | 1955/ 8400 batches | test loss 0.4859958 +| epoch 3 | 1959/ 8400 batches | test loss 0.4702708 +| epoch 3 | 1963/ 8400 batches | test loss 0.4307864 +| epoch 3 | 1967/ 8400 batches | test loss 0.4365982 +| epoch 3 | 1971/ 8400 batches | test loss 0.3447022 +| epoch 3 | 1975/ 8400 batches | test loss 0.4374913 +| epoch 3 | 1979/ 8400 batches | test loss 0.4306411 +| epoch 3 | 1983/ 8400 batches | test loss 0.3730251 +| epoch 3 | 1987/ 8400 batches | test loss 0.4056706 +| epoch 3 | 1991/ 8400 batches | test loss 0.4161052 +| epoch 3 | 1995/ 8400 batches | test loss 0.4407722 +| epoch 3 | 1999/ 8400 batches | test loss 0.4476797 +| epoch 3 | 2003/ 8400 batches | test loss 0.4775108 +| epoch 3 | 2007/ 8400 batches | test loss 0.3453476 +| epoch 3 | 2011/ 8400 batches | test loss 0.4489121 +| epoch 3 | 2015/ 8400 batches | test loss 0.4367339 +| epoch 3 | 2019/ 8400 batches | test loss 0.3931328 +| epoch 3 | 2023/ 8400 batches | test loss 0.3692096 +| epoch 3 | 2027/ 8400 batches | test loss 0.4188907 +| epoch 3 | 2031/ 8400 batches | test loss 0.4786775 +| epoch 3 | 2035/ 8400 batches | test loss 0.4889234 +| epoch 3 | 2039/ 8400 batches | test loss 0.3791552 +| epoch 3 | 2043/ 8400 batches | test loss 0.4089478 +| epoch 3 | 2047/ 8400 batches | test loss 0.4105543 +| epoch 3 | 2051/ 8400 batches | test loss 0.3626556 +| epoch 3 | 2055/ 8400 batches | test loss 0.4255606 +| epoch 3 | 2059/ 8400 batches | test loss 0.3906174 +| epoch 3 | 2063/ 8400 batches | test loss 0.4197096 +| epoch 3 | 2067/ 8400 batches | test loss 0.3822519 +| epoch 3 | 2071/ 8400 batches | test loss 0.4132991 +| epoch 3 | 2075/ 8400 batches | test loss 0.4841568 +| epoch 3 | 2079/ 8400 batches | test loss 0.3331556 +| epoch 3 | 2083/ 8400 batches | test loss 0.4515904 +| epoch 3 | 2087/ 8400 batches | test loss 0.4209869 +| epoch 3 | 2091/ 8400 batches | test loss 0.4467662 +| epoch 3 | 2095/ 8400 batches | test loss 0.3193025 +| epoch 3 | 2099/ 8400 batches | test loss 0.3797763 +| epoch 3 | final test loss 0.4339, save model! +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 8400 batches | train loss 0.4105121 +| epoch 4 | 7/ 8400 batches | train loss 0.4122066 +| epoch 4 | 11/ 8400 batches | train loss 0.3753837 +| epoch 4 | 15/ 8400 batches | train loss 0.4259503 +| epoch 4 | 19/ 8400 batches | train loss 0.4151087 +| epoch 4 | 23/ 8400 batches | train loss 0.4476305 +| epoch 4 | 27/ 8400 batches | train loss 0.3281106 +| epoch 4 | 31/ 8400 batches | train loss 0.4038531 +| epoch 4 | 35/ 8400 batches | train loss 0.4626127 +| epoch 4 | 39/ 8400 batches | train loss 0.3560472 +| epoch 4 | 43/ 8400 batches | train loss 0.3307232 +| epoch 4 | 47/ 8400 batches | train loss 0.5350984 +| epoch 4 | 51/ 8400 batches | train loss 0.3800812 +| epoch 4 | 55/ 8400 batches | train loss 0.4581792 +| epoch 4 | 59/ 8400 batches | train loss 0.3854673 +| epoch 4 | 63/ 8400 batches | train loss 0.2991980 +| epoch 4 | 67/ 8400 batches | train loss 0.3784110 +| epoch 4 | 71/ 8400 batches | train loss 0.4998831 +| epoch 4 | 75/ 8400 batches | train loss 0.3675056 +| epoch 4 | 79/ 8400 batches | train loss 0.4056392 +| epoch 4 | 83/ 8400 batches | train loss 0.4288573 +| epoch 4 | 87/ 8400 batches | train loss 0.4762898 +| epoch 4 | 91/ 8400 batches | train loss 0.4418508 +| epoch 4 | 95/ 8400 batches | train loss 0.3936772 +| epoch 4 | 99/ 8400 batches | train loss 0.5006737 +| epoch 4 | 103/ 8400 batches | train loss 0.5010793 +| epoch 4 | 107/ 8400 batches | train loss 0.4367926 +| epoch 4 | 111/ 8400 batches | train loss 0.3790506 +| epoch 4 | 115/ 8400 batches | train loss 0.3676119 +| epoch 4 | 119/ 8400 batches | train loss 0.4198632 +| epoch 4 | 123/ 8400 batches | train loss 0.4831315 +| epoch 4 | 127/ 8400 batches | train loss 0.4355630 +| epoch 4 | 131/ 8400 batches | train loss 0.3672900 +| epoch 4 | 135/ 8400 batches | train loss 0.5001127 +| epoch 4 | 139/ 8400 batches | train loss 0.3954701 +| epoch 4 | 143/ 8400 batches | train loss 0.3395967 +| epoch 4 | 147/ 8400 batches | train loss 0.4034004 +| epoch 4 | 151/ 8400 batches | train loss 0.4736348 +| epoch 4 | 155/ 8400 batches | train loss 0.4720065 +| epoch 4 | 159/ 8400 batches | train loss 0.3938875 +| epoch 4 | 163/ 8400 batches | train loss 0.3804572 +| epoch 4 | 167/ 8400 batches | train loss 0.4395756 +| epoch 4 | 171/ 8400 batches | train loss 0.5336769 +| epoch 4 | 175/ 8400 batches | train loss 0.4219136 +| epoch 4 | 179/ 8400 batches | train loss 0.4027507 +| epoch 4 | 183/ 8400 batches | train loss 0.3743524 +| epoch 4 | 187/ 8400 batches | train loss 0.4522199 +| epoch 4 | 191/ 8400 batches | train loss 0.3635494 +| epoch 4 | 195/ 8400 batches | train loss 0.3755082 +| epoch 4 | 199/ 8400 batches | train loss 0.4119741 +| epoch 4 | 203/ 8400 batches | train loss 0.2698881 +| epoch 4 | 207/ 8400 batches | train loss 0.4029006 +| epoch 4 | 211/ 8400 batches | train loss 0.3869975 +| epoch 4 | 215/ 8400 batches | train loss 0.3651017 +| epoch 4 | 219/ 8400 batches | train loss 0.4281885 +| epoch 4 | 223/ 8400 batches | train loss 0.4142575 +| epoch 4 | 227/ 8400 batches | train loss 0.3756227 +| epoch 4 | 231/ 8400 batches | train loss 0.4382778 +| epoch 4 | 235/ 8400 batches | train loss 0.3992969 +| epoch 4 | 239/ 8400 batches | train loss 0.4475814 +| epoch 4 | 243/ 8400 batches | train loss 0.4499844 +| epoch 4 | 247/ 8400 batches | train loss 0.4067896 +| epoch 4 | 251/ 8400 batches | train loss 0.4548350 +| epoch 4 | 255/ 8400 batches | train loss 0.4254653 +| epoch 4 | 259/ 8400 batches | train loss 0.4467176 +| epoch 4 | 263/ 8400 batches | train loss 0.4550365 +| epoch 4 | 267/ 8400 batches | train loss 0.3785678 +| epoch 4 | 271/ 8400 batches | train loss 0.5014666 +| epoch 4 | 275/ 8400 batches | train loss 0.4256283 +| epoch 4 | 279/ 8400 batches | train loss 0.4364642 +| epoch 4 | 283/ 8400 batches | train loss 0.3611107 +| epoch 4 | 287/ 8400 batches | train loss 0.4522038 +| epoch 4 | 291/ 8400 batches | train loss 0.4461350 +| epoch 4 | 295/ 8400 batches | train loss 0.3071890 +| epoch 4 | 299/ 8400 batches | train loss 0.4000991 +| epoch 4 | 303/ 8400 batches | train loss 0.5188360 +| epoch 4 | 307/ 8400 batches | train loss 0.4219698 +| epoch 4 | 311/ 8400 batches | train loss 0.4148562 +| epoch 4 | 315/ 8400 batches | train loss 0.3955383 +| epoch 4 | 319/ 8400 batches | train loss 0.4841765 +| epoch 4 | 323/ 8400 batches | train loss 0.3697609 +| epoch 4 | 327/ 8400 batches | train loss 0.3771356 +| epoch 4 | 331/ 8400 batches | train loss 0.4075789 +| epoch 4 | 335/ 8400 batches | train loss 0.3922634 +| epoch 4 | 339/ 8400 batches | train loss 0.5139506 +| epoch 4 | 343/ 8400 batches | train loss 0.4137439 +| epoch 4 | 347/ 8400 batches | train loss 0.4423190 +| epoch 4 | 351/ 8400 batches | train loss 0.3286564 +| epoch 4 | 355/ 8400 batches | train loss 0.4078443 +| epoch 4 | 359/ 8400 batches | train loss 0.3367403 +| epoch 4 | 363/ 8400 batches | train loss 0.4916173 +| epoch 4 | 367/ 8400 batches | train loss 0.3354490 +| epoch 4 | 371/ 8400 batches | train loss 0.4006558 +| epoch 4 | 375/ 8400 batches | train loss 0.4025444 +| epoch 4 | 379/ 8400 batches | train loss 0.5191189 +| epoch 4 | 383/ 8400 batches | train loss 0.4137402 +| epoch 4 | 387/ 8400 batches | train loss 0.4801420 +| epoch 4 | 391/ 8400 batches | train loss 0.4212449 +| epoch 4 | 395/ 8400 batches | train loss 0.4315067 +| epoch 4 | 399/ 8400 batches | train loss 0.5094700 +| epoch 4 | 403/ 8400 batches | train loss 0.3856279 +| epoch 4 | 407/ 8400 batches | train loss 0.3990624 +| epoch 4 | 411/ 8400 batches | train loss 0.4247920 +| epoch 4 | 415/ 8400 batches | train loss 0.4309523 +| epoch 4 | 419/ 8400 batches | train loss 0.4713415 +| epoch 4 | 423/ 8400 batches | train loss 0.3623177 +| epoch 4 | 427/ 8400 batches | train loss 0.4038214 +| epoch 4 | 431/ 8400 batches | train loss 0.4184711 +| epoch 4 | 435/ 8400 batches | train loss 0.4368643 +| epoch 4 | 439/ 8400 batches | train loss 0.4104628 +| epoch 4 | 443/ 8400 batches | train loss 0.3933346 +| epoch 4 | 447/ 8400 batches | train loss 0.4828223 +| epoch 4 | 451/ 8400 batches | train loss 0.4410538 +| epoch 4 | 455/ 8400 batches | train loss 0.4077548 +| epoch 4 | 459/ 8400 batches | train loss 0.3985959 +| epoch 4 | 463/ 8400 batches | train loss 0.4263973 +| epoch 4 | 467/ 8400 batches | train loss 0.5316733 +| epoch 4 | 471/ 8400 batches | train loss 0.4036126 +| epoch 4 | 475/ 8400 batches | train loss 0.3934357 +| epoch 4 | 479/ 8400 batches | train loss 0.4466174 +| epoch 4 | 483/ 8400 batches | train loss 0.4106495 +| epoch 4 | 487/ 8400 batches | train loss 0.3445940 +| epoch 4 | 491/ 8400 batches | train loss 0.4735942 +| epoch 4 | 495/ 8400 batches | train loss 0.4013478 +| epoch 4 | 499/ 8400 batches | train loss 0.3727709 +| epoch 4 | 503/ 8400 batches | train loss 0.2464319 +| epoch 4 | 507/ 8400 batches | train loss 0.3787327 +| epoch 4 | 511/ 8400 batches | train loss 0.3865899 +| epoch 4 | 515/ 8400 batches | train loss 0.4266335 +| epoch 4 | 519/ 8400 batches | train loss 0.3059404 +| epoch 4 | 523/ 8400 batches | train loss 0.3384796 +| epoch 4 | 527/ 8400 batches | train loss 0.3187631 +| epoch 4 | 531/ 8400 batches | train loss 0.3775592 +| epoch 4 | 535/ 8400 batches | train loss 0.4246787 +| epoch 4 | 539/ 8400 batches | train loss 0.4278018 +| epoch 4 | 543/ 8400 batches | train loss 0.4553250 +| epoch 4 | 547/ 8400 batches | train loss 0.3398961 +| epoch 4 | 551/ 8400 batches | train loss 0.3821713 +| epoch 4 | 555/ 8400 batches | train loss 0.4481533 +| epoch 4 | 559/ 8400 batches | train loss 0.3907287 +| epoch 4 | 563/ 8400 batches | train loss 0.3265201 +| epoch 4 | 567/ 8400 batches | train loss 0.3967777 +| epoch 4 | 571/ 8400 batches | train loss 0.3896617 +| epoch 4 | 575/ 8400 batches | train loss 0.3392978 +| epoch 4 | 579/ 8400 batches | train loss 0.4114262 +| epoch 4 | 583/ 8400 batches | train loss 0.3736748 +| epoch 4 | 587/ 8400 batches | train loss 0.3852961 +| epoch 4 | 591/ 8400 batches | train loss 0.3406977 +| epoch 4 | 595/ 8400 batches | train loss 0.4329735 +| epoch 4 | 599/ 8400 batches | train loss 0.3933859 +| epoch 4 | 603/ 8400 batches | train loss 0.3945585 +| epoch 4 | 607/ 8400 batches | train loss 0.3738636 +| epoch 4 | 611/ 8400 batches | train loss 0.3827044 +| epoch 4 | 615/ 8400 batches | train loss 0.3981009 +| epoch 4 | 619/ 8400 batches | train loss 0.3358071 +| epoch 4 | 623/ 8400 batches | train loss 0.4432285 +| epoch 4 | 627/ 8400 batches | train loss 0.3262199 +| epoch 4 | 631/ 8400 batches | train loss 0.3332701 +| epoch 4 | 635/ 8400 batches | train loss 0.4766111 +| epoch 4 | 639/ 8400 batches | train loss 0.3640507 +| epoch 4 | 643/ 8400 batches | train loss 0.4003283 +| epoch 4 | 647/ 8400 batches | train loss 0.4762813 +| epoch 4 | 651/ 8400 batches | train loss 0.3554778 +| epoch 4 | 655/ 8400 batches | train loss 0.3630597 +| epoch 4 | 659/ 8400 batches | train loss 0.4199694 +| epoch 4 | 663/ 8400 batches | train loss 0.4511137 +| epoch 4 | 667/ 8400 batches | train loss 0.3963529 +| epoch 4 | 671/ 8400 batches | train loss 0.4987243 +| epoch 4 | 675/ 8400 batches | train loss 0.4075965 +| epoch 4 | 679/ 8400 batches | train loss 0.4807796 +| epoch 4 | 683/ 8400 batches | train loss 0.5136167 +| epoch 4 | 687/ 8400 batches | train loss 0.4097446 +| epoch 4 | 691/ 8400 batches | train loss 0.3836895 +| epoch 4 | 695/ 8400 batches | train loss 0.4205378 +| epoch 4 | 699/ 8400 batches | train loss 0.3999117 +| epoch 4 | 703/ 8400 batches | train loss 0.3283637 +| epoch 4 | 707/ 8400 batches | train loss 0.4160743 +| epoch 4 | 711/ 8400 batches | train loss 0.4146982 +| epoch 4 | 715/ 8400 batches | train loss 0.4718591 +| epoch 4 | 719/ 8400 batches | train loss 0.4616997 +| epoch 4 | 723/ 8400 batches | train loss 0.3981532 +| epoch 4 | 727/ 8400 batches | train loss 0.4587872 +| epoch 4 | 731/ 8400 batches | train loss 0.4046730 +| epoch 4 | 735/ 8400 batches | train loss 0.4270633 +| epoch 4 | 739/ 8400 batches | train loss 0.3974895 +| epoch 4 | 743/ 8400 batches | train loss 0.3903999 +| epoch 4 | 747/ 8400 batches | train loss 0.4179622 +| epoch 4 | 751/ 8400 batches | train loss 0.4224882 +| epoch 4 | 755/ 8400 batches | train loss 0.4302867 +| epoch 4 | 759/ 8400 batches | train loss 0.5029005 +| epoch 4 | 763/ 8400 batches | train loss 0.4958844 +| epoch 4 | 767/ 8400 batches | train loss 0.3346771 +| epoch 4 | 771/ 8400 batches | train loss 0.3863189 +| epoch 4 | 775/ 8400 batches | train loss 0.4774123 +| epoch 4 | 779/ 8400 batches | train loss 0.4021828 +| epoch 4 | 783/ 8400 batches | train loss 0.4607209 +| epoch 4 | 787/ 8400 batches | train loss 0.3348505 +| epoch 4 | 791/ 8400 batches | train loss 0.4386550 +| epoch 4 | 795/ 8400 batches | train loss 0.3564398 +| epoch 4 | 799/ 8400 batches | train loss 0.4332590 +| epoch 4 | 803/ 8400 batches | train loss 0.4121352 +| epoch 4 | 807/ 8400 batches | train loss 0.4071808 +| epoch 4 | 811/ 8400 batches | train loss 0.3566028 +| epoch 4 | 815/ 8400 batches | train loss 0.4004174 +| epoch 4 | 819/ 8400 batches | train loss 0.4730129 +| epoch 4 | 823/ 8400 batches | train loss 0.4777262 +| epoch 4 | 827/ 8400 batches | train loss 0.4083071 +| epoch 4 | 831/ 8400 batches | train loss 0.4465462 +| epoch 4 | 835/ 8400 batches | train loss 0.4127223 +| epoch 4 | 839/ 8400 batches | train loss 0.3830614 +| epoch 4 | 843/ 8400 batches | train loss 0.3634876 +| epoch 4 | 847/ 8400 batches | train loss 0.4582267 +| epoch 4 | 851/ 8400 batches | train loss 0.3366150 +| epoch 4 | 855/ 8400 batches | train loss 0.3975351 +| epoch 4 | 859/ 8400 batches | train loss 0.3944078 +| epoch 4 | 863/ 8400 batches | train loss 0.5133938 +| epoch 4 | 867/ 8400 batches | train loss 0.4677575 +| epoch 4 | 871/ 8400 batches | train loss 0.3761538 +| epoch 4 | 875/ 8400 batches | train loss 0.3358305 +| epoch 4 | 879/ 8400 batches | train loss 0.4267418 +| epoch 4 | 883/ 8400 batches | train loss 0.4894016 +| epoch 4 | 887/ 8400 batches | train loss 0.4266210 +| epoch 4 | 891/ 8400 batches | train loss 0.5382880 +| epoch 4 | 895/ 8400 batches | train loss 0.4320882 +| epoch 4 | 899/ 8400 batches | train loss 0.3808015 +| epoch 4 | 903/ 8400 batches | train loss 0.4161889 +| epoch 4 | 907/ 8400 batches | train loss 0.4030518 +| epoch 4 | 911/ 8400 batches | train loss 0.3501503 +| epoch 4 | 915/ 8400 batches | train loss 0.2729744 +| epoch 4 | 919/ 8400 batches | train loss 0.4034247 +| epoch 4 | 923/ 8400 batches | train loss 0.3539782 +| epoch 4 | 927/ 8400 batches | train loss 0.4131728 +| epoch 4 | 931/ 8400 batches | train loss 0.3514440 +| epoch 4 | 935/ 8400 batches | train loss 0.5242421 +| epoch 4 | 939/ 8400 batches | train loss 0.4436743 +| epoch 4 | 943/ 8400 batches | train loss 0.4225261 +| epoch 4 | 947/ 8400 batches | train loss 0.4381529 +| epoch 4 | 951/ 8400 batches | train loss 0.4562263 +| epoch 4 | 955/ 8400 batches | train loss 0.3077725 +| epoch 4 | 959/ 8400 batches | train loss 0.3527634 +| epoch 4 | 963/ 8400 batches | train loss 0.4403565 +| epoch 4 | 967/ 8400 batches | train loss 0.3780047 +| epoch 4 | 971/ 8400 batches | train loss 0.4385488 +| epoch 4 | 975/ 8400 batches | train loss 0.4438300 +| epoch 4 | 979/ 8400 batches | train loss 0.4094006 +| epoch 4 | 983/ 8400 batches | train loss 0.3630230 +| epoch 4 | 987/ 8400 batches | train loss 0.4314828 +| epoch 4 | 991/ 8400 batches | train loss 0.4287212 +| epoch 4 | 995/ 8400 batches | train loss 0.3847008 +| epoch 4 | 999/ 8400 batches | train loss 0.4360410 +| epoch 4 | 1003/ 8400 batches | train loss 0.3992652 +| epoch 4 | 1007/ 8400 batches | train loss 0.4148052 +| epoch 4 | 1011/ 8400 batches | train loss 0.5197626 +| epoch 4 | 1015/ 8400 batches | train loss 0.4970093 +| epoch 4 | 1019/ 8400 batches | train loss 0.3496514 +| epoch 4 | 1023/ 8400 batches | train loss 0.3392563 +| epoch 4 | 1027/ 8400 batches | train loss 0.4845507 +| epoch 4 | 1031/ 8400 batches | train loss 0.3310230 +| epoch 4 | 1035/ 8400 batches | train loss 0.3834469 +| epoch 4 | 1039/ 8400 batches | train loss 0.4457830 +| epoch 4 | 1043/ 8400 batches | train loss 0.4898160 +| epoch 4 | 1047/ 8400 batches | train loss 0.4534319 +| epoch 4 | 1051/ 8400 batches | train loss 0.4400485 +| epoch 4 | 1055/ 8400 batches | train loss 0.3837021 +| epoch 4 | 1059/ 8400 batches | train loss 0.4805057 +| epoch 4 | 1063/ 8400 batches | train loss 0.4116217 +| epoch 4 | 1067/ 8400 batches | train loss 0.4841871 +| epoch 4 | 1071/ 8400 batches | train loss 0.4224352 +| epoch 4 | 1075/ 8400 batches | train loss 0.5096746 +| epoch 4 | 1079/ 8400 batches | train loss 0.4489686 +| epoch 4 | 1083/ 8400 batches | train loss 0.5021098 +| epoch 4 | 1087/ 8400 batches | train loss 0.4499443 +| epoch 4 | 1091/ 8400 batches | train loss 0.3551226 +| epoch 4 | 1095/ 8400 batches | train loss 0.4239576 +| epoch 4 | 1099/ 8400 batches | train loss 0.4349858 +| epoch 4 | 1103/ 8400 batches | train loss 0.3974226 +| epoch 4 | 1107/ 8400 batches | train loss 0.3877772 +| epoch 4 | 1111/ 8400 batches | train loss 0.4372932 +| epoch 4 | 1115/ 8400 batches | train loss 0.2888271 +| epoch 4 | 1119/ 8400 batches | train loss 0.4433682 +| epoch 4 | 1123/ 8400 batches | train loss 0.4525032 +| epoch 4 | 1127/ 8400 batches | train loss 0.4109480 +| epoch 4 | 1131/ 8400 batches | train loss 0.4076675 +| epoch 4 | 1135/ 8400 batches | train loss 0.4861659 +| epoch 4 | 1139/ 8400 batches | train loss 0.4856344 +| epoch 4 | 1143/ 8400 batches | train loss 0.4873062 +| epoch 4 | 1147/ 8400 batches | train loss 0.4619984 +| epoch 4 | 1151/ 8400 batches | train loss 0.4598307 +| epoch 4 | 1155/ 8400 batches | train loss 0.4118659 +| epoch 4 | 1159/ 8400 batches | train loss 0.4258145 +| epoch 4 | 1163/ 8400 batches | train loss 0.3856435 +| epoch 4 | 1167/ 8400 batches | train loss 0.3957456 +| epoch 4 | 1171/ 8400 batches | train loss 0.4640567 +| epoch 4 | 1175/ 8400 batches | train loss 0.3973231 +| epoch 4 | 1179/ 8400 batches | train loss 0.4680495 +| epoch 4 | 1183/ 8400 batches | train loss 0.4813085 +| epoch 4 | 1187/ 8400 batches | train loss 0.3923513 +| epoch 4 | 1191/ 8400 batches | train loss 0.4582163 +| epoch 4 | 1195/ 8400 batches | train loss 0.4475418 +| epoch 4 | 1199/ 8400 batches | train loss 0.4239194 +| epoch 4 | 1203/ 8400 batches | train loss 0.4390871 +| epoch 4 | 1207/ 8400 batches | train loss 0.3740848 +| epoch 4 | 1211/ 8400 batches | train loss 0.4786079 +| epoch 4 | 1215/ 8400 batches | train loss 0.5554202 +| epoch 4 | 1219/ 8400 batches | train loss 0.4407499 +| epoch 4 | 1223/ 8400 batches | train loss 0.4129073 +| epoch 4 | 1227/ 8400 batches | train loss 0.4687672 +| epoch 4 | 1231/ 8400 batches | train loss 0.3426725 +| epoch 4 | 1235/ 8400 batches | train loss 0.4637830 +| epoch 4 | 1239/ 8400 batches | train loss 0.4577962 +| epoch 4 | 1243/ 8400 batches | train loss 0.4433909 +| epoch 4 | 1247/ 8400 batches | train loss 0.3950944 +| epoch 4 | 1251/ 8400 batches | train loss 0.4771661 +| epoch 4 | 1255/ 8400 batches | train loss 0.4096244 +| epoch 4 | 1259/ 8400 batches | train loss 0.4182056 +| epoch 4 | 1263/ 8400 batches | train loss 0.4156525 +| epoch 4 | 1267/ 8400 batches | train loss 0.5565984 +| epoch 4 | 1271/ 8400 batches | train loss 0.3632478 +| epoch 4 | 1275/ 8400 batches | train loss 0.3974699 +| epoch 4 | 1279/ 8400 batches | train loss 0.4676155 +| epoch 4 | 1283/ 8400 batches | train loss 0.3477083 +| epoch 4 | 1287/ 8400 batches | train loss 0.4043010 +| epoch 4 | 1291/ 8400 batches | train loss 0.3486715 +| epoch 4 | 1295/ 8400 batches | train loss 0.3426670 +| epoch 4 | 1299/ 8400 batches | train loss 0.4522077 +| epoch 4 | 1303/ 8400 batches | train loss 0.4268390 +| epoch 4 | 1307/ 8400 batches | train loss 0.4570842 +| epoch 4 | 1311/ 8400 batches | train loss 0.4288343 +| epoch 4 | 1315/ 8400 batches | train loss 0.4482654 +| epoch 4 | 1319/ 8400 batches | train loss 0.3996851 +| epoch 4 | 1323/ 8400 batches | train loss 0.4198845 +| epoch 4 | 1327/ 8400 batches | train loss 0.3445494 +| epoch 4 | 1331/ 8400 batches | train loss 0.4748161 +| epoch 4 | 1335/ 8400 batches | train loss 0.4402609 +| epoch 4 | 1339/ 8400 batches | train loss 0.4273812 +| epoch 4 | 1343/ 8400 batches | train loss 0.3658706 +| epoch 4 | 1347/ 8400 batches | train loss 0.3613392 +| epoch 4 | 1351/ 8400 batches | train loss 0.4250193 +| epoch 4 | 1355/ 8400 batches | train loss 0.3931868 +| epoch 4 | 1359/ 8400 batches | train loss 0.3854283 +| epoch 4 | 1363/ 8400 batches | train loss 0.4638473 +| epoch 4 | 1367/ 8400 batches | train loss 0.4751266 +| epoch 4 | 1371/ 8400 batches | train loss 0.3901071 +| epoch 4 | 1375/ 8400 batches | train loss 0.4212012 +| epoch 4 | 1379/ 8400 batches | train loss 0.3705493 +| epoch 4 | 1383/ 8400 batches | train loss 0.4023508 +| epoch 4 | 1387/ 8400 batches | train loss 0.3597847 +| epoch 4 | 1391/ 8400 batches | train loss 0.4322464 +| epoch 4 | 1395/ 8400 batches | train loss 0.3773183 +| epoch 4 | 1399/ 8400 batches | train loss 0.4234985 +| epoch 4 | 1403/ 8400 batches | train loss 0.4771075 +| epoch 4 | 1407/ 8400 batches | train loss 0.4028227 +| epoch 4 | 1411/ 8400 batches | train loss 0.4687385 +| epoch 4 | 1415/ 8400 batches | train loss 0.4163313 +| epoch 4 | 1419/ 8400 batches | train loss 0.4308133 +| epoch 4 | 1423/ 8400 batches | train loss 0.4050415 +| epoch 4 | 1427/ 8400 batches | train loss 0.3592268 +| epoch 4 | 1431/ 8400 batches | train loss 0.4566509 +| epoch 4 | 1435/ 8400 batches | train loss 0.4117662 +| epoch 4 | 1439/ 8400 batches | train loss 0.3480918 +| epoch 4 | 1443/ 8400 batches | train loss 0.4438435 +| epoch 4 | 1447/ 8400 batches | train loss 0.3689386 +| epoch 4 | 1451/ 8400 batches | train loss 0.4401566 +| epoch 4 | 1455/ 8400 batches | train loss 0.3934162 +| epoch 4 | 1459/ 8400 batches | train loss 0.4203352 +| epoch 4 | 1463/ 8400 batches | train loss 0.4663711 +| epoch 4 | 1467/ 8400 batches | train loss 0.3528313 +| epoch 4 | 1471/ 8400 batches | train loss 0.4382895 +| epoch 4 | 1475/ 8400 batches | train loss 0.3721591 +| epoch 4 | 1479/ 8400 batches | train loss 0.4107084 +| epoch 4 | 1483/ 8400 batches | train loss 0.4306352 +| epoch 4 | 1487/ 8400 batches | train loss 0.4648575 +| epoch 4 | 1491/ 8400 batches | train loss 0.4996463 +| epoch 4 | 1495/ 8400 batches | train loss 0.4495484 +| epoch 4 | 1499/ 8400 batches | train loss 0.4222323 +| epoch 4 | 1503/ 8400 batches | train loss 0.3679690 +| epoch 4 | 1507/ 8400 batches | train loss 0.4271246 +| epoch 4 | 1511/ 8400 batches | train loss 0.3906177 +| epoch 4 | 1515/ 8400 batches | train loss 0.4593945 +| epoch 4 | 1519/ 8400 batches | train loss 0.4619452 +| epoch 4 | 1523/ 8400 batches | train loss 0.3883618 +| epoch 4 | 1527/ 8400 batches | train loss 0.2483150 +| epoch 4 | 1531/ 8400 batches | train loss 0.4116794 +| epoch 4 | 1535/ 8400 batches | train loss 0.4037308 +| epoch 4 | 1539/ 8400 batches | train loss 0.3828394 +| epoch 4 | 1543/ 8400 batches | train loss 0.4612454 +| epoch 4 | 1547/ 8400 batches | train loss 0.4176147 +| epoch 4 | 1551/ 8400 batches | train loss 0.3949000 +| epoch 4 | 1555/ 8400 batches | train loss 0.4288535 +| epoch 4 | 1559/ 8400 batches | train loss 0.4121007 +| epoch 4 | 1563/ 8400 batches | train loss 0.3620411 +| epoch 4 | 1567/ 8400 batches | train loss 0.4567731 +| epoch 4 | 1571/ 8400 batches | train loss 0.4036689 +| epoch 4 | 1575/ 8400 batches | train loss 0.5762186 +| epoch 4 | 1579/ 8400 batches | train loss 0.4175324 +| epoch 4 | 1583/ 8400 batches | train loss 0.5398687 +| epoch 4 | 1587/ 8400 batches | train loss 0.3498147 +| epoch 4 | 1591/ 8400 batches | train loss 0.3946000 +| epoch 4 | 1595/ 8400 batches | train loss 0.3999793 +| epoch 4 | 1599/ 8400 batches | train loss 0.2909763 +| epoch 4 | 1603/ 8400 batches | train loss 0.3455434 +| epoch 4 | 1607/ 8400 batches | train loss 0.4373444 +| epoch 4 | 1611/ 8400 batches | train loss 0.4973272 +| epoch 4 | 1615/ 8400 batches | train loss 0.3608537 +| epoch 4 | 1619/ 8400 batches | train loss 0.4387062 +| epoch 4 | 1623/ 8400 batches | train loss 0.4222443 +| epoch 4 | 1627/ 8400 batches | train loss 0.4149169 +| epoch 4 | 1631/ 8400 batches | train loss 0.4061285 +| epoch 4 | 1635/ 8400 batches | train loss 0.4688377 +| epoch 4 | 1639/ 8400 batches | train loss 0.4273272 +| epoch 4 | 1643/ 8400 batches | train loss 0.2549168 +| epoch 4 | 1647/ 8400 batches | train loss 0.3834767 +| epoch 4 | 1651/ 8400 batches | train loss 0.3888844 +| epoch 4 | 1655/ 8400 batches | train loss 0.4528684 +| epoch 4 | 1659/ 8400 batches | train loss 0.4363829 +| epoch 4 | 1663/ 8400 batches | train loss 0.4749484 +| epoch 4 | 1667/ 8400 batches | train loss 0.4622295 +| epoch 4 | 1671/ 8400 batches | train loss 0.4592943 +| epoch 4 | 1675/ 8400 batches | train loss 0.4597593 +| epoch 4 | 1679/ 8400 batches | train loss 0.4401518 +| epoch 4 | 1683/ 8400 batches | train loss 0.4951685 +| epoch 4 | 1687/ 8400 batches | train loss 0.3771677 +| epoch 4 | 1691/ 8400 batches | train loss 0.4087892 +| epoch 4 | 1695/ 8400 batches | train loss 0.4622080 +| epoch 4 | 1699/ 8400 batches | train loss 0.3833562 +| epoch 4 | 1703/ 8400 batches | train loss 0.4504072 +| epoch 4 | 1707/ 8400 batches | train loss 0.5581336 +| epoch 4 | 1711/ 8400 batches | train loss 0.4644086 +| epoch 4 | 1715/ 8400 batches | train loss 0.3896427 +| epoch 4 | 1719/ 8400 batches | train loss 0.4209866 +| epoch 4 | 1723/ 8400 batches | train loss 0.4593048 +| epoch 4 | 1727/ 8400 batches | train loss 0.3385212 +| epoch 4 | 1731/ 8400 batches | train loss 0.4061608 +| epoch 4 | 1735/ 8400 batches | train loss 0.4464835 +| epoch 4 | 1739/ 8400 batches | train loss 0.4112409 +| epoch 4 | 1743/ 8400 batches | train loss 0.4111546 +| epoch 4 | 1747/ 8400 batches | train loss 0.4017902 +| epoch 4 | 1751/ 8400 batches | train loss 0.4841485 +| epoch 4 | 1755/ 8400 batches | train loss 0.3709976 +| epoch 4 | 1759/ 8400 batches | train loss 0.4556702 +| epoch 4 | 1763/ 8400 batches | train loss 0.4224994 +| epoch 4 | 1767/ 8400 batches | train loss 0.4523166 +| epoch 4 | 1771/ 8400 batches | train loss 0.4230109 +| epoch 4 | 1775/ 8400 batches | train loss 0.4105085 +| epoch 4 | 1779/ 8400 batches | train loss 0.4341930 +| epoch 4 | 1783/ 8400 batches | train loss 0.3797514 +| epoch 4 | 1787/ 8400 batches | train loss 0.4227282 +| epoch 4 | 1791/ 8400 batches | train loss 0.4472111 +| epoch 4 | 1795/ 8400 batches | train loss 0.4082892 +| epoch 4 | 1799/ 8400 batches | train loss 0.4086145 +| epoch 4 | 1803/ 8400 batches | train loss 0.4147356 +| epoch 4 | 1807/ 8400 batches | train loss 0.4207474 +| epoch 4 | 1811/ 8400 batches | train loss 0.4855131 +| epoch 4 | 1815/ 8400 batches | train loss 0.4007407 +| epoch 4 | 1819/ 8400 batches | train loss 0.3647381 +| epoch 4 | 1823/ 8400 batches | train loss 0.4401994 +| epoch 4 | 1827/ 8400 batches | train loss 0.3993959 +| epoch 4 | 1831/ 8400 batches | train loss 0.3911145 +| epoch 4 | 1835/ 8400 batches | train loss 0.3739744 +| epoch 4 | 1839/ 8400 batches | train loss 0.3654296 +| epoch 4 | 1843/ 8400 batches | train loss 0.4272293 +| epoch 4 | 1847/ 8400 batches | train loss 0.3833006 +| epoch 4 | 1851/ 8400 batches | train loss 0.5036721 +| epoch 4 | 1855/ 8400 batches | train loss 0.3430497 +| epoch 4 | 1859/ 8400 batches | train loss 0.3725519 +| epoch 4 | 1863/ 8400 batches | train loss 0.4498338 +| epoch 4 | 1867/ 8400 batches | train loss 0.4770538 +| epoch 4 | 1871/ 8400 batches | train loss 0.4061402 +| epoch 4 | 1875/ 8400 batches | train loss 0.4263186 +| epoch 4 | 1879/ 8400 batches | train loss 0.4199362 +| epoch 4 | 1883/ 8400 batches | train loss 0.2854404 +| epoch 4 | 1887/ 8400 batches | train loss 0.4445031 +| epoch 4 | 1891/ 8400 batches | train loss 0.4847237 +| epoch 4 | 1895/ 8400 batches | train loss 0.3283386 +| epoch 4 | 1899/ 8400 batches | train loss 0.4541752 +| epoch 4 | 1903/ 8400 batches | train loss 0.2997260 +| epoch 4 | 1907/ 8400 batches | train loss 0.4341972 +| epoch 4 | 1911/ 8400 batches | train loss 0.4592231 +| epoch 4 | 1915/ 8400 batches | train loss 0.3567243 +| epoch 4 | 1919/ 8400 batches | train loss 0.3567318 +| epoch 4 | 1923/ 8400 batches | train loss 0.3963818 +| epoch 4 | 1927/ 8400 batches | train loss 0.4232846 +| epoch 4 | 1931/ 8400 batches | train loss 0.4636348 +| epoch 4 | 1935/ 8400 batches | train loss 0.5014863 +| epoch 4 | 1939/ 8400 batches | train loss 0.4501415 +| epoch 4 | 1943/ 8400 batches | train loss 0.4081010 +| epoch 4 | 1947/ 8400 batches | train loss 0.3512048 +| epoch 4 | 1951/ 8400 batches | train loss 0.3986461 +| epoch 4 | 1955/ 8400 batches | train loss 0.4182426 +| epoch 4 | 1959/ 8400 batches | train loss 0.3614182 +| epoch 4 | 1963/ 8400 batches | train loss 0.4534363 +| epoch 4 | 1967/ 8400 batches | train loss 0.4210992 +| epoch 4 | 1971/ 8400 batches | train loss 0.4710971 +| epoch 4 | 1975/ 8400 batches | train loss 0.4034770 +| epoch 4 | 1979/ 8400 batches | train loss 0.4995354 +| epoch 4 | 1983/ 8400 batches | train loss 0.4788920 +| epoch 4 | 1987/ 8400 batches | train loss 0.4020619 +| epoch 4 | 1991/ 8400 batches | train loss 0.4749075 +| epoch 4 | 1995/ 8400 batches | train loss 0.3673231 +| epoch 4 | 1999/ 8400 batches | train loss 0.4294292 +| epoch 4 | 2003/ 8400 batches | train loss 0.4120149 +| epoch 4 | 2007/ 8400 batches | train loss 0.4111922 +| epoch 4 | 2011/ 8400 batches | train loss 0.3949390 +| epoch 4 | 2015/ 8400 batches | train loss 0.4387488 +| epoch 4 | 2019/ 8400 batches | train loss 0.4510209 +| epoch 4 | 2023/ 8400 batches | train loss 0.4982571 +| epoch 4 | 2027/ 8400 batches | train loss 0.4352592 +| epoch 4 | 2031/ 8400 batches | train loss 0.4812754 +| epoch 4 | 2035/ 8400 batches | train loss 0.4607503 +| epoch 4 | 2039/ 8400 batches | train loss 0.5201844 +| epoch 4 | 2043/ 8400 batches | train loss 0.3642228 +| epoch 4 | 2047/ 8400 batches | train loss 0.4082204 +| epoch 4 | 2051/ 8400 batches | train loss 0.4184853 +| epoch 4 | 2055/ 8400 batches | train loss 0.3944647 +| epoch 4 | 2059/ 8400 batches | train loss 0.4727292 +| epoch 4 | 2063/ 8400 batches | train loss 0.4031073 +| epoch 4 | 2067/ 8400 batches | train loss 0.4443199 +| epoch 4 | 2071/ 8400 batches | train loss 0.3856274 +| epoch 4 | 2075/ 8400 batches | train loss 0.3420295 +| epoch 4 | 2079/ 8400 batches | train loss 0.4977271 +| epoch 4 | 2083/ 8400 batches | train loss 0.4684125 +| epoch 4 | 2087/ 8400 batches | train loss 0.4491968 +| epoch 4 | 2091/ 8400 batches | train loss 0.4258721 +| epoch 4 | 2095/ 8400 batches | train loss 0.4172281 +| epoch 4 | 2099/ 8400 batches | train loss 0.5997462 +| epoch 4 | 2103/ 8400 batches | train loss 0.4166218 +| epoch 4 | 2107/ 8400 batches | train loss 0.3886830 +| epoch 4 | 2111/ 8400 batches | train loss 0.4604959 +| epoch 4 | 2115/ 8400 batches | train loss 0.3843784 +| epoch 4 | 2119/ 8400 batches | train loss 0.4250330 +| epoch 4 | 2123/ 8400 batches | train loss 0.4214867 +| epoch 4 | 2127/ 8400 batches | train loss 0.4083308 +| epoch 4 | 2131/ 8400 batches | train loss 0.4132278 +| epoch 4 | 2135/ 8400 batches | train loss 0.5127577 +| epoch 4 | 2139/ 8400 batches | train loss 0.4765952 +| epoch 4 | 2143/ 8400 batches | train loss 0.3955799 +| epoch 4 | 2147/ 8400 batches | train loss 0.3634178 +| epoch 4 | 2151/ 8400 batches | train loss 0.4080905 +| epoch 4 | 2155/ 8400 batches | train loss 0.4675519 +| epoch 4 | 2159/ 8400 batches | train loss 0.3645905 +| epoch 4 | 2163/ 8400 batches | train loss 0.4863575 +| epoch 4 | 2167/ 8400 batches | train loss 0.4488544 +| epoch 4 | 2171/ 8400 batches | train loss 0.4587533 +| epoch 4 | 2175/ 8400 batches | train loss 0.4458793 +| epoch 4 | 2179/ 8400 batches | train loss 0.4013740 +| epoch 4 | 2183/ 8400 batches | train loss 0.4454888 +| epoch 4 | 2187/ 8400 batches | train loss 0.4523716 +| epoch 4 | 2191/ 8400 batches | train loss 0.4031016 +| epoch 4 | 2195/ 8400 batches | train loss 0.3791493 +| epoch 4 | 2199/ 8400 batches | train loss 0.4390596 +| epoch 4 | 2203/ 8400 batches | train loss 0.4378814 +| epoch 4 | 2207/ 8400 batches | train loss 0.4456995 +| epoch 4 | 2211/ 8400 batches | train loss 0.3963755 +| epoch 4 | 2215/ 8400 batches | train loss 0.3759447 +| epoch 4 | 2219/ 8400 batches | train loss 0.4245864 +| epoch 4 | 2223/ 8400 batches | train loss 0.3851196 +| epoch 4 | 2227/ 8400 batches | train loss 0.3563089 +| epoch 4 | 2231/ 8400 batches | train loss 0.4408543 +| epoch 4 | 2235/ 8400 batches | train loss 0.3894345 +| epoch 4 | 2239/ 8400 batches | train loss 0.3931941 +| epoch 4 | 2243/ 8400 batches | train loss 0.3780581 +| epoch 4 | 2247/ 8400 batches | train loss 0.4184580 +| epoch 4 | 2251/ 8400 batches | train loss 0.3590962 +| epoch 4 | 2255/ 8400 batches | train loss 0.4412781 +| epoch 4 | 2259/ 8400 batches | train loss 0.4140923 +| epoch 4 | 2263/ 8400 batches | train loss 0.4154480 +| epoch 4 | 2267/ 8400 batches | train loss 0.3720126 +| epoch 4 | 2271/ 8400 batches | train loss 0.4022622 +| epoch 4 | 2275/ 8400 batches | train loss 0.4656912 +| epoch 4 | 2279/ 8400 batches | train loss 0.3818513 +| epoch 4 | 2283/ 8400 batches | train loss 0.3810713 +| epoch 4 | 2287/ 8400 batches | train loss 0.4147546 +| epoch 4 | 2291/ 8400 batches | train loss 0.3690504 +| epoch 4 | 2295/ 8400 batches | train loss 0.4426138 +| epoch 4 | 2299/ 8400 batches | train loss 0.3409059 +| epoch 4 | 2303/ 8400 batches | train loss 0.3682172 +| epoch 4 | 2307/ 8400 batches | train loss 0.3172790 +| epoch 4 | 2311/ 8400 batches | train loss 0.3998615 +| epoch 4 | 2315/ 8400 batches | train loss 0.4703106 +| epoch 4 | 2319/ 8400 batches | train loss 0.2664122 +| epoch 4 | 2323/ 8400 batches | train loss 0.4697964 +| epoch 4 | 2327/ 8400 batches | train loss 0.4006893 +| epoch 4 | 2331/ 8400 batches | train loss 0.4081721 +| epoch 4 | 2335/ 8400 batches | train loss 0.3594357 +| epoch 4 | 2339/ 8400 batches | train loss 0.3991102 +| epoch 4 | 2343/ 8400 batches | train loss 0.4131203 +| epoch 4 | 2347/ 8400 batches | train loss 0.4460919 +| epoch 4 | 2351/ 8400 batches | train loss 0.4133761 +| epoch 4 | 2355/ 8400 batches | train loss 0.3685126 +| epoch 4 | 2359/ 8400 batches | train loss 0.3775850 +| epoch 4 | 2363/ 8400 batches | train loss 0.4593634 +| epoch 4 | 2367/ 8400 batches | train loss 0.4032910 +| epoch 4 | 2371/ 8400 batches | train loss 0.3687059 +| epoch 4 | 2375/ 8400 batches | train loss 0.4086692 +| epoch 4 | 2379/ 8400 batches | train loss 0.2994785 +| epoch 4 | 2383/ 8400 batches | train loss 0.4275791 +| epoch 4 | 2387/ 8400 batches | train loss 0.3665806 +| epoch 4 | 2391/ 8400 batches | train loss 0.4400147 +| epoch 4 | 2395/ 8400 batches | train loss 0.4440307 +| epoch 4 | 2399/ 8400 batches | train loss 0.4363187 +| epoch 4 | 2403/ 8400 batches | train loss 0.3840179 +| epoch 4 | 2407/ 8400 batches | train loss 0.4130956 +| epoch 4 | 2411/ 8400 batches | train loss 0.4334919 +| epoch 4 | 2415/ 8400 batches | train loss 0.4641358 +| epoch 4 | 2419/ 8400 batches | train loss 0.4586369 +| epoch 4 | 2423/ 8400 batches | train loss 0.4857319 +| epoch 4 | 2427/ 8400 batches | train loss 0.4163045 +| epoch 4 | 2431/ 8400 batches | train loss 0.4418001 +| epoch 4 | 2435/ 8400 batches | train loss 0.5211638 +| epoch 4 | 2439/ 8400 batches | train loss 0.4823262 +| epoch 4 | 2443/ 8400 batches | train loss 0.4342505 +| epoch 4 | 2447/ 8400 batches | train loss 0.3718773 +| epoch 4 | 2451/ 8400 batches | train loss 0.4314224 +| epoch 4 | 2455/ 8400 batches | train loss 0.4409936 +| epoch 4 | 2459/ 8400 batches | train loss 0.4277785 +| epoch 4 | 2463/ 8400 batches | train loss 0.4525014 +| epoch 4 | 2467/ 8400 batches | train loss 0.4864857 +| epoch 4 | 2471/ 8400 batches | train loss 0.4269811 +| epoch 4 | 2475/ 8400 batches | train loss 0.4463421 +| epoch 4 | 2479/ 8400 batches | train loss 0.3627416 +| epoch 4 | 2483/ 8400 batches | train loss 0.3821005 +| epoch 4 | 2487/ 8400 batches | train loss 0.3484793 +| epoch 4 | 2491/ 8400 batches | train loss 0.3571365 +| epoch 4 | 2495/ 8400 batches | train loss 0.4812956 +| epoch 4 | 2499/ 8400 batches | train loss 0.3870448 +| epoch 4 | 2503/ 8400 batches | train loss 0.4095666 +| epoch 4 | 2507/ 8400 batches | train loss 0.4422169 +| epoch 4 | 2511/ 8400 batches | train loss 0.3877973 +| epoch 4 | 2515/ 8400 batches | train loss 0.4274673 +| epoch 4 | 2519/ 8400 batches | train loss 0.3533845 +| epoch 4 | 2523/ 8400 batches | train loss 0.4525247 +| epoch 4 | 2527/ 8400 batches | train loss 0.4249533 +| epoch 4 | 2531/ 8400 batches | train loss 0.4371259 +| epoch 4 | 2535/ 8400 batches | train loss 0.4610544 +| epoch 4 | 2539/ 8400 batches | train loss 0.3666744 +| epoch 4 | 2543/ 8400 batches | train loss 0.3457428 +| epoch 4 | 2547/ 8400 batches | train loss 0.4118536 +| epoch 4 | 2551/ 8400 batches | train loss 0.4048484 +| epoch 4 | 2555/ 8400 batches | train loss 0.4601296 +| epoch 4 | 2559/ 8400 batches | train loss 0.4119819 +| epoch 4 | 2563/ 8400 batches | train loss 0.4413690 +| epoch 4 | 2567/ 8400 batches | train loss 0.4157786 +| epoch 4 | 2571/ 8400 batches | train loss 0.3621331 +| epoch 4 | 2575/ 8400 batches | train loss 0.3612413 +| epoch 4 | 2579/ 8400 batches | train loss 0.4068986 +| epoch 4 | 2583/ 8400 batches | train loss 0.4265789 +| epoch 4 | 2587/ 8400 batches | train loss 0.4351502 +| epoch 4 | 2591/ 8400 batches | train loss 0.3570413 +| epoch 4 | 2595/ 8400 batches | train loss 0.3851882 +| epoch 4 | 2599/ 8400 batches | train loss 0.3803907 +| epoch 4 | 2603/ 8400 batches | train loss 0.4094196 +| epoch 4 | 2607/ 8400 batches | train loss 0.4571862 +| epoch 4 | 2611/ 8400 batches | train loss 0.4036086 +| epoch 4 | 2615/ 8400 batches | train loss 0.4298558 +| epoch 4 | 2619/ 8400 batches | train loss 0.3924481 +| epoch 4 | 2623/ 8400 batches | train loss 0.3882535 +| epoch 4 | 2627/ 8400 batches | train loss 0.5265865 +| epoch 4 | 2631/ 8400 batches | train loss 0.4243666 +| epoch 4 | 2635/ 8400 batches | train loss 0.4424360 +| epoch 4 | 2639/ 8400 batches | train loss 0.4150951 +| epoch 4 | 2643/ 8400 batches | train loss 0.4665308 +| epoch 4 | 2647/ 8400 batches | train loss 0.4885202 +| epoch 4 | 2651/ 8400 batches | train loss 0.3653609 +| epoch 4 | 2655/ 8400 batches | train loss 0.4032482 +| epoch 4 | 2659/ 8400 batches | train loss 0.4263576 +| epoch 4 | 2663/ 8400 batches | train loss 0.3820003 +| epoch 4 | 2667/ 8400 batches | train loss 0.3549582 +| epoch 4 | 2671/ 8400 batches | train loss 0.3576797 +| epoch 4 | 2675/ 8400 batches | train loss 0.5059990 +| epoch 4 | 2679/ 8400 batches | train loss 0.3959746 +| epoch 4 | 2683/ 8400 batches | train loss 0.4248880 +| epoch 4 | 2687/ 8400 batches | train loss 0.3566818 +| epoch 4 | 2691/ 8400 batches | train loss 0.3883407 +| epoch 4 | 2695/ 8400 batches | train loss 0.3909492 +| epoch 4 | 2699/ 8400 batches | train loss 0.4625016 +| epoch 4 | 2703/ 8400 batches | train loss 0.3611713 +| epoch 4 | 2707/ 8400 batches | train loss 0.4201431 +| epoch 4 | 2711/ 8400 batches | train loss 0.3431580 +| epoch 4 | 2715/ 8400 batches | train loss 0.4051198 +| epoch 4 | 2719/ 8400 batches | train loss 0.4685678 +| epoch 4 | 2723/ 8400 batches | train loss 0.4449142 +| epoch 4 | 2727/ 8400 batches | train loss 0.6033585 +| epoch 4 | 2731/ 8400 batches | train loss 0.4382191 +| epoch 4 | 2735/ 8400 batches | train loss 0.3495726 +| epoch 4 | 2739/ 8400 batches | train loss 0.4002575 +| epoch 4 | 2743/ 8400 batches | train loss 0.4767329 +| epoch 4 | 2747/ 8400 batches | train loss 0.4903154 +| epoch 4 | 2751/ 8400 batches | train loss 0.3566936 +| epoch 4 | 2755/ 8400 batches | train loss 0.4571393 +| epoch 4 | 2759/ 8400 batches | train loss 0.4223784 +| epoch 4 | 2763/ 8400 batches | train loss 0.5259650 +| epoch 4 | 2767/ 8400 batches | train loss 0.4219395 +| epoch 4 | 2771/ 8400 batches | train loss 0.3584664 +| epoch 4 | 2775/ 8400 batches | train loss 0.4741451 +| epoch 4 | 2779/ 8400 batches | train loss 0.4711213 +| epoch 4 | 2783/ 8400 batches | train loss 0.4167042 +| epoch 4 | 2787/ 8400 batches | train loss 0.3984131 +| epoch 4 | 2791/ 8400 batches | train loss 0.4886111 +| epoch 4 | 2795/ 8400 batches | train loss 0.3926051 +| epoch 4 | 2799/ 8400 batches | train loss 0.3687599 +| epoch 4 | 2803/ 8400 batches | train loss 0.4114999 +| epoch 4 | 2807/ 8400 batches | train loss 0.4017787 +| epoch 4 | 2811/ 8400 batches | train loss 0.4196537 +| epoch 4 | 2815/ 8400 batches | train loss 0.4074268 +| epoch 4 | 2819/ 8400 batches | train loss 0.4502900 +| epoch 4 | 2823/ 8400 batches | train loss 0.3937156 +| epoch 4 | 2827/ 8400 batches | train loss 0.4222986 +| epoch 4 | 2831/ 8400 batches | train loss 0.4662248 +| epoch 4 | 2835/ 8400 batches | train loss 0.4663895 +| epoch 4 | 2839/ 8400 batches | train loss 0.4614444 +| epoch 4 | 2843/ 8400 batches | train loss 0.2977835 +| epoch 4 | 2847/ 8400 batches | train loss 0.4089286 +| epoch 4 | 2851/ 8400 batches | train loss 0.4064608 +| epoch 4 | 2855/ 8400 batches | train loss 0.4546776 +| epoch 4 | 2859/ 8400 batches | train loss 0.1999888 +| epoch 4 | 2863/ 8400 batches | train loss 0.3810314 +| epoch 4 | 2867/ 8400 batches | train loss 0.3361663 +| epoch 4 | 2871/ 8400 batches | train loss 0.3861985 +| epoch 4 | 2875/ 8400 batches | train loss 0.3675568 +| epoch 4 | 2879/ 8400 batches | train loss 0.3715824 +| epoch 4 | 2883/ 8400 batches | train loss 0.3882042 +| epoch 4 | 2887/ 8400 batches | train loss 0.4080533 +| epoch 4 | 2891/ 8400 batches | train loss 0.3938091 +| epoch 4 | 2895/ 8400 batches | train loss 0.2860805 +| epoch 4 | 2899/ 8400 batches | train loss 0.4392670 +| epoch 4 | 2903/ 8400 batches | train loss 0.4507629 +| epoch 4 | 2907/ 8400 batches | train loss 0.4473121 +| epoch 4 | 2911/ 8400 batches | train loss 0.4125431 +| epoch 4 | 2915/ 8400 batches | train loss 0.4886345 +| epoch 4 | 2919/ 8400 batches | train loss 0.4744732 +| epoch 4 | 2923/ 8400 batches | train loss 0.4322107 +| epoch 4 | 2927/ 8400 batches | train loss 0.4280143 +| epoch 4 | 2931/ 8400 batches | train loss 0.4063632 +| epoch 4 | 2935/ 8400 batches | train loss 0.4186508 +| epoch 4 | 2939/ 8400 batches | train loss 0.4389027 +| epoch 4 | 2943/ 8400 batches | train loss 0.4302064 +| epoch 4 | 2947/ 8400 batches | train loss 0.4125412 +| epoch 4 | 2951/ 8400 batches | train loss 0.4408848 +| epoch 4 | 2955/ 8400 batches | train loss 0.3416233 +| epoch 4 | 2959/ 8400 batches | train loss 0.4373837 +| epoch 4 | 2963/ 8400 batches | train loss 0.3774152 +| epoch 4 | 2967/ 8400 batches | train loss 0.4646848 +| epoch 4 | 2971/ 8400 batches | train loss 0.3829241 +| epoch 4 | 2975/ 8400 batches | train loss 0.3468503 +| epoch 4 | 2979/ 8400 batches | train loss 0.4869475 +| epoch 4 | 2983/ 8400 batches | train loss 0.4076281 +| epoch 4 | 2987/ 8400 batches | train loss 0.3855651 +| epoch 4 | 2991/ 8400 batches | train loss 0.4364104 +| epoch 4 | 2995/ 8400 batches | train loss 0.3652155 +| epoch 4 | 2999/ 8400 batches | train loss 0.5072603 +| epoch 4 | 3003/ 8400 batches | train loss 0.4117108 +| epoch 4 | 3007/ 8400 batches | train loss 0.4400720 +| epoch 4 | 3011/ 8400 batches | train loss 0.3775637 +| epoch 4 | 3015/ 8400 batches | train loss 0.4440508 +| epoch 4 | 3019/ 8400 batches | train loss 0.4230571 +| epoch 4 | 3023/ 8400 batches | train loss 0.4041026 +| epoch 4 | 3027/ 8400 batches | train loss 0.4427864 +| epoch 4 | 3031/ 8400 batches | train loss 0.4239852 +| epoch 4 | 3035/ 8400 batches | train loss 0.4621433 +| epoch 4 | 3039/ 8400 batches | train loss 0.4883300 +| epoch 4 | 3043/ 8400 batches | train loss 0.4257975 +| epoch 4 | 3047/ 8400 batches | train loss 0.3678156 +| epoch 4 | 3051/ 8400 batches | train loss 0.4751696 +| epoch 4 | 3055/ 8400 batches | train loss 0.4451501 +| epoch 4 | 3059/ 8400 batches | train loss 0.4683719 +| epoch 4 | 3063/ 8400 batches | train loss 0.4189243 +| epoch 4 | 3067/ 8400 batches | train loss 0.4083845 +| epoch 4 | 3071/ 8400 batches | train loss 0.3688815 +| epoch 4 | 3075/ 8400 batches | train loss 0.3687821 +| epoch 4 | 3079/ 8400 batches | train loss 0.3874044 +| epoch 4 | 3083/ 8400 batches | train loss 0.4999610 +| epoch 4 | 3087/ 8400 batches | train loss 0.3852431 +| epoch 4 | 3091/ 8400 batches | train loss 0.3870397 +| epoch 4 | 3095/ 8400 batches | train loss 0.3840301 +| epoch 4 | 3099/ 8400 batches | train loss 0.4455763 +| epoch 4 | 3103/ 8400 batches | train loss 0.4831408 +| epoch 4 | 3107/ 8400 batches | train loss 0.4564713 +| epoch 4 | 3111/ 8400 batches | train loss 0.4003755 +| epoch 4 | 3115/ 8400 batches | train loss 0.4693187 +| epoch 4 | 3119/ 8400 batches | train loss 0.3654432 +| epoch 4 | 3123/ 8400 batches | train loss 0.4217390 +| epoch 4 | 3127/ 8400 batches | train loss 0.3938797 +| epoch 4 | 3131/ 8400 batches | train loss 0.4624801 +| epoch 4 | 3135/ 8400 batches | train loss 0.3845765 +| epoch 4 | 3139/ 8400 batches | train loss 0.4393333 +| epoch 4 | 3143/ 8400 batches | train loss 0.4743010 +| epoch 4 | 3147/ 8400 batches | train loss 0.3598067 +| epoch 4 | 3151/ 8400 batches | train loss 0.4299434 +| epoch 4 | 3155/ 8400 batches | train loss 0.4735620 +| epoch 4 | 3159/ 8400 batches | train loss 0.3281854 +| epoch 4 | 3163/ 8400 batches | train loss 0.4326640 +| epoch 4 | 3167/ 8400 batches | train loss 0.4870673 +| epoch 4 | 3171/ 8400 batches | train loss 0.3478505 +| epoch 4 | 3175/ 8400 batches | train loss 0.3686488 +| epoch 4 | 3179/ 8400 batches | train loss 0.4035214 +| epoch 4 | 3183/ 8400 batches | train loss 0.5144723 +| epoch 4 | 3187/ 8400 batches | train loss 0.4505150 +| epoch 4 | 3191/ 8400 batches | train loss 0.3491422 +| epoch 4 | 3195/ 8400 batches | train loss 0.2835103 +| epoch 4 | 3199/ 8400 batches | train loss 0.5008584 +| epoch 4 | 3203/ 8400 batches | train loss 0.3933983 +| epoch 4 | 3207/ 8400 batches | train loss 0.4135214 +| epoch 4 | 3211/ 8400 batches | train loss 0.4140550 +| epoch 4 | 3215/ 8400 batches | train loss 0.3849527 +| epoch 4 | 3219/ 8400 batches | train loss 0.4680731 +| epoch 4 | 3223/ 8400 batches | train loss 0.4981446 +| epoch 4 | 3227/ 8400 batches | train loss 0.4242877 +| epoch 4 | 3231/ 8400 batches | train loss 0.4428618 +| epoch 4 | 3235/ 8400 batches | train loss 0.4091351 +| epoch 4 | 3239/ 8400 batches | train loss 0.4481752 +| epoch 4 | 3243/ 8400 batches | train loss 0.3768117 +| epoch 4 | 3247/ 8400 batches | train loss 0.4411800 +| epoch 4 | 3251/ 8400 batches | train loss 0.4091699 +| epoch 4 | 3255/ 8400 batches | train loss 0.4769952 +| epoch 4 | 3259/ 8400 batches | train loss 0.3856646 +| epoch 4 | 3263/ 8400 batches | train loss 0.3887368 +| epoch 4 | 3267/ 8400 batches | train loss 0.3781585 +| epoch 4 | 3271/ 8400 batches | train loss 0.3258487 +| epoch 4 | 3275/ 8400 batches | train loss 0.5436555 +| epoch 4 | 3279/ 8400 batches | train loss 0.4159019 +| epoch 4 | 3283/ 8400 batches | train loss 0.4056463 +| epoch 4 | 3287/ 8400 batches | train loss 0.4056996 +| epoch 4 | 3291/ 8400 batches | train loss 0.4034027 +| epoch 4 | 3295/ 8400 batches | train loss 0.4091363 +| epoch 4 | 3299/ 8400 batches | train loss 0.4466612 +| epoch 4 | 3303/ 8400 batches | train loss 0.4652999 +| epoch 4 | 3307/ 8400 batches | train loss 0.4116864 +| epoch 4 | 3311/ 8400 batches | train loss 0.4336420 +| epoch 4 | 3315/ 8400 batches | train loss 0.3660874 +| epoch 4 | 3319/ 8400 batches | train loss 0.3504260 +| epoch 4 | 3323/ 8400 batches | train loss 0.3872929 +| epoch 4 | 3327/ 8400 batches | train loss 0.3965423 +| epoch 4 | 3331/ 8400 batches | train loss 0.4107655 +| epoch 4 | 3335/ 8400 batches | train loss 0.4341398 +| epoch 4 | 3339/ 8400 batches | train loss 0.4460876 +| epoch 4 | 3343/ 8400 batches | train loss 0.5226185 +| epoch 4 | 3347/ 8400 batches | train loss 0.3984032 +| epoch 4 | 3351/ 8400 batches | train loss 0.4591290 +| epoch 4 | 3355/ 8400 batches | train loss 0.5010569 +| epoch 4 | 3359/ 8400 batches | train loss 0.4324121 +| epoch 4 | 3363/ 8400 batches | train loss 0.3957241 +| epoch 4 | 3367/ 8400 batches | train loss 0.4274285 +| epoch 4 | 3371/ 8400 batches | train loss 0.3779030 +| epoch 4 | 3375/ 8400 batches | train loss 0.3728356 +| epoch 4 | 3379/ 8400 batches | train loss 0.4094099 +| epoch 4 | 3383/ 8400 batches | train loss 0.4524478 +| epoch 4 | 3387/ 8400 batches | train loss 0.3480811 +| epoch 4 | 3391/ 8400 batches | train loss 0.4270226 +| epoch 4 | 3395/ 8400 batches | train loss 0.3776846 +| epoch 4 | 3399/ 8400 batches | train loss 0.4651729 +| epoch 4 | 3403/ 8400 batches | train loss 0.3949957 +| epoch 4 | 3407/ 8400 batches | train loss 0.3939026 +| epoch 4 | 3411/ 8400 batches | train loss 0.4832631 +| epoch 4 | 3415/ 8400 batches | train loss 0.3869405 +| epoch 4 | 3419/ 8400 batches | train loss 0.3836241 +| epoch 4 | 3423/ 8400 batches | train loss 0.4551652 +| epoch 4 | 3427/ 8400 batches | train loss 0.4526220 +| epoch 4 | 3431/ 8400 batches | train loss 0.3933750 +| epoch 4 | 3435/ 8400 batches | train loss 0.3793714 +| epoch 4 | 3439/ 8400 batches | train loss 0.4962791 +| epoch 4 | 3443/ 8400 batches | train loss 0.4224555 +| epoch 4 | 3447/ 8400 batches | train loss 0.4308376 +| epoch 4 | 3451/ 8400 batches | train loss 0.3779808 +| epoch 4 | 3455/ 8400 batches | train loss 0.4428584 +| epoch 4 | 3459/ 8400 batches | train loss 0.4070317 +| epoch 4 | 3463/ 8400 batches | train loss 0.4492293 +| epoch 4 | 3467/ 8400 batches | train loss 0.4023001 +| epoch 4 | 3471/ 8400 batches | train loss 0.3704171 +| epoch 4 | 3475/ 8400 batches | train loss 0.4827487 +| epoch 4 | 3479/ 8400 batches | train loss 0.3993930 +| epoch 4 | 3483/ 8400 batches | train loss 0.4115998 +| epoch 4 | 3487/ 8400 batches | train loss 0.4568737 +| epoch 4 | 3491/ 8400 batches | train loss 0.3888425 +| epoch 4 | 3495/ 8400 batches | train loss 0.3983206 +| epoch 4 | 3499/ 8400 batches | train loss 0.4409790 +| epoch 4 | 3503/ 8400 batches | train loss 0.3911090 +| epoch 4 | 3507/ 8400 batches | train loss 0.4489732 +| epoch 4 | 3511/ 8400 batches | train loss 0.2060282 +| epoch 4 | 3515/ 8400 batches | train loss 0.4066453 +| epoch 4 | 3519/ 8400 batches | train loss 0.4723527 +| epoch 4 | 3523/ 8400 batches | train loss 0.4155204 +| epoch 4 | 3527/ 8400 batches | train loss 0.4133646 +| epoch 4 | 3531/ 8400 batches | train loss 0.3467073 +| epoch 4 | 3535/ 8400 batches | train loss 0.4422851 +| epoch 4 | 3539/ 8400 batches | train loss 0.3917217 +| epoch 4 | 3543/ 8400 batches | train loss 0.4525858 +| epoch 4 | 3547/ 8400 batches | train loss 0.4051102 +| epoch 4 | 3551/ 8400 batches | train loss 0.4188965 +| epoch 4 | 3555/ 8400 batches | train loss 0.3857713 +| epoch 4 | 3559/ 8400 batches | train loss 0.4870083 +| epoch 4 | 3563/ 8400 batches | train loss 0.4054637 +| epoch 4 | 3567/ 8400 batches | train loss 0.4228815 +| epoch 4 | 3571/ 8400 batches | train loss 0.4416664 +| epoch 4 | 3575/ 8400 batches | train loss 0.4269339 +| epoch 4 | 3579/ 8400 batches | train loss 0.3992640 +| epoch 4 | 3583/ 8400 batches | train loss 0.3735234 +| epoch 4 | 3587/ 8400 batches | train loss 0.4801037 +| epoch 4 | 3591/ 8400 batches | train loss 0.3593550 +| epoch 4 | 3595/ 8400 batches | train loss 0.3023711 +| epoch 4 | 3599/ 8400 batches | train loss 0.4298867 +| epoch 4 | 3603/ 8400 batches | train loss 0.4676791 +| epoch 4 | 3607/ 8400 batches | train loss 0.4233691 +| epoch 4 | 3611/ 8400 batches | train loss 0.4008090 +| epoch 4 | 3615/ 8400 batches | train loss 0.4132325 +| epoch 4 | 3619/ 8400 batches | train loss 0.4831947 +| epoch 4 | 3623/ 8400 batches | train loss 0.4091768 +| epoch 4 | 3627/ 8400 batches | train loss 0.4004737 +| epoch 4 | 3631/ 8400 batches | train loss 0.3291369 +| epoch 4 | 3635/ 8400 batches | train loss 0.3630120 +| epoch 4 | 3639/ 8400 batches | train loss 0.4366699 +| epoch 4 | 3643/ 8400 batches | train loss 0.5259154 +| epoch 4 | 3647/ 8400 batches | train loss 0.5034429 +| epoch 4 | 3651/ 8400 batches | train loss 0.3975719 +| epoch 4 | 3655/ 8400 batches | train loss 0.3414751 +| epoch 4 | 3659/ 8400 batches | train loss 0.4787593 +| epoch 4 | 3663/ 8400 batches | train loss 0.4565538 +| epoch 4 | 3667/ 8400 batches | train loss 0.4204066 +| epoch 4 | 3671/ 8400 batches | train loss 0.4397473 +| epoch 4 | 3675/ 8400 batches | train loss 0.5111009 +| epoch 4 | 3679/ 8400 batches | train loss 0.4459023 +| epoch 4 | 3683/ 8400 batches | train loss 0.3572014 +| epoch 4 | 3687/ 8400 batches | train loss 0.4376549 +| epoch 4 | 3691/ 8400 batches | train loss 0.4346403 +| epoch 4 | 3695/ 8400 batches | train loss 0.4842799 +| epoch 4 | 3699/ 8400 batches | train loss 0.3927224 +| epoch 4 | 3703/ 8400 batches | train loss 0.4272631 +| epoch 4 | 3707/ 8400 batches | train loss 0.4751432 +| epoch 4 | 3711/ 8400 batches | train loss 0.3666979 +| epoch 4 | 3715/ 8400 batches | train loss 0.4007412 +| epoch 4 | 3719/ 8400 batches | train loss 0.3053149 +| epoch 4 | 3723/ 8400 batches | train loss 0.5269612 +| epoch 4 | 3727/ 8400 batches | train loss 0.4048705 +| epoch 4 | 3731/ 8400 batches | train loss 0.3854586 +| epoch 4 | 3735/ 8400 batches | train loss 0.3445857 +| epoch 4 | 3739/ 8400 batches | train loss 0.4488396 +| epoch 4 | 3743/ 8400 batches | train loss 0.4793410 +| epoch 4 | 3747/ 8400 batches | train loss 0.4327419 +| epoch 4 | 3751/ 8400 batches | train loss 0.4720801 +| epoch 4 | 3755/ 8400 batches | train loss 0.3950850 +| epoch 4 | 3759/ 8400 batches | train loss 0.4533178 +| epoch 4 | 3763/ 8400 batches | train loss 0.4085871 +| epoch 4 | 3767/ 8400 batches | train loss 0.4002294 +| epoch 4 | 3771/ 8400 batches | train loss 0.4314652 +| epoch 4 | 3775/ 8400 batches | train loss 0.4429258 +| epoch 4 | 3779/ 8400 batches | train loss 0.4525704 +| epoch 4 | 3783/ 8400 batches | train loss 0.4848570 +| epoch 4 | 3787/ 8400 batches | train loss 0.3941830 +| epoch 4 | 3791/ 8400 batches | train loss 0.4119557 +| epoch 4 | 3795/ 8400 batches | train loss 0.4111979 +| epoch 4 | 3799/ 8400 batches | train loss 0.3899562 +| epoch 4 | 3803/ 8400 batches | train loss 0.3513546 +| epoch 4 | 3807/ 8400 batches | train loss 0.3101423 +| epoch 4 | 3811/ 8400 batches | train loss 0.3739613 +| epoch 4 | 3815/ 8400 batches | train loss 0.3879176 +| epoch 4 | 3819/ 8400 batches | train loss 0.3886907 +| epoch 4 | 3823/ 8400 batches | train loss 0.4389945 +| epoch 4 | 3827/ 8400 batches | train loss 0.3986944 +| epoch 4 | 3831/ 8400 batches | train loss 0.3622794 +| epoch 4 | 3835/ 8400 batches | train loss 0.3836834 +| epoch 4 | 3839/ 8400 batches | train loss 0.4216721 +| epoch 4 | 3843/ 8400 batches | train loss 0.3907187 +| epoch 4 | 3847/ 8400 batches | train loss 0.4399406 +| epoch 4 | 3851/ 8400 batches | train loss 0.3782063 +| epoch 4 | 3855/ 8400 batches | train loss 0.3823020 +| epoch 4 | 3859/ 8400 batches | train loss 0.5112603 +| epoch 4 | 3863/ 8400 batches | train loss 0.4424102 +| epoch 4 | 3867/ 8400 batches | train loss 0.4100693 +| epoch 4 | 3871/ 8400 batches | train loss 0.3788882 +| epoch 4 | 3875/ 8400 batches | train loss 0.3549196 +| epoch 4 | 3879/ 8400 batches | train loss 0.3892424 +| epoch 4 | 3883/ 8400 batches | train loss 0.3695137 +| epoch 4 | 3887/ 8400 batches | train loss 0.4435688 +| epoch 4 | 3891/ 8400 batches | train loss 0.4028544 +| epoch 4 | 3895/ 8400 batches | train loss 0.4968684 +| epoch 4 | 3899/ 8400 batches | train loss 0.4754418 +| epoch 4 | 3903/ 8400 batches | train loss 0.4450870 +| epoch 4 | 3907/ 8400 batches | train loss 0.2952068 +| epoch 4 | 3911/ 8400 batches | train loss 0.4564338 +| epoch 4 | 3915/ 8400 batches | train loss 0.4402645 +| epoch 4 | 3919/ 8400 batches | train loss 0.4437817 +| epoch 4 | 3923/ 8400 batches | train loss 0.4236561 +| epoch 4 | 3927/ 8400 batches | train loss 0.4425639 +| epoch 4 | 3931/ 8400 batches | train loss 0.4890124 +| epoch 4 | 3935/ 8400 batches | train loss 0.4672004 +| epoch 4 | 3939/ 8400 batches | train loss 0.3781010 +| epoch 4 | 3943/ 8400 batches | train loss 0.4158883 +| epoch 4 | 3947/ 8400 batches | train loss 0.4671612 +| epoch 4 | 3951/ 8400 batches | train loss 0.4406732 +| epoch 4 | 3955/ 8400 batches | train loss 0.3323492 +| epoch 4 | 3959/ 8400 batches | train loss 0.4389936 +| epoch 4 | 3963/ 8400 batches | train loss 0.4421006 +| epoch 4 | 3967/ 8400 batches | train loss 0.3716131 +| epoch 4 | 3971/ 8400 batches | train loss 0.3859719 +| epoch 4 | 3975/ 8400 batches | train loss 0.3349609 +| epoch 4 | 3979/ 8400 batches | train loss 0.4554691 +| epoch 4 | 3983/ 8400 batches | train loss 0.3149126 +| epoch 4 | 3987/ 8400 batches | train loss 0.3588722 +| epoch 4 | 3991/ 8400 batches | train loss 0.4337630 +| epoch 4 | 3995/ 8400 batches | train loss 0.3674546 +| epoch 4 | 3999/ 8400 batches | train loss 0.3862978 +| epoch 4 | 4003/ 8400 batches | train loss 0.3732492 +| epoch 4 | 4007/ 8400 batches | train loss 0.3653289 +| epoch 4 | 4011/ 8400 batches | train loss 0.3609504 +| epoch 4 | 4015/ 8400 batches | train loss 0.4410775 +| epoch 4 | 4019/ 8400 batches | train loss 0.4012244 +| epoch 4 | 4023/ 8400 batches | train loss 0.5223273 +| epoch 4 | 4027/ 8400 batches | train loss 0.3491798 +| epoch 4 | 4031/ 8400 batches | train loss 0.4757296 +| epoch 4 | 4035/ 8400 batches | train loss 0.4569734 +| epoch 4 | 4039/ 8400 batches | train loss 0.4024704 +| epoch 4 | 4043/ 8400 batches | train loss 0.4710162 +| epoch 4 | 4047/ 8400 batches | train loss 0.5035806 +| epoch 4 | 4051/ 8400 batches | train loss 0.4058856 +| epoch 4 | 4055/ 8400 batches | train loss 0.4595916 +| epoch 4 | 4059/ 8400 batches | train loss 0.4469716 +| epoch 4 | 4063/ 8400 batches | train loss 0.4934871 +| epoch 4 | 4067/ 8400 batches | train loss 0.4480666 +| epoch 4 | 4071/ 8400 batches | train loss 0.3510290 +| epoch 4 | 4075/ 8400 batches | train loss 0.3363369 +| epoch 4 | 4079/ 8400 batches | train loss 0.4476732 +| epoch 4 | 4083/ 8400 batches | train loss 0.3967434 +| epoch 4 | 4087/ 8400 batches | train loss 0.5110524 +| epoch 4 | 4091/ 8400 batches | train loss 0.3845339 +| epoch 4 | 4095/ 8400 batches | train loss 0.3727357 +| epoch 4 | 4099/ 8400 batches | train loss 0.4510589 +| epoch 4 | 4103/ 8400 batches | train loss 0.4082730 +| epoch 4 | 4107/ 8400 batches | train loss 0.3974631 +| epoch 4 | 4111/ 8400 batches | train loss 0.3902041 +| epoch 4 | 4115/ 8400 batches | train loss 0.4568363 +| epoch 4 | 4119/ 8400 batches | train loss 0.3994294 +| epoch 4 | 4123/ 8400 batches | train loss 0.3908883 +| epoch 4 | 4127/ 8400 batches | train loss 0.3881307 +| epoch 4 | 4131/ 8400 batches | train loss 0.3192787 +| epoch 4 | 4135/ 8400 batches | train loss 0.3631756 +| epoch 4 | 4139/ 8400 batches | train loss 0.4293251 +| epoch 4 | 4143/ 8400 batches | train loss 0.4646492 +| epoch 4 | 4147/ 8400 batches | train loss 0.4501648 +| epoch 4 | 4151/ 8400 batches | train loss 0.3511296 +| epoch 4 | 4155/ 8400 batches | train loss 0.4220756 +| epoch 4 | 4159/ 8400 batches | train loss 0.4240253 +| epoch 4 | 4163/ 8400 batches | train loss 0.2967498 +| epoch 4 | 4167/ 8400 batches | train loss 0.4120102 +| epoch 4 | 4171/ 8400 batches | train loss 0.4328113 +| epoch 4 | 4175/ 8400 batches | train loss 0.3769087 +| epoch 4 | 4179/ 8400 batches | train loss 0.2625770 +| epoch 4 | 4183/ 8400 batches | train loss 0.4086582 +| epoch 4 | 4187/ 8400 batches | train loss 0.4511029 +| epoch 4 | 4191/ 8400 batches | train loss 0.4630453 +| epoch 4 | 4195/ 8400 batches | train loss 0.1651094 +| epoch 4 | 4199/ 8400 batches | train loss 0.4783599 +| epoch 4 | 4203/ 8400 batches | train loss 0.3761642 +| epoch 4 | 4207/ 8400 batches | train loss 0.4013549 +| epoch 4 | 4211/ 8400 batches | train loss 0.3623404 +| epoch 4 | 4215/ 8400 batches | train loss 0.4084453 +| epoch 4 | 4219/ 8400 batches | train loss 0.3858016 +| epoch 4 | 4223/ 8400 batches | train loss 0.4726909 +| epoch 4 | 4227/ 8400 batches | train loss 0.4919816 +| epoch 4 | 4231/ 8400 batches | train loss 0.4773221 +| epoch 4 | 4235/ 8400 batches | train loss 0.4447165 +| epoch 4 | 4239/ 8400 batches | train loss 0.4512951 +| epoch 4 | 4243/ 8400 batches | train loss 0.3827592 +| epoch 4 | 4247/ 8400 batches | train loss 0.4075868 +| epoch 4 | 4251/ 8400 batches | train loss 0.4427724 +| epoch 4 | 4255/ 8400 batches | train loss 0.3482966 +| epoch 4 | 4259/ 8400 batches | train loss 0.4527882 +| epoch 4 | 4263/ 8400 batches | train loss 0.3763696 +| epoch 4 | 4267/ 8400 batches | train loss 0.4021733 +| epoch 4 | 4271/ 8400 batches | train loss 0.4553432 +| epoch 4 | 4275/ 8400 batches | train loss 0.4358488 +| epoch 4 | 4279/ 8400 batches | train loss 0.5892720 +| epoch 4 | 4283/ 8400 batches | train loss 0.3742928 +| epoch 4 | 4287/ 8400 batches | train loss 0.4854721 +| epoch 4 | 4291/ 8400 batches | train loss 0.4730308 +| epoch 4 | 4295/ 8400 batches | train loss 0.3637975 +| epoch 4 | 4299/ 8400 batches | train loss 0.4412740 +| epoch 4 | 4303/ 8400 batches | train loss 0.4530545 +| epoch 4 | 4307/ 8400 batches | train loss 0.3561410 +| epoch 4 | 4311/ 8400 batches | train loss 0.4389195 +| epoch 4 | 4315/ 8400 batches | train loss 0.3898017 +| epoch 4 | 4319/ 8400 batches | train loss 0.3482287 +| epoch 4 | 4323/ 8400 batches | train loss 0.4794357 +| epoch 4 | 4327/ 8400 batches | train loss 0.4422925 +| epoch 4 | 4331/ 8400 batches | train loss 0.4020088 +| epoch 4 | 4335/ 8400 batches | train loss 0.4786743 +| epoch 4 | 4339/ 8400 batches | train loss 0.3867428 +| epoch 4 | 4343/ 8400 batches | train loss 0.5071729 +| epoch 4 | 4347/ 8400 batches | train loss 0.3540088 +| epoch 4 | 4351/ 8400 batches | train loss 0.4046291 +| epoch 4 | 4355/ 8400 batches | train loss 0.4264438 +| epoch 4 | 4359/ 8400 batches | train loss 0.3862260 +| epoch 4 | 4363/ 8400 batches | train loss 0.4954306 +| epoch 4 | 4367/ 8400 batches | train loss 0.4510718 +| epoch 4 | 4371/ 8400 batches | train loss 0.4486637 +| epoch 4 | 4375/ 8400 batches | train loss 0.4805099 +| epoch 4 | 4379/ 8400 batches | train loss 0.4149433 +| epoch 4 | 4383/ 8400 batches | train loss 0.3823513 +| epoch 4 | 4387/ 8400 batches | train loss 0.3908048 +| epoch 4 | 4391/ 8400 batches | train loss 0.4466996 +| epoch 4 | 4395/ 8400 batches | train loss 0.4245649 +| epoch 4 | 4399/ 8400 batches | train loss 0.5089067 +| epoch 4 | 4403/ 8400 batches | train loss 0.4083614 +| epoch 4 | 4407/ 8400 batches | train loss 0.2874344 +| epoch 4 | 4411/ 8400 batches | train loss 0.4447168 +| epoch 4 | 4415/ 8400 batches | train loss 0.5169556 +| epoch 4 | 4419/ 8400 batches | train loss 0.3908626 +| epoch 4 | 4423/ 8400 batches | train loss 0.3362079 +| epoch 4 | 4427/ 8400 batches | train loss 0.4049070 +| epoch 4 | 4431/ 8400 batches | train loss 0.3345198 +| epoch 4 | 4435/ 8400 batches | train loss 0.3891635 +| epoch 4 | 4439/ 8400 batches | train loss 0.4127146 +| epoch 4 | 4443/ 8400 batches | train loss 0.4648447 +| epoch 4 | 4447/ 8400 batches | train loss 0.5588483 +| epoch 4 | 4451/ 8400 batches | train loss 0.4084594 +| epoch 4 | 4455/ 8400 batches | train loss 0.4280265 +| epoch 4 | 4459/ 8400 batches | train loss 0.4483850 +| epoch 4 | 4463/ 8400 batches | train loss 0.4659249 +| epoch 4 | 4467/ 8400 batches | train loss 0.4708718 +| epoch 4 | 4471/ 8400 batches | train loss 0.4884421 +| epoch 4 | 4475/ 8400 batches | train loss 0.4458250 +| epoch 4 | 4479/ 8400 batches | train loss 0.4251419 +| epoch 4 | 4483/ 8400 batches | train loss 0.4260715 +| epoch 4 | 4487/ 8400 batches | train loss 0.4734489 +| epoch 4 | 4491/ 8400 batches | train loss 0.5725158 +| epoch 4 | 4495/ 8400 batches | train loss 0.4433520 +| epoch 4 | 4499/ 8400 batches | train loss 0.4535120 +| epoch 4 | 4503/ 8400 batches | train loss 0.4302741 +| epoch 4 | 4507/ 8400 batches | train loss 0.4169793 +| epoch 4 | 4511/ 8400 batches | train loss 0.4049386 +| epoch 4 | 4515/ 8400 batches | train loss 0.4523040 +| epoch 4 | 4519/ 8400 batches | train loss 0.4417014 +| epoch 4 | 4523/ 8400 batches | train loss 0.4100343 +| epoch 4 | 4527/ 8400 batches | train loss 0.4152349 +| epoch 4 | 4531/ 8400 batches | train loss 0.4756121 +| epoch 4 | 4535/ 8400 batches | train loss 0.3815087 +| epoch 4 | 4539/ 8400 batches | train loss 0.3666453 +| epoch 4 | 4543/ 8400 batches | train loss 0.4363488 +| epoch 4 | 4547/ 8400 batches | train loss 0.3703486 +| epoch 4 | 4551/ 8400 batches | train loss 0.3971298 +| epoch 4 | 4555/ 8400 batches | train loss 0.4672128 +| epoch 4 | 4559/ 8400 batches | train loss 0.4264669 +| epoch 4 | 4563/ 8400 batches | train loss 0.4231305 +| epoch 4 | 4567/ 8400 batches | train loss 0.4153859 +| epoch 4 | 4571/ 8400 batches | train loss 0.4569786 +| epoch 4 | 4575/ 8400 batches | train loss 0.4114566 +| epoch 4 | 4579/ 8400 batches | train loss 0.3870401 +| epoch 4 | 4583/ 8400 batches | train loss 0.4133212 +| epoch 4 | 4587/ 8400 batches | train loss 0.3876987 +| epoch 4 | 4591/ 8400 batches | train loss 0.4213488 +| epoch 4 | 4595/ 8400 batches | train loss 0.4303699 +| epoch 4 | 4599/ 8400 batches | train loss 0.3813841 +| epoch 4 | 4603/ 8400 batches | train loss 0.3479789 +| epoch 4 | 4607/ 8400 batches | train loss 0.3029099 +| epoch 4 | 4611/ 8400 batches | train loss 0.4846924 +| epoch 4 | 4615/ 8400 batches | train loss 0.4329126 +| epoch 4 | 4619/ 8400 batches | train loss 0.4656628 +| epoch 4 | 4623/ 8400 batches | train loss 0.5299397 +| epoch 4 | 4627/ 8400 batches | train loss 0.3982302 +| epoch 4 | 4631/ 8400 batches | train loss 0.3358907 +| epoch 4 | 4635/ 8400 batches | train loss 0.4471242 +| epoch 4 | 4639/ 8400 batches | train loss 0.3858154 +| epoch 4 | 4643/ 8400 batches | train loss 0.4086359 +| epoch 4 | 4647/ 8400 batches | train loss 0.3493141 +| epoch 4 | 4651/ 8400 batches | train loss 0.4275885 +| epoch 4 | 4655/ 8400 batches | train loss 0.3306875 +| epoch 4 | 4659/ 8400 batches | train loss 0.4360405 +| epoch 4 | 4663/ 8400 batches | train loss 0.3631793 +| epoch 4 | 4667/ 8400 batches | train loss 0.4121473 +| epoch 4 | 4671/ 8400 batches | train loss 0.4066014 +| epoch 4 | 4675/ 8400 batches | train loss 0.3980960 +| epoch 4 | 4679/ 8400 batches | train loss 0.4438380 +| epoch 4 | 4683/ 8400 batches | train loss 0.3040125 +| epoch 4 | 4687/ 8400 batches | train loss 0.4010695 +| epoch 4 | 4691/ 8400 batches | train loss 0.4318111 +| epoch 4 | 4695/ 8400 batches | train loss 0.4031559 +| epoch 4 | 4699/ 8400 batches | train loss 0.4097743 +| epoch 4 | 4703/ 8400 batches | train loss 0.3849545 +| epoch 4 | 4707/ 8400 batches | train loss 0.4900526 +| epoch 4 | 4711/ 8400 batches | train loss 0.5305321 +| epoch 4 | 4715/ 8400 batches | train loss 0.2653347 +| epoch 4 | 4719/ 8400 batches | train loss 0.4711556 +| epoch 4 | 4723/ 8400 batches | train loss 0.5049659 +| epoch 4 | 4727/ 8400 batches | train loss 0.4265203 +| epoch 4 | 4731/ 8400 batches | train loss 0.4934627 +| epoch 4 | 4735/ 8400 batches | train loss 0.4300589 +| epoch 4 | 4739/ 8400 batches | train loss 0.4461673 +| epoch 4 | 4743/ 8400 batches | train loss 0.4416638 +| epoch 4 | 4747/ 8400 batches | train loss 0.2891733 +| epoch 4 | 4751/ 8400 batches | train loss 0.4037630 +| epoch 4 | 4755/ 8400 batches | train loss 0.4575748 +| epoch 4 | 4759/ 8400 batches | train loss 0.4286479 +| epoch 4 | 4763/ 8400 batches | train loss 0.3989279 +| epoch 4 | 4767/ 8400 batches | train loss 0.4453625 +| epoch 4 | 4771/ 8400 batches | train loss 0.3378610 +| epoch 4 | 4775/ 8400 batches | train loss 0.5452445 +| epoch 4 | 4779/ 8400 batches | train loss 0.4064617 +| epoch 4 | 4783/ 8400 batches | train loss 0.4695174 +| epoch 4 | 4787/ 8400 batches | train loss 0.3387898 +| epoch 4 | 4791/ 8400 batches | train loss 0.4047583 +| epoch 4 | 4795/ 8400 batches | train loss 0.3812910 +| epoch 4 | 4799/ 8400 batches | train loss 0.3782430 +| epoch 4 | 4803/ 8400 batches | train loss 0.4423445 +| epoch 4 | 4807/ 8400 batches | train loss 0.3722583 +| epoch 4 | 4811/ 8400 batches | train loss 0.4356231 +| epoch 4 | 4815/ 8400 batches | train loss 0.4015161 +| epoch 4 | 4819/ 8400 batches | train loss 0.4402446 +| epoch 4 | 4823/ 8400 batches | train loss 0.3359776 +| epoch 4 | 4827/ 8400 batches | train loss 0.5164189 +| epoch 4 | 4831/ 8400 batches | train loss 0.4747681 +| epoch 4 | 4835/ 8400 batches | train loss 0.4217274 +| epoch 4 | 4839/ 8400 batches | train loss 0.3847592 +| epoch 4 | 4843/ 8400 batches | train loss 0.4774133 +| epoch 4 | 4847/ 8400 batches | train loss 0.3719828 +| epoch 4 | 4851/ 8400 batches | train loss 0.4187745 +| epoch 4 | 4855/ 8400 batches | train loss 0.4721383 +| epoch 4 | 4859/ 8400 batches | train loss 0.4018688 +| epoch 4 | 4863/ 8400 batches | train loss 0.3521723 +| epoch 4 | 4867/ 8400 batches | train loss 0.4450685 +| epoch 4 | 4871/ 8400 batches | train loss 0.4344862 +| epoch 4 | 4875/ 8400 batches | train loss 0.3807382 +| epoch 4 | 4879/ 8400 batches | train loss 0.3836565 +| epoch 4 | 4883/ 8400 batches | train loss 0.4052928 +| epoch 4 | 4887/ 8400 batches | train loss 0.4113188 +| epoch 4 | 4891/ 8400 batches | train loss 0.4318188 +| epoch 4 | 4895/ 8400 batches | train loss 0.3906104 +| epoch 4 | 4899/ 8400 batches | train loss 0.4075308 +| epoch 4 | 4903/ 8400 batches | train loss 0.3978979 +| epoch 4 | 4907/ 8400 batches | train loss 0.4854924 +| epoch 4 | 4911/ 8400 batches | train loss 0.3952683 +| epoch 4 | 4915/ 8400 batches | train loss 0.4133812 +| epoch 4 | 4919/ 8400 batches | train loss 0.4328925 +| epoch 4 | 4923/ 8400 batches | train loss 0.4199337 +| epoch 4 | 4927/ 8400 batches | train loss 0.5187842 +| epoch 4 | 4931/ 8400 batches | train loss 0.4100984 +| epoch 4 | 4935/ 8400 batches | train loss 0.3806044 +| epoch 4 | 4939/ 8400 batches | train loss 0.4457298 +| epoch 4 | 4943/ 8400 batches | train loss 0.4624846 +| epoch 4 | 4947/ 8400 batches | train loss 0.3564779 +| epoch 4 | 4951/ 8400 batches | train loss 0.4366631 +| epoch 4 | 4955/ 8400 batches | train loss 0.3914947 +| epoch 4 | 4959/ 8400 batches | train loss 0.4636726 +| epoch 4 | 4963/ 8400 batches | train loss 0.4120757 +| epoch 4 | 4967/ 8400 batches | train loss 0.3885132 +| epoch 4 | 4971/ 8400 batches | train loss 0.4184952 +| epoch 4 | 4975/ 8400 batches | train loss 0.4223902 +| epoch 4 | 4979/ 8400 batches | train loss 0.4002074 +| epoch 4 | 4983/ 8400 batches | train loss 0.4052045 +| epoch 4 | 4987/ 8400 batches | train loss 0.5021925 +| epoch 4 | 4991/ 8400 batches | train loss 0.5148458 +| epoch 4 | 4995/ 8400 batches | train loss 0.3564010 +| epoch 4 | 4999/ 8400 batches | train loss 0.4208273 +| epoch 4 | 5003/ 8400 batches | train loss 0.4520566 +| epoch 4 | 5007/ 8400 batches | train loss 0.4958116 +| epoch 4 | 5011/ 8400 batches | train loss 0.4035544 +| epoch 4 | 5015/ 8400 batches | train loss 0.3954738 +| epoch 4 | 5019/ 8400 batches | train loss 0.4275880 +| epoch 4 | 5023/ 8400 batches | train loss 0.4231168 +| epoch 4 | 5027/ 8400 batches | train loss 0.3996632 +| epoch 4 | 5031/ 8400 batches | train loss 0.3651542 +| epoch 4 | 5035/ 8400 batches | train loss 0.3681931 +| epoch 4 | 5039/ 8400 batches | train loss 0.4008151 +| epoch 4 | 5043/ 8400 batches | train loss 0.4733796 +| epoch 4 | 5047/ 8400 batches | train loss 0.4118138 +| epoch 4 | 5051/ 8400 batches | train loss 0.3848284 +| epoch 4 | 5055/ 8400 batches | train loss 0.4236087 +| epoch 4 | 5059/ 8400 batches | train loss 0.4434066 +| epoch 4 | 5063/ 8400 batches | train loss 0.3323950 +| epoch 4 | 5067/ 8400 batches | train loss 0.4703171 +| epoch 4 | 5071/ 8400 batches | train loss 0.4209989 +| epoch 4 | 5075/ 8400 batches | train loss 0.3716661 +| epoch 4 | 5079/ 8400 batches | train loss 0.4312255 +| epoch 4 | 5083/ 8400 batches | train loss 0.3993547 +| epoch 4 | 5087/ 8400 batches | train loss 0.4748791 +| epoch 4 | 5091/ 8400 batches | train loss 0.4326349 +| epoch 4 | 5095/ 8400 batches | train loss 0.3786300 +| epoch 4 | 5099/ 8400 batches | train loss 0.3194889 +| epoch 4 | 5103/ 8400 batches | train loss 0.4518282 +| epoch 4 | 5107/ 8400 batches | train loss 0.4787078 +| epoch 4 | 5111/ 8400 batches | train loss 0.3349186 +| epoch 4 | 5115/ 8400 batches | train loss 0.4142002 +| epoch 4 | 5119/ 8400 batches | train loss 0.4532438 +| epoch 4 | 5123/ 8400 batches | train loss 0.5212754 +| epoch 4 | 5127/ 8400 batches | train loss 0.4082681 +| epoch 4 | 5131/ 8400 batches | train loss 0.4478701 +| epoch 4 | 5135/ 8400 batches | train loss 0.3883033 +| epoch 4 | 5139/ 8400 batches | train loss 0.4268290 +| epoch 4 | 5143/ 8400 batches | train loss 0.2769440 +| epoch 4 | 5147/ 8400 batches | train loss 0.4967284 +| epoch 4 | 5151/ 8400 batches | train loss 0.5144944 +| epoch 4 | 5155/ 8400 batches | train loss 0.3577678 +| epoch 4 | 5159/ 8400 batches | train loss 0.3280835 +| epoch 4 | 5163/ 8400 batches | train loss 0.4344931 +| epoch 4 | 5167/ 8400 batches | train loss 0.4444027 +| epoch 4 | 5171/ 8400 batches | train loss 0.4525711 +| epoch 4 | 5175/ 8400 batches | train loss 0.4691133 +| epoch 4 | 5179/ 8400 batches | train loss 0.3983892 +| epoch 4 | 5183/ 8400 batches | train loss 0.3416054 +| epoch 4 | 5187/ 8400 batches | train loss 0.3589532 +| epoch 4 | 5191/ 8400 batches | train loss 0.4102032 +| epoch 4 | 5195/ 8400 batches | train loss 0.4092324 +| epoch 4 | 5199/ 8400 batches | train loss 0.4602942 +| epoch 4 | 5203/ 8400 batches | train loss 0.4489684 +| epoch 4 | 5207/ 8400 batches | train loss 0.4316781 +| epoch 4 | 5211/ 8400 batches | train loss 0.3807347 +| epoch 4 | 5215/ 8400 batches | train loss 0.4285565 +| epoch 4 | 5219/ 8400 batches | train loss 0.3642933 +| epoch 4 | 5223/ 8400 batches | train loss 0.4984951 +| epoch 4 | 5227/ 8400 batches | train loss 0.4257289 +| epoch 4 | 5231/ 8400 batches | train loss 0.3415166 +| epoch 4 | 5235/ 8400 batches | train loss 0.3724571 +| epoch 4 | 5239/ 8400 batches | train loss 0.4051743 +| epoch 4 | 5243/ 8400 batches | train loss 0.3966506 +| epoch 4 | 5247/ 8400 batches | train loss 0.4198724 +| epoch 4 | 5251/ 8400 batches | train loss 0.3601802 +| epoch 4 | 5255/ 8400 batches | train loss 0.3616103 +| epoch 4 | 5259/ 8400 batches | train loss 0.4268526 +| epoch 4 | 5263/ 8400 batches | train loss 0.4022562 +| epoch 4 | 5267/ 8400 batches | train loss 0.4034747 +| epoch 4 | 5271/ 8400 batches | train loss 0.3505731 +| epoch 4 | 5275/ 8400 batches | train loss 0.4309439 +| epoch 4 | 5279/ 8400 batches | train loss 0.3928129 +| epoch 4 | 5283/ 8400 batches | train loss 0.3625881 +| epoch 4 | 5287/ 8400 batches | train loss 0.3850430 +| epoch 4 | 5291/ 8400 batches | train loss 0.4555889 +| epoch 4 | 5295/ 8400 batches | train loss 0.3918436 +| epoch 4 | 5299/ 8400 batches | train loss 0.3543112 +| epoch 4 | 5303/ 8400 batches | train loss 0.3681666 +| epoch 4 | 5307/ 8400 batches | train loss 0.4134035 +| epoch 4 | 5311/ 8400 batches | train loss 0.3565687 +| epoch 4 | 5315/ 8400 batches | train loss 0.3586655 +| epoch 4 | 5319/ 8400 batches | train loss 0.3770658 +| epoch 4 | 5323/ 8400 batches | train loss 0.3631654 +| epoch 4 | 5327/ 8400 batches | train loss 0.3416443 +| epoch 4 | 5331/ 8400 batches | train loss 0.5025782 +| epoch 4 | 5335/ 8400 batches | train loss 0.4131814 +| epoch 4 | 5339/ 8400 batches | train loss 0.3810590 +| epoch 4 | 5343/ 8400 batches | train loss 0.3983486 +| epoch 4 | 5347/ 8400 batches | train loss 0.3598624 +| epoch 4 | 5351/ 8400 batches | train loss 0.4385935 +| epoch 4 | 5355/ 8400 batches | train loss 0.4007794 +| epoch 4 | 5359/ 8400 batches | train loss 0.4841316 +| epoch 4 | 5363/ 8400 batches | train loss 0.4373362 +| epoch 4 | 5367/ 8400 batches | train loss 0.3988373 +| epoch 4 | 5371/ 8400 batches | train loss 0.4140252 +| epoch 4 | 5375/ 8400 batches | train loss 0.3357447 +| epoch 4 | 5379/ 8400 batches | train loss 0.4144571 +| epoch 4 | 5383/ 8400 batches | train loss 0.4766061 +| epoch 4 | 5387/ 8400 batches | train loss 0.4315721 +| epoch 4 | 5391/ 8400 batches | train loss 0.3997428 +| epoch 4 | 5395/ 8400 batches | train loss 0.3746189 +| epoch 4 | 5399/ 8400 batches | train loss 0.3574507 +| epoch 4 | 5403/ 8400 batches | train loss 0.4331805 +| epoch 4 | 5407/ 8400 batches | train loss 0.4632431 +| epoch 4 | 5411/ 8400 batches | train loss 0.4083035 +| epoch 4 | 5415/ 8400 batches | train loss 0.4079743 +| epoch 4 | 5419/ 8400 batches | train loss 0.4239645 +| epoch 4 | 5423/ 8400 batches | train loss 0.3797940 +| epoch 4 | 5427/ 8400 batches | train loss 0.4194925 +| epoch 4 | 5431/ 8400 batches | train loss 0.4324275 +| epoch 4 | 5435/ 8400 batches | train loss 0.4324543 +| epoch 4 | 5439/ 8400 batches | train loss 0.4379869 +| epoch 4 | 5443/ 8400 batches | train loss 0.4827365 +| epoch 4 | 5447/ 8400 batches | train loss 0.4264577 +| epoch 4 | 5451/ 8400 batches | train loss 0.4574646 +| epoch 4 | 5455/ 8400 batches | train loss 0.4407006 +| epoch 4 | 5459/ 8400 batches | train loss 0.4583324 +| epoch 4 | 5463/ 8400 batches | train loss 0.4970348 +| epoch 4 | 5467/ 8400 batches | train loss 0.2813709 +| epoch 4 | 5471/ 8400 batches | train loss 0.4845110 +| epoch 4 | 5475/ 8400 batches | train loss 0.4376473 +| epoch 4 | 5479/ 8400 batches | train loss 0.4044674 +| epoch 4 | 5483/ 8400 batches | train loss 0.4220493 +| epoch 4 | 5487/ 8400 batches | train loss 0.4051603 +| epoch 4 | 5491/ 8400 batches | train loss 0.4400458 +| epoch 4 | 5495/ 8400 batches | train loss 0.3476424 +| epoch 4 | 5499/ 8400 batches | train loss 0.4107952 +| epoch 4 | 5503/ 8400 batches | train loss 0.3751907 +| epoch 4 | 5507/ 8400 batches | train loss 0.4464099 +| epoch 4 | 5511/ 8400 batches | train loss 0.5126773 +| epoch 4 | 5515/ 8400 batches | train loss 0.4784089 +| epoch 4 | 5519/ 8400 batches | train loss 0.4230219 +| epoch 4 | 5523/ 8400 batches | train loss 0.4116650 +| epoch 4 | 5527/ 8400 batches | train loss 0.4530195 +| epoch 4 | 5531/ 8400 batches | train loss 0.5185719 +| epoch 4 | 5535/ 8400 batches | train loss 0.4241114 +| epoch 4 | 5539/ 8400 batches | train loss 0.4695526 +| epoch 4 | 5543/ 8400 batches | train loss 0.3738770 +| epoch 4 | 5547/ 8400 batches | train loss 0.5168337 +| epoch 4 | 5551/ 8400 batches | train loss 0.4013348 +| epoch 4 | 5555/ 8400 batches | train loss 0.4160098 +| epoch 4 | 5559/ 8400 batches | train loss 0.4052743 +| epoch 4 | 5563/ 8400 batches | train loss 0.3632426 +| epoch 4 | 5567/ 8400 batches | train loss 0.4609243 +| epoch 4 | 5571/ 8400 batches | train loss 0.4222327 +| epoch 4 | 5575/ 8400 batches | train loss 0.3685309 +| epoch 4 | 5579/ 8400 batches | train loss 0.4527126 +| epoch 4 | 5583/ 8400 batches | train loss 0.3659916 +| epoch 4 | 5587/ 8400 batches | train loss 0.4507071 +| epoch 4 | 5591/ 8400 batches | train loss 0.4064734 +| epoch 4 | 5595/ 8400 batches | train loss 0.3996289 +| epoch 4 | 5599/ 8400 batches | train loss 0.4418373 +| epoch 4 | 5603/ 8400 batches | train loss 0.1800223 +| epoch 4 | 5607/ 8400 batches | train loss 0.4396128 +| epoch 4 | 5611/ 8400 batches | train loss 0.4626628 +| epoch 4 | 5615/ 8400 batches | train loss 0.4629716 +| epoch 4 | 5619/ 8400 batches | train loss 0.3729309 +| epoch 4 | 5623/ 8400 batches | train loss 0.4071375 +| epoch 4 | 5627/ 8400 batches | train loss 0.4821313 +| epoch 4 | 5631/ 8400 batches | train loss 0.4742430 +| epoch 4 | 5635/ 8400 batches | train loss 0.4326324 +| epoch 4 | 5639/ 8400 batches | train loss 0.4479321 +| epoch 4 | 5643/ 8400 batches | train loss 0.4624570 +| epoch 4 | 5647/ 8400 batches | train loss 0.3909944 +| epoch 4 | 5651/ 8400 batches | train loss 0.3616486 +| epoch 4 | 5655/ 8400 batches | train loss 0.3949559 +| epoch 4 | 5659/ 8400 batches | train loss 0.4575433 +| epoch 4 | 5663/ 8400 batches | train loss 0.3322145 +| epoch 4 | 5667/ 8400 batches | train loss 0.4285921 +| epoch 4 | 5671/ 8400 batches | train loss 0.4324037 +| epoch 4 | 5675/ 8400 batches | train loss 0.3366660 +| epoch 4 | 5679/ 8400 batches | train loss 0.3947150 +| epoch 4 | 5683/ 8400 batches | train loss 0.4613301 +| epoch 4 | 5687/ 8400 batches | train loss 0.3971300 +| epoch 4 | 5691/ 8400 batches | train loss 0.4172265 +| epoch 4 | 5695/ 8400 batches | train loss 0.4291292 +| epoch 4 | 5699/ 8400 batches | train loss 0.4303629 +| epoch 4 | 5703/ 8400 batches | train loss 0.3969399 +| epoch 4 | 5707/ 8400 batches | train loss 0.4672446 +| epoch 4 | 5711/ 8400 batches | train loss 0.3566236 +| epoch 4 | 5715/ 8400 batches | train loss 0.4924901 +| epoch 4 | 5719/ 8400 batches | train loss 0.4309705 +| epoch 4 | 5723/ 8400 batches | train loss 0.4669464 +| epoch 4 | 5727/ 8400 batches | train loss 0.3922215 +| epoch 4 | 5731/ 8400 batches | train loss 0.4023930 +| epoch 4 | 5735/ 8400 batches | train loss 0.3432696 +| epoch 4 | 5739/ 8400 batches | train loss 0.4674236 +| epoch 4 | 5743/ 8400 batches | train loss 0.3362206 +| epoch 4 | 5747/ 8400 batches | train loss 0.4539825 +| epoch 4 | 5751/ 8400 batches | train loss 0.3886923 +| epoch 4 | 5755/ 8400 batches | train loss 0.4871373 +| epoch 4 | 5759/ 8400 batches | train loss 0.4348642 +| epoch 4 | 5763/ 8400 batches | train loss 0.4176685 +| epoch 4 | 5767/ 8400 batches | train loss 0.3472125 +| epoch 4 | 5771/ 8400 batches | train loss 0.4642709 +| epoch 4 | 5775/ 8400 batches | train loss 0.5072857 +| epoch 4 | 5779/ 8400 batches | train loss 0.4802845 +| epoch 4 | 5783/ 8400 batches | train loss 0.3977831 +| epoch 4 | 5787/ 8400 batches | train loss 0.4319186 +| epoch 4 | 5791/ 8400 batches | train loss 0.4126724 +| epoch 4 | 5795/ 8400 batches | train loss 0.4010841 +| epoch 4 | 5799/ 8400 batches | train loss 0.4086665 +| epoch 4 | 5803/ 8400 batches | train loss 0.4017317 +| epoch 4 | 5807/ 8400 batches | train loss 0.3612926 +| epoch 4 | 5811/ 8400 batches | train loss 0.4504824 +| epoch 4 | 5815/ 8400 batches | train loss 0.3907652 +| epoch 4 | 5819/ 8400 batches | train loss 0.3617211 +| epoch 4 | 5823/ 8400 batches | train loss 0.3560765 +| epoch 4 | 5827/ 8400 batches | train loss 0.3551266 +| epoch 4 | 5831/ 8400 batches | train loss 0.4086170 +| epoch 4 | 5835/ 8400 batches | train loss 0.4121458 +| epoch 4 | 5839/ 8400 batches | train loss 0.4126866 +| epoch 4 | 5843/ 8400 batches | train loss 0.3869844 +| epoch 4 | 5847/ 8400 batches | train loss 0.3637406 +| epoch 4 | 5851/ 8400 batches | train loss 0.3869717 +| epoch 4 | 5855/ 8400 batches | train loss 0.4685985 +| epoch 4 | 5859/ 8400 batches | train loss 0.4567616 +| epoch 4 | 5863/ 8400 batches | train loss 0.4460894 +| epoch 4 | 5867/ 8400 batches | train loss 0.4537347 +| epoch 4 | 5871/ 8400 batches | train loss 0.4596877 +| epoch 4 | 5875/ 8400 batches | train loss 0.4690043 +| epoch 4 | 5879/ 8400 batches | train loss 0.3405472 +| epoch 4 | 5883/ 8400 batches | train loss 0.4129065 +| epoch 4 | 5887/ 8400 batches | train loss 0.4512407 +| epoch 4 | 5891/ 8400 batches | train loss 0.3795044 +| epoch 4 | 5895/ 8400 batches | train loss 0.4629926 +| epoch 4 | 5899/ 8400 batches | train loss 0.3959310 +| epoch 4 | 5903/ 8400 batches | train loss 0.4234484 +| epoch 4 | 5907/ 8400 batches | train loss 0.3422291 +| epoch 4 | 5911/ 8400 batches | train loss 0.4196093 +| epoch 4 | 5915/ 8400 batches | train loss 0.3398160 +| epoch 4 | 5919/ 8400 batches | train loss 0.4729189 +| epoch 4 | 5923/ 8400 batches | train loss 0.4062782 +| epoch 4 | 5927/ 8400 batches | train loss 0.3771113 +| epoch 4 | 5931/ 8400 batches | train loss 0.3539132 +| epoch 4 | 5935/ 8400 batches | train loss 0.3947082 +| epoch 4 | 5939/ 8400 batches | train loss 0.4901688 +| epoch 4 | 5943/ 8400 batches | train loss 0.4388704 +| epoch 4 | 5947/ 8400 batches | train loss 0.4000736 +| epoch 4 | 5951/ 8400 batches | train loss 0.3395951 +| epoch 4 | 5955/ 8400 batches | train loss 0.4232819 +| epoch 4 | 5959/ 8400 batches | train loss 0.4812384 +| epoch 4 | 5963/ 8400 batches | train loss 0.3769329 +| epoch 4 | 5967/ 8400 batches | train loss 0.4414163 +| epoch 4 | 5971/ 8400 batches | train loss 0.3459620 +| epoch 4 | 5975/ 8400 batches | train loss 0.3373687 +| epoch 4 | 5979/ 8400 batches | train loss 0.4366155 +| epoch 4 | 5983/ 8400 batches | train loss 0.5191561 +| epoch 4 | 5987/ 8400 batches | train loss 0.3036730 +| epoch 4 | 5991/ 8400 batches | train loss 0.3787805 +| epoch 4 | 5995/ 8400 batches | train loss 0.3441958 +| epoch 4 | 5999/ 8400 batches | train loss 0.4952622 +| epoch 4 | 6003/ 8400 batches | train loss 0.4653919 +| epoch 4 | 6007/ 8400 batches | train loss 0.4427082 +| epoch 4 | 6011/ 8400 batches | train loss 0.3167629 +| epoch 4 | 6015/ 8400 batches | train loss 0.4258003 +| epoch 4 | 6019/ 8400 batches | train loss 0.4312099 +| epoch 4 | 6023/ 8400 batches | train loss 0.5706725 +| epoch 4 | 6027/ 8400 batches | train loss 0.4097316 +| epoch 4 | 6031/ 8400 batches | train loss 0.3859614 +| epoch 4 | 6035/ 8400 batches | train loss 0.5096745 +| epoch 4 | 6039/ 8400 batches | train loss 0.4080009 +| epoch 4 | 6043/ 8400 batches | train loss 0.3861289 +| epoch 4 | 6047/ 8400 batches | train loss 0.5064961 +| epoch 4 | 6051/ 8400 batches | train loss 0.4665852 +| epoch 4 | 6055/ 8400 batches | train loss 0.3998649 +| epoch 4 | 6059/ 8400 batches | train loss 0.4312420 +| epoch 4 | 6063/ 8400 batches | train loss 0.4439485 +| epoch 4 | 6067/ 8400 batches | train loss 0.4074201 +| epoch 4 | 6071/ 8400 batches | train loss 0.3955197 +| epoch 4 | 6075/ 8400 batches | train loss 0.4469774 +| epoch 4 | 6079/ 8400 batches | train loss 0.4776276 +| epoch 4 | 6083/ 8400 batches | train loss 0.4136444 +| epoch 4 | 6087/ 8400 batches | train loss 0.3726396 +| epoch 4 | 6091/ 8400 batches | train loss 0.4705566 +| epoch 4 | 6095/ 8400 batches | train loss 0.3103898 +| epoch 4 | 6099/ 8400 batches | train loss 0.4185449 +| epoch 4 | 6103/ 8400 batches | train loss 0.3925284 +| epoch 4 | 6107/ 8400 batches | train loss 0.4485682 +| epoch 4 | 6111/ 8400 batches | train loss 0.3899059 +| epoch 4 | 6115/ 8400 batches | train loss 0.4234807 +| epoch 4 | 6119/ 8400 batches | train loss 0.5073063 +| epoch 4 | 6123/ 8400 batches | train loss 0.3707145 +| epoch 4 | 6127/ 8400 batches | train loss 0.4363523 +| epoch 4 | 6131/ 8400 batches | train loss 0.4369725 +| epoch 4 | 6135/ 8400 batches | train loss 0.4627702 +| epoch 4 | 6139/ 8400 batches | train loss 0.4104088 +| epoch 4 | 6143/ 8400 batches | train loss 0.4216006 +| epoch 4 | 6147/ 8400 batches | train loss 0.4158161 +| epoch 4 | 6151/ 8400 batches | train loss 0.4262666 +| epoch 4 | 6155/ 8400 batches | train loss 0.4032213 +| epoch 4 | 6159/ 8400 batches | train loss 0.4554287 +| epoch 4 | 6163/ 8400 batches | train loss 0.4503917 +| epoch 4 | 6167/ 8400 batches | train loss 0.4456408 +| epoch 4 | 6171/ 8400 batches | train loss 0.5100049 +| epoch 4 | 6175/ 8400 batches | train loss 0.4739694 +| epoch 4 | 6179/ 8400 batches | train loss 0.4449633 +| epoch 4 | 6183/ 8400 batches | train loss 0.4092739 +| epoch 4 | 6187/ 8400 batches | train loss 0.4373118 +| epoch 4 | 6191/ 8400 batches | train loss 0.2940992 +| epoch 4 | 6195/ 8400 batches | train loss 0.3555181 +| epoch 4 | 6199/ 8400 batches | train loss 0.3746779 +| epoch 4 | 6203/ 8400 batches | train loss 0.4463044 +| epoch 4 | 6207/ 8400 batches | train loss 0.4019440 +| epoch 4 | 6211/ 8400 batches | train loss 0.3965619 +| epoch 4 | 6215/ 8400 batches | train loss 0.3711886 +| epoch 4 | 6219/ 8400 batches | train loss 0.3795040 +| epoch 4 | 6223/ 8400 batches | train loss 0.3406985 +| epoch 4 | 6227/ 8400 batches | train loss 0.4137001 +| epoch 4 | 6231/ 8400 batches | train loss 0.3348615 +| epoch 4 | 6235/ 8400 batches | train loss 0.3958676 +| epoch 4 | 6239/ 8400 batches | train loss 0.4232379 +| epoch 4 | 6243/ 8400 batches | train loss 0.4186693 +| epoch 4 | 6247/ 8400 batches | train loss 0.4064969 +| epoch 4 | 6251/ 8400 batches | train loss 0.4366120 +| epoch 4 | 6255/ 8400 batches | train loss 0.4358487 +| epoch 4 | 6259/ 8400 batches | train loss 0.5131360 +| epoch 4 | 6263/ 8400 batches | train loss 0.4079782 +| epoch 4 | 6267/ 8400 batches | train loss 0.3464609 +| epoch 4 | 6271/ 8400 batches | train loss 0.3845928 +| epoch 4 | 6275/ 8400 batches | train loss 0.4947224 +| epoch 4 | 6279/ 8400 batches | train loss 0.3957645 +| epoch 4 | 6283/ 8400 batches | train loss 0.3758379 +| epoch 4 | 6287/ 8400 batches | train loss 0.3935382 +| epoch 4 | 6291/ 8400 batches | train loss 0.4050419 +| epoch 4 | 6295/ 8400 batches | train loss 0.3003532 +| epoch 4 | 6299/ 8400 batches | train loss 0.3614822 +| epoch 4 | 6303/ 8400 batches | train loss 0.3717705 +| epoch 4 | 6307/ 8400 batches | train loss 0.4011413 +| epoch 4 | 6311/ 8400 batches | train loss 0.4306663 +| epoch 4 | 6315/ 8400 batches | train loss 0.2186528 +| epoch 4 | 6319/ 8400 batches | train loss 0.4152263 +| epoch 4 | 6323/ 8400 batches | train loss 0.3686730 +| epoch 4 | 6327/ 8400 batches | train loss 0.4720764 +| epoch 4 | 6331/ 8400 batches | train loss 0.3613490 +| epoch 4 | 6335/ 8400 batches | train loss 0.4314060 +| epoch 4 | 6339/ 8400 batches | train loss 0.3973452 +| epoch 4 | 6343/ 8400 batches | train loss 0.4313611 +| epoch 4 | 6347/ 8400 batches | train loss 0.3781116 +| epoch 4 | 6351/ 8400 batches | train loss 0.4892043 +| epoch 4 | 6355/ 8400 batches | train loss 0.4257789 +| epoch 4 | 6359/ 8400 batches | train loss 0.4660070 +| epoch 4 | 6363/ 8400 batches | train loss 0.4251430 +| epoch 4 | 6367/ 8400 batches | train loss 0.3819453 +| epoch 4 | 6371/ 8400 batches | train loss 0.4165358 +| epoch 4 | 6375/ 8400 batches | train loss 0.3620110 +| epoch 4 | 6379/ 8400 batches | train loss 0.4062056 +| epoch 4 | 6383/ 8400 batches | train loss 0.3944047 +| epoch 4 | 6387/ 8400 batches | train loss 0.4301113 +| epoch 4 | 6391/ 8400 batches | train loss 0.4387995 +| epoch 4 | 6395/ 8400 batches | train loss 0.4070958 +| epoch 4 | 6399/ 8400 batches | train loss 0.4114189 +| epoch 4 | 6403/ 8400 batches | train loss 0.3948318 +| epoch 4 | 6407/ 8400 batches | train loss 0.4044764 +| epoch 4 | 6411/ 8400 batches | train loss 0.4360460 +| epoch 4 | 6415/ 8400 batches | train loss 0.4312688 +| epoch 4 | 6419/ 8400 batches | train loss 0.3881005 +| epoch 4 | 6423/ 8400 batches | train loss 0.4841730 +| epoch 4 | 6427/ 8400 batches | train loss 0.4338417 +| epoch 4 | 6431/ 8400 batches | train loss 0.4014851 +| epoch 4 | 6435/ 8400 batches | train loss 0.3762574 +| epoch 4 | 6439/ 8400 batches | train loss 0.4115075 +| epoch 4 | 6443/ 8400 batches | train loss 0.3851924 +| epoch 4 | 6447/ 8400 batches | train loss 0.4485887 +| epoch 4 | 6451/ 8400 batches | train loss 0.4094266 +| epoch 4 | 6455/ 8400 batches | train loss 0.4030518 +| epoch 4 | 6459/ 8400 batches | train loss 0.4369673 +| epoch 4 | 6463/ 8400 batches | train loss 0.4046826 +| epoch 4 | 6467/ 8400 batches | train loss 0.3910692 +| epoch 4 | 6471/ 8400 batches | train loss 0.4254247 +| epoch 4 | 6475/ 8400 batches | train loss 0.3311304 +| epoch 4 | 6479/ 8400 batches | train loss 0.4285111 +| epoch 4 | 6483/ 8400 batches | train loss 0.3786039 +| epoch 4 | 6487/ 8400 batches | train loss 0.4483102 +| epoch 4 | 6491/ 8400 batches | train loss 0.3728631 +| epoch 4 | 6495/ 8400 batches | train loss 0.4149876 +| epoch 4 | 6499/ 8400 batches | train loss 0.4587330 +| epoch 4 | 6503/ 8400 batches | train loss 0.5125269 +| epoch 4 | 6507/ 8400 batches | train loss 0.3857781 +| epoch 4 | 6511/ 8400 batches | train loss 0.2599162 +| epoch 4 | 6515/ 8400 batches | train loss 0.4979083 +| epoch 4 | 6519/ 8400 batches | train loss 0.3682957 +| epoch 4 | 6523/ 8400 batches | train loss 0.4351320 +| epoch 4 | 6527/ 8400 batches | train loss 0.3600005 +| epoch 4 | 6531/ 8400 batches | train loss 0.4359797 +| epoch 4 | 6535/ 8400 batches | train loss 0.4148650 +| epoch 4 | 6539/ 8400 batches | train loss 0.4048317 +| epoch 4 | 6543/ 8400 batches | train loss 0.4079449 +| epoch 4 | 6547/ 8400 batches | train loss 0.3474254 +| epoch 4 | 6551/ 8400 batches | train loss 0.3751903 +| epoch 4 | 6555/ 8400 batches | train loss 0.4373195 +| epoch 4 | 6559/ 8400 batches | train loss 0.4483306 +| epoch 4 | 6563/ 8400 batches | train loss 0.4451927 +| epoch 4 | 6567/ 8400 batches | train loss 0.3422658 +| epoch 4 | 6571/ 8400 batches | train loss 0.3859282 +| epoch 4 | 6575/ 8400 batches | train loss 0.4407635 +| epoch 4 | 6579/ 8400 batches | train loss 0.4119331 +| epoch 4 | 6583/ 8400 batches | train loss 0.4670925 +| epoch 4 | 6587/ 8400 batches | train loss 0.4174356 +| epoch 4 | 6591/ 8400 batches | train loss 0.4963367 +| epoch 4 | 6595/ 8400 batches | train loss 0.4164278 +| epoch 4 | 6599/ 8400 batches | train loss 0.4303058 +| epoch 4 | 6603/ 8400 batches | train loss 0.3743299 +| epoch 4 | 6607/ 8400 batches | train loss 0.4905364 +| epoch 4 | 6611/ 8400 batches | train loss 0.3902980 +| epoch 4 | 6615/ 8400 batches | train loss 0.4195372 +| epoch 4 | 6619/ 8400 batches | train loss 0.3591788 +| epoch 4 | 6623/ 8400 batches | train loss 0.4460233 +| epoch 4 | 6627/ 8400 batches | train loss 0.4409517 +| epoch 4 | 6631/ 8400 batches | train loss 0.4423726 +| epoch 4 | 6635/ 8400 batches | train loss 0.4241077 +| epoch 4 | 6639/ 8400 batches | train loss 0.4772458 +| epoch 4 | 6643/ 8400 batches | train loss 0.4486790 +| epoch 4 | 6647/ 8400 batches | train loss 0.3310031 +| epoch 4 | 6651/ 8400 batches | train loss 0.3563080 +| epoch 4 | 6655/ 8400 batches | train loss 0.4098258 +| epoch 4 | 6659/ 8400 batches | train loss 0.3503596 +| epoch 4 | 6663/ 8400 batches | train loss 0.3430635 +| epoch 4 | 6667/ 8400 batches | train loss 0.4678969 +| epoch 4 | 6671/ 8400 batches | train loss 0.5136477 +| epoch 4 | 6675/ 8400 batches | train loss 0.4242900 +| epoch 4 | 6679/ 8400 batches | train loss 0.3331891 +| epoch 4 | 6683/ 8400 batches | train loss 0.3805680 +| epoch 4 | 6687/ 8400 batches | train loss 0.3861214 +| epoch 4 | 6691/ 8400 batches | train loss 0.3560608 +| epoch 4 | 6695/ 8400 batches | train loss 0.4101692 +| epoch 4 | 6699/ 8400 batches | train loss 0.4606878 +| epoch 4 | 6703/ 8400 batches | train loss 0.3526347 +| epoch 4 | 6707/ 8400 batches | train loss 0.3801618 +| epoch 4 | 6711/ 8400 batches | train loss 0.4425698 +| epoch 4 | 6715/ 8400 batches | train loss 0.4549229 +| epoch 4 | 6719/ 8400 batches | train loss 0.3876112 +| epoch 4 | 6723/ 8400 batches | train loss 0.4297138 +| epoch 4 | 6727/ 8400 batches | train loss 0.4591920 +| epoch 4 | 6731/ 8400 batches | train loss 0.4630691 +| epoch 4 | 6735/ 8400 batches | train loss 0.4254954 +| epoch 4 | 6739/ 8400 batches | train loss 0.4137478 +| epoch 4 | 6743/ 8400 batches | train loss 0.4230009 +| epoch 4 | 6747/ 8400 batches | train loss 0.4109486 +| epoch 4 | 6751/ 8400 batches | train loss 0.4198261 +| epoch 4 | 6755/ 8400 batches | train loss 0.5107582 +| epoch 4 | 6759/ 8400 batches | train loss 0.4073474 +| epoch 4 | 6763/ 8400 batches | train loss 0.4138442 +| epoch 4 | 6767/ 8400 batches | train loss 0.4160243 +| epoch 4 | 6771/ 8400 batches | train loss 0.4008133 +| epoch 4 | 6775/ 8400 batches | train loss 0.3694953 +| epoch 4 | 6779/ 8400 batches | train loss 0.3976364 +| epoch 4 | 6783/ 8400 batches | train loss 0.3537118 +| epoch 4 | 6787/ 8400 batches | train loss 0.4528018 +| epoch 4 | 6791/ 8400 batches | train loss 0.3716781 +| epoch 4 | 6795/ 8400 batches | train loss 0.4240714 +| epoch 4 | 6799/ 8400 batches | train loss 0.4415513 +| epoch 4 | 6803/ 8400 batches | train loss 0.4462078 +| epoch 4 | 6807/ 8400 batches | train loss 0.3938265 +| epoch 4 | 6811/ 8400 batches | train loss 0.3762805 +| epoch 4 | 6815/ 8400 batches | train loss 0.4572821 +| epoch 4 | 6819/ 8400 batches | train loss 0.4399527 +| epoch 4 | 6823/ 8400 batches | train loss 0.3476362 +| epoch 4 | 6827/ 8400 batches | train loss 0.4167195 +| epoch 4 | 6831/ 8400 batches | train loss 0.4288871 +| epoch 4 | 6835/ 8400 batches | train loss 0.4573025 +| epoch 4 | 6839/ 8400 batches | train loss 0.4273152 +| epoch 4 | 6843/ 8400 batches | train loss 0.4295734 +| epoch 4 | 6847/ 8400 batches | train loss 0.4296082 +| epoch 4 | 6851/ 8400 batches | train loss 0.4215057 +| epoch 4 | 6855/ 8400 batches | train loss 0.4377123 +| epoch 4 | 6859/ 8400 batches | train loss 0.4069284 +| epoch 4 | 6863/ 8400 batches | train loss 0.4145710 +| epoch 4 | 6867/ 8400 batches | train loss 0.4044807 +| epoch 4 | 6871/ 8400 batches | train loss 0.4198548 +| epoch 4 | 6875/ 8400 batches | train loss 0.4161442 +| epoch 4 | 6879/ 8400 batches | train loss 0.4021335 +| epoch 4 | 6883/ 8400 batches | train loss 0.3985411 +| epoch 4 | 6887/ 8400 batches | train loss 0.4476285 +| epoch 4 | 6891/ 8400 batches | train loss 0.4066637 +| epoch 4 | 6895/ 8400 batches | train loss 0.4117961 +| epoch 4 | 6899/ 8400 batches | train loss 0.4167370 +| epoch 4 | 6903/ 8400 batches | train loss 0.4525994 +| epoch 4 | 6907/ 8400 batches | train loss 0.4331324 +| epoch 4 | 6911/ 8400 batches | train loss 0.4371700 +| epoch 4 | 6915/ 8400 batches | train loss 0.3864685 +| epoch 4 | 6919/ 8400 batches | train loss 0.3869706 +| epoch 4 | 6923/ 8400 batches | train loss 0.4665299 +| epoch 4 | 6927/ 8400 batches | train loss 0.3605450 +| epoch 4 | 6931/ 8400 batches | train loss 0.4611173 +| epoch 4 | 6935/ 8400 batches | train loss 0.3915706 +| epoch 4 | 6939/ 8400 batches | train loss 0.3403749 +| epoch 4 | 6943/ 8400 batches | train loss 0.4316598 +| epoch 4 | 6947/ 8400 batches | train loss 0.3859628 +| epoch 4 | 6951/ 8400 batches | train loss 0.4820307 +| epoch 4 | 6955/ 8400 batches | train loss 0.3966476 +| epoch 4 | 6959/ 8400 batches | train loss 0.4400349 +| epoch 4 | 6963/ 8400 batches | train loss 0.4441534 +| epoch 4 | 6967/ 8400 batches | train loss 0.3572703 +| epoch 4 | 6971/ 8400 batches | train loss 0.2504444 +| epoch 4 | 6975/ 8400 batches | train loss 0.4647433 +| epoch 4 | 6979/ 8400 batches | train loss 0.4542233 +| epoch 4 | 6983/ 8400 batches | train loss 0.3333581 +| epoch 4 | 6987/ 8400 batches | train loss 0.3977022 +| epoch 4 | 6991/ 8400 batches | train loss 0.5027028 +| epoch 4 | 6995/ 8400 batches | train loss 0.3567299 +| epoch 4 | 6999/ 8400 batches | train loss 0.4432151 +| epoch 4 | 7003/ 8400 batches | train loss 0.4422529 +| epoch 4 | 7007/ 8400 batches | train loss 0.4168195 +| epoch 4 | 7011/ 8400 batches | train loss 0.4951628 +| epoch 4 | 7015/ 8400 batches | train loss 0.4565289 +| epoch 4 | 7019/ 8400 batches | train loss 0.4901295 +| epoch 4 | 7023/ 8400 batches | train loss 0.3425331 +| epoch 4 | 7027/ 8400 batches | train loss 0.4130889 +| epoch 4 | 7031/ 8400 batches | train loss 0.4232841 +| epoch 4 | 7035/ 8400 batches | train loss 0.4526623 +| epoch 4 | 7039/ 8400 batches | train loss 0.4933411 +| epoch 4 | 7043/ 8400 batches | train loss 0.3581362 +| epoch 4 | 7047/ 8400 batches | train loss 0.3172991 +| epoch 4 | 7051/ 8400 batches | train loss 0.4392086 +| epoch 4 | 7055/ 8400 batches | train loss 0.4165310 +| epoch 4 | 7059/ 8400 batches | train loss 0.4694718 +| epoch 4 | 7063/ 8400 batches | train loss 0.4319297 +| epoch 4 | 7067/ 8400 batches | train loss 0.4235826 +| epoch 4 | 7071/ 8400 batches | train loss 0.5038003 +| epoch 4 | 7075/ 8400 batches | train loss 0.3764496 +| epoch 4 | 7079/ 8400 batches | train loss 0.4194939 +| epoch 4 | 7083/ 8400 batches | train loss 0.4374790 +| epoch 4 | 7087/ 8400 batches | train loss 0.4132894 +| epoch 4 | 7091/ 8400 batches | train loss 0.3995552 +| epoch 4 | 7095/ 8400 batches | train loss 0.4535239 +| epoch 4 | 7099/ 8400 batches | train loss 0.4842235 +| epoch 4 | 7103/ 8400 batches | train loss 0.3697564 +| epoch 4 | 7107/ 8400 batches | train loss 0.4865619 +| epoch 4 | 7111/ 8400 batches | train loss 0.3552466 +| epoch 4 | 7115/ 8400 batches | train loss 0.4501626 +| epoch 4 | 7119/ 8400 batches | train loss 0.3851340 +| epoch 4 | 7123/ 8400 batches | train loss 0.4149805 +| epoch 4 | 7127/ 8400 batches | train loss 0.4190367 +| epoch 4 | 7131/ 8400 batches | train loss 0.5601063 +| epoch 4 | 7135/ 8400 batches | train loss 0.3995639 +| epoch 4 | 7139/ 8400 batches | train loss 0.4147194 +| epoch 4 | 7143/ 8400 batches | train loss 0.3319887 +| epoch 4 | 7147/ 8400 batches | train loss 0.4198436 +| epoch 4 | 7151/ 8400 batches | train loss 0.4406514 +| epoch 4 | 7155/ 8400 batches | train loss 0.4512499 +| epoch 4 | 7159/ 8400 batches | train loss 0.4139030 +| epoch 4 | 7163/ 8400 batches | train loss 0.4670646 +| epoch 4 | 7167/ 8400 batches | train loss 0.4145032 +| epoch 4 | 7171/ 8400 batches | train loss 0.4103088 +| epoch 4 | 7175/ 8400 batches | train loss 0.3917292 +| epoch 4 | 7179/ 8400 batches | train loss 0.3784406 +| epoch 4 | 7183/ 8400 batches | train loss 0.4268928 +| epoch 4 | 7187/ 8400 batches | train loss 0.4231777 +| epoch 4 | 7191/ 8400 batches | train loss 0.3028356 +| epoch 4 | 7195/ 8400 batches | train loss 0.4070397 +| epoch 4 | 7199/ 8400 batches | train loss 0.3901229 +| epoch 4 | 7203/ 8400 batches | train loss 0.4528193 +| epoch 4 | 7207/ 8400 batches | train loss 0.5335148 +| epoch 4 | 7211/ 8400 batches | train loss 0.3635504 +| epoch 4 | 7215/ 8400 batches | train loss 0.4471866 +| epoch 4 | 7219/ 8400 batches | train loss 0.4214023 +| epoch 4 | 7223/ 8400 batches | train loss 0.4129476 +| epoch 4 | 7227/ 8400 batches | train loss 0.3963490 +| epoch 4 | 7231/ 8400 batches | train loss 0.4495599 +| epoch 4 | 7235/ 8400 batches | train loss 0.3408979 +| epoch 4 | 7239/ 8400 batches | train loss 0.3149965 +| epoch 4 | 7243/ 8400 batches | train loss 0.5208820 +| epoch 4 | 7247/ 8400 batches | train loss 0.4166986 +| epoch 4 | 7251/ 8400 batches | train loss 0.4822518 +| epoch 4 | 7255/ 8400 batches | train loss 0.3874730 +| epoch 4 | 7259/ 8400 batches | train loss 0.3774488 +| epoch 4 | 7263/ 8400 batches | train loss 0.4646792 +| epoch 4 | 7267/ 8400 batches | train loss 0.3934265 +| epoch 4 | 7271/ 8400 batches | train loss 0.4459791 +| epoch 4 | 7275/ 8400 batches | train loss 0.2944472 +| epoch 4 | 7279/ 8400 batches | train loss 0.4129940 +| epoch 4 | 7283/ 8400 batches | train loss 0.4608297 +| epoch 4 | 7287/ 8400 batches | train loss 0.3422091 +| epoch 4 | 7291/ 8400 batches | train loss 0.4614241 +| epoch 4 | 7295/ 8400 batches | train loss 0.3873906 +| epoch 4 | 7299/ 8400 batches | train loss 0.4551317 +| epoch 4 | 7303/ 8400 batches | train loss 0.4058945 +| epoch 4 | 7307/ 8400 batches | train loss 0.4291376 +| epoch 4 | 7311/ 8400 batches | train loss 0.3487148 +| epoch 4 | 7315/ 8400 batches | train loss 0.4151049 +| epoch 4 | 7319/ 8400 batches | train loss 0.3550029 +| epoch 4 | 7323/ 8400 batches | train loss 0.4218666 +| epoch 4 | 7327/ 8400 batches | train loss 0.4482016 +| epoch 4 | 7331/ 8400 batches | train loss 0.4631172 +| epoch 4 | 7335/ 8400 batches | train loss 0.4463885 +| epoch 4 | 7339/ 8400 batches | train loss 0.4157594 +| epoch 4 | 7343/ 8400 batches | train loss 0.4167793 +| epoch 4 | 7347/ 8400 batches | train loss 0.3994045 +| epoch 4 | 7351/ 8400 batches | train loss 0.3634314 +| epoch 4 | 7355/ 8400 batches | train loss 0.4816180 +| epoch 4 | 7359/ 8400 batches | train loss 0.4789856 +| epoch 4 | 7363/ 8400 batches | train loss 0.5368855 +| epoch 4 | 7367/ 8400 batches | train loss 0.4410112 +| epoch 4 | 7371/ 8400 batches | train loss 0.4276577 +| epoch 4 | 7375/ 8400 batches | train loss 0.4788734 +| epoch 4 | 7379/ 8400 batches | train loss 0.3406483 +| epoch 4 | 7383/ 8400 batches | train loss 0.3726806 +| epoch 4 | 7387/ 8400 batches | train loss 0.4045396 +| epoch 4 | 7391/ 8400 batches | train loss 0.3988892 +| epoch 4 | 7395/ 8400 batches | train loss 0.3757551 +| epoch 4 | 7399/ 8400 batches | train loss 0.4227198 +| epoch 4 | 7403/ 8400 batches | train loss 0.4282929 +| epoch 4 | 7407/ 8400 batches | train loss 0.3820557 +| epoch 4 | 7411/ 8400 batches | train loss 0.4886452 +| epoch 4 | 7415/ 8400 batches | train loss 0.3884775 +| epoch 4 | 7419/ 8400 batches | train loss 0.4989515 +| epoch 4 | 7423/ 8400 batches | train loss 0.4292276 +| epoch 4 | 7427/ 8400 batches | train loss 0.3937888 +| epoch 4 | 7431/ 8400 batches | train loss 0.4082635 +| epoch 4 | 7435/ 8400 batches | train loss 0.4479591 +| epoch 4 | 7439/ 8400 batches | train loss 0.4272531 +| epoch 4 | 7443/ 8400 batches | train loss 0.3813048 +| epoch 4 | 7447/ 8400 batches | train loss 0.3468572 +| epoch 4 | 7451/ 8400 batches | train loss 0.4034772 +| epoch 4 | 7455/ 8400 batches | train loss 0.4169660 +| epoch 4 | 7459/ 8400 batches | train loss 0.3397645 +| epoch 4 | 7463/ 8400 batches | train loss 0.4006927 +| epoch 4 | 7467/ 8400 batches | train loss 0.4078524 +| epoch 4 | 7471/ 8400 batches | train loss 0.4830531 +| epoch 4 | 7475/ 8400 batches | train loss 0.4084561 +| epoch 4 | 7479/ 8400 batches | train loss 0.4156345 +| epoch 4 | 7483/ 8400 batches | train loss 0.4163575 +| epoch 4 | 7487/ 8400 batches | train loss 0.4379520 +| epoch 4 | 7491/ 8400 batches | train loss 0.4132000 +| epoch 4 | 7495/ 8400 batches | train loss 0.3703957 +| epoch 4 | 7499/ 8400 batches | train loss 0.4537866 +| epoch 4 | 7503/ 8400 batches | train loss 0.4091715 +| epoch 4 | 7507/ 8400 batches | train loss 0.4183232 +| epoch 4 | 7511/ 8400 batches | train loss 0.3897715 +| epoch 4 | 7515/ 8400 batches | train loss 0.4737270 +| epoch 4 | 7519/ 8400 batches | train loss 0.4584628 +| epoch 4 | 7523/ 8400 batches | train loss 0.4279911 +| epoch 4 | 7527/ 8400 batches | train loss 0.5246695 +| epoch 4 | 7531/ 8400 batches | train loss 0.3879204 +| epoch 4 | 7535/ 8400 batches | train loss 0.4014799 +| epoch 4 | 7539/ 8400 batches | train loss 0.4284721 +| epoch 4 | 7543/ 8400 batches | train loss 0.3792964 +| epoch 4 | 7547/ 8400 batches | train loss 0.2973599 +| epoch 4 | 7551/ 8400 batches | train loss 0.4427291 +| epoch 4 | 7555/ 8400 batches | train loss 0.4802384 +| epoch 4 | 7559/ 8400 batches | train loss 0.4552250 +| epoch 4 | 7563/ 8400 batches | train loss 0.1732455 +| epoch 4 | 7567/ 8400 batches | train loss 0.3581344 +| epoch 4 | 7571/ 8400 batches | train loss 0.4464180 +| epoch 4 | 7575/ 8400 batches | train loss 0.4104158 +| epoch 4 | 7579/ 8400 batches | train loss 0.4120890 +| epoch 4 | 7583/ 8400 batches | train loss 0.3724312 +| epoch 4 | 7587/ 8400 batches | train loss 0.4333027 +| epoch 4 | 7591/ 8400 batches | train loss 0.4229354 +| epoch 4 | 7595/ 8400 batches | train loss 0.4158413 +| epoch 4 | 7599/ 8400 batches | train loss 0.4145570 +| epoch 4 | 7603/ 8400 batches | train loss 0.3872723 +| epoch 4 | 7607/ 8400 batches | train loss 0.4739646 +| epoch 4 | 7611/ 8400 batches | train loss 0.3973717 +| epoch 4 | 7615/ 8400 batches | train loss 0.3247881 +| epoch 4 | 7619/ 8400 batches | train loss 0.2974226 +| epoch 4 | 7623/ 8400 batches | train loss 0.4412169 +| epoch 4 | 7627/ 8400 batches | train loss 0.3955390 +| epoch 4 | 7631/ 8400 batches | train loss 0.4116656 +| epoch 4 | 7635/ 8400 batches | train loss 0.4545355 +| epoch 4 | 7639/ 8400 batches | train loss 0.4441757 +| epoch 4 | 7643/ 8400 batches | train loss 0.3350335 +| epoch 4 | 7647/ 8400 batches | train loss 0.3597092 +| epoch 4 | 7651/ 8400 batches | train loss 0.4718487 +| epoch 4 | 7655/ 8400 batches | train loss 0.3785242 +| epoch 4 | 7659/ 8400 batches | train loss 0.4564977 +| epoch 4 | 7663/ 8400 batches | train loss 0.3108791 +| epoch 4 | 7667/ 8400 batches | train loss 0.4263412 +| epoch 4 | 7671/ 8400 batches | train loss 0.3964151 +| epoch 4 | 7675/ 8400 batches | train loss 0.4004380 +| epoch 4 | 7679/ 8400 batches | train loss 0.5065788 +| epoch 4 | 7683/ 8400 batches | train loss 0.4082829 +| epoch 4 | 7687/ 8400 batches | train loss 0.4049432 +| epoch 4 | 7691/ 8400 batches | train loss 0.4103980 +| epoch 4 | 7695/ 8400 batches | train loss 0.3834008 +| epoch 4 | 7699/ 8400 batches | train loss 0.4005780 +| epoch 4 | 7703/ 8400 batches | train loss 0.4311267 +| epoch 4 | 7707/ 8400 batches | train loss 0.4747646 +| epoch 4 | 7711/ 8400 batches | train loss 0.3759143 +| epoch 4 | 7715/ 8400 batches | train loss 0.4205938 +| epoch 4 | 7719/ 8400 batches | train loss 0.3982464 +| epoch 4 | 7723/ 8400 batches | train loss 0.4557382 +| epoch 4 | 7727/ 8400 batches | train loss 0.4053824 +| epoch 4 | 7731/ 8400 batches | train loss 0.4304177 +| epoch 4 | 7735/ 8400 batches | train loss 0.3608626 +| epoch 4 | 7739/ 8400 batches | train loss 0.3918429 +| epoch 4 | 7743/ 8400 batches | train loss 0.3736970 +| epoch 4 | 7747/ 8400 batches | train loss 0.4765603 +| epoch 4 | 7751/ 8400 batches | train loss 0.4307755 +| epoch 4 | 7755/ 8400 batches | train loss 0.4567185 +| epoch 4 | 7759/ 8400 batches | train loss 0.4185688 +| epoch 4 | 7763/ 8400 batches | train loss 0.4729510 +| epoch 4 | 7767/ 8400 batches | train loss 0.4470623 +| epoch 4 | 7771/ 8400 batches | train loss 0.3809549 +| epoch 4 | 7775/ 8400 batches | train loss 0.3810458 +| epoch 4 | 7779/ 8400 batches | train loss 0.3802026 +| epoch 4 | 7783/ 8400 batches | train loss 0.4083657 +| epoch 4 | 7787/ 8400 batches | train loss 0.3836988 +| epoch 4 | 7791/ 8400 batches | train loss 0.4181341 +| epoch 4 | 7795/ 8400 batches | train loss 0.4571202 +| epoch 4 | 7799/ 8400 batches | train loss 0.3850690 +| epoch 4 | 7803/ 8400 batches | train loss 0.4360188 +| epoch 4 | 7807/ 8400 batches | train loss 0.3976924 +| epoch 4 | 7811/ 8400 batches | train loss 0.4284077 +| epoch 4 | 7815/ 8400 batches | train loss 0.4182636 +| epoch 4 | 7819/ 8400 batches | train loss 0.4121118 +| epoch 4 | 7823/ 8400 batches | train loss 0.4286636 +| epoch 4 | 7827/ 8400 batches | train loss 0.4484325 +| epoch 4 | 7831/ 8400 batches | train loss 0.3710150 +| epoch 4 | 7835/ 8400 batches | train loss 0.3557712 +| epoch 4 | 7839/ 8400 batches | train loss 0.3782537 +| epoch 4 | 7843/ 8400 batches | train loss 0.4112198 +| epoch 4 | 7847/ 8400 batches | train loss 0.3860279 +| epoch 4 | 7851/ 8400 batches | train loss 0.3654270 +| epoch 4 | 7855/ 8400 batches | train loss 0.4740515 +| epoch 4 | 7859/ 8400 batches | train loss 0.4220983 +| epoch 4 | 7863/ 8400 batches | train loss 0.4885835 +| epoch 4 | 7867/ 8400 batches | train loss 0.4225017 +| epoch 4 | 7871/ 8400 batches | train loss 0.3984991 +| epoch 4 | 7875/ 8400 batches | train loss 0.3966221 +| epoch 4 | 7879/ 8400 batches | train loss 0.4134630 +| epoch 4 | 7883/ 8400 batches | train loss 0.4151192 +| epoch 4 | 7887/ 8400 batches | train loss 0.4354542 +| epoch 4 | 7891/ 8400 batches | train loss 0.4088649 +| epoch 4 | 7895/ 8400 batches | train loss 0.3730825 +| epoch 4 | 7899/ 8400 batches | train loss 0.4310366 +| epoch 4 | 7903/ 8400 batches | train loss 0.4114436 +| epoch 4 | 7907/ 8400 batches | train loss 0.4437228 +| epoch 4 | 7911/ 8400 batches | train loss 0.3980331 +| epoch 4 | 7915/ 8400 batches | train loss 0.4256939 +| epoch 4 | 7919/ 8400 batches | train loss 0.4544182 +| epoch 4 | 7923/ 8400 batches | train loss 0.4097758 +| epoch 4 | 7927/ 8400 batches | train loss 0.4169452 +| epoch 4 | 7931/ 8400 batches | train loss 0.4066852 +| epoch 4 | 7935/ 8400 batches | train loss 0.4115552 +| epoch 4 | 7939/ 8400 batches | train loss 0.3679005 +| epoch 4 | 7943/ 8400 batches | train loss 0.5313696 +| epoch 4 | 7947/ 8400 batches | train loss 0.3706309 +| epoch 4 | 7951/ 8400 batches | train loss 0.3925377 +| epoch 4 | 7955/ 8400 batches | train loss 0.3920197 +| epoch 4 | 7959/ 8400 batches | train loss 0.4329004 +| epoch 4 | 7963/ 8400 batches | train loss 0.4182983 +| epoch 4 | 7967/ 8400 batches | train loss 0.4414514 +| epoch 4 | 7971/ 8400 batches | train loss 0.3948670 +| epoch 4 | 7975/ 8400 batches | train loss 0.3928537 +| epoch 4 | 7979/ 8400 batches | train loss 0.4747302 +| epoch 4 | 7983/ 8400 batches | train loss 0.5328214 +| epoch 4 | 7987/ 8400 batches | train loss 0.4280733 +| epoch 4 | 7991/ 8400 batches | train loss 0.3733666 +| epoch 4 | 7995/ 8400 batches | train loss 0.3998114 +| epoch 4 | 7999/ 8400 batches | train loss 0.3819678 +| epoch 4 | 8003/ 8400 batches | train loss 0.1536635 +| epoch 4 | 8007/ 8400 batches | train loss 0.4553511 +| epoch 4 | 8011/ 8400 batches | train loss 0.5004649 +| epoch 4 | 8015/ 8400 batches | train loss 0.4322444 +| epoch 4 | 8019/ 8400 batches | train loss 0.4384807 +| epoch 4 | 8023/ 8400 batches | train loss 0.3531192 +| epoch 4 | 8027/ 8400 batches | train loss 0.3801646 +| epoch 4 | 8031/ 8400 batches | train loss 0.4305709 +| epoch 4 | 8035/ 8400 batches | train loss 0.4112620 +| epoch 4 | 8039/ 8400 batches | train loss 0.4335704 +| epoch 4 | 8043/ 8400 batches | train loss 0.4179494 +| epoch 4 | 8047/ 8400 batches | train loss 0.4088569 +| epoch 4 | 8051/ 8400 batches | train loss 0.4060373 +| epoch 4 | 8055/ 8400 batches | train loss 0.4026603 +| epoch 4 | 8059/ 8400 batches | train loss 0.3868240 +| epoch 4 | 8063/ 8400 batches | train loss 0.3786101 +| epoch 4 | 8067/ 8400 batches | train loss 0.3909491 +| epoch 4 | 8071/ 8400 batches | train loss 0.4965548 +| epoch 4 | 8075/ 8400 batches | train loss 0.4607484 +| epoch 4 | 8079/ 8400 batches | train loss 0.4455820 +| epoch 4 | 8083/ 8400 batches | train loss 0.4653524 +| epoch 4 | 8087/ 8400 batches | train loss 0.3416304 +| epoch 4 | 8091/ 8400 batches | train loss 0.4983598 +| epoch 4 | 8095/ 8400 batches | train loss 0.4192231 +| epoch 4 | 8099/ 8400 batches | train loss 0.3844456 +| epoch 4 | 8103/ 8400 batches | train loss 0.3295648 +| epoch 4 | 8107/ 8400 batches | train loss 0.2751027 +| epoch 4 | 8111/ 8400 batches | train loss 0.4400746 +| epoch 4 | 8115/ 8400 batches | train loss 0.3979620 +| epoch 4 | 8119/ 8400 batches | train loss 0.3954122 +| epoch 4 | 8123/ 8400 batches | train loss 0.4388133 +| epoch 4 | 8127/ 8400 batches | train loss 0.3792263 +| epoch 4 | 8131/ 8400 batches | train loss 0.4452795 +| epoch 4 | 8135/ 8400 batches | train loss 0.3704000 +| epoch 4 | 8139/ 8400 batches | train loss 0.4392745 +| epoch 4 | 8143/ 8400 batches | train loss 0.4352166 +| epoch 4 | 8147/ 8400 batches | train loss 0.3771684 +| epoch 4 | 8151/ 8400 batches | train loss 0.4371707 +| epoch 4 | 8155/ 8400 batches | train loss 0.4896065 +| epoch 4 | 8159/ 8400 batches | train loss 0.4137574 +| epoch 4 | 8163/ 8400 batches | train loss 0.2487964 +| epoch 4 | 8167/ 8400 batches | train loss 0.3550521 +| epoch 4 | 8171/ 8400 batches | train loss 0.4057162 +| epoch 4 | 8175/ 8400 batches | train loss 0.3680246 +| epoch 4 | 8179/ 8400 batches | train loss 0.4040940 +| epoch 4 | 8183/ 8400 batches | train loss 0.4315794 +| epoch 4 | 8187/ 8400 batches | train loss 0.4229495 +| epoch 4 | 8191/ 8400 batches | train loss 0.4997150 +| epoch 4 | 8195/ 8400 batches | train loss 0.3395800 +| epoch 4 | 8199/ 8400 batches | train loss 0.4617051 +| epoch 4 | 8203/ 8400 batches | train loss 0.3890838 +| epoch 4 | 8207/ 8400 batches | train loss 0.4116004 +| epoch 4 | 8211/ 8400 batches | train loss 0.3933116 +| epoch 4 | 8215/ 8400 batches | train loss 0.4090797 +| epoch 4 | 8219/ 8400 batches | train loss 0.4215497 +| epoch 4 | 8223/ 8400 batches | train loss 0.4161381 +| epoch 4 | 8227/ 8400 batches | train loss 0.3573069 +| epoch 4 | 8231/ 8400 batches | train loss 0.4127784 +| epoch 4 | 8235/ 8400 batches | train loss 0.3456637 +| epoch 4 | 8239/ 8400 batches | train loss 0.3876966 +| epoch 4 | 8243/ 8400 batches | train loss 0.3878163 +| epoch 4 | 8247/ 8400 batches | train loss 0.4309076 +| epoch 4 | 8251/ 8400 batches | train loss 0.4270675 +| epoch 4 | 8255/ 8400 batches | train loss 0.4383253 +| epoch 4 | 8259/ 8400 batches | train loss 0.3679942 +| epoch 4 | 8263/ 8400 batches | train loss 0.4598241 +| epoch 4 | 8267/ 8400 batches | train loss 0.3558490 +| epoch 4 | 8271/ 8400 batches | train loss 0.4681570 +| epoch 4 | 8275/ 8400 batches | train loss 0.4220391 +| epoch 4 | 8279/ 8400 batches | train loss 0.3802693 +| epoch 4 | 8283/ 8400 batches | train loss 0.3975371 +| epoch 4 | 8287/ 8400 batches | train loss 0.4204782 +| epoch 4 | 8291/ 8400 batches | train loss 0.5143806 +| epoch 4 | 8295/ 8400 batches | train loss 0.4092705 +| epoch 4 | 8299/ 8400 batches | train loss 0.4389723 +| epoch 4 | 8303/ 8400 batches | train loss 0.3771581 +| epoch 4 | 8307/ 8400 batches | train loss 0.4019495 +| epoch 4 | 8311/ 8400 batches | train loss 0.4358675 +| epoch 4 | 8315/ 8400 batches | train loss 0.4647323 +| epoch 4 | 8319/ 8400 batches | train loss 0.5471774 +| epoch 4 | 8323/ 8400 batches | train loss 0.4194764 +| epoch 4 | 8327/ 8400 batches | train loss 0.4721580 +| epoch 4 | 8331/ 8400 batches | train loss 0.3964433 +| epoch 4 | 8335/ 8400 batches | train loss 0.3821837 +| epoch 4 | 8339/ 8400 batches | train loss 0.3082163 +| epoch 4 | 8343/ 8400 batches | train loss 0.3345024 +| epoch 4 | 8347/ 8400 batches | train loss 0.3855210 +| epoch 4 | 8351/ 8400 batches | train loss 0.3537367 +| epoch 4 | 8355/ 8400 batches | train loss 0.4389447 +| epoch 4 | 8359/ 8400 batches | train loss 0.5440845 +| epoch 4 | 8363/ 8400 batches | train loss 0.3454401 +| epoch 4 | 8367/ 8400 batches | train loss 0.3314352 +| epoch 4 | 8371/ 8400 batches | train loss 0.3970507 +| epoch 4 | 8375/ 8400 batches | train loss 0.4675626 +| epoch 4 | 8379/ 8400 batches | train loss 0.4102191 +| epoch 4 | 8383/ 8400 batches | train loss 0.4070360 +| epoch 4 | 8387/ 8400 batches | train loss 0.3846892 +| epoch 4 | 8391/ 8400 batches | train loss 0.4214908 +| epoch 4 | 8395/ 8400 batches | train loss 0.3766348 +| epoch 4 | 8399/ 8400 batches | train loss 0.3952793 +-------------------------------------------------------------------------------- +| epoch 4 | 3/ 8400 batches | test loss 0.4743121 +| epoch 4 | 7/ 8400 batches | test loss 0.3618924 +| epoch 4 | 11/ 8400 batches | test loss 0.3597663 +| epoch 4 | 15/ 8400 batches | test loss 0.4250250 +| epoch 4 | 19/ 8400 batches | test loss 0.3454697 +| epoch 4 | 23/ 8400 batches | test loss 0.4309090 +| epoch 4 | 27/ 8400 batches | test loss 0.3950347 +| epoch 4 | 31/ 8400 batches | test loss 0.3649420 +| epoch 4 | 35/ 8400 batches | test loss 0.4443736 +| epoch 4 | 39/ 8400 batches | test loss 0.3616454 +| epoch 4 | 43/ 8400 batches | test loss 0.4934763 +| epoch 4 | 47/ 8400 batches | test loss 0.4125146 +| epoch 4 | 51/ 8400 batches | test loss 0.5445424 +| epoch 4 | 55/ 8400 batches | test loss 0.3841010 +| epoch 4 | 59/ 8400 batches | test loss 0.3781459 +| epoch 4 | 63/ 8400 batches | test loss 0.4365913 +| epoch 4 | 67/ 8400 batches | test loss 0.5173657 +| epoch 4 | 71/ 8400 batches | test loss 0.3242480 +| epoch 4 | 75/ 8400 batches | test loss 0.3989249 +| epoch 4 | 79/ 8400 batches | test loss 0.3919530 +| epoch 4 | 83/ 8400 batches | test loss 0.4671268 +| epoch 4 | 87/ 8400 batches | test loss 0.4490637 +| epoch 4 | 91/ 8400 batches | test loss 0.4407180 +| epoch 4 | 95/ 8400 batches | test loss 0.4817542 +| epoch 4 | 99/ 8400 batches | test loss 0.4233415 +| epoch 4 | 103/ 8400 batches | test loss 0.3655780 +| epoch 4 | 107/ 8400 batches | test loss 0.3345574 +| epoch 4 | 111/ 8400 batches | test loss 0.3832757 +| epoch 4 | 115/ 8400 batches | test loss 0.3226737 +| epoch 4 | 119/ 8400 batches | test loss 0.4490253 +| epoch 4 | 123/ 8400 batches | test loss 0.3860359 +| epoch 4 | 127/ 8400 batches | test loss 0.4108231 +| epoch 4 | 131/ 8400 batches | test loss 0.4425254 +| epoch 4 | 135/ 8400 batches | test loss 0.4273176 +| epoch 4 | 139/ 8400 batches | test loss 0.3885434 +| epoch 4 | 143/ 8400 batches | test loss 0.3485143 +| epoch 4 | 147/ 8400 batches | test loss 0.4158696 +| epoch 4 | 151/ 8400 batches | test loss 0.4220117 +| epoch 4 | 155/ 8400 batches | test loss 0.4331647 +| epoch 4 | 159/ 8400 batches | test loss 0.4925877 +| epoch 4 | 163/ 8400 batches | test loss 0.4025922 +| epoch 4 | 167/ 8400 batches | test loss 0.1688333 +| epoch 4 | 171/ 8400 batches | test loss 0.4584802 +| epoch 4 | 175/ 8400 batches | test loss 0.3319588 +| epoch 4 | 179/ 8400 batches | test loss 0.4622829 +| epoch 4 | 183/ 8400 batches | test loss 0.4306253 +| epoch 4 | 187/ 8400 batches | test loss 0.4365794 +| epoch 4 | 191/ 8400 batches | test loss 0.4825446 +| epoch 4 | 195/ 8400 batches | test loss 0.3879740 +| epoch 4 | 199/ 8400 batches | test loss 0.4243907 +| epoch 4 | 203/ 8400 batches | test loss 0.4131365 +| epoch 4 | 207/ 8400 batches | test loss 0.3560795 +| epoch 4 | 211/ 8400 batches | test loss 0.4256191 +| epoch 4 | 215/ 8400 batches | test loss 0.5082688 +| epoch 4 | 219/ 8400 batches | test loss 0.3753309 +| epoch 4 | 223/ 8400 batches | test loss 0.3753574 +| epoch 4 | 227/ 8400 batches | test loss 0.3407509 +| epoch 4 | 231/ 8400 batches | test loss 0.4613874 +| epoch 4 | 235/ 8400 batches | test loss 0.3255495 +| epoch 4 | 239/ 8400 batches | test loss 0.3023110 +| epoch 4 | 243/ 8400 batches | test loss 0.4286359 +| epoch 4 | 247/ 8400 batches | test loss 0.3606327 +| epoch 4 | 251/ 8400 batches | test loss 0.4230706 +| epoch 4 | 255/ 8400 batches | test loss 0.4570451 +| epoch 4 | 259/ 8400 batches | test loss 0.4295763 +| epoch 4 | 263/ 8400 batches | test loss 0.4079153 +| epoch 4 | 267/ 8400 batches | test loss 0.4768990 +| epoch 4 | 271/ 8400 batches | test loss 0.4870041 +| epoch 4 | 275/ 8400 batches | test loss 0.4309321 +| epoch 4 | 279/ 8400 batches | test loss 0.4140615 +| epoch 4 | 283/ 8400 batches | test loss 0.4295279 +| epoch 4 | 287/ 8400 batches | test loss 0.4431064 +| epoch 4 | 291/ 8400 batches | test loss 0.4048661 +| epoch 4 | 295/ 8400 batches | test loss 0.4440917 +| epoch 4 | 299/ 8400 batches | test loss 0.4265057 +| epoch 4 | 303/ 8400 batches | test loss 0.4318252 +| epoch 4 | 307/ 8400 batches | test loss 0.4862336 +| epoch 4 | 311/ 8400 batches | test loss 0.4314035 +| epoch 4 | 315/ 8400 batches | test loss 0.5082780 +| epoch 4 | 319/ 8400 batches | test loss 0.3554455 +| epoch 4 | 323/ 8400 batches | test loss 0.4472879 +| epoch 4 | 327/ 8400 batches | test loss 0.3796255 +| epoch 4 | 331/ 8400 batches | test loss 0.4213082 +| epoch 4 | 335/ 8400 batches | test loss 0.2946593 +| epoch 4 | 339/ 8400 batches | test loss 0.3869607 +| epoch 4 | 343/ 8400 batches | test loss 0.7652704 +| epoch 4 | 347/ 8400 batches | test loss 0.4248450 +| epoch 4 | 351/ 8400 batches | test loss 0.3837627 +| epoch 4 | 355/ 8400 batches | test loss 0.4282422 +| epoch 4 | 359/ 8400 batches | test loss 0.4253049 +| epoch 4 | 363/ 8400 batches | test loss 0.4226103 +| epoch 4 | 367/ 8400 batches | test loss 0.4178099 +| epoch 4 | 371/ 8400 batches | test loss 0.3791002 +| epoch 4 | 375/ 8400 batches | test loss 0.5065224 +| epoch 4 | 379/ 8400 batches | test loss 0.4285427 +| epoch 4 | 383/ 8400 batches | test loss 0.4279349 +| epoch 4 | 387/ 8400 batches | test loss 0.3924105 +| epoch 4 | 391/ 8400 batches | test loss 0.4272199 +| epoch 4 | 395/ 8400 batches | test loss 0.4283170 +| epoch 4 | 399/ 8400 batches | test loss 0.3599080 +| epoch 4 | 403/ 8400 batches | test loss 0.4158924 +| epoch 4 | 407/ 8400 batches | test loss 0.3851529 +| epoch 4 | 411/ 8400 batches | test loss 0.3672324 +| epoch 4 | 415/ 8400 batches | test loss 0.4645123 +| epoch 4 | 419/ 8400 batches | test loss 0.3931932 +| epoch 4 | 423/ 8400 batches | test loss 0.4016221 +| epoch 4 | 427/ 8400 batches | test loss 0.4506420 +| epoch 4 | 431/ 8400 batches | test loss 0.3410391 +| epoch 4 | 435/ 8400 batches | test loss 0.3948377 +| epoch 4 | 439/ 8400 batches | test loss 0.4717552 +| epoch 4 | 443/ 8400 batches | test loss 0.4214056 +| epoch 4 | 447/ 8400 batches | test loss 0.3874180 +| epoch 4 | 451/ 8400 batches | test loss 0.4397950 +| epoch 4 | 455/ 8400 batches | test loss 0.4059349 +| epoch 4 | 459/ 8400 batches | test loss 0.4562779 +| epoch 4 | 463/ 8400 batches | test loss 0.4507553 +| epoch 4 | 467/ 8400 batches | test loss 0.3931249 +| epoch 4 | 471/ 8400 batches | test loss 0.4130830 +| epoch 4 | 475/ 8400 batches | test loss 0.4149294 +| epoch 4 | 479/ 8400 batches | test loss 0.3647839 +| epoch 4 | 483/ 8400 batches | test loss 0.4231834 +| epoch 4 | 487/ 8400 batches | test loss 0.4397143 +| epoch 4 | 491/ 8400 batches | test loss 0.4544667 +| epoch 4 | 495/ 8400 batches | test loss 0.3922234 +| epoch 4 | 499/ 8400 batches | test loss 0.4580188 +| epoch 4 | 503/ 8400 batches | test loss 0.3810494 +| epoch 4 | 507/ 8400 batches | test loss 0.4164903 +| epoch 4 | 511/ 8400 batches | test loss 0.4453373 +| epoch 4 | 515/ 8400 batches | test loss 0.4514440 +| epoch 4 | 519/ 8400 batches | test loss 0.4081230 +| epoch 4 | 523/ 8400 batches | test loss 0.4719906 +| epoch 4 | 527/ 8400 batches | test loss 0.4280463 +| epoch 4 | 531/ 8400 batches | test loss 0.4252947 +| epoch 4 | 535/ 8400 batches | test loss 0.3856783 +| epoch 4 | 539/ 8400 batches | test loss 0.4586048 +| epoch 4 | 543/ 8400 batches | test loss 0.4182649 +| epoch 4 | 547/ 8400 batches | test loss 0.3627877 +| epoch 4 | 551/ 8400 batches | test loss 0.4097933 +| epoch 4 | 555/ 8400 batches | test loss 0.4101835 +| epoch 4 | 559/ 8400 batches | test loss 0.3883981 +| epoch 4 | 563/ 8400 batches | test loss 0.4053096 +| epoch 4 | 567/ 8400 batches | test loss 0.3807225 +| epoch 4 | 571/ 8400 batches | test loss 0.4755501 +| epoch 4 | 575/ 8400 batches | test loss 0.3708287 +| epoch 4 | 579/ 8400 batches | test loss 0.3790591 +| epoch 4 | 583/ 8400 batches | test loss 0.4440338 +| epoch 4 | 587/ 8400 batches | test loss 0.4682338 +| epoch 4 | 591/ 8400 batches | test loss 0.4128748 +| epoch 4 | 595/ 8400 batches | test loss 0.4043680 +| epoch 4 | 599/ 8400 batches | test loss 0.4218825 +| epoch 4 | 603/ 8400 batches | test loss 0.3236554 +| epoch 4 | 607/ 8400 batches | test loss 0.4078383 +| epoch 4 | 611/ 8400 batches | test loss 0.4098239 +| epoch 4 | 615/ 8400 batches | test loss 0.5217125 +| epoch 4 | 619/ 8400 batches | test loss 0.3601586 +| epoch 4 | 623/ 8400 batches | test loss 0.3881283 +| epoch 4 | 627/ 8400 batches | test loss 0.4265016 +| epoch 4 | 631/ 8400 batches | test loss 0.3864636 +| epoch 4 | 635/ 8400 batches | test loss 0.3640418 +| epoch 4 | 639/ 8400 batches | test loss 0.4267485 +| epoch 4 | 643/ 8400 batches | test loss 0.4154445 +| epoch 4 | 647/ 8400 batches | test loss 0.4468075 +| epoch 4 | 651/ 8400 batches | test loss 0.3627956 +| epoch 4 | 655/ 8400 batches | test loss 0.4293336 +| epoch 4 | 659/ 8400 batches | test loss 0.4050271 +| epoch 4 | 663/ 8400 batches | test loss 0.3676324 +| epoch 4 | 667/ 8400 batches | test loss 0.4729365 +| epoch 4 | 671/ 8400 batches | test loss 0.3993573 +| epoch 4 | 675/ 8400 batches | test loss 0.4794633 +| epoch 4 | 679/ 8400 batches | test loss 0.4096234 +| epoch 4 | 683/ 8400 batches | test loss 0.4063741 +| epoch 4 | 687/ 8400 batches | test loss 0.4249203 +| epoch 4 | 691/ 8400 batches | test loss 0.4305911 +| epoch 4 | 695/ 8400 batches | test loss 0.4267085 +| epoch 4 | 699/ 8400 batches | test loss 0.3810694 +| epoch 4 | 703/ 8400 batches | test loss 0.3857891 +| epoch 4 | 707/ 8400 batches | test loss 0.4775327 +| epoch 4 | 711/ 8400 batches | test loss 0.3954768 +| epoch 4 | 715/ 8400 batches | test loss 0.4337752 +| epoch 4 | 719/ 8400 batches | test loss 0.3347034 +| epoch 4 | 723/ 8400 batches | test loss 0.3705322 +| epoch 4 | 727/ 8400 batches | test loss 0.4807537 +| epoch 4 | 731/ 8400 batches | test loss 0.4244482 +| epoch 4 | 735/ 8400 batches | test loss 0.4322184 +| epoch 4 | 739/ 8400 batches | test loss 0.3746952 +| epoch 4 | 743/ 8400 batches | test loss 0.4667453 +| epoch 4 | 747/ 8400 batches | test loss 0.4012856 +| epoch 4 | 751/ 8400 batches | test loss 0.4394665 +| epoch 4 | 755/ 8400 batches | test loss 0.4016795 +| epoch 4 | 759/ 8400 batches | test loss 0.4382364 +| epoch 4 | 763/ 8400 batches | test loss 0.3641967 +| epoch 4 | 767/ 8400 batches | test loss 0.4240788 +| epoch 4 | 771/ 8400 batches | test loss 0.4011423 +| epoch 4 | 775/ 8400 batches | test loss 0.4966514 +| epoch 4 | 779/ 8400 batches | test loss 0.4538183 +| epoch 4 | 783/ 8400 batches | test loss 0.4030647 +| epoch 4 | 787/ 8400 batches | test loss 0.4613509 +| epoch 4 | 791/ 8400 batches | test loss 0.4808378 +| epoch 4 | 795/ 8400 batches | test loss 0.3529318 +| epoch 4 | 799/ 8400 batches | test loss 0.4746317 +| epoch 4 | 803/ 8400 batches | test loss 0.4054451 +| epoch 4 | 807/ 8400 batches | test loss 0.4115808 +| epoch 4 | 811/ 8400 batches | test loss 0.4269734 +| epoch 4 | 815/ 8400 batches | test loss 0.4326250 +| epoch 4 | 819/ 8400 batches | test loss 0.3776896 +| epoch 4 | 823/ 8400 batches | test loss 0.4227804 +| epoch 4 | 827/ 8400 batches | test loss 0.5061905 +| epoch 4 | 831/ 8400 batches | test loss 0.4440944 +| epoch 4 | 835/ 8400 batches | test loss 0.4288796 +| epoch 4 | 839/ 8400 batches | test loss 0.4025726 +| epoch 4 | 843/ 8400 batches | test loss 0.3471934 +| epoch 4 | 847/ 8400 batches | test loss 0.4112601 +| epoch 4 | 851/ 8400 batches | test loss 0.4424783 +| epoch 4 | 855/ 8400 batches | test loss 0.4360176 +| epoch 4 | 859/ 8400 batches | test loss 0.4041480 +| epoch 4 | 863/ 8400 batches | test loss 0.4341534 +| epoch 4 | 867/ 8400 batches | test loss 0.3804078 +| epoch 4 | 871/ 8400 batches | test loss 0.5066306 +| epoch 4 | 875/ 8400 batches | test loss 0.3861409 +| epoch 4 | 879/ 8400 batches | test loss 0.3628329 +| epoch 4 | 883/ 8400 batches | test loss 0.3637134 +| epoch 4 | 887/ 8400 batches | test loss 0.4797557 +| epoch 4 | 891/ 8400 batches | test loss 0.5387821 +| epoch 4 | 895/ 8400 batches | test loss 0.3928863 +| epoch 4 | 899/ 8400 batches | test loss 0.4564459 +| epoch 4 | 903/ 8400 batches | test loss 0.4417565 +| epoch 4 | 907/ 8400 batches | test loss 0.4468854 +| epoch 4 | 911/ 8400 batches | test loss 0.4678197 +| epoch 4 | 915/ 8400 batches | test loss 0.3882279 +| epoch 4 | 919/ 8400 batches | test loss 0.3502167 +| epoch 4 | 923/ 8400 batches | test loss 0.3589739 +| epoch 4 | 927/ 8400 batches | test loss 0.4414955 +| epoch 4 | 931/ 8400 batches | test loss 0.3983770 +| epoch 4 | 935/ 8400 batches | test loss 0.4673961 +| epoch 4 | 939/ 8400 batches | test loss 0.3881847 +| epoch 4 | 943/ 8400 batches | test loss 0.4121384 +| epoch 4 | 947/ 8400 batches | test loss 0.4554228 +| epoch 4 | 951/ 8400 batches | test loss 0.1746236 +| epoch 4 | 955/ 8400 batches | test loss 0.4151510 +| epoch 4 | 959/ 8400 batches | test loss 0.4327608 +| epoch 4 | 963/ 8400 batches | test loss 0.3398077 +| epoch 4 | 967/ 8400 batches | test loss 0.4367116 +| epoch 4 | 971/ 8400 batches | test loss 0.5362207 +| epoch 4 | 975/ 8400 batches | test loss 0.3845855 +| epoch 4 | 979/ 8400 batches | test loss 0.4189120 +| epoch 4 | 983/ 8400 batches | test loss 0.4666391 +| epoch 4 | 987/ 8400 batches | test loss 0.3819749 +| epoch 4 | 991/ 8400 batches | test loss 0.3859978 +| epoch 4 | 995/ 8400 batches | test loss 0.3592962 +| epoch 4 | 999/ 8400 batches | test loss 0.4213415 +| epoch 4 | 1003/ 8400 batches | test loss 0.3881718 +| epoch 4 | 1007/ 8400 batches | test loss 0.5856229 +| epoch 4 | 1011/ 8400 batches | test loss 0.4711406 +| epoch 4 | 1015/ 8400 batches | test loss 0.3489167 +| epoch 4 | 1019/ 8400 batches | test loss 0.3901407 +| epoch 4 | 1023/ 8400 batches | test loss 0.4158467 +| epoch 4 | 1027/ 8400 batches | test loss 0.4922676 +| epoch 4 | 1031/ 8400 batches | test loss 0.4226946 +| epoch 4 | 1035/ 8400 batches | test loss 0.4413824 +| epoch 4 | 1039/ 8400 batches | test loss 0.5440539 +| epoch 4 | 1043/ 8400 batches | test loss 0.3201915 +| epoch 4 | 1047/ 8400 batches | test loss 0.4425289 +| epoch 4 | 1051/ 8400 batches | test loss 0.4245033 +| epoch 4 | 1055/ 8400 batches | test loss 0.4902617 +| epoch 4 | 1059/ 8400 batches | test loss 0.3807878 +| epoch 4 | 1063/ 8400 batches | test loss 0.4061864 +| epoch 4 | 1067/ 8400 batches | test loss 0.3808410 +| epoch 4 | 1071/ 8400 batches | test loss 0.4226532 +| epoch 4 | 1075/ 8400 batches | test loss 0.5145963 +| epoch 4 | 1079/ 8400 batches | test loss 0.3578528 +| epoch 4 | 1083/ 8400 batches | test loss 0.3789351 +| epoch 4 | 1087/ 8400 batches | test loss 0.4252894 +| epoch 4 | 1091/ 8400 batches | test loss 0.4041168 +| epoch 4 | 1095/ 8400 batches | test loss 0.4631607 +| epoch 4 | 1099/ 8400 batches | test loss 0.4577563 +| epoch 4 | 1103/ 8400 batches | test loss 0.3653928 +| epoch 4 | 1107/ 8400 batches | test loss 0.4213618 +| epoch 4 | 1111/ 8400 batches | test loss 0.3888984 +| epoch 4 | 1115/ 8400 batches | test loss 0.3892430 +| epoch 4 | 1119/ 8400 batches | test loss 0.5089613 +| epoch 4 | 1123/ 8400 batches | test loss 0.3482143 +| epoch 4 | 1127/ 8400 batches | test loss 0.3638718 +| epoch 4 | 1131/ 8400 batches | test loss 0.3798802 +| epoch 4 | 1135/ 8400 batches | test loss 0.4357949 +| epoch 4 | 1139/ 8400 batches | test loss 0.4185002 +| epoch 4 | 1143/ 8400 batches | test loss 0.4322827 +| epoch 4 | 1147/ 8400 batches | test loss 0.3882187 +| epoch 4 | 1151/ 8400 batches | test loss 0.3944347 +| epoch 4 | 1155/ 8400 batches | test loss 0.4564420 +| epoch 4 | 1159/ 8400 batches | test loss 0.4430922 +| epoch 4 | 1163/ 8400 batches | test loss 0.4229888 +| epoch 4 | 1167/ 8400 batches | test loss 0.4262359 +| epoch 4 | 1171/ 8400 batches | test loss 0.4570541 +| epoch 4 | 1175/ 8400 batches | test loss 0.5180261 +| epoch 4 | 1179/ 8400 batches | test loss 0.4422157 +| epoch 4 | 1183/ 8400 batches | test loss 0.4608741 +| epoch 4 | 1187/ 8400 batches | test loss 0.3481121 +| epoch 4 | 1191/ 8400 batches | test loss 0.4340169 +| epoch 4 | 1195/ 8400 batches | test loss 0.4632472 +| epoch 4 | 1199/ 8400 batches | test loss 0.4735826 +| epoch 4 | 1203/ 8400 batches | test loss 0.4471625 +| epoch 4 | 1207/ 8400 batches | test loss 0.3537352 +| epoch 4 | 1211/ 8400 batches | test loss 0.4604784 +| epoch 4 | 1215/ 8400 batches | test loss 0.3959960 +| epoch 4 | 1219/ 8400 batches | test loss 0.4251021 +| epoch 4 | 1223/ 8400 batches | test loss 0.4382259 +| epoch 4 | 1227/ 8400 batches | test loss 0.4337378 +| epoch 4 | 1231/ 8400 batches | test loss 0.3551027 +| epoch 4 | 1235/ 8400 batches | test loss 0.4223125 +| epoch 4 | 1239/ 8400 batches | test loss 0.3841708 +| epoch 4 | 1243/ 8400 batches | test loss 0.3726590 +| epoch 4 | 1247/ 8400 batches | test loss 0.4309351 +| epoch 4 | 1251/ 8400 batches | test loss 0.4325226 +| epoch 4 | 1255/ 8400 batches | test loss 0.4489112 +| epoch 4 | 1259/ 8400 batches | test loss 0.4907672 +| epoch 4 | 1263/ 8400 batches | test loss 0.4298137 +| epoch 4 | 1267/ 8400 batches | test loss 0.4345949 +| epoch 4 | 1271/ 8400 batches | test loss 0.3689298 +| epoch 4 | 1275/ 8400 batches | test loss 0.4583735 +| epoch 4 | 1279/ 8400 batches | test loss 0.4115720 +| epoch 4 | 1283/ 8400 batches | test loss 0.4083424 +| epoch 4 | 1287/ 8400 batches | test loss 0.4484675 +| epoch 4 | 1291/ 8400 batches | test loss 0.3644250 +| epoch 4 | 1295/ 8400 batches | test loss 0.4537342 +| epoch 4 | 1299/ 8400 batches | test loss 0.4406969 +| epoch 4 | 1303/ 8400 batches | test loss 0.4117550 +| epoch 4 | 1307/ 8400 batches | test loss 0.4259578 +| epoch 4 | 1311/ 8400 batches | test loss 0.4288213 +| epoch 4 | 1315/ 8400 batches | test loss 0.4645424 +| epoch 4 | 1319/ 8400 batches | test loss 0.4154488 +| epoch 4 | 1323/ 8400 batches | test loss 0.4591379 +| epoch 4 | 1327/ 8400 batches | test loss 0.4448392 +| epoch 4 | 1331/ 8400 batches | test loss 0.5496150 +| epoch 4 | 1335/ 8400 batches | test loss 0.3868004 +| epoch 4 | 1339/ 8400 batches | test loss 0.4360240 +| epoch 4 | 1343/ 8400 batches | test loss 0.3699277 +| epoch 4 | 1347/ 8400 batches | test loss 0.5244849 +| epoch 4 | 1351/ 8400 batches | test loss 0.4458991 +| epoch 4 | 1355/ 8400 batches | test loss 0.4895615 +| epoch 4 | 1359/ 8400 batches | test loss 0.3991781 +| epoch 4 | 1363/ 8400 batches | test loss 0.4526729 +| epoch 4 | 1367/ 8400 batches | test loss 0.4024581 +| epoch 4 | 1371/ 8400 batches | test loss 0.4589900 +| epoch 4 | 1375/ 8400 batches | test loss 0.4640874 +| epoch 4 | 1379/ 8400 batches | test loss 0.4340087 +| epoch 4 | 1383/ 8400 batches | test loss 0.4460052 +| epoch 4 | 1387/ 8400 batches | test loss 0.4067905 +| epoch 4 | 1391/ 8400 batches | test loss 0.3697380 +| epoch 4 | 1395/ 8400 batches | test loss 0.4478863 +| epoch 4 | 1399/ 8400 batches | test loss 0.4760968 +| epoch 4 | 1403/ 8400 batches | test loss 0.5029222 +| epoch 4 | 1407/ 8400 batches | test loss 0.3260728 +| epoch 4 | 1411/ 8400 batches | test loss 0.4429572 +| epoch 4 | 1415/ 8400 batches | test loss 0.5172363 +| epoch 4 | 1419/ 8400 batches | test loss 0.4323608 +| epoch 4 | 1423/ 8400 batches | test loss 0.4327135 +| epoch 4 | 1427/ 8400 batches | test loss 0.4816955 +| epoch 4 | 1431/ 8400 batches | test loss 0.4272477 +| epoch 4 | 1435/ 8400 batches | test loss 0.4791141 +| epoch 4 | 1439/ 8400 batches | test loss 0.3939380 +| epoch 4 | 1443/ 8400 batches | test loss 0.4745894 +| epoch 4 | 1447/ 8400 batches | test loss 0.3084844 +| epoch 4 | 1451/ 8400 batches | test loss 0.3899185 +| epoch 4 | 1455/ 8400 batches | test loss 0.4102940 +| epoch 4 | 1459/ 8400 batches | test loss 0.4515971 +| epoch 4 | 1463/ 8400 batches | test loss 0.3801450 +| epoch 4 | 1467/ 8400 batches | test loss 0.4272662 +| epoch 4 | 1471/ 8400 batches | test loss 0.4159067 +| epoch 4 | 1475/ 8400 batches | test loss 0.4039572 +| epoch 4 | 1479/ 8400 batches | test loss 0.5221621 +| epoch 4 | 1483/ 8400 batches | test loss 0.3793230 +| epoch 4 | 1487/ 8400 batches | test loss 0.4736823 +| epoch 4 | 1491/ 8400 batches | test loss 0.2524937 +| epoch 4 | 1495/ 8400 batches | test loss 0.4087225 +| epoch 4 | 1499/ 8400 batches | test loss 0.4882197 +| epoch 4 | 1503/ 8400 batches | test loss 0.4871483 +| epoch 4 | 1507/ 8400 batches | test loss 0.4337133 +| epoch 4 | 1511/ 8400 batches | test loss 0.5110766 +| epoch 4 | 1515/ 8400 batches | test loss 0.3827079 +| epoch 4 | 1519/ 8400 batches | test loss 0.3874223 +| epoch 4 | 1523/ 8400 batches | test loss 0.3731114 +| epoch 4 | 1527/ 8400 batches | test loss 0.4086728 +| epoch 4 | 1531/ 8400 batches | test loss 0.3794537 +| epoch 4 | 1535/ 8400 batches | test loss 0.5045905 +| epoch 4 | 1539/ 8400 batches | test loss 0.4461818 +| epoch 4 | 1543/ 8400 batches | test loss 0.3905924 +| epoch 4 | 1547/ 8400 batches | test loss 0.4907235 +| epoch 4 | 1551/ 8400 batches | test loss 0.2804478 +| epoch 4 | 1555/ 8400 batches | test loss 0.4388576 +| epoch 4 | 1559/ 8400 batches | test loss 0.4796154 +| epoch 4 | 1563/ 8400 batches | test loss 0.4032587 +| epoch 4 | 1567/ 8400 batches | test loss 0.3816071 +| epoch 4 | 1571/ 8400 batches | test loss 0.3283512 +| epoch 4 | 1575/ 8400 batches | test loss 0.3174985 +| epoch 4 | 1579/ 8400 batches | test loss 0.4130946 +| epoch 4 | 1583/ 8400 batches | test loss 0.5309095 +| epoch 4 | 1587/ 8400 batches | test loss 0.1670037 +| epoch 4 | 1591/ 8400 batches | test loss 0.4175571 +| epoch 4 | 1595/ 8400 batches | test loss 0.4413784 +| epoch 4 | 1599/ 8400 batches | test loss 0.3858364 +| epoch 4 | 1603/ 8400 batches | test loss 0.4350178 +| epoch 4 | 1607/ 8400 batches | test loss 0.4641505 +| epoch 4 | 1611/ 8400 batches | test loss 0.3643676 +| epoch 4 | 1615/ 8400 batches | test loss 0.4407665 +| epoch 4 | 1619/ 8400 batches | test loss 0.4931530 +| epoch 4 | 1623/ 8400 batches | test loss 0.4446766 +| epoch 4 | 1627/ 8400 batches | test loss 0.4683977 +| epoch 4 | 1631/ 8400 batches | test loss 0.3497923 +| epoch 4 | 1635/ 8400 batches | test loss 0.3959648 +| epoch 4 | 1639/ 8400 batches | test loss 0.4300017 +| epoch 4 | 1643/ 8400 batches | test loss 0.3709043 +| epoch 4 | 1647/ 8400 batches | test loss 0.4358930 +| epoch 4 | 1651/ 8400 batches | test loss 0.5725192 +| epoch 4 | 1655/ 8400 batches | test loss 0.3514874 +| epoch 4 | 1659/ 8400 batches | test loss 0.3610305 +| epoch 4 | 1663/ 8400 batches | test loss 0.4838395 +| epoch 4 | 1667/ 8400 batches | test loss 0.3996806 +| epoch 4 | 1671/ 8400 batches | test loss 0.3700918 +| epoch 4 | 1675/ 8400 batches | test loss 0.4081454 +| epoch 4 | 1679/ 8400 batches | test loss 0.4404875 +| epoch 4 | 1683/ 8400 batches | test loss 0.3445091 +| epoch 4 | 1687/ 8400 batches | test loss 0.4495679 +| epoch 4 | 1691/ 8400 batches | test loss 0.4330311 +| epoch 4 | 1695/ 8400 batches | test loss 0.3486100 +| epoch 4 | 1699/ 8400 batches | test loss 0.4215014 +| epoch 4 | 1703/ 8400 batches | test loss 0.4546432 +| epoch 4 | 1707/ 8400 batches | test loss 0.4680125 +| epoch 4 | 1711/ 8400 batches | test loss 0.2522843 +| epoch 4 | 1715/ 8400 batches | test loss 0.4034307 +| epoch 4 | 1719/ 8400 batches | test loss 0.4249162 +| epoch 4 | 1723/ 8400 batches | test loss 0.4531586 +| epoch 4 | 1727/ 8400 batches | test loss 0.4194748 +| epoch 4 | 1731/ 8400 batches | test loss 0.3682569 +| epoch 4 | 1735/ 8400 batches | test loss 0.4510885 +| epoch 4 | 1739/ 8400 batches | test loss 0.4029561 +| epoch 4 | 1743/ 8400 batches | test loss 0.4105034 +| epoch 4 | 1747/ 8400 batches | test loss 0.3787745 +| epoch 4 | 1751/ 8400 batches | test loss 0.3887992 +| epoch 4 | 1755/ 8400 batches | test loss 0.3916527 +| epoch 4 | 1759/ 8400 batches | test loss 0.3961721 +| epoch 4 | 1763/ 8400 batches | test loss 0.4259644 +| epoch 4 | 1767/ 8400 batches | test loss 0.3838112 +| epoch 4 | 1771/ 8400 batches | test loss 0.4896010 +| epoch 4 | 1775/ 8400 batches | test loss 0.4308844 +| epoch 4 | 1779/ 8400 batches | test loss 0.4545951 +| epoch 4 | 1783/ 8400 batches | test loss 0.4544398 +| epoch 4 | 1787/ 8400 batches | test loss 0.4171709 +| epoch 4 | 1791/ 8400 batches | test loss 0.3851979 +| epoch 4 | 1795/ 8400 batches | test loss 0.3316941 +| epoch 4 | 1799/ 8400 batches | test loss 0.4224656 +| epoch 4 | 1803/ 8400 batches | test loss 0.4962525 +| epoch 4 | 1807/ 8400 batches | test loss 0.4670775 +| epoch 4 | 1811/ 8400 batches | test loss 0.3492733 +| epoch 4 | 1815/ 8400 batches | test loss 0.3518295 +| epoch 4 | 1819/ 8400 batches | test loss 0.4213030 +| epoch 4 | 1823/ 8400 batches | test loss 0.3425536 +| epoch 4 | 1827/ 8400 batches | test loss 0.4942740 +| epoch 4 | 1831/ 8400 batches | test loss 0.4219471 +| epoch 4 | 1835/ 8400 batches | test loss 0.4173957 +| epoch 4 | 1839/ 8400 batches | test loss 0.4248302 +| epoch 4 | 1843/ 8400 batches | test loss 0.4073019 +| epoch 4 | 1847/ 8400 batches | test loss 0.4412723 +| epoch 4 | 1851/ 8400 batches | test loss 0.4061756 +| epoch 4 | 1855/ 8400 batches | test loss 0.3674608 +| epoch 4 | 1859/ 8400 batches | test loss 0.4577961 +| epoch 4 | 1863/ 8400 batches | test loss 0.4999748 +| epoch 4 | 1867/ 8400 batches | test loss 0.4406660 +| epoch 4 | 1871/ 8400 batches | test loss 0.1747372 +| epoch 4 | 1875/ 8400 batches | test loss 0.4643570 +| epoch 4 | 1879/ 8400 batches | test loss 0.4024564 +| epoch 4 | 1883/ 8400 batches | test loss 0.4717318 +| epoch 4 | 1887/ 8400 batches | test loss 0.4658672 +| epoch 4 | 1891/ 8400 batches | test loss 0.3634521 +| epoch 4 | 1895/ 8400 batches | test loss 0.4084986 +| epoch 4 | 1899/ 8400 batches | test loss 0.3750493 +| epoch 4 | 1903/ 8400 batches | test loss 0.4741834 +| epoch 4 | 1907/ 8400 batches | test loss 0.3995007 +| epoch 4 | 1911/ 8400 batches | test loss 0.3353386 +| epoch 4 | 1915/ 8400 batches | test loss 0.4082854 +| epoch 4 | 1919/ 8400 batches | test loss 0.3914543 +| epoch 4 | 1923/ 8400 batches | test loss 0.3639150 +| epoch 4 | 1927/ 8400 batches | test loss 0.4659807 +| epoch 4 | 1931/ 8400 batches | test loss 0.4172004 +| epoch 4 | 1935/ 8400 batches | test loss 0.3808240 +| epoch 4 | 1939/ 8400 batches | test loss 0.4855450 +| epoch 4 | 1943/ 8400 batches | test loss 0.3630481 +| epoch 4 | 1947/ 8400 batches | test loss 0.4291651 +| epoch 4 | 1951/ 8400 batches | test loss 0.4695132 +| epoch 4 | 1955/ 8400 batches | test loss 0.4671665 +| epoch 4 | 1959/ 8400 batches | test loss 0.4274780 +| epoch 4 | 1963/ 8400 batches | test loss 0.4371148 +| epoch 4 | 1967/ 8400 batches | test loss 0.3656789 +| epoch 4 | 1971/ 8400 batches | test loss 0.3902307 +| epoch 4 | 1975/ 8400 batches | test loss 0.5127717 +| epoch 4 | 1979/ 8400 batches | test loss 0.4495652 +| epoch 4 | 1983/ 8400 batches | test loss 0.5218740 +| epoch 4 | 1987/ 8400 batches | test loss 0.4318603 +| epoch 4 | 1991/ 8400 batches | test loss 0.3561161 +| epoch 4 | 1995/ 8400 batches | test loss 0.4356321 +| epoch 4 | 1999/ 8400 batches | test loss 0.4113271 +| epoch 4 | 2003/ 8400 batches | test loss 0.3714447 +| epoch 4 | 2007/ 8400 batches | test loss 0.3790269 +| epoch 4 | 2011/ 8400 batches | test loss 0.4477465 +| epoch 4 | 2015/ 8400 batches | test loss 0.4078648 +| epoch 4 | 2019/ 8400 batches | test loss 0.3732950 +| epoch 4 | 2023/ 8400 batches | test loss 0.4264187 +| epoch 4 | 2027/ 8400 batches | test loss 0.3687637 +| epoch 4 | 2031/ 8400 batches | test loss 0.4033829 +| epoch 4 | 2035/ 8400 batches | test loss 0.4026903 +| epoch 4 | 2039/ 8400 batches | test loss 0.4907670 +| epoch 4 | 2043/ 8400 batches | test loss 0.3248029 +| epoch 4 | 2047/ 8400 batches | test loss 0.3690838 +| epoch 4 | 2051/ 8400 batches | test loss 0.3987488 +| epoch 4 | 2055/ 8400 batches | test loss 0.4122690 +| epoch 4 | 2059/ 8400 batches | test loss 0.4009894 +| epoch 4 | 2063/ 8400 batches | test loss 0.5057179 +| epoch 4 | 2067/ 8400 batches | test loss 0.4348180 +| epoch 4 | 2071/ 8400 batches | test loss 0.4084104 +| epoch 4 | 2075/ 8400 batches | test loss 0.4439535 +| epoch 4 | 2079/ 8400 batches | test loss 0.3514746 +| epoch 4 | 2083/ 8400 batches | test loss 0.4899396 +| epoch 4 | 2087/ 8400 batches | test loss 0.4294316 +| epoch 4 | 2091/ 8400 batches | test loss 0.4193639 +| epoch 4 | 2095/ 8400 batches | test loss 0.4111037 +| epoch 4 | 2099/ 8400 batches | test loss 0.4504327 +| epoch 4 | final test loss 0.4226, save model! +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 8400 batches | train loss 0.3593824 +| epoch 5 | 7/ 8400 batches | train loss 0.3400049 +| epoch 5 | 11/ 8400 batches | train loss 0.4191275 +| epoch 5 | 15/ 8400 batches | train loss 0.4087319 +| epoch 5 | 19/ 8400 batches | train loss 0.3890781 +| epoch 5 | 23/ 8400 batches | train loss 0.4334758 +| epoch 5 | 27/ 8400 batches | train loss 0.3937730 +| epoch 5 | 31/ 8400 batches | train loss 0.4823118 +| epoch 5 | 35/ 8400 batches | train loss 0.4076465 +| epoch 5 | 39/ 8400 batches | train loss 0.3955448 +| epoch 5 | 43/ 8400 batches | train loss 0.4021681 +| epoch 5 | 47/ 8400 batches | train loss 0.3367633 +| epoch 5 | 51/ 8400 batches | train loss 0.2484502 +| epoch 5 | 55/ 8400 batches | train loss 0.3897374 +| epoch 5 | 59/ 8400 batches | train loss 0.3434918 +| epoch 5 | 63/ 8400 batches | train loss 0.4659744 +| epoch 5 | 67/ 8400 batches | train loss 0.4064997 +| epoch 5 | 71/ 8400 batches | train loss 0.4501799 +| epoch 5 | 75/ 8400 batches | train loss 0.4237170 +| epoch 5 | 79/ 8400 batches | train loss 0.4238800 +| epoch 5 | 83/ 8400 batches | train loss 0.3838960 +| epoch 5 | 87/ 8400 batches | train loss 0.3688886 +| epoch 5 | 91/ 8400 batches | train loss 0.3415506 +| epoch 5 | 95/ 8400 batches | train loss 0.3473782 +| epoch 5 | 99/ 8400 batches | train loss 0.3990577 +| epoch 5 | 103/ 8400 batches | train loss 0.4214807 +| epoch 5 | 107/ 8400 batches | train loss 0.3555223 +| epoch 5 | 111/ 8400 batches | train loss 0.3655391 +| epoch 5 | 115/ 8400 batches | train loss 0.3642768 +| epoch 5 | 119/ 8400 batches | train loss 0.3671960 +| epoch 5 | 123/ 8400 batches | train loss 0.4283023 +| epoch 5 | 127/ 8400 batches | train loss 0.3888837 +| epoch 5 | 131/ 8400 batches | train loss 0.3588775 +| epoch 5 | 135/ 8400 batches | train loss 0.4198115 +| epoch 5 | 139/ 8400 batches | train loss 0.3418237 +| epoch 5 | 143/ 8400 batches | train loss 0.3401414 +| epoch 5 | 147/ 8400 batches | train loss 0.4536959 +| epoch 5 | 151/ 8400 batches | train loss 0.3779819 +| epoch 5 | 155/ 8400 batches | train loss 0.3367822 +| epoch 5 | 159/ 8400 batches | train loss 0.4279543 +| epoch 5 | 163/ 8400 batches | train loss 0.3619952 +| epoch 5 | 167/ 8400 batches | train loss 0.3581897 +| epoch 5 | 171/ 8400 batches | train loss 0.4143561 +| epoch 5 | 175/ 8400 batches | train loss 0.4352861 +| epoch 5 | 179/ 8400 batches | train loss 0.3840103 +| epoch 5 | 183/ 8400 batches | train loss 0.3807190 +| epoch 5 | 187/ 8400 batches | train loss 0.5020622 +| epoch 5 | 191/ 8400 batches | train loss 0.3839292 +| epoch 5 | 195/ 8400 batches | train loss 0.2516713 +| epoch 5 | 199/ 8400 batches | train loss 0.4072908 +| epoch 5 | 203/ 8400 batches | train loss 0.3242333 +| epoch 5 | 207/ 8400 batches | train loss 0.3335330 +| epoch 5 | 211/ 8400 batches | train loss 0.3925023 +| epoch 5 | 215/ 8400 batches | train loss 0.4370056 +| epoch 5 | 219/ 8400 batches | train loss 0.5191630 +| epoch 5 | 223/ 8400 batches | train loss 0.4340358 +| epoch 5 | 227/ 8400 batches | train loss 0.4090252 +| epoch 5 | 231/ 8400 batches | train loss 0.4141694 +| epoch 5 | 235/ 8400 batches | train loss 0.4680394 +| epoch 5 | 239/ 8400 batches | train loss 0.3696490 +| epoch 5 | 243/ 8400 batches | train loss 0.3841461 +| epoch 5 | 247/ 8400 batches | train loss 0.4299930 +| epoch 5 | 251/ 8400 batches | train loss 0.4066490 +| epoch 5 | 255/ 8400 batches | train loss 0.4171299 +| epoch 5 | 259/ 8400 batches | train loss 0.3778256 +| epoch 5 | 263/ 8400 batches | train loss 0.4120997 +| epoch 5 | 267/ 8400 batches | train loss 0.3572839 +| epoch 5 | 271/ 8400 batches | train loss 0.2928717 +| epoch 5 | 275/ 8400 batches | train loss 0.3334068 +| epoch 5 | 279/ 8400 batches | train loss 0.4121814 +| epoch 5 | 283/ 8400 batches | train loss 0.4662732 +| epoch 5 | 287/ 8400 batches | train loss 0.2976893 +| epoch 5 | 291/ 8400 batches | train loss 0.4038708 +| epoch 5 | 295/ 8400 batches | train loss 0.4702094 +| epoch 5 | 299/ 8400 batches | train loss 0.3823338 +| epoch 5 | 303/ 8400 batches | train loss 0.3875082 +| epoch 5 | 307/ 8400 batches | train loss 0.3721317 +| epoch 5 | 311/ 8400 batches | train loss 0.3850029 +| epoch 5 | 315/ 8400 batches | train loss 0.4448440 +| epoch 5 | 319/ 8400 batches | train loss 0.4136206 +| epoch 5 | 323/ 8400 batches | train loss 0.3547206 +| epoch 5 | 327/ 8400 batches | train loss 0.4152985 +| epoch 5 | 331/ 8400 batches | train loss 0.3922695 +| epoch 5 | 335/ 8400 batches | train loss 0.4582824 +| epoch 5 | 339/ 8400 batches | train loss 0.3994898 +| epoch 5 | 343/ 8400 batches | train loss 0.4151088 +| epoch 5 | 347/ 8400 batches | train loss 0.3971760 +| epoch 5 | 351/ 8400 batches | train loss 0.3340292 +| epoch 5 | 355/ 8400 batches | train loss 0.3375213 +| epoch 5 | 359/ 8400 batches | train loss 0.4017391 +| epoch 5 | 363/ 8400 batches | train loss 0.4112670 +| epoch 5 | 367/ 8400 batches | train loss 0.4539675 +| epoch 5 | 371/ 8400 batches | train loss 0.4249952 +| epoch 5 | 375/ 8400 batches | train loss 0.3482487 +| epoch 5 | 379/ 8400 batches | train loss 0.3884810 +| epoch 5 | 383/ 8400 batches | train loss 0.3041080 +| epoch 5 | 387/ 8400 batches | train loss 0.4028068 +| epoch 5 | 391/ 8400 batches | train loss 0.4338643 +| epoch 5 | 395/ 8400 batches | train loss 0.3918645 +| epoch 5 | 399/ 8400 batches | train loss 0.4146070 +| epoch 5 | 403/ 8400 batches | train loss 0.4498686 +| epoch 5 | 407/ 8400 batches | train loss 0.4004155 +| epoch 5 | 411/ 8400 batches | train loss 0.4145859 +| epoch 5 | 415/ 8400 batches | train loss 0.3818790 +| epoch 5 | 419/ 8400 batches | train loss 0.4241371 +| epoch 5 | 423/ 8400 batches | train loss 0.2989807 +| epoch 5 | 427/ 8400 batches | train loss 0.3608345 +| epoch 5 | 431/ 8400 batches | train loss 0.3863556 +| epoch 5 | 435/ 8400 batches | train loss 0.4898669 +| epoch 5 | 439/ 8400 batches | train loss 0.3664816 +| epoch 5 | 443/ 8400 batches | train loss 0.5200330 +| epoch 5 | 447/ 8400 batches | train loss 0.3682654 +| epoch 5 | 451/ 8400 batches | train loss 0.3967685 +| epoch 5 | 455/ 8400 batches | train loss 0.3683305 +| epoch 5 | 459/ 8400 batches | train loss 0.4092230 +| epoch 5 | 463/ 8400 batches | train loss 0.3746543 +| epoch 5 | 467/ 8400 batches | train loss 0.3840380 +| epoch 5 | 471/ 8400 batches | train loss 0.4029855 +| epoch 5 | 475/ 8400 batches | train loss 0.3908237 +| epoch 5 | 479/ 8400 batches | train loss 0.3705149 +| epoch 5 | 483/ 8400 batches | train loss 0.3075574 +| epoch 5 | 487/ 8400 batches | train loss 0.3405523 +| epoch 5 | 491/ 8400 batches | train loss 0.3749163 +| epoch 5 | 495/ 8400 batches | train loss 0.4679286 +| epoch 5 | 499/ 8400 batches | train loss 0.4373691 +| epoch 5 | 503/ 8400 batches | train loss 0.3787096 +| epoch 5 | 507/ 8400 batches | train loss 0.3746420 +| epoch 5 | 511/ 8400 batches | train loss 0.3881569 +| epoch 5 | 515/ 8400 batches | train loss 0.3857827 +| epoch 5 | 519/ 8400 batches | train loss 0.4228829 +| epoch 5 | 523/ 8400 batches | train loss 0.3761822 +| epoch 5 | 527/ 8400 batches | train loss 0.3265909 +| epoch 5 | 531/ 8400 batches | train loss 0.4423426 +| epoch 5 | 535/ 8400 batches | train loss 0.3810850 +| epoch 5 | 539/ 8400 batches | train loss 0.3135794 +| epoch 5 | 543/ 8400 batches | train loss 0.4126382 +| epoch 5 | 547/ 8400 batches | train loss 0.4061158 +| epoch 5 | 551/ 8400 batches | train loss 0.3009906 +| epoch 5 | 555/ 8400 batches | train loss 0.3886203 +| epoch 5 | 559/ 8400 batches | train loss 0.3241872 +| epoch 5 | 563/ 8400 batches | train loss 0.3199551 +| epoch 5 | 567/ 8400 batches | train loss 0.3875171 +| epoch 5 | 571/ 8400 batches | train loss 0.3904772 +| epoch 5 | 575/ 8400 batches | train loss 0.3635764 +| epoch 5 | 579/ 8400 batches | train loss 0.5240003 +| epoch 5 | 583/ 8400 batches | train loss 0.3467786 +| epoch 5 | 587/ 8400 batches | train loss 0.4027542 +| epoch 5 | 591/ 8400 batches | train loss 0.3805286 +| epoch 5 | 595/ 8400 batches | train loss 0.4666994 +| epoch 5 | 599/ 8400 batches | train loss 0.4270021 +| epoch 5 | 603/ 8400 batches | train loss 0.3305451 +| epoch 5 | 607/ 8400 batches | train loss 0.3534935 +| epoch 5 | 611/ 8400 batches | train loss 0.3418579 +| epoch 5 | 615/ 8400 batches | train loss 0.4663503 +| epoch 5 | 619/ 8400 batches | train loss 0.4378154 +| epoch 5 | 623/ 8400 batches | train loss 0.3893968 +| epoch 5 | 627/ 8400 batches | train loss 0.4007885 +| epoch 5 | 631/ 8400 batches | train loss 0.3926164 +| epoch 5 | 635/ 8400 batches | train loss 0.3997467 +| epoch 5 | 639/ 8400 batches | train loss 0.3382965 +| epoch 5 | 643/ 8400 batches | train loss 0.3955067 +| epoch 5 | 647/ 8400 batches | train loss 0.3964025 +| epoch 5 | 651/ 8400 batches | train loss 0.3601374 +| epoch 5 | 655/ 8400 batches | train loss 0.4151551 +| epoch 5 | 659/ 8400 batches | train loss 0.3643292 +| epoch 5 | 663/ 8400 batches | train loss 0.3869409 +| epoch 5 | 667/ 8400 batches | train loss 0.4316087 +| epoch 5 | 671/ 8400 batches | train loss 0.3750058 +| epoch 5 | 675/ 8400 batches | train loss 0.4370664 +| epoch 5 | 679/ 8400 batches | train loss 0.4370378 +| epoch 5 | 683/ 8400 batches | train loss 0.4068772 +| epoch 5 | 687/ 8400 batches | train loss 0.4483197 +| epoch 5 | 691/ 8400 batches | train loss 0.3801274 +| epoch 5 | 695/ 8400 batches | train loss 0.4347463 +| epoch 5 | 699/ 8400 batches | train loss 0.4029701 +| epoch 5 | 703/ 8400 batches | train loss 0.3979630 +| epoch 5 | 707/ 8400 batches | train loss 0.4050408 +| epoch 5 | 711/ 8400 batches | train loss 0.4003882 +| epoch 5 | 715/ 8400 batches | train loss 0.3808318 +| epoch 5 | 719/ 8400 batches | train loss 0.4196196 +| epoch 5 | 723/ 8400 batches | train loss 0.4000114 +| epoch 5 | 727/ 8400 batches | train loss 0.4191922 +| epoch 5 | 731/ 8400 batches | train loss 0.4214569 +| epoch 5 | 735/ 8400 batches | train loss 0.3734300 +| epoch 5 | 739/ 8400 batches | train loss 0.4280094 +| epoch 5 | 743/ 8400 batches | train loss 0.4126209 +| epoch 5 | 747/ 8400 batches | train loss 0.3926820 +| epoch 5 | 751/ 8400 batches | train loss 0.3743880 +| epoch 5 | 755/ 8400 batches | train loss 0.3620677 +| epoch 5 | 759/ 8400 batches | train loss 0.3309782 +| epoch 5 | 763/ 8400 batches | train loss 0.3604153 +| epoch 5 | 767/ 8400 batches | train loss 0.4155064 +| epoch 5 | 771/ 8400 batches | train loss 0.3591413 +| epoch 5 | 775/ 8400 batches | train loss 0.3947600 +| epoch 5 | 779/ 8400 batches | train loss 0.4442868 +| epoch 5 | 783/ 8400 batches | train loss 0.3564501 +| epoch 5 | 787/ 8400 batches | train loss 0.3399641 +| epoch 5 | 791/ 8400 batches | train loss 0.4132524 +| epoch 5 | 795/ 8400 batches | train loss 0.4483593 +| epoch 5 | 799/ 8400 batches | train loss 0.5183659 +| epoch 5 | 803/ 8400 batches | train loss 0.4600327 +| epoch 5 | 807/ 8400 batches | train loss 0.3991992 +| epoch 5 | 811/ 8400 batches | train loss 0.3249579 +| epoch 5 | 815/ 8400 batches | train loss 0.3934444 +| epoch 5 | 819/ 8400 batches | train loss 0.4565930 +| epoch 5 | 823/ 8400 batches | train loss 0.3322781 +| epoch 5 | 827/ 8400 batches | train loss 0.4243154 +| epoch 5 | 831/ 8400 batches | train loss 0.3855466 +| epoch 5 | 835/ 8400 batches | train loss 0.3506567 +| epoch 5 | 839/ 8400 batches | train loss 0.2701055 +| epoch 5 | 843/ 8400 batches | train loss 0.4778098 +| epoch 5 | 847/ 8400 batches | train loss 0.4445060 +| epoch 5 | 851/ 8400 batches | train loss 0.3998176 +| epoch 5 | 855/ 8400 batches | train loss 0.3568615 +| epoch 5 | 859/ 8400 batches | train loss 0.4005623 +| epoch 5 | 863/ 8400 batches | train loss 0.4176251 +| epoch 5 | 867/ 8400 batches | train loss 0.3581597 +| epoch 5 | 871/ 8400 batches | train loss 0.3506893 +| epoch 5 | 875/ 8400 batches | train loss 0.3664513 +| epoch 5 | 879/ 8400 batches | train loss 0.3389059 +| epoch 5 | 883/ 8400 batches | train loss 0.4202949 +| epoch 5 | 887/ 8400 batches | train loss 0.4017440 +| epoch 5 | 891/ 8400 batches | train loss 0.3613271 +| epoch 5 | 895/ 8400 batches | train loss 0.3885759 +| epoch 5 | 899/ 8400 batches | train loss 0.3632244 +| epoch 5 | 903/ 8400 batches | train loss 0.3816019 +| epoch 5 | 907/ 8400 batches | train loss 0.4459766 +| epoch 5 | 911/ 8400 batches | train loss 0.4444913 +| epoch 5 | 915/ 8400 batches | train loss 0.3542046 +| epoch 5 | 919/ 8400 batches | train loss 0.4224114 +| epoch 5 | 923/ 8400 batches | train loss 0.3890826 +| epoch 5 | 927/ 8400 batches | train loss 0.4517356 +| epoch 5 | 931/ 8400 batches | train loss 0.3750967 +| epoch 5 | 935/ 8400 batches | train loss 0.4466007 +| epoch 5 | 939/ 8400 batches | train loss 0.3739240 +| epoch 5 | 943/ 8400 batches | train loss 0.4104488 +| epoch 5 | 947/ 8400 batches | train loss 0.4074378 +| epoch 5 | 951/ 8400 batches | train loss 0.3960084 +| epoch 5 | 955/ 8400 batches | train loss 0.3694159 +| epoch 5 | 959/ 8400 batches | train loss 0.4298722 +| epoch 5 | 963/ 8400 batches | train loss 0.3293210 +| epoch 5 | 967/ 8400 batches | train loss 0.4067483 +| epoch 5 | 971/ 8400 batches | train loss 0.4690311 +| epoch 5 | 975/ 8400 batches | train loss 0.3750997 +| epoch 5 | 979/ 8400 batches | train loss 0.3615947 +| epoch 5 | 983/ 8400 batches | train loss 0.3504002 +| epoch 5 | 987/ 8400 batches | train loss 0.3518459 +| epoch 5 | 991/ 8400 batches | train loss 0.3324278 +| epoch 5 | 995/ 8400 batches | train loss 0.3736656 +| epoch 5 | 999/ 8400 batches | train loss 0.3854561 +| epoch 5 | 1003/ 8400 batches | train loss 0.4633177 +| epoch 5 | 1007/ 8400 batches | train loss 0.3899924 +| epoch 5 | 1011/ 8400 batches | train loss 0.3161696 +| epoch 5 | 1015/ 8400 batches | train loss 0.4008378 +| epoch 5 | 1019/ 8400 batches | train loss 0.3856725 +| epoch 5 | 1023/ 8400 batches | train loss 0.3584473 +| epoch 5 | 1027/ 8400 batches | train loss 0.3877640 +| epoch 5 | 1031/ 8400 batches | train loss 0.4471295 +| epoch 5 | 1035/ 8400 batches | train loss 0.3277302 +| epoch 5 | 1039/ 8400 batches | train loss 0.4416141 +| epoch 5 | 1043/ 8400 batches | train loss 0.4228371 +| epoch 5 | 1047/ 8400 batches | train loss 0.3794815 +| epoch 5 | 1051/ 8400 batches | train loss 0.4205182 +| epoch 5 | 1055/ 8400 batches | train loss 0.4205292 +| epoch 5 | 1059/ 8400 batches | train loss 0.4401872 +| epoch 5 | 1063/ 8400 batches | train loss 0.4141182 +| epoch 5 | 1067/ 8400 batches | train loss 0.4132090 +| epoch 5 | 1071/ 8400 batches | train loss 0.3750306 +| epoch 5 | 1075/ 8400 batches | train loss 0.3650802 +| epoch 5 | 1079/ 8400 batches | train loss 0.4995553 +| epoch 5 | 1083/ 8400 batches | train loss 0.3876051 +| epoch 5 | 1087/ 8400 batches | train loss 0.4103153 +| epoch 5 | 1091/ 8400 batches | train loss 0.3645220 +| epoch 5 | 1095/ 8400 batches | train loss 0.3876637 +| epoch 5 | 1099/ 8400 batches | train loss 0.3905780 +| epoch 5 | 1103/ 8400 batches | train loss 0.3631214 +| epoch 5 | 1107/ 8400 batches | train loss 0.4034971 +| epoch 5 | 1111/ 8400 batches | train loss 0.3963028 +| epoch 5 | 1115/ 8400 batches | train loss 0.3500114 +| epoch 5 | 1119/ 8400 batches | train loss 0.3423014 +| epoch 5 | 1123/ 8400 batches | train loss 0.3578257 +| epoch 5 | 1127/ 8400 batches | train loss 0.4147716 +| epoch 5 | 1131/ 8400 batches | train loss 0.4434801 +| epoch 5 | 1135/ 8400 batches | train loss 0.3644706 +| epoch 5 | 1139/ 8400 batches | train loss 0.4488445 +| epoch 5 | 1143/ 8400 batches | train loss 0.3521000 +| epoch 5 | 1147/ 8400 batches | train loss 0.4180777 +| epoch 5 | 1151/ 8400 batches | train loss 0.4228965 +| epoch 5 | 1155/ 8400 batches | train loss 0.4197423 +| epoch 5 | 1159/ 8400 batches | train loss 0.3832734 +| epoch 5 | 1163/ 8400 batches | train loss 0.4111995 +| epoch 5 | 1167/ 8400 batches | train loss 0.3925555 +| epoch 5 | 1171/ 8400 batches | train loss 0.3887150 +| epoch 5 | 1175/ 8400 batches | train loss 0.3542530 +| epoch 5 | 1179/ 8400 batches | train loss 0.3782940 +| epoch 5 | 1183/ 8400 batches | train loss 0.4245349 +| epoch 5 | 1187/ 8400 batches | train loss 0.3840042 +| epoch 5 | 1191/ 8400 batches | train loss 0.4425310 +| epoch 5 | 1195/ 8400 batches | train loss 0.4489678 +| epoch 5 | 1199/ 8400 batches | train loss 0.3607498 +| epoch 5 | 1203/ 8400 batches | train loss 0.3330843 +| epoch 5 | 1207/ 8400 batches | train loss 0.4473359 +| epoch 5 | 1211/ 8400 batches | train loss 0.3863341 +| epoch 5 | 1215/ 8400 batches | train loss 0.2675614 +| epoch 5 | 1219/ 8400 batches | train loss 0.4160935 +| epoch 5 | 1223/ 8400 batches | train loss 0.3423886 +| epoch 5 | 1227/ 8400 batches | train loss 0.4333057 +| epoch 5 | 1231/ 8400 batches | train loss 0.4652202 +| epoch 5 | 1235/ 8400 batches | train loss 0.3927893 +| epoch 5 | 1239/ 8400 batches | train loss 0.4532236 +| epoch 5 | 1243/ 8400 batches | train loss 0.4214154 +| epoch 5 | 1247/ 8400 batches | train loss 0.4046034 +| epoch 5 | 1251/ 8400 batches | train loss 0.2981190 +| epoch 5 | 1255/ 8400 batches | train loss 0.4210938 +| epoch 5 | 1259/ 8400 batches | train loss 0.3356825 +| epoch 5 | 1263/ 8400 batches | train loss 0.3462487 +| epoch 5 | 1267/ 8400 batches | train loss 0.3305290 +| epoch 5 | 1271/ 8400 batches | train loss 0.3581139 +| epoch 5 | 1275/ 8400 batches | train loss 0.4646329 +| epoch 5 | 1279/ 8400 batches | train loss 0.3808156 +| epoch 5 | 1283/ 8400 batches | train loss 0.3615060 +| epoch 5 | 1287/ 8400 batches | train loss 0.3838449 +| epoch 5 | 1291/ 8400 batches | train loss 0.4071477 +| epoch 5 | 1295/ 8400 batches | train loss 0.3584894 +| epoch 5 | 1299/ 8400 batches | train loss 0.4028409 +| epoch 5 | 1303/ 8400 batches | train loss 0.3864580 +| epoch 5 | 1307/ 8400 batches | train loss 0.3453426 +| epoch 5 | 1311/ 8400 batches | train loss 0.4519747 +| epoch 5 | 1315/ 8400 batches | train loss 0.4647913 +| epoch 5 | 1319/ 8400 batches | train loss 0.3774621 +| epoch 5 | 1323/ 8400 batches | train loss 0.3586819 +| epoch 5 | 1327/ 8400 batches | train loss 0.3973764 +| epoch 5 | 1331/ 8400 batches | train loss 0.3561255 +| epoch 5 | 1335/ 8400 batches | train loss 0.3723025 +| epoch 5 | 1339/ 8400 batches | train loss 0.3492723 +| epoch 5 | 1343/ 8400 batches | train loss 0.3611465 +| epoch 5 | 1347/ 8400 batches | train loss 0.4022298 +| epoch 5 | 1351/ 8400 batches | train loss 0.3376593 +| epoch 5 | 1355/ 8400 batches | train loss 0.4254030 +| epoch 5 | 1359/ 8400 batches | train loss 0.4283025 +| epoch 5 | 1363/ 8400 batches | train loss 0.3869561 +| epoch 5 | 1367/ 8400 batches | train loss 0.2479766 +| epoch 5 | 1371/ 8400 batches | train loss 0.3303355 +| epoch 5 | 1375/ 8400 batches | train loss 0.3638457 +| epoch 5 | 1379/ 8400 batches | train loss 0.3695670 +| epoch 5 | 1383/ 8400 batches | train loss 0.3984191 +| epoch 5 | 1387/ 8400 batches | train loss 0.3450657 +| epoch 5 | 1391/ 8400 batches | train loss 0.3954247 +| epoch 5 | 1395/ 8400 batches | train loss 0.3935208 +| epoch 5 | 1399/ 8400 batches | train loss 0.3378302 +| epoch 5 | 1403/ 8400 batches | train loss 0.4093194 +| epoch 5 | 1407/ 8400 batches | train loss 0.3904762 +| epoch 5 | 1411/ 8400 batches | train loss 0.4118822 +| epoch 5 | 1415/ 8400 batches | train loss 0.4140309 +| epoch 5 | 1419/ 8400 batches | train loss 0.4410062 +| epoch 5 | 1423/ 8400 batches | train loss 0.4048127 +| epoch 5 | 1427/ 8400 batches | train loss 0.3756962 +| epoch 5 | 1431/ 8400 batches | train loss 0.3340430 +| epoch 5 | 1435/ 8400 batches | train loss 0.3892742 +| epoch 5 | 1439/ 8400 batches | train loss 0.3496053 +| epoch 5 | 1443/ 8400 batches | train loss 0.3376425 +| epoch 5 | 1447/ 8400 batches | train loss 0.5161284 +| epoch 5 | 1451/ 8400 batches | train loss 0.4024815 +| epoch 5 | 1455/ 8400 batches | train loss 0.4043441 +| epoch 5 | 1459/ 8400 batches | train loss 0.3949705 +| epoch 5 | 1463/ 8400 batches | train loss 0.3889008 +| epoch 5 | 1467/ 8400 batches | train loss 0.3050528 +| epoch 5 | 1471/ 8400 batches | train loss 0.4108179 +| epoch 5 | 1475/ 8400 batches | train loss 0.3399509 +| epoch 5 | 1479/ 8400 batches | train loss 0.4222930 +| epoch 5 | 1483/ 8400 batches | train loss 0.3955173 +| epoch 5 | 1487/ 8400 batches | train loss 0.3780804 +| epoch 5 | 1491/ 8400 batches | train loss 0.3622967 +| epoch 5 | 1495/ 8400 batches | train loss 0.3666457 +| epoch 5 | 1499/ 8400 batches | train loss 0.3843266 +| epoch 5 | 1503/ 8400 batches | train loss 0.4164920 +| epoch 5 | 1507/ 8400 batches | train loss 0.3620692 +| epoch 5 | 1511/ 8400 batches | train loss 0.2925678 +| epoch 5 | 1515/ 8400 batches | train loss 0.3225903 +| epoch 5 | 1519/ 8400 batches | train loss 0.3506965 +| epoch 5 | 1523/ 8400 batches | train loss 0.4166065 +| epoch 5 | 1527/ 8400 batches | train loss 0.3674700 +| epoch 5 | 1531/ 8400 batches | train loss 0.3524333 +| epoch 5 | 1535/ 8400 batches | train loss 0.4151472 +| epoch 5 | 1539/ 8400 batches | train loss 0.3641806 +| epoch 5 | 1543/ 8400 batches | train loss 0.3055440 +| epoch 5 | 1547/ 8400 batches | train loss 0.4272591 +| epoch 5 | 1551/ 8400 batches | train loss 0.3645106 +| epoch 5 | 1555/ 8400 batches | train loss 0.3616000 +| epoch 5 | 1559/ 8400 batches | train loss 0.3942989 +| epoch 5 | 1563/ 8400 batches | train loss 0.3538427 +| epoch 5 | 1567/ 8400 batches | train loss 0.3353967 +| epoch 5 | 1571/ 8400 batches | train loss 0.3930282 +| epoch 5 | 1575/ 8400 batches | train loss 0.3570237 +| epoch 5 | 1579/ 8400 batches | train loss 0.4088241 +| epoch 5 | 1583/ 8400 batches | train loss 0.3721218 +| epoch 5 | 1587/ 8400 batches | train loss 0.3472776 +| epoch 5 | 1591/ 8400 batches | train loss 0.3202346 +| epoch 5 | 1595/ 8400 batches | train loss 0.3961484 +| epoch 5 | 1599/ 8400 batches | train loss 0.4167176 +| epoch 5 | 1603/ 8400 batches | train loss 0.4716296 +| epoch 5 | 1607/ 8400 batches | train loss 0.4236815 +| epoch 5 | 1611/ 8400 batches | train loss 0.3810970 +| epoch 5 | 1615/ 8400 batches | train loss 0.4161049 +| epoch 5 | 1619/ 8400 batches | train loss 0.4239753 +| epoch 5 | 1623/ 8400 batches | train loss 0.4044380 +| epoch 5 | 1627/ 8400 batches | train loss 0.3458393 +| epoch 5 | 1631/ 8400 batches | train loss 0.3256074 +| epoch 5 | 1635/ 8400 batches | train loss 0.3593658 +| epoch 5 | 1639/ 8400 batches | train loss 0.3722892 +| epoch 5 | 1643/ 8400 batches | train loss 0.3284819 +| epoch 5 | 1647/ 8400 batches | train loss 0.4090953 +| epoch 5 | 1651/ 8400 batches | train loss 0.3256129 +| epoch 5 | 1655/ 8400 batches | train loss 0.4374568 +| epoch 5 | 1659/ 8400 batches | train loss 0.3633070 +| epoch 5 | 1663/ 8400 batches | train loss 0.4394007 +| epoch 5 | 1667/ 8400 batches | train loss 0.3876933 +| epoch 5 | 1671/ 8400 batches | train loss 0.3418744 +| epoch 5 | 1675/ 8400 batches | train loss 0.4175242 +| epoch 5 | 1679/ 8400 batches | train loss 0.4665394 +| epoch 5 | 1683/ 8400 batches | train loss 0.3864297 +| epoch 5 | 1687/ 8400 batches | train loss 0.3207235 +| epoch 5 | 1691/ 8400 batches | train loss 0.2344164 +| epoch 5 | 1695/ 8400 batches | train loss 0.4367361 +| epoch 5 | 1699/ 8400 batches | train loss 0.3875667 +| epoch 5 | 1703/ 8400 batches | train loss 0.4151051 +| epoch 5 | 1707/ 8400 batches | train loss 0.4591398 +| epoch 5 | 1711/ 8400 batches | train loss 0.4500131 +| epoch 5 | 1715/ 8400 batches | train loss 0.4193673 +| epoch 5 | 1719/ 8400 batches | train loss 0.4065013 +| epoch 5 | 1723/ 8400 batches | train loss 0.3441650 +| epoch 5 | 1727/ 8400 batches | train loss 0.3846405 +| epoch 5 | 1731/ 8400 batches | train loss 0.4899611 +| epoch 5 | 1735/ 8400 batches | train loss 0.3244891 +| epoch 5 | 1739/ 8400 batches | train loss 0.4261735 +| epoch 5 | 1743/ 8400 batches | train loss 0.3706071 +| epoch 5 | 1747/ 8400 batches | train loss 0.3561898 +| epoch 5 | 1751/ 8400 batches | train loss 0.3851370 +| epoch 5 | 1755/ 8400 batches | train loss 0.3982828 +| epoch 5 | 1759/ 8400 batches | train loss 0.3715187 +| epoch 5 | 1763/ 8400 batches | train loss 0.2859330 +| epoch 5 | 1767/ 8400 batches | train loss 0.3671633 +| epoch 5 | 1771/ 8400 batches | train loss 0.3380041 +| epoch 5 | 1775/ 8400 batches | train loss 0.4139609 +| epoch 5 | 1779/ 8400 batches | train loss 0.3547909 +| epoch 5 | 1783/ 8400 batches | train loss 0.4573303 +| epoch 5 | 1787/ 8400 batches | train loss 0.4174146 +| epoch 5 | 1791/ 8400 batches | train loss 0.3667945 +| epoch 5 | 1795/ 8400 batches | train loss 0.4418485 +| epoch 5 | 1799/ 8400 batches | train loss 0.3093371 +| epoch 5 | 1803/ 8400 batches | train loss 0.3889676 +| epoch 5 | 1807/ 8400 batches | train loss 0.3890060 +| epoch 5 | 1811/ 8400 batches | train loss 0.3071201 +| epoch 5 | 1815/ 8400 batches | train loss 0.3202910 +| epoch 5 | 1819/ 8400 batches | train loss 0.3397531 +| epoch 5 | 1823/ 8400 batches | train loss 0.4222135 +| epoch 5 | 1827/ 8400 batches | train loss 0.3966773 +| epoch 5 | 1831/ 8400 batches | train loss 0.3637322 +| epoch 5 | 1835/ 8400 batches | train loss 0.3582607 +| epoch 5 | 1839/ 8400 batches | train loss 0.4132665 +| epoch 5 | 1843/ 8400 batches | train loss 0.4768734 +| epoch 5 | 1847/ 8400 batches | train loss 0.4300182 +| epoch 5 | 1851/ 8400 batches | train loss 0.3422929 +| epoch 5 | 1855/ 8400 batches | train loss 0.3812135 +| epoch 5 | 1859/ 8400 batches | train loss 0.4224819 +| epoch 5 | 1863/ 8400 batches | train loss 0.3660844 +| epoch 5 | 1867/ 8400 batches | train loss 0.4795066 +| epoch 5 | 1871/ 8400 batches | train loss 0.3932586 +| epoch 5 | 1875/ 8400 batches | train loss 0.4045061 +| epoch 5 | 1879/ 8400 batches | train loss 0.3798733 +| epoch 5 | 1883/ 8400 batches | train loss 0.3877153 +| epoch 5 | 1887/ 8400 batches | train loss 0.3999401 +| epoch 5 | 1891/ 8400 batches | train loss 0.3787058 +| epoch 5 | 1895/ 8400 batches | train loss 0.3562081 +| epoch 5 | 1899/ 8400 batches | train loss 0.3756312 +| epoch 5 | 1903/ 8400 batches | train loss 0.4747334 +| epoch 5 | 1907/ 8400 batches | train loss 0.4132539 +| epoch 5 | 1911/ 8400 batches | train loss 0.4301991 +| epoch 5 | 1915/ 8400 batches | train loss 0.3279330 +| epoch 5 | 1919/ 8400 batches | train loss 0.3656496 +| epoch 5 | 1923/ 8400 batches | train loss 0.3948940 +| epoch 5 | 1927/ 8400 batches | train loss 0.3890021 +| epoch 5 | 1931/ 8400 batches | train loss 0.3966498 +| epoch 5 | 1935/ 8400 batches | train loss 0.4071890 +| epoch 5 | 1939/ 8400 batches | train loss 0.4107912 +| epoch 5 | 1943/ 8400 batches | train loss 0.3716920 +| epoch 5 | 1947/ 8400 batches | train loss 0.4748241 +| epoch 5 | 1951/ 8400 batches | train loss 0.4160565 +| epoch 5 | 1955/ 8400 batches | train loss 0.3639017 +| epoch 5 | 1959/ 8400 batches | train loss 0.3527042 +| epoch 5 | 1963/ 8400 batches | train loss 0.4427539 +| epoch 5 | 1967/ 8400 batches | train loss 0.3797221 +| epoch 5 | 1971/ 8400 batches | train loss 0.3373482 +| epoch 5 | 1975/ 8400 batches | train loss 0.3366427 +| epoch 5 | 1979/ 8400 batches | train loss 0.3357420 +| epoch 5 | 1983/ 8400 batches | train loss 0.4145959 +| epoch 5 | 1987/ 8400 batches | train loss 0.4186923 +| epoch 5 | 1991/ 8400 batches | train loss 0.3998639 +| epoch 5 | 1995/ 8400 batches | train loss 0.3950889 +| epoch 5 | 1999/ 8400 batches | train loss 0.3191633 +| epoch 5 | 2003/ 8400 batches | train loss 0.3758889 +| epoch 5 | 2007/ 8400 batches | train loss 0.3586223 +| epoch 5 | 2011/ 8400 batches | train loss 0.4004066 +| epoch 5 | 2015/ 8400 batches | train loss 0.2517193 +| epoch 5 | 2019/ 8400 batches | train loss 0.4028316 +| epoch 5 | 2023/ 8400 batches | train loss 0.5012212 +| epoch 5 | 2027/ 8400 batches | train loss 0.4302144 +| epoch 5 | 2031/ 8400 batches | train loss 0.3934223 +| epoch 5 | 2035/ 8400 batches | train loss 0.4130310 +| epoch 5 | 2039/ 8400 batches | train loss 0.3767675 +| epoch 5 | 2043/ 8400 batches | train loss 0.3249028 +| epoch 5 | 2047/ 8400 batches | train loss 0.3682214 +| epoch 5 | 2051/ 8400 batches | train loss 0.3557531 +| epoch 5 | 2055/ 8400 batches | train loss 0.4079242 +| epoch 5 | 2059/ 8400 batches | train loss 0.4097866 +| epoch 5 | 2063/ 8400 batches | train loss 0.4488272 +| epoch 5 | 2067/ 8400 batches | train loss 0.3373048 +| epoch 5 | 2071/ 8400 batches | train loss 0.3686487 +| epoch 5 | 2075/ 8400 batches | train loss 0.3549913 +| epoch 5 | 2079/ 8400 batches | train loss 0.3907593 +| epoch 5 | 2083/ 8400 batches | train loss 0.3972699 +| epoch 5 | 2087/ 8400 batches | train loss 0.4122858 +| epoch 5 | 2091/ 8400 batches | train loss 0.4699830 +| epoch 5 | 2095/ 8400 batches | train loss 0.3591479 +| epoch 5 | 2099/ 8400 batches | train loss 0.3504688 +| epoch 5 | 2103/ 8400 batches | train loss 0.3048038 +| epoch 5 | 2107/ 8400 batches | train loss 0.4619732 +| epoch 5 | 2111/ 8400 batches | train loss 0.4367321 +| epoch 5 | 2115/ 8400 batches | train loss 0.4100394 +| epoch 5 | 2119/ 8400 batches | train loss 0.3195751 +| epoch 5 | 2123/ 8400 batches | train loss 0.3529002 +| epoch 5 | 2127/ 8400 batches | train loss 0.4481981 +| epoch 5 | 2131/ 8400 batches | train loss 0.3378599 +| epoch 5 | 2135/ 8400 batches | train loss 0.4097647 +| epoch 5 | 2139/ 8400 batches | train loss 0.4169728 +| epoch 5 | 2143/ 8400 batches | train loss 0.4345222 +| epoch 5 | 2147/ 8400 batches | train loss 0.4160590 +| epoch 5 | 2151/ 8400 batches | train loss 0.3763070 +| epoch 5 | 2155/ 8400 batches | train loss 0.3691801 +| epoch 5 | 2159/ 8400 batches | train loss 0.4348053 +| epoch 5 | 2163/ 8400 batches | train loss 0.4188434 +| epoch 5 | 2167/ 8400 batches | train loss 0.4389147 +| epoch 5 | 2171/ 8400 batches | train loss 0.3530429 +| epoch 5 | 2175/ 8400 batches | train loss 0.3932327 +| epoch 5 | 2179/ 8400 batches | train loss 0.4350135 +| epoch 5 | 2183/ 8400 batches | train loss 0.4515821 +| epoch 5 | 2187/ 8400 batches | train loss 0.3821276 +| epoch 5 | 2191/ 8400 batches | train loss 0.3859021 +| epoch 5 | 2195/ 8400 batches | train loss 0.3885292 +| epoch 5 | 2199/ 8400 batches | train loss 0.3973892 +| epoch 5 | 2203/ 8400 batches | train loss 0.3469423 +| epoch 5 | 2207/ 8400 batches | train loss 0.4311338 +| epoch 5 | 2211/ 8400 batches | train loss 0.3727147 +| epoch 5 | 2215/ 8400 batches | train loss 0.4259311 +| epoch 5 | 2219/ 8400 batches | train loss 0.4047440 +| epoch 5 | 2223/ 8400 batches | train loss 0.3883448 +| epoch 5 | 2227/ 8400 batches | train loss 0.3979185 +| epoch 5 | 2231/ 8400 batches | train loss 0.4428241 +| epoch 5 | 2235/ 8400 batches | train loss 0.3523805 +| epoch 5 | 2239/ 8400 batches | train loss 0.3338932 +| epoch 5 | 2243/ 8400 batches | train loss 0.4534009 +| epoch 5 | 2247/ 8400 batches | train loss 0.4963708 +| epoch 5 | 2251/ 8400 batches | train loss 0.4073956 +| epoch 5 | 2255/ 8400 batches | train loss 0.3868310 +| epoch 5 | 2259/ 8400 batches | train loss 0.4796423 +| epoch 5 | 2263/ 8400 batches | train loss 0.3681667 +| epoch 5 | 2267/ 8400 batches | train loss 0.3254132 +| epoch 5 | 2271/ 8400 batches | train loss 0.4243199 +| epoch 5 | 2275/ 8400 batches | train loss 0.3634806 +| epoch 5 | 2279/ 8400 batches | train loss 0.3765886 +| epoch 5 | 2283/ 8400 batches | train loss 0.3637362 +| epoch 5 | 2287/ 8400 batches | train loss 0.3805175 +| epoch 5 | 2291/ 8400 batches | train loss 0.3508465 +| epoch 5 | 2295/ 8400 batches | train loss 0.4331444 +| epoch 5 | 2299/ 8400 batches | train loss 0.3455507 +| epoch 5 | 2303/ 8400 batches | train loss 0.4173506 +| epoch 5 | 2307/ 8400 batches | train loss 0.4055235 +| epoch 5 | 2311/ 8400 batches | train loss 0.4404913 +| epoch 5 | 2315/ 8400 batches | train loss 0.3530514 +| epoch 5 | 2319/ 8400 batches | train loss 0.3748693 +| epoch 5 | 2323/ 8400 batches | train loss 0.4104155 +| epoch 5 | 2327/ 8400 batches | train loss 0.3058565 +| epoch 5 | 2331/ 8400 batches | train loss 0.3716330 +| epoch 5 | 2335/ 8400 batches | train loss 0.3487516 +| epoch 5 | 2339/ 8400 batches | train loss 0.4369044 +| epoch 5 | 2343/ 8400 batches | train loss 0.3468312 +| epoch 5 | 2347/ 8400 batches | train loss 0.4148903 +| epoch 5 | 2351/ 8400 batches | train loss 0.3342262 +| epoch 5 | 2355/ 8400 batches | train loss 0.3475716 +| epoch 5 | 2359/ 8400 batches | train loss 0.1835583 +| epoch 5 | 2363/ 8400 batches | train loss 0.3635336 +| epoch 5 | 2367/ 8400 batches | train loss 0.3892080 +| epoch 5 | 2371/ 8400 batches | train loss 0.3797551 +| epoch 5 | 2375/ 8400 batches | train loss 0.3381189 +| epoch 5 | 2379/ 8400 batches | train loss 0.4301181 +| epoch 5 | 2383/ 8400 batches | train loss 0.4491678 +| epoch 5 | 2387/ 8400 batches | train loss 0.3503390 +| epoch 5 | 2391/ 8400 batches | train loss 0.4983959 +| epoch 5 | 2395/ 8400 batches | train loss 0.4062314 +| epoch 5 | 2399/ 8400 batches | train loss 0.5394359 +| epoch 5 | 2403/ 8400 batches | train loss 0.4293590 +| epoch 5 | 2407/ 8400 batches | train loss 0.4212648 +| epoch 5 | 2411/ 8400 batches | train loss 0.3981526 +| epoch 5 | 2415/ 8400 batches | train loss 0.3579239 +| epoch 5 | 2419/ 8400 batches | train loss 0.3433982 +| epoch 5 | 2423/ 8400 batches | train loss 0.4140229 +| epoch 5 | 2427/ 8400 batches | train loss 0.3775758 +| epoch 5 | 2431/ 8400 batches | train loss 0.3891627 +| epoch 5 | 2435/ 8400 batches | train loss 0.4225065 +| epoch 5 | 2439/ 8400 batches | train loss 0.4067840 +| epoch 5 | 2443/ 8400 batches | train loss 0.4031358 +| epoch 5 | 2447/ 8400 batches | train loss 0.4294484 +| epoch 5 | 2451/ 8400 batches | train loss 0.3419242 +| epoch 5 | 2455/ 8400 batches | train loss 0.4293712 +| epoch 5 | 2459/ 8400 batches | train loss 0.4102441 +| epoch 5 | 2463/ 8400 batches | train loss 0.3918985 +| epoch 5 | 2467/ 8400 batches | train loss 0.4559234 +| epoch 5 | 2471/ 8400 batches | train loss 0.2971099 +| epoch 5 | 2475/ 8400 batches | train loss 0.4276124 +| epoch 5 | 2479/ 8400 batches | train loss 0.3640854 +| epoch 5 | 2483/ 8400 batches | train loss 0.3780640 +| epoch 5 | 2487/ 8400 batches | train loss 0.4165683 +| epoch 5 | 2491/ 8400 batches | train loss 0.3697545 +| epoch 5 | 2495/ 8400 batches | train loss 0.4795012 +| epoch 5 | 2499/ 8400 batches | train loss 0.4148131 +| epoch 5 | 2503/ 8400 batches | train loss 0.3591124 +| epoch 5 | 2507/ 8400 batches | train loss 0.4322481 +| epoch 5 | 2511/ 8400 batches | train loss 0.4268101 +| epoch 5 | 2515/ 8400 batches | train loss 0.4999959 +| epoch 5 | 2519/ 8400 batches | train loss 0.3729344 +| epoch 5 | 2523/ 8400 batches | train loss 0.4216561 +| epoch 5 | 2527/ 8400 batches | train loss 0.4424525 +| epoch 5 | 2531/ 8400 batches | train loss 0.4168615 +| epoch 5 | 2535/ 8400 batches | train loss 0.4711990 +| epoch 5 | 2539/ 8400 batches | train loss 0.4362105 +| epoch 5 | 2543/ 8400 batches | train loss 0.4450331 +| epoch 5 | 2547/ 8400 batches | train loss 0.3936947 +| epoch 5 | 2551/ 8400 batches | train loss 0.4832358 +| epoch 5 | 2555/ 8400 batches | train loss 0.4645273 +| epoch 5 | 2559/ 8400 batches | train loss 0.4117618 +| epoch 5 | 2563/ 8400 batches | train loss 0.4419429 +| epoch 5 | 2567/ 8400 batches | train loss 0.4326525 +| epoch 5 | 2571/ 8400 batches | train loss 0.4373650 +| epoch 5 | 2575/ 8400 batches | train loss 0.2653630 +| epoch 5 | 2579/ 8400 batches | train loss 0.4666315 +| epoch 5 | 2583/ 8400 batches | train loss 0.3885353 +| epoch 5 | 2587/ 8400 batches | train loss 0.2488784 +| epoch 5 | 2591/ 8400 batches | train loss 0.4538420 +| epoch 5 | 2595/ 8400 batches | train loss 0.4583503 +| epoch 5 | 2599/ 8400 batches | train loss 0.3923142 +| epoch 5 | 2603/ 8400 batches | train loss 0.3675449 +| epoch 5 | 2607/ 8400 batches | train loss 0.4357167 +| epoch 5 | 2611/ 8400 batches | train loss 0.3454680 +| epoch 5 | 2615/ 8400 batches | train loss 0.3860897 +| epoch 5 | 2619/ 8400 batches | train loss 0.3328010 +| epoch 5 | 2623/ 8400 batches | train loss 0.4043223 +| epoch 5 | 2627/ 8400 batches | train loss 0.3679398 +| epoch 5 | 2631/ 8400 batches | train loss 0.3600134 +| epoch 5 | 2635/ 8400 batches | train loss 0.4674914 +| epoch 5 | 2639/ 8400 batches | train loss 0.3880361 +| epoch 5 | 2643/ 8400 batches | train loss 0.3923196 +| epoch 5 | 2647/ 8400 batches | train loss 0.3518573 +| epoch 5 | 2651/ 8400 batches | train loss 0.3934168 +| epoch 5 | 2655/ 8400 batches | train loss 0.3734706 +| epoch 5 | 2659/ 8400 batches | train loss 0.3823174 +| epoch 5 | 2663/ 8400 batches | train loss 0.4112761 +| epoch 5 | 2667/ 8400 batches | train loss 0.4390637 +| epoch 5 | 2671/ 8400 batches | train loss 0.4586822 +| epoch 5 | 2675/ 8400 batches | train loss 0.3492468 +| epoch 5 | 2679/ 8400 batches | train loss 0.4520070 +| epoch 5 | 2683/ 8400 batches | train loss 0.4444096 +| epoch 5 | 2687/ 8400 batches | train loss 0.3562733 +| epoch 5 | 2691/ 8400 batches | train loss 0.4223900 +| epoch 5 | 2695/ 8400 batches | train loss 0.4247137 +| epoch 5 | 2699/ 8400 batches | train loss 0.4252923 +| epoch 5 | 2703/ 8400 batches | train loss 0.4212516 +| epoch 5 | 2707/ 8400 batches | train loss 0.3340186 +| epoch 5 | 2711/ 8400 batches | train loss 0.4320323 +| epoch 5 | 2715/ 8400 batches | train loss 0.4608257 +| epoch 5 | 2719/ 8400 batches | train loss 0.4014640 +| epoch 5 | 2723/ 8400 batches | train loss 0.3810285 +| epoch 5 | 2727/ 8400 batches | train loss 0.4413775 +| epoch 5 | 2731/ 8400 batches | train loss 0.4429019 +| epoch 5 | 2735/ 8400 batches | train loss 0.3826917 +| epoch 5 | 2739/ 8400 batches | train loss 0.4309384 +| epoch 5 | 2743/ 8400 batches | train loss 0.3183784 +| epoch 5 | 2747/ 8400 batches | train loss 0.4108868 +| epoch 5 | 2751/ 8400 batches | train loss 0.3732699 +| epoch 5 | 2755/ 8400 batches | train loss 0.4159272 +| epoch 5 | 2759/ 8400 batches | train loss 0.3679855 +| epoch 5 | 2763/ 8400 batches | train loss 0.5103858 +| epoch 5 | 2767/ 8400 batches | train loss 0.3881705 +| epoch 5 | 2771/ 8400 batches | train loss 0.4353954 +| epoch 5 | 2775/ 8400 batches | train loss 0.4022238 +| epoch 5 | 2779/ 8400 batches | train loss 0.3618343 +| epoch 5 | 2783/ 8400 batches | train loss 0.4492702 +| epoch 5 | 2787/ 8400 batches | train loss 0.4177955 +| epoch 5 | 2791/ 8400 batches | train loss 0.3852564 +| epoch 5 | 2795/ 8400 batches | train loss 0.4044948 +| epoch 5 | 2799/ 8400 batches | train loss 0.4219506 +| epoch 5 | 2803/ 8400 batches | train loss 0.3552243 +| epoch 5 | 2807/ 8400 batches | train loss 0.4706051 +| epoch 5 | 2811/ 8400 batches | train loss 0.3415822 +| epoch 5 | 2815/ 8400 batches | train loss 0.4726949 +| epoch 5 | 2819/ 8400 batches | train loss 0.3652464 +| epoch 5 | 2823/ 8400 batches | train loss 0.4325184 +| epoch 5 | 2827/ 8400 batches | train loss 0.3238860 +| epoch 5 | 2831/ 8400 batches | train loss 0.4440794 +| epoch 5 | 2835/ 8400 batches | train loss 0.3818988 +| epoch 5 | 2839/ 8400 batches | train loss 0.4530755 +| epoch 5 | 2843/ 8400 batches | train loss 0.3620143 +| epoch 5 | 2847/ 8400 batches | train loss 0.3482058 +| epoch 5 | 2851/ 8400 batches | train loss 0.4159600 +| epoch 5 | 2855/ 8400 batches | train loss 0.4146320 +| epoch 5 | 2859/ 8400 batches | train loss 0.4795705 +| epoch 5 | 2863/ 8400 batches | train loss 0.3425666 +| epoch 5 | 2867/ 8400 batches | train loss 0.3614851 +| epoch 5 | 2871/ 8400 batches | train loss 0.3958892 +| epoch 5 | 2875/ 8400 batches | train loss 0.3717359 +| epoch 5 | 2879/ 8400 batches | train loss 0.3640862 +| epoch 5 | 2883/ 8400 batches | train loss 0.4175740 +| epoch 5 | 2887/ 8400 batches | train loss 0.4108266 +| epoch 5 | 2891/ 8400 batches | train loss 0.3748949 +| epoch 5 | 2895/ 8400 batches | train loss 0.3156654 +| epoch 5 | 2899/ 8400 batches | train loss 0.4231804 +| epoch 5 | 2903/ 8400 batches | train loss 0.3644624 +| epoch 5 | 2907/ 8400 batches | train loss 0.4344200 +| epoch 5 | 2911/ 8400 batches | train loss 0.3785331 +| epoch 5 | 2915/ 8400 batches | train loss 0.3109848 +| epoch 5 | 2919/ 8400 batches | train loss 0.3641855 +| epoch 5 | 2923/ 8400 batches | train loss 0.4281929 +| epoch 5 | 2927/ 8400 batches | train loss 0.3918565 +| epoch 5 | 2931/ 8400 batches | train loss 0.3309295 +| epoch 5 | 2935/ 8400 batches | train loss 0.4036964 +| epoch 5 | 2939/ 8400 batches | train loss 0.4567214 +| epoch 5 | 2943/ 8400 batches | train loss 0.4685804 +| epoch 5 | 2947/ 8400 batches | train loss 0.4691932 +| epoch 5 | 2951/ 8400 batches | train loss 0.3131218 +| epoch 5 | 2955/ 8400 batches | train loss 0.4403867 +| epoch 5 | 2959/ 8400 batches | train loss 0.4660106 +| epoch 5 | 2963/ 8400 batches | train loss 0.3912106 +| epoch 5 | 2967/ 8400 batches | train loss 0.3938460 +| epoch 5 | 2971/ 8400 batches | train loss 0.3866377 +| epoch 5 | 2975/ 8400 batches | train loss 0.4084108 +| epoch 5 | 2979/ 8400 batches | train loss 0.4428016 +| epoch 5 | 2983/ 8400 batches | train loss 0.3785982 +| epoch 5 | 2987/ 8400 batches | train loss 0.4088534 +| epoch 5 | 2991/ 8400 batches | train loss 0.3825327 +| epoch 5 | 2995/ 8400 batches | train loss 0.3875294 +| epoch 5 | 2999/ 8400 batches | train loss 0.3232366 +| epoch 5 | 3003/ 8400 batches | train loss 0.4194075 +| epoch 5 | 3007/ 8400 batches | train loss 0.4201224 +| epoch 5 | 3011/ 8400 batches | train loss 0.4545338 +| epoch 5 | 3015/ 8400 batches | train loss 0.3102182 +| epoch 5 | 3019/ 8400 batches | train loss 0.4461920 +| epoch 5 | 3023/ 8400 batches | train loss 0.3245255 +| epoch 5 | 3027/ 8400 batches | train loss 0.4345875 +| epoch 5 | 3031/ 8400 batches | train loss 0.3429832 +| epoch 5 | 3035/ 8400 batches | train loss 0.4368981 +| epoch 5 | 3039/ 8400 batches | train loss 0.3877959 +| epoch 5 | 3043/ 8400 batches | train loss 0.4310406 +| epoch 5 | 3047/ 8400 batches | train loss 0.4365984 +| epoch 5 | 3051/ 8400 batches | train loss 0.4130258 +| epoch 5 | 3055/ 8400 batches | train loss 0.4225609 +| epoch 5 | 3059/ 8400 batches | train loss 0.4306935 +| epoch 5 | 3063/ 8400 batches | train loss 0.4238628 +| epoch 5 | 3067/ 8400 batches | train loss 0.3766165 +| epoch 5 | 3071/ 8400 batches | train loss 0.4070607 +| epoch 5 | 3075/ 8400 batches | train loss 0.3424940 +| epoch 5 | 3079/ 8400 batches | train loss 0.3298678 +| epoch 5 | 3083/ 8400 batches | train loss 0.4446509 +| epoch 5 | 3087/ 8400 batches | train loss 0.3691871 +| epoch 5 | 3091/ 8400 batches | train loss 0.3675611 +| epoch 5 | 3095/ 8400 batches | train loss 0.3719403 +| epoch 5 | 3099/ 8400 batches | train loss 0.4308904 +| epoch 5 | 3103/ 8400 batches | train loss 0.4108950 +| epoch 5 | 3107/ 8400 batches | train loss 0.3750111 +| epoch 5 | 3111/ 8400 batches | train loss 0.4835641 +| epoch 5 | 3115/ 8400 batches | train loss 0.4238808 +| epoch 5 | 3119/ 8400 batches | train loss 0.3605520 +| epoch 5 | 3123/ 8400 batches | train loss 0.4095392 +| epoch 5 | 3127/ 8400 batches | train loss 0.4498971 +| epoch 5 | 3131/ 8400 batches | train loss 0.4308893 +| epoch 5 | 3135/ 8400 batches | train loss 0.4072524 +| epoch 5 | 3139/ 8400 batches | train loss 0.4604114 +| epoch 5 | 3143/ 8400 batches | train loss 0.3805286 +| epoch 5 | 3147/ 8400 batches | train loss 0.3565742 +| epoch 5 | 3151/ 8400 batches | train loss 0.4684913 +| epoch 5 | 3155/ 8400 batches | train loss 0.3428867 +| epoch 5 | 3159/ 8400 batches | train loss 0.3966374 +| epoch 5 | 3163/ 8400 batches | train loss 0.4168983 +| epoch 5 | 3167/ 8400 batches | train loss 0.3984353 +| epoch 5 | 3171/ 8400 batches | train loss 0.3896741 +| epoch 5 | 3175/ 8400 batches | train loss 0.3671257 +| epoch 5 | 3179/ 8400 batches | train loss 0.4068777 +| epoch 5 | 3183/ 8400 batches | train loss 0.4025669 +| epoch 5 | 3187/ 8400 batches | train loss 0.3342912 +| epoch 5 | 3191/ 8400 batches | train loss 0.2803407 +| epoch 5 | 3195/ 8400 batches | train loss 0.3588349 +| epoch 5 | 3199/ 8400 batches | train loss 0.4237573 +| epoch 5 | 3203/ 8400 batches | train loss 0.3954520 +| epoch 5 | 3207/ 8400 batches | train loss 0.3515661 +| epoch 5 | 3211/ 8400 batches | train loss 0.3593029 +| epoch 5 | 3215/ 8400 batches | train loss 0.3872858 +| epoch 5 | 3219/ 8400 batches | train loss 0.3778260 +| epoch 5 | 3223/ 8400 batches | train loss 0.3361310 +| epoch 5 | 3227/ 8400 batches | train loss 0.4994539 +| epoch 5 | 3231/ 8400 batches | train loss 0.4172706 +| epoch 5 | 3235/ 8400 batches | train loss 0.3427102 +| epoch 5 | 3239/ 8400 batches | train loss 0.4222125 +| epoch 5 | 3243/ 8400 batches | train loss 0.4399300 +| epoch 5 | 3247/ 8400 batches | train loss 0.4418959 +| epoch 5 | 3251/ 8400 batches | train loss 0.5322400 +| epoch 5 | 3255/ 8400 batches | train loss 0.4105040 +| epoch 5 | 3259/ 8400 batches | train loss 0.3881857 +| epoch 5 | 3263/ 8400 batches | train loss 0.4434231 +| epoch 5 | 3267/ 8400 batches | train loss 0.3904016 +| epoch 5 | 3271/ 8400 batches | train loss 0.3533995 +| epoch 5 | 3275/ 8400 batches | train loss 0.4161882 +| epoch 5 | 3279/ 8400 batches | train loss 0.3813938 +| epoch 5 | 3283/ 8400 batches | train loss 0.4252442 +| epoch 5 | 3287/ 8400 batches | train loss 0.3946111 +| epoch 5 | 3291/ 8400 batches | train loss 0.3770378 +| epoch 5 | 3295/ 8400 batches | train loss 0.3544022 +| epoch 5 | 3299/ 8400 batches | train loss 0.3258187 +| epoch 5 | 3303/ 8400 batches | train loss 0.3330789 +| epoch 5 | 3307/ 8400 batches | train loss 0.4146962 +| epoch 5 | 3311/ 8400 batches | train loss 0.4262791 +| epoch 5 | 3315/ 8400 batches | train loss 0.3780219 +| epoch 5 | 3319/ 8400 batches | train loss 0.5016191 +| epoch 5 | 3323/ 8400 batches | train loss 0.3851494 +| epoch 5 | 3327/ 8400 batches | train loss 0.4552545 +| epoch 5 | 3331/ 8400 batches | train loss 0.3311011 +| epoch 5 | 3335/ 8400 batches | train loss 0.3718118 +| epoch 5 | 3339/ 8400 batches | train loss 0.4894498 +| epoch 5 | 3343/ 8400 batches | train loss 0.3954386 +| epoch 5 | 3347/ 8400 batches | train loss 0.3319807 +| epoch 5 | 3351/ 8400 batches | train loss 0.4284545 +| epoch 5 | 3355/ 8400 batches | train loss 0.3751384 +| epoch 5 | 3359/ 8400 batches | train loss 0.4835992 +| epoch 5 | 3363/ 8400 batches | train loss 0.2866031 +| epoch 5 | 3367/ 8400 batches | train loss 0.4296603 +| epoch 5 | 3371/ 8400 batches | train loss 0.4439795 +| epoch 5 | 3375/ 8400 batches | train loss 0.3443923 +| epoch 5 | 3379/ 8400 batches | train loss 0.4340976 +| epoch 5 | 3383/ 8400 batches | train loss 0.3515902 +| epoch 5 | 3387/ 8400 batches | train loss 0.4449911 +| epoch 5 | 3391/ 8400 batches | train loss 0.3925219 +| epoch 5 | 3395/ 8400 batches | train loss 0.3697899 +| epoch 5 | 3399/ 8400 batches | train loss 0.4147184 +| epoch 5 | 3403/ 8400 batches | train loss 0.4029403 +| epoch 5 | 3407/ 8400 batches | train loss 0.3607473 +| epoch 5 | 3411/ 8400 batches | train loss 0.3700811 +| epoch 5 | 3415/ 8400 batches | train loss 0.4004764 +| epoch 5 | 3419/ 8400 batches | train loss 0.4170244 +| epoch 5 | 3423/ 8400 batches | train loss 0.3851484 +| epoch 5 | 3427/ 8400 batches | train loss 0.3432217 +| epoch 5 | 3431/ 8400 batches | train loss 0.3419093 +| epoch 5 | 3435/ 8400 batches | train loss 0.4590288 +| epoch 5 | 3439/ 8400 batches | train loss 0.4050597 +| epoch 5 | 3443/ 8400 batches | train loss 0.4358388 +| epoch 5 | 3447/ 8400 batches | train loss 0.3760389 +| epoch 5 | 3451/ 8400 batches | train loss 0.4412147 +| epoch 5 | 3455/ 8400 batches | train loss 0.3997004 +| epoch 5 | 3459/ 8400 batches | train loss 0.4159743 +| epoch 5 | 3463/ 8400 batches | train loss 0.3199709 +| epoch 5 | 3467/ 8400 batches | train loss 0.4740973 +| epoch 5 | 3471/ 8400 batches | train loss 0.3476818 +| epoch 5 | 3475/ 8400 batches | train loss 0.4882469 +| epoch 5 | 3479/ 8400 batches | train loss 0.2790818 +| epoch 5 | 3483/ 8400 batches | train loss 0.3567834 +| epoch 5 | 3487/ 8400 batches | train loss 0.3480735 +| epoch 5 | 3491/ 8400 batches | train loss 0.4117199 +| epoch 5 | 3495/ 8400 batches | train loss 0.3155468 +| epoch 5 | 3499/ 8400 batches | train loss 0.4805006 +| epoch 5 | 3503/ 8400 batches | train loss 0.3919991 +| epoch 5 | 3507/ 8400 batches | train loss 0.3894911 +| epoch 5 | 3511/ 8400 batches | train loss 0.3691714 +| epoch 5 | 3515/ 8400 batches | train loss 0.3203040 +| epoch 5 | 3519/ 8400 batches | train loss 0.4099715 +| epoch 5 | 3523/ 8400 batches | train loss 0.3513526 +| epoch 5 | 3527/ 8400 batches | train loss 0.3906717 +| epoch 5 | 3531/ 8400 batches | train loss 0.4043028 +| epoch 5 | 3535/ 8400 batches | train loss 0.4069030 +| epoch 5 | 3539/ 8400 batches | train loss 0.4031450 +| epoch 5 | 3543/ 8400 batches | train loss 0.3852602 +| epoch 5 | 3547/ 8400 batches | train loss 0.3862959 +| epoch 5 | 3551/ 8400 batches | train loss 0.4374809 +| epoch 5 | 3555/ 8400 batches | train loss 0.3490930 +| epoch 5 | 3559/ 8400 batches | train loss 0.3446150 +| epoch 5 | 3563/ 8400 batches | train loss 0.3296111 +| epoch 5 | 3567/ 8400 batches | train loss 0.4591321 +| epoch 5 | 3571/ 8400 batches | train loss 0.3828594 +| epoch 5 | 3575/ 8400 batches | train loss 0.4078134 +| epoch 5 | 3579/ 8400 batches | train loss 0.3619335 +| epoch 5 | 3583/ 8400 batches | train loss 0.3740829 +| epoch 5 | 3587/ 8400 batches | train loss 0.4071976 +| epoch 5 | 3591/ 8400 batches | train loss 0.4133521 +| epoch 5 | 3595/ 8400 batches | train loss 0.4154820 +| epoch 5 | 3599/ 8400 batches | train loss 0.3877241 +| epoch 5 | 3603/ 8400 batches | train loss 0.3788369 +| epoch 5 | 3607/ 8400 batches | train loss 0.4020815 +| epoch 5 | 3611/ 8400 batches | train loss 0.4615420 +| epoch 5 | 3615/ 8400 batches | train loss 0.3344846 +| epoch 5 | 3619/ 8400 batches | train loss 0.3680142 +| epoch 5 | 3623/ 8400 batches | train loss 0.4969345 +| epoch 5 | 3627/ 8400 batches | train loss 0.3773671 +| epoch 5 | 3631/ 8400 batches | train loss 0.2841739 +| epoch 5 | 3635/ 8400 batches | train loss 0.3571914 +| epoch 5 | 3639/ 8400 batches | train loss 0.3753896 +| epoch 5 | 3643/ 8400 batches | train loss 0.3609840 +| epoch 5 | 3647/ 8400 batches | train loss 0.3160879 +| epoch 5 | 3651/ 8400 batches | train loss 0.4079719 +| epoch 5 | 3655/ 8400 batches | train loss 0.3670795 +| epoch 5 | 3659/ 8400 batches | train loss 0.4092899 +| epoch 5 | 3663/ 8400 batches | train loss 0.4035701 +| epoch 5 | 3667/ 8400 batches | train loss 0.4616309 +| epoch 5 | 3671/ 8400 batches | train loss 0.3656596 +| epoch 5 | 3675/ 8400 batches | train loss 0.4396645 +| epoch 5 | 3679/ 8400 batches | train loss 0.3237295 +| epoch 5 | 3683/ 8400 batches | train loss 0.4337014 +| epoch 5 | 3687/ 8400 batches | train loss 0.3843254 +| epoch 5 | 3691/ 8400 batches | train loss 0.3896485 +| epoch 5 | 3695/ 8400 batches | train loss 0.4099892 +| epoch 5 | 3699/ 8400 batches | train loss 0.4336446 +| epoch 5 | 3703/ 8400 batches | train loss 0.3848701 +| epoch 5 | 3707/ 8400 batches | train loss 0.3910202 +| epoch 5 | 3711/ 8400 batches | train loss 0.4913651 +| epoch 5 | 3715/ 8400 batches | train loss 0.3814469 +| epoch 5 | 3719/ 8400 batches | train loss 0.4932074 +| epoch 5 | 3723/ 8400 batches | train loss 0.4493360 +| epoch 5 | 3727/ 8400 batches | train loss 0.4585716 +| epoch 5 | 3731/ 8400 batches | train loss 0.2665628 +| epoch 5 | 3735/ 8400 batches | train loss 0.3730030 +| epoch 5 | 3739/ 8400 batches | train loss 0.3866798 +| epoch 5 | 3743/ 8400 batches | train loss 0.4415115 +| epoch 5 | 3747/ 8400 batches | train loss 0.4497731 +| epoch 5 | 3751/ 8400 batches | train loss 0.3788248 +| epoch 5 | 3755/ 8400 batches | train loss 0.4556299 +| epoch 5 | 3759/ 8400 batches | train loss 0.3842484 +| epoch 5 | 3763/ 8400 batches | train loss 0.3463374 +| epoch 5 | 3767/ 8400 batches | train loss 0.3199647 +| epoch 5 | 3771/ 8400 batches | train loss 0.4302589 +| epoch 5 | 3775/ 8400 batches | train loss 0.3598386 +| epoch 5 | 3779/ 8400 batches | train loss 0.4436909 +| epoch 5 | 3783/ 8400 batches | train loss 0.3634061 +| epoch 5 | 3787/ 8400 batches | train loss 0.4388523 +| epoch 5 | 3791/ 8400 batches | train loss 0.3753434 +| epoch 5 | 3795/ 8400 batches | train loss 0.3195876 +| epoch 5 | 3799/ 8400 batches | train loss 0.3334085 +| epoch 5 | 3803/ 8400 batches | train loss 0.4451722 +| epoch 5 | 3807/ 8400 batches | train loss 0.4149562 +| epoch 5 | 3811/ 8400 batches | train loss 0.4314484 +| epoch 5 | 3815/ 8400 batches | train loss 0.3775812 +| epoch 5 | 3819/ 8400 batches | train loss 0.4154625 +| epoch 5 | 3823/ 8400 batches | train loss 0.4115523 +| epoch 5 | 3827/ 8400 batches | train loss 0.3659828 +| epoch 5 | 3831/ 8400 batches | train loss 0.3716207 +| epoch 5 | 3835/ 8400 batches | train loss 0.3967621 +| epoch 5 | 3839/ 8400 batches | train loss 0.3383518 +| epoch 5 | 3843/ 8400 batches | train loss 0.3603327 +| epoch 5 | 3847/ 8400 batches | train loss 0.4004653 +| epoch 5 | 3851/ 8400 batches | train loss 0.3851228 +| epoch 5 | 3855/ 8400 batches | train loss 0.3077958 +| epoch 5 | 3859/ 8400 batches | train loss 0.3852710 +| epoch 5 | 3863/ 8400 batches | train loss 0.4569426 +| epoch 5 | 3867/ 8400 batches | train loss 0.3555312 +| epoch 5 | 3871/ 8400 batches | train loss 0.3712206 +| epoch 5 | 3875/ 8400 batches | train loss 0.4817529 +| epoch 5 | 3879/ 8400 batches | train loss 0.4302453 +| epoch 5 | 3883/ 8400 batches | train loss 0.3489042 +| epoch 5 | 3887/ 8400 batches | train loss 0.4224508 +| epoch 5 | 3891/ 8400 batches | train loss 0.3575788 +| epoch 5 | 3895/ 8400 batches | train loss 0.3916127 +| epoch 5 | 3899/ 8400 batches | train loss 0.4569259 +| epoch 5 | 3903/ 8400 batches | train loss 0.3962821 +| epoch 5 | 3907/ 8400 batches | train loss 0.3859026 +| epoch 5 | 3911/ 8400 batches | train loss 0.3817806 +| epoch 5 | 3915/ 8400 batches | train loss 0.4477082 +| epoch 5 | 3919/ 8400 batches | train loss 0.3848569 +| epoch 5 | 3923/ 8400 batches | train loss 0.4414149 +| epoch 5 | 3927/ 8400 batches | train loss 0.4509866 +| epoch 5 | 3931/ 8400 batches | train loss 0.3806261 +| epoch 5 | 3935/ 8400 batches | train loss 0.4022514 +| epoch 5 | 3939/ 8400 batches | train loss 0.4075333 +| epoch 5 | 3943/ 8400 batches | train loss 0.4441207 +| epoch 5 | 3947/ 8400 batches | train loss 0.4263310 +| epoch 5 | 3951/ 8400 batches | train loss 0.3801261 +| epoch 5 | 3955/ 8400 batches | train loss 0.2691915 +| epoch 5 | 3959/ 8400 batches | train loss 0.3621106 +| epoch 5 | 3963/ 8400 batches | train loss 0.3656596 +| epoch 5 | 3967/ 8400 batches | train loss 0.4255423 +| epoch 5 | 3971/ 8400 batches | train loss 0.3895894 +| epoch 5 | 3975/ 8400 batches | train loss 0.3857924 +| epoch 5 | 3979/ 8400 batches | train loss 0.3580959 +| epoch 5 | 3983/ 8400 batches | train loss 0.3410578 +| epoch 5 | 3987/ 8400 batches | train loss 0.3472430 +| epoch 5 | 3991/ 8400 batches | train loss 0.4151885 +| epoch 5 | 3995/ 8400 batches | train loss 0.4040300 +| epoch 5 | 3999/ 8400 batches | train loss 0.3932586 +| epoch 5 | 4003/ 8400 batches | train loss 0.3686047 +| epoch 5 | 4007/ 8400 batches | train loss 0.4167915 +| epoch 5 | 4011/ 8400 batches | train loss 0.3594931 +| epoch 5 | 4015/ 8400 batches | train loss 0.3721711 +| epoch 5 | 4019/ 8400 batches | train loss 0.3272728 +| epoch 5 | 4023/ 8400 batches | train loss 0.3778343 +| epoch 5 | 4027/ 8400 batches | train loss 0.4488523 +| epoch 5 | 4031/ 8400 batches | train loss 0.4918891 +| epoch 5 | 4035/ 8400 batches | train loss 0.4207867 +| epoch 5 | 4039/ 8400 batches | train loss 0.2416367 +| epoch 5 | 4043/ 8400 batches | train loss 0.4063359 +| epoch 5 | 4047/ 8400 batches | train loss 0.4335475 +| epoch 5 | 4051/ 8400 batches | train loss 0.4788874 +| epoch 5 | 4055/ 8400 batches | train loss 0.4578485 +| epoch 5 | 4059/ 8400 batches | train loss 0.4017416 +| epoch 5 | 4063/ 8400 batches | train loss 0.3779302 +| epoch 5 | 4067/ 8400 batches | train loss 0.2976320 +| epoch 5 | 4071/ 8400 batches | train loss 0.4022047 +| epoch 5 | 4075/ 8400 batches | train loss 0.3189971 +| epoch 5 | 4079/ 8400 batches | train loss 0.4144904 +| epoch 5 | 4083/ 8400 batches | train loss 0.3860030 +| epoch 5 | 4087/ 8400 batches | train loss 0.4474686 +| epoch 5 | 4091/ 8400 batches | train loss 0.4300106 +| epoch 5 | 4095/ 8400 batches | train loss 0.4336890 +| epoch 5 | 4099/ 8400 batches | train loss 0.3822221 +| epoch 5 | 4103/ 8400 batches | train loss 0.3998159 +| epoch 5 | 4107/ 8400 batches | train loss 0.4238929 +| epoch 5 | 4111/ 8400 batches | train loss 0.3629113 +| epoch 5 | 4115/ 8400 batches | train loss 0.4775988 +| epoch 5 | 4119/ 8400 batches | train loss 0.3694888 +| epoch 5 | 4123/ 8400 batches | train loss 0.3977238 +| epoch 5 | 4127/ 8400 batches | train loss 0.3632028 +| epoch 5 | 4131/ 8400 batches | train loss 0.3708575 +| epoch 5 | 4135/ 8400 batches | train loss 0.4214197 +| epoch 5 | 4139/ 8400 batches | train loss 0.3528588 +| epoch 5 | 4143/ 8400 batches | train loss 0.4297641 +| epoch 5 | 4147/ 8400 batches | train loss 0.3843663 +| epoch 5 | 4151/ 8400 batches | train loss 0.4382183 +| epoch 5 | 4155/ 8400 batches | train loss 0.4136533 +| epoch 5 | 4159/ 8400 batches | train loss 0.4012201 +| epoch 5 | 4163/ 8400 batches | train loss 0.3825632 +| epoch 5 | 4167/ 8400 batches | train loss 0.4621703 +| epoch 5 | 4171/ 8400 batches | train loss 0.4139004 +| epoch 5 | 4175/ 8400 batches | train loss 0.4310254 +| epoch 5 | 4179/ 8400 batches | train loss 0.4120770 +| epoch 5 | 4183/ 8400 batches | train loss 0.4385460 +| epoch 5 | 4187/ 8400 batches | train loss 0.3640638 +| epoch 5 | 4191/ 8400 batches | train loss 0.4154157 +| epoch 5 | 4195/ 8400 batches | train loss 0.3742738 +| epoch 5 | 4199/ 8400 batches | train loss 0.3360876 +| epoch 5 | 4203/ 8400 batches | train loss 0.3974531 +| epoch 5 | 4207/ 8400 batches | train loss 0.3581788 +| epoch 5 | 4211/ 8400 batches | train loss 0.4529778 +| epoch 5 | 4215/ 8400 batches | train loss 0.4765921 +| epoch 5 | 4219/ 8400 batches | train loss 0.3537509 +| epoch 5 | 4223/ 8400 batches | train loss 0.4122245 +| epoch 5 | 4227/ 8400 batches | train loss 0.4467553 +| epoch 5 | 4231/ 8400 batches | train loss 0.3658379 +| epoch 5 | 4235/ 8400 batches | train loss 0.4208212 +| epoch 5 | 4239/ 8400 batches | train loss 0.3444868 +| epoch 5 | 4243/ 8400 batches | train loss 0.3908184 +| epoch 5 | 4247/ 8400 batches | train loss 0.3913232 +| epoch 5 | 4251/ 8400 batches | train loss 0.3194556 +| epoch 5 | 4255/ 8400 batches | train loss 0.3145241 +| epoch 5 | 4259/ 8400 batches | train loss 0.4838897 +| epoch 5 | 4263/ 8400 batches | train loss 0.4294803 +| epoch 5 | 4267/ 8400 batches | train loss 0.3925055 +| epoch 5 | 4271/ 8400 batches | train loss 0.3805102 +| epoch 5 | 4275/ 8400 batches | train loss 0.4916290 +| epoch 5 | 4279/ 8400 batches | train loss 0.3921512 +| epoch 5 | 4283/ 8400 batches | train loss 0.3926531 +| epoch 5 | 4287/ 8400 batches | train loss 0.3662274 +| epoch 5 | 4291/ 8400 batches | train loss 0.4192670 +| epoch 5 | 4295/ 8400 batches | train loss 0.4285039 +| epoch 5 | 4299/ 8400 batches | train loss 0.3971907 +| epoch 5 | 4303/ 8400 batches | train loss 0.3678279 +| epoch 5 | 4307/ 8400 batches | train loss 0.3830084 +| epoch 5 | 4311/ 8400 batches | train loss 0.3941906 +| epoch 5 | 4315/ 8400 batches | train loss 0.3917400 +| epoch 5 | 4319/ 8400 batches | train loss 0.4191118 +| epoch 5 | 4323/ 8400 batches | train loss 0.4298346 +| epoch 5 | 4327/ 8400 batches | train loss 0.3071609 +| epoch 5 | 4331/ 8400 batches | train loss 0.4231892 +| epoch 5 | 4335/ 8400 batches | train loss 0.3517686 +| epoch 5 | 4339/ 8400 batches | train loss 0.4153521 +| epoch 5 | 4343/ 8400 batches | train loss 0.4412471 +| epoch 5 | 4347/ 8400 batches | train loss 0.2722025 +| epoch 5 | 4351/ 8400 batches | train loss 0.4384214 +| epoch 5 | 4355/ 8400 batches | train loss 0.4219631 +| epoch 5 | 4359/ 8400 batches | train loss 0.3546070 +| epoch 5 | 4363/ 8400 batches | train loss 0.3550315 +| epoch 5 | 4367/ 8400 batches | train loss 0.4489347 +| epoch 5 | 4371/ 8400 batches | train loss 0.3919690 +| epoch 5 | 4375/ 8400 batches | train loss 0.3914501 +| epoch 5 | 4379/ 8400 batches | train loss 0.4364912 +| epoch 5 | 4383/ 8400 batches | train loss 0.4486452 +| epoch 5 | 4387/ 8400 batches | train loss 0.3852426 +| epoch 5 | 4391/ 8400 batches | train loss 0.4020938 +| epoch 5 | 4395/ 8400 batches | train loss 0.4064586 +| epoch 5 | 4399/ 8400 batches | train loss 0.4324792 +| epoch 5 | 4403/ 8400 batches | train loss 0.4834189 +| epoch 5 | 4407/ 8400 batches | train loss 0.4051145 +| epoch 5 | 4411/ 8400 batches | train loss 0.3647065 +| epoch 5 | 4415/ 8400 batches | train loss 0.3959833 +| epoch 5 | 4419/ 8400 batches | train loss 0.4117520 +| epoch 5 | 4423/ 8400 batches | train loss 0.3889737 +| epoch 5 | 4427/ 8400 batches | train loss 0.4816163 +| epoch 5 | 4431/ 8400 batches | train loss 0.3846406 +| epoch 5 | 4435/ 8400 batches | train loss 0.3993160 +| epoch 5 | 4439/ 8400 batches | train loss 0.3742126 +| epoch 5 | 4443/ 8400 batches | train loss 0.3927450 +| epoch 5 | 4447/ 8400 batches | train loss 0.3561656 +| epoch 5 | 4451/ 8400 batches | train loss 0.4184517 +| epoch 5 | 4455/ 8400 batches | train loss 0.3421210 +| epoch 5 | 4459/ 8400 batches | train loss 0.4559047 +| epoch 5 | 4463/ 8400 batches | train loss 0.3197196 +| epoch 5 | 4467/ 8400 batches | train loss 0.4223446 +| epoch 5 | 4471/ 8400 batches | train loss 0.4627278 +| epoch 5 | 4475/ 8400 batches | train loss 0.3768935 +| epoch 5 | 4479/ 8400 batches | train loss 0.3957473 +| epoch 5 | 4483/ 8400 batches | train loss 0.4002129 +| epoch 5 | 4487/ 8400 batches | train loss 0.4537164 +| epoch 5 | 4491/ 8400 batches | train loss 0.3819380 +| epoch 5 | 4495/ 8400 batches | train loss 0.4168408 +| epoch 5 | 4499/ 8400 batches | train loss 0.1658887 +| epoch 5 | 4503/ 8400 batches | train loss 0.3315314 +| epoch 5 | 4507/ 8400 batches | train loss 0.4125251 +| epoch 5 | 4511/ 8400 batches | train loss 0.3862006 +| epoch 5 | 4515/ 8400 batches | train loss 0.3729797 +| epoch 5 | 4519/ 8400 batches | train loss 0.3580382 +| epoch 5 | 4523/ 8400 batches | train loss 0.4709867 +| epoch 5 | 4527/ 8400 batches | train loss 0.4131080 +| epoch 5 | 4531/ 8400 batches | train loss 0.4354812 +| epoch 5 | 4535/ 8400 batches | train loss 0.3790883 +| epoch 5 | 4539/ 8400 batches | train loss 0.4239359 +| epoch 5 | 4543/ 8400 batches | train loss 0.3678259 +| epoch 5 | 4547/ 8400 batches | train loss 0.3624728 +| epoch 5 | 4551/ 8400 batches | train loss 0.4271109 +| epoch 5 | 4555/ 8400 batches | train loss 0.3615037 +| epoch 5 | 4559/ 8400 batches | train loss 0.4751217 +| epoch 5 | 4563/ 8400 batches | train loss 0.4526256 +| epoch 5 | 4567/ 8400 batches | train loss 0.4090866 +| epoch 5 | 4571/ 8400 batches | train loss 0.3897029 +| epoch 5 | 4575/ 8400 batches | train loss 0.3820383 +| epoch 5 | 4579/ 8400 batches | train loss 0.4240627 +| epoch 5 | 4583/ 8400 batches | train loss 0.3824985 +| epoch 5 | 4587/ 8400 batches | train loss 0.3258221 +| epoch 5 | 4591/ 8400 batches | train loss 0.4558938 +| epoch 5 | 4595/ 8400 batches | train loss 0.3759962 +| epoch 5 | 4599/ 8400 batches | train loss 0.4088175 +| epoch 5 | 4603/ 8400 batches | train loss 0.3683824 +| epoch 5 | 4607/ 8400 batches | train loss 0.4171433 +| epoch 5 | 4611/ 8400 batches | train loss 0.3608238 +| epoch 5 | 4615/ 8400 batches | train loss 0.3903815 +| epoch 5 | 4619/ 8400 batches | train loss 0.4624343 +| epoch 5 | 4623/ 8400 batches | train loss 0.3693169 +| epoch 5 | 4627/ 8400 batches | train loss 0.4731627 +| epoch 5 | 4631/ 8400 batches | train loss 0.3041521 +| epoch 5 | 4635/ 8400 batches | train loss 0.4069070 +| epoch 5 | 4639/ 8400 batches | train loss 0.3819637 +| epoch 5 | 4643/ 8400 batches | train loss 0.4620403 +| epoch 5 | 4647/ 8400 batches | train loss 0.3267867 +| epoch 5 | 4651/ 8400 batches | train loss 0.3676144 +| epoch 5 | 4655/ 8400 batches | train loss 0.4276279 +| epoch 5 | 4659/ 8400 batches | train loss 0.4261360 +| epoch 5 | 4663/ 8400 batches | train loss 0.3261501 +| epoch 5 | 4667/ 8400 batches | train loss 0.3582719 +| epoch 5 | 4671/ 8400 batches | train loss 0.3345844 +| epoch 5 | 4675/ 8400 batches | train loss 0.3829733 +| epoch 5 | 4679/ 8400 batches | train loss 0.3224839 +| epoch 5 | 4683/ 8400 batches | train loss 0.4138873 +| epoch 5 | 4687/ 8400 batches | train loss 0.4032896 +| epoch 5 | 4691/ 8400 batches | train loss 0.4124711 +| epoch 5 | 4695/ 8400 batches | train loss 0.4253420 +| epoch 5 | 4699/ 8400 batches | train loss 0.3937125 +| epoch 5 | 4703/ 8400 batches | train loss 0.3101046 +| epoch 5 | 4707/ 8400 batches | train loss 0.4820801 +| epoch 5 | 4711/ 8400 batches | train loss 0.4683305 +| epoch 5 | 4715/ 8400 batches | train loss 0.3870561 +| epoch 5 | 4719/ 8400 batches | train loss 0.4467968 +| epoch 5 | 4723/ 8400 batches | train loss 0.3458573 +| epoch 5 | 4727/ 8400 batches | train loss 0.4433357 +| epoch 5 | 4731/ 8400 batches | train loss 0.3883168 +| epoch 5 | 4735/ 8400 batches | train loss 0.3986953 +| epoch 5 | 4739/ 8400 batches | train loss 0.4011750 +| epoch 5 | 4743/ 8400 batches | train loss 0.3625163 +| epoch 5 | 4747/ 8400 batches | train loss 0.3889123 +| epoch 5 | 4751/ 8400 batches | train loss 0.4286306 +| epoch 5 | 4755/ 8400 batches | train loss 0.4037420 +| epoch 5 | 4759/ 8400 batches | train loss 0.4455317 +| epoch 5 | 4763/ 8400 batches | train loss 0.4222949 +| epoch 5 | 4767/ 8400 batches | train loss 0.3562970 +| epoch 5 | 4771/ 8400 batches | train loss 0.3591591 +| epoch 5 | 4775/ 8400 batches | train loss 0.3565410 +| epoch 5 | 4779/ 8400 batches | train loss 0.3351849 +| epoch 5 | 4783/ 8400 batches | train loss 0.3272256 +| epoch 5 | 4787/ 8400 batches | train loss 0.3900303 +| epoch 5 | 4791/ 8400 batches | train loss 0.4088483 +| epoch 5 | 4795/ 8400 batches | train loss 0.3688251 +| epoch 5 | 4799/ 8400 batches | train loss 0.4027113 +| epoch 5 | 4803/ 8400 batches | train loss 0.3694941 +| epoch 5 | 4807/ 8400 batches | train loss 0.3845108 +| epoch 5 | 4811/ 8400 batches | train loss 0.3753211 +| epoch 5 | 4815/ 8400 batches | train loss 0.3560913 +| epoch 5 | 4819/ 8400 batches | train loss 0.4022783 +| epoch 5 | 4823/ 8400 batches | train loss 0.3450656 +| epoch 5 | 4827/ 8400 batches | train loss 0.3449952 +| epoch 5 | 4831/ 8400 batches | train loss 0.3517249 +| epoch 5 | 4835/ 8400 batches | train loss 0.3746617 +| epoch 5 | 4839/ 8400 batches | train loss 0.3965947 +| epoch 5 | 4843/ 8400 batches | train loss 0.4433759 +| epoch 5 | 4847/ 8400 batches | train loss 0.4467288 +| epoch 5 | 4851/ 8400 batches | train loss 0.4054630 +| epoch 5 | 4855/ 8400 batches | train loss 0.3794144 +| epoch 5 | 4859/ 8400 batches | train loss 0.3996955 +| epoch 5 | 4863/ 8400 batches | train loss 0.4244091 +| epoch 5 | 4867/ 8400 batches | train loss 0.4048347 +| epoch 5 | 4871/ 8400 batches | train loss 0.3703628 +| epoch 5 | 4875/ 8400 batches | train loss 0.3992829 +| epoch 5 | 4879/ 8400 batches | train loss 0.3266529 +| epoch 5 | 4883/ 8400 batches | train loss 0.3929265 +| epoch 5 | 4887/ 8400 batches | train loss 0.4258072 +| epoch 5 | 4891/ 8400 batches | train loss 0.4239945 +| epoch 5 | 4895/ 8400 batches | train loss 0.4247845 +| epoch 5 | 4899/ 8400 batches | train loss 0.4161616 +| epoch 5 | 4903/ 8400 batches | train loss 0.4345663 +| epoch 5 | 4907/ 8400 batches | train loss 0.3161371 +| epoch 5 | 4911/ 8400 batches | train loss 0.3939936 +| epoch 5 | 4915/ 8400 batches | train loss 0.4098663 +| epoch 5 | 4919/ 8400 batches | train loss 0.6754188 +| epoch 5 | 4923/ 8400 batches | train loss 0.3705566 +| epoch 5 | 4927/ 8400 batches | train loss 0.3735892 +| epoch 5 | 4931/ 8400 batches | train loss 0.4007270 +| epoch 5 | 4935/ 8400 batches | train loss 0.3379865 +| epoch 5 | 4939/ 8400 batches | train loss 0.3783731 +| epoch 5 | 4943/ 8400 batches | train loss 0.3475557 +| epoch 5 | 4947/ 8400 batches | train loss 0.3875437 +| epoch 5 | 4951/ 8400 batches | train loss 0.4046281 +| epoch 5 | 4955/ 8400 batches | train loss 0.4879358 +| epoch 5 | 4959/ 8400 batches | train loss 0.3989120 +| epoch 5 | 4963/ 8400 batches | train loss 0.3677575 +| epoch 5 | 4967/ 8400 batches | train loss 0.3891734 +| epoch 5 | 4971/ 8400 batches | train loss 0.3502174 +| epoch 5 | 4975/ 8400 batches | train loss 0.4243491 +| epoch 5 | 4979/ 8400 batches | train loss 0.4070475 +| epoch 5 | 4983/ 8400 batches | train loss 0.3495891 +| epoch 5 | 4987/ 8400 batches | train loss 0.4375558 +| epoch 5 | 4991/ 8400 batches | train loss 0.3740574 +| epoch 5 | 4995/ 8400 batches | train loss 0.4181194 +| epoch 5 | 4999/ 8400 batches | train loss 0.3912495 +| epoch 5 | 5003/ 8400 batches | train loss 0.4310999 +| epoch 5 | 5007/ 8400 batches | train loss 0.4152316 +| epoch 5 | 5011/ 8400 batches | train loss 0.3782640 +| epoch 5 | 5015/ 8400 batches | train loss 0.4446059 +| epoch 5 | 5019/ 8400 batches | train loss 0.3776425 +| epoch 5 | 5023/ 8400 batches | train loss 0.3583714 +| epoch 5 | 5027/ 8400 batches | train loss 0.3983207 +| epoch 5 | 5031/ 8400 batches | train loss 0.4844205 +| epoch 5 | 5035/ 8400 batches | train loss 0.3942279 +| epoch 5 | 5039/ 8400 batches | train loss 0.4723127 +| epoch 5 | 5043/ 8400 batches | train loss 0.3882675 +| epoch 5 | 5047/ 8400 batches | train loss 0.3449807 +| epoch 5 | 5051/ 8400 batches | train loss 0.3475376 +| epoch 5 | 5055/ 8400 batches | train loss 0.3604474 +| epoch 5 | 5059/ 8400 batches | train loss 0.3830604 +| epoch 5 | 5063/ 8400 batches | train loss 0.3549212 +| epoch 5 | 5067/ 8400 batches | train loss 0.4073606 +| epoch 5 | 5071/ 8400 batches | train loss 0.4198047 +| epoch 5 | 5075/ 8400 batches | train loss 0.3953509 +| epoch 5 | 5079/ 8400 batches | train loss 0.3744154 +| epoch 5 | 5083/ 8400 batches | train loss 0.3852322 +| epoch 5 | 5087/ 8400 batches | train loss 0.3759593 +| epoch 5 | 5091/ 8400 batches | train loss 0.4418053 +| epoch 5 | 5095/ 8400 batches | train loss 0.3049874 +| epoch 5 | 5099/ 8400 batches | train loss 0.4124417 +| epoch 5 | 5103/ 8400 batches | train loss 0.4266110 +| epoch 5 | 5107/ 8400 batches | train loss 0.2540161 +| epoch 5 | 5111/ 8400 batches | train loss 0.4715067 +| epoch 5 | 5115/ 8400 batches | train loss 0.4482436 +| epoch 5 | 5119/ 8400 batches | train loss 0.4223160 +| epoch 5 | 5123/ 8400 batches | train loss 0.4146408 +| epoch 5 | 5127/ 8400 batches | train loss 0.3402328 +| epoch 5 | 5131/ 8400 batches | train loss 0.3896819 +| epoch 5 | 5135/ 8400 batches | train loss 0.4166582 +| epoch 5 | 5139/ 8400 batches | train loss 0.4042012 +| epoch 5 | 5143/ 8400 batches | train loss 0.3349497 +| epoch 5 | 5147/ 8400 batches | train loss 0.4026933 +| epoch 5 | 5151/ 8400 batches | train loss 0.3551156 +| epoch 5 | 5155/ 8400 batches | train loss 0.3687817 +| epoch 5 | 5159/ 8400 batches | train loss 0.3114522 +| epoch 5 | 5163/ 8400 batches | train loss 0.3671982 +| epoch 5 | 5167/ 8400 batches | train loss 0.3190956 +| epoch 5 | 5171/ 8400 batches | train loss 0.4475243 +| epoch 5 | 5175/ 8400 batches | train loss 0.4088323 +| epoch 5 | 5179/ 8400 batches | train loss 0.3833566 +| epoch 5 | 5183/ 8400 batches | train loss 0.3144405 +| epoch 5 | 5187/ 8400 batches | train loss 0.4297671 +| epoch 5 | 5191/ 8400 batches | train loss 0.3283111 +| epoch 5 | 5195/ 8400 batches | train loss 0.4535992 +| epoch 5 | 5199/ 8400 batches | train loss 0.3357157 +| epoch 5 | 5203/ 8400 batches | train loss 0.3646049 +| epoch 5 | 5207/ 8400 batches | train loss 0.4396408 +| epoch 5 | 5211/ 8400 batches | train loss 0.2984825 +| epoch 5 | 5215/ 8400 batches | train loss 0.3356403 +| epoch 5 | 5219/ 8400 batches | train loss 0.5104092 +| epoch 5 | 5223/ 8400 batches | train loss 0.3874272 +| epoch 5 | 5227/ 8400 batches | train loss 0.5081360 +| epoch 5 | 5231/ 8400 batches | train loss 0.4571730 +| epoch 5 | 5235/ 8400 batches | train loss 0.3624549 +| epoch 5 | 5239/ 8400 batches | train loss 0.4108869 +| epoch 5 | 5243/ 8400 batches | train loss 0.3198924 +| epoch 5 | 5247/ 8400 batches | train loss 0.4020454 +| epoch 5 | 5251/ 8400 batches | train loss 0.3963395 +| epoch 5 | 5255/ 8400 batches | train loss 0.4555796 +| epoch 5 | 5259/ 8400 batches | train loss 0.3707306 +| epoch 5 | 5263/ 8400 batches | train loss 0.3520404 +| epoch 5 | 5267/ 8400 batches | train loss 0.3255885 +| epoch 5 | 5271/ 8400 batches | train loss 0.3765215 +| epoch 5 | 5275/ 8400 batches | train loss 0.4754986 +| epoch 5 | 5279/ 8400 batches | train loss 0.3442427 +| epoch 5 | 5283/ 8400 batches | train loss 0.4898134 +| epoch 5 | 5287/ 8400 batches | train loss 0.3900937 +| epoch 5 | 5291/ 8400 batches | train loss 0.3322042 +| epoch 5 | 5295/ 8400 batches | train loss 0.1467801 +| epoch 5 | 5299/ 8400 batches | train loss 0.3798962 +| epoch 5 | 5303/ 8400 batches | train loss 0.4634907 +| epoch 5 | 5307/ 8400 batches | train loss 0.4680909 +| epoch 5 | 5311/ 8400 batches | train loss 0.4298703 +| epoch 5 | 5315/ 8400 batches | train loss 0.4183446 +| epoch 5 | 5319/ 8400 batches | train loss 0.4666284 +| epoch 5 | 5323/ 8400 batches | train loss 0.4094351 +| epoch 5 | 5327/ 8400 batches | train loss 0.4113644 +| epoch 5 | 5331/ 8400 batches | train loss 0.4148949 +| epoch 5 | 5335/ 8400 batches | train loss 0.3494587 +| epoch 5 | 5339/ 8400 batches | train loss 0.4891263 +| epoch 5 | 5343/ 8400 batches | train loss 0.3576639 +| epoch 5 | 5347/ 8400 batches | train loss 0.3977595 +| epoch 5 | 5351/ 8400 batches | train loss 0.3670676 +| epoch 5 | 5355/ 8400 batches | train loss 0.3792500 +| epoch 5 | 5359/ 8400 batches | train loss 0.3484405 +| epoch 5 | 5363/ 8400 batches | train loss 0.4263987 +| epoch 5 | 5367/ 8400 batches | train loss 0.4399356 +| epoch 5 | 5371/ 8400 batches | train loss 0.3892364 +| epoch 5 | 5375/ 8400 batches | train loss 0.3588054 +| epoch 5 | 5379/ 8400 batches | train loss 0.4423394 +| epoch 5 | 5383/ 8400 batches | train loss 0.4183136 +| epoch 5 | 5387/ 8400 batches | train loss 0.4226122 +| epoch 5 | 5391/ 8400 batches | train loss 0.4392642 +| epoch 5 | 5395/ 8400 batches | train loss 0.3674977 +| epoch 5 | 5399/ 8400 batches | train loss 0.3760479 +| epoch 5 | 5403/ 8400 batches | train loss 0.3577531 +| epoch 5 | 5407/ 8400 batches | train loss 0.3701320 +| epoch 5 | 5411/ 8400 batches | train loss 0.3594709 +| epoch 5 | 5415/ 8400 batches | train loss 0.4098715 +| epoch 5 | 5419/ 8400 batches | train loss 0.2978498 +| epoch 5 | 5423/ 8400 batches | train loss 0.4444622 +| epoch 5 | 5427/ 8400 batches | train loss 0.3126678 +| epoch 5 | 5431/ 8400 batches | train loss 0.4380633 +| epoch 5 | 5435/ 8400 batches | train loss 0.3957376 +| epoch 5 | 5439/ 8400 batches | train loss 0.4158781 +| epoch 5 | 5443/ 8400 batches | train loss 0.3320947 +| epoch 5 | 5447/ 8400 batches | train loss 0.3567039 +| epoch 5 | 5451/ 8400 batches | train loss 0.3744192 +| epoch 5 | 5455/ 8400 batches | train loss 0.4078604 +| epoch 5 | 5459/ 8400 batches | train loss 0.4784459 +| epoch 5 | 5463/ 8400 batches | train loss 0.3696890 +| epoch 5 | 5467/ 8400 batches | train loss 0.3594631 +| epoch 5 | 5471/ 8400 batches | train loss 0.3681705 +| epoch 5 | 5475/ 8400 batches | train loss 0.3517509 +| epoch 5 | 5479/ 8400 batches | train loss 0.3864216 +| epoch 5 | 5483/ 8400 batches | train loss 0.4022127 +| epoch 5 | 5487/ 8400 batches | train loss 0.4814779 +| epoch 5 | 5491/ 8400 batches | train loss 0.3722044 +| epoch 5 | 5495/ 8400 batches | train loss 0.3647600 +| epoch 5 | 5499/ 8400 batches | train loss 0.4239021 +| epoch 5 | 5503/ 8400 batches | train loss 0.3765958 +| epoch 5 | 5507/ 8400 batches | train loss 0.4653379 +| epoch 5 | 5511/ 8400 batches | train loss 0.4276676 +| epoch 5 | 5515/ 8400 batches | train loss 0.2753123 +| epoch 5 | 5519/ 8400 batches | train loss 0.4024621 +| epoch 5 | 5523/ 8400 batches | train loss 0.4350725 +| epoch 5 | 5527/ 8400 batches | train loss 0.3876254 +| epoch 5 | 5531/ 8400 batches | train loss 0.4341712 +| epoch 5 | 5535/ 8400 batches | train loss 0.3701804 +| epoch 5 | 5539/ 8400 batches | train loss 0.4299040 +| epoch 5 | 5543/ 8400 batches | train loss 0.4021103 +| epoch 5 | 5547/ 8400 batches | train loss 0.4372834 +| epoch 5 | 5551/ 8400 batches | train loss 0.3837436 +| epoch 5 | 5555/ 8400 batches | train loss 0.4073846 +| epoch 5 | 5559/ 8400 batches | train loss 0.3407193 +| epoch 5 | 5563/ 8400 batches | train loss 0.3435404 +| epoch 5 | 5567/ 8400 batches | train loss 0.4012089 +| epoch 5 | 5571/ 8400 batches | train loss 0.3776726 +| epoch 5 | 5575/ 8400 batches | train loss 0.3836498 +| epoch 5 | 5579/ 8400 batches | train loss 0.3797734 +| epoch 5 | 5583/ 8400 batches | train loss 0.3709530 +| epoch 5 | 5587/ 8400 batches | train loss 0.3302446 +| epoch 5 | 5591/ 8400 batches | train loss 0.3378018 +| epoch 5 | 5595/ 8400 batches | train loss 0.4351411 +| epoch 5 | 5599/ 8400 batches | train loss 0.4335859 +| epoch 5 | 5603/ 8400 batches | train loss 0.4531754 +| epoch 5 | 5607/ 8400 batches | train loss 0.4531991 +| epoch 5 | 5611/ 8400 batches | train loss 0.4247864 +| epoch 5 | 5615/ 8400 batches | train loss 0.3622037 +| epoch 5 | 5619/ 8400 batches | train loss 0.3727868 +| epoch 5 | 5623/ 8400 batches | train loss 0.3647945 +| epoch 5 | 5627/ 8400 batches | train loss 0.4123817 +| epoch 5 | 5631/ 8400 batches | train loss 0.4778421 +| epoch 5 | 5635/ 8400 batches | train loss 0.3672827 +| epoch 5 | 5639/ 8400 batches | train loss 0.2499141 +| epoch 5 | 5643/ 8400 batches | train loss 0.3920280 +| epoch 5 | 5647/ 8400 batches | train loss 0.4368322 +| epoch 5 | 5651/ 8400 batches | train loss 0.3762839 +| epoch 5 | 5655/ 8400 batches | train loss 0.4160195 +| epoch 5 | 5659/ 8400 batches | train loss 0.4402030 +| epoch 5 | 5663/ 8400 batches | train loss 0.3624446 +| epoch 5 | 5667/ 8400 batches | train loss 0.3980429 +| epoch 5 | 5671/ 8400 batches | train loss 0.2670305 +| epoch 5 | 5675/ 8400 batches | train loss 0.3221522 +| epoch 5 | 5679/ 8400 batches | train loss 0.3382135 +| epoch 5 | 5683/ 8400 batches | train loss 0.4009878 +| epoch 5 | 5687/ 8400 batches | train loss 0.2830630 +| epoch 5 | 5691/ 8400 batches | train loss 0.4531843 +| epoch 5 | 5695/ 8400 batches | train loss 0.4347939 +| epoch 5 | 5699/ 8400 batches | train loss 0.4638034 +| epoch 5 | 5703/ 8400 batches | train loss 0.3669186 +| epoch 5 | 5707/ 8400 batches | train loss 0.3594048 +| epoch 5 | 5711/ 8400 batches | train loss 0.3756437 +| epoch 5 | 5715/ 8400 batches | train loss 0.3814455 +| epoch 5 | 5719/ 8400 batches | train loss 0.3834473 +| epoch 5 | 5723/ 8400 batches | train loss 0.3791864 +| epoch 5 | 5727/ 8400 batches | train loss 0.4197069 +| epoch 5 | 5731/ 8400 batches | train loss 0.4690159 +| epoch 5 | 5735/ 8400 batches | train loss 0.4295553 +| epoch 5 | 5739/ 8400 batches | train loss 0.3729092 +| epoch 5 | 5743/ 8400 batches | train loss 0.3663918 +| epoch 5 | 5747/ 8400 batches | train loss 0.4241363 +| epoch 5 | 5751/ 8400 batches | train loss 0.4017161 +| epoch 5 | 5755/ 8400 batches | train loss 0.4023420 +| epoch 5 | 5759/ 8400 batches | train loss 0.4239463 +| epoch 5 | 5763/ 8400 batches | train loss 0.4281359 +| epoch 5 | 5767/ 8400 batches | train loss 0.4017820 +| epoch 5 | 5771/ 8400 batches | train loss 0.3960999 +| epoch 5 | 5775/ 8400 batches | train loss 0.4410916 +| epoch 5 | 5779/ 8400 batches | train loss 0.3567930 +| epoch 5 | 5783/ 8400 batches | train loss 0.4420085 +| epoch 5 | 5787/ 8400 batches | train loss 0.4573795 +| epoch 5 | 5791/ 8400 batches | train loss 0.4010242 +| epoch 5 | 5795/ 8400 batches | train loss 0.4171006 +| epoch 5 | 5799/ 8400 batches | train loss 0.3589826 +| epoch 5 | 5803/ 8400 batches | train loss 0.4265302 +| epoch 5 | 5807/ 8400 batches | train loss 0.3412578 +| epoch 5 | 5811/ 8400 batches | train loss 0.2752209 +| epoch 5 | 5815/ 8400 batches | train loss 0.4275625 +| epoch 5 | 5819/ 8400 batches | train loss 0.3759294 +| epoch 5 | 5823/ 8400 batches | train loss 0.3454018 +| epoch 5 | 5827/ 8400 batches | train loss 0.3642998 +| epoch 5 | 5831/ 8400 batches | train loss 0.4302373 +| epoch 5 | 5835/ 8400 batches | train loss 0.3235753 +| epoch 5 | 5839/ 8400 batches | train loss 0.4330157 +| epoch 5 | 5843/ 8400 batches | train loss 0.4397363 +| epoch 5 | 5847/ 8400 batches | train loss 0.4080207 +| epoch 5 | 5851/ 8400 batches | train loss 0.2969565 +| epoch 5 | 5855/ 8400 batches | train loss 0.3951508 +| epoch 5 | 5859/ 8400 batches | train loss 0.4356108 +| epoch 5 | 5863/ 8400 batches | train loss 0.3334602 +| epoch 5 | 5867/ 8400 batches | train loss 0.4255986 +| epoch 5 | 5871/ 8400 batches | train loss 0.4234934 +| epoch 5 | 5875/ 8400 batches | train loss 0.3501377 +| epoch 5 | 5879/ 8400 batches | train loss 0.3509645 +| epoch 5 | 5883/ 8400 batches | train loss 0.4178510 +| epoch 5 | 5887/ 8400 batches | train loss 0.3831554 +| epoch 5 | 5891/ 8400 batches | train loss 0.3738030 +| epoch 5 | 5895/ 8400 batches | train loss 0.5253332 +| epoch 5 | 5899/ 8400 batches | train loss 0.3414878 +| epoch 5 | 5903/ 8400 batches | train loss 0.4464125 +| epoch 5 | 5907/ 8400 batches | train loss 0.3637968 +| epoch 5 | 5911/ 8400 batches | train loss 0.4151174 +| epoch 5 | 5915/ 8400 batches | train loss 0.3674526 +| epoch 5 | 5919/ 8400 batches | train loss 0.4594381 +| epoch 5 | 5923/ 8400 batches | train loss 0.3925755 +| epoch 5 | 5927/ 8400 batches | train loss 0.3415171 +| epoch 5 | 5931/ 8400 batches | train loss 0.3511814 +| epoch 5 | 5935/ 8400 batches | train loss 0.4194266 +| epoch 5 | 5939/ 8400 batches | train loss 0.3751037 +| epoch 5 | 5943/ 8400 batches | train loss 0.4763107 +| epoch 5 | 5947/ 8400 batches | train loss 0.3831446 +| epoch 5 | 5951/ 8400 batches | train loss 0.3988022 +| epoch 5 | 5955/ 8400 batches | train loss 0.3822311 +| epoch 5 | 5959/ 8400 batches | train loss 0.3844700 +| epoch 5 | 5963/ 8400 batches | train loss 0.4856536 +| epoch 5 | 5967/ 8400 batches | train loss 0.4267910 +| epoch 5 | 5971/ 8400 batches | train loss 0.3601481 +| epoch 5 | 5975/ 8400 batches | train loss 0.3799283 +| epoch 5 | 5979/ 8400 batches | train loss 0.3536898 +| epoch 5 | 5983/ 8400 batches | train loss 0.4338662 +| epoch 5 | 5987/ 8400 batches | train loss 0.3776690 +| epoch 5 | 5991/ 8400 batches | train loss 0.3995890 +| epoch 5 | 5995/ 8400 batches | train loss 0.3942045 +| epoch 5 | 5999/ 8400 batches | train loss 0.3606893 +| epoch 5 | 6003/ 8400 batches | train loss 0.3561852 +| epoch 5 | 6007/ 8400 batches | train loss 0.3570898 +| epoch 5 | 6011/ 8400 batches | train loss 0.4323090 +| epoch 5 | 6015/ 8400 batches | train loss 0.4394587 +| epoch 5 | 6019/ 8400 batches | train loss 0.3622037 +| epoch 5 | 6023/ 8400 batches | train loss 0.3803540 +| epoch 5 | 6027/ 8400 batches | train loss 0.3582478 +| epoch 5 | 6031/ 8400 batches | train loss 0.4017440 +| epoch 5 | 6035/ 8400 batches | train loss 0.4395735 +| epoch 5 | 6039/ 8400 batches | train loss 0.3269609 +| epoch 5 | 6043/ 8400 batches | train loss 0.3298309 +| epoch 5 | 6047/ 8400 batches | train loss 0.4247624 +| epoch 5 | 6051/ 8400 batches | train loss 0.4073730 +| epoch 5 | 6055/ 8400 batches | train loss 0.3521353 +| epoch 5 | 6059/ 8400 batches | train loss 0.3856647 +| epoch 5 | 6063/ 8400 batches | train loss 0.4537756 +| epoch 5 | 6067/ 8400 batches | train loss 0.3521763 +| epoch 5 | 6071/ 8400 batches | train loss 0.3838224 +| epoch 5 | 6075/ 8400 batches | train loss 0.4705406 +| epoch 5 | 6079/ 8400 batches | train loss 0.4460498 +| epoch 5 | 6083/ 8400 batches | train loss 0.3817859 +| epoch 5 | 6087/ 8400 batches | train loss 0.4283376 +| epoch 5 | 6091/ 8400 batches | train loss 0.2466529 +| epoch 5 | 6095/ 8400 batches | train loss 0.3740372 +| epoch 5 | 6099/ 8400 batches | train loss 0.3824349 +| epoch 5 | 6103/ 8400 batches | train loss 0.4011424 +| epoch 5 | 6107/ 8400 batches | train loss 0.3760866 +| epoch 5 | 6111/ 8400 batches | train loss 0.3916689 +| epoch 5 | 6115/ 8400 batches | train loss 0.4230138 +| epoch 5 | 6119/ 8400 batches | train loss 0.4302168 +| epoch 5 | 6123/ 8400 batches | train loss 0.3931624 +| epoch 5 | 6127/ 8400 batches | train loss 0.3906612 +| epoch 5 | 6131/ 8400 batches | train loss 0.4398917 +| epoch 5 | 6135/ 8400 batches | train loss 0.3412851 +| epoch 5 | 6139/ 8400 batches | train loss 0.5064232 +| epoch 5 | 6143/ 8400 batches | train loss 0.3996774 +| epoch 5 | 6147/ 8400 batches | train loss 0.3893783 +| epoch 5 | 6151/ 8400 batches | train loss 0.4214288 +| epoch 5 | 6155/ 8400 batches | train loss 0.3754080 +| epoch 5 | 6159/ 8400 batches | train loss 0.3762494 +| epoch 5 | 6163/ 8400 batches | train loss 0.4198131 +| epoch 5 | 6167/ 8400 batches | train loss 0.3732174 +| epoch 5 | 6171/ 8400 batches | train loss 0.4462515 +| epoch 5 | 6175/ 8400 batches | train loss 0.4316553 +| epoch 5 | 6179/ 8400 batches | train loss 0.3780985 +| epoch 5 | 6183/ 8400 batches | train loss 0.3983016 +| epoch 5 | 6187/ 8400 batches | train loss 0.4218902 +| epoch 5 | 6191/ 8400 batches | train loss 0.3549316 +| epoch 5 | 6195/ 8400 batches | train loss 0.3861925 +| epoch 5 | 6199/ 8400 batches | train loss 0.2513835 +| epoch 5 | 6203/ 8400 batches | train loss 0.4262261 +| epoch 5 | 6207/ 8400 batches | train loss 0.4204676 +| epoch 5 | 6211/ 8400 batches | train loss 0.4073305 +| epoch 5 | 6215/ 8400 batches | train loss 0.4660639 +| epoch 5 | 6219/ 8400 batches | train loss 0.2869228 +| epoch 5 | 6223/ 8400 batches | train loss 0.3600374 +| epoch 5 | 6227/ 8400 batches | train loss 0.4389987 +| epoch 5 | 6231/ 8400 batches | train loss 0.3444605 +| epoch 5 | 6235/ 8400 batches | train loss 0.4117773 +| epoch 5 | 6239/ 8400 batches | train loss 0.4038597 +| epoch 5 | 6243/ 8400 batches | train loss 0.4437820 +| epoch 5 | 6247/ 8400 batches | train loss 0.3362985 +| epoch 5 | 6251/ 8400 batches | train loss 0.3525768 +| epoch 5 | 6255/ 8400 batches | train loss 0.3759706 +| epoch 5 | 6259/ 8400 batches | train loss 0.4312713 +| epoch 5 | 6263/ 8400 batches | train loss 0.4593931 +| epoch 5 | 6267/ 8400 batches | train loss 0.3839370 +| epoch 5 | 6271/ 8400 batches | train loss 0.4071532 +| epoch 5 | 6275/ 8400 batches | train loss 0.3999874 +| epoch 5 | 6279/ 8400 batches | train loss 0.3906915 +| epoch 5 | 6283/ 8400 batches | train loss 0.4429313 +| epoch 5 | 6287/ 8400 batches | train loss 0.3747168 +| epoch 5 | 6291/ 8400 batches | train loss 0.3972656 +| epoch 5 | 6295/ 8400 batches | train loss 0.2636866 +| epoch 5 | 6299/ 8400 batches | train loss 0.4598943 +| epoch 5 | 6303/ 8400 batches | train loss 0.3517957 +| epoch 5 | 6307/ 8400 batches | train loss 0.4095281 +| epoch 5 | 6311/ 8400 batches | train loss 0.4952922 +| epoch 5 | 6315/ 8400 batches | train loss 0.4480203 +| epoch 5 | 6319/ 8400 batches | train loss 0.3837391 +| epoch 5 | 6323/ 8400 batches | train loss 0.3176926 +| epoch 5 | 6327/ 8400 batches | train loss 0.3707786 +| epoch 5 | 6331/ 8400 batches | train loss 0.4016749 +| epoch 5 | 6335/ 8400 batches | train loss 0.3778973 +| epoch 5 | 6339/ 8400 batches | train loss 0.3947798 +| epoch 5 | 6343/ 8400 batches | train loss 0.4051921 +| epoch 5 | 6347/ 8400 batches | train loss 0.4489942 +| epoch 5 | 6351/ 8400 batches | train loss 0.4401491 +| epoch 5 | 6355/ 8400 batches | train loss 0.3692753 +| epoch 5 | 6359/ 8400 batches | train loss 0.4794948 +| epoch 5 | 6363/ 8400 batches | train loss 0.4507841 +| epoch 5 | 6367/ 8400 batches | train loss 0.3762756 +| epoch 5 | 6371/ 8400 batches | train loss 0.4132482 +| epoch 5 | 6375/ 8400 batches | train loss 0.4091601 +| epoch 5 | 6379/ 8400 batches | train loss 0.4689804 +| epoch 5 | 6383/ 8400 batches | train loss 0.3305750 +| epoch 5 | 6387/ 8400 batches | train loss 0.3942980 +| epoch 5 | 6391/ 8400 batches | train loss 0.3962752 +| epoch 5 | 6395/ 8400 batches | train loss 0.3824680 +| epoch 5 | 6399/ 8400 batches | train loss 0.5146940 +| epoch 5 | 6403/ 8400 batches | train loss 0.4040673 +| epoch 5 | 6407/ 8400 batches | train loss 0.3965265 +| epoch 5 | 6411/ 8400 batches | train loss 0.3788509 +| epoch 5 | 6415/ 8400 batches | train loss 0.3377961 +| epoch 5 | 6419/ 8400 batches | train loss 0.3470839 +| epoch 5 | 6423/ 8400 batches | train loss 0.3994378 +| epoch 5 | 6427/ 8400 batches | train loss 0.4164944 +| epoch 5 | 6431/ 8400 batches | train loss 0.4148691 +| epoch 5 | 6435/ 8400 batches | train loss 0.4181705 +| epoch 5 | 6439/ 8400 batches | train loss 0.3429598 +| epoch 5 | 6443/ 8400 batches | train loss 0.4150027 +| epoch 5 | 6447/ 8400 batches | train loss 0.4068065 +| epoch 5 | 6451/ 8400 batches | train loss 0.3293032 +| epoch 5 | 6455/ 8400 batches | train loss 0.3236565 +| epoch 5 | 6459/ 8400 batches | train loss 0.4033706 +| epoch 5 | 6463/ 8400 batches | train loss 0.3408395 +| epoch 5 | 6467/ 8400 batches | train loss 0.4727922 +| epoch 5 | 6471/ 8400 batches | train loss 0.3512134 +| epoch 5 | 6475/ 8400 batches | train loss 0.3742135 +| epoch 5 | 6479/ 8400 batches | train loss 0.3689178 +| epoch 5 | 6483/ 8400 batches | train loss 0.3817545 +| epoch 5 | 6487/ 8400 batches | train loss 0.2412892 +| epoch 5 | 6491/ 8400 batches | train loss 0.4100505 +| epoch 5 | 6495/ 8400 batches | train loss 0.3941260 +| epoch 5 | 6499/ 8400 batches | train loss 0.3302555 +| epoch 5 | 6503/ 8400 batches | train loss 0.5163898 +| epoch 5 | 6507/ 8400 batches | train loss 0.3647323 +| epoch 5 | 6511/ 8400 batches | train loss 0.3582412 +| epoch 5 | 6515/ 8400 batches | train loss 0.4418930 +| epoch 5 | 6519/ 8400 batches | train loss 0.4106857 +| epoch 5 | 6523/ 8400 batches | train loss 0.4269467 +| epoch 5 | 6527/ 8400 batches | train loss 0.3810133 +| epoch 5 | 6531/ 8400 batches | train loss 0.4619798 +| epoch 5 | 6535/ 8400 batches | train loss 0.3224440 +| epoch 5 | 6539/ 8400 batches | train loss 0.4191230 +| epoch 5 | 6543/ 8400 batches | train loss 0.3264947 +| epoch 5 | 6547/ 8400 batches | train loss 0.4186877 +| epoch 5 | 6551/ 8400 batches | train loss 0.4489156 +| epoch 5 | 6555/ 8400 batches | train loss 0.3431991 +| epoch 5 | 6559/ 8400 batches | train loss 0.3324223 +| epoch 5 | 6563/ 8400 batches | train loss 0.3974343 +| epoch 5 | 6567/ 8400 batches | train loss 0.3358397 +| epoch 5 | 6571/ 8400 batches | train loss 0.3583319 +| epoch 5 | 6575/ 8400 batches | train loss 0.2623721 +| epoch 5 | 6579/ 8400 batches | train loss 0.4638674 +| epoch 5 | 6583/ 8400 batches | train loss 0.3362513 +| epoch 5 | 6587/ 8400 batches | train loss 0.4147936 +| epoch 5 | 6591/ 8400 batches | train loss 0.4557665 +| epoch 5 | 6595/ 8400 batches | train loss 0.3442803 +| epoch 5 | 6599/ 8400 batches | train loss 0.3947223 +| epoch 5 | 6603/ 8400 batches | train loss 0.3389364 +| epoch 5 | 6607/ 8400 batches | train loss 0.3892898 +| epoch 5 | 6611/ 8400 batches | train loss 0.3958215 +| epoch 5 | 6615/ 8400 batches | train loss 0.4470645 +| epoch 5 | 6619/ 8400 batches | train loss 0.4264153 +| epoch 5 | 6623/ 8400 batches | train loss 0.3703946 +| epoch 5 | 6627/ 8400 batches | train loss 0.4351032 +| epoch 5 | 6631/ 8400 batches | train loss 0.3827492 +| epoch 5 | 6635/ 8400 batches | train loss 0.4027608 +| epoch 5 | 6639/ 8400 batches | train loss 0.3983888 +| epoch 5 | 6643/ 8400 batches | train loss 0.3630823 +| epoch 5 | 6647/ 8400 batches | train loss 0.4045108 +| epoch 5 | 6651/ 8400 batches | train loss 0.4113460 +| epoch 5 | 6655/ 8400 batches | train loss 0.4229355 +| epoch 5 | 6659/ 8400 batches | train loss 0.3442681 +| epoch 5 | 6663/ 8400 batches | train loss 0.3736339 +| epoch 5 | 6667/ 8400 batches | train loss 0.3689786 +| epoch 5 | 6671/ 8400 batches | train loss 0.4018531 +| epoch 5 | 6675/ 8400 batches | train loss 0.4077803 +| epoch 5 | 6679/ 8400 batches | train loss 0.4330535 +| epoch 5 | 6683/ 8400 batches | train loss 0.4028285 +| epoch 5 | 6687/ 8400 batches | train loss 0.3806980 +| epoch 5 | 6691/ 8400 batches | train loss 0.4592877 +| epoch 5 | 6695/ 8400 batches | train loss 0.3913384 +| epoch 5 | 6699/ 8400 batches | train loss 0.3915839 +| epoch 5 | 6703/ 8400 batches | train loss 0.3842259 +| epoch 5 | 6707/ 8400 batches | train loss 0.4529645 +| epoch 5 | 6711/ 8400 batches | train loss 0.3723508 +| epoch 5 | 6715/ 8400 batches | train loss 0.3199872 +| epoch 5 | 6719/ 8400 batches | train loss 0.3831185 +| epoch 5 | 6723/ 8400 batches | train loss 0.4636166 +| epoch 5 | 6727/ 8400 batches | train loss 0.3919300 +| epoch 5 | 6731/ 8400 batches | train loss 0.5272473 +| epoch 5 | 6735/ 8400 batches | train loss 0.3661656 +| epoch 5 | 6739/ 8400 batches | train loss 0.4452007 +| epoch 5 | 6743/ 8400 batches | train loss 0.4120938 +| epoch 5 | 6747/ 8400 batches | train loss 0.3432587 +| epoch 5 | 6751/ 8400 batches | train loss 0.3890034 +| epoch 5 | 6755/ 8400 batches | train loss 0.4448246 +| epoch 5 | 6759/ 8400 batches | train loss 0.3039464 +| epoch 5 | 6763/ 8400 batches | train loss 0.4187969 +| epoch 5 | 6767/ 8400 batches | train loss 0.3812423 +| epoch 5 | 6771/ 8400 batches | train loss 0.4533732 +| epoch 5 | 6775/ 8400 batches | train loss 0.4397202 +| epoch 5 | 6779/ 8400 batches | train loss 0.4522359 +| epoch 5 | 6783/ 8400 batches | train loss 0.3956453 +| epoch 5 | 6787/ 8400 batches | train loss 0.3473996 +| epoch 5 | 6791/ 8400 batches | train loss 0.3351410 +| epoch 5 | 6795/ 8400 batches | train loss 0.3906652 +| epoch 5 | 6799/ 8400 batches | train loss 0.4835194 +| epoch 5 | 6803/ 8400 batches | train loss 0.3386583 +| epoch 5 | 6807/ 8400 batches | train loss 0.4497755 +| epoch 5 | 6811/ 8400 batches | train loss 0.4653218 +| epoch 5 | 6815/ 8400 batches | train loss 0.4158512 +| epoch 5 | 6819/ 8400 batches | train loss 0.3284237 +| epoch 5 | 6823/ 8400 batches | train loss 0.3184636 +| epoch 5 | 6827/ 8400 batches | train loss 0.3767079 +| epoch 5 | 6831/ 8400 batches | train loss 0.3996523 +| epoch 5 | 6835/ 8400 batches | train loss 0.3899305 +| epoch 5 | 6839/ 8400 batches | train loss 0.3562085 +| epoch 5 | 6843/ 8400 batches | train loss 0.4449515 +| epoch 5 | 6847/ 8400 batches | train loss 0.3832800 +| epoch 5 | 6851/ 8400 batches | train loss 0.4261690 +| epoch 5 | 6855/ 8400 batches | train loss 0.3857298 +| epoch 5 | 6859/ 8400 batches | train loss 0.5175508 +| epoch 5 | 6863/ 8400 batches | train loss 0.3796039 +| epoch 5 | 6867/ 8400 batches | train loss 0.5079906 +| epoch 5 | 6871/ 8400 batches | train loss 0.3487494 +| epoch 5 | 6875/ 8400 batches | train loss 0.3808748 +| epoch 5 | 6879/ 8400 batches | train loss 0.3605697 +| epoch 5 | 6883/ 8400 batches | train loss 0.3974422 +| epoch 5 | 6887/ 8400 batches | train loss 0.3749826 +| epoch 5 | 6891/ 8400 batches | train loss 0.3611951 +| epoch 5 | 6895/ 8400 batches | train loss 0.3770628 +| epoch 5 | 6899/ 8400 batches | train loss 0.4092969 +| epoch 5 | 6903/ 8400 batches | train loss 0.3665511 +| epoch 5 | 6907/ 8400 batches | train loss 0.4196000 +| epoch 5 | 6911/ 8400 batches | train loss 0.4369504 +| epoch 5 | 6915/ 8400 batches | train loss 0.4017951 +| epoch 5 | 6919/ 8400 batches | train loss 0.4383383 +| epoch 5 | 6923/ 8400 batches | train loss 0.3352056 +| epoch 5 | 6927/ 8400 batches | train loss 0.3561200 +| epoch 5 | 6931/ 8400 batches | train loss 0.3501320 +| epoch 5 | 6935/ 8400 batches | train loss 0.4508603 +| epoch 5 | 6939/ 8400 batches | train loss 0.3410271 +| epoch 5 | 6943/ 8400 batches | train loss 0.1788174 +| epoch 5 | 6947/ 8400 batches | train loss 0.4065130 +| epoch 5 | 6951/ 8400 batches | train loss 0.3689204 +| epoch 5 | 6955/ 8400 batches | train loss 0.4298566 +| epoch 5 | 6959/ 8400 batches | train loss 0.4301153 +| epoch 5 | 6963/ 8400 batches | train loss 0.3255408 +| epoch 5 | 6967/ 8400 batches | train loss 0.3727732 +| epoch 5 | 6971/ 8400 batches | train loss 0.4540753 +| epoch 5 | 6975/ 8400 batches | train loss 0.4290752 +| epoch 5 | 6979/ 8400 batches | train loss 0.3906260 +| epoch 5 | 6983/ 8400 batches | train loss 0.3309156 +| epoch 5 | 6987/ 8400 batches | train loss 0.4166562 +| epoch 5 | 6991/ 8400 batches | train loss 0.4017363 +| epoch 5 | 6995/ 8400 batches | train loss 0.3657838 +| epoch 5 | 6999/ 8400 batches | train loss 0.4265085 +| epoch 5 | 7003/ 8400 batches | train loss 0.4072701 +| epoch 5 | 7007/ 8400 batches | train loss 0.4785727 +| epoch 5 | 7011/ 8400 batches | train loss 0.3943699 +| epoch 5 | 7015/ 8400 batches | train loss 0.4089252 +| epoch 5 | 7019/ 8400 batches | train loss 0.2781973 +| epoch 5 | 7023/ 8400 batches | train loss 0.3782149 +| epoch 5 | 7027/ 8400 batches | train loss 0.3588095 +| epoch 5 | 7031/ 8400 batches | train loss 0.3422433 +| epoch 5 | 7035/ 8400 batches | train loss 0.4264082 +| epoch 5 | 7039/ 8400 batches | train loss 0.3360105 +| epoch 5 | 7043/ 8400 batches | train loss 0.4575850 +| epoch 5 | 7047/ 8400 batches | train loss 0.3692080 +| epoch 5 | 7051/ 8400 batches | train loss 0.3639054 +| epoch 5 | 7055/ 8400 batches | train loss 0.3593184 +| epoch 5 | 7059/ 8400 batches | train loss 0.3321469 +| epoch 5 | 7063/ 8400 batches | train loss 0.3584457 +| epoch 5 | 7067/ 8400 batches | train loss 0.4060745 +| epoch 5 | 7071/ 8400 batches | train loss 0.4477468 +| epoch 5 | 7075/ 8400 batches | train loss 0.4025323 +| epoch 5 | 7079/ 8400 batches | train loss 0.4150039 +| epoch 5 | 7083/ 8400 batches | train loss 0.3796367 +| epoch 5 | 7087/ 8400 batches | train loss 0.3774711 +| epoch 5 | 7091/ 8400 batches | train loss 0.3674589 +| epoch 5 | 7095/ 8400 batches | train loss 0.3500824 +| epoch 5 | 7099/ 8400 batches | train loss 0.3973102 +| epoch 5 | 7103/ 8400 batches | train loss 0.4413747 +| epoch 5 | 7107/ 8400 batches | train loss 0.3185413 +| epoch 5 | 7111/ 8400 batches | train loss 0.3698315 +| epoch 5 | 7115/ 8400 batches | train loss 0.3175813 +| epoch 5 | 7119/ 8400 batches | train loss 0.3938985 +| epoch 5 | 7123/ 8400 batches | train loss 0.3777508 +| epoch 5 | 7127/ 8400 batches | train loss 0.4224869 +| epoch 5 | 7131/ 8400 batches | train loss 0.4217490 +| epoch 5 | 7135/ 8400 batches | train loss 0.3959174 +| epoch 5 | 7139/ 8400 batches | train loss 0.3672001 +| epoch 5 | 7143/ 8400 batches | train loss 0.3769192 +| epoch 5 | 7147/ 8400 batches | train loss 0.4564381 +| epoch 5 | 7151/ 8400 batches | train loss 0.3529613 +| epoch 5 | 7155/ 8400 batches | train loss 0.4359933 +| epoch 5 | 7159/ 8400 batches | train loss 0.4191853 +| epoch 5 | 7163/ 8400 batches | train loss 0.4385396 +| epoch 5 | 7167/ 8400 batches | train loss 0.3826406 +| epoch 5 | 7171/ 8400 batches | train loss 0.3369398 +| epoch 5 | 7175/ 8400 batches | train loss 0.3857554 +| epoch 5 | 7179/ 8400 batches | train loss 0.5304645 +| epoch 5 | 7183/ 8400 batches | train loss 0.3847355 +| epoch 5 | 7187/ 8400 batches | train loss 0.3127021 +| epoch 5 | 7191/ 8400 batches | train loss 0.3516233 +| epoch 5 | 7195/ 8400 batches | train loss 0.4015382 +| epoch 5 | 7199/ 8400 batches | train loss 0.3789053 +| epoch 5 | 7203/ 8400 batches | train loss 0.4221253 +| epoch 5 | 7207/ 8400 batches | train loss 0.4416581 +| epoch 5 | 7211/ 8400 batches | train loss 0.3913715 +| epoch 5 | 7215/ 8400 batches | train loss 0.4447636 +| epoch 5 | 7219/ 8400 batches | train loss 0.3946637 +| epoch 5 | 7223/ 8400 batches | train loss 0.4014316 +| epoch 5 | 7227/ 8400 batches | train loss 0.3575154 +| epoch 5 | 7231/ 8400 batches | train loss 0.3650451 +| epoch 5 | 7235/ 8400 batches | train loss 0.3672510 +| epoch 5 | 7239/ 8400 batches | train loss 0.4049095 +| epoch 5 | 7243/ 8400 batches | train loss 0.3779577 +| epoch 5 | 7247/ 8400 batches | train loss 0.4940736 +| epoch 5 | 7251/ 8400 batches | train loss 0.3803843 +| epoch 5 | 7255/ 8400 batches | train loss 0.4091183 +| epoch 5 | 7259/ 8400 batches | train loss 0.3812657 +| epoch 5 | 7263/ 8400 batches | train loss 0.3218103 +| epoch 5 | 7267/ 8400 batches | train loss 0.4487791 +| epoch 5 | 7271/ 8400 batches | train loss 0.3545913 +| epoch 5 | 7275/ 8400 batches | train loss 0.3887300 +| epoch 5 | 7279/ 8400 batches | train loss 0.3732569 +| epoch 5 | 7283/ 8400 batches | train loss 0.4803025 +| epoch 5 | 7287/ 8400 batches | train loss 0.3869285 +| epoch 5 | 7291/ 8400 batches | train loss 0.4089212 +| epoch 5 | 7295/ 8400 batches | train loss 0.4249537 +| epoch 5 | 7299/ 8400 batches | train loss 0.3935183 +| epoch 5 | 7303/ 8400 batches | train loss 0.4928041 +| epoch 5 | 7307/ 8400 batches | train loss 0.4756446 +| epoch 5 | 7311/ 8400 batches | train loss 0.3612665 +| epoch 5 | 7315/ 8400 batches | train loss 0.3314358 +| epoch 5 | 7319/ 8400 batches | train loss 0.4615644 +| epoch 5 | 7323/ 8400 batches | train loss 0.4084777 +| epoch 5 | 7327/ 8400 batches | train loss 0.3979583 +| epoch 5 | 7331/ 8400 batches | train loss 0.3664210 +| epoch 5 | 7335/ 8400 batches | train loss 0.4711021 +| epoch 5 | 7339/ 8400 batches | train loss 0.3746827 +| epoch 5 | 7343/ 8400 batches | train loss 0.3601151 +| epoch 5 | 7347/ 8400 batches | train loss 0.4560717 +| epoch 5 | 7351/ 8400 batches | train loss 0.4249578 +| epoch 5 | 7355/ 8400 batches | train loss 0.3751853 +| epoch 5 | 7359/ 8400 batches | train loss 0.4248484 +| epoch 5 | 7363/ 8400 batches | train loss 0.3481429 +| epoch 5 | 7367/ 8400 batches | train loss 0.4334536 +| epoch 5 | 7371/ 8400 batches | train loss 0.3598080 +| epoch 5 | 7375/ 8400 batches | train loss 0.4109918 +| epoch 5 | 7379/ 8400 batches | train loss 0.3605906 +| epoch 5 | 7383/ 8400 batches | train loss 0.4245049 +| epoch 5 | 7387/ 8400 batches | train loss 0.3948194 +| epoch 5 | 7391/ 8400 batches | train loss 0.4143464 +| epoch 5 | 7395/ 8400 batches | train loss 0.4132847 +| epoch 5 | 7399/ 8400 batches | train loss 0.4273216 +| epoch 5 | 7403/ 8400 batches | train loss 0.3690104 +| epoch 5 | 7407/ 8400 batches | train loss 0.3817435 +| epoch 5 | 7411/ 8400 batches | train loss 0.3915881 +| epoch 5 | 7415/ 8400 batches | train loss 0.4244559 +| epoch 5 | 7419/ 8400 batches | train loss 0.4762515 +| epoch 5 | 7423/ 8400 batches | train loss 0.4392112 +| epoch 5 | 7427/ 8400 batches | train loss 0.3723703 +| epoch 5 | 7431/ 8400 batches | train loss 0.3754478 +| epoch 5 | 7435/ 8400 batches | train loss 0.4637496 +| epoch 5 | 7439/ 8400 batches | train loss 0.4059114 +| epoch 5 | 7443/ 8400 batches | train loss 0.3925825 +| epoch 5 | 7447/ 8400 batches | train loss 0.3677142 +| epoch 5 | 7451/ 8400 batches | train loss 0.4049881 +| epoch 5 | 7455/ 8400 batches | train loss 0.3949870 +| epoch 5 | 7459/ 8400 batches | train loss 0.4859553 +| epoch 5 | 7463/ 8400 batches | train loss 0.4868146 +| epoch 5 | 7467/ 8400 batches | train loss 0.4241666 +| epoch 5 | 7471/ 8400 batches | train loss 0.3842974 +| epoch 5 | 7475/ 8400 batches | train loss 0.3632013 +| epoch 5 | 7479/ 8400 batches | train loss 0.4290783 +| epoch 5 | 7483/ 8400 batches | train loss 0.3433865 +| epoch 5 | 7487/ 8400 batches | train loss 0.3848137 +| epoch 5 | 7491/ 8400 batches | train loss 0.3866165 +| epoch 5 | 7495/ 8400 batches | train loss 0.3929822 +| epoch 5 | 7499/ 8400 batches | train loss 0.3829457 +| epoch 5 | 7503/ 8400 batches | train loss 0.3928080 +| epoch 5 | 7507/ 8400 batches | train loss 0.4320574 +| epoch 5 | 7511/ 8400 batches | train loss 0.3396370 +| epoch 5 | 7515/ 8400 batches | train loss 0.4802700 +| epoch 5 | 7519/ 8400 batches | train loss 0.3797728 +| epoch 5 | 7523/ 8400 batches | train loss 0.3710043 +| epoch 5 | 7527/ 8400 batches | train loss 0.3348510 +| epoch 5 | 7531/ 8400 batches | train loss 0.4096367 +| epoch 5 | 7535/ 8400 batches | train loss 0.5055377 +| epoch 5 | 7539/ 8400 batches | train loss 0.3434093 +| epoch 5 | 7543/ 8400 batches | train loss 0.4619969 +| epoch 5 | 7547/ 8400 batches | train loss 0.3452151 +| epoch 5 | 7551/ 8400 batches | train loss 0.4294227 +| epoch 5 | 7555/ 8400 batches | train loss 0.3604236 +| epoch 5 | 7559/ 8400 batches | train loss 0.4943389 +| epoch 5 | 7563/ 8400 batches | train loss 0.4068461 +| epoch 5 | 7567/ 8400 batches | train loss 0.4056880 +| epoch 5 | 7571/ 8400 batches | train loss 0.4369684 +| epoch 5 | 7575/ 8400 batches | train loss 0.4148489 +| epoch 5 | 7579/ 8400 batches | train loss 0.4801829 +| epoch 5 | 7583/ 8400 batches | train loss 0.3902082 +| epoch 5 | 7587/ 8400 batches | train loss 0.3906484 +| epoch 5 | 7591/ 8400 batches | train loss 0.3579513 +| epoch 5 | 7595/ 8400 batches | train loss 0.3960510 +| epoch 5 | 7599/ 8400 batches | train loss 0.3280873 +| epoch 5 | 7603/ 8400 batches | train loss 0.3459080 +| epoch 5 | 7607/ 8400 batches | train loss 0.4002765 +| epoch 5 | 7611/ 8400 batches | train loss 0.4011455 +| epoch 5 | 7615/ 8400 batches | train loss 0.4102002 +| epoch 5 | 7619/ 8400 batches | train loss 0.3713496 +| epoch 5 | 7623/ 8400 batches | train loss 0.4477314 +| epoch 5 | 7627/ 8400 batches | train loss 0.3900720 +| epoch 5 | 7631/ 8400 batches | train loss 0.3496283 +| epoch 5 | 7635/ 8400 batches | train loss 0.3339899 +| epoch 5 | 7639/ 8400 batches | train loss 0.3338365 +| epoch 5 | 7643/ 8400 batches | train loss 0.3840719 +| epoch 5 | 7647/ 8400 batches | train loss 0.4219936 +| epoch 5 | 7651/ 8400 batches | train loss 0.4902666 +| epoch 5 | 7655/ 8400 batches | train loss 0.3611226 +| epoch 5 | 7659/ 8400 batches | train loss 0.3826757 +| epoch 5 | 7663/ 8400 batches | train loss 0.3665312 +| epoch 5 | 7667/ 8400 batches | train loss 0.4147699 +| epoch 5 | 7671/ 8400 batches | train loss 0.3181901 +| epoch 5 | 7675/ 8400 batches | train loss 0.3610362 +| epoch 5 | 7679/ 8400 batches | train loss 0.3992626 +| epoch 5 | 7683/ 8400 batches | train loss 0.3969309 +| epoch 5 | 7687/ 8400 batches | train loss 0.3589022 +| epoch 5 | 7691/ 8400 batches | train loss 0.3950270 +| epoch 5 | 7695/ 8400 batches | train loss 0.3959517 +| epoch 5 | 7699/ 8400 batches | train loss 0.3734676 +| epoch 5 | 7703/ 8400 batches | train loss 0.3331414 +| epoch 5 | 7707/ 8400 batches | train loss 0.3560311 +| epoch 5 | 7711/ 8400 batches | train loss 0.3750295 +| epoch 5 | 7715/ 8400 batches | train loss 0.4773663 +| epoch 5 | 7719/ 8400 batches | train loss 0.3173775 +| epoch 5 | 7723/ 8400 batches | train loss 0.3346394 +| epoch 5 | 7727/ 8400 batches | train loss 0.4283537 +| epoch 5 | 7731/ 8400 batches | train loss 0.4039053 +| epoch 5 | 7735/ 8400 batches | train loss 0.4170412 +| epoch 5 | 7739/ 8400 batches | train loss 0.4119558 +| epoch 5 | 7743/ 8400 batches | train loss 0.3261115 +| epoch 5 | 7747/ 8400 batches | train loss 0.4057891 +| epoch 5 | 7751/ 8400 batches | train loss 0.3711349 +| epoch 5 | 7755/ 8400 batches | train loss 0.4416263 +| epoch 5 | 7759/ 8400 batches | train loss 0.3763452 +| epoch 5 | 7763/ 8400 batches | train loss 0.3264148 +| epoch 5 | 7767/ 8400 batches | train loss 0.3327731 +| epoch 5 | 7771/ 8400 batches | train loss 0.4326952 +| epoch 5 | 7775/ 8400 batches | train loss 0.3824098 +| epoch 5 | 7779/ 8400 batches | train loss 0.4167704 +| epoch 5 | 7783/ 8400 batches | train loss 0.4295570 +| epoch 5 | 7787/ 8400 batches | train loss 0.3471028 +| epoch 5 | 7791/ 8400 batches | train loss 0.4117823 +| epoch 5 | 7795/ 8400 batches | train loss 0.3737356 +| epoch 5 | 7799/ 8400 batches | train loss 0.4820486 +| epoch 5 | 7803/ 8400 batches | train loss 0.4019181 +| epoch 5 | 7807/ 8400 batches | train loss 0.3269933 +| epoch 5 | 7811/ 8400 batches | train loss 0.3407302 +| epoch 5 | 7815/ 8400 batches | train loss 0.3512042 +| epoch 5 | 7819/ 8400 batches | train loss 0.4082111 +| epoch 5 | 7823/ 8400 batches | train loss 0.4913460 +| epoch 5 | 7827/ 8400 batches | train loss 0.3369032 +| epoch 5 | 7831/ 8400 batches | train loss 0.3074745 +| epoch 5 | 7835/ 8400 batches | train loss 0.4186835 +| epoch 5 | 7839/ 8400 batches | train loss 0.3676113 +| epoch 5 | 7843/ 8400 batches | train loss 0.4511371 +| epoch 5 | 7847/ 8400 batches | train loss 0.3804114 +| epoch 5 | 7851/ 8400 batches | train loss 0.4223495 +| epoch 5 | 7855/ 8400 batches | train loss 0.3858406 +| epoch 5 | 7859/ 8400 batches | train loss 0.3603452 +| epoch 5 | 7863/ 8400 batches | train loss 0.4264596 +| epoch 5 | 7867/ 8400 batches | train loss 0.3175940 +| epoch 5 | 7871/ 8400 batches | train loss 0.3891307 +| epoch 5 | 7875/ 8400 batches | train loss 0.4358715 +| epoch 5 | 7879/ 8400 batches | train loss 0.4121348 +| epoch 5 | 7883/ 8400 batches | train loss 0.4011035 +| epoch 5 | 7887/ 8400 batches | train loss 0.4348574 +| epoch 5 | 7891/ 8400 batches | train loss 0.4723171 +| epoch 5 | 7895/ 8400 batches | train loss 0.4092546 +| epoch 5 | 7899/ 8400 batches | train loss 0.4290971 +| epoch 5 | 7903/ 8400 batches | train loss 0.5579880 +| epoch 5 | 7907/ 8400 batches | train loss 0.4528520 +| epoch 5 | 7911/ 8400 batches | train loss 0.4357042 +| epoch 5 | 7915/ 8400 batches | train loss 0.3893978 +| epoch 5 | 7919/ 8400 batches | train loss 0.4030395 +| epoch 5 | 7923/ 8400 batches | train loss 0.4121397 +| epoch 5 | 7927/ 8400 batches | train loss 0.4001129 +| epoch 5 | 7931/ 8400 batches | train loss 0.3570710 +| epoch 5 | 7935/ 8400 batches | train loss 0.3343328 +| epoch 5 | 7939/ 8400 batches | train loss 0.4349199 +| epoch 5 | 7943/ 8400 batches | train loss 0.4369009 +| epoch 5 | 7947/ 8400 batches | train loss 0.3247657 +| epoch 5 | 7951/ 8400 batches | train loss 0.4227740 +| epoch 5 | 7955/ 8400 batches | train loss 0.4304933 +| epoch 5 | 7959/ 8400 batches | train loss 0.4863634 +| epoch 5 | 7963/ 8400 batches | train loss 0.4801353 +| epoch 5 | 7967/ 8400 batches | train loss 0.3859326 +| epoch 5 | 7971/ 8400 batches | train loss 0.4173516 +| epoch 5 | 7975/ 8400 batches | train loss 0.4839165 +| epoch 5 | 7979/ 8400 batches | train loss 0.4130638 +| epoch 5 | 7983/ 8400 batches | train loss 0.3607204 +| epoch 5 | 7987/ 8400 batches | train loss 0.3913491 +| epoch 5 | 7991/ 8400 batches | train loss 0.3875724 +| epoch 5 | 7995/ 8400 batches | train loss 0.4482138 +| epoch 5 | 7999/ 8400 batches | train loss 0.3451941 +| epoch 5 | 8003/ 8400 batches | train loss 0.3746268 +| epoch 5 | 8007/ 8400 batches | train loss 0.3558219 +| epoch 5 | 8011/ 8400 batches | train loss 0.3628459 +| epoch 5 | 8015/ 8400 batches | train loss 0.4156162 +| epoch 5 | 8019/ 8400 batches | train loss 0.4526686 +| epoch 5 | 8023/ 8400 batches | train loss 0.4276060 +| epoch 5 | 8027/ 8400 batches | train loss 0.3968629 +| epoch 5 | 8031/ 8400 batches | train loss 0.3404666 +| epoch 5 | 8035/ 8400 batches | train loss 0.4442064 +| epoch 5 | 8039/ 8400 batches | train loss 0.3916458 +| epoch 5 | 8043/ 8400 batches | train loss 0.4221525 +| epoch 5 | 8047/ 8400 batches | train loss 0.3659648 +| epoch 5 | 8051/ 8400 batches | train loss 0.3409806 +| epoch 5 | 8055/ 8400 batches | train loss 0.4392387 +| epoch 5 | 8059/ 8400 batches | train loss 0.3922811 +| epoch 5 | 8063/ 8400 batches | train loss 0.3354511 +| epoch 5 | 8067/ 8400 batches | train loss 0.3980610 +| epoch 5 | 8071/ 8400 batches | train loss 0.3746314 +| epoch 5 | 8075/ 8400 batches | train loss 0.3725676 +| epoch 5 | 8079/ 8400 batches | train loss 0.3989729 +| epoch 5 | 8083/ 8400 batches | train loss 0.3761339 +| epoch 5 | 8087/ 8400 batches | train loss 0.3527979 +| epoch 5 | 8091/ 8400 batches | train loss 0.3857259 +| epoch 5 | 8095/ 8400 batches | train loss 0.4091294 +| epoch 5 | 8099/ 8400 batches | train loss 0.4355127 +| epoch 5 | 8103/ 8400 batches | train loss 0.1751415 +| epoch 5 | 8107/ 8400 batches | train loss 0.4000002 +| epoch 5 | 8111/ 8400 batches | train loss 0.3976995 +| epoch 5 | 8115/ 8400 batches | train loss 0.3603956 +| epoch 5 | 8119/ 8400 batches | train loss 0.3472884 +| epoch 5 | 8123/ 8400 batches | train loss 0.4029577 +| epoch 5 | 8127/ 8400 batches | train loss 0.3776855 +| epoch 5 | 8131/ 8400 batches | train loss 0.3707818 +| epoch 5 | 8135/ 8400 batches | train loss 0.4812877 +| epoch 5 | 8139/ 8400 batches | train loss 0.4090125 +| epoch 5 | 8143/ 8400 batches | train loss 0.3509406 +| epoch 5 | 8147/ 8400 batches | train loss 0.3543345 +| epoch 5 | 8151/ 8400 batches | train loss 0.3464837 +| epoch 5 | 8155/ 8400 batches | train loss 0.4609185 +| epoch 5 | 8159/ 8400 batches | train loss 0.3838025 +| epoch 5 | 8163/ 8400 batches | train loss 0.3964242 +| epoch 5 | 8167/ 8400 batches | train loss 0.3357449 +| epoch 5 | 8171/ 8400 batches | train loss 0.4495082 +| epoch 5 | 8175/ 8400 batches | train loss 0.3936860 +| epoch 5 | 8179/ 8400 batches | train loss 0.3776364 +| epoch 5 | 8183/ 8400 batches | train loss 0.3540886 +| epoch 5 | 8187/ 8400 batches | train loss 0.3972393 +| epoch 5 | 8191/ 8400 batches | train loss 0.4081341 +| epoch 5 | 8195/ 8400 batches | train loss 0.4347327 +| epoch 5 | 8199/ 8400 batches | train loss 0.4855802 +| epoch 5 | 8203/ 8400 batches | train loss 0.3827217 +| epoch 5 | 8207/ 8400 batches | train loss 0.4344858 +| epoch 5 | 8211/ 8400 batches | train loss 0.3585123 +| epoch 5 | 8215/ 8400 batches | train loss 0.3819860 +| epoch 5 | 8219/ 8400 batches | train loss 0.3377170 +| epoch 5 | 8223/ 8400 batches | train loss 0.6325106 +| epoch 5 | 8227/ 8400 batches | train loss 0.3526454 +| epoch 5 | 8231/ 8400 batches | train loss 0.3166387 +| epoch 5 | 8235/ 8400 batches | train loss 0.3377474 +| epoch 5 | 8239/ 8400 batches | train loss 0.4190464 +| epoch 5 | 8243/ 8400 batches | train loss 0.4022343 +| epoch 5 | 8247/ 8400 batches | train loss 0.3903835 +| epoch 5 | 8251/ 8400 batches | train loss 0.3632123 +| epoch 5 | 8255/ 8400 batches | train loss 0.4972287 +| epoch 5 | 8259/ 8400 batches | train loss 0.4058148 +| epoch 5 | 8263/ 8400 batches | train loss 0.3905182 +| epoch 5 | 8267/ 8400 batches | train loss 0.4057112 +| epoch 5 | 8271/ 8400 batches | train loss 0.3744701 +| epoch 5 | 8275/ 8400 batches | train loss 0.3746045 +| epoch 5 | 8279/ 8400 batches | train loss 0.3975169 +| epoch 5 | 8283/ 8400 batches | train loss 0.4032840 +| epoch 5 | 8287/ 8400 batches | train loss 0.3861453 +| epoch 5 | 8291/ 8400 batches | train loss 0.4839723 +| epoch 5 | 8295/ 8400 batches | train loss 0.3517386 +| epoch 5 | 8299/ 8400 batches | train loss 0.3547089 +| epoch 5 | 8303/ 8400 batches | train loss 0.4195318 +| epoch 5 | 8307/ 8400 batches | train loss 0.5240768 +| epoch 5 | 8311/ 8400 batches | train loss 0.2485749 +| epoch 5 | 8315/ 8400 batches | train loss 0.3835094 +| epoch 5 | 8319/ 8400 batches | train loss 0.4397765 +| epoch 5 | 8323/ 8400 batches | train loss 0.5210915 +| epoch 5 | 8327/ 8400 batches | train loss 0.4052431 +| epoch 5 | 8331/ 8400 batches | train loss 0.1507152 +| epoch 5 | 8335/ 8400 batches | train loss 0.3766877 +| epoch 5 | 8339/ 8400 batches | train loss 0.4397750 +| epoch 5 | 8343/ 8400 batches | train loss 0.3595512 +| epoch 5 | 8347/ 8400 batches | train loss 0.1577619 +| epoch 5 | 8351/ 8400 batches | train loss 0.3370798 +| epoch 5 | 8355/ 8400 batches | train loss 0.3569027 +| epoch 5 | 8359/ 8400 batches | train loss 0.4149558 +| epoch 5 | 8363/ 8400 batches | train loss 0.4071635 +| epoch 5 | 8367/ 8400 batches | train loss 0.4060771 +| epoch 5 | 8371/ 8400 batches | train loss 0.3797555 +| epoch 5 | 8375/ 8400 batches | train loss 0.4243582 +| epoch 5 | 8379/ 8400 batches | train loss 0.4924932 +| epoch 5 | 8383/ 8400 batches | train loss 0.2721139 +| epoch 5 | 8387/ 8400 batches | train loss 0.2506082 +| epoch 5 | 8391/ 8400 batches | train loss 0.4066218 +| epoch 5 | 8395/ 8400 batches | train loss 0.5006098 +| epoch 5 | 8399/ 8400 batches | train loss 0.3392939 +-------------------------------------------------------------------------------- +| epoch 5 | 3/ 8400 batches | test loss 0.3735812 +| epoch 5 | 7/ 8400 batches | test loss 0.3877118 +| epoch 5 | 11/ 8400 batches | test loss 0.2775470 +| epoch 5 | 15/ 8400 batches | test loss 0.3923630 +| epoch 5 | 19/ 8400 batches | test loss 0.3527470 +| epoch 5 | 23/ 8400 batches | test loss 0.4677751 +| epoch 5 | 27/ 8400 batches | test loss 0.3437449 +| epoch 5 | 31/ 8400 batches | test loss 0.3187276 +| epoch 5 | 35/ 8400 batches | test loss 0.3551100 +| epoch 5 | 39/ 8400 batches | test loss 0.5184069 +| epoch 5 | 43/ 8400 batches | test loss 0.4045265 +| epoch 5 | 47/ 8400 batches | test loss 0.3752398 +| epoch 5 | 51/ 8400 batches | test loss 0.3835843 +| epoch 5 | 55/ 8400 batches | test loss 0.4272477 +| epoch 5 | 59/ 8400 batches | test loss 0.4097985 +| epoch 5 | 63/ 8400 batches | test loss 0.4051690 +| epoch 5 | 67/ 8400 batches | test loss 0.4925335 +| epoch 5 | 71/ 8400 batches | test loss 0.3683794 +| epoch 5 | 75/ 8400 batches | test loss 0.4171908 +| epoch 5 | 79/ 8400 batches | test loss 0.3703691 +| epoch 5 | 83/ 8400 batches | test loss 0.3528486 +| epoch 5 | 87/ 8400 batches | test loss 0.4934559 +| epoch 5 | 91/ 8400 batches | test loss 0.3657214 +| epoch 5 | 95/ 8400 batches | test loss 0.4416415 +| epoch 5 | 99/ 8400 batches | test loss 0.4095478 +| epoch 5 | 103/ 8400 batches | test loss 0.3960406 +| epoch 5 | 107/ 8400 batches | test loss 0.4654414 +| epoch 5 | 111/ 8400 batches | test loss 0.4669801 +| epoch 5 | 115/ 8400 batches | test loss 0.3889492 +| epoch 5 | 119/ 8400 batches | test loss 0.4318970 +| epoch 5 | 123/ 8400 batches | test loss 0.4658912 +| epoch 5 | 127/ 8400 batches | test loss 0.3308925 +| epoch 5 | 131/ 8400 batches | test loss 0.4230145 +| epoch 5 | 135/ 8400 batches | test loss 0.4924824 +| epoch 5 | 139/ 8400 batches | test loss 0.4342318 +| epoch 5 | 143/ 8400 batches | test loss 0.3149713 +| epoch 5 | 147/ 8400 batches | test loss 0.4382565 +| epoch 5 | 151/ 8400 batches | test loss 0.3551450 +| epoch 5 | 155/ 8400 batches | test loss 0.3411964 +| epoch 5 | 159/ 8400 batches | test loss 0.3993939 +| epoch 5 | 163/ 8400 batches | test loss 0.3384049 +| epoch 5 | 167/ 8400 batches | test loss 0.4197916 +| epoch 5 | 171/ 8400 batches | test loss 0.4297192 +| epoch 5 | 175/ 8400 batches | test loss 0.2835158 +| epoch 5 | 179/ 8400 batches | test loss 0.3677938 +| epoch 5 | 183/ 8400 batches | test loss 0.3406858 +| epoch 5 | 187/ 8400 batches | test loss 0.4838890 +| epoch 5 | 191/ 8400 batches | test loss 0.3766039 +| epoch 5 | 195/ 8400 batches | test loss 0.4821317 +| epoch 5 | 199/ 8400 batches | test loss 0.5187849 +| epoch 5 | 203/ 8400 batches | test loss 0.4159812 +| epoch 5 | 207/ 8400 batches | test loss 0.5937032 +| epoch 5 | 211/ 8400 batches | test loss 0.4101778 +| epoch 5 | 215/ 8400 batches | test loss 0.4269873 +| epoch 5 | 219/ 8400 batches | test loss 0.3596555 +| epoch 5 | 223/ 8400 batches | test loss 0.3710870 +| epoch 5 | 227/ 8400 batches | test loss 0.4322479 +| epoch 5 | 231/ 8400 batches | test loss 0.4284827 +| epoch 5 | 235/ 8400 batches | test loss 0.4620770 +| epoch 5 | 239/ 8400 batches | test loss 0.3372978 +| epoch 5 | 243/ 8400 batches | test loss 0.3636050 +| epoch 5 | 247/ 8400 batches | test loss 0.4964870 +| epoch 5 | 251/ 8400 batches | test loss 0.4777688 +| epoch 5 | 255/ 8400 batches | test loss 0.3716488 +| epoch 5 | 259/ 8400 batches | test loss 0.3997867 +| epoch 5 | 263/ 8400 batches | test loss 0.3878807 +| epoch 5 | 267/ 8400 batches | test loss 0.4707683 +| epoch 5 | 271/ 8400 batches | test loss 0.4265621 +| epoch 5 | 275/ 8400 batches | test loss 0.4755707 +| epoch 5 | 279/ 8400 batches | test loss 0.4066696 +| epoch 5 | 283/ 8400 batches | test loss 0.4127172 +| epoch 5 | 287/ 8400 batches | test loss 0.4560944 +| epoch 5 | 291/ 8400 batches | test loss 0.4444294 +| epoch 5 | 295/ 8400 batches | test loss 0.4710336 +| epoch 5 | 299/ 8400 batches | test loss 0.4387849 +| epoch 5 | 303/ 8400 batches | test loss 0.4199630 +| epoch 5 | 307/ 8400 batches | test loss 0.3787634 +| epoch 5 | 311/ 8400 batches | test loss 0.3915111 +| epoch 5 | 315/ 8400 batches | test loss 0.3691657 +| epoch 5 | 319/ 8400 batches | test loss 0.3617840 +| epoch 5 | 323/ 8400 batches | test loss 0.3799376 +| epoch 5 | 327/ 8400 batches | test loss 0.4029854 +| epoch 5 | 331/ 8400 batches | test loss 0.4274567 +| epoch 5 | 335/ 8400 batches | test loss 0.5156063 +| epoch 5 | 339/ 8400 batches | test loss 0.4081523 +| epoch 5 | 343/ 8400 batches | test loss 0.3901733 +| epoch 5 | 347/ 8400 batches | test loss 0.5441195 +| epoch 5 | 351/ 8400 batches | test loss 0.4382910 +| epoch 5 | 355/ 8400 batches | test loss 0.3970521 +| epoch 5 | 359/ 8400 batches | test loss 0.6032493 +| epoch 5 | 363/ 8400 batches | test loss 0.4568904 +| epoch 5 | 367/ 8400 batches | test loss 0.3764057 +| epoch 5 | 371/ 8400 batches | test loss 0.3549129 +| epoch 5 | 375/ 8400 batches | test loss 0.3242149 +| epoch 5 | 379/ 8400 batches | test loss 0.5187573 +| epoch 5 | 383/ 8400 batches | test loss 0.5043585 +| epoch 5 | 387/ 8400 batches | test loss 0.4130489 +| epoch 5 | 391/ 8400 batches | test loss 0.3677715 +| epoch 5 | 395/ 8400 batches | test loss 0.3566779 +| epoch 5 | 399/ 8400 batches | test loss 0.4522365 +| epoch 5 | 403/ 8400 batches | test loss 0.5553010 +| epoch 5 | 407/ 8400 batches | test loss 0.4632435 +| epoch 5 | 411/ 8400 batches | test loss 0.3952170 +| epoch 5 | 415/ 8400 batches | test loss 0.4600511 +| epoch 5 | 419/ 8400 batches | test loss 0.4251679 +| epoch 5 | 423/ 8400 batches | test loss 0.4353185 +| epoch 5 | 427/ 8400 batches | test loss 0.4350441 +| epoch 5 | 431/ 8400 batches | test loss 0.4891832 +| epoch 5 | 435/ 8400 batches | test loss 0.4728934 +| epoch 5 | 439/ 8400 batches | test loss 0.4565566 +| epoch 5 | 443/ 8400 batches | test loss 0.4622717 +| epoch 5 | 447/ 8400 batches | test loss 0.4363964 +| epoch 5 | 451/ 8400 batches | test loss 0.5152806 +| epoch 5 | 455/ 8400 batches | test loss 0.4861383 +| epoch 5 | 459/ 8400 batches | test loss 0.4150206 +| epoch 5 | 463/ 8400 batches | test loss 0.3557684 +| epoch 5 | 467/ 8400 batches | test loss 0.5548233 +| epoch 5 | 471/ 8400 batches | test loss 0.4886748 +| epoch 5 | 475/ 8400 batches | test loss 0.3387134 +| epoch 5 | 479/ 8400 batches | test loss 0.4442266 +| epoch 5 | 483/ 8400 batches | test loss 0.4747040 +| epoch 5 | 487/ 8400 batches | test loss 0.5332983 +| epoch 5 | 491/ 8400 batches | test loss 0.4343870 +| epoch 5 | 495/ 8400 batches | test loss 0.3695832 +| epoch 5 | 499/ 8400 batches | test loss 0.3786885 +| epoch 5 | 503/ 8400 batches | test loss 0.3625918 +| epoch 5 | 507/ 8400 batches | test loss 0.3759145 +| epoch 5 | 511/ 8400 batches | test loss 0.3210425 +| epoch 5 | 515/ 8400 batches | test loss 0.3589351 +| epoch 5 | 519/ 8400 batches | test loss 0.4117250 +| epoch 5 | 523/ 8400 batches | test loss 0.4363012 +| epoch 5 | 527/ 8400 batches | test loss 0.3993042 +| epoch 5 | 531/ 8400 batches | test loss 0.3962362 +| epoch 5 | 535/ 8400 batches | test loss 0.5112123 +| epoch 5 | 539/ 8400 batches | test loss 0.4714189 +| epoch 5 | 543/ 8400 batches | test loss 0.3884572 +| epoch 5 | 547/ 8400 batches | test loss 0.4550115 +| epoch 5 | 551/ 8400 batches | test loss 0.4923334 +| epoch 5 | 555/ 8400 batches | test loss 0.3585916 +| epoch 5 | 559/ 8400 batches | test loss 0.4631327 +| epoch 5 | 563/ 8400 batches | test loss 0.3699449 +| epoch 5 | 567/ 8400 batches | test loss 0.3791757 +| epoch 5 | 571/ 8400 batches | test loss 0.4428438 +| epoch 5 | 575/ 8400 batches | test loss 0.3303108 +| epoch 5 | 579/ 8400 batches | test loss 0.4087283 +| epoch 5 | 583/ 8400 batches | test loss 0.4418695 +| epoch 5 | 587/ 8400 batches | test loss 0.4049968 +| epoch 5 | 591/ 8400 batches | test loss 0.3850406 +| epoch 5 | 595/ 8400 batches | test loss 0.3245964 +| epoch 5 | 599/ 8400 batches | test loss 0.4367804 +| epoch 5 | 603/ 8400 batches | test loss 0.4092864 +| epoch 5 | 607/ 8400 batches | test loss 0.4585880 +| epoch 5 | 611/ 8400 batches | test loss 0.4185690 +| epoch 5 | 615/ 8400 batches | test loss 0.4053072 +| epoch 5 | 619/ 8400 batches | test loss 0.3820402 +| epoch 5 | 623/ 8400 batches | test loss 0.3545234 +| epoch 5 | 627/ 8400 batches | test loss 0.4010652 +| epoch 5 | 631/ 8400 batches | test loss 0.3261493 +| epoch 5 | 635/ 8400 batches | test loss 0.4898008 +| epoch 5 | 639/ 8400 batches | test loss 0.4143596 +| epoch 5 | 643/ 8400 batches | test loss 0.4782367 +| epoch 5 | 647/ 8400 batches | test loss 0.3580530 +| epoch 5 | 651/ 8400 batches | test loss 0.3719169 +| epoch 5 | 655/ 8400 batches | test loss 0.3930974 +| epoch 5 | 659/ 8400 batches | test loss 0.4178573 +| epoch 5 | 663/ 8400 batches | test loss 0.4427205 +| epoch 5 | 667/ 8400 batches | test loss 0.4815506 +| epoch 5 | 671/ 8400 batches | test loss 0.4117425 +| epoch 5 | 675/ 8400 batches | test loss 0.3946630 +| epoch 5 | 679/ 8400 batches | test loss 0.4655659 +| epoch 5 | 683/ 8400 batches | test loss 0.3719622 +| epoch 5 | 687/ 8400 batches | test loss 0.4443774 +| epoch 5 | 691/ 8400 batches | test loss 0.4908459 +| epoch 5 | 695/ 8400 batches | test loss 0.4126608 +| epoch 5 | 699/ 8400 batches | test loss 0.3405467 +| epoch 5 | 703/ 8400 batches | test loss 0.4357722 +| epoch 5 | 707/ 8400 batches | test loss 0.5442655 +| epoch 5 | 711/ 8400 batches | test loss 0.3332260 +| epoch 5 | 715/ 8400 batches | test loss 0.3540571 +| epoch 5 | 719/ 8400 batches | test loss 0.3469586 +| epoch 5 | 723/ 8400 batches | test loss 0.4132043 +| epoch 5 | 727/ 8400 batches | test loss 0.3867322 +| epoch 5 | 731/ 8400 batches | test loss 0.4701039 +| epoch 5 | 735/ 8400 batches | test loss 0.3988933 +| epoch 5 | 739/ 8400 batches | test loss 0.4381046 +| epoch 5 | 743/ 8400 batches | test loss 0.3587458 +| epoch 5 | 747/ 8400 batches | test loss 0.3779297 +| epoch 5 | 751/ 8400 batches | test loss 0.2724662 +| epoch 5 | 755/ 8400 batches | test loss 0.3376210 +| epoch 5 | 759/ 8400 batches | test loss 0.3749833 +| epoch 5 | 763/ 8400 batches | test loss 0.4112047 +| epoch 5 | 767/ 8400 batches | test loss 0.4136650 +| epoch 5 | 771/ 8400 batches | test loss 0.4502032 +| epoch 5 | 775/ 8400 batches | test loss 0.3559312 +| epoch 5 | 779/ 8400 batches | test loss 0.3048129 +| epoch 5 | 783/ 8400 batches | test loss 0.4783715 +| epoch 5 | 787/ 8400 batches | test loss 0.3428262 +| epoch 5 | 791/ 8400 batches | test loss 0.4318247 +| epoch 5 | 795/ 8400 batches | test loss 0.4104127 +| epoch 5 | 799/ 8400 batches | test loss 0.4138987 +| epoch 5 | 803/ 8400 batches | test loss 0.5054377 +| epoch 5 | 807/ 8400 batches | test loss 0.3882483 +| epoch 5 | 811/ 8400 batches | test loss 0.5372352 +| epoch 5 | 815/ 8400 batches | test loss 0.4174420 +| epoch 5 | 819/ 8400 batches | test loss 0.4003415 +| epoch 5 | 823/ 8400 batches | test loss 0.3523263 +| epoch 5 | 827/ 8400 batches | test loss 0.4431021 +| epoch 5 | 831/ 8400 batches | test loss 0.4423541 +| epoch 5 | 835/ 8400 batches | test loss 0.4191677 +| epoch 5 | 839/ 8400 batches | test loss 0.4376837 +| epoch 5 | 843/ 8400 batches | test loss 0.3675026 +| epoch 5 | 847/ 8400 batches | test loss 0.5603886 +| epoch 5 | 851/ 8400 batches | test loss 0.4165603 +| epoch 5 | 855/ 8400 batches | test loss 0.3785129 +| epoch 5 | 859/ 8400 batches | test loss 0.3623348 +| epoch 5 | 863/ 8400 batches | test loss 0.4151792 +| epoch 5 | 867/ 8400 batches | test loss 0.4432702 +| epoch 5 | 871/ 8400 batches | test loss 0.4196095 +| epoch 5 | 875/ 8400 batches | test loss 0.4459834 +| epoch 5 | 879/ 8400 batches | test loss 0.4238265 +| epoch 5 | 883/ 8400 batches | test loss 0.4351369 +| epoch 5 | 887/ 8400 batches | test loss 0.3590486 +| epoch 5 | 891/ 8400 batches | test loss 0.4208950 +| epoch 5 | 895/ 8400 batches | test loss 0.4649368 +| epoch 5 | 899/ 8400 batches | test loss 0.4134969 +| epoch 5 | 903/ 8400 batches | test loss 0.4285791 +| epoch 5 | 907/ 8400 batches | test loss 0.3935358 +| epoch 5 | 911/ 8400 batches | test loss 0.2880431 +| epoch 5 | 915/ 8400 batches | test loss 0.4492403 +| epoch 5 | 919/ 8400 batches | test loss 0.4226588 +| epoch 5 | 923/ 8400 batches | test loss 0.3895897 +| epoch 5 | 927/ 8400 batches | test loss 0.5171082 +| epoch 5 | 931/ 8400 batches | test loss 0.3656384 +| epoch 5 | 935/ 8400 batches | test loss 0.4045905 +| epoch 5 | 939/ 8400 batches | test loss 0.3339464 +| epoch 5 | 943/ 8400 batches | test loss 0.4359660 +| epoch 5 | 947/ 8400 batches | test loss 0.3429100 +| epoch 5 | 951/ 8400 batches | test loss 0.4647554 +| epoch 5 | 955/ 8400 batches | test loss 0.4296415 +| epoch 5 | 959/ 8400 batches | test loss 0.4397225 +| epoch 5 | 963/ 8400 batches | test loss 0.4883635 +| epoch 5 | 967/ 8400 batches | test loss 0.4218940 +| epoch 5 | 971/ 8400 batches | test loss 0.4646055 +| epoch 5 | 975/ 8400 batches | test loss 0.4387165 +| epoch 5 | 979/ 8400 batches | test loss 0.4157538 +| epoch 5 | 983/ 8400 batches | test loss 0.3822160 +| epoch 5 | 987/ 8400 batches | test loss 0.5057791 +| epoch 5 | 991/ 8400 batches | test loss 0.4104914 +| epoch 5 | 995/ 8400 batches | test loss 0.3943173 +| epoch 5 | 999/ 8400 batches | test loss 0.3766048 +| epoch 5 | 1003/ 8400 batches | test loss 0.5210011 +| epoch 5 | 1007/ 8400 batches | test loss 0.3610314 +| epoch 5 | 1011/ 8400 batches | test loss 0.4289519 +| epoch 5 | 1015/ 8400 batches | test loss 0.4743394 +| epoch 5 | 1019/ 8400 batches | test loss 0.4001340 +| epoch 5 | 1023/ 8400 batches | test loss 0.4286813 +| epoch 5 | 1027/ 8400 batches | test loss 0.4182174 +| epoch 5 | 1031/ 8400 batches | test loss 0.4045442 +| epoch 5 | 1035/ 8400 batches | test loss 0.4055333 +| epoch 5 | 1039/ 8400 batches | test loss 0.4547090 +| epoch 5 | 1043/ 8400 batches | test loss 0.3921897 +| epoch 5 | 1047/ 8400 batches | test loss 0.3390824 +| epoch 5 | 1051/ 8400 batches | test loss 0.5072079 +| epoch 5 | 1055/ 8400 batches | test loss 0.3561206 +| epoch 5 | 1059/ 8400 batches | test loss 0.4202045 +| epoch 5 | 1063/ 8400 batches | test loss 0.3435879 +| epoch 5 | 1067/ 8400 batches | test loss 0.3222480 +| epoch 5 | 1071/ 8400 batches | test loss 0.3758006 +| epoch 5 | 1075/ 8400 batches | test loss 0.4601503 +| epoch 5 | 1079/ 8400 batches | test loss 0.4681158 +| epoch 5 | 1083/ 8400 batches | test loss 0.3578658 +| epoch 5 | 1087/ 8400 batches | test loss 0.4135041 +| epoch 5 | 1091/ 8400 batches | test loss 0.3918377 +| epoch 5 | 1095/ 8400 batches | test loss 0.3370340 +| epoch 5 | 1099/ 8400 batches | test loss 0.3918504 +| epoch 5 | 1103/ 8400 batches | test loss 0.4935356 +| epoch 5 | 1107/ 8400 batches | test loss 0.4057746 +| epoch 5 | 1111/ 8400 batches | test loss 0.3599648 +| epoch 5 | 1115/ 8400 batches | test loss 0.3905903 +| epoch 5 | 1119/ 8400 batches | test loss 0.3932766 +| epoch 5 | 1123/ 8400 batches | test loss 0.3725649 +| epoch 5 | 1127/ 8400 batches | test loss 0.3980174 +| epoch 5 | 1131/ 8400 batches | test loss 0.4227638 +| epoch 5 | 1135/ 8400 batches | test loss 0.3903103 +| epoch 5 | 1139/ 8400 batches | test loss 0.4244373 +| epoch 5 | 1143/ 8400 batches | test loss 0.3970602 +| epoch 5 | 1147/ 8400 batches | test loss 0.5826912 +| epoch 5 | 1151/ 8400 batches | test loss 0.3891050 +| epoch 5 | 1155/ 8400 batches | test loss 0.4395371 +| epoch 5 | 1159/ 8400 batches | test loss 0.3934643 +| epoch 5 | 1163/ 8400 batches | test loss 0.6284352 +| epoch 5 | 1167/ 8400 batches | test loss 0.3771524 +| epoch 5 | 1171/ 8400 batches | test loss 0.4052935 +| epoch 5 | 1175/ 8400 batches | test loss 0.3923863 +| epoch 5 | 1179/ 8400 batches | test loss 0.4792180 +| epoch 5 | 1183/ 8400 batches | test loss 0.4459783 +| epoch 5 | 1187/ 8400 batches | test loss 0.4302253 +| epoch 5 | 1191/ 8400 batches | test loss 0.3162767 +| epoch 5 | 1195/ 8400 batches | test loss 0.4796600 +| epoch 5 | 1199/ 8400 batches | test loss 0.3926852 +| epoch 5 | 1203/ 8400 batches | test loss 0.4459923 +| epoch 5 | 1207/ 8400 batches | test loss 0.3771494 +| epoch 5 | 1211/ 8400 batches | test loss 0.4034376 +| epoch 5 | 1215/ 8400 batches | test loss 0.3430745 +| epoch 5 | 1219/ 8400 batches | test loss 0.4051784 +| epoch 5 | 1223/ 8400 batches | test loss 0.3254727 +| epoch 5 | 1227/ 8400 batches | test loss 0.3803971 +| epoch 5 | 1231/ 8400 batches | test loss 0.5889356 +| epoch 5 | 1235/ 8400 batches | test loss 0.3858085 +| epoch 5 | 1239/ 8400 batches | test loss 0.3949037 +| epoch 5 | 1243/ 8400 batches | test loss 0.3397685 +| epoch 5 | 1247/ 8400 batches | test loss 0.4652013 +| epoch 5 | 1251/ 8400 batches | test loss 0.3716852 +| epoch 5 | 1255/ 8400 batches | test loss 0.4029893 +| epoch 5 | 1259/ 8400 batches | test loss 0.5377659 +| epoch 5 | 1263/ 8400 batches | test loss 0.3490359 +| epoch 5 | 1267/ 8400 batches | test loss 0.3926030 +| epoch 5 | 1271/ 8400 batches | test loss 0.4403474 +| epoch 5 | 1275/ 8400 batches | test loss 0.4537002 +| epoch 5 | 1279/ 8400 batches | test loss 0.4985754 +| epoch 5 | 1283/ 8400 batches | test loss 0.3952470 +| epoch 5 | 1287/ 8400 batches | test loss 0.4635079 +| epoch 5 | 1291/ 8400 batches | test loss 0.4525380 +| epoch 5 | 1295/ 8400 batches | test loss 0.3641269 +| epoch 5 | 1299/ 8400 batches | test loss 0.3511792 +| epoch 5 | 1303/ 8400 batches | test loss 0.4235553 +| epoch 5 | 1307/ 8400 batches | test loss 0.4666672 +| epoch 5 | 1311/ 8400 batches | test loss 0.3528440 +| epoch 5 | 1315/ 8400 batches | test loss 0.4406258 +| epoch 5 | 1319/ 8400 batches | test loss 0.4010177 +| epoch 5 | 1323/ 8400 batches | test loss 0.3734381 +| epoch 5 | 1327/ 8400 batches | test loss 0.4489574 +| epoch 5 | 1331/ 8400 batches | test loss 0.4258344 +| epoch 5 | 1335/ 8400 batches | test loss 0.3608587 +| epoch 5 | 1339/ 8400 batches | test loss 0.4741129 +| epoch 5 | 1343/ 8400 batches | test loss 0.3880018 +| epoch 5 | 1347/ 8400 batches | test loss 0.4738807 +| epoch 5 | 1351/ 8400 batches | test loss 0.4751055 +| epoch 5 | 1355/ 8400 batches | test loss 0.4329580 +| epoch 5 | 1359/ 8400 batches | test loss 0.5001830 +| epoch 5 | 1363/ 8400 batches | test loss 0.4429381 +| epoch 5 | 1367/ 8400 batches | test loss 0.2080332 +| epoch 5 | 1371/ 8400 batches | test loss 0.4566426 +| epoch 5 | 1375/ 8400 batches | test loss 0.3543739 +| epoch 5 | 1379/ 8400 batches | test loss 0.4354818 +| epoch 5 | 1383/ 8400 batches | test loss 0.4342403 +| epoch 5 | 1387/ 8400 batches | test loss 0.4606170 +| epoch 5 | 1391/ 8400 batches | test loss 0.3355154 +| epoch 5 | 1395/ 8400 batches | test loss 0.3956876 +| epoch 5 | 1399/ 8400 batches | test loss 0.4765332 +| epoch 5 | 1403/ 8400 batches | test loss 0.4873756 +| epoch 5 | 1407/ 8400 batches | test loss 0.4101719 +| epoch 5 | 1411/ 8400 batches | test loss 0.5580243 +| epoch 5 | 1415/ 8400 batches | test loss 0.3834758 +| epoch 5 | 1419/ 8400 batches | test loss 0.3577780 +| epoch 5 | 1423/ 8400 batches | test loss 0.4596465 +| epoch 5 | 1427/ 8400 batches | test loss 0.4252709 +| epoch 5 | 1431/ 8400 batches | test loss 0.3370401 +| epoch 5 | 1435/ 8400 batches | test loss 0.4317967 +| epoch 5 | 1439/ 8400 batches | test loss 0.4194523 +| epoch 5 | 1443/ 8400 batches | test loss 0.4606568 +| epoch 5 | 1447/ 8400 batches | test loss 0.4548002 +| epoch 5 | 1451/ 8400 batches | test loss 0.5391012 +| epoch 5 | 1455/ 8400 batches | test loss 0.3973531 +| epoch 5 | 1459/ 8400 batches | test loss 0.4213045 +| epoch 5 | 1463/ 8400 batches | test loss 0.4699511 +| epoch 5 | 1467/ 8400 batches | test loss 0.5333843 +| epoch 5 | 1471/ 8400 batches | test loss 0.4810286 +| epoch 5 | 1475/ 8400 batches | test loss 0.3337545 +| epoch 5 | 1479/ 8400 batches | test loss 0.4346697 +| epoch 5 | 1483/ 8400 batches | test loss 0.4108838 +| epoch 5 | 1487/ 8400 batches | test loss 0.3836389 +| epoch 5 | 1491/ 8400 batches | test loss 0.4042978 +| epoch 5 | 1495/ 8400 batches | test loss 0.4197289 +| epoch 5 | 1499/ 8400 batches | test loss 0.3890412 +| epoch 5 | 1503/ 8400 batches | test loss 0.3785302 +| epoch 5 | 1507/ 8400 batches | test loss 0.3884268 +| epoch 5 | 1511/ 8400 batches | test loss 0.4693097 +| epoch 5 | 1515/ 8400 batches | test loss 0.4394351 +| epoch 5 | 1519/ 8400 batches | test loss 0.4530702 +| epoch 5 | 1523/ 8400 batches | test loss 0.4582859 +| epoch 5 | 1527/ 8400 batches | test loss 0.4259241 +| epoch 5 | 1531/ 8400 batches | test loss 0.3692263 +| epoch 5 | 1535/ 8400 batches | test loss 0.3891971 +| epoch 5 | 1539/ 8400 batches | test loss 0.3396764 +| epoch 5 | 1543/ 8400 batches | test loss 0.3855709 +| epoch 5 | 1547/ 8400 batches | test loss 0.3621160 +| epoch 5 | 1551/ 8400 batches | test loss 0.3712816 +| epoch 5 | 1555/ 8400 batches | test loss 0.4204972 +| epoch 5 | 1559/ 8400 batches | test loss 0.3543980 +| epoch 5 | 1563/ 8400 batches | test loss 0.4093551 +| epoch 5 | 1567/ 8400 batches | test loss 0.3888423 +| epoch 5 | 1571/ 8400 batches | test loss 0.5020699 +| epoch 5 | 1575/ 8400 batches | test loss 0.4390932 +| epoch 5 | 1579/ 8400 batches | test loss 0.2787793 +| epoch 5 | 1583/ 8400 batches | test loss 0.5448709 +| epoch 5 | 1587/ 8400 batches | test loss 0.4435712 +| epoch 5 | 1591/ 8400 batches | test loss 0.4669559 +| epoch 5 | 1595/ 8400 batches | test loss 0.4175251 +| epoch 5 | 1599/ 8400 batches | test loss 0.4365200 +| epoch 5 | 1603/ 8400 batches | test loss 0.4113303 +| epoch 5 | 1607/ 8400 batches | test loss 0.4033810 +| epoch 5 | 1611/ 8400 batches | test loss 0.3364513 +| epoch 5 | 1615/ 8400 batches | test loss 0.3810959 +| epoch 5 | 1619/ 8400 batches | test loss 0.4261304 +| epoch 5 | 1623/ 8400 batches | test loss 0.3602086 +| epoch 5 | 1627/ 8400 batches | test loss 0.3826354 +| epoch 5 | 1631/ 8400 batches | test loss 0.5483658 +| epoch 5 | 1635/ 8400 batches | test loss 0.3517270 +| epoch 5 | 1639/ 8400 batches | test loss 0.3974413 +| epoch 5 | 1643/ 8400 batches | test loss 0.3484067 +| epoch 5 | 1647/ 8400 batches | test loss 0.5024872 +| epoch 5 | 1651/ 8400 batches | test loss 0.3658461 +| epoch 5 | 1655/ 8400 batches | test loss 0.4122785 +| epoch 5 | 1659/ 8400 batches | test loss 0.4602953 +| epoch 5 | 1663/ 8400 batches | test loss 0.4112669 +| epoch 5 | 1667/ 8400 batches | test loss 0.4756748 +| epoch 5 | 1671/ 8400 batches | test loss 0.3616233 +| epoch 5 | 1675/ 8400 batches | test loss 0.4794928 +| epoch 5 | 1679/ 8400 batches | test loss 0.3844147 +| epoch 5 | 1683/ 8400 batches | test loss 0.3447702 +| epoch 5 | 1687/ 8400 batches | test loss 0.4411282 +| epoch 5 | 1691/ 8400 batches | test loss 0.4740912 +| epoch 5 | 1695/ 8400 batches | test loss 0.4534195 +| epoch 5 | 1699/ 8400 batches | test loss 0.3897404 +| epoch 5 | 1703/ 8400 batches | test loss 0.4758919 +| epoch 5 | 1707/ 8400 batches | test loss 0.5238771 +| epoch 5 | 1711/ 8400 batches | test loss 0.4348967 +| epoch 5 | 1715/ 8400 batches | test loss 0.4426954 +| epoch 5 | 1719/ 8400 batches | test loss 0.4272698 +| epoch 5 | 1723/ 8400 batches | test loss 0.3858317 +| epoch 5 | 1727/ 8400 batches | test loss 0.4294028 +| epoch 5 | 1731/ 8400 batches | test loss 0.4534378 +| epoch 5 | 1735/ 8400 batches | test loss 0.4405977 +| epoch 5 | 1739/ 8400 batches | test loss 0.4438602 +| epoch 5 | 1743/ 8400 batches | test loss 0.3643002 +| epoch 5 | 1747/ 8400 batches | test loss 0.3377528 +| epoch 5 | 1751/ 8400 batches | test loss 0.3829893 +| epoch 5 | 1755/ 8400 batches | test loss 0.4104714 +| epoch 5 | 1759/ 8400 batches | test loss 0.3515492 +| epoch 5 | 1763/ 8400 batches | test loss 0.4545927 +| epoch 5 | 1767/ 8400 batches | test loss 0.3727154 +| epoch 5 | 1771/ 8400 batches | test loss 0.3535689 +| epoch 5 | 1775/ 8400 batches | test loss 0.5081515 +| epoch 5 | 1779/ 8400 batches | test loss 0.3722182 +| epoch 5 | 1783/ 8400 batches | test loss 0.5214995 +| epoch 5 | 1787/ 8400 batches | test loss 0.4565086 +| epoch 5 | 1791/ 8400 batches | test loss 0.4050975 +| epoch 5 | 1795/ 8400 batches | test loss 0.3746391 +| epoch 5 | 1799/ 8400 batches | test loss 0.4536795 +| epoch 5 | 1803/ 8400 batches | test loss 0.3661393 +| epoch 5 | 1807/ 8400 batches | test loss 0.3523882 +| epoch 5 | 1811/ 8400 batches | test loss 0.4532555 +| epoch 5 | 1815/ 8400 batches | test loss 0.3555771 +| epoch 5 | 1819/ 8400 batches | test loss 0.4733848 +| epoch 5 | 1823/ 8400 batches | test loss 0.3922299 +| epoch 5 | 1827/ 8400 batches | test loss 0.4228698 +| epoch 5 | 1831/ 8400 batches | test loss 0.4252592 +| epoch 5 | 1835/ 8400 batches | test loss 0.4479518 +| epoch 5 | 1839/ 8400 batches | test loss 0.4922675 +| epoch 5 | 1843/ 8400 batches | test loss 0.4256256 +| epoch 5 | 1847/ 8400 batches | test loss 0.3717259 +| epoch 5 | 1851/ 8400 batches | test loss 0.4056844 +| epoch 5 | 1855/ 8400 batches | test loss 0.4551736 +| epoch 5 | 1859/ 8400 batches | test loss 0.3702711 +| epoch 5 | 1863/ 8400 batches | test loss 0.4845460 +| epoch 5 | 1867/ 8400 batches | test loss 0.4269054 +| epoch 5 | 1871/ 8400 batches | test loss 0.4273705 +| epoch 5 | 1875/ 8400 batches | test loss 0.4444812 +| epoch 5 | 1879/ 8400 batches | test loss 0.3676918 +| epoch 5 | 1883/ 8400 batches | test loss 0.3873599 +| epoch 5 | 1887/ 8400 batches | test loss 0.3555936 +| epoch 5 | 1891/ 8400 batches | test loss 0.4027476 +| epoch 5 | 1895/ 8400 batches | test loss 0.4637114 +| epoch 5 | 1899/ 8400 batches | test loss 0.3647832 +| epoch 5 | 1903/ 8400 batches | test loss 0.4006492 +| epoch 5 | 1907/ 8400 batches | test loss 0.3752163 +| epoch 5 | 1911/ 8400 batches | test loss 0.3525182 +| epoch 5 | 1915/ 8400 batches | test loss 0.4567364 +| epoch 5 | 1919/ 8400 batches | test loss 0.4038323 +| epoch 5 | 1923/ 8400 batches | test loss 0.3453238 +| epoch 5 | 1927/ 8400 batches | test loss 0.4187309 +| epoch 5 | 1931/ 8400 batches | test loss 0.5471416 +| epoch 5 | 1935/ 8400 batches | test loss 0.4534431 +| epoch 5 | 1939/ 8400 batches | test loss 0.3494802 +| epoch 5 | 1943/ 8400 batches | test loss 0.3739800 +| epoch 5 | 1947/ 8400 batches | test loss 0.4074961 +| epoch 5 | 1951/ 8400 batches | test loss 0.4151353 +| epoch 5 | 1955/ 8400 batches | test loss 0.4455315 +| epoch 5 | 1959/ 8400 batches | test loss 0.4878275 +| epoch 5 | 1963/ 8400 batches | test loss 0.3786412 +| epoch 5 | 1967/ 8400 batches | test loss 0.4336976 +| epoch 5 | 1971/ 8400 batches | test loss 0.4752432 +| epoch 5 | 1975/ 8400 batches | test loss 0.3845681 +| epoch 5 | 1979/ 8400 batches | test loss 0.4246788 +| epoch 5 | 1983/ 8400 batches | test loss 0.3477046 +| epoch 5 | 1987/ 8400 batches | test loss 0.3896405 +| epoch 5 | 1991/ 8400 batches | test loss 0.4166415 +| epoch 5 | 1995/ 8400 batches | test loss 0.3474495 +| epoch 5 | 1999/ 8400 batches | test loss 0.4224854 +| epoch 5 | 2003/ 8400 batches | test loss 0.3562869 +| epoch 5 | 2007/ 8400 batches | test loss 0.4076093 +| epoch 5 | 2011/ 8400 batches | test loss 0.4213435 +| epoch 5 | 2015/ 8400 batches | test loss 0.3896737 +| epoch 5 | 2019/ 8400 batches | test loss 0.3886611 +| epoch 5 | 2023/ 8400 batches | test loss 0.3688531 +| epoch 5 | 2027/ 8400 batches | test loss 0.4310322 +| epoch 5 | 2031/ 8400 batches | test loss 0.5222917 +| epoch 5 | 2035/ 8400 batches | test loss 0.3627065 +| epoch 5 | 2039/ 8400 batches | test loss 0.3785628 +| epoch 5 | 2043/ 8400 batches | test loss 0.4208378 +| epoch 5 | 2047/ 8400 batches | test loss 0.4943438 +| epoch 5 | 2051/ 8400 batches | test loss 0.4873078 +| epoch 5 | 2055/ 8400 batches | test loss 0.3563361 +| epoch 5 | 2059/ 8400 batches | test loss 0.4110042 +| epoch 5 | 2063/ 8400 batches | test loss 0.5073885 +| epoch 5 | 2067/ 8400 batches | test loss 0.4314131 +| epoch 5 | 2071/ 8400 batches | test loss 0.4158028 +| epoch 5 | 2075/ 8400 batches | test loss 0.4754640 +| epoch 5 | 2079/ 8400 batches | test loss 0.4747609 +| epoch 5 | 2083/ 8400 batches | test loss 0.4201314 +| epoch 5 | 2087/ 8400 batches | test loss 0.4066356 +| epoch 5 | 2091/ 8400 batches | test loss 0.4165099 +| epoch 5 | 2095/ 8400 batches | test loss 0.4083472 +| epoch 5 | 2099/ 8400 batches | test loss 0.4643781 +| epoch 5 | final test loss 0.4161, save model! +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 8400 batches | train loss 0.3352602 +| epoch 6 | 7/ 8400 batches | train loss 0.4034452 +| epoch 6 | 11/ 8400 batches | train loss 0.3276287 +| epoch 6 | 15/ 8400 batches | train loss 0.2759177 +| epoch 6 | 19/ 8400 batches | train loss 0.3464706 +| epoch 6 | 23/ 8400 batches | train loss 0.4159283 +| epoch 6 | 27/ 8400 batches | train loss 0.3991906 +| epoch 6 | 31/ 8400 batches | train loss 0.3806580 +| epoch 6 | 35/ 8400 batches | train loss 0.4581714 +| epoch 6 | 39/ 8400 batches | train loss 0.3977798 +| epoch 6 | 43/ 8400 batches | train loss 0.3543004 +| epoch 6 | 47/ 8400 batches | train loss 0.3298187 +| epoch 6 | 51/ 8400 batches | train loss 0.3725246 +| epoch 6 | 55/ 8400 batches | train loss 0.4101568 +| epoch 6 | 59/ 8400 batches | train loss 0.3643169 +| epoch 6 | 63/ 8400 batches | train loss 0.3539419 +| epoch 6 | 67/ 8400 batches | train loss 0.3598305 +| epoch 6 | 71/ 8400 batches | train loss 0.3290564 +| epoch 6 | 75/ 8400 batches | train loss 0.3072927 +| epoch 6 | 79/ 8400 batches | train loss 0.3375075 +| epoch 6 | 83/ 8400 batches | train loss 0.3788284 +| epoch 6 | 87/ 8400 batches | train loss 0.4109521 +| epoch 6 | 91/ 8400 batches | train loss 0.4806017 +| epoch 6 | 95/ 8400 batches | train loss 0.3328507 +| epoch 6 | 99/ 8400 batches | train loss 0.3892934 +| epoch 6 | 103/ 8400 batches | train loss 0.3587120 +| epoch 6 | 107/ 8400 batches | train loss 0.3568226 +| epoch 6 | 111/ 8400 batches | train loss 0.3671097 +| epoch 6 | 115/ 8400 batches | train loss 0.4017566 +| epoch 6 | 119/ 8400 batches | train loss 0.3797798 +| epoch 6 | 123/ 8400 batches | train loss 0.3901378 +| epoch 6 | 127/ 8400 batches | train loss 0.3792858 +| epoch 6 | 131/ 8400 batches | train loss 0.3510512 +| epoch 6 | 135/ 8400 batches | train loss 0.3151927 +| epoch 6 | 139/ 8400 batches | train loss 0.2941960 +| epoch 6 | 143/ 8400 batches | train loss 0.3335347 +| epoch 6 | 147/ 8400 batches | train loss 0.3202155 +| epoch 6 | 151/ 8400 batches | train loss 0.4220705 +| epoch 6 | 155/ 8400 batches | train loss 0.3784717 +| epoch 6 | 159/ 8400 batches | train loss 0.4322197 +| epoch 6 | 163/ 8400 batches | train loss 0.3765419 +| epoch 6 | 167/ 8400 batches | train loss 0.3376994 +| epoch 6 | 171/ 8400 batches | train loss 0.3584571 +| epoch 6 | 175/ 8400 batches | train loss 0.4472423 +| epoch 6 | 179/ 8400 batches | train loss 0.4007549 +| epoch 6 | 183/ 8400 batches | train loss 0.3886894 +| epoch 6 | 187/ 8400 batches | train loss 0.4048743 +| epoch 6 | 191/ 8400 batches | train loss 0.3103116 +| epoch 6 | 195/ 8400 batches | train loss 0.4056602 +| epoch 6 | 199/ 8400 batches | train loss 0.3923740 +| epoch 6 | 203/ 8400 batches | train loss 0.3814020 +| epoch 6 | 207/ 8400 batches | train loss 0.4312130 +| epoch 6 | 211/ 8400 batches | train loss 0.3487119 +| epoch 6 | 215/ 8400 batches | train loss 0.4134720 +| epoch 6 | 219/ 8400 batches | train loss 0.3600551 +| epoch 6 | 223/ 8400 batches | train loss 0.4177120 +| epoch 6 | 227/ 8400 batches | train loss 0.3706492 +| epoch 6 | 231/ 8400 batches | train loss 0.4073937 +| epoch 6 | 235/ 8400 batches | train loss 0.3318343 +| epoch 6 | 239/ 8400 batches | train loss 0.3922773 +| epoch 6 | 243/ 8400 batches | train loss 0.3374960 +| epoch 6 | 247/ 8400 batches | train loss 0.2895194 +| epoch 6 | 251/ 8400 batches | train loss 0.3293269 +| epoch 6 | 255/ 8400 batches | train loss 0.4157060 +| epoch 6 | 259/ 8400 batches | train loss 0.4237673 +| epoch 6 | 263/ 8400 batches | train loss 0.3885695 +| epoch 6 | 267/ 8400 batches | train loss 0.3858320 +| epoch 6 | 271/ 8400 batches | train loss 0.4185185 +| epoch 6 | 275/ 8400 batches | train loss 0.3430184 +| epoch 6 | 279/ 8400 batches | train loss 0.4014586 +| epoch 6 | 283/ 8400 batches | train loss 0.3364711 +| epoch 6 | 287/ 8400 batches | train loss 0.4124838 +| epoch 6 | 291/ 8400 batches | train loss 0.4603709 +| epoch 6 | 295/ 8400 batches | train loss 0.4393502 +| epoch 6 | 299/ 8400 batches | train loss 0.4244357 +| epoch 6 | 303/ 8400 batches | train loss 0.4133539 +| epoch 6 | 307/ 8400 batches | train loss 0.3912539 +| epoch 6 | 311/ 8400 batches | train loss 0.3503352 +| epoch 6 | 315/ 8400 batches | train loss 0.3681290 +| epoch 6 | 319/ 8400 batches | train loss 0.3926110 +| epoch 6 | 323/ 8400 batches | train loss 0.4194769 +| epoch 6 | 327/ 8400 batches | train loss 0.4056160 +| epoch 6 | 331/ 8400 batches | train loss 0.3557338 +| epoch 6 | 335/ 8400 batches | train loss 0.3969001 +| epoch 6 | 339/ 8400 batches | train loss 0.3196511 +| epoch 6 | 343/ 8400 batches | train loss 0.3681539 +| epoch 6 | 347/ 8400 batches | train loss 0.4016116 +| epoch 6 | 351/ 8400 batches | train loss 0.3170732 +| epoch 6 | 355/ 8400 batches | train loss 0.4522436 +| epoch 6 | 359/ 8400 batches | train loss 0.3871351 +| epoch 6 | 363/ 8400 batches | train loss 0.3343869 +| epoch 6 | 367/ 8400 batches | train loss 0.3589318 +| epoch 6 | 371/ 8400 batches | train loss 0.3121822 +| epoch 6 | 375/ 8400 batches | train loss 0.3865325 +| epoch 6 | 379/ 8400 batches | train loss 0.3153836 +| epoch 6 | 383/ 8400 batches | train loss 0.4169752 +| epoch 6 | 387/ 8400 batches | train loss 0.3730085 +| epoch 6 | 391/ 8400 batches | train loss 0.4173658 +| epoch 6 | 395/ 8400 batches | train loss 0.3774352 +| epoch 6 | 399/ 8400 batches | train loss 0.3864037 +| epoch 6 | 403/ 8400 batches | train loss 0.4219750 +| epoch 6 | 407/ 8400 batches | train loss 0.3950208 +| epoch 6 | 411/ 8400 batches | train loss 0.4426880 +| epoch 6 | 415/ 8400 batches | train loss 0.4752514 +| epoch 6 | 419/ 8400 batches | train loss 0.3988588 +| epoch 6 | 423/ 8400 batches | train loss 0.4243290 +| epoch 6 | 427/ 8400 batches | train loss 0.3909495 +| epoch 6 | 431/ 8400 batches | train loss 0.3871161 +| epoch 6 | 435/ 8400 batches | train loss 0.4566404 +| epoch 6 | 439/ 8400 batches | train loss 0.3591867 +| epoch 6 | 443/ 8400 batches | train loss 0.4185424 +| epoch 6 | 447/ 8400 batches | train loss 0.5138841 +| epoch 6 | 451/ 8400 batches | train loss 0.3914879 +| epoch 6 | 455/ 8400 batches | train loss 0.3625729 +| epoch 6 | 459/ 8400 batches | train loss 0.3462086 +| epoch 6 | 463/ 8400 batches | train loss 0.3886726 +| epoch 6 | 467/ 8400 batches | train loss 0.5062033 +| epoch 6 | 471/ 8400 batches | train loss 0.3517201 +| epoch 6 | 475/ 8400 batches | train loss 0.3090270 +| epoch 6 | 479/ 8400 batches | train loss 0.3503413 +| epoch 6 | 483/ 8400 batches | train loss 0.3755658 +| epoch 6 | 487/ 8400 batches | train loss 0.3386906 +| epoch 6 | 491/ 8400 batches | train loss 0.4174856 +| epoch 6 | 495/ 8400 batches | train loss 0.3839303 +| epoch 6 | 499/ 8400 batches | train loss 0.3946356 +| epoch 6 | 503/ 8400 batches | train loss 0.3939262 +| epoch 6 | 507/ 8400 batches | train loss 0.4362429 +| epoch 6 | 511/ 8400 batches | train loss 0.3602044 +| epoch 6 | 515/ 8400 batches | train loss 0.3669746 +| epoch 6 | 519/ 8400 batches | train loss 0.3268801 +| epoch 6 | 523/ 8400 batches | train loss 0.3222126 +| epoch 6 | 527/ 8400 batches | train loss 0.3612987 +| epoch 6 | 531/ 8400 batches | train loss 0.3817204 +| epoch 6 | 535/ 8400 batches | train loss 0.3689955 +| epoch 6 | 539/ 8400 batches | train loss 0.3861227 +| epoch 6 | 543/ 8400 batches | train loss 0.4504900 +| epoch 6 | 547/ 8400 batches | train loss 0.2440837 +| epoch 6 | 551/ 8400 batches | train loss 0.2283507 +| epoch 6 | 555/ 8400 batches | train loss 0.4282149 +| epoch 6 | 559/ 8400 batches | train loss 0.3184243 +| epoch 6 | 563/ 8400 batches | train loss 0.3973795 +| epoch 6 | 567/ 8400 batches | train loss 0.3715762 +| epoch 6 | 571/ 8400 batches | train loss 0.4003580 +| epoch 6 | 575/ 8400 batches | train loss 0.3432633 +| epoch 6 | 579/ 8400 batches | train loss 0.3977844 +| epoch 6 | 583/ 8400 batches | train loss 0.2973014 +| epoch 6 | 587/ 8400 batches | train loss 0.3513889 +| epoch 6 | 591/ 8400 batches | train loss 0.3724656 +| epoch 6 | 595/ 8400 batches | train loss 0.3250248 +| epoch 6 | 599/ 8400 batches | train loss 0.3660639 +| epoch 6 | 603/ 8400 batches | train loss 0.3968571 +| epoch 6 | 607/ 8400 batches | train loss 0.4398240 +| epoch 6 | 611/ 8400 batches | train loss 0.3913989 +| epoch 6 | 615/ 8400 batches | train loss 0.4541927 +| epoch 6 | 619/ 8400 batches | train loss 0.3732216 +| epoch 6 | 623/ 8400 batches | train loss 0.3476724 +| epoch 6 | 627/ 8400 batches | train loss 0.3727251 +| epoch 6 | 631/ 8400 batches | train loss 0.3535458 +| epoch 6 | 635/ 8400 batches | train loss 0.4174107 +| epoch 6 | 639/ 8400 batches | train loss 0.3376676 +| epoch 6 | 643/ 8400 batches | train loss 0.3667646 +| epoch 6 | 647/ 8400 batches | train loss 0.3642448 +| epoch 6 | 651/ 8400 batches | train loss 0.3997958 +| epoch 6 | 655/ 8400 batches | train loss 0.4182173 +| epoch 6 | 659/ 8400 batches | train loss 0.3063306 +| epoch 6 | 663/ 8400 batches | train loss 0.3868991 +| epoch 6 | 667/ 8400 batches | train loss 0.4234166 +| epoch 6 | 671/ 8400 batches | train loss 0.3812182 +| epoch 6 | 675/ 8400 batches | train loss 0.3493794 +| epoch 6 | 679/ 8400 batches | train loss 0.3805976 +| epoch 6 | 683/ 8400 batches | train loss 0.3267205 +| epoch 6 | 687/ 8400 batches | train loss 0.3861987 +| epoch 6 | 691/ 8400 batches | train loss 0.3317506 +| epoch 6 | 695/ 8400 batches | train loss 0.3543415 +| epoch 6 | 699/ 8400 batches | train loss 0.3184830 +| epoch 6 | 703/ 8400 batches | train loss 0.3932331 +| epoch 6 | 707/ 8400 batches | train loss 0.3988332 +| epoch 6 | 711/ 8400 batches | train loss 0.3649001 +| epoch 6 | 715/ 8400 batches | train loss 0.3396919 +| epoch 6 | 719/ 8400 batches | train loss 0.4140888 +| epoch 6 | 723/ 8400 batches | train loss 0.3621427 +| epoch 6 | 727/ 8400 batches | train loss 0.3937938 +| epoch 6 | 731/ 8400 batches | train loss 0.3987916 +| epoch 6 | 735/ 8400 batches | train loss 0.3866431 +| epoch 6 | 739/ 8400 batches | train loss 0.4236072 +| epoch 6 | 743/ 8400 batches | train loss 0.4152206 +| epoch 6 | 747/ 8400 batches | train loss 0.3609224 +| epoch 6 | 751/ 8400 batches | train loss 0.2959008 +| epoch 6 | 755/ 8400 batches | train loss 0.3756574 +| epoch 6 | 759/ 8400 batches | train loss 0.3735424 +| epoch 6 | 763/ 8400 batches | train loss 0.3844318 +| epoch 6 | 767/ 8400 batches | train loss 0.3141677 +| epoch 6 | 771/ 8400 batches | train loss 0.3493676 +| epoch 6 | 775/ 8400 batches | train loss 0.3719960 +| epoch 6 | 779/ 8400 batches | train loss 0.3147733 +| epoch 6 | 783/ 8400 batches | train loss 0.3275938 +| epoch 6 | 787/ 8400 batches | train loss 0.3421133 +| epoch 6 | 791/ 8400 batches | train loss 0.3891474 +| epoch 6 | 795/ 8400 batches | train loss 0.3716626 +| epoch 6 | 799/ 8400 batches | train loss 0.3471649 +| epoch 6 | 803/ 8400 batches | train loss 0.3248166 +| epoch 6 | 807/ 8400 batches | train loss 0.3052281 +| epoch 6 | 811/ 8400 batches | train loss 0.3334044 +| epoch 6 | 815/ 8400 batches | train loss 0.3812474 +| epoch 6 | 819/ 8400 batches | train loss 0.3182039 +| epoch 6 | 823/ 8400 batches | train loss 0.3274369 +| epoch 6 | 827/ 8400 batches | train loss 0.3340018 +| epoch 6 | 831/ 8400 batches | train loss 0.3173720 +| epoch 6 | 835/ 8400 batches | train loss 0.3501167 +| epoch 6 | 839/ 8400 batches | train loss 0.3675368 +| epoch 6 | 843/ 8400 batches | train loss 0.4159189 +| epoch 6 | 847/ 8400 batches | train loss 0.3598009 +| epoch 6 | 851/ 8400 batches | train loss 0.3600536 +| epoch 6 | 855/ 8400 batches | train loss 0.4126962 +| epoch 6 | 859/ 8400 batches | train loss 0.3993977 +| epoch 6 | 863/ 8400 batches | train loss 0.3903396 +| epoch 6 | 867/ 8400 batches | train loss 0.4066041 +| epoch 6 | 871/ 8400 batches | train loss 0.4260870 +| epoch 6 | 875/ 8400 batches | train loss 0.3571916 +| epoch 6 | 879/ 8400 batches | train loss 0.3790126 +| epoch 6 | 883/ 8400 batches | train loss 0.4061103 +| epoch 6 | 887/ 8400 batches | train loss 0.3810120 +| epoch 6 | 891/ 8400 batches | train loss 0.4269447 +| epoch 6 | 895/ 8400 batches | train loss 0.4010987 +| epoch 6 | 899/ 8400 batches | train loss 0.3865614 +| epoch 6 | 903/ 8400 batches | train loss 0.3465762 +| epoch 6 | 907/ 8400 batches | train loss 0.3951751 +| epoch 6 | 911/ 8400 batches | train loss 0.4183234 +| epoch 6 | 915/ 8400 batches | train loss 0.3687306 +| epoch 6 | 919/ 8400 batches | train loss 0.4025963 +| epoch 6 | 923/ 8400 batches | train loss 0.4123092 +| epoch 6 | 927/ 8400 batches | train loss 0.4387080 +| epoch 6 | 931/ 8400 batches | train loss 0.4191537 +| epoch 6 | 935/ 8400 batches | train loss 0.3717653 +| epoch 6 | 939/ 8400 batches | train loss 0.4065209 +| epoch 6 | 943/ 8400 batches | train loss 0.4151774 +| epoch 6 | 947/ 8400 batches | train loss 0.3620658 +| epoch 6 | 951/ 8400 batches | train loss 0.3327878 +| epoch 6 | 955/ 8400 batches | train loss 0.4178671 +| epoch 6 | 959/ 8400 batches | train loss 0.4015250 +| epoch 6 | 963/ 8400 batches | train loss 0.3613553 +| epoch 6 | 967/ 8400 batches | train loss 0.3450003 +| epoch 6 | 971/ 8400 batches | train loss 0.4771832 +| epoch 6 | 975/ 8400 batches | train loss 0.4098666 +| epoch 6 | 979/ 8400 batches | train loss 0.4860812 +| epoch 6 | 983/ 8400 batches | train loss 0.4043255 +| epoch 6 | 987/ 8400 batches | train loss 0.3972383 +| epoch 6 | 991/ 8400 batches | train loss 0.3303579 +| epoch 6 | 995/ 8400 batches | train loss 0.4240750 +| epoch 6 | 999/ 8400 batches | train loss 0.3351360 +| epoch 6 | 1003/ 8400 batches | train loss 0.4039112 +| epoch 6 | 1007/ 8400 batches | train loss 0.3661814 +| epoch 6 | 1011/ 8400 batches | train loss 0.3218175 +| epoch 6 | 1015/ 8400 batches | train loss 0.3771191 +| epoch 6 | 1019/ 8400 batches | train loss 0.3511248 +| epoch 6 | 1023/ 8400 batches | train loss 0.3133594 +| epoch 6 | 1027/ 8400 batches | train loss 0.4331604 +| epoch 6 | 1031/ 8400 batches | train loss 0.3308198 +| epoch 6 | 1035/ 8400 batches | train loss 0.3931198 +| epoch 6 | 1039/ 8400 batches | train loss 0.3518105 +| epoch 6 | 1043/ 8400 batches | train loss 0.4109598 +| epoch 6 | 1047/ 8400 batches | train loss 0.3763856 +| epoch 6 | 1051/ 8400 batches | train loss 0.3763724 +| epoch 6 | 1055/ 8400 batches | train loss 0.3403887 +| epoch 6 | 1059/ 8400 batches | train loss 0.4089203 +| epoch 6 | 1063/ 8400 batches | train loss 0.4518958 +| epoch 6 | 1067/ 8400 batches | train loss 0.3598890 +| epoch 6 | 1071/ 8400 batches | train loss 0.3718832 +| epoch 6 | 1075/ 8400 batches | train loss 0.3038712 +| epoch 6 | 1079/ 8400 batches | train loss 0.3982578 +| epoch 6 | 1083/ 8400 batches | train loss 0.3457000 +| epoch 6 | 1087/ 8400 batches | train loss 0.2800511 +| epoch 6 | 1091/ 8400 batches | train loss 0.4277276 +| epoch 6 | 1095/ 8400 batches | train loss 0.3513747 +| epoch 6 | 1099/ 8400 batches | train loss 0.3768365 +| epoch 6 | 1103/ 8400 batches | train loss 0.3947709 +| epoch 6 | 1107/ 8400 batches | train loss 0.2470580 +| epoch 6 | 1111/ 8400 batches | train loss 0.3117945 +| epoch 6 | 1115/ 8400 batches | train loss 0.3895592 +| epoch 6 | 1119/ 8400 batches | train loss 0.3885432 +| epoch 6 | 1123/ 8400 batches | train loss 0.3627329 +| epoch 6 | 1127/ 8400 batches | train loss 0.3555608 +| epoch 6 | 1131/ 8400 batches | train loss 0.3470795 +| epoch 6 | 1135/ 8400 batches | train loss 0.3247011 +| epoch 6 | 1139/ 8400 batches | train loss 0.3518071 +| epoch 6 | 1143/ 8400 batches | train loss 0.3646526 +| epoch 6 | 1147/ 8400 batches | train loss 0.4297494 +| epoch 6 | 1151/ 8400 batches | train loss 0.3295239 +| epoch 6 | 1155/ 8400 batches | train loss 0.3747100 +| epoch 6 | 1159/ 8400 batches | train loss 0.3162222 +| epoch 6 | 1163/ 8400 batches | train loss 0.3863120 +| epoch 6 | 1167/ 8400 batches | train loss 0.4139987 +| epoch 6 | 1171/ 8400 batches | train loss 0.3350745 +| epoch 6 | 1175/ 8400 batches | train loss 0.3713342 +| epoch 6 | 1179/ 8400 batches | train loss 0.3314153 +| epoch 6 | 1183/ 8400 batches | train loss 0.3652607 +| epoch 6 | 1187/ 8400 batches | train loss 0.3416169 +| epoch 6 | 1191/ 8400 batches | train loss 0.4019317 +| epoch 6 | 1195/ 8400 batches | train loss 0.3372093 +| epoch 6 | 1199/ 8400 batches | train loss 0.2931436 +| epoch 6 | 1203/ 8400 batches | train loss 0.2561593 +| epoch 6 | 1207/ 8400 batches | train loss 0.4005226 +| epoch 6 | 1211/ 8400 batches | train loss 0.3531214 +| epoch 6 | 1215/ 8400 batches | train loss 0.4866806 +| epoch 6 | 1219/ 8400 batches | train loss 0.3721314 +| epoch 6 | 1223/ 8400 batches | train loss 0.3445761 +| epoch 6 | 1227/ 8400 batches | train loss 0.3843962 +| epoch 6 | 1231/ 8400 batches | train loss 0.3758525 +| epoch 6 | 1235/ 8400 batches | train loss 0.4124303 +| epoch 6 | 1239/ 8400 batches | train loss 0.3772943 +| epoch 6 | 1243/ 8400 batches | train loss 0.4828418 +| epoch 6 | 1247/ 8400 batches | train loss 0.3460830 +| epoch 6 | 1251/ 8400 batches | train loss 0.4199683 +| epoch 6 | 1255/ 8400 batches | train loss 0.3000222 +| epoch 6 | 1259/ 8400 batches | train loss 0.3381524 +| epoch 6 | 1263/ 8400 batches | train loss 0.4862753 +| epoch 6 | 1267/ 8400 batches | train loss 0.3690329 +| epoch 6 | 1271/ 8400 batches | train loss 0.3468590 +| epoch 6 | 1275/ 8400 batches | train loss 0.4157513 +| epoch 6 | 1279/ 8400 batches | train loss 0.3502184 +| epoch 6 | 1283/ 8400 batches | train loss 0.3955549 +| epoch 6 | 1287/ 8400 batches | train loss 0.3756474 +| epoch 6 | 1291/ 8400 batches | train loss 0.3229948 +| epoch 6 | 1295/ 8400 batches | train loss 0.3318085 +| epoch 6 | 1299/ 8400 batches | train loss 0.3943628 +| epoch 6 | 1303/ 8400 batches | train loss 0.3941238 +| epoch 6 | 1307/ 8400 batches | train loss 0.4132695 +| epoch 6 | 1311/ 8400 batches | train loss 0.3770238 +| epoch 6 | 1315/ 8400 batches | train loss 0.4685509 +| epoch 6 | 1319/ 8400 batches | train loss 0.3592205 +| epoch 6 | 1323/ 8400 batches | train loss 0.3542751 +| epoch 6 | 1327/ 8400 batches | train loss 0.3391317 +| epoch 6 | 1331/ 8400 batches | train loss 0.3943776 +| epoch 6 | 1335/ 8400 batches | train loss 0.3490002 +| epoch 6 | 1339/ 8400 batches | train loss 0.3852283 +| epoch 6 | 1343/ 8400 batches | train loss 0.3993634 +| epoch 6 | 1347/ 8400 batches | train loss 0.3975110 +| epoch 6 | 1351/ 8400 batches | train loss 0.3722853 +| epoch 6 | 1355/ 8400 batches | train loss 0.3551185 +| epoch 6 | 1359/ 8400 batches | train loss 0.3980045 +| epoch 6 | 1363/ 8400 batches | train loss 0.4595174 +| epoch 6 | 1367/ 8400 batches | train loss 0.4049789 +| epoch 6 | 1371/ 8400 batches | train loss 0.4042039 +| epoch 6 | 1375/ 8400 batches | train loss 0.4266555 +| epoch 6 | 1379/ 8400 batches | train loss 0.3650305 +| epoch 6 | 1383/ 8400 batches | train loss 0.4163360 +| epoch 6 | 1387/ 8400 batches | train loss 0.4258783 +| epoch 6 | 1391/ 8400 batches | train loss 0.3615914 +| epoch 6 | 1395/ 8400 batches | train loss 0.4282047 +| epoch 6 | 1399/ 8400 batches | train loss 0.4336793 +| epoch 6 | 1403/ 8400 batches | train loss 0.3827577 +| epoch 6 | 1407/ 8400 batches | train loss 0.3489428 +| epoch 6 | 1411/ 8400 batches | train loss 0.3019700 +| epoch 6 | 1415/ 8400 batches | train loss 0.3749155 +| epoch 6 | 1419/ 8400 batches | train loss 0.3680324 +| epoch 6 | 1423/ 8400 batches | train loss 0.3531500 +| epoch 6 | 1427/ 8400 batches | train loss 0.3654823 +| epoch 6 | 1431/ 8400 batches | train loss 0.4122591 +| epoch 6 | 1435/ 8400 batches | train loss 0.3691698 +| epoch 6 | 1439/ 8400 batches | train loss 0.4795751 +| epoch 6 | 1443/ 8400 batches | train loss 0.3598673 +| epoch 6 | 1447/ 8400 batches | train loss 0.4499405 +| epoch 6 | 1451/ 8400 batches | train loss 0.4450501 +| epoch 6 | 1455/ 8400 batches | train loss 0.3653165 +| epoch 6 | 1459/ 8400 batches | train loss 0.4247805 +| epoch 6 | 1463/ 8400 batches | train loss 0.3734949 +| epoch 6 | 1467/ 8400 batches | train loss 0.3649769 +| epoch 6 | 1471/ 8400 batches | train loss 0.3821343 +| epoch 6 | 1475/ 8400 batches | train loss 0.3178432 +| epoch 6 | 1479/ 8400 batches | train loss 0.3582290 +| epoch 6 | 1483/ 8400 batches | train loss 0.3070642 +| epoch 6 | 1487/ 8400 batches | train loss 0.4059746 +| epoch 6 | 1491/ 8400 batches | train loss 0.3100885 +| epoch 6 | 1495/ 8400 batches | train loss 0.3791546 +| epoch 6 | 1499/ 8400 batches | train loss 0.3664756 +| epoch 6 | 1503/ 8400 batches | train loss 0.3870856 +| epoch 6 | 1507/ 8400 batches | train loss 0.3944906 +| epoch 6 | 1511/ 8400 batches | train loss 0.3859374 +| epoch 6 | 1515/ 8400 batches | train loss 0.4096745 +| epoch 6 | 1519/ 8400 batches | train loss 0.3823345 +| epoch 6 | 1523/ 8400 batches | train loss 0.3568432 +| epoch 6 | 1527/ 8400 batches | train loss 0.4166606 +| epoch 6 | 1531/ 8400 batches | train loss 0.3271497 +| epoch 6 | 1535/ 8400 batches | train loss 0.3405007 +| epoch 6 | 1539/ 8400 batches | train loss 0.3846489 +| epoch 6 | 1543/ 8400 batches | train loss 0.3773271 +| epoch 6 | 1547/ 8400 batches | train loss 0.3880048 +| epoch 6 | 1551/ 8400 batches | train loss 0.4187602 +| epoch 6 | 1555/ 8400 batches | train loss 0.3747829 +| epoch 6 | 1559/ 8400 batches | train loss 0.4058972 +| epoch 6 | 1563/ 8400 batches | train loss 0.3831209 +| epoch 6 | 1567/ 8400 batches | train loss 0.3115096 +| epoch 6 | 1571/ 8400 batches | train loss 0.3900425 +| epoch 6 | 1575/ 8400 batches | train loss 0.3432448 +| epoch 6 | 1579/ 8400 batches | train loss 0.4576615 +| epoch 6 | 1583/ 8400 batches | train loss 0.3625264 +| epoch 6 | 1587/ 8400 batches | train loss 0.4101529 +| epoch 6 | 1591/ 8400 batches | train loss 0.3697402 +| epoch 6 | 1595/ 8400 batches | train loss 0.3183034 +| epoch 6 | 1599/ 8400 batches | train loss 0.3920129 +| epoch 6 | 1603/ 8400 batches | train loss 0.4312581 +| epoch 6 | 1607/ 8400 batches | train loss 0.3637378 +| epoch 6 | 1611/ 8400 batches | train loss 0.4087116 +| epoch 6 | 1615/ 8400 batches | train loss 0.3776525 +| epoch 6 | 1619/ 8400 batches | train loss 0.3217059 +| epoch 6 | 1623/ 8400 batches | train loss 0.3372436 +| epoch 6 | 1627/ 8400 batches | train loss 0.4088722 +| epoch 6 | 1631/ 8400 batches | train loss 0.3966233 +| epoch 6 | 1635/ 8400 batches | train loss 0.3401079 +| epoch 6 | 1639/ 8400 batches | train loss 0.3168565 +| epoch 6 | 1643/ 8400 batches | train loss 0.3550670 +| epoch 6 | 1647/ 8400 batches | train loss 0.3673292 +| epoch 6 | 1651/ 8400 batches | train loss 0.3749889 +| epoch 6 | 1655/ 8400 batches | train loss 0.3274802 +| epoch 6 | 1659/ 8400 batches | train loss 0.3861927 +| epoch 6 | 1663/ 8400 batches | train loss 0.3862819 +| epoch 6 | 1667/ 8400 batches | train loss 0.3488247 +| epoch 6 | 1671/ 8400 batches | train loss 0.3260162 +| epoch 6 | 1675/ 8400 batches | train loss 0.4145619 +| epoch 6 | 1679/ 8400 batches | train loss 0.3477265 +| epoch 6 | 1683/ 8400 batches | train loss 0.4462784 +| epoch 6 | 1687/ 8400 batches | train loss 0.4310157 +| epoch 6 | 1691/ 8400 batches | train loss 0.3547980 +| epoch 6 | 1695/ 8400 batches | train loss 0.4135979 +| epoch 6 | 1699/ 8400 batches | train loss 0.3325109 +| epoch 6 | 1703/ 8400 batches | train loss 0.2782818 +| epoch 6 | 1707/ 8400 batches | train loss 0.3222013 +| epoch 6 | 1711/ 8400 batches | train loss 0.4275050 +| epoch 6 | 1715/ 8400 batches | train loss 0.3847671 +| epoch 6 | 1719/ 8400 batches | train loss 0.3974667 +| epoch 6 | 1723/ 8400 batches | train loss 0.4560135 +| epoch 6 | 1727/ 8400 batches | train loss 0.3390045 +| epoch 6 | 1731/ 8400 batches | train loss 0.3591082 +| epoch 6 | 1735/ 8400 batches | train loss 0.4115462 +| epoch 6 | 1739/ 8400 batches | train loss 0.3692332 +| epoch 6 | 1743/ 8400 batches | train loss 0.3676457 +| epoch 6 | 1747/ 8400 batches | train loss 0.3916166 +| epoch 6 | 1751/ 8400 batches | train loss 0.3610037 +| epoch 6 | 1755/ 8400 batches | train loss 0.3426135 +| epoch 6 | 1759/ 8400 batches | train loss 0.3375524 +| epoch 6 | 1763/ 8400 batches | train loss 0.3266073 +| epoch 6 | 1767/ 8400 batches | train loss 0.4180935 +| epoch 6 | 1771/ 8400 batches | train loss 0.3999634 +| epoch 6 | 1775/ 8400 batches | train loss 0.2814757 +| epoch 6 | 1779/ 8400 batches | train loss 0.3436202 +| epoch 6 | 1783/ 8400 batches | train loss 0.3608111 +| epoch 6 | 1787/ 8400 batches | train loss 0.3370008 +| epoch 6 | 1791/ 8400 batches | train loss 0.3845862 +| epoch 6 | 1795/ 8400 batches | train loss 0.4062682 +| epoch 6 | 1799/ 8400 batches | train loss 0.3844839 +| epoch 6 | 1803/ 8400 batches | train loss 0.3315828 +| epoch 6 | 1807/ 8400 batches | train loss 0.3344727 +| epoch 6 | 1811/ 8400 batches | train loss 0.3007666 +| epoch 6 | 1815/ 8400 batches | train loss 0.3789766 +| epoch 6 | 1819/ 8400 batches | train loss 0.3242017 +| epoch 6 | 1823/ 8400 batches | train loss 0.3766241 +| epoch 6 | 1827/ 8400 batches | train loss 0.4113519 +| epoch 6 | 1831/ 8400 batches | train loss 0.3785626 +| epoch 6 | 1835/ 8400 batches | train loss 0.3801851 +| epoch 6 | 1839/ 8400 batches | train loss 0.2988723 +| epoch 6 | 1843/ 8400 batches | train loss 0.4273499 +| epoch 6 | 1847/ 8400 batches | train loss 0.3521492 +| epoch 6 | 1851/ 8400 batches | train loss 0.2980187 +| epoch 6 | 1855/ 8400 batches | train loss 0.3667436 +| epoch 6 | 1859/ 8400 batches | train loss 0.3823064 +| epoch 6 | 1863/ 8400 batches | train loss 0.3811944 +| epoch 6 | 1867/ 8400 batches | train loss 0.4153307 +| epoch 6 | 1871/ 8400 batches | train loss 0.3406351 +| epoch 6 | 1875/ 8400 batches | train loss 0.3032717 +| epoch 6 | 1879/ 8400 batches | train loss 0.3225051 +| epoch 6 | 1883/ 8400 batches | train loss 0.4329689 +| epoch 6 | 1887/ 8400 batches | train loss 0.3674408 +| epoch 6 | 1891/ 8400 batches | train loss 0.3492182 +| epoch 6 | 1895/ 8400 batches | train loss 0.2523614 +| epoch 6 | 1899/ 8400 batches | train loss 0.4348086 +| epoch 6 | 1903/ 8400 batches | train loss 0.4021987 +| epoch 6 | 1907/ 8400 batches | train loss 0.3918421 +| epoch 6 | 1911/ 8400 batches | train loss 0.3556637 +| epoch 6 | 1915/ 8400 batches | train loss 0.3285094 +| epoch 6 | 1919/ 8400 batches | train loss 0.4001157 +| epoch 6 | 1923/ 8400 batches | train loss 0.4346374 +| epoch 6 | 1927/ 8400 batches | train loss 0.3957096 +| epoch 6 | 1931/ 8400 batches | train loss 0.3948631 +| epoch 6 | 1935/ 8400 batches | train loss 0.4049888 +| epoch 6 | 1939/ 8400 batches | train loss 0.3110278 +| epoch 6 | 1943/ 8400 batches | train loss 0.4055555 +| epoch 6 | 1947/ 8400 batches | train loss 0.4091481 +| epoch 6 | 1951/ 8400 batches | train loss 0.3233429 +| epoch 6 | 1955/ 8400 batches | train loss 0.3090920 +| epoch 6 | 1959/ 8400 batches | train loss 0.3737701 +| epoch 6 | 1963/ 8400 batches | train loss 0.3338453 +| epoch 6 | 1967/ 8400 batches | train loss 0.4118308 +| epoch 6 | 1971/ 8400 batches | train loss 0.3894720 +| epoch 6 | 1975/ 8400 batches | train loss 0.2837796 +| epoch 6 | 1979/ 8400 batches | train loss 0.3740157 +| epoch 6 | 1983/ 8400 batches | train loss 0.3245732 +| epoch 6 | 1987/ 8400 batches | train loss 0.3595680 +| epoch 6 | 1991/ 8400 batches | train loss 0.3652268 +| epoch 6 | 1995/ 8400 batches | train loss 0.4372154 +| epoch 6 | 1999/ 8400 batches | train loss 0.3598106 +| epoch 6 | 2003/ 8400 batches | train loss 0.3595162 +| epoch 6 | 2007/ 8400 batches | train loss 0.3790154 +| epoch 6 | 2011/ 8400 batches | train loss 0.4020627 +| epoch 6 | 2015/ 8400 batches | train loss 0.3165059 +| epoch 6 | 2019/ 8400 batches | train loss 0.4027386 +| epoch 6 | 2023/ 8400 batches | train loss 0.4354843 +| epoch 6 | 2027/ 8400 batches | train loss 0.3737869 +| epoch 6 | 2031/ 8400 batches | train loss 0.3590620 +| epoch 6 | 2035/ 8400 batches | train loss 0.3633473 +| epoch 6 | 2039/ 8400 batches | train loss 0.4156729 +| epoch 6 | 2043/ 8400 batches | train loss 0.3623995 +| epoch 6 | 2047/ 8400 batches | train loss 0.3971711 +| epoch 6 | 2051/ 8400 batches | train loss 0.3212187 +| epoch 6 | 2055/ 8400 batches | train loss 0.4223278 +| epoch 6 | 2059/ 8400 batches | train loss 0.4508622 +| epoch 6 | 2063/ 8400 batches | train loss 0.3971827 +| epoch 6 | 2067/ 8400 batches | train loss 0.3490678 +| epoch 6 | 2071/ 8400 batches | train loss 0.4225917 +| epoch 6 | 2075/ 8400 batches | train loss 0.4045536 +| epoch 6 | 2079/ 8400 batches | train loss 0.4274316 +| epoch 6 | 2083/ 8400 batches | train loss 0.4168380 +| epoch 6 | 2087/ 8400 batches | train loss 0.3719824 +| epoch 6 | 2091/ 8400 batches | train loss 0.3443653 +| epoch 6 | 2095/ 8400 batches | train loss 0.3767187 +| epoch 6 | 2099/ 8400 batches | train loss 0.3446001 +| epoch 6 | 2103/ 8400 batches | train loss 0.3798980 +| epoch 6 | 2107/ 8400 batches | train loss 0.3596905 +| epoch 6 | 2111/ 8400 batches | train loss 0.3506662 +| epoch 6 | 2115/ 8400 batches | train loss 0.4136105 +| epoch 6 | 2119/ 8400 batches | train loss 0.3548896 +| epoch 6 | 2123/ 8400 batches | train loss 0.3660251 +| epoch 6 | 2127/ 8400 batches | train loss 0.3811784 +| epoch 6 | 2131/ 8400 batches | train loss 0.3452244 +| epoch 6 | 2135/ 8400 batches | train loss 0.4646723 +| epoch 6 | 2139/ 8400 batches | train loss 0.4502453 +| epoch 6 | 2143/ 8400 batches | train loss 0.3284217 +| epoch 6 | 2147/ 8400 batches | train loss 0.3663697 +| epoch 6 | 2151/ 8400 batches | train loss 0.3674608 +| epoch 6 | 2155/ 8400 batches | train loss 0.3406733 +| epoch 6 | 2159/ 8400 batches | train loss 0.2652444 +| epoch 6 | 2163/ 8400 batches | train loss 0.3510783 +| epoch 6 | 2167/ 8400 batches | train loss 0.4140521 +| epoch 6 | 2171/ 8400 batches | train loss 0.3501522 +| epoch 6 | 2175/ 8400 batches | train loss 0.3117709 +| epoch 6 | 2179/ 8400 batches | train loss 0.3420530 +| epoch 6 | 2183/ 8400 batches | train loss 0.3841229 +| epoch 6 | 2187/ 8400 batches | train loss 0.3473661 +| epoch 6 | 2191/ 8400 batches | train loss 0.3382199 +| epoch 6 | 2195/ 8400 batches | train loss 0.3955320 +| epoch 6 | 2199/ 8400 batches | train loss 0.3512806 +| epoch 6 | 2203/ 8400 batches | train loss 0.4062170 +| epoch 6 | 2207/ 8400 batches | train loss 0.4036664 +| epoch 6 | 2211/ 8400 batches | train loss 0.3561212 +| epoch 6 | 2215/ 8400 batches | train loss 0.3651067 +| epoch 6 | 2219/ 8400 batches | train loss 0.3578129 +| epoch 6 | 2223/ 8400 batches | train loss 0.3832215 +| epoch 6 | 2227/ 8400 batches | train loss 0.1630959 +| epoch 6 | 2231/ 8400 batches | train loss 0.4507660 +| epoch 6 | 2235/ 8400 batches | train loss 0.4354542 +| epoch 6 | 2239/ 8400 batches | train loss 0.3805921 +| epoch 6 | 2243/ 8400 batches | train loss 0.4573369 +| epoch 6 | 2247/ 8400 batches | train loss 0.3706375 +| epoch 6 | 2251/ 8400 batches | train loss 0.3607256 +| epoch 6 | 2255/ 8400 batches | train loss 0.4023007 +| epoch 6 | 2259/ 8400 batches | train loss 0.3243598 +| epoch 6 | 2263/ 8400 batches | train loss 0.3399890 +| epoch 6 | 2267/ 8400 batches | train loss 0.4063090 +| epoch 6 | 2271/ 8400 batches | train loss 0.3548306 +| epoch 6 | 2275/ 8400 batches | train loss 0.4186357 +| epoch 6 | 2279/ 8400 batches | train loss 0.3345858 +| epoch 6 | 2283/ 8400 batches | train loss 0.4019222 +| epoch 6 | 2287/ 8400 batches | train loss 0.4161222 +| epoch 6 | 2291/ 8400 batches | train loss 0.3930649 +| epoch 6 | 2295/ 8400 batches | train loss 0.4218134 +| epoch 6 | 2299/ 8400 batches | train loss 0.4700032 +| epoch 6 | 2303/ 8400 batches | train loss 0.4038160 +| epoch 6 | 2307/ 8400 batches | train loss 0.3291714 +| epoch 6 | 2311/ 8400 batches | train loss 0.3895776 +| epoch 6 | 2315/ 8400 batches | train loss 0.4087863 +| epoch 6 | 2319/ 8400 batches | train loss 0.3506033 +| epoch 6 | 2323/ 8400 batches | train loss 0.3729150 +| epoch 6 | 2327/ 8400 batches | train loss 0.3972438 +| epoch 6 | 2331/ 8400 batches | train loss 0.4291607 +| epoch 6 | 2335/ 8400 batches | train loss 0.4075809 +| epoch 6 | 2339/ 8400 batches | train loss 0.4121421 +| epoch 6 | 2343/ 8400 batches | train loss 0.4160102 +| epoch 6 | 2347/ 8400 batches | train loss 0.3633303 +| epoch 6 | 2351/ 8400 batches | train loss 0.3213723 +| epoch 6 | 2355/ 8400 batches | train loss 0.3928396 +| epoch 6 | 2359/ 8400 batches | train loss 0.4784615 +| epoch 6 | 2363/ 8400 batches | train loss 0.4068137 +| epoch 6 | 2367/ 8400 batches | train loss 0.3322451 +| epoch 6 | 2371/ 8400 batches | train loss 0.3964799 +| epoch 6 | 2375/ 8400 batches | train loss 0.4548598 +| epoch 6 | 2379/ 8400 batches | train loss 0.3550183 +| epoch 6 | 2383/ 8400 batches | train loss 0.3421415 +| epoch 6 | 2387/ 8400 batches | train loss 0.3165358 +| epoch 6 | 2391/ 8400 batches | train loss 0.3314357 +| epoch 6 | 2395/ 8400 batches | train loss 0.3147362 +| epoch 6 | 2399/ 8400 batches | train loss 0.4569996 +| epoch 6 | 2403/ 8400 batches | train loss 0.3216115 +| epoch 6 | 2407/ 8400 batches | train loss 0.3587329 +| epoch 6 | 2411/ 8400 batches | train loss 0.3998338 +| epoch 6 | 2415/ 8400 batches | train loss 0.4044499 +| epoch 6 | 2419/ 8400 batches | train loss 0.3837151 +| epoch 6 | 2423/ 8400 batches | train loss 0.3572602 +| epoch 6 | 2427/ 8400 batches | train loss 0.3439724 +| epoch 6 | 2431/ 8400 batches | train loss 0.3638920 +| epoch 6 | 2435/ 8400 batches | train loss 0.3146511 +| epoch 6 | 2439/ 8400 batches | train loss 0.3907156 +| epoch 6 | 2443/ 8400 batches | train loss 0.3748881 +| epoch 6 | 2447/ 8400 batches | train loss 0.3152852 +| epoch 6 | 2451/ 8400 batches | train loss 0.4096833 +| epoch 6 | 2455/ 8400 batches | train loss 0.3922559 +| epoch 6 | 2459/ 8400 batches | train loss 0.3192990 +| epoch 6 | 2463/ 8400 batches | train loss 0.2947488 +| epoch 6 | 2467/ 8400 batches | train loss 0.4354810 +| epoch 6 | 2471/ 8400 batches | train loss 0.3761352 +| epoch 6 | 2475/ 8400 batches | train loss 0.3576429 +| epoch 6 | 2479/ 8400 batches | train loss 0.3464962 +| epoch 6 | 2483/ 8400 batches | train loss 0.4619098 +| epoch 6 | 2487/ 8400 batches | train loss 0.3609474 +| epoch 6 | 2491/ 8400 batches | train loss 0.4268814 +| epoch 6 | 2495/ 8400 batches | train loss 0.3884648 +| epoch 6 | 2499/ 8400 batches | train loss 0.4012063 +| epoch 6 | 2503/ 8400 batches | train loss 0.3724384 +| epoch 6 | 2507/ 8400 batches | train loss 0.3683342 +| epoch 6 | 2511/ 8400 batches | train loss 0.4569985 +| epoch 6 | 2515/ 8400 batches | train loss 0.3910078 +| epoch 6 | 2519/ 8400 batches | train loss 0.3441453 +| epoch 6 | 2523/ 8400 batches | train loss 0.3820563 +| epoch 6 | 2527/ 8400 batches | train loss 0.4768984 +| epoch 6 | 2531/ 8400 batches | train loss 0.3606041 +| epoch 6 | 2535/ 8400 batches | train loss 0.3567885 +| epoch 6 | 2539/ 8400 batches | train loss 0.4974931 +| epoch 6 | 2543/ 8400 batches | train loss 0.3560264 +| epoch 6 | 2547/ 8400 batches | train loss 0.3652034 +| epoch 6 | 2551/ 8400 batches | train loss 0.4388377 +| epoch 6 | 2555/ 8400 batches | train loss 0.3976510 +| epoch 6 | 2559/ 8400 batches | train loss 0.3177549 +| epoch 6 | 2563/ 8400 batches | train loss 0.3309699 +| epoch 6 | 2567/ 8400 batches | train loss 0.4349890 +| epoch 6 | 2571/ 8400 batches | train loss 0.3768442 +| epoch 6 | 2575/ 8400 batches | train loss 0.4534659 +| epoch 6 | 2579/ 8400 batches | train loss 0.3273101 +| epoch 6 | 2583/ 8400 batches | train loss 0.3723744 +| epoch 6 | 2587/ 8400 batches | train loss 0.3465297 +| epoch 6 | 2591/ 8400 batches | train loss 0.4002351 +| epoch 6 | 2595/ 8400 batches | train loss 0.4332341 +| epoch 6 | 2599/ 8400 batches | train loss 0.3399886 +| epoch 6 | 2603/ 8400 batches | train loss 0.3837028 +| epoch 6 | 2607/ 8400 batches | train loss 0.4052706 +| epoch 6 | 2611/ 8400 batches | train loss 0.3694586 +| epoch 6 | 2615/ 8400 batches | train loss 0.3598648 +| epoch 6 | 2619/ 8400 batches | train loss 0.3786010 +| epoch 6 | 2623/ 8400 batches | train loss 0.3968109 +| epoch 6 | 2627/ 8400 batches | train loss 0.3207092 +| epoch 6 | 2631/ 8400 batches | train loss 0.4484717 +| epoch 6 | 2635/ 8400 batches | train loss 0.4189798 +| epoch 6 | 2639/ 8400 batches | train loss 0.4128444 +| epoch 6 | 2643/ 8400 batches | train loss 0.3416099 +| epoch 6 | 2647/ 8400 batches | train loss 0.3232137 +| epoch 6 | 2651/ 8400 batches | train loss 0.4284024 +| epoch 6 | 2655/ 8400 batches | train loss 0.3267459 +| epoch 6 | 2659/ 8400 batches | train loss 0.3805487 +| epoch 6 | 2663/ 8400 batches | train loss 0.3951011 +| epoch 6 | 2667/ 8400 batches | train loss 0.3647255 +| epoch 6 | 2671/ 8400 batches | train loss 0.3313205 +| epoch 6 | 2675/ 8400 batches | train loss 0.3985802 +| epoch 6 | 2679/ 8400 batches | train loss 0.4248741 +| epoch 6 | 2683/ 8400 batches | train loss 0.4120562 +| epoch 6 | 2687/ 8400 batches | train loss 0.3862845 +| epoch 6 | 2691/ 8400 batches | train loss 0.3364695 +| epoch 6 | 2695/ 8400 batches | train loss 0.4050524 +| epoch 6 | 2699/ 8400 batches | train loss 0.3473361 +| epoch 6 | 2703/ 8400 batches | train loss 0.3155312 +| epoch 6 | 2707/ 8400 batches | train loss 0.3258053 +| epoch 6 | 2711/ 8400 batches | train loss 0.4378860 +| epoch 6 | 2715/ 8400 batches | train loss 0.3323197 +| epoch 6 | 2719/ 8400 batches | train loss 0.3738387 +| epoch 6 | 2723/ 8400 batches | train loss 0.3782335 +| epoch 6 | 2727/ 8400 batches | train loss 0.3186287 +| epoch 6 | 2731/ 8400 batches | train loss 0.4159570 +| epoch 6 | 2735/ 8400 batches | train loss 0.3546350 +| epoch 6 | 2739/ 8400 batches | train loss 0.3175753 +| epoch 6 | 2743/ 8400 batches | train loss 0.3472425 +| epoch 6 | 2747/ 8400 batches | train loss 0.3982930 +| epoch 6 | 2751/ 8400 batches | train loss 0.3866748 +| epoch 6 | 2755/ 8400 batches | train loss 0.2936884 +| epoch 6 | 2759/ 8400 batches | train loss 0.4386970 +| epoch 6 | 2763/ 8400 batches | train loss 0.3600962 +| epoch 6 | 2767/ 8400 batches | train loss 0.3461859 +| epoch 6 | 2771/ 8400 batches | train loss 0.4350573 +| epoch 6 | 2775/ 8400 batches | train loss 0.3105417 +| epoch 6 | 2779/ 8400 batches | train loss 0.3481005 +| epoch 6 | 2783/ 8400 batches | train loss 0.3939251 +| epoch 6 | 2787/ 8400 batches | train loss 0.3654902 +| epoch 6 | 2791/ 8400 batches | train loss 0.3735399 +| epoch 6 | 2795/ 8400 batches | train loss 0.4315794 +| epoch 6 | 2799/ 8400 batches | train loss 0.3746524 +| epoch 6 | 2803/ 8400 batches | train loss 0.3990605 +| epoch 6 | 2807/ 8400 batches | train loss 0.4222412 +| epoch 6 | 2811/ 8400 batches | train loss 0.3753211 +| epoch 6 | 2815/ 8400 batches | train loss 0.3167946 +| epoch 6 | 2819/ 8400 batches | train loss 0.3588309 +| epoch 6 | 2823/ 8400 batches | train loss 0.4028284 +| epoch 6 | 2827/ 8400 batches | train loss 0.4141209 +| epoch 6 | 2831/ 8400 batches | train loss 0.4462744 +| epoch 6 | 2835/ 8400 batches | train loss 0.3922612 +| epoch 6 | 2839/ 8400 batches | train loss 0.3910959 +| epoch 6 | 2843/ 8400 batches | train loss 0.4484468 +| epoch 6 | 2847/ 8400 batches | train loss 0.3289439 +| epoch 6 | 2851/ 8400 batches | train loss 0.3278236 +| epoch 6 | 2855/ 8400 batches | train loss 0.3967854 +| epoch 6 | 2859/ 8400 batches | train loss 0.3974120 +| epoch 6 | 2863/ 8400 batches | train loss 0.3220455 +| epoch 6 | 2867/ 8400 batches | train loss 0.4187844 +| epoch 6 | 2871/ 8400 batches | train loss 0.3379368 +| epoch 6 | 2875/ 8400 batches | train loss 0.3175934 +| epoch 6 | 2879/ 8400 batches | train loss 0.3388927 +| epoch 6 | 2883/ 8400 batches | train loss 0.3722452 +| epoch 6 | 2887/ 8400 batches | train loss 0.3922285 +| epoch 6 | 2891/ 8400 batches | train loss 0.3785050 +| epoch 6 | 2895/ 8400 batches | train loss 0.4370528 +| epoch 6 | 2899/ 8400 batches | train loss 0.3726029 +| epoch 6 | 2903/ 8400 batches | train loss 0.3477281 +| epoch 6 | 2907/ 8400 batches | train loss 0.3990147 +| epoch 6 | 2911/ 8400 batches | train loss 0.3910859 +| epoch 6 | 2915/ 8400 batches | train loss 0.2613743 +| epoch 6 | 2919/ 8400 batches | train loss 0.3561942 +| epoch 6 | 2923/ 8400 batches | train loss 0.4136633 +| epoch 6 | 2927/ 8400 batches | train loss 0.3804696 +| epoch 6 | 2931/ 8400 batches | train loss 0.3631109 +| epoch 6 | 2935/ 8400 batches | train loss 0.3155696 +| epoch 6 | 2939/ 8400 batches | train loss 0.3977659 +| epoch 6 | 2943/ 8400 batches | train loss 0.3512739 +| epoch 6 | 2947/ 8400 batches | train loss 0.3667065 +| epoch 6 | 2951/ 8400 batches | train loss 0.3429298 +| epoch 6 | 2955/ 8400 batches | train loss 0.3963854 +| epoch 6 | 2959/ 8400 batches | train loss 0.3156479 +| epoch 6 | 2963/ 8400 batches | train loss 0.3784502 +| epoch 6 | 2967/ 8400 batches | train loss 0.4290815 +| epoch 6 | 2971/ 8400 batches | train loss 0.3799035 +| epoch 6 | 2975/ 8400 batches | train loss 0.4132930 +| epoch 6 | 2979/ 8400 batches | train loss 0.3900768 +| epoch 6 | 2983/ 8400 batches | train loss 0.3053695 +| epoch 6 | 2987/ 8400 batches | train loss 0.3977276 +| epoch 6 | 2991/ 8400 batches | train loss 0.3340902 +| epoch 6 | 2995/ 8400 batches | train loss 0.3732541 +| epoch 6 | 2999/ 8400 batches | train loss 0.3319367 +| epoch 6 | 3003/ 8400 batches | train loss 0.4069648 +| epoch 6 | 3007/ 8400 batches | train loss 0.4095505 +| epoch 6 | 3011/ 8400 batches | train loss 0.4199392 +| epoch 6 | 3015/ 8400 batches | train loss 0.3325007 +| epoch 6 | 3019/ 8400 batches | train loss 0.3947996 +| epoch 6 | 3023/ 8400 batches | train loss 0.3516897 +| epoch 6 | 3027/ 8400 batches | train loss 0.4604917 +| epoch 6 | 3031/ 8400 batches | train loss 0.4085443 +| epoch 6 | 3035/ 8400 batches | train loss 0.3804120 +| epoch 6 | 3039/ 8400 batches | train loss 0.3734235 +| epoch 6 | 3043/ 8400 batches | train loss 0.3814084 +| epoch 6 | 3047/ 8400 batches | train loss 0.3964056 +| epoch 6 | 3051/ 8400 batches | train loss 0.3561759 +| epoch 6 | 3055/ 8400 batches | train loss 0.5312952 +| epoch 6 | 3059/ 8400 batches | train loss 0.3791127 +| epoch 6 | 3063/ 8400 batches | train loss 0.3868120 +| epoch 6 | 3067/ 8400 batches | train loss 0.3476561 +| epoch 6 | 3071/ 8400 batches | train loss 0.4082035 +| epoch 6 | 3075/ 8400 batches | train loss 0.4022284 +| epoch 6 | 3079/ 8400 batches | train loss 0.3788713 +| epoch 6 | 3083/ 8400 batches | train loss 0.3966533 +| epoch 6 | 3087/ 8400 batches | train loss 0.3504944 +| epoch 6 | 3091/ 8400 batches | train loss 0.5087106 +| epoch 6 | 3095/ 8400 batches | train loss 0.3217769 +| epoch 6 | 3099/ 8400 batches | train loss 0.4128506 +| epoch 6 | 3103/ 8400 batches | train loss 0.3691431 +| epoch 6 | 3107/ 8400 batches | train loss 0.3545651 +| epoch 6 | 3111/ 8400 batches | train loss 0.3958930 +| epoch 6 | 3115/ 8400 batches | train loss 0.3352107 +| epoch 6 | 3119/ 8400 batches | train loss 0.3348323 +| epoch 6 | 3123/ 8400 batches | train loss 0.3590440 +| epoch 6 | 3127/ 8400 batches | train loss 0.3563845 +| epoch 6 | 3131/ 8400 batches | train loss 0.3612007 +| epoch 6 | 3135/ 8400 batches | train loss 0.3354456 +| epoch 6 | 3139/ 8400 batches | train loss 0.3978669 +| epoch 6 | 3143/ 8400 batches | train loss 0.4088281 +| epoch 6 | 3147/ 8400 batches | train loss 0.3977339 +| epoch 6 | 3151/ 8400 batches | train loss 0.4152704 +| epoch 6 | 3155/ 8400 batches | train loss 0.3631503 +| epoch 6 | 3159/ 8400 batches | train loss 0.4225859 +| epoch 6 | 3163/ 8400 batches | train loss 0.3973941 +| epoch 6 | 3167/ 8400 batches | train loss 0.3437490 +| epoch 6 | 3171/ 8400 batches | train loss 0.3732437 +| epoch 6 | 3175/ 8400 batches | train loss 0.2924495 +| epoch 6 | 3179/ 8400 batches | train loss 0.4104594 +| epoch 6 | 3183/ 8400 batches | train loss 0.3856660 +| epoch 6 | 3187/ 8400 batches | train loss 0.4717396 +| epoch 6 | 3191/ 8400 batches | train loss 0.3396964 +| epoch 6 | 3195/ 8400 batches | train loss 0.4721447 +| epoch 6 | 3199/ 8400 batches | train loss 0.3865107 +| epoch 6 | 3203/ 8400 batches | train loss 0.2588162 +| epoch 6 | 3207/ 8400 batches | train loss 0.3749889 +| epoch 6 | 3211/ 8400 batches | train loss 0.4141535 +| epoch 6 | 3215/ 8400 batches | train loss 0.3941621 +| epoch 6 | 3219/ 8400 batches | train loss 0.3745869 +| epoch 6 | 3223/ 8400 batches | train loss 0.4301372 +| epoch 6 | 3227/ 8400 batches | train loss 0.3638440 +| epoch 6 | 3231/ 8400 batches | train loss 0.4012358 +| epoch 6 | 3235/ 8400 batches | train loss 0.2855043 +| epoch 6 | 3239/ 8400 batches | train loss 0.3752908 +| epoch 6 | 3243/ 8400 batches | train loss 0.4065607 +| epoch 6 | 3247/ 8400 batches | train loss 0.4828165 +| epoch 6 | 3251/ 8400 batches | train loss 0.4601058 +| epoch 6 | 3255/ 8400 batches | train loss 0.3926914 +| epoch 6 | 3259/ 8400 batches | train loss 0.3592030 +| epoch 6 | 3263/ 8400 batches | train loss 0.4002361 +| epoch 6 | 3267/ 8400 batches | train loss 0.4306309 +| epoch 6 | 3271/ 8400 batches | train loss 0.4180825 +| epoch 6 | 3275/ 8400 batches | train loss 0.4613639 +| epoch 6 | 3279/ 8400 batches | train loss 0.3693062 +| epoch 6 | 3283/ 8400 batches | train loss 0.3506180 +| epoch 6 | 3287/ 8400 batches | train loss 0.4027210 +| epoch 6 | 3291/ 8400 batches | train loss 0.3544867 +| epoch 6 | 3295/ 8400 batches | train loss 0.3275312 +| epoch 6 | 3299/ 8400 batches | train loss 0.3778316 +| epoch 6 | 3303/ 8400 batches | train loss 0.3681740 +| epoch 6 | 3307/ 8400 batches | train loss 0.3232411 +| epoch 6 | 3311/ 8400 batches | train loss 0.3785569 +| epoch 6 | 3315/ 8400 batches | train loss 0.4297013 +| epoch 6 | 3319/ 8400 batches | train loss 0.3906528 +| epoch 6 | 3323/ 8400 batches | train loss 0.3025988 +| epoch 6 | 3327/ 8400 batches | train loss 0.3461002 +| epoch 6 | 3331/ 8400 batches | train loss 0.4131420 +| epoch 6 | 3335/ 8400 batches | train loss 0.3718598 +| epoch 6 | 3339/ 8400 batches | train loss 0.3291724 +| epoch 6 | 3343/ 8400 batches | train loss 0.3686489 +| epoch 6 | 3347/ 8400 batches | train loss 0.3675523 +| epoch 6 | 3351/ 8400 batches | train loss 0.4134918 +| epoch 6 | 3355/ 8400 batches | train loss 0.3774220 +| epoch 6 | 3359/ 8400 batches | train loss 0.3726045 +| epoch 6 | 3363/ 8400 batches | train loss 0.4240785 +| epoch 6 | 3367/ 8400 batches | train loss 0.4156564 +| epoch 6 | 3371/ 8400 batches | train loss 0.3786939 +| epoch 6 | 3375/ 8400 batches | train loss 0.3811470 +| epoch 6 | 3379/ 8400 batches | train loss 0.3566473 +| epoch 6 | 3383/ 8400 batches | train loss 0.2756180 +| epoch 6 | 3387/ 8400 batches | train loss 0.3965399 +| epoch 6 | 3391/ 8400 batches | train loss 0.4027911 +| epoch 6 | 3395/ 8400 batches | train loss 0.3499720 +| epoch 6 | 3399/ 8400 batches | train loss 0.3858563 +| epoch 6 | 3403/ 8400 batches | train loss 0.3657550 +| epoch 6 | 3407/ 8400 batches | train loss 0.3020136 +| epoch 6 | 3411/ 8400 batches | train loss 0.4547547 +| epoch 6 | 3415/ 8400 batches | train loss 0.3955492 +| epoch 6 | 3419/ 8400 batches | train loss 0.3426906 +| epoch 6 | 3423/ 8400 batches | train loss 0.3355265 +| epoch 6 | 3427/ 8400 batches | train loss 0.4095960 +| epoch 6 | 3431/ 8400 batches | train loss 0.3495445 +| epoch 6 | 3435/ 8400 batches | train loss 0.4714261 +| epoch 6 | 3439/ 8400 batches | train loss 0.4010751 +| epoch 6 | 3443/ 8400 batches | train loss 0.2779074 +| epoch 6 | 3447/ 8400 batches | train loss 0.2883731 +| epoch 6 | 3451/ 8400 batches | train loss 0.4161372 +| epoch 6 | 3455/ 8400 batches | train loss 0.3097533 +| epoch 6 | 3459/ 8400 batches | train loss 0.3206940 +| epoch 6 | 3463/ 8400 batches | train loss 0.3497109 +| epoch 6 | 3467/ 8400 batches | train loss 0.3327427 +| epoch 6 | 3471/ 8400 batches | train loss 0.3305091 +| epoch 6 | 3475/ 8400 batches | train loss 0.3630329 +| epoch 6 | 3479/ 8400 batches | train loss 0.3911607 +| epoch 6 | 3483/ 8400 batches | train loss 0.4052093 +| epoch 6 | 3487/ 8400 batches | train loss 0.3143023 +| epoch 6 | 3491/ 8400 batches | train loss 0.3492974 +| epoch 6 | 3495/ 8400 batches | train loss 0.4284600 +| epoch 6 | 3499/ 8400 batches | train loss 0.4228816 +| epoch 6 | 3503/ 8400 batches | train loss 0.3775762 +| epoch 6 | 3507/ 8400 batches | train loss 0.3544211 +| epoch 6 | 3511/ 8400 batches | train loss 0.4968119 +| epoch 6 | 3515/ 8400 batches | train loss 0.3791667 +| epoch 6 | 3519/ 8400 batches | train loss 0.3787131 +| epoch 6 | 3523/ 8400 batches | train loss 0.4525312 +| epoch 6 | 3527/ 8400 batches | train loss 0.4292699 +| epoch 6 | 3531/ 8400 batches | train loss 0.3841735 +| epoch 6 | 3535/ 8400 batches | train loss 0.4038823 +| epoch 6 | 3539/ 8400 batches | train loss 0.4179265 +| epoch 6 | 3543/ 8400 batches | train loss 0.3995044 +| epoch 6 | 3547/ 8400 batches | train loss 0.3830502 +| epoch 6 | 3551/ 8400 batches | train loss 0.3616856 +| epoch 6 | 3555/ 8400 batches | train loss 0.2857198 +| epoch 6 | 3559/ 8400 batches | train loss 0.3788397 +| epoch 6 | 3563/ 8400 batches | train loss 0.3870154 +| epoch 6 | 3567/ 8400 batches | train loss 0.3106275 +| epoch 6 | 3571/ 8400 batches | train loss 0.3790486 +| epoch 6 | 3575/ 8400 batches | train loss 0.3274512 +| epoch 6 | 3579/ 8400 batches | train loss 0.3111285 +| epoch 6 | 3583/ 8400 batches | train loss 0.4186245 +| epoch 6 | 3587/ 8400 batches | train loss 0.4455281 +| epoch 6 | 3591/ 8400 batches | train loss 0.4315036 +| epoch 6 | 3595/ 8400 batches | train loss 0.3332424 +| epoch 6 | 3599/ 8400 batches | train loss 0.4168085 +| epoch 6 | 3603/ 8400 batches | train loss 0.3983376 +| epoch 6 | 3607/ 8400 batches | train loss 0.4115530 +| epoch 6 | 3611/ 8400 batches | train loss 0.3460740 +| epoch 6 | 3615/ 8400 batches | train loss 0.3975678 +| epoch 6 | 3619/ 8400 batches | train loss 0.4543047 +| epoch 6 | 3623/ 8400 batches | train loss 0.4273826 +| epoch 6 | 3627/ 8400 batches | train loss 0.4457090 +| epoch 6 | 3631/ 8400 batches | train loss 0.3201164 +| epoch 6 | 3635/ 8400 batches | train loss 0.3432966 +| epoch 6 | 3639/ 8400 batches | train loss 0.3821166 +| epoch 6 | 3643/ 8400 batches | train loss 0.3991682 +| epoch 6 | 3647/ 8400 batches | train loss 0.4336587 +| epoch 6 | 3651/ 8400 batches | train loss 0.3880632 +| epoch 6 | 3655/ 8400 batches | train loss 0.4085022 +| epoch 6 | 3659/ 8400 batches | train loss 0.4457132 +| epoch 6 | 3663/ 8400 batches | train loss 0.4169230 +| epoch 6 | 3667/ 8400 batches | train loss 0.3786964 +| epoch 6 | 3671/ 8400 batches | train loss 0.3473717 +| epoch 6 | 3675/ 8400 batches | train loss 0.3137372 +| epoch 6 | 3679/ 8400 batches | train loss 0.3949913 +| epoch 6 | 3683/ 8400 batches | train loss 0.3949203 +| epoch 6 | 3687/ 8400 batches | train loss 0.3544825 +| epoch 6 | 3691/ 8400 batches | train loss 0.4039858 +| epoch 6 | 3695/ 8400 batches | train loss 0.4547455 +| epoch 6 | 3699/ 8400 batches | train loss 0.3857957 +| epoch 6 | 3703/ 8400 batches | train loss 0.4189586 +| epoch 6 | 3707/ 8400 batches | train loss 0.1500060 +| epoch 6 | 3711/ 8400 batches | train loss 0.4281920 +| epoch 6 | 3715/ 8400 batches | train loss 0.4273005 +| epoch 6 | 3719/ 8400 batches | train loss 0.4793123 +| epoch 6 | 3723/ 8400 batches | train loss 0.3630088 +| epoch 6 | 3727/ 8400 batches | train loss 0.3249503 +| epoch 6 | 3731/ 8400 batches | train loss 0.4280105 +| epoch 6 | 3735/ 8400 batches | train loss 0.3696775 +| epoch 6 | 3739/ 8400 batches | train loss 0.3849202 +| epoch 6 | 3743/ 8400 batches | train loss 0.3626400 +| epoch 6 | 3747/ 8400 batches | train loss 0.3269011 +| epoch 6 | 3751/ 8400 batches | train loss 0.4075838 +| epoch 6 | 3755/ 8400 batches | train loss 0.3851068 +| epoch 6 | 3759/ 8400 batches | train loss 0.4067115 +| epoch 6 | 3763/ 8400 batches | train loss 0.3573119 +| epoch 6 | 3767/ 8400 batches | train loss 0.4638891 +| epoch 6 | 3771/ 8400 batches | train loss 0.3608082 +| epoch 6 | 3775/ 8400 batches | train loss 0.3771321 +| epoch 6 | 3779/ 8400 batches | train loss 0.3852474 +| epoch 6 | 3783/ 8400 batches | train loss 0.3771652 +| epoch 6 | 3787/ 8400 batches | train loss 0.3970221 +| epoch 6 | 3791/ 8400 batches | train loss 0.3934968 +| epoch 6 | 3795/ 8400 batches | train loss 0.3118501 +| epoch 6 | 3799/ 8400 batches | train loss 0.4213452 +| epoch 6 | 3803/ 8400 batches | train loss 0.3380627 +| epoch 6 | 3807/ 8400 batches | train loss 0.4043552 +| epoch 6 | 3811/ 8400 batches | train loss 0.4031907 +| epoch 6 | 3815/ 8400 batches | train loss 0.3634279 +| epoch 6 | 3819/ 8400 batches | train loss 0.3225385 +| epoch 6 | 3823/ 8400 batches | train loss 0.4025735 +| epoch 6 | 3827/ 8400 batches | train loss 0.3766272 +| epoch 6 | 3831/ 8400 batches | train loss 0.4205393 +| epoch 6 | 3835/ 8400 batches | train loss 0.4311492 +| epoch 6 | 3839/ 8400 batches | train loss 0.3519812 +| epoch 6 | 3843/ 8400 batches | train loss 0.3854932 +| epoch 6 | 3847/ 8400 batches | train loss 0.3635848 +| epoch 6 | 3851/ 8400 batches | train loss 0.4292648 +| epoch 6 | 3855/ 8400 batches | train loss 0.3640117 +| epoch 6 | 3859/ 8400 batches | train loss 0.3851835 +| epoch 6 | 3863/ 8400 batches | train loss 0.3728839 +| epoch 6 | 3867/ 8400 batches | train loss 0.4817642 +| epoch 6 | 3871/ 8400 batches | train loss 0.3751074 +| epoch 6 | 3875/ 8400 batches | train loss 0.3898902 +| epoch 6 | 3879/ 8400 batches | train loss 0.3426126 +| epoch 6 | 3883/ 8400 batches | train loss 0.3041723 +| epoch 6 | 3887/ 8400 batches | train loss 0.3954700 +| epoch 6 | 3891/ 8400 batches | train loss 0.3609012 +| epoch 6 | 3895/ 8400 batches | train loss 0.4226529 +| epoch 6 | 3899/ 8400 batches | train loss 0.3795786 +| epoch 6 | 3903/ 8400 batches | train loss 0.3686525 +| epoch 6 | 3907/ 8400 batches | train loss 0.3203247 +| epoch 6 | 3911/ 8400 batches | train loss 0.3856835 +| epoch 6 | 3915/ 8400 batches | train loss 0.3729440 +| epoch 6 | 3919/ 8400 batches | train loss 0.3145407 +| epoch 6 | 3923/ 8400 batches | train loss 0.3417013 +| epoch 6 | 3927/ 8400 batches | train loss 0.4246038 +| epoch 6 | 3931/ 8400 batches | train loss 0.4173471 +| epoch 6 | 3935/ 8400 batches | train loss 0.4367938 +| epoch 6 | 3939/ 8400 batches | train loss 0.3857347 +| epoch 6 | 3943/ 8400 batches | train loss 0.3469152 +| epoch 6 | 3947/ 8400 batches | train loss 0.3819944 +| epoch 6 | 3951/ 8400 batches | train loss 0.4628878 +| epoch 6 | 3955/ 8400 batches | train loss 0.4350969 +| epoch 6 | 3959/ 8400 batches | train loss 0.3454589 +| epoch 6 | 3963/ 8400 batches | train loss 0.2930520 +| epoch 6 | 3967/ 8400 batches | train loss 0.3543584 +| epoch 6 | 3971/ 8400 batches | train loss 0.3794253 +| epoch 6 | 3975/ 8400 batches | train loss 0.3466846 +| epoch 6 | 3979/ 8400 batches | train loss 0.3168795 +| epoch 6 | 3983/ 8400 batches | train loss 0.3982331 +| epoch 6 | 3987/ 8400 batches | train loss 0.4040605 +| epoch 6 | 3991/ 8400 batches | train loss 0.3594670 +| epoch 6 | 3995/ 8400 batches | train loss 0.3579492 +| epoch 6 | 3999/ 8400 batches | train loss 0.5200960 +| epoch 6 | 4003/ 8400 batches | train loss 0.3917133 +| epoch 6 | 4007/ 8400 batches | train loss 0.3382094 +| epoch 6 | 4011/ 8400 batches | train loss 0.3549999 +| epoch 6 | 4015/ 8400 batches | train loss 0.4351635 +| epoch 6 | 4019/ 8400 batches | train loss 0.4535193 +| epoch 6 | 4023/ 8400 batches | train loss 0.3877876 +| epoch 6 | 4027/ 8400 batches | train loss 0.3780335 +| epoch 6 | 4031/ 8400 batches | train loss 0.4031090 +| epoch 6 | 4035/ 8400 batches | train loss 0.3941100 +| epoch 6 | 4039/ 8400 batches | train loss 0.3295478 +| epoch 6 | 4043/ 8400 batches | train loss 0.3687857 +| epoch 6 | 4047/ 8400 batches | train loss 0.3171976 +| epoch 6 | 4051/ 8400 batches | train loss 0.4258868 +| epoch 6 | 4055/ 8400 batches | train loss 0.4084205 +| epoch 6 | 4059/ 8400 batches | train loss 0.2882878 +| epoch 6 | 4063/ 8400 batches | train loss 0.3294193 +| epoch 6 | 4067/ 8400 batches | train loss 0.3043748 +| epoch 6 | 4071/ 8400 batches | train loss 0.3414123 +| epoch 6 | 4075/ 8400 batches | train loss 0.3964850 +| epoch 6 | 4079/ 8400 batches | train loss 0.4281236 +| epoch 6 | 4083/ 8400 batches | train loss 0.3399978 +| epoch 6 | 4087/ 8400 batches | train loss 0.3995639 +| epoch 6 | 4091/ 8400 batches | train loss 0.3710305 +| epoch 6 | 4095/ 8400 batches | train loss 0.3185555 +| epoch 6 | 4099/ 8400 batches | train loss 0.4300079 +| epoch 6 | 4103/ 8400 batches | train loss 0.4114099 +| epoch 6 | 4107/ 8400 batches | train loss 0.3802589 +| epoch 6 | 4111/ 8400 batches | train loss 0.3479809 +| epoch 6 | 4115/ 8400 batches | train loss 0.3665416 +| epoch 6 | 4119/ 8400 batches | train loss 0.3459125 +| epoch 6 | 4123/ 8400 batches | train loss 0.3285660 +| epoch 6 | 4127/ 8400 batches | train loss 0.4173279 +| epoch 6 | 4131/ 8400 batches | train loss 0.3531892 +| epoch 6 | 4135/ 8400 batches | train loss 0.4203081 +| epoch 6 | 4139/ 8400 batches | train loss 0.3850926 +| epoch 6 | 4143/ 8400 batches | train loss 0.3624882 +| epoch 6 | 4147/ 8400 batches | train loss 0.4186921 +| epoch 6 | 4151/ 8400 batches | train loss 0.4192123 +| epoch 6 | 4155/ 8400 batches | train loss 0.4022630 +| epoch 6 | 4159/ 8400 batches | train loss 0.3902092 +| epoch 6 | 4163/ 8400 batches | train loss 0.4413928 +| epoch 6 | 4167/ 8400 batches | train loss 0.3771021 +| epoch 6 | 4171/ 8400 batches | train loss 0.3748898 +| epoch 6 | 4175/ 8400 batches | train loss 0.4127778 +| epoch 6 | 4179/ 8400 batches | train loss 0.3350445 +| epoch 6 | 4183/ 8400 batches | train loss 0.3844573 +| epoch 6 | 4187/ 8400 batches | train loss 0.3761181 +| epoch 6 | 4191/ 8400 batches | train loss 0.3937004 +| epoch 6 | 4195/ 8400 batches | train loss 0.3395723 +| epoch 6 | 4199/ 8400 batches | train loss 0.3975962 +| epoch 6 | 4203/ 8400 batches | train loss 0.3614867 +| epoch 6 | 4207/ 8400 batches | train loss 0.3329571 +| epoch 6 | 4211/ 8400 batches | train loss 0.3355787 +| epoch 6 | 4215/ 8400 batches | train loss 0.3398037 +| epoch 6 | 4219/ 8400 batches | train loss 0.4308505 +| epoch 6 | 4223/ 8400 batches | train loss 0.4745849 +| epoch 6 | 4227/ 8400 batches | train loss 0.3516873 +| epoch 6 | 4231/ 8400 batches | train loss 0.4524900 +| epoch 6 | 4235/ 8400 batches | train loss 0.3836795 +| epoch 6 | 4239/ 8400 batches | train loss 0.4487073 +| epoch 6 | 4243/ 8400 batches | train loss 0.4355438 +| epoch 6 | 4247/ 8400 batches | train loss 0.3568616 +| epoch 6 | 4251/ 8400 batches | train loss 0.3578880 +| epoch 6 | 4255/ 8400 batches | train loss 0.3555329 +| epoch 6 | 4259/ 8400 batches | train loss 0.4208301 +| epoch 6 | 4263/ 8400 batches | train loss 0.4355893 +| epoch 6 | 4267/ 8400 batches | train loss 0.4538588 +| epoch 6 | 4271/ 8400 batches | train loss 0.3572527 +| epoch 6 | 4275/ 8400 batches | train loss 0.3870065 +| epoch 6 | 4279/ 8400 batches | train loss 0.3990908 +| epoch 6 | 4283/ 8400 batches | train loss 0.3976664 +| epoch 6 | 4287/ 8400 batches | train loss 0.3369500 +| epoch 6 | 4291/ 8400 batches | train loss 0.3439788 +| epoch 6 | 4295/ 8400 batches | train loss 0.4253839 +| epoch 6 | 4299/ 8400 batches | train loss 0.3728739 +| epoch 6 | 4303/ 8400 batches | train loss 0.3699558 +| epoch 6 | 4307/ 8400 batches | train loss 0.3489836 +| epoch 6 | 4311/ 8400 batches | train loss 0.3267628 +| epoch 6 | 4315/ 8400 batches | train loss 0.3502835 +| epoch 6 | 4319/ 8400 batches | train loss 0.3428486 +| epoch 6 | 4323/ 8400 batches | train loss 0.3778448 +| epoch 6 | 4327/ 8400 batches | train loss 0.3825391 +| epoch 6 | 4331/ 8400 batches | train loss 0.4091745 +| epoch 6 | 4335/ 8400 batches | train loss 0.3993959 +| epoch 6 | 4339/ 8400 batches | train loss 0.3040802 +| epoch 6 | 4343/ 8400 batches | train loss 0.3865312 +| epoch 6 | 4347/ 8400 batches | train loss 0.3669943 +| epoch 6 | 4351/ 8400 batches | train loss 0.3703445 +| epoch 6 | 4355/ 8400 batches | train loss 0.3614030 +| epoch 6 | 4359/ 8400 batches | train loss 0.4263280 +| epoch 6 | 4363/ 8400 batches | train loss 0.3375278 +| epoch 6 | 4367/ 8400 batches | train loss 0.3811029 +| epoch 6 | 4371/ 8400 batches | train loss 0.4034077 +| epoch 6 | 4375/ 8400 batches | train loss 0.3973401 +| epoch 6 | 4379/ 8400 batches | train loss 0.3321348 +| epoch 6 | 4383/ 8400 batches | train loss 0.4274368 +| epoch 6 | 4387/ 8400 batches | train loss 0.2996148 +| epoch 6 | 4391/ 8400 batches | train loss 0.3774943 +| epoch 6 | 4395/ 8400 batches | train loss 0.3488331 +| epoch 6 | 4399/ 8400 batches | train loss 0.4095691 +| epoch 6 | 4403/ 8400 batches | train loss 0.3604752 +| epoch 6 | 4407/ 8400 batches | train loss 0.4310288 +| epoch 6 | 4411/ 8400 batches | train loss 0.3247057 +| epoch 6 | 4415/ 8400 batches | train loss 0.3115714 +| epoch 6 | 4419/ 8400 batches | train loss 0.4162557 +| epoch 6 | 4423/ 8400 batches | train loss 0.3218368 +| epoch 6 | 4427/ 8400 batches | train loss 0.3381206 +| epoch 6 | 4431/ 8400 batches | train loss 0.3305270 +| epoch 6 | 4435/ 8400 batches | train loss 0.4148757 +| epoch 6 | 4439/ 8400 batches | train loss 0.3947770 +| epoch 6 | 4443/ 8400 batches | train loss 0.3190480 +| epoch 6 | 4447/ 8400 batches | train loss 0.3014966 +| epoch 6 | 4451/ 8400 batches | train loss 0.3826212 +| epoch 6 | 4455/ 8400 batches | train loss 0.4018224 +| epoch 6 | 4459/ 8400 batches | train loss 0.3276713 +| epoch 6 | 4463/ 8400 batches | train loss 0.3584621 +| epoch 6 | 4467/ 8400 batches | train loss 0.2954045 +| epoch 6 | 4471/ 8400 batches | train loss 0.3317247 +| epoch 6 | 4475/ 8400 batches | train loss 0.4355659 +| epoch 6 | 4479/ 8400 batches | train loss 0.3465241 +| epoch 6 | 4483/ 8400 batches | train loss 0.3528768 +| epoch 6 | 4487/ 8400 batches | train loss 0.3751044 +| epoch 6 | 4491/ 8400 batches | train loss 0.4113746 +| epoch 6 | 4495/ 8400 batches | train loss 0.3483689 +| epoch 6 | 4499/ 8400 batches | train loss 0.3432505 +| epoch 6 | 4503/ 8400 batches | train loss 0.3303415 +| epoch 6 | 4507/ 8400 batches | train loss 0.4331588 +| epoch 6 | 4511/ 8400 batches | train loss 0.4570494 +| epoch 6 | 4515/ 8400 batches | train loss 0.3202760 +| epoch 6 | 4519/ 8400 batches | train loss 0.4343647 +| epoch 6 | 4523/ 8400 batches | train loss 0.3122894 +| epoch 6 | 4527/ 8400 batches | train loss 0.3460169 +| epoch 6 | 4531/ 8400 batches | train loss 0.4007831 +| epoch 6 | 4535/ 8400 batches | train loss 0.3723890 +| epoch 6 | 4539/ 8400 batches | train loss 0.4263017 +| epoch 6 | 4543/ 8400 batches | train loss 0.3483475 +| epoch 6 | 4547/ 8400 batches | train loss 0.4348850 +| epoch 6 | 4551/ 8400 batches | train loss 0.3558185 +| epoch 6 | 4555/ 8400 batches | train loss 0.3550449 +| epoch 6 | 4559/ 8400 batches | train loss 0.3610407 +| epoch 6 | 4563/ 8400 batches | train loss 0.3959325 +| epoch 6 | 4567/ 8400 batches | train loss 0.3941556 +| epoch 6 | 4571/ 8400 batches | train loss 0.3703888 +| epoch 6 | 4575/ 8400 batches | train loss 0.4070537 +| epoch 6 | 4579/ 8400 batches | train loss 0.3508672 +| epoch 6 | 4583/ 8400 batches | train loss 0.4518602 +| epoch 6 | 4587/ 8400 batches | train loss 0.3606700 +| epoch 6 | 4591/ 8400 batches | train loss 0.3344762 +| epoch 6 | 4595/ 8400 batches | train loss 0.4081091 +| epoch 6 | 4599/ 8400 batches | train loss 0.3738700 +| epoch 6 | 4603/ 8400 batches | train loss 0.3673090 +| epoch 6 | 4607/ 8400 batches | train loss 0.2681741 +| epoch 6 | 4611/ 8400 batches | train loss 0.3894527 +| epoch 6 | 4615/ 8400 batches | train loss 0.4099751 +| epoch 6 | 4619/ 8400 batches | train loss 0.3966417 +| epoch 6 | 4623/ 8400 batches | train loss 0.4323987 +| epoch 6 | 4627/ 8400 batches | train loss 0.3477507 +| epoch 6 | 4631/ 8400 batches | train loss 0.3516575 +| epoch 6 | 4635/ 8400 batches | train loss 0.3540335 +| epoch 6 | 4639/ 8400 batches | train loss 0.3911645 +| epoch 6 | 4643/ 8400 batches | train loss 0.3671032 +| epoch 6 | 4647/ 8400 batches | train loss 0.3994494 +| epoch 6 | 4651/ 8400 batches | train loss 0.4031128 +| epoch 6 | 4655/ 8400 batches | train loss 0.3903738 +| epoch 6 | 4659/ 8400 batches | train loss 0.4336773 +| epoch 6 | 4663/ 8400 batches | train loss 0.5107158 +| epoch 6 | 4667/ 8400 batches | train loss 0.3300294 +| epoch 6 | 4671/ 8400 batches | train loss 0.4650250 +| epoch 6 | 4675/ 8400 batches | train loss 0.4004206 +| epoch 6 | 4679/ 8400 batches | train loss 0.4065368 +| epoch 6 | 4683/ 8400 batches | train loss 0.4369144 +| epoch 6 | 4687/ 8400 batches | train loss 0.4146429 +| epoch 6 | 4691/ 8400 batches | train loss 0.3935544 +| epoch 6 | 4695/ 8400 batches | train loss 0.3780930 +| epoch 6 | 4699/ 8400 batches | train loss 0.3414654 +| epoch 6 | 4703/ 8400 batches | train loss 0.3740381 +| epoch 6 | 4707/ 8400 batches | train loss 0.4980728 +| epoch 6 | 4711/ 8400 batches | train loss 0.4367292 +| epoch 6 | 4715/ 8400 batches | train loss 0.3774862 +| epoch 6 | 4719/ 8400 batches | train loss 0.3839169 +| epoch 6 | 4723/ 8400 batches | train loss 0.3848118 +| epoch 6 | 4727/ 8400 batches | train loss 0.3889146 +| epoch 6 | 4731/ 8400 batches | train loss 0.4554887 +| epoch 6 | 4735/ 8400 batches | train loss 0.3845080 +| epoch 6 | 4739/ 8400 batches | train loss 0.3713889 +| epoch 6 | 4743/ 8400 batches | train loss 0.3735835 +| epoch 6 | 4747/ 8400 batches | train loss 0.4174441 +| epoch 6 | 4751/ 8400 batches | train loss 0.2990752 +| epoch 6 | 4755/ 8400 batches | train loss 0.4429635 +| epoch 6 | 4759/ 8400 batches | train loss 0.3710066 +| epoch 6 | 4763/ 8400 batches | train loss 0.3882859 +| epoch 6 | 4767/ 8400 batches | train loss 0.3691804 +| epoch 6 | 4771/ 8400 batches | train loss 0.4828988 +| epoch 6 | 4775/ 8400 batches | train loss 0.4046160 +| epoch 6 | 4779/ 8400 batches | train loss 0.3163283 +| epoch 6 | 4783/ 8400 batches | train loss 0.3872398 +| epoch 6 | 4787/ 8400 batches | train loss 0.3146081 +| epoch 6 | 4791/ 8400 batches | train loss 0.3660907 +| epoch 6 | 4795/ 8400 batches | train loss 0.4007273 +| epoch 6 | 4799/ 8400 batches | train loss 0.4469780 +| epoch 6 | 4803/ 8400 batches | train loss 0.4102057 +| epoch 6 | 4807/ 8400 batches | train loss 0.4131575 +| epoch 6 | 4811/ 8400 batches | train loss 0.4237184 +| epoch 6 | 4815/ 8400 batches | train loss 0.4108896 +| epoch 6 | 4819/ 8400 batches | train loss 0.4071476 +| epoch 6 | 4823/ 8400 batches | train loss 0.3718667 +| epoch 6 | 4827/ 8400 batches | train loss 0.3820910 +| epoch 6 | 4831/ 8400 batches | train loss 0.3193443 +| epoch 6 | 4835/ 8400 batches | train loss 0.4503652 +| epoch 6 | 4839/ 8400 batches | train loss 0.3498632 +| epoch 6 | 4843/ 8400 batches | train loss 0.3489192 +| epoch 6 | 4847/ 8400 batches | train loss 0.2947499 +| epoch 6 | 4851/ 8400 batches | train loss 0.3619079 +| epoch 6 | 4855/ 8400 batches | train loss 0.4049637 +| epoch 6 | 4859/ 8400 batches | train loss 0.3453249 +| epoch 6 | 4863/ 8400 batches | train loss 0.4150019 +| epoch 6 | 4867/ 8400 batches | train loss 0.3346932 +| epoch 6 | 4871/ 8400 batches | train loss 0.3725052 +| epoch 6 | 4875/ 8400 batches | train loss 0.3558587 +| epoch 6 | 4879/ 8400 batches | train loss 0.4756227 +| epoch 6 | 4883/ 8400 batches | train loss 0.3241643 +| epoch 6 | 4887/ 8400 batches | train loss 0.3518271 +| epoch 6 | 4891/ 8400 batches | train loss 0.3319920 +| epoch 6 | 4895/ 8400 batches | train loss 0.3471580 +| epoch 6 | 4899/ 8400 batches | train loss 0.3446162 +| epoch 6 | 4903/ 8400 batches | train loss 0.3364869 +| epoch 6 | 4907/ 8400 batches | train loss 0.3575824 +| epoch 6 | 4911/ 8400 batches | train loss 0.3718877 +| epoch 6 | 4915/ 8400 batches | train loss 0.3933449 +| epoch 6 | 4919/ 8400 batches | train loss 0.3959102 +| epoch 6 | 4923/ 8400 batches | train loss 0.3380149 +| epoch 6 | 4927/ 8400 batches | train loss 0.2948716 +| epoch 6 | 4931/ 8400 batches | train loss 0.3888232 +| epoch 6 | 4935/ 8400 batches | train loss 0.3464820 +| epoch 6 | 4939/ 8400 batches | train loss 0.3973018 +| epoch 6 | 4943/ 8400 batches | train loss 0.3434257 +| epoch 6 | 4947/ 8400 batches | train loss 0.3744529 +| epoch 6 | 4951/ 8400 batches | train loss 0.3317397 +| epoch 6 | 4955/ 8400 batches | train loss 0.3590605 +| epoch 6 | 4959/ 8400 batches | train loss 0.4336797 +| epoch 6 | 4963/ 8400 batches | train loss 0.3738235 +| epoch 6 | 4967/ 8400 batches | train loss 0.4318476 +| epoch 6 | 4971/ 8400 batches | train loss 0.3947587 +| epoch 6 | 4975/ 8400 batches | train loss 0.3658161 +| epoch 6 | 4979/ 8400 batches | train loss 0.3404060 +| epoch 6 | 4983/ 8400 batches | train loss 0.3411745 +| epoch 6 | 4987/ 8400 batches | train loss 0.3617771 +| epoch 6 | 4991/ 8400 batches | train loss 0.4678112 +| epoch 6 | 4995/ 8400 batches | train loss 0.3387039 +| epoch 6 | 4999/ 8400 batches | train loss 0.3874382 +| epoch 6 | 5003/ 8400 batches | train loss 0.1438202 +| epoch 6 | 5007/ 8400 batches | train loss 0.3495806 +| epoch 6 | 5011/ 8400 batches | train loss 0.2992482 +| epoch 6 | 5015/ 8400 batches | train loss 0.4119403 +| epoch 6 | 5019/ 8400 batches | train loss 0.3947993 +| epoch 6 | 5023/ 8400 batches | train loss 0.4092413 +| epoch 6 | 5027/ 8400 batches | train loss 0.4138483 +| epoch 6 | 5031/ 8400 batches | train loss 0.3981889 +| epoch 6 | 5035/ 8400 batches | train loss 0.3839625 +| epoch 6 | 5039/ 8400 batches | train loss 0.3963377 +| epoch 6 | 5043/ 8400 batches | train loss 0.4185030 +| epoch 6 | 5047/ 8400 batches | train loss 0.3772455 +| epoch 6 | 5051/ 8400 batches | train loss 0.3426241 +| epoch 6 | 5055/ 8400 batches | train loss 0.3373615 +| epoch 6 | 5059/ 8400 batches | train loss 0.4056106 +| epoch 6 | 5063/ 8400 batches | train loss 0.3844571 +| epoch 6 | 5067/ 8400 batches | train loss 0.4217515 +| epoch 6 | 5071/ 8400 batches | train loss 0.3916827 +| epoch 6 | 5075/ 8400 batches | train loss 0.4074584 +| epoch 6 | 5079/ 8400 batches | train loss 0.2825610 +| epoch 6 | 5083/ 8400 batches | train loss 0.4339542 +| epoch 6 | 5087/ 8400 batches | train loss 0.3651901 +| epoch 6 | 5091/ 8400 batches | train loss 0.3938840 +| epoch 6 | 5095/ 8400 batches | train loss 0.3243107 +| epoch 6 | 5099/ 8400 batches | train loss 0.4483604 +| epoch 6 | 5103/ 8400 batches | train loss 0.4155223 +| epoch 6 | 5107/ 8400 batches | train loss 0.2851072 +| epoch 6 | 5111/ 8400 batches | train loss 0.3120012 +| epoch 6 | 5115/ 8400 batches | train loss 0.4661651 +| epoch 6 | 5119/ 8400 batches | train loss 0.4371605 +| epoch 6 | 5123/ 8400 batches | train loss 0.4288570 +| epoch 6 | 5127/ 8400 batches | train loss 0.3781890 +| epoch 6 | 5131/ 8400 batches | train loss 0.3230291 +| epoch 6 | 5135/ 8400 batches | train loss 0.3315010 +| epoch 6 | 5139/ 8400 batches | train loss 0.4018600 +| epoch 6 | 5143/ 8400 batches | train loss 0.3496967 +| epoch 6 | 5147/ 8400 batches | train loss 0.3865141 +| epoch 6 | 5151/ 8400 batches | train loss 0.3284498 +| epoch 6 | 5155/ 8400 batches | train loss 0.3589478 +| epoch 6 | 5159/ 8400 batches | train loss 0.4090552 +| epoch 6 | 5163/ 8400 batches | train loss 0.3766684 +| epoch 6 | 5167/ 8400 batches | train loss 0.3902973 +| epoch 6 | 5171/ 8400 batches | train loss 0.3137191 +| epoch 6 | 5175/ 8400 batches | train loss 0.4735994 +| epoch 6 | 5179/ 8400 batches | train loss 0.3784885 +| epoch 6 | 5183/ 8400 batches | train loss 0.3691243 +| epoch 6 | 5187/ 8400 batches | train loss 0.4304731 +| epoch 6 | 5191/ 8400 batches | train loss 0.4168028 +| epoch 6 | 5195/ 8400 batches | train loss 0.4061893 +| epoch 6 | 5199/ 8400 batches | train loss 0.3327205 +| epoch 6 | 5203/ 8400 batches | train loss 0.4039166 +| epoch 6 | 5207/ 8400 batches | train loss 0.3282452 +| epoch 6 | 5211/ 8400 batches | train loss 0.4279328 +| epoch 6 | 5215/ 8400 batches | train loss 0.4329898 +| epoch 6 | 5219/ 8400 batches | train loss 0.3366346 +| epoch 6 | 5223/ 8400 batches | train loss 0.3353082 +| epoch 6 | 5227/ 8400 batches | train loss 0.3456671 +| epoch 6 | 5231/ 8400 batches | train loss 0.3855256 +| epoch 6 | 5235/ 8400 batches | train loss 0.2281224 +| epoch 6 | 5239/ 8400 batches | train loss 0.3689882 +| epoch 6 | 5243/ 8400 batches | train loss 0.3138663 +| epoch 6 | 5247/ 8400 batches | train loss 0.4441478 +| epoch 6 | 5251/ 8400 batches | train loss 0.4052568 +| epoch 6 | 5255/ 8400 batches | train loss 0.3708320 +| epoch 6 | 5259/ 8400 batches | train loss 0.3819475 +| epoch 6 | 5263/ 8400 batches | train loss 0.3759818 +| epoch 6 | 5267/ 8400 batches | train loss 0.4979995 +| epoch 6 | 5271/ 8400 batches | train loss 0.3768259 +| epoch 6 | 5275/ 8400 batches | train loss 0.3190839 +| epoch 6 | 5279/ 8400 batches | train loss 0.2994760 +| epoch 6 | 5283/ 8400 batches | train loss 0.3621801 +| epoch 6 | 5287/ 8400 batches | train loss 0.4348982 +| epoch 6 | 5291/ 8400 batches | train loss 0.3538120 +| epoch 6 | 5295/ 8400 batches | train loss 0.3523566 +| epoch 6 | 5299/ 8400 batches | train loss 0.3375969 +| epoch 6 | 5303/ 8400 batches | train loss 0.4327372 +| epoch 6 | 5307/ 8400 batches | train loss 0.4502315 +| epoch 6 | 5311/ 8400 batches | train loss 0.4366744 +| epoch 6 | 5315/ 8400 batches | train loss 0.3750056 +| epoch 6 | 5319/ 8400 batches | train loss 0.3967808 +| epoch 6 | 5323/ 8400 batches | train loss 0.3581738 +| epoch 6 | 5327/ 8400 batches | train loss 0.4027151 +| epoch 6 | 5331/ 8400 batches | train loss 0.4221028 +| epoch 6 | 5335/ 8400 batches | train loss 0.3070931 +| epoch 6 | 5339/ 8400 batches | train loss 0.3708523 +| epoch 6 | 5343/ 8400 batches | train loss 0.3259406 +| epoch 6 | 5347/ 8400 batches | train loss 0.2696053 +| epoch 6 | 5351/ 8400 batches | train loss 0.4139265 +| epoch 6 | 5355/ 8400 batches | train loss 0.3560708 +| epoch 6 | 5359/ 8400 batches | train loss 0.2530756 +| epoch 6 | 5363/ 8400 batches | train loss 0.3361298 +| epoch 6 | 5367/ 8400 batches | train loss 0.3618787 +| epoch 6 | 5371/ 8400 batches | train loss 0.3340873 +| epoch 6 | 5375/ 8400 batches | train loss 0.3258800 +| epoch 6 | 5379/ 8400 batches | train loss 0.4077645 +| epoch 6 | 5383/ 8400 batches | train loss 0.3669410 +| epoch 6 | 5387/ 8400 batches | train loss 0.3660403 +| epoch 6 | 5391/ 8400 batches | train loss 0.4128209 +| epoch 6 | 5395/ 8400 batches | train loss 0.3803446 +| epoch 6 | 5399/ 8400 batches | train loss 0.3675069 +| epoch 6 | 5403/ 8400 batches | train loss 0.3457048 +| epoch 6 | 5407/ 8400 batches | train loss 0.3484460 +| epoch 6 | 5411/ 8400 batches | train loss 0.4187232 +| epoch 6 | 5415/ 8400 batches | train loss 0.3408754 +| epoch 6 | 5419/ 8400 batches | train loss 0.3701492 +| epoch 6 | 5423/ 8400 batches | train loss 0.4477451 +| epoch 6 | 5427/ 8400 batches | train loss 0.3479084 +| epoch 6 | 5431/ 8400 batches | train loss 0.3722733 +| epoch 6 | 5435/ 8400 batches | train loss 0.3288217 +| epoch 6 | 5439/ 8400 batches | train loss 0.4724792 +| epoch 6 | 5443/ 8400 batches | train loss 0.3949789 +| epoch 6 | 5447/ 8400 batches | train loss 0.4002167 +| epoch 6 | 5451/ 8400 batches | train loss 0.3442421 +| epoch 6 | 5455/ 8400 batches | train loss 0.3486437 +| epoch 6 | 5459/ 8400 batches | train loss 0.3321018 +| epoch 6 | 5463/ 8400 batches | train loss 0.3160405 +| epoch 6 | 5467/ 8400 batches | train loss 0.3538842 +| epoch 6 | 5471/ 8400 batches | train loss 0.3916158 +| epoch 6 | 5475/ 8400 batches | train loss 0.4420550 +| epoch 6 | 5479/ 8400 batches | train loss 0.3863413 +| epoch 6 | 5483/ 8400 batches | train loss 0.2940393 +| epoch 6 | 5487/ 8400 batches | train loss 0.3757448 +| epoch 6 | 5491/ 8400 batches | train loss 0.3759554 +| epoch 6 | 5495/ 8400 batches | train loss 0.3576173 +| epoch 6 | 5499/ 8400 batches | train loss 0.3528277 +| epoch 6 | 5503/ 8400 batches | train loss 0.3994429 +| epoch 6 | 5507/ 8400 batches | train loss 0.4070846 +| epoch 6 | 5511/ 8400 batches | train loss 0.3271375 +| epoch 6 | 5515/ 8400 batches | train loss 0.3522975 +| epoch 6 | 5519/ 8400 batches | train loss 0.4190377 +| epoch 6 | 5523/ 8400 batches | train loss 0.3754555 +| epoch 6 | 5527/ 8400 batches | train loss 0.3286244 +| epoch 6 | 5531/ 8400 batches | train loss 0.3182019 +| epoch 6 | 5535/ 8400 batches | train loss 0.3742472 +| epoch 6 | 5539/ 8400 batches | train loss 0.3237016 +| epoch 6 | 5543/ 8400 batches | train loss 0.3365483 +| epoch 6 | 5547/ 8400 batches | train loss 0.3319282 +| epoch 6 | 5551/ 8400 batches | train loss 0.4290338 +| epoch 6 | 5555/ 8400 batches | train loss 0.3432348 +| epoch 6 | 5559/ 8400 batches | train loss 0.3886164 +| epoch 6 | 5563/ 8400 batches | train loss 0.4090999 +| epoch 6 | 5567/ 8400 batches | train loss 0.3452067 +| epoch 6 | 5571/ 8400 batches | train loss 0.2562705 +| epoch 6 | 5575/ 8400 batches | train loss 0.3819045 +| epoch 6 | 5579/ 8400 batches | train loss 0.4020323 +| epoch 6 | 5583/ 8400 batches | train loss 0.3536893 +| epoch 6 | 5587/ 8400 batches | train loss 0.3351574 +| epoch 6 | 5591/ 8400 batches | train loss 0.4214056 +| epoch 6 | 5595/ 8400 batches | train loss 0.3549184 +| epoch 6 | 5599/ 8400 batches | train loss 0.4352523 +| epoch 6 | 5603/ 8400 batches | train loss 0.3616571 +| epoch 6 | 5607/ 8400 batches | train loss 0.3710310 +| epoch 6 | 5611/ 8400 batches | train loss 0.4042631 +| epoch 6 | 5615/ 8400 batches | train loss 0.3922112 +| epoch 6 | 5619/ 8400 batches | train loss 0.4007918 +| epoch 6 | 5623/ 8400 batches | train loss 0.3697961 +| epoch 6 | 5627/ 8400 batches | train loss 0.4165679 +| epoch 6 | 5631/ 8400 batches | train loss 0.3259763 +| epoch 6 | 5635/ 8400 batches | train loss 0.3359147 +| epoch 6 | 5639/ 8400 batches | train loss 0.3833855 +| epoch 6 | 5643/ 8400 batches | train loss 0.3377919 +| epoch 6 | 5647/ 8400 batches | train loss 0.4215222 +| epoch 6 | 5651/ 8400 batches | train loss 0.3883696 +| epoch 6 | 5655/ 8400 batches | train loss 0.3664129 +| epoch 6 | 5659/ 8400 batches | train loss 0.3194180 +| epoch 6 | 5663/ 8400 batches | train loss 0.4552382 +| epoch 6 | 5667/ 8400 batches | train loss 0.3811074 +| epoch 6 | 5671/ 8400 batches | train loss 0.3755296 +| epoch 6 | 5675/ 8400 batches | train loss 0.3980089 +| epoch 6 | 5679/ 8400 batches | train loss 0.3907476 +| epoch 6 | 5683/ 8400 batches | train loss 0.3615322 +| epoch 6 | 5687/ 8400 batches | train loss 0.3791416 +| epoch 6 | 5691/ 8400 batches | train loss 0.3995268 +| epoch 6 | 5695/ 8400 batches | train loss 0.3500284 +| epoch 6 | 5699/ 8400 batches | train loss 0.3321884 +| epoch 6 | 5703/ 8400 batches | train loss 0.4410642 +| epoch 6 | 5707/ 8400 batches | train loss 0.4750742 +| epoch 6 | 5711/ 8400 batches | train loss 0.4596698 +| epoch 6 | 5715/ 8400 batches | train loss 0.3708929 +| epoch 6 | 5719/ 8400 batches | train loss 0.3838985 +| epoch 6 | 5723/ 8400 batches | train loss 0.3560926 +| epoch 6 | 5727/ 8400 batches | train loss 0.3773698 +| epoch 6 | 5731/ 8400 batches | train loss 0.3863535 +| epoch 6 | 5735/ 8400 batches | train loss 0.3854184 +| epoch 6 | 5739/ 8400 batches | train loss 0.3873889 +| epoch 6 | 5743/ 8400 batches | train loss 0.3765187 +| epoch 6 | 5747/ 8400 batches | train loss 0.4367685 +| epoch 6 | 5751/ 8400 batches | train loss 0.4189932 +| epoch 6 | 5755/ 8400 batches | train loss 0.5204793 +| epoch 6 | 5759/ 8400 batches | train loss 0.4309023 +| epoch 6 | 5763/ 8400 batches | train loss 0.3290609 +| epoch 6 | 5767/ 8400 batches | train loss 0.4112000 +| epoch 6 | 5771/ 8400 batches | train loss 0.4325606 +| epoch 6 | 5775/ 8400 batches | train loss 0.3648731 +| epoch 6 | 5779/ 8400 batches | train loss 0.4603696 +| epoch 6 | 5783/ 8400 batches | train loss 0.3749186 +| epoch 6 | 5787/ 8400 batches | train loss 0.3851742 +| epoch 6 | 5791/ 8400 batches | train loss 0.3696223 +| epoch 6 | 5795/ 8400 batches | train loss 0.4392550 +| epoch 6 | 5799/ 8400 batches | train loss 0.3338831 +| epoch 6 | 5803/ 8400 batches | train loss 0.4803350 +| epoch 6 | 5807/ 8400 batches | train loss 0.4405354 +| epoch 6 | 5811/ 8400 batches | train loss 0.4364749 +| epoch 6 | 5815/ 8400 batches | train loss 0.3441716 +| epoch 6 | 5819/ 8400 batches | train loss 0.4148275 +| epoch 6 | 5823/ 8400 batches | train loss 0.3898729 +| epoch 6 | 5827/ 8400 batches | train loss 0.2963113 +| epoch 6 | 5831/ 8400 batches | train loss 0.3660000 +| epoch 6 | 5835/ 8400 batches | train loss 0.3277713 +| epoch 6 | 5839/ 8400 batches | train loss 0.3996219 +| epoch 6 | 5843/ 8400 batches | train loss 0.3443755 +| epoch 6 | 5847/ 8400 batches | train loss 0.3226949 +| epoch 6 | 5851/ 8400 batches | train loss 0.3228279 +| epoch 6 | 5855/ 8400 batches | train loss 0.4075278 +| epoch 6 | 5859/ 8400 batches | train loss 0.3808901 +| epoch 6 | 5863/ 8400 batches | train loss 0.3451465 +| epoch 6 | 5867/ 8400 batches | train loss 0.3357682 +| epoch 6 | 5871/ 8400 batches | train loss 0.3374276 +| epoch 6 | 5875/ 8400 batches | train loss 0.3946557 +| epoch 6 | 5879/ 8400 batches | train loss 0.4188489 +| epoch 6 | 5883/ 8400 batches | train loss 0.4498705 +| epoch 6 | 5887/ 8400 batches | train loss 0.4246255 +| epoch 6 | 5891/ 8400 batches | train loss 0.3920670 +| epoch 6 | 5895/ 8400 batches | train loss 0.3344772 +| epoch 6 | 5899/ 8400 batches | train loss 0.3280514 +| epoch 6 | 5903/ 8400 batches | train loss 0.3537996 +| epoch 6 | 5907/ 8400 batches | train loss 0.4014885 +| epoch 6 | 5911/ 8400 batches | train loss 0.4078113 +| epoch 6 | 5915/ 8400 batches | train loss 0.3418166 +| epoch 6 | 5919/ 8400 batches | train loss 0.4075298 +| epoch 6 | 5923/ 8400 batches | train loss 0.4220013 +| epoch 6 | 5927/ 8400 batches | train loss 0.3568113 +| epoch 6 | 5931/ 8400 batches | train loss 0.3871388 +| epoch 6 | 5935/ 8400 batches | train loss 0.3370263 +| epoch 6 | 5939/ 8400 batches | train loss 0.3357088 +| epoch 6 | 5943/ 8400 batches | train loss 0.3069610 +| epoch 6 | 5947/ 8400 batches | train loss 0.4127233 +| epoch 6 | 5951/ 8400 batches | train loss 0.3877161 +| epoch 6 | 5955/ 8400 batches | train loss 0.3860028 +| epoch 6 | 5959/ 8400 batches | train loss 0.4257557 +| epoch 6 | 5963/ 8400 batches | train loss 0.3921205 +| epoch 6 | 5967/ 8400 batches | train loss 0.3978844 +| epoch 6 | 5971/ 8400 batches | train loss 0.3497125 +| epoch 6 | 5975/ 8400 batches | train loss 0.2945784 +| epoch 6 | 5979/ 8400 batches | train loss 0.4080750 +| epoch 6 | 5983/ 8400 batches | train loss 0.4138640 +| epoch 6 | 5987/ 8400 batches | train loss 0.3718114 +| epoch 6 | 5991/ 8400 batches | train loss 0.3418494 +| epoch 6 | 5995/ 8400 batches | train loss 0.3739327 +| epoch 6 | 5999/ 8400 batches | train loss 0.3648783 +| epoch 6 | 6003/ 8400 batches | train loss 0.3556022 +| epoch 6 | 6007/ 8400 batches | train loss 0.3906895 +| epoch 6 | 6011/ 8400 batches | train loss 0.3650513 +| epoch 6 | 6015/ 8400 batches | train loss 0.3923947 +| epoch 6 | 6019/ 8400 batches | train loss 0.3818164 +| epoch 6 | 6023/ 8400 batches | train loss 0.3540571 +| epoch 6 | 6027/ 8400 batches | train loss 0.3886832 +| epoch 6 | 6031/ 8400 batches | train loss 0.4325024 +| epoch 6 | 6035/ 8400 batches | train loss 0.3475519 +| epoch 6 | 6039/ 8400 batches | train loss 0.4167368 +| epoch 6 | 6043/ 8400 batches | train loss 0.4139600 +| epoch 6 | 6047/ 8400 batches | train loss 0.3670704 +| epoch 6 | 6051/ 8400 batches | train loss 0.3889236 +| epoch 6 | 6055/ 8400 batches | train loss 0.4340546 +| epoch 6 | 6059/ 8400 batches | train loss 0.3916903 +| epoch 6 | 6063/ 8400 batches | train loss 0.4205534 +| epoch 6 | 6067/ 8400 batches | train loss 0.3520940 +| epoch 6 | 6071/ 8400 batches | train loss 0.3580380 +| epoch 6 | 6075/ 8400 batches | train loss 0.3592917 +| epoch 6 | 6079/ 8400 batches | train loss 0.3665102 +| epoch 6 | 6083/ 8400 batches | train loss 0.3202717 +| epoch 6 | 6087/ 8400 batches | train loss 0.3426207 +| epoch 6 | 6091/ 8400 batches | train loss 0.4280223 +| epoch 6 | 6095/ 8400 batches | train loss 0.4399790 +| epoch 6 | 6099/ 8400 batches | train loss 0.4578980 +| epoch 6 | 6103/ 8400 batches | train loss 0.3880490 +| epoch 6 | 6107/ 8400 batches | train loss 0.4303781 +| epoch 6 | 6111/ 8400 batches | train loss 0.3713272 +| epoch 6 | 6115/ 8400 batches | train loss 0.3238434 +| epoch 6 | 6119/ 8400 batches | train loss 0.4039516 +| epoch 6 | 6123/ 8400 batches | train loss 0.3482292 +| epoch 6 | 6127/ 8400 batches | train loss 0.4042948 +| epoch 6 | 6131/ 8400 batches | train loss 0.3477680 +| epoch 6 | 6135/ 8400 batches | train loss 0.4929043 +| epoch 6 | 6139/ 8400 batches | train loss 0.4346026 +| epoch 6 | 6143/ 8400 batches | train loss 0.3339945 +| epoch 6 | 6147/ 8400 batches | train loss 0.3293174 +| epoch 6 | 6151/ 8400 batches | train loss 0.3773664 +| epoch 6 | 6155/ 8400 batches | train loss 0.3783126 +| epoch 6 | 6159/ 8400 batches | train loss 0.3426182 +| epoch 6 | 6163/ 8400 batches | train loss 0.3404560 +| epoch 6 | 6167/ 8400 batches | train loss 0.4053297 +| epoch 6 | 6171/ 8400 batches | train loss 0.3876569 +| epoch 6 | 6175/ 8400 batches | train loss 0.4005542 +| epoch 6 | 6179/ 8400 batches | train loss 0.4230911 +| epoch 6 | 6183/ 8400 batches | train loss 0.3998631 +| epoch 6 | 6187/ 8400 batches | train loss 0.3178067 +| epoch 6 | 6191/ 8400 batches | train loss 0.3397077 +| epoch 6 | 6195/ 8400 batches | train loss 0.3940915 +| epoch 6 | 6199/ 8400 batches | train loss 0.3834480 +| epoch 6 | 6203/ 8400 batches | train loss 0.3738125 +| epoch 6 | 6207/ 8400 batches | train loss 0.4523168 +| epoch 6 | 6211/ 8400 batches | train loss 0.4008017 +| epoch 6 | 6215/ 8400 batches | train loss 0.4259050 +| epoch 6 | 6219/ 8400 batches | train loss 0.3435380 +| epoch 6 | 6223/ 8400 batches | train loss 0.3098398 +| epoch 6 | 6227/ 8400 batches | train loss 0.4647582 +| epoch 6 | 6231/ 8400 batches | train loss 0.3883741 +| epoch 6 | 6235/ 8400 batches | train loss 0.4512703 +| epoch 6 | 6239/ 8400 batches | train loss 0.3907179 +| epoch 6 | 6243/ 8400 batches | train loss 0.2502961 +| epoch 6 | 6247/ 8400 batches | train loss 0.4133961 +| epoch 6 | 6251/ 8400 batches | train loss 0.3230684 +| epoch 6 | 6255/ 8400 batches | train loss 0.3504889 +| epoch 6 | 6259/ 8400 batches | train loss 0.3529733 +| epoch 6 | 6263/ 8400 batches | train loss 0.3642544 +| epoch 6 | 6267/ 8400 batches | train loss 0.3580608 +| epoch 6 | 6271/ 8400 batches | train loss 0.3654296 +| epoch 6 | 6275/ 8400 batches | train loss 0.3741541 +| epoch 6 | 6279/ 8400 batches | train loss 0.3631340 +| epoch 6 | 6283/ 8400 batches | train loss 0.4097067 +| epoch 6 | 6287/ 8400 batches | train loss 0.3924662 +| epoch 6 | 6291/ 8400 batches | train loss 0.3332335 +| epoch 6 | 6295/ 8400 batches | train loss 0.4141357 +| epoch 6 | 6299/ 8400 batches | train loss 0.3364836 +| epoch 6 | 6303/ 8400 batches | train loss 0.4126774 +| epoch 6 | 6307/ 8400 batches | train loss 0.3083975 +| epoch 6 | 6311/ 8400 batches | train loss 0.4609095 +| epoch 6 | 6315/ 8400 batches | train loss 0.3240078 +| epoch 6 | 6319/ 8400 batches | train loss 0.3214043 +| epoch 6 | 6323/ 8400 batches | train loss 0.3076674 +| epoch 6 | 6327/ 8400 batches | train loss 0.4084157 +| epoch 6 | 6331/ 8400 batches | train loss 0.4157047 +| epoch 6 | 6335/ 8400 batches | train loss 0.4007747 +| epoch 6 | 6339/ 8400 batches | train loss 0.2990264 +| epoch 6 | 6343/ 8400 batches | train loss 0.3348491 +| epoch 6 | 6347/ 8400 batches | train loss 0.3976213 +| epoch 6 | 6351/ 8400 batches | train loss 0.3650198 +| epoch 6 | 6355/ 8400 batches | train loss 0.3812591 +| epoch 6 | 6359/ 8400 batches | train loss 0.3838947 +| epoch 6 | 6363/ 8400 batches | train loss 0.3995168 +| epoch 6 | 6367/ 8400 batches | train loss 0.4428971 +| epoch 6 | 6371/ 8400 batches | train loss 0.4422391 +| epoch 6 | 6375/ 8400 batches | train loss 0.3317412 +| epoch 6 | 6379/ 8400 batches | train loss 0.3506729 +| epoch 6 | 6383/ 8400 batches | train loss 0.3871323 +| epoch 6 | 6387/ 8400 batches | train loss 0.3392584 +| epoch 6 | 6391/ 8400 batches | train loss 0.3800361 +| epoch 6 | 6395/ 8400 batches | train loss 0.3976244 +| epoch 6 | 6399/ 8400 batches | train loss 0.3705254 +| epoch 6 | 6403/ 8400 batches | train loss 0.4200527 +| epoch 6 | 6407/ 8400 batches | train loss 0.3986891 +| epoch 6 | 6411/ 8400 batches | train loss 0.3695697 +| epoch 6 | 6415/ 8400 batches | train loss 0.4604526 +| epoch 6 | 6419/ 8400 batches | train loss 0.3417510 +| epoch 6 | 6423/ 8400 batches | train loss 0.1657537 +| epoch 6 | 6427/ 8400 batches | train loss 0.3364909 +| epoch 6 | 6431/ 8400 batches | train loss 0.3983625 +| epoch 6 | 6435/ 8400 batches | train loss 0.3931912 +| epoch 6 | 6439/ 8400 batches | train loss 0.3968652 +| epoch 6 | 6443/ 8400 batches | train loss 0.3251289 +| epoch 6 | 6447/ 8400 batches | train loss 0.3441336 +| epoch 6 | 6451/ 8400 batches | train loss 0.3466608 +| epoch 6 | 6455/ 8400 batches | train loss 0.3430800 +| epoch 6 | 6459/ 8400 batches | train loss 0.3813345 +| epoch 6 | 6463/ 8400 batches | train loss 0.4419886 +| epoch 6 | 6467/ 8400 batches | train loss 0.4219951 +| epoch 6 | 6471/ 8400 batches | train loss 0.4163164 +| epoch 6 | 6475/ 8400 batches | train loss 0.3535006 +| epoch 6 | 6479/ 8400 batches | train loss 0.3337186 +| epoch 6 | 6483/ 8400 batches | train loss 0.4726553 +| epoch 6 | 6487/ 8400 batches | train loss 0.4326179 +| epoch 6 | 6491/ 8400 batches | train loss 0.3607337 +| epoch 6 | 6495/ 8400 batches | train loss 0.4162238 +| epoch 6 | 6499/ 8400 batches | train loss 0.4074447 +| epoch 6 | 6503/ 8400 batches | train loss 0.4177425 +| epoch 6 | 6507/ 8400 batches | train loss 0.3311414 +| epoch 6 | 6511/ 8400 batches | train loss 0.3178797 +| epoch 6 | 6515/ 8400 batches | train loss 0.3750841 +| epoch 6 | 6519/ 8400 batches | train loss 0.3730117 +| epoch 6 | 6523/ 8400 batches | train loss 0.3960105 +| epoch 6 | 6527/ 8400 batches | train loss 0.3727001 +| epoch 6 | 6531/ 8400 batches | train loss 0.1552652 +| epoch 6 | 6535/ 8400 batches | train loss 0.4206885 +| epoch 6 | 6539/ 8400 batches | train loss 0.4617694 +| epoch 6 | 6543/ 8400 batches | train loss 0.4423963 +| epoch 6 | 6547/ 8400 batches | train loss 0.3463748 +| epoch 6 | 6551/ 8400 batches | train loss 0.3777027 +| epoch 6 | 6555/ 8400 batches | train loss 0.4326711 +| epoch 6 | 6559/ 8400 batches | train loss 0.3458464 +| epoch 6 | 6563/ 8400 batches | train loss 0.3992879 +| epoch 6 | 6567/ 8400 batches | train loss 0.4176648 +| epoch 6 | 6571/ 8400 batches | train loss 0.4322440 +| epoch 6 | 6575/ 8400 batches | train loss 0.3370010 +| epoch 6 | 6579/ 8400 batches | train loss 0.4255217 +| epoch 6 | 6583/ 8400 batches | train loss 0.3271057 +| epoch 6 | 6587/ 8400 batches | train loss 0.3908035 +| epoch 6 | 6591/ 8400 batches | train loss 0.3818752 +| epoch 6 | 6595/ 8400 batches | train loss 0.3555146 +| epoch 6 | 6599/ 8400 batches | train loss 0.4073219 +| epoch 6 | 6603/ 8400 batches | train loss 0.3284090 +| epoch 6 | 6607/ 8400 batches | train loss 0.3639864 +| epoch 6 | 6611/ 8400 batches | train loss 0.3912107 +| epoch 6 | 6615/ 8400 batches | train loss 0.4126142 +| epoch 6 | 6619/ 8400 batches | train loss 0.3603126 +| epoch 6 | 6623/ 8400 batches | train loss 0.3532721 +| epoch 6 | 6627/ 8400 batches | train loss 0.4026565 +| epoch 6 | 6631/ 8400 batches | train loss 0.3244573 +| epoch 6 | 6635/ 8400 batches | train loss 0.2452089 +| epoch 6 | 6639/ 8400 batches | train loss 0.3442505 +| epoch 6 | 6643/ 8400 batches | train loss 0.4475889 +| epoch 6 | 6647/ 8400 batches | train loss 0.3684675 +| epoch 6 | 6651/ 8400 batches | train loss 0.3671975 +| epoch 6 | 6655/ 8400 batches | train loss 0.3543206 +| epoch 6 | 6659/ 8400 batches | train loss 0.3503725 +| epoch 6 | 6663/ 8400 batches | train loss 0.3360605 +| epoch 6 | 6667/ 8400 batches | train loss 0.3791746 +| epoch 6 | 6671/ 8400 batches | train loss 0.4034551 +| epoch 6 | 6675/ 8400 batches | train loss 0.3869084 +| epoch 6 | 6679/ 8400 batches | train loss 0.3781978 +| epoch 6 | 6683/ 8400 batches | train loss 0.3800204 +| epoch 6 | 6687/ 8400 batches | train loss 0.4395344 +| epoch 6 | 6691/ 8400 batches | train loss 0.3313873 +| epoch 6 | 6695/ 8400 batches | train loss 0.3641157 +| epoch 6 | 6699/ 8400 batches | train loss 0.4354197 +| epoch 6 | 6703/ 8400 batches | train loss 0.3676132 +| epoch 6 | 6707/ 8400 batches | train loss 0.4718770 +| epoch 6 | 6711/ 8400 batches | train loss 0.3874162 +| epoch 6 | 6715/ 8400 batches | train loss 0.4874208 +| epoch 6 | 6719/ 8400 batches | train loss 0.3837421 +| epoch 6 | 6723/ 8400 batches | train loss 0.3477507 +| epoch 6 | 6727/ 8400 batches | train loss 0.4251272 +| epoch 6 | 6731/ 8400 batches | train loss 0.3487928 +| epoch 6 | 6735/ 8400 batches | train loss 0.4060708 +| epoch 6 | 6739/ 8400 batches | train loss 0.3196328 +| epoch 6 | 6743/ 8400 batches | train loss 0.3176275 +| epoch 6 | 6747/ 8400 batches | train loss 0.3849761 +| epoch 6 | 6751/ 8400 batches | train loss 0.3602379 +| epoch 6 | 6755/ 8400 batches | train loss 0.3763515 +| epoch 6 | 6759/ 8400 batches | train loss 0.3325038 +| epoch 6 | 6763/ 8400 batches | train loss 0.4625082 +| epoch 6 | 6767/ 8400 batches | train loss 0.3634365 +| epoch 6 | 6771/ 8400 batches | train loss 0.3111908 +| epoch 6 | 6775/ 8400 batches | train loss 0.2421676 +| epoch 6 | 6779/ 8400 batches | train loss 0.3269644 +| epoch 6 | 6783/ 8400 batches | train loss 0.3137474 +| epoch 6 | 6787/ 8400 batches | train loss 0.3637392 +| epoch 6 | 6791/ 8400 batches | train loss 0.3929460 +| epoch 6 | 6795/ 8400 batches | train loss 0.4002490 +| epoch 6 | 6799/ 8400 batches | train loss 0.4054226 +| epoch 6 | 6803/ 8400 batches | train loss 0.3391381 +| epoch 6 | 6807/ 8400 batches | train loss 0.3980359 +| epoch 6 | 6811/ 8400 batches | train loss 0.3146196 +| epoch 6 | 6815/ 8400 batches | train loss 0.3660464 +| epoch 6 | 6819/ 8400 batches | train loss 0.3771735 +| epoch 6 | 6823/ 8400 batches | train loss 0.3611595 +| epoch 6 | 6827/ 8400 batches | train loss 0.4019597 +| epoch 6 | 6831/ 8400 batches | train loss 0.4100222 +| epoch 6 | 6835/ 8400 batches | train loss 0.4442335 +| epoch 6 | 6839/ 8400 batches | train loss 0.4180596 +| epoch 6 | 6843/ 8400 batches | train loss 0.2482554 +| epoch 6 | 6847/ 8400 batches | train loss 0.3655428 +| epoch 6 | 6851/ 8400 batches | train loss 0.3513274 +| epoch 6 | 6855/ 8400 batches | train loss 0.4351763 +| epoch 6 | 6859/ 8400 batches | train loss 0.3230855 +| epoch 6 | 6863/ 8400 batches | train loss 0.4400292 +| epoch 6 | 6867/ 8400 batches | train loss 0.3833303 +| epoch 6 | 6871/ 8400 batches | train loss 0.4336782 +| epoch 6 | 6875/ 8400 batches | train loss 0.4123456 +| epoch 6 | 6879/ 8400 batches | train loss 0.3686807 +| epoch 6 | 6883/ 8400 batches | train loss 0.4056500 +| epoch 6 | 6887/ 8400 batches | train loss 0.3254089 +| epoch 6 | 6891/ 8400 batches | train loss 0.4122716 +| epoch 6 | 6895/ 8400 batches | train loss 0.3231982 +| epoch 6 | 6899/ 8400 batches | train loss 0.4264804 +| epoch 6 | 6903/ 8400 batches | train loss 0.3771579 +| epoch 6 | 6907/ 8400 batches | train loss 0.3801661 +| epoch 6 | 6911/ 8400 batches | train loss 0.3191906 +| epoch 6 | 6915/ 8400 batches | train loss 0.3217516 +| epoch 6 | 6919/ 8400 batches | train loss 0.4120117 +| epoch 6 | 6923/ 8400 batches | train loss 0.3391077 +| epoch 6 | 6927/ 8400 batches | train loss 0.4111813 +| epoch 6 | 6931/ 8400 batches | train loss 0.3771931 +| epoch 6 | 6935/ 8400 batches | train loss 0.3837105 +| epoch 6 | 6939/ 8400 batches | train loss 0.3668392 +| epoch 6 | 6943/ 8400 batches | train loss 0.4000870 +| epoch 6 | 6947/ 8400 batches | train loss 0.3847064 +| epoch 6 | 6951/ 8400 batches | train loss 0.3488519 +| epoch 6 | 6955/ 8400 batches | train loss 0.4788032 +| epoch 6 | 6959/ 8400 batches | train loss 0.3106651 +| epoch 6 | 6963/ 8400 batches | train loss 0.4447287 +| epoch 6 | 6967/ 8400 batches | train loss 0.3983161 +| epoch 6 | 6971/ 8400 batches | train loss 0.3590435 +| epoch 6 | 6975/ 8400 batches | train loss 0.3723275 +| epoch 6 | 6979/ 8400 batches | train loss 0.3544937 +| epoch 6 | 6983/ 8400 batches | train loss 0.3874047 +| epoch 6 | 6987/ 8400 batches | train loss 0.3832420 +| epoch 6 | 6991/ 8400 batches | train loss 0.4050359 +| epoch 6 | 6995/ 8400 batches | train loss 0.3734666 +| epoch 6 | 6999/ 8400 batches | train loss 0.4243060 +| epoch 6 | 7003/ 8400 batches | train loss 0.3517202 +| epoch 6 | 7007/ 8400 batches | train loss 0.3311595 +| epoch 6 | 7011/ 8400 batches | train loss 0.3168406 +| epoch 6 | 7015/ 8400 batches | train loss 0.4579694 +| epoch 6 | 7019/ 8400 batches | train loss 0.4051089 +| epoch 6 | 7023/ 8400 batches | train loss 0.3972024 +| epoch 6 | 7027/ 8400 batches | train loss 0.4263167 +| epoch 6 | 7031/ 8400 batches | train loss 0.3591706 +| epoch 6 | 7035/ 8400 batches | train loss 0.3472209 +| epoch 6 | 7039/ 8400 batches | train loss 0.3640660 +| epoch 6 | 7043/ 8400 batches | train loss 0.3370643 +| epoch 6 | 7047/ 8400 batches | train loss 0.3541386 +| epoch 6 | 7051/ 8400 batches | train loss 0.3455248 +| epoch 6 | 7055/ 8400 batches | train loss 0.3416582 +| epoch 6 | 7059/ 8400 batches | train loss 0.4037220 +| epoch 6 | 7063/ 8400 batches | train loss 0.4395462 +| epoch 6 | 7067/ 8400 batches | train loss 0.4047754 +| epoch 6 | 7071/ 8400 batches | train loss 0.3551019 +| epoch 6 | 7075/ 8400 batches | train loss 0.3903745 +| epoch 6 | 7079/ 8400 batches | train loss 0.4170881 +| epoch 6 | 7083/ 8400 batches | train loss 0.4036525 +| epoch 6 | 7087/ 8400 batches | train loss 0.4507751 +| epoch 6 | 7091/ 8400 batches | train loss 0.4593830 +| epoch 6 | 7095/ 8400 batches | train loss 0.4311958 +| epoch 6 | 7099/ 8400 batches | train loss 0.3494098 +| epoch 6 | 7103/ 8400 batches | train loss 0.3300614 +| epoch 6 | 7107/ 8400 batches | train loss 0.3521779 +| epoch 6 | 7111/ 8400 batches | train loss 0.4278703 +| epoch 6 | 7115/ 8400 batches | train loss 0.3590392 +| epoch 6 | 7119/ 8400 batches | train loss 0.3742370 +| epoch 6 | 7123/ 8400 batches | train loss 0.3073884 +| epoch 6 | 7127/ 8400 batches | train loss 0.2728298 +| epoch 6 | 7131/ 8400 batches | train loss 0.4243740 +| epoch 6 | 7135/ 8400 batches | train loss 0.3997925 +| epoch 6 | 7139/ 8400 batches | train loss 0.3985448 +| epoch 6 | 7143/ 8400 batches | train loss 0.3451787 +| epoch 6 | 7147/ 8400 batches | train loss 0.4726752 +| epoch 6 | 7151/ 8400 batches | train loss 0.3440052 +| epoch 6 | 7155/ 8400 batches | train loss 0.3519142 +| epoch 6 | 7159/ 8400 batches | train loss 0.3759907 +| epoch 6 | 7163/ 8400 batches | train loss 0.3870188 +| epoch 6 | 7167/ 8400 batches | train loss 0.3802348 +| epoch 6 | 7171/ 8400 batches | train loss 0.3879396 +| epoch 6 | 7175/ 8400 batches | train loss 0.3496137 +| epoch 6 | 7179/ 8400 batches | train loss 0.3368419 +| epoch 6 | 7183/ 8400 batches | train loss 0.3295263 +| epoch 6 | 7187/ 8400 batches | train loss 0.3694409 +| epoch 6 | 7191/ 8400 batches | train loss 0.3743019 +| epoch 6 | 7195/ 8400 batches | train loss 0.4214928 +| epoch 6 | 7199/ 8400 batches | train loss 0.3326440 +| epoch 6 | 7203/ 8400 batches | train loss 0.4416096 +| epoch 6 | 7207/ 8400 batches | train loss 0.3872027 +| epoch 6 | 7211/ 8400 batches | train loss 0.3367422 +| epoch 6 | 7215/ 8400 batches | train loss 0.3077896 +| epoch 6 | 7219/ 8400 batches | train loss 0.4192713 +| epoch 6 | 7223/ 8400 batches | train loss 0.3979841 +| epoch 6 | 7227/ 8400 batches | train loss 0.3802633 +| epoch 6 | 7231/ 8400 batches | train loss 0.3815219 +| epoch 6 | 7235/ 8400 batches | train loss 0.3217603 +| epoch 6 | 7239/ 8400 batches | train loss 0.4107845 +| epoch 6 | 7243/ 8400 batches | train loss 0.3896442 +| epoch 6 | 7247/ 8400 batches | train loss 0.3575361 +| epoch 6 | 7251/ 8400 batches | train loss 0.3999701 +| epoch 6 | 7255/ 8400 batches | train loss 0.3190371 +| epoch 6 | 7259/ 8400 batches | train loss 0.4383782 +| epoch 6 | 7263/ 8400 batches | train loss 0.4060113 +| epoch 6 | 7267/ 8400 batches | train loss 0.4969020 +| epoch 6 | 7271/ 8400 batches | train loss 0.3880251 +| epoch 6 | 7275/ 8400 batches | train loss 0.5170559 +| epoch 6 | 7279/ 8400 batches | train loss 0.3878559 +| epoch 6 | 7283/ 8400 batches | train loss 0.3382559 +| epoch 6 | 7287/ 8400 batches | train loss 0.3756573 +| epoch 6 | 7291/ 8400 batches | train loss 0.4099142 +| epoch 6 | 7295/ 8400 batches | train loss 0.4285006 +| epoch 6 | 7299/ 8400 batches | train loss 0.3178510 +| epoch 6 | 7303/ 8400 batches | train loss 0.3562215 +| epoch 6 | 7307/ 8400 batches | train loss 0.3257257 +| epoch 6 | 7311/ 8400 batches | train loss 0.3339200 +| epoch 6 | 7315/ 8400 batches | train loss 0.4667159 +| epoch 6 | 7319/ 8400 batches | train loss 0.3481296 +| epoch 6 | 7323/ 8400 batches | train loss 0.4828500 +| epoch 6 | 7327/ 8400 batches | train loss 0.3852610 +| epoch 6 | 7331/ 8400 batches | train loss 0.4163651 +| epoch 6 | 7335/ 8400 batches | train loss 0.4072090 +| epoch 6 | 7339/ 8400 batches | train loss 0.4084730 +| epoch 6 | 7343/ 8400 batches | train loss 0.3869878 +| epoch 6 | 7347/ 8400 batches | train loss 0.4267856 +| epoch 6 | 7351/ 8400 batches | train loss 0.3585134 +| epoch 6 | 7355/ 8400 batches | train loss 0.3329163 +| epoch 6 | 7359/ 8400 batches | train loss 0.4406329 +| epoch 6 | 7363/ 8400 batches | train loss 0.4032004 +| epoch 6 | 7367/ 8400 batches | train loss 0.2433393 +| epoch 6 | 7371/ 8400 batches | train loss 0.3379901 +| epoch 6 | 7375/ 8400 batches | train loss 0.4340243 +| epoch 6 | 7379/ 8400 batches | train loss 0.4584609 +| epoch 6 | 7383/ 8400 batches | train loss 0.3839302 +| epoch 6 | 7387/ 8400 batches | train loss 0.3674006 +| epoch 6 | 7391/ 8400 batches | train loss 0.3450010 +| epoch 6 | 7395/ 8400 batches | train loss 0.3771836 +| epoch 6 | 7399/ 8400 batches | train loss 0.4176767 +| epoch 6 | 7403/ 8400 batches | train loss 0.3566422 +| epoch 6 | 7407/ 8400 batches | train loss 0.3177570 +| epoch 6 | 7411/ 8400 batches | train loss 0.3618337 +| epoch 6 | 7415/ 8400 batches | train loss 0.3459912 +| epoch 6 | 7419/ 8400 batches | train loss 0.3371974 +| epoch 6 | 7423/ 8400 batches | train loss 0.3936647 +| epoch 6 | 7427/ 8400 batches | train loss 0.3829039 +| epoch 6 | 7431/ 8400 batches | train loss 0.3870757 +| epoch 6 | 7435/ 8400 batches | train loss 0.3851228 +| epoch 6 | 7439/ 8400 batches | train loss 0.2822105 +| epoch 6 | 7443/ 8400 batches | train loss 0.3807952 +| epoch 6 | 7447/ 8400 batches | train loss 0.3348125 +| epoch 6 | 7451/ 8400 batches | train loss 0.3911593 +| epoch 6 | 7455/ 8400 batches | train loss 0.4464636 +| epoch 6 | 7459/ 8400 batches | train loss 0.4019259 +| epoch 6 | 7463/ 8400 batches | train loss 0.3746290 +| epoch 6 | 7467/ 8400 batches | train loss 0.4088509 +| epoch 6 | 7471/ 8400 batches | train loss 0.3712544 +| epoch 6 | 7475/ 8400 batches | train loss 0.3611959 +| epoch 6 | 7479/ 8400 batches | train loss 0.3427815 +| epoch 6 | 7483/ 8400 batches | train loss 0.4092502 +| epoch 6 | 7487/ 8400 batches | train loss 0.4045916 +| epoch 6 | 7491/ 8400 batches | train loss 0.3334920 +| epoch 6 | 7495/ 8400 batches | train loss 0.3294127 +| epoch 6 | 7499/ 8400 batches | train loss 0.4064603 +| epoch 6 | 7503/ 8400 batches | train loss 0.3889241 +| epoch 6 | 7507/ 8400 batches | train loss 0.3990232 +| epoch 6 | 7511/ 8400 batches | train loss 0.3515112 +| epoch 6 | 7515/ 8400 batches | train loss 0.3527545 +| epoch 6 | 7519/ 8400 batches | train loss 0.3721527 +| epoch 6 | 7523/ 8400 batches | train loss 0.2427907 +| epoch 6 | 7527/ 8400 batches | train loss 0.3504215 +| epoch 6 | 7531/ 8400 batches | train loss 0.3648640 +| epoch 6 | 7535/ 8400 batches | train loss 0.3928439 +| epoch 6 | 7539/ 8400 batches | train loss 0.3374944 +| epoch 6 | 7543/ 8400 batches | train loss 0.3099813 +| epoch 6 | 7547/ 8400 batches | train loss 0.3906706 +| epoch 6 | 7551/ 8400 batches | train loss 0.4099653 +| epoch 6 | 7555/ 8400 batches | train loss 0.4285055 +| epoch 6 | 7559/ 8400 batches | train loss 0.4213586 +| epoch 6 | 7563/ 8400 batches | train loss 0.3242487 +| epoch 6 | 7567/ 8400 batches | train loss 0.4282933 +| epoch 6 | 7571/ 8400 batches | train loss 0.3669970 +| epoch 6 | 7575/ 8400 batches | train loss 0.3635245 +| epoch 6 | 7579/ 8400 batches | train loss 0.4001092 +| epoch 6 | 7583/ 8400 batches | train loss 0.3841642 +| epoch 6 | 7587/ 8400 batches | train loss 0.3545422 +| epoch 6 | 7591/ 8400 batches | train loss 0.3857347 +| epoch 6 | 7595/ 8400 batches | train loss 0.3409436 +| epoch 6 | 7599/ 8400 batches | train loss 0.3655279 +| epoch 6 | 7603/ 8400 batches | train loss 0.3466211 +| epoch 6 | 7607/ 8400 batches | train loss 0.3268551 +| epoch 6 | 7611/ 8400 batches | train loss 0.4189202 +| epoch 6 | 7615/ 8400 batches | train loss 0.3951894 +| epoch 6 | 7619/ 8400 batches | train loss 0.3618056 +| epoch 6 | 7623/ 8400 batches | train loss 0.3898497 +| epoch 6 | 7627/ 8400 batches | train loss 0.3839512 +| epoch 6 | 7631/ 8400 batches | train loss 0.4401765 +| epoch 6 | 7635/ 8400 batches | train loss 0.4149822 +| epoch 6 | 7639/ 8400 batches | train loss 0.4016878 +| epoch 6 | 7643/ 8400 batches | train loss 0.3358483 +| epoch 6 | 7647/ 8400 batches | train loss 0.3522939 +| epoch 6 | 7651/ 8400 batches | train loss 0.3192802 +| epoch 6 | 7655/ 8400 batches | train loss 0.3847910 +| epoch 6 | 7659/ 8400 batches | train loss 0.3573633 +| epoch 6 | 7663/ 8400 batches | train loss 0.3923042 +| epoch 6 | 7667/ 8400 batches | train loss 0.3490963 +| epoch 6 | 7671/ 8400 batches | train loss 0.3953542 +| epoch 6 | 7675/ 8400 batches | train loss 0.3242269 +| epoch 6 | 7679/ 8400 batches | train loss 0.3424696 +| epoch 6 | 7683/ 8400 batches | train loss 0.3164160 +| epoch 6 | 7687/ 8400 batches | train loss 0.4154483 +| epoch 6 | 7691/ 8400 batches | train loss 0.3713144 +| epoch 6 | 7695/ 8400 batches | train loss 0.3644824 +| epoch 6 | 7699/ 8400 batches | train loss 0.3708635 +| epoch 6 | 7703/ 8400 batches | train loss 0.4179704 +| epoch 6 | 7707/ 8400 batches | train loss 0.4006999 +| epoch 6 | 7711/ 8400 batches | train loss 0.3167375 +| epoch 6 | 7715/ 8400 batches | train loss 0.3301148 +| epoch 6 | 7719/ 8400 batches | train loss 0.3768240 +| epoch 6 | 7723/ 8400 batches | train loss 0.4046522 +| epoch 6 | 7727/ 8400 batches | train loss 0.3840212 +| epoch 6 | 7731/ 8400 batches | train loss 0.3497754 +| epoch 6 | 7735/ 8400 batches | train loss 0.3528131 +| epoch 6 | 7739/ 8400 batches | train loss 0.4363484 +| epoch 6 | 7743/ 8400 batches | train loss 0.3763053 +| epoch 6 | 7747/ 8400 batches | train loss 0.3588859 +| epoch 6 | 7751/ 8400 batches | train loss 0.3312421 +| epoch 6 | 7755/ 8400 batches | train loss 0.4300165 +| epoch 6 | 7759/ 8400 batches | train loss 0.4534877 +| epoch 6 | 7763/ 8400 batches | train loss 0.3744377 +| epoch 6 | 7767/ 8400 batches | train loss 0.3546385 +| epoch 6 | 7771/ 8400 batches | train loss 0.4656442 +| epoch 6 | 7775/ 8400 batches | train loss 0.3880167 +| epoch 6 | 7779/ 8400 batches | train loss 0.3551911 +| epoch 6 | 7783/ 8400 batches | train loss 0.4176717 +| epoch 6 | 7787/ 8400 batches | train loss 0.4002654 +| epoch 6 | 7791/ 8400 batches | train loss 0.3358443 +| epoch 6 | 7795/ 8400 batches | train loss 0.3955340 +| epoch 6 | 7799/ 8400 batches | train loss 0.3366848 +| epoch 6 | 7803/ 8400 batches | train loss 0.4022472 +| epoch 6 | 7807/ 8400 batches | train loss 0.3406830 +| epoch 6 | 7811/ 8400 batches | train loss 0.4343735 +| epoch 6 | 7815/ 8400 batches | train loss 0.3824428 +| epoch 6 | 7819/ 8400 batches | train loss 0.3953478 +| epoch 6 | 7823/ 8400 batches | train loss 0.4098476 +| epoch 6 | 7827/ 8400 batches | train loss 0.3728503 +| epoch 6 | 7831/ 8400 batches | train loss 0.3749975 +| epoch 6 | 7835/ 8400 batches | train loss 0.3841588 +| epoch 6 | 7839/ 8400 batches | train loss 0.3578293 +| epoch 6 | 7843/ 8400 batches | train loss 0.4038315 +| epoch 6 | 7847/ 8400 batches | train loss 0.3864813 +| epoch 6 | 7851/ 8400 batches | train loss 0.3800971 +| epoch 6 | 7855/ 8400 batches | train loss 0.4101017 +| epoch 6 | 7859/ 8400 batches | train loss 0.2676300 +| epoch 6 | 7863/ 8400 batches | train loss 0.4583393 +| epoch 6 | 7867/ 8400 batches | train loss 0.2624234 +| epoch 6 | 7871/ 8400 batches | train loss 0.3578252 +| epoch 6 | 7875/ 8400 batches | train loss 0.3387637 +| epoch 6 | 7879/ 8400 batches | train loss 0.3945245 +| epoch 6 | 7883/ 8400 batches | train loss 0.4125145 +| epoch 6 | 7887/ 8400 batches | train loss 0.4138358 +| epoch 6 | 7891/ 8400 batches | train loss 0.4570507 +| epoch 6 | 7895/ 8400 batches | train loss 0.4092811 +| epoch 6 | 7899/ 8400 batches | train loss 0.3775365 +| epoch 6 | 7903/ 8400 batches | train loss 0.4007314 +| epoch 6 | 7907/ 8400 batches | train loss 0.3678736 +| epoch 6 | 7911/ 8400 batches | train loss 0.3111027 +| epoch 6 | 7915/ 8400 batches | train loss 0.4202461 +| epoch 6 | 7919/ 8400 batches | train loss 0.3515412 +| epoch 6 | 7923/ 8400 batches | train loss 0.4774314 +| epoch 6 | 7927/ 8400 batches | train loss 0.4230294 +| epoch 6 | 7931/ 8400 batches | train loss 0.3701333 +| epoch 6 | 7935/ 8400 batches | train loss 0.4740555 +| epoch 6 | 7939/ 8400 batches | train loss 0.4047359 +| epoch 6 | 7943/ 8400 batches | train loss 0.3594812 +| epoch 6 | 7947/ 8400 batches | train loss 0.4061387 +| epoch 6 | 7951/ 8400 batches | train loss 0.3066314 +| epoch 6 | 7955/ 8400 batches | train loss 0.4421375 +| epoch 6 | 7959/ 8400 batches | train loss 0.3950526 +| epoch 6 | 7963/ 8400 batches | train loss 0.3884857 +| epoch 6 | 7967/ 8400 batches | train loss 0.3394588 +| epoch 6 | 7971/ 8400 batches | train loss 0.3402277 +| epoch 6 | 7975/ 8400 batches | train loss 0.3372646 +| epoch 6 | 7979/ 8400 batches | train loss 0.4882978 +| epoch 6 | 7983/ 8400 batches | train loss 0.3696521 +| epoch 6 | 7987/ 8400 batches | train loss 0.3816137 +| epoch 6 | 7991/ 8400 batches | train loss 0.3286079 +| epoch 6 | 7995/ 8400 batches | train loss 0.3471626 +| epoch 6 | 7999/ 8400 batches | train loss 0.4296024 +| epoch 6 | 8003/ 8400 batches | train loss 0.3787758 +| epoch 6 | 8007/ 8400 batches | train loss 0.3554478 +| epoch 6 | 8011/ 8400 batches | train loss 0.3999651 +| epoch 6 | 8015/ 8400 batches | train loss 0.3842617 +| epoch 6 | 8019/ 8400 batches | train loss 0.3700396 +| epoch 6 | 8023/ 8400 batches | train loss 0.3716728 +| epoch 6 | 8027/ 8400 batches | train loss 0.3322511 +| epoch 6 | 8031/ 8400 batches | train loss 0.4098476 +| epoch 6 | 8035/ 8400 batches | train loss 0.2938102 +| epoch 6 | 8039/ 8400 batches | train loss 0.3786329 +| epoch 6 | 8043/ 8400 batches | train loss 0.4203099 +| epoch 6 | 8047/ 8400 batches | train loss 0.3734856 +| epoch 6 | 8051/ 8400 batches | train loss 0.4189066 +| epoch 6 | 8055/ 8400 batches | train loss 0.4480478 +| epoch 6 | 8059/ 8400 batches | train loss 0.4067093 +| epoch 6 | 8063/ 8400 batches | train loss 0.3580153 +| epoch 6 | 8067/ 8400 batches | train loss 0.3880684 +| epoch 6 | 8071/ 8400 batches | train loss 0.3926058 +| epoch 6 | 8075/ 8400 batches | train loss 0.3132392 +| epoch 6 | 8079/ 8400 batches | train loss 0.4024324 +| epoch 6 | 8083/ 8400 batches | train loss 0.4386033 +| epoch 6 | 8087/ 8400 batches | train loss 0.3189332 +| epoch 6 | 8091/ 8400 batches | train loss 0.3466583 +| epoch 6 | 8095/ 8400 batches | train loss 0.4139914 +| epoch 6 | 8099/ 8400 batches | train loss 0.2683383 +| epoch 6 | 8103/ 8400 batches | train loss 0.3603112 +| epoch 6 | 8107/ 8400 batches | train loss 0.3351050 +| epoch 6 | 8111/ 8400 batches | train loss 0.4180854 +| epoch 6 | 8115/ 8400 batches | train loss 0.4153505 +| epoch 6 | 8119/ 8400 batches | train loss 0.4229996 +| epoch 6 | 8123/ 8400 batches | train loss 0.4168527 +| epoch 6 | 8127/ 8400 batches | train loss 0.3286601 +| epoch 6 | 8131/ 8400 batches | train loss 0.3694294 +| epoch 6 | 8135/ 8400 batches | train loss 0.4072515 +| epoch 6 | 8139/ 8400 batches | train loss 0.3514547 +| epoch 6 | 8143/ 8400 batches | train loss 0.4048955 +| epoch 6 | 8147/ 8400 batches | train loss 0.4032288 +| epoch 6 | 8151/ 8400 batches | train loss 0.3736893 +| epoch 6 | 8155/ 8400 batches | train loss 0.4512651 +| epoch 6 | 8159/ 8400 batches | train loss 0.4177591 +| epoch 6 | 8163/ 8400 batches | train loss 0.3214473 +| epoch 6 | 8167/ 8400 batches | train loss 0.4282292 +| epoch 6 | 8171/ 8400 batches | train loss 0.3693305 +| epoch 6 | 8175/ 8400 batches | train loss 0.3700857 +| epoch 6 | 8179/ 8400 batches | train loss 0.3466737 +| epoch 6 | 8183/ 8400 batches | train loss 0.3544068 +| epoch 6 | 8187/ 8400 batches | train loss 0.4132355 +| epoch 6 | 8191/ 8400 batches | train loss 0.3430107 +| epoch 6 | 8195/ 8400 batches | train loss 0.4148582 +| epoch 6 | 8199/ 8400 batches | train loss 0.3363291 +| epoch 6 | 8203/ 8400 batches | train loss 0.2888946 +| epoch 6 | 8207/ 8400 batches | train loss 0.3923791 +| epoch 6 | 8211/ 8400 batches | train loss 0.4069606 +| epoch 6 | 8215/ 8400 batches | train loss 0.4258420 +| epoch 6 | 8219/ 8400 batches | train loss 0.3894148 +| epoch 6 | 8223/ 8400 batches | train loss 0.4088689 +| epoch 6 | 8227/ 8400 batches | train loss 0.3881695 +| epoch 6 | 8231/ 8400 batches | train loss 0.3821075 +| epoch 6 | 8235/ 8400 batches | train loss 0.3517846 +| epoch 6 | 8239/ 8400 batches | train loss 0.3475196 +| epoch 6 | 8243/ 8400 batches | train loss 0.4947720 +| epoch 6 | 8247/ 8400 batches | train loss 0.4007530 +| epoch 6 | 8251/ 8400 batches | train loss 0.4126737 +| epoch 6 | 8255/ 8400 batches | train loss 0.4604692 +| epoch 6 | 8259/ 8400 batches | train loss 0.4565077 +| epoch 6 | 8263/ 8400 batches | train loss 0.3541772 +| epoch 6 | 8267/ 8400 batches | train loss 0.4104368 +| epoch 6 | 8271/ 8400 batches | train loss 0.3331180 +| epoch 6 | 8275/ 8400 batches | train loss 0.3541545 +| epoch 6 | 8279/ 8400 batches | train loss 0.3457596 +| epoch 6 | 8283/ 8400 batches | train loss 0.4218632 +| epoch 6 | 8287/ 8400 batches | train loss 0.4076781 +| epoch 6 | 8291/ 8400 batches | train loss 0.3954269 +| epoch 6 | 8295/ 8400 batches | train loss 0.4862914 +| epoch 6 | 8299/ 8400 batches | train loss 0.4112626 +| epoch 6 | 8303/ 8400 batches | train loss 0.4351985 +| epoch 6 | 8307/ 8400 batches | train loss 0.4898282 +| epoch 6 | 8311/ 8400 batches | train loss 0.4150445 +| epoch 6 | 8315/ 8400 batches | train loss 0.4380046 +| epoch 6 | 8319/ 8400 batches | train loss 0.3747174 +| epoch 6 | 8323/ 8400 batches | train loss 0.3269419 +| epoch 6 | 8327/ 8400 batches | train loss 0.5054226 +| epoch 6 | 8331/ 8400 batches | train loss 0.4181810 +| epoch 6 | 8335/ 8400 batches | train loss 0.3729583 +| epoch 6 | 8339/ 8400 batches | train loss 0.4010082 +| epoch 6 | 8343/ 8400 batches | train loss 0.3404872 +| epoch 6 | 8347/ 8400 batches | train loss 0.3016789 +| epoch 6 | 8351/ 8400 batches | train loss 0.3892482 +| epoch 6 | 8355/ 8400 batches | train loss 0.3976650 +| epoch 6 | 8359/ 8400 batches | train loss 0.4342953 +| epoch 6 | 8363/ 8400 batches | train loss 0.3304656 +| epoch 6 | 8367/ 8400 batches | train loss 0.4210264 +| epoch 6 | 8371/ 8400 batches | train loss 0.3696980 +| epoch 6 | 8375/ 8400 batches | train loss 0.3809360 +| epoch 6 | 8379/ 8400 batches | train loss 0.3473413 +| epoch 6 | 8383/ 8400 batches | train loss 0.3569467 +| epoch 6 | 8387/ 8400 batches | train loss 0.4319591 +| epoch 6 | 8391/ 8400 batches | train loss 0.3400992 +| epoch 6 | 8395/ 8400 batches | train loss 0.4113272 +| epoch 6 | 8399/ 8400 batches | train loss 0.4016992 +-------------------------------------------------------------------------------- +| epoch 6 | 3/ 8400 batches | test loss 0.4066104 +| epoch 6 | 7/ 8400 batches | test loss 0.5014316 +| epoch 6 | 11/ 8400 batches | test loss 0.4907863 +| epoch 6 | 15/ 8400 batches | test loss 0.4700710 +| epoch 6 | 19/ 8400 batches | test loss 0.4328215 +| epoch 6 | 23/ 8400 batches | test loss 0.5445372 +| epoch 6 | 27/ 8400 batches | test loss 0.3989263 +| epoch 6 | 31/ 8400 batches | test loss 0.5290857 +| epoch 6 | 35/ 8400 batches | test loss 0.4323905 +| epoch 6 | 39/ 8400 batches | test loss 0.4220108 +| epoch 6 | 43/ 8400 batches | test loss 0.3360289 +| epoch 6 | 47/ 8400 batches | test loss 0.3584382 +| epoch 6 | 51/ 8400 batches | test loss 0.4335955 +| epoch 6 | 55/ 8400 batches | test loss 0.4641158 +| epoch 6 | 59/ 8400 batches | test loss 0.3716349 +| epoch 6 | 63/ 8400 batches | test loss 0.3577954 +| epoch 6 | 67/ 8400 batches | test loss 0.4062594 +| epoch 6 | 71/ 8400 batches | test loss 0.3969484 +| epoch 6 | 75/ 8400 batches | test loss 0.3683148 +| epoch 6 | 79/ 8400 batches | test loss 0.3698323 +| epoch 6 | 83/ 8400 batches | test loss 0.3830773 +| epoch 6 | 87/ 8400 batches | test loss 0.3975363 +| epoch 6 | 91/ 8400 batches | test loss 0.3590247 +| epoch 6 | 95/ 8400 batches | test loss 0.5265166 +| epoch 6 | 99/ 8400 batches | test loss 0.4220538 +| epoch 6 | 103/ 8400 batches | test loss 0.3961789 +| epoch 6 | 107/ 8400 batches | test loss 0.3972273 +| epoch 6 | 111/ 8400 batches | test loss 0.4147888 +| epoch 6 | 115/ 8400 batches | test loss 0.3929875 +| epoch 6 | 119/ 8400 batches | test loss 0.4037659 +| epoch 6 | 123/ 8400 batches | test loss 0.4275680 +| epoch 6 | 127/ 8400 batches | test loss 0.4879028 +| epoch 6 | 131/ 8400 batches | test loss 0.4039217 +| epoch 6 | 135/ 8400 batches | test loss 0.3891416 +| epoch 6 | 139/ 8400 batches | test loss 0.4839065 +| epoch 6 | 143/ 8400 batches | test loss 0.4252780 +| epoch 6 | 147/ 8400 batches | test loss 0.3808485 +| epoch 6 | 151/ 8400 batches | test loss 0.3416096 +| epoch 6 | 155/ 8400 batches | test loss 0.4599605 +| epoch 6 | 159/ 8400 batches | test loss 0.4456032 +| epoch 6 | 163/ 8400 batches | test loss 0.3979118 +| epoch 6 | 167/ 8400 batches | test loss 0.3733866 +| epoch 6 | 171/ 8400 batches | test loss 0.5091394 +| epoch 6 | 175/ 8400 batches | test loss 0.4455380 +| epoch 6 | 179/ 8400 batches | test loss 0.4249758 +| epoch 6 | 183/ 8400 batches | test loss 0.4635462 +| epoch 6 | 187/ 8400 batches | test loss 0.3946342 +| epoch 6 | 191/ 8400 batches | test loss 0.3924363 +| epoch 6 | 195/ 8400 batches | test loss 0.4323400 +| epoch 6 | 199/ 8400 batches | test loss 0.4400007 +| epoch 6 | 203/ 8400 batches | test loss 0.5021584 +| epoch 6 | 207/ 8400 batches | test loss 0.4033297 +| epoch 6 | 211/ 8400 batches | test loss 0.4467085 +| epoch 6 | 215/ 8400 batches | test loss 0.4273444 +| epoch 6 | 219/ 8400 batches | test loss 0.3844530 +| epoch 6 | 223/ 8400 batches | test loss 0.3622636 +| epoch 6 | 227/ 8400 batches | test loss 0.3164032 +| epoch 6 | 231/ 8400 batches | test loss 0.3844472 +| epoch 6 | 235/ 8400 batches | test loss 0.4883540 +| epoch 6 | 239/ 8400 batches | test loss 0.4061824 +| epoch 6 | 243/ 8400 batches | test loss 0.3813221 +| epoch 6 | 247/ 8400 batches | test loss 0.3767576 +| epoch 6 | 251/ 8400 batches | test loss 0.4623213 +| epoch 6 | 255/ 8400 batches | test loss 0.4356007 +| epoch 6 | 259/ 8400 batches | test loss 0.4196027 +| epoch 6 | 263/ 8400 batches | test loss 0.3696476 +| epoch 6 | 267/ 8400 batches | test loss 0.4369987 +| epoch 6 | 271/ 8400 batches | test loss 0.4692071 +| epoch 6 | 275/ 8400 batches | test loss 0.4483696 +| epoch 6 | 279/ 8400 batches | test loss 0.3612311 +| epoch 6 | 283/ 8400 batches | test loss 0.2748981 +| epoch 6 | 287/ 8400 batches | test loss 0.4109605 +| epoch 6 | 291/ 8400 batches | test loss 0.2804654 +| epoch 6 | 295/ 8400 batches | test loss 0.4636612 +| epoch 6 | 299/ 8400 batches | test loss 0.4548869 +| epoch 6 | 303/ 8400 batches | test loss 0.3483935 +| epoch 6 | 307/ 8400 batches | test loss 0.3456273 +| epoch 6 | 311/ 8400 batches | test loss 0.3826005 +| epoch 6 | 315/ 8400 batches | test loss 0.4089891 +| epoch 6 | 319/ 8400 batches | test loss 0.2849310 +| epoch 6 | 323/ 8400 batches | test loss 0.4110678 +| epoch 6 | 327/ 8400 batches | test loss 0.4030559 +| epoch 6 | 331/ 8400 batches | test loss 0.4415829 +| epoch 6 | 335/ 8400 batches | test loss 0.3644235 +| epoch 6 | 339/ 8400 batches | test loss 0.3674576 +| epoch 6 | 343/ 8400 batches | test loss 0.4468350 +| epoch 6 | 347/ 8400 batches | test loss 0.4516702 +| epoch 6 | 351/ 8400 batches | test loss 0.3884236 +| epoch 6 | 355/ 8400 batches | test loss 0.4081045 +| epoch 6 | 359/ 8400 batches | test loss 0.4217109 +| epoch 6 | 363/ 8400 batches | test loss 0.3625353 +| epoch 6 | 367/ 8400 batches | test loss 0.3501180 +| epoch 6 | 371/ 8400 batches | test loss 0.3676423 +| epoch 6 | 375/ 8400 batches | test loss 0.3730849 +| epoch 6 | 379/ 8400 batches | test loss 0.4329599 +| epoch 6 | 383/ 8400 batches | test loss 0.4214823 +| epoch 6 | 387/ 8400 batches | test loss 0.3321012 +| epoch 6 | 391/ 8400 batches | test loss 0.4027745 +| epoch 6 | 395/ 8400 batches | test loss 0.4159658 +| epoch 6 | 399/ 8400 batches | test loss 0.2895995 +| epoch 6 | 403/ 8400 batches | test loss 0.4377618 +| epoch 6 | 407/ 8400 batches | test loss 0.3478965 +| epoch 6 | 411/ 8400 batches | test loss 0.4399462 +| epoch 6 | 415/ 8400 batches | test loss 0.3577946 +| epoch 6 | 419/ 8400 batches | test loss 0.4144668 +| epoch 6 | 423/ 8400 batches | test loss 0.3580789 +| epoch 6 | 427/ 8400 batches | test loss 0.3683288 +| epoch 6 | 431/ 8400 batches | test loss 0.4616172 +| epoch 6 | 435/ 8400 batches | test loss 0.5401522 +| epoch 6 | 439/ 8400 batches | test loss 0.4630045 +| epoch 6 | 443/ 8400 batches | test loss 0.3611268 +| epoch 6 | 447/ 8400 batches | test loss 0.5037278 +| epoch 6 | 451/ 8400 batches | test loss 0.3916576 +| epoch 6 | 455/ 8400 batches | test loss 0.4088602 +| epoch 6 | 459/ 8400 batches | test loss 0.4246733 +| epoch 6 | 463/ 8400 batches | test loss 0.3215225 +| epoch 6 | 467/ 8400 batches | test loss 0.4556912 +| epoch 6 | 471/ 8400 batches | test loss 0.5157298 +| epoch 6 | 475/ 8400 batches | test loss 0.2521334 +| epoch 6 | 479/ 8400 batches | test loss 0.4841149 +| epoch 6 | 483/ 8400 batches | test loss 0.3914599 +| epoch 6 | 487/ 8400 batches | test loss 0.3991084 +| epoch 6 | 491/ 8400 batches | test loss 0.4233649 +| epoch 6 | 495/ 8400 batches | test loss 0.4212323 +| epoch 6 | 499/ 8400 batches | test loss 0.3419318 +| epoch 6 | 503/ 8400 batches | test loss 0.3684108 +| epoch 6 | 507/ 8400 batches | test loss 0.5208442 +| epoch 6 | 511/ 8400 batches | test loss 0.3515689 +| epoch 6 | 515/ 8400 batches | test loss 0.3377503 +| epoch 6 | 519/ 8400 batches | test loss 0.4091609 +| epoch 6 | 523/ 8400 batches | test loss 0.4057952 +| epoch 6 | 527/ 8400 batches | test loss 0.5245174 +| epoch 6 | 531/ 8400 batches | test loss 0.4268333 +| epoch 6 | 535/ 8400 batches | test loss 0.4267721 +| epoch 6 | 539/ 8400 batches | test loss 0.4211672 +| epoch 6 | 543/ 8400 batches | test loss 0.4177549 +| epoch 6 | 547/ 8400 batches | test loss 0.4274102 +| epoch 6 | 551/ 8400 batches | test loss 0.4219870 +| epoch 6 | 555/ 8400 batches | test loss 0.4245613 +| epoch 6 | 559/ 8400 batches | test loss 0.6004583 +| epoch 6 | 563/ 8400 batches | test loss 0.8074422 +| epoch 6 | 567/ 8400 batches | test loss 0.4158153 +| epoch 6 | 571/ 8400 batches | test loss 0.4836504 +| epoch 6 | 575/ 8400 batches | test loss 0.4688638 +| epoch 6 | 579/ 8400 batches | test loss 0.4021562 +| epoch 6 | 583/ 8400 batches | test loss 0.4268279 +| epoch 6 | 587/ 8400 batches | test loss 0.5181662 +| epoch 6 | 591/ 8400 batches | test loss 0.3545883 +| epoch 6 | 595/ 8400 batches | test loss 0.4141907 +| epoch 6 | 599/ 8400 batches | test loss 0.3967291 +| epoch 6 | 603/ 8400 batches | test loss 0.4371593 +| epoch 6 | 607/ 8400 batches | test loss 0.3676400 +| epoch 6 | 611/ 8400 batches | test loss 0.3928257 +| epoch 6 | 615/ 8400 batches | test loss 0.4083765 +| epoch 6 | 619/ 8400 batches | test loss 0.4148083 +| epoch 6 | 623/ 8400 batches | test loss 0.4761758 +| epoch 6 | 627/ 8400 batches | test loss 0.3570343 +| epoch 6 | 631/ 8400 batches | test loss 0.4229871 +| epoch 6 | 635/ 8400 batches | test loss 0.5551124 +| epoch 6 | 639/ 8400 batches | test loss 0.3988241 +| epoch 6 | 643/ 8400 batches | test loss 0.4202285 +| epoch 6 | 647/ 8400 batches | test loss 0.3905403 +| epoch 6 | 651/ 8400 batches | test loss 0.4036114 +| epoch 6 | 655/ 8400 batches | test loss 0.3253640 +| epoch 6 | 659/ 8400 batches | test loss 0.3539075 +| epoch 6 | 663/ 8400 batches | test loss 0.4704010 +| epoch 6 | 667/ 8400 batches | test loss 0.4679220 +| epoch 6 | 671/ 8400 batches | test loss 0.3889291 +| epoch 6 | 675/ 8400 batches | test loss 0.4692070 +| epoch 6 | 679/ 8400 batches | test loss 0.4496403 +| epoch 6 | 683/ 8400 batches | test loss 0.3910053 +| epoch 6 | 687/ 8400 batches | test loss 0.4062507 +| epoch 6 | 691/ 8400 batches | test loss 0.4096141 +| epoch 6 | 695/ 8400 batches | test loss 0.4835545 +| epoch 6 | 699/ 8400 batches | test loss 0.3516838 +| epoch 6 | 703/ 8400 batches | test loss 0.5326489 +| epoch 6 | 707/ 8400 batches | test loss 0.4042464 +| epoch 6 | 711/ 8400 batches | test loss 0.3765498 +| epoch 6 | 715/ 8400 batches | test loss 0.4765857 +| epoch 6 | 719/ 8400 batches | test loss 0.4167799 +| epoch 6 | 723/ 8400 batches | test loss 0.4200149 +| epoch 6 | 727/ 8400 batches | test loss 0.4146563 +| epoch 6 | 731/ 8400 batches | test loss 0.4859400 +| epoch 6 | 735/ 8400 batches | test loss 0.4521640 +| epoch 6 | 739/ 8400 batches | test loss 0.4420848 +| epoch 6 | 743/ 8400 batches | test loss 0.4603940 +| epoch 6 | 747/ 8400 batches | test loss 0.3741498 +| epoch 6 | 751/ 8400 batches | test loss 0.4275141 +| epoch 6 | 755/ 8400 batches | test loss 0.4142830 +| epoch 6 | 759/ 8400 batches | test loss 0.4440646 +| epoch 6 | 763/ 8400 batches | test loss 0.4440022 +| epoch 6 | 767/ 8400 batches | test loss 0.4115366 +| epoch 6 | 771/ 8400 batches | test loss 0.4693347 +| epoch 6 | 775/ 8400 batches | test loss 0.4532095 +| epoch 6 | 779/ 8400 batches | test loss 0.4425648 +| epoch 6 | 783/ 8400 batches | test loss 0.3279543 +| epoch 6 | 787/ 8400 batches | test loss 0.3941755 +| epoch 6 | 791/ 8400 batches | test loss 0.3418928 +| epoch 6 | 795/ 8400 batches | test loss 0.4234577 +| epoch 6 | 799/ 8400 batches | test loss 0.3643478 +| epoch 6 | 803/ 8400 batches | test loss 0.4201180 +| epoch 6 | 807/ 8400 batches | test loss 0.3828394 +| epoch 6 | 811/ 8400 batches | test loss 0.3518248 +| epoch 6 | 815/ 8400 batches | test loss 0.4370992 +| epoch 6 | 819/ 8400 batches | test loss 0.3631057 +| epoch 6 | 823/ 8400 batches | test loss 0.5233870 +| epoch 6 | 827/ 8400 batches | test loss 0.4408970 +| epoch 6 | 831/ 8400 batches | test loss 0.5043552 +| epoch 6 | 835/ 8400 batches | test loss 0.3751845 +| epoch 6 | 839/ 8400 batches | test loss 0.4026972 +| epoch 6 | 843/ 8400 batches | test loss 0.4108452 +| epoch 6 | 847/ 8400 batches | test loss 0.3491994 +| epoch 6 | 851/ 8400 batches | test loss 0.4636286 +| epoch 6 | 855/ 8400 batches | test loss 0.4041956 +| epoch 6 | 859/ 8400 batches | test loss 0.3421501 +| epoch 6 | 863/ 8400 batches | test loss 0.3599737 +| epoch 6 | 867/ 8400 batches | test loss 0.3588279 +| epoch 6 | 871/ 8400 batches | test loss 0.3930445 +| epoch 6 | 875/ 8400 batches | test loss 0.4512365 +| epoch 6 | 879/ 8400 batches | test loss 0.4284535 +| epoch 6 | 883/ 8400 batches | test loss 0.3884165 +| epoch 6 | 887/ 8400 batches | test loss 0.3375643 +| epoch 6 | 891/ 8400 batches | test loss 0.3902829 +| epoch 6 | 895/ 8400 batches | test loss 0.3972022 +| epoch 6 | 899/ 8400 batches | test loss 0.3963078 +| epoch 6 | 903/ 8400 batches | test loss 0.4391497 +| epoch 6 | 907/ 8400 batches | test loss 0.4184322 +| epoch 6 | 911/ 8400 batches | test loss 0.4686048 +| epoch 6 | 915/ 8400 batches | test loss 0.4302524 +| epoch 6 | 919/ 8400 batches | test loss 0.3732101 +| epoch 6 | 923/ 8400 batches | test loss 0.3641519 +| epoch 6 | 927/ 8400 batches | test loss 0.3538060 +| epoch 6 | 931/ 8400 batches | test loss 0.4615927 +| epoch 6 | 935/ 8400 batches | test loss 0.4131345 +| epoch 6 | 939/ 8400 batches | test loss 0.3591064 +| epoch 6 | 943/ 8400 batches | test loss 0.3999489 +| epoch 6 | 947/ 8400 batches | test loss 0.4243743 +| epoch 6 | 951/ 8400 batches | test loss 0.4238762 +| epoch 6 | 955/ 8400 batches | test loss 0.5149604 +| epoch 6 | 959/ 8400 batches | test loss 0.5035698 +| epoch 6 | 963/ 8400 batches | test loss 0.5846453 +| epoch 6 | 967/ 8400 batches | test loss 0.4396775 +| epoch 6 | 971/ 8400 batches | test loss 0.4518327 +| epoch 6 | 975/ 8400 batches | test loss 0.4356174 +| epoch 6 | 979/ 8400 batches | test loss 0.3465425 +| epoch 6 | 983/ 8400 batches | test loss 0.3917462 +| epoch 6 | 987/ 8400 batches | test loss 0.4474429 +| epoch 6 | 991/ 8400 batches | test loss 0.4214360 +| epoch 6 | 995/ 8400 batches | test loss 0.3684689 +| epoch 6 | 999/ 8400 batches | test loss 0.1666016 +| epoch 6 | 1003/ 8400 batches | test loss 0.3585306 +| epoch 6 | 1007/ 8400 batches | test loss 0.3665415 +| epoch 6 | 1011/ 8400 batches | test loss 0.4007820 +| epoch 6 | 1015/ 8400 batches | test loss 0.3379559 +| epoch 6 | 1019/ 8400 batches | test loss 0.3497443 +| epoch 6 | 1023/ 8400 batches | test loss 0.6546630 +| epoch 6 | 1027/ 8400 batches | test loss 0.4789427 +| epoch 6 | 1031/ 8400 batches | test loss 0.3011349 +| epoch 6 | 1035/ 8400 batches | test loss 0.4643012 +| epoch 6 | 1039/ 8400 batches | test loss 0.4284236 +| epoch 6 | 1043/ 8400 batches | test loss 0.4458129 +| epoch 6 | 1047/ 8400 batches | test loss 0.4108419 +| epoch 6 | 1051/ 8400 batches | test loss 0.3522378 +| epoch 6 | 1055/ 8400 batches | test loss 0.4164596 +| epoch 6 | 1059/ 8400 batches | test loss 0.4035255 +| epoch 6 | 1063/ 8400 batches | test loss 0.3577644 +| epoch 6 | 1067/ 8400 batches | test loss 0.3830459 +| epoch 6 | 1071/ 8400 batches | test loss 0.4156150 +| epoch 6 | 1075/ 8400 batches | test loss 0.4276641 +| epoch 6 | 1079/ 8400 batches | test loss 0.3263921 +| epoch 6 | 1083/ 8400 batches | test loss 0.3448492 +| epoch 6 | 1087/ 8400 batches | test loss 0.4668305 +| epoch 6 | 1091/ 8400 batches | test loss 0.4036818 +| epoch 6 | 1095/ 8400 batches | test loss 0.4586192 +| epoch 6 | 1099/ 8400 batches | test loss 0.5370482 +| epoch 6 | 1103/ 8400 batches | test loss 0.3472374 +| epoch 6 | 1107/ 8400 batches | test loss 0.3714853 +| epoch 6 | 1111/ 8400 batches | test loss 0.4362558 +| epoch 6 | 1115/ 8400 batches | test loss 0.4225988 +| epoch 6 | 1119/ 8400 batches | test loss 0.4114259 +| epoch 6 | 1123/ 8400 batches | test loss 0.3271839 +| epoch 6 | 1127/ 8400 batches | test loss 0.5907058 +| epoch 6 | 1131/ 8400 batches | test loss 0.3977452 +| epoch 6 | 1135/ 8400 batches | test loss 0.4121350 +| epoch 6 | 1139/ 8400 batches | test loss 0.4200333 +| epoch 6 | 1143/ 8400 batches | test loss 0.3198998 +| epoch 6 | 1147/ 8400 batches | test loss 0.3989177 +| epoch 6 | 1151/ 8400 batches | test loss 0.3211506 +| epoch 6 | 1155/ 8400 batches | test loss 0.4664355 +| epoch 6 | 1159/ 8400 batches | test loss 0.3830910 +| epoch 6 | 1163/ 8400 batches | test loss 0.4257478 +| epoch 6 | 1167/ 8400 batches | test loss 0.4159934 +| epoch 6 | 1171/ 8400 batches | test loss 0.3632146 +| epoch 6 | 1175/ 8400 batches | test loss 0.3748910 +| epoch 6 | 1179/ 8400 batches | test loss 0.5196124 +| epoch 6 | 1183/ 8400 batches | test loss 0.3927060 +| epoch 6 | 1187/ 8400 batches | test loss 0.3978491 +| epoch 6 | 1191/ 8400 batches | test loss 0.4671346 +| epoch 6 | 1195/ 8400 batches | test loss 0.5009270 +| epoch 6 | 1199/ 8400 batches | test loss 0.4373009 +| epoch 6 | 1203/ 8400 batches | test loss 0.3886921 +| epoch 6 | 1207/ 8400 batches | test loss 0.5178971 +| epoch 6 | 1211/ 8400 batches | test loss 0.5379502 +| epoch 6 | 1215/ 8400 batches | test loss 0.3852876 +| epoch 6 | 1219/ 8400 batches | test loss 0.3874285 +| epoch 6 | 1223/ 8400 batches | test loss 0.7538168 +| epoch 6 | 1227/ 8400 batches | test loss 0.3258818 +| epoch 6 | 1231/ 8400 batches | test loss 0.4108380 +| epoch 6 | 1235/ 8400 batches | test loss 0.4043888 +| epoch 6 | 1239/ 8400 batches | test loss 0.3878755 +| epoch 6 | 1243/ 8400 batches | test loss 0.3948374 +| epoch 6 | 1247/ 8400 batches | test loss 0.4420061 +| epoch 6 | 1251/ 8400 batches | test loss 0.3678174 +| epoch 6 | 1255/ 8400 batches | test loss 0.3525026 +| epoch 6 | 1259/ 8400 batches | test loss 0.4182058 +| epoch 6 | 1263/ 8400 batches | test loss 0.4013073 +| epoch 6 | 1267/ 8400 batches | test loss 0.4114218 +| epoch 6 | 1271/ 8400 batches | test loss 0.3818195 +| epoch 6 | 1275/ 8400 batches | test loss 0.3689044 +| epoch 6 | 1279/ 8400 batches | test loss 0.4877814 +| epoch 6 | 1283/ 8400 batches | test loss 0.3957804 +| epoch 6 | 1287/ 8400 batches | test loss 0.4567240 +| epoch 6 | 1291/ 8400 batches | test loss 0.4228680 +| epoch 6 | 1295/ 8400 batches | test loss 0.3224030 +| epoch 6 | 1299/ 8400 batches | test loss 0.3657297 +| epoch 6 | 1303/ 8400 batches | test loss 0.3568010 +| epoch 6 | 1307/ 8400 batches | test loss 0.4224526 +| epoch 6 | 1311/ 8400 batches | test loss 0.4165481 +| epoch 6 | 1315/ 8400 batches | test loss 0.3722546 +| epoch 6 | 1319/ 8400 batches | test loss 0.3979028 +| epoch 6 | 1323/ 8400 batches | test loss 0.4358175 +| epoch 6 | 1327/ 8400 batches | test loss 0.4114098 +| epoch 6 | 1331/ 8400 batches | test loss 0.4400403 +| epoch 6 | 1335/ 8400 batches | test loss 0.3809835 +| epoch 6 | 1339/ 8400 batches | test loss 0.4188735 +| epoch 6 | 1343/ 8400 batches | test loss 0.4321116 +| epoch 6 | 1347/ 8400 batches | test loss 0.4924721 +| epoch 6 | 1351/ 8400 batches | test loss 0.5157034 +| epoch 6 | 1355/ 8400 batches | test loss 0.3877751 +| epoch 6 | 1359/ 8400 batches | test loss 0.4465521 +| epoch 6 | 1363/ 8400 batches | test loss 0.4316581 +| epoch 6 | 1367/ 8400 batches | test loss 0.4243595 +| epoch 6 | 1371/ 8400 batches | test loss 0.6813899 +| epoch 6 | 1375/ 8400 batches | test loss 0.4133338 +| epoch 6 | 1379/ 8400 batches | test loss 0.4116125 +| epoch 6 | 1383/ 8400 batches | test loss 0.4636233 +| epoch 6 | 1387/ 8400 batches | test loss 0.4342086 +| epoch 6 | 1391/ 8400 batches | test loss 0.3532164 +| epoch 6 | 1395/ 8400 batches | test loss 0.4912821 +| epoch 6 | 1399/ 8400 batches | test loss 0.3693165 +| epoch 6 | 1403/ 8400 batches | test loss 0.4346731 +| epoch 6 | 1407/ 8400 batches | test loss 0.3828668 +| epoch 6 | 1411/ 8400 batches | test loss 0.3619584 +| epoch 6 | 1415/ 8400 batches | test loss 0.4668497 +| epoch 6 | 1419/ 8400 batches | test loss 0.4344419 +| epoch 6 | 1423/ 8400 batches | test loss 0.3788089 +| epoch 6 | 1427/ 8400 batches | test loss 0.3614461 +| epoch 6 | 1431/ 8400 batches | test loss 0.3459648 +| epoch 6 | 1435/ 8400 batches | test loss 0.3854847 +| epoch 6 | 1439/ 8400 batches | test loss 0.4199843 +| epoch 6 | 1443/ 8400 batches | test loss 0.3237882 +| epoch 6 | 1447/ 8400 batches | test loss 0.3605011 +| epoch 6 | 1451/ 8400 batches | test loss 0.4482991 +| epoch 6 | 1455/ 8400 batches | test loss 0.4017454 +| epoch 6 | 1459/ 8400 batches | test loss 0.3661994 +| epoch 6 | 1463/ 8400 batches | test loss 0.5355144 +| epoch 6 | 1467/ 8400 batches | test loss 0.4228272 +| epoch 6 | 1471/ 8400 batches | test loss 0.4649520 +| epoch 6 | 1475/ 8400 batches | test loss 0.3780563 +| epoch 6 | 1479/ 8400 batches | test loss 0.3809502 +| epoch 6 | 1483/ 8400 batches | test loss 0.3539719 +| epoch 6 | 1487/ 8400 batches | test loss 0.4102950 +| epoch 6 | 1491/ 8400 batches | test loss 0.4039640 +| epoch 6 | 1495/ 8400 batches | test loss 0.3775432 +| epoch 6 | 1499/ 8400 batches | test loss 0.4637282 +| epoch 6 | 1503/ 8400 batches | test loss 0.4138081 +| epoch 6 | 1507/ 8400 batches | test loss 0.3917890 +| epoch 6 | 1511/ 8400 batches | test loss 0.4021403 +| epoch 6 | 1515/ 8400 batches | test loss 0.4128944 +| epoch 6 | 1519/ 8400 batches | test loss 0.5159900 +| epoch 6 | 1523/ 8400 batches | test loss 0.3819150 +| epoch 6 | 1527/ 8400 batches | test loss 0.4642543 +| epoch 6 | 1531/ 8400 batches | test loss 0.4263631 +| epoch 6 | 1535/ 8400 batches | test loss 0.4327383 +| epoch 6 | 1539/ 8400 batches | test loss 0.3143269 +| epoch 6 | 1543/ 8400 batches | test loss 0.4396006 +| epoch 6 | 1547/ 8400 batches | test loss 0.4549594 +| epoch 6 | 1551/ 8400 batches | test loss 0.4885293 +| epoch 6 | 1555/ 8400 batches | test loss 0.4129755 +| epoch 6 | 1559/ 8400 batches | test loss 0.3418724 +| epoch 6 | 1563/ 8400 batches | test loss 0.4502540 +| epoch 6 | 1567/ 8400 batches | test loss 0.3775538 +| epoch 6 | 1571/ 8400 batches | test loss 0.4524372 +| epoch 6 | 1575/ 8400 batches | test loss 0.3749271 +| epoch 6 | 1579/ 8400 batches | test loss 0.3682117 +| epoch 6 | 1583/ 8400 batches | test loss 0.4223503 +| epoch 6 | 1587/ 8400 batches | test loss 0.4512672 +| epoch 6 | 1591/ 8400 batches | test loss 0.3562379 +| epoch 6 | 1595/ 8400 batches | test loss 0.4364083 +| epoch 6 | 1599/ 8400 batches | test loss 0.4467941 +| epoch 6 | 1603/ 8400 batches | test loss 0.5294843 +| epoch 6 | 1607/ 8400 batches | test loss 0.3490245 +| epoch 6 | 1611/ 8400 batches | test loss 0.4557151 +| epoch 6 | 1615/ 8400 batches | test loss 0.4230476 +| epoch 6 | 1619/ 8400 batches | test loss 0.4330250 +| epoch 6 | 1623/ 8400 batches | test loss 0.4246232 +| epoch 6 | 1627/ 8400 batches | test loss 0.3107917 +| epoch 6 | 1631/ 8400 batches | test loss 0.3264680 +| epoch 6 | 1635/ 8400 batches | test loss 0.4438434 +| epoch 6 | 1639/ 8400 batches | test loss 0.3765454 +| epoch 6 | 1643/ 8400 batches | test loss 0.4509528 +| epoch 6 | 1647/ 8400 batches | test loss 0.3772953 +| epoch 6 | 1651/ 8400 batches | test loss 0.3482924 +| epoch 6 | 1655/ 8400 batches | test loss 0.5217025 +| epoch 6 | 1659/ 8400 batches | test loss 0.5046120 +| epoch 6 | 1663/ 8400 batches | test loss 0.5073687 +| epoch 6 | 1667/ 8400 batches | test loss 0.3687088 +| epoch 6 | 1671/ 8400 batches | test loss 0.5371748 +| epoch 6 | 1675/ 8400 batches | test loss 0.3899469 +| epoch 6 | 1679/ 8400 batches | test loss 0.3995039 +| epoch 6 | 1683/ 8400 batches | test loss 0.3769192 +| epoch 6 | 1687/ 8400 batches | test loss 0.4710888 +| epoch 6 | 1691/ 8400 batches | test loss 0.3845547 +| epoch 6 | 1695/ 8400 batches | test loss 0.3976699 +| epoch 6 | 1699/ 8400 batches | test loss 0.4424794 +| epoch 6 | 1703/ 8400 batches | test loss 0.4338155 +| epoch 6 | 1707/ 8400 batches | test loss 0.4166878 +| epoch 6 | 1711/ 8400 batches | test loss 0.3412608 +| epoch 6 | 1715/ 8400 batches | test loss 0.2584052 +| epoch 6 | 1719/ 8400 batches | test loss 0.4054380 +| epoch 6 | 1723/ 8400 batches | test loss 0.4114700 +| epoch 6 | 1727/ 8400 batches | test loss 0.3648247 +| epoch 6 | 1731/ 8400 batches | test loss 0.3802218 +| epoch 6 | 1735/ 8400 batches | test loss 0.4446800 +| epoch 6 | 1739/ 8400 batches | test loss 0.4082922 +| epoch 6 | 1743/ 8400 batches | test loss 0.4005842 +| epoch 6 | 1747/ 8400 batches | test loss 0.4208077 +| epoch 6 | 1751/ 8400 batches | test loss 0.4186343 +| epoch 6 | 1755/ 8400 batches | test loss 0.4431723 +| epoch 6 | 1759/ 8400 batches | test loss 0.3621369 +| epoch 6 | 1763/ 8400 batches | test loss 0.4731016 +| epoch 6 | 1767/ 8400 batches | test loss 0.4326872 +| epoch 6 | 1771/ 8400 batches | test loss 0.4057695 +| epoch 6 | 1775/ 8400 batches | test loss 0.4239981 +| epoch 6 | 1779/ 8400 batches | test loss 0.4965719 +| epoch 6 | 1783/ 8400 batches | test loss 0.4622112 +| epoch 6 | 1787/ 8400 batches | test loss 0.4702056 +| epoch 6 | 1791/ 8400 batches | test loss 0.4432163 +| epoch 6 | 1795/ 8400 batches | test loss 0.4064668 +| epoch 6 | 1799/ 8400 batches | test loss 0.3796404 +| epoch 6 | 1803/ 8400 batches | test loss 0.5015675 +| epoch 6 | 1807/ 8400 batches | test loss 0.4251429 +| epoch 6 | 1811/ 8400 batches | test loss 0.4578754 +| epoch 6 | 1815/ 8400 batches | test loss 0.3996797 +| epoch 6 | 1819/ 8400 batches | test loss 0.4347377 +| epoch 6 | 1823/ 8400 batches | test loss 0.4852318 +| epoch 6 | 1827/ 8400 batches | test loss 0.3187134 +| epoch 6 | 1831/ 8400 batches | test loss 0.4140055 +| epoch 6 | 1835/ 8400 batches | test loss 0.3197008 +| epoch 6 | 1839/ 8400 batches | test loss 0.3474348 +| epoch 6 | 1843/ 8400 batches | test loss 0.4916970 +| epoch 6 | 1847/ 8400 batches | test loss 0.5394094 +| epoch 6 | 1851/ 8400 batches | test loss 0.6200229 +| epoch 6 | 1855/ 8400 batches | test loss 0.3567454 +| epoch 6 | 1859/ 8400 batches | test loss 0.4020200 +| epoch 6 | 1863/ 8400 batches | test loss 0.4001687 +| epoch 6 | 1867/ 8400 batches | test loss 0.3754732 +| epoch 6 | 1871/ 8400 batches | test loss 0.3765091 +| epoch 6 | 1875/ 8400 batches | test loss 0.4515297 +| epoch 6 | 1879/ 8400 batches | test loss 0.4339605 +| epoch 6 | 1883/ 8400 batches | test loss 0.4879806 +| epoch 6 | 1887/ 8400 batches | test loss 0.5642234 +| epoch 6 | 1891/ 8400 batches | test loss 0.3637054 +| epoch 6 | 1895/ 8400 batches | test loss 0.3961018 +| epoch 6 | 1899/ 8400 batches | test loss 0.3565073 +| epoch 6 | 1903/ 8400 batches | test loss 0.4822190 +| epoch 6 | 1907/ 8400 batches | test loss 0.4323738 +| epoch 6 | 1911/ 8400 batches | test loss 0.4637350 +| epoch 6 | 1915/ 8400 batches | test loss 0.4613394 +| epoch 6 | 1919/ 8400 batches | test loss 0.4426647 +| epoch 6 | 1923/ 8400 batches | test loss 0.4634042 +| epoch 6 | 1927/ 8400 batches | test loss 0.4922931 +| epoch 6 | 1931/ 8400 batches | test loss 0.4614497 +| epoch 6 | 1935/ 8400 batches | test loss 0.3661040 +| epoch 6 | 1939/ 8400 batches | test loss 0.3656563 +| epoch 6 | 1943/ 8400 batches | test loss 0.3498991 +| epoch 6 | 1947/ 8400 batches | test loss 0.3840072 +| epoch 6 | 1951/ 8400 batches | test loss 0.4319582 +| epoch 6 | 1955/ 8400 batches | test loss 0.3745115 +| epoch 6 | 1959/ 8400 batches | test loss 0.3974022 +| epoch 6 | 1963/ 8400 batches | test loss 0.3593958 +| epoch 6 | 1967/ 8400 batches | test loss 0.5463111 +| epoch 6 | 1971/ 8400 batches | test loss 0.4203042 +| epoch 6 | 1975/ 8400 batches | test loss 0.4054006 +| epoch 6 | 1979/ 8400 batches | test loss 0.4004213 +| epoch 6 | 1983/ 8400 batches | test loss 0.3260578 +| epoch 6 | 1987/ 8400 batches | test loss 0.3719757 +| epoch 6 | 1991/ 8400 batches | test loss 0.3839300 +| epoch 6 | 1995/ 8400 batches | test loss 0.4300877 +| epoch 6 | 1999/ 8400 batches | test loss 0.3390996 +| epoch 6 | 2003/ 8400 batches | test loss 0.5687500 +| epoch 6 | 2007/ 8400 batches | test loss 0.3483523 +| epoch 6 | 2011/ 8400 batches | test loss 0.3728432 +| epoch 6 | 2015/ 8400 batches | test loss 0.3820975 +| epoch 6 | 2019/ 8400 batches | test loss 0.3494164 +| epoch 6 | 2023/ 8400 batches | test loss 0.5489955 +| epoch 6 | 2027/ 8400 batches | test loss 0.3982060 +| epoch 6 | 2031/ 8400 batches | test loss 0.4429891 +| epoch 6 | 2035/ 8400 batches | test loss 0.3173622 +| epoch 6 | 2039/ 8400 batches | test loss 0.4372449 +| epoch 6 | 2043/ 8400 batches | test loss 0.4452164 +| epoch 6 | 2047/ 8400 batches | test loss 0.3572650 +| epoch 6 | 2051/ 8400 batches | test loss 0.4354232 +| epoch 6 | 2055/ 8400 batches | test loss 0.4692028 +| epoch 6 | 2059/ 8400 batches | test loss 0.4929334 +| epoch 6 | 2063/ 8400 batches | test loss 0.3465480 +| epoch 6 | 2067/ 8400 batches | test loss 0.4008588 +| epoch 6 | 2071/ 8400 batches | test loss 0.3993207 +| epoch 6 | 2075/ 8400 batches | test loss 0.4258235 +| epoch 6 | 2079/ 8400 batches | test loss 0.4596688 +| epoch 6 | 2083/ 8400 batches | test loss 0.4605424 +| epoch 6 | 2087/ 8400 batches | test loss 0.4234552 +| epoch 6 | 2091/ 8400 batches | test loss 0.4980497 +| epoch 6 | 2095/ 8400 batches | test loss 0.4683489 +| epoch 6 | 2099/ 8400 batches | test loss 0.4450598 +| epoch 6 | final test loss 0.4150, save model! +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 8400 batches | train loss 0.3617704 +| epoch 7 | 7/ 8400 batches | train loss 0.3984896 +| epoch 7 | 11/ 8400 batches | train loss 0.3628627 +| epoch 7 | 15/ 8400 batches | train loss 0.4029026 +| epoch 7 | 19/ 8400 batches | train loss 0.3902278 +| epoch 7 | 23/ 8400 batches | train loss 0.3345817 +| epoch 7 | 27/ 8400 batches | train loss 0.3784216 +| epoch 7 | 31/ 8400 batches | train loss 0.3505813 +| epoch 7 | 35/ 8400 batches | train loss 0.3990369 +| epoch 7 | 39/ 8400 batches | train loss 0.3399919 +| epoch 7 | 43/ 8400 batches | train loss 0.3199363 +| epoch 7 | 47/ 8400 batches | train loss 0.3043539 +| epoch 7 | 51/ 8400 batches | train loss 0.3128141 +| epoch 7 | 55/ 8400 batches | train loss 0.3429026 +| epoch 7 | 59/ 8400 batches | train loss 0.3778092 +| epoch 7 | 63/ 8400 batches | train loss 0.3455030 +| epoch 7 | 67/ 8400 batches | train loss 0.3769316 +| epoch 7 | 71/ 8400 batches | train loss 0.4531181 +| epoch 7 | 75/ 8400 batches | train loss 0.2899741 +| epoch 7 | 79/ 8400 batches | train loss 0.3545995 +| epoch 7 | 83/ 8400 batches | train loss 0.3318456 +| epoch 7 | 87/ 8400 batches | train loss 0.3725597 +| epoch 7 | 91/ 8400 batches | train loss 0.3298142 +| epoch 7 | 95/ 8400 batches | train loss 0.3805696 +| epoch 7 | 99/ 8400 batches | train loss 0.3929552 +| epoch 7 | 103/ 8400 batches | train loss 0.3558346 +| epoch 7 | 107/ 8400 batches | train loss 0.3676136 +| epoch 7 | 111/ 8400 batches | train loss 0.3625543 +| epoch 7 | 115/ 8400 batches | train loss 0.3927169 +| epoch 7 | 119/ 8400 batches | train loss 0.3691211 +| epoch 7 | 123/ 8400 batches | train loss 0.3405796 +| epoch 7 | 127/ 8400 batches | train loss 0.4130498 +| epoch 7 | 131/ 8400 batches | train loss 0.4244043 +| epoch 7 | 135/ 8400 batches | train loss 0.3130596 +| epoch 7 | 139/ 8400 batches | train loss 0.3881992 +| epoch 7 | 143/ 8400 batches | train loss 0.4255173 +| epoch 7 | 147/ 8400 batches | train loss 0.2927096 +| epoch 7 | 151/ 8400 batches | train loss 0.3170315 +| epoch 7 | 155/ 8400 batches | train loss 0.4261751 +| epoch 7 | 159/ 8400 batches | train loss 0.3418994 +| epoch 7 | 163/ 8400 batches | train loss 0.4195109 +| epoch 7 | 167/ 8400 batches | train loss 0.4684282 +| epoch 7 | 171/ 8400 batches | train loss 0.3646995 +| epoch 7 | 175/ 8400 batches | train loss 0.4101061 +| epoch 7 | 179/ 8400 batches | train loss 0.3804904 +| epoch 7 | 183/ 8400 batches | train loss 0.3313897 +| epoch 7 | 187/ 8400 batches | train loss 0.3903210 +| epoch 7 | 191/ 8400 batches | train loss 0.3753453 +| epoch 7 | 195/ 8400 batches | train loss 0.3425491 +| epoch 7 | 199/ 8400 batches | train loss 0.2755314 +| epoch 7 | 203/ 8400 batches | train loss 0.3433303 +| epoch 7 | 207/ 8400 batches | train loss 0.3113158 +| epoch 7 | 211/ 8400 batches | train loss 0.2900451 +| epoch 7 | 215/ 8400 batches | train loss 0.4083529 +| epoch 7 | 219/ 8400 batches | train loss 0.3272342 +| epoch 7 | 223/ 8400 batches | train loss 0.3589641 +| epoch 7 | 227/ 8400 batches | train loss 0.3347940 +| epoch 7 | 231/ 8400 batches | train loss 0.3493259 +| epoch 7 | 235/ 8400 batches | train loss 0.3599678 +| epoch 7 | 239/ 8400 batches | train loss 0.3654279 +| epoch 7 | 243/ 8400 batches | train loss 0.4529980 +| epoch 7 | 247/ 8400 batches | train loss 0.4410228 +| epoch 7 | 251/ 8400 batches | train loss 0.3640033 +| epoch 7 | 255/ 8400 batches | train loss 0.3605747 +| epoch 7 | 259/ 8400 batches | train loss 0.4275075 +| epoch 7 | 263/ 8400 batches | train loss 0.3191190 +| epoch 7 | 267/ 8400 batches | train loss 0.4439017 +| epoch 7 | 271/ 8400 batches | train loss 0.3886095 +| epoch 7 | 275/ 8400 batches | train loss 0.3814399 +| epoch 7 | 279/ 8400 batches | train loss 0.3692043 +| epoch 7 | 283/ 8400 batches | train loss 0.4163937 +| epoch 7 | 287/ 8400 batches | train loss 0.3201095 +| epoch 7 | 291/ 8400 batches | train loss 0.4068425 +| epoch 7 | 295/ 8400 batches | train loss 0.3124459 +| epoch 7 | 299/ 8400 batches | train loss 0.3603357 +| epoch 7 | 303/ 8400 batches | train loss 0.3158170 +| epoch 7 | 307/ 8400 batches | train loss 0.3145887 +| epoch 7 | 311/ 8400 batches | train loss 0.3443611 +| epoch 7 | 315/ 8400 batches | train loss 0.3825852 +| epoch 7 | 319/ 8400 batches | train loss 0.3364015 +| epoch 7 | 323/ 8400 batches | train loss 0.3701747 +| epoch 7 | 327/ 8400 batches | train loss 0.4002773 +| epoch 7 | 331/ 8400 batches | train loss 0.3200080 +| epoch 7 | 335/ 8400 batches | train loss 0.4097967 +| epoch 7 | 339/ 8400 batches | train loss 0.3487090 +| epoch 7 | 343/ 8400 batches | train loss 0.3358790 +| epoch 7 | 347/ 8400 batches | train loss 0.3632574 +| epoch 7 | 351/ 8400 batches | train loss 0.4037408 +| epoch 7 | 355/ 8400 batches | train loss 0.3626966 +| epoch 7 | 359/ 8400 batches | train loss 0.3654647 +| epoch 7 | 363/ 8400 batches | train loss 0.3294866 +| epoch 7 | 367/ 8400 batches | train loss 0.3880711 +| epoch 7 | 371/ 8400 batches | train loss 0.3809125 +| epoch 7 | 375/ 8400 batches | train loss 0.3662063 +| epoch 7 | 379/ 8400 batches | train loss 0.3140940 +| epoch 7 | 383/ 8400 batches | train loss 0.3003061 +| epoch 7 | 387/ 8400 batches | train loss 0.3723415 +| epoch 7 | 391/ 8400 batches | train loss 0.4299432 +| epoch 7 | 395/ 8400 batches | train loss 0.3519408 +| epoch 7 | 399/ 8400 batches | train loss 0.3298633 +| epoch 7 | 403/ 8400 batches | train loss 0.3472417 +| epoch 7 | 407/ 8400 batches | train loss 0.2954260 +| epoch 7 | 411/ 8400 batches | train loss 0.3365312 +| epoch 7 | 415/ 8400 batches | train loss 0.3131115 +| epoch 7 | 419/ 8400 batches | train loss 0.4536961 +| epoch 7 | 423/ 8400 batches | train loss 0.3827442 +| epoch 7 | 427/ 8400 batches | train loss 0.3599244 +| epoch 7 | 431/ 8400 batches | train loss 0.3731080 +| epoch 7 | 435/ 8400 batches | train loss 0.3800521 +| epoch 7 | 439/ 8400 batches | train loss 0.3382612 +| epoch 7 | 443/ 8400 batches | train loss 0.3248090 +| epoch 7 | 447/ 8400 batches | train loss 0.3557720 +| epoch 7 | 451/ 8400 batches | train loss 0.2648579 +| epoch 7 | 455/ 8400 batches | train loss 0.3938487 +| epoch 7 | 459/ 8400 batches | train loss 0.3608923 +| epoch 7 | 463/ 8400 batches | train loss 0.3669527 +| epoch 7 | 467/ 8400 batches | train loss 0.3403747 +| epoch 7 | 471/ 8400 batches | train loss 0.3220762 +| epoch 7 | 475/ 8400 batches | train loss 0.3017512 +| epoch 7 | 479/ 8400 batches | train loss 0.2853907 +| epoch 7 | 483/ 8400 batches | train loss 0.3537764 +| epoch 7 | 487/ 8400 batches | train loss 0.3899831 +| epoch 7 | 491/ 8400 batches | train loss 0.2917834 +| epoch 7 | 495/ 8400 batches | train loss 0.3808330 +| epoch 7 | 499/ 8400 batches | train loss 0.3102742 +| epoch 7 | 503/ 8400 batches | train loss 0.3767662 +| epoch 7 | 507/ 8400 batches | train loss 0.3599983 +| epoch 7 | 511/ 8400 batches | train loss 0.3668127 +| epoch 7 | 515/ 8400 batches | train loss 0.3400024 +| epoch 7 | 519/ 8400 batches | train loss 0.3462948 +| epoch 7 | 523/ 8400 batches | train loss 0.3213891 +| epoch 7 | 527/ 8400 batches | train loss 0.3457395 +| epoch 7 | 531/ 8400 batches | train loss 0.3689632 +| epoch 7 | 535/ 8400 batches | train loss 0.3396446 +| epoch 7 | 539/ 8400 batches | train loss 0.3729885 +| epoch 7 | 543/ 8400 batches | train loss 0.3756655 +| epoch 7 | 547/ 8400 batches | train loss 0.4112789 +| epoch 7 | 551/ 8400 batches | train loss 0.3222324 +| epoch 7 | 555/ 8400 batches | train loss 0.3866974 +| epoch 7 | 559/ 8400 batches | train loss 0.3276517 +| epoch 7 | 563/ 8400 batches | train loss 0.3697102 +| epoch 7 | 567/ 8400 batches | train loss 0.2958872 +| epoch 7 | 571/ 8400 batches | train loss 0.3075952 +| epoch 7 | 575/ 8400 batches | train loss 0.4572909 +| epoch 7 | 579/ 8400 batches | train loss 0.2499844 +| epoch 7 | 583/ 8400 batches | train loss 0.3813322 +| epoch 7 | 587/ 8400 batches | train loss 0.4071443 +| epoch 7 | 591/ 8400 batches | train loss 0.3258208 +| epoch 7 | 595/ 8400 batches | train loss 0.3917255 +| epoch 7 | 599/ 8400 batches | train loss 0.3159771 +| epoch 7 | 603/ 8400 batches | train loss 0.3593495 +| epoch 7 | 607/ 8400 batches | train loss 0.3542582 +| epoch 7 | 611/ 8400 batches | train loss 0.3817079 +| epoch 7 | 615/ 8400 batches | train loss 0.3074595 +| epoch 7 | 619/ 8400 batches | train loss 0.4412895 +| epoch 7 | 623/ 8400 batches | train loss 0.3214530 +| epoch 7 | 627/ 8400 batches | train loss 0.3733438 +| epoch 7 | 631/ 8400 batches | train loss 0.3810699 +| epoch 7 | 635/ 8400 batches | train loss 0.3830957 +| epoch 7 | 639/ 8400 batches | train loss 0.3121803 +| epoch 7 | 643/ 8400 batches | train loss 0.3276470 +| epoch 7 | 647/ 8400 batches | train loss 0.3450082 +| epoch 7 | 651/ 8400 batches | train loss 0.3385904 +| epoch 7 | 655/ 8400 batches | train loss 0.3259500 +| epoch 7 | 659/ 8400 batches | train loss 0.3547351 +| epoch 7 | 663/ 8400 batches | train loss 0.4210655 +| epoch 7 | 667/ 8400 batches | train loss 0.3630328 +| epoch 7 | 671/ 8400 batches | train loss 0.3949652 +| epoch 7 | 675/ 8400 batches | train loss 0.3116207 +| epoch 7 | 679/ 8400 batches | train loss 0.4058446 +| epoch 7 | 683/ 8400 batches | train loss 0.4007486 +| epoch 7 | 687/ 8400 batches | train loss 0.4074310 +| epoch 7 | 691/ 8400 batches | train loss 0.3623474 +| epoch 7 | 695/ 8400 batches | train loss 0.3448083 +| epoch 7 | 699/ 8400 batches | train loss 0.4463431 +| epoch 7 | 703/ 8400 batches | train loss 0.3677173 +| epoch 7 | 707/ 8400 batches | train loss 0.2985399 +| epoch 7 | 711/ 8400 batches | train loss 0.3937301 +| epoch 7 | 715/ 8400 batches | train loss 0.3303016 +| epoch 7 | 719/ 8400 batches | train loss 0.3303152 +| epoch 7 | 723/ 8400 batches | train loss 0.3402616 +| epoch 7 | 727/ 8400 batches | train loss 0.3535155 +| epoch 7 | 731/ 8400 batches | train loss 0.3348253 +| epoch 7 | 735/ 8400 batches | train loss 0.4019076 +| epoch 7 | 739/ 8400 batches | train loss 0.3381652 +| epoch 7 | 743/ 8400 batches | train loss 0.3428064 +| epoch 7 | 747/ 8400 batches | train loss 0.3945707 +| epoch 7 | 751/ 8400 batches | train loss 0.3561221 +| epoch 7 | 755/ 8400 batches | train loss 0.3438981 +| epoch 7 | 759/ 8400 batches | train loss 0.3343094 +| epoch 7 | 763/ 8400 batches | train loss 0.3155512 +| epoch 7 | 767/ 8400 batches | train loss 0.3254222 +| epoch 7 | 771/ 8400 batches | train loss 0.3281300 +| epoch 7 | 775/ 8400 batches | train loss 0.3097610 +| epoch 7 | 779/ 8400 batches | train loss 0.3631450 +| epoch 7 | 783/ 8400 batches | train loss 0.4530108 +| epoch 7 | 787/ 8400 batches | train loss 0.3306613 +| epoch 7 | 791/ 8400 batches | train loss 0.3394805 +| epoch 7 | 795/ 8400 batches | train loss 0.3301497 +| epoch 7 | 799/ 8400 batches | train loss 0.3030669 +| epoch 7 | 803/ 8400 batches | train loss 0.3089891 +| epoch 7 | 807/ 8400 batches | train loss 0.3287079 +| epoch 7 | 811/ 8400 batches | train loss 0.3595271 +| epoch 7 | 815/ 8400 batches | train loss 0.2795106 +| epoch 7 | 819/ 8400 batches | train loss 0.2861772 +| epoch 7 | 823/ 8400 batches | train loss 0.4128400 +| epoch 7 | 827/ 8400 batches | train loss 0.3943562 +| epoch 7 | 831/ 8400 batches | train loss 0.3662291 +| epoch 7 | 835/ 8400 batches | train loss 0.3483557 +| epoch 7 | 839/ 8400 batches | train loss 0.4183101 +| epoch 7 | 843/ 8400 batches | train loss 0.3368716 +| epoch 7 | 847/ 8400 batches | train loss 0.3341492 +| epoch 7 | 851/ 8400 batches | train loss 0.4502241 +| epoch 7 | 855/ 8400 batches | train loss 0.4016262 +| epoch 7 | 859/ 8400 batches | train loss 0.3866024 +| epoch 7 | 863/ 8400 batches | train loss 0.3635983 +| epoch 7 | 867/ 8400 batches | train loss 0.3308462 +| epoch 7 | 871/ 8400 batches | train loss 0.3346968 +| epoch 7 | 875/ 8400 batches | train loss 0.3602312 +| epoch 7 | 879/ 8400 batches | train loss 0.4208693 +| epoch 7 | 883/ 8400 batches | train loss 0.3758827 +| epoch 7 | 887/ 8400 batches | train loss 0.3116698 +| epoch 7 | 891/ 8400 batches | train loss 0.2710414 +| epoch 7 | 895/ 8400 batches | train loss 0.3778498 +| epoch 7 | 899/ 8400 batches | train loss 0.3141935 +| epoch 7 | 903/ 8400 batches | train loss 0.3293510 +| epoch 7 | 907/ 8400 batches | train loss 0.3498645 +| epoch 7 | 911/ 8400 batches | train loss 0.3241106 +| epoch 7 | 915/ 8400 batches | train loss 0.3925366 +| epoch 7 | 919/ 8400 batches | train loss 0.3480534 +| epoch 7 | 923/ 8400 batches | train loss 0.3533126 +| epoch 7 | 927/ 8400 batches | train loss 0.3654597 +| epoch 7 | 931/ 8400 batches | train loss 0.3187544 +| epoch 7 | 935/ 8400 batches | train loss 0.3408879 +| epoch 7 | 939/ 8400 batches | train loss 0.4044068 +| epoch 7 | 943/ 8400 batches | train loss 0.4658023 +| epoch 7 | 947/ 8400 batches | train loss 0.3456819 +| epoch 7 | 951/ 8400 batches | train loss 0.3447219 +| epoch 7 | 955/ 8400 batches | train loss 0.3620029 +| epoch 7 | 959/ 8400 batches | train loss 0.3493114 +| epoch 7 | 963/ 8400 batches | train loss 0.3220542 +| epoch 7 | 967/ 8400 batches | train loss 0.3567740 +| epoch 7 | 971/ 8400 batches | train loss 0.3686883 +| epoch 7 | 975/ 8400 batches | train loss 0.3401678 +| epoch 7 | 979/ 8400 batches | train loss 0.3909651 +| epoch 7 | 983/ 8400 batches | train loss 0.3547162 +| epoch 7 | 987/ 8400 batches | train loss 0.3486206 +| epoch 7 | 991/ 8400 batches | train loss 0.4069743 +| epoch 7 | 995/ 8400 batches | train loss 0.3712887 +| epoch 7 | 999/ 8400 batches | train loss 0.3916022 +| epoch 7 | 1003/ 8400 batches | train loss 0.3978913 +| epoch 7 | 1007/ 8400 batches | train loss 0.3793799 +| epoch 7 | 1011/ 8400 batches | train loss 0.3695644 +| epoch 7 | 1015/ 8400 batches | train loss 0.3965146 +| epoch 7 | 1019/ 8400 batches | train loss 0.3552900 +| epoch 7 | 1023/ 8400 batches | train loss 0.3780383 +| epoch 7 | 1027/ 8400 batches | train loss 0.3243116 +| epoch 7 | 1031/ 8400 batches | train loss 0.3580101 +| epoch 7 | 1035/ 8400 batches | train loss 0.3340560 +| epoch 7 | 1039/ 8400 batches | train loss 0.4444728 +| epoch 7 | 1043/ 8400 batches | train loss 0.3264514 +| epoch 7 | 1047/ 8400 batches | train loss 0.4090371 +| epoch 7 | 1051/ 8400 batches | train loss 0.3296898 +| epoch 7 | 1055/ 8400 batches | train loss 0.3627830 +| epoch 7 | 1059/ 8400 batches | train loss 0.3887315 +| epoch 7 | 1063/ 8400 batches | train loss 0.3021878 +| epoch 7 | 1067/ 8400 batches | train loss 0.3587124 +| epoch 7 | 1071/ 8400 batches | train loss 0.3158421 +| epoch 7 | 1075/ 8400 batches | train loss 0.4006680 +| epoch 7 | 1079/ 8400 batches | train loss 0.4022880 +| epoch 7 | 1083/ 8400 batches | train loss 0.3161204 +| epoch 7 | 1087/ 8400 batches | train loss 0.3557802 +| epoch 7 | 1091/ 8400 batches | train loss 0.3244771 +| epoch 7 | 1095/ 8400 batches | train loss 0.3496135 +| epoch 7 | 1099/ 8400 batches | train loss 0.4095316 +| epoch 7 | 1103/ 8400 batches | train loss 0.3098934 +| epoch 7 | 1107/ 8400 batches | train loss 0.3278208 +| epoch 7 | 1111/ 8400 batches | train loss 0.3375106 +| epoch 7 | 1115/ 8400 batches | train loss 0.3631378 +| epoch 7 | 1119/ 8400 batches | train loss 0.3841638 +| epoch 7 | 1123/ 8400 batches | train loss 0.3542984 +| epoch 7 | 1127/ 8400 batches | train loss 0.3303619 +| epoch 7 | 1131/ 8400 batches | train loss 0.3150708 +| epoch 7 | 1135/ 8400 batches | train loss 0.3366851 +| epoch 7 | 1139/ 8400 batches | train loss 0.3557243 +| epoch 7 | 1143/ 8400 batches | train loss 0.3225219 +| epoch 7 | 1147/ 8400 batches | train loss 0.3374849 +| epoch 7 | 1151/ 8400 batches | train loss 0.3561670 +| epoch 7 | 1155/ 8400 batches | train loss 0.3647342 +| epoch 7 | 1159/ 8400 batches | train loss 0.3166416 +| epoch 7 | 1163/ 8400 batches | train loss 0.3604106 +| epoch 7 | 1167/ 8400 batches | train loss 0.3806164 +| epoch 7 | 1171/ 8400 batches | train loss 0.4025021 +| epoch 7 | 1175/ 8400 batches | train loss 0.4405228 +| epoch 7 | 1179/ 8400 batches | train loss 0.3226522 +| epoch 7 | 1183/ 8400 batches | train loss 0.3402746 +| epoch 7 | 1187/ 8400 batches | train loss 0.3382253 +| epoch 7 | 1191/ 8400 batches | train loss 0.3374507 +| epoch 7 | 1195/ 8400 batches | train loss 0.3819653 +| epoch 7 | 1199/ 8400 batches | train loss 0.3712891 +| epoch 7 | 1203/ 8400 batches | train loss 0.3520905 +| epoch 7 | 1207/ 8400 batches | train loss 0.4001919 +| epoch 7 | 1211/ 8400 batches | train loss 0.3593338 +| epoch 7 | 1215/ 8400 batches | train loss 0.4322696 +| epoch 7 | 1219/ 8400 batches | train loss 0.4431444 +| epoch 7 | 1223/ 8400 batches | train loss 0.3533998 +| epoch 7 | 1227/ 8400 batches | train loss 0.3845326 +| epoch 7 | 1231/ 8400 batches | train loss 0.2979996 +| epoch 7 | 1235/ 8400 batches | train loss 0.3873427 +| epoch 7 | 1239/ 8400 batches | train loss 0.3625882 +| epoch 7 | 1243/ 8400 batches | train loss 0.3867355 +| epoch 7 | 1247/ 8400 batches | train loss 0.4047527 +| epoch 7 | 1251/ 8400 batches | train loss 0.3935176 +| epoch 7 | 1255/ 8400 batches | train loss 0.3453330 +| epoch 7 | 1259/ 8400 batches | train loss 0.3220832 +| epoch 7 | 1263/ 8400 batches | train loss 0.2934462 +| epoch 7 | 1267/ 8400 batches | train loss 0.3512852 +| epoch 7 | 1271/ 8400 batches | train loss 0.3548649 +| epoch 7 | 1275/ 8400 batches | train loss 0.3655433 +| epoch 7 | 1279/ 8400 batches | train loss 0.3444504 +| epoch 7 | 1283/ 8400 batches | train loss 0.3722924 +| epoch 7 | 1287/ 8400 batches | train loss 0.3235499 +| epoch 7 | 1291/ 8400 batches | train loss 0.3704364 +| epoch 7 | 1295/ 8400 batches | train loss 0.3396399 +| epoch 7 | 1299/ 8400 batches | train loss 0.3905005 +| epoch 7 | 1303/ 8400 batches | train loss 0.4133340 +| epoch 7 | 1307/ 8400 batches | train loss 0.3847378 +| epoch 7 | 1311/ 8400 batches | train loss 0.3813891 +| epoch 7 | 1315/ 8400 batches | train loss 0.3601862 +| epoch 7 | 1319/ 8400 batches | train loss 0.3336440 +| epoch 7 | 1323/ 8400 batches | train loss 0.3356461 +| epoch 7 | 1327/ 8400 batches | train loss 0.3503003 +| epoch 7 | 1331/ 8400 batches | train loss 0.3653867 +| epoch 7 | 1335/ 8400 batches | train loss 0.3313947 +| epoch 7 | 1339/ 8400 batches | train loss 0.4211240 +| epoch 7 | 1343/ 8400 batches | train loss 0.3271414 +| epoch 7 | 1347/ 8400 batches | train loss 0.3424243 +| epoch 7 | 1351/ 8400 batches | train loss 0.4468690 +| epoch 7 | 1355/ 8400 batches | train loss 0.3558989 +| epoch 7 | 1359/ 8400 batches | train loss 0.3035831 +| epoch 7 | 1363/ 8400 batches | train loss 0.3422865 +| epoch 7 | 1367/ 8400 batches | train loss 0.3449977 +| epoch 7 | 1371/ 8400 batches | train loss 0.4275176 +| epoch 7 | 1375/ 8400 batches | train loss 0.3448218 +| epoch 7 | 1379/ 8400 batches | train loss 0.3928439 +| epoch 7 | 1383/ 8400 batches | train loss 0.3205363 +| epoch 7 | 1387/ 8400 batches | train loss 0.3031750 +| epoch 7 | 1391/ 8400 batches | train loss 0.3006003 +| epoch 7 | 1395/ 8400 batches | train loss 0.4328460 +| epoch 7 | 1399/ 8400 batches | train loss 0.3750421 +| epoch 7 | 1403/ 8400 batches | train loss 0.3766921 +| epoch 7 | 1407/ 8400 batches | train loss 0.4378273 +| epoch 7 | 1411/ 8400 batches | train loss 0.3580814 +| epoch 7 | 1415/ 8400 batches | train loss 0.4088573 +| epoch 7 | 1419/ 8400 batches | train loss 0.3625454 +| epoch 7 | 1423/ 8400 batches | train loss 0.3150263 +| epoch 7 | 1427/ 8400 batches | train loss 0.3661667 +| epoch 7 | 1431/ 8400 batches | train loss 0.3421070 +| epoch 7 | 1435/ 8400 batches | train loss 0.4298201 +| epoch 7 | 1439/ 8400 batches | train loss 0.3315792 +| epoch 7 | 1443/ 8400 batches | train loss 0.3863560 +| epoch 7 | 1447/ 8400 batches | train loss 0.3541430 +| epoch 7 | 1451/ 8400 batches | train loss 0.3358443 +| epoch 7 | 1455/ 8400 batches | train loss 0.3360106 +| epoch 7 | 1459/ 8400 batches | train loss 0.3705517 +| epoch 7 | 1463/ 8400 batches | train loss 0.3296778 +| epoch 7 | 1467/ 8400 batches | train loss 0.3863005 +| epoch 7 | 1471/ 8400 batches | train loss 0.3073279 +| epoch 7 | 1475/ 8400 batches | train loss 0.4229244 +| epoch 7 | 1479/ 8400 batches | train loss 0.3062002 +| epoch 7 | 1483/ 8400 batches | train loss 0.3608736 +| epoch 7 | 1487/ 8400 batches | train loss 0.3208219 +| epoch 7 | 1491/ 8400 batches | train loss 0.3893638 +| epoch 7 | 1495/ 8400 batches | train loss 0.3593720 +| epoch 7 | 1499/ 8400 batches | train loss 0.3088775 +| epoch 7 | 1503/ 8400 batches | train loss 0.3729511 +| epoch 7 | 1507/ 8400 batches | train loss 0.4040471 +| epoch 7 | 1511/ 8400 batches | train loss 0.3773651 +| epoch 7 | 1515/ 8400 batches | train loss 0.3623927 +| epoch 7 | 1519/ 8400 batches | train loss 0.3402421 +| epoch 7 | 1523/ 8400 batches | train loss 0.4115404 +| epoch 7 | 1527/ 8400 batches | train loss 0.3801891 +| epoch 7 | 1531/ 8400 batches | train loss 0.3894976 +| epoch 7 | 1535/ 8400 batches | train loss 0.3396758 +| epoch 7 | 1539/ 8400 batches | train loss 0.4229912 +| epoch 7 | 1543/ 8400 batches | train loss 0.3416615 +| epoch 7 | 1547/ 8400 batches | train loss 0.3886974 +| epoch 7 | 1551/ 8400 batches | train loss 0.3197342 +| epoch 7 | 1555/ 8400 batches | train loss 0.3289965 +| epoch 7 | 1559/ 8400 batches | train loss 0.2982542 +| epoch 7 | 1563/ 8400 batches | train loss 0.3471969 +| epoch 7 | 1567/ 8400 batches | train loss 0.3395610 +| epoch 7 | 1571/ 8400 batches | train loss 0.3020733 +| epoch 7 | 1575/ 8400 batches | train loss 0.3582218 +| epoch 7 | 1579/ 8400 batches | train loss 0.3586542 +| epoch 7 | 1583/ 8400 batches | train loss 0.3269135 +| epoch 7 | 1587/ 8400 batches | train loss 0.3841953 +| epoch 7 | 1591/ 8400 batches | train loss 0.3589424 +| epoch 7 | 1595/ 8400 batches | train loss 0.3479999 +| epoch 7 | 1599/ 8400 batches | train loss 0.3858474 +| epoch 7 | 1603/ 8400 batches | train loss 0.4761946 +| epoch 7 | 1607/ 8400 batches | train loss 0.3911554 +| epoch 7 | 1611/ 8400 batches | train loss 0.3325820 +| epoch 7 | 1615/ 8400 batches | train loss 0.2956446 +| epoch 7 | 1619/ 8400 batches | train loss 0.3491055 +| epoch 7 | 1623/ 8400 batches | train loss 0.3857804 +| epoch 7 | 1627/ 8400 batches | train loss 0.3571491 +| epoch 7 | 1631/ 8400 batches | train loss 0.3357952 +| epoch 7 | 1635/ 8400 batches | train loss 0.3291946 +| epoch 7 | 1639/ 8400 batches | train loss 0.3316828 +| epoch 7 | 1643/ 8400 batches | train loss 0.3710437 +| epoch 7 | 1647/ 8400 batches | train loss 0.3256666 +| epoch 7 | 1651/ 8400 batches | train loss 0.3226369 +| epoch 7 | 1655/ 8400 batches | train loss 0.3522791 +| epoch 7 | 1659/ 8400 batches | train loss 0.4589837 +| epoch 7 | 1663/ 8400 batches | train loss 0.3623352 +| epoch 7 | 1667/ 8400 batches | train loss 0.3187663 +| epoch 7 | 1671/ 8400 batches | train loss 0.4040367 +| epoch 7 | 1675/ 8400 batches | train loss 0.4369060 +| epoch 7 | 1679/ 8400 batches | train loss 0.4161919 +| epoch 7 | 1683/ 8400 batches | train loss 0.2741169 +| epoch 7 | 1687/ 8400 batches | train loss 0.3784319 +| epoch 7 | 1691/ 8400 batches | train loss 0.3844811 +| epoch 7 | 1695/ 8400 batches | train loss 0.4312771 +| epoch 7 | 1699/ 8400 batches | train loss 0.3366951 +| epoch 7 | 1703/ 8400 batches | train loss 0.3896322 +| epoch 7 | 1707/ 8400 batches | train loss 0.3864191 +| epoch 7 | 1711/ 8400 batches | train loss 0.3949538 +| epoch 7 | 1715/ 8400 batches | train loss 0.3892167 +| epoch 7 | 1719/ 8400 batches | train loss 0.3084479 +| epoch 7 | 1723/ 8400 batches | train loss 0.3474004 +| epoch 7 | 1727/ 8400 batches | train loss 0.3580364 +| epoch 7 | 1731/ 8400 batches | train loss 0.3751112 +| epoch 7 | 1735/ 8400 batches | train loss 0.3219976 +| epoch 7 | 1739/ 8400 batches | train loss 0.3489513 +| epoch 7 | 1743/ 8400 batches | train loss 0.3710800 +| epoch 7 | 1747/ 8400 batches | train loss 0.3951572 +| epoch 7 | 1751/ 8400 batches | train loss 0.3688252 +| epoch 7 | 1755/ 8400 batches | train loss 0.3488019 +| epoch 7 | 1759/ 8400 batches | train loss 0.3874257 +| epoch 7 | 1763/ 8400 batches | train loss 0.3578607 +| epoch 7 | 1767/ 8400 batches | train loss 0.3748353 +| epoch 7 | 1771/ 8400 batches | train loss 0.2864473 +| epoch 7 | 1775/ 8400 batches | train loss 0.3743672 +| epoch 7 | 1779/ 8400 batches | train loss 0.3857105 +| epoch 7 | 1783/ 8400 batches | train loss 0.3301890 +| epoch 7 | 1787/ 8400 batches | train loss 0.3895319 +| epoch 7 | 1791/ 8400 batches | train loss 0.3049517 +| epoch 7 | 1795/ 8400 batches | train loss 0.4079368 +| epoch 7 | 1799/ 8400 batches | train loss 0.3561847 +| epoch 7 | 1803/ 8400 batches | train loss 0.3838480 +| epoch 7 | 1807/ 8400 batches | train loss 0.2256036 +| epoch 7 | 1811/ 8400 batches | train loss 0.3832923 +| epoch 7 | 1815/ 8400 batches | train loss 0.3824050 +| epoch 7 | 1819/ 8400 batches | train loss 0.3314478 +| epoch 7 | 1823/ 8400 batches | train loss 0.3988916 +| epoch 7 | 1827/ 8400 batches | train loss 0.3421153 +| epoch 7 | 1831/ 8400 batches | train loss 0.3473279 +| epoch 7 | 1835/ 8400 batches | train loss 0.4278511 +| epoch 7 | 1839/ 8400 batches | train loss 0.3245912 +| epoch 7 | 1843/ 8400 batches | train loss 0.3619133 +| epoch 7 | 1847/ 8400 batches | train loss 0.3325206 +| epoch 7 | 1851/ 8400 batches | train loss 0.3448371 +| epoch 7 | 1855/ 8400 batches | train loss 0.3643064 +| epoch 7 | 1859/ 8400 batches | train loss 0.3860171 +| epoch 7 | 1863/ 8400 batches | train loss 0.3181002 +| epoch 7 | 1867/ 8400 batches | train loss 0.4097219 +| epoch 7 | 1871/ 8400 batches | train loss 0.3617768 +| epoch 7 | 1875/ 8400 batches | train loss 0.3701141 +| epoch 7 | 1879/ 8400 batches | train loss 0.3277436 +| epoch 7 | 1883/ 8400 batches | train loss 0.3508056 +| epoch 7 | 1887/ 8400 batches | train loss 0.3861675 +| epoch 7 | 1891/ 8400 batches | train loss 0.3240835 +| epoch 7 | 1895/ 8400 batches | train loss 0.3502734 +| epoch 7 | 1899/ 8400 batches | train loss 0.3138644 +| epoch 7 | 1903/ 8400 batches | train loss 0.3700454 +| epoch 7 | 1907/ 8400 batches | train loss 0.3363601 +| epoch 7 | 1911/ 8400 batches | train loss 0.3310741 +| epoch 7 | 1915/ 8400 batches | train loss 0.4623892 +| epoch 7 | 1919/ 8400 batches | train loss 0.3151414 +| epoch 7 | 1923/ 8400 batches | train loss 0.3366144 +| epoch 7 | 1927/ 8400 batches | train loss 0.4217358 +| epoch 7 | 1931/ 8400 batches | train loss 0.3308824 +| epoch 7 | 1935/ 8400 batches | train loss 0.3029709 +| epoch 7 | 1939/ 8400 batches | train loss 0.3503064 +| epoch 7 | 1943/ 8400 batches | train loss 0.2899682 +| epoch 7 | 1947/ 8400 batches | train loss 0.2760858 +| epoch 7 | 1951/ 8400 batches | train loss 0.3418326 +| epoch 7 | 1955/ 8400 batches | train loss 0.4366845 +| epoch 7 | 1959/ 8400 batches | train loss 0.3630456 +| epoch 7 | 1963/ 8400 batches | train loss 0.3219472 +| epoch 7 | 1967/ 8400 batches | train loss 0.3957665 +| epoch 7 | 1971/ 8400 batches | train loss 0.3988511 +| epoch 7 | 1975/ 8400 batches | train loss 0.3533781 +| epoch 7 | 1979/ 8400 batches | train loss 0.3032618 +| epoch 7 | 1983/ 8400 batches | train loss 0.3496816 +| epoch 7 | 1987/ 8400 batches | train loss 0.3752623 +| epoch 7 | 1991/ 8400 batches | train loss 0.3263994 +| epoch 7 | 1995/ 8400 batches | train loss 0.3795471 +| epoch 7 | 1999/ 8400 batches | train loss 0.3433757 +| epoch 7 | 2003/ 8400 batches | train loss 0.2894884 +| epoch 7 | 2007/ 8400 batches | train loss 0.3201433 +| epoch 7 | 2011/ 8400 batches | train loss 0.2960785 +| epoch 7 | 2015/ 8400 batches | train loss 0.3921450 +| epoch 7 | 2019/ 8400 batches | train loss 0.3916346 +| epoch 7 | 2023/ 8400 batches | train loss 0.4214052 +| epoch 7 | 2027/ 8400 batches | train loss 0.3980322 +| epoch 7 | 2031/ 8400 batches | train loss 0.4233063 +| epoch 7 | 2035/ 8400 batches | train loss 0.3466022 +| epoch 7 | 2039/ 8400 batches | train loss 0.3397756 +| epoch 7 | 2043/ 8400 batches | train loss 0.3466139 +| epoch 7 | 2047/ 8400 batches | train loss 0.3407726 +| epoch 7 | 2051/ 8400 batches | train loss 0.3347750 +| epoch 7 | 2055/ 8400 batches | train loss 0.3477569 +| epoch 7 | 2059/ 8400 batches | train loss 0.3611835 +| epoch 7 | 2063/ 8400 batches | train loss 0.4738321 +| epoch 7 | 2067/ 8400 batches | train loss 0.1480286 +| epoch 7 | 2071/ 8400 batches | train loss 0.3673510 +| epoch 7 | 2075/ 8400 batches | train loss 0.3203762 +| epoch 7 | 2079/ 8400 batches | train loss 0.3332101 +| epoch 7 | 2083/ 8400 batches | train loss 0.4015871 +| epoch 7 | 2087/ 8400 batches | train loss 0.3501354 +| epoch 7 | 2091/ 8400 batches | train loss 0.3902273 +| epoch 7 | 2095/ 8400 batches | train loss 0.2973800 +| epoch 7 | 2099/ 8400 batches | train loss 0.3122026 +| epoch 7 | 2103/ 8400 batches | train loss 0.3544277 +| epoch 7 | 2107/ 8400 batches | train loss 0.4097882 +| epoch 7 | 2111/ 8400 batches | train loss 0.3186215 +| epoch 7 | 2115/ 8400 batches | train loss 0.3192064 +| epoch 7 | 2119/ 8400 batches | train loss 0.3663662 +| epoch 7 | 2123/ 8400 batches | train loss 0.3209083 +| epoch 7 | 2127/ 8400 batches | train loss 0.3516485 +| epoch 7 | 2131/ 8400 batches | train loss 0.3499904 +| epoch 7 | 2135/ 8400 batches | train loss 0.3716200 +| epoch 7 | 2139/ 8400 batches | train loss 0.3867645 +| epoch 7 | 2143/ 8400 batches | train loss 0.3345057 +| epoch 7 | 2147/ 8400 batches | train loss 0.3255211 +| epoch 7 | 2151/ 8400 batches | train loss 0.5071253 +| epoch 7 | 2155/ 8400 batches | train loss 0.3656728 +| epoch 7 | 2159/ 8400 batches | train loss 0.4138246 +| epoch 7 | 2163/ 8400 batches | train loss 0.3838471 +| epoch 7 | 2167/ 8400 batches | train loss 0.3810146 +| epoch 7 | 2171/ 8400 batches | train loss 0.3366937 +| epoch 7 | 2175/ 8400 batches | train loss 0.3598711 +| epoch 7 | 2179/ 8400 batches | train loss 0.3536243 +| epoch 7 | 2183/ 8400 batches | train loss 0.3787286 +| epoch 7 | 2187/ 8400 batches | train loss 0.3961959 +| epoch 7 | 2191/ 8400 batches | train loss 0.3759919 +| epoch 7 | 2195/ 8400 batches | train loss 0.2360147 +| epoch 7 | 2199/ 8400 batches | train loss 0.3870557 +| epoch 7 | 2203/ 8400 batches | train loss 0.2893849 +| epoch 7 | 2207/ 8400 batches | train loss 0.3849640 +| epoch 7 | 2211/ 8400 batches | train loss 0.3570258 +| epoch 7 | 2215/ 8400 batches | train loss 0.3703444 +| epoch 7 | 2219/ 8400 batches | train loss 0.3669474 +| epoch 7 | 2223/ 8400 batches | train loss 0.3474252 +| epoch 7 | 2227/ 8400 batches | train loss 0.3256067 +| epoch 7 | 2231/ 8400 batches | train loss 0.3075062 +| epoch 7 | 2235/ 8400 batches | train loss 0.3740235 +| epoch 7 | 2239/ 8400 batches | train loss 0.4111625 +| epoch 7 | 2243/ 8400 batches | train loss 0.1470822 +| epoch 7 | 2247/ 8400 batches | train loss 0.4132509 +| epoch 7 | 2251/ 8400 batches | train loss 0.3686956 +| epoch 7 | 2255/ 8400 batches | train loss 0.3564043 +| epoch 7 | 2259/ 8400 batches | train loss 0.3299018 +| epoch 7 | 2263/ 8400 batches | train loss 0.3354581 +| epoch 7 | 2267/ 8400 batches | train loss 0.3304592 +| epoch 7 | 2271/ 8400 batches | train loss 0.3947281 +| epoch 7 | 2275/ 8400 batches | train loss 0.3469608 +| epoch 7 | 2279/ 8400 batches | train loss 0.4178960 +| epoch 7 | 2283/ 8400 batches | train loss 0.3330218 +| epoch 7 | 2287/ 8400 batches | train loss 0.3731573 +| epoch 7 | 2291/ 8400 batches | train loss 0.3812881 +| epoch 7 | 2295/ 8400 batches | train loss 0.3299403 +| epoch 7 | 2299/ 8400 batches | train loss 0.3550046 +| epoch 7 | 2303/ 8400 batches | train loss 0.3627326 +| epoch 7 | 2307/ 8400 batches | train loss 0.3353678 +| epoch 7 | 2311/ 8400 batches | train loss 0.4111675 +| epoch 7 | 2315/ 8400 batches | train loss 0.3457254 +| epoch 7 | 2319/ 8400 batches | train loss 0.4205939 +| epoch 7 | 2323/ 8400 batches | train loss 0.4560784 +| epoch 7 | 2327/ 8400 batches | train loss 0.3199475 +| epoch 7 | 2331/ 8400 batches | train loss 0.3733286 +| epoch 7 | 2335/ 8400 batches | train loss 0.3351964 +| epoch 7 | 2339/ 8400 batches | train loss 0.5188558 +| epoch 7 | 2343/ 8400 batches | train loss 0.3678409 +| epoch 7 | 2347/ 8400 batches | train loss 0.3409491 +| epoch 7 | 2351/ 8400 batches | train loss 0.3445482 +| epoch 7 | 2355/ 8400 batches | train loss 0.3808549 +| epoch 7 | 2359/ 8400 batches | train loss 0.3248881 +| epoch 7 | 2363/ 8400 batches | train loss 0.3239073 +| epoch 7 | 2367/ 8400 batches | train loss 0.3868669 +| epoch 7 | 2371/ 8400 batches | train loss 0.3733470 +| epoch 7 | 2375/ 8400 batches | train loss 0.3314071 +| epoch 7 | 2379/ 8400 batches | train loss 0.3803862 +| epoch 7 | 2383/ 8400 batches | train loss 0.4792884 +| epoch 7 | 2387/ 8400 batches | train loss 0.3198482 +| epoch 7 | 2391/ 8400 batches | train loss 0.3548169 +| epoch 7 | 2395/ 8400 batches | train loss 0.3576029 +| epoch 7 | 2399/ 8400 batches | train loss 0.3279961 +| epoch 7 | 2403/ 8400 batches | train loss 0.3899699 +| epoch 7 | 2407/ 8400 batches | train loss 0.4026601 +| epoch 7 | 2411/ 8400 batches | train loss 0.3781609 +| epoch 7 | 2415/ 8400 batches | train loss 0.3266952 +| epoch 7 | 2419/ 8400 batches | train loss 0.3216924 +| epoch 7 | 2423/ 8400 batches | train loss 0.3919368 +| epoch 7 | 2427/ 8400 batches | train loss 0.4560919 +| epoch 7 | 2431/ 8400 batches | train loss 0.4296690 +| epoch 7 | 2435/ 8400 batches | train loss 0.3425978 +| epoch 7 | 2439/ 8400 batches | train loss 0.4117949 +| epoch 7 | 2443/ 8400 batches | train loss 0.3396770 +| epoch 7 | 2447/ 8400 batches | train loss 0.3468016 +| epoch 7 | 2451/ 8400 batches | train loss 0.2785739 +| epoch 7 | 2455/ 8400 batches | train loss 0.3393490 +| epoch 7 | 2459/ 8400 batches | train loss 0.4207831 +| epoch 7 | 2463/ 8400 batches | train loss 0.3956060 +| epoch 7 | 2467/ 8400 batches | train loss 0.3871213 +| epoch 7 | 2471/ 8400 batches | train loss 0.3548317 +| epoch 7 | 2475/ 8400 batches | train loss 0.3346007 +| epoch 7 | 2479/ 8400 batches | train loss 0.3562943 +| epoch 7 | 2483/ 8400 batches | train loss 0.3871866 +| epoch 7 | 2487/ 8400 batches | train loss 0.3799371 +| epoch 7 | 2491/ 8400 batches | train loss 0.3991228 +| epoch 7 | 2495/ 8400 batches | train loss 0.3653827 +| epoch 7 | 2499/ 8400 batches | train loss 0.3781880 +| epoch 7 | 2503/ 8400 batches | train loss 0.2946808 +| epoch 7 | 2507/ 8400 batches | train loss 0.3659984 +| epoch 7 | 2511/ 8400 batches | train loss 0.3813098 +| epoch 7 | 2515/ 8400 batches | train loss 0.3582936 +| epoch 7 | 2519/ 8400 batches | train loss 0.3901344 +| epoch 7 | 2523/ 8400 batches | train loss 0.3638233 +| epoch 7 | 2527/ 8400 batches | train loss 0.3479212 +| epoch 7 | 2531/ 8400 batches | train loss 0.3919359 +| epoch 7 | 2535/ 8400 batches | train loss 0.3206904 +| epoch 7 | 2539/ 8400 batches | train loss 0.3132969 +| epoch 7 | 2543/ 8400 batches | train loss 0.3047800 +| epoch 7 | 2547/ 8400 batches | train loss 0.3201182 +| epoch 7 | 2551/ 8400 batches | train loss 0.3247258 +| epoch 7 | 2555/ 8400 batches | train loss 0.3349130 +| epoch 7 | 2559/ 8400 batches | train loss 0.3010781 +| epoch 7 | 2563/ 8400 batches | train loss 0.4182490 +| epoch 7 | 2567/ 8400 batches | train loss 0.3586982 +| epoch 7 | 2571/ 8400 batches | train loss 0.3045875 +| epoch 7 | 2575/ 8400 batches | train loss 0.3521161 +| epoch 7 | 2579/ 8400 batches | train loss 0.3542081 +| epoch 7 | 2583/ 8400 batches | train loss 0.3593823 +| epoch 7 | 2587/ 8400 batches | train loss 0.3435087 +| epoch 7 | 2591/ 8400 batches | train loss 0.3661897 +| epoch 7 | 2595/ 8400 batches | train loss 0.3889895 +| epoch 7 | 2599/ 8400 batches | train loss 0.4187499 +| epoch 7 | 2603/ 8400 batches | train loss 0.3344944 +| epoch 7 | 2607/ 8400 batches | train loss 0.3647029 +| epoch 7 | 2611/ 8400 batches | train loss 0.3321221 +| epoch 7 | 2615/ 8400 batches | train loss 0.3190743 +| epoch 7 | 2619/ 8400 batches | train loss 0.3752614 +| epoch 7 | 2623/ 8400 batches | train loss 0.3522375 +| epoch 7 | 2627/ 8400 batches | train loss 0.3360970 +| epoch 7 | 2631/ 8400 batches | train loss 0.3448951 +| epoch 7 | 2635/ 8400 batches | train loss 0.3112283 +| epoch 7 | 2639/ 8400 batches | train loss 0.3781382 +| epoch 7 | 2643/ 8400 batches | train loss 0.4256740 +| epoch 7 | 2647/ 8400 batches | train loss 0.2950701 +| epoch 7 | 2651/ 8400 batches | train loss 0.3091689 +| epoch 7 | 2655/ 8400 batches | train loss 0.3663131 +| epoch 7 | 2659/ 8400 batches | train loss 0.3679345 +| epoch 7 | 2663/ 8400 batches | train loss 0.2719095 +| epoch 7 | 2667/ 8400 batches | train loss 0.3016780 +| epoch 7 | 2671/ 8400 batches | train loss 0.3514872 +| epoch 7 | 2675/ 8400 batches | train loss 0.3364808 +| epoch 7 | 2679/ 8400 batches | train loss 0.3387062 +| epoch 7 | 2683/ 8400 batches | train loss 0.3371310 +| epoch 7 | 2687/ 8400 batches | train loss 0.3074896 +| epoch 7 | 2691/ 8400 batches | train loss 0.4595420 +| epoch 7 | 2695/ 8400 batches | train loss 0.3614218 +| epoch 7 | 2699/ 8400 batches | train loss 0.3537202 +| epoch 7 | 2703/ 8400 batches | train loss 0.3917283 +| epoch 7 | 2707/ 8400 batches | train loss 0.3478838 +| epoch 7 | 2711/ 8400 batches | train loss 0.3739535 +| epoch 7 | 2715/ 8400 batches | train loss 0.2321670 +| epoch 7 | 2719/ 8400 batches | train loss 0.4195423 +| epoch 7 | 2723/ 8400 batches | train loss 0.3801777 +| epoch 7 | 2727/ 8400 batches | train loss 0.3771814 +| epoch 7 | 2731/ 8400 batches | train loss 0.3601091 +| epoch 7 | 2735/ 8400 batches | train loss 0.3595833 +| epoch 7 | 2739/ 8400 batches | train loss 0.3513236 +| epoch 7 | 2743/ 8400 batches | train loss 0.3560137 +| epoch 7 | 2747/ 8400 batches | train loss 0.2993672 +| epoch 7 | 2751/ 8400 batches | train loss 0.3807952 +| epoch 7 | 2755/ 8400 batches | train loss 0.3814926 +| epoch 7 | 2759/ 8400 batches | train loss 0.4304380 +| epoch 7 | 2763/ 8400 batches | train loss 0.3617653 +| epoch 7 | 2767/ 8400 batches | train loss 0.3092057 +| epoch 7 | 2771/ 8400 batches | train loss 0.3158471 +| epoch 7 | 2775/ 8400 batches | train loss 0.2934845 +| epoch 7 | 2779/ 8400 batches | train loss 0.2971285 +| epoch 7 | 2783/ 8400 batches | train loss 0.3598964 +| epoch 7 | 2787/ 8400 batches | train loss 0.3789892 +| epoch 7 | 2791/ 8400 batches | train loss 0.4300838 +| epoch 7 | 2795/ 8400 batches | train loss 0.3350380 +| epoch 7 | 2799/ 8400 batches | train loss 0.3979478 +| epoch 7 | 2803/ 8400 batches | train loss 0.4269118 +| epoch 7 | 2807/ 8400 batches | train loss 0.2831738 +| epoch 7 | 2811/ 8400 batches | train loss 0.3868806 +| epoch 7 | 2815/ 8400 batches | train loss 0.3555609 +| epoch 7 | 2819/ 8400 batches | train loss 0.3798474 +| epoch 7 | 2823/ 8400 batches | train loss 0.3421504 +| epoch 7 | 2827/ 8400 batches | train loss 0.4332661 +| epoch 7 | 2831/ 8400 batches | train loss 0.3234099 +| epoch 7 | 2835/ 8400 batches | train loss 0.4104482 +| epoch 7 | 2839/ 8400 batches | train loss 0.3135474 +| epoch 7 | 2843/ 8400 batches | train loss 0.3674074 +| epoch 7 | 2847/ 8400 batches | train loss 0.3656207 +| epoch 7 | 2851/ 8400 batches | train loss 0.4215706 +| epoch 7 | 2855/ 8400 batches | train loss 0.3891672 +| epoch 7 | 2859/ 8400 batches | train loss 0.3858728 +| epoch 7 | 2863/ 8400 batches | train loss 0.4432329 +| epoch 7 | 2867/ 8400 batches | train loss 0.2730767 +| epoch 7 | 2871/ 8400 batches | train loss 0.3390537 +| epoch 7 | 2875/ 8400 batches | train loss 0.3702353 +| epoch 7 | 2879/ 8400 batches | train loss 0.3576578 +| epoch 7 | 2883/ 8400 batches | train loss 0.4114885 +| epoch 7 | 2887/ 8400 batches | train loss 0.3846746 +| epoch 7 | 2891/ 8400 batches | train loss 0.3900220 +| epoch 7 | 2895/ 8400 batches | train loss 0.3530864 +| epoch 7 | 2899/ 8400 batches | train loss 0.3578463 +| epoch 7 | 2903/ 8400 batches | train loss 0.3759181 +| epoch 7 | 2907/ 8400 batches | train loss 0.3681974 +| epoch 7 | 2911/ 8400 batches | train loss 0.3857455 +| epoch 7 | 2915/ 8400 batches | train loss 0.4063989 +| epoch 7 | 2919/ 8400 batches | train loss 0.3506799 +| epoch 7 | 2923/ 8400 batches | train loss 0.3528084 +| epoch 7 | 2927/ 8400 batches | train loss 0.3854130 +| epoch 7 | 2931/ 8400 batches | train loss 0.3839991 +| epoch 7 | 2935/ 8400 batches | train loss 0.3106859 +| epoch 7 | 2939/ 8400 batches | train loss 0.3929036 +| epoch 7 | 2943/ 8400 batches | train loss 0.3512930 +| epoch 7 | 2947/ 8400 batches | train loss 0.3586901 +| epoch 7 | 2951/ 8400 batches | train loss 0.3731237 +| epoch 7 | 2955/ 8400 batches | train loss 0.3748056 +| epoch 7 | 2959/ 8400 batches | train loss 0.2628570 +| epoch 7 | 2963/ 8400 batches | train loss 0.4374730 +| epoch 7 | 2967/ 8400 batches | train loss 0.3708226 +| epoch 7 | 2971/ 8400 batches | train loss 0.3677242 +| epoch 7 | 2975/ 8400 batches | train loss 0.3460144 +| epoch 7 | 2979/ 8400 batches | train loss 0.2916043 +| epoch 7 | 2983/ 8400 batches | train loss 0.4494383 +| epoch 7 | 2987/ 8400 batches | train loss 0.2804555 +| epoch 7 | 2991/ 8400 batches | train loss 0.3968658 +| epoch 7 | 2995/ 8400 batches | train loss 0.3144727 +| epoch 7 | 2999/ 8400 batches | train loss 0.3974222 +| epoch 7 | 3003/ 8400 batches | train loss 0.3428578 +| epoch 7 | 3007/ 8400 batches | train loss 0.3787681 +| epoch 7 | 3011/ 8400 batches | train loss 0.4371817 +| epoch 7 | 3015/ 8400 batches | train loss 0.3294037 +| epoch 7 | 3019/ 8400 batches | train loss 0.3147006 +| epoch 7 | 3023/ 8400 batches | train loss 0.3200253 +| epoch 7 | 3027/ 8400 batches | train loss 0.4059229 +| epoch 7 | 3031/ 8400 batches | train loss 0.3861223 +| epoch 7 | 3035/ 8400 batches | train loss 0.4111115 +| epoch 7 | 3039/ 8400 batches | train loss 0.3503958 +| epoch 7 | 3043/ 8400 batches | train loss 0.4219700 +| epoch 7 | 3047/ 8400 batches | train loss 0.3641474 +| epoch 7 | 3051/ 8400 batches | train loss 0.3709912 +| epoch 7 | 3055/ 8400 batches | train loss 0.3566084 +| epoch 7 | 3059/ 8400 batches | train loss 0.3379107 +| epoch 7 | 3063/ 8400 batches | train loss 0.3970514 +| epoch 7 | 3067/ 8400 batches | train loss 0.4265195 +| epoch 7 | 3071/ 8400 batches | train loss 0.4196430 +| epoch 7 | 3075/ 8400 batches | train loss 0.4000504 +| epoch 7 | 3079/ 8400 batches | train loss 0.3468746 +| epoch 7 | 3083/ 8400 batches | train loss 0.4055218 +| epoch 7 | 3087/ 8400 batches | train loss 0.3465804 +| epoch 7 | 3091/ 8400 batches | train loss 0.3397581 +| epoch 7 | 3095/ 8400 batches | train loss 0.3120397 +| epoch 7 | 3099/ 8400 batches | train loss 0.3750666 +| epoch 7 | 3103/ 8400 batches | train loss 0.3925549 +| epoch 7 | 3107/ 8400 batches | train loss 0.3951543 +| epoch 7 | 3111/ 8400 batches | train loss 0.3320190 +| epoch 7 | 3115/ 8400 batches | train loss 0.3798807 +| epoch 7 | 3119/ 8400 batches | train loss 0.3344960 +| epoch 7 | 3123/ 8400 batches | train loss 0.2883694 +| epoch 7 | 3127/ 8400 batches | train loss 0.3722491 +| epoch 7 | 3131/ 8400 batches | train loss 0.4914790 +| epoch 7 | 3135/ 8400 batches | train loss 0.3309758 +| epoch 7 | 3139/ 8400 batches | train loss 0.3053895 +| epoch 7 | 3143/ 8400 batches | train loss 0.4180728 +| epoch 7 | 3147/ 8400 batches | train loss 0.3628867 +| epoch 7 | 3151/ 8400 batches | train loss 0.3612348 +| epoch 7 | 3155/ 8400 batches | train loss 0.4999008 +| epoch 7 | 3159/ 8400 batches | train loss 0.3474655 +| epoch 7 | 3163/ 8400 batches | train loss 0.3356502 +| epoch 7 | 3167/ 8400 batches | train loss 0.4866174 +| epoch 7 | 3171/ 8400 batches | train loss 0.3494883 +| epoch 7 | 3175/ 8400 batches | train loss 0.3026074 +| epoch 7 | 3179/ 8400 batches | train loss 0.3324591 +| epoch 7 | 3183/ 8400 batches | train loss 0.3936260 +| epoch 7 | 3187/ 8400 batches | train loss 0.3911172 +| epoch 7 | 3191/ 8400 batches | train loss 0.4505066 +| epoch 7 | 3195/ 8400 batches | train loss 0.3766469 +| epoch 7 | 3199/ 8400 batches | train loss 0.3749694 +| epoch 7 | 3203/ 8400 batches | train loss 0.3544538 +| epoch 7 | 3207/ 8400 batches | train loss 0.4014618 +| epoch 7 | 3211/ 8400 batches | train loss 0.4239102 +| epoch 7 | 3215/ 8400 batches | train loss 0.4063498 +| epoch 7 | 3219/ 8400 batches | train loss 0.3452515 +| epoch 7 | 3223/ 8400 batches | train loss 0.4033882 +| epoch 7 | 3227/ 8400 batches | train loss 0.3616399 +| epoch 7 | 3231/ 8400 batches | train loss 0.4424242 +| epoch 7 | 3235/ 8400 batches | train loss 0.4119664 +| epoch 7 | 3239/ 8400 batches | train loss 0.3368405 +| epoch 7 | 3243/ 8400 batches | train loss 0.3007878 +| epoch 7 | 3247/ 8400 batches | train loss 0.3469221 +| epoch 7 | 3251/ 8400 batches | train loss 0.3535995 +| epoch 7 | 3255/ 8400 batches | train loss 0.3213000 +| epoch 7 | 3259/ 8400 batches | train loss 0.4029982 +| epoch 7 | 3263/ 8400 batches | train loss 0.3996294 +| epoch 7 | 3267/ 8400 batches | train loss 0.3694821 +| epoch 7 | 3271/ 8400 batches | train loss 0.3389325 +| epoch 7 | 3275/ 8400 batches | train loss 0.3546554 +| epoch 7 | 3279/ 8400 batches | train loss 0.4031258 +| epoch 7 | 3283/ 8400 batches | train loss 0.3374859 +| epoch 7 | 3287/ 8400 batches | train loss 0.2999986 +| epoch 7 | 3291/ 8400 batches | train loss 0.4371104 +| epoch 7 | 3295/ 8400 batches | train loss 0.3901351 +| epoch 7 | 3299/ 8400 batches | train loss 0.3150666 +| epoch 7 | 3303/ 8400 batches | train loss 0.3639255 +| epoch 7 | 3307/ 8400 batches | train loss 0.4156565 +| epoch 7 | 3311/ 8400 batches | train loss 0.3782173 +| epoch 7 | 3315/ 8400 batches | train loss 0.3465934 +| epoch 7 | 3319/ 8400 batches | train loss 0.3280522 +| epoch 7 | 3323/ 8400 batches | train loss 0.3104296 +| epoch 7 | 3327/ 8400 batches | train loss 0.3980976 +| epoch 7 | 3331/ 8400 batches | train loss 0.3755190 +| epoch 7 | 3335/ 8400 batches | train loss 0.4180720 +| epoch 7 | 3339/ 8400 batches | train loss 0.3846848 +| epoch 7 | 3343/ 8400 batches | train loss 0.3912697 +| epoch 7 | 3347/ 8400 batches | train loss 0.3737181 +| epoch 7 | 3351/ 8400 batches | train loss 0.3786615 +| epoch 7 | 3355/ 8400 batches | train loss 0.4176024 +| epoch 7 | 3359/ 8400 batches | train loss 0.3874761 +| epoch 7 | 3363/ 8400 batches | train loss 0.3315607 +| epoch 7 | 3367/ 8400 batches | train loss 0.3428468 +| epoch 7 | 3371/ 8400 batches | train loss 0.3604695 +| epoch 7 | 3375/ 8400 batches | train loss 0.3795128 +| epoch 7 | 3379/ 8400 batches | train loss 0.3207570 +| epoch 7 | 3383/ 8400 batches | train loss 0.4590383 +| epoch 7 | 3387/ 8400 batches | train loss 0.3260267 +| epoch 7 | 3391/ 8400 batches | train loss 0.3862827 +| epoch 7 | 3395/ 8400 batches | train loss 0.3497484 +| epoch 7 | 3399/ 8400 batches | train loss 0.3095250 +| epoch 7 | 3403/ 8400 batches | train loss 0.4578776 +| epoch 7 | 3407/ 8400 batches | train loss 0.3433179 +| epoch 7 | 3411/ 8400 batches | train loss 0.3604398 +| epoch 7 | 3415/ 8400 batches | train loss 0.3618447 +| epoch 7 | 3419/ 8400 batches | train loss 0.3690509 +| epoch 7 | 3423/ 8400 batches | train loss 0.3647966 +| epoch 7 | 3427/ 8400 batches | train loss 0.3705102 +| epoch 7 | 3431/ 8400 batches | train loss 0.3766901 +| epoch 7 | 3435/ 8400 batches | train loss 0.3664418 +| epoch 7 | 3439/ 8400 batches | train loss 0.3639174 +| epoch 7 | 3443/ 8400 batches | train loss 0.3495995 +| epoch 7 | 3447/ 8400 batches | train loss 0.3567905 +| epoch 7 | 3451/ 8400 batches | train loss 0.3954217 +| epoch 7 | 3455/ 8400 batches | train loss 0.3689738 +| epoch 7 | 3459/ 8400 batches | train loss 0.4390567 +| epoch 7 | 3463/ 8400 batches | train loss 0.3425096 +| epoch 7 | 3467/ 8400 batches | train loss 0.4100068 +| epoch 7 | 3471/ 8400 batches | train loss 0.3669820 +| epoch 7 | 3475/ 8400 batches | train loss 0.3250146 +| epoch 7 | 3479/ 8400 batches | train loss 0.2824445 +| epoch 7 | 3483/ 8400 batches | train loss 0.4095504 +| epoch 7 | 3487/ 8400 batches | train loss 0.4469598 +| epoch 7 | 3491/ 8400 batches | train loss 0.3887893 +| epoch 7 | 3495/ 8400 batches | train loss 0.3773934 +| epoch 7 | 3499/ 8400 batches | train loss 0.3212700 +| epoch 7 | 3503/ 8400 batches | train loss 0.3934984 +| epoch 7 | 3507/ 8400 batches | train loss 0.3870941 +| epoch 7 | 3511/ 8400 batches | train loss 0.4429207 +| epoch 7 | 3515/ 8400 batches | train loss 0.3902922 +| epoch 7 | 3519/ 8400 batches | train loss 0.3661858 +| epoch 7 | 3523/ 8400 batches | train loss 0.2897616 +| epoch 7 | 3527/ 8400 batches | train loss 0.3523855 +| epoch 7 | 3531/ 8400 batches | train loss 0.3881441 +| epoch 7 | 3535/ 8400 batches | train loss 0.3853266 +| epoch 7 | 3539/ 8400 batches | train loss 0.3282725 +| epoch 7 | 3543/ 8400 batches | train loss 0.3095640 +| epoch 7 | 3547/ 8400 batches | train loss 0.3593035 +| epoch 7 | 3551/ 8400 batches | train loss 0.3233159 +| epoch 7 | 3555/ 8400 batches | train loss 0.3611436 +| epoch 7 | 3559/ 8400 batches | train loss 0.3259795 +| epoch 7 | 3563/ 8400 batches | train loss 0.2591962 +| epoch 7 | 3567/ 8400 batches | train loss 0.3696675 +| epoch 7 | 3571/ 8400 batches | train loss 0.3809288 +| epoch 7 | 3575/ 8400 batches | train loss 0.3903064 +| epoch 7 | 3579/ 8400 batches | train loss 0.3546224 +| epoch 7 | 3583/ 8400 batches | train loss 0.3967384 +| epoch 7 | 3587/ 8400 batches | train loss 0.4158379 +| epoch 7 | 3591/ 8400 batches | train loss 0.3546302 +| epoch 7 | 3595/ 8400 batches | train loss 0.3055930 +| epoch 7 | 3599/ 8400 batches | train loss 0.3915982 +| epoch 7 | 3603/ 8400 batches | train loss 0.4339770 +| epoch 7 | 3607/ 8400 batches | train loss 0.3151324 +| epoch 7 | 3611/ 8400 batches | train loss 0.3223796 +| epoch 7 | 3615/ 8400 batches | train loss 0.4044902 +| epoch 7 | 3619/ 8400 batches | train loss 0.4624196 +| epoch 7 | 3623/ 8400 batches | train loss 0.3805681 +| epoch 7 | 3627/ 8400 batches | train loss 0.1395746 +| epoch 7 | 3631/ 8400 batches | train loss 0.3400963 +| epoch 7 | 3635/ 8400 batches | train loss 0.4272600 +| epoch 7 | 3639/ 8400 batches | train loss 0.4110940 +| epoch 7 | 3643/ 8400 batches | train loss 0.3889562 +| epoch 7 | 3647/ 8400 batches | train loss 0.3808753 +| epoch 7 | 3651/ 8400 batches | train loss 0.3174052 +| epoch 7 | 3655/ 8400 batches | train loss 0.4366118 +| epoch 7 | 3659/ 8400 batches | train loss 0.3546056 +| epoch 7 | 3663/ 8400 batches | train loss 0.3830959 +| epoch 7 | 3667/ 8400 batches | train loss 0.4018877 +| epoch 7 | 3671/ 8400 batches | train loss 0.3748211 +| epoch 7 | 3675/ 8400 batches | train loss 0.3129153 +| epoch 7 | 3679/ 8400 batches | train loss 0.3284124 +| epoch 7 | 3683/ 8400 batches | train loss 0.2381857 +| epoch 7 | 3687/ 8400 batches | train loss 0.4492280 +| epoch 7 | 3691/ 8400 batches | train loss 0.3211561 +| epoch 7 | 3695/ 8400 batches | train loss 0.4184542 +| epoch 7 | 3699/ 8400 batches | train loss 0.3739093 +| epoch 7 | 3703/ 8400 batches | train loss 0.3711140 +| epoch 7 | 3707/ 8400 batches | train loss 0.4388147 +| epoch 7 | 3711/ 8400 batches | train loss 0.4101197 +| epoch 7 | 3715/ 8400 batches | train loss 0.3392509 +| epoch 7 | 3719/ 8400 batches | train loss 0.3668056 +| epoch 7 | 3723/ 8400 batches | train loss 0.3688663 +| epoch 7 | 3727/ 8400 batches | train loss 0.3775574 +| epoch 7 | 3731/ 8400 batches | train loss 0.3202308 +| epoch 7 | 3735/ 8400 batches | train loss 0.3092885 +| epoch 7 | 3739/ 8400 batches | train loss 0.3230775 +| epoch 7 | 3743/ 8400 batches | train loss 0.3691753 +| epoch 7 | 3747/ 8400 batches | train loss 0.4132369 +| epoch 7 | 3751/ 8400 batches | train loss 0.3332826 +| epoch 7 | 3755/ 8400 batches | train loss 0.3283257 +| epoch 7 | 3759/ 8400 batches | train loss 0.4262814 +| epoch 7 | 3763/ 8400 batches | train loss 0.3712839 +| epoch 7 | 3767/ 8400 batches | train loss 0.3972043 +| epoch 7 | 3771/ 8400 batches | train loss 0.3293045 +| epoch 7 | 3775/ 8400 batches | train loss 0.3853440 +| epoch 7 | 3779/ 8400 batches | train loss 0.3135733 +| epoch 7 | 3783/ 8400 batches | train loss 0.3604382 +| epoch 7 | 3787/ 8400 batches | train loss 0.3757620 +| epoch 7 | 3791/ 8400 batches | train loss 0.3357710 +| epoch 7 | 3795/ 8400 batches | train loss 0.4577911 +| epoch 7 | 3799/ 8400 batches | train loss 0.3814111 +| epoch 7 | 3803/ 8400 batches | train loss 0.3789343 +| epoch 7 | 3807/ 8400 batches | train loss 0.3854403 +| epoch 7 | 3811/ 8400 batches | train loss 0.3238078 +| epoch 7 | 3815/ 8400 batches | train loss 0.4110836 +| epoch 7 | 3819/ 8400 batches | train loss 0.3430379 +| epoch 7 | 3823/ 8400 batches | train loss 0.4004831 +| epoch 7 | 3827/ 8400 batches | train loss 0.3067704 +| epoch 7 | 3831/ 8400 batches | train loss 0.3370595 +| epoch 7 | 3835/ 8400 batches | train loss 0.2296160 +| epoch 7 | 3839/ 8400 batches | train loss 0.4269728 +| epoch 7 | 3843/ 8400 batches | train loss 0.3984031 +| epoch 7 | 3847/ 8400 batches | train loss 0.3652957 +| epoch 7 | 3851/ 8400 batches | train loss 0.3895361 +| epoch 7 | 3855/ 8400 batches | train loss 0.3411118 +| epoch 7 | 3859/ 8400 batches | train loss 0.3574052 +| epoch 7 | 3863/ 8400 batches | train loss 0.4125229 +| epoch 7 | 3867/ 8400 batches | train loss 0.4189794 +| epoch 7 | 3871/ 8400 batches | train loss 0.3954921 +| epoch 7 | 3875/ 8400 batches | train loss 0.3547125 +| epoch 7 | 3879/ 8400 batches | train loss 0.3780645 +| epoch 7 | 3883/ 8400 batches | train loss 0.3643477 +| epoch 7 | 3887/ 8400 batches | train loss 0.3634545 +| epoch 7 | 3891/ 8400 batches | train loss 0.3796531 +| epoch 7 | 3895/ 8400 batches | train loss 0.3894432 +| epoch 7 | 3899/ 8400 batches | train loss 0.3696640 +| epoch 7 | 3903/ 8400 batches | train loss 0.3308274 +| epoch 7 | 3907/ 8400 batches | train loss 0.3576360 +| epoch 7 | 3911/ 8400 batches | train loss 0.3301851 +| epoch 7 | 3915/ 8400 batches | train loss 0.4250327 +| epoch 7 | 3919/ 8400 batches | train loss 0.3300442 +| epoch 7 | 3923/ 8400 batches | train loss 0.3404611 +| epoch 7 | 3927/ 8400 batches | train loss 0.3385434 +| epoch 7 | 3931/ 8400 batches | train loss 0.3606890 +| epoch 7 | 3935/ 8400 batches | train loss 0.3386621 +| epoch 7 | 3939/ 8400 batches | train loss 0.4064943 +| epoch 7 | 3943/ 8400 batches | train loss 0.4150862 +| epoch 7 | 3947/ 8400 batches | train loss 0.3461470 +| epoch 7 | 3951/ 8400 batches | train loss 0.3701302 +| epoch 7 | 3955/ 8400 batches | train loss 0.3800519 +| epoch 7 | 3959/ 8400 batches | train loss 0.3425328 +| epoch 7 | 3963/ 8400 batches | train loss 0.3792230 +| epoch 7 | 3967/ 8400 batches | train loss 0.4008689 +| epoch 7 | 3971/ 8400 batches | train loss 0.3740172 +| epoch 7 | 3975/ 8400 batches | train loss 0.3845815 +| epoch 7 | 3979/ 8400 batches | train loss 0.3764349 +| epoch 7 | 3983/ 8400 batches | train loss 0.3691896 +| epoch 7 | 3987/ 8400 batches | train loss 0.3723332 +| epoch 7 | 3991/ 8400 batches | train loss 0.3132899 +| epoch 7 | 3995/ 8400 batches | train loss 0.3907754 +| epoch 7 | 3999/ 8400 batches | train loss 0.4295605 +| epoch 7 | 4003/ 8400 batches | train loss 0.3583885 +| epoch 7 | 4007/ 8400 batches | train loss 0.4287169 +| epoch 7 | 4011/ 8400 batches | train loss 0.3313286 +| epoch 7 | 4015/ 8400 batches | train loss 0.3321257 +| epoch 7 | 4019/ 8400 batches | train loss 0.2649742 +| epoch 7 | 4023/ 8400 batches | train loss 0.3955792 +| epoch 7 | 4027/ 8400 batches | train loss 0.3408911 +| epoch 7 | 4031/ 8400 batches | train loss 0.3514550 +| epoch 7 | 4035/ 8400 batches | train loss 0.4048178 +| epoch 7 | 4039/ 8400 batches | train loss 0.3947402 +| epoch 7 | 4043/ 8400 batches | train loss 0.3720968 +| epoch 7 | 4047/ 8400 batches | train loss 0.4474854 +| epoch 7 | 4051/ 8400 batches | train loss 0.3593157 +| epoch 7 | 4055/ 8400 batches | train loss 0.3677145 +| epoch 7 | 4059/ 8400 batches | train loss 0.3540777 +| epoch 7 | 4063/ 8400 batches | train loss 0.3638711 +| epoch 7 | 4067/ 8400 batches | train loss 0.3732710 +| epoch 7 | 4071/ 8400 batches | train loss 0.3595474 +| epoch 7 | 4075/ 8400 batches | train loss 0.3132700 +| epoch 7 | 4079/ 8400 batches | train loss 0.3741856 +| epoch 7 | 4083/ 8400 batches | train loss 0.3453487 +| epoch 7 | 4087/ 8400 batches | train loss 0.4124056 +| epoch 7 | 4091/ 8400 batches | train loss 0.3077272 +| epoch 7 | 4095/ 8400 batches | train loss 0.3477190 +| epoch 7 | 4099/ 8400 batches | train loss 0.3563278 +| epoch 7 | 4103/ 8400 batches | train loss 0.3739734 +| epoch 7 | 4107/ 8400 batches | train loss 0.3494646 +| epoch 7 | 4111/ 8400 batches | train loss 0.4228809 +| epoch 7 | 4115/ 8400 batches | train loss 0.3754596 +| epoch 7 | 4119/ 8400 batches | train loss 0.2726480 +| epoch 7 | 4123/ 8400 batches | train loss 0.3507656 +| epoch 7 | 4127/ 8400 batches | train loss 0.3654241 +| epoch 7 | 4131/ 8400 batches | train loss 0.3734429 +| epoch 7 | 4135/ 8400 batches | train loss 0.3263078 +| epoch 7 | 4139/ 8400 batches | train loss 0.3768339 +| epoch 7 | 4143/ 8400 batches | train loss 0.3422362 +| epoch 7 | 4147/ 8400 batches | train loss 0.3952779 +| epoch 7 | 4151/ 8400 batches | train loss 0.4115443 +| epoch 7 | 4155/ 8400 batches | train loss 0.3540299 +| epoch 7 | 4159/ 8400 batches | train loss 0.3340014 +| epoch 7 | 4163/ 8400 batches | train loss 0.4702821 +| epoch 7 | 4167/ 8400 batches | train loss 0.3595427 +| epoch 7 | 4171/ 8400 batches | train loss 0.4096968 +| epoch 7 | 4175/ 8400 batches | train loss 0.3769119 +| epoch 7 | 4179/ 8400 batches | train loss 0.3744822 +| epoch 7 | 4183/ 8400 batches | train loss 0.3463301 +| epoch 7 | 4187/ 8400 batches | train loss 0.3927958 +| epoch 7 | 4191/ 8400 batches | train loss 0.3844996 +| epoch 7 | 4195/ 8400 batches | train loss 0.3707553 +| epoch 7 | 4199/ 8400 batches | train loss 0.3082966 +| epoch 7 | 4203/ 8400 batches | train loss 0.3818510 +| epoch 7 | 4207/ 8400 batches | train loss 0.4618418 +| epoch 7 | 4211/ 8400 batches | train loss 0.3742452 +| epoch 7 | 4215/ 8400 batches | train loss 0.2928250 +| epoch 7 | 4219/ 8400 batches | train loss 0.5047676 +| epoch 7 | 4223/ 8400 batches | train loss 0.3545366 +| epoch 7 | 4227/ 8400 batches | train loss 0.3620930 +| epoch 7 | 4231/ 8400 batches | train loss 0.4043348 +| epoch 7 | 4235/ 8400 batches | train loss 0.3826578 +| epoch 7 | 4239/ 8400 batches | train loss 0.4602857 +| epoch 7 | 4243/ 8400 batches | train loss 0.4473652 +| epoch 7 | 4247/ 8400 batches | train loss 0.4056710 +| epoch 7 | 4251/ 8400 batches | train loss 0.4979745 +| epoch 7 | 4255/ 8400 batches | train loss 0.3476415 +| epoch 7 | 4259/ 8400 batches | train loss 0.3650227 +| epoch 7 | 4263/ 8400 batches | train loss 0.3171138 +| epoch 7 | 4267/ 8400 batches | train loss 0.4043661 +| epoch 7 | 4271/ 8400 batches | train loss 0.4120030 +| epoch 7 | 4275/ 8400 batches | train loss 0.4176065 +| epoch 7 | 4279/ 8400 batches | train loss 0.3146026 +| epoch 7 | 4283/ 8400 batches | train loss 0.3196243 +| epoch 7 | 4287/ 8400 batches | train loss 0.3276040 +| epoch 7 | 4291/ 8400 batches | train loss 0.4031062 +| epoch 7 | 4295/ 8400 batches | train loss 0.3306359 +| epoch 7 | 4299/ 8400 batches | train loss 0.3345154 +| epoch 7 | 4303/ 8400 batches | train loss 0.3516287 +| epoch 7 | 4307/ 8400 batches | train loss 0.3526214 +| epoch 7 | 4311/ 8400 batches | train loss 0.3496358 +| epoch 7 | 4315/ 8400 batches | train loss 0.3585470 +| epoch 7 | 4319/ 8400 batches | train loss 0.4114717 +| epoch 7 | 4323/ 8400 batches | train loss 0.3193723 +| epoch 7 | 4327/ 8400 batches | train loss 0.4541858 +| epoch 7 | 4331/ 8400 batches | train loss 0.3684557 +| epoch 7 | 4335/ 8400 batches | train loss 0.4234561 +| epoch 7 | 4339/ 8400 batches | train loss 0.3515192 +| epoch 7 | 4343/ 8400 batches | train loss 0.3623235 +| epoch 7 | 4347/ 8400 batches | train loss 0.3468095 +| epoch 7 | 4351/ 8400 batches | train loss 0.2968498 +| epoch 7 | 4355/ 8400 batches | train loss 0.3679237 +| epoch 7 | 4359/ 8400 batches | train loss 0.3443795 +| epoch 7 | 4363/ 8400 batches | train loss 0.3687038 +| epoch 7 | 4367/ 8400 batches | train loss 0.3450521 +| epoch 7 | 4371/ 8400 batches | train loss 0.3836212 +| epoch 7 | 4375/ 8400 batches | train loss 0.3118803 +| epoch 7 | 4379/ 8400 batches | train loss 0.3913211 +| epoch 7 | 4383/ 8400 batches | train loss 0.3755248 +| epoch 7 | 4387/ 8400 batches | train loss 0.3445798 +| epoch 7 | 4391/ 8400 batches | train loss 0.3732607 +| epoch 7 | 4395/ 8400 batches | train loss 0.3441222 +| epoch 7 | 4399/ 8400 batches | train loss 0.3832349 +| epoch 7 | 4403/ 8400 batches | train loss 0.3248122 +| epoch 7 | 4407/ 8400 batches | train loss 0.4475121 +| epoch 7 | 4411/ 8400 batches | train loss 0.4099472 +| epoch 7 | 4415/ 8400 batches | train loss 0.3558904 +| epoch 7 | 4419/ 8400 batches | train loss 0.3300263 +| epoch 7 | 4423/ 8400 batches | train loss 0.3085531 +| epoch 7 | 4427/ 8400 batches | train loss 0.3948656 +| epoch 7 | 4431/ 8400 batches | train loss 0.3773305 +| epoch 7 | 4435/ 8400 batches | train loss 0.3961404 +| epoch 7 | 4439/ 8400 batches | train loss 0.3501677 +| epoch 7 | 4443/ 8400 batches | train loss 0.3436632 +| epoch 7 | 4447/ 8400 batches | train loss 0.3215080 +| epoch 7 | 4451/ 8400 batches | train loss 0.4033052 +| epoch 7 | 4455/ 8400 batches | train loss 0.4135811 +| epoch 7 | 4459/ 8400 batches | train loss 0.4376012 +| epoch 7 | 4463/ 8400 batches | train loss 0.4254472 +| epoch 7 | 4467/ 8400 batches | train loss 0.3914690 +| epoch 7 | 4471/ 8400 batches | train loss 0.3663772 +| epoch 7 | 4475/ 8400 batches | train loss 0.3513701 +| epoch 7 | 4479/ 8400 batches | train loss 0.4238312 +| epoch 7 | 4483/ 8400 batches | train loss 0.3729792 +| epoch 7 | 4487/ 8400 batches | train loss 0.3355356 +| epoch 7 | 4491/ 8400 batches | train loss 0.4354578 +| epoch 7 | 4495/ 8400 batches | train loss 0.3639364 +| epoch 7 | 4499/ 8400 batches | train loss 0.4101974 +| epoch 7 | 4503/ 8400 batches | train loss 0.3680676 +| epoch 7 | 4507/ 8400 batches | train loss 0.4521871 +| epoch 7 | 4511/ 8400 batches | train loss 0.3756556 +| epoch 7 | 4515/ 8400 batches | train loss 0.3782172 +| epoch 7 | 4519/ 8400 batches | train loss 0.4243924 +| epoch 7 | 4523/ 8400 batches | train loss 0.4453432 +| epoch 7 | 4527/ 8400 batches | train loss 0.3504581 +| epoch 7 | 4531/ 8400 batches | train loss 0.3989337 +| epoch 7 | 4535/ 8400 batches | train loss 0.4266635 +| epoch 7 | 4539/ 8400 batches | train loss 0.3968912 +| epoch 7 | 4543/ 8400 batches | train loss 0.3842311 +| epoch 7 | 4547/ 8400 batches | train loss 0.3355868 +| epoch 7 | 4551/ 8400 batches | train loss 0.3645893 +| epoch 7 | 4555/ 8400 batches | train loss 0.3459685 +| epoch 7 | 4559/ 8400 batches | train loss 0.3944247 +| epoch 7 | 4563/ 8400 batches | train loss 0.3772541 +| epoch 7 | 4567/ 8400 batches | train loss 0.3762255 +| epoch 7 | 4571/ 8400 batches | train loss 0.3086483 +| epoch 7 | 4575/ 8400 batches | train loss 0.4194821 +| epoch 7 | 4579/ 8400 batches | train loss 0.3854244 +| epoch 7 | 4583/ 8400 batches | train loss 0.3365054 +| epoch 7 | 4587/ 8400 batches | train loss 0.3636441 +| epoch 7 | 4591/ 8400 batches | train loss 0.3625211 +| epoch 7 | 4595/ 8400 batches | train loss 0.3193561 +| epoch 7 | 4599/ 8400 batches | train loss 0.2783847 +| epoch 7 | 4603/ 8400 batches | train loss 0.3574466 +| epoch 7 | 4607/ 8400 batches | train loss 0.3300660 +| epoch 7 | 4611/ 8400 batches | train loss 0.3807412 +| epoch 7 | 4615/ 8400 batches | train loss 0.3650854 +| epoch 7 | 4619/ 8400 batches | train loss 0.3572792 +| epoch 7 | 4623/ 8400 batches | train loss 0.3535851 +| epoch 7 | 4627/ 8400 batches | train loss 0.3196412 +| epoch 7 | 4631/ 8400 batches | train loss 0.3706422 +| epoch 7 | 4635/ 8400 batches | train loss 0.4177554 +| epoch 7 | 4639/ 8400 batches | train loss 0.4175383 +| epoch 7 | 4643/ 8400 batches | train loss 0.3635582 +| epoch 7 | 4647/ 8400 batches | train loss 0.3558595 +| epoch 7 | 4651/ 8400 batches | train loss 0.3675963 +| epoch 7 | 4655/ 8400 batches | train loss 0.3741870 +| epoch 7 | 4659/ 8400 batches | train loss 0.3618052 +| epoch 7 | 4663/ 8400 batches | train loss 0.3581915 +| epoch 7 | 4667/ 8400 batches | train loss 0.3394447 +| epoch 7 | 4671/ 8400 batches | train loss 0.3234637 +| epoch 7 | 4675/ 8400 batches | train loss 0.3797920 +| epoch 7 | 4679/ 8400 batches | train loss 0.4017655 +| epoch 7 | 4683/ 8400 batches | train loss 0.3504954 +| epoch 7 | 4687/ 8400 batches | train loss 0.4414653 +| epoch 7 | 4691/ 8400 batches | train loss 0.3644807 +| epoch 7 | 4695/ 8400 batches | train loss 0.3867383 +| epoch 7 | 4699/ 8400 batches | train loss 0.3396794 +| epoch 7 | 4703/ 8400 batches | train loss 0.3867590 +| epoch 7 | 4707/ 8400 batches | train loss 0.2445762 +| epoch 7 | 4711/ 8400 batches | train loss 0.3576310 +| epoch 7 | 4715/ 8400 batches | train loss 0.3777770 +| epoch 7 | 4719/ 8400 batches | train loss 0.3727357 +| epoch 7 | 4723/ 8400 batches | train loss 0.3465205 +| epoch 7 | 4727/ 8400 batches | train loss 0.3325230 +| epoch 7 | 4731/ 8400 batches | train loss 0.3516088 +| epoch 7 | 4735/ 8400 batches | train loss 0.3271665 +| epoch 7 | 4739/ 8400 batches | train loss 0.4057549 +| epoch 7 | 4743/ 8400 batches | train loss 0.3752181 +| epoch 7 | 4747/ 8400 batches | train loss 0.3301456 +| epoch 7 | 4751/ 8400 batches | train loss 0.3851197 +| epoch 7 | 4755/ 8400 batches | train loss 0.4013388 +| epoch 7 | 4759/ 8400 batches | train loss 0.3560529 +| epoch 7 | 4763/ 8400 batches | train loss 0.3961873 +| epoch 7 | 4767/ 8400 batches | train loss 0.3544468 +| epoch 7 | 4771/ 8400 batches | train loss 0.4150175 +| epoch 7 | 4775/ 8400 batches | train loss 0.4107626 +| epoch 7 | 4779/ 8400 batches | train loss 0.4013151 +| epoch 7 | 4783/ 8400 batches | train loss 0.3925313 +| epoch 7 | 4787/ 8400 batches | train loss 0.4200391 +| epoch 7 | 4791/ 8400 batches | train loss 0.2917850 +| epoch 7 | 4795/ 8400 batches | train loss 0.4050974 +| epoch 7 | 4799/ 8400 batches | train loss 0.3560810 +| epoch 7 | 4803/ 8400 batches | train loss 0.3622345 +| epoch 7 | 4807/ 8400 batches | train loss 0.3215519 +| epoch 7 | 4811/ 8400 batches | train loss 0.3293092 +| epoch 7 | 4815/ 8400 batches | train loss 0.3598202 +| epoch 7 | 4819/ 8400 batches | train loss 0.3785098 +| epoch 7 | 4823/ 8400 batches | train loss 0.3936411 +| epoch 7 | 4827/ 8400 batches | train loss 0.3801850 +| epoch 7 | 4831/ 8400 batches | train loss 0.2823125 +| epoch 7 | 4835/ 8400 batches | train loss 0.4072210 +| epoch 7 | 4839/ 8400 batches | train loss 0.3383734 +| epoch 7 | 4843/ 8400 batches | train loss 0.3239301 +| epoch 7 | 4847/ 8400 batches | train loss 0.3937103 +| epoch 7 | 4851/ 8400 batches | train loss 0.3843145 +| epoch 7 | 4855/ 8400 batches | train loss 0.3954782 +| epoch 7 | 4859/ 8400 batches | train loss 0.4400609 +| epoch 7 | 4863/ 8400 batches | train loss 0.4328682 +| epoch 7 | 4867/ 8400 batches | train loss 0.3891137 +| epoch 7 | 4871/ 8400 batches | train loss 0.3730142 +| epoch 7 | 4875/ 8400 batches | train loss 0.4115797 +| epoch 7 | 4879/ 8400 batches | train loss 0.3219911 +| epoch 7 | 4883/ 8400 batches | train loss 0.4134232 +| epoch 7 | 4887/ 8400 batches | train loss 0.1436336 +| epoch 7 | 4891/ 8400 batches | train loss 0.3465431 +| epoch 7 | 4895/ 8400 batches | train loss 0.3850759 +| epoch 7 | 4899/ 8400 batches | train loss 0.3706433 +| epoch 7 | 4903/ 8400 batches | train loss 0.3530430 +| epoch 7 | 4907/ 8400 batches | train loss 0.3778355 +| epoch 7 | 4911/ 8400 batches | train loss 0.3639757 +| epoch 7 | 4915/ 8400 batches | train loss 0.3629481 +| epoch 7 | 4919/ 8400 batches | train loss 0.3255031 +| epoch 7 | 4923/ 8400 batches | train loss 0.4017407 +| epoch 7 | 4927/ 8400 batches | train loss 0.3559113 +| epoch 7 | 4931/ 8400 batches | train loss 0.3625065 +| epoch 7 | 4935/ 8400 batches | train loss 0.3725688 +| epoch 7 | 4939/ 8400 batches | train loss 0.3686706 +| epoch 7 | 4943/ 8400 batches | train loss 0.2920950 +| epoch 7 | 4947/ 8400 batches | train loss 0.3072824 +| epoch 7 | 4951/ 8400 batches | train loss 0.3663764 +| epoch 7 | 4955/ 8400 batches | train loss 0.4278910 +| epoch 7 | 4959/ 8400 batches | train loss 0.3067572 +| epoch 7 | 4963/ 8400 batches | train loss 0.2453802 +| epoch 7 | 4967/ 8400 batches | train loss 0.3231643 +| epoch 7 | 4971/ 8400 batches | train loss 0.3604895 +| epoch 7 | 4975/ 8400 batches | train loss 0.3406104 +| epoch 7 | 4979/ 8400 batches | train loss 0.4442177 +| epoch 7 | 4983/ 8400 batches | train loss 0.3925995 +| epoch 7 | 4987/ 8400 batches | train loss 0.3430905 +| epoch 7 | 4991/ 8400 batches | train loss 0.3894207 +| epoch 7 | 4995/ 8400 batches | train loss 0.4164938 +| epoch 7 | 4999/ 8400 batches | train loss 0.4007069 +| epoch 7 | 5003/ 8400 batches | train loss 0.3559128 +| epoch 7 | 5007/ 8400 batches | train loss 0.3971858 +| epoch 7 | 5011/ 8400 batches | train loss 0.3117155 +| epoch 7 | 5015/ 8400 batches | train loss 0.3479157 +| epoch 7 | 5019/ 8400 batches | train loss 0.3011602 +| epoch 7 | 5023/ 8400 batches | train loss 0.3611575 +| epoch 7 | 5027/ 8400 batches | train loss 0.2808543 +| epoch 7 | 5031/ 8400 batches | train loss 0.4064669 +| epoch 7 | 5035/ 8400 batches | train loss 0.3378555 +| epoch 7 | 5039/ 8400 batches | train loss 0.4012972 +| epoch 7 | 5043/ 8400 batches | train loss 0.3561079 +| epoch 7 | 5047/ 8400 batches | train loss 0.3867130 +| epoch 7 | 5051/ 8400 batches | train loss 0.3503733 +| epoch 7 | 5055/ 8400 batches | train loss 0.4187050 +| epoch 7 | 5059/ 8400 batches | train loss 0.3532653 +| epoch 7 | 5063/ 8400 batches | train loss 0.3684389 +| epoch 7 | 5067/ 8400 batches | train loss 0.3930569 +| epoch 7 | 5071/ 8400 batches | train loss 0.3340943 +| epoch 7 | 5075/ 8400 batches | train loss 0.3882718 +| epoch 7 | 5079/ 8400 batches | train loss 0.3580830 +| epoch 7 | 5083/ 8400 batches | train loss 0.2879859 +| epoch 7 | 5087/ 8400 batches | train loss 0.4160762 +| epoch 7 | 5091/ 8400 batches | train loss 0.2378717 +| epoch 7 | 5095/ 8400 batches | train loss 0.4195179 +| epoch 7 | 5099/ 8400 batches | train loss 0.3932778 +| epoch 7 | 5103/ 8400 batches | train loss 0.3982177 +| epoch 7 | 5107/ 8400 batches | train loss 0.4038356 +| epoch 7 | 5111/ 8400 batches | train loss 0.3158492 +| epoch 7 | 5115/ 8400 batches | train loss 0.2883329 +| epoch 7 | 5119/ 8400 batches | train loss 0.3404693 +| epoch 7 | 5123/ 8400 batches | train loss 0.4052280 +| epoch 7 | 5127/ 8400 batches | train loss 0.3333428 +| epoch 7 | 5131/ 8400 batches | train loss 0.3612211 +| epoch 7 | 5135/ 8400 batches | train loss 0.4009400 +| epoch 7 | 5139/ 8400 batches | train loss 0.3742531 +| epoch 7 | 5143/ 8400 batches | train loss 0.4247929 +| epoch 7 | 5147/ 8400 batches | train loss 0.3941508 +| epoch 7 | 5151/ 8400 batches | train loss 0.3205319 +| epoch 7 | 5155/ 8400 batches | train loss 0.3959174 +| epoch 7 | 5159/ 8400 batches | train loss 0.2503712 +| epoch 7 | 5163/ 8400 batches | train loss 0.3731068 +| epoch 7 | 5167/ 8400 batches | train loss 0.3472609 +| epoch 7 | 5171/ 8400 batches | train loss 0.3381998 +| epoch 7 | 5175/ 8400 batches | train loss 0.3981397 +| epoch 7 | 5179/ 8400 batches | train loss 0.4613239 +| epoch 7 | 5183/ 8400 batches | train loss 0.2991954 +| epoch 7 | 5187/ 8400 batches | train loss 0.3477890 +| epoch 7 | 5191/ 8400 batches | train loss 0.3310954 +| epoch 7 | 5195/ 8400 batches | train loss 0.4483105 +| epoch 7 | 5199/ 8400 batches | train loss 0.3618509 +| epoch 7 | 5203/ 8400 batches | train loss 0.4002317 +| epoch 7 | 5207/ 8400 batches | train loss 0.3837058 +| epoch 7 | 5211/ 8400 batches | train loss 0.3977330 +| epoch 7 | 5215/ 8400 batches | train loss 0.3871609 +| epoch 7 | 5219/ 8400 batches | train loss 0.3371221 +| epoch 7 | 5223/ 8400 batches | train loss 0.2521812 +| epoch 7 | 5227/ 8400 batches | train loss 0.3436661 +| epoch 7 | 5231/ 8400 batches | train loss 0.4740128 +| epoch 7 | 5235/ 8400 batches | train loss 0.3433068 +| epoch 7 | 5239/ 8400 batches | train loss 0.3670574 +| epoch 7 | 5243/ 8400 batches | train loss 0.3449140 +| epoch 7 | 5247/ 8400 batches | train loss 0.3675702 +| epoch 7 | 5251/ 8400 batches | train loss 0.3763861 +| epoch 7 | 5255/ 8400 batches | train loss 0.3051677 +| epoch 7 | 5259/ 8400 batches | train loss 0.3923958 +| epoch 7 | 5263/ 8400 batches | train loss 0.3891063 +| epoch 7 | 5267/ 8400 batches | train loss 0.4516889 +| epoch 7 | 5271/ 8400 batches | train loss 0.3320301 +| epoch 7 | 5275/ 8400 batches | train loss 0.4290218 +| epoch 7 | 5279/ 8400 batches | train loss 0.4153566 +| epoch 7 | 5283/ 8400 batches | train loss 0.3556352 +| epoch 7 | 5287/ 8400 batches | train loss 0.3584837 +| epoch 7 | 5291/ 8400 batches | train loss 0.4537754 +| epoch 7 | 5295/ 8400 batches | train loss 0.3897755 +| epoch 7 | 5299/ 8400 batches | train loss 0.3361891 +| epoch 7 | 5303/ 8400 batches | train loss 0.3297086 +| epoch 7 | 5307/ 8400 batches | train loss 0.4052683 +| epoch 7 | 5311/ 8400 batches | train loss 0.3290337 +| epoch 7 | 5315/ 8400 batches | train loss 0.3169882 +| epoch 7 | 5319/ 8400 batches | train loss 0.3188680 +| epoch 7 | 5323/ 8400 batches | train loss 0.3674935 +| epoch 7 | 5327/ 8400 batches | train loss 0.4772164 +| epoch 7 | 5331/ 8400 batches | train loss 0.3275908 +| epoch 7 | 5335/ 8400 batches | train loss 0.3496247 +| epoch 7 | 5339/ 8400 batches | train loss 0.3732516 +| epoch 7 | 5343/ 8400 batches | train loss 0.4445101 +| epoch 7 | 5347/ 8400 batches | train loss 0.3196965 +| epoch 7 | 5351/ 8400 batches | train loss 0.3217337 +| epoch 7 | 5355/ 8400 batches | train loss 0.3600296 +| epoch 7 | 5359/ 8400 batches | train loss 0.3932568 +| epoch 7 | 5363/ 8400 batches | train loss 0.3520237 +| epoch 7 | 5367/ 8400 batches | train loss 0.4167225 +| epoch 7 | 5371/ 8400 batches | train loss 0.3335617 +| epoch 7 | 5375/ 8400 batches | train loss 0.3224353 +| epoch 7 | 5379/ 8400 batches | train loss 0.4096206 +| epoch 7 | 5383/ 8400 batches | train loss 0.3776987 +| epoch 7 | 5387/ 8400 batches | train loss 0.3469441 +| epoch 7 | 5391/ 8400 batches | train loss 0.3252398 +| epoch 7 | 5395/ 8400 batches | train loss 0.4118382 +| epoch 7 | 5399/ 8400 batches | train loss 0.3843674 +| epoch 7 | 5403/ 8400 batches | train loss 0.3531005 +| epoch 7 | 5407/ 8400 batches | train loss 0.3915294 +| epoch 7 | 5411/ 8400 batches | train loss 0.3525066 +| epoch 7 | 5415/ 8400 batches | train loss 0.3023356 +| epoch 7 | 5419/ 8400 batches | train loss 0.4096370 +| epoch 7 | 5423/ 8400 batches | train loss 0.3171104 +| epoch 7 | 5427/ 8400 batches | train loss 0.3576036 +| epoch 7 | 5431/ 8400 batches | train loss 0.4461436 +| epoch 7 | 5435/ 8400 batches | train loss 0.3599136 +| epoch 7 | 5439/ 8400 batches | train loss 0.3740783 +| epoch 7 | 5443/ 8400 batches | train loss 0.3185514 +| epoch 7 | 5447/ 8400 batches | train loss 0.3278794 +| epoch 7 | 5451/ 8400 batches | train loss 0.2814307 +| epoch 7 | 5455/ 8400 batches | train loss 0.3471943 +| epoch 7 | 5459/ 8400 batches | train loss 0.3710322 +| epoch 7 | 5463/ 8400 batches | train loss 0.3694524 +| epoch 7 | 5467/ 8400 batches | train loss 0.3716926 +| epoch 7 | 5471/ 8400 batches | train loss 0.3528229 +| epoch 7 | 5475/ 8400 batches | train loss 0.4141098 +| epoch 7 | 5479/ 8400 batches | train loss 0.3481849 +| epoch 7 | 5483/ 8400 batches | train loss 0.3946906 +| epoch 7 | 5487/ 8400 batches | train loss 0.2722191 +| epoch 7 | 5491/ 8400 batches | train loss 0.4021663 +| epoch 7 | 5495/ 8400 batches | train loss 0.4067569 +| epoch 7 | 5499/ 8400 batches | train loss 0.4652906 +| epoch 7 | 5503/ 8400 batches | train loss 0.3436178 +| epoch 7 | 5507/ 8400 batches | train loss 0.3840244 +| epoch 7 | 5511/ 8400 batches | train loss 0.3220424 +| epoch 7 | 5515/ 8400 batches | train loss 0.3887176 +| epoch 7 | 5519/ 8400 batches | train loss 0.2617145 +| epoch 7 | 5523/ 8400 batches | train loss 0.3583688 +| epoch 7 | 5527/ 8400 batches | train loss 0.3808171 +| epoch 7 | 5531/ 8400 batches | train loss 0.3788736 +| epoch 7 | 5535/ 8400 batches | train loss 0.3820984 +| epoch 7 | 5539/ 8400 batches | train loss 0.3250600 +| epoch 7 | 5543/ 8400 batches | train loss 0.3882725 +| epoch 7 | 5547/ 8400 batches | train loss 0.3185170 +| epoch 7 | 5551/ 8400 batches | train loss 0.4661142 +| epoch 7 | 5555/ 8400 batches | train loss 0.3310973 +| epoch 7 | 5559/ 8400 batches | train loss 0.3799348 +| epoch 7 | 5563/ 8400 batches | train loss 0.3816195 +| epoch 7 | 5567/ 8400 batches | train loss 0.3089811 +| epoch 7 | 5571/ 8400 batches | train loss 0.3530513 +| epoch 7 | 5575/ 8400 batches | train loss 0.4029399 +| epoch 7 | 5579/ 8400 batches | train loss 0.3957076 +| epoch 7 | 5583/ 8400 batches | train loss 0.4287684 +| epoch 7 | 5587/ 8400 batches | train loss 0.3313652 +| epoch 7 | 5591/ 8400 batches | train loss 0.3749386 +| epoch 7 | 5595/ 8400 batches | train loss 0.4182834 +| epoch 7 | 5599/ 8400 batches | train loss 0.3964034 +| epoch 7 | 5603/ 8400 batches | train loss 0.3721300 +| epoch 7 | 5607/ 8400 batches | train loss 0.3815172 +| epoch 7 | 5611/ 8400 batches | train loss 0.3699142 +| epoch 7 | 5615/ 8400 batches | train loss 0.2677743 +| epoch 7 | 5619/ 8400 batches | train loss 0.2812369 +| epoch 7 | 5623/ 8400 batches | train loss 0.3240985 +| epoch 7 | 5627/ 8400 batches | train loss 0.2929715 +| epoch 7 | 5631/ 8400 batches | train loss 0.2333796 +| epoch 7 | 5635/ 8400 batches | train loss 0.3575098 +| epoch 7 | 5639/ 8400 batches | train loss 0.3744435 +| epoch 7 | 5643/ 8400 batches | train loss 0.4311725 +| epoch 7 | 5647/ 8400 batches | train loss 0.3431737 +| epoch 7 | 5651/ 8400 batches | train loss 0.3837126 +| epoch 7 | 5655/ 8400 batches | train loss 0.3472003 +| epoch 7 | 5659/ 8400 batches | train loss 0.4132214 +| epoch 7 | 5663/ 8400 batches | train loss 0.4356225 +| epoch 7 | 5667/ 8400 batches | train loss 0.3244077 +| epoch 7 | 5671/ 8400 batches | train loss 0.3283703 +| epoch 7 | 5675/ 8400 batches | train loss 0.3167964 +| epoch 7 | 5679/ 8400 batches | train loss 0.2879463 +| epoch 7 | 5683/ 8400 batches | train loss 0.3642745 +| epoch 7 | 5687/ 8400 batches | train loss 0.3960261 +| epoch 7 | 5691/ 8400 batches | train loss 0.3849957 +| epoch 7 | 5695/ 8400 batches | train loss 0.3715733 +| epoch 7 | 5699/ 8400 batches | train loss 0.4512401 +| epoch 7 | 5703/ 8400 batches | train loss 0.4741687 +| epoch 7 | 5707/ 8400 batches | train loss 0.3757057 +| epoch 7 | 5711/ 8400 batches | train loss 0.3496222 +| epoch 7 | 5715/ 8400 batches | train loss 0.3115466 +| epoch 7 | 5719/ 8400 batches | train loss 0.3616128 +| epoch 7 | 5723/ 8400 batches | train loss 0.3231066 +| epoch 7 | 5727/ 8400 batches | train loss 0.3924403 +| epoch 7 | 5731/ 8400 batches | train loss 0.3548973 +| epoch 7 | 5735/ 8400 batches | train loss 0.3848111 +| epoch 7 | 5739/ 8400 batches | train loss 0.3470120 +| epoch 7 | 5743/ 8400 batches | train loss 0.3623000 +| epoch 7 | 5747/ 8400 batches | train loss 0.3363425 +| epoch 7 | 5751/ 8400 batches | train loss 0.3699961 +| epoch 7 | 5755/ 8400 batches | train loss 0.4451114 +| epoch 7 | 5759/ 8400 batches | train loss 0.3289978 +| epoch 7 | 5763/ 8400 batches | train loss 0.3861751 +| epoch 7 | 5767/ 8400 batches | train loss 0.2985987 +| epoch 7 | 5771/ 8400 batches | train loss 0.3524245 +| epoch 7 | 5775/ 8400 batches | train loss 0.4432434 +| epoch 7 | 5779/ 8400 batches | train loss 0.3469804 +| epoch 7 | 5783/ 8400 batches | train loss 0.3416172 +| epoch 7 | 5787/ 8400 batches | train loss 0.4159256 +| epoch 7 | 5791/ 8400 batches | train loss 0.3247632 +| epoch 7 | 5795/ 8400 batches | train loss 0.3809550 +| epoch 7 | 5799/ 8400 batches | train loss 0.3340322 +| epoch 7 | 5803/ 8400 batches | train loss 0.3760704 +| epoch 7 | 5807/ 8400 batches | train loss 0.3702233 +| epoch 7 | 5811/ 8400 batches | train loss 0.2379864 +| epoch 7 | 5815/ 8400 batches | train loss 0.3787916 +| epoch 7 | 5819/ 8400 batches | train loss 0.3693138 +| epoch 7 | 5823/ 8400 batches | train loss 0.3810212 +| epoch 7 | 5827/ 8400 batches | train loss 0.3320407 +| epoch 7 | 5831/ 8400 batches | train loss 0.3279077 +| epoch 7 | 5835/ 8400 batches | train loss 0.3618011 +| epoch 7 | 5839/ 8400 batches | train loss 0.3233926 +| epoch 7 | 5843/ 8400 batches | train loss 0.3153998 +| epoch 7 | 5847/ 8400 batches | train loss 0.4248077 +| epoch 7 | 5851/ 8400 batches | train loss 0.3914644 +| epoch 7 | 5855/ 8400 batches | train loss 0.3429818 +| epoch 7 | 5859/ 8400 batches | train loss 0.3795016 +| epoch 7 | 5863/ 8400 batches | train loss 0.2795548 +| epoch 7 | 5867/ 8400 batches | train loss 0.3713534 +| epoch 7 | 5871/ 8400 batches | train loss 0.3096997 +| epoch 7 | 5875/ 8400 batches | train loss 0.4128615 +| epoch 7 | 5879/ 8400 batches | train loss 0.4249390 +| epoch 7 | 5883/ 8400 batches | train loss 0.3907622 +| epoch 7 | 5887/ 8400 batches | train loss 0.3740706 +| epoch 7 | 5891/ 8400 batches | train loss 0.4396277 +| epoch 7 | 5895/ 8400 batches | train loss 0.3151315 +| epoch 7 | 5899/ 8400 batches | train loss 0.3741571 +| epoch 7 | 5903/ 8400 batches | train loss 0.3820655 +| epoch 7 | 5907/ 8400 batches | train loss 0.3680784 +| epoch 7 | 5911/ 8400 batches | train loss 0.3570081 +| epoch 7 | 5915/ 8400 batches | train loss 0.3379630 +| epoch 7 | 5919/ 8400 batches | train loss 0.3474531 +| epoch 7 | 5923/ 8400 batches | train loss 0.3849527 +| epoch 7 | 5927/ 8400 batches | train loss 0.4020981 +| epoch 7 | 5931/ 8400 batches | train loss 0.3419783 +| epoch 7 | 5935/ 8400 batches | train loss 0.3275999 +| epoch 7 | 5939/ 8400 batches | train loss 0.4022652 +| epoch 7 | 5943/ 8400 batches | train loss 0.4256980 +| epoch 7 | 5947/ 8400 batches | train loss 0.3746212 +| epoch 7 | 5951/ 8400 batches | train loss 0.4469514 +| epoch 7 | 5955/ 8400 batches | train loss 0.3711807 +| epoch 7 | 5959/ 8400 batches | train loss 0.3357105 +| epoch 7 | 5963/ 8400 batches | train loss 0.3417960 +| epoch 7 | 5967/ 8400 batches | train loss 0.2273534 +| epoch 7 | 5971/ 8400 batches | train loss 0.3378989 +| epoch 7 | 5975/ 8400 batches | train loss 0.3551992 +| epoch 7 | 5979/ 8400 batches | train loss 0.3634983 +| epoch 7 | 5983/ 8400 batches | train loss 0.3220717 +| epoch 7 | 5987/ 8400 batches | train loss 0.4035119 +| epoch 7 | 5991/ 8400 batches | train loss 0.4291150 +| epoch 7 | 5995/ 8400 batches | train loss 0.3320566 +| epoch 7 | 5999/ 8400 batches | train loss 0.3338197 +| epoch 7 | 6003/ 8400 batches | train loss 0.3147616 +| epoch 7 | 6007/ 8400 batches | train loss 0.3711510 +| epoch 7 | 6011/ 8400 batches | train loss 0.3179584 +| epoch 7 | 6015/ 8400 batches | train loss 0.1544730 +| epoch 7 | 6019/ 8400 batches | train loss 0.4027878 +| epoch 7 | 6023/ 8400 batches | train loss 0.4148812 +| epoch 7 | 6027/ 8400 batches | train loss 0.4119137 +| epoch 7 | 6031/ 8400 batches | train loss 0.3761466 +| epoch 7 | 6035/ 8400 batches | train loss 0.3512088 +| epoch 7 | 6039/ 8400 batches | train loss 0.4184897 +| epoch 7 | 6043/ 8400 batches | train loss 0.3863056 +| epoch 7 | 6047/ 8400 batches | train loss 0.3426892 +| epoch 7 | 6051/ 8400 batches | train loss 0.3789953 +| epoch 7 | 6055/ 8400 batches | train loss 0.3914180 +| epoch 7 | 6059/ 8400 batches | train loss 0.3221176 +| epoch 7 | 6063/ 8400 batches | train loss 0.3311095 +| epoch 7 | 6067/ 8400 batches | train loss 0.3949208 +| epoch 7 | 6071/ 8400 batches | train loss 0.3540441 +| epoch 7 | 6075/ 8400 batches | train loss 0.3304446 +| epoch 7 | 6079/ 8400 batches | train loss 0.4092174 +| epoch 7 | 6083/ 8400 batches | train loss 0.3729925 +| epoch 7 | 6087/ 8400 batches | train loss 0.3087820 +| epoch 7 | 6091/ 8400 batches | train loss 0.4315037 +| epoch 7 | 6095/ 8400 batches | train loss 0.3250986 +| epoch 7 | 6099/ 8400 batches | train loss 0.3883288 +| epoch 7 | 6103/ 8400 batches | train loss 0.3666866 +| epoch 7 | 6107/ 8400 batches | train loss 0.4771360 +| epoch 7 | 6111/ 8400 batches | train loss 0.3767288 +| epoch 7 | 6115/ 8400 batches | train loss 0.4241576 +| epoch 7 | 6119/ 8400 batches | train loss 0.3708679 +| epoch 7 | 6123/ 8400 batches | train loss 0.4092779 +| epoch 7 | 6127/ 8400 batches | train loss 0.3467016 +| epoch 7 | 6131/ 8400 batches | train loss 0.2801149 +| epoch 7 | 6135/ 8400 batches | train loss 0.3781723 +| epoch 7 | 6139/ 8400 batches | train loss 0.3235756 +| epoch 7 | 6143/ 8400 batches | train loss 0.3938060 +| epoch 7 | 6147/ 8400 batches | train loss 0.4037501 +| epoch 7 | 6151/ 8400 batches | train loss 0.2894470 +| epoch 7 | 6155/ 8400 batches | train loss 0.3383850 +| epoch 7 | 6159/ 8400 batches | train loss 0.3735581 +| epoch 7 | 6163/ 8400 batches | train loss 0.3652574 +| epoch 7 | 6167/ 8400 batches | train loss 0.4096905 +| epoch 7 | 6171/ 8400 batches | train loss 0.3518180 +| epoch 7 | 6175/ 8400 batches | train loss 0.3305570 +| epoch 7 | 6179/ 8400 batches | train loss 0.3931133 +| epoch 7 | 6183/ 8400 batches | train loss 0.3245729 +| epoch 7 | 6187/ 8400 batches | train loss 0.4410444 +| epoch 7 | 6191/ 8400 batches | train loss 0.3516963 +| epoch 7 | 6195/ 8400 batches | train loss 0.3496827 +| epoch 7 | 6199/ 8400 batches | train loss 0.3906416 +| epoch 7 | 6203/ 8400 batches | train loss 0.3604911 +| epoch 7 | 6207/ 8400 batches | train loss 0.3873512 +| epoch 7 | 6211/ 8400 batches | train loss 0.3618959 +| epoch 7 | 6215/ 8400 batches | train loss 0.3614689 +| epoch 7 | 6219/ 8400 batches | train loss 0.3173102 +| epoch 7 | 6223/ 8400 batches | train loss 0.3325409 +| epoch 7 | 6227/ 8400 batches | train loss 0.3302916 +| epoch 7 | 6231/ 8400 batches | train loss 0.4252844 +| epoch 7 | 6235/ 8400 batches | train loss 0.4244082 +| epoch 7 | 6239/ 8400 batches | train loss 0.3870240 +| epoch 7 | 6243/ 8400 batches | train loss 0.3972939 +| epoch 7 | 6247/ 8400 batches | train loss 0.3610236 +| epoch 7 | 6251/ 8400 batches | train loss 0.3563756 +| epoch 7 | 6255/ 8400 batches | train loss 0.3321167 +| epoch 7 | 6259/ 8400 batches | train loss 0.3646753 +| epoch 7 | 6263/ 8400 batches | train loss 0.3594520 +| epoch 7 | 6267/ 8400 batches | train loss 0.3131686 +| epoch 7 | 6271/ 8400 batches | train loss 0.3123898 +| epoch 7 | 6275/ 8400 batches | train loss 0.3468594 +| epoch 7 | 6279/ 8400 batches | train loss 0.4106840 +| epoch 7 | 6283/ 8400 batches | train loss 0.3031312 +| epoch 7 | 6287/ 8400 batches | train loss 0.3688062 +| epoch 7 | 6291/ 8400 batches | train loss 0.3466395 +| epoch 7 | 6295/ 8400 batches | train loss 0.3785355 +| epoch 7 | 6299/ 8400 batches | train loss 0.3786770 +| epoch 7 | 6303/ 8400 batches | train loss 0.3896261 +| epoch 7 | 6307/ 8400 batches | train loss 0.3949753 +| epoch 7 | 6311/ 8400 batches | train loss 0.3297012 +| epoch 7 | 6315/ 8400 batches | train loss 0.3991170 +| epoch 7 | 6319/ 8400 batches | train loss 0.3401634 +| epoch 7 | 6323/ 8400 batches | train loss 0.3908880 +| epoch 7 | 6327/ 8400 batches | train loss 0.3815416 +| epoch 7 | 6331/ 8400 batches | train loss 0.3993905 +| epoch 7 | 6335/ 8400 batches | train loss 0.3572971 +| epoch 7 | 6339/ 8400 batches | train loss 0.3519124 +| epoch 7 | 6343/ 8400 batches | train loss 0.4083507 +| epoch 7 | 6347/ 8400 batches | train loss 0.4024779 +| epoch 7 | 6351/ 8400 batches | train loss 0.4974307 +| epoch 7 | 6355/ 8400 batches | train loss 0.3954988 +| epoch 7 | 6359/ 8400 batches | train loss 0.3138488 +| epoch 7 | 6363/ 8400 batches | train loss 0.3409610 +| epoch 7 | 6367/ 8400 batches | train loss 0.3378856 +| epoch 7 | 6371/ 8400 batches | train loss 0.3771715 +| epoch 7 | 6375/ 8400 batches | train loss 0.3389349 +| epoch 7 | 6379/ 8400 batches | train loss 0.3182676 +| epoch 7 | 6383/ 8400 batches | train loss 0.3501897 +| epoch 7 | 6387/ 8400 batches | train loss 0.3482999 +| epoch 7 | 6391/ 8400 batches | train loss 0.2366141 +| epoch 7 | 6395/ 8400 batches | train loss 0.3515984 +| epoch 7 | 6399/ 8400 batches | train loss 0.3329694 +| epoch 7 | 6403/ 8400 batches | train loss 0.3446085 +| epoch 7 | 6407/ 8400 batches | train loss 0.3264546 +| epoch 7 | 6411/ 8400 batches | train loss 0.3523476 +| epoch 7 | 6415/ 8400 batches | train loss 0.3827370 +| epoch 7 | 6419/ 8400 batches | train loss 0.3946903 +| epoch 7 | 6423/ 8400 batches | train loss 0.4041997 +| epoch 7 | 6427/ 8400 batches | train loss 0.4049992 +| epoch 7 | 6431/ 8400 batches | train loss 0.3803323 +| epoch 7 | 6435/ 8400 batches | train loss 0.3284505 +| epoch 7 | 6439/ 8400 batches | train loss 0.3011225 +| epoch 7 | 6443/ 8400 batches | train loss 0.3776600 +| epoch 7 | 6447/ 8400 batches | train loss 0.4165130 +| epoch 7 | 6451/ 8400 batches | train loss 0.3568950 +| epoch 7 | 6455/ 8400 batches | train loss 0.3791960 +| epoch 7 | 6459/ 8400 batches | train loss 0.3320464 +| epoch 7 | 6463/ 8400 batches | train loss 0.3405003 +| epoch 7 | 6467/ 8400 batches | train loss 0.3622657 +| epoch 7 | 6471/ 8400 batches | train loss 0.3649632 +| epoch 7 | 6475/ 8400 batches | train loss 0.3692267 +| epoch 7 | 6479/ 8400 batches | train loss 0.3504901 +| epoch 7 | 6483/ 8400 batches | train loss 0.3995589 +| epoch 7 | 6487/ 8400 batches | train loss 0.4181236 +| epoch 7 | 6491/ 8400 batches | train loss 0.3414877 +| epoch 7 | 6495/ 8400 batches | train loss 0.3927580 +| epoch 7 | 6499/ 8400 batches | train loss 0.3791559 +| epoch 7 | 6503/ 8400 batches | train loss 0.3336889 +| epoch 7 | 6507/ 8400 batches | train loss 0.4715552 +| epoch 7 | 6511/ 8400 batches | train loss 0.3941541 +| epoch 7 | 6515/ 8400 batches | train loss 0.3759937 +| epoch 7 | 6519/ 8400 batches | train loss 0.3669731 +| epoch 7 | 6523/ 8400 batches | train loss 0.3538002 +| epoch 7 | 6527/ 8400 batches | train loss 0.3663300 +| epoch 7 | 6531/ 8400 batches | train loss 0.3659951 +| epoch 7 | 6535/ 8400 batches | train loss 0.4387304 +| epoch 7 | 6539/ 8400 batches | train loss 0.3611922 +| epoch 7 | 6543/ 8400 batches | train loss 0.3411700 +| epoch 7 | 6547/ 8400 batches | train loss 0.3366742 +| epoch 7 | 6551/ 8400 batches | train loss 0.3520481 +| epoch 7 | 6555/ 8400 batches | train loss 0.4128276 +| epoch 7 | 6559/ 8400 batches | train loss 0.3957911 +| epoch 7 | 6563/ 8400 batches | train loss 0.3612607 +| epoch 7 | 6567/ 8400 batches | train loss 0.3952467 +| epoch 7 | 6571/ 8400 batches | train loss 0.3751597 +| epoch 7 | 6575/ 8400 batches | train loss 0.3723150 +| epoch 7 | 6579/ 8400 batches | train loss 0.3950596 +| epoch 7 | 6583/ 8400 batches | train loss 0.3873059 +| epoch 7 | 6587/ 8400 batches | train loss 0.3311906 +| epoch 7 | 6591/ 8400 batches | train loss 0.3671785 +| epoch 7 | 6595/ 8400 batches | train loss 0.4097423 +| epoch 7 | 6599/ 8400 batches | train loss 0.3944167 +| epoch 7 | 6603/ 8400 batches | train loss 0.3929952 +| epoch 7 | 6607/ 8400 batches | train loss 0.2864303 +| epoch 7 | 6611/ 8400 batches | train loss 0.3965124 +| epoch 7 | 6615/ 8400 batches | train loss 0.3440097 +| epoch 7 | 6619/ 8400 batches | train loss 0.3218424 +| epoch 7 | 6623/ 8400 batches | train loss 0.4003451 +| epoch 7 | 6627/ 8400 batches | train loss 0.3650187 +| epoch 7 | 6631/ 8400 batches | train loss 0.4510650 +| epoch 7 | 6635/ 8400 batches | train loss 0.3254287 +| epoch 7 | 6639/ 8400 batches | train loss 0.3453518 +| epoch 7 | 6643/ 8400 batches | train loss 0.3727843 +| epoch 7 | 6647/ 8400 batches | train loss 0.3562155 +| epoch 7 | 6651/ 8400 batches | train loss 0.4263402 +| epoch 7 | 6655/ 8400 batches | train loss 0.3752161 +| epoch 7 | 6659/ 8400 batches | train loss 0.3741758 +| epoch 7 | 6663/ 8400 batches | train loss 0.2179692 +| epoch 7 | 6667/ 8400 batches | train loss 0.3357690 +| epoch 7 | 6671/ 8400 batches | train loss 0.3126007 +| epoch 7 | 6675/ 8400 batches | train loss 0.3338889 +| epoch 7 | 6679/ 8400 batches | train loss 0.3978545 +| epoch 7 | 6683/ 8400 batches | train loss 0.3635408 +| epoch 7 | 6687/ 8400 batches | train loss 0.3608341 +| epoch 7 | 6691/ 8400 batches | train loss 0.3510002 +| epoch 7 | 6695/ 8400 batches | train loss 0.4028018 +| epoch 7 | 6699/ 8400 batches | train loss 0.3954552 +| epoch 7 | 6703/ 8400 batches | train loss 0.2405942 +| epoch 7 | 6707/ 8400 batches | train loss 0.3352503 +| epoch 7 | 6711/ 8400 batches | train loss 0.3307050 +| epoch 7 | 6715/ 8400 batches | train loss 0.4219665 +| epoch 7 | 6719/ 8400 batches | train loss 0.3314844 +| epoch 7 | 6723/ 8400 batches | train loss 0.4174784 +| epoch 7 | 6727/ 8400 batches | train loss 0.4588678 +| epoch 7 | 6731/ 8400 batches | train loss 0.3597921 +| epoch 7 | 6735/ 8400 batches | train loss 0.3317726 +| epoch 7 | 6739/ 8400 batches | train loss 0.4040379 +| epoch 7 | 6743/ 8400 batches | train loss 0.3806601 +| epoch 7 | 6747/ 8400 batches | train loss 0.3518793 +| epoch 7 | 6751/ 8400 batches | train loss 0.3408864 +| epoch 7 | 6755/ 8400 batches | train loss 0.3305663 +| epoch 7 | 6759/ 8400 batches | train loss 0.3689823 +| epoch 7 | 6763/ 8400 batches | train loss 0.3784420 +| epoch 7 | 6767/ 8400 batches | train loss 0.4168951 +| epoch 7 | 6771/ 8400 batches | train loss 0.3495150 +| epoch 7 | 6775/ 8400 batches | train loss 0.3630555 +| epoch 7 | 6779/ 8400 batches | train loss 0.3495235 +| epoch 7 | 6783/ 8400 batches | train loss 0.3363727 +| epoch 7 | 6787/ 8400 batches | train loss 0.3207175 +| epoch 7 | 6791/ 8400 batches | train loss 0.3847181 +| epoch 7 | 6795/ 8400 batches | train loss 0.3977133 +| epoch 7 | 6799/ 8400 batches | train loss 0.2419986 +| epoch 7 | 6803/ 8400 batches | train loss 0.3668655 +| epoch 7 | 6807/ 8400 batches | train loss 0.3412217 +| epoch 7 | 6811/ 8400 batches | train loss 0.4286477 +| epoch 7 | 6815/ 8400 batches | train loss 0.3970919 +| epoch 7 | 6819/ 8400 batches | train loss 0.3384466 +| epoch 7 | 6823/ 8400 batches | train loss 0.3099644 +| epoch 7 | 6827/ 8400 batches | train loss 0.4020194 +| epoch 7 | 6831/ 8400 batches | train loss 0.3282107 +| epoch 7 | 6835/ 8400 batches | train loss 0.3690254 +| epoch 7 | 6839/ 8400 batches | train loss 0.4472059 +| epoch 7 | 6843/ 8400 batches | train loss 0.3730162 +| epoch 7 | 6847/ 8400 batches | train loss 0.4015723 +| epoch 7 | 6851/ 8400 batches | train loss 0.3635567 +| epoch 7 | 6855/ 8400 batches | train loss 0.2931406 +| epoch 7 | 6859/ 8400 batches | train loss 0.3750286 +| epoch 7 | 6863/ 8400 batches | train loss 0.3932399 +| epoch 7 | 6867/ 8400 batches | train loss 0.3316436 +| epoch 7 | 6871/ 8400 batches | train loss 0.3342010 +| epoch 7 | 6875/ 8400 batches | train loss 0.3615889 +| epoch 7 | 6879/ 8400 batches | train loss 0.3541357 +| epoch 7 | 6883/ 8400 batches | train loss 0.3996492 +| epoch 7 | 6887/ 8400 batches | train loss 0.3878520 +| epoch 7 | 6891/ 8400 batches | train loss 0.3461304 +| epoch 7 | 6895/ 8400 batches | train loss 0.3742670 +| epoch 7 | 6899/ 8400 batches | train loss 0.4405693 +| epoch 7 | 6903/ 8400 batches | train loss 0.3401994 +| epoch 7 | 6907/ 8400 batches | train loss 0.3264883 +| epoch 7 | 6911/ 8400 batches | train loss 0.4050736 +| epoch 7 | 6915/ 8400 batches | train loss 0.3576281 +| epoch 7 | 6919/ 8400 batches | train loss 0.3406520 +| epoch 7 | 6923/ 8400 batches | train loss 0.3590546 +| epoch 7 | 6927/ 8400 batches | train loss 0.3033330 +| epoch 7 | 6931/ 8400 batches | train loss 0.3493752 +| epoch 7 | 6935/ 8400 batches | train loss 0.3427059 +| epoch 7 | 6939/ 8400 batches | train loss 0.3797659 +| epoch 7 | 6943/ 8400 batches | train loss 0.4037074 +| epoch 7 | 6947/ 8400 batches | train loss 0.3312549 +| epoch 7 | 6951/ 8400 batches | train loss 0.3749139 +| epoch 7 | 6955/ 8400 batches | train loss 0.2949404 +| epoch 7 | 6959/ 8400 batches | train loss 0.3334931 +| epoch 7 | 6963/ 8400 batches | train loss 0.3496609 +| epoch 7 | 6967/ 8400 batches | train loss 0.3875217 +| epoch 7 | 6971/ 8400 batches | train loss 0.3150196 +| epoch 7 | 6975/ 8400 batches | train loss 0.3959027 +| epoch 7 | 6979/ 8400 batches | train loss 0.2333874 +| epoch 7 | 6983/ 8400 batches | train loss 0.2947659 +| epoch 7 | 6987/ 8400 batches | train loss 0.3880865 +| epoch 7 | 6991/ 8400 batches | train loss 0.3468053 +| epoch 7 | 6995/ 8400 batches | train loss 0.3255422 +| epoch 7 | 6999/ 8400 batches | train loss 0.3716598 +| epoch 7 | 7003/ 8400 batches | train loss 0.3651479 +| epoch 7 | 7007/ 8400 batches | train loss 0.3549893 +| epoch 7 | 7011/ 8400 batches | train loss 0.3341128 +| epoch 7 | 7015/ 8400 batches | train loss 0.4181117 +| epoch 7 | 7019/ 8400 batches | train loss 0.4000693 +| epoch 7 | 7023/ 8400 batches | train loss 0.3757002 +| epoch 7 | 7027/ 8400 batches | train loss 0.4864099 +| epoch 7 | 7031/ 8400 batches | train loss 0.4186501 +| epoch 7 | 7035/ 8400 batches | train loss 0.3325596 +| epoch 7 | 7039/ 8400 batches | train loss 0.3730243 +| epoch 7 | 7043/ 8400 batches | train loss 0.3833854 +| epoch 7 | 7047/ 8400 batches | train loss 0.3530106 +| epoch 7 | 7051/ 8400 batches | train loss 0.3871680 +| epoch 7 | 7055/ 8400 batches | train loss 0.3806042 +| epoch 7 | 7059/ 8400 batches | train loss 0.3329584 +| epoch 7 | 7063/ 8400 batches | train loss 0.3807268 +| epoch 7 | 7067/ 8400 batches | train loss 0.3766581 +| epoch 7 | 7071/ 8400 batches | train loss 0.4366553 +| epoch 7 | 7075/ 8400 batches | train loss 0.3040890 +| epoch 7 | 7079/ 8400 batches | train loss 0.3267000 +| epoch 7 | 7083/ 8400 batches | train loss 0.4165439 +| epoch 7 | 7087/ 8400 batches | train loss 0.3934277 +| epoch 7 | 7091/ 8400 batches | train loss 0.4093500 +| epoch 7 | 7095/ 8400 batches | train loss 0.3822386 +| epoch 7 | 7099/ 8400 batches | train loss 0.3289567 +| epoch 7 | 7103/ 8400 batches | train loss 0.3926551 +| epoch 7 | 7107/ 8400 batches | train loss 0.3742259 +| epoch 7 | 7111/ 8400 batches | train loss 0.3551010 +| epoch 7 | 7115/ 8400 batches | train loss 0.3666579 +| epoch 7 | 7119/ 8400 batches | train loss 0.4115735 +| epoch 7 | 7123/ 8400 batches | train loss 0.4356595 +| epoch 7 | 7127/ 8400 batches | train loss 0.3296230 +| epoch 7 | 7131/ 8400 batches | train loss 0.3549053 +| epoch 7 | 7135/ 8400 batches | train loss 0.4172629 +| epoch 7 | 7139/ 8400 batches | train loss 0.4558277 +| epoch 7 | 7143/ 8400 batches | train loss 0.3585887 +| epoch 7 | 7147/ 8400 batches | train loss 0.3392841 +| epoch 7 | 7151/ 8400 batches | train loss 0.3520268 +| epoch 7 | 7155/ 8400 batches | train loss 0.4503817 +| epoch 7 | 7159/ 8400 batches | train loss 0.2900916 +| epoch 7 | 7163/ 8400 batches | train loss 0.3235505 +| epoch 7 | 7167/ 8400 batches | train loss 0.3712288 +| epoch 7 | 7171/ 8400 batches | train loss 0.3727720 +| epoch 7 | 7175/ 8400 batches | train loss 0.3623618 +| epoch 7 | 7179/ 8400 batches | train loss 0.3477405 +| epoch 7 | 7183/ 8400 batches | train loss 0.4259689 +| epoch 7 | 7187/ 8400 batches | train loss 0.3688278 +| epoch 7 | 7191/ 8400 batches | train loss 0.3236537 +| epoch 7 | 7195/ 8400 batches | train loss 0.3470416 +| epoch 7 | 7199/ 8400 batches | train loss 0.3521999 +| epoch 7 | 7203/ 8400 batches | train loss 0.4159038 +| epoch 7 | 7207/ 8400 batches | train loss 0.2456269 +| epoch 7 | 7211/ 8400 batches | train loss 0.3546665 +| epoch 7 | 7215/ 8400 batches | train loss 0.3931701 +| epoch 7 | 7219/ 8400 batches | train loss 0.3532476 +| epoch 7 | 7223/ 8400 batches | train loss 0.3699489 +| epoch 7 | 7227/ 8400 batches | train loss 0.3247706 +| epoch 7 | 7231/ 8400 batches | train loss 0.3862609 +| epoch 7 | 7235/ 8400 batches | train loss 0.3260031 +| epoch 7 | 7239/ 8400 batches | train loss 0.2911888 +| epoch 7 | 7243/ 8400 batches | train loss 0.3360794 +| epoch 7 | 7247/ 8400 batches | train loss 0.2266709 +| epoch 7 | 7251/ 8400 batches | train loss 0.4216933 +| epoch 7 | 7255/ 8400 batches | train loss 0.3573492 +| epoch 7 | 7259/ 8400 batches | train loss 0.3274704 +| epoch 7 | 7263/ 8400 batches | train loss 0.3260329 +| epoch 7 | 7267/ 8400 batches | train loss 0.3911543 +| epoch 7 | 7271/ 8400 batches | train loss 0.3642200 +| epoch 7 | 7275/ 8400 batches | train loss 0.3153246 +| epoch 7 | 7279/ 8400 batches | train loss 0.3361844 +| epoch 7 | 7283/ 8400 batches | train loss 0.3839910 +| epoch 7 | 7287/ 8400 batches | train loss 0.3134421 +| epoch 7 | 7291/ 8400 batches | train loss 0.3719378 +| epoch 7 | 7295/ 8400 batches | train loss 0.3823677 +| epoch 7 | 7299/ 8400 batches | train loss 0.3640171 +| epoch 7 | 7303/ 8400 batches | train loss 0.4531656 +| epoch 7 | 7307/ 8400 batches | train loss 0.3766510 +| epoch 7 | 7311/ 8400 batches | train loss 0.3919941 +| epoch 7 | 7315/ 8400 batches | train loss 0.3605159 +| epoch 7 | 7319/ 8400 batches | train loss 0.3769522 +| epoch 7 | 7323/ 8400 batches | train loss 0.3626317 +| epoch 7 | 7327/ 8400 batches | train loss 0.3440296 +| epoch 7 | 7331/ 8400 batches | train loss 0.3484928 +| epoch 7 | 7335/ 8400 batches | train loss 0.3726948 +| epoch 7 | 7339/ 8400 batches | train loss 0.3628568 +| epoch 7 | 7343/ 8400 batches | train loss 0.3984004 +| epoch 7 | 7347/ 8400 batches | train loss 0.4296459 +| epoch 7 | 7351/ 8400 batches | train loss 0.3955601 +| epoch 7 | 7355/ 8400 batches | train loss 0.4674982 +| epoch 7 | 7359/ 8400 batches | train loss 0.3864694 +| epoch 7 | 7363/ 8400 batches | train loss 0.3505523 +| epoch 7 | 7367/ 8400 batches | train loss 0.3368456 +| epoch 7 | 7371/ 8400 batches | train loss 0.3662394 +| epoch 7 | 7375/ 8400 batches | train loss 0.3194454 +| epoch 7 | 7379/ 8400 batches | train loss 0.4033822 +| epoch 7 | 7383/ 8400 batches | train loss 0.3586079 +| epoch 7 | 7387/ 8400 batches | train loss 0.3579907 +| epoch 7 | 7391/ 8400 batches | train loss 0.3203484 +| epoch 7 | 7395/ 8400 batches | train loss 0.3055066 +| epoch 7 | 7399/ 8400 batches | train loss 0.3885607 +| epoch 7 | 7403/ 8400 batches | train loss 0.3704010 +| epoch 7 | 7407/ 8400 batches | train loss 0.3825788 +| epoch 7 | 7411/ 8400 batches | train loss 0.3175511 +| epoch 7 | 7415/ 8400 batches | train loss 0.3536355 +| epoch 7 | 7419/ 8400 batches | train loss 0.3947067 +| epoch 7 | 7423/ 8400 batches | train loss 0.3234476 +| epoch 7 | 7427/ 8400 batches | train loss 0.4132515 +| epoch 7 | 7431/ 8400 batches | train loss 0.3102066 +| epoch 7 | 7435/ 8400 batches | train loss 0.4324791 +| epoch 7 | 7439/ 8400 batches | train loss 0.3920409 +| epoch 7 | 7443/ 8400 batches | train loss 0.3840913 +| epoch 7 | 7447/ 8400 batches | train loss 0.3659056 +| epoch 7 | 7451/ 8400 batches | train loss 0.3147931 +| epoch 7 | 7455/ 8400 batches | train loss 0.3370072 +| epoch 7 | 7459/ 8400 batches | train loss 0.3963007 +| epoch 7 | 7463/ 8400 batches | train loss 0.3333893 +| epoch 7 | 7467/ 8400 batches | train loss 0.3885432 +| epoch 7 | 7471/ 8400 batches | train loss 0.3370292 +| epoch 7 | 7475/ 8400 batches | train loss 0.3010491 +| epoch 7 | 7479/ 8400 batches | train loss 0.3354796 +| epoch 7 | 7483/ 8400 batches | train loss 0.2288985 +| epoch 7 | 7487/ 8400 batches | train loss 0.4131252 +| epoch 7 | 7491/ 8400 batches | train loss 0.4449028 +| epoch 7 | 7495/ 8400 batches | train loss 0.3613520 +| epoch 7 | 7499/ 8400 batches | train loss 0.4119929 +| epoch 7 | 7503/ 8400 batches | train loss 0.3319334 +| epoch 7 | 7507/ 8400 batches | train loss 0.3700233 +| epoch 7 | 7511/ 8400 batches | train loss 0.4008483 +| epoch 7 | 7515/ 8400 batches | train loss 0.3051528 +| epoch 7 | 7519/ 8400 batches | train loss 0.4007748 +| epoch 7 | 7523/ 8400 batches | train loss 0.3553795 +| epoch 7 | 7527/ 8400 batches | train loss 0.3694427 +| epoch 7 | 7531/ 8400 batches | train loss 0.3702158 +| epoch 7 | 7535/ 8400 batches | train loss 0.3681769 +| epoch 7 | 7539/ 8400 batches | train loss 0.3318799 +| epoch 7 | 7543/ 8400 batches | train loss 0.4280559 +| epoch 7 | 7547/ 8400 batches | train loss 0.4252754 +| epoch 7 | 7551/ 8400 batches | train loss 0.3666724 +| epoch 7 | 7555/ 8400 batches | train loss 0.3690435 +| epoch 7 | 7559/ 8400 batches | train loss 0.3090983 +| epoch 7 | 7563/ 8400 batches | train loss 0.3912493 +| epoch 7 | 7567/ 8400 batches | train loss 0.3800359 +| epoch 7 | 7571/ 8400 batches | train loss 0.3526800 +| epoch 7 | 7575/ 8400 batches | train loss 0.3525090 +| epoch 7 | 7579/ 8400 batches | train loss 0.3603562 +| epoch 7 | 7583/ 8400 batches | train loss 0.3450535 +| epoch 7 | 7587/ 8400 batches | train loss 0.4045689 +| epoch 7 | 7591/ 8400 batches | train loss 0.3949676 +| epoch 7 | 7595/ 8400 batches | train loss 0.3992408 +| epoch 7 | 7599/ 8400 batches | train loss 0.3545242 +| epoch 7 | 7603/ 8400 batches | train loss 0.3782260 +| epoch 7 | 7607/ 8400 batches | train loss 0.3963970 +| epoch 7 | 7611/ 8400 batches | train loss 0.4397385 +| epoch 7 | 7615/ 8400 batches | train loss 0.4250262 +| epoch 7 | 7619/ 8400 batches | train loss 0.4037554 +| epoch 7 | 7623/ 8400 batches | train loss 0.3282792 +| epoch 7 | 7627/ 8400 batches | train loss 0.4346228 +| epoch 7 | 7631/ 8400 batches | train loss 0.3315214 +| epoch 7 | 7635/ 8400 batches | train loss 0.3448368 +| epoch 7 | 7639/ 8400 batches | train loss 0.3206730 +| epoch 7 | 7643/ 8400 batches | train loss 0.3364670 +| epoch 7 | 7647/ 8400 batches | train loss 0.3570368 +| epoch 7 | 7651/ 8400 batches | train loss 0.3293601 +| epoch 7 | 7655/ 8400 batches | train loss 0.3576828 +| epoch 7 | 7659/ 8400 batches | train loss 0.3412278 +| epoch 7 | 7663/ 8400 batches | train loss 0.3858137 +| epoch 7 | 7667/ 8400 batches | train loss 0.3988731 +| epoch 7 | 7671/ 8400 batches | train loss 0.3479443 +| epoch 7 | 7675/ 8400 batches | train loss 0.4457502 +| epoch 7 | 7679/ 8400 batches | train loss 0.3252271 +| epoch 7 | 7683/ 8400 batches | train loss 0.3570746 +| epoch 7 | 7687/ 8400 batches | train loss 0.3880351 +| epoch 7 | 7691/ 8400 batches | train loss 0.2930235 +| epoch 7 | 7695/ 8400 batches | train loss 0.3661373 +| epoch 7 | 7699/ 8400 batches | train loss 0.3508116 +| epoch 7 | 7703/ 8400 batches | train loss 0.4804385 +| epoch 7 | 7707/ 8400 batches | train loss 0.3894588 +| epoch 7 | 7711/ 8400 batches | train loss 0.2704709 +| epoch 7 | 7715/ 8400 batches | train loss 0.4208547 +| epoch 7 | 7719/ 8400 batches | train loss 0.4001513 +| epoch 7 | 7723/ 8400 batches | train loss 0.3392502 +| epoch 7 | 7727/ 8400 batches | train loss 0.3300806 +| epoch 7 | 7731/ 8400 batches | train loss 0.2381570 +| epoch 7 | 7735/ 8400 batches | train loss 0.3808063 +| epoch 7 | 7739/ 8400 batches | train loss 0.4012333 +| epoch 7 | 7743/ 8400 batches | train loss 0.3363881 +| epoch 7 | 7747/ 8400 batches | train loss 0.3683376 +| epoch 7 | 7751/ 8400 batches | train loss 0.3761344 +| epoch 7 | 7755/ 8400 batches | train loss 0.3404153 +| epoch 7 | 7759/ 8400 batches | train loss 0.3217167 +| epoch 7 | 7763/ 8400 batches | train loss 0.3291488 +| epoch 7 | 7767/ 8400 batches | train loss 0.3396749 +| epoch 7 | 7771/ 8400 batches | train loss 0.3992372 +| epoch 7 | 7775/ 8400 batches | train loss 0.3792273 +| epoch 7 | 7779/ 8400 batches | train loss 0.3705175 +| epoch 7 | 7783/ 8400 batches | train loss 0.4795257 +| epoch 7 | 7787/ 8400 batches | train loss 0.3115280 +| epoch 7 | 7791/ 8400 batches | train loss 0.3736862 +| epoch 7 | 7795/ 8400 batches | train loss 0.3892182 +| epoch 7 | 7799/ 8400 batches | train loss 0.3576402 +| epoch 7 | 7803/ 8400 batches | train loss 0.3986849 +| epoch 7 | 7807/ 8400 batches | train loss 0.4318807 +| epoch 7 | 7811/ 8400 batches | train loss 0.3542580 +| epoch 7 | 7815/ 8400 batches | train loss 0.3698381 +| epoch 7 | 7819/ 8400 batches | train loss 0.3248205 +| epoch 7 | 7823/ 8400 batches | train loss 0.3797521 +| epoch 7 | 7827/ 8400 batches | train loss 0.3943644 +| epoch 7 | 7831/ 8400 batches | train loss 0.3290222 +| epoch 7 | 7835/ 8400 batches | train loss 0.3792911 +| epoch 7 | 7839/ 8400 batches | train loss 0.3420747 +| epoch 7 | 7843/ 8400 batches | train loss 0.3828177 +| epoch 7 | 7847/ 8400 batches | train loss 0.4051003 +| epoch 7 | 7851/ 8400 batches | train loss 0.3174266 +| epoch 7 | 7855/ 8400 batches | train loss 0.3835788 +| epoch 7 | 7859/ 8400 batches | train loss 0.3423060 +| epoch 7 | 7863/ 8400 batches | train loss 0.3444091 +| epoch 7 | 7867/ 8400 batches | train loss 0.3590428 +| epoch 7 | 7871/ 8400 batches | train loss 0.3944499 +| epoch 7 | 7875/ 8400 batches | train loss 0.3565575 +| epoch 7 | 7879/ 8400 batches | train loss 0.3751541 +| epoch 7 | 7883/ 8400 batches | train loss 0.3235493 +| epoch 7 | 7887/ 8400 batches | train loss 0.4401648 +| epoch 7 | 7891/ 8400 batches | train loss 0.3410404 +| epoch 7 | 7895/ 8400 batches | train loss 0.3454476 +| epoch 7 | 7899/ 8400 batches | train loss 0.3340105 +| epoch 7 | 7903/ 8400 batches | train loss 0.3618995 +| epoch 7 | 7907/ 8400 batches | train loss 0.3879708 +| epoch 7 | 7911/ 8400 batches | train loss 0.3539334 +| epoch 7 | 7915/ 8400 batches | train loss 0.3732027 +| epoch 7 | 7919/ 8400 batches | train loss 0.4103978 +| epoch 7 | 7923/ 8400 batches | train loss 0.3566405 +| epoch 7 | 7927/ 8400 batches | train loss 0.3315090 +| epoch 7 | 7931/ 8400 batches | train loss 0.3121537 +| epoch 7 | 7935/ 8400 batches | train loss 0.3469280 +| epoch 7 | 7939/ 8400 batches | train loss 0.3975074 +| epoch 7 | 7943/ 8400 batches | train loss 0.3548989 +| epoch 7 | 7947/ 8400 batches | train loss 0.3250121 +| epoch 7 | 7951/ 8400 batches | train loss 0.3684189 +| epoch 7 | 7955/ 8400 batches | train loss 0.3441275 +| epoch 7 | 7959/ 8400 batches | train loss 0.3855358 +| epoch 7 | 7963/ 8400 batches | train loss 0.3467368 +| epoch 7 | 7967/ 8400 batches | train loss 0.3934789 +| epoch 7 | 7971/ 8400 batches | train loss 0.3527306 +| epoch 7 | 7975/ 8400 batches | train loss 0.4298863 +| epoch 7 | 7979/ 8400 batches | train loss 0.3921483 +| epoch 7 | 7983/ 8400 batches | train loss 0.3341497 +| epoch 7 | 7987/ 8400 batches | train loss 0.3131710 +| epoch 7 | 7991/ 8400 batches | train loss 0.3285330 +| epoch 7 | 7995/ 8400 batches | train loss 0.3118917 +| epoch 7 | 7999/ 8400 batches | train loss 0.3909328 +| epoch 7 | 8003/ 8400 batches | train loss 0.3095202 +| epoch 7 | 8007/ 8400 batches | train loss 0.3570715 +| epoch 7 | 8011/ 8400 batches | train loss 0.4321434 +| epoch 7 | 8015/ 8400 batches | train loss 0.3189458 +| epoch 7 | 8019/ 8400 batches | train loss 0.3703764 +| epoch 7 | 8023/ 8400 batches | train loss 0.3863812 +| epoch 7 | 8027/ 8400 batches | train loss 0.3138926 +| epoch 7 | 8031/ 8400 batches | train loss 0.3658667 +| epoch 7 | 8035/ 8400 batches | train loss 0.3778277 +| epoch 7 | 8039/ 8400 batches | train loss 0.3715129 +| epoch 7 | 8043/ 8400 batches | train loss 0.4192932 +| epoch 7 | 8047/ 8400 batches | train loss 0.2350881 +| epoch 7 | 8051/ 8400 batches | train loss 0.3679762 +| epoch 7 | 8055/ 8400 batches | train loss 0.3784928 +| epoch 7 | 8059/ 8400 batches | train loss 0.4086643 +| epoch 7 | 8063/ 8400 batches | train loss 0.3872646 +| epoch 7 | 8067/ 8400 batches | train loss 0.3323048 +| epoch 7 | 8071/ 8400 batches | train loss 0.3430816 +| epoch 7 | 8075/ 8400 batches | train loss 0.3387389 +| epoch 7 | 8079/ 8400 batches | train loss 0.3957529 +| epoch 7 | 8083/ 8400 batches | train loss 0.3735183 +| epoch 7 | 8087/ 8400 batches | train loss 0.2310078 +| epoch 7 | 8091/ 8400 batches | train loss 0.3911654 +| epoch 7 | 8095/ 8400 batches | train loss 0.3385514 +| epoch 7 | 8099/ 8400 batches | train loss 0.5345229 +| epoch 7 | 8103/ 8400 batches | train loss 0.3304322 +| epoch 7 | 8107/ 8400 batches | train loss 0.3781173 +| epoch 7 | 8111/ 8400 batches | train loss 0.3599449 +| epoch 7 | 8115/ 8400 batches | train loss 0.4104565 +| epoch 7 | 8119/ 8400 batches | train loss 0.3255942 +| epoch 7 | 8123/ 8400 batches | train loss 0.3720807 +| epoch 7 | 8127/ 8400 batches | train loss 0.3982199 +| epoch 7 | 8131/ 8400 batches | train loss 0.3841813 +| epoch 7 | 8135/ 8400 batches | train loss 0.3447323 +| epoch 7 | 8139/ 8400 batches | train loss 0.3435043 +| epoch 7 | 8143/ 8400 batches | train loss 0.3571115 +| epoch 7 | 8147/ 8400 batches | train loss 0.3900600 +| epoch 7 | 8151/ 8400 batches | train loss 0.4168318 +| epoch 7 | 8155/ 8400 batches | train loss 0.4180322 +| epoch 7 | 8159/ 8400 batches | train loss 0.3840716 +| epoch 7 | 8163/ 8400 batches | train loss 0.3765469 +| epoch 7 | 8167/ 8400 batches | train loss 0.3974736 +| epoch 7 | 8171/ 8400 batches | train loss 0.3617302 +| epoch 7 | 8175/ 8400 batches | train loss 0.3523983 +| epoch 7 | 8179/ 8400 batches | train loss 0.3466989 +| epoch 7 | 8183/ 8400 batches | train loss 0.3495729 +| epoch 7 | 8187/ 8400 batches | train loss 0.3600296 +| epoch 7 | 8191/ 8400 batches | train loss 0.3246485 +| epoch 7 | 8195/ 8400 batches | train loss 0.4206378 +| epoch 7 | 8199/ 8400 batches | train loss 0.4528078 +| epoch 7 | 8203/ 8400 batches | train loss 0.3870685 +| epoch 7 | 8207/ 8400 batches | train loss 0.3196758 +| epoch 7 | 8211/ 8400 batches | train loss 0.4023725 +| epoch 7 | 8215/ 8400 batches | train loss 0.3750306 +| epoch 7 | 8219/ 8400 batches | train loss 0.3521561 +| epoch 7 | 8223/ 8400 batches | train loss 0.3306030 +| epoch 7 | 8227/ 8400 batches | train loss 0.2963485 +| epoch 7 | 8231/ 8400 batches | train loss 0.3590278 +| epoch 7 | 8235/ 8400 batches | train loss 0.3145687 +| epoch 7 | 8239/ 8400 batches | train loss 0.3344821 +| epoch 7 | 8243/ 8400 batches | train loss 0.3669118 +| epoch 7 | 8247/ 8400 batches | train loss 0.3477181 +| epoch 7 | 8251/ 8400 batches | train loss 0.3638602 +| epoch 7 | 8255/ 8400 batches | train loss 0.3040258 +| epoch 7 | 8259/ 8400 batches | train loss 0.3461704 +| epoch 7 | 8263/ 8400 batches | train loss 0.3507089 +| epoch 7 | 8267/ 8400 batches | train loss 0.4826170 +| epoch 7 | 8271/ 8400 batches | train loss 0.3174158 +| epoch 7 | 8275/ 8400 batches | train loss 0.3798259 +| epoch 7 | 8279/ 8400 batches | train loss 0.3490202 +| epoch 7 | 8283/ 8400 batches | train loss 0.3385434 +| epoch 7 | 8287/ 8400 batches | train loss 0.3651201 +| epoch 7 | 8291/ 8400 batches | train loss 0.4270627 +| epoch 7 | 8295/ 8400 batches | train loss 0.3736400 +| epoch 7 | 8299/ 8400 batches | train loss 0.2667225 +| epoch 7 | 8303/ 8400 batches | train loss 0.4048219 +| epoch 7 | 8307/ 8400 batches | train loss 0.3321084 +| epoch 7 | 8311/ 8400 batches | train loss 0.4532895 +| epoch 7 | 8315/ 8400 batches | train loss 0.3655975 +| epoch 7 | 8319/ 8400 batches | train loss 0.3714226 +| epoch 7 | 8323/ 8400 batches | train loss 0.3897443 +| epoch 7 | 8327/ 8400 batches | train loss 0.3219030 +| epoch 7 | 8331/ 8400 batches | train loss 0.3649511 +| epoch 7 | 8335/ 8400 batches | train loss 0.3429827 +| epoch 7 | 8339/ 8400 batches | train loss 0.3395168 +| epoch 7 | 8343/ 8400 batches | train loss 0.3683528 +| epoch 7 | 8347/ 8400 batches | train loss 0.3887337 +| epoch 7 | 8351/ 8400 batches | train loss 0.3816299 +| epoch 7 | 8355/ 8400 batches | train loss 0.3473333 +| epoch 7 | 8359/ 8400 batches | train loss 0.3605878 +| epoch 7 | 8363/ 8400 batches | train loss 0.3331611 +| epoch 7 | 8367/ 8400 batches | train loss 0.4137532 +| epoch 7 | 8371/ 8400 batches | train loss 0.3976599 +| epoch 7 | 8375/ 8400 batches | train loss 0.3379464 +| epoch 7 | 8379/ 8400 batches | train loss 0.4088118 +| epoch 7 | 8383/ 8400 batches | train loss 0.3343047 +| epoch 7 | 8387/ 8400 batches | train loss 0.3120768 +| epoch 7 | 8391/ 8400 batches | train loss 0.3192878 +| epoch 7 | 8395/ 8400 batches | train loss 0.3901195 +| epoch 7 | 8399/ 8400 batches | train loss 0.4180411 +-------------------------------------------------------------------------------- +| epoch 7 | 3/ 8400 batches | test loss 0.4108599 +| epoch 7 | 7/ 8400 batches | test loss 0.4394014 +| epoch 7 | 11/ 8400 batches | test loss 0.1697595 +| epoch 7 | 15/ 8400 batches | test loss 0.4049414 +| epoch 7 | 19/ 8400 batches | test loss 0.4655690 +| epoch 7 | 23/ 8400 batches | test loss 0.4718572 +| epoch 7 | 27/ 8400 batches | test loss 0.3954014 +| epoch 7 | 31/ 8400 batches | test loss 0.4782575 +| epoch 7 | 35/ 8400 batches | test loss 0.6708825 +| epoch 7 | 39/ 8400 batches | test loss 0.4617914 +| epoch 7 | 43/ 8400 batches | test loss 0.3697129 +| epoch 7 | 47/ 8400 batches | test loss 0.4399256 +| epoch 7 | 51/ 8400 batches | test loss 0.3968953 +| epoch 7 | 55/ 8400 batches | test loss 0.4621216 +| epoch 7 | 59/ 8400 batches | test loss 0.3409336 +| epoch 7 | 63/ 8400 batches | test loss 0.3331726 +| epoch 7 | 67/ 8400 batches | test loss 0.4850419 +| epoch 7 | 71/ 8400 batches | test loss 0.3445980 +| epoch 7 | 75/ 8400 batches | test loss 0.4759842 +| epoch 7 | 79/ 8400 batches | test loss 0.3408838 +| epoch 7 | 83/ 8400 batches | test loss 0.4691719 +| epoch 7 | 87/ 8400 batches | test loss 0.4143974 +| epoch 7 | 91/ 8400 batches | test loss 0.3414899 +| epoch 7 | 95/ 8400 batches | test loss 0.5262684 +| epoch 7 | 99/ 8400 batches | test loss 0.3288628 +| epoch 7 | 103/ 8400 batches | test loss 0.3885386 +| epoch 7 | 107/ 8400 batches | test loss 0.4403533 +| epoch 7 | 111/ 8400 batches | test loss 0.3522216 +| epoch 7 | 115/ 8400 batches | test loss 0.6120145 +| epoch 7 | 119/ 8400 batches | test loss 0.4058701 +| epoch 7 | 123/ 8400 batches | test loss 0.4002053 +| epoch 7 | 127/ 8400 batches | test loss 0.4184304 +| epoch 7 | 131/ 8400 batches | test loss 0.3793576 +| epoch 7 | 135/ 8400 batches | test loss 0.4369144 +| epoch 7 | 139/ 8400 batches | test loss 0.3693210 +| epoch 7 | 143/ 8400 batches | test loss 0.4227058 +| epoch 7 | 147/ 8400 batches | test loss 0.6109590 +| epoch 7 | 151/ 8400 batches | test loss 0.3656111 +| epoch 7 | 155/ 8400 batches | test loss 0.3597485 +| epoch 7 | 159/ 8400 batches | test loss 0.4792178 +| epoch 7 | 163/ 8400 batches | test loss 0.3749285 +| epoch 7 | 167/ 8400 batches | test loss 0.3881353 +| epoch 7 | 171/ 8400 batches | test loss 0.4491670 +| epoch 7 | 175/ 8400 batches | test loss 0.3277856 +| epoch 7 | 179/ 8400 batches | test loss 0.4099269 +| epoch 7 | 183/ 8400 batches | test loss 0.4083602 +| epoch 7 | 187/ 8400 batches | test loss 0.4285308 +| epoch 7 | 191/ 8400 batches | test loss 0.3235198 +| epoch 7 | 195/ 8400 batches | test loss 0.3780075 +| epoch 7 | 199/ 8400 batches | test loss 0.4206603 +| epoch 7 | 203/ 8400 batches | test loss 0.3933388 +| epoch 7 | 207/ 8400 batches | test loss 0.4918184 +| epoch 7 | 211/ 8400 batches | test loss 0.4921287 +| epoch 7 | 215/ 8400 batches | test loss 0.3353090 +| epoch 7 | 219/ 8400 batches | test loss 0.4339638 +| epoch 7 | 223/ 8400 batches | test loss 0.3393090 +| epoch 7 | 227/ 8400 batches | test loss 0.3567256 +| epoch 7 | 231/ 8400 batches | test loss 0.3477552 +| epoch 7 | 235/ 8400 batches | test loss 0.3086979 +| epoch 7 | 239/ 8400 batches | test loss 0.4739990 +| epoch 7 | 243/ 8400 batches | test loss 0.4494581 +| epoch 7 | 247/ 8400 batches | test loss 0.3897718 +| epoch 7 | 251/ 8400 batches | test loss 0.3873484 +| epoch 7 | 255/ 8400 batches | test loss 0.5750308 +| epoch 7 | 259/ 8400 batches | test loss 0.5028884 +| epoch 7 | 263/ 8400 batches | test loss 0.3698888 +| epoch 7 | 267/ 8400 batches | test loss 0.4317043 +| epoch 7 | 271/ 8400 batches | test loss 0.4458178 +| epoch 7 | 275/ 8400 batches | test loss 0.3595622 +| epoch 7 | 279/ 8400 batches | test loss 0.5017449 +| epoch 7 | 283/ 8400 batches | test loss 0.7536306 +| epoch 7 | 287/ 8400 batches | test loss 0.3404858 +| epoch 7 | 291/ 8400 batches | test loss 0.4157512 +| epoch 7 | 295/ 8400 batches | test loss 0.3370519 +| epoch 7 | 299/ 8400 batches | test loss 0.3655701 +| epoch 7 | 303/ 8400 batches | test loss 0.4394862 +| epoch 7 | 307/ 8400 batches | test loss 0.4337188 +| epoch 7 | 311/ 8400 batches | test loss 0.5005875 +| epoch 7 | 315/ 8400 batches | test loss 0.5121400 +| epoch 7 | 319/ 8400 batches | test loss 0.4535174 +| epoch 7 | 323/ 8400 batches | test loss 0.3999497 +| epoch 7 | 327/ 8400 batches | test loss 0.3539943 +| epoch 7 | 331/ 8400 batches | test loss 0.4906558 +| epoch 7 | 335/ 8400 batches | test loss 0.9852149 +| epoch 7 | 339/ 8400 batches | test loss 0.4609202 +| epoch 7 | 343/ 8400 batches | test loss 0.3967641 +| epoch 7 | 347/ 8400 batches | test loss 0.3514851 +| epoch 7 | 351/ 8400 batches | test loss 0.4389237 +| epoch 7 | 355/ 8400 batches | test loss 0.2518574 +| epoch 7 | 359/ 8400 batches | test loss 0.4636086 +| epoch 7 | 363/ 8400 batches | test loss 0.2911953 +| epoch 7 | 367/ 8400 batches | test loss 0.4312936 +| epoch 7 | 371/ 8400 batches | test loss 0.4604148 +| epoch 7 | 375/ 8400 batches | test loss 0.5526006 +| epoch 7 | 379/ 8400 batches | test loss 0.3843527 +| epoch 7 | 383/ 8400 batches | test loss 0.3662047 +| epoch 7 | 387/ 8400 batches | test loss 0.3849364 +| epoch 7 | 391/ 8400 batches | test loss 0.4476073 +| epoch 7 | 395/ 8400 batches | test loss 0.3686447 +| epoch 7 | 399/ 8400 batches | test loss 0.3376765 +| epoch 7 | 403/ 8400 batches | test loss 0.4714603 +| epoch 7 | 407/ 8400 batches | test loss 0.5575902 +| epoch 7 | 411/ 8400 batches | test loss 0.4040840 +| epoch 7 | 415/ 8400 batches | test loss 0.3012586 +| epoch 7 | 419/ 8400 batches | test loss 0.4203799 +| epoch 7 | 423/ 8400 batches | test loss 0.4087157 +| epoch 7 | 427/ 8400 batches | test loss 0.3839177 +| epoch 7 | 431/ 8400 batches | test loss 0.4574046 +| epoch 7 | 435/ 8400 batches | test loss 0.3617681 +| epoch 7 | 439/ 8400 batches | test loss 0.3751014 +| epoch 7 | 443/ 8400 batches | test loss 0.5650225 +| epoch 7 | 447/ 8400 batches | test loss 0.4013520 +| epoch 7 | 451/ 8400 batches | test loss 0.3805519 +| epoch 7 | 455/ 8400 batches | test loss 0.4321163 +| epoch 7 | 459/ 8400 batches | test loss 0.5291840 +| epoch 7 | 463/ 8400 batches | test loss 0.3987811 +| epoch 7 | 467/ 8400 batches | test loss 0.3954744 +| epoch 7 | 471/ 8400 batches | test loss 0.5318189 +| epoch 7 | 475/ 8400 batches | test loss 0.4753112 +| epoch 7 | 479/ 8400 batches | test loss 0.4122337 +| epoch 7 | 483/ 8400 batches | test loss 0.5571749 +| epoch 7 | 487/ 8400 batches | test loss 0.4569703 +| epoch 7 | 491/ 8400 batches | test loss 0.3781771 +| epoch 7 | 495/ 8400 batches | test loss 0.4047707 +| epoch 7 | 499/ 8400 batches | test loss 0.3609933 +| epoch 7 | 503/ 8400 batches | test loss 0.4645253 +| epoch 7 | 507/ 8400 batches | test loss 0.4129209 +| epoch 7 | 511/ 8400 batches | test loss 0.4608053 +| epoch 7 | 515/ 8400 batches | test loss 0.4236983 +| epoch 7 | 519/ 8400 batches | test loss 0.5075144 +| epoch 7 | 523/ 8400 batches | test loss 0.4484776 +| epoch 7 | 527/ 8400 batches | test loss 0.4364133 +| epoch 7 | 531/ 8400 batches | test loss 0.4682230 +| epoch 7 | 535/ 8400 batches | test loss 0.3814811 +| epoch 7 | 539/ 8400 batches | test loss 0.3975529 +| epoch 7 | 543/ 8400 batches | test loss 0.4218853 +| epoch 7 | 547/ 8400 batches | test loss 0.3941073 +| epoch 7 | 551/ 8400 batches | test loss 0.4426267 +| epoch 7 | 555/ 8400 batches | test loss 0.4354021 +| epoch 7 | 559/ 8400 batches | test loss 0.3859293 +| epoch 7 | 563/ 8400 batches | test loss 0.3816313 +| epoch 7 | 567/ 8400 batches | test loss 0.4738194 +| epoch 7 | 571/ 8400 batches | test loss 0.4103940 +| epoch 7 | 575/ 8400 batches | test loss 0.3749638 +| epoch 7 | 579/ 8400 batches | test loss 0.3832071 +| epoch 7 | 583/ 8400 batches | test loss 0.4526405 +| epoch 7 | 587/ 8400 batches | test loss 0.3108539 +| epoch 7 | 591/ 8400 batches | test loss 0.3511450 +| epoch 7 | 595/ 8400 batches | test loss 0.3807978 +| epoch 7 | 599/ 8400 batches | test loss 0.3531557 +| epoch 7 | 603/ 8400 batches | test loss 0.3200856 +| epoch 7 | 607/ 8400 batches | test loss 0.3616935 +| epoch 7 | 611/ 8400 batches | test loss 0.3939770 +| epoch 7 | 615/ 8400 batches | test loss 0.3739300 +| epoch 7 | 619/ 8400 batches | test loss 0.4389240 +| epoch 7 | 623/ 8400 batches | test loss 0.4090095 +| epoch 7 | 627/ 8400 batches | test loss 0.5072941 +| epoch 7 | 631/ 8400 batches | test loss 0.3967286 +| epoch 7 | 635/ 8400 batches | test loss 0.3851870 +| epoch 7 | 639/ 8400 batches | test loss 0.3285948 +| epoch 7 | 643/ 8400 batches | test loss 0.3818192 +| epoch 7 | 647/ 8400 batches | test loss 0.3599583 +| epoch 7 | 651/ 8400 batches | test loss 0.4557709 +| epoch 7 | 655/ 8400 batches | test loss 0.4335953 +| epoch 7 | 659/ 8400 batches | test loss 0.4918159 +| epoch 7 | 663/ 8400 batches | test loss 0.3934240 +| epoch 7 | 667/ 8400 batches | test loss 0.4272135 +| epoch 7 | 671/ 8400 batches | test loss 0.5004547 +| epoch 7 | 675/ 8400 batches | test loss 0.3877562 +| epoch 7 | 679/ 8400 batches | test loss 0.3730850 +| epoch 7 | 683/ 8400 batches | test loss 0.3608932 +| epoch 7 | 687/ 8400 batches | test loss 0.3807060 +| epoch 7 | 691/ 8400 batches | test loss 0.3561473 +| epoch 7 | 695/ 8400 batches | test loss 0.4396259 +| epoch 7 | 699/ 8400 batches | test loss 0.4229420 +| epoch 7 | 703/ 8400 batches | test loss 0.3809150 +| epoch 7 | 707/ 8400 batches | test loss 0.4597871 +| epoch 7 | 711/ 8400 batches | test loss 0.5459518 +| epoch 7 | 715/ 8400 batches | test loss 0.5157379 +| epoch 7 | 719/ 8400 batches | test loss 0.3689977 +| epoch 7 | 723/ 8400 batches | test loss 0.4457849 +| epoch 7 | 727/ 8400 batches | test loss 0.3866563 +| epoch 7 | 731/ 8400 batches | test loss 0.4251311 +| epoch 7 | 735/ 8400 batches | test loss 0.4580747 +| epoch 7 | 739/ 8400 batches | test loss 0.3990137 +| epoch 7 | 743/ 8400 batches | test loss 0.5225937 +| epoch 7 | 747/ 8400 batches | test loss 0.3738291 +| epoch 7 | 751/ 8400 batches | test loss 0.4829094 +| epoch 7 | 755/ 8400 batches | test loss 0.4493820 +| epoch 7 | 759/ 8400 batches | test loss 0.3270819 +| epoch 7 | 763/ 8400 batches | test loss 0.3864639 +| epoch 7 | 767/ 8400 batches | test loss 0.4517692 +| epoch 7 | 771/ 8400 batches | test loss 0.4392314 +| epoch 7 | 775/ 8400 batches | test loss 0.3809215 +| epoch 7 | 779/ 8400 batches | test loss 0.3633968 +| epoch 7 | 783/ 8400 batches | test loss 0.3673310 +| epoch 7 | 787/ 8400 batches | test loss 0.5069759 +| epoch 7 | 791/ 8400 batches | test loss 0.3562098 +| epoch 7 | 795/ 8400 batches | test loss 0.5537727 +| epoch 7 | 799/ 8400 batches | test loss 0.4276617 +| epoch 7 | 803/ 8400 batches | test loss 0.4856149 +| epoch 7 | 807/ 8400 batches | test loss 0.4034714 +| epoch 7 | 811/ 8400 batches | test loss 0.4290340 +| epoch 7 | 815/ 8400 batches | test loss 0.9166988 +| epoch 7 | 819/ 8400 batches | test loss 0.4606698 +| epoch 7 | 823/ 8400 batches | test loss 0.4143738 +| epoch 7 | 827/ 8400 batches | test loss 0.4370978 +| epoch 7 | 831/ 8400 batches | test loss 0.4765182 +| epoch 7 | 835/ 8400 batches | test loss 0.3980048 +| epoch 7 | 839/ 8400 batches | test loss 0.3939214 +| epoch 7 | 843/ 8400 batches | test loss 0.4060787 +| epoch 7 | 847/ 8400 batches | test loss 0.4115729 +| epoch 7 | 851/ 8400 batches | test loss 0.4390199 +| epoch 7 | 855/ 8400 batches | test loss 0.4825714 +| epoch 7 | 859/ 8400 batches | test loss 0.4652045 +| epoch 7 | 863/ 8400 batches | test loss 0.5776614 +| epoch 7 | 867/ 8400 batches | test loss 0.3872693 +| epoch 7 | 871/ 8400 batches | test loss 0.6061291 +| epoch 7 | 875/ 8400 batches | test loss 0.3967867 +| epoch 7 | 879/ 8400 batches | test loss 0.5300860 +| epoch 7 | 883/ 8400 batches | test loss 0.3559269 +| epoch 7 | 887/ 8400 batches | test loss 0.4227712 +| epoch 7 | 891/ 8400 batches | test loss 0.5410041 +| epoch 7 | 895/ 8400 batches | test loss 0.5538130 +| epoch 7 | 899/ 8400 batches | test loss 0.3736736 +| epoch 7 | 903/ 8400 batches | test loss 0.3585000 +| epoch 7 | 907/ 8400 batches | test loss 0.4070173 +| epoch 7 | 911/ 8400 batches | test loss 0.4004979 +| epoch 7 | 915/ 8400 batches | test loss 0.3190596 +| epoch 7 | 919/ 8400 batches | test loss 0.3917626 +| epoch 7 | 923/ 8400 batches | test loss 0.4898657 +| epoch 7 | 927/ 8400 batches | test loss 0.4051952 +| epoch 7 | 931/ 8400 batches | test loss 0.3976217 +| epoch 7 | 935/ 8400 batches | test loss 0.3914964 +| epoch 7 | 939/ 8400 batches | test loss 0.3977020 +| epoch 7 | 943/ 8400 batches | test loss 0.5134371 +| epoch 7 | 947/ 8400 batches | test loss 0.4251828 +| epoch 7 | 951/ 8400 batches | test loss 0.4609476 +| epoch 7 | 955/ 8400 batches | test loss 0.4377925 +| epoch 7 | 959/ 8400 batches | test loss 0.4676836 +| epoch 7 | 963/ 8400 batches | test loss 0.4527290 +| epoch 7 | 967/ 8400 batches | test loss 0.3769462 +| epoch 7 | 971/ 8400 batches | test loss 0.3693278 +| epoch 7 | 975/ 8400 batches | test loss 0.4181902 +| epoch 7 | 979/ 8400 batches | test loss 0.4797209 +| epoch 7 | 983/ 8400 batches | test loss 0.4535289 +| epoch 7 | 987/ 8400 batches | test loss 0.4626336 +| epoch 7 | 991/ 8400 batches | test loss 0.3902350 +| epoch 7 | 995/ 8400 batches | test loss 0.5053194 +| epoch 7 | 999/ 8400 batches | test loss 0.1966519 +| epoch 7 | 1003/ 8400 batches | test loss 0.4071286 +| epoch 7 | 1007/ 8400 batches | test loss 0.5067187 +| epoch 7 | 1011/ 8400 batches | test loss 0.4488175 +| epoch 7 | 1015/ 8400 batches | test loss 0.4203673 +| epoch 7 | 1019/ 8400 batches | test loss 0.5525476 +| epoch 7 | 1023/ 8400 batches | test loss 0.4224182 +| epoch 7 | 1027/ 8400 batches | test loss 0.3875982 +| epoch 7 | 1031/ 8400 batches | test loss 0.3165632 +| epoch 7 | 1035/ 8400 batches | test loss 0.4635226 +| epoch 7 | 1039/ 8400 batches | test loss 0.3815111 +| epoch 7 | 1043/ 8400 batches | test loss 0.4294202 +| epoch 7 | 1047/ 8400 batches | test loss 0.4436871 +| epoch 7 | 1051/ 8400 batches | test loss 0.2486399 +| epoch 7 | 1055/ 8400 batches | test loss 0.3772866 +| epoch 7 | 1059/ 8400 batches | test loss 0.4046165 +| epoch 7 | 1063/ 8400 batches | test loss 0.6099564 +| epoch 7 | 1067/ 8400 batches | test loss 0.3929551 +| epoch 7 | 1071/ 8400 batches | test loss 0.4435398 +| epoch 7 | 1075/ 8400 batches | test loss 0.3797417 +| epoch 7 | 1079/ 8400 batches | test loss 0.4288258 +| epoch 7 | 1083/ 8400 batches | test loss 0.3411862 +| epoch 7 | 1087/ 8400 batches | test loss 0.4405514 +| epoch 7 | 1091/ 8400 batches | test loss 0.4254222 +| epoch 7 | 1095/ 8400 batches | test loss 0.4388878 +| epoch 7 | 1099/ 8400 batches | test loss 0.4236178 +| epoch 7 | 1103/ 8400 batches | test loss 0.4337888 +| epoch 7 | 1107/ 8400 batches | test loss 0.3148258 +| epoch 7 | 1111/ 8400 batches | test loss 0.4058374 +| epoch 7 | 1115/ 8400 batches | test loss 0.3306689 +| epoch 7 | 1119/ 8400 batches | test loss 0.4711298 +| epoch 7 | 1123/ 8400 batches | test loss 0.3521277 +| epoch 7 | 1127/ 8400 batches | test loss 0.4175261 +| epoch 7 | 1131/ 8400 batches | test loss 0.4121007 +| epoch 7 | 1135/ 8400 batches | test loss 0.3493097 +| epoch 7 | 1139/ 8400 batches | test loss 0.4093800 +| epoch 7 | 1143/ 8400 batches | test loss 0.4186670 +| epoch 7 | 1147/ 8400 batches | test loss 0.4794073 +| epoch 7 | 1151/ 8400 batches | test loss 0.4188673 +| epoch 7 | 1155/ 8400 batches | test loss 0.4147881 +| epoch 7 | 1159/ 8400 batches | test loss 0.5360506 +| epoch 7 | 1163/ 8400 batches | test loss 0.4156929 +| epoch 7 | 1167/ 8400 batches | test loss 0.4115137 +| epoch 7 | 1171/ 8400 batches | test loss 0.6696240 +| epoch 7 | 1175/ 8400 batches | test loss 0.4836374 +| epoch 7 | 1179/ 8400 batches | test loss 0.4651808 +| epoch 7 | 1183/ 8400 batches | test loss 0.3190224 +| epoch 7 | 1187/ 8400 batches | test loss 0.6089510 +| epoch 7 | 1191/ 8400 batches | test loss 0.3724981 +| epoch 7 | 1195/ 8400 batches | test loss 0.3755461 +| epoch 7 | 1199/ 8400 batches | test loss 0.4158647 +| epoch 7 | 1203/ 8400 batches | test loss 0.3778952 +| epoch 7 | 1207/ 8400 batches | test loss 0.3699119 +| epoch 7 | 1211/ 8400 batches | test loss 0.4517716 +| epoch 7 | 1215/ 8400 batches | test loss 0.4692180 +| epoch 7 | 1219/ 8400 batches | test loss 0.3734456 +| epoch 7 | 1223/ 8400 batches | test loss 0.3707875 +| epoch 7 | 1227/ 8400 batches | test loss 0.5448213 +| epoch 7 | 1231/ 8400 batches | test loss 0.3830221 +| epoch 7 | 1235/ 8400 batches | test loss 0.4648595 +| epoch 7 | 1239/ 8400 batches | test loss 0.3788787 +| epoch 7 | 1243/ 8400 batches | test loss 0.4589320 +| epoch 7 | 1247/ 8400 batches | test loss 0.4770756 +| epoch 7 | 1251/ 8400 batches | test loss 0.4273068 +| epoch 7 | 1255/ 8400 batches | test loss 0.3420660 +| epoch 7 | 1259/ 8400 batches | test loss 0.3521150 +| epoch 7 | 1263/ 8400 batches | test loss 0.4618306 +| epoch 7 | 1267/ 8400 batches | test loss 0.4215196 +| epoch 7 | 1271/ 8400 batches | test loss 0.3641602 +| epoch 7 | 1275/ 8400 batches | test loss 0.3625297 +| epoch 7 | 1279/ 8400 batches | test loss 0.3461176 +| epoch 7 | 1283/ 8400 batches | test loss 0.3934405 +| epoch 7 | 1287/ 8400 batches | test loss 0.4597040 +| epoch 7 | 1291/ 8400 batches | test loss 0.3549728 +| epoch 7 | 1295/ 8400 batches | test loss 0.4454074 +| epoch 7 | 1299/ 8400 batches | test loss 0.5225722 +| epoch 7 | 1303/ 8400 batches | test loss 0.4375407 +| epoch 7 | 1307/ 8400 batches | test loss 0.4060744 +| epoch 7 | 1311/ 8400 batches | test loss 0.4092081 +| epoch 7 | 1315/ 8400 batches | test loss 0.3919944 +| epoch 7 | 1319/ 8400 batches | test loss 0.4574298 +| epoch 7 | 1323/ 8400 batches | test loss 0.4795658 +| epoch 7 | 1327/ 8400 batches | test loss 0.4675400 +| epoch 7 | 1331/ 8400 batches | test loss 0.4371884 +| epoch 7 | 1335/ 8400 batches | test loss 0.4113374 +| epoch 7 | 1339/ 8400 batches | test loss 0.3641574 +| epoch 7 | 1343/ 8400 batches | test loss 0.4137456 +| epoch 7 | 1347/ 8400 batches | test loss 0.3555947 +| epoch 7 | 1351/ 8400 batches | test loss 0.5346463 +| epoch 7 | 1355/ 8400 batches | test loss 0.4333992 +| epoch 7 | 1359/ 8400 batches | test loss 0.4113273 +| epoch 7 | 1363/ 8400 batches | test loss 0.3815405 +| epoch 7 | 1367/ 8400 batches | test loss 0.4315380 +| epoch 7 | 1371/ 8400 batches | test loss 0.3574265 +| epoch 7 | 1375/ 8400 batches | test loss 0.4562723 +| epoch 7 | 1379/ 8400 batches | test loss 0.4511337 +| epoch 7 | 1383/ 8400 batches | test loss 0.3549362 +| epoch 7 | 1387/ 8400 batches | test loss 0.3731141 +| epoch 7 | 1391/ 8400 batches | test loss 0.3436159 +| epoch 7 | 1395/ 8400 batches | test loss 0.4304355 +| epoch 7 | 1399/ 8400 batches | test loss 0.4970550 +| epoch 7 | 1403/ 8400 batches | test loss 0.4477446 +| epoch 7 | 1407/ 8400 batches | test loss 0.3960968 +| epoch 7 | 1411/ 8400 batches | test loss 0.4224701 +| epoch 7 | 1415/ 8400 batches | test loss 0.5144578 +| epoch 7 | 1419/ 8400 batches | test loss 0.3609391 +| epoch 7 | 1423/ 8400 batches | test loss 0.3336085 +| epoch 7 | 1427/ 8400 batches | test loss 0.4298012 +| epoch 7 | 1431/ 8400 batches | test loss 0.4925317 +| epoch 7 | 1435/ 8400 batches | test loss 0.4587812 +| epoch 7 | 1439/ 8400 batches | test loss 0.4414903 +| epoch 7 | 1443/ 8400 batches | test loss 0.4427744 +| epoch 7 | 1447/ 8400 batches | test loss 0.5273198 +| epoch 7 | 1451/ 8400 batches | test loss 0.3995771 +| epoch 7 | 1455/ 8400 batches | test loss 0.4034990 +| epoch 7 | 1459/ 8400 batches | test loss 0.4024194 +| epoch 7 | 1463/ 8400 batches | test loss 0.4142725 +| epoch 7 | 1467/ 8400 batches | test loss 0.3914917 +| epoch 7 | 1471/ 8400 batches | test loss 0.3760358 +| epoch 7 | 1475/ 8400 batches | test loss 0.4684117 +| epoch 7 | 1479/ 8400 batches | test loss 0.4549982 +| epoch 7 | 1483/ 8400 batches | test loss 0.4771532 +| epoch 7 | 1487/ 8400 batches | test loss 0.4146964 +| epoch 7 | 1491/ 8400 batches | test loss 0.3534658 +| epoch 7 | 1495/ 8400 batches | test loss 0.3773527 +| epoch 7 | 1499/ 8400 batches | test loss 0.4169587 +| epoch 7 | 1503/ 8400 batches | test loss 0.4533602 +| epoch 7 | 1507/ 8400 batches | test loss 0.4369629 +| epoch 7 | 1511/ 8400 batches | test loss 0.4487410 +| epoch 7 | 1515/ 8400 batches | test loss 0.5385600 +| epoch 7 | 1519/ 8400 batches | test loss 0.3460476 +| epoch 7 | 1523/ 8400 batches | test loss 0.4097558 +| epoch 7 | 1527/ 8400 batches | test loss 0.4599974 +| epoch 7 | 1531/ 8400 batches | test loss 0.4028229 +| epoch 7 | 1535/ 8400 batches | test loss 0.4127038 +| epoch 7 | 1539/ 8400 batches | test loss 0.4975435 +| epoch 7 | 1543/ 8400 batches | test loss 0.3951414 +| epoch 7 | 1547/ 8400 batches | test loss 0.5769118 +| epoch 7 | 1551/ 8400 batches | test loss 0.5231774 +| epoch 7 | 1555/ 8400 batches | test loss 0.4310992 +| epoch 7 | 1559/ 8400 batches | test loss 0.4161249 +| epoch 7 | 1563/ 8400 batches | test loss 0.4826195 +| epoch 7 | 1567/ 8400 batches | test loss 0.3960176 +| epoch 7 | 1571/ 8400 batches | test loss 0.3993383 +| epoch 7 | 1575/ 8400 batches | test loss 0.3785989 +| epoch 7 | 1579/ 8400 batches | test loss 0.3950821 +| epoch 7 | 1583/ 8400 batches | test loss 0.3705083 +| epoch 7 | 1587/ 8400 batches | test loss 0.4139783 +| epoch 7 | 1591/ 8400 batches | test loss 0.3850588 +| epoch 7 | 1595/ 8400 batches | test loss 0.3285331 +| epoch 7 | 1599/ 8400 batches | test loss 0.4350063 +| epoch 7 | 1603/ 8400 batches | test loss 0.3759762 +| epoch 7 | 1607/ 8400 batches | test loss 0.5281606 +| epoch 7 | 1611/ 8400 batches | test loss 0.3720339 +| epoch 7 | 1615/ 8400 batches | test loss 0.3357359 +| epoch 7 | 1619/ 8400 batches | test loss 0.3047609 +| epoch 7 | 1623/ 8400 batches | test loss 0.3976589 +| epoch 7 | 1627/ 8400 batches | test loss 0.3883049 +| epoch 7 | 1631/ 8400 batches | test loss 0.4961564 +| epoch 7 | 1635/ 8400 batches | test loss 0.4487717 +| epoch 7 | 1639/ 8400 batches | test loss 0.4393480 +| epoch 7 | 1643/ 8400 batches | test loss 0.3696775 +| epoch 7 | 1647/ 8400 batches | test loss 0.3941900 +| epoch 7 | 1651/ 8400 batches | test loss 0.4995243 +| epoch 7 | 1655/ 8400 batches | test loss 0.3706744 +| epoch 7 | 1659/ 8400 batches | test loss 0.4211784 +| epoch 7 | 1663/ 8400 batches | test loss 0.3940736 +| epoch 7 | 1667/ 8400 batches | test loss 0.7294219 +| epoch 7 | 1671/ 8400 batches | test loss 0.4039129 +| epoch 7 | 1675/ 8400 batches | test loss 0.4013571 +| epoch 7 | 1679/ 8400 batches | test loss 0.4995747 +| epoch 7 | 1683/ 8400 batches | test loss 0.3880870 +| epoch 7 | 1687/ 8400 batches | test loss 0.5364433 +| epoch 7 | 1691/ 8400 batches | test loss 0.4063171 +| epoch 7 | 1695/ 8400 batches | test loss 0.4448522 +| epoch 7 | 1699/ 8400 batches | test loss 0.3973210 +| epoch 7 | 1703/ 8400 batches | test loss 0.4378774 +| epoch 7 | 1707/ 8400 batches | test loss 0.5918363 +| epoch 7 | 1711/ 8400 batches | test loss 0.3864800 +| epoch 7 | 1715/ 8400 batches | test loss 0.4034275 +| epoch 7 | 1719/ 8400 batches | test loss 0.3625566 +| epoch 7 | 1723/ 8400 batches | test loss 0.3976682 +| epoch 7 | 1727/ 8400 batches | test loss 0.4122061 +| epoch 7 | 1731/ 8400 batches | test loss 0.3837121 +| epoch 7 | 1735/ 8400 batches | test loss 0.3980802 +| epoch 7 | 1739/ 8400 batches | test loss 0.3911712 +| epoch 7 | 1743/ 8400 batches | test loss 0.4278822 +| epoch 7 | 1747/ 8400 batches | test loss 0.4200178 +| epoch 7 | 1751/ 8400 batches | test loss 0.4094540 +| epoch 7 | 1755/ 8400 batches | test loss 0.5344236 +| epoch 7 | 1759/ 8400 batches | test loss 0.3250629 +| epoch 7 | 1763/ 8400 batches | test loss 0.5076529 +| epoch 7 | 1767/ 8400 batches | test loss 0.4282477 +| epoch 7 | 1771/ 8400 batches | test loss 0.4398170 +| epoch 7 | 1775/ 8400 batches | test loss 0.3938596 +| epoch 7 | 1779/ 8400 batches | test loss 0.3441035 +| epoch 7 | 1783/ 8400 batches | test loss 0.4046788 +| epoch 7 | 1787/ 8400 batches | test loss 0.4387384 +| epoch 7 | 1791/ 8400 batches | test loss 0.3794653 +| epoch 7 | 1795/ 8400 batches | test loss 0.4623925 +| epoch 7 | 1799/ 8400 batches | test loss 0.4148895 +| epoch 7 | 1803/ 8400 batches | test loss 0.3613578 +| epoch 7 | 1807/ 8400 batches | test loss 0.4356520 +| epoch 7 | 1811/ 8400 batches | test loss 0.3943071 +| epoch 7 | 1815/ 8400 batches | test loss 0.3634264 +| epoch 7 | 1819/ 8400 batches | test loss 0.3145631 +| epoch 7 | 1823/ 8400 batches | test loss 0.3524610 +| epoch 7 | 1827/ 8400 batches | test loss 0.4295076 +| epoch 7 | 1831/ 8400 batches | test loss 0.3606212 +| epoch 7 | 1835/ 8400 batches | test loss 0.4135669 +| epoch 7 | 1839/ 8400 batches | test loss 0.4448062 +| epoch 7 | 1843/ 8400 batches | test loss 0.3671367 +| epoch 7 | 1847/ 8400 batches | test loss 0.4015194 +| epoch 7 | 1851/ 8400 batches | test loss 0.4224134 +| epoch 7 | 1855/ 8400 batches | test loss 0.5066540 +| epoch 7 | 1859/ 8400 batches | test loss 0.4483446 +| epoch 7 | 1863/ 8400 batches | test loss 0.3761696 +| epoch 7 | 1867/ 8400 batches | test loss 0.4130138 +| epoch 7 | 1871/ 8400 batches | test loss 0.4027559 +| epoch 7 | 1875/ 8400 batches | test loss 0.3898552 +| epoch 7 | 1879/ 8400 batches | test loss 0.4273362 +| epoch 7 | 1883/ 8400 batches | test loss 0.4372123 +| epoch 7 | 1887/ 8400 batches | test loss 0.4210358 +| epoch 7 | 1891/ 8400 batches | test loss 0.5136068 +| epoch 7 | 1895/ 8400 batches | test loss 0.4420872 +| epoch 7 | 1899/ 8400 batches | test loss 0.4752890 +| epoch 7 | 1903/ 8400 batches | test loss 0.3963515 +| epoch 7 | 1907/ 8400 batches | test loss 0.2762086 +| epoch 7 | 1911/ 8400 batches | test loss 0.3646397 +| epoch 7 | 1915/ 8400 batches | test loss 0.3629389 +| epoch 7 | 1919/ 8400 batches | test loss 0.3787089 +| epoch 7 | 1923/ 8400 batches | test loss 0.4016837 +| epoch 7 | 1927/ 8400 batches | test loss 0.4717479 +| epoch 7 | 1931/ 8400 batches | test loss 0.3860522 +| epoch 7 | 1935/ 8400 batches | test loss 0.5326331 +| epoch 7 | 1939/ 8400 batches | test loss 0.5068903 +| epoch 7 | 1943/ 8400 batches | test loss 0.5286211 +| epoch 7 | 1947/ 8400 batches | test loss 0.3927140 +| epoch 7 | 1951/ 8400 batches | test loss 0.3922298 +| epoch 7 | 1955/ 8400 batches | test loss 0.3628710 +| epoch 7 | 1959/ 8400 batches | test loss 0.4212243 +| epoch 7 | 1963/ 8400 batches | test loss 0.3190008 +| epoch 7 | 1967/ 8400 batches | test loss 0.3985563 +| epoch 7 | 1971/ 8400 batches | test loss 0.4028061 +| epoch 7 | 1975/ 8400 batches | test loss 0.4524452 +| epoch 7 | 1979/ 8400 batches | test loss 0.4192121 +| epoch 7 | 1983/ 8400 batches | test loss 0.2897089 +| epoch 7 | 1987/ 8400 batches | test loss 0.3736970 +| epoch 7 | 1991/ 8400 batches | test loss 0.5133245 +| epoch 7 | 1995/ 8400 batches | test loss 0.4683081 +| epoch 7 | 1999/ 8400 batches | test loss 0.3676709 +| epoch 7 | 2003/ 8400 batches | test loss 0.3632419 +| epoch 7 | 2007/ 8400 batches | test loss 0.3755324 +| epoch 7 | 2011/ 8400 batches | test loss 0.5856382 +| epoch 7 | 2015/ 8400 batches | test loss 0.4113164 +| epoch 7 | 2019/ 8400 batches | test loss 0.5771115 +| epoch 7 | 2023/ 8400 batches | test loss 0.4376265 +| epoch 7 | 2027/ 8400 batches | test loss 0.4252020 +| epoch 7 | 2031/ 8400 batches | test loss 0.4735815 +| epoch 7 | 2035/ 8400 batches | test loss 0.4441618 +| epoch 7 | 2039/ 8400 batches | test loss 0.3417845 +| epoch 7 | 2043/ 8400 batches | test loss 0.3861853 +| epoch 7 | 2047/ 8400 batches | test loss 0.4510732 +| epoch 7 | 2051/ 8400 batches | test loss 0.4600616 +| epoch 7 | 2055/ 8400 batches | test loss 0.5124234 +| epoch 7 | 2059/ 8400 batches | test loss 0.4247091 +| epoch 7 | 2063/ 8400 batches | test loss 0.4700103 +| epoch 7 | 2067/ 8400 batches | test loss 0.3530943 +| epoch 7 | 2071/ 8400 batches | test loss 0.5037476 +| epoch 7 | 2075/ 8400 batches | test loss 0.3696307 +| epoch 7 | 2079/ 8400 batches | test loss 0.4122874 +| epoch 7 | 2083/ 8400 batches | test loss 0.3962082 +| epoch 7 | 2087/ 8400 batches | test loss 0.5057513 +| epoch 7 | 2091/ 8400 batches | test loss 0.3668855 +| epoch 7 | 2095/ 8400 batches | test loss 0.4395412 +| epoch 7 | 2099/ 8400 batches | test loss 0.5362852 +| epoch 7 | final test loss 0.4231, do not save model! +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 8400 batches | train loss 0.3464530 +| epoch 8 | 7/ 8400 batches | train loss 0.3073760 +| epoch 8 | 11/ 8400 batches | train loss 0.3572354 +| epoch 8 | 15/ 8400 batches | train loss 0.3451068 +| epoch 8 | 19/ 8400 batches | train loss 0.3005395 +| epoch 8 | 23/ 8400 batches | train loss 0.3209919 +| epoch 8 | 27/ 8400 batches | train loss 0.3366165 +| epoch 8 | 31/ 8400 batches | train loss 0.3600410 +| epoch 8 | 35/ 8400 batches | train loss 0.3588290 +| epoch 8 | 39/ 8400 batches | train loss 0.2386304 +| epoch 8 | 43/ 8400 batches | train loss 0.3951481 +| epoch 8 | 47/ 8400 batches | train loss 0.3602970 +| epoch 8 | 51/ 8400 batches | train loss 0.1367804 +| epoch 8 | 55/ 8400 batches | train loss 0.3200372 +| epoch 8 | 59/ 8400 batches | train loss 0.4516679 +| epoch 8 | 63/ 8400 batches | train loss 0.4044549 +| epoch 8 | 67/ 8400 batches | train loss 0.2660862 +| epoch 8 | 71/ 8400 batches | train loss 0.3223050 +| epoch 8 | 75/ 8400 batches | train loss 0.3880625 +| epoch 8 | 79/ 8400 batches | train loss 0.3511164 +| epoch 8 | 83/ 8400 batches | train loss 0.3243734 +| epoch 8 | 87/ 8400 batches | train loss 0.2426172 +| epoch 8 | 91/ 8400 batches | train loss 0.3284439 +| epoch 8 | 95/ 8400 batches | train loss 0.3250198 +| epoch 8 | 99/ 8400 batches | train loss 0.3851922 +| epoch 8 | 103/ 8400 batches | train loss 0.3422816 +| epoch 8 | 107/ 8400 batches | train loss 0.3735997 +| epoch 8 | 111/ 8400 batches | train loss 0.3009205 +| epoch 8 | 115/ 8400 batches | train loss 0.3191334 +| epoch 8 | 119/ 8400 batches | train loss 0.3495525 +| epoch 8 | 123/ 8400 batches | train loss 0.3467649 +| epoch 8 | 127/ 8400 batches | train loss 0.2920147 +| epoch 8 | 131/ 8400 batches | train loss 0.3134912 +| epoch 8 | 135/ 8400 batches | train loss 0.3516272 +| epoch 8 | 139/ 8400 batches | train loss 0.3427733 +| epoch 8 | 143/ 8400 batches | train loss 0.3289681 +| epoch 8 | 147/ 8400 batches | train loss 0.3471358 +| epoch 8 | 151/ 8400 batches | train loss 0.3101782 +| epoch 8 | 155/ 8400 batches | train loss 0.3725362 +| epoch 8 | 159/ 8400 batches | train loss 0.3093776 +| epoch 8 | 163/ 8400 batches | train loss 0.2848088 +| epoch 8 | 167/ 8400 batches | train loss 0.2927251 +| epoch 8 | 171/ 8400 batches | train loss 0.3151742 +| epoch 8 | 175/ 8400 batches | train loss 0.3366306 +| epoch 8 | 179/ 8400 batches | train loss 0.2786125 +| epoch 8 | 183/ 8400 batches | train loss 0.3540570 +| epoch 8 | 187/ 8400 batches | train loss 0.3493827 +| epoch 8 | 191/ 8400 batches | train loss 0.3403282 +| epoch 8 | 195/ 8400 batches | train loss 0.3812878 +| epoch 8 | 199/ 8400 batches | train loss 0.3831297 +| epoch 8 | 203/ 8400 batches | train loss 0.3735581 +| epoch 8 | 207/ 8400 batches | train loss 0.3324983 +| epoch 8 | 211/ 8400 batches | train loss 0.3320051 +| epoch 8 | 215/ 8400 batches | train loss 0.3007060 +| epoch 8 | 219/ 8400 batches | train loss 0.3290244 +| epoch 8 | 223/ 8400 batches | train loss 0.3262276 +| epoch 8 | 227/ 8400 batches | train loss 0.3615452 +| epoch 8 | 231/ 8400 batches | train loss 0.3598825 +| epoch 8 | 235/ 8400 batches | train loss 0.3168952 +| epoch 8 | 239/ 8400 batches | train loss 0.3533952 +| epoch 8 | 243/ 8400 batches | train loss 0.4473849 +| epoch 8 | 247/ 8400 batches | train loss 0.3353578 +| epoch 8 | 251/ 8400 batches | train loss 0.3728693 +| epoch 8 | 255/ 8400 batches | train loss 0.4156374 +| epoch 8 | 259/ 8400 batches | train loss 0.3251037 +| epoch 8 | 263/ 8400 batches | train loss 0.3923597 +| epoch 8 | 267/ 8400 batches | train loss 0.3412269 +| epoch 8 | 271/ 8400 batches | train loss 0.3520290 +| epoch 8 | 275/ 8400 batches | train loss 0.3185991 +| epoch 8 | 279/ 8400 batches | train loss 0.3747047 +| epoch 8 | 283/ 8400 batches | train loss 0.3404784 +| epoch 8 | 287/ 8400 batches | train loss 0.3787861 +| epoch 8 | 291/ 8400 batches | train loss 0.2915505 +| epoch 8 | 295/ 8400 batches | train loss 0.3439270 +| epoch 8 | 299/ 8400 batches | train loss 0.3096762 +| epoch 8 | 303/ 8400 batches | train loss 0.3617016 +| epoch 8 | 307/ 8400 batches | train loss 0.3480337 +| epoch 8 | 311/ 8400 batches | train loss 0.2814493 +| epoch 8 | 315/ 8400 batches | train loss 0.3571859 +| epoch 8 | 319/ 8400 batches | train loss 0.3776016 +| epoch 8 | 323/ 8400 batches | train loss 0.3803186 +| epoch 8 | 327/ 8400 batches | train loss 0.3069845 +| epoch 8 | 331/ 8400 batches | train loss 0.3320301 +| epoch 8 | 335/ 8400 batches | train loss 0.3186387 +| epoch 8 | 339/ 8400 batches | train loss 0.3220245 +| epoch 8 | 343/ 8400 batches | train loss 0.3517922 +| epoch 8 | 347/ 8400 batches | train loss 0.3332786 +| epoch 8 | 351/ 8400 batches | train loss 0.3814577 +| epoch 8 | 355/ 8400 batches | train loss 0.3106561 +| epoch 8 | 359/ 8400 batches | train loss 0.3136333 +| epoch 8 | 363/ 8400 batches | train loss 0.3598379 +| epoch 8 | 367/ 8400 batches | train loss 0.2778042 +| epoch 8 | 371/ 8400 batches | train loss 0.3883075 +| epoch 8 | 375/ 8400 batches | train loss 0.3121412 +| epoch 8 | 379/ 8400 batches | train loss 0.3597933 +| epoch 8 | 383/ 8400 batches | train loss 0.3955780 +| epoch 8 | 387/ 8400 batches | train loss 0.3018816 +| epoch 8 | 391/ 8400 batches | train loss 0.3216893 +| epoch 8 | 395/ 8400 batches | train loss 0.3865772 +| epoch 8 | 399/ 8400 batches | train loss 0.3643622 +| epoch 8 | 403/ 8400 batches | train loss 0.3620605 +| epoch 8 | 407/ 8400 batches | train loss 0.3274035 +| epoch 8 | 411/ 8400 batches | train loss 0.4498476 +| epoch 8 | 415/ 8400 batches | train loss 0.3122667 +| epoch 8 | 419/ 8400 batches | train loss 0.3762189 +| epoch 8 | 423/ 8400 batches | train loss 0.3574459 +| epoch 8 | 427/ 8400 batches | train loss 0.3644782 +| epoch 8 | 431/ 8400 batches | train loss 0.3124503 +| epoch 8 | 435/ 8400 batches | train loss 0.3243048 +| epoch 8 | 439/ 8400 batches | train loss 0.3193182 +| epoch 8 | 443/ 8400 batches | train loss 0.3438644 +| epoch 8 | 447/ 8400 batches | train loss 0.3951917 +| epoch 8 | 451/ 8400 batches | train loss 0.3654469 +| epoch 8 | 455/ 8400 batches | train loss 0.3169554 +| epoch 8 | 459/ 8400 batches | train loss 0.4015052 +| epoch 8 | 463/ 8400 batches | train loss 0.3724319 +| epoch 8 | 467/ 8400 batches | train loss 0.4221740 +| epoch 8 | 471/ 8400 batches | train loss 0.3129227 +| epoch 8 | 475/ 8400 batches | train loss 0.3506666 +| epoch 8 | 479/ 8400 batches | train loss 0.3138176 +| epoch 8 | 483/ 8400 batches | train loss 0.3381253 +| epoch 8 | 487/ 8400 batches | train loss 0.2995946 +| epoch 8 | 491/ 8400 batches | train loss 0.3005978 +| epoch 8 | 495/ 8400 batches | train loss 0.3141748 +| epoch 8 | 499/ 8400 batches | train loss 0.3493816 +| epoch 8 | 503/ 8400 batches | train loss 0.4395403 +| epoch 8 | 507/ 8400 batches | train loss 0.3503028 +| epoch 8 | 511/ 8400 batches | train loss 0.3557448 +| epoch 8 | 515/ 8400 batches | train loss 0.3750366 +| epoch 8 | 519/ 8400 batches | train loss 0.2664407 +| epoch 8 | 523/ 8400 batches | train loss 0.3229573 +| epoch 8 | 527/ 8400 batches | train loss 0.2995170 +| epoch 8 | 531/ 8400 batches | train loss 0.3203323 +| epoch 8 | 535/ 8400 batches | train loss 0.4058481 +| epoch 8 | 539/ 8400 batches | train loss 0.2746753 +| epoch 8 | 543/ 8400 batches | train loss 0.2937620 +| epoch 8 | 547/ 8400 batches | train loss 0.3834544 +| epoch 8 | 551/ 8400 batches | train loss 0.3404389 +| epoch 8 | 555/ 8400 batches | train loss 0.4035619 +| epoch 8 | 559/ 8400 batches | train loss 0.2783430 +| epoch 8 | 563/ 8400 batches | train loss 0.3667358 +| epoch 8 | 567/ 8400 batches | train loss 0.3534609 +| epoch 8 | 571/ 8400 batches | train loss 0.3148026 +| epoch 8 | 575/ 8400 batches | train loss 0.2908633 +| epoch 8 | 579/ 8400 batches | train loss 0.3089363 +| epoch 8 | 583/ 8400 batches | train loss 0.3028547 +| epoch 8 | 587/ 8400 batches | train loss 0.3023254 +| epoch 8 | 591/ 8400 batches | train loss 0.3594234 +| epoch 8 | 595/ 8400 batches | train loss 0.3575266 +| epoch 8 | 599/ 8400 batches | train loss 0.2891416 +| epoch 8 | 603/ 8400 batches | train loss 0.2861642 +| epoch 8 | 607/ 8400 batches | train loss 0.3975822 +| epoch 8 | 611/ 8400 batches | train loss 0.3116944 +| epoch 8 | 615/ 8400 batches | train loss 0.3582530 +| epoch 8 | 619/ 8400 batches | train loss 0.3236693 +| epoch 8 | 623/ 8400 batches | train loss 0.3270369 +| epoch 8 | 627/ 8400 batches | train loss 0.2960234 +| epoch 8 | 631/ 8400 batches | train loss 0.3521554 +| epoch 8 | 635/ 8400 batches | train loss 0.2064427 +| epoch 8 | 639/ 8400 batches | train loss 0.3961120 +| epoch 8 | 643/ 8400 batches | train loss 0.3011793 +| epoch 8 | 647/ 8400 batches | train loss 0.3612428 +| epoch 8 | 651/ 8400 batches | train loss 0.2979309 +| epoch 8 | 655/ 8400 batches | train loss 0.3492708 +| epoch 8 | 659/ 8400 batches | train loss 0.3742415 +| epoch 8 | 663/ 8400 batches | train loss 0.3348774 +| epoch 8 | 667/ 8400 batches | train loss 0.3279139 +| epoch 8 | 671/ 8400 batches | train loss 0.3687304 +| epoch 8 | 675/ 8400 batches | train loss 0.3206378 +| epoch 8 | 679/ 8400 batches | train loss 0.2876862 +| epoch 8 | 683/ 8400 batches | train loss 0.2972730 +| epoch 8 | 687/ 8400 batches | train loss 0.2680071 +| epoch 8 | 691/ 8400 batches | train loss 0.3697150 +| epoch 8 | 695/ 8400 batches | train loss 0.3048662 +| epoch 8 | 699/ 8400 batches | train loss 0.3084047 +| epoch 8 | 703/ 8400 batches | train loss 0.3536400 +| epoch 8 | 707/ 8400 batches | train loss 0.3126184 +| epoch 8 | 711/ 8400 batches | train loss 0.2922245 +| epoch 8 | 715/ 8400 batches | train loss 0.3631845 +| epoch 8 | 719/ 8400 batches | train loss 0.3208247 +| epoch 8 | 723/ 8400 batches | train loss 0.3860889 +| epoch 8 | 727/ 8400 batches | train loss 0.3238682 +| epoch 8 | 731/ 8400 batches | train loss 0.3585995 +| epoch 8 | 735/ 8400 batches | train loss 0.3334264 +| epoch 8 | 739/ 8400 batches | train loss 0.3393432 +| epoch 8 | 743/ 8400 batches | train loss 0.4260060 +| epoch 8 | 747/ 8400 batches | train loss 0.4238504 +| epoch 8 | 751/ 8400 batches | train loss 0.3302577 +| epoch 8 | 755/ 8400 batches | train loss 0.3100595 +| epoch 8 | 759/ 8400 batches | train loss 0.3874786 +| epoch 8 | 763/ 8400 batches | train loss 0.3906780 +| epoch 8 | 767/ 8400 batches | train loss 0.3463249 +| epoch 8 | 771/ 8400 batches | train loss 0.3890414 +| epoch 8 | 775/ 8400 batches | train loss 0.3779213 +| epoch 8 | 779/ 8400 batches | train loss 0.3458090 +| epoch 8 | 783/ 8400 batches | train loss 0.4126896 +| epoch 8 | 787/ 8400 batches | train loss 0.4108571 +| epoch 8 | 791/ 8400 batches | train loss 0.3932644 +| epoch 8 | 795/ 8400 batches | train loss 0.3030617 +| epoch 8 | 799/ 8400 batches | train loss 0.3158528 +| epoch 8 | 803/ 8400 batches | train loss 0.4076852 +| epoch 8 | 807/ 8400 batches | train loss 0.3182354 +| epoch 8 | 811/ 8400 batches | train loss 0.3162937 +| epoch 8 | 815/ 8400 batches | train loss 0.3515105 +| epoch 8 | 819/ 8400 batches | train loss 0.3692377 +| epoch 8 | 823/ 8400 batches | train loss 0.3815955 +| epoch 8 | 827/ 8400 batches | train loss 0.4509635 +| epoch 8 | 831/ 8400 batches | train loss 0.2953214 +| epoch 8 | 835/ 8400 batches | train loss 0.3003519 +| epoch 8 | 839/ 8400 batches | train loss 0.3248129 +| epoch 8 | 843/ 8400 batches | train loss 0.3068950 +| epoch 8 | 847/ 8400 batches | train loss 0.3953323 +| epoch 8 | 851/ 8400 batches | train loss 0.3089028 +| epoch 8 | 855/ 8400 batches | train loss 0.3730105 +| epoch 8 | 859/ 8400 batches | train loss 0.3126928 +| epoch 8 | 863/ 8400 batches | train loss 0.3820183 +| epoch 8 | 867/ 8400 batches | train loss 0.3714802 +| epoch 8 | 871/ 8400 batches | train loss 0.3181486 +| epoch 8 | 875/ 8400 batches | train loss 0.3396105 +| epoch 8 | 879/ 8400 batches | train loss 0.3009099 +| epoch 8 | 883/ 8400 batches | train loss 0.3957964 +| epoch 8 | 887/ 8400 batches | train loss 0.3556268 +| epoch 8 | 891/ 8400 batches | train loss 0.3405375 +| epoch 8 | 895/ 8400 batches | train loss 0.3675137 +| epoch 8 | 899/ 8400 batches | train loss 0.3160529 +| epoch 8 | 903/ 8400 batches | train loss 0.3393849 +| epoch 8 | 907/ 8400 batches | train loss 0.2948771 +| epoch 8 | 911/ 8400 batches | train loss 0.3148775 +| epoch 8 | 915/ 8400 batches | train loss 0.3111401 +| epoch 8 | 919/ 8400 batches | train loss 0.3043615 +| epoch 8 | 923/ 8400 batches | train loss 0.3881009 +| epoch 8 | 927/ 8400 batches | train loss 0.4181836 +| epoch 8 | 931/ 8400 batches | train loss 0.2839526 +| epoch 8 | 935/ 8400 batches | train loss 0.3802503 +| epoch 8 | 939/ 8400 batches | train loss 0.4237019 +| epoch 8 | 943/ 8400 batches | train loss 0.3639533 +| epoch 8 | 947/ 8400 batches | train loss 0.3244004 +| epoch 8 | 951/ 8400 batches | train loss 0.3634807 +| epoch 8 | 955/ 8400 batches | train loss 0.2954682 +| epoch 8 | 959/ 8400 batches | train loss 0.3337856 +| epoch 8 | 963/ 8400 batches | train loss 0.3499227 +| epoch 8 | 967/ 8400 batches | train loss 0.3168558 +| epoch 8 | 971/ 8400 batches | train loss 0.3978771 +| epoch 8 | 975/ 8400 batches | train loss 0.2985771 +| epoch 8 | 979/ 8400 batches | train loss 0.3058401 +| epoch 8 | 983/ 8400 batches | train loss 0.3167465 +| epoch 8 | 987/ 8400 batches | train loss 0.3134637 +| epoch 8 | 991/ 8400 batches | train loss 0.4716953 +| epoch 8 | 995/ 8400 batches | train loss 0.2981474 +| epoch 8 | 999/ 8400 batches | train loss 0.3750201 +| epoch 8 | 1003/ 8400 batches | train loss 0.3697215 +| epoch 8 | 1007/ 8400 batches | train loss 0.3508645 +| epoch 8 | 1011/ 8400 batches | train loss 0.3882048 +| epoch 8 | 1015/ 8400 batches | train loss 0.3825395 +| epoch 8 | 1019/ 8400 batches | train loss 0.3589889 +| epoch 8 | 1023/ 8400 batches | train loss 0.3459960 +| epoch 8 | 1027/ 8400 batches | train loss 0.3071174 +| epoch 8 | 1031/ 8400 batches | train loss 0.3510233 +| epoch 8 | 1035/ 8400 batches | train loss 0.3397998 +| epoch 8 | 1039/ 8400 batches | train loss 0.3190699 +| epoch 8 | 1043/ 8400 batches | train loss 0.3449031 +| epoch 8 | 1047/ 8400 batches | train loss 0.3848352 +| epoch 8 | 1051/ 8400 batches | train loss 0.3140991 +| epoch 8 | 1055/ 8400 batches | train loss 0.3144080 +| epoch 8 | 1059/ 8400 batches | train loss 0.4145879 +| epoch 8 | 1063/ 8400 batches | train loss 0.3358496 +| epoch 8 | 1067/ 8400 batches | train loss 0.2461316 +| epoch 8 | 1071/ 8400 batches | train loss 0.3203995 +| epoch 8 | 1075/ 8400 batches | train loss 0.3696995 +| epoch 8 | 1079/ 8400 batches | train loss 0.3387727 +| epoch 8 | 1083/ 8400 batches | train loss 0.3684369 +| epoch 8 | 1087/ 8400 batches | train loss 0.2903207 +| epoch 8 | 1091/ 8400 batches | train loss 0.3686507 +| epoch 8 | 1095/ 8400 batches | train loss 0.4364193 +| epoch 8 | 1099/ 8400 batches | train loss 0.3330095 +| epoch 8 | 1103/ 8400 batches | train loss 0.3972001 +| epoch 8 | 1107/ 8400 batches | train loss 0.3638337 +| epoch 8 | 1111/ 8400 batches | train loss 0.3411672 +| epoch 8 | 1115/ 8400 batches | train loss 0.3347107 +| epoch 8 | 1119/ 8400 batches | train loss 0.2885478 +| epoch 8 | 1123/ 8400 batches | train loss 0.2287230 +| epoch 8 | 1127/ 8400 batches | train loss 0.3066367 +| epoch 8 | 1131/ 8400 batches | train loss 0.3086621 +| epoch 8 | 1135/ 8400 batches | train loss 0.3667956 +| epoch 8 | 1139/ 8400 batches | train loss 0.3673776 +| epoch 8 | 1143/ 8400 batches | train loss 0.3615837 +| epoch 8 | 1147/ 8400 batches | train loss 0.3108866 +| epoch 8 | 1151/ 8400 batches | train loss 0.3406702 +| epoch 8 | 1155/ 8400 batches | train loss 0.3253563 +| epoch 8 | 1159/ 8400 batches | train loss 0.3744709 +| epoch 8 | 1163/ 8400 batches | train loss 0.3833627 +| epoch 8 | 1167/ 8400 batches | train loss 0.3197391 +| epoch 8 | 1171/ 8400 batches | train loss 0.2992511 +| epoch 8 | 1175/ 8400 batches | train loss 0.3384591 +| epoch 8 | 1179/ 8400 batches | train loss 0.3291167 +| epoch 8 | 1183/ 8400 batches | train loss 0.3000451 +| epoch 8 | 1187/ 8400 batches | train loss 0.3406258 +| epoch 8 | 1191/ 8400 batches | train loss 0.4023840 +| epoch 8 | 1195/ 8400 batches | train loss 0.3836920 +| epoch 8 | 1199/ 8400 batches | train loss 0.3092320 +| epoch 8 | 1203/ 8400 batches | train loss 0.3829659 +| epoch 8 | 1207/ 8400 batches | train loss 0.2885684 +| epoch 8 | 1211/ 8400 batches | train loss 0.3857212 +| epoch 8 | 1215/ 8400 batches | train loss 0.3617512 +| epoch 8 | 1219/ 8400 batches | train loss 0.3429587 +| epoch 8 | 1223/ 8400 batches | train loss 0.3609851 +| epoch 8 | 1227/ 8400 batches | train loss 0.3572393 +| epoch 8 | 1231/ 8400 batches | train loss 0.3654529 +| epoch 8 | 1235/ 8400 batches | train loss 0.3787791 +| epoch 8 | 1239/ 8400 batches | train loss 0.3499469 +| epoch 8 | 1243/ 8400 batches | train loss 0.3827738 +| epoch 8 | 1247/ 8400 batches | train loss 0.3169459 +| epoch 8 | 1251/ 8400 batches | train loss 0.3479027 +| epoch 8 | 1255/ 8400 batches | train loss 0.3890980 +| epoch 8 | 1259/ 8400 batches | train loss 0.3512321 +| epoch 8 | 1263/ 8400 batches | train loss 0.3904505 +| epoch 8 | 1267/ 8400 batches | train loss 0.3468582 +| epoch 8 | 1271/ 8400 batches | train loss 0.3119719 +| epoch 8 | 1275/ 8400 batches | train loss 0.3235081 +| epoch 8 | 1279/ 8400 batches | train loss 0.3480841 +| epoch 8 | 1283/ 8400 batches | train loss 0.3232144 +| epoch 8 | 1287/ 8400 batches | train loss 0.3223769 +| epoch 8 | 1291/ 8400 batches | train loss 0.3053257 +| epoch 8 | 1295/ 8400 batches | train loss 0.3754758 +| epoch 8 | 1299/ 8400 batches | train loss 0.3575164 +| epoch 8 | 1303/ 8400 batches | train loss 0.3196355 +| epoch 8 | 1307/ 8400 batches | train loss 0.4023060 +| epoch 8 | 1311/ 8400 batches | train loss 0.2875387 +| epoch 8 | 1315/ 8400 batches | train loss 0.3087655 +| epoch 8 | 1319/ 8400 batches | train loss 0.3046810 +| epoch 8 | 1323/ 8400 batches | train loss 0.3629075 +| epoch 8 | 1327/ 8400 batches | train loss 0.3473235 +| epoch 8 | 1331/ 8400 batches | train loss 0.3094388 +| epoch 8 | 1335/ 8400 batches | train loss 0.4040097 +| epoch 8 | 1339/ 8400 batches | train loss 0.3307439 +| epoch 8 | 1343/ 8400 batches | train loss 0.3329259 +| epoch 8 | 1347/ 8400 batches | train loss 0.2945705 +| epoch 8 | 1351/ 8400 batches | train loss 0.3648274 +| epoch 8 | 1355/ 8400 batches | train loss 0.3892895 +| epoch 8 | 1359/ 8400 batches | train loss 0.3473936 +| epoch 8 | 1363/ 8400 batches | train loss 0.3534595 +| epoch 8 | 1367/ 8400 batches | train loss 0.3758458 +| epoch 8 | 1371/ 8400 batches | train loss 0.3239634 +| epoch 8 | 1375/ 8400 batches | train loss 0.3772014 +| epoch 8 | 1379/ 8400 batches | train loss 0.3598695 +| epoch 8 | 1383/ 8400 batches | train loss 0.3676691 +| epoch 8 | 1387/ 8400 batches | train loss 0.3450181 +| epoch 8 | 1391/ 8400 batches | train loss 0.3640853 +| epoch 8 | 1395/ 8400 batches | train loss 0.3495681 +| epoch 8 | 1399/ 8400 batches | train loss 0.4061474 +| epoch 8 | 1403/ 8400 batches | train loss 0.3143359 +| epoch 8 | 1407/ 8400 batches | train loss 0.3503263 +| epoch 8 | 1411/ 8400 batches | train loss 0.3570443 +| epoch 8 | 1415/ 8400 batches | train loss 0.3333021 +| epoch 8 | 1419/ 8400 batches | train loss 0.3560429 +| epoch 8 | 1423/ 8400 batches | train loss 0.3347141 +| epoch 8 | 1427/ 8400 batches | train loss 0.3741325 +| epoch 8 | 1431/ 8400 batches | train loss 0.3697649 +| epoch 8 | 1435/ 8400 batches | train loss 0.3611481 +| epoch 8 | 1439/ 8400 batches | train loss 0.3521716 +| epoch 8 | 1443/ 8400 batches | train loss 0.3941025 +| epoch 8 | 1447/ 8400 batches | train loss 0.3573881 +| epoch 8 | 1451/ 8400 batches | train loss 0.3573399 +| epoch 8 | 1455/ 8400 batches | train loss 0.3858527 +| epoch 8 | 1459/ 8400 batches | train loss 0.3116580 +| epoch 8 | 1463/ 8400 batches | train loss 0.3484338 +| epoch 8 | 1467/ 8400 batches | train loss 0.3294196 +| epoch 8 | 1471/ 8400 batches | train loss 0.2844813 +| epoch 8 | 1475/ 8400 batches | train loss 0.4039487 +| epoch 8 | 1479/ 8400 batches | train loss 0.3164947 +| epoch 8 | 1483/ 8400 batches | train loss 0.3598602 +| epoch 8 | 1487/ 8400 batches | train loss 0.3957415 +| epoch 8 | 1491/ 8400 batches | train loss 0.3332379 +| epoch 8 | 1495/ 8400 batches | train loss 0.3964042 +| epoch 8 | 1499/ 8400 batches | train loss 0.3888209 +| epoch 8 | 1503/ 8400 batches | train loss 0.3127682 +| epoch 8 | 1507/ 8400 batches | train loss 0.3854379 +| epoch 8 | 1511/ 8400 batches | train loss 0.2973226 +| epoch 8 | 1515/ 8400 batches | train loss 0.3706268 +| epoch 8 | 1519/ 8400 batches | train loss 0.3344195 +| epoch 8 | 1523/ 8400 batches | train loss 0.3391657 +| epoch 8 | 1527/ 8400 batches | train loss 0.3501452 +| epoch 8 | 1531/ 8400 batches | train loss 0.2946076 +| epoch 8 | 1535/ 8400 batches | train loss 0.3642031 +| epoch 8 | 1539/ 8400 batches | train loss 0.3489307 +| epoch 8 | 1543/ 8400 batches | train loss 0.3218030 +| epoch 8 | 1547/ 8400 batches | train loss 0.3375809 +| epoch 8 | 1551/ 8400 batches | train loss 0.4504597 +| epoch 8 | 1555/ 8400 batches | train loss 0.3897879 +| epoch 8 | 1559/ 8400 batches | train loss 0.3801771 +| epoch 8 | 1563/ 8400 batches | train loss 0.3478350 +| epoch 8 | 1567/ 8400 batches | train loss 0.3879687 +| epoch 8 | 1571/ 8400 batches | train loss 0.3088819 +| epoch 8 | 1575/ 8400 batches | train loss 0.3589637 +| epoch 8 | 1579/ 8400 batches | train loss 0.4122815 +| epoch 8 | 1583/ 8400 batches | train loss 0.4288975 +| epoch 8 | 1587/ 8400 batches | train loss 0.3037049 +| epoch 8 | 1591/ 8400 batches | train loss 0.3677897 +| epoch 8 | 1595/ 8400 batches | train loss 0.3418479 +| epoch 8 | 1599/ 8400 batches | train loss 0.3528691 +| epoch 8 | 1603/ 8400 batches | train loss 0.3851134 +| epoch 8 | 1607/ 8400 batches | train loss 0.3688754 +| epoch 8 | 1611/ 8400 batches | train loss 0.3119519 +| epoch 8 | 1615/ 8400 batches | train loss 0.3484994 +| epoch 8 | 1619/ 8400 batches | train loss 0.3605282 +| epoch 8 | 1623/ 8400 batches | train loss 0.3037304 +| epoch 8 | 1627/ 8400 batches | train loss 0.4187023 +| epoch 8 | 1631/ 8400 batches | train loss 0.3779936 +| epoch 8 | 1635/ 8400 batches | train loss 0.3376790 +| epoch 8 | 1639/ 8400 batches | train loss 0.3999688 +| epoch 8 | 1643/ 8400 batches | train loss 0.3316413 +| epoch 8 | 1647/ 8400 batches | train loss 0.2963514 +| epoch 8 | 1651/ 8400 batches | train loss 0.3601607 +| epoch 8 | 1655/ 8400 batches | train loss 0.3299913 +| epoch 8 | 1659/ 8400 batches | train loss 0.2916083 +| epoch 8 | 1663/ 8400 batches | train loss 0.3659002 +| epoch 8 | 1667/ 8400 batches | train loss 0.3329684 +| epoch 8 | 1671/ 8400 batches | train loss 0.3279758 +| epoch 8 | 1675/ 8400 batches | train loss 0.3294720 +| epoch 8 | 1679/ 8400 batches | train loss 0.3700001 +| epoch 8 | 1683/ 8400 batches | train loss 0.3814855 +| epoch 8 | 1687/ 8400 batches | train loss 0.3593648 +| epoch 8 | 1691/ 8400 batches | train loss 0.4057676 +| epoch 8 | 1695/ 8400 batches | train loss 0.2668963 +| epoch 8 | 1699/ 8400 batches | train loss 0.2581354 +| epoch 8 | 1703/ 8400 batches | train loss 0.2093479 +| epoch 8 | 1707/ 8400 batches | train loss 0.3750390 +| epoch 8 | 1711/ 8400 batches | train loss 0.3539090 +| epoch 8 | 1715/ 8400 batches | train loss 0.3488694 +| epoch 8 | 1719/ 8400 batches | train loss 0.4350421 +| epoch 8 | 1723/ 8400 batches | train loss 0.3559399 +| epoch 8 | 1727/ 8400 batches | train loss 0.3811899 +| epoch 8 | 1731/ 8400 batches | train loss 0.3894885 +| epoch 8 | 1735/ 8400 batches | train loss 0.3880291 +| epoch 8 | 1739/ 8400 batches | train loss 0.3912753 +| epoch 8 | 1743/ 8400 batches | train loss 0.3023303 +| epoch 8 | 1747/ 8400 batches | train loss 0.3734117 +| epoch 8 | 1751/ 8400 batches | train loss 0.3337436 +| epoch 8 | 1755/ 8400 batches | train loss 0.3244489 +| epoch 8 | 1759/ 8400 batches | train loss 0.3904508 +| epoch 8 | 1763/ 8400 batches | train loss 0.3507341 +| epoch 8 | 1767/ 8400 batches | train loss 0.3410627 +| epoch 8 | 1771/ 8400 batches | train loss 0.3890262 +| epoch 8 | 1775/ 8400 batches | train loss 0.4246019 +| epoch 8 | 1779/ 8400 batches | train loss 0.3236340 +| epoch 8 | 1783/ 8400 batches | train loss 0.3880764 +| epoch 8 | 1787/ 8400 batches | train loss 0.3306544 +| epoch 8 | 1791/ 8400 batches | train loss 0.3683113 +| epoch 8 | 1795/ 8400 batches | train loss 0.3243662 +| epoch 8 | 1799/ 8400 batches | train loss 0.3149011 +| epoch 8 | 1803/ 8400 batches | train loss 0.3351905 +| epoch 8 | 1807/ 8400 batches | train loss 0.3442446 +| epoch 8 | 1811/ 8400 batches | train loss 0.1380402 +| epoch 8 | 1815/ 8400 batches | train loss 0.3645658 +| epoch 8 | 1819/ 8400 batches | train loss 0.3832495 +| epoch 8 | 1823/ 8400 batches | train loss 0.3375890 +| epoch 8 | 1827/ 8400 batches | train loss 0.2880556 +| epoch 8 | 1831/ 8400 batches | train loss 0.2149277 +| epoch 8 | 1835/ 8400 batches | train loss 0.2643119 +| epoch 8 | 1839/ 8400 batches | train loss 0.3484843 +| epoch 8 | 1843/ 8400 batches | train loss 0.2181673 +| epoch 8 | 1847/ 8400 batches | train loss 0.3862484 +| epoch 8 | 1851/ 8400 batches | train loss 0.3615030 +| epoch 8 | 1855/ 8400 batches | train loss 0.3226824 +| epoch 8 | 1859/ 8400 batches | train loss 0.3711787 +| epoch 8 | 1863/ 8400 batches | train loss 0.4708917 +| epoch 8 | 1867/ 8400 batches | train loss 0.2980140 +| epoch 8 | 1871/ 8400 batches | train loss 0.4115513 +| epoch 8 | 1875/ 8400 batches | train loss 0.3721332 +| epoch 8 | 1879/ 8400 batches | train loss 0.3503674 +| epoch 8 | 1883/ 8400 batches | train loss 0.3443426 +| epoch 8 | 1887/ 8400 batches | train loss 0.4146059 +| epoch 8 | 1891/ 8400 batches | train loss 0.3735384 +| epoch 8 | 1895/ 8400 batches | train loss 0.3846861 +| epoch 8 | 1899/ 8400 batches | train loss 0.3547115 +| epoch 8 | 1903/ 8400 batches | train loss 0.3609380 +| epoch 8 | 1907/ 8400 batches | train loss 0.3546466 +| epoch 8 | 1911/ 8400 batches | train loss 0.3119279 +| epoch 8 | 1915/ 8400 batches | train loss 0.3597011 +| epoch 8 | 1919/ 8400 batches | train loss 0.3769141 +| epoch 8 | 1923/ 8400 batches | train loss 0.3787578 +| epoch 8 | 1927/ 8400 batches | train loss 0.4089414 +| epoch 8 | 1931/ 8400 batches | train loss 0.3601518 +| epoch 8 | 1935/ 8400 batches | train loss 0.4712934 +| epoch 8 | 1939/ 8400 batches | train loss 0.2970735 +| epoch 8 | 1943/ 8400 batches | train loss 0.3531545 +| epoch 8 | 1947/ 8400 batches | train loss 0.4148010 +| epoch 8 | 1951/ 8400 batches | train loss 0.3486050 +| epoch 8 | 1955/ 8400 batches | train loss 0.3828286 +| epoch 8 | 1959/ 8400 batches | train loss 0.3557958 +| epoch 8 | 1963/ 8400 batches | train loss 0.3432506 +| epoch 8 | 1967/ 8400 batches | train loss 0.3347943 +| epoch 8 | 1971/ 8400 batches | train loss 0.2913247 +| epoch 8 | 1975/ 8400 batches | train loss 0.3524007 +| epoch 8 | 1979/ 8400 batches | train loss 0.3380342 +| epoch 8 | 1983/ 8400 batches | train loss 0.2852034 +| epoch 8 | 1987/ 8400 batches | train loss 0.3878850 +| epoch 8 | 1991/ 8400 batches | train loss 0.3768295 +| epoch 8 | 1995/ 8400 batches | train loss 0.3612701 +| epoch 8 | 1999/ 8400 batches | train loss 0.4425732 +| epoch 8 | 2003/ 8400 batches | train loss 0.3964389 +| epoch 8 | 2007/ 8400 batches | train loss 0.3731326 +| epoch 8 | 2011/ 8400 batches | train loss 0.3607584 +| epoch 8 | 2015/ 8400 batches | train loss 0.3857713 +| epoch 8 | 2019/ 8400 batches | train loss 0.3983138 +| epoch 8 | 2023/ 8400 batches | train loss 0.3926434 +| epoch 8 | 2027/ 8400 batches | train loss 0.3163763 +| epoch 8 | 2031/ 8400 batches | train loss 0.3843848 +| epoch 8 | 2035/ 8400 batches | train loss 0.2281409 +| epoch 8 | 2039/ 8400 batches | train loss 0.3806823 +| epoch 8 | 2043/ 8400 batches | train loss 0.3372014 +| epoch 8 | 2047/ 8400 batches | train loss 0.3377232 +| epoch 8 | 2051/ 8400 batches | train loss 0.3091386 +| epoch 8 | 2055/ 8400 batches | train loss 0.3608779 +| epoch 8 | 2059/ 8400 batches | train loss 0.2088333 +| epoch 8 | 2063/ 8400 batches | train loss 0.3397251 +| epoch 8 | 2067/ 8400 batches | train loss 0.3457857 +| epoch 8 | 2071/ 8400 batches | train loss 0.3020642 +| epoch 8 | 2075/ 8400 batches | train loss 0.3006259 +| epoch 8 | 2079/ 8400 batches | train loss 0.2938479 +| epoch 8 | 2083/ 8400 batches | train loss 0.3660172 +| epoch 8 | 2087/ 8400 batches | train loss 0.3799494 +| epoch 8 | 2091/ 8400 batches | train loss 0.3702772 +| epoch 8 | 2095/ 8400 batches | train loss 0.3947995 +| epoch 8 | 2099/ 8400 batches | train loss 0.3379484 +| epoch 8 | 2103/ 8400 batches | train loss 0.3897980 +| epoch 8 | 2107/ 8400 batches | train loss 0.3261977 +| epoch 8 | 2111/ 8400 batches | train loss 0.3546990 +| epoch 8 | 2115/ 8400 batches | train loss 0.3351421 +| epoch 8 | 2119/ 8400 batches | train loss 0.3339168 +| epoch 8 | 2123/ 8400 batches | train loss 0.3603781 +| epoch 8 | 2127/ 8400 batches | train loss 0.3029240 +| epoch 8 | 2131/ 8400 batches | train loss 0.4063262 +| epoch 8 | 2135/ 8400 batches | train loss 0.3670722 +| epoch 8 | 2139/ 8400 batches | train loss 0.3365771 +| epoch 8 | 2143/ 8400 batches | train loss 0.3481809 +| epoch 8 | 2147/ 8400 batches | train loss 0.2725323 +| epoch 8 | 2151/ 8400 batches | train loss 0.3589702 +| epoch 8 | 2155/ 8400 batches | train loss 0.3964166 +| epoch 8 | 2159/ 8400 batches | train loss 0.3559915 +| epoch 8 | 2163/ 8400 batches | train loss 0.3523164 +| epoch 8 | 2167/ 8400 batches | train loss 0.2863332 +| epoch 8 | 2171/ 8400 batches | train loss 0.3316624 +| epoch 8 | 2175/ 8400 batches | train loss 0.3101453 +| epoch 8 | 2179/ 8400 batches | train loss 0.3340729 +| epoch 8 | 2183/ 8400 batches | train loss 0.3507729 +| epoch 8 | 2187/ 8400 batches | train loss 0.3583356 +| epoch 8 | 2191/ 8400 batches | train loss 0.3848681 +| epoch 8 | 2195/ 8400 batches | train loss 0.3072751 +| epoch 8 | 2199/ 8400 batches | train loss 0.3341118 +| epoch 8 | 2203/ 8400 batches | train loss 0.3751622 +| epoch 8 | 2207/ 8400 batches | train loss 0.3305619 +| epoch 8 | 2211/ 8400 batches | train loss 0.3410416 +| epoch 8 | 2215/ 8400 batches | train loss 0.3445441 +| epoch 8 | 2219/ 8400 batches | train loss 0.3457070 +| epoch 8 | 2223/ 8400 batches | train loss 0.3545202 +| epoch 8 | 2227/ 8400 batches | train loss 0.3063672 +| epoch 8 | 2231/ 8400 batches | train loss 0.3738880 +| epoch 8 | 2235/ 8400 batches | train loss 0.3246675 +| epoch 8 | 2239/ 8400 batches | train loss 0.4081672 +| epoch 8 | 2243/ 8400 batches | train loss 0.4045462 +| epoch 8 | 2247/ 8400 batches | train loss 0.3797202 +| epoch 8 | 2251/ 8400 batches | train loss 0.3847958 +| epoch 8 | 2255/ 8400 batches | train loss 0.3223838 +| epoch 8 | 2259/ 8400 batches | train loss 0.3332357 +| epoch 8 | 2263/ 8400 batches | train loss 0.4267141 +| epoch 8 | 2267/ 8400 batches | train loss 0.3091529 +| epoch 8 | 2271/ 8400 batches | train loss 0.3987944 +| epoch 8 | 2275/ 8400 batches | train loss 0.4047337 +| epoch 8 | 2279/ 8400 batches | train loss 0.3208721 +| epoch 8 | 2283/ 8400 batches | train loss 0.3795357 +| epoch 8 | 2287/ 8400 batches | train loss 0.3206848 +| epoch 8 | 2291/ 8400 batches | train loss 0.4480079 +| epoch 8 | 2295/ 8400 batches | train loss 0.3609976 +| epoch 8 | 2299/ 8400 batches | train loss 0.3432701 +| epoch 8 | 2303/ 8400 batches | train loss 0.3745261 +| epoch 8 | 2307/ 8400 batches | train loss 0.4292760 +| epoch 8 | 2311/ 8400 batches | train loss 0.4077033 +| epoch 8 | 2315/ 8400 batches | train loss 0.3584957 +| epoch 8 | 2319/ 8400 batches | train loss 0.2851135 +| epoch 8 | 2323/ 8400 batches | train loss 0.3861740 +| epoch 8 | 2327/ 8400 batches | train loss 0.3193372 +| epoch 8 | 2331/ 8400 batches | train loss 0.3668566 +| epoch 8 | 2335/ 8400 batches | train loss 0.3215750 +| epoch 8 | 2339/ 8400 batches | train loss 0.3207121 +| epoch 8 | 2343/ 8400 batches | train loss 0.3791980 +| epoch 8 | 2347/ 8400 batches | train loss 0.2818905 +| epoch 8 | 2351/ 8400 batches | train loss 0.3616764 +| epoch 8 | 2355/ 8400 batches | train loss 0.3988355 +| epoch 8 | 2359/ 8400 batches | train loss 0.3577747 +| epoch 8 | 2363/ 8400 batches | train loss 0.3287297 +| epoch 8 | 2367/ 8400 batches | train loss 0.3611730 +| epoch 8 | 2371/ 8400 batches | train loss 0.3961009 +| epoch 8 | 2375/ 8400 batches | train loss 0.3647953 +| epoch 8 | 2379/ 8400 batches | train loss 0.3422672 +| epoch 8 | 2383/ 8400 batches | train loss 0.2301857 +| epoch 8 | 2387/ 8400 batches | train loss 0.3444449 +| epoch 8 | 2391/ 8400 batches | train loss 0.3488969 +| epoch 8 | 2395/ 8400 batches | train loss 0.3532999 +| epoch 8 | 2399/ 8400 batches | train loss 0.3595482 +| epoch 8 | 2403/ 8400 batches | train loss 0.3023729 +| epoch 8 | 2407/ 8400 batches | train loss 0.3819616 +| epoch 8 | 2411/ 8400 batches | train loss 0.3559530 +| epoch 8 | 2415/ 8400 batches | train loss 0.4101236 +| epoch 8 | 2419/ 8400 batches | train loss 0.3461633 +| epoch 8 | 2423/ 8400 batches | train loss 0.3675613 +| epoch 8 | 2427/ 8400 batches | train loss 0.3856411 +| epoch 8 | 2431/ 8400 batches | train loss 0.3231729 +| epoch 8 | 2435/ 8400 batches | train loss 0.3727552 +| epoch 8 | 2439/ 8400 batches | train loss 0.2778800 +| epoch 8 | 2443/ 8400 batches | train loss 0.3289047 +| epoch 8 | 2447/ 8400 batches | train loss 0.4603512 +| epoch 8 | 2451/ 8400 batches | train loss 0.4048359 +| epoch 8 | 2455/ 8400 batches | train loss 0.4267055 +| epoch 8 | 2459/ 8400 batches | train loss 0.4003204 +| epoch 8 | 2463/ 8400 batches | train loss 0.3042799 +| epoch 8 | 2467/ 8400 batches | train loss 0.3428228 +| epoch 8 | 2471/ 8400 batches | train loss 0.3165708 +| epoch 8 | 2475/ 8400 batches | train loss 0.3991187 +| epoch 8 | 2479/ 8400 batches | train loss 0.3372544 +| epoch 8 | 2483/ 8400 batches | train loss 0.3105430 +| epoch 8 | 2487/ 8400 batches | train loss 0.3803457 +| epoch 8 | 2491/ 8400 batches | train loss 0.3092353 +| epoch 8 | 2495/ 8400 batches | train loss 0.3345048 +| epoch 8 | 2499/ 8400 batches | train loss 0.3839960 +| epoch 8 | 2503/ 8400 batches | train loss 0.3245391 +| epoch 8 | 2507/ 8400 batches | train loss 0.3818610 +| epoch 8 | 2511/ 8400 batches | train loss 0.3455079 +| epoch 8 | 2515/ 8400 batches | train loss 0.3568563 +| epoch 8 | 2519/ 8400 batches | train loss 0.3863990 +| epoch 8 | 2523/ 8400 batches | train loss 0.3664265 +| epoch 8 | 2527/ 8400 batches | train loss 0.3336441 +| epoch 8 | 2531/ 8400 batches | train loss 0.4012088 +| epoch 8 | 2535/ 8400 batches | train loss 0.3897487 +| epoch 8 | 2539/ 8400 batches | train loss 0.2876202 +| epoch 8 | 2543/ 8400 batches | train loss 0.3256220 +| epoch 8 | 2547/ 8400 batches | train loss 0.4244610 +| epoch 8 | 2551/ 8400 batches | train loss 0.3787059 +| epoch 8 | 2555/ 8400 batches | train loss 0.3044725 +| epoch 8 | 2559/ 8400 batches | train loss 0.3173567 +| epoch 8 | 2563/ 8400 batches | train loss 0.2726993 +| epoch 8 | 2567/ 8400 batches | train loss 0.3595961 +| epoch 8 | 2571/ 8400 batches | train loss 0.3548608 +| epoch 8 | 2575/ 8400 batches | train loss 0.3252756 +| epoch 8 | 2579/ 8400 batches | train loss 0.3933426 +| epoch 8 | 2583/ 8400 batches | train loss 0.2962756 +| epoch 8 | 2587/ 8400 batches | train loss 0.2229300 +| epoch 8 | 2591/ 8400 batches | train loss 0.4453025 +| epoch 8 | 2595/ 8400 batches | train loss 0.4295846 +| epoch 8 | 2599/ 8400 batches | train loss 0.3529564 +| epoch 8 | 2603/ 8400 batches | train loss 0.3927448 +| epoch 8 | 2607/ 8400 batches | train loss 0.3837401 +| epoch 8 | 2611/ 8400 batches | train loss 0.3472823 +| epoch 8 | 2615/ 8400 batches | train loss 0.4182927 +| epoch 8 | 2619/ 8400 batches | train loss 0.2877150 +| epoch 8 | 2623/ 8400 batches | train loss 0.3951752 +| epoch 8 | 2627/ 8400 batches | train loss 0.3549183 +| epoch 8 | 2631/ 8400 batches | train loss 0.3375762 +| epoch 8 | 2635/ 8400 batches | train loss 0.3839248 +| epoch 8 | 2639/ 8400 batches | train loss 0.3578203 +| epoch 8 | 2643/ 8400 batches | train loss 0.3121164 +| epoch 8 | 2647/ 8400 batches | train loss 0.4396956 +| epoch 8 | 2651/ 8400 batches | train loss 0.4109861 +| epoch 8 | 2655/ 8400 batches | train loss 0.3847009 +| epoch 8 | 2659/ 8400 batches | train loss 0.3502541 +| epoch 8 | 2663/ 8400 batches | train loss 0.3893573 +| epoch 8 | 2667/ 8400 batches | train loss 0.3121403 +| epoch 8 | 2671/ 8400 batches | train loss 0.3335018 +| epoch 8 | 2675/ 8400 batches | train loss 0.2991227 +| epoch 8 | 2679/ 8400 batches | train loss 0.3268657 +| epoch 8 | 2683/ 8400 batches | train loss 0.3366420 +| epoch 8 | 2687/ 8400 batches | train loss 0.3593663 +| epoch 8 | 2691/ 8400 batches | train loss 0.4083511 +| epoch 8 | 2695/ 8400 batches | train loss 0.3627943 +| epoch 8 | 2699/ 8400 batches | train loss 0.4157256 +| epoch 8 | 2703/ 8400 batches | train loss 0.3357375 +| epoch 8 | 2707/ 8400 batches | train loss 0.3976942 +| epoch 8 | 2711/ 8400 batches | train loss 0.2897507 +| epoch 8 | 2715/ 8400 batches | train loss 0.4101405 +| epoch 8 | 2719/ 8400 batches | train loss 0.4220175 +| epoch 8 | 2723/ 8400 batches | train loss 0.3284619 +| epoch 8 | 2727/ 8400 batches | train loss 0.3117769 +| epoch 8 | 2731/ 8400 batches | train loss 0.4059642 +| epoch 8 | 2735/ 8400 batches | train loss 0.3631453 +| epoch 8 | 2739/ 8400 batches | train loss 0.3181179 +| epoch 8 | 2743/ 8400 batches | train loss 0.3125477 +| epoch 8 | 2747/ 8400 batches | train loss 0.3032790 +| epoch 8 | 2751/ 8400 batches | train loss 0.3356639 +| epoch 8 | 2755/ 8400 batches | train loss 0.3762920 +| epoch 8 | 2759/ 8400 batches | train loss 0.3243293 +| epoch 8 | 2763/ 8400 batches | train loss 0.3872701 +| epoch 8 | 2767/ 8400 batches | train loss 0.3611533 +| epoch 8 | 2771/ 8400 batches | train loss 0.3099043 +| epoch 8 | 2775/ 8400 batches | train loss 0.3798940 +| epoch 8 | 2779/ 8400 batches | train loss 0.3670510 +| epoch 8 | 2783/ 8400 batches | train loss 0.3459208 +| epoch 8 | 2787/ 8400 batches | train loss 0.3604889 +| epoch 8 | 2791/ 8400 batches | train loss 0.4138222 +| epoch 8 | 2795/ 8400 batches | train loss 0.3098201 +| epoch 8 | 2799/ 8400 batches | train loss 0.3025684 +| epoch 8 | 2803/ 8400 batches | train loss 0.3478638 +| epoch 8 | 2807/ 8400 batches | train loss 0.3344452 +| epoch 8 | 2811/ 8400 batches | train loss 0.3760989 +| epoch 8 | 2815/ 8400 batches | train loss 0.3491449 +| epoch 8 | 2819/ 8400 batches | train loss 0.3220225 +| epoch 8 | 2823/ 8400 batches | train loss 0.3098156 +| epoch 8 | 2827/ 8400 batches | train loss 0.3216067 +| epoch 8 | 2831/ 8400 batches | train loss 0.3649322 +| epoch 8 | 2835/ 8400 batches | train loss 0.3577356 +| epoch 8 | 2839/ 8400 batches | train loss 0.2647024 +| epoch 8 | 2843/ 8400 batches | train loss 0.4416373 +| epoch 8 | 2847/ 8400 batches | train loss 0.3687280 +| epoch 8 | 2851/ 8400 batches | train loss 0.3438523 +| epoch 8 | 2855/ 8400 batches | train loss 0.2965908 +| epoch 8 | 2859/ 8400 batches | train loss 0.2865695 +| epoch 8 | 2863/ 8400 batches | train loss 0.3698449 +| epoch 8 | 2867/ 8400 batches | train loss 0.3560986 +| epoch 8 | 2871/ 8400 batches | train loss 0.3516302 +| epoch 8 | 2875/ 8400 batches | train loss 0.4060100 +| epoch 8 | 2879/ 8400 batches | train loss 0.3737071 +| epoch 8 | 2883/ 8400 batches | train loss 0.3816937 +| epoch 8 | 2887/ 8400 batches | train loss 0.3850338 +| epoch 8 | 2891/ 8400 batches | train loss 0.3520787 +| epoch 8 | 2895/ 8400 batches | train loss 0.3391143 +| epoch 8 | 2899/ 8400 batches | train loss 0.2906964 +| epoch 8 | 2903/ 8400 batches | train loss 0.3836653 +| epoch 8 | 2907/ 8400 batches | train loss 0.3184958 +| epoch 8 | 2911/ 8400 batches | train loss 0.2304815 +| epoch 8 | 2915/ 8400 batches | train loss 0.4127473 +| epoch 8 | 2919/ 8400 batches | train loss 0.3629372 +| epoch 8 | 2923/ 8400 batches | train loss 0.3349043 +| epoch 8 | 2927/ 8400 batches | train loss 0.3186312 +| epoch 8 | 2931/ 8400 batches | train loss 0.3494685 +| epoch 8 | 2935/ 8400 batches | train loss 0.3606065 +| epoch 8 | 2939/ 8400 batches | train loss 0.3520328 +| epoch 8 | 2943/ 8400 batches | train loss 0.2892075 +| epoch 8 | 2947/ 8400 batches | train loss 0.3322406 +| epoch 8 | 2951/ 8400 batches | train loss 0.3839583 +| epoch 8 | 2955/ 8400 batches | train loss 0.3599084 +| epoch 8 | 2959/ 8400 batches | train loss 0.3322132 +| epoch 8 | 2963/ 8400 batches | train loss 0.3464504 +| epoch 8 | 2967/ 8400 batches | train loss 0.3259698 +| epoch 8 | 2971/ 8400 batches | train loss 0.3841517 +| epoch 8 | 2975/ 8400 batches | train loss 0.3209083 +| epoch 8 | 2979/ 8400 batches | train loss 0.3921971 +| epoch 8 | 2983/ 8400 batches | train loss 0.3610806 +| epoch 8 | 2987/ 8400 batches | train loss 0.3387876 +| epoch 8 | 2991/ 8400 batches | train loss 0.3915030 +| epoch 8 | 2995/ 8400 batches | train loss 0.3515894 +| epoch 8 | 2999/ 8400 batches | train loss 0.3497909 +| epoch 8 | 3003/ 8400 batches | train loss 0.3103307 +| epoch 8 | 3007/ 8400 batches | train loss 0.3578803 +| epoch 8 | 3011/ 8400 batches | train loss 0.3636035 +| epoch 8 | 3015/ 8400 batches | train loss 0.3197885 +| epoch 8 | 3019/ 8400 batches | train loss 0.3476219 +| epoch 8 | 3023/ 8400 batches | train loss 0.3179573 +| epoch 8 | 3027/ 8400 batches | train loss 0.3234198 +| epoch 8 | 3031/ 8400 batches | train loss 0.3789061 +| epoch 8 | 3035/ 8400 batches | train loss 0.3204202 +| epoch 8 | 3039/ 8400 batches | train loss 0.3503402 +| epoch 8 | 3043/ 8400 batches | train loss 0.3579144 +| epoch 8 | 3047/ 8400 batches | train loss 0.3486415 +| epoch 8 | 3051/ 8400 batches | train loss 0.3154508 +| epoch 8 | 3055/ 8400 batches | train loss 0.3709302 +| epoch 8 | 3059/ 8400 batches | train loss 0.3453663 +| epoch 8 | 3063/ 8400 batches | train loss 0.3731117 +| epoch 8 | 3067/ 8400 batches | train loss 0.3833635 +| epoch 8 | 3071/ 8400 batches | train loss 0.3860730 +| epoch 8 | 3075/ 8400 batches | train loss 0.4015881 +| epoch 8 | 3079/ 8400 batches | train loss 0.3190969 +| epoch 8 | 3083/ 8400 batches | train loss 0.3011094 +| epoch 8 | 3087/ 8400 batches | train loss 0.3887605 +| epoch 8 | 3091/ 8400 batches | train loss 0.3200704 +| epoch 8 | 3095/ 8400 batches | train loss 0.3317358 +| epoch 8 | 3099/ 8400 batches | train loss 0.2783806 +| epoch 8 | 3103/ 8400 batches | train loss 0.3907112 +| epoch 8 | 3107/ 8400 batches | train loss 0.3448433 +| epoch 8 | 3111/ 8400 batches | train loss 0.3362956 +| epoch 8 | 3115/ 8400 batches | train loss 0.3964995 +| epoch 8 | 3119/ 8400 batches | train loss 0.3161842 +| epoch 8 | 3123/ 8400 batches | train loss 0.4439735 +| epoch 8 | 3127/ 8400 batches | train loss 0.3968756 +| epoch 8 | 3131/ 8400 batches | train loss 0.3724831 +| epoch 8 | 3135/ 8400 batches | train loss 0.3652140 +| epoch 8 | 3139/ 8400 batches | train loss 0.3610651 +| epoch 8 | 3143/ 8400 batches | train loss 0.3087605 +| epoch 8 | 3147/ 8400 batches | train loss 0.3550353 +| epoch 8 | 3151/ 8400 batches | train loss 0.3188665 +| epoch 8 | 3155/ 8400 batches | train loss 0.3329520 +| epoch 8 | 3159/ 8400 batches | train loss 0.3186694 +| epoch 8 | 3163/ 8400 batches | train loss 0.3083279 +| epoch 8 | 3167/ 8400 batches | train loss 0.3498476 +| epoch 8 | 3171/ 8400 batches | train loss 0.2856649 +| epoch 8 | 3175/ 8400 batches | train loss 0.3091209 +| epoch 8 | 3179/ 8400 batches | train loss 0.3223727 +| epoch 8 | 3183/ 8400 batches | train loss 0.3774945 +| epoch 8 | 3187/ 8400 batches | train loss 0.3613850 +| epoch 8 | 3191/ 8400 batches | train loss 0.4030600 +| epoch 8 | 3195/ 8400 batches | train loss 0.4204367 +| epoch 8 | 3199/ 8400 batches | train loss 0.3327414 +| epoch 8 | 3203/ 8400 batches | train loss 0.3518481 +| epoch 8 | 3207/ 8400 batches | train loss 0.3302728 +| epoch 8 | 3211/ 8400 batches | train loss 0.3562825 +| epoch 8 | 3215/ 8400 batches | train loss 0.3260721 +| epoch 8 | 3219/ 8400 batches | train loss 0.3192054 +| epoch 8 | 3223/ 8400 batches | train loss 0.3290302 +| epoch 8 | 3227/ 8400 batches | train loss 0.3856338 +| epoch 8 | 3231/ 8400 batches | train loss 0.3773532 +| epoch 8 | 3235/ 8400 batches | train loss 0.3707370 +| epoch 8 | 3239/ 8400 batches | train loss 0.3236646 +| epoch 8 | 3243/ 8400 batches | train loss 0.3158925 +| epoch 8 | 3247/ 8400 batches | train loss 0.3315305 +| epoch 8 | 3251/ 8400 batches | train loss 0.3829278 +| epoch 8 | 3255/ 8400 batches | train loss 0.3721595 +| epoch 8 | 3259/ 8400 batches | train loss 0.3363174 +| epoch 8 | 3263/ 8400 batches | train loss 0.3925371 +| epoch 8 | 3267/ 8400 batches | train loss 0.3574021 +| epoch 8 | 3271/ 8400 batches | train loss 0.3436714 +| epoch 8 | 3275/ 8400 batches | train loss 0.3610308 +| epoch 8 | 3279/ 8400 batches | train loss 0.3488590 +| epoch 8 | 3283/ 8400 batches | train loss 0.2933898 +| epoch 8 | 3287/ 8400 batches | train loss 0.3407435 +| epoch 8 | 3291/ 8400 batches | train loss 0.3097028 +| epoch 8 | 3295/ 8400 batches | train loss 0.3152493 +| epoch 8 | 3299/ 8400 batches | train loss 0.4505536 +| epoch 8 | 3303/ 8400 batches | train loss 0.2991717 +| epoch 8 | 3307/ 8400 batches | train loss 0.3452764 +| epoch 8 | 3311/ 8400 batches | train loss 0.3595857 +| epoch 8 | 3315/ 8400 batches | train loss 0.3225442 +| epoch 8 | 3319/ 8400 batches | train loss 0.3855893 +| epoch 8 | 3323/ 8400 batches | train loss 0.4345179 +| epoch 8 | 3327/ 8400 batches | train loss 0.4057515 +| epoch 8 | 3331/ 8400 batches | train loss 0.4131559 +| epoch 8 | 3335/ 8400 batches | train loss 0.3049065 +| epoch 8 | 3339/ 8400 batches | train loss 0.4430977 +| epoch 8 | 3343/ 8400 batches | train loss 0.4030586 +| epoch 8 | 3347/ 8400 batches | train loss 0.3707652 +| epoch 8 | 3351/ 8400 batches | train loss 0.2844574 +| epoch 8 | 3355/ 8400 batches | train loss 0.3833244 +| epoch 8 | 3359/ 8400 batches | train loss 0.2894287 +| epoch 8 | 3363/ 8400 batches | train loss 0.3627040 +| epoch 8 | 3367/ 8400 batches | train loss 0.3503853 +| epoch 8 | 3371/ 8400 batches | train loss 0.3499143 +| epoch 8 | 3375/ 8400 batches | train loss 0.3846375 +| epoch 8 | 3379/ 8400 batches | train loss 0.3420506 +| epoch 8 | 3383/ 8400 batches | train loss 0.2457716 +| epoch 8 | 3387/ 8400 batches | train loss 0.3517852 +| epoch 8 | 3391/ 8400 batches | train loss 0.3228674 +| epoch 8 | 3395/ 8400 batches | train loss 0.3494518 +| epoch 8 | 3399/ 8400 batches | train loss 0.3578854 +| epoch 8 | 3403/ 8400 batches | train loss 0.4012217 +| epoch 8 | 3407/ 8400 batches | train loss 0.4117750 +| epoch 8 | 3411/ 8400 batches | train loss 0.3323062 +| epoch 8 | 3415/ 8400 batches | train loss 0.3446504 +| epoch 8 | 3419/ 8400 batches | train loss 0.4267468 +| epoch 8 | 3423/ 8400 batches | train loss 0.3558306 +| epoch 8 | 3427/ 8400 batches | train loss 0.3719400 +| epoch 8 | 3431/ 8400 batches | train loss 0.3429731 +| epoch 8 | 3435/ 8400 batches | train loss 0.4103577 +| epoch 8 | 3439/ 8400 batches | train loss 0.2485324 +| epoch 8 | 3443/ 8400 batches | train loss 0.3239953 +| epoch 8 | 3447/ 8400 batches | train loss 0.3145254 +| epoch 8 | 3451/ 8400 batches | train loss 0.3938941 +| epoch 8 | 3455/ 8400 batches | train loss 0.3704254 +| epoch 8 | 3459/ 8400 batches | train loss 0.4329971 +| epoch 8 | 3463/ 8400 batches | train loss 0.3029587 +| epoch 8 | 3467/ 8400 batches | train loss 0.3480028 +| epoch 8 | 3471/ 8400 batches | train loss 0.2962739 +| epoch 8 | 3475/ 8400 batches | train loss 0.3307520 +| epoch 8 | 3479/ 8400 batches | train loss 0.3822216 +| epoch 8 | 3483/ 8400 batches | train loss 0.3605508 +| epoch 8 | 3487/ 8400 batches | train loss 0.3323210 +| epoch 8 | 3491/ 8400 batches | train loss 0.3332551 +| epoch 8 | 3495/ 8400 batches | train loss 0.3115101 +| epoch 8 | 3499/ 8400 batches | train loss 0.2722086 +| epoch 8 | 3503/ 8400 batches | train loss 0.3234509 +| epoch 8 | 3507/ 8400 batches | train loss 0.3418275 +| epoch 8 | 3511/ 8400 batches | train loss 0.3597815 +| epoch 8 | 3515/ 8400 batches | train loss 0.3789339 +| epoch 8 | 3519/ 8400 batches | train loss 0.4048070 +| epoch 8 | 3523/ 8400 batches | train loss 0.3475952 +| epoch 8 | 3527/ 8400 batches | train loss 0.3052188 +| epoch 8 | 3531/ 8400 batches | train loss 0.3486285 +| epoch 8 | 3535/ 8400 batches | train loss 0.3766995 +| epoch 8 | 3539/ 8400 batches | train loss 0.2402265 +| epoch 8 | 3543/ 8400 batches | train loss 0.3822327 +| epoch 8 | 3547/ 8400 batches | train loss 0.3668514 +| epoch 8 | 3551/ 8400 batches | train loss 0.3415321 +| epoch 8 | 3555/ 8400 batches | train loss 0.4356440 +| epoch 8 | 3559/ 8400 batches | train loss 0.4268930 +| epoch 8 | 3563/ 8400 batches | train loss 0.3794981 +| epoch 8 | 3567/ 8400 batches | train loss 0.3284658 +| epoch 8 | 3571/ 8400 batches | train loss 0.3641858 +| epoch 8 | 3575/ 8400 batches | train loss 0.3655036 +| epoch 8 | 3579/ 8400 batches | train loss 0.3334068 +| epoch 8 | 3583/ 8400 batches | train loss 0.3597664 +| epoch 8 | 3587/ 8400 batches | train loss 0.3663183 +| epoch 8 | 3591/ 8400 batches | train loss 0.4198684 +| epoch 8 | 3595/ 8400 batches | train loss 0.3707815 +| epoch 8 | 3599/ 8400 batches | train loss 0.3943681 +| epoch 8 | 3603/ 8400 batches | train loss 0.3353851 +| epoch 8 | 3607/ 8400 batches | train loss 0.3542132 +| epoch 8 | 3611/ 8400 batches | train loss 0.3141785 +| epoch 8 | 3615/ 8400 batches | train loss 0.3196781 +| epoch 8 | 3619/ 8400 batches | train loss 0.3674327 +| epoch 8 | 3623/ 8400 batches | train loss 0.2974239 +| epoch 8 | 3627/ 8400 batches | train loss 0.3965665 +| epoch 8 | 3631/ 8400 batches | train loss 0.2965053 +| epoch 8 | 3635/ 8400 batches | train loss 0.3130665 +| epoch 8 | 3639/ 8400 batches | train loss 0.3808333 +| epoch 8 | 3643/ 8400 batches | train loss 0.4495780 +| epoch 8 | 3647/ 8400 batches | train loss 0.3608716 +| epoch 8 | 3651/ 8400 batches | train loss 0.3531002 +| epoch 8 | 3655/ 8400 batches | train loss 0.3499163 +| epoch 8 | 3659/ 8400 batches | train loss 0.3494913 +| epoch 8 | 3663/ 8400 batches | train loss 0.2175402 +| epoch 8 | 3667/ 8400 batches | train loss 0.3751508 +| epoch 8 | 3671/ 8400 batches | train loss 0.4123409 +| epoch 8 | 3675/ 8400 batches | train loss 0.4135017 +| epoch 8 | 3679/ 8400 batches | train loss 0.4030237 +| epoch 8 | 3683/ 8400 batches | train loss 0.3648106 +| epoch 8 | 3687/ 8400 batches | train loss 0.3774312 +| epoch 8 | 3691/ 8400 batches | train loss 0.4158315 +| epoch 8 | 3695/ 8400 batches | train loss 0.3542951 +| epoch 8 | 3699/ 8400 batches | train loss 0.3124081 +| epoch 8 | 3703/ 8400 batches | train loss 0.3780483 +| epoch 8 | 3707/ 8400 batches | train loss 0.3451585 +| epoch 8 | 3711/ 8400 batches | train loss 0.3710357 +| epoch 8 | 3715/ 8400 batches | train loss 0.3593602 +| epoch 8 | 3719/ 8400 batches | train loss 0.3313902 +| epoch 8 | 3723/ 8400 batches | train loss 0.3287516 +| epoch 8 | 3727/ 8400 batches | train loss 0.3838615 +| epoch 8 | 3731/ 8400 batches | train loss 0.3616696 +| epoch 8 | 3735/ 8400 batches | train loss 0.3393168 +| epoch 8 | 3739/ 8400 batches | train loss 0.3544629 +| epoch 8 | 3743/ 8400 batches | train loss 0.3429129 +| epoch 8 | 3747/ 8400 batches | train loss 0.3473804 +| epoch 8 | 3751/ 8400 batches | train loss 0.3577375 +| epoch 8 | 3755/ 8400 batches | train loss 0.3233664 +| epoch 8 | 3759/ 8400 batches | train loss 0.3550156 +| epoch 8 | 3763/ 8400 batches | train loss 0.3131602 +| epoch 8 | 3767/ 8400 batches | train loss 0.3313041 +| epoch 8 | 3771/ 8400 batches | train loss 0.3354464 +| epoch 8 | 3775/ 8400 batches | train loss 0.3792052 +| epoch 8 | 3779/ 8400 batches | train loss 0.3201783 +| epoch 8 | 3783/ 8400 batches | train loss 0.3282380 +| epoch 8 | 3787/ 8400 batches | train loss 0.3371117 +| epoch 8 | 3791/ 8400 batches | train loss 0.3608798 +| epoch 8 | 3795/ 8400 batches | train loss 0.2889560 +| epoch 8 | 3799/ 8400 batches | train loss 0.3405242 +| epoch 8 | 3803/ 8400 batches | train loss 0.4261365 +| epoch 8 | 3807/ 8400 batches | train loss 0.3554096 +| epoch 8 | 3811/ 8400 batches | train loss 0.4419997 +| epoch 8 | 3815/ 8400 batches | train loss 0.2276635 +| epoch 8 | 3819/ 8400 batches | train loss 0.3448247 +| epoch 8 | 3823/ 8400 batches | train loss 0.3399742 +| epoch 8 | 3827/ 8400 batches | train loss 0.3447105 +| epoch 8 | 3831/ 8400 batches | train loss 0.3507348 +| epoch 8 | 3835/ 8400 batches | train loss 0.4472533 +| epoch 8 | 3839/ 8400 batches | train loss 0.3704283 +| epoch 8 | 3843/ 8400 batches | train loss 0.3601797 +| epoch 8 | 3847/ 8400 batches | train loss 0.3721952 +| epoch 8 | 3851/ 8400 batches | train loss 0.3255315 +| epoch 8 | 3855/ 8400 batches | train loss 0.3237063 +| epoch 8 | 3859/ 8400 batches | train loss 0.3105239 +| epoch 8 | 3863/ 8400 batches | train loss 0.3602310 +| epoch 8 | 3867/ 8400 batches | train loss 0.3448409 +| epoch 8 | 3871/ 8400 batches | train loss 0.3035842 +| epoch 8 | 3875/ 8400 batches | train loss 0.3492545 +| epoch 8 | 3879/ 8400 batches | train loss 0.3573446 +| epoch 8 | 3883/ 8400 batches | train loss 0.3812656 +| epoch 8 | 3887/ 8400 batches | train loss 0.3320502 +| epoch 8 | 3891/ 8400 batches | train loss 0.2985687 +| epoch 8 | 3895/ 8400 batches | train loss 0.3444589 +| epoch 8 | 3899/ 8400 batches | train loss 0.2893071 +| epoch 8 | 3903/ 8400 batches | train loss 0.3203643 +| epoch 8 | 3907/ 8400 batches | train loss 0.3560689 +| epoch 8 | 3911/ 8400 batches | train loss 0.3778709 +| epoch 8 | 3915/ 8400 batches | train loss 0.3844168 +| epoch 8 | 3919/ 8400 batches | train loss 0.3190340 +| epoch 8 | 3923/ 8400 batches | train loss 0.3227434 +| epoch 8 | 3927/ 8400 batches | train loss 0.3733240 +| epoch 8 | 3931/ 8400 batches | train loss 0.2445361 +| epoch 8 | 3935/ 8400 batches | train loss 0.3250641 +| epoch 8 | 3939/ 8400 batches | train loss 0.3086703 +| epoch 8 | 3943/ 8400 batches | train loss 0.3542910 +| epoch 8 | 3947/ 8400 batches | train loss 0.3377075 +| epoch 8 | 3951/ 8400 batches | train loss 0.3904114 +| epoch 8 | 3955/ 8400 batches | train loss 0.3640863 +| epoch 8 | 3959/ 8400 batches | train loss 0.3736487 +| epoch 8 | 3963/ 8400 batches | train loss 0.2934406 +| epoch 8 | 3967/ 8400 batches | train loss 0.3279834 +| epoch 8 | 3971/ 8400 batches | train loss 0.3686243 +| epoch 8 | 3975/ 8400 batches | train loss 0.3418783 +| epoch 8 | 3979/ 8400 batches | train loss 0.3433808 +| epoch 8 | 3983/ 8400 batches | train loss 0.3565349 +| epoch 8 | 3987/ 8400 batches | train loss 0.3683699 +| epoch 8 | 3991/ 8400 batches | train loss 0.2866419 +| epoch 8 | 3995/ 8400 batches | train loss 0.3977031 +| epoch 8 | 3999/ 8400 batches | train loss 0.4033494 +| epoch 8 | 4003/ 8400 batches | train loss 0.3618411 +| epoch 8 | 4007/ 8400 batches | train loss 0.3560271 +| epoch 8 | 4011/ 8400 batches | train loss 0.3885250 +| epoch 8 | 4015/ 8400 batches | train loss 0.3295657 +| epoch 8 | 4019/ 8400 batches | train loss 0.3899556 +| epoch 8 | 4023/ 8400 batches | train loss 0.3369068 +| epoch 8 | 4027/ 8400 batches | train loss 0.3169413 +| epoch 8 | 4031/ 8400 batches | train loss 0.3053176 +| epoch 8 | 4035/ 8400 batches | train loss 0.2856205 +| epoch 8 | 4039/ 8400 batches | train loss 0.3090408 +| epoch 8 | 4043/ 8400 batches | train loss 0.3564062 +| epoch 8 | 4047/ 8400 batches | train loss 0.3782327 +| epoch 8 | 4051/ 8400 batches | train loss 0.2457623 +| epoch 8 | 4055/ 8400 batches | train loss 0.4103218 +| epoch 8 | 4059/ 8400 batches | train loss 0.3635116 +| epoch 8 | 4063/ 8400 batches | train loss 0.4059856 +| epoch 8 | 4067/ 8400 batches | train loss 0.3914042 +| epoch 8 | 4071/ 8400 batches | train loss 0.3176695 +| epoch 8 | 4075/ 8400 batches | train loss 0.4372129 +| epoch 8 | 4079/ 8400 batches | train loss 0.3837380 +| epoch 8 | 4083/ 8400 batches | train loss 0.3480353 +| epoch 8 | 4087/ 8400 batches | train loss 0.3510589 +| epoch 8 | 4091/ 8400 batches | train loss 0.3701485 +| epoch 8 | 4095/ 8400 batches | train loss 0.3266652 +| epoch 8 | 4099/ 8400 batches | train loss 0.4275793 +| epoch 8 | 4103/ 8400 batches | train loss 0.3065751 +| epoch 8 | 4107/ 8400 batches | train loss 0.3490403 +| epoch 8 | 4111/ 8400 batches | train loss 0.3615146 +| epoch 8 | 4115/ 8400 batches | train loss 0.4060527 +| epoch 8 | 4119/ 8400 batches | train loss 0.3587177 +| epoch 8 | 4123/ 8400 batches | train loss 0.2518469 +| epoch 8 | 4127/ 8400 batches | train loss 0.3300124 +| epoch 8 | 4131/ 8400 batches | train loss 0.3333971 +| epoch 8 | 4135/ 8400 batches | train loss 0.3413363 +| epoch 8 | 4139/ 8400 batches | train loss 0.3754784 +| epoch 8 | 4143/ 8400 batches | train loss 0.3488691 +| epoch 8 | 4147/ 8400 batches | train loss 0.3280950 +| epoch 8 | 4151/ 8400 batches | train loss 0.2957021 +| epoch 8 | 4155/ 8400 batches | train loss 0.4319310 +| epoch 8 | 4159/ 8400 batches | train loss 0.3999901 +| epoch 8 | 4163/ 8400 batches | train loss 0.2866263 +| epoch 8 | 4167/ 8400 batches | train loss 0.3315534 +| epoch 8 | 4171/ 8400 batches | train loss 0.3741527 +| epoch 8 | 4175/ 8400 batches | train loss 0.3370731 +| epoch 8 | 4179/ 8400 batches | train loss 0.3113288 +| epoch 8 | 4183/ 8400 batches | train loss 0.3958530 +| epoch 8 | 4187/ 8400 batches | train loss 0.4065481 +| epoch 8 | 4191/ 8400 batches | train loss 0.2237855 +| epoch 8 | 4195/ 8400 batches | train loss 0.3849053 +| epoch 8 | 4199/ 8400 batches | train loss 0.3371541 +| epoch 8 | 4203/ 8400 batches | train loss 0.2974513 +| epoch 8 | 4207/ 8400 batches | train loss 0.4003868 +| epoch 8 | 4211/ 8400 batches | train loss 0.2907601 +| epoch 8 | 4215/ 8400 batches | train loss 0.3157622 +| epoch 8 | 4219/ 8400 batches | train loss 0.3858918 +| epoch 8 | 4223/ 8400 batches | train loss 0.3298919 +| epoch 8 | 4227/ 8400 batches | train loss 0.3783160 +| epoch 8 | 4231/ 8400 batches | train loss 0.4002914 +| epoch 8 | 4235/ 8400 batches | train loss 0.3669189 +| epoch 8 | 4239/ 8400 batches | train loss 0.3833049 +| epoch 8 | 4243/ 8400 batches | train loss 0.3880751 +| epoch 8 | 4247/ 8400 batches | train loss 0.4075431 +| epoch 8 | 4251/ 8400 batches | train loss 0.3271146 +| epoch 8 | 4255/ 8400 batches | train loss 0.3357350 +| epoch 8 | 4259/ 8400 batches | train loss 0.3588215 +| epoch 8 | 4263/ 8400 batches | train loss 0.3179896 +| epoch 8 | 4267/ 8400 batches | train loss 0.4159189 +| epoch 8 | 4271/ 8400 batches | train loss 0.3046974 +| epoch 8 | 4275/ 8400 batches | train loss 0.4235522 +| epoch 8 | 4279/ 8400 batches | train loss 0.3671858 +| epoch 8 | 4283/ 8400 batches | train loss 0.3225881 +| epoch 8 | 4287/ 8400 batches | train loss 0.3268229 +| epoch 8 | 4291/ 8400 batches | train loss 0.4030485 +| epoch 8 | 4295/ 8400 batches | train loss 0.4420798 +| epoch 8 | 4299/ 8400 batches | train loss 0.3226944 +| epoch 8 | 4303/ 8400 batches | train loss 0.3899562 +| epoch 8 | 4307/ 8400 batches | train loss 0.4270857 +| epoch 8 | 4311/ 8400 batches | train loss 0.4028127 +| epoch 8 | 4315/ 8400 batches | train loss 0.4530401 +| epoch 8 | 4319/ 8400 batches | train loss 0.4004217 +| epoch 8 | 4323/ 8400 batches | train loss 0.3715816 +| epoch 8 | 4327/ 8400 batches | train loss 0.3392501 +| epoch 8 | 4331/ 8400 batches | train loss 0.3794813 +| epoch 8 | 4335/ 8400 batches | train loss 0.3556587 +| epoch 8 | 4339/ 8400 batches | train loss 0.3767791 +| epoch 8 | 4343/ 8400 batches | train loss 0.3601981 +| epoch 8 | 4347/ 8400 batches | train loss 0.3745384 +| epoch 8 | 4351/ 8400 batches | train loss 0.3454507 +| epoch 8 | 4355/ 8400 batches | train loss 0.3817685 +| epoch 8 | 4359/ 8400 batches | train loss 0.3119860 +| epoch 8 | 4363/ 8400 batches | train loss 0.3724663 +| epoch 8 | 4367/ 8400 batches | train loss 0.4070116 +| epoch 8 | 4371/ 8400 batches | train loss 0.3806538 +| epoch 8 | 4375/ 8400 batches | train loss 0.3384805 +| epoch 8 | 4379/ 8400 batches | train loss 0.4390464 +| epoch 8 | 4383/ 8400 batches | train loss 0.3165848 +| epoch 8 | 4387/ 8400 batches | train loss 0.4602638 +| epoch 8 | 4391/ 8400 batches | train loss 0.4206765 +| epoch 8 | 4395/ 8400 batches | train loss 0.3697464 +| epoch 8 | 4399/ 8400 batches | train loss 0.3097704 +| epoch 8 | 4403/ 8400 batches | train loss 0.3649969 +| epoch 8 | 4407/ 8400 batches | train loss 0.3547043 +| epoch 8 | 4411/ 8400 batches | train loss 0.3727819 +| epoch 8 | 4415/ 8400 batches | train loss 0.3719122 +| epoch 8 | 4419/ 8400 batches | train loss 0.3702297 +| epoch 8 | 4423/ 8400 batches | train loss 0.3416172 +| epoch 8 | 4427/ 8400 batches | train loss 0.3776120 +| epoch 8 | 4431/ 8400 batches | train loss 0.3147179 +| epoch 8 | 4435/ 8400 batches | train loss 0.3061549 +| epoch 8 | 4439/ 8400 batches | train loss 0.3030759 +| epoch 8 | 4443/ 8400 batches | train loss 0.3505585 +| epoch 8 | 4447/ 8400 batches | train loss 0.3649466 +| epoch 8 | 4451/ 8400 batches | train loss 0.3554305 +| epoch 8 | 4455/ 8400 batches | train loss 0.4207015 +| epoch 8 | 4459/ 8400 batches | train loss 0.2868803 +| epoch 8 | 4463/ 8400 batches | train loss 0.3348203 +| epoch 8 | 4467/ 8400 batches | train loss 0.3659123 +| epoch 8 | 4471/ 8400 batches | train loss 0.4032788 +| epoch 8 | 4475/ 8400 batches | train loss 0.4285155 +| epoch 8 | 4479/ 8400 batches | train loss 0.4250561 +| epoch 8 | 4483/ 8400 batches | train loss 0.3626193 +| epoch 8 | 4487/ 8400 batches | train loss 0.3172990 +| epoch 8 | 4491/ 8400 batches | train loss 0.3266103 +| epoch 8 | 4495/ 8400 batches | train loss 0.3954254 +| epoch 8 | 4499/ 8400 batches | train loss 0.3246665 +| epoch 8 | 4503/ 8400 batches | train loss 0.4221346 +| epoch 8 | 4507/ 8400 batches | train loss 0.3437606 +| epoch 8 | 4511/ 8400 batches | train loss 0.3162569 +| epoch 8 | 4515/ 8400 batches | train loss 0.3916461 +| epoch 8 | 4519/ 8400 batches | train loss 0.4069786 +| epoch 8 | 4523/ 8400 batches | train loss 0.3992938 +| epoch 8 | 4527/ 8400 batches | train loss 0.3612603 +| epoch 8 | 4531/ 8400 batches | train loss 0.3219410 +| epoch 8 | 4535/ 8400 batches | train loss 0.3788583 +| epoch 8 | 4539/ 8400 batches | train loss 0.4001974 +| epoch 8 | 4543/ 8400 batches | train loss 0.3607977 +| epoch 8 | 4547/ 8400 batches | train loss 0.4137594 +| epoch 8 | 4551/ 8400 batches | train loss 0.3393824 +| epoch 8 | 4555/ 8400 batches | train loss 0.3784878 +| epoch 8 | 4559/ 8400 batches | train loss 0.3070344 +| epoch 8 | 4563/ 8400 batches | train loss 0.3597785 +| epoch 8 | 4567/ 8400 batches | train loss 0.3640872 +| epoch 8 | 4571/ 8400 batches | train loss 0.3704885 +| epoch 8 | 4575/ 8400 batches | train loss 0.3812722 +| epoch 8 | 4579/ 8400 batches | train loss 0.3495585 +| epoch 8 | 4583/ 8400 batches | train loss 0.3131658 +| epoch 8 | 4587/ 8400 batches | train loss 0.3311524 +| epoch 8 | 4591/ 8400 batches | train loss 0.2919097 +| epoch 8 | 4595/ 8400 batches | train loss 0.3697351 +| epoch 8 | 4599/ 8400 batches | train loss 0.3346455 +| epoch 8 | 4603/ 8400 batches | train loss 0.4420817 +| epoch 8 | 4607/ 8400 batches | train loss 0.3952842 +| epoch 8 | 4611/ 8400 batches | train loss 0.3898380 +| epoch 8 | 4615/ 8400 batches | train loss 0.3801584 +| epoch 8 | 4619/ 8400 batches | train loss 0.3516944 +| epoch 8 | 4623/ 8400 batches | train loss 0.3858539 +| epoch 8 | 4627/ 8400 batches | train loss 0.3582861 +| epoch 8 | 4631/ 8400 batches | train loss 0.3699779 +| epoch 8 | 4635/ 8400 batches | train loss 0.2384641 +| epoch 8 | 4639/ 8400 batches | train loss 0.3864819 +| epoch 8 | 4643/ 8400 batches | train loss 0.4189582 +| epoch 8 | 4647/ 8400 batches | train loss 0.4125673 +| epoch 8 | 4651/ 8400 batches | train loss 0.3146029 +| epoch 8 | 4655/ 8400 batches | train loss 0.3350932 +| epoch 8 | 4659/ 8400 batches | train loss 0.3364340 +| epoch 8 | 4663/ 8400 batches | train loss 0.3388232 +| epoch 8 | 4667/ 8400 batches | train loss 0.3132291 +| epoch 8 | 4671/ 8400 batches | train loss 0.3434347 +| epoch 8 | 4675/ 8400 batches | train loss 0.3058770 +| epoch 8 | 4679/ 8400 batches | train loss 0.3421975 +| epoch 8 | 4683/ 8400 batches | train loss 0.3436746 +| epoch 8 | 4687/ 8400 batches | train loss 0.3546305 +| epoch 8 | 4691/ 8400 batches | train loss 0.4026952 +| epoch 8 | 4695/ 8400 batches | train loss 0.3599797 +| epoch 8 | 4699/ 8400 batches | train loss 0.4439662 +| epoch 8 | 4703/ 8400 batches | train loss 0.3884996 +| epoch 8 | 4707/ 8400 batches | train loss 0.3217865 +| epoch 8 | 4711/ 8400 batches | train loss 0.2802780 +| epoch 8 | 4715/ 8400 batches | train loss 0.3509646 +| epoch 8 | 4719/ 8400 batches | train loss 0.3514870 +| epoch 8 | 4723/ 8400 batches | train loss 0.3493920 +| epoch 8 | 4727/ 8400 batches | train loss 0.3477978 +| epoch 8 | 4731/ 8400 batches | train loss 0.4720736 +| epoch 8 | 4735/ 8400 batches | train loss 0.2222095 +| epoch 8 | 4739/ 8400 batches | train loss 0.3897246 +| epoch 8 | 4743/ 8400 batches | train loss 0.3570802 +| epoch 8 | 4747/ 8400 batches | train loss 0.4044606 +| epoch 8 | 4751/ 8400 batches | train loss 0.2278881 +| epoch 8 | 4755/ 8400 batches | train loss 0.4315297 +| epoch 8 | 4759/ 8400 batches | train loss 0.3395784 +| epoch 8 | 4763/ 8400 batches | train loss 0.2903610 +| epoch 8 | 4767/ 8400 batches | train loss 0.3273101 +| epoch 8 | 4771/ 8400 batches | train loss 0.3592288 +| epoch 8 | 4775/ 8400 batches | train loss 0.3383788 +| epoch 8 | 4779/ 8400 batches | train loss 0.3473630 +| epoch 8 | 4783/ 8400 batches | train loss 0.3270999 +| epoch 8 | 4787/ 8400 batches | train loss 0.3102421 +| epoch 8 | 4791/ 8400 batches | train loss 0.3368247 +| epoch 8 | 4795/ 8400 batches | train loss 0.3394180 +| epoch 8 | 4799/ 8400 batches | train loss 0.3427391 +| epoch 8 | 4803/ 8400 batches | train loss 0.3419536 +| epoch 8 | 4807/ 8400 batches | train loss 0.3367534 +| epoch 8 | 4811/ 8400 batches | train loss 0.3693149 +| epoch 8 | 4815/ 8400 batches | train loss 0.3692188 +| epoch 8 | 4819/ 8400 batches | train loss 0.3544969 +| epoch 8 | 4823/ 8400 batches | train loss 0.3559480 +| epoch 8 | 4827/ 8400 batches | train loss 0.3472553 +| epoch 8 | 4831/ 8400 batches | train loss 0.3093823 +| epoch 8 | 4835/ 8400 batches | train loss 0.3155641 +| epoch 8 | 4839/ 8400 batches | train loss 0.3083985 +| epoch 8 | 4843/ 8400 batches | train loss 0.3733460 +| epoch 8 | 4847/ 8400 batches | train loss 0.3636892 +| epoch 8 | 4851/ 8400 batches | train loss 0.3518751 +| epoch 8 | 4855/ 8400 batches | train loss 0.3735098 +| epoch 8 | 4859/ 8400 batches | train loss 0.3639621 +| epoch 8 | 4863/ 8400 batches | train loss 0.3644507 +| epoch 8 | 4867/ 8400 batches | train loss 0.3389910 +| epoch 8 | 4871/ 8400 batches | train loss 0.2956522 +| epoch 8 | 4875/ 8400 batches | train loss 0.2191287 +| epoch 8 | 4879/ 8400 batches | train loss 0.3220892 +| epoch 8 | 4883/ 8400 batches | train loss 0.3535227 +| epoch 8 | 4887/ 8400 batches | train loss 0.3526449 +| epoch 8 | 4891/ 8400 batches | train loss 0.3272671 +| epoch 8 | 4895/ 8400 batches | train loss 0.3720016 +| epoch 8 | 4899/ 8400 batches | train loss 0.3519964 +| epoch 8 | 4903/ 8400 batches | train loss 0.3611381 +| epoch 8 | 4907/ 8400 batches | train loss 0.3986717 +| epoch 8 | 4911/ 8400 batches | train loss 0.3996088 +| epoch 8 | 4915/ 8400 batches | train loss 0.3469772 +| epoch 8 | 4919/ 8400 batches | train loss 0.3287478 +| epoch 8 | 4923/ 8400 batches | train loss 0.3125663 +| epoch 8 | 4927/ 8400 batches | train loss 0.3988136 +| epoch 8 | 4931/ 8400 batches | train loss 0.2950962 +| epoch 8 | 4935/ 8400 batches | train loss 0.3444661 +| epoch 8 | 4939/ 8400 batches | train loss 0.2841486 +| epoch 8 | 4943/ 8400 batches | train loss 0.3182116 +| epoch 8 | 4947/ 8400 batches | train loss 0.3493651 +| epoch 8 | 4951/ 8400 batches | train loss 0.3465376 +| epoch 8 | 4955/ 8400 batches | train loss 0.3767365 +| epoch 8 | 4959/ 8400 batches | train loss 0.3896666 +| epoch 8 | 4963/ 8400 batches | train loss 0.2204151 +| epoch 8 | 4967/ 8400 batches | train loss 0.3202024 +| epoch 8 | 4971/ 8400 batches | train loss 0.4233919 +| epoch 8 | 4975/ 8400 batches | train loss 0.3744461 +| epoch 8 | 4979/ 8400 batches | train loss 0.3177593 +| epoch 8 | 4983/ 8400 batches | train loss 0.3381587 +| epoch 8 | 4987/ 8400 batches | train loss 0.3806816 +| epoch 8 | 4991/ 8400 batches | train loss 0.3514940 +| epoch 8 | 4995/ 8400 batches | train loss 0.3753008 +| epoch 8 | 4999/ 8400 batches | train loss 0.3588985 +| epoch 8 | 5003/ 8400 batches | train loss 0.3164157 +| epoch 8 | 5007/ 8400 batches | train loss 0.4039610 +| epoch 8 | 5011/ 8400 batches | train loss 0.2931710 +| epoch 8 | 5015/ 8400 batches | train loss 0.3755367 +| epoch 8 | 5019/ 8400 batches | train loss 0.3710385 +| epoch 8 | 5023/ 8400 batches | train loss 0.3104175 +| epoch 8 | 5027/ 8400 batches | train loss 0.4091675 +| epoch 8 | 5031/ 8400 batches | train loss 0.3480220 +| epoch 8 | 5035/ 8400 batches | train loss 0.3920697 +| epoch 8 | 5039/ 8400 batches | train loss 0.3413604 +| epoch 8 | 5043/ 8400 batches | train loss 0.3354685 +| epoch 8 | 5047/ 8400 batches | train loss 0.3015714 +| epoch 8 | 5051/ 8400 batches | train loss 0.2787630 +| epoch 8 | 5055/ 8400 batches | train loss 0.3138508 +| epoch 8 | 5059/ 8400 batches | train loss 0.4121875 +| epoch 8 | 5063/ 8400 batches | train loss 0.3787750 +| epoch 8 | 5067/ 8400 batches | train loss 0.3034671 +| epoch 8 | 5071/ 8400 batches | train loss 0.3626100 +| epoch 8 | 5075/ 8400 batches | train loss 0.4074679 +| epoch 8 | 5079/ 8400 batches | train loss 0.3757559 +| epoch 8 | 5083/ 8400 batches | train loss 0.3858974 +| epoch 8 | 5087/ 8400 batches | train loss 0.3497526 +| epoch 8 | 5091/ 8400 batches | train loss 0.3302993 +| epoch 8 | 5095/ 8400 batches | train loss 0.4288083 +| epoch 8 | 5099/ 8400 batches | train loss 0.3171758 +| epoch 8 | 5103/ 8400 batches | train loss 0.3103007 +| epoch 8 | 5107/ 8400 batches | train loss 0.3042437 +| epoch 8 | 5111/ 8400 batches | train loss 0.4640965 +| epoch 8 | 5115/ 8400 batches | train loss 0.3023735 +| epoch 8 | 5119/ 8400 batches | train loss 0.4257524 +| epoch 8 | 5123/ 8400 batches | train loss 0.3287407 +| epoch 8 | 5127/ 8400 batches | train loss 0.3261327 +| epoch 8 | 5131/ 8400 batches | train loss 0.3746603 +| epoch 8 | 5135/ 8400 batches | train loss 0.5103584 +| epoch 8 | 5139/ 8400 batches | train loss 0.2593450 +| epoch 8 | 5143/ 8400 batches | train loss 0.3025405 +| epoch 8 | 5147/ 8400 batches | train loss 0.4047515 +| epoch 8 | 5151/ 8400 batches | train loss 0.4048063 +| epoch 8 | 5155/ 8400 batches | train loss 0.3560144 +| epoch 8 | 5159/ 8400 batches | train loss 0.3921376 +| epoch 8 | 5163/ 8400 batches | train loss 0.3368235 +| epoch 8 | 5167/ 8400 batches | train loss 0.3561326 +| epoch 8 | 5171/ 8400 batches | train loss 0.3587541 +| epoch 8 | 5175/ 8400 batches | train loss 0.2916237 +| epoch 8 | 5179/ 8400 batches | train loss 0.3165798 +| epoch 8 | 5183/ 8400 batches | train loss 0.3416671 +| epoch 8 | 5187/ 8400 batches | train loss 0.3616677 +| epoch 8 | 5191/ 8400 batches | train loss 0.3208387 +| epoch 8 | 5195/ 8400 batches | train loss 0.3431193 +| epoch 8 | 5199/ 8400 batches | train loss 0.3449694 +| epoch 8 | 5203/ 8400 batches | train loss 0.3789266 +| epoch 8 | 5207/ 8400 batches | train loss 0.3219027 +| epoch 8 | 5211/ 8400 batches | train loss 0.3187516 +| epoch 8 | 5215/ 8400 batches | train loss 0.3907952 +| epoch 8 | 5219/ 8400 batches | train loss 0.3605783 +| epoch 8 | 5223/ 8400 batches | train loss 0.4247523 +| epoch 8 | 5227/ 8400 batches | train loss 0.2418238 +| epoch 8 | 5231/ 8400 batches | train loss 0.4379090 +| epoch 8 | 5235/ 8400 batches | train loss 0.4155053 +| epoch 8 | 5239/ 8400 batches | train loss 0.4099311 +| epoch 8 | 5243/ 8400 batches | train loss 0.4341835 +| epoch 8 | 5247/ 8400 batches | train loss 0.3315542 +| epoch 8 | 5251/ 8400 batches | train loss 0.3733054 +| epoch 8 | 5255/ 8400 batches | train loss 0.2869053 +| epoch 8 | 5259/ 8400 batches | train loss 0.4112356 +| epoch 8 | 5263/ 8400 batches | train loss 0.3817503 +| epoch 8 | 5267/ 8400 batches | train loss 0.3938754 +| epoch 8 | 5271/ 8400 batches | train loss 0.3690073 +| epoch 8 | 5275/ 8400 batches | train loss 0.4454109 +| epoch 8 | 5279/ 8400 batches | train loss 0.3250737 +| epoch 8 | 5283/ 8400 batches | train loss 0.3441102 +| epoch 8 | 5287/ 8400 batches | train loss 0.2652782 +| epoch 8 | 5291/ 8400 batches | train loss 0.3079999 +| epoch 8 | 5295/ 8400 batches | train loss 0.3911497 +| epoch 8 | 5299/ 8400 batches | train loss 0.2609598 +| epoch 8 | 5303/ 8400 batches | train loss 0.3405305 +| epoch 8 | 5307/ 8400 batches | train loss 0.3390594 +| epoch 8 | 5311/ 8400 batches | train loss 0.3920715 +| epoch 8 | 5315/ 8400 batches | train loss 0.3318650 +| epoch 8 | 5319/ 8400 batches | train loss 0.3432344 +| epoch 8 | 5323/ 8400 batches | train loss 0.3575670 +| epoch 8 | 5327/ 8400 batches | train loss 0.3039326 +| epoch 8 | 5331/ 8400 batches | train loss 0.4329517 +| epoch 8 | 5335/ 8400 batches | train loss 0.3434067 +| epoch 8 | 5339/ 8400 batches | train loss 0.3130217 +| epoch 8 | 5343/ 8400 batches | train loss 0.3745947 +| epoch 8 | 5347/ 8400 batches | train loss 0.3759838 +| epoch 8 | 5351/ 8400 batches | train loss 0.3168695 +| epoch 8 | 5355/ 8400 batches | train loss 0.3299332 +| epoch 8 | 5359/ 8400 batches | train loss 0.2797039 +| epoch 8 | 5363/ 8400 batches | train loss 0.3586420 +| epoch 8 | 5367/ 8400 batches | train loss 0.3613445 +| epoch 8 | 5371/ 8400 batches | train loss 0.3648103 +| epoch 8 | 5375/ 8400 batches | train loss 0.4109949 +| epoch 8 | 5379/ 8400 batches | train loss 0.3717332 +| epoch 8 | 5383/ 8400 batches | train loss 0.3947937 +| epoch 8 | 5387/ 8400 batches | train loss 0.3603364 +| epoch 8 | 5391/ 8400 batches | train loss 0.2878748 +| epoch 8 | 5395/ 8400 batches | train loss 0.3067269 +| epoch 8 | 5399/ 8400 batches | train loss 0.3314233 +| epoch 8 | 5403/ 8400 batches | train loss 0.3335280 +| epoch 8 | 5407/ 8400 batches | train loss 0.2286899 +| epoch 8 | 5411/ 8400 batches | train loss 0.3675622 +| epoch 8 | 5415/ 8400 batches | train loss 0.3095126 +| epoch 8 | 5419/ 8400 batches | train loss 0.2357550 +| epoch 8 | 5423/ 8400 batches | train loss 0.4030812 +| epoch 8 | 5427/ 8400 batches | train loss 0.4160321 +| epoch 8 | 5431/ 8400 batches | train loss 0.3620847 +| epoch 8 | 5435/ 8400 batches | train loss 0.3467364 +| epoch 8 | 5439/ 8400 batches | train loss 0.3462514 +| epoch 8 | 5443/ 8400 batches | train loss 0.4186514 +| epoch 8 | 5447/ 8400 batches | train loss 0.2796380 +| epoch 8 | 5451/ 8400 batches | train loss 0.3282981 +| epoch 8 | 5455/ 8400 batches | train loss 0.3590353 +| epoch 8 | 5459/ 8400 batches | train loss 0.3102067 +| epoch 8 | 5463/ 8400 batches | train loss 0.3379947 +| epoch 8 | 5467/ 8400 batches | train loss 0.3834428 +| epoch 8 | 5471/ 8400 batches | train loss 0.2547566 +| epoch 8 | 5475/ 8400 batches | train loss 0.3697705 +| epoch 8 | 5479/ 8400 batches | train loss 0.4150472 +| epoch 8 | 5483/ 8400 batches | train loss 0.3129384 +| epoch 8 | 5487/ 8400 batches | train loss 0.3856163 +| epoch 8 | 5491/ 8400 batches | train loss 0.3928829 +| epoch 8 | 5495/ 8400 batches | train loss 0.2215487 +| epoch 8 | 5499/ 8400 batches | train loss 0.3374783 +| epoch 8 | 5503/ 8400 batches | train loss 0.3840759 +| epoch 8 | 5507/ 8400 batches | train loss 0.3529468 +| epoch 8 | 5511/ 8400 batches | train loss 0.4228436 +| epoch 8 | 5515/ 8400 batches | train loss 0.3579643 +| epoch 8 | 5519/ 8400 batches | train loss 0.2989713 +| epoch 8 | 5523/ 8400 batches | train loss 0.3763968 +| epoch 8 | 5527/ 8400 batches | train loss 0.3302063 +| epoch 8 | 5531/ 8400 batches | train loss 0.3656938 +| epoch 8 | 5535/ 8400 batches | train loss 0.3478787 +| epoch 8 | 5539/ 8400 batches | train loss 0.3436317 +| epoch 8 | 5543/ 8400 batches | train loss 0.3706107 +| epoch 8 | 5547/ 8400 batches | train loss 0.3533694 +| epoch 8 | 5551/ 8400 batches | train loss 0.3492428 +| epoch 8 | 5555/ 8400 batches | train loss 0.3182146 +| epoch 8 | 5559/ 8400 batches | train loss 0.3972259 +| epoch 8 | 5563/ 8400 batches | train loss 0.4208566 +| epoch 8 | 5567/ 8400 batches | train loss 0.3637577 +| epoch 8 | 5571/ 8400 batches | train loss 0.3134476 +| epoch 8 | 5575/ 8400 batches | train loss 0.3040662 +| epoch 8 | 5579/ 8400 batches | train loss 0.3369799 +| epoch 8 | 5583/ 8400 batches | train loss 0.3668831 +| epoch 8 | 5587/ 8400 batches | train loss 0.3727525 +| epoch 8 | 5591/ 8400 batches | train loss 0.3942345 +| epoch 8 | 5595/ 8400 batches | train loss 0.3657862 +| epoch 8 | 5599/ 8400 batches | train loss 0.3340679 +| epoch 8 | 5603/ 8400 batches | train loss 0.3797267 +| epoch 8 | 5607/ 8400 batches | train loss 0.3254360 +| epoch 8 | 5611/ 8400 batches | train loss 0.3963533 +| epoch 8 | 5615/ 8400 batches | train loss 0.3115070 +| epoch 8 | 5619/ 8400 batches | train loss 0.3749242 +| epoch 8 | 5623/ 8400 batches | train loss 0.4101163 +| epoch 8 | 5627/ 8400 batches | train loss 0.3781135 +| epoch 8 | 5631/ 8400 batches | train loss 0.4828759 +| epoch 8 | 5635/ 8400 batches | train loss 0.3680876 +| epoch 8 | 5639/ 8400 batches | train loss 0.3117813 +| epoch 8 | 5643/ 8400 batches | train loss 0.3690263 +| epoch 8 | 5647/ 8400 batches | train loss 0.3258731 +| epoch 8 | 5651/ 8400 batches | train loss 0.3278924 +| epoch 8 | 5655/ 8400 batches | train loss 0.3916285 +| epoch 8 | 5659/ 8400 batches | train loss 0.3489437 +| epoch 8 | 5663/ 8400 batches | train loss 0.3593456 +| epoch 8 | 5667/ 8400 batches | train loss 0.3544953 +| epoch 8 | 5671/ 8400 batches | train loss 0.3598011 +| epoch 8 | 5675/ 8400 batches | train loss 0.3779286 +| epoch 8 | 5679/ 8400 batches | train loss 0.3763342 +| epoch 8 | 5683/ 8400 batches | train loss 0.3339568 +| epoch 8 | 5687/ 8400 batches | train loss 0.3757416 +| epoch 8 | 5691/ 8400 batches | train loss 0.3491174 +| epoch 8 | 5695/ 8400 batches | train loss 0.3588695 +| epoch 8 | 5699/ 8400 batches | train loss 0.2922527 +| epoch 8 | 5703/ 8400 batches | train loss 0.3515079 +| epoch 8 | 5707/ 8400 batches | train loss 0.2872367 +| epoch 8 | 5711/ 8400 batches | train loss 0.3950260 +| epoch 8 | 5715/ 8400 batches | train loss 0.3647236 +| epoch 8 | 5719/ 8400 batches | train loss 0.3105111 +| epoch 8 | 5723/ 8400 batches | train loss 0.3234317 +| epoch 8 | 5727/ 8400 batches | train loss 0.3704281 +| epoch 8 | 5731/ 8400 batches | train loss 0.3445599 +| epoch 8 | 5735/ 8400 batches | train loss 0.3628892 +| epoch 8 | 5739/ 8400 batches | train loss 0.3850589 +| epoch 8 | 5743/ 8400 batches | train loss 0.3843347 +| epoch 8 | 5747/ 8400 batches | train loss 0.3348611 +| epoch 8 | 5751/ 8400 batches | train loss 0.3082680 +| epoch 8 | 5755/ 8400 batches | train loss 0.3041104 +| epoch 8 | 5759/ 8400 batches | train loss 0.3005225 +| epoch 8 | 5763/ 8400 batches | train loss 0.3483587 +| epoch 8 | 5767/ 8400 batches | train loss 0.3236446 +| epoch 8 | 5771/ 8400 batches | train loss 0.3541393 +| epoch 8 | 5775/ 8400 batches | train loss 0.3752547 +| epoch 8 | 5779/ 8400 batches | train loss 0.4096658 +| epoch 8 | 5783/ 8400 batches | train loss 0.4089676 +| epoch 8 | 5787/ 8400 batches | train loss 0.3227100 +| epoch 8 | 5791/ 8400 batches | train loss 0.3829538 +| epoch 8 | 5795/ 8400 batches | train loss 0.4862196 +| epoch 8 | 5799/ 8400 batches | train loss 0.3375992 +| epoch 8 | 5803/ 8400 batches | train loss 0.4477493 +| epoch 8 | 5807/ 8400 batches | train loss 0.3500120 +| epoch 8 | 5811/ 8400 batches | train loss 0.3806141 +| epoch 8 | 5815/ 8400 batches | train loss 0.4052910 +| epoch 8 | 5819/ 8400 batches | train loss 0.3695581 +| epoch 8 | 5823/ 8400 batches | train loss 0.3581204 +| epoch 8 | 5827/ 8400 batches | train loss 0.3439063 +| epoch 8 | 5831/ 8400 batches | train loss 0.3371083 +| epoch 8 | 5835/ 8400 batches | train loss 0.3869931 +| epoch 8 | 5839/ 8400 batches | train loss 0.3246824 +| epoch 8 | 5843/ 8400 batches | train loss 0.4116016 +| epoch 8 | 5847/ 8400 batches | train loss 0.3900124 +| epoch 8 | 5851/ 8400 batches | train loss 0.2966187 +| epoch 8 | 5855/ 8400 batches | train loss 0.3614805 +| epoch 8 | 5859/ 8400 batches | train loss 0.3614540 +| epoch 8 | 5863/ 8400 batches | train loss 0.3154070 +| epoch 8 | 5867/ 8400 batches | train loss 0.3930554 +| epoch 8 | 5871/ 8400 batches | train loss 0.3765138 +| epoch 8 | 5875/ 8400 batches | train loss 0.3497935 +| epoch 8 | 5879/ 8400 batches | train loss 0.4637220 +| epoch 8 | 5883/ 8400 batches | train loss 0.3409756 +| epoch 8 | 5887/ 8400 batches | train loss 0.3196988 +| epoch 8 | 5891/ 8400 batches | train loss 0.3470811 +| epoch 8 | 5895/ 8400 batches | train loss 0.3622904 +| epoch 8 | 5899/ 8400 batches | train loss 0.3498141 +| epoch 8 | 5903/ 8400 batches | train loss 0.3377176 +| epoch 8 | 5907/ 8400 batches | train loss 0.3996763 +| epoch 8 | 5911/ 8400 batches | train loss 0.2958232 +| epoch 8 | 5915/ 8400 batches | train loss 0.3822154 +| epoch 8 | 5919/ 8400 batches | train loss 0.4221385 +| epoch 8 | 5923/ 8400 batches | train loss 0.3891619 +| epoch 8 | 5927/ 8400 batches | train loss 0.3663834 +| epoch 8 | 5931/ 8400 batches | train loss 0.3645809 +| epoch 8 | 5935/ 8400 batches | train loss 0.3857262 +| epoch 8 | 5939/ 8400 batches | train loss 0.4006099 +| epoch 8 | 5943/ 8400 batches | train loss 0.3229147 +| epoch 8 | 5947/ 8400 batches | train loss 0.3796818 +| epoch 8 | 5951/ 8400 batches | train loss 0.3417666 +| epoch 8 | 5955/ 8400 batches | train loss 0.3577514 +| epoch 8 | 5959/ 8400 batches | train loss 0.3805951 +| epoch 8 | 5963/ 8400 batches | train loss 0.3551408 +| epoch 8 | 5967/ 8400 batches | train loss 0.4520386 +| epoch 8 | 5971/ 8400 batches | train loss 0.3528591 +| epoch 8 | 5975/ 8400 batches | train loss 0.3308453 +| epoch 8 | 5979/ 8400 batches | train loss 0.3846484 +| epoch 8 | 5983/ 8400 batches | train loss 0.3930846 +| epoch 8 | 5987/ 8400 batches | train loss 0.3300925 +| epoch 8 | 5991/ 8400 batches | train loss 0.3626027 +| epoch 8 | 5995/ 8400 batches | train loss 0.3304428 +| epoch 8 | 5999/ 8400 batches | train loss 0.3719951 +| epoch 8 | 6003/ 8400 batches | train loss 0.3781961 +| epoch 8 | 6007/ 8400 batches | train loss 0.3298133 +| epoch 8 | 6011/ 8400 batches | train loss 0.2924904 +| epoch 8 | 6015/ 8400 batches | train loss 0.3679307 +| epoch 8 | 6019/ 8400 batches | train loss 0.3432207 +| epoch 8 | 6023/ 8400 batches | train loss 0.3837873 +| epoch 8 | 6027/ 8400 batches | train loss 0.3876221 +| epoch 8 | 6031/ 8400 batches | train loss 0.3482828 +| epoch 8 | 6035/ 8400 batches | train loss 0.3257114 +| epoch 8 | 6039/ 8400 batches | train loss 0.3037653 +| epoch 8 | 6043/ 8400 batches | train loss 0.3421455 +| epoch 8 | 6047/ 8400 batches | train loss 0.3097260 +| epoch 8 | 6051/ 8400 batches | train loss 0.3281088 +| epoch 8 | 6055/ 8400 batches | train loss 0.3640157 +| epoch 8 | 6059/ 8400 batches | train loss 0.3574755 +| epoch 8 | 6063/ 8400 batches | train loss 0.3539769 +| epoch 8 | 6067/ 8400 batches | train loss 0.4727591 +| epoch 8 | 6071/ 8400 batches | train loss 0.3971273 +| epoch 8 | 6075/ 8400 batches | train loss 0.2875213 +| epoch 8 | 6079/ 8400 batches | train loss 0.3488942 +| epoch 8 | 6083/ 8400 batches | train loss 0.2948217 +| epoch 8 | 6087/ 8400 batches | train loss 0.3926303 +| epoch 8 | 6091/ 8400 batches | train loss 0.3860028 +| epoch 8 | 6095/ 8400 batches | train loss 0.3100781 +| epoch 8 | 6099/ 8400 batches | train loss 0.3137384 +| epoch 8 | 6103/ 8400 batches | train loss 0.3352258 +| epoch 8 | 6107/ 8400 batches | train loss 0.3388513 +| epoch 8 | 6111/ 8400 batches | train loss 0.4002293 +| epoch 8 | 6115/ 8400 batches | train loss 0.3792247 +| epoch 8 | 6119/ 8400 batches | train loss 0.3612721 +| epoch 8 | 6123/ 8400 batches | train loss 0.3110860 +| epoch 8 | 6127/ 8400 batches | train loss 0.4013931 +| epoch 8 | 6131/ 8400 batches | train loss 0.3310165 +| epoch 8 | 6135/ 8400 batches | train loss 0.3212008 +| epoch 8 | 6139/ 8400 batches | train loss 0.3972324 +| epoch 8 | 6143/ 8400 batches | train loss 0.3116757 +| epoch 8 | 6147/ 8400 batches | train loss 0.3554732 +| epoch 8 | 6151/ 8400 batches | train loss 0.3557381 +| epoch 8 | 6155/ 8400 batches | train loss 0.3336144 +| epoch 8 | 6159/ 8400 batches | train loss 0.3045558 +| epoch 8 | 6163/ 8400 batches | train loss 0.3448007 +| epoch 8 | 6167/ 8400 batches | train loss 0.2791033 +| epoch 8 | 6171/ 8400 batches | train loss 0.3533151 +| epoch 8 | 6175/ 8400 batches | train loss 0.3590879 +| epoch 8 | 6179/ 8400 batches | train loss 0.2969602 +| epoch 8 | 6183/ 8400 batches | train loss 0.4346540 +| epoch 8 | 6187/ 8400 batches | train loss 0.2987349 +| epoch 8 | 6191/ 8400 batches | train loss 0.3237486 +| epoch 8 | 6195/ 8400 batches | train loss 0.3873157 +| epoch 8 | 6199/ 8400 batches | train loss 0.3506960 +| epoch 8 | 6203/ 8400 batches | train loss 0.3760402 +| epoch 8 | 6207/ 8400 batches | train loss 0.3908244 +| epoch 8 | 6211/ 8400 batches | train loss 0.3889967 +| epoch 8 | 6215/ 8400 batches | train loss 0.3969309 +| epoch 8 | 6219/ 8400 batches | train loss 0.3242722 +| epoch 8 | 6223/ 8400 batches | train loss 0.2970855 +| epoch 8 | 6227/ 8400 batches | train loss 0.3604318 +| epoch 8 | 6231/ 8400 batches | train loss 0.3533702 +| epoch 8 | 6235/ 8400 batches | train loss 0.4058169 +| epoch 8 | 6239/ 8400 batches | train loss 0.3412041 +| epoch 8 | 6243/ 8400 batches | train loss 0.3325052 +| epoch 8 | 6247/ 8400 batches | train loss 0.3923097 +| epoch 8 | 6251/ 8400 batches | train loss 0.3507440 +| epoch 8 | 6255/ 8400 batches | train loss 0.2971480 +| epoch 8 | 6259/ 8400 batches | train loss 0.3346472 +| epoch 8 | 6263/ 8400 batches | train loss 0.4418015 +| epoch 8 | 6267/ 8400 batches | train loss 0.3326109 +| epoch 8 | 6271/ 8400 batches | train loss 0.3685097 +| epoch 8 | 6275/ 8400 batches | train loss 0.3861964 +| epoch 8 | 6279/ 8400 batches | train loss 0.3698882 +| epoch 8 | 6283/ 8400 batches | train loss 0.3469524 +| epoch 8 | 6287/ 8400 batches | train loss 0.3963462 +| epoch 8 | 6291/ 8400 batches | train loss 0.3541055 +| epoch 8 | 6295/ 8400 batches | train loss 0.3119429 +| epoch 8 | 6299/ 8400 batches | train loss 0.3086588 +| epoch 8 | 6303/ 8400 batches | train loss 0.4091224 +| epoch 8 | 6307/ 8400 batches | train loss 0.3541857 +| epoch 8 | 6311/ 8400 batches | train loss 0.3712264 +| epoch 8 | 6315/ 8400 batches | train loss 0.3595320 +| epoch 8 | 6319/ 8400 batches | train loss 0.3419062 +| epoch 8 | 6323/ 8400 batches | train loss 0.3381082 +| epoch 8 | 6327/ 8400 batches | train loss 0.3301136 +| epoch 8 | 6331/ 8400 batches | train loss 0.3771587 +| epoch 8 | 6335/ 8400 batches | train loss 0.3750561 +| epoch 8 | 6339/ 8400 batches | train loss 0.3169053 +| epoch 8 | 6343/ 8400 batches | train loss 0.4146841 +| epoch 8 | 6347/ 8400 batches | train loss 0.3469897 +| epoch 8 | 6351/ 8400 batches | train loss 0.3519771 +| epoch 8 | 6355/ 8400 batches | train loss 0.3851409 +| epoch 8 | 6359/ 8400 batches | train loss 0.3812035 +| epoch 8 | 6363/ 8400 batches | train loss 0.3767877 +| epoch 8 | 6367/ 8400 batches | train loss 0.3252348 +| epoch 8 | 6371/ 8400 batches | train loss 0.2927749 +| epoch 8 | 6375/ 8400 batches | train loss 0.3206332 +| epoch 8 | 6379/ 8400 batches | train loss 0.3449017 +| epoch 8 | 6383/ 8400 batches | train loss 0.3894235 +| epoch 8 | 6387/ 8400 batches | train loss 0.3089227 +| epoch 8 | 6391/ 8400 batches | train loss 0.4006661 +| epoch 8 | 6395/ 8400 batches | train loss 0.3635102 +| epoch 8 | 6399/ 8400 batches | train loss 0.3581166 +| epoch 8 | 6403/ 8400 batches | train loss 0.3597128 +| epoch 8 | 6407/ 8400 batches | train loss 0.3199683 +| epoch 8 | 6411/ 8400 batches | train loss 0.3745272 +| epoch 8 | 6415/ 8400 batches | train loss 0.3824189 +| epoch 8 | 6419/ 8400 batches | train loss 0.3321294 +| epoch 8 | 6423/ 8400 batches | train loss 0.3589450 +| epoch 8 | 6427/ 8400 batches | train loss 0.3603836 +| epoch 8 | 6431/ 8400 batches | train loss 0.2993141 +| epoch 8 | 6435/ 8400 batches | train loss 0.3465948 +| epoch 8 | 6439/ 8400 batches | train loss 0.4011073 +| epoch 8 | 6443/ 8400 batches | train loss 0.2897175 +| epoch 8 | 6447/ 8400 batches | train loss 0.3328951 +| epoch 8 | 6451/ 8400 batches | train loss 0.2299590 +| epoch 8 | 6455/ 8400 batches | train loss 0.3137935 +| epoch 8 | 6459/ 8400 batches | train loss 0.3366081 +| epoch 8 | 6463/ 8400 batches | train loss 0.3623815 +| epoch 8 | 6467/ 8400 batches | train loss 0.3804327 +| epoch 8 | 6471/ 8400 batches | train loss 0.3134715 +| epoch 8 | 6475/ 8400 batches | train loss 0.3004422 +| epoch 8 | 6479/ 8400 batches | train loss 0.4601455 +| epoch 8 | 6483/ 8400 batches | train loss 0.4096310 +| epoch 8 | 6487/ 8400 batches | train loss 0.2933839 +| epoch 8 | 6491/ 8400 batches | train loss 0.4044021 +| epoch 8 | 6495/ 8400 batches | train loss 0.3588152 +| epoch 8 | 6499/ 8400 batches | train loss 0.3534388 +| epoch 8 | 6503/ 8400 batches | train loss 0.3779848 +| epoch 8 | 6507/ 8400 batches | train loss 0.3882235 +| epoch 8 | 6511/ 8400 batches | train loss 0.2952868 +| epoch 8 | 6515/ 8400 batches | train loss 0.4512080 +| epoch 8 | 6519/ 8400 batches | train loss 0.3965031 +| epoch 8 | 6523/ 8400 batches | train loss 0.3565708 +| epoch 8 | 6527/ 8400 batches | train loss 0.2995783 +| epoch 8 | 6531/ 8400 batches | train loss 0.3372175 +| epoch 8 | 6535/ 8400 batches | train loss 0.3318034 +| epoch 8 | 6539/ 8400 batches | train loss 0.2641895 +| epoch 8 | 6543/ 8400 batches | train loss 0.3560181 +| epoch 8 | 6547/ 8400 batches | train loss 0.3461547 +| epoch 8 | 6551/ 8400 batches | train loss 0.3434652 +| epoch 8 | 6555/ 8400 batches | train loss 0.4061539 +| epoch 8 | 6559/ 8400 batches | train loss 0.3247023 +| epoch 8 | 6563/ 8400 batches | train loss 0.3198867 +| epoch 8 | 6567/ 8400 batches | train loss 0.3223077 +| epoch 8 | 6571/ 8400 batches | train loss 0.3889062 +| epoch 8 | 6575/ 8400 batches | train loss 0.3012033 +| epoch 8 | 6579/ 8400 batches | train loss 0.3703930 +| epoch 8 | 6583/ 8400 batches | train loss 0.3995288 +| epoch 8 | 6587/ 8400 batches | train loss 0.3092300 +| epoch 8 | 6591/ 8400 batches | train loss 0.3776515 +| epoch 8 | 6595/ 8400 batches | train loss 0.3749458 +| epoch 8 | 6599/ 8400 batches | train loss 0.3998634 +| epoch 8 | 6603/ 8400 batches | train loss 0.3182908 +| epoch 8 | 6607/ 8400 batches | train loss 0.3525637 +| epoch 8 | 6611/ 8400 batches | train loss 0.3375435 +| epoch 8 | 6615/ 8400 batches | train loss 0.3213940 +| epoch 8 | 6619/ 8400 batches | train loss 0.3212641 +| epoch 8 | 6623/ 8400 batches | train loss 0.3204096 +| epoch 8 | 6627/ 8400 batches | train loss 0.3144411 +| epoch 8 | 6631/ 8400 batches | train loss 0.3670847 +| epoch 8 | 6635/ 8400 batches | train loss 0.3520529 +| epoch 8 | 6639/ 8400 batches | train loss 0.3556716 +| epoch 8 | 6643/ 8400 batches | train loss 0.4185298 +| epoch 8 | 6647/ 8400 batches | train loss 0.3378128 +| epoch 8 | 6651/ 8400 batches | train loss 0.3622261 +| epoch 8 | 6655/ 8400 batches | train loss 0.3203574 +| epoch 8 | 6659/ 8400 batches | train loss 0.3236447 +| epoch 8 | 6663/ 8400 batches | train loss 0.3285859 +| epoch 8 | 6667/ 8400 batches | train loss 0.3520607 +| epoch 8 | 6671/ 8400 batches | train loss 0.2910011 +| epoch 8 | 6675/ 8400 batches | train loss 0.3510179 +| epoch 8 | 6679/ 8400 batches | train loss 0.3996350 +| epoch 8 | 6683/ 8400 batches | train loss 0.3652589 +| epoch 8 | 6687/ 8400 batches | train loss 0.3847004 +| epoch 8 | 6691/ 8400 batches | train loss 0.4118788 +| epoch 8 | 6695/ 8400 batches | train loss 0.4794662 +| epoch 8 | 6699/ 8400 batches | train loss 0.3082645 +| epoch 8 | 6703/ 8400 batches | train loss 0.2618002 +| epoch 8 | 6707/ 8400 batches | train loss 0.3616420 +| epoch 8 | 6711/ 8400 batches | train loss 0.2214899 +| epoch 8 | 6715/ 8400 batches | train loss 0.3731958 +| epoch 8 | 6719/ 8400 batches | train loss 0.3088478 +| epoch 8 | 6723/ 8400 batches | train loss 0.3072912 +| epoch 8 | 6727/ 8400 batches | train loss 0.3317617 +| epoch 8 | 6731/ 8400 batches | train loss 0.2940481 +| epoch 8 | 6735/ 8400 batches | train loss 0.3841745 +| epoch 8 | 6739/ 8400 batches | train loss 0.3096467 +| epoch 8 | 6743/ 8400 batches | train loss 0.3635154 +| epoch 8 | 6747/ 8400 batches | train loss 0.4775854 +| epoch 8 | 6751/ 8400 batches | train loss 0.3509883 +| epoch 8 | 6755/ 8400 batches | train loss 0.3362300 +| epoch 8 | 6759/ 8400 batches | train loss 0.3938275 +| epoch 8 | 6763/ 8400 batches | train loss 0.3595922 +| epoch 8 | 6767/ 8400 batches | train loss 0.3942221 +| epoch 8 | 6771/ 8400 batches | train loss 0.3758516 +| epoch 8 | 6775/ 8400 batches | train loss 0.2497285 +| epoch 8 | 6779/ 8400 batches | train loss 0.3536450 +| epoch 8 | 6783/ 8400 batches | train loss 0.3827270 +| epoch 8 | 6787/ 8400 batches | train loss 0.3100318 +| epoch 8 | 6791/ 8400 batches | train loss 0.3711936 +| epoch 8 | 6795/ 8400 batches | train loss 0.3489665 +| epoch 8 | 6799/ 8400 batches | train loss 0.3692424 +| epoch 8 | 6803/ 8400 batches | train loss 0.3791445 +| epoch 8 | 6807/ 8400 batches | train loss 0.3970973 +| epoch 8 | 6811/ 8400 batches | train loss 0.3732168 +| epoch 8 | 6815/ 8400 batches | train loss 0.3915709 +| epoch 8 | 6819/ 8400 batches | train loss 0.3981519 +| epoch 8 | 6823/ 8400 batches | train loss 0.3467883 +| epoch 8 | 6827/ 8400 batches | train loss 0.3065371 +| epoch 8 | 6831/ 8400 batches | train loss 0.3861628 +| epoch 8 | 6835/ 8400 batches | train loss 0.3933840 +| epoch 8 | 6839/ 8400 batches | train loss 0.3452801 +| epoch 8 | 6843/ 8400 batches | train loss 0.3617241 +| epoch 8 | 6847/ 8400 batches | train loss 0.2867407 +| epoch 8 | 6851/ 8400 batches | train loss 0.2702311 +| epoch 8 | 6855/ 8400 batches | train loss 0.3834975 +| epoch 8 | 6859/ 8400 batches | train loss 0.4104729 +| epoch 8 | 6863/ 8400 batches | train loss 0.3669274 +| epoch 8 | 6867/ 8400 batches | train loss 0.3003673 +| epoch 8 | 6871/ 8400 batches | train loss 0.3263122 +| epoch 8 | 6875/ 8400 batches | train loss 0.2913991 +| epoch 8 | 6879/ 8400 batches | train loss 0.3140178 +| epoch 8 | 6883/ 8400 batches | train loss 0.2554665 +| epoch 8 | 6887/ 8400 batches | train loss 0.3453254 +| epoch 8 | 6891/ 8400 batches | train loss 0.3081875 +| epoch 8 | 6895/ 8400 batches | train loss 0.3694218 +| epoch 8 | 6899/ 8400 batches | train loss 0.2953885 +| epoch 8 | 6903/ 8400 batches | train loss 0.3325798 +| epoch 8 | 6907/ 8400 batches | train loss 0.2926127 +| epoch 8 | 6911/ 8400 batches | train loss 0.3930166 +| epoch 8 | 6915/ 8400 batches | train loss 0.3664251 +| epoch 8 | 6919/ 8400 batches | train loss 0.3078591 +| epoch 8 | 6923/ 8400 batches | train loss 0.3124826 +| epoch 8 | 6927/ 8400 batches | train loss 0.3450773 +| epoch 8 | 6931/ 8400 batches | train loss 0.3068887 +| epoch 8 | 6935/ 8400 batches | train loss 0.3579919 +| epoch 8 | 6939/ 8400 batches | train loss 0.3397463 +| epoch 8 | 6943/ 8400 batches | train loss 0.3816775 +| epoch 8 | 6947/ 8400 batches | train loss 0.2989380 +| epoch 8 | 6951/ 8400 batches | train loss 0.3532495 +| epoch 8 | 6955/ 8400 batches | train loss 0.3580022 +| epoch 8 | 6959/ 8400 batches | train loss 0.2937939 +| epoch 8 | 6963/ 8400 batches | train loss 0.3063226 +| epoch 8 | 6967/ 8400 batches | train loss 0.2984481 +| epoch 8 | 6971/ 8400 batches | train loss 0.3596364 +| epoch 8 | 6975/ 8400 batches | train loss 0.3338063 +| epoch 8 | 6979/ 8400 batches | train loss 0.3500976 +| epoch 8 | 6983/ 8400 batches | train loss 0.3290256 +| epoch 8 | 6987/ 8400 batches | train loss 0.3045341 +| epoch 8 | 6991/ 8400 batches | train loss 0.4184550 +| epoch 8 | 6995/ 8400 batches | train loss 0.3913099 +| epoch 8 | 6999/ 8400 batches | train loss 0.3495447 +| epoch 8 | 7003/ 8400 batches | train loss 0.3099668 +| epoch 8 | 7007/ 8400 batches | train loss 0.4187687 +| epoch 8 | 7011/ 8400 batches | train loss 0.3295318 +| epoch 8 | 7015/ 8400 batches | train loss 0.3835885 +| epoch 8 | 7019/ 8400 batches | train loss 0.3089077 +| epoch 8 | 7023/ 8400 batches | train loss 0.3548859 +| epoch 8 | 7027/ 8400 batches | train loss 0.3647890 +| epoch 8 | 7031/ 8400 batches | train loss 0.3020371 +| epoch 8 | 7035/ 8400 batches | train loss 0.3686056 +| epoch 8 | 7039/ 8400 batches | train loss 0.4169109 +| epoch 8 | 7043/ 8400 batches | train loss 0.3747253 +| epoch 8 | 7047/ 8400 batches | train loss 0.3877139 +| epoch 8 | 7051/ 8400 batches | train loss 0.3529065 +| epoch 8 | 7055/ 8400 batches | train loss 0.3516320 +| epoch 8 | 7059/ 8400 batches | train loss 0.3884684 +| epoch 8 | 7063/ 8400 batches | train loss 0.4358743 +| epoch 8 | 7067/ 8400 batches | train loss 0.3843234 +| epoch 8 | 7071/ 8400 batches | train loss 0.3251089 +| epoch 8 | 7075/ 8400 batches | train loss 0.3354180 +| epoch 8 | 7079/ 8400 batches | train loss 0.3744793 +| epoch 8 | 7083/ 8400 batches | train loss 0.3640136 +| epoch 8 | 7087/ 8400 batches | train loss 0.3378872 +| epoch 8 | 7091/ 8400 batches | train loss 0.3333970 +| epoch 8 | 7095/ 8400 batches | train loss 0.3797980 +| epoch 8 | 7099/ 8400 batches | train loss 0.3344847 +| epoch 8 | 7103/ 8400 batches | train loss 0.3727877 +| epoch 8 | 7107/ 8400 batches | train loss 0.3251977 +| epoch 8 | 7111/ 8400 batches | train loss 0.3725855 +| epoch 8 | 7115/ 8400 batches | train loss 0.3103888 +| epoch 8 | 7119/ 8400 batches | train loss 0.4293960 +| epoch 8 | 7123/ 8400 batches | train loss 0.3830463 +| epoch 8 | 7127/ 8400 batches | train loss 0.3531915 +| epoch 8 | 7131/ 8400 batches | train loss 0.3347058 +| epoch 8 | 7135/ 8400 batches | train loss 0.3811286 +| epoch 8 | 7139/ 8400 batches | train loss 0.3804722 +| epoch 8 | 7143/ 8400 batches | train loss 0.3816245 +| epoch 8 | 7147/ 8400 batches | train loss 0.3266482 +| epoch 8 | 7151/ 8400 batches | train loss 0.3861634 +| epoch 8 | 7155/ 8400 batches | train loss 0.3906247 +| epoch 8 | 7159/ 8400 batches | train loss 0.3116722 +| epoch 8 | 7163/ 8400 batches | train loss 0.3259192 +| epoch 8 | 7167/ 8400 batches | train loss 0.3406647 +| epoch 8 | 7171/ 8400 batches | train loss 0.3552435 +| epoch 8 | 7175/ 8400 batches | train loss 0.2714803 +| epoch 8 | 7179/ 8400 batches | train loss 0.3820491 +| epoch 8 | 7183/ 8400 batches | train loss 0.3234897 +| epoch 8 | 7187/ 8400 batches | train loss 0.3294490 +| epoch 8 | 7191/ 8400 batches | train loss 0.4027101 +| epoch 8 | 7195/ 8400 batches | train loss 0.3798067 +| epoch 8 | 7199/ 8400 batches | train loss 0.3452594 +| epoch 8 | 7203/ 8400 batches | train loss 0.3277675 +| epoch 8 | 7207/ 8400 batches | train loss 0.2620172 +| epoch 8 | 7211/ 8400 batches | train loss 0.3736490 +| epoch 8 | 7215/ 8400 batches | train loss 0.3203675 +| epoch 8 | 7219/ 8400 batches | train loss 0.3808300 +| epoch 8 | 7223/ 8400 batches | train loss 0.3501233 +| epoch 8 | 7227/ 8400 batches | train loss 0.3200912 +| epoch 8 | 7231/ 8400 batches | train loss 0.2153512 +| epoch 8 | 7235/ 8400 batches | train loss 0.3523240 +| epoch 8 | 7239/ 8400 batches | train loss 0.3850780 +| epoch 8 | 7243/ 8400 batches | train loss 0.3569461 +| epoch 8 | 7247/ 8400 batches | train loss 0.3841180 +| epoch 8 | 7251/ 8400 batches | train loss 0.3610575 +| epoch 8 | 7255/ 8400 batches | train loss 0.3335219 +| epoch 8 | 7259/ 8400 batches | train loss 0.3013142 +| epoch 8 | 7263/ 8400 batches | train loss 0.2499344 +| epoch 8 | 7267/ 8400 batches | train loss 0.4329385 +| epoch 8 | 7271/ 8400 batches | train loss 0.3775610 +| epoch 8 | 7275/ 8400 batches | train loss 0.3614981 +| epoch 8 | 7279/ 8400 batches | train loss 0.3122031 +| epoch 8 | 7283/ 8400 batches | train loss 0.3443003 +| epoch 8 | 7287/ 8400 batches | train loss 0.3531030 +| epoch 8 | 7291/ 8400 batches | train loss 0.4597276 +| epoch 8 | 7295/ 8400 batches | train loss 0.3232827 +| epoch 8 | 7299/ 8400 batches | train loss 0.4223171 +| epoch 8 | 7303/ 8400 batches | train loss 0.3497031 +| epoch 8 | 7307/ 8400 batches | train loss 0.3765180 +| epoch 8 | 7311/ 8400 batches | train loss 0.3864052 +| epoch 8 | 7315/ 8400 batches | train loss 0.2929621 +| epoch 8 | 7319/ 8400 batches | train loss 0.3680891 +| epoch 8 | 7323/ 8400 batches | train loss 0.3335736 +| epoch 8 | 7327/ 8400 batches | train loss 0.3994757 +| epoch 8 | 7331/ 8400 batches | train loss 0.4230407 +| epoch 8 | 7335/ 8400 batches | train loss 0.2786121 +| epoch 8 | 7339/ 8400 batches | train loss 0.3482569 +| epoch 8 | 7343/ 8400 batches | train loss 0.3929546 +| epoch 8 | 7347/ 8400 batches | train loss 0.3653603 +| epoch 8 | 7351/ 8400 batches | train loss 0.3595222 +| epoch 8 | 7355/ 8400 batches | train loss 0.3487655 +| epoch 8 | 7359/ 8400 batches | train loss 0.3291833 +| epoch 8 | 7363/ 8400 batches | train loss 0.3302412 +| epoch 8 | 7367/ 8400 batches | train loss 0.3314525 +| epoch 8 | 7371/ 8400 batches | train loss 0.3196090 +| epoch 8 | 7375/ 8400 batches | train loss 0.3664743 +| epoch 8 | 7379/ 8400 batches | train loss 0.3182037 +| epoch 8 | 7383/ 8400 batches | train loss 0.3575220 +| epoch 8 | 7387/ 8400 batches | train loss 0.4008551 +| epoch 8 | 7391/ 8400 batches | train loss 0.3486897 +| epoch 8 | 7395/ 8400 batches | train loss 0.4101448 +| epoch 8 | 7399/ 8400 batches | train loss 0.3231558 +| epoch 8 | 7403/ 8400 batches | train loss 0.3776822 +| epoch 8 | 7407/ 8400 batches | train loss 0.3111365 +| epoch 8 | 7411/ 8400 batches | train loss 0.3565053 +| epoch 8 | 7415/ 8400 batches | train loss 0.2904812 +| epoch 8 | 7419/ 8400 batches | train loss 0.3554876 +| epoch 8 | 7423/ 8400 batches | train loss 0.3780611 +| epoch 8 | 7427/ 8400 batches | train loss 0.3137201 +| epoch 8 | 7431/ 8400 batches | train loss 0.3618731 +| epoch 8 | 7435/ 8400 batches | train loss 0.3296598 +| epoch 8 | 7439/ 8400 batches | train loss 0.3588650 +| epoch 8 | 7443/ 8400 batches | train loss 0.4114545 +| epoch 8 | 7447/ 8400 batches | train loss 0.3697461 +| epoch 8 | 7451/ 8400 batches | train loss 0.3812111 +| epoch 8 | 7455/ 8400 batches | train loss 0.4277267 +| epoch 8 | 7459/ 8400 batches | train loss 0.3374520 +| epoch 8 | 7463/ 8400 batches | train loss 0.3227443 +| epoch 8 | 7467/ 8400 batches | train loss 0.3452987 +| epoch 8 | 7471/ 8400 batches | train loss 0.3422436 +| epoch 8 | 7475/ 8400 batches | train loss 0.2908003 +| epoch 8 | 7479/ 8400 batches | train loss 0.4113241 +| epoch 8 | 7483/ 8400 batches | train loss 0.3574488 +| epoch 8 | 7487/ 8400 batches | train loss 0.3070895 +| epoch 8 | 7491/ 8400 batches | train loss 0.3840624 +| epoch 8 | 7495/ 8400 batches | train loss 0.4312215 +| epoch 8 | 7499/ 8400 batches | train loss 0.3491190 +| epoch 8 | 7503/ 8400 batches | train loss 0.3146074 +| epoch 8 | 7507/ 8400 batches | train loss 0.3218164 +| epoch 8 | 7511/ 8400 batches | train loss 0.3364982 +| epoch 8 | 7515/ 8400 batches | train loss 0.3783267 +| epoch 8 | 7519/ 8400 batches | train loss 0.3549748 +| epoch 8 | 7523/ 8400 batches | train loss 0.3463119 +| epoch 8 | 7527/ 8400 batches | train loss 0.3882712 +| epoch 8 | 7531/ 8400 batches | train loss 0.4229192 +| epoch 8 | 7535/ 8400 batches | train loss 0.3051857 +| epoch 8 | 7539/ 8400 batches | train loss 0.3744028 +| epoch 8 | 7543/ 8400 batches | train loss 0.3565782 +| epoch 8 | 7547/ 8400 batches | train loss 0.4080788 +| epoch 8 | 7551/ 8400 batches | train loss 0.3070473 +| epoch 8 | 7555/ 8400 batches | train loss 0.3523968 +| epoch 8 | 7559/ 8400 batches | train loss 0.4033939 +| epoch 8 | 7563/ 8400 batches | train loss 0.3456534 +| epoch 8 | 7567/ 8400 batches | train loss 0.3836311 +| epoch 8 | 7571/ 8400 batches | train loss 0.3740845 +| epoch 8 | 7575/ 8400 batches | train loss 0.2842823 +| epoch 8 | 7579/ 8400 batches | train loss 0.4181046 +| epoch 8 | 7583/ 8400 batches | train loss 0.3830004 +| epoch 8 | 7587/ 8400 batches | train loss 0.3487377 +| epoch 8 | 7591/ 8400 batches | train loss 0.3166705 +| epoch 8 | 7595/ 8400 batches | train loss 0.4404062 +| epoch 8 | 7599/ 8400 batches | train loss 0.3623444 +| epoch 8 | 7603/ 8400 batches | train loss 0.2058398 +| epoch 8 | 7607/ 8400 batches | train loss 0.3565672 +| epoch 8 | 7611/ 8400 batches | train loss 0.2982060 +| epoch 8 | 7615/ 8400 batches | train loss 0.3651317 +| epoch 8 | 7619/ 8400 batches | train loss 0.3384768 +| epoch 8 | 7623/ 8400 batches | train loss 0.3353129 +| epoch 8 | 7627/ 8400 batches | train loss 0.3047106 +| epoch 8 | 7631/ 8400 batches | train loss 0.3578354 +| epoch 8 | 7635/ 8400 batches | train loss 0.3824420 +| epoch 8 | 7639/ 8400 batches | train loss 0.3287596 +| epoch 8 | 7643/ 8400 batches | train loss 0.3618009 +| epoch 8 | 7647/ 8400 batches | train loss 0.3539975 +| epoch 8 | 7651/ 8400 batches | train loss 0.3613171 +| epoch 8 | 7655/ 8400 batches | train loss 0.3701386 +| epoch 8 | 7659/ 8400 batches | train loss 0.3546845 +| epoch 8 | 7663/ 8400 batches | train loss 0.3254641 +| epoch 8 | 7667/ 8400 batches | train loss 0.3977781 +| epoch 8 | 7671/ 8400 batches | train loss 0.3671267 +| epoch 8 | 7675/ 8400 batches | train loss 0.3810965 +| epoch 8 | 7679/ 8400 batches | train loss 0.4010487 +| epoch 8 | 7683/ 8400 batches | train loss 0.3230774 +| epoch 8 | 7687/ 8400 batches | train loss 0.3106061 +| epoch 8 | 7691/ 8400 batches | train loss 0.3189676 +| epoch 8 | 7695/ 8400 batches | train loss 0.3495589 +| epoch 8 | 7699/ 8400 batches | train loss 0.3675837 +| epoch 8 | 7703/ 8400 batches | train loss 0.3119686 +| epoch 8 | 7707/ 8400 batches | train loss 0.3421156 +| epoch 8 | 7711/ 8400 batches | train loss 0.3588783 +| epoch 8 | 7715/ 8400 batches | train loss 0.3807653 +| epoch 8 | 7719/ 8400 batches | train loss 0.3772618 +| epoch 8 | 7723/ 8400 batches | train loss 0.3544447 +| epoch 8 | 7727/ 8400 batches | train loss 0.3016643 +| epoch 8 | 7731/ 8400 batches | train loss 0.2988490 +| epoch 8 | 7735/ 8400 batches | train loss 0.3372072 +| epoch 8 | 7739/ 8400 batches | train loss 0.3659935 +| epoch 8 | 7743/ 8400 batches | train loss 0.2790001 +| epoch 8 | 7747/ 8400 batches | train loss 0.3285217 +| epoch 8 | 7751/ 8400 batches | train loss 0.3982518 +| epoch 8 | 7755/ 8400 batches | train loss 0.3861871 +| epoch 8 | 7759/ 8400 batches | train loss 0.3128250 +| epoch 8 | 7763/ 8400 batches | train loss 0.4549752 +| epoch 8 | 7767/ 8400 batches | train loss 0.3985169 +| epoch 8 | 7771/ 8400 batches | train loss 0.3966108 +| epoch 8 | 7775/ 8400 batches | train loss 0.4204957 +| epoch 8 | 7779/ 8400 batches | train loss 0.3943366 +| epoch 8 | 7783/ 8400 batches | train loss 0.3432159 +| epoch 8 | 7787/ 8400 batches | train loss 0.3733190 +| epoch 8 | 7791/ 8400 batches | train loss 0.4267968 +| epoch 8 | 7795/ 8400 batches | train loss 0.4174512 +| epoch 8 | 7799/ 8400 batches | train loss 0.3620458 +| epoch 8 | 7803/ 8400 batches | train loss 0.3208566 +| epoch 8 | 7807/ 8400 batches | train loss 0.3911955 +| epoch 8 | 7811/ 8400 batches | train loss 0.3570813 +| epoch 8 | 7815/ 8400 batches | train loss 0.3302709 +| epoch 8 | 7819/ 8400 batches | train loss 0.3448925 +| epoch 8 | 7823/ 8400 batches | train loss 0.3868863 +| epoch 8 | 7827/ 8400 batches | train loss 0.3366866 +| epoch 8 | 7831/ 8400 batches | train loss 0.3791296 +| epoch 8 | 7835/ 8400 batches | train loss 0.3369840 +| epoch 8 | 7839/ 8400 batches | train loss 0.3354814 +| epoch 8 | 7843/ 8400 batches | train loss 0.3163829 +| epoch 8 | 7847/ 8400 batches | train loss 0.3084259 +| epoch 8 | 7851/ 8400 batches | train loss 0.3337443 +| epoch 8 | 7855/ 8400 batches | train loss 0.4193025 +| epoch 8 | 7859/ 8400 batches | train loss 0.3460426 +| epoch 8 | 7863/ 8400 batches | train loss 0.3473542 +| epoch 8 | 7867/ 8400 batches | train loss 0.3237718 +| epoch 8 | 7871/ 8400 batches | train loss 0.4004701 +| epoch 8 | 7875/ 8400 batches | train loss 0.3538990 +| epoch 8 | 7879/ 8400 batches | train loss 0.3011991 +| epoch 8 | 7883/ 8400 batches | train loss 0.3523005 +| epoch 8 | 7887/ 8400 batches | train loss 0.3239350 +| epoch 8 | 7891/ 8400 batches | train loss 0.3312287 +| epoch 8 | 7895/ 8400 batches | train loss 0.3554001 +| epoch 8 | 7899/ 8400 batches | train loss 0.3554882 +| epoch 8 | 7903/ 8400 batches | train loss 0.3563572 +| epoch 8 | 7907/ 8400 batches | train loss 0.3557239 +| epoch 8 | 7911/ 8400 batches | train loss 0.3243682 +| epoch 8 | 7915/ 8400 batches | train loss 0.3922649 +| epoch 8 | 7919/ 8400 batches | train loss 0.3571078 +| epoch 8 | 7923/ 8400 batches | train loss 0.4157261 +| epoch 8 | 7927/ 8400 batches | train loss 0.3597146 +| epoch 8 | 7931/ 8400 batches | train loss 0.3395890 +| epoch 8 | 7935/ 8400 batches | train loss 0.3798242 +| epoch 8 | 7939/ 8400 batches | train loss 0.3081833 +| epoch 8 | 7943/ 8400 batches | train loss 0.3636838 +| epoch 8 | 7947/ 8400 batches | train loss 0.4182293 +| epoch 8 | 7951/ 8400 batches | train loss 0.3918582 +| epoch 8 | 7955/ 8400 batches | train loss 0.3732938 +| epoch 8 | 7959/ 8400 batches | train loss 0.3558220 +| epoch 8 | 7963/ 8400 batches | train loss 0.3427130 +| epoch 8 | 7967/ 8400 batches | train loss 0.3930647 +| epoch 8 | 7971/ 8400 batches | train loss 0.3208134 +| epoch 8 | 7975/ 8400 batches | train loss 0.3556152 +| epoch 8 | 7979/ 8400 batches | train loss 0.3143442 +| epoch 8 | 7983/ 8400 batches | train loss 0.3143499 +| epoch 8 | 7987/ 8400 batches | train loss 0.3822525 +| epoch 8 | 7991/ 8400 batches | train loss 0.3394270 +| epoch 8 | 7995/ 8400 batches | train loss 0.3153817 +| epoch 8 | 7999/ 8400 batches | train loss 0.3568925 +| epoch 8 | 8003/ 8400 batches | train loss 0.3757803 +| epoch 8 | 8007/ 8400 batches | train loss 0.4118865 +| epoch 8 | 8011/ 8400 batches | train loss 0.3730631 +| epoch 8 | 8015/ 8400 batches | train loss 0.3782361 +| epoch 8 | 8019/ 8400 batches | train loss 0.3837706 +| epoch 8 | 8023/ 8400 batches | train loss 0.3460773 +| epoch 8 | 8027/ 8400 batches | train loss 0.3514241 +| epoch 8 | 8031/ 8400 batches | train loss 0.3142492 +| epoch 8 | 8035/ 8400 batches | train loss 0.3367750 +| epoch 8 | 8039/ 8400 batches | train loss 0.3795577 +| epoch 8 | 8043/ 8400 batches | train loss 0.3665130 +| epoch 8 | 8047/ 8400 batches | train loss 0.3371054 +| epoch 8 | 8051/ 8400 batches | train loss 0.3622908 +| epoch 8 | 8055/ 8400 batches | train loss 0.3384777 +| epoch 8 | 8059/ 8400 batches | train loss 0.3350184 +| epoch 8 | 8063/ 8400 batches | train loss 0.4198439 +| epoch 8 | 8067/ 8400 batches | train loss 0.3489892 +| epoch 8 | 8071/ 8400 batches | train loss 0.3328254 +| epoch 8 | 8075/ 8400 batches | train loss 0.3827580 +| epoch 8 | 8079/ 8400 batches | train loss 0.3702833 +| epoch 8 | 8083/ 8400 batches | train loss 0.3528488 +| epoch 8 | 8087/ 8400 batches | train loss 0.3116300 +| epoch 8 | 8091/ 8400 batches | train loss 0.3428730 +| epoch 8 | 8095/ 8400 batches | train loss 0.3606740 +| epoch 8 | 8099/ 8400 batches | train loss 0.3540733 +| epoch 8 | 8103/ 8400 batches | train loss 0.2507220 +| epoch 8 | 8107/ 8400 batches | train loss 0.3766711 +| epoch 8 | 8111/ 8400 batches | train loss 0.3180178 +| epoch 8 | 8115/ 8400 batches | train loss 0.3680905 +| epoch 8 | 8119/ 8400 batches | train loss 0.3914878 +| epoch 8 | 8123/ 8400 batches | train loss 0.4080120 +| epoch 8 | 8127/ 8400 batches | train loss 0.3557842 +| epoch 8 | 8131/ 8400 batches | train loss 0.3427559 +| epoch 8 | 8135/ 8400 batches | train loss 0.2929958 +| epoch 8 | 8139/ 8400 batches | train loss 0.4203033 +| epoch 8 | 8143/ 8400 batches | train loss 0.3414567 +| epoch 8 | 8147/ 8400 batches | train loss 0.3790156 +| epoch 8 | 8151/ 8400 batches | train loss 0.3565180 +| epoch 8 | 8155/ 8400 batches | train loss 0.2587392 +| epoch 8 | 8159/ 8400 batches | train loss 0.3839724 +| epoch 8 | 8163/ 8400 batches | train loss 0.3128532 +| epoch 8 | 8167/ 8400 batches | train loss 0.2250061 +| epoch 8 | 8171/ 8400 batches | train loss 0.3348401 +| epoch 8 | 8175/ 8400 batches | train loss 0.3572493 +| epoch 8 | 8179/ 8400 batches | train loss 0.3158959 +| epoch 8 | 8183/ 8400 batches | train loss 0.3207131 +| epoch 8 | 8187/ 8400 batches | train loss 0.4089955 +| epoch 8 | 8191/ 8400 batches | train loss 0.3147066 +| epoch 8 | 8195/ 8400 batches | train loss 0.4169355 +| epoch 8 | 8199/ 8400 batches | train loss 0.3174124 +| epoch 8 | 8203/ 8400 batches | train loss 0.3258019 +| epoch 8 | 8207/ 8400 batches | train loss 0.3523862 +| epoch 8 | 8211/ 8400 batches | train loss 0.3608778 +| epoch 8 | 8215/ 8400 batches | train loss 0.3513839 +| epoch 8 | 8219/ 8400 batches | train loss 0.3955310 +| epoch 8 | 8223/ 8400 batches | train loss 0.4102287 +| epoch 8 | 8227/ 8400 batches | train loss 0.3476565 +| epoch 8 | 8231/ 8400 batches | train loss 0.3562444 +| epoch 8 | 8235/ 8400 batches | train loss 0.3133256 +| epoch 8 | 8239/ 8400 batches | train loss 0.3668126 +| epoch 8 | 8243/ 8400 batches | train loss 0.3470498 +| epoch 8 | 8247/ 8400 batches | train loss 0.2810302 +| epoch 8 | 8251/ 8400 batches | train loss 0.3789302 +| epoch 8 | 8255/ 8400 batches | train loss 0.3332199 +| epoch 8 | 8259/ 8400 batches | train loss 0.3523913 +| epoch 8 | 8263/ 8400 batches | train loss 0.4045463 +| epoch 8 | 8267/ 8400 batches | train loss 0.3365854 +| epoch 8 | 8271/ 8400 batches | train loss 0.3253891 +| epoch 8 | 8275/ 8400 batches | train loss 0.4402730 +| epoch 8 | 8279/ 8400 batches | train loss 0.3358389 +| epoch 8 | 8283/ 8400 batches | train loss 0.2795451 +| epoch 8 | 8287/ 8400 batches | train loss 0.3868178 +| epoch 8 | 8291/ 8400 batches | train loss 0.3470951 +| epoch 8 | 8295/ 8400 batches | train loss 0.3318123 +| epoch 8 | 8299/ 8400 batches | train loss 0.3141323 +| epoch 8 | 8303/ 8400 batches | train loss 0.3719186 +| epoch 8 | 8307/ 8400 batches | train loss 0.4166727 +| epoch 8 | 8311/ 8400 batches | train loss 0.3593553 +| epoch 8 | 8315/ 8400 batches | train loss 0.3855119 +| epoch 8 | 8319/ 8400 batches | train loss 0.3158653 +| epoch 8 | 8323/ 8400 batches | train loss 0.3535117 +| epoch 8 | 8327/ 8400 batches | train loss 0.3899300 +| epoch 8 | 8331/ 8400 batches | train loss 0.3947971 +| epoch 8 | 8335/ 8400 batches | train loss 0.3015273 +| epoch 8 | 8339/ 8400 batches | train loss 0.2835431 +| epoch 8 | 8343/ 8400 batches | train loss 0.3761566 +| epoch 8 | 8347/ 8400 batches | train loss 0.3653713 +| epoch 8 | 8351/ 8400 batches | train loss 0.3534931 +| epoch 8 | 8355/ 8400 batches | train loss 0.2968193 +| epoch 8 | 8359/ 8400 batches | train loss 0.2947460 +| epoch 8 | 8363/ 8400 batches | train loss 0.3428546 +| epoch 8 | 8367/ 8400 batches | train loss 0.3473474 +| epoch 8 | 8371/ 8400 batches | train loss 0.3569537 +| epoch 8 | 8375/ 8400 batches | train loss 0.3582101 +| epoch 8 | 8379/ 8400 batches | train loss 0.3736831 +| epoch 8 | 8383/ 8400 batches | train loss 0.3608275 +| epoch 8 | 8387/ 8400 batches | train loss 0.3869843 +| epoch 8 | 8391/ 8400 batches | train loss 0.3173078 +| epoch 8 | 8395/ 8400 batches | train loss 0.3793920 +| epoch 8 | 8399/ 8400 batches | train loss 0.3999858 +-------------------------------------------------------------------------------- +| epoch 8 | 3/ 8400 batches | test loss 0.5980332 +| epoch 8 | 7/ 8400 batches | test loss 0.5765646 +| epoch 8 | 11/ 8400 batches | test loss 0.4453132 +| epoch 8 | 15/ 8400 batches | test loss 0.3644663 +| epoch 8 | 19/ 8400 batches | test loss 0.4621227 +| epoch 8 | 23/ 8400 batches | test loss 0.4767115 +| epoch 8 | 27/ 8400 batches | test loss 0.4146690 +| epoch 8 | 31/ 8400 batches | test loss 0.3502055 +| epoch 8 | 35/ 8400 batches | test loss 0.3405299 +| epoch 8 | 39/ 8400 batches | test loss 0.5923933 +| epoch 8 | 43/ 8400 batches | test loss 0.4789510 +| epoch 8 | 47/ 8400 batches | test loss 0.3432025 +| epoch 8 | 51/ 8400 batches | test loss 0.6189808 +| epoch 8 | 55/ 8400 batches | test loss 0.4455601 +| epoch 8 | 59/ 8400 batches | test loss 0.7249690 +| epoch 8 | 63/ 8400 batches | test loss 0.5666389 +| epoch 8 | 67/ 8400 batches | test loss 0.5418453 +| epoch 8 | 71/ 8400 batches | test loss 0.4673611 +| epoch 8 | 75/ 8400 batches | test loss 0.4391261 +| epoch 8 | 79/ 8400 batches | test loss 0.3964486 +| epoch 8 | 83/ 8400 batches | test loss 0.3835450 +| epoch 8 | 87/ 8400 batches | test loss 0.4638911 +| epoch 8 | 91/ 8400 batches | test loss 0.6762936 +| epoch 8 | 95/ 8400 batches | test loss 0.2953546 +| epoch 8 | 99/ 8400 batches | test loss 0.4710506 +| epoch 8 | 103/ 8400 batches | test loss 0.4196406 +| epoch 8 | 107/ 8400 batches | test loss 0.7084993 +| epoch 8 | 111/ 8400 batches | test loss 0.3449117 +| epoch 8 | 115/ 8400 batches | test loss 0.5125947 +| epoch 8 | 119/ 8400 batches | test loss 0.3576939 +| epoch 8 | 123/ 8400 batches | test loss 0.4964103 +| epoch 8 | 127/ 8400 batches | test loss 0.3345577 +| epoch 8 | 131/ 8400 batches | test loss 0.3151582 +| epoch 8 | 135/ 8400 batches | test loss 0.3670452 +| epoch 8 | 139/ 8400 batches | test loss 0.3620602 +| epoch 8 | 143/ 8400 batches | test loss 0.4226945 +| epoch 8 | 147/ 8400 batches | test loss 0.5126703 +| epoch 8 | 151/ 8400 batches | test loss 0.5135710 +| epoch 8 | 155/ 8400 batches | test loss 0.3873616 +| epoch 8 | 159/ 8400 batches | test loss 0.3661804 +| epoch 8 | 163/ 8400 batches | test loss 0.4377173 +| epoch 8 | 167/ 8400 batches | test loss 0.4545079 +| epoch 8 | 171/ 8400 batches | test loss 0.4759184 +| epoch 8 | 175/ 8400 batches | test loss 0.5109586 +| epoch 8 | 179/ 8400 batches | test loss 0.5643364 +| epoch 8 | 183/ 8400 batches | test loss 0.4088925 +| epoch 8 | 187/ 8400 batches | test loss 0.4684584 +| epoch 8 | 191/ 8400 batches | test loss 0.3475947 +| epoch 8 | 195/ 8400 batches | test loss 0.4269643 +| epoch 8 | 199/ 8400 batches | test loss 0.4705731 +| epoch 8 | 203/ 8400 batches | test loss 0.5173001 +| epoch 8 | 207/ 8400 batches | test loss 0.4121675 +| epoch 8 | 211/ 8400 batches | test loss 0.3558716 +| epoch 8 | 215/ 8400 batches | test loss 0.4273351 +| epoch 8 | 219/ 8400 batches | test loss 0.4667897 +| epoch 8 | 223/ 8400 batches | test loss 0.5130323 +| epoch 8 | 227/ 8400 batches | test loss 0.3666321 +| epoch 8 | 231/ 8400 batches | test loss 0.5048759 +| epoch 8 | 235/ 8400 batches | test loss 0.4287907 +| epoch 8 | 239/ 8400 batches | test loss 0.4261602 +| epoch 8 | 243/ 8400 batches | test loss 0.3734347 +| epoch 8 | 247/ 8400 batches | test loss 0.4031735 +| epoch 8 | 251/ 8400 batches | test loss 0.6789122 +| epoch 8 | 255/ 8400 batches | test loss 0.4809833 +| epoch 8 | 259/ 8400 batches | test loss 0.3053779 +| epoch 8 | 263/ 8400 batches | test loss 0.3527602 +| epoch 8 | 267/ 8400 batches | test loss 0.4516613 +| epoch 8 | 271/ 8400 batches | test loss 0.4877827 +| epoch 8 | 275/ 8400 batches | test loss 0.3258863 +| epoch 8 | 279/ 8400 batches | test loss 0.4018175 +| epoch 8 | 283/ 8400 batches | test loss 0.3660154 +| epoch 8 | 287/ 8400 batches | test loss 0.6982912 +| epoch 8 | 291/ 8400 batches | test loss 0.4044665 +| epoch 8 | 295/ 8400 batches | test loss 0.5223361 +| epoch 8 | 299/ 8400 batches | test loss 0.3160685 +| epoch 8 | 303/ 8400 batches | test loss 0.4457172 +| epoch 8 | 307/ 8400 batches | test loss 0.4596602 +| epoch 8 | 311/ 8400 batches | test loss 0.4467611 +| epoch 8 | 315/ 8400 batches | test loss 0.3906769 +| epoch 8 | 319/ 8400 batches | test loss 0.4107261 +| epoch 8 | 323/ 8400 batches | test loss 0.4733877 +| epoch 8 | 327/ 8400 batches | test loss 0.4179339 +| epoch 8 | 331/ 8400 batches | test loss 0.4162495 +| epoch 8 | 335/ 8400 batches | test loss 0.3880033 +| epoch 8 | 339/ 8400 batches | test loss 0.6466191 +| epoch 8 | 343/ 8400 batches | test loss 0.6222724 +| epoch 8 | 347/ 8400 batches | test loss 0.4222047 +| epoch 8 | 351/ 8400 batches | test loss 0.4747229 +| epoch 8 | 355/ 8400 batches | test loss 0.3846297 +| epoch 8 | 359/ 8400 batches | test loss 0.3681296 +| epoch 8 | 363/ 8400 batches | test loss 0.3527287 +| epoch 8 | 367/ 8400 batches | test loss 0.4088515 +| epoch 8 | 371/ 8400 batches | test loss 0.4933395 +| epoch 8 | 375/ 8400 batches | test loss 0.4015310 +| epoch 8 | 379/ 8400 batches | test loss 0.4785019 +| epoch 8 | 383/ 8400 batches | test loss 0.4524062 +| epoch 8 | 387/ 8400 batches | test loss 0.4316000 +| epoch 8 | 391/ 8400 batches | test loss 0.4180271 +| epoch 8 | 395/ 8400 batches | test loss 0.4422097 +| epoch 8 | 399/ 8400 batches | test loss 0.3806477 +| epoch 8 | 403/ 8400 batches | test loss 0.3829390 +| epoch 8 | 407/ 8400 batches | test loss 0.3833026 +| epoch 8 | 411/ 8400 batches | test loss 0.5146159 +| epoch 8 | 415/ 8400 batches | test loss 0.4918464 +| epoch 8 | 419/ 8400 batches | test loss 0.4859025 +| epoch 8 | 423/ 8400 batches | test loss 0.4388382 +| epoch 8 | 427/ 8400 batches | test loss 0.4145145 +| epoch 8 | 431/ 8400 batches | test loss 0.4719934 +| epoch 8 | 435/ 8400 batches | test loss 0.4017995 +| epoch 8 | 439/ 8400 batches | test loss 0.5626466 +| epoch 8 | 443/ 8400 batches | test loss 0.3892830 +| epoch 8 | 447/ 8400 batches | test loss 0.4951601 +| epoch 8 | 451/ 8400 batches | test loss 0.4437006 +| epoch 8 | 455/ 8400 batches | test loss 0.4339145 +| epoch 8 | 459/ 8400 batches | test loss 0.3732377 +| epoch 8 | 463/ 8400 batches | test loss 0.3861226 +| epoch 8 | 467/ 8400 batches | test loss 0.4773818 +| epoch 8 | 471/ 8400 batches | test loss 0.3566623 +| epoch 8 | 475/ 8400 batches | test loss 0.5160329 +| epoch 8 | 479/ 8400 batches | test loss 0.5679650 +| epoch 8 | 483/ 8400 batches | test loss 0.5727440 +| epoch 8 | 487/ 8400 batches | test loss 0.4534502 +| epoch 8 | 491/ 8400 batches | test loss 0.4880247 +| epoch 8 | 495/ 8400 batches | test loss 0.3632348 +| epoch 8 | 499/ 8400 batches | test loss 0.3730973 +| epoch 8 | 503/ 8400 batches | test loss 0.4244583 +| epoch 8 | 507/ 8400 batches | test loss 0.4142438 +| epoch 8 | 511/ 8400 batches | test loss 0.5192259 +| epoch 8 | 515/ 8400 batches | test loss 0.5162406 +| epoch 8 | 519/ 8400 batches | test loss 0.4284669 +| epoch 8 | 523/ 8400 batches | test loss 0.4139411 +| epoch 8 | 527/ 8400 batches | test loss 0.4084975 +| epoch 8 | 531/ 8400 batches | test loss 0.4106021 +| epoch 8 | 535/ 8400 batches | test loss 0.5117186 +| epoch 8 | 539/ 8400 batches | test loss 0.4778699 +| epoch 8 | 543/ 8400 batches | test loss 0.3499011 +| epoch 8 | 547/ 8400 batches | test loss 0.5405711 +| epoch 8 | 551/ 8400 batches | test loss 0.4666303 +| epoch 8 | 555/ 8400 batches | test loss 0.4197089 +| epoch 8 | 559/ 8400 batches | test loss 0.6353084 +| epoch 8 | 563/ 8400 batches | test loss 0.3241445 +| epoch 8 | 567/ 8400 batches | test loss 0.3632627 +| epoch 8 | 571/ 8400 batches | test loss 0.5695753 +| epoch 8 | 575/ 8400 batches | test loss 0.3613821 +| epoch 8 | 579/ 8400 batches | test loss 0.4934397 +| epoch 8 | 583/ 8400 batches | test loss 0.4513557 +| epoch 8 | 587/ 8400 batches | test loss 0.5728298 +| epoch 8 | 591/ 8400 batches | test loss 0.3924658 +| epoch 8 | 595/ 8400 batches | test loss 0.4305130 +| epoch 8 | 599/ 8400 batches | test loss 0.5061707 +| epoch 8 | 603/ 8400 batches | test loss 0.4600466 +| epoch 8 | 607/ 8400 batches | test loss 0.4288385 +| epoch 8 | 611/ 8400 batches | test loss 0.4241679 +| epoch 8 | 615/ 8400 batches | test loss 0.4211500 +| epoch 8 | 619/ 8400 batches | test loss 0.4183493 +| epoch 8 | 623/ 8400 batches | test loss 0.5105280 +| epoch 8 | 627/ 8400 batches | test loss 0.4780542 +| epoch 8 | 631/ 8400 batches | test loss 0.3524082 +| epoch 8 | 635/ 8400 batches | test loss 0.4996530 +| epoch 8 | 639/ 8400 batches | test loss 0.4318788 +| epoch 8 | 643/ 8400 batches | test loss 0.4293109 +| epoch 8 | 647/ 8400 batches | test loss 0.3845249 +| epoch 8 | 651/ 8400 batches | test loss 0.5058539 +| epoch 8 | 655/ 8400 batches | test loss 0.3892244 +| epoch 8 | 659/ 8400 batches | test loss 0.4138174 +| epoch 8 | 663/ 8400 batches | test loss 0.3688887 +| epoch 8 | 667/ 8400 batches | test loss 0.5024318 +| epoch 8 | 671/ 8400 batches | test loss 0.5320073 +| epoch 8 | 675/ 8400 batches | test loss 0.2959883 +| epoch 8 | 679/ 8400 batches | test loss 0.3876220 +| epoch 8 | 683/ 8400 batches | test loss 0.4193316 +| epoch 8 | 687/ 8400 batches | test loss 0.3940581 +| epoch 8 | 691/ 8400 batches | test loss 0.5159639 +| epoch 8 | 695/ 8400 batches | test loss 0.4007686 +| epoch 8 | 699/ 8400 batches | test loss 0.3286194 +| epoch 8 | 703/ 8400 batches | test loss 0.4406280 +| epoch 8 | 707/ 8400 batches | test loss 0.3979554 +| epoch 8 | 711/ 8400 batches | test loss 0.6418554 +| epoch 8 | 715/ 8400 batches | test loss 0.3743005 +| epoch 8 | 719/ 8400 batches | test loss 0.3446597 +| epoch 8 | 723/ 8400 batches | test loss 0.3643764 +| epoch 8 | 727/ 8400 batches | test loss 0.3339314 +| epoch 8 | 731/ 8400 batches | test loss 0.3637889 +| epoch 8 | 735/ 8400 batches | test loss 0.3783703 +| epoch 8 | 739/ 8400 batches | test loss 0.4033459 +| epoch 8 | 743/ 8400 batches | test loss 0.4420279 +| epoch 8 | 747/ 8400 batches | test loss 0.3512138 +| epoch 8 | 751/ 8400 batches | test loss 0.3749422 +| epoch 8 | 755/ 8400 batches | test loss 0.4324713 +| epoch 8 | 759/ 8400 batches | test loss 0.3854509 +| epoch 8 | 763/ 8400 batches | test loss 0.3718292 +| epoch 8 | 767/ 8400 batches | test loss 0.3967043 +| epoch 8 | 771/ 8400 batches | test loss 0.4155603 +| epoch 8 | 775/ 8400 batches | test loss 0.3894488 +| epoch 8 | 779/ 8400 batches | test loss 0.4512891 +| epoch 8 | 783/ 8400 batches | test loss 0.3664466 +| epoch 8 | 787/ 8400 batches | test loss 0.3917063 +| epoch 8 | 791/ 8400 batches | test loss 0.4796072 +| epoch 8 | 795/ 8400 batches | test loss 0.3526701 +| epoch 8 | 799/ 8400 batches | test loss 0.5983311 +| epoch 8 | 803/ 8400 batches | test loss 0.5341004 +| epoch 8 | 807/ 8400 batches | test loss 0.4471461 +| epoch 8 | 811/ 8400 batches | test loss 0.3811093 +| epoch 8 | 815/ 8400 batches | test loss 0.4564482 +| epoch 8 | 819/ 8400 batches | test loss 0.3520999 +| epoch 8 | 823/ 8400 batches | test loss 0.5443872 +| epoch 8 | 827/ 8400 batches | test loss 0.4734344 +| epoch 8 | 831/ 8400 batches | test loss 0.3951095 +| epoch 8 | 835/ 8400 batches | test loss 0.4278785 +| epoch 8 | 839/ 8400 batches | test loss 0.4734046 +| epoch 8 | 843/ 8400 batches | test loss 0.4965963 +| epoch 8 | 847/ 8400 batches | test loss 0.5255193 +| epoch 8 | 851/ 8400 batches | test loss 0.4679427 +| epoch 8 | 855/ 8400 batches | test loss 0.4540799 +| epoch 8 | 859/ 8400 batches | test loss 0.4424646 +| epoch 8 | 863/ 8400 batches | test loss 0.3684250 +| epoch 8 | 867/ 8400 batches | test loss 0.3867735 +| epoch 8 | 871/ 8400 batches | test loss 0.3758761 +| epoch 8 | 875/ 8400 batches | test loss 0.4523039 +| epoch 8 | 879/ 8400 batches | test loss 0.4776633 +| epoch 8 | 883/ 8400 batches | test loss 0.3481870 +| epoch 8 | 887/ 8400 batches | test loss 0.4099262 +| epoch 8 | 891/ 8400 batches | test loss 0.4495323 +| epoch 8 | 895/ 8400 batches | test loss 0.3905471 +| epoch 8 | 899/ 8400 batches | test loss 0.3820661 +| epoch 8 | 903/ 8400 batches | test loss 0.4259493 +| epoch 8 | 907/ 8400 batches | test loss 0.3945021 +| epoch 8 | 911/ 8400 batches | test loss 0.3821623 +| epoch 8 | 915/ 8400 batches | test loss 0.4592469 +| epoch 8 | 919/ 8400 batches | test loss 0.4062149 +| epoch 8 | 923/ 8400 batches | test loss 0.6285134 +| epoch 8 | 927/ 8400 batches | test loss 0.3755247 +| epoch 8 | 931/ 8400 batches | test loss 0.4426674 +| epoch 8 | 935/ 8400 batches | test loss 0.4328965 +| epoch 8 | 939/ 8400 batches | test loss 0.6630253 +| epoch 8 | 943/ 8400 batches | test loss 0.4352763 +| epoch 8 | 947/ 8400 batches | test loss 0.3709785 +| epoch 8 | 951/ 8400 batches | test loss 0.4187186 +| epoch 8 | 955/ 8400 batches | test loss 0.4030324 +| epoch 8 | 959/ 8400 batches | test loss 1.0240088 +| epoch 8 | 963/ 8400 batches | test loss 0.3945298 +| epoch 8 | 967/ 8400 batches | test loss 0.4150437 +| epoch 8 | 971/ 8400 batches | test loss 0.4389111 +| epoch 8 | 975/ 8400 batches | test loss 0.3888384 +| epoch 8 | 979/ 8400 batches | test loss 0.4013139 +| epoch 8 | 983/ 8400 batches | test loss 0.4116736 +| epoch 8 | 987/ 8400 batches | test loss 0.5698491 +| epoch 8 | 991/ 8400 batches | test loss 0.3592734 +| epoch 8 | 995/ 8400 batches | test loss 0.3886226 +| epoch 8 | 999/ 8400 batches | test loss 0.3687446 +| epoch 8 | 1003/ 8400 batches | test loss 0.4040951 +| epoch 8 | 1007/ 8400 batches | test loss 0.4119092 +| epoch 8 | 1011/ 8400 batches | test loss 0.4951223 +| epoch 8 | 1015/ 8400 batches | test loss 0.4004073 +| epoch 8 | 1019/ 8400 batches | test loss 0.4586362 +| epoch 8 | 1023/ 8400 batches | test loss 0.4117157 +| epoch 8 | 1027/ 8400 batches | test loss 0.4092604 +| epoch 8 | 1031/ 8400 batches | test loss 0.3548702 +| epoch 8 | 1035/ 8400 batches | test loss 0.4815459 +| epoch 8 | 1039/ 8400 batches | test loss 0.4308130 +| epoch 8 | 1043/ 8400 batches | test loss 0.3639607 +| epoch 8 | 1047/ 8400 batches | test loss 0.4394014 +| epoch 8 | 1051/ 8400 batches | test loss 0.1670796 +| epoch 8 | 1055/ 8400 batches | test loss 0.3667426 +| epoch 8 | 1059/ 8400 batches | test loss 0.4191999 +| epoch 8 | 1063/ 8400 batches | test loss 0.3179731 +| epoch 8 | 1067/ 8400 batches | test loss 0.5206550 +| epoch 8 | 1071/ 8400 batches | test loss 0.3855050 +| epoch 8 | 1075/ 8400 batches | test loss 0.4382372 +| epoch 8 | 1079/ 8400 batches | test loss 0.5027814 +| epoch 8 | 1083/ 8400 batches | test loss 0.4091581 +| epoch 8 | 1087/ 8400 batches | test loss 0.6596490 +| epoch 8 | 1091/ 8400 batches | test loss 0.5374972 +| epoch 8 | 1095/ 8400 batches | test loss 0.4380425 +| epoch 8 | 1099/ 8400 batches | test loss 0.4344826 +| epoch 8 | 1103/ 8400 batches | test loss 0.3695555 +| epoch 8 | 1107/ 8400 batches | test loss 0.4834972 +| epoch 8 | 1111/ 8400 batches | test loss 0.3788611 +| epoch 8 | 1115/ 8400 batches | test loss 0.4460495 +| epoch 8 | 1119/ 8400 batches | test loss 0.4962627 +| epoch 8 | 1123/ 8400 batches | test loss 0.4988581 +| epoch 8 | 1127/ 8400 batches | test loss 0.4373077 +| epoch 8 | 1131/ 8400 batches | test loss 0.5982248 +| epoch 8 | 1135/ 8400 batches | test loss 0.4056241 +| epoch 8 | 1139/ 8400 batches | test loss 0.5321117 +| epoch 8 | 1143/ 8400 batches | test loss 0.4539086 +| epoch 8 | 1147/ 8400 batches | test loss 0.4539034 +| epoch 8 | 1151/ 8400 batches | test loss 0.3786791 +| epoch 8 | 1155/ 8400 batches | test loss 0.4382146 +| epoch 8 | 1159/ 8400 batches | test loss 0.3596978 +| epoch 8 | 1163/ 8400 batches | test loss 0.5657842 +| epoch 8 | 1167/ 8400 batches | test loss 0.4060059 +| epoch 8 | 1171/ 8400 batches | test loss 0.5328056 +| epoch 8 | 1175/ 8400 batches | test loss 0.3534881 +| epoch 8 | 1179/ 8400 batches | test loss 0.3969670 +| epoch 8 | 1183/ 8400 batches | test loss 0.4681723 +| epoch 8 | 1187/ 8400 batches | test loss 0.4691845 +| epoch 8 | 1191/ 8400 batches | test loss 0.4953007 +| epoch 8 | 1195/ 8400 batches | test loss 0.4748509 +| epoch 8 | 1199/ 8400 batches | test loss 0.4931989 +| epoch 8 | 1203/ 8400 batches | test loss 0.3828534 +| epoch 8 | 1207/ 8400 batches | test loss 0.4193141 +| epoch 8 | 1211/ 8400 batches | test loss 0.4336737 +| epoch 8 | 1215/ 8400 batches | test loss 0.4004642 +| epoch 8 | 1219/ 8400 batches | test loss 0.3998407 +| epoch 8 | 1223/ 8400 batches | test loss 0.4732809 +| epoch 8 | 1227/ 8400 batches | test loss 0.4076236 +| epoch 8 | 1231/ 8400 batches | test loss 0.3661569 +| epoch 8 | 1235/ 8400 batches | test loss 0.8421389 +| epoch 8 | 1239/ 8400 batches | test loss 0.3980007 +| epoch 8 | 1243/ 8400 batches | test loss 0.4517976 +| epoch 8 | 1247/ 8400 batches | test loss 0.3847754 +| epoch 8 | 1251/ 8400 batches | test loss 0.4469067 +| epoch 8 | 1255/ 8400 batches | test loss 0.4211884 +| epoch 8 | 1259/ 8400 batches | test loss 0.3597457 +| epoch 8 | 1263/ 8400 batches | test loss 0.4227908 +| epoch 8 | 1267/ 8400 batches | test loss 0.3779810 +| epoch 8 | 1271/ 8400 batches | test loss 0.4557531 +| epoch 8 | 1275/ 8400 batches | test loss 0.3344917 +| epoch 8 | 1279/ 8400 batches | test loss 0.4655402 +| epoch 8 | 1283/ 8400 batches | test loss 0.5752292 +| epoch 8 | 1287/ 8400 batches | test loss 0.4246059 +| epoch 8 | 1291/ 8400 batches | test loss 0.4977749 +| epoch 8 | 1295/ 8400 batches | test loss 0.4399751 +| epoch 8 | 1299/ 8400 batches | test loss 0.3794408 +| epoch 8 | 1303/ 8400 batches | test loss 0.3746473 +| epoch 8 | 1307/ 8400 batches | test loss 0.5908718 +| epoch 8 | 1311/ 8400 batches | test loss 0.4337458 +| epoch 8 | 1315/ 8400 batches | test loss 0.4132212 +| epoch 8 | 1319/ 8400 batches | test loss 0.3896345 +| epoch 8 | 1323/ 8400 batches | test loss 0.3765246 +| epoch 8 | 1327/ 8400 batches | test loss 0.5094774 +| epoch 8 | 1331/ 8400 batches | test loss 0.4340523 +| epoch 8 | 1335/ 8400 batches | test loss 0.5186504 +| epoch 8 | 1339/ 8400 batches | test loss 0.3854433 +| epoch 8 | 1343/ 8400 batches | test loss 0.4196332 +| epoch 8 | 1347/ 8400 batches | test loss 0.4520984 +| epoch 8 | 1351/ 8400 batches | test loss 0.5091468 +| epoch 8 | 1355/ 8400 batches | test loss 0.4155827 +| epoch 8 | 1359/ 8400 batches | test loss 0.5216824 +| epoch 8 | 1363/ 8400 batches | test loss 0.4145328 +| epoch 8 | 1367/ 8400 batches | test loss 0.4687919 +| epoch 8 | 1371/ 8400 batches | test loss 0.3865009 +| epoch 8 | 1375/ 8400 batches | test loss 0.3639880 +| epoch 8 | 1379/ 8400 batches | test loss 0.4771005 +| epoch 8 | 1383/ 8400 batches | test loss 0.4348828 +| epoch 8 | 1387/ 8400 batches | test loss 0.4141249 +| epoch 8 | 1391/ 8400 batches | test loss 0.4613841 +| epoch 8 | 1395/ 8400 batches | test loss 0.4309276 +| epoch 8 | 1399/ 8400 batches | test loss 0.5447444 +| epoch 8 | 1403/ 8400 batches | test loss 0.5428437 +| epoch 8 | 1407/ 8400 batches | test loss 0.4507927 +| epoch 8 | 1411/ 8400 batches | test loss 0.4488254 +| epoch 8 | 1415/ 8400 batches | test loss 0.4379289 +| epoch 8 | 1419/ 8400 batches | test loss 0.4224233 +| epoch 8 | 1423/ 8400 batches | test loss 0.4495611 +| epoch 8 | 1427/ 8400 batches | test loss 0.4358701 +| epoch 8 | 1431/ 8400 batches | test loss 0.4481063 +| epoch 8 | 1435/ 8400 batches | test loss 0.3800476 +| epoch 8 | 1439/ 8400 batches | test loss 0.3721951 +| epoch 8 | 1443/ 8400 batches | test loss 0.3497659 +| epoch 8 | 1447/ 8400 batches | test loss 0.4150519 +| epoch 8 | 1451/ 8400 batches | test loss 0.2641951 +| epoch 8 | 1455/ 8400 batches | test loss 0.3591203 +| epoch 8 | 1459/ 8400 batches | test loss 0.4005134 +| epoch 8 | 1463/ 8400 batches | test loss 0.4243101 +| epoch 8 | 1467/ 8400 batches | test loss 0.4986630 +| epoch 8 | 1471/ 8400 batches | test loss 0.2534668 +| epoch 8 | 1475/ 8400 batches | test loss 0.4208493 +| epoch 8 | 1479/ 8400 batches | test loss 0.4138943 +| epoch 8 | 1483/ 8400 batches | test loss 0.4567595 +| epoch 8 | 1487/ 8400 batches | test loss 0.4358875 +| epoch 8 | 1491/ 8400 batches | test loss 0.3458762 +| epoch 8 | 1495/ 8400 batches | test loss 0.4354641 +| epoch 8 | 1499/ 8400 batches | test loss 0.4533281 +| epoch 8 | 1503/ 8400 batches | test loss 0.4766589 +| epoch 8 | 1507/ 8400 batches | test loss 0.3488315 +| epoch 8 | 1511/ 8400 batches | test loss 0.3781119 +| epoch 8 | 1515/ 8400 batches | test loss 0.4116080 +| epoch 8 | 1519/ 8400 batches | test loss 0.4093827 +| epoch 8 | 1523/ 8400 batches | test loss 0.4966710 +| epoch 8 | 1527/ 8400 batches | test loss 0.4258698 +| epoch 8 | 1531/ 8400 batches | test loss 0.4427958 +| epoch 8 | 1535/ 8400 batches | test loss 0.4486279 +| epoch 8 | 1539/ 8400 batches | test loss 0.5203303 +| epoch 8 | 1543/ 8400 batches | test loss 0.3939882 +| epoch 8 | 1547/ 8400 batches | test loss 0.4099975 +| epoch 8 | 1551/ 8400 batches | test loss 0.3601598 +| epoch 8 | 1555/ 8400 batches | test loss 0.6792018 +| epoch 8 | 1559/ 8400 batches | test loss 0.4275809 +| epoch 8 | 1563/ 8400 batches | test loss 0.4086428 +| epoch 8 | 1567/ 8400 batches | test loss 0.4558592 +| epoch 8 | 1571/ 8400 batches | test loss 0.4649946 +| epoch 8 | 1575/ 8400 batches | test loss 0.4305464 +| epoch 8 | 1579/ 8400 batches | test loss 0.3587679 +| epoch 8 | 1583/ 8400 batches | test loss 0.5031361 +| epoch 8 | 1587/ 8400 batches | test loss 0.4167108 +| epoch 8 | 1591/ 8400 batches | test loss 0.5211779 +| epoch 8 | 1595/ 8400 batches | test loss 0.3712009 +| epoch 8 | 1599/ 8400 batches | test loss 0.4155537 +| epoch 8 | 1603/ 8400 batches | test loss 0.5254197 +| epoch 8 | 1607/ 8400 batches | test loss 0.4342124 +| epoch 8 | 1611/ 8400 batches | test loss 0.4775702 +| epoch 8 | 1615/ 8400 batches | test loss 0.4164510 +| epoch 8 | 1619/ 8400 batches | test loss 0.4198954 +| epoch 8 | 1623/ 8400 batches | test loss 0.5431287 +| epoch 8 | 1627/ 8400 batches | test loss 0.4116137 +| epoch 8 | 1631/ 8400 batches | test loss 0.4462192 +| epoch 8 | 1635/ 8400 batches | test loss 0.4474564 +| epoch 8 | 1639/ 8400 batches | test loss 0.5854969 +| epoch 8 | 1643/ 8400 batches | test loss 0.5357975 +| epoch 8 | 1647/ 8400 batches | test loss 0.5385217 +| epoch 8 | 1651/ 8400 batches | test loss 0.3876627 +| epoch 8 | 1655/ 8400 batches | test loss 0.4204838 +| epoch 8 | 1659/ 8400 batches | test loss 0.4154197 +| epoch 8 | 1663/ 8400 batches | test loss 0.3322844 +| epoch 8 | 1667/ 8400 batches | test loss 0.4830368 +| epoch 8 | 1671/ 8400 batches | test loss 0.4305540 +| epoch 8 | 1675/ 8400 batches | test loss 0.4272721 +| epoch 8 | 1679/ 8400 batches | test loss 0.4696181 +| epoch 8 | 1683/ 8400 batches | test loss 0.6475683 +| epoch 8 | 1687/ 8400 batches | test loss 0.4454051 +| epoch 8 | 1691/ 8400 batches | test loss 0.4633262 +| epoch 8 | 1695/ 8400 batches | test loss 0.4883203 +| epoch 8 | 1699/ 8400 batches | test loss 0.5736065 +| epoch 8 | 1703/ 8400 batches | test loss 0.3762245 +| epoch 8 | 1707/ 8400 batches | test loss 0.5000536 +| epoch 8 | 1711/ 8400 batches | test loss 0.3439063 +| epoch 8 | 1715/ 8400 batches | test loss 0.5338070 +| epoch 8 | 1719/ 8400 batches | test loss 0.4348218 +| epoch 8 | 1723/ 8400 batches | test loss 0.3848620 +| epoch 8 | 1727/ 8400 batches | test loss 0.4464713 +| epoch 8 | 1731/ 8400 batches | test loss 0.3255981 +| epoch 8 | 1735/ 8400 batches | test loss 0.4182107 +| epoch 8 | 1739/ 8400 batches | test loss 0.3840397 +| epoch 8 | 1743/ 8400 batches | test loss 0.4256982 +| epoch 8 | 1747/ 8400 batches | test loss 0.3484787 +| epoch 8 | 1751/ 8400 batches | test loss 0.4720314 +| epoch 8 | 1755/ 8400 batches | test loss 0.3840827 +| epoch 8 | 1759/ 8400 batches | test loss 0.5487534 +| epoch 8 | 1763/ 8400 batches | test loss 0.3758333 +| epoch 8 | 1767/ 8400 batches | test loss 0.4387141 +| epoch 8 | 1771/ 8400 batches | test loss 0.4583328 +| epoch 8 | 1775/ 8400 batches | test loss 0.5554551 +| epoch 8 | 1779/ 8400 batches | test loss 0.6622490 +| epoch 8 | 1783/ 8400 batches | test loss 0.4541495 +| epoch 8 | 1787/ 8400 batches | test loss 0.3234720 +| epoch 8 | 1791/ 8400 batches | test loss 0.4503028 +| epoch 8 | 1795/ 8400 batches | test loss 0.4086466 +| epoch 8 | 1799/ 8400 batches | test loss 0.4227826 +| epoch 8 | 1803/ 8400 batches | test loss 0.4632581 +| epoch 8 | 1807/ 8400 batches | test loss 0.5726520 +| epoch 8 | 1811/ 8400 batches | test loss 0.4394967 +| epoch 8 | 1815/ 8400 batches | test loss 0.4955696 +| epoch 8 | 1819/ 8400 batches | test loss 0.3800969 +| epoch 8 | 1823/ 8400 batches | test loss 0.3604663 +| epoch 8 | 1827/ 8400 batches | test loss 0.3725503 +| epoch 8 | 1831/ 8400 batches | test loss 0.4821134 +| epoch 8 | 1835/ 8400 batches | test loss 0.3640950 +| epoch 8 | 1839/ 8400 batches | test loss 0.3718418 +| epoch 8 | 1843/ 8400 batches | test loss 0.4449013 +| epoch 8 | 1847/ 8400 batches | test loss 0.3944170 +| epoch 8 | 1851/ 8400 batches | test loss 0.4288928 +| epoch 8 | 1855/ 8400 batches | test loss 0.5186036 +| epoch 8 | 1859/ 8400 batches | test loss 0.3781905 +| epoch 8 | 1863/ 8400 batches | test loss 0.4687116 +| epoch 8 | 1867/ 8400 batches | test loss 0.4389827 +| epoch 8 | 1871/ 8400 batches | test loss 0.4766341 +| epoch 8 | 1875/ 8400 batches | test loss 0.4656489 +| epoch 8 | 1879/ 8400 batches | test loss 0.4386583 +| epoch 8 | 1883/ 8400 batches | test loss 0.3488505 +| epoch 8 | 1887/ 8400 batches | test loss 0.3830796 +| epoch 8 | 1891/ 8400 batches | test loss 0.4399538 +| epoch 8 | 1895/ 8400 batches | test loss 0.4245108 +| epoch 8 | 1899/ 8400 batches | test loss 0.4257711 +| epoch 8 | 1903/ 8400 batches | test loss 0.4615124 +| epoch 8 | 1907/ 8400 batches | test loss 0.4535535 +| epoch 8 | 1911/ 8400 batches | test loss 0.3272122 +| epoch 8 | 1915/ 8400 batches | test loss 0.4113192 +| epoch 8 | 1919/ 8400 batches | test loss 1.2265905 +| epoch 8 | 1923/ 8400 batches | test loss 0.4076610 +| epoch 8 | 1927/ 8400 batches | test loss 0.5063685 +| epoch 8 | 1931/ 8400 batches | test loss 0.5598153 +| epoch 8 | 1935/ 8400 batches | test loss 0.4191193 +| epoch 8 | 1939/ 8400 batches | test loss 0.4897079 +| epoch 8 | 1943/ 8400 batches | test loss 0.4056134 +| epoch 8 | 1947/ 8400 batches | test loss 0.3872002 +| epoch 8 | 1951/ 8400 batches | test loss 0.4934689 +| epoch 8 | 1955/ 8400 batches | test loss 0.3899615 +| epoch 8 | 1959/ 8400 batches | test loss 0.1722201 +| epoch 8 | 1963/ 8400 batches | test loss 0.3978740 +| epoch 8 | 1967/ 8400 batches | test loss 0.4239523 +| epoch 8 | 1971/ 8400 batches | test loss 0.4125653 +| epoch 8 | 1975/ 8400 batches | test loss 0.3973253 +| epoch 8 | 1979/ 8400 batches | test loss 0.4386929 +| epoch 8 | 1983/ 8400 batches | test loss 0.3801830 +| epoch 8 | 1987/ 8400 batches | test loss 0.4024526 +| epoch 8 | 1991/ 8400 batches | test loss 0.5548344 +| epoch 8 | 1995/ 8400 batches | test loss 0.3472860 +| epoch 8 | 1999/ 8400 batches | test loss 0.4353259 +| epoch 8 | 2003/ 8400 batches | test loss 0.4659426 +| epoch 8 | 2007/ 8400 batches | test loss 0.4527880 +| epoch 8 | 2011/ 8400 batches | test loss 0.3906182 +| epoch 8 | 2015/ 8400 batches | test loss 0.4235364 +| epoch 8 | 2019/ 8400 batches | test loss 0.4069251 +| epoch 8 | 2023/ 8400 batches | test loss 0.3655033 +| epoch 8 | 2027/ 8400 batches | test loss 0.5656196 +| epoch 8 | 2031/ 8400 batches | test loss 0.4052728 +| epoch 8 | 2035/ 8400 batches | test loss 0.3374322 +| epoch 8 | 2039/ 8400 batches | test loss 0.3932049 +| epoch 8 | 2043/ 8400 batches | test loss 0.5015329 +| epoch 8 | 2047/ 8400 batches | test loss 0.4076152 +| epoch 8 | 2051/ 8400 batches | test loss 0.4371026 +| epoch 8 | 2055/ 8400 batches | test loss 0.3726535 +| epoch 8 | 2059/ 8400 batches | test loss 0.3614364 +| epoch 8 | 2063/ 8400 batches | test loss 0.4223429 +| epoch 8 | 2067/ 8400 batches | test loss 0.4128423 +| epoch 8 | 2071/ 8400 batches | test loss 0.4236103 +| epoch 8 | 2075/ 8400 batches | test loss 0.4104620 +| epoch 8 | 2079/ 8400 batches | test loss 0.3719893 +| epoch 8 | 2083/ 8400 batches | test loss 0.4985890 +| epoch 8 | 2087/ 8400 batches | test loss 0.5557562 +| epoch 8 | 2091/ 8400 batches | test loss 0.4172013 +| epoch 8 | 2095/ 8400 batches | test loss 0.4373169 +| epoch 8 | 2099/ 8400 batches | test loss 0.5342969 +| epoch 8 | final test loss 0.4402, do not save model! +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 8400 batches | train loss 0.3891227 +| epoch 9 | 7/ 8400 batches | train loss 0.3604616 +| epoch 9 | 11/ 8400 batches | train loss 0.4193518 +| epoch 9 | 15/ 8400 batches | train loss 0.3564247 +| epoch 9 | 19/ 8400 batches | train loss 0.2843677 +| epoch 9 | 23/ 8400 batches | train loss 0.3027146 +| epoch 9 | 27/ 8400 batches | train loss 0.3750549 +| epoch 9 | 31/ 8400 batches | train loss 0.3493621 +| epoch 9 | 35/ 8400 batches | train loss 0.2993690 +| epoch 9 | 39/ 8400 batches | train loss 0.3412641 +| epoch 9 | 43/ 8400 batches | train loss 0.4212668 +| epoch 9 | 47/ 8400 batches | train loss 0.2946413 +| epoch 9 | 51/ 8400 batches | train loss 0.3324541 +| epoch 9 | 55/ 8400 batches | train loss 0.3198912 +| epoch 9 | 59/ 8400 batches | train loss 0.3218109 +| epoch 9 | 63/ 8400 batches | train loss 0.3505643 +| epoch 9 | 67/ 8400 batches | train loss 0.3164127 +| epoch 9 | 71/ 8400 batches | train loss 0.3109818 +| epoch 9 | 75/ 8400 batches | train loss 0.3327888 +| epoch 9 | 79/ 8400 batches | train loss 0.3206404 +| epoch 9 | 83/ 8400 batches | train loss 0.2889797 +| epoch 9 | 87/ 8400 batches | train loss 0.3222582 +| epoch 9 | 91/ 8400 batches | train loss 0.3657489 +| epoch 9 | 95/ 8400 batches | train loss 0.3514163 +| epoch 9 | 99/ 8400 batches | train loss 0.3255285 +| epoch 9 | 103/ 8400 batches | train loss 0.3792378 +| epoch 9 | 107/ 8400 batches | train loss 0.3253760 +| epoch 9 | 111/ 8400 batches | train loss 0.3503132 +| epoch 9 | 115/ 8400 batches | train loss 0.3610230 +| epoch 9 | 119/ 8400 batches | train loss 0.3770210 +| epoch 9 | 123/ 8400 batches | train loss 0.3510175 +| epoch 9 | 127/ 8400 batches | train loss 0.3061504 +| epoch 9 | 131/ 8400 batches | train loss 0.3594845 +| epoch 9 | 135/ 8400 batches | train loss 0.3360264 +| epoch 9 | 139/ 8400 batches | train loss 0.4325816 +| epoch 9 | 143/ 8400 batches | train loss 0.2716402 +| epoch 9 | 147/ 8400 batches | train loss 0.2987545 +| epoch 9 | 151/ 8400 batches | train loss 0.3247724 +| epoch 9 | 155/ 8400 batches | train loss 0.2792164 +| epoch 9 | 159/ 8400 batches | train loss 0.3370085 +| epoch 9 | 163/ 8400 batches | train loss 0.4112158 +| epoch 9 | 167/ 8400 batches | train loss 0.2980915 +| epoch 9 | 171/ 8400 batches | train loss 0.3302222 +| epoch 9 | 175/ 8400 batches | train loss 0.3003723 +| epoch 9 | 179/ 8400 batches | train loss 0.3602443 +| epoch 9 | 183/ 8400 batches | train loss 0.3704179 +| epoch 9 | 187/ 8400 batches | train loss 0.3138206 +| epoch 9 | 191/ 8400 batches | train loss 0.3415487 +| epoch 9 | 195/ 8400 batches | train loss 0.2881104 +| epoch 9 | 199/ 8400 batches | train loss 0.2783855 +| epoch 9 | 203/ 8400 batches | train loss 0.2733206 +| epoch 9 | 207/ 8400 batches | train loss 0.2795159 +| epoch 9 | 211/ 8400 batches | train loss 0.3157852 +| epoch 9 | 215/ 8400 batches | train loss 0.3020912 +| epoch 9 | 219/ 8400 batches | train loss 0.3875797 +| epoch 9 | 223/ 8400 batches | train loss 0.3172898 +| epoch 9 | 227/ 8400 batches | train loss 0.3813208 +| epoch 9 | 231/ 8400 batches | train loss 0.2983876 +| epoch 9 | 235/ 8400 batches | train loss 0.2809234 +| epoch 9 | 239/ 8400 batches | train loss 0.3527850 +| epoch 9 | 243/ 8400 batches | train loss 0.3042408 +| epoch 9 | 247/ 8400 batches | train loss 0.3383221 +| epoch 9 | 251/ 8400 batches | train loss 0.2875478 +| epoch 9 | 255/ 8400 batches | train loss 0.3243919 +| epoch 9 | 259/ 8400 batches | train loss 0.3299237 +| epoch 9 | 263/ 8400 batches | train loss 0.2829348 +| epoch 9 | 267/ 8400 batches | train loss 0.3727076 +| epoch 9 | 271/ 8400 batches | train loss 0.3818564 +| epoch 9 | 275/ 8400 batches | train loss 0.4060984 +| epoch 9 | 279/ 8400 batches | train loss 0.2825770 +| epoch 9 | 283/ 8400 batches | train loss 0.2791924 +| epoch 9 | 287/ 8400 batches | train loss 0.3372153 +| epoch 9 | 291/ 8400 batches | train loss 0.2859380 +| epoch 9 | 295/ 8400 batches | train loss 0.3562214 +| epoch 9 | 299/ 8400 batches | train loss 0.2816542 +| epoch 9 | 303/ 8400 batches | train loss 0.3411025 +| epoch 9 | 307/ 8400 batches | train loss 0.1968178 +| epoch 9 | 311/ 8400 batches | train loss 0.2891508 +| epoch 9 | 315/ 8400 batches | train loss 0.3878812 +| epoch 9 | 319/ 8400 batches | train loss 0.3193595 +| epoch 9 | 323/ 8400 batches | train loss 0.2819082 +| epoch 9 | 327/ 8400 batches | train loss 0.3567870 +| epoch 9 | 331/ 8400 batches | train loss 0.3316957 +| epoch 9 | 335/ 8400 batches | train loss 0.3838485 +| epoch 9 | 339/ 8400 batches | train loss 0.3616316 +| epoch 9 | 343/ 8400 batches | train loss 0.3358515 +| epoch 9 | 347/ 8400 batches | train loss 0.3537526 +| epoch 9 | 351/ 8400 batches | train loss 0.3800583 +| epoch 9 | 355/ 8400 batches | train loss 0.3535834 +| epoch 9 | 359/ 8400 batches | train loss 0.3830454 +| epoch 9 | 363/ 8400 batches | train loss 0.2844191 +| epoch 9 | 367/ 8400 batches | train loss 0.2778695 +| epoch 9 | 371/ 8400 batches | train loss 0.3412789 +| epoch 9 | 375/ 8400 batches | train loss 0.3168784 +| epoch 9 | 379/ 8400 batches | train loss 0.3520472 +| epoch 9 | 383/ 8400 batches | train loss 0.2715110 +| epoch 9 | 387/ 8400 batches | train loss 0.3064476 +| epoch 9 | 391/ 8400 batches | train loss 0.2796707 +| epoch 9 | 395/ 8400 batches | train loss 0.3026525 +| epoch 9 | 399/ 8400 batches | train loss 0.2814713 +| epoch 9 | 403/ 8400 batches | train loss 0.3410206 +| epoch 9 | 407/ 8400 batches | train loss 0.3471992 +| epoch 9 | 411/ 8400 batches | train loss 0.2850611 +| epoch 9 | 415/ 8400 batches | train loss 0.2975471 +| epoch 9 | 419/ 8400 batches | train loss 0.3423589 +| epoch 9 | 423/ 8400 batches | train loss 0.3181167 +| epoch 9 | 427/ 8400 batches | train loss 0.3660251 +| epoch 9 | 431/ 8400 batches | train loss 0.3054117 +| epoch 9 | 435/ 8400 batches | train loss 0.3916445 +| epoch 9 | 439/ 8400 batches | train loss 0.2765923 +| epoch 9 | 443/ 8400 batches | train loss 0.3141066 +| epoch 9 | 447/ 8400 batches | train loss 0.3178583 +| epoch 9 | 451/ 8400 batches | train loss 0.3296283 +| epoch 9 | 455/ 8400 batches | train loss 0.4241193 +| epoch 9 | 459/ 8400 batches | train loss 0.4072513 +| epoch 9 | 463/ 8400 batches | train loss 0.3072073 +| epoch 9 | 467/ 8400 batches | train loss 0.3175029 +| epoch 9 | 471/ 8400 batches | train loss 0.3508720 +| epoch 9 | 475/ 8400 batches | train loss 0.3968354 +| epoch 9 | 479/ 8400 batches | train loss 0.3420061 +| epoch 9 | 483/ 8400 batches | train loss 0.3018885 +| epoch 9 | 487/ 8400 batches | train loss 0.3870189 +| epoch 9 | 491/ 8400 batches | train loss 0.2936075 +| epoch 9 | 495/ 8400 batches | train loss 0.3238106 +| epoch 9 | 499/ 8400 batches | train loss 0.3816218 +| epoch 9 | 503/ 8400 batches | train loss 0.3065969 +| epoch 9 | 507/ 8400 batches | train loss 0.2987024 +| epoch 9 | 511/ 8400 batches | train loss 0.2772302 +| epoch 9 | 515/ 8400 batches | train loss 0.3452067 +| epoch 9 | 519/ 8400 batches | train loss 0.3117769 +| epoch 9 | 523/ 8400 batches | train loss 0.3703946 +| epoch 9 | 527/ 8400 batches | train loss 0.3260071 +| epoch 9 | 531/ 8400 batches | train loss 0.3558222 +| epoch 9 | 535/ 8400 batches | train loss 0.2988101 +| epoch 9 | 539/ 8400 batches | train loss 0.3528542 +| epoch 9 | 543/ 8400 batches | train loss 0.3887892 +| epoch 9 | 547/ 8400 batches | train loss 0.3289363 +| epoch 9 | 551/ 8400 batches | train loss 0.4225955 +| epoch 9 | 555/ 8400 batches | train loss 0.3234367 +| epoch 9 | 559/ 8400 batches | train loss 0.3625863 +| epoch 9 | 563/ 8400 batches | train loss 0.3853701 +| epoch 9 | 567/ 8400 batches | train loss 0.3447727 +| epoch 9 | 571/ 8400 batches | train loss 0.3696905 +| epoch 9 | 575/ 8400 batches | train loss 0.4576520 +| epoch 9 | 579/ 8400 batches | train loss 0.2929052 +| epoch 9 | 583/ 8400 batches | train loss 0.3366249 +| epoch 9 | 587/ 8400 batches | train loss 0.2937571 +| epoch 9 | 591/ 8400 batches | train loss 0.3263421 +| epoch 9 | 595/ 8400 batches | train loss 0.4768619 +| epoch 9 | 599/ 8400 batches | train loss 0.3268654 +| epoch 9 | 603/ 8400 batches | train loss 0.3588410 +| epoch 9 | 607/ 8400 batches | train loss 0.2884464 +| epoch 9 | 611/ 8400 batches | train loss 0.3683672 +| epoch 9 | 615/ 8400 batches | train loss 0.2885857 +| epoch 9 | 619/ 8400 batches | train loss 0.3624843 +| epoch 9 | 623/ 8400 batches | train loss 0.3537022 +| epoch 9 | 627/ 8400 batches | train loss 0.3144482 +| epoch 9 | 631/ 8400 batches | train loss 0.3690046 +| epoch 9 | 635/ 8400 batches | train loss 0.3507467 +| epoch 9 | 639/ 8400 batches | train loss 0.3148597 +| epoch 9 | 643/ 8400 batches | train loss 0.3673353 +| epoch 9 | 647/ 8400 batches | train loss 0.3328734 +| epoch 9 | 651/ 8400 batches | train loss 0.3299919 +| epoch 9 | 655/ 8400 batches | train loss 0.3165861 +| epoch 9 | 659/ 8400 batches | train loss 0.2905071 +| epoch 9 | 663/ 8400 batches | train loss 0.2805151 +| epoch 9 | 667/ 8400 batches | train loss 0.2941307 +| epoch 9 | 671/ 8400 batches | train loss 0.2189599 +| epoch 9 | 675/ 8400 batches | train loss 0.3381450 +| epoch 9 | 679/ 8400 batches | train loss 0.3368792 +| epoch 9 | 683/ 8400 batches | train loss 0.3015514 +| epoch 9 | 687/ 8400 batches | train loss 0.3162046 +| epoch 9 | 691/ 8400 batches | train loss 0.3326750 +| epoch 9 | 695/ 8400 batches | train loss 0.2200280 +| epoch 9 | 699/ 8400 batches | train loss 0.3267500 +| epoch 9 | 703/ 8400 batches | train loss 0.3124099 +| epoch 9 | 707/ 8400 batches | train loss 0.3668883 +| epoch 9 | 711/ 8400 batches | train loss 0.3263027 +| epoch 9 | 715/ 8400 batches | train loss 0.3021046 +| epoch 9 | 719/ 8400 batches | train loss 0.3556130 +| epoch 9 | 723/ 8400 batches | train loss 0.3606938 +| epoch 9 | 727/ 8400 batches | train loss 0.3467929 +| epoch 9 | 731/ 8400 batches | train loss 0.3842532 +| epoch 9 | 735/ 8400 batches | train loss 0.3121560 +| epoch 9 | 739/ 8400 batches | train loss 0.3599716 +| epoch 9 | 743/ 8400 batches | train loss 0.3417462 +| epoch 9 | 747/ 8400 batches | train loss 0.3371747 +| epoch 9 | 751/ 8400 batches | train loss 0.2527875 +| epoch 9 | 755/ 8400 batches | train loss 0.2626505 +| epoch 9 | 759/ 8400 batches | train loss 0.3122641 +| epoch 9 | 763/ 8400 batches | train loss 0.3639174 +| epoch 9 | 767/ 8400 batches | train loss 0.2887625 +| epoch 9 | 771/ 8400 batches | train loss 0.2996890 +| epoch 9 | 775/ 8400 batches | train loss 0.2825532 +| epoch 9 | 779/ 8400 batches | train loss 0.3484700 +| epoch 9 | 783/ 8400 batches | train loss 0.2945408 +| epoch 9 | 787/ 8400 batches | train loss 0.2862639 +| epoch 9 | 791/ 8400 batches | train loss 0.3952504 +| epoch 9 | 795/ 8400 batches | train loss 0.3425164 +| epoch 9 | 799/ 8400 batches | train loss 0.3123236 +| epoch 9 | 803/ 8400 batches | train loss 0.3981861 +| epoch 9 | 807/ 8400 batches | train loss 0.3159548 +| epoch 9 | 811/ 8400 batches | train loss 0.3756774 +| epoch 9 | 815/ 8400 batches | train loss 0.2712826 +| epoch 9 | 819/ 8400 batches | train loss 0.3571506 +| epoch 9 | 823/ 8400 batches | train loss 0.3334357 +| epoch 9 | 827/ 8400 batches | train loss 0.3955843 +| epoch 9 | 831/ 8400 batches | train loss 0.3489711 +| epoch 9 | 835/ 8400 batches | train loss 0.3816114 +| epoch 9 | 839/ 8400 batches | train loss 0.3613955 +| epoch 9 | 843/ 8400 batches | train loss 0.3023158 +| epoch 9 | 847/ 8400 batches | train loss 0.3324617 +| epoch 9 | 851/ 8400 batches | train loss 0.4331508 +| epoch 9 | 855/ 8400 batches | train loss 0.2939049 +| epoch 9 | 859/ 8400 batches | train loss 0.3198268 +| epoch 9 | 863/ 8400 batches | train loss 0.3050347 +| epoch 9 | 867/ 8400 batches | train loss 0.3347763 +| epoch 9 | 871/ 8400 batches | train loss 0.3935572 +| epoch 9 | 875/ 8400 batches | train loss 0.3014764 +| epoch 9 | 879/ 8400 batches | train loss 0.3233083 +| epoch 9 | 883/ 8400 batches | train loss 0.3535203 +| epoch 9 | 887/ 8400 batches | train loss 0.2771283 +| epoch 9 | 891/ 8400 batches | train loss 0.3169533 +| epoch 9 | 895/ 8400 batches | train loss 0.3652522 +| epoch 9 | 899/ 8400 batches | train loss 0.2948608 +| epoch 9 | 903/ 8400 batches | train loss 0.2676224 +| epoch 9 | 907/ 8400 batches | train loss 0.3126303 +| epoch 9 | 911/ 8400 batches | train loss 0.3138183 +| epoch 9 | 915/ 8400 batches | train loss 0.3120079 +| epoch 9 | 919/ 8400 batches | train loss 0.3530070 +| epoch 9 | 923/ 8400 batches | train loss 0.3022577 +| epoch 9 | 927/ 8400 batches | train loss 0.3050676 +| epoch 9 | 931/ 8400 batches | train loss 0.3279357 +| epoch 9 | 935/ 8400 batches | train loss 0.3945744 +| epoch 9 | 939/ 8400 batches | train loss 0.4059257 +| epoch 9 | 943/ 8400 batches | train loss 0.2863488 +| epoch 9 | 947/ 8400 batches | train loss 0.3048775 +| epoch 9 | 951/ 8400 batches | train loss 0.3098588 +| epoch 9 | 955/ 8400 batches | train loss 0.3260103 +| epoch 9 | 959/ 8400 batches | train loss 0.2638057 +| epoch 9 | 963/ 8400 batches | train loss 0.3117576 +| epoch 9 | 967/ 8400 batches | train loss 0.3521346 +| epoch 9 | 971/ 8400 batches | train loss 0.3027789 +| epoch 9 | 975/ 8400 batches | train loss 0.3326919 +| epoch 9 | 979/ 8400 batches | train loss 0.4681338 +| epoch 9 | 983/ 8400 batches | train loss 0.3739684 +| epoch 9 | 987/ 8400 batches | train loss 0.4053376 +| epoch 9 | 991/ 8400 batches | train loss 0.3148316 +| epoch 9 | 995/ 8400 batches | train loss 0.3076971 +| epoch 9 | 999/ 8400 batches | train loss 0.3465243 +| epoch 9 | 1003/ 8400 batches | train loss 0.3316745 +| epoch 9 | 1007/ 8400 batches | train loss 0.3568498 +| epoch 9 | 1011/ 8400 batches | train loss 0.1199702 +| epoch 9 | 1015/ 8400 batches | train loss 0.3269543 +| epoch 9 | 1019/ 8400 batches | train loss 0.3471888 +| epoch 9 | 1023/ 8400 batches | train loss 0.3250960 +| epoch 9 | 1027/ 8400 batches | train loss 0.3484820 +| epoch 9 | 1031/ 8400 batches | train loss 0.3013294 +| epoch 9 | 1035/ 8400 batches | train loss 0.3433053 +| epoch 9 | 1039/ 8400 batches | train loss 0.3211720 +| epoch 9 | 1043/ 8400 batches | train loss 0.3394728 +| epoch 9 | 1047/ 8400 batches | train loss 0.3175395 +| epoch 9 | 1051/ 8400 batches | train loss 0.1266723 +| epoch 9 | 1055/ 8400 batches | train loss 0.3169356 +| epoch 9 | 1059/ 8400 batches | train loss 0.3480443 +| epoch 9 | 1063/ 8400 batches | train loss 0.3090250 +| epoch 9 | 1067/ 8400 batches | train loss 0.3833871 +| epoch 9 | 1071/ 8400 batches | train loss 0.3306636 +| epoch 9 | 1075/ 8400 batches | train loss 0.2776629 +| epoch 9 | 1079/ 8400 batches | train loss 0.3036763 +| epoch 9 | 1083/ 8400 batches | train loss 0.2939335 +| epoch 9 | 1087/ 8400 batches | train loss 0.3680056 +| epoch 9 | 1091/ 8400 batches | train loss 0.3304082 +| epoch 9 | 1095/ 8400 batches | train loss 0.2991217 +| epoch 9 | 1099/ 8400 batches | train loss 0.2843406 +| epoch 9 | 1103/ 8400 batches | train loss 0.3593500 +| epoch 9 | 1107/ 8400 batches | train loss 0.3596227 +| epoch 9 | 1111/ 8400 batches | train loss 0.2741516 +| epoch 9 | 1115/ 8400 batches | train loss 0.3586181 +| epoch 9 | 1119/ 8400 batches | train loss 0.3176455 +| epoch 9 | 1123/ 8400 batches | train loss 0.3343148 +| epoch 9 | 1127/ 8400 batches | train loss 0.3191987 +| epoch 9 | 1131/ 8400 batches | train loss 0.3161294 +| epoch 9 | 1135/ 8400 batches | train loss 0.4346687 +| epoch 9 | 1139/ 8400 batches | train loss 0.3064249 +| epoch 9 | 1143/ 8400 batches | train loss 0.3755899 +| epoch 9 | 1147/ 8400 batches | train loss 0.3945296 +| epoch 9 | 1151/ 8400 batches | train loss 0.3093337 +| epoch 9 | 1155/ 8400 batches | train loss 0.3447613 +| epoch 9 | 1159/ 8400 batches | train loss 0.2984276 +| epoch 9 | 1163/ 8400 batches | train loss 0.3663200 +| epoch 9 | 1167/ 8400 batches | train loss 0.3544597 +| epoch 9 | 1171/ 8400 batches | train loss 0.3567012 +| epoch 9 | 1175/ 8400 batches | train loss 0.3437109 +| epoch 9 | 1179/ 8400 batches | train loss 0.3242201 +| epoch 9 | 1183/ 8400 batches | train loss 0.3156039 +| epoch 9 | 1187/ 8400 batches | train loss 0.3833172 +| epoch 9 | 1191/ 8400 batches | train loss 0.3285188 +| epoch 9 | 1195/ 8400 batches | train loss 0.3538903 +| epoch 9 | 1199/ 8400 batches | train loss 0.2948460 +| epoch 9 | 1203/ 8400 batches | train loss 0.3217150 +| epoch 9 | 1207/ 8400 batches | train loss 0.3474681 +| epoch 9 | 1211/ 8400 batches | train loss 0.2845167 +| epoch 9 | 1215/ 8400 batches | train loss 0.2505159 +| epoch 9 | 1219/ 8400 batches | train loss 0.2896014 +| epoch 9 | 1223/ 8400 batches | train loss 0.2810951 +| epoch 9 | 1227/ 8400 batches | train loss 0.3409593 +| epoch 9 | 1231/ 8400 batches | train loss 0.3742396 +| epoch 9 | 1235/ 8400 batches | train loss 0.1340604 +| epoch 9 | 1239/ 8400 batches | train loss 0.2963463 +| epoch 9 | 1243/ 8400 batches | train loss 0.3202722 +| epoch 9 | 1247/ 8400 batches | train loss 0.2824638 +| epoch 9 | 1251/ 8400 batches | train loss 0.3500174 +| epoch 9 | 1255/ 8400 batches | train loss 0.2621404 +| epoch 9 | 1259/ 8400 batches | train loss 0.3592171 +| epoch 9 | 1263/ 8400 batches | train loss 0.4369211 +| epoch 9 | 1267/ 8400 batches | train loss 0.3616624 +| epoch 9 | 1271/ 8400 batches | train loss 0.3028570 +| epoch 9 | 1275/ 8400 batches | train loss 0.3301635 +| epoch 9 | 1279/ 8400 batches | train loss 0.3260863 +| epoch 9 | 1283/ 8400 batches | train loss 0.3408600 +| epoch 9 | 1287/ 8400 batches | train loss 0.3031310 +| epoch 9 | 1291/ 8400 batches | train loss 0.3594450 +| epoch 9 | 1295/ 8400 batches | train loss 0.3433301 +| epoch 9 | 1299/ 8400 batches | train loss 0.3557246 +| epoch 9 | 1303/ 8400 batches | train loss 0.3324745 +| epoch 9 | 1307/ 8400 batches | train loss 0.3504119 +| epoch 9 | 1311/ 8400 batches | train loss 0.3205158 +| epoch 9 | 1315/ 8400 batches | train loss 0.3566366 +| epoch 9 | 1319/ 8400 batches | train loss 0.3608688 +| epoch 9 | 1323/ 8400 batches | train loss 0.3748493 +| epoch 9 | 1327/ 8400 batches | train loss 0.3344055 +| epoch 9 | 1331/ 8400 batches | train loss 0.3484202 +| epoch 9 | 1335/ 8400 batches | train loss 0.3620950 +| epoch 9 | 1339/ 8400 batches | train loss 0.2356447 +| epoch 9 | 1343/ 8400 batches | train loss 0.2965261 +| epoch 9 | 1347/ 8400 batches | train loss 0.3492998 +| epoch 9 | 1351/ 8400 batches | train loss 0.3941133 +| epoch 9 | 1355/ 8400 batches | train loss 0.3461418 +| epoch 9 | 1359/ 8400 batches | train loss 0.3343400 +| epoch 9 | 1363/ 8400 batches | train loss 0.3196476 +| epoch 9 | 1367/ 8400 batches | train loss 0.2723615 +| epoch 9 | 1371/ 8400 batches | train loss 0.3042757 +| epoch 9 | 1375/ 8400 batches | train loss 0.3308841 +| epoch 9 | 1379/ 8400 batches | train loss 0.2947922 +| epoch 9 | 1383/ 8400 batches | train loss 0.3382549 +| epoch 9 | 1387/ 8400 batches | train loss 0.2993305 +| epoch 9 | 1391/ 8400 batches | train loss 0.3629785 +| epoch 9 | 1395/ 8400 batches | train loss 0.3580443 +| epoch 9 | 1399/ 8400 batches | train loss 0.3333132 +| epoch 9 | 1403/ 8400 batches | train loss 0.3223863 +| epoch 9 | 1407/ 8400 batches | train loss 0.3359551 +| epoch 9 | 1411/ 8400 batches | train loss 0.2727419 +| epoch 9 | 1415/ 8400 batches | train loss 0.3130353 +| epoch 9 | 1419/ 8400 batches | train loss 0.3237780 +| epoch 9 | 1423/ 8400 batches | train loss 0.3455191 +| epoch 9 | 1427/ 8400 batches | train loss 0.2818369 +| epoch 9 | 1431/ 8400 batches | train loss 0.3624985 +| epoch 9 | 1435/ 8400 batches | train loss 0.2132771 +| epoch 9 | 1439/ 8400 batches | train loss 0.3174820 +| epoch 9 | 1443/ 8400 batches | train loss 0.3423337 +| epoch 9 | 1447/ 8400 batches | train loss 0.3435252 +| epoch 9 | 1451/ 8400 batches | train loss 0.2865213 +| epoch 9 | 1455/ 8400 batches | train loss 0.3107579 +| epoch 9 | 1459/ 8400 batches | train loss 0.3031328 +| epoch 9 | 1463/ 8400 batches | train loss 0.2166625 +| epoch 9 | 1467/ 8400 batches | train loss 0.2794548 +| epoch 9 | 1471/ 8400 batches | train loss 0.3049815 +| epoch 9 | 1475/ 8400 batches | train loss 0.3163667 +| epoch 9 | 1479/ 8400 batches | train loss 0.3350609 +| epoch 9 | 1483/ 8400 batches | train loss 0.3335829 +| epoch 9 | 1487/ 8400 batches | train loss 0.3698435 +| epoch 9 | 1491/ 8400 batches | train loss 0.3615756 +| epoch 9 | 1495/ 8400 batches | train loss 0.3243802 +| epoch 9 | 1499/ 8400 batches | train loss 0.3094309 +| epoch 9 | 1503/ 8400 batches | train loss 0.3282309 +| epoch 9 | 1507/ 8400 batches | train loss 0.3454334 +| epoch 9 | 1511/ 8400 batches | train loss 0.3435991 +| epoch 9 | 1515/ 8400 batches | train loss 0.2934393 +| epoch 9 | 1519/ 8400 batches | train loss 0.2932581 +| epoch 9 | 1523/ 8400 batches | train loss 0.2800528 +| epoch 9 | 1527/ 8400 batches | train loss 0.3481184 +| epoch 9 | 1531/ 8400 batches | train loss 0.3303853 +| epoch 9 | 1535/ 8400 batches | train loss 0.3968403 +| epoch 9 | 1539/ 8400 batches | train loss 0.3347459 +| epoch 9 | 1543/ 8400 batches | train loss 0.2922696 +| epoch 9 | 1547/ 8400 batches | train loss 0.3304947 +| epoch 9 | 1551/ 8400 batches | train loss 0.1455268 +| epoch 9 | 1555/ 8400 batches | train loss 0.3269173 +| epoch 9 | 1559/ 8400 batches | train loss 0.3048774 +| epoch 9 | 1563/ 8400 batches | train loss 0.3714536 +| epoch 9 | 1567/ 8400 batches | train loss 0.3506445 +| epoch 9 | 1571/ 8400 batches | train loss 0.3233762 +| epoch 9 | 1575/ 8400 batches | train loss 0.3144832 +| epoch 9 | 1579/ 8400 batches | train loss 0.3246236 +| epoch 9 | 1583/ 8400 batches | train loss 0.3583356 +| epoch 9 | 1587/ 8400 batches | train loss 0.3531825 +| epoch 9 | 1591/ 8400 batches | train loss 0.2286016 +| epoch 9 | 1595/ 8400 batches | train loss 0.3169723 +| epoch 9 | 1599/ 8400 batches | train loss 0.2899873 +| epoch 9 | 1603/ 8400 batches | train loss 0.3319738 +| epoch 9 | 1607/ 8400 batches | train loss 0.3645287 +| epoch 9 | 1611/ 8400 batches | train loss 0.3731354 +| epoch 9 | 1615/ 8400 batches | train loss 0.3185689 +| epoch 9 | 1619/ 8400 batches | train loss 0.3764052 +| epoch 9 | 1623/ 8400 batches | train loss 0.3558195 +| epoch 9 | 1627/ 8400 batches | train loss 0.3656443 +| epoch 9 | 1631/ 8400 batches | train loss 0.3228736 +| epoch 9 | 1635/ 8400 batches | train loss 0.3991274 +| epoch 9 | 1639/ 8400 batches | train loss 0.3283868 +| epoch 9 | 1643/ 8400 batches | train loss 0.3001538 +| epoch 9 | 1647/ 8400 batches | train loss 0.3372252 +| epoch 9 | 1651/ 8400 batches | train loss 0.3300599 +| epoch 9 | 1655/ 8400 batches | train loss 0.3471833 +| epoch 9 | 1659/ 8400 batches | train loss 0.3232485 +| epoch 9 | 1663/ 8400 batches | train loss 0.3638079 +| epoch 9 | 1667/ 8400 batches | train loss 0.3401564 +| epoch 9 | 1671/ 8400 batches | train loss 0.2773499 +| epoch 9 | 1675/ 8400 batches | train loss 0.2918663 +| epoch 9 | 1679/ 8400 batches | train loss 0.4042997 +| epoch 9 | 1683/ 8400 batches | train loss 0.3856645 +| epoch 9 | 1687/ 8400 batches | train loss 0.3697532 +| epoch 9 | 1691/ 8400 batches | train loss 0.3400727 +| epoch 9 | 1695/ 8400 batches | train loss 0.3140209 +| epoch 9 | 1699/ 8400 batches | train loss 0.3107408 +| epoch 9 | 1703/ 8400 batches | train loss 0.3091429 +| epoch 9 | 1707/ 8400 batches | train loss 0.3313570 +| epoch 9 | 1711/ 8400 batches | train loss 0.4103119 +| epoch 9 | 1715/ 8400 batches | train loss 0.2805150 +| epoch 9 | 1719/ 8400 batches | train loss 0.3249970 +| epoch 9 | 1723/ 8400 batches | train loss 0.3108267 +| epoch 9 | 1727/ 8400 batches | train loss 0.2604196 +| epoch 9 | 1731/ 8400 batches | train loss 0.2988296 +| epoch 9 | 1735/ 8400 batches | train loss 0.3871332 +| epoch 9 | 1739/ 8400 batches | train loss 0.3332237 +| epoch 9 | 1743/ 8400 batches | train loss 0.3401386 +| epoch 9 | 1747/ 8400 batches | train loss 0.3477237 +| epoch 9 | 1751/ 8400 batches | train loss 0.2987940 +| epoch 9 | 1755/ 8400 batches | train loss 0.2860283 +| epoch 9 | 1759/ 8400 batches | train loss 0.3132690 +| epoch 9 | 1763/ 8400 batches | train loss 0.3318147 +| epoch 9 | 1767/ 8400 batches | train loss 0.3233039 +| epoch 9 | 1771/ 8400 batches | train loss 0.2975537 +| epoch 9 | 1775/ 8400 batches | train loss 0.3236092 +| epoch 9 | 1779/ 8400 batches | train loss 0.3902125 +| epoch 9 | 1783/ 8400 batches | train loss 0.3608323 +| epoch 9 | 1787/ 8400 batches | train loss 0.3867704 +| epoch 9 | 1791/ 8400 batches | train loss 0.3549016 +| epoch 9 | 1795/ 8400 batches | train loss 0.3648921 +| epoch 9 | 1799/ 8400 batches | train loss 0.3475332 +| epoch 9 | 1803/ 8400 batches | train loss 0.3347809 +| epoch 9 | 1807/ 8400 batches | train loss 0.3556927 +| epoch 9 | 1811/ 8400 batches | train loss 0.3149598 +| epoch 9 | 1815/ 8400 batches | train loss 0.3092882 +| epoch 9 | 1819/ 8400 batches | train loss 0.2909423 +| epoch 9 | 1823/ 8400 batches | train loss 0.3214989 +| epoch 9 | 1827/ 8400 batches | train loss 0.3088195 +| epoch 9 | 1831/ 8400 batches | train loss 0.3626479 +| epoch 9 | 1835/ 8400 batches | train loss 0.3388521 +| epoch 9 | 1839/ 8400 batches | train loss 0.3055642 +| epoch 9 | 1843/ 8400 batches | train loss 0.2768323 +| epoch 9 | 1847/ 8400 batches | train loss 0.3610074 +| epoch 9 | 1851/ 8400 batches | train loss 0.3189515 +| epoch 9 | 1855/ 8400 batches | train loss 0.3304257 +| epoch 9 | 1859/ 8400 batches | train loss 0.3065697 +| epoch 9 | 1863/ 8400 batches | train loss 0.2851033 +| epoch 9 | 1867/ 8400 batches | train loss 0.3324034 +| epoch 9 | 1871/ 8400 batches | train loss 0.2979855 +| epoch 9 | 1875/ 8400 batches | train loss 0.2751870 +| epoch 9 | 1879/ 8400 batches | train loss 0.3091437 +| epoch 9 | 1883/ 8400 batches | train loss 0.3795793 +| epoch 9 | 1887/ 8400 batches | train loss 0.3459736 +| epoch 9 | 1891/ 8400 batches | train loss 0.3297790 +| epoch 9 | 1895/ 8400 batches | train loss 0.3817345 +| epoch 9 | 1899/ 8400 batches | train loss 0.3188682 +| epoch 9 | 1903/ 8400 batches | train loss 0.3117543 +| epoch 9 | 1907/ 8400 batches | train loss 0.3675669 +| epoch 9 | 1911/ 8400 batches | train loss 0.3407815 +| epoch 9 | 1915/ 8400 batches | train loss 0.3333880 +| epoch 9 | 1919/ 8400 batches | train loss 0.2996119 +| epoch 9 | 1923/ 8400 batches | train loss 0.3015037 +| epoch 9 | 1927/ 8400 batches | train loss 0.3408118 +| epoch 9 | 1931/ 8400 batches | train loss 0.3623404 +| epoch 9 | 1935/ 8400 batches | train loss 0.2797964 +| epoch 9 | 1939/ 8400 batches | train loss 0.3796307 +| epoch 9 | 1943/ 8400 batches | train loss 0.3390122 +| epoch 9 | 1947/ 8400 batches | train loss 0.3358727 +| epoch 9 | 1951/ 8400 batches | train loss 0.3324249 +| epoch 9 | 1955/ 8400 batches | train loss 0.2928627 +| epoch 9 | 1959/ 8400 batches | train loss 0.3376891 +| epoch 9 | 1963/ 8400 batches | train loss 0.2861364 +| epoch 9 | 1967/ 8400 batches | train loss 0.3098633 +| epoch 9 | 1971/ 8400 batches | train loss 0.2898753 +| epoch 9 | 1975/ 8400 batches | train loss 0.3214534 +| epoch 9 | 1979/ 8400 batches | train loss 0.3817836 +| epoch 9 | 1983/ 8400 batches | train loss 0.2936352 +| epoch 9 | 1987/ 8400 batches | train loss 0.3337902 +| epoch 9 | 1991/ 8400 batches | train loss 0.4216265 +| epoch 9 | 1995/ 8400 batches | train loss 0.4086743 +| epoch 9 | 1999/ 8400 batches | train loss 0.2884600 +| epoch 9 | 2003/ 8400 batches | train loss 0.2955344 +| epoch 9 | 2007/ 8400 batches | train loss 0.2988079 +| epoch 9 | 2011/ 8400 batches | train loss 0.2482650 +| epoch 9 | 2015/ 8400 batches | train loss 0.3090893 +| epoch 9 | 2019/ 8400 batches | train loss 0.3413030 +| epoch 9 | 2023/ 8400 batches | train loss 0.3292458 +| epoch 9 | 2027/ 8400 batches | train loss 0.2799543 +| epoch 9 | 2031/ 8400 batches | train loss 0.3426792 +| epoch 9 | 2035/ 8400 batches | train loss 0.3025354 +| epoch 9 | 2039/ 8400 batches | train loss 0.3010457 +| epoch 9 | 2043/ 8400 batches | train loss 0.3305432 +| epoch 9 | 2047/ 8400 batches | train loss 0.3311957 +| epoch 9 | 2051/ 8400 batches | train loss 0.3475906 +| epoch 9 | 2055/ 8400 batches | train loss 0.3247603 +| epoch 9 | 2059/ 8400 batches | train loss 0.3681597 +| epoch 9 | 2063/ 8400 batches | train loss 0.2908190 +| epoch 9 | 2067/ 8400 batches | train loss 0.4299219 +| epoch 9 | 2071/ 8400 batches | train loss 0.3027369 +| epoch 9 | 2075/ 8400 batches | train loss 0.3519669 +| epoch 9 | 2079/ 8400 batches | train loss 0.2572871 +| epoch 9 | 2083/ 8400 batches | train loss 0.2784165 +| epoch 9 | 2087/ 8400 batches | train loss 0.3371117 +| epoch 9 | 2091/ 8400 batches | train loss 0.2875986 +| epoch 9 | 2095/ 8400 batches | train loss 0.3111951 +| epoch 9 | 2099/ 8400 batches | train loss 0.3044473 +| epoch 9 | 2103/ 8400 batches | train loss 0.3412541 +| epoch 9 | 2107/ 8400 batches | train loss 0.3395640 +| epoch 9 | 2111/ 8400 batches | train loss 0.3569200 +| epoch 9 | 2115/ 8400 batches | train loss 0.3124006 +| epoch 9 | 2119/ 8400 batches | train loss 0.3423067 +| epoch 9 | 2123/ 8400 batches | train loss 0.3751627 +| epoch 9 | 2127/ 8400 batches | train loss 0.3082539 +| epoch 9 | 2131/ 8400 batches | train loss 0.2899759 +| epoch 9 | 2135/ 8400 batches | train loss 0.2879244 +| epoch 9 | 2139/ 8400 batches | train loss 0.3377308 +| epoch 9 | 2143/ 8400 batches | train loss 0.3138929 +| epoch 9 | 2147/ 8400 batches | train loss 0.3404245 +| epoch 9 | 2151/ 8400 batches | train loss 0.3750820 +| epoch 9 | 2155/ 8400 batches | train loss 0.2937324 +| epoch 9 | 2159/ 8400 batches | train loss 0.3030630 +| epoch 9 | 2163/ 8400 batches | train loss 0.3834774 +| epoch 9 | 2167/ 8400 batches | train loss 0.3652044 +| epoch 9 | 2171/ 8400 batches | train loss 0.3050074 +| epoch 9 | 2175/ 8400 batches | train loss 0.3387197 +| epoch 9 | 2179/ 8400 batches | train loss 0.3479123 +| epoch 9 | 2183/ 8400 batches | train loss 0.3189906 +| epoch 9 | 2187/ 8400 batches | train loss 0.3069542 +| epoch 9 | 2191/ 8400 batches | train loss 0.2604843 +| epoch 9 | 2195/ 8400 batches | train loss 0.3423128 +| epoch 9 | 2199/ 8400 batches | train loss 0.3438783 +| epoch 9 | 2203/ 8400 batches | train loss 0.3387448 +| epoch 9 | 2207/ 8400 batches | train loss 0.3001125 +| epoch 9 | 2211/ 8400 batches | train loss 0.3606575 +| epoch 9 | 2215/ 8400 batches | train loss 0.3485336 +| epoch 9 | 2219/ 8400 batches | train loss 0.2964124 +| epoch 9 | 2223/ 8400 batches | train loss 0.2983556 +| epoch 9 | 2227/ 8400 batches | train loss 0.2812890 +| epoch 9 | 2231/ 8400 batches | train loss 0.3592098 +| epoch 9 | 2235/ 8400 batches | train loss 0.2983950 +| epoch 9 | 2239/ 8400 batches | train loss 0.3039990 +| epoch 9 | 2243/ 8400 batches | train loss 0.2953983 +| epoch 9 | 2247/ 8400 batches | train loss 0.3073996 +| epoch 9 | 2251/ 8400 batches | train loss 0.3475273 +| epoch 9 | 2255/ 8400 batches | train loss 0.3271187 +| epoch 9 | 2259/ 8400 batches | train loss 0.3576604 +| epoch 9 | 2263/ 8400 batches | train loss 0.3462393 +| epoch 9 | 2267/ 8400 batches | train loss 0.2891939 +| epoch 9 | 2271/ 8400 batches | train loss 0.3632579 +| epoch 9 | 2275/ 8400 batches | train loss 0.3063518 +| epoch 9 | 2279/ 8400 batches | train loss 0.3312970 +| epoch 9 | 2283/ 8400 batches | train loss 0.2308781 +| epoch 9 | 2287/ 8400 batches | train loss 0.1241444 +| epoch 9 | 2291/ 8400 batches | train loss 0.3127155 +| epoch 9 | 2295/ 8400 batches | train loss 0.3365210 +| epoch 9 | 2299/ 8400 batches | train loss 0.3061746 +| epoch 9 | 2303/ 8400 batches | train loss 0.2747366 +| epoch 9 | 2307/ 8400 batches | train loss 0.2775969 +| epoch 9 | 2311/ 8400 batches | train loss 0.3360622 +| epoch 9 | 2315/ 8400 batches | train loss 0.3538421 +| epoch 9 | 2319/ 8400 batches | train loss 0.3107627 +| epoch 9 | 2323/ 8400 batches | train loss 0.3717455 +| epoch 9 | 2327/ 8400 batches | train loss 0.3117676 +| epoch 9 | 2331/ 8400 batches | train loss 0.3480581 +| epoch 9 | 2335/ 8400 batches | train loss 0.2349142 +| epoch 9 | 2339/ 8400 batches | train loss 0.2839646 +| epoch 9 | 2343/ 8400 batches | train loss 0.3504793 +| epoch 9 | 2347/ 8400 batches | train loss 0.2912489 +| epoch 9 | 2351/ 8400 batches | train loss 0.2961378 +| epoch 9 | 2355/ 8400 batches | train loss 0.3480769 +| epoch 9 | 2359/ 8400 batches | train loss 0.2901185 +| epoch 9 | 2363/ 8400 batches | train loss 0.2676111 +| epoch 9 | 2367/ 8400 batches | train loss 0.3244315 +| epoch 9 | 2371/ 8400 batches | train loss 0.3091029 +| epoch 9 | 2375/ 8400 batches | train loss 0.2964165 +| epoch 9 | 2379/ 8400 batches | train loss 0.3030692 +| epoch 9 | 2383/ 8400 batches | train loss 0.3138568 +| epoch 9 | 2387/ 8400 batches | train loss 0.3519423 +| epoch 9 | 2391/ 8400 batches | train loss 0.3516970 +| epoch 9 | 2395/ 8400 batches | train loss 0.3838741 +| epoch 9 | 2399/ 8400 batches | train loss 0.3059000 +| epoch 9 | 2403/ 8400 batches | train loss 0.4419430 +| epoch 9 | 2407/ 8400 batches | train loss 0.3900512 +| epoch 9 | 2411/ 8400 batches | train loss 0.3388653 +| epoch 9 | 2415/ 8400 batches | train loss 0.3692690 +| epoch 9 | 2419/ 8400 batches | train loss 0.3968557 +| epoch 9 | 2423/ 8400 batches | train loss 0.3182368 +| epoch 9 | 2427/ 8400 batches | train loss 0.3820722 +| epoch 9 | 2431/ 8400 batches | train loss 0.3645486 +| epoch 9 | 2435/ 8400 batches | train loss 0.3482310 +| epoch 9 | 2439/ 8400 batches | train loss 0.3314755 +| epoch 9 | 2443/ 8400 batches | train loss 0.3506190 +| epoch 9 | 2447/ 8400 batches | train loss 0.3947177 +| epoch 9 | 2451/ 8400 batches | train loss 0.3746826 +| epoch 9 | 2455/ 8400 batches | train loss 0.3151735 +| epoch 9 | 2459/ 8400 batches | train loss 0.3726019 +| epoch 9 | 2463/ 8400 batches | train loss 0.3159141 +| epoch 9 | 2467/ 8400 batches | train loss 0.3466252 +| epoch 9 | 2471/ 8400 batches | train loss 0.3151475 +| epoch 9 | 2475/ 8400 batches | train loss 0.3475961 +| epoch 9 | 2479/ 8400 batches | train loss 0.2981435 +| epoch 9 | 2483/ 8400 batches | train loss 0.3831673 +| epoch 9 | 2487/ 8400 batches | train loss 0.3012773 +| epoch 9 | 2491/ 8400 batches | train loss 0.3376228 +| epoch 9 | 2495/ 8400 batches | train loss 0.3392161 +| epoch 9 | 2499/ 8400 batches | train loss 0.3681822 +| epoch 9 | 2503/ 8400 batches | train loss 0.3432063 +| epoch 9 | 2507/ 8400 batches | train loss 0.3502221 +| epoch 9 | 2511/ 8400 batches | train loss 0.3626545 +| epoch 9 | 2515/ 8400 batches | train loss 0.3404827 +| epoch 9 | 2519/ 8400 batches | train loss 0.3481567 +| epoch 9 | 2523/ 8400 batches | train loss 0.3046038 +| epoch 9 | 2527/ 8400 batches | train loss 0.2991578 +| epoch 9 | 2531/ 8400 batches | train loss 0.3392708 +| epoch 9 | 2535/ 8400 batches | train loss 0.3349473 +| epoch 9 | 2539/ 8400 batches | train loss 0.3013845 +| epoch 9 | 2543/ 8400 batches | train loss 0.3611997 +| epoch 9 | 2547/ 8400 batches | train loss 0.3161693 +| epoch 9 | 2551/ 8400 batches | train loss 0.3630527 +| epoch 9 | 2555/ 8400 batches | train loss 0.3057144 +| epoch 9 | 2559/ 8400 batches | train loss 0.3662020 +| epoch 9 | 2563/ 8400 batches | train loss 0.3084677 +| epoch 9 | 2567/ 8400 batches | train loss 0.4308953 +| epoch 9 | 2571/ 8400 batches | train loss 0.3258379 +| epoch 9 | 2575/ 8400 batches | train loss 0.3259834 +| epoch 9 | 2579/ 8400 batches | train loss 0.3072490 +| epoch 9 | 2583/ 8400 batches | train loss 0.3539826 +| epoch 9 | 2587/ 8400 batches | train loss 0.2715373 +| epoch 9 | 2591/ 8400 batches | train loss 0.3271966 +| epoch 9 | 2595/ 8400 batches | train loss 0.3439538 +| epoch 9 | 2599/ 8400 batches | train loss 0.3133799 +| epoch 9 | 2603/ 8400 batches | train loss 0.2828827 +| epoch 9 | 2607/ 8400 batches | train loss 0.3361513 +| epoch 9 | 2611/ 8400 batches | train loss 0.3806006 +| epoch 9 | 2615/ 8400 batches | train loss 0.3420975 +| epoch 9 | 2619/ 8400 batches | train loss 0.2641940 +| epoch 9 | 2623/ 8400 batches | train loss 0.3690368 +| epoch 9 | 2627/ 8400 batches | train loss 0.3566446 +| epoch 9 | 2631/ 8400 batches | train loss 0.3192141 +| epoch 9 | 2635/ 8400 batches | train loss 0.3233749 +| epoch 9 | 2639/ 8400 batches | train loss 0.2927790 +| epoch 9 | 2643/ 8400 batches | train loss 0.3497320 +| epoch 9 | 2647/ 8400 batches | train loss 0.3725038 +| epoch 9 | 2651/ 8400 batches | train loss 0.3647366 +| epoch 9 | 2655/ 8400 batches | train loss 0.2750024 +| epoch 9 | 2659/ 8400 batches | train loss 0.3503792 +| epoch 9 | 2663/ 8400 batches | train loss 0.3010438 +| epoch 9 | 2667/ 8400 batches | train loss 0.3218686 +| epoch 9 | 2671/ 8400 batches | train loss 0.4871346 +| epoch 9 | 2675/ 8400 batches | train loss 0.3877553 +| epoch 9 | 2679/ 8400 batches | train loss 0.3870138 +| epoch 9 | 2683/ 8400 batches | train loss 0.2946904 +| epoch 9 | 2687/ 8400 batches | train loss 0.2953611 +| epoch 9 | 2691/ 8400 batches | train loss 0.2892607 +| epoch 9 | 2695/ 8400 batches | train loss 0.3836292 +| epoch 9 | 2699/ 8400 batches | train loss 0.3390056 +| epoch 9 | 2703/ 8400 batches | train loss 0.3463488 +| epoch 9 | 2707/ 8400 batches | train loss 0.4001392 +| epoch 9 | 2711/ 8400 batches | train loss 0.3580527 +| epoch 9 | 2715/ 8400 batches | train loss 0.3279266 +| epoch 9 | 2719/ 8400 batches | train loss 0.3353188 +| epoch 9 | 2723/ 8400 batches | train loss 0.3582187 +| epoch 9 | 2727/ 8400 batches | train loss 0.3575706 +| epoch 9 | 2731/ 8400 batches | train loss 0.3870617 +| epoch 9 | 2735/ 8400 batches | train loss 0.3500049 +| epoch 9 | 2739/ 8400 batches | train loss 0.3296695 +| epoch 9 | 2743/ 8400 batches | train loss 0.3040551 +| epoch 9 | 2747/ 8400 batches | train loss 0.2646127 +| epoch 9 | 2751/ 8400 batches | train loss 0.3609497 +| epoch 9 | 2755/ 8400 batches | train loss 0.3516904 +| epoch 9 | 2759/ 8400 batches | train loss 0.4351728 +| epoch 9 | 2763/ 8400 batches | train loss 0.1485495 +| epoch 9 | 2767/ 8400 batches | train loss 0.3393586 +| epoch 9 | 2771/ 8400 batches | train loss 0.3165877 +| epoch 9 | 2775/ 8400 batches | train loss 0.2220326 +| epoch 9 | 2779/ 8400 batches | train loss 0.3631410 +| epoch 9 | 2783/ 8400 batches | train loss 0.3218289 +| epoch 9 | 2787/ 8400 batches | train loss 0.2958736 +| epoch 9 | 2791/ 8400 batches | train loss 0.3536211 +| epoch 9 | 2795/ 8400 batches | train loss 0.3209295 +| epoch 9 | 2799/ 8400 batches | train loss 0.3789746 +| epoch 9 | 2803/ 8400 batches | train loss 0.3589101 +| epoch 9 | 2807/ 8400 batches | train loss 0.3266153 +| epoch 9 | 2811/ 8400 batches | train loss 0.2757877 +| epoch 9 | 2815/ 8400 batches | train loss 0.3380311 +| epoch 9 | 2819/ 8400 batches | train loss 0.3359747 +| epoch 9 | 2823/ 8400 batches | train loss 0.4079196 +| epoch 9 | 2827/ 8400 batches | train loss 0.3567376 +| epoch 9 | 2831/ 8400 batches | train loss 0.2663171 +| epoch 9 | 2835/ 8400 batches | train loss 0.3132963 +| epoch 9 | 2839/ 8400 batches | train loss 0.3753224 +| epoch 9 | 2843/ 8400 batches | train loss 0.3352745 +| epoch 9 | 2847/ 8400 batches | train loss 0.3738966 +| epoch 9 | 2851/ 8400 batches | train loss 0.3892301 +| epoch 9 | 2855/ 8400 batches | train loss 0.3354591 +| epoch 9 | 2859/ 8400 batches | train loss 0.3327015 +| epoch 9 | 2863/ 8400 batches | train loss 0.3338456 +| epoch 9 | 2867/ 8400 batches | train loss 0.3100615 +| epoch 9 | 2871/ 8400 batches | train loss 0.3300371 +| epoch 9 | 2875/ 8400 batches | train loss 0.3488676 +| epoch 9 | 2879/ 8400 batches | train loss 0.3786435 +| epoch 9 | 2883/ 8400 batches | train loss 0.3451056 +| epoch 9 | 2887/ 8400 batches | train loss 0.3204532 +| epoch 9 | 2891/ 8400 batches | train loss 0.2923050 +| epoch 9 | 2895/ 8400 batches | train loss 0.3561472 +| epoch 9 | 2899/ 8400 batches | train loss 0.3468804 +| epoch 9 | 2903/ 8400 batches | train loss 0.3262382 +| epoch 9 | 2907/ 8400 batches | train loss 0.2825352 +| epoch 9 | 2911/ 8400 batches | train loss 0.2976770 +| epoch 9 | 2915/ 8400 batches | train loss 0.3345344 +| epoch 9 | 2919/ 8400 batches | train loss 0.3096892 +| epoch 9 | 2923/ 8400 batches | train loss 0.3557639 +| epoch 9 | 2927/ 8400 batches | train loss 0.4510619 +| epoch 9 | 2931/ 8400 batches | train loss 0.3176382 +| epoch 9 | 2935/ 8400 batches | train loss 0.2908082 +| epoch 9 | 2939/ 8400 batches | train loss 0.3421798 +| epoch 9 | 2943/ 8400 batches | train loss 0.3367148 +| epoch 9 | 2947/ 8400 batches | train loss 0.3369777 +| epoch 9 | 2951/ 8400 batches | train loss 0.3087385 +| epoch 9 | 2955/ 8400 batches | train loss 0.3115143 +| epoch 9 | 2959/ 8400 batches | train loss 0.2928072 +| epoch 9 | 2963/ 8400 batches | train loss 0.3566884 +| epoch 9 | 2967/ 8400 batches | train loss 0.2912905 +| epoch 9 | 2971/ 8400 batches | train loss 0.3067365 +| epoch 9 | 2975/ 8400 batches | train loss 0.3872415 +| epoch 9 | 2979/ 8400 batches | train loss 0.2932721 +| epoch 9 | 2983/ 8400 batches | train loss 0.3378059 +| epoch 9 | 2987/ 8400 batches | train loss 0.3549313 +| epoch 9 | 2991/ 8400 batches | train loss 0.3498650 +| epoch 9 | 2995/ 8400 batches | train loss 0.4099276 +| epoch 9 | 2999/ 8400 batches | train loss 0.3058195 +| epoch 9 | 3003/ 8400 batches | train loss 0.2870365 +| epoch 9 | 3007/ 8400 batches | train loss 0.3801158 +| epoch 9 | 3011/ 8400 batches | train loss 0.2921956 +| epoch 9 | 3015/ 8400 batches | train loss 0.3057955 +| epoch 9 | 3019/ 8400 batches | train loss 0.3213804 +| epoch 9 | 3023/ 8400 batches | train loss 0.3395408 +| epoch 9 | 3027/ 8400 batches | train loss 0.2996821 +| epoch 9 | 3031/ 8400 batches | train loss 0.3151914 +| epoch 9 | 3035/ 8400 batches | train loss 0.3458084 +| epoch 9 | 3039/ 8400 batches | train loss 0.3502149 +| epoch 9 | 3043/ 8400 batches | train loss 0.3145550 +| epoch 9 | 3047/ 8400 batches | train loss 0.3369133 +| epoch 9 | 3051/ 8400 batches | train loss 0.3307965 +| epoch 9 | 3055/ 8400 batches | train loss 0.3837764 +| epoch 9 | 3059/ 8400 batches | train loss 0.3375855 +| epoch 9 | 3063/ 8400 batches | train loss 0.3240874 +| epoch 9 | 3067/ 8400 batches | train loss 0.3649125 +| epoch 9 | 3071/ 8400 batches | train loss 0.4342642 +| epoch 9 | 3075/ 8400 batches | train loss 0.3545592 +| epoch 9 | 3079/ 8400 batches | train loss 0.3550148 +| epoch 9 | 3083/ 8400 batches | train loss 0.3859587 +| epoch 9 | 3087/ 8400 batches | train loss 0.3357264 +| epoch 9 | 3091/ 8400 batches | train loss 0.3196119 +| epoch 9 | 3095/ 8400 batches | train loss 0.3806430 +| epoch 9 | 3099/ 8400 batches | train loss 0.3651941 +| epoch 9 | 3103/ 8400 batches | train loss 0.3382763 +| epoch 9 | 3107/ 8400 batches | train loss 0.2526171 +| epoch 9 | 3111/ 8400 batches | train loss 0.3651076 +| epoch 9 | 3115/ 8400 batches | train loss 0.3327453 +| epoch 9 | 3119/ 8400 batches | train loss 0.3333441 +| epoch 9 | 3123/ 8400 batches | train loss 0.3423268 +| epoch 9 | 3127/ 8400 batches | train loss 0.3944024 +| epoch 9 | 3131/ 8400 batches | train loss 0.3752345 +| epoch 9 | 3135/ 8400 batches | train loss 0.3660021 +| epoch 9 | 3139/ 8400 batches | train loss 0.3238413 +| epoch 9 | 3143/ 8400 batches | train loss 0.3381101 +| epoch 9 | 3147/ 8400 batches | train loss 0.3650397 +| epoch 9 | 3151/ 8400 batches | train loss 0.3630634 +| epoch 9 | 3155/ 8400 batches | train loss 0.2971288 +| epoch 9 | 3159/ 8400 batches | train loss 0.3504775 +| epoch 9 | 3163/ 8400 batches | train loss 0.2950619 +| epoch 9 | 3167/ 8400 batches | train loss 0.2994712 +| epoch 9 | 3171/ 8400 batches | train loss 0.3016714 +| epoch 9 | 3175/ 8400 batches | train loss 0.3636076 +| epoch 9 | 3179/ 8400 batches | train loss 0.2809286 +| epoch 9 | 3183/ 8400 batches | train loss 0.3368153 +| epoch 9 | 3187/ 8400 batches | train loss 0.3459106 +| epoch 9 | 3191/ 8400 batches | train loss 0.3465146 +| epoch 9 | 3195/ 8400 batches | train loss 0.3203338 +| epoch 9 | 3199/ 8400 batches | train loss 0.3583932 +| epoch 9 | 3203/ 8400 batches | train loss 0.3556226 +| epoch 9 | 3207/ 8400 batches | train loss 0.3689443 +| epoch 9 | 3211/ 8400 batches | train loss 0.2821860 +| epoch 9 | 3215/ 8400 batches | train loss 0.3424517 +| epoch 9 | 3219/ 8400 batches | train loss 0.3146729 +| epoch 9 | 3223/ 8400 batches | train loss 0.3036447 +| epoch 9 | 3227/ 8400 batches | train loss 0.4035252 +| epoch 9 | 3231/ 8400 batches | train loss 0.3619079 +| epoch 9 | 3235/ 8400 batches | train loss 0.3597859 +| epoch 9 | 3239/ 8400 batches | train loss 0.3537485 +| epoch 9 | 3243/ 8400 batches | train loss 0.3117945 +| epoch 9 | 3247/ 8400 batches | train loss 0.3819113 +| epoch 9 | 3251/ 8400 batches | train loss 0.3661761 +| epoch 9 | 3255/ 8400 batches | train loss 0.3861376 +| epoch 9 | 3259/ 8400 batches | train loss 0.3478377 +| epoch 9 | 3263/ 8400 batches | train loss 0.3800462 +| epoch 9 | 3267/ 8400 batches | train loss 0.3784947 +| epoch 9 | 3271/ 8400 batches | train loss 0.3632710 +| epoch 9 | 3275/ 8400 batches | train loss 0.3673452 +| epoch 9 | 3279/ 8400 batches | train loss 0.3302515 +| epoch 9 | 3283/ 8400 batches | train loss 0.3310211 +| epoch 9 | 3287/ 8400 batches | train loss 0.3304595 +| epoch 9 | 3291/ 8400 batches | train loss 0.3228293 +| epoch 9 | 3295/ 8400 batches | train loss 0.3284991 +| epoch 9 | 3299/ 8400 batches | train loss 0.3002884 +| epoch 9 | 3303/ 8400 batches | train loss 0.2992890 +| epoch 9 | 3307/ 8400 batches | train loss 0.3551488 +| epoch 9 | 3311/ 8400 batches | train loss 0.2940109 +| epoch 9 | 3315/ 8400 batches | train loss 0.3427705 +| epoch 9 | 3319/ 8400 batches | train loss 0.2975146 +| epoch 9 | 3323/ 8400 batches | train loss 0.3341986 +| epoch 9 | 3327/ 8400 batches | train loss 0.3506497 +| epoch 9 | 3331/ 8400 batches | train loss 0.3590007 +| epoch 9 | 3335/ 8400 batches | train loss 0.2937575 +| epoch 9 | 3339/ 8400 batches | train loss 0.3233777 +| epoch 9 | 3343/ 8400 batches | train loss 0.3652330 +| epoch 9 | 3347/ 8400 batches | train loss 0.3245777 +| epoch 9 | 3351/ 8400 batches | train loss 0.3098678 +| epoch 9 | 3355/ 8400 batches | train loss 0.3528350 +| epoch 9 | 3359/ 8400 batches | train loss 0.3482797 +| epoch 9 | 3363/ 8400 batches | train loss 0.2784180 +| epoch 9 | 3367/ 8400 batches | train loss 0.3666930 +| epoch 9 | 3371/ 8400 batches | train loss 0.4146609 +| epoch 9 | 3375/ 8400 batches | train loss 0.4586428 +| epoch 9 | 3379/ 8400 batches | train loss 0.3127066 +| epoch 9 | 3383/ 8400 batches | train loss 0.3442450 +| epoch 9 | 3387/ 8400 batches | train loss 0.4370479 +| epoch 9 | 3391/ 8400 batches | train loss 0.3359982 +| epoch 9 | 3395/ 8400 batches | train loss 0.3696595 +| epoch 9 | 3399/ 8400 batches | train loss 0.3133548 +| epoch 9 | 3403/ 8400 batches | train loss 0.2865010 +| epoch 9 | 3407/ 8400 batches | train loss 0.3588465 +| epoch 9 | 3411/ 8400 batches | train loss 0.2994526 +| epoch 9 | 3415/ 8400 batches | train loss 0.3484690 +| epoch 9 | 3419/ 8400 batches | train loss 0.3055624 +| epoch 9 | 3423/ 8400 batches | train loss 0.2791951 +| epoch 9 | 3427/ 8400 batches | train loss 0.4315669 +| epoch 9 | 3431/ 8400 batches | train loss 0.3803087 +| epoch 9 | 3435/ 8400 batches | train loss 0.3525719 +| epoch 9 | 3439/ 8400 batches | train loss 0.2943657 +| epoch 9 | 3443/ 8400 batches | train loss 0.3808191 +| epoch 9 | 3447/ 8400 batches | train loss 0.3839751 +| epoch 9 | 3451/ 8400 batches | train loss 0.2973017 +| epoch 9 | 3455/ 8400 batches | train loss 0.3559082 +| epoch 9 | 3459/ 8400 batches | train loss 0.3514997 +| epoch 9 | 3463/ 8400 batches | train loss 0.3388076 +| epoch 9 | 3467/ 8400 batches | train loss 0.3255613 +| epoch 9 | 3471/ 8400 batches | train loss 0.2773155 +| epoch 9 | 3475/ 8400 batches | train loss 0.3601199 +| epoch 9 | 3479/ 8400 batches | train loss 0.3786498 +| epoch 9 | 3483/ 8400 batches | train loss 0.3430907 +| epoch 9 | 3487/ 8400 batches | train loss 0.2873979 +| epoch 9 | 3491/ 8400 batches | train loss 0.3316863 +| epoch 9 | 3495/ 8400 batches | train loss 0.3460472 +| epoch 9 | 3499/ 8400 batches | train loss 0.2958090 +| epoch 9 | 3503/ 8400 batches | train loss 0.3430594 +| epoch 9 | 3507/ 8400 batches | train loss 0.2765718 +| epoch 9 | 3511/ 8400 batches | train loss 0.3059635 +| epoch 9 | 3515/ 8400 batches | train loss 0.3290301 +| epoch 9 | 3519/ 8400 batches | train loss 0.3641256 +| epoch 9 | 3523/ 8400 batches | train loss 0.3065201 +| epoch 9 | 3527/ 8400 batches | train loss 0.3678564 +| epoch 9 | 3531/ 8400 batches | train loss 0.3305702 +| epoch 9 | 3535/ 8400 batches | train loss 0.3366444 +| epoch 9 | 3539/ 8400 batches | train loss 0.3574880 +| epoch 9 | 3543/ 8400 batches | train loss 0.3541743 +| epoch 9 | 3547/ 8400 batches | train loss 0.3957230 +| epoch 9 | 3551/ 8400 batches | train loss 0.3022595 +| epoch 9 | 3555/ 8400 batches | train loss 0.3277077 +| epoch 9 | 3559/ 8400 batches | train loss 0.3496553 +| epoch 9 | 3563/ 8400 batches | train loss 0.3446670 +| epoch 9 | 3567/ 8400 batches | train loss 0.3295896 +| epoch 9 | 3571/ 8400 batches | train loss 0.3287557 +| epoch 9 | 3575/ 8400 batches | train loss 0.3169627 +| epoch 9 | 3579/ 8400 batches | train loss 0.3437143 +| epoch 9 | 3583/ 8400 batches | train loss 0.3720996 +| epoch 9 | 3587/ 8400 batches | train loss 0.3139473 +| epoch 9 | 3591/ 8400 batches | train loss 0.3596864 +| epoch 9 | 3595/ 8400 batches | train loss 0.3839173 +| epoch 9 | 3599/ 8400 batches | train loss 0.3730606 +| epoch 9 | 3603/ 8400 batches | train loss 0.2859376 +| epoch 9 | 3607/ 8400 batches | train loss 0.3057840 +| epoch 9 | 3611/ 8400 batches | train loss 0.2900502 +| epoch 9 | 3615/ 8400 batches | train loss 0.3116399 +| epoch 9 | 3619/ 8400 batches | train loss 0.3513386 +| epoch 9 | 3623/ 8400 batches | train loss 0.3663940 +| epoch 9 | 3627/ 8400 batches | train loss 0.2964622 +| epoch 9 | 3631/ 8400 batches | train loss 0.3791637 +| epoch 9 | 3635/ 8400 batches | train loss 0.3892738 +| epoch 9 | 3639/ 8400 batches | train loss 0.2939382 +| epoch 9 | 3643/ 8400 batches | train loss 0.3210599 +| epoch 9 | 3647/ 8400 batches | train loss 0.3589262 +| epoch 9 | 3651/ 8400 batches | train loss 0.2856445 +| epoch 9 | 3655/ 8400 batches | train loss 0.3103030 +| epoch 9 | 3659/ 8400 batches | train loss 0.3469039 +| epoch 9 | 3663/ 8400 batches | train loss 0.3300420 +| epoch 9 | 3667/ 8400 batches | train loss 0.4035957 +| epoch 9 | 3671/ 8400 batches | train loss 0.3141629 +| epoch 9 | 3675/ 8400 batches | train loss 0.2879278 +| epoch 9 | 3679/ 8400 batches | train loss 0.3324468 +| epoch 9 | 3683/ 8400 batches | train loss 0.3422000 +| epoch 9 | 3687/ 8400 batches | train loss 0.3221504 +| epoch 9 | 3691/ 8400 batches | train loss 0.2937576 +| epoch 9 | 3695/ 8400 batches | train loss 0.3439583 +| epoch 9 | 3699/ 8400 batches | train loss 0.3150942 +| epoch 9 | 3703/ 8400 batches | train loss 0.3422337 +| epoch 9 | 3707/ 8400 batches | train loss 0.2942449 +| epoch 9 | 3711/ 8400 batches | train loss 0.3777138 +| epoch 9 | 3715/ 8400 batches | train loss 0.3271623 +| epoch 9 | 3719/ 8400 batches | train loss 0.2739097 +| epoch 9 | 3723/ 8400 batches | train loss 0.3532496 +| epoch 9 | 3727/ 8400 batches | train loss 0.3343631 +| epoch 9 | 3731/ 8400 batches | train loss 0.3138362 +| epoch 9 | 3735/ 8400 batches | train loss 0.4178193 +| epoch 9 | 3739/ 8400 batches | train loss 0.3488092 +| epoch 9 | 3743/ 8400 batches | train loss 0.3051540 +| epoch 9 | 3747/ 8400 batches | train loss 0.3357098 +| epoch 9 | 3751/ 8400 batches | train loss 0.3397353 +| epoch 9 | 3755/ 8400 batches | train loss 0.3880958 +| epoch 9 | 3759/ 8400 batches | train loss 0.2868965 +| epoch 9 | 3763/ 8400 batches | train loss 0.3877833 +| epoch 9 | 3767/ 8400 batches | train loss 0.2888086 +| epoch 9 | 3771/ 8400 batches | train loss 0.3290926 +| epoch 9 | 3775/ 8400 batches | train loss 0.3143364 +| epoch 9 | 3779/ 8400 batches | train loss 0.2974554 +| epoch 9 | 3783/ 8400 batches | train loss 0.3280996 +| epoch 9 | 3787/ 8400 batches | train loss 0.3626040 +| epoch 9 | 3791/ 8400 batches | train loss 0.3144460 +| epoch 9 | 3795/ 8400 batches | train loss 0.3422332 +| epoch 9 | 3799/ 8400 batches | train loss 0.3399472 +| epoch 9 | 3803/ 8400 batches | train loss 0.4306685 +| epoch 9 | 3807/ 8400 batches | train loss 0.3502047 +| epoch 9 | 3811/ 8400 batches | train loss 0.3907483 +| epoch 9 | 3815/ 8400 batches | train loss 0.3968690 +| epoch 9 | 3819/ 8400 batches | train loss 0.3148399 +| epoch 9 | 3823/ 8400 batches | train loss 0.3091987 +| epoch 9 | 3827/ 8400 batches | train loss 0.3307419 +| epoch 9 | 3831/ 8400 batches | train loss 0.3582120 +| epoch 9 | 3835/ 8400 batches | train loss 0.3419456 +| epoch 9 | 3839/ 8400 batches | train loss 0.3644652 +| epoch 9 | 3843/ 8400 batches | train loss 0.2769817 +| epoch 9 | 3847/ 8400 batches | train loss 0.3351330 +| epoch 9 | 3851/ 8400 batches | train loss 0.3086787 +| epoch 9 | 3855/ 8400 batches | train loss 0.3609132 +| epoch 9 | 3859/ 8400 batches | train loss 0.3077836 +| epoch 9 | 3863/ 8400 batches | train loss 0.3708902 +| epoch 9 | 3867/ 8400 batches | train loss 0.2970266 +| epoch 9 | 3871/ 8400 batches | train loss 0.3581950 +| epoch 9 | 3875/ 8400 batches | train loss 0.3339592 +| epoch 9 | 3879/ 8400 batches | train loss 0.3155890 +| epoch 9 | 3883/ 8400 batches | train loss 0.3570780 +| epoch 9 | 3887/ 8400 batches | train loss 0.2930927 +| epoch 9 | 3891/ 8400 batches | train loss 0.3245234 +| epoch 9 | 3895/ 8400 batches | train loss 0.3132654 +| epoch 9 | 3899/ 8400 batches | train loss 0.3975306 +| epoch 9 | 3903/ 8400 batches | train loss 0.3289833 +| epoch 9 | 3907/ 8400 batches | train loss 0.3595169 +| epoch 9 | 3911/ 8400 batches | train loss 0.3311959 +| epoch 9 | 3915/ 8400 batches | train loss 0.3599156 +| epoch 9 | 3919/ 8400 batches | train loss 0.3337859 +| epoch 9 | 3923/ 8400 batches | train loss 0.3998087 +| epoch 9 | 3927/ 8400 batches | train loss 0.2994996 +| epoch 9 | 3931/ 8400 batches | train loss 0.3900086 +| epoch 9 | 3935/ 8400 batches | train loss 0.3679683 +| epoch 9 | 3939/ 8400 batches | train loss 0.3803188 +| epoch 9 | 3943/ 8400 batches | train loss 0.3596917 +| epoch 9 | 3947/ 8400 batches | train loss 0.3878525 +| epoch 9 | 3951/ 8400 batches | train loss 0.3991938 +| epoch 9 | 3955/ 8400 batches | train loss 0.4279783 +| epoch 9 | 3959/ 8400 batches | train loss 0.3435353 +| epoch 9 | 3963/ 8400 batches | train loss 0.3430558 +| epoch 9 | 3967/ 8400 batches | train loss 0.3605105 +| epoch 9 | 3971/ 8400 batches | train loss 0.3813927 +| epoch 9 | 3975/ 8400 batches | train loss 0.3218703 +| epoch 9 | 3979/ 8400 batches | train loss 0.3952800 +| epoch 9 | 3983/ 8400 batches | train loss 0.4484503 +| epoch 9 | 3987/ 8400 batches | train loss 0.3429776 +| epoch 9 | 3991/ 8400 batches | train loss 0.3109656 +| epoch 9 | 3995/ 8400 batches | train loss 0.3382179 +| epoch 9 | 3999/ 8400 batches | train loss 0.3293783 +| epoch 9 | 4003/ 8400 batches | train loss 0.2900083 +| epoch 9 | 4007/ 8400 batches | train loss 0.3318962 +| epoch 9 | 4011/ 8400 batches | train loss 0.3541174 +| epoch 9 | 4015/ 8400 batches | train loss 0.3666814 +| epoch 9 | 4019/ 8400 batches | train loss 0.3784045 +| epoch 9 | 4023/ 8400 batches | train loss 0.3967748 +| epoch 9 | 4027/ 8400 batches | train loss 0.2889687 +| epoch 9 | 4031/ 8400 batches | train loss 0.3154773 +| epoch 9 | 4035/ 8400 batches | train loss 0.3680717 +| epoch 9 | 4039/ 8400 batches | train loss 0.2771226 +| epoch 9 | 4043/ 8400 batches | train loss 0.3329329 +| epoch 9 | 4047/ 8400 batches | train loss 0.3653777 +| epoch 9 | 4051/ 8400 batches | train loss 0.2321484 +| epoch 9 | 4055/ 8400 batches | train loss 0.3228984 +| epoch 9 | 4059/ 8400 batches | train loss 0.3253887 +| epoch 9 | 4063/ 8400 batches | train loss 0.3255244 +| epoch 9 | 4067/ 8400 batches | train loss 0.2628089 +| epoch 9 | 4071/ 8400 batches | train loss 0.3304754 +| epoch 9 | 4075/ 8400 batches | train loss 0.3448857 +| epoch 9 | 4079/ 8400 batches | train loss 0.2814434 +| epoch 9 | 4083/ 8400 batches | train loss 0.3872565 +| epoch 9 | 4087/ 8400 batches | train loss 0.3402383 +| epoch 9 | 4091/ 8400 batches | train loss 0.3577543 +| epoch 9 | 4095/ 8400 batches | train loss 0.3739138 +| epoch 9 | 4099/ 8400 batches | train loss 0.3382933 +| epoch 9 | 4103/ 8400 batches | train loss 0.3385164 +| epoch 9 | 4107/ 8400 batches | train loss 0.3112281 +| epoch 9 | 4111/ 8400 batches | train loss 0.3400933 +| epoch 9 | 4115/ 8400 batches | train loss 0.3320427 +| epoch 9 | 4119/ 8400 batches | train loss 0.2890424 +| epoch 9 | 4123/ 8400 batches | train loss 0.3367426 +| epoch 9 | 4127/ 8400 batches | train loss 0.3078710 +| epoch 9 | 4131/ 8400 batches | train loss 0.2395802 +| epoch 9 | 4135/ 8400 batches | train loss 0.4110897 +| epoch 9 | 4139/ 8400 batches | train loss 0.3118158 +| epoch 9 | 4143/ 8400 batches | train loss 0.3431783 +| epoch 9 | 4147/ 8400 batches | train loss 0.3086887 +| epoch 9 | 4151/ 8400 batches | train loss 0.3081273 +| epoch 9 | 4155/ 8400 batches | train loss 0.3794965 +| epoch 9 | 4159/ 8400 batches | train loss 0.3286610 +| epoch 9 | 4163/ 8400 batches | train loss 0.3939370 +| epoch 9 | 4167/ 8400 batches | train loss 0.2968887 +| epoch 9 | 4171/ 8400 batches | train loss 0.4132889 +| epoch 9 | 4175/ 8400 batches | train loss 0.3184845 +| epoch 9 | 4179/ 8400 batches | train loss 0.3433226 +| epoch 9 | 4183/ 8400 batches | train loss 0.3079220 +| epoch 9 | 4187/ 8400 batches | train loss 0.3604339 +| epoch 9 | 4191/ 8400 batches | train loss 0.3336725 +| epoch 9 | 4195/ 8400 batches | train loss 0.3615320 +| epoch 9 | 4199/ 8400 batches | train loss 0.3243892 +| epoch 9 | 4203/ 8400 batches | train loss 0.3397099 +| epoch 9 | 4207/ 8400 batches | train loss 0.3045697 +| epoch 9 | 4211/ 8400 batches | train loss 0.3591240 +| epoch 9 | 4215/ 8400 batches | train loss 0.2892032 +| epoch 9 | 4219/ 8400 batches | train loss 0.3286292 +| epoch 9 | 4223/ 8400 batches | train loss 0.3514780 +| epoch 9 | 4227/ 8400 batches | train loss 0.3496807 +| epoch 9 | 4231/ 8400 batches | train loss 0.3402764 +| epoch 9 | 4235/ 8400 batches | train loss 0.3470052 +| epoch 9 | 4239/ 8400 batches | train loss 0.3123479 +| epoch 9 | 4243/ 8400 batches | train loss 0.3390001 +| epoch 9 | 4247/ 8400 batches | train loss 0.3783829 +| epoch 9 | 4251/ 8400 batches | train loss 0.2764835 +| epoch 9 | 4255/ 8400 batches | train loss 0.3078429 +| epoch 9 | 4259/ 8400 batches | train loss 0.3039506 +| epoch 9 | 4263/ 8400 batches | train loss 0.3559694 +| epoch 9 | 4267/ 8400 batches | train loss 0.2766154 +| epoch 9 | 4271/ 8400 batches | train loss 0.4000075 +| epoch 9 | 4275/ 8400 batches | train loss 0.3184448 +| epoch 9 | 4279/ 8400 batches | train loss 0.3218954 +| epoch 9 | 4283/ 8400 batches | train loss 0.3052509 +| epoch 9 | 4287/ 8400 batches | train loss 0.2865449 +| epoch 9 | 4291/ 8400 batches | train loss 0.2396228 +| epoch 9 | 4295/ 8400 batches | train loss 0.3068851 +| epoch 9 | 4299/ 8400 batches | train loss 0.3259107 +| epoch 9 | 4303/ 8400 batches | train loss 0.2863062 +| epoch 9 | 4307/ 8400 batches | train loss 0.3196868 +| epoch 9 | 4311/ 8400 batches | train loss 0.3433974 +| epoch 9 | 4315/ 8400 batches | train loss 0.3215822 +| epoch 9 | 4319/ 8400 batches | train loss 0.3791662 +| epoch 9 | 4323/ 8400 batches | train loss 0.2914475 +| epoch 9 | 4327/ 8400 batches | train loss 0.3352383 +| epoch 9 | 4331/ 8400 batches | train loss 0.3374227 +| epoch 9 | 4335/ 8400 batches | train loss 0.3252374 +| epoch 9 | 4339/ 8400 batches | train loss 0.3805656 +| epoch 9 | 4343/ 8400 batches | train loss 0.3077932 +| epoch 9 | 4347/ 8400 batches | train loss 0.3753368 +| epoch 9 | 4351/ 8400 batches | train loss 0.2996092 +| epoch 9 | 4355/ 8400 batches | train loss 0.3950612 +| epoch 9 | 4359/ 8400 batches | train loss 0.3618567 +| epoch 9 | 4363/ 8400 batches | train loss 0.3758373 +| epoch 9 | 4367/ 8400 batches | train loss 0.2306877 +| epoch 9 | 4371/ 8400 batches | train loss 0.3583829 +| epoch 9 | 4375/ 8400 batches | train loss 0.3007243 +| epoch 9 | 4379/ 8400 batches | train loss 0.2784863 +| epoch 9 | 4383/ 8400 batches | train loss 0.4078550 +| epoch 9 | 4387/ 8400 batches | train loss 0.2811485 +| epoch 9 | 4391/ 8400 batches | train loss 0.2426359 +| epoch 9 | 4395/ 8400 batches | train loss 0.3969916 +| epoch 9 | 4399/ 8400 batches | train loss 0.3711870 +| epoch 9 | 4403/ 8400 batches | train loss 0.3577989 +| epoch 9 | 4407/ 8400 batches | train loss 0.3895417 +| epoch 9 | 4411/ 8400 batches | train loss 0.3116063 +| epoch 9 | 4415/ 8400 batches | train loss 0.3500387 +| epoch 9 | 4419/ 8400 batches | train loss 0.3826052 +| epoch 9 | 4423/ 8400 batches | train loss 0.3237244 +| epoch 9 | 4427/ 8400 batches | train loss 0.3519200 +| epoch 9 | 4431/ 8400 batches | train loss 0.3484110 +| epoch 9 | 4435/ 8400 batches | train loss 0.3724715 +| epoch 9 | 4439/ 8400 batches | train loss 0.3534009 +| epoch 9 | 4443/ 8400 batches | train loss 0.3323839 +| epoch 9 | 4447/ 8400 batches | train loss 0.3256168 +| epoch 9 | 4451/ 8400 batches | train loss 0.3859843 +| epoch 9 | 4455/ 8400 batches | train loss 0.4200729 +| epoch 9 | 4459/ 8400 batches | train loss 0.3726053 +| epoch 9 | 4463/ 8400 batches | train loss 0.3109370 +| epoch 9 | 4467/ 8400 batches | train loss 0.3423923 +| epoch 9 | 4471/ 8400 batches | train loss 0.3131663 +| epoch 9 | 4475/ 8400 batches | train loss 0.3239742 +| epoch 9 | 4479/ 8400 batches | train loss 0.3296113 +| epoch 9 | 4483/ 8400 batches | train loss 0.3276095 +| epoch 9 | 4487/ 8400 batches | train loss 0.3256323 +| epoch 9 | 4491/ 8400 batches | train loss 0.3817265 +| epoch 9 | 4495/ 8400 batches | train loss 0.3131483 +| epoch 9 | 4499/ 8400 batches | train loss 0.4114239 +| epoch 9 | 4503/ 8400 batches | train loss 0.2822119 +| epoch 9 | 4507/ 8400 batches | train loss 0.3462433 +| epoch 9 | 4511/ 8400 batches | train loss 0.3305885 +| epoch 9 | 4515/ 8400 batches | train loss 0.3177109 +| epoch 9 | 4519/ 8400 batches | train loss 0.3884720 +| epoch 9 | 4523/ 8400 batches | train loss 0.3563592 +| epoch 9 | 4527/ 8400 batches | train loss 0.3173235 +| epoch 9 | 4531/ 8400 batches | train loss 0.3566502 +| epoch 9 | 4535/ 8400 batches | train loss 0.3204839 +| epoch 9 | 4539/ 8400 batches | train loss 0.3541063 +| epoch 9 | 4543/ 8400 batches | train loss 0.3356885 +| epoch 9 | 4547/ 8400 batches | train loss 0.3210967 +| epoch 9 | 4551/ 8400 batches | train loss 0.3093575 +| epoch 9 | 4555/ 8400 batches | train loss 0.3184457 +| epoch 9 | 4559/ 8400 batches | train loss 0.3671628 +| epoch 9 | 4563/ 8400 batches | train loss 0.1430525 +| epoch 9 | 4567/ 8400 batches | train loss 0.3086479 +| epoch 9 | 4571/ 8400 batches | train loss 0.3370104 +| epoch 9 | 4575/ 8400 batches | train loss 0.3268956 +| epoch 9 | 4579/ 8400 batches | train loss 0.3183239 +| epoch 9 | 4583/ 8400 batches | train loss 0.3239959 +| epoch 9 | 4587/ 8400 batches | train loss 0.4124119 +| epoch 9 | 4591/ 8400 batches | train loss 0.2344774 +| epoch 9 | 4595/ 8400 batches | train loss 0.3183871 +| epoch 9 | 4599/ 8400 batches | train loss 0.3706256 +| epoch 9 | 4603/ 8400 batches | train loss 0.2850568 +| epoch 9 | 4607/ 8400 batches | train loss 0.3632868 +| epoch 9 | 4611/ 8400 batches | train loss 0.3475043 +| epoch 9 | 4615/ 8400 batches | train loss 0.3251927 +| epoch 9 | 4619/ 8400 batches | train loss 0.2942979 +| epoch 9 | 4623/ 8400 batches | train loss 0.3007893 +| epoch 9 | 4627/ 8400 batches | train loss 0.2855542 +| epoch 9 | 4631/ 8400 batches | train loss 0.3412630 +| epoch 9 | 4635/ 8400 batches | train loss 0.3778579 +| epoch 9 | 4639/ 8400 batches | train loss 0.3967603 +| epoch 9 | 4643/ 8400 batches | train loss 0.2451936 +| epoch 9 | 4647/ 8400 batches | train loss 0.3706772 +| epoch 9 | 4651/ 8400 batches | train loss 0.4044448 +| epoch 9 | 4655/ 8400 batches | train loss 0.3425518 +| epoch 9 | 4659/ 8400 batches | train loss 0.3735067 +| epoch 9 | 4663/ 8400 batches | train loss 0.3847595 +| epoch 9 | 4667/ 8400 batches | train loss 0.3541074 +| epoch 9 | 4671/ 8400 batches | train loss 0.3521025 +| epoch 9 | 4675/ 8400 batches | train loss 0.3955657 +| epoch 9 | 4679/ 8400 batches | train loss 0.3456691 +| epoch 9 | 4683/ 8400 batches | train loss 0.3096536 +| epoch 9 | 4687/ 8400 batches | train loss 0.2219344 +| epoch 9 | 4691/ 8400 batches | train loss 0.3370252 +| epoch 9 | 4695/ 8400 batches | train loss 0.2813985 +| epoch 9 | 4699/ 8400 batches | train loss 0.3814884 +| epoch 9 | 4703/ 8400 batches | train loss 0.3200450 +| epoch 9 | 4707/ 8400 batches | train loss 0.3068353 +| epoch 9 | 4711/ 8400 batches | train loss 0.3352146 +| epoch 9 | 4715/ 8400 batches | train loss 0.3806578 +| epoch 9 | 4719/ 8400 batches | train loss 0.3734524 +| epoch 9 | 4723/ 8400 batches | train loss 0.3584622 +| epoch 9 | 4727/ 8400 batches | train loss 0.3183965 +| epoch 9 | 4731/ 8400 batches | train loss 0.3121293 +| epoch 9 | 4735/ 8400 batches | train loss 0.3160986 +| epoch 9 | 4739/ 8400 batches | train loss 0.3297409 +| epoch 9 | 4743/ 8400 batches | train loss 0.3993085 +| epoch 9 | 4747/ 8400 batches | train loss 0.3547550 +| epoch 9 | 4751/ 8400 batches | train loss 0.3484747 +| epoch 9 | 4755/ 8400 batches | train loss 0.3147375 +| epoch 9 | 4759/ 8400 batches | train loss 0.3700162 +| epoch 9 | 4763/ 8400 batches | train loss 0.3237912 +| epoch 9 | 4767/ 8400 batches | train loss 0.3493466 +| epoch 9 | 4771/ 8400 batches | train loss 0.3342634 +| epoch 9 | 4775/ 8400 batches | train loss 0.3105017 +| epoch 9 | 4779/ 8400 batches | train loss 0.4259258 +| epoch 9 | 4783/ 8400 batches | train loss 0.4010244 +| epoch 9 | 4787/ 8400 batches | train loss 0.3035628 +| epoch 9 | 4791/ 8400 batches | train loss 0.3585798 +| epoch 9 | 4795/ 8400 batches | train loss 0.3808860 +| epoch 9 | 4799/ 8400 batches | train loss 0.3787305 +| epoch 9 | 4803/ 8400 batches | train loss 0.3500754 +| epoch 9 | 4807/ 8400 batches | train loss 0.3058216 +| epoch 9 | 4811/ 8400 batches | train loss 0.3945760 +| epoch 9 | 4815/ 8400 batches | train loss 0.3398348 +| epoch 9 | 4819/ 8400 batches | train loss 0.3156258 +| epoch 9 | 4823/ 8400 batches | train loss 0.3929983 +| epoch 9 | 4827/ 8400 batches | train loss 0.3800976 +| epoch 9 | 4831/ 8400 batches | train loss 0.3995472 +| epoch 9 | 4835/ 8400 batches | train loss 0.3603731 +| epoch 9 | 4839/ 8400 batches | train loss 0.3390209 +| epoch 9 | 4843/ 8400 batches | train loss 0.3656426 +| epoch 9 | 4847/ 8400 batches | train loss 0.3791162 +| epoch 9 | 4851/ 8400 batches | train loss 0.3089851 +| epoch 9 | 4855/ 8400 batches | train loss 0.3679616 +| epoch 9 | 4859/ 8400 batches | train loss 0.3489470 +| epoch 9 | 4863/ 8400 batches | train loss 0.3518363 +| epoch 9 | 4867/ 8400 batches | train loss 0.3261292 +| epoch 9 | 4871/ 8400 batches | train loss 0.3308045 +| epoch 9 | 4875/ 8400 batches | train loss 0.3094302 +| epoch 9 | 4879/ 8400 batches | train loss 0.3123379 +| epoch 9 | 4883/ 8400 batches | train loss 0.3604888 +| epoch 9 | 4887/ 8400 batches | train loss 0.3694870 +| epoch 9 | 4891/ 8400 batches | train loss 0.1338939 +| epoch 9 | 4895/ 8400 batches | train loss 0.3496665 +| epoch 9 | 4899/ 8400 batches | train loss 0.3851739 +| epoch 9 | 4903/ 8400 batches | train loss 0.2589130 +| epoch 9 | 4907/ 8400 batches | train loss 0.3437709 +| epoch 9 | 4911/ 8400 batches | train loss 0.2879403 +| epoch 9 | 4915/ 8400 batches | train loss 0.4352418 +| epoch 9 | 4919/ 8400 batches | train loss 0.2833303 +| epoch 9 | 4923/ 8400 batches | train loss 0.3839925 +| epoch 9 | 4927/ 8400 batches | train loss 0.3196879 +| epoch 9 | 4931/ 8400 batches | train loss 0.3267382 +| epoch 9 | 4935/ 8400 batches | train loss 0.2917298 +| epoch 9 | 4939/ 8400 batches | train loss 0.2894155 +| epoch 9 | 4943/ 8400 batches | train loss 0.3656051 +| epoch 9 | 4947/ 8400 batches | train loss 0.3579076 +| epoch 9 | 4951/ 8400 batches | train loss 0.2969763 +| epoch 9 | 4955/ 8400 batches | train loss 0.3767411 +| epoch 9 | 4959/ 8400 batches | train loss 0.2998053 +| epoch 9 | 4963/ 8400 batches | train loss 0.3100861 +| epoch 9 | 4967/ 8400 batches | train loss 0.3483158 +| epoch 9 | 4971/ 8400 batches | train loss 0.4069386 +| epoch 9 | 4975/ 8400 batches | train loss 0.3100755 +| epoch 9 | 4979/ 8400 batches | train loss 0.2946581 +| epoch 9 | 4983/ 8400 batches | train loss 0.3966574 +| epoch 9 | 4987/ 8400 batches | train loss 0.2970246 +| epoch 9 | 4991/ 8400 batches | train loss 0.3605022 +| epoch 9 | 4995/ 8400 batches | train loss 0.3500552 +| epoch 9 | 4999/ 8400 batches | train loss 0.3113375 +| epoch 9 | 5003/ 8400 batches | train loss 0.2707043 +| epoch 9 | 5007/ 8400 batches | train loss 0.3040227 +| epoch 9 | 5011/ 8400 batches | train loss 0.3313947 +| epoch 9 | 5015/ 8400 batches | train loss 0.3094478 +| epoch 9 | 5019/ 8400 batches | train loss 0.3930072 +| epoch 9 | 5023/ 8400 batches | train loss 0.3342947 +| epoch 9 | 5027/ 8400 batches | train loss 0.3652004 +| epoch 9 | 5031/ 8400 batches | train loss 0.3539259 +| epoch 9 | 5035/ 8400 batches | train loss 0.3968286 +| epoch 9 | 5039/ 8400 batches | train loss 0.3492033 +| epoch 9 | 5043/ 8400 batches | train loss 0.2851510 +| epoch 9 | 5047/ 8400 batches | train loss 0.4185913 +| epoch 9 | 5051/ 8400 batches | train loss 0.2810212 +| epoch 9 | 5055/ 8400 batches | train loss 0.3219658 +| epoch 9 | 5059/ 8400 batches | train loss 0.3388313 +| epoch 9 | 5063/ 8400 batches | train loss 0.3521203 +| epoch 9 | 5067/ 8400 batches | train loss 0.3639656 +| epoch 9 | 5071/ 8400 batches | train loss 0.3978356 +| epoch 9 | 5075/ 8400 batches | train loss 0.2921586 +| epoch 9 | 5079/ 8400 batches | train loss 0.3222465 +| epoch 9 | 5083/ 8400 batches | train loss 0.3653202 +| epoch 9 | 5087/ 8400 batches | train loss 0.3798838 +| epoch 9 | 5091/ 8400 batches | train loss 0.3778440 +| epoch 9 | 5095/ 8400 batches | train loss 0.1291909 +| epoch 9 | 5099/ 8400 batches | train loss 0.3338672 +| epoch 9 | 5103/ 8400 batches | train loss 0.3358861 +| epoch 9 | 5107/ 8400 batches | train loss 0.3530135 +| epoch 9 | 5111/ 8400 batches | train loss 0.3010418 +| epoch 9 | 5115/ 8400 batches | train loss 0.3003227 +| epoch 9 | 5119/ 8400 batches | train loss 0.3181167 +| epoch 9 | 5123/ 8400 batches | train loss 0.3859466 +| epoch 9 | 5127/ 8400 batches | train loss 0.3157079 +| epoch 9 | 5131/ 8400 batches | train loss 0.3069954 +| epoch 9 | 5135/ 8400 batches | train loss 0.3706768 +| epoch 9 | 5139/ 8400 batches | train loss 0.2978024 +| epoch 9 | 5143/ 8400 batches | train loss 0.3432888 +| epoch 9 | 5147/ 8400 batches | train loss 0.3343534 +| epoch 9 | 5151/ 8400 batches | train loss 0.3794273 +| epoch 9 | 5155/ 8400 batches | train loss 0.3751925 +| epoch 9 | 5159/ 8400 batches | train loss 0.3252147 +| epoch 9 | 5163/ 8400 batches | train loss 0.3918465 +| epoch 9 | 5167/ 8400 batches | train loss 0.3232576 +| epoch 9 | 5171/ 8400 batches | train loss 0.3820247 +| epoch 9 | 5175/ 8400 batches | train loss 0.4170080 +| epoch 9 | 5179/ 8400 batches | train loss 0.4833586 +| epoch 9 | 5183/ 8400 batches | train loss 0.2262610 +| epoch 9 | 5187/ 8400 batches | train loss 0.3071140 +| epoch 9 | 5191/ 8400 batches | train loss 0.3514220 +| epoch 9 | 5195/ 8400 batches | train loss 0.3260795 +| epoch 9 | 5199/ 8400 batches | train loss 0.3502524 +| epoch 9 | 5203/ 8400 batches | train loss 0.3354582 +| epoch 9 | 5207/ 8400 batches | train loss 0.3573202 +| epoch 9 | 5211/ 8400 batches | train loss 0.4329855 +| epoch 9 | 5215/ 8400 batches | train loss 0.2851091 +| epoch 9 | 5219/ 8400 batches | train loss 0.3583659 +| epoch 9 | 5223/ 8400 batches | train loss 0.2875054 +| epoch 9 | 5227/ 8400 batches | train loss 0.3040501 +| epoch 9 | 5231/ 8400 batches | train loss 0.3260653 +| epoch 9 | 5235/ 8400 batches | train loss 0.3700004 +| epoch 9 | 5239/ 8400 batches | train loss 0.3541117 +| epoch 9 | 5243/ 8400 batches | train loss 0.3704071 +| epoch 9 | 5247/ 8400 batches | train loss 0.3212118 +| epoch 9 | 5251/ 8400 batches | train loss 0.3477065 +| epoch 9 | 5255/ 8400 batches | train loss 0.3618006 +| epoch 9 | 5259/ 8400 batches | train loss 0.3811703 +| epoch 9 | 5263/ 8400 batches | train loss 0.3436733 +| epoch 9 | 5267/ 8400 batches | train loss 0.2983496 +| epoch 9 | 5271/ 8400 batches | train loss 0.3150134 +| epoch 9 | 5275/ 8400 batches | train loss 0.3904199 +| epoch 9 | 5279/ 8400 batches | train loss 0.3035046 +| epoch 9 | 5283/ 8400 batches | train loss 0.3163866 +| epoch 9 | 5287/ 8400 batches | train loss 0.3547618 +| epoch 9 | 5291/ 8400 batches | train loss 0.3128126 +| epoch 9 | 5295/ 8400 batches | train loss 0.4101319 +| epoch 9 | 5299/ 8400 batches | train loss 0.2717436 +| epoch 9 | 5303/ 8400 batches | train loss 0.3342803 +| epoch 9 | 5307/ 8400 batches | train loss 0.3815106 +| epoch 9 | 5311/ 8400 batches | train loss 0.2955289 +| epoch 9 | 5315/ 8400 batches | train loss 0.3362689 +| epoch 9 | 5319/ 8400 batches | train loss 0.3790901 +| epoch 9 | 5323/ 8400 batches | train loss 0.2954827 +| epoch 9 | 5327/ 8400 batches | train loss 0.3206239 +| epoch 9 | 5331/ 8400 batches | train loss 0.3395682 +| epoch 9 | 5335/ 8400 batches | train loss 0.3490586 +| epoch 9 | 5339/ 8400 batches | train loss 0.2727194 +| epoch 9 | 5343/ 8400 batches | train loss 0.3649572 +| epoch 9 | 5347/ 8400 batches | train loss 0.3520435 +| epoch 9 | 5351/ 8400 batches | train loss 0.2957617 +| epoch 9 | 5355/ 8400 batches | train loss 0.2163774 +| epoch 9 | 5359/ 8400 batches | train loss 0.2977771 +| epoch 9 | 5363/ 8400 batches | train loss 0.4114287 +| epoch 9 | 5367/ 8400 batches | train loss 0.3496507 +| epoch 9 | 5371/ 8400 batches | train loss 0.3655916 +| epoch 9 | 5375/ 8400 batches | train loss 0.3118231 +| epoch 9 | 5379/ 8400 batches | train loss 0.4367537 +| epoch 9 | 5383/ 8400 batches | train loss 0.3817496 +| epoch 9 | 5387/ 8400 batches | train loss 0.3588994 +| epoch 9 | 5391/ 8400 batches | train loss 0.3811148 +| epoch 9 | 5395/ 8400 batches | train loss 0.2727807 +| epoch 9 | 5399/ 8400 batches | train loss 0.3544065 +| epoch 9 | 5403/ 8400 batches | train loss 0.3817157 +| epoch 9 | 5407/ 8400 batches | train loss 0.3807167 +| epoch 9 | 5411/ 8400 batches | train loss 0.3559561 +| epoch 9 | 5415/ 8400 batches | train loss 0.4069190 +| epoch 9 | 5419/ 8400 batches | train loss 0.2796225 +| epoch 9 | 5423/ 8400 batches | train loss 0.3070291 +| epoch 9 | 5427/ 8400 batches | train loss 0.4139509 +| epoch 9 | 5431/ 8400 batches | train loss 0.3483869 +| epoch 9 | 5435/ 8400 batches | train loss 0.3674121 +| epoch 9 | 5439/ 8400 batches | train loss 0.3023942 +| epoch 9 | 5443/ 8400 batches | train loss 0.3386703 +| epoch 9 | 5447/ 8400 batches | train loss 0.3331817 +| epoch 9 | 5451/ 8400 batches | train loss 0.3085674 +| epoch 9 | 5455/ 8400 batches | train loss 0.4083484 +| epoch 9 | 5459/ 8400 batches | train loss 0.4032427 +| epoch 9 | 5463/ 8400 batches | train loss 0.3265763 +| epoch 9 | 5467/ 8400 batches | train loss 0.3080245 +| epoch 9 | 5471/ 8400 batches | train loss 0.3536888 +| epoch 9 | 5475/ 8400 batches | train loss 0.3454482 +| epoch 9 | 5479/ 8400 batches | train loss 0.3031414 +| epoch 9 | 5483/ 8400 batches | train loss 0.3139606 +| epoch 9 | 5487/ 8400 batches | train loss 0.3402622 +| epoch 9 | 5491/ 8400 batches | train loss 0.4276883 +| epoch 9 | 5495/ 8400 batches | train loss 0.3536251 +| epoch 9 | 5499/ 8400 batches | train loss 0.3900388 +| epoch 9 | 5503/ 8400 batches | train loss 0.3695461 +| epoch 9 | 5507/ 8400 batches | train loss 0.2979407 +| epoch 9 | 5511/ 8400 batches | train loss 0.3426150 +| epoch 9 | 5515/ 8400 batches | train loss 0.3420311 +| epoch 9 | 5519/ 8400 batches | train loss 0.3582024 +| epoch 9 | 5523/ 8400 batches | train loss 0.3627315 +| epoch 9 | 5527/ 8400 batches | train loss 0.3181965 +| epoch 9 | 5531/ 8400 batches | train loss 0.3703500 +| epoch 9 | 5535/ 8400 batches | train loss 0.2638401 +| epoch 9 | 5539/ 8400 batches | train loss 0.4125451 +| epoch 9 | 5543/ 8400 batches | train loss 0.3847347 +| epoch 9 | 5547/ 8400 batches | train loss 0.2770080 +| epoch 9 | 5551/ 8400 batches | train loss 0.3529154 +| epoch 9 | 5555/ 8400 batches | train loss 0.2877284 +| epoch 9 | 5559/ 8400 batches | train loss 0.3698295 +| epoch 9 | 5563/ 8400 batches | train loss 0.3564499 +| epoch 9 | 5567/ 8400 batches | train loss 0.3356756 +| epoch 9 | 5571/ 8400 batches | train loss 0.3462157 +| epoch 9 | 5575/ 8400 batches | train loss 0.2692556 +| epoch 9 | 5579/ 8400 batches | train loss 0.3519582 +| epoch 9 | 5583/ 8400 batches | train loss 0.3407713 +| epoch 9 | 5587/ 8400 batches | train loss 0.3277345 +| epoch 9 | 5591/ 8400 batches | train loss 0.3073766 +| epoch 9 | 5595/ 8400 batches | train loss 0.4132752 +| epoch 9 | 5599/ 8400 batches | train loss 0.3115035 +| epoch 9 | 5603/ 8400 batches | train loss 0.3031682 +| epoch 9 | 5607/ 8400 batches | train loss 0.3731295 +| epoch 9 | 5611/ 8400 batches | train loss 0.3364353 +| epoch 9 | 5615/ 8400 batches | train loss 0.3659477 +| epoch 9 | 5619/ 8400 batches | train loss 0.3510596 +| epoch 9 | 5623/ 8400 batches | train loss 0.3008367 +| epoch 9 | 5627/ 8400 batches | train loss 0.3546741 +| epoch 9 | 5631/ 8400 batches | train loss 0.3019859 +| epoch 9 | 5635/ 8400 batches | train loss 0.3411434 +| epoch 9 | 5639/ 8400 batches | train loss 0.2955810 +| epoch 9 | 5643/ 8400 batches | train loss 0.3170366 +| epoch 9 | 5647/ 8400 batches | train loss 0.3543995 +| epoch 9 | 5651/ 8400 batches | train loss 0.3036222 +| epoch 9 | 5655/ 8400 batches | train loss 0.3169876 +| epoch 9 | 5659/ 8400 batches | train loss 0.2922995 +| epoch 9 | 5663/ 8400 batches | train loss 0.3089143 +| epoch 9 | 5667/ 8400 batches | train loss 0.2120168 +| epoch 9 | 5671/ 8400 batches | train loss 0.3415245 +| epoch 9 | 5675/ 8400 batches | train loss 0.3009955 +| epoch 9 | 5679/ 8400 batches | train loss 0.3638444 +| epoch 9 | 5683/ 8400 batches | train loss 0.3247325 +| epoch 9 | 5687/ 8400 batches | train loss 0.2984049 +| epoch 9 | 5691/ 8400 batches | train loss 0.3738004 +| epoch 9 | 5695/ 8400 batches | train loss 0.3051558 +| epoch 9 | 5699/ 8400 batches | train loss 0.2973095 +| epoch 9 | 5703/ 8400 batches | train loss 0.3265228 +| epoch 9 | 5707/ 8400 batches | train loss 0.3045337 +| epoch 9 | 5711/ 8400 batches | train loss 0.3288881 +| epoch 9 | 5715/ 8400 batches | train loss 0.3131852 +| epoch 9 | 5719/ 8400 batches | train loss 0.3646019 +| epoch 9 | 5723/ 8400 batches | train loss 0.3493888 +| epoch 9 | 5727/ 8400 batches | train loss 0.3078726 +| epoch 9 | 5731/ 8400 batches | train loss 0.3361104 +| epoch 9 | 5735/ 8400 batches | train loss 0.3232630 +| epoch 9 | 5739/ 8400 batches | train loss 0.3610033 +| epoch 9 | 5743/ 8400 batches | train loss 0.3514092 +| epoch 9 | 5747/ 8400 batches | train loss 0.3776988 +| epoch 9 | 5751/ 8400 batches | train loss 0.3266028 +| epoch 9 | 5755/ 8400 batches | train loss 0.3635251 +| epoch 9 | 5759/ 8400 batches | train loss 0.2956766 +| epoch 9 | 5763/ 8400 batches | train loss 0.3328974 +| epoch 9 | 5767/ 8400 batches | train loss 0.3512045 +| epoch 9 | 5771/ 8400 batches | train loss 0.2929552 +| epoch 9 | 5775/ 8400 batches | train loss 0.3555828 +| epoch 9 | 5779/ 8400 batches | train loss 0.3018172 +| epoch 9 | 5783/ 8400 batches | train loss 0.3748040 +| epoch 9 | 5787/ 8400 batches | train loss 0.2816322 +| epoch 9 | 5791/ 8400 batches | train loss 0.3404621 +| epoch 9 | 5795/ 8400 batches | train loss 0.3465303 +| epoch 9 | 5799/ 8400 batches | train loss 0.4042362 +| epoch 9 | 5803/ 8400 batches | train loss 0.3054051 +| epoch 9 | 5807/ 8400 batches | train loss 0.3280640 +| epoch 9 | 5811/ 8400 batches | train loss 0.4018608 +| epoch 9 | 5815/ 8400 batches | train loss 0.3410942 +| epoch 9 | 5819/ 8400 batches | train loss 0.3745122 +| epoch 9 | 5823/ 8400 batches | train loss 0.3482015 +| epoch 9 | 5827/ 8400 batches | train loss 0.3777742 +| epoch 9 | 5831/ 8400 batches | train loss 0.3221661 +| epoch 9 | 5835/ 8400 batches | train loss 0.2896984 +| epoch 9 | 5839/ 8400 batches | train loss 0.3257027 +| epoch 9 | 5843/ 8400 batches | train loss 0.3907156 +| epoch 9 | 5847/ 8400 batches | train loss 0.3242416 +| epoch 9 | 5851/ 8400 batches | train loss 0.3248037 +| epoch 9 | 5855/ 8400 batches | train loss 0.2984933 +| epoch 9 | 5859/ 8400 batches | train loss 0.3682556 +| epoch 9 | 5863/ 8400 batches | train loss 0.3927653 +| epoch 9 | 5867/ 8400 batches | train loss 0.2741057 +| epoch 9 | 5871/ 8400 batches | train loss 0.3428637 +| epoch 9 | 5875/ 8400 batches | train loss 0.3929973 +| epoch 9 | 5879/ 8400 batches | train loss 0.3448460 +| epoch 9 | 5883/ 8400 batches | train loss 0.3241748 +| epoch 9 | 5887/ 8400 batches | train loss 0.3800244 +| epoch 9 | 5891/ 8400 batches | train loss 0.3789465 +| epoch 9 | 5895/ 8400 batches | train loss 0.3435686 +| epoch 9 | 5899/ 8400 batches | train loss 0.3954714 +| epoch 9 | 5903/ 8400 batches | train loss 0.3125332 +| epoch 9 | 5907/ 8400 batches | train loss 0.2948359 +| epoch 9 | 5911/ 8400 batches | train loss 0.3316986 +| epoch 9 | 5915/ 8400 batches | train loss 0.2825824 +| epoch 9 | 5919/ 8400 batches | train loss 0.3432423 +| epoch 9 | 5923/ 8400 batches | train loss 0.3491865 +| epoch 9 | 5927/ 8400 batches | train loss 0.3089957 +| epoch 9 | 5931/ 8400 batches | train loss 0.3360207 +| epoch 9 | 5935/ 8400 batches | train loss 0.3653910 +| epoch 9 | 5939/ 8400 batches | train loss 0.2917089 +| epoch 9 | 5943/ 8400 batches | train loss 0.3383512 +| epoch 9 | 5947/ 8400 batches | train loss 0.3910291 +| epoch 9 | 5951/ 8400 batches | train loss 0.3594340 +| epoch 9 | 5955/ 8400 batches | train loss 0.3240165 +| epoch 9 | 5959/ 8400 batches | train loss 0.3030258 +| epoch 9 | 5963/ 8400 batches | train loss 0.2556416 +| epoch 9 | 5967/ 8400 batches | train loss 0.3619043 +| epoch 9 | 5971/ 8400 batches | train loss 0.3541064 +| epoch 9 | 5975/ 8400 batches | train loss 0.2794853 +| epoch 9 | 5979/ 8400 batches | train loss 0.3545043 +| epoch 9 | 5983/ 8400 batches | train loss 0.2835421 +| epoch 9 | 5987/ 8400 batches | train loss 0.3579486 +| epoch 9 | 5991/ 8400 batches | train loss 0.4212671 +| epoch 9 | 5995/ 8400 batches | train loss 0.3274613 +| epoch 9 | 5999/ 8400 batches | train loss 0.4520455 +| epoch 9 | 6003/ 8400 batches | train loss 0.3533838 +| epoch 9 | 6007/ 8400 batches | train loss 0.3417133 +| epoch 9 | 6011/ 8400 batches | train loss 0.3432691 +| epoch 9 | 6015/ 8400 batches | train loss 0.2973809 +| epoch 9 | 6019/ 8400 batches | train loss 0.3371589 +| epoch 9 | 6023/ 8400 batches | train loss 0.3455024 +| epoch 9 | 6027/ 8400 batches | train loss 0.3121953 +| epoch 9 | 6031/ 8400 batches | train loss 0.2448055 +| epoch 9 | 6035/ 8400 batches | train loss 0.3252670 +| epoch 9 | 6039/ 8400 batches | train loss 0.3503176 +| epoch 9 | 6043/ 8400 batches | train loss 0.2722618 +| epoch 9 | 6047/ 8400 batches | train loss 0.3051055 +| epoch 9 | 6051/ 8400 batches | train loss 0.3655906 +| epoch 9 | 6055/ 8400 batches | train loss 0.2797433 +| epoch 9 | 6059/ 8400 batches | train loss 0.3844417 +| epoch 9 | 6063/ 8400 batches | train loss 0.3359979 +| epoch 9 | 6067/ 8400 batches | train loss 0.3233376 +| epoch 9 | 6071/ 8400 batches | train loss 0.3145443 +| epoch 9 | 6075/ 8400 batches | train loss 0.2705816 +| epoch 9 | 6079/ 8400 batches | train loss 0.4182906 +| epoch 9 | 6083/ 8400 batches | train loss 0.4101502 +| epoch 9 | 6087/ 8400 batches | train loss 0.3712580 +| epoch 9 | 6091/ 8400 batches | train loss 0.3067062 +| epoch 9 | 6095/ 8400 batches | train loss 0.3363332 +| epoch 9 | 6099/ 8400 batches | train loss 0.3294763 +| epoch 9 | 6103/ 8400 batches | train loss 0.3839621 +| epoch 9 | 6107/ 8400 batches | train loss 0.3550696 +| epoch 9 | 6111/ 8400 batches | train loss 0.3573152 +| epoch 9 | 6115/ 8400 batches | train loss 0.2978600 +| epoch 9 | 6119/ 8400 batches | train loss 0.4103223 +| epoch 9 | 6123/ 8400 batches | train loss 0.3534844 +| epoch 9 | 6127/ 8400 batches | train loss 0.3404610 +| epoch 9 | 6131/ 8400 batches | train loss 0.3620885 +| epoch 9 | 6135/ 8400 batches | train loss 0.3062738 +| epoch 9 | 6139/ 8400 batches | train loss 0.3347177 +| epoch 9 | 6143/ 8400 batches | train loss 0.2993156 +| epoch 9 | 6147/ 8400 batches | train loss 0.3120674 +| epoch 9 | 6151/ 8400 batches | train loss 0.3406847 +| epoch 9 | 6155/ 8400 batches | train loss 0.3239925 +| epoch 9 | 6159/ 8400 batches | train loss 0.3758037 +| epoch 9 | 6163/ 8400 batches | train loss 0.3938410 +| epoch 9 | 6167/ 8400 batches | train loss 0.3099955 +| epoch 9 | 6171/ 8400 batches | train loss 0.3000565 +| epoch 9 | 6175/ 8400 batches | train loss 0.3717883 +| epoch 9 | 6179/ 8400 batches | train loss 0.3607632 +| epoch 9 | 6183/ 8400 batches | train loss 0.2098830 +| epoch 9 | 6187/ 8400 batches | train loss 0.2850081 +| epoch 9 | 6191/ 8400 batches | train loss 0.3235564 +| epoch 9 | 6195/ 8400 batches | train loss 0.3296765 +| epoch 9 | 6199/ 8400 batches | train loss 0.3829981 +| epoch 9 | 6203/ 8400 batches | train loss 0.3410796 +| epoch 9 | 6207/ 8400 batches | train loss 0.3093891 +| epoch 9 | 6211/ 8400 batches | train loss 0.3768746 +| epoch 9 | 6215/ 8400 batches | train loss 0.3445277 +| epoch 9 | 6219/ 8400 batches | train loss 0.3325533 +| epoch 9 | 6223/ 8400 batches | train loss 0.3651590 +| epoch 9 | 6227/ 8400 batches | train loss 0.3497134 +| epoch 9 | 6231/ 8400 batches | train loss 0.3755753 +| epoch 9 | 6235/ 8400 batches | train loss 0.3447599 +| epoch 9 | 6239/ 8400 batches | train loss 0.3376580 +| epoch 9 | 6243/ 8400 batches | train loss 0.2993517 +| epoch 9 | 6247/ 8400 batches | train loss 0.3279021 +| epoch 9 | 6251/ 8400 batches | train loss 0.3626716 +| epoch 9 | 6255/ 8400 batches | train loss 0.3428233 +| epoch 9 | 6259/ 8400 batches | train loss 0.2800063 +| epoch 9 | 6263/ 8400 batches | train loss 0.3853661 +| epoch 9 | 6267/ 8400 batches | train loss 0.3826138 +| epoch 9 | 6271/ 8400 batches | train loss 0.3760050 +| epoch 9 | 6275/ 8400 batches | train loss 0.4482911 +| epoch 9 | 6279/ 8400 batches | train loss 0.3140423 +| epoch 9 | 6283/ 8400 batches | train loss 0.3708260 +| epoch 9 | 6287/ 8400 batches | train loss 0.3501171 +| epoch 9 | 6291/ 8400 batches | train loss 0.3013053 +| epoch 9 | 6295/ 8400 batches | train loss 0.3124363 +| epoch 9 | 6299/ 8400 batches | train loss 0.3625333 +| epoch 9 | 6303/ 8400 batches | train loss 0.3861583 +| epoch 9 | 6307/ 8400 batches | train loss 0.3217279 +| epoch 9 | 6311/ 8400 batches | train loss 0.3073651 +| epoch 9 | 6315/ 8400 batches | train loss 0.3007014 +| epoch 9 | 6319/ 8400 batches | train loss 0.2853109 +| epoch 9 | 6323/ 8400 batches | train loss 0.3924941 +| epoch 9 | 6327/ 8400 batches | train loss 0.4377867 +| epoch 9 | 6331/ 8400 batches | train loss 0.3543259 +| epoch 9 | 6335/ 8400 batches | train loss 0.2920672 +| epoch 9 | 6339/ 8400 batches | train loss 0.3292493 +| epoch 9 | 6343/ 8400 batches | train loss 0.3926889 +| epoch 9 | 6347/ 8400 batches | train loss 0.3534111 +| epoch 9 | 6351/ 8400 batches | train loss 0.3373645 +| epoch 9 | 6355/ 8400 batches | train loss 0.3776806 +| epoch 9 | 6359/ 8400 batches | train loss 0.3081279 +| epoch 9 | 6363/ 8400 batches | train loss 0.3463846 +| epoch 9 | 6367/ 8400 batches | train loss 0.3692697 +| epoch 9 | 6371/ 8400 batches | train loss 0.2735253 +| epoch 9 | 6375/ 8400 batches | train loss 0.3282490 +| epoch 9 | 6379/ 8400 batches | train loss 0.4125696 +| epoch 9 | 6383/ 8400 batches | train loss 0.3391064 +| epoch 9 | 6387/ 8400 batches | train loss 0.3488485 +| epoch 9 | 6391/ 8400 batches | train loss 0.3261548 +| epoch 9 | 6395/ 8400 batches | train loss 0.3350098 +| epoch 9 | 6399/ 8400 batches | train loss 0.3323716 +| epoch 9 | 6403/ 8400 batches | train loss 0.2886931 +| epoch 9 | 6407/ 8400 batches | train loss 0.3465130 +| epoch 9 | 6411/ 8400 batches | train loss 0.3460488 +| epoch 9 | 6415/ 8400 batches | train loss 0.3501112 +| epoch 9 | 6419/ 8400 batches | train loss 0.3403668 +| epoch 9 | 6423/ 8400 batches | train loss 0.3498600 +| epoch 9 | 6427/ 8400 batches | train loss 0.2662524 +| epoch 9 | 6431/ 8400 batches | train loss 0.3398153 +| epoch 9 | 6435/ 8400 batches | train loss 0.3586802 +| epoch 9 | 6439/ 8400 batches | train loss 0.3533146 +| epoch 9 | 6443/ 8400 batches | train loss 0.2794453 +| epoch 9 | 6447/ 8400 batches | train loss 0.2934735 +| epoch 9 | 6451/ 8400 batches | train loss 0.3183251 +| epoch 9 | 6455/ 8400 batches | train loss 0.3869645 +| epoch 9 | 6459/ 8400 batches | train loss 0.2932516 +| epoch 9 | 6463/ 8400 batches | train loss 0.3177216 +| epoch 9 | 6467/ 8400 batches | train loss 0.3278547 +| epoch 9 | 6471/ 8400 batches | train loss 0.3236731 +| epoch 9 | 6475/ 8400 batches | train loss 0.3608730 +| epoch 9 | 6479/ 8400 batches | train loss 0.3450105 +| epoch 9 | 6483/ 8400 batches | train loss 0.3375726 +| epoch 9 | 6487/ 8400 batches | train loss 0.3277289 +| epoch 9 | 6491/ 8400 batches | train loss 0.3005739 +| epoch 9 | 6495/ 8400 batches | train loss 0.3231063 +| epoch 9 | 6499/ 8400 batches | train loss 0.2853513 +| epoch 9 | 6503/ 8400 batches | train loss 0.3354895 +| epoch 9 | 6507/ 8400 batches | train loss 0.3696447 +| epoch 9 | 6511/ 8400 batches | train loss 0.3240675 +| epoch 9 | 6515/ 8400 batches | train loss 0.2918457 +| epoch 9 | 6519/ 8400 batches | train loss 0.2899997 +| epoch 9 | 6523/ 8400 batches | train loss 0.3861197 +| epoch 9 | 6527/ 8400 batches | train loss 0.2784654 +| epoch 9 | 6531/ 8400 batches | train loss 0.3696631 +| epoch 9 | 6535/ 8400 batches | train loss 0.3335910 +| epoch 9 | 6539/ 8400 batches | train loss 0.3217942 +| epoch 9 | 6543/ 8400 batches | train loss 0.3290984 +| epoch 9 | 6547/ 8400 batches | train loss 0.2663449 +| epoch 9 | 6551/ 8400 batches | train loss 0.3847534 +| epoch 9 | 6555/ 8400 batches | train loss 0.3982107 +| epoch 9 | 6559/ 8400 batches | train loss 0.3112302 +| epoch 9 | 6563/ 8400 batches | train loss 0.4237269 +| epoch 9 | 6567/ 8400 batches | train loss 0.3282412 +| epoch 9 | 6571/ 8400 batches | train loss 0.2242629 +| epoch 9 | 6575/ 8400 batches | train loss 0.3171319 +| epoch 9 | 6579/ 8400 batches | train loss 0.3142239 +| epoch 9 | 6583/ 8400 batches | train loss 0.3032531 +| epoch 9 | 6587/ 8400 batches | train loss 0.3682499 +| epoch 9 | 6591/ 8400 batches | train loss 0.2684979 +| epoch 9 | 6595/ 8400 batches | train loss 0.3985134 +| epoch 9 | 6599/ 8400 batches | train loss 0.4056039 +| epoch 9 | 6603/ 8400 batches | train loss 0.3244990 +| epoch 9 | 6607/ 8400 batches | train loss 0.3571463 +| epoch 9 | 6611/ 8400 batches | train loss 0.2193326 +| epoch 9 | 6615/ 8400 batches | train loss 0.2868064 +| epoch 9 | 6619/ 8400 batches | train loss 0.3360741 +| epoch 9 | 6623/ 8400 batches | train loss 0.3299072 +| epoch 9 | 6627/ 8400 batches | train loss 0.3464122 +| epoch 9 | 6631/ 8400 batches | train loss 0.2735225 +| epoch 9 | 6635/ 8400 batches | train loss 0.2778457 +| epoch 9 | 6639/ 8400 batches | train loss 0.3103281 +| epoch 9 | 6643/ 8400 batches | train loss 0.3804254 +| epoch 9 | 6647/ 8400 batches | train loss 0.3455404 +| epoch 9 | 6651/ 8400 batches | train loss 0.3294921 +| epoch 9 | 6655/ 8400 batches | train loss 0.4135589 +| epoch 9 | 6659/ 8400 batches | train loss 0.3899554 +| epoch 9 | 6663/ 8400 batches | train loss 0.3784612 +| epoch 9 | 6667/ 8400 batches | train loss 0.3481565 +| epoch 9 | 6671/ 8400 batches | train loss 0.3193140 +| epoch 9 | 6675/ 8400 batches | train loss 0.3424323 +| epoch 9 | 6679/ 8400 batches | train loss 0.3355143 +| epoch 9 | 6683/ 8400 batches | train loss 0.3846800 +| epoch 9 | 6687/ 8400 batches | train loss 0.3253173 +| epoch 9 | 6691/ 8400 batches | train loss 0.3072793 +| epoch 9 | 6695/ 8400 batches | train loss 0.3429057 +| epoch 9 | 6699/ 8400 batches | train loss 0.4132464 +| epoch 9 | 6703/ 8400 batches | train loss 0.3780092 +| epoch 9 | 6707/ 8400 batches | train loss 0.3814408 +| epoch 9 | 6711/ 8400 batches | train loss 0.3632317 +| epoch 9 | 6715/ 8400 batches | train loss 0.3425240 +| epoch 9 | 6719/ 8400 batches | train loss 0.3197181 +| epoch 9 | 6723/ 8400 batches | train loss 0.2722268 +| epoch 9 | 6727/ 8400 batches | train loss 0.3053147 +| epoch 9 | 6731/ 8400 batches | train loss 0.3614890 +| epoch 9 | 6735/ 8400 batches | train loss 0.4190188 +| epoch 9 | 6739/ 8400 batches | train loss 0.3968294 +| epoch 9 | 6743/ 8400 batches | train loss 0.3525265 +| epoch 9 | 6747/ 8400 batches | train loss 0.3209070 +| epoch 9 | 6751/ 8400 batches | train loss 0.3588799 +| epoch 9 | 6755/ 8400 batches | train loss 0.3387044 +| epoch 9 | 6759/ 8400 batches | train loss 0.2558722 +| epoch 9 | 6763/ 8400 batches | train loss 0.3648564 +| epoch 9 | 6767/ 8400 batches | train loss 0.3450543 +| epoch 9 | 6771/ 8400 batches | train loss 0.3715996 +| epoch 9 | 6775/ 8400 batches | train loss 0.3656807 +| epoch 9 | 6779/ 8400 batches | train loss 0.2140547 +| epoch 9 | 6783/ 8400 batches | train loss 0.2765990 +| epoch 9 | 6787/ 8400 batches | train loss 0.3717822 +| epoch 9 | 6791/ 8400 batches | train loss 0.3636604 +| epoch 9 | 6795/ 8400 batches | train loss 0.3394275 +| epoch 9 | 6799/ 8400 batches | train loss 0.4397849 +| epoch 9 | 6803/ 8400 batches | train loss 0.3717910 +| epoch 9 | 6807/ 8400 batches | train loss 0.3199746 +| epoch 9 | 6811/ 8400 batches | train loss 0.3553815 +| epoch 9 | 6815/ 8400 batches | train loss 0.2552544 +| epoch 9 | 6819/ 8400 batches | train loss 0.3061409 +| epoch 9 | 6823/ 8400 batches | train loss 0.2686383 +| epoch 9 | 6827/ 8400 batches | train loss 0.4905837 +| epoch 9 | 6831/ 8400 batches | train loss 0.3496422 +| epoch 9 | 6835/ 8400 batches | train loss 0.4008020 +| epoch 9 | 6839/ 8400 batches | train loss 0.3001766 +| epoch 9 | 6843/ 8400 batches | train loss 0.3290318 +| epoch 9 | 6847/ 8400 batches | train loss 0.3459165 +| epoch 9 | 6851/ 8400 batches | train loss 0.3635674 +| epoch 9 | 6855/ 8400 batches | train loss 0.2848730 +| epoch 9 | 6859/ 8400 batches | train loss 0.3178335 +| epoch 9 | 6863/ 8400 batches | train loss 0.3616451 +| epoch 9 | 6867/ 8400 batches | train loss 0.3542799 +| epoch 9 | 6871/ 8400 batches | train loss 0.3802699 +| epoch 9 | 6875/ 8400 batches | train loss 0.3722332 +| epoch 9 | 6879/ 8400 batches | train loss 0.4071034 +| epoch 9 | 6883/ 8400 batches | train loss 0.2863695 +| epoch 9 | 6887/ 8400 batches | train loss 0.2918648 +| epoch 9 | 6891/ 8400 batches | train loss 0.2939422 +| epoch 9 | 6895/ 8400 batches | train loss 0.4015465 +| epoch 9 | 6899/ 8400 batches | train loss 0.4303364 +| epoch 9 | 6903/ 8400 batches | train loss 0.4146812 +| epoch 9 | 6907/ 8400 batches | train loss 0.3053232 +| epoch 9 | 6911/ 8400 batches | train loss 0.3823422 +| epoch 9 | 6915/ 8400 batches | train loss 0.3542150 +| epoch 9 | 6919/ 8400 batches | train loss 0.3505031 +| epoch 9 | 6923/ 8400 batches | train loss 0.3315890 +| epoch 9 | 6927/ 8400 batches | train loss 0.3239615 +| epoch 9 | 6931/ 8400 batches | train loss 0.3201378 +| epoch 9 | 6935/ 8400 batches | train loss 0.3106257 +| epoch 9 | 6939/ 8400 batches | train loss 0.2501146 +| epoch 9 | 6943/ 8400 batches | train loss 0.3438405 +| epoch 9 | 6947/ 8400 batches | train loss 0.2922693 +| epoch 9 | 6951/ 8400 batches | train loss 0.3447601 +| epoch 9 | 6955/ 8400 batches | train loss 0.3257623 +| epoch 9 | 6959/ 8400 batches | train loss 0.3754514 +| epoch 9 | 6963/ 8400 batches | train loss 0.3378456 +| epoch 9 | 6967/ 8400 batches | train loss 0.4101344 +| epoch 9 | 6971/ 8400 batches | train loss 0.3182667 +| epoch 9 | 6975/ 8400 batches | train loss 0.3286794 +| epoch 9 | 6979/ 8400 batches | train loss 0.3171615 +| epoch 9 | 6983/ 8400 batches | train loss 0.2864121 +| epoch 9 | 6987/ 8400 batches | train loss 0.3355908 +| epoch 9 | 6991/ 8400 batches | train loss 0.3691059 +| epoch 9 | 6995/ 8400 batches | train loss 0.3318289 +| epoch 9 | 6999/ 8400 batches | train loss 0.3780207 +| epoch 9 | 7003/ 8400 batches | train loss 0.3530089 +| epoch 9 | 7007/ 8400 batches | train loss 0.3150535 +| epoch 9 | 7011/ 8400 batches | train loss 0.3478165 +| epoch 9 | 7015/ 8400 batches | train loss 0.3520809 +| epoch 9 | 7019/ 8400 batches | train loss 0.3420614 +| epoch 9 | 7023/ 8400 batches | train loss 0.3073116 +| epoch 9 | 7027/ 8400 batches | train loss 0.3346943 +| epoch 9 | 7031/ 8400 batches | train loss 0.3150223 +| epoch 9 | 7035/ 8400 batches | train loss 0.3512145 +| epoch 9 | 7039/ 8400 batches | train loss 0.3437659 +| epoch 9 | 7043/ 8400 batches | train loss 0.3378128 +| epoch 9 | 7047/ 8400 batches | train loss 0.2912995 +| epoch 9 | 7051/ 8400 batches | train loss 0.3538142 +| epoch 9 | 7055/ 8400 batches | train loss 0.3886419 +| epoch 9 | 7059/ 8400 batches | train loss 0.3114716 +| epoch 9 | 7063/ 8400 batches | train loss 0.4000667 +| epoch 9 | 7067/ 8400 batches | train loss 0.3448400 +| epoch 9 | 7071/ 8400 batches | train loss 0.2850742 +| epoch 9 | 7075/ 8400 batches | train loss 0.3423813 +| epoch 9 | 7079/ 8400 batches | train loss 0.2886575 +| epoch 9 | 7083/ 8400 batches | train loss 0.3448373 +| epoch 9 | 7087/ 8400 batches | train loss 0.3193632 +| epoch 9 | 7091/ 8400 batches | train loss 0.3628603 +| epoch 9 | 7095/ 8400 batches | train loss 0.3561727 +| epoch 9 | 7099/ 8400 batches | train loss 0.3725609 +| epoch 9 | 7103/ 8400 batches | train loss 0.3549215 +| epoch 9 | 7107/ 8400 batches | train loss 0.3246409 +| epoch 9 | 7111/ 8400 batches | train loss 0.2742584 +| epoch 9 | 7115/ 8400 batches | train loss 0.3074852 +| epoch 9 | 7119/ 8400 batches | train loss 0.3780065 +| epoch 9 | 7123/ 8400 batches | train loss 0.3282571 +| epoch 9 | 7127/ 8400 batches | train loss 0.3591443 +| epoch 9 | 7131/ 8400 batches | train loss 0.2935108 +| epoch 9 | 7135/ 8400 batches | train loss 0.3486379 +| epoch 9 | 7139/ 8400 batches | train loss 0.3261750 +| epoch 9 | 7143/ 8400 batches | train loss 0.3235624 +| epoch 9 | 7147/ 8400 batches | train loss 0.3444816 +| epoch 9 | 7151/ 8400 batches | train loss 0.3375294 +| epoch 9 | 7155/ 8400 batches | train loss 0.4085007 +| epoch 9 | 7159/ 8400 batches | train loss 0.3439315 +| epoch 9 | 7163/ 8400 batches | train loss 0.3323931 +| epoch 9 | 7167/ 8400 batches | train loss 0.3265636 +| epoch 9 | 7171/ 8400 batches | train loss 0.3580865 +| epoch 9 | 7175/ 8400 batches | train loss 0.3025754 +| epoch 9 | 7179/ 8400 batches | train loss 0.3429902 +| epoch 9 | 7183/ 8400 batches | train loss 0.3133639 +| epoch 9 | 7187/ 8400 batches | train loss 0.3446453 +| epoch 9 | 7191/ 8400 batches | train loss 0.3884325 +| epoch 9 | 7195/ 8400 batches | train loss 0.3853891 +| epoch 9 | 7199/ 8400 batches | train loss 0.3147164 +| epoch 9 | 7203/ 8400 batches | train loss 0.3170419 +| epoch 9 | 7207/ 8400 batches | train loss 0.3074746 +| epoch 9 | 7211/ 8400 batches | train loss 0.3131516 +| epoch 9 | 7215/ 8400 batches | train loss 0.2927172 +| epoch 9 | 7219/ 8400 batches | train loss 0.3181443 +| epoch 9 | 7223/ 8400 batches | train loss 0.2867373 +| epoch 9 | 7227/ 8400 batches | train loss 0.2889953 +| epoch 9 | 7231/ 8400 batches | train loss 0.3563552 +| epoch 9 | 7235/ 8400 batches | train loss 0.3005597 +| epoch 9 | 7239/ 8400 batches | train loss 0.4241706 +| epoch 9 | 7243/ 8400 batches | train loss 0.3109510 +| epoch 9 | 7247/ 8400 batches | train loss 0.3420402 +| epoch 9 | 7251/ 8400 batches | train loss 0.3837459 +| epoch 9 | 7255/ 8400 batches | train loss 0.3307440 +| epoch 9 | 7259/ 8400 batches | train loss 0.3605688 +| epoch 9 | 7263/ 8400 batches | train loss 0.3374206 +| epoch 9 | 7267/ 8400 batches | train loss 0.3544251 +| epoch 9 | 7271/ 8400 batches | train loss 0.3557833 +| epoch 9 | 7275/ 8400 batches | train loss 0.3763959 +| epoch 9 | 7279/ 8400 batches | train loss 0.2958882 +| epoch 9 | 7283/ 8400 batches | train loss 0.3134412 +| epoch 9 | 7287/ 8400 batches | train loss 0.4002726 +| epoch 9 | 7291/ 8400 batches | train loss 0.3764930 +| epoch 9 | 7295/ 8400 batches | train loss 0.3297049 +| epoch 9 | 7299/ 8400 batches | train loss 0.3172145 +| epoch 9 | 7303/ 8400 batches | train loss 0.3215156 +| epoch 9 | 7307/ 8400 batches | train loss 0.3530336 +| epoch 9 | 7311/ 8400 batches | train loss 0.3110018 +| epoch 9 | 7315/ 8400 batches | train loss 0.3511078 +| epoch 9 | 7319/ 8400 batches | train loss 0.3542461 +| epoch 9 | 7323/ 8400 batches | train loss 0.2770495 +| epoch 9 | 7327/ 8400 batches | train loss 0.3214175 +| epoch 9 | 7331/ 8400 batches | train loss 0.3358300 +| epoch 9 | 7335/ 8400 batches | train loss 0.3349634 +| epoch 9 | 7339/ 8400 batches | train loss 0.3280136 +| epoch 9 | 7343/ 8400 batches | train loss 0.4053367 +| epoch 9 | 7347/ 8400 batches | train loss 0.3768736 +| epoch 9 | 7351/ 8400 batches | train loss 0.2701851 +| epoch 9 | 7355/ 8400 batches | train loss 0.2248330 +| epoch 9 | 7359/ 8400 batches | train loss 0.3579907 +| epoch 9 | 7363/ 8400 batches | train loss 0.3110712 +| epoch 9 | 7367/ 8400 batches | train loss 0.3343831 +| epoch 9 | 7371/ 8400 batches | train loss 0.3542507 +| epoch 9 | 7375/ 8400 batches | train loss 0.3369365 +| epoch 9 | 7379/ 8400 batches | train loss 0.3849675 +| epoch 9 | 7383/ 8400 batches | train loss 0.2965418 +| epoch 9 | 7387/ 8400 batches | train loss 0.2918663 +| epoch 9 | 7391/ 8400 batches | train loss 0.2928020 +| epoch 9 | 7395/ 8400 batches | train loss 0.3393069 +| epoch 9 | 7399/ 8400 batches | train loss 0.2832075 +| epoch 9 | 7403/ 8400 batches | train loss 0.3499665 +| epoch 9 | 7407/ 8400 batches | train loss 0.2317404 +| epoch 9 | 7411/ 8400 batches | train loss 0.2856576 +| epoch 9 | 7415/ 8400 batches | train loss 0.3161714 +| epoch 9 | 7419/ 8400 batches | train loss 0.3504903 +| epoch 9 | 7423/ 8400 batches | train loss 0.3306016 +| epoch 9 | 7427/ 8400 batches | train loss 0.3101579 +| epoch 9 | 7431/ 8400 batches | train loss 0.2924641 +| epoch 9 | 7435/ 8400 batches | train loss 0.3319082 +| epoch 9 | 7439/ 8400 batches | train loss 0.3609967 +| epoch 9 | 7443/ 8400 batches | train loss 0.3040245 +| epoch 9 | 7447/ 8400 batches | train loss 0.3545407 +| epoch 9 | 7451/ 8400 batches | train loss 0.3544546 +| epoch 9 | 7455/ 8400 batches | train loss 0.3139359 +| epoch 9 | 7459/ 8400 batches | train loss 0.3466606 +| epoch 9 | 7463/ 8400 batches | train loss 0.2866741 +| epoch 9 | 7467/ 8400 batches | train loss 0.3190102 +| epoch 9 | 7471/ 8400 batches | train loss 0.3154094 +| epoch 9 | 7475/ 8400 batches | train loss 0.4382767 +| epoch 9 | 7479/ 8400 batches | train loss 0.3703193 +| epoch 9 | 7483/ 8400 batches | train loss 0.3057815 +| epoch 9 | 7487/ 8400 batches | train loss 0.3251869 +| epoch 9 | 7491/ 8400 batches | train loss 0.2926977 +| epoch 9 | 7495/ 8400 batches | train loss 0.2850080 +| epoch 9 | 7499/ 8400 batches | train loss 0.3710984 +| epoch 9 | 7503/ 8400 batches | train loss 0.3163871 +| epoch 9 | 7507/ 8400 batches | train loss 0.3257287 +| epoch 9 | 7511/ 8400 batches | train loss 0.3871744 +| epoch 9 | 7515/ 8400 batches | train loss 0.3083037 +| epoch 9 | 7519/ 8400 batches | train loss 0.3695912 +| epoch 9 | 7523/ 8400 batches | train loss 0.3121728 +| epoch 9 | 7527/ 8400 batches | train loss 0.3791823 +| epoch 9 | 7531/ 8400 batches | train loss 0.3516889 +| epoch 9 | 7535/ 8400 batches | train loss 0.3747828 +| epoch 9 | 7539/ 8400 batches | train loss 0.3136710 +| epoch 9 | 7543/ 8400 batches | train loss 0.3033299 +| epoch 9 | 7547/ 8400 batches | train loss 0.2895931 +| epoch 9 | 7551/ 8400 batches | train loss 0.4068655 +| epoch 9 | 7555/ 8400 batches | train loss 0.3432224 +| epoch 9 | 7559/ 8400 batches | train loss 0.3585927 +| epoch 9 | 7563/ 8400 batches | train loss 0.3510900 +| epoch 9 | 7567/ 8400 batches | train loss 0.2340153 +| epoch 9 | 7571/ 8400 batches | train loss 0.3477173 +| epoch 9 | 7575/ 8400 batches | train loss 0.3338296 +| epoch 9 | 7579/ 8400 batches | train loss 0.3574023 +| epoch 9 | 7583/ 8400 batches | train loss 0.3759019 +| epoch 9 | 7587/ 8400 batches | train loss 0.3903835 +| epoch 9 | 7591/ 8400 batches | train loss 0.3527656 +| epoch 9 | 7595/ 8400 batches | train loss 0.3101994 +| epoch 9 | 7599/ 8400 batches | train loss 0.3395309 +| epoch 9 | 7603/ 8400 batches | train loss 0.3487174 +| epoch 9 | 7607/ 8400 batches | train loss 0.3591856 +| epoch 9 | 7611/ 8400 batches | train loss 0.2849782 +| epoch 9 | 7615/ 8400 batches | train loss 0.3666769 +| epoch 9 | 7619/ 8400 batches | train loss 0.3622323 +| epoch 9 | 7623/ 8400 batches | train loss 0.3387193 +| epoch 9 | 7627/ 8400 batches | train loss 0.2851437 +| epoch 9 | 7631/ 8400 batches | train loss 0.3379014 +| epoch 9 | 7635/ 8400 batches | train loss 0.4124371 +| epoch 9 | 7639/ 8400 batches | train loss 0.3100429 +| epoch 9 | 7643/ 8400 batches | train loss 0.3748470 +| epoch 9 | 7647/ 8400 batches | train loss 0.3264463 +| epoch 9 | 7651/ 8400 batches | train loss 0.2957805 +| epoch 9 | 7655/ 8400 batches | train loss 0.4000484 +| epoch 9 | 7659/ 8400 batches | train loss 0.2967948 +| epoch 9 | 7663/ 8400 batches | train loss 0.2996847 +| epoch 9 | 7667/ 8400 batches | train loss 0.3195655 +| epoch 9 | 7671/ 8400 batches | train loss 0.3314701 +| epoch 9 | 7675/ 8400 batches | train loss 0.3533469 +| epoch 9 | 7679/ 8400 batches | train loss 0.2804317 +| epoch 9 | 7683/ 8400 batches | train loss 0.3392253 +| epoch 9 | 7687/ 8400 batches | train loss 0.3303502 +| epoch 9 | 7691/ 8400 batches | train loss 0.3487857 +| epoch 9 | 7695/ 8400 batches | train loss 0.2937064 +| epoch 9 | 7699/ 8400 batches | train loss 0.3393818 +| epoch 9 | 7703/ 8400 batches | train loss 0.3226595 +| epoch 9 | 7707/ 8400 batches | train loss 0.4474551 +| epoch 9 | 7711/ 8400 batches | train loss 0.3921450 +| epoch 9 | 7715/ 8400 batches | train loss 0.3090866 +| epoch 9 | 7719/ 8400 batches | train loss 0.3694293 +| epoch 9 | 7723/ 8400 batches | train loss 0.3220390 +| epoch 9 | 7727/ 8400 batches | train loss 0.3918644 +| epoch 9 | 7731/ 8400 batches | train loss 0.3346082 +| epoch 9 | 7735/ 8400 batches | train loss 0.3458477 +| epoch 9 | 7739/ 8400 batches | train loss 0.3438497 +| epoch 9 | 7743/ 8400 batches | train loss 0.2563620 +| epoch 9 | 7747/ 8400 batches | train loss 0.3366928 +| epoch 9 | 7751/ 8400 batches | train loss 0.3607254 +| epoch 9 | 7755/ 8400 batches | train loss 0.3377563 +| epoch 9 | 7759/ 8400 batches | train loss 0.3710092 +| epoch 9 | 7763/ 8400 batches | train loss 0.3636693 +| epoch 9 | 7767/ 8400 batches | train loss 0.2936383 +| epoch 9 | 7771/ 8400 batches | train loss 0.2922583 +| epoch 9 | 7775/ 8400 batches | train loss 0.3374750 +| epoch 9 | 7779/ 8400 batches | train loss 0.3185116 +| epoch 9 | 7783/ 8400 batches | train loss 0.3835482 +| epoch 9 | 7787/ 8400 batches | train loss 0.3720198 +| epoch 9 | 7791/ 8400 batches | train loss 0.3487843 +| epoch 9 | 7795/ 8400 batches | train loss 0.2472742 +| epoch 9 | 7799/ 8400 batches | train loss 0.3204740 +| epoch 9 | 7803/ 8400 batches | train loss 0.3286000 +| epoch 9 | 7807/ 8400 batches | train loss 0.2978801 +| epoch 9 | 7811/ 8400 batches | train loss 0.3492076 +| epoch 9 | 7815/ 8400 batches | train loss 0.3310137 +| epoch 9 | 7819/ 8400 batches | train loss 0.3660499 +| epoch 9 | 7823/ 8400 batches | train loss 0.3058744 +| epoch 9 | 7827/ 8400 batches | train loss 0.3964897 +| epoch 9 | 7831/ 8400 batches | train loss 0.3659182 +| epoch 9 | 7835/ 8400 batches | train loss 0.3980682 +| epoch 9 | 7839/ 8400 batches | train loss 0.3284587 +| epoch 9 | 7843/ 8400 batches | train loss 0.3293379 +| epoch 9 | 7847/ 8400 batches | train loss 0.3486563 +| epoch 9 | 7851/ 8400 batches | train loss 0.3127566 +| epoch 9 | 7855/ 8400 batches | train loss 0.3122135 +| epoch 9 | 7859/ 8400 batches | train loss 0.3241385 +| epoch 9 | 7863/ 8400 batches | train loss 0.3941643 +| epoch 9 | 7867/ 8400 batches | train loss 0.3407026 +| epoch 9 | 7871/ 8400 batches | train loss 0.3734742 +| epoch 9 | 7875/ 8400 batches | train loss 0.3668669 +| epoch 9 | 7879/ 8400 batches | train loss 0.2998271 +| epoch 9 | 7883/ 8400 batches | train loss 0.3533656 +| epoch 9 | 7887/ 8400 batches | train loss 0.3470902 +| epoch 9 | 7891/ 8400 batches | train loss 0.3868671 +| epoch 9 | 7895/ 8400 batches | train loss 0.3355189 +| epoch 9 | 7899/ 8400 batches | train loss 0.3389609 +| epoch 9 | 7903/ 8400 batches | train loss 0.3477950 +| epoch 9 | 7907/ 8400 batches | train loss 0.3805555 +| epoch 9 | 7911/ 8400 batches | train loss 0.2889218 +| epoch 9 | 7915/ 8400 batches | train loss 0.2885869 +| epoch 9 | 7919/ 8400 batches | train loss 0.4389581 +| epoch 9 | 7923/ 8400 batches | train loss 0.3156443 +| epoch 9 | 7927/ 8400 batches | train loss 0.3226856 +| epoch 9 | 7931/ 8400 batches | train loss 0.2931532 +| epoch 9 | 7935/ 8400 batches | train loss 0.4652073 +| epoch 9 | 7939/ 8400 batches | train loss 0.3297569 +| epoch 9 | 7943/ 8400 batches | train loss 0.3985764 +| epoch 9 | 7947/ 8400 batches | train loss 0.3070791 +| epoch 9 | 7951/ 8400 batches | train loss 0.3209926 +| epoch 9 | 7955/ 8400 batches | train loss 0.3871503 +| epoch 9 | 7959/ 8400 batches | train loss 0.3786061 +| epoch 9 | 7963/ 8400 batches | train loss 0.3919914 +| epoch 9 | 7967/ 8400 batches | train loss 0.3577042 +| epoch 9 | 7971/ 8400 batches | train loss 0.3994823 +| epoch 9 | 7975/ 8400 batches | train loss 0.3706977 +| epoch 9 | 7979/ 8400 batches | train loss 0.3688982 +| epoch 9 | 7983/ 8400 batches | train loss 0.3480663 +| epoch 9 | 7987/ 8400 batches | train loss 0.4053247 +| epoch 9 | 7991/ 8400 batches | train loss 0.3763632 +| epoch 9 | 7995/ 8400 batches | train loss 0.3147805 +| epoch 9 | 7999/ 8400 batches | train loss 0.3517991 +| epoch 9 | 8003/ 8400 batches | train loss 0.3387363 +| epoch 9 | 8007/ 8400 batches | train loss 0.3375565 +| epoch 9 | 8011/ 8400 batches | train loss 0.3535145 +| epoch 9 | 8015/ 8400 batches | train loss 0.3967130 +| epoch 9 | 8019/ 8400 batches | train loss 0.3728777 +| epoch 9 | 8023/ 8400 batches | train loss 0.3099061 +| epoch 9 | 8027/ 8400 batches | train loss 0.3726574 +| epoch 9 | 8031/ 8400 batches | train loss 0.3715007 +| epoch 9 | 8035/ 8400 batches | train loss 0.2363912 +| epoch 9 | 8039/ 8400 batches | train loss 0.2943284 +| epoch 9 | 8043/ 8400 batches | train loss 0.3592054 +| epoch 9 | 8047/ 8400 batches | train loss 0.3452515 +| epoch 9 | 8051/ 8400 batches | train loss 0.3070677 +| epoch 9 | 8055/ 8400 batches | train loss 0.3494608 +| epoch 9 | 8059/ 8400 batches | train loss 0.2807416 +| epoch 9 | 8063/ 8400 batches | train loss 0.3231547 +| epoch 9 | 8067/ 8400 batches | train loss 0.3369241 +| epoch 9 | 8071/ 8400 batches | train loss 0.3649963 +| epoch 9 | 8075/ 8400 batches | train loss 0.3311491 +| epoch 9 | 8079/ 8400 batches | train loss 0.3304222 +| epoch 9 | 8083/ 8400 batches | train loss 0.3811481 +| epoch 9 | 8087/ 8400 batches | train loss 0.3541626 +| epoch 9 | 8091/ 8400 batches | train loss 0.3164265 +| epoch 9 | 8095/ 8400 batches | train loss 0.3282492 +| epoch 9 | 8099/ 8400 batches | train loss 0.3593909 +| epoch 9 | 8103/ 8400 batches | train loss 0.3526532 +| epoch 9 | 8107/ 8400 batches | train loss 0.3732354 +| epoch 9 | 8111/ 8400 batches | train loss 0.4057848 +| epoch 9 | 8115/ 8400 batches | train loss 0.3595072 +| epoch 9 | 8119/ 8400 batches | train loss 0.3373933 +| epoch 9 | 8123/ 8400 batches | train loss 0.3037925 +| epoch 9 | 8127/ 8400 batches | train loss 0.3278576 +| epoch 9 | 8131/ 8400 batches | train loss 0.4459770 +| epoch 9 | 8135/ 8400 batches | train loss 0.3501314 +| epoch 9 | 8139/ 8400 batches | train loss 0.2242528 +| epoch 9 | 8143/ 8400 batches | train loss 0.3812096 +| epoch 9 | 8147/ 8400 batches | train loss 0.4153901 +| epoch 9 | 8151/ 8400 batches | train loss 0.3400513 +| epoch 9 | 8155/ 8400 batches | train loss 0.3467096 +| epoch 9 | 8159/ 8400 batches | train loss 0.2815141 +| epoch 9 | 8163/ 8400 batches | train loss 0.3158871 +| epoch 9 | 8167/ 8400 batches | train loss 0.3544050 +| epoch 9 | 8171/ 8400 batches | train loss 0.3151317 +| epoch 9 | 8175/ 8400 batches | train loss 0.3361597 +| epoch 9 | 8179/ 8400 batches | train loss 0.3728896 +| epoch 9 | 8183/ 8400 batches | train loss 0.3018621 +| epoch 9 | 8187/ 8400 batches | train loss 0.3486729 +| epoch 9 | 8191/ 8400 batches | train loss 0.3069090 +| epoch 9 | 8195/ 8400 batches | train loss 0.3380783 +| epoch 9 | 8199/ 8400 batches | train loss 0.3079504 +| epoch 9 | 8203/ 8400 batches | train loss 0.2996869 +| epoch 9 | 8207/ 8400 batches | train loss 0.2732572 +| epoch 9 | 8211/ 8400 batches | train loss 0.3850113 +| epoch 9 | 8215/ 8400 batches | train loss 0.3329008 +| epoch 9 | 8219/ 8400 batches | train loss 0.2740282 +| epoch 9 | 8223/ 8400 batches | train loss 0.3677088 +| epoch 9 | 8227/ 8400 batches | train loss 0.3377428 +| epoch 9 | 8231/ 8400 batches | train loss 0.3222769 +| epoch 9 | 8235/ 8400 batches | train loss 0.3242382 +| epoch 9 | 8239/ 8400 batches | train loss 0.3810645 +| epoch 9 | 8243/ 8400 batches | train loss 0.3706023 +| epoch 9 | 8247/ 8400 batches | train loss 0.3086234 +| epoch 9 | 8251/ 8400 batches | train loss 0.3201187 +| epoch 9 | 8255/ 8400 batches | train loss 0.3120500 +| epoch 9 | 8259/ 8400 batches | train loss 0.3313632 +| epoch 9 | 8263/ 8400 batches | train loss 0.2849563 +| epoch 9 | 8267/ 8400 batches | train loss 0.2803227 +| epoch 9 | 8271/ 8400 batches | train loss 0.3471501 +| epoch 9 | 8275/ 8400 batches | train loss 0.3546588 +| epoch 9 | 8279/ 8400 batches | train loss 0.3350010 +| epoch 9 | 8283/ 8400 batches | train loss 0.2530090 +| epoch 9 | 8287/ 8400 batches | train loss 0.3817113 +| epoch 9 | 8291/ 8400 batches | train loss 0.3373283 +| epoch 9 | 8295/ 8400 batches | train loss 0.2962050 +| epoch 9 | 8299/ 8400 batches | train loss 0.3369934 +| epoch 9 | 8303/ 8400 batches | train loss 0.3281442 +| epoch 9 | 8307/ 8400 batches | train loss 0.3499225 +| epoch 9 | 8311/ 8400 batches | train loss 0.3455752 +| epoch 9 | 8315/ 8400 batches | train loss 0.3225790 +| epoch 9 | 8319/ 8400 batches | train loss 0.2788439 +| epoch 9 | 8323/ 8400 batches | train loss 0.2847553 +| epoch 9 | 8327/ 8400 batches | train loss 0.4073414 +| epoch 9 | 8331/ 8400 batches | train loss 0.3008702 +| epoch 9 | 8335/ 8400 batches | train loss 0.2991199 +| epoch 9 | 8339/ 8400 batches | train loss 0.3290033 +| epoch 9 | 8343/ 8400 batches | train loss 0.3119763 +| epoch 9 | 8347/ 8400 batches | train loss 0.3613736 +| epoch 9 | 8351/ 8400 batches | train loss 0.3182245 +| epoch 9 | 8355/ 8400 batches | train loss 0.2986279 +| epoch 9 | 8359/ 8400 batches | train loss 0.3557037 +| epoch 9 | 8363/ 8400 batches | train loss 0.2941407 +| epoch 9 | 8367/ 8400 batches | train loss 0.3204142 +| epoch 9 | 8371/ 8400 batches | train loss 0.3214309 +| epoch 9 | 8375/ 8400 batches | train loss 0.3669342 +| epoch 9 | 8379/ 8400 batches | train loss 0.3423240 +| epoch 9 | 8383/ 8400 batches | train loss 0.3387851 +| epoch 9 | 8387/ 8400 batches | train loss 0.3131461 +| epoch 9 | 8391/ 8400 batches | train loss 0.2947819 +| epoch 9 | 8395/ 8400 batches | train loss 0.2975794 +| epoch 9 | 8399/ 8400 batches | train loss 0.3411021 +-------------------------------------------------------------------------------- +| epoch 9 | 3/ 8400 batches | test loss 0.4850507 +| epoch 9 | 7/ 8400 batches | test loss 0.5745651 +| epoch 9 | 11/ 8400 batches | test loss 0.4728920 +| epoch 9 | 15/ 8400 batches | test loss 0.3526871 +| epoch 9 | 19/ 8400 batches | test loss 0.3869292 +| epoch 9 | 23/ 8400 batches | test loss 0.5135666 +| epoch 9 | 27/ 8400 batches | test loss 0.3888911 +| epoch 9 | 31/ 8400 batches | test loss 0.4748593 +| epoch 9 | 35/ 8400 batches | test loss 0.6296896 +| epoch 9 | 39/ 8400 batches | test loss 0.4360877 +| epoch 9 | 43/ 8400 batches | test loss 0.4187530 +| epoch 9 | 47/ 8400 batches | test loss 0.4408230 +| epoch 9 | 51/ 8400 batches | test loss 0.4191358 +| epoch 9 | 55/ 8400 batches | test loss 0.5011223 +| epoch 9 | 59/ 8400 batches | test loss 0.5376260 +| epoch 9 | 63/ 8400 batches | test loss 0.3779728 +| epoch 9 | 67/ 8400 batches | test loss 0.4604748 +| epoch 9 | 71/ 8400 batches | test loss 0.4450479 +| epoch 9 | 75/ 8400 batches | test loss 0.4405084 +| epoch 9 | 79/ 8400 batches | test loss 0.4028216 +| epoch 9 | 83/ 8400 batches | test loss 0.5639928 +| epoch 9 | 87/ 8400 batches | test loss 0.5226014 +| epoch 9 | 91/ 8400 batches | test loss 0.4388910 +| epoch 9 | 95/ 8400 batches | test loss 0.4040861 +| epoch 9 | 99/ 8400 batches | test loss 0.4982138 +| epoch 9 | 103/ 8400 batches | test loss 0.6412802 +| epoch 9 | 107/ 8400 batches | test loss 0.3105051 +| epoch 9 | 111/ 8400 batches | test loss 0.5638466 +| epoch 9 | 115/ 8400 batches | test loss 0.4109281 +| epoch 9 | 119/ 8400 batches | test loss 0.5193068 +| epoch 9 | 123/ 8400 batches | test loss 0.5972230 +| epoch 9 | 127/ 8400 batches | test loss 0.4166569 +| epoch 9 | 131/ 8400 batches | test loss 0.4721975 +| epoch 9 | 135/ 8400 batches | test loss 0.4916277 +| epoch 9 | 139/ 8400 batches | test loss 0.3718471 +| epoch 9 | 143/ 8400 batches | test loss 0.4071901 +| epoch 9 | 147/ 8400 batches | test loss 0.4889979 +| epoch 9 | 151/ 8400 batches | test loss 0.4037874 +| epoch 9 | 155/ 8400 batches | test loss 0.4472466 +| epoch 9 | 159/ 8400 batches | test loss 0.5686278 +| epoch 9 | 163/ 8400 batches | test loss 0.5792623 +| epoch 9 | 167/ 8400 batches | test loss 0.5216486 +| epoch 9 | 171/ 8400 batches | test loss 0.2712350 +| epoch 9 | 175/ 8400 batches | test loss 0.4972707 +| epoch 9 | 179/ 8400 batches | test loss 0.4591644 +| epoch 9 | 183/ 8400 batches | test loss 0.6031385 +| epoch 9 | 187/ 8400 batches | test loss 0.3922058 +| epoch 9 | 191/ 8400 batches | test loss 0.3455131 +| epoch 9 | 195/ 8400 batches | test loss 0.3435001 +| epoch 9 | 199/ 8400 batches | test loss 0.5283426 +| epoch 9 | 203/ 8400 batches | test loss 0.6276973 +| epoch 9 | 207/ 8400 batches | test loss 0.5881689 +| epoch 9 | 211/ 8400 batches | test loss 0.3690214 +| epoch 9 | 215/ 8400 batches | test loss 0.4900033 +| epoch 9 | 219/ 8400 batches | test loss 0.5521804 +| epoch 9 | 223/ 8400 batches | test loss 0.5930035 +| epoch 9 | 227/ 8400 batches | test loss 0.4112375 +| epoch 9 | 231/ 8400 batches | test loss 0.4448724 +| epoch 9 | 235/ 8400 batches | test loss 0.3950810 +| epoch 9 | 239/ 8400 batches | test loss 0.4846502 +| epoch 9 | 243/ 8400 batches | test loss 0.4740957 +| epoch 9 | 247/ 8400 batches | test loss 0.4680772 +| epoch 9 | 251/ 8400 batches | test loss 0.3951048 +| epoch 9 | 255/ 8400 batches | test loss 0.6974453 +| epoch 9 | 259/ 8400 batches | test loss 0.3019564 +| epoch 9 | 263/ 8400 batches | test loss 0.6666452 +| epoch 9 | 267/ 8400 batches | test loss 0.3729386 +| epoch 9 | 271/ 8400 batches | test loss 0.5056004 +| epoch 9 | 275/ 8400 batches | test loss 0.6699765 +| epoch 9 | 279/ 8400 batches | test loss 0.4627081 +| epoch 9 | 283/ 8400 batches | test loss 0.3727473 +| epoch 9 | 287/ 8400 batches | test loss 0.4615371 +| epoch 9 | 291/ 8400 batches | test loss 0.4111844 +| epoch 9 | 295/ 8400 batches | test loss 0.7216108 +| epoch 9 | 299/ 8400 batches | test loss 0.5097232 +| epoch 9 | 303/ 8400 batches | test loss 0.3953279 +| epoch 9 | 307/ 8400 batches | test loss 0.5322677 +| epoch 9 | 311/ 8400 batches | test loss 0.5390624 +| epoch 9 | 315/ 8400 batches | test loss 0.5024850 +| epoch 9 | 319/ 8400 batches | test loss 0.2786182 +| epoch 9 | 323/ 8400 batches | test loss 0.3595569 +| epoch 9 | 327/ 8400 batches | test loss 0.4229311 +| epoch 9 | 331/ 8400 batches | test loss 0.5248362 +| epoch 9 | 335/ 8400 batches | test loss 0.3887525 +| epoch 9 | 339/ 8400 batches | test loss 0.3965859 +| epoch 9 | 343/ 8400 batches | test loss 0.4105395 +| epoch 9 | 347/ 8400 batches | test loss 0.4482934 +| epoch 9 | 351/ 8400 batches | test loss 0.4190830 +| epoch 9 | 355/ 8400 batches | test loss 0.4735276 +| epoch 9 | 359/ 8400 batches | test loss 0.4508946 +| epoch 9 | 363/ 8400 batches | test loss 0.7966348 +| epoch 9 | 367/ 8400 batches | test loss 0.5010630 +| epoch 9 | 371/ 8400 batches | test loss 0.5672479 +| epoch 9 | 375/ 8400 batches | test loss 0.5143660 +| epoch 9 | 379/ 8400 batches | test loss 0.4128497 +| epoch 9 | 383/ 8400 batches | test loss 0.5713234 +| epoch 9 | 387/ 8400 batches | test loss 0.4294841 +| epoch 9 | 391/ 8400 batches | test loss 0.5590248 +| epoch 9 | 395/ 8400 batches | test loss 0.4840580 +| epoch 9 | 399/ 8400 batches | test loss 0.4807163 +| epoch 9 | 403/ 8400 batches | test loss 0.4069034 +| epoch 9 | 407/ 8400 batches | test loss 0.3705252 +| epoch 9 | 411/ 8400 batches | test loss 0.4722413 +| epoch 9 | 415/ 8400 batches | test loss 0.5496618 +| epoch 9 | 419/ 8400 batches | test loss 0.6125616 +| epoch 9 | 423/ 8400 batches | test loss 0.6840438 +| epoch 9 | 427/ 8400 batches | test loss 0.4263595 +| epoch 9 | 431/ 8400 batches | test loss 0.3833808 +| epoch 9 | 435/ 8400 batches | test loss 0.3897960 +| epoch 9 | 439/ 8400 batches | test loss 0.4322799 +| epoch 9 | 443/ 8400 batches | test loss 0.3564966 +| epoch 9 | 447/ 8400 batches | test loss 0.3775167 +| epoch 9 | 451/ 8400 batches | test loss 0.5381310 +| epoch 9 | 455/ 8400 batches | test loss 0.5159290 +| epoch 9 | 459/ 8400 batches | test loss 0.5027927 +| epoch 9 | 463/ 8400 batches | test loss 0.6250687 +| epoch 9 | 467/ 8400 batches | test loss 0.4109919 +| epoch 9 | 471/ 8400 batches | test loss 0.4459801 +| epoch 9 | 475/ 8400 batches | test loss 0.4007526 +| epoch 9 | 479/ 8400 batches | test loss 0.4819851 +| epoch 9 | 483/ 8400 batches | test loss 0.5762263 +| epoch 9 | 487/ 8400 batches | test loss 0.4528132 +| epoch 9 | 491/ 8400 batches | test loss 0.4977708 +| epoch 9 | 495/ 8400 batches | test loss 0.4569595 +| epoch 9 | 499/ 8400 batches | test loss 0.4037161 +| epoch 9 | 503/ 8400 batches | test loss 0.4010952 +| epoch 9 | 507/ 8400 batches | test loss 0.5293763 +| epoch 9 | 511/ 8400 batches | test loss 0.4254925 +| epoch 9 | 515/ 8400 batches | test loss 0.5514655 +| epoch 9 | 519/ 8400 batches | test loss 0.4610530 +| epoch 9 | 523/ 8400 batches | test loss 0.4703125 +| epoch 9 | 527/ 8400 batches | test loss 0.4687708 +| epoch 9 | 531/ 8400 batches | test loss 0.4079504 +| epoch 9 | 535/ 8400 batches | test loss 0.3530968 +| epoch 9 | 539/ 8400 batches | test loss 0.3672993 +| epoch 9 | 543/ 8400 batches | test loss 0.3311952 +| epoch 9 | 547/ 8400 batches | test loss 0.7656555 +| epoch 9 | 551/ 8400 batches | test loss 0.4123887 +| epoch 9 | 555/ 8400 batches | test loss 0.3375015 +| epoch 9 | 559/ 8400 batches | test loss 0.5951123 +| epoch 9 | 563/ 8400 batches | test loss 0.3570145 +| epoch 9 | 567/ 8400 batches | test loss 0.3601876 +| epoch 9 | 571/ 8400 batches | test loss 0.4382093 +| epoch 9 | 575/ 8400 batches | test loss 0.3953505 +| epoch 9 | 579/ 8400 batches | test loss 0.4574744 +| epoch 9 | 583/ 8400 batches | test loss 0.4525927 +| epoch 9 | 587/ 8400 batches | test loss 0.4752258 +| epoch 9 | 591/ 8400 batches | test loss 0.4380386 +| epoch 9 | 595/ 8400 batches | test loss 0.4279025 +| epoch 9 | 599/ 8400 batches | test loss 0.5294403 +| epoch 9 | 603/ 8400 batches | test loss 0.5390043 +| epoch 9 | 607/ 8400 batches | test loss 0.4597950 +| epoch 9 | 611/ 8400 batches | test loss 0.4567797 +| epoch 9 | 615/ 8400 batches | test loss 0.4566609 +| epoch 9 | 619/ 8400 batches | test loss 0.3858718 +| epoch 9 | 623/ 8400 batches | test loss 0.4015958 +| epoch 9 | 627/ 8400 batches | test loss 0.4651999 +| epoch 9 | 631/ 8400 batches | test loss 0.4409787 +| epoch 9 | 635/ 8400 batches | test loss 0.5488555 +| epoch 9 | 639/ 8400 batches | test loss 0.4000740 +| epoch 9 | 643/ 8400 batches | test loss 0.4715204 +| epoch 9 | 647/ 8400 batches | test loss 0.5548841 +| epoch 9 | 651/ 8400 batches | test loss 0.4042500 +| epoch 9 | 655/ 8400 batches | test loss 0.5129629 +| epoch 9 | 659/ 8400 batches | test loss 0.5303110 +| epoch 9 | 663/ 8400 batches | test loss 0.4407651 +| epoch 9 | 667/ 8400 batches | test loss 0.5825147 +| epoch 9 | 671/ 8400 batches | test loss 0.5243859 +| epoch 9 | 675/ 8400 batches | test loss 0.6130598 +| epoch 9 | 679/ 8400 batches | test loss 0.5871232 +| epoch 9 | 683/ 8400 batches | test loss 0.3678385 +| epoch 9 | 687/ 8400 batches | test loss 0.5465757 +| epoch 9 | 691/ 8400 batches | test loss 0.5260797 +| epoch 9 | 695/ 8400 batches | test loss 0.4799998 +| epoch 9 | 699/ 8400 batches | test loss 0.4301602 +| epoch 9 | 703/ 8400 batches | test loss 0.3699152 +| epoch 9 | 707/ 8400 batches | test loss 0.4037270 +| epoch 9 | 711/ 8400 batches | test loss 0.3523848 +| epoch 9 | 715/ 8400 batches | test loss 0.3987799 +| epoch 9 | 719/ 8400 batches | test loss 0.4906454 +| epoch 9 | 723/ 8400 batches | test loss 0.4370539 +| epoch 9 | 727/ 8400 batches | test loss 0.4416398 +| epoch 9 | 731/ 8400 batches | test loss 0.5278384 +| epoch 9 | 735/ 8400 batches | test loss 0.8054965 +| epoch 9 | 739/ 8400 batches | test loss 0.4193875 +| epoch 9 | 743/ 8400 batches | test loss 0.4725763 +| epoch 9 | 747/ 8400 batches | test loss 0.4313666 +| epoch 9 | 751/ 8400 batches | test loss 0.5760924 +| epoch 9 | 755/ 8400 batches | test loss 0.3457981 +| epoch 9 | 759/ 8400 batches | test loss 0.4327738 +| epoch 9 | 763/ 8400 batches | test loss 0.4045839 +| epoch 9 | 767/ 8400 batches | test loss 0.3704109 +| epoch 9 | 771/ 8400 batches | test loss 0.4772163 +| epoch 9 | 775/ 8400 batches | test loss 0.4161034 +| epoch 9 | 779/ 8400 batches | test loss 0.4542706 +| epoch 9 | 783/ 8400 batches | test loss 0.3996374 +| epoch 9 | 787/ 8400 batches | test loss 0.5145948 +| epoch 9 | 791/ 8400 batches | test loss 0.5576088 +| epoch 9 | 795/ 8400 batches | test loss 0.4989353 +| epoch 9 | 799/ 8400 batches | test loss 0.4420488 +| epoch 9 | 803/ 8400 batches | test loss 0.4137357 +| epoch 9 | 807/ 8400 batches | test loss 0.4911622 +| epoch 9 | 811/ 8400 batches | test loss 0.6528773 +| epoch 9 | 815/ 8400 batches | test loss 0.4098824 +| epoch 9 | 819/ 8400 batches | test loss 0.4064204 +| epoch 9 | 823/ 8400 batches | test loss 0.4093545 +| epoch 9 | 827/ 8400 batches | test loss 0.4614688 +| epoch 9 | 831/ 8400 batches | test loss 0.5201464 +| epoch 9 | 835/ 8400 batches | test loss 0.5385808 +| epoch 9 | 839/ 8400 batches | test loss 0.4819739 +| epoch 9 | 843/ 8400 batches | test loss 0.4132513 +| epoch 9 | 847/ 8400 batches | test loss 0.5012324 +| epoch 9 | 851/ 8400 batches | test loss 0.3790458 +| epoch 9 | 855/ 8400 batches | test loss 0.5456207 +| epoch 9 | 859/ 8400 batches | test loss 0.6014564 +| epoch 9 | 863/ 8400 batches | test loss 0.4667509 +| epoch 9 | 867/ 8400 batches | test loss 0.5499912 +| epoch 9 | 871/ 8400 batches | test loss 0.3604141 +| epoch 9 | 875/ 8400 batches | test loss 0.4285838 +| epoch 9 | 879/ 8400 batches | test loss 0.4214161 +| epoch 9 | 883/ 8400 batches | test loss 0.4181225 +| epoch 9 | 887/ 8400 batches | test loss 0.4787684 +| epoch 9 | 891/ 8400 batches | test loss 0.4936653 +| epoch 9 | 895/ 8400 batches | test loss 0.5281929 +| epoch 9 | 899/ 8400 batches | test loss 0.4676527 +| epoch 9 | 903/ 8400 batches | test loss 0.4866753 +| epoch 9 | 907/ 8400 batches | test loss 0.4999486 +| epoch 9 | 911/ 8400 batches | test loss 0.4314350 +| epoch 9 | 915/ 8400 batches | test loss 0.4981336 +| epoch 9 | 919/ 8400 batches | test loss 0.4054512 +| epoch 9 | 923/ 8400 batches | test loss 0.4511358 +| epoch 9 | 927/ 8400 batches | test loss 0.4636427 +| epoch 9 | 931/ 8400 batches | test loss 0.7428011 +| epoch 9 | 935/ 8400 batches | test loss 0.5344094 +| epoch 9 | 939/ 8400 batches | test loss 0.4108050 +| epoch 9 | 943/ 8400 batches | test loss 0.5147802 +| epoch 9 | 947/ 8400 batches | test loss 0.3186386 +| epoch 9 | 951/ 8400 batches | test loss 0.4555514 +| epoch 9 | 955/ 8400 batches | test loss 0.3882652 +| epoch 9 | 959/ 8400 batches | test loss 0.4030536 +| epoch 9 | 963/ 8400 batches | test loss 0.5111593 +| epoch 9 | 967/ 8400 batches | test loss 0.3730386 +| epoch 9 | 971/ 8400 batches | test loss 0.5926003 +| epoch 9 | 975/ 8400 batches | test loss 0.4635642 +| epoch 9 | 979/ 8400 batches | test loss 0.4180241 +| epoch 9 | 983/ 8400 batches | test loss 0.3828244 +| epoch 9 | 987/ 8400 batches | test loss 0.5649460 +| epoch 9 | 991/ 8400 batches | test loss 0.4239823 +| epoch 9 | 995/ 8400 batches | test loss 0.4329443 +| epoch 9 | 999/ 8400 batches | test loss 0.3998399 +| epoch 9 | 1003/ 8400 batches | test loss 0.4716291 +| epoch 9 | 1007/ 8400 batches | test loss 0.4902328 +| epoch 9 | 1011/ 8400 batches | test loss 0.4553717 +| epoch 9 | 1015/ 8400 batches | test loss 0.4631110 +| epoch 9 | 1019/ 8400 batches | test loss 0.4592448 +| epoch 9 | 1023/ 8400 batches | test loss 0.4743026 +| epoch 9 | 1027/ 8400 batches | test loss 0.4333460 +| epoch 9 | 1031/ 8400 batches | test loss 0.5437580 +| epoch 9 | 1035/ 8400 batches | test loss 0.3715836 +| epoch 9 | 1039/ 8400 batches | test loss 0.4445147 +| epoch 9 | 1043/ 8400 batches | test loss 0.5003451 +| epoch 9 | 1047/ 8400 batches | test loss 0.6392976 +| epoch 9 | 1051/ 8400 batches | test loss 0.4368035 +| epoch 9 | 1055/ 8400 batches | test loss 0.5591561 +| epoch 9 | 1059/ 8400 batches | test loss 0.2885050 +| epoch 9 | 1063/ 8400 batches | test loss 0.5723406 +| epoch 9 | 1067/ 8400 batches | test loss 0.5356275 +| epoch 9 | 1071/ 8400 batches | test loss 0.5146560 +| epoch 9 | 1075/ 8400 batches | test loss 0.3564360 +| epoch 9 | 1079/ 8400 batches | test loss 0.4928142 +| epoch 9 | 1083/ 8400 batches | test loss 0.4489763 +| epoch 9 | 1087/ 8400 batches | test loss 0.5365609 +| epoch 9 | 1091/ 8400 batches | test loss 0.4032424 +| epoch 9 | 1095/ 8400 batches | test loss 0.5071524 +| epoch 9 | 1099/ 8400 batches | test loss 0.3714643 +| epoch 9 | 1103/ 8400 batches | test loss 0.4136962 +| epoch 9 | 1107/ 8400 batches | test loss 1.0758456 +| epoch 9 | 1111/ 8400 batches | test loss 0.4559159 +| epoch 9 | 1115/ 8400 batches | test loss 0.4817197 +| epoch 9 | 1119/ 8400 batches | test loss 0.3945744 +| epoch 9 | 1123/ 8400 batches | test loss 0.3898253 +| epoch 9 | 1127/ 8400 batches | test loss 0.5457424 +| epoch 9 | 1131/ 8400 batches | test loss 0.5420464 +| epoch 9 | 1135/ 8400 batches | test loss 0.5536470 +| epoch 9 | 1139/ 8400 batches | test loss 0.4246700 +| epoch 9 | 1143/ 8400 batches | test loss 0.4836276 +| epoch 9 | 1147/ 8400 batches | test loss 0.4471265 +| epoch 9 | 1151/ 8400 batches | test loss 0.4132700 +| epoch 9 | 1155/ 8400 batches | test loss 0.6157596 +| epoch 9 | 1159/ 8400 batches | test loss 0.4825017 +| epoch 9 | 1163/ 8400 batches | test loss 0.5392438 +| epoch 9 | 1167/ 8400 batches | test loss 0.4898979 +| epoch 9 | 1171/ 8400 batches | test loss 0.4796363 +| epoch 9 | 1175/ 8400 batches | test loss 0.3797697 +| epoch 9 | 1179/ 8400 batches | test loss 0.5236354 +| epoch 9 | 1183/ 8400 batches | test loss 0.3762577 +| epoch 9 | 1187/ 8400 batches | test loss 0.4235601 +| epoch 9 | 1191/ 8400 batches | test loss 0.4458531 +| epoch 9 | 1195/ 8400 batches | test loss 0.4290632 +| epoch 9 | 1199/ 8400 batches | test loss 0.3336345 +| epoch 9 | 1203/ 8400 batches | test loss 0.3826407 +| epoch 9 | 1207/ 8400 batches | test loss 0.4755690 +| epoch 9 | 1211/ 8400 batches | test loss 0.5076611 +| epoch 9 | 1215/ 8400 batches | test loss 0.5235977 +| epoch 9 | 1219/ 8400 batches | test loss 0.4294205 +| epoch 9 | 1223/ 8400 batches | test loss 0.4369418 +| epoch 9 | 1227/ 8400 batches | test loss 0.6349205 +| epoch 9 | 1231/ 8400 batches | test loss 0.4608330 +| epoch 9 | 1235/ 8400 batches | test loss 0.5334616 +| epoch 9 | 1239/ 8400 batches | test loss 0.6032078 +| epoch 9 | 1243/ 8400 batches | test loss 0.4806366 +| epoch 9 | 1247/ 8400 batches | test loss 0.5810580 +| epoch 9 | 1251/ 8400 batches | test loss 0.5817192 +| epoch 9 | 1255/ 8400 batches | test loss 0.5065624 +| epoch 9 | 1259/ 8400 batches | test loss 0.5732525 +| epoch 9 | 1263/ 8400 batches | test loss 0.4593709 +| epoch 9 | 1267/ 8400 batches | test loss 0.3807599 +| epoch 9 | 1271/ 8400 batches | test loss 0.4371908 +| epoch 9 | 1275/ 8400 batches | test loss 0.4559714 +| epoch 9 | 1279/ 8400 batches | test loss 0.4507539 +| epoch 9 | 1283/ 8400 batches | test loss 0.4764314 +| epoch 9 | 1287/ 8400 batches | test loss 0.4520053 +| epoch 9 | 1291/ 8400 batches | test loss 0.4809768 +| epoch 9 | 1295/ 8400 batches | test loss 0.4490680 +| epoch 9 | 1299/ 8400 batches | test loss 0.7217556 +| epoch 9 | 1303/ 8400 batches | test loss 0.4717904 +| epoch 9 | 1307/ 8400 batches | test loss 0.3346490 +| epoch 9 | 1311/ 8400 batches | test loss 0.3111556 +| epoch 9 | 1315/ 8400 batches | test loss 0.5059630 +| epoch 9 | 1319/ 8400 batches | test loss 0.5584396 +| epoch 9 | 1323/ 8400 batches | test loss 0.6105714 +| epoch 9 | 1327/ 8400 batches | test loss 0.4111920 +| epoch 9 | 1331/ 8400 batches | test loss 0.4529228 +| epoch 9 | 1335/ 8400 batches | test loss 0.4789631 +| epoch 9 | 1339/ 8400 batches | test loss 0.6513817 +| epoch 9 | 1343/ 8400 batches | test loss 0.3931453 +| epoch 9 | 1347/ 8400 batches | test loss 0.4156718 +| epoch 9 | 1351/ 8400 batches | test loss 0.4276896 +| epoch 9 | 1355/ 8400 batches | test loss 0.6381419 +| epoch 9 | 1359/ 8400 batches | test loss 0.5829839 +| epoch 9 | 1363/ 8400 batches | test loss 0.4632125 +| epoch 9 | 1367/ 8400 batches | test loss 0.5195358 +| epoch 9 | 1371/ 8400 batches | test loss 0.4449635 +| epoch 9 | 1375/ 8400 batches | test loss 0.5286962 +| epoch 9 | 1379/ 8400 batches | test loss 0.5326182 +| epoch 9 | 1383/ 8400 batches | test loss 0.4089254 +| epoch 9 | 1387/ 8400 batches | test loss 0.4297742 +| epoch 9 | 1391/ 8400 batches | test loss 0.3659588 +| epoch 9 | 1395/ 8400 batches | test loss 0.3571203 +| epoch 9 | 1399/ 8400 batches | test loss 0.4456785 +| epoch 9 | 1403/ 8400 batches | test loss 0.5667477 +| epoch 9 | 1407/ 8400 batches | test loss 0.6209578 +| epoch 9 | 1411/ 8400 batches | test loss 0.4381588 +| epoch 9 | 1415/ 8400 batches | test loss 0.3990219 +| epoch 9 | 1419/ 8400 batches | test loss 0.5199023 +| epoch 9 | 1423/ 8400 batches | test loss 0.4048390 +| epoch 9 | 1427/ 8400 batches | test loss 0.4548040 +| epoch 9 | 1431/ 8400 batches | test loss 0.4313718 +| epoch 9 | 1435/ 8400 batches | test loss 0.3686640 +| epoch 9 | 1439/ 8400 batches | test loss 0.4479431 +| epoch 9 | 1443/ 8400 batches | test loss 0.4793346 +| epoch 9 | 1447/ 8400 batches | test loss 0.5133035 +| epoch 9 | 1451/ 8400 batches | test loss 0.5775806 +| epoch 9 | 1455/ 8400 batches | test loss 0.7048283 +| epoch 9 | 1459/ 8400 batches | test loss 0.4905559 +| epoch 9 | 1463/ 8400 batches | test loss 0.4414766 +| epoch 9 | 1467/ 8400 batches | test loss 0.4212092 +| epoch 9 | 1471/ 8400 batches | test loss 0.4349784 +| epoch 9 | 1475/ 8400 batches | test loss 0.7030874 +| epoch 9 | 1479/ 8400 batches | test loss 0.4935629 +| epoch 9 | 1483/ 8400 batches | test loss 0.4623337 +| epoch 9 | 1487/ 8400 batches | test loss 0.5528999 +| epoch 9 | 1491/ 8400 batches | test loss 0.4286973 +| epoch 9 | 1495/ 8400 batches | test loss 0.4013879 +| epoch 9 | 1499/ 8400 batches | test loss 0.4970738 +| epoch 9 | 1503/ 8400 batches | test loss 0.3889400 +| epoch 9 | 1507/ 8400 batches | test loss 0.4065294 +| epoch 9 | 1511/ 8400 batches | test loss 0.5526354 +| epoch 9 | 1515/ 8400 batches | test loss 0.5946072 +| epoch 9 | 1519/ 8400 batches | test loss 0.4376709 +| epoch 9 | 1523/ 8400 batches | test loss 0.4484479 +| epoch 9 | 1527/ 8400 batches | test loss 0.3984787 +| epoch 9 | 1531/ 8400 batches | test loss 0.3956454 +| epoch 9 | 1535/ 8400 batches | test loss 0.6024825 +| epoch 9 | 1539/ 8400 batches | test loss 0.5046113 +| epoch 9 | 1543/ 8400 batches | test loss 0.6023853 +| epoch 9 | 1547/ 8400 batches | test loss 0.5273404 +| epoch 9 | 1551/ 8400 batches | test loss 0.4614863 +| epoch 9 | 1555/ 8400 batches | test loss 0.3872793 +| epoch 9 | 1559/ 8400 batches | test loss 0.4656185 +| epoch 9 | 1563/ 8400 batches | test loss 0.4830496 +| epoch 9 | 1567/ 8400 batches | test loss 0.6643814 +| epoch 9 | 1571/ 8400 batches | test loss 0.5050377 +| epoch 9 | 1575/ 8400 batches | test loss 0.5648444 +| epoch 9 | 1579/ 8400 batches | test loss 0.3463912 +| epoch 9 | 1583/ 8400 batches | test loss 0.5387340 +| epoch 9 | 1587/ 8400 batches | test loss 0.3777742 +| epoch 9 | 1591/ 8400 batches | test loss 0.3332219 +| epoch 9 | 1595/ 8400 batches | test loss 0.4276455 +| epoch 9 | 1599/ 8400 batches | test loss 0.4300003 +| epoch 9 | 1603/ 8400 batches | test loss 0.6045229 +| epoch 9 | 1607/ 8400 batches | test loss 0.5544255 +| epoch 9 | 1611/ 8400 batches | test loss 0.4235407 +| epoch 9 | 1615/ 8400 batches | test loss 0.4566001 +| epoch 9 | 1619/ 8400 batches | test loss 0.3945450 +| epoch 9 | 1623/ 8400 batches | test loss 0.4553405 +| epoch 9 | 1627/ 8400 batches | test loss 0.4358796 +| epoch 9 | 1631/ 8400 batches | test loss 0.4865136 +| epoch 9 | 1635/ 8400 batches | test loss 0.3538812 +| epoch 9 | 1639/ 8400 batches | test loss 0.6271750 +| epoch 9 | 1643/ 8400 batches | test loss 0.4467639 +| epoch 9 | 1647/ 8400 batches | test loss 0.5083119 +| epoch 9 | 1651/ 8400 batches | test loss 0.3940715 +| epoch 9 | 1655/ 8400 batches | test loss 0.3849423 +| epoch 9 | 1659/ 8400 batches | test loss 0.5023150 +| epoch 9 | 1663/ 8400 batches | test loss 0.6518351 +| epoch 9 | 1667/ 8400 batches | test loss 0.3958777 +| epoch 9 | 1671/ 8400 batches | test loss 0.7025504 +| epoch 9 | 1675/ 8400 batches | test loss 0.4837829 +| epoch 9 | 1679/ 8400 batches | test loss 0.4070571 +| epoch 9 | 1683/ 8400 batches | test loss 0.4942088 +| epoch 9 | 1687/ 8400 batches | test loss 0.4583020 +| epoch 9 | 1691/ 8400 batches | test loss 0.5409708 +| epoch 9 | 1695/ 8400 batches | test loss 0.6042182 +| epoch 9 | 1699/ 8400 batches | test loss 0.4569854 +| epoch 9 | 1703/ 8400 batches | test loss 0.7509052 +| epoch 9 | 1707/ 8400 batches | test loss 0.4282448 +| epoch 9 | 1711/ 8400 batches | test loss 0.5921077 +| epoch 9 | 1715/ 8400 batches | test loss 0.4109118 +| epoch 9 | 1719/ 8400 batches | test loss 0.5278816 +| epoch 9 | 1723/ 8400 batches | test loss 0.6736060 +| epoch 9 | 1727/ 8400 batches | test loss 0.3963952 +| epoch 9 | 1731/ 8400 batches | test loss 0.3668939 +| epoch 9 | 1735/ 8400 batches | test loss 0.4014978 +| epoch 9 | 1739/ 8400 batches | test loss 0.5206909 +| epoch 9 | 1743/ 8400 batches | test loss 0.4081392 +| epoch 9 | 1747/ 8400 batches | test loss 0.5867339 +| epoch 9 | 1751/ 8400 batches | test loss 0.6184013 +| epoch 9 | 1755/ 8400 batches | test loss 0.3184993 +| epoch 9 | 1759/ 8400 batches | test loss 0.4552292 +| epoch 9 | 1763/ 8400 batches | test loss 0.4402792 +| epoch 9 | 1767/ 8400 batches | test loss 0.4106651 +| epoch 9 | 1771/ 8400 batches | test loss 0.5025952 +| epoch 9 | 1775/ 8400 batches | test loss 0.5060750 +| epoch 9 | 1779/ 8400 batches | test loss 0.4943766 +| epoch 9 | 1783/ 8400 batches | test loss 0.5274035 +| epoch 9 | 1787/ 8400 batches | test loss 0.4644930 +| epoch 9 | 1791/ 8400 batches | test loss 0.4878535 +| epoch 9 | 1795/ 8400 batches | test loss 0.3978343 +| epoch 9 | 1799/ 8400 batches | test loss 0.4576277 +| epoch 9 | 1803/ 8400 batches | test loss 0.4642718 +| epoch 9 | 1807/ 8400 batches | test loss 0.5169598 +| epoch 9 | 1811/ 8400 batches | test loss 0.4397889 +| epoch 9 | 1815/ 8400 batches | test loss 0.3850201 +| epoch 9 | 1819/ 8400 batches | test loss 0.5076764 +| epoch 9 | 1823/ 8400 batches | test loss 0.3712531 +| epoch 9 | 1827/ 8400 batches | test loss 0.5599821 +| epoch 9 | 1831/ 8400 batches | test loss 0.5495659 +| epoch 9 | 1835/ 8400 batches | test loss 0.4687477 +| epoch 9 | 1839/ 8400 batches | test loss 0.2599484 +| epoch 9 | 1843/ 8400 batches | test loss 0.4958151 +| epoch 9 | 1847/ 8400 batches | test loss 0.3616759 +| epoch 9 | 1851/ 8400 batches | test loss 0.5607272 +| epoch 9 | 1855/ 8400 batches | test loss 0.5778729 +| epoch 9 | 1859/ 8400 batches | test loss 0.6930687 +| epoch 9 | 1863/ 8400 batches | test loss 0.5006396 +| epoch 9 | 1867/ 8400 batches | test loss 0.4608914 +| epoch 9 | 1871/ 8400 batches | test loss 0.4667519 +| epoch 9 | 1875/ 8400 batches | test loss 0.5612242 +| epoch 9 | 1879/ 8400 batches | test loss 0.4411975 +| epoch 9 | 1883/ 8400 batches | test loss 0.4588492 +| epoch 9 | 1887/ 8400 batches | test loss 0.6197429 +| epoch 9 | 1891/ 8400 batches | test loss 0.3921402 +| epoch 9 | 1895/ 8400 batches | test loss 0.4131153 +| epoch 9 | 1899/ 8400 batches | test loss 0.4877680 +| epoch 9 | 1903/ 8400 batches | test loss 0.5026062 +| epoch 9 | 1907/ 8400 batches | test loss 0.4810208 +| epoch 9 | 1911/ 8400 batches | test loss 0.4097261 +| epoch 9 | 1915/ 8400 batches | test loss 0.4806910 +| epoch 9 | 1919/ 8400 batches | test loss 0.4111363 +| epoch 9 | 1923/ 8400 batches | test loss 0.7067521 +| epoch 9 | 1927/ 8400 batches | test loss 0.4284302 +| epoch 9 | 1931/ 8400 batches | test loss 0.5798503 +| epoch 9 | 1935/ 8400 batches | test loss 0.5586801 +| epoch 9 | 1939/ 8400 batches | test loss 0.6630903 +| epoch 9 | 1943/ 8400 batches | test loss 0.5210596 +| epoch 9 | 1947/ 8400 batches | test loss 0.5989459 +| epoch 9 | 1951/ 8400 batches | test loss 0.4288674 +| epoch 9 | 1955/ 8400 batches | test loss 0.5455449 +| epoch 9 | 1959/ 8400 batches | test loss 0.4092100 +| epoch 9 | 1963/ 8400 batches | test loss 0.5091252 +| epoch 9 | 1967/ 8400 batches | test loss 0.4740638 +| epoch 9 | 1971/ 8400 batches | test loss 0.4636278 +| epoch 9 | 1975/ 8400 batches | test loss 0.4482412 +| epoch 9 | 1979/ 8400 batches | test loss 0.3767650 +| epoch 9 | 1983/ 8400 batches | test loss 0.4995697 +| epoch 9 | 1987/ 8400 batches | test loss 0.4458403 +| epoch 9 | 1991/ 8400 batches | test loss 0.5903714 +| epoch 9 | 1995/ 8400 batches | test loss 0.3861139 +| epoch 9 | 1999/ 8400 batches | test loss 0.4546342 +| epoch 9 | 2003/ 8400 batches | test loss 0.4545894 +| epoch 9 | 2007/ 8400 batches | test loss 0.4867505 +| epoch 9 | 2011/ 8400 batches | test loss 0.5448004 +| epoch 9 | 2015/ 8400 batches | test loss 0.4937486 +| epoch 9 | 2019/ 8400 batches | test loss 0.4949281 +| epoch 9 | 2023/ 8400 batches | test loss 0.7076372 +| epoch 9 | 2027/ 8400 batches | test loss 0.4474798 +| epoch 9 | 2031/ 8400 batches | test loss 0.6101798 +| epoch 9 | 2035/ 8400 batches | test loss 0.3877464 +| epoch 9 | 2039/ 8400 batches | test loss 0.4512075 +| epoch 9 | 2043/ 8400 batches | test loss 0.3580978 +| epoch 9 | 2047/ 8400 batches | test loss 0.5788379 +| epoch 9 | 2051/ 8400 batches | test loss 0.6626593 +| epoch 9 | 2055/ 8400 batches | test loss 0.4358874 +| epoch 9 | 2059/ 8400 batches | test loss 0.3428524 +| epoch 9 | 2063/ 8400 batches | test loss 0.3577265 +| epoch 9 | 2067/ 8400 batches | test loss 0.4893416 +| epoch 9 | 2071/ 8400 batches | test loss 0.4388829 +| epoch 9 | 2075/ 8400 batches | test loss 0.4658933 +| epoch 9 | 2079/ 8400 batches | test loss 0.4336953 +| epoch 9 | 2083/ 8400 batches | test loss 0.4425132 +| epoch 9 | 2087/ 8400 batches | test loss 0.3175943 +| epoch 9 | 2091/ 8400 batches | test loss 0.4722184 +| epoch 9 | 2095/ 8400 batches | test loss 0.5734915 +| epoch 9 | 2099/ 8400 batches | test loss 0.7648449 +| epoch 9 | final test loss 0.4713, do not save model! +-------------------------------------------------------------------------------- +| epoch 10 | 3/ 8400 batches | train loss 0.3232364 +| epoch 10 | 7/ 8400 batches | train loss 0.2923558 +| epoch 10 | 11/ 8400 batches | train loss 0.3955371 +| epoch 10 | 15/ 8400 batches | train loss 0.3275424 +| epoch 10 | 19/ 8400 batches | train loss 0.4090388 +| epoch 10 | 23/ 8400 batches | train loss 0.3553511 +| epoch 10 | 27/ 8400 batches | train loss 0.3349735 +| epoch 10 | 31/ 8400 batches | train loss 0.3245984 +| epoch 10 | 35/ 8400 batches | train loss 0.3318747 +| epoch 10 | 39/ 8400 batches | train loss 0.3239557 +| epoch 10 | 43/ 8400 batches | train loss 0.3323262 +| epoch 10 | 47/ 8400 batches | train loss 0.3100758 +| epoch 10 | 51/ 8400 batches | train loss 0.3170105 +| epoch 10 | 55/ 8400 batches | train loss 0.2712895 +| epoch 10 | 59/ 8400 batches | train loss 0.3539303 +| epoch 10 | 63/ 8400 batches | train loss 0.3500175 +| epoch 10 | 67/ 8400 batches | train loss 0.3083436 +| epoch 10 | 71/ 8400 batches | train loss 0.3213920 +| epoch 10 | 75/ 8400 batches | train loss 0.2696422 +| epoch 10 | 79/ 8400 batches | train loss 0.3272940 +| epoch 10 | 83/ 8400 batches | train loss 0.3549967 +| epoch 10 | 87/ 8400 batches | train loss 0.3158956 +| epoch 10 | 91/ 8400 batches | train loss 0.2202557 +| epoch 10 | 95/ 8400 batches | train loss 0.3074650 +| epoch 10 | 99/ 8400 batches | train loss 0.2941014 +| epoch 10 | 103/ 8400 batches | train loss 0.3042560 +| epoch 10 | 107/ 8400 batches | train loss 0.2942834 +| epoch 10 | 111/ 8400 batches | train loss 0.3428912 +| epoch 10 | 115/ 8400 batches | train loss 0.3295730 +| epoch 10 | 119/ 8400 batches | train loss 0.3175376 +| epoch 10 | 123/ 8400 batches | train loss 0.3264304 +| epoch 10 | 127/ 8400 batches | train loss 0.2594572 +| epoch 10 | 131/ 8400 batches | train loss 0.3998199 +| epoch 10 | 135/ 8400 batches | train loss 0.2897680 +| epoch 10 | 139/ 8400 batches | train loss 0.3313028 +| epoch 10 | 143/ 8400 batches | train loss 0.3487606 +| epoch 10 | 147/ 8400 batches | train loss 0.3056199 +| epoch 10 | 151/ 8400 batches | train loss 0.3627654 +| epoch 10 | 155/ 8400 batches | train loss 0.3436278 +| epoch 10 | 159/ 8400 batches | train loss 0.3247622 +| epoch 10 | 163/ 8400 batches | train loss 0.3855877 +| epoch 10 | 167/ 8400 batches | train loss 0.3515552 +| epoch 10 | 171/ 8400 batches | train loss 0.2864555 +| epoch 10 | 175/ 8400 batches | train loss 0.3261623 +| epoch 10 | 179/ 8400 batches | train loss 0.3068298 +| epoch 10 | 183/ 8400 batches | train loss 0.3028671 +| epoch 10 | 187/ 8400 batches | train loss 0.3065233 +| epoch 10 | 191/ 8400 batches | train loss 0.3531341 +| epoch 10 | 195/ 8400 batches | train loss 0.3226289 +| epoch 10 | 199/ 8400 batches | train loss 0.2748984 +| epoch 10 | 203/ 8400 batches | train loss 0.2422282 +| epoch 10 | 207/ 8400 batches | train loss 0.3548192 +| epoch 10 | 211/ 8400 batches | train loss 0.3386458 +| epoch 10 | 215/ 8400 batches | train loss 0.3679788 +| epoch 10 | 219/ 8400 batches | train loss 0.3564917 +| epoch 10 | 223/ 8400 batches | train loss 0.3171652 +| epoch 10 | 227/ 8400 batches | train loss 0.2577446 +| epoch 10 | 231/ 8400 batches | train loss 0.3758672 +| epoch 10 | 235/ 8400 batches | train loss 0.3316230 +| epoch 10 | 239/ 8400 batches | train loss 0.2856455 +| epoch 10 | 243/ 8400 batches | train loss 0.3350492 +| epoch 10 | 247/ 8400 batches | train loss 0.3699831 +| epoch 10 | 251/ 8400 batches | train loss 0.2974151 +| epoch 10 | 255/ 8400 batches | train loss 0.3028648 +| epoch 10 | 259/ 8400 batches | train loss 0.3412865 +| epoch 10 | 263/ 8400 batches | train loss 0.2699899 +| epoch 10 | 267/ 8400 batches | train loss 0.2723607 +| epoch 10 | 271/ 8400 batches | train loss 0.3421837 +| epoch 10 | 275/ 8400 batches | train loss 0.3116295 +| epoch 10 | 279/ 8400 batches | train loss 0.2844122 +| epoch 10 | 283/ 8400 batches | train loss 0.3245195 +| epoch 10 | 287/ 8400 batches | train loss 0.2956678 +| epoch 10 | 291/ 8400 batches | train loss 0.3224629 +| epoch 10 | 295/ 8400 batches | train loss 0.2691993 +| epoch 10 | 299/ 8400 batches | train loss 0.2565162 +| epoch 10 | 303/ 8400 batches | train loss 0.3084809 +| epoch 10 | 307/ 8400 batches | train loss 0.3440166 +| epoch 10 | 311/ 8400 batches | train loss 0.3193351 +| epoch 10 | 315/ 8400 batches | train loss 0.3733843 +| epoch 10 | 319/ 8400 batches | train loss 0.3665603 +| epoch 10 | 323/ 8400 batches | train loss 0.3541770 +| epoch 10 | 327/ 8400 batches | train loss 0.3318447 +| epoch 10 | 331/ 8400 batches | train loss 0.2640948 +| epoch 10 | 335/ 8400 batches | train loss 0.4059220 +| epoch 10 | 339/ 8400 batches | train loss 0.3035157 +| epoch 10 | 343/ 8400 batches | train loss 0.3820826 +| epoch 10 | 347/ 8400 batches | train loss 0.3194427 +| epoch 10 | 351/ 8400 batches | train loss 0.2791873 +| epoch 10 | 355/ 8400 batches | train loss 0.3199100 +| epoch 10 | 359/ 8400 batches | train loss 0.3414384 +| epoch 10 | 363/ 8400 batches | train loss 0.3232174 +| epoch 10 | 367/ 8400 batches | train loss 0.4044990 +| epoch 10 | 371/ 8400 batches | train loss 0.3139283 +| epoch 10 | 375/ 8400 batches | train loss 0.2867169 +| epoch 10 | 379/ 8400 batches | train loss 0.3073052 +| epoch 10 | 383/ 8400 batches | train loss 0.3112992 +| epoch 10 | 387/ 8400 batches | train loss 0.3231290 +| epoch 10 | 391/ 8400 batches | train loss 0.2879419 +| epoch 10 | 395/ 8400 batches | train loss 0.2938379 +| epoch 10 | 399/ 8400 batches | train loss 0.3323959 +| epoch 10 | 403/ 8400 batches | train loss 0.3317959 +| epoch 10 | 407/ 8400 batches | train loss 0.2923342 +| epoch 10 | 411/ 8400 batches | train loss 0.3603632 +| epoch 10 | 415/ 8400 batches | train loss 0.2700619 +| epoch 10 | 419/ 8400 batches | train loss 0.3917758 +| epoch 10 | 423/ 8400 batches | train loss 0.3507859 +| epoch 10 | 427/ 8400 batches | train loss 0.3443684 +| epoch 10 | 431/ 8400 batches | train loss 0.2645283 +| epoch 10 | 435/ 8400 batches | train loss 0.3125011 +| epoch 10 | 439/ 8400 batches | train loss 0.3124286 +| epoch 10 | 443/ 8400 batches | train loss 0.3223276 +| epoch 10 | 447/ 8400 batches | train loss 0.3164865 +| epoch 10 | 451/ 8400 batches | train loss 0.2043366 +| epoch 10 | 455/ 8400 batches | train loss 0.3224282 +| epoch 10 | 459/ 8400 batches | train loss 0.3100661 +| epoch 10 | 463/ 8400 batches | train loss 0.2440202 +| epoch 10 | 467/ 8400 batches | train loss 0.2960039 +| epoch 10 | 471/ 8400 batches | train loss 0.3143330 +| epoch 10 | 475/ 8400 batches | train loss 0.3504758 +| epoch 10 | 479/ 8400 batches | train loss 0.3138326 +| epoch 10 | 483/ 8400 batches | train loss 0.2649307 +| epoch 10 | 487/ 8400 batches | train loss 0.3914630 +| epoch 10 | 491/ 8400 batches | train loss 0.3665406 +| epoch 10 | 495/ 8400 batches | train loss 0.3014517 +| epoch 10 | 499/ 8400 batches | train loss 0.3073273 +| epoch 10 | 503/ 8400 batches | train loss 0.2953972 +| epoch 10 | 507/ 8400 batches | train loss 0.3745428 +| epoch 10 | 511/ 8400 batches | train loss 0.2917460 +| epoch 10 | 515/ 8400 batches | train loss 0.3012911 +| epoch 10 | 519/ 8400 batches | train loss 0.3050438 +| epoch 10 | 523/ 8400 batches | train loss 0.3152444 +| epoch 10 | 527/ 8400 batches | train loss 0.3128393 +| epoch 10 | 531/ 8400 batches | train loss 0.3132460 +| epoch 10 | 535/ 8400 batches | train loss 0.2784472 +| epoch 10 | 539/ 8400 batches | train loss 0.2907289 +| epoch 10 | 543/ 8400 batches | train loss 0.3975698 +| epoch 10 | 547/ 8400 batches | train loss 0.2899393 +| epoch 10 | 551/ 8400 batches | train loss 0.2866161 +| epoch 10 | 555/ 8400 batches | train loss 0.3460600 +| epoch 10 | 559/ 8400 batches | train loss 0.3163783 +| epoch 10 | 563/ 8400 batches | train loss 0.2955909 +| epoch 10 | 567/ 8400 batches | train loss 0.3602129 +| epoch 10 | 571/ 8400 batches | train loss 0.3214194 +| epoch 10 | 575/ 8400 batches | train loss 0.3871464 +| epoch 10 | 579/ 8400 batches | train loss 0.3152187 +| epoch 10 | 583/ 8400 batches | train loss 0.3314983 +| epoch 10 | 587/ 8400 batches | train loss 0.3088674 +| epoch 10 | 591/ 8400 batches | train loss 0.3896448 +| epoch 10 | 595/ 8400 batches | train loss 0.3050977 +| epoch 10 | 599/ 8400 batches | train loss 0.3191226 +| epoch 10 | 603/ 8400 batches | train loss 0.3133307 +| epoch 10 | 607/ 8400 batches | train loss 0.2933900 +| epoch 10 | 611/ 8400 batches | train loss 0.2600740 +| epoch 10 | 615/ 8400 batches | train loss 0.2526301 +| epoch 10 | 619/ 8400 batches | train loss 0.2722746 +| epoch 10 | 623/ 8400 batches | train loss 0.3282583 +| epoch 10 | 627/ 8400 batches | train loss 0.3149010 +| epoch 10 | 631/ 8400 batches | train loss 0.3186924 +| epoch 10 | 635/ 8400 batches | train loss 0.3474714 +| epoch 10 | 639/ 8400 batches | train loss 0.3487581 +| epoch 10 | 643/ 8400 batches | train loss 0.2689308 +| epoch 10 | 647/ 8400 batches | train loss 0.2833168 +| epoch 10 | 651/ 8400 batches | train loss 0.3356774 +| epoch 10 | 655/ 8400 batches | train loss 0.3318996 +| epoch 10 | 659/ 8400 batches | train loss 0.3160480 +| epoch 10 | 663/ 8400 batches | train loss 0.3265788 +| epoch 10 | 667/ 8400 batches | train loss 0.2642244 +| epoch 10 | 671/ 8400 batches | train loss 0.3590293 +| epoch 10 | 675/ 8400 batches | train loss 0.2996682 +| epoch 10 | 679/ 8400 batches | train loss 0.3527981 +| epoch 10 | 683/ 8400 batches | train loss 0.3356075 +| epoch 10 | 687/ 8400 batches | train loss 0.4103440 +| epoch 10 | 691/ 8400 batches | train loss 0.3318829 +| epoch 10 | 695/ 8400 batches | train loss 0.2782691 +| epoch 10 | 699/ 8400 batches | train loss 0.3590763 +| epoch 10 | 703/ 8400 batches | train loss 0.3319024 +| epoch 10 | 707/ 8400 batches | train loss 0.2831007 +| epoch 10 | 711/ 8400 batches | train loss 0.3586612 +| epoch 10 | 715/ 8400 batches | train loss 0.2609637 +| epoch 10 | 719/ 8400 batches | train loss 0.3472769 +| epoch 10 | 723/ 8400 batches | train loss 0.3210387 +| epoch 10 | 727/ 8400 batches | train loss 0.2537568 +| epoch 10 | 731/ 8400 batches | train loss 0.3475318 +| epoch 10 | 735/ 8400 batches | train loss 0.3183509 +| epoch 10 | 739/ 8400 batches | train loss 0.2830024 +| epoch 10 | 743/ 8400 batches | train loss 0.3323298 +| epoch 10 | 747/ 8400 batches | train loss 0.3207037 +| epoch 10 | 751/ 8400 batches | train loss 0.3771008 +| epoch 10 | 755/ 8400 batches | train loss 0.2739595 +| epoch 10 | 759/ 8400 batches | train loss 0.3242329 +| epoch 10 | 763/ 8400 batches | train loss 0.3330459 +| epoch 10 | 767/ 8400 batches | train loss 0.3655401 +| epoch 10 | 771/ 8400 batches | train loss 0.3968204 +| epoch 10 | 775/ 8400 batches | train loss 0.3467367 +| epoch 10 | 779/ 8400 batches | train loss 0.3217151 +| epoch 10 | 783/ 8400 batches | train loss 0.2685822 +| epoch 10 | 787/ 8400 batches | train loss 0.3431783 +| epoch 10 | 791/ 8400 batches | train loss 0.2581819 +| epoch 10 | 795/ 8400 batches | train loss 0.2849870 +| epoch 10 | 799/ 8400 batches | train loss 0.2916577 +| epoch 10 | 803/ 8400 batches | train loss 0.2925138 +| epoch 10 | 807/ 8400 batches | train loss 0.2921315 +| epoch 10 | 811/ 8400 batches | train loss 0.2717227 +| epoch 10 | 815/ 8400 batches | train loss 0.2882446 +| epoch 10 | 819/ 8400 batches | train loss 0.2702389 +| epoch 10 | 823/ 8400 batches | train loss 0.3374995 +| epoch 10 | 827/ 8400 batches | train loss 0.2616918 +| epoch 10 | 831/ 8400 batches | train loss 0.3477528 +| epoch 10 | 835/ 8400 batches | train loss 0.2675249 +| epoch 10 | 839/ 8400 batches | train loss 0.3288108 +| epoch 10 | 843/ 8400 batches | train loss 0.3678917 +| epoch 10 | 847/ 8400 batches | train loss 0.2609685 +| epoch 10 | 851/ 8400 batches | train loss 0.3427399 +| epoch 10 | 855/ 8400 batches | train loss 0.2609220 +| epoch 10 | 859/ 8400 batches | train loss 0.3104534 +| epoch 10 | 863/ 8400 batches | train loss 0.3439984 +| epoch 10 | 867/ 8400 batches | train loss 0.2873960 +| epoch 10 | 871/ 8400 batches | train loss 0.2736109 +| epoch 10 | 875/ 8400 batches | train loss 0.2895239 +| epoch 10 | 879/ 8400 batches | train loss 0.3240435 +| epoch 10 | 883/ 8400 batches | train loss 0.3737660 +| epoch 10 | 887/ 8400 batches | train loss 0.3658562 +| epoch 10 | 891/ 8400 batches | train loss 0.3840142 +| epoch 10 | 895/ 8400 batches | train loss 0.3536817 +| epoch 10 | 899/ 8400 batches | train loss 0.2590798 +| epoch 10 | 903/ 8400 batches | train loss 0.3257235 +| epoch 10 | 907/ 8400 batches | train loss 0.3185972 +| epoch 10 | 911/ 8400 batches | train loss 0.2870817 +| epoch 10 | 915/ 8400 batches | train loss 0.3585148 +| epoch 10 | 919/ 8400 batches | train loss 0.2635186 +| epoch 10 | 923/ 8400 batches | train loss 0.2579614 +| epoch 10 | 927/ 8400 batches | train loss 0.3330077 +| epoch 10 | 931/ 8400 batches | train loss 0.2642717 +| epoch 10 | 935/ 8400 batches | train loss 0.2691151 +| epoch 10 | 939/ 8400 batches | train loss 0.3027925 +| epoch 10 | 943/ 8400 batches | train loss 0.3332149 +| epoch 10 | 947/ 8400 batches | train loss 0.3053632 +| epoch 10 | 951/ 8400 batches | train loss 0.2676614 +| epoch 10 | 955/ 8400 batches | train loss 0.4131089 +| epoch 10 | 959/ 8400 batches | train loss 0.3175217 +| epoch 10 | 963/ 8400 batches | train loss 0.3111181 +| epoch 10 | 967/ 8400 batches | train loss 0.3066518 +| epoch 10 | 971/ 8400 batches | train loss 0.2786288 +| epoch 10 | 975/ 8400 batches | train loss 0.3129643 +| epoch 10 | 979/ 8400 batches | train loss 0.3992311 +| epoch 10 | 983/ 8400 batches | train loss 0.2889029 +| epoch 10 | 987/ 8400 batches | train loss 0.2815756 +| epoch 10 | 991/ 8400 batches | train loss 0.2945046