| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.025, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.5e-05, |
| "grad_norm": 856.0, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 25.7856, |
| "loss/crossentropy": 2.466749429702759, |
| "loss/hidden": 9.875, |
| "loss/logits": 4.1609320640563965, |
| "loss/reg": 9.282934188842773, |
| "loss/twn": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 5e-05, |
| "grad_norm": 608.0, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 28.007, |
| "loss/crossentropy": 4.963851451873779, |
| "loss/hidden": 9.5625, |
| "loss/logits": 4.197765827178955, |
| "loss/reg": 9.282919883728027, |
| "loss/twn": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 7.5e-05, |
| "grad_norm": 484.0, |
| "learning_rate": 3e-06, |
| "loss": 28.5452, |
| "loss/crossentropy": 5.791485786437988, |
| "loss/hidden": 9.375, |
| "loss/logits": 4.095866680145264, |
| "loss/reg": 9.28286361694336, |
| "loss/twn": 0.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0001, |
| "grad_norm": 436.0, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 28.1226, |
| "loss/crossentropy": 6.202037811279297, |
| "loss/hidden": 9.3125, |
| "loss/logits": 3.3252921104431152, |
| "loss/reg": 9.282757759094238, |
| "loss/twn": 0.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.000125, |
| "grad_norm": 462.0, |
| "learning_rate": 5e-06, |
| "loss": 25.9727, |
| "loss/crossentropy": 4.488704681396484, |
| "loss/hidden": 8.9375, |
| "loss/logits": 3.2638444900512695, |
| "loss/reg": 9.282657623291016, |
| "loss/twn": 0.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00015, |
| "grad_norm": 884.0, |
| "learning_rate": 6e-06, |
| "loss": 26.2108, |
| "loss/crossentropy": 4.040957450866699, |
| "loss/hidden": 9.125, |
| "loss/logits": 3.762254476547241, |
| "loss/reg": 9.282608985900879, |
| "loss/twn": 0.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.000175, |
| "grad_norm": 628.0, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 31.867, |
| "loss/crossentropy": 6.008764743804932, |
| "loss/hidden": 10.125, |
| "loss/logits": 6.450687408447266, |
| "loss/reg": 9.282584190368652, |
| "loss/twn": 0.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0002, |
| "grad_norm": 414.0, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 26.8696, |
| "loss/crossentropy": 5.94035005569458, |
| "loss/hidden": 8.5, |
| "loss/logits": 3.1467137336730957, |
| "loss/reg": 9.282520294189453, |
| "loss/twn": 0.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.000225, |
| "grad_norm": 326.0, |
| "learning_rate": 9e-06, |
| "loss": 25.0377, |
| "loss/crossentropy": 4.519560813903809, |
| "loss/hidden": 8.5, |
| "loss/logits": 2.735779285430908, |
| "loss/reg": 9.28234577178955, |
| "loss/twn": 0.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00025, |
| "grad_norm": 247.0, |
| "learning_rate": 1e-05, |
| "loss": 26.0293, |
| "loss/crossentropy": 5.418540000915527, |
| "loss/hidden": 8.5625, |
| "loss/logits": 2.7662129402160645, |
| "loss/reg": 9.281999588012695, |
| "loss/twn": 0.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.000275, |
| "grad_norm": 244.0, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 24.4356, |
| "loss/crossentropy": 4.008213996887207, |
| "loss/hidden": 8.75, |
| "loss/logits": 2.395906448364258, |
| "loss/reg": 9.281471252441406, |
| "loss/twn": 0.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0003, |
| "grad_norm": 206.0, |
| "learning_rate": 1.2e-05, |
| "loss": 24.0946, |
| "loss/crossentropy": 3.9673473834991455, |
| "loss/hidden": 8.625, |
| "loss/logits": 2.2214930057525635, |
| "loss/reg": 9.280747413635254, |
| "loss/twn": 0.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.000325, |
| "grad_norm": 486.0, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 25.7704, |
| "loss/crossentropy": 3.299372911453247, |
| "loss/hidden": 9.5, |
| "loss/logits": 3.6912012100219727, |
| "loss/reg": 9.279807090759277, |
| "loss/twn": 0.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00035, |
| "grad_norm": 196.0, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 22.4901, |
| "loss/crossentropy": 3.207503080368042, |
| "loss/hidden": 7.71875, |
| "loss/logits": 2.2852282524108887, |
| "loss/reg": 9.278639793395996, |
| "loss/twn": 0.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.000375, |
| "grad_norm": 334.0, |
| "learning_rate": 1.5e-05, |
| "loss": 22.1515, |
| "loss/crossentropy": 2.248960256576538, |
| "loss/hidden": 8.4375, |
| "loss/logits": 2.1877946853637695, |
| "loss/reg": 9.277209281921387, |
| "loss/twn": 0.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0004, |
| "grad_norm": 222.0, |
| "grad_norm_var": 46713.19583333333, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 22.6229, |
| "loss/crossentropy": 3.0185177326202393, |
| "loss/hidden": 8.0, |
| "loss/logits": 2.3288779258728027, |
| "loss/reg": 9.275506019592285, |
| "loss/twn": 0.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.000425, |
| "grad_norm": 274.0, |
| "grad_norm_var": 35567.895833333336, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 23.2931, |
| "loss/crossentropy": 2.6807451248168945, |
| "loss/hidden": 8.8125, |
| "loss/logits": 2.526376724243164, |
| "loss/reg": 9.273526191711426, |
| "loss/twn": 0.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00045, |
| "grad_norm": 166.0, |
| "grad_norm_var": 35707.8625, |
| "learning_rate": 1.8e-05, |
| "loss": 19.7337, |
| "loss/crossentropy": 1.6070754528045654, |
| "loss/hidden": 7.34375, |
| "loss/logits": 1.5115822553634644, |
| "loss/reg": 9.271272659301758, |
| "loss/twn": 0.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.000475, |
| "grad_norm": 316.0, |
| "grad_norm_var": 35042.8625, |
| "learning_rate": 1.9e-05, |
| "loss": 21.55, |
| "loss/crossentropy": 2.748849868774414, |
| "loss/hidden": 7.625, |
| "loss/logits": 1.9073840379714966, |
| "loss/reg": 9.268738746643066, |
| "loss/twn": 0.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0005, |
| "grad_norm": 163.0, |
| "grad_norm_var": 37118.8, |
| "learning_rate": 2e-05, |
| "loss": 19.7796, |
| "loss/crossentropy": 2.412648916244507, |
| "loss/hidden": 6.5625, |
| "loss/logits": 1.5384936332702637, |
| "loss/reg": 9.265981674194336, |
| "loss/twn": 0.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.000525, |
| "grad_norm": 201.0, |
| "grad_norm_var": 37409.1625, |
| "learning_rate": 2.1e-05, |
| "loss": 20.2474, |
| "loss/crossentropy": 3.4336729049682617, |
| "loss/hidden": 5.84375, |
| "loss/logits": 1.7070186138153076, |
| "loss/reg": 9.263004302978516, |
| "loss/twn": 0.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00055, |
| "grad_norm": 118.5, |
| "grad_norm_var": 17660.857291666667, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 19.2087, |
| "loss/crossentropy": 3.3189680576324463, |
| "loss/hidden": 5.375, |
| "loss/logits": 1.254899263381958, |
| "loss/reg": 9.259842872619629, |
| "loss/twn": 0.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.000575, |
| "grad_norm": 103.5, |
| "grad_norm_var": 10786.629166666668, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 19.5675, |
| "loss/crossentropy": 3.545900583267212, |
| "loss/hidden": 5.5625, |
| "loss/logits": 1.2025506496429443, |
| "loss/reg": 9.256536483764648, |
| "loss/twn": 0.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0006, |
| "grad_norm": 138.0, |
| "grad_norm_var": 9551.529166666667, |
| "learning_rate": 2.4e-05, |
| "loss": 17.8642, |
| "loss/crossentropy": 2.03609561920166, |
| "loss/hidden": 5.5, |
| "loss/logits": 1.075016975402832, |
| "loss/reg": 9.25309944152832, |
| "loss/twn": 0.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.000625, |
| "grad_norm": 108.0, |
| "grad_norm_var": 9842.195833333333, |
| "learning_rate": 2.5e-05, |
| "loss": 17.6088, |
| "loss/crossentropy": 1.804774284362793, |
| "loss/hidden": 5.96875, |
| "loss/logits": 0.5857374668121338, |
| "loss/reg": 9.24954605102539, |
| "loss/twn": 0.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00065, |
| "grad_norm": 102.5, |
| "grad_norm_var": 10630.623958333334, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 19.045, |
| "loss/crossentropy": 3.3252830505371094, |
| "loss/hidden": 5.375, |
| "loss/logits": 1.098811149597168, |
| "loss/reg": 9.245920181274414, |
| "loss/twn": 0.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.000675, |
| "grad_norm": 103.5, |
| "grad_norm_var": 11249.116666666667, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 17.5244, |
| "loss/crossentropy": 1.5941309928894043, |
| "loss/hidden": 5.5625, |
| "loss/logits": 1.125561237335205, |
| "loss/reg": 9.24221420288086, |
| "loss/twn": 0.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0007, |
| "grad_norm": 161.0, |
| "grad_norm_var": 11353.929166666667, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 22.7486, |
| "loss/crossentropy": 5.096042156219482, |
| "loss/hidden": 7.375, |
| "loss/logits": 1.0390578508377075, |
| "loss/reg": 9.238471984863281, |
| "loss/twn": 0.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.000725, |
| "grad_norm": 219.0, |
| "grad_norm_var": 5612.316666666667, |
| "learning_rate": 2.9e-05, |
| "loss": 17.8045, |
| "loss/crossentropy": 2.9785099029541016, |
| "loss/hidden": 4.5625, |
| "loss/logits": 1.0287822484970093, |
| "loss/reg": 9.234696388244629, |
| "loss/twn": 0.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.00075, |
| "grad_norm": 147.0, |
| "grad_norm_var": 5676.629166666667, |
| "learning_rate": 3e-05, |
| "loss": 16.8206, |
| "loss/crossentropy": 1.4641753435134888, |
| "loss/hidden": 5.40625, |
| "loss/logits": 0.7192707061767578, |
| "loss/reg": 9.23090648651123, |
| "loss/twn": 0.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.000775, |
| "grad_norm": 100.5, |
| "grad_norm_var": 4283.898958333333, |
| "learning_rate": 3.1e-05, |
| "loss": 17.0473, |
| "loss/crossentropy": 1.9062433242797852, |
| "loss/hidden": 5.125, |
| "loss/logits": 0.7889943718910217, |
| "loss/reg": 9.227106094360352, |
| "loss/twn": 0.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0008, |
| "grad_norm": 90.5, |
| "grad_norm_var": 4369.1, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 18.3265, |
| "loss/crossentropy": 3.473458766937256, |
| "loss/hidden": 4.65625, |
| "loss/logits": 0.9734625220298767, |
| "loss/reg": 9.22329044342041, |
| "loss/twn": 0.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.000825, |
| "grad_norm": 127.0, |
| "grad_norm_var": 3426.4625, |
| "learning_rate": 3.3e-05, |
| "loss": 16.5429, |
| "loss/crossentropy": 1.9291913509368896, |
| "loss/hidden": 4.625, |
| "loss/logits": 0.7692581415176392, |
| "loss/reg": 9.219466209411621, |
| "loss/twn": 0.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00085, |
| "grad_norm": 552.0, |
| "grad_norm_var": 13674.7625, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 15.9999, |
| "loss/crossentropy": 1.0949360132217407, |
| "loss/hidden": 5.125, |
| "loss/logits": 0.5642712116241455, |
| "loss/reg": 9.215657234191895, |
| "loss/twn": 0.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.000875, |
| "grad_norm": 101.5, |
| "grad_norm_var": 12430.215625, |
| "learning_rate": 3.5e-05, |
| "loss": 17.3223, |
| "loss/crossentropy": 3.0765247344970703, |
| "loss/hidden": 4.15625, |
| "loss/logits": 0.8776548504829407, |
| "loss/reg": 9.211865425109863, |
| "loss/twn": 0.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0009, |
| "grad_norm": 117.0, |
| "grad_norm_var": 12535.057291666666, |
| "learning_rate": 3.6e-05, |
| "loss": 17.2836, |
| "loss/crossentropy": 2.0558810234069824, |
| "loss/hidden": 5.03125, |
| "loss/logits": 0.9883794188499451, |
| "loss/reg": 9.20807933807373, |
| "loss/twn": 0.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.000925, |
| "grad_norm": 56.0, |
| "grad_norm_var": 12972.473958333334, |
| "learning_rate": 3.7e-05, |
| "loss": 16.32, |
| "loss/crossentropy": 2.753218650817871, |
| "loss/hidden": 3.703125, |
| "loss/logits": 0.6593818664550781, |
| "loss/reg": 9.204306602478027, |
| "loss/twn": 0.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.00095, |
| "grad_norm": 51.0, |
| "grad_norm_var": 13510.083333333334, |
| "learning_rate": 3.8e-05, |
| "loss": 16.719, |
| "loss/crossentropy": 3.0973594188690186, |
| "loss/hidden": 3.765625, |
| "loss/logits": 0.6554687023162842, |
| "loss/reg": 9.200549125671387, |
| "loss/twn": 0.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.000975, |
| "grad_norm": 72.5, |
| "grad_norm_var": 13730.829166666666, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 15.9307, |
| "loss/crossentropy": 1.7005630731582642, |
| "loss/hidden": 4.34375, |
| "loss/logits": 0.6895506381988525, |
| "loss/reg": 9.196808815002441, |
| "loss/twn": 0.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 65.5, |
| "grad_norm_var": 14082.907291666666, |
| "learning_rate": 4e-05, |
| "loss": 15.8994, |
| "loss/crossentropy": 2.5001957416534424, |
| "loss/hidden": 3.703125, |
| "loss/logits": 0.502974271774292, |
| "loss/reg": 9.193108558654785, |
| "loss/twn": 0.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.001025, |
| "grad_norm": 268.0, |
| "grad_norm_var": 15087.573958333332, |
| "learning_rate": 4.1e-05, |
| "loss": 16.5811, |
| "loss/crossentropy": 2.406013011932373, |
| "loss/hidden": 4.1875, |
| "loss/logits": 0.7981227040290833, |
| "loss/reg": 9.18942642211914, |
| "loss/twn": 0.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.00105, |
| "grad_norm": 47.25, |
| "grad_norm_var": 15598.118489583334, |
| "learning_rate": 4.2e-05, |
| "loss": 14.4949, |
| "loss/crossentropy": 0.8963330984115601, |
| "loss/hidden": 4.0, |
| "loss/logits": 0.41280800104141235, |
| "loss/reg": 9.185782432556152, |
| "loss/twn": 0.0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.001075, |
| "grad_norm": 48.75, |
| "grad_norm_var": 16069.823958333332, |
| "learning_rate": 4.3e-05, |
| "loss": 15.43, |
| "loss/crossentropy": 1.93032968044281, |
| "loss/hidden": 3.84375, |
| "loss/logits": 0.47377997636795044, |
| "loss/reg": 9.182170867919922, |
| "loss/twn": 0.0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0011, |
| "grad_norm": 109.5, |
| "grad_norm_var": 16084.7375, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 15.9215, |
| "loss/crossentropy": 2.7150332927703857, |
| "loss/hidden": 3.296875, |
| "loss/logits": 0.7310340404510498, |
| "loss/reg": 9.178592681884766, |
| "loss/twn": 0.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.001125, |
| "grad_norm": 43.0, |
| "grad_norm_var": 16068.604166666666, |
| "learning_rate": 4.5e-05, |
| "loss": 15.493, |
| "loss/crossentropy": 3.0250916481018066, |
| "loss/hidden": 2.828125, |
| "loss/logits": 0.46468585729599, |
| "loss/reg": 9.175073623657227, |
| "loss/twn": 0.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.00115, |
| "grad_norm": 39.5, |
| "grad_norm_var": 16472.848958333332, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 15.6299, |
| "loss/crossentropy": 2.7783172130584717, |
| "loss/hidden": 3.09375, |
| "loss/logits": 0.5862575173377991, |
| "loss/reg": 9.171608924865723, |
| "loss/twn": 0.0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.001175, |
| "grad_norm": 43.75, |
| "grad_norm_var": 16807.26015625, |
| "learning_rate": 4.7e-05, |
| "loss": 15.3096, |
| "loss/crossentropy": 2.487776517868042, |
| "loss/hidden": 3.078125, |
| "loss/logits": 0.5754560828208923, |
| "loss/reg": 9.168206214904785, |
| "loss/twn": 0.0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0012, |
| "grad_norm": 73.0, |
| "grad_norm_var": 16882.51015625, |
| "learning_rate": 4.8e-05, |
| "loss": 15.1335, |
| "loss/crossentropy": 1.4238677024841309, |
| "loss/hidden": 3.859375, |
| "loss/logits": 0.6854370832443237, |
| "loss/reg": 9.164835929870605, |
| "loss/twn": 0.0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.001225, |
| "grad_norm": 532.0, |
| "grad_norm_var": 27865.60390625, |
| "learning_rate": 4.9e-05, |
| "loss": 15.2219, |
| "loss/crossentropy": 2.706357717514038, |
| "loss/hidden": 2.953125, |
| "loss/logits": 0.4008805751800537, |
| "loss/reg": 9.16152572631836, |
| "loss/twn": 0.0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.00125, |
| "grad_norm": 59.5, |
| "grad_norm_var": 15889.645572916666, |
| "learning_rate": 5e-05, |
| "loss": 15.8488, |
| "loss/crossentropy": 2.973315477371216, |
| "loss/hidden": 3.09375, |
| "loss/logits": 0.6234804391860962, |
| "loss/reg": 9.15826416015625, |
| "loss/twn": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.001275, |
| "grad_norm": 92.5, |
| "grad_norm_var": 15902.489322916666, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 15.4959, |
| "loss/crossentropy": 1.5256458520889282, |
| "loss/hidden": 4.5625, |
| "loss/logits": 0.2527560293674469, |
| "loss/reg": 9.155034065246582, |
| "loss/twn": 0.0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0013, |
| "grad_norm": 55.0, |
| "grad_norm_var": 16063.56015625, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 14.3415, |
| "loss/crossentropy": 1.9736268520355225, |
| "loss/hidden": 2.875, |
| "loss/logits": 0.34106332063674927, |
| "loss/reg": 9.151856422424316, |
| "loss/twn": 0.0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.001325, |
| "grad_norm": 31.875, |
| "grad_norm_var": 16252.878580729166, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 14.7444, |
| "loss/crossentropy": 2.4921610355377197, |
| "loss/hidden": 2.703125, |
| "loss/logits": 0.4003755748271942, |
| "loss/reg": 9.148697853088379, |
| "loss/twn": 0.0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.00135, |
| "grad_norm": 880.0, |
| "grad_norm_var": 53563.92337239583, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 14.7926, |
| "loss/crossentropy": 2.0597243309020996, |
| "loss/hidden": 3.171875, |
| "loss/logits": 0.41540366411209106, |
| "loss/reg": 9.145581245422363, |
| "loss/twn": 0.0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.001375, |
| "grad_norm": 45.5, |
| "grad_norm_var": 53902.35149739583, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 15.1464, |
| "loss/crossentropy": 2.724860429763794, |
| "loss/hidden": 2.765625, |
| "loss/logits": 0.513420820236206, |
| "loss/reg": 9.142509460449219, |
| "loss/twn": 0.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0014, |
| "grad_norm": 67.5, |
| "grad_norm_var": 53879.49108072917, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 15.1418, |
| "loss/crossentropy": 1.505046010017395, |
| "loss/hidden": 4.21875, |
| "loss/logits": 0.2785521149635315, |
| "loss/reg": 9.139471054077148, |
| "loss/twn": 0.0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.001425, |
| "grad_norm": 832.0, |
| "grad_norm_var": 82461.95358072917, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 13.8676, |
| "loss/crossentropy": 1.6500041484832764, |
| "loss/hidden": 2.828125, |
| "loss/logits": 0.25297531485557556, |
| "loss/reg": 9.136449813842773, |
| "loss/twn": 0.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.00145, |
| "grad_norm": 42.0, |
| "grad_norm_var": 82561.87858072917, |
| "learning_rate": 5.8e-05, |
| "loss": 14.3569, |
| "loss/crossentropy": 2.1793549060821533, |
| "loss/hidden": 2.671875, |
| "loss/logits": 0.3721921145915985, |
| "loss/reg": 9.13347053527832, |
| "loss/twn": 0.0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.001475, |
| "grad_norm": 42.25, |
| "grad_norm_var": 82684.51868489584, |
| "learning_rate": 5.9e-05, |
| "loss": 14.1771, |
| "loss/crossentropy": 1.998789668083191, |
| "loss/hidden": 2.671875, |
| "loss/logits": 0.375885546207428, |
| "loss/reg": 9.130505561828613, |
| "loss/twn": 0.0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 84.5, |
| "grad_norm_var": 82981.2634765625, |
| "learning_rate": 6e-05, |
| "loss": 13.8378, |
| "loss/crossentropy": 0.7205952405929565, |
| "loss/hidden": 3.625, |
| "loss/logits": 0.3646550178527832, |
| "loss/reg": 9.12757682800293, |
| "loss/twn": 0.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.001525, |
| "grad_norm": 36.25, |
| "grad_norm_var": 83112.1291015625, |
| "learning_rate": 6.1e-05, |
| "loss": 14.9565, |
| "loss/crossentropy": 2.839972734451294, |
| "loss/hidden": 2.5625, |
| "loss/logits": 0.4293031692504883, |
| "loss/reg": 9.124687194824219, |
| "loss/twn": 0.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.00155, |
| "grad_norm": 38.75, |
| "grad_norm_var": 83126.6962890625, |
| "learning_rate": 6.2e-05, |
| "loss": 14.3095, |
| "loss/crossentropy": 2.2519617080688477, |
| "loss/hidden": 2.640625, |
| "loss/logits": 0.2950741648674011, |
| "loss/reg": 9.121813774108887, |
| "loss/twn": 0.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.001575, |
| "grad_norm": 38.5, |
| "grad_norm_var": 83227.1353515625, |
| "learning_rate": 6.3e-05, |
| "loss": 13.9726, |
| "loss/crossentropy": 1.5576353073120117, |
| "loss/hidden": 2.921875, |
| "loss/logits": 0.3740916848182678, |
| "loss/reg": 9.118968963623047, |
| "loss/twn": 0.0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0016, |
| "grad_norm": 788.0, |
| "grad_norm_var": 104554.2447265625, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 14.0301, |
| "loss/crossentropy": 2.0011582374572754, |
| "loss/hidden": 2.625, |
| "loss/logits": 0.2878296971321106, |
| "loss/reg": 9.11613655090332, |
| "loss/twn": 0.0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.001625, |
| "grad_norm": 25.625, |
| "grad_norm_var": 100131.63932291667, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 13.9288, |
| "loss/crossentropy": 2.3415846824645996, |
| "loss/hidden": 2.171875, |
| "loss/logits": 0.30199772119522095, |
| "loss/reg": 9.11334228515625, |
| "loss/twn": 0.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.00165, |
| "grad_norm": 134.0, |
| "grad_norm_var": 99107.88515625, |
| "learning_rate": 6.6e-05, |
| "loss": 12.8067, |
| "loss/crossentropy": 0.4624326527118683, |
| "loss/hidden": 3.046875, |
| "loss/logits": 0.1868327260017395, |
| "loss/reg": 9.110562324523926, |
| "loss/twn": 0.0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.001675, |
| "grad_norm": 47.0, |
| "grad_norm_var": 99902.42890625, |
| "learning_rate": 6.7e-05, |
| "loss": 13.6688, |
| "loss/crossentropy": 1.571413278579712, |
| "loss/hidden": 2.734375, |
| "loss/logits": 0.25518080592155457, |
| "loss/reg": 9.10780143737793, |
| "loss/twn": 0.0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0017, |
| "grad_norm": 36.0, |
| "grad_norm_var": 100290.54348958333, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 13.721, |
| "loss/crossentropy": 1.9135500192642212, |
| "loss/hidden": 2.421875, |
| "loss/logits": 0.28046277165412903, |
| "loss/reg": 9.105061531066895, |
| "loss/twn": 0.0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.001725, |
| "grad_norm": 41.25, |
| "grad_norm_var": 100088.24368489583, |
| "learning_rate": 6.9e-05, |
| "loss": 13.7433, |
| "loss/crossentropy": 1.537960410118103, |
| "loss/hidden": 2.953125, |
| "loss/logits": 0.1499072164297104, |
| "loss/reg": 9.102350234985352, |
| "loss/twn": 0.0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.00175, |
| "grad_norm": 72.0, |
| "grad_norm_var": 67493.01868489584, |
| "learning_rate": 7e-05, |
| "loss": 13.428, |
| "loss/crossentropy": 0.3872216045856476, |
| "loss/hidden": 3.796875, |
| "loss/logits": 0.144296333193779, |
| "loss/reg": 9.099639892578125, |
| "loss/twn": 0.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.001775, |
| "grad_norm": 56.75, |
| "grad_norm_var": 67346.88587239584, |
| "learning_rate": 7.1e-05, |
| "loss": 13.7918, |
| "loss/crossentropy": 1.8126438856124878, |
| "loss/hidden": 2.59375, |
| "loss/logits": 0.2884829640388489, |
| "loss/reg": 9.096945762634277, |
| "loss/twn": 0.0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0018, |
| "grad_norm": 118.5, |
| "grad_norm_var": 66955.93899739583, |
| "learning_rate": 7.2e-05, |
| "loss": 12.7245, |
| "loss/crossentropy": 0.3237805664539337, |
| "loss/hidden": 3.140625, |
| "loss/logits": 0.16581019759178162, |
| "loss/reg": 9.094278335571289, |
| "loss/twn": 0.0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.001825, |
| "grad_norm": 35.75, |
| "grad_norm_var": 34397.60826822917, |
| "learning_rate": 7.3e-05, |
| "loss": 14.0507, |
| "loss/crossentropy": 2.70668888092041, |
| "loss/hidden": 1.9921875, |
| "loss/logits": 0.2602291703224182, |
| "loss/reg": 9.0916166305542, |
| "loss/twn": 0.0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.00185, |
| "grad_norm": 58.5, |
| "grad_norm_var": 34281.919205729166, |
| "learning_rate": 7.4e-05, |
| "loss": 13.244, |
| "loss/crossentropy": 1.0494247674942017, |
| "loss/hidden": 2.765625, |
| "loss/logits": 0.33998632431030273, |
| "loss/reg": 9.088973999023438, |
| "loss/twn": 0.0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.001875, |
| "grad_norm": 36.25, |
| "grad_norm_var": 34333.050455729164, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 13.9218, |
| "loss/crossentropy": 2.4842963218688965, |
| "loss/hidden": 2.0, |
| "loss/logits": 0.3512064516544342, |
| "loss/reg": 9.08632755279541, |
| "loss/twn": 0.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0019, |
| "grad_norm": 37.75, |
| "grad_norm_var": 34584.818684895836, |
| "learning_rate": 7.6e-05, |
| "loss": 13.7092, |
| "loss/crossentropy": 1.9147932529449463, |
| "loss/hidden": 2.375, |
| "loss/logits": 0.3357275426387787, |
| "loss/reg": 9.083701133728027, |
| "loss/twn": 0.0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.001925, |
| "grad_norm": 36.5, |
| "grad_norm_var": 34582.69576822917, |
| "learning_rate": 7.7e-05, |
| "loss": 14.2598, |
| "loss/crossentropy": 2.5261824131011963, |
| "loss/hidden": 2.234375, |
| "loss/logits": 0.41815829277038574, |
| "loss/reg": 9.081086158752441, |
| "loss/twn": 0.0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.00195, |
| "grad_norm": 31.625, |
| "grad_norm_var": 34644.12291666667, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 14.0096, |
| "loss/crossentropy": 2.7317256927490234, |
| "loss/hidden": 1.9140625, |
| "loss/logits": 0.28539663553237915, |
| "loss/reg": 9.07846450805664, |
| "loss/twn": 0.0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.001975, |
| "grad_norm": 32.75, |
| "grad_norm_var": 34693.05182291667, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 13.6063, |
| "loss/crossentropy": 2.143045663833618, |
| "loss/hidden": 2.171875, |
| "loss/logits": 0.21548917889595032, |
| "loss/reg": 9.075860023498535, |
| "loss/twn": 0.0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 840.0, |
| "grad_norm_var": 39637.27682291667, |
| "learning_rate": 8e-05, |
| "loss": 15.1711, |
| "loss/crossentropy": 2.7464404106140137, |
| "loss/hidden": 2.875, |
| "loss/logits": 0.47639912366867065, |
| "loss/reg": 9.073275566101074, |
| "loss/twn": 0.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.002025, |
| "grad_norm": 27.625, |
| "grad_norm_var": 39617.02265625, |
| "learning_rate": 8.1e-05, |
| "loss": 13.9751, |
| "loss/crossentropy": 2.657186269760132, |
| "loss/hidden": 1.984375, |
| "loss/logits": 0.26285576820373535, |
| "loss/reg": 9.07068157196045, |
| "loss/twn": 0.0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.00205, |
| "grad_norm": 30.25, |
| "grad_norm_var": 39855.971875, |
| "learning_rate": 8.2e-05, |
| "loss": 13.8347, |
| "loss/crossentropy": 2.2337749004364014, |
| "loss/hidden": 2.21875, |
| "loss/logits": 0.3140769898891449, |
| "loss/reg": 9.06808853149414, |
| "loss/twn": 0.0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.002075, |
| "grad_norm": 46.75, |
| "grad_norm_var": 39857.61432291667, |
| "learning_rate": 8.3e-05, |
| "loss": 14.0896, |
| "loss/crossentropy": 2.8934454917907715, |
| "loss/hidden": 1.84375, |
| "loss/logits": 0.2868635952472687, |
| "loss/reg": 9.065499305725098, |
| "loss/twn": 0.0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0021, |
| "grad_norm": 23.375, |
| "grad_norm_var": 39968.81295572917, |
| "learning_rate": 8.4e-05, |
| "loss": 13.7561, |
| "loss/crossentropy": 2.8258416652679443, |
| "loss/hidden": 1.609375, |
| "loss/logits": 0.2579536437988281, |
| "loss/reg": 9.062912940979004, |
| "loss/twn": 0.0, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.002125, |
| "grad_norm": 49.25, |
| "grad_norm_var": 39915.10462239583, |
| "learning_rate": 8.5e-05, |
| "loss": 13.494, |
| "loss/crossentropy": 2.145867347717285, |
| "loss/hidden": 1.984375, |
| "loss/logits": 0.3033989667892456, |
| "loss/reg": 9.060322761535645, |
| "loss/twn": 0.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.00215, |
| "grad_norm": 32.5, |
| "grad_norm_var": 40138.2384765625, |
| "learning_rate": 8.6e-05, |
| "loss": 13.7136, |
| "loss/crossentropy": 2.708767890930176, |
| "loss/hidden": 1.71875, |
| "loss/logits": 0.22835341095924377, |
| "loss/reg": 9.05773639678955, |
| "loss/twn": 0.0, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.002175, |
| "grad_norm": 120.0, |
| "grad_norm_var": 40079.3369140625, |
| "learning_rate": 8.7e-05, |
| "loss": 14.1054, |
| "loss/crossentropy": 1.6094319820404053, |
| "loss/hidden": 3.0625, |
| "loss/logits": 0.37835630774497986, |
| "loss/reg": 9.055155754089355, |
| "loss/twn": 0.0, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0022, |
| "grad_norm": 28.875, |
| "grad_norm_var": 40328.46640625, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 13.6082, |
| "loss/crossentropy": 2.5477914810180664, |
| "loss/hidden": 1.734375, |
| "loss/logits": 0.27344784140586853, |
| "loss/reg": 9.052587509155273, |
| "loss/twn": 0.0, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.002225, |
| "grad_norm": 29.0, |
| "grad_norm_var": 40381.7, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 13.7396, |
| "loss/crossentropy": 2.538127899169922, |
| "loss/hidden": 1.8359375, |
| "loss/logits": 0.31551623344421387, |
| "loss/reg": 9.050020217895508, |
| "loss/twn": 0.0, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.00225, |
| "grad_norm": 31.625, |
| "grad_norm_var": 40544.4197265625, |
| "learning_rate": 9e-05, |
| "loss": 13.058, |
| "loss/crossentropy": 1.719811201095581, |
| "loss/hidden": 2.078125, |
| "loss/logits": 0.21262270212173462, |
| "loss/reg": 9.047449111938477, |
| "loss/twn": 0.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.002275, |
| "grad_norm": 28.25, |
| "grad_norm_var": 40605.36139322917, |
| "learning_rate": 9.1e-05, |
| "loss": 13.5512, |
| "loss/crossentropy": 2.695859909057617, |
| "loss/hidden": 1.5859375, |
| "loss/logits": 0.224550262093544, |
| "loss/reg": 9.0448579788208, |
| "loss/twn": 0.0, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0023, |
| "grad_norm": 27.625, |
| "grad_norm_var": 40681.135416666664, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 12.8778, |
| "loss/crossentropy": 1.639548420906067, |
| "loss/hidden": 2.015625, |
| "loss/logits": 0.1803436279296875, |
| "loss/reg": 9.042285919189453, |
| "loss/twn": 0.0, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.002325, |
| "grad_norm": 25.375, |
| "grad_norm_var": 40766.0041015625, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 12.3849, |
| "loss/crossentropy": 1.0916918516159058, |
| "loss/hidden": 2.078125, |
| "loss/logits": 0.1753605306148529, |
| "loss/reg": 9.03970718383789, |
| "loss/twn": 0.0, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.00235, |
| "grad_norm": 96.0, |
| "grad_norm_var": 40542.80390625, |
| "learning_rate": 9.4e-05, |
| "loss": 13.0631, |
| "loss/crossentropy": 2.132835626602173, |
| "loss/hidden": 1.7265625, |
| "loss/logits": 0.16661113500595093, |
| "loss/reg": 9.037128448486328, |
| "loss/twn": 0.0, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.002375, |
| "grad_norm": 26.75, |
| "grad_norm_var": 40592.31640625, |
| "learning_rate": 9.5e-05, |
| "loss": 13.2424, |
| "loss/crossentropy": 2.3180124759674072, |
| "loss/hidden": 1.6328125, |
| "loss/logits": 0.25702449679374695, |
| "loss/reg": 9.034552574157715, |
| "loss/twn": 0.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0024, |
| "grad_norm": 23.25, |
| "grad_norm_var": 768.096875, |
| "learning_rate": 9.6e-05, |
| "loss": 13.5691, |
| "loss/crossentropy": 2.651806354522705, |
| "loss/hidden": 1.6484375, |
| "loss/logits": 0.23690301179885864, |
| "loss/reg": 9.031957626342773, |
| "loss/twn": 0.0, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.002425, |
| "grad_norm": 29.25, |
| "grad_norm_var": 765.4926432291667, |
| "learning_rate": 9.7e-05, |
| "loss": 12.7539, |
| "loss/crossentropy": 1.668021321296692, |
| "loss/hidden": 1.84375, |
| "loss/logits": 0.2127854824066162, |
| "loss/reg": 9.029374122619629, |
| "loss/twn": 0.0, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00245, |
| "grad_norm": 58.0, |
| "grad_norm_var": 775.6676432291666, |
| "learning_rate": 9.8e-05, |
| "loss": 12.4205, |
| "loss/crossentropy": 1.4275784492492676, |
| "loss/hidden": 1.796875, |
| "loss/logits": 0.1692367047071457, |
| "loss/reg": 9.026786804199219, |
| "loss/twn": 0.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.002475, |
| "grad_norm": 52.0, |
| "grad_norm_var": 780.5457682291667, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 13.1707, |
| "loss/crossentropy": 2.2379980087280273, |
| "loss/hidden": 1.703125, |
| "loss/logits": 0.20536217093467712, |
| "loss/reg": 9.024182319641113, |
| "loss/twn": 0.0, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 44.75, |
| "grad_norm_var": 754.3947916666667, |
| "learning_rate": 0.0001, |
| "loss": 13.3037, |
| "loss/crossentropy": 2.113724946975708, |
| "loss/hidden": 1.8515625, |
| "loss/logits": 0.316815048456192, |
| "loss/reg": 9.021589279174805, |
| "loss/twn": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.002525, |
| "grad_norm": 1112.0, |
| "grad_norm_var": 72101.45182291667, |
| "learning_rate": 0.0001, |
| "loss": 12.6188, |
| "loss/crossentropy": 1.7826207876205444, |
| "loss/hidden": 1.625, |
| "loss/logits": 0.192185640335083, |
| "loss/reg": 9.018988609313965, |
| "loss/twn": 0.0, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.00255, |
| "grad_norm": 26.875, |
| "grad_norm_var": 72161.80045572917, |
| "learning_rate": 0.0001, |
| "loss": 13.1765, |
| "loss/crossentropy": 2.4704537391662598, |
| "loss/hidden": 1.5078125, |
| "loss/logits": 0.1818367838859558, |
| "loss/reg": 9.016363143920898, |
| "loss/twn": 0.0, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.002575, |
| "grad_norm": 30.875, |
| "grad_norm_var": 72539.14270833334, |
| "learning_rate": 0.0001, |
| "loss": 12.191, |
| "loss/crossentropy": 1.3483895063400269, |
| "loss/hidden": 1.6796875, |
| "loss/logits": 0.1491580754518509, |
| "loss/reg": 9.013747215270996, |
| "loss/twn": 0.0, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0026, |
| "grad_norm": 26.25, |
| "grad_norm_var": 72566.00930989583, |
| "learning_rate": 0.0001, |
| "loss": 13.3363, |
| "loss/crossentropy": 2.490461587905884, |
| "loss/hidden": 1.578125, |
| "loss/logits": 0.2566271424293518, |
| "loss/reg": 9.011123657226562, |
| "loss/twn": 0.0, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.002625, |
| "grad_norm": 36.25, |
| "grad_norm_var": 72496.5603515625, |
| "learning_rate": 0.0001, |
| "loss": 12.2341, |
| "loss/crossentropy": 1.218780279159546, |
| "loss/hidden": 1.8203125, |
| "loss/logits": 0.186568021774292, |
| "loss/reg": 9.008485794067383, |
| "loss/twn": 0.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.00265, |
| "grad_norm": 22.0, |
| "grad_norm_var": 72596.12395833334, |
| "learning_rate": 0.0001, |
| "loss": 13.3123, |
| "loss/crossentropy": 2.6078522205352783, |
| "loss/hidden": 1.46875, |
| "loss/logits": 0.22988998889923096, |
| "loss/reg": 9.005841255187988, |
| "loss/twn": 0.0, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.002675, |
| "grad_norm": 38.0, |
| "grad_norm_var": 72503.46848958333, |
| "learning_rate": 0.0001, |
| "loss": 11.6424, |
| "loss/crossentropy": 0.535743772983551, |
| "loss/hidden": 1.96875, |
| "loss/logits": 0.13470911979675293, |
| "loss/reg": 9.003211975097656, |
| "loss/twn": 0.0, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0027, |
| "grad_norm": 19.625, |
| "grad_norm_var": 72589.68515625, |
| "learning_rate": 0.0001, |
| "loss": 12.3665, |
| "loss/crossentropy": 1.8694407939910889, |
| "loss/hidden": 1.375, |
| "loss/logits": 0.12151719629764557, |
| "loss/reg": 9.000567436218262, |
| "loss/twn": 0.0, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.002725, |
| "grad_norm": 26.125, |
| "grad_norm_var": 72581.8375, |
| "learning_rate": 0.0001, |
| "loss": 13.0832, |
| "loss/crossentropy": 2.575356960296631, |
| "loss/hidden": 1.2890625, |
| "loss/logits": 0.2208748161792755, |
| "loss/reg": 8.9979248046875, |
| "loss/twn": 0.0, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.00275, |
| "grad_norm": 23.75, |
| "grad_norm_var": 72987.56640625, |
| "learning_rate": 0.0001, |
| "loss": 13.0693, |
| "loss/crossentropy": 2.488720655441284, |
| "loss/hidden": 1.40625, |
| "loss/logits": 0.1790137141942978, |
| "loss/reg": 8.995278358459473, |
| "loss/twn": 0.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.002775, |
| "grad_norm": 25.0, |
| "grad_norm_var": 73004.7875, |
| "learning_rate": 0.0001, |
| "loss": 12.8926, |
| "loss/crossentropy": 2.3986456394195557, |
| "loss/hidden": 1.3046875, |
| "loss/logits": 0.19670313596725464, |
| "loss/reg": 8.9926118850708, |
| "loss/twn": 0.0, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0028, |
| "grad_norm": 278.0, |
| "grad_norm_var": 74466.68098958333, |
| "learning_rate": 0.0001, |
| "loss": 12.2886, |
| "loss/crossentropy": 1.5560193061828613, |
| "loss/hidden": 1.59375, |
| "loss/logits": 0.14890581369400024, |
| "loss/reg": 8.98995590209961, |
| "loss/twn": 0.0, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.002825, |
| "grad_norm": 43.0, |
| "grad_norm_var": 74320.28645833333, |
| "learning_rate": 0.0001, |
| "loss": 11.6975, |
| "loss/crossentropy": 0.4845088720321655, |
| "loss/hidden": 2.078125, |
| "loss/logits": 0.1475335955619812, |
| "loss/reg": 8.987287521362305, |
| "loss/twn": 0.0, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.00285, |
| "grad_norm": 27.75, |
| "grad_norm_var": 74613.04973958334, |
| "learning_rate": 0.0001, |
| "loss": 12.2436, |
| "loss/crossentropy": 1.3624050617218018, |
| "loss/hidden": 1.734375, |
| "loss/logits": 0.16220322251319885, |
| "loss/reg": 8.984620094299316, |
| "loss/twn": 0.0, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.002875, |
| "grad_norm": 62.25, |
| "grad_norm_var": 74534.178125, |
| "learning_rate": 0.0001, |
| "loss": 13.1561, |
| "loss/crossentropy": 2.6678481101989746, |
| "loss/hidden": 1.3203125, |
| "loss/logits": 0.18601734936237335, |
| "loss/reg": 8.981964111328125, |
| "loss/twn": 0.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0029, |
| "grad_norm": 22.75, |
| "grad_norm_var": 74770.953125, |
| "learning_rate": 0.0001, |
| "loss": 13.0278, |
| "loss/crossentropy": 2.5755393505096436, |
| "loss/hidden": 1.3046875, |
| "loss/logits": 0.16832035779953003, |
| "loss/reg": 8.979301452636719, |
| "loss/twn": 0.0, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.002925, |
| "grad_norm": 28.75, |
| "grad_norm_var": 3934.301822916667, |
| "learning_rate": 0.0001, |
| "loss": 13.3352, |
| "loss/crossentropy": 2.7907333374023438, |
| "loss/hidden": 1.3359375, |
| "loss/logits": 0.23188692331314087, |
| "loss/reg": 8.976622581481934, |
| "loss/twn": 0.0, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00295, |
| "grad_norm": 28.0, |
| "grad_norm_var": 3931.500455729167, |
| "learning_rate": 0.0001, |
| "loss": 13.2941, |
| "loss/crossentropy": 2.670893430709839, |
| "loss/hidden": 1.453125, |
| "loss/logits": 0.19613495469093323, |
| "loss/reg": 8.973959922790527, |
| "loss/twn": 0.0, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.002975, |
| "grad_norm": 26.375, |
| "grad_norm_var": 3941.930143229167, |
| "learning_rate": 0.0001, |
| "loss": 12.4672, |
| "loss/crossentropy": 1.7740745544433594, |
| "loss/hidden": 1.53125, |
| "loss/logits": 0.19052982330322266, |
| "loss/reg": 8.971305847167969, |
| "loss/twn": 0.0, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 19.625, |
| "grad_norm_var": 3962.0018229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.8338, |
| "loss/crossentropy": 2.3549768924713135, |
| "loss/hidden": 1.3046875, |
| "loss/logits": 0.2054794430732727, |
| "loss/reg": 8.96865463256836, |
| "loss/twn": 0.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.003025, |
| "grad_norm": 35.25, |
| "grad_norm_var": 3963.29140625, |
| "learning_rate": 0.0001, |
| "loss": 12.0042, |
| "loss/crossentropy": 1.3170990943908691, |
| "loss/hidden": 1.5859375, |
| "loss/logits": 0.13516384363174438, |
| "loss/reg": 8.9660062789917, |
| "loss/twn": 0.0, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.00305, |
| "grad_norm": 25.625, |
| "grad_norm_var": 3952.8072265625, |
| "learning_rate": 0.0001, |
| "loss": 12.7417, |
| "loss/crossentropy": 2.1242401599884033, |
| "loss/hidden": 1.5078125, |
| "loss/logits": 0.14632585644721985, |
| "loss/reg": 8.963364601135254, |
| "loss/twn": 0.0, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.003075, |
| "grad_norm": 39.0, |
| "grad_norm_var": 3951.8541015625, |
| "learning_rate": 0.0001, |
| "loss": 13.0775, |
| "loss/crossentropy": 2.5672924518585205, |
| "loss/hidden": 1.3515625, |
| "loss/logits": 0.1978948712348938, |
| "loss/reg": 8.960731506347656, |
| "loss/twn": 0.0, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0031, |
| "grad_norm": 37.0, |
| "grad_norm_var": 3910.362239583333, |
| "learning_rate": 0.0001, |
| "loss": 13.0647, |
| "loss/crossentropy": 2.335649013519287, |
| "loss/hidden": 1.515625, |
| "loss/logits": 0.2552981972694397, |
| "loss/reg": 8.95810604095459, |
| "loss/twn": 0.0, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.003125, |
| "grad_norm": 55.25, |
| "grad_norm_var": 3883.2244140625, |
| "learning_rate": 0.0001, |
| "loss": 12.4814, |
| "loss/crossentropy": 1.9733695983886719, |
| "loss/hidden": 1.421875, |
| "loss/logits": 0.13064134120941162, |
| "loss/reg": 8.955479621887207, |
| "loss/twn": 0.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.00315, |
| "grad_norm": 31.75, |
| "grad_norm_var": 3860.7327473958335, |
| "learning_rate": 0.0001, |
| "loss": 11.5822, |
| "loss/crossentropy": 0.8302437663078308, |
| "loss/hidden": 1.6875, |
| "loss/logits": 0.11161559820175171, |
| "loss/reg": 8.95286750793457, |
| "loss/twn": 0.0, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.003175, |
| "grad_norm": 52.25, |
| "grad_norm_var": 3819.6306640625, |
| "learning_rate": 0.0001, |
| "loss": 13.247, |
| "loss/crossentropy": 2.654543876647949, |
| "loss/hidden": 1.40625, |
| "loss/logits": 0.23597851395606995, |
| "loss/reg": 8.950261116027832, |
| "loss/twn": 0.0, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 35.5, |
| "grad_norm_var": 148.53430989583333, |
| "learning_rate": 0.0001, |
| "loss": 12.0879, |
| "loss/crossentropy": 1.345029354095459, |
| "loss/hidden": 1.640625, |
| "loss/logits": 0.1546275019645691, |
| "loss/reg": 8.94766616821289, |
| "loss/twn": 0.0, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.003225, |
| "grad_norm": 32.75, |
| "grad_norm_var": 145.0322265625, |
| "learning_rate": 0.0001, |
| "loss": 12.2406, |
| "loss/crossentropy": 1.4139896631240845, |
| "loss/hidden": 1.671875, |
| "loss/logits": 0.2096407562494278, |
| "loss/reg": 8.945086479187012, |
| "loss/twn": 0.0, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.00325, |
| "grad_norm": 27.875, |
| "grad_norm_var": 144.9125, |
| "learning_rate": 0.0001, |
| "loss": 13.1066, |
| "loss/crossentropy": 2.7330315113067627, |
| "loss/hidden": 1.2734375, |
| "loss/logits": 0.15762852132320404, |
| "loss/reg": 8.942508697509766, |
| "loss/twn": 0.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.003275, |
| "grad_norm": 42.5, |
| "grad_norm_var": 97.53307291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.9659, |
| "loss/crossentropy": 1.2849335670471191, |
| "loss/hidden": 1.578125, |
| "loss/logits": 0.16286081075668335, |
| "loss/reg": 8.93994426727295, |
| "loss/twn": 0.0, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0033, |
| "grad_norm": 29.75, |
| "grad_norm_var": 90.31432291666667, |
| "learning_rate": 0.0001, |
| "loss": 13.245, |
| "loss/crossentropy": 2.7124814987182617, |
| "loss/hidden": 1.3671875, |
| "loss/logits": 0.22789308428764343, |
| "loss/reg": 8.937397956848145, |
| "loss/twn": 0.0, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.003325, |
| "grad_norm": 24.625, |
| "grad_norm_var": 94.37701822916667, |
| "learning_rate": 0.0001, |
| "loss": 11.8433, |
| "loss/crossentropy": 1.5435179471969604, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.1695559024810791, |
| "loss/reg": 8.934871673583984, |
| "loss/twn": 0.0, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.00335, |
| "grad_norm": 28.75, |
| "grad_norm_var": 93.81764322916666, |
| "learning_rate": 0.0001, |
| "loss": 12.4831, |
| "loss/crossentropy": 2.235550880432129, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.17460262775421143, |
| "loss/reg": 8.932354927062988, |
| "loss/twn": 0.0, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.003375, |
| "grad_norm": 21.625, |
| "grad_norm_var": 100.05201822916666, |
| "learning_rate": 0.0001, |
| "loss": 12.6995, |
| "loss/crossentropy": 2.449124336242676, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.15648320317268372, |
| "loss/reg": 8.92985725402832, |
| "loss/twn": 0.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0034, |
| "grad_norm": 56.0, |
| "grad_norm_var": 114.50729166666666, |
| "learning_rate": 0.0001, |
| "loss": 12.0871, |
| "loss/crossentropy": 1.4691861867904663, |
| "loss/hidden": 1.59375, |
| "loss/logits": 0.09673602133989334, |
| "loss/reg": 8.927384376525879, |
| "loss/twn": 0.0, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.003425, |
| "grad_norm": 22.5, |
| "grad_norm_var": 125.88932291666667, |
| "learning_rate": 0.0001, |
| "loss": 12.9852, |
| "loss/crossentropy": 2.5444114208221436, |
| "loss/hidden": 1.3359375, |
| "loss/logits": 0.17988505959510803, |
| "loss/reg": 8.924919128417969, |
| "loss/twn": 0.0, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.00345, |
| "grad_norm": 28.625, |
| "grad_norm_var": 122.63307291666666, |
| "learning_rate": 0.0001, |
| "loss": 12.5636, |
| "loss/crossentropy": 2.287893533706665, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.19700783491134644, |
| "loss/reg": 8.922492980957031, |
| "loss/twn": 0.0, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.003475, |
| "grad_norm": 31.5, |
| "grad_norm_var": 122.50807291666666, |
| "learning_rate": 0.0001, |
| "loss": 12.681, |
| "loss/crossentropy": 2.2101542949676514, |
| "loss/hidden": 1.3515625, |
| "loss/logits": 0.19917884469032288, |
| "loss/reg": 8.92009449005127, |
| "loss/twn": 0.0, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 19.375, |
| "grad_norm_var": 136.96608072916666, |
| "learning_rate": 0.0001, |
| "loss": 12.563, |
| "loss/crossentropy": 2.2798943519592285, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.1232282817363739, |
| "loss/reg": 8.91769790649414, |
| "loss/twn": 0.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.003525, |
| "grad_norm": 21.0, |
| "grad_norm_var": 112.2775390625, |
| "learning_rate": 0.0001, |
| "loss": 12.3261, |
| "loss/crossentropy": 2.036501169204712, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.15554669499397278, |
| "loss/reg": 8.91533374786377, |
| "loss/twn": 0.0, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.00355, |
| "grad_norm": 49.25, |
| "grad_norm_var": 131.65514322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.7334, |
| "loss/crossentropy": 2.3514750003814697, |
| "loss/hidden": 1.3046875, |
| "loss/logits": 0.1642739474773407, |
| "loss/reg": 8.912992477416992, |
| "loss/twn": 0.0, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.003575, |
| "grad_norm": 38.5, |
| "grad_norm_var": 107.7072265625, |
| "learning_rate": 0.0001, |
| "loss": 11.2674, |
| "loss/crossentropy": 0.5186184048652649, |
| "loss/hidden": 1.703125, |
| "loss/logits": 0.13496407866477966, |
| "loss/reg": 8.910670280456543, |
| "loss/twn": 0.0, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0036, |
| "grad_norm": 312.0, |
| "grad_norm_var": 5019.326497395833, |
| "learning_rate": 0.0001, |
| "loss": 13.1971, |
| "loss/crossentropy": 2.7031729221343994, |
| "loss/hidden": 1.359375, |
| "loss/logits": 0.22617527842521667, |
| "loss/reg": 8.908365249633789, |
| "loss/twn": 0.0, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.003625, |
| "grad_norm": 37.0, |
| "grad_norm_var": 5011.1541015625, |
| "learning_rate": 0.0001, |
| "loss": 11.0168, |
| "loss/crossentropy": 0.3622937500476837, |
| "loss/hidden": 1.6484375, |
| "loss/logits": 0.10003112256526947, |
| "loss/reg": 8.906074523925781, |
| "loss/twn": 0.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.00365, |
| "grad_norm": 35.5, |
| "grad_norm_var": 4992.873958333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1303, |
| "loss/crossentropy": 1.3992815017700195, |
| "loss/hidden": 1.6875, |
| "loss/logits": 0.139686718583107, |
| "loss/reg": 8.903810501098633, |
| "loss/twn": 0.0, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.003675, |
| "grad_norm": 75.0, |
| "grad_norm_var": 5026.795833333334, |
| "learning_rate": 0.0001, |
| "loss": 12.0923, |
| "loss/crossentropy": 1.2384319305419922, |
| "loss/hidden": 1.875, |
| "loss/logits": 0.07728307694196701, |
| "loss/reg": 8.901557922363281, |
| "loss/twn": 0.0, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0037, |
| "grad_norm": 60.25, |
| "grad_norm_var": 4994.707291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.592, |
| "loss/crossentropy": 1.0195518732070923, |
| "loss/hidden": 1.5703125, |
| "loss/logits": 0.10284246504306793, |
| "loss/reg": 8.899341583251953, |
| "loss/twn": 0.0, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.003725, |
| "grad_norm": 30.125, |
| "grad_norm_var": 4975.170833333334, |
| "learning_rate": 0.0001, |
| "loss": 13.1592, |
| "loss/crossentropy": 2.808931350708008, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.1953590214252472, |
| "loss/reg": 8.897134780883789, |
| "loss/twn": 0.0, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 27.125, |
| "grad_norm_var": 4980.847330729167, |
| "learning_rate": 0.0001, |
| "loss": 12.8043, |
| "loss/crossentropy": 2.6362106800079346, |
| "loss/hidden": 1.1015625, |
| "loss/logits": 0.17158377170562744, |
| "loss/reg": 8.894936561584473, |
| "loss/twn": 0.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.003775, |
| "grad_norm": 24.625, |
| "grad_norm_var": 4968.425455729167, |
| "learning_rate": 0.0001, |
| "loss": 11.8258, |
| "loss/crossentropy": 1.3182786703109741, |
| "loss/hidden": 1.5, |
| "loss/logits": 0.11477649956941605, |
| "loss/reg": 8.892769813537598, |
| "loss/twn": 0.0, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0038, |
| "grad_norm": 29.25, |
| "grad_norm_var": 5006.9900390625, |
| "learning_rate": 0.0001, |
| "loss": 12.2718, |
| "loss/crossentropy": 2.2582006454467773, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.16201287508010864, |
| "loss/reg": 8.890613555908203, |
| "loss/twn": 0.0, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.003825, |
| "grad_norm": 108.0, |
| "grad_norm_var": 5120.7228515625, |
| "learning_rate": 0.0001, |
| "loss": 12.4264, |
| "loss/crossentropy": 2.1490817070007324, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.13107022643089294, |
| "loss/reg": 8.888484954833984, |
| "loss/twn": 0.0, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.00385, |
| "grad_norm": 28.75, |
| "grad_norm_var": 5120.23515625, |
| "learning_rate": 0.0001, |
| "loss": 12.7809, |
| "loss/crossentropy": 2.105442523956299, |
| "loss/hidden": 1.6328125, |
| "loss/logits": 0.1563117355108261, |
| "loss/reg": 8.886375427246094, |
| "loss/twn": 0.0, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.003875, |
| "grad_norm": 26.875, |
| "grad_norm_var": 5137.884830729166, |
| "learning_rate": 0.0001, |
| "loss": 12.681, |
| "loss/crossentropy": 2.561357021331787, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.1416509747505188, |
| "loss/reg": 8.884288787841797, |
| "loss/twn": 0.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0039, |
| "grad_norm": 41.75, |
| "grad_norm_var": 5054.945833333333, |
| "learning_rate": 0.0001, |
| "loss": 13.2632, |
| "loss/crossentropy": 2.8980720043182373, |
| "loss/hidden": 1.28125, |
| "loss/logits": 0.20166414976119995, |
| "loss/reg": 8.882227897644043, |
| "loss/twn": 0.0, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.003925, |
| "grad_norm": 29.625, |
| "grad_norm_var": 5015.823372395834, |
| "learning_rate": 0.0001, |
| "loss": 12.7697, |
| "loss/crossentropy": 2.587541103363037, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.17701838910579681, |
| "loss/reg": 8.880182266235352, |
| "loss/twn": 0.0, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.00395, |
| "grad_norm": 33.25, |
| "grad_norm_var": 5053.906705729167, |
| "learning_rate": 0.0001, |
| "loss": 12.888, |
| "loss/crossentropy": 2.619824171066284, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.17909657955169678, |
| "loss/reg": 8.878167152404785, |
| "loss/twn": 0.0, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.003975, |
| "grad_norm": 36.25, |
| "grad_norm_var": 5060.253580729167, |
| "learning_rate": 0.0001, |
| "loss": 12.7629, |
| "loss/crossentropy": 2.401193857192993, |
| "loss/hidden": 1.296875, |
| "loss/logits": 0.18871738016605377, |
| "loss/reg": 8.8761568069458, |
| "loss/twn": 0.0, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 21.375, |
| "grad_norm_var": 514.5455729166666, |
| "learning_rate": 0.0001, |
| "loss": 12.8195, |
| "loss/crossentropy": 2.641941785812378, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.19396184384822845, |
| "loss/reg": 8.874184608459473, |
| "loss/twn": 0.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.004025, |
| "grad_norm": 48.25, |
| "grad_norm_var": 517.5104166666666, |
| "learning_rate": 0.0001, |
| "loss": 12.8137, |
| "loss/crossentropy": 2.4985928535461426, |
| "loss/hidden": 1.28125, |
| "loss/logits": 0.1616763472557068, |
| "loss/reg": 8.87222957611084, |
| "loss/twn": 0.0, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.00405, |
| "grad_norm": 128.0, |
| "grad_norm_var": 984.4427083333334, |
| "learning_rate": 0.0001, |
| "loss": 11.4361, |
| "loss/crossentropy": 0.5462308526039124, |
| "loss/hidden": 1.9296875, |
| "loss/logits": 0.08990675210952759, |
| "loss/reg": 8.870291709899902, |
| "loss/twn": 0.0, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.004075, |
| "grad_norm": 25.125, |
| "grad_norm_var": 952.2577473958333, |
| "learning_rate": 0.0001, |
| "loss": 11.5832, |
| "loss/crossentropy": 1.241894245147705, |
| "loss/hidden": 1.359375, |
| "loss/logits": 0.1135173887014389, |
| "loss/reg": 8.868389129638672, |
| "loss/twn": 0.0, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0041, |
| "grad_norm": 39.25, |
| "grad_norm_var": 933.3796223958333, |
| "learning_rate": 0.0001, |
| "loss": 12.8681, |
| "loss/crossentropy": 2.579155445098877, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.2505415678024292, |
| "loss/reg": 8.866507530212402, |
| "loss/twn": 0.0, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.004125, |
| "grad_norm": 30.375, |
| "grad_norm_var": 932.9759765625, |
| "learning_rate": 0.0001, |
| "loss": 11.8981, |
| "loss/crossentropy": 1.529752254486084, |
| "loss/hidden": 1.375, |
| "loss/logits": 0.1287008821964264, |
| "loss/reg": 8.864646911621094, |
| "loss/twn": 0.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00415, |
| "grad_norm": 25.375, |
| "grad_norm_var": 936.7238932291667, |
| "learning_rate": 0.0001, |
| "loss": 12.5405, |
| "loss/crossentropy": 2.4071121215820312, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.16124263405799866, |
| "loss/reg": 8.862808227539062, |
| "loss/twn": 0.0, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.004175, |
| "grad_norm": 92.0, |
| "grad_norm_var": 1062.034375, |
| "learning_rate": 0.0001, |
| "loss": 11.8961, |
| "loss/crossentropy": 1.5120517015457153, |
| "loss/hidden": 1.4140625, |
| "loss/logits": 0.10898007452487946, |
| "loss/reg": 8.860990524291992, |
| "loss/twn": 0.0, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.0042, |
| "grad_norm": 149.0, |
| "grad_norm_var": 1683.3622395833333, |
| "learning_rate": 0.0001, |
| "loss": 12.1346, |
| "loss/crossentropy": 0.20657889544963837, |
| "loss/hidden": 2.921875, |
| "loss/logits": 0.14697904884815216, |
| "loss/reg": 8.859195709228516, |
| "loss/twn": 0.0, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.004225, |
| "grad_norm": 27.375, |
| "grad_norm_var": 1508.6327473958333, |
| "learning_rate": 0.0001, |
| "loss": 12.577, |
| "loss/crossentropy": 2.4741885662078857, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.1516125500202179, |
| "loss/reg": 8.85743236541748, |
| "loss/twn": 0.0, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.00425, |
| "grad_norm": 32.5, |
| "grad_norm_var": 1499.4296223958333, |
| "learning_rate": 0.0001, |
| "loss": 12.655, |
| "loss/crossentropy": 2.1815006732940674, |
| "loss/hidden": 1.4296875, |
| "loss/logits": 0.1881680190563202, |
| "loss/reg": 8.855690002441406, |
| "loss/twn": 0.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.004275, |
| "grad_norm": 24.375, |
| "grad_norm_var": 1507.2447265625, |
| "learning_rate": 0.0001, |
| "loss": 12.6143, |
| "loss/crossentropy": 2.449399709701538, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.1624523401260376, |
| "loss/reg": 8.85399055480957, |
| "loss/twn": 0.0, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0043, |
| "grad_norm": 100.5, |
| "grad_norm_var": 1666.2369140625, |
| "learning_rate": 0.0001, |
| "loss": 12.1857, |
| "loss/crossentropy": 1.7329692840576172, |
| "loss/hidden": 1.453125, |
| "loss/logits": 0.14728471636772156, |
| "loss/reg": 8.852312088012695, |
| "loss/twn": 0.0, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.004325, |
| "grad_norm": 30.0, |
| "grad_norm_var": 1665.09375, |
| "learning_rate": 0.0001, |
| "loss": 11.6818, |
| "loss/crossentropy": 1.2037038803100586, |
| "loss/hidden": 1.515625, |
| "loss/logits": 0.11183369904756546, |
| "loss/reg": 8.85065746307373, |
| "loss/twn": 0.0, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.00435, |
| "grad_norm": 24.375, |
| "grad_norm_var": 1693.0176432291667, |
| "learning_rate": 0.0001, |
| "loss": 11.6373, |
| "loss/crossentropy": 1.2824879884719849, |
| "loss/hidden": 1.3515625, |
| "loss/logits": 0.15424281358718872, |
| "loss/reg": 8.849024772644043, |
| "loss/twn": 0.0, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.004375, |
| "grad_norm": 30.75, |
| "grad_norm_var": 1706.5556640625, |
| "learning_rate": 0.0001, |
| "loss": 11.8643, |
| "loss/crossentropy": 1.3829710483551025, |
| "loss/hidden": 1.5, |
| "loss/logits": 0.13394233584403992, |
| "loss/reg": 8.847420692443848, |
| "loss/twn": 0.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0044, |
| "grad_norm": 34.75, |
| "grad_norm_var": 1663.4979166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.0556, |
| "loss/crossentropy": 0.46449124813079834, |
| "loss/hidden": 1.6171875, |
| "loss/logits": 0.12804941833019257, |
| "loss/reg": 8.845848083496094, |
| "loss/twn": 0.0, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.004425, |
| "grad_norm": 74.5, |
| "grad_norm_var": 1691.2518229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.4194, |
| "loss/crossentropy": 1.159515619277954, |
| "loss/hidden": 1.3203125, |
| "loss/logits": 0.0953197255730629, |
| "loss/reg": 8.844283103942871, |
| "loss/twn": 0.0, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.00445, |
| "grad_norm": 27.0, |
| "grad_norm_var": 1335.8580729166667, |
| "learning_rate": 0.0001, |
| "loss": 12.6972, |
| "loss/crossentropy": 2.529615879058838, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.12953932583332062, |
| "loss/reg": 8.842752456665039, |
| "loss/twn": 0.0, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.004475, |
| "grad_norm": 22.875, |
| "grad_norm_var": 1343.0229166666666, |
| "learning_rate": 0.0001, |
| "loss": 11.9591, |
| "loss/crossentropy": 1.487177848815918, |
| "loss/hidden": 1.5, |
| "loss/logits": 0.13065868616104126, |
| "loss/reg": 8.841255187988281, |
| "loss/twn": 0.0, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 24.5, |
| "grad_norm_var": 1373.46015625, |
| "learning_rate": 0.0001, |
| "loss": 12.4819, |
| "loss/crossentropy": 2.328747272491455, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.17269541323184967, |
| "loss/reg": 8.839783668518066, |
| "loss/twn": 0.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.004525, |
| "grad_norm": 38.25, |
| "grad_norm_var": 1359.9947265625, |
| "learning_rate": 0.0001, |
| "loss": 11.2515, |
| "loss/crossentropy": 0.7721107602119446, |
| "loss/hidden": 1.5234375, |
| "loss/logits": 0.11762076616287231, |
| "loss/reg": 8.838338851928711, |
| "loss/twn": 0.0, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.00455, |
| "grad_norm": 47.75, |
| "grad_norm_var": 1325.628125, |
| "learning_rate": 0.0001, |
| "loss": 12.6901, |
| "loss/crossentropy": 2.576754570007324, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.18267256021499634, |
| "loss/reg": 8.836915016174316, |
| "loss/twn": 0.0, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.004575, |
| "grad_norm": 23.5, |
| "grad_norm_var": 1224.1625, |
| "learning_rate": 0.0001, |
| "loss": 12.1801, |
| "loss/crossentropy": 1.9372246265411377, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.18082188069820404, |
| "loss/reg": 8.835512161254883, |
| "loss/twn": 0.0, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0046, |
| "grad_norm": 21.75, |
| "grad_norm_var": 463.1809895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.4684, |
| "loss/crossentropy": 2.4119606018066406, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.15976974368095398, |
| "loss/reg": 8.834145545959473, |
| "loss/twn": 0.0, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.004625, |
| "grad_norm": 21.875, |
| "grad_norm_var": 471.79765625, |
| "learning_rate": 0.0001, |
| "loss": 12.7312, |
| "loss/crossentropy": 2.6074588298797607, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.18157701194286346, |
| "loss/reg": 8.832801818847656, |
| "loss/twn": 0.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.00465, |
| "grad_norm": 31.25, |
| "grad_norm_var": 472.5125, |
| "learning_rate": 0.0001, |
| "loss": 12.4042, |
| "loss/crossentropy": 2.3867440223693848, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.13915154337882996, |
| "loss/reg": 8.831473350524902, |
| "loss/twn": 0.0, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.004675, |
| "grad_norm": 27.875, |
| "grad_norm_var": 467.79479166666664, |
| "learning_rate": 0.0001, |
| "loss": 12.7724, |
| "loss/crossentropy": 2.5314316749572754, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.18420836329460144, |
| "loss/reg": 8.830174446105957, |
| "loss/twn": 0.0, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0047, |
| "grad_norm": 34.5, |
| "grad_norm_var": 175.46979166666668, |
| "learning_rate": 0.0001, |
| "loss": 12.8018, |
| "loss/crossentropy": 2.621443510055542, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.17180635035037994, |
| "loss/reg": 8.82889461517334, |
| "loss/twn": 0.0, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.004725, |
| "grad_norm": 25.625, |
| "grad_norm_var": 177.9603515625, |
| "learning_rate": 0.0001, |
| "loss": 12.6135, |
| "loss/crossentropy": 2.514349937438965, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.146479994058609, |
| "loss/reg": 8.82763385772705, |
| "loss/twn": 0.0, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.00475, |
| "grad_norm": 37.5, |
| "grad_norm_var": 175.47890625, |
| "learning_rate": 0.0001, |
| "loss": 12.2403, |
| "loss/crossentropy": 1.9142255783081055, |
| "loss/hidden": 1.3125, |
| "loss/logits": 0.18714016675949097, |
| "loss/reg": 8.826397895812988, |
| "loss/twn": 0.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.004775, |
| "grad_norm": 32.25, |
| "grad_norm_var": 175.21640625, |
| "learning_rate": 0.0001, |
| "loss": 11.7193, |
| "loss/crossentropy": 1.5540302991867065, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.14472922682762146, |
| "loss/reg": 8.825185775756836, |
| "loss/twn": 0.0, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 27.375, |
| "grad_norm_var": 176.75670572916667, |
| "learning_rate": 0.0001, |
| "loss": 12.9397, |
| "loss/crossentropy": 2.821112632751465, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.14618118107318878, |
| "loss/reg": 8.824004173278809, |
| "loss/twn": 0.0, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.004825, |
| "grad_norm": 58.5, |
| "grad_norm_var": 102.9400390625, |
| "learning_rate": 0.0001, |
| "loss": 12.4139, |
| "loss/crossentropy": 1.8961142301559448, |
| "loss/hidden": 1.578125, |
| "loss/logits": 0.11687049269676208, |
| "loss/reg": 8.822830200195312, |
| "loss/twn": 0.0, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.00485, |
| "grad_norm": 144.0, |
| "grad_norm_var": 889.8869140625, |
| "learning_rate": 0.0001, |
| "loss": 12.4361, |
| "loss/crossentropy": 1.9929572343826294, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.2308717519044876, |
| "loss/reg": 8.821688652038574, |
| "loss/twn": 0.0, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.004875, |
| "grad_norm": 31.125, |
| "grad_norm_var": 876.7212890625, |
| "learning_rate": 0.0001, |
| "loss": 12.6342, |
| "loss/crossentropy": 2.389754056930542, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.21292558312416077, |
| "loss/reg": 8.82056713104248, |
| "loss/twn": 0.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0049, |
| "grad_norm": 26.875, |
| "grad_norm_var": 872.4104166666667, |
| "learning_rate": 0.0001, |
| "loss": 12.6653, |
| "loss/crossentropy": 2.4216485023498535, |
| "loss/hidden": 1.265625, |
| "loss/logits": 0.15854929387569427, |
| "loss/reg": 8.819454193115234, |
| "loss/twn": 0.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.004925, |
| "grad_norm": 107.0, |
| "grad_norm_var": 1157.5080729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.8236, |
| "loss/crossentropy": 1.25285005569458, |
| "loss/hidden": 1.6796875, |
| "loss/logits": 0.07266983389854431, |
| "loss/reg": 8.818378448486328, |
| "loss/twn": 0.0, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.00495, |
| "grad_norm": 29.0, |
| "grad_norm_var": 1169.2854166666666, |
| "learning_rate": 0.0001, |
| "loss": 12.5003, |
| "loss/crossentropy": 2.3957014083862305, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.1622379571199417, |
| "loss/reg": 8.817320823669434, |
| "loss/twn": 0.0, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.004975, |
| "grad_norm": 32.75, |
| "grad_norm_var": 1151.1997395833334, |
| "learning_rate": 0.0001, |
| "loss": 11.6985, |
| "loss/crossentropy": 1.339059591293335, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.15258146822452545, |
| "loss/reg": 8.81628131866455, |
| "loss/twn": 0.0, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 25.375, |
| "grad_norm_var": 1141.7124348958334, |
| "learning_rate": 0.0001, |
| "loss": 11.67, |
| "loss/crossentropy": 1.3512688875198364, |
| "loss/hidden": 1.40625, |
| "loss/logits": 0.09719762206077576, |
| "loss/reg": 8.815267562866211, |
| "loss/twn": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.005025, |
| "grad_norm": 29.875, |
| "grad_norm_var": 1122.8541015625, |
| "learning_rate": 0.0001, |
| "loss": 12.2448, |
| "loss/crossentropy": 1.968360424041748, |
| "loss/hidden": 1.3125, |
| "loss/logits": 0.1496497392654419, |
| "loss/reg": 8.814275741577148, |
| "loss/twn": 0.0, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.00505, |
| "grad_norm": 43.5, |
| "grad_norm_var": 1111.7270182291666, |
| "learning_rate": 0.0001, |
| "loss": 13.7696, |
| "loss/crossentropy": 3.2280731201171875, |
| "loss/hidden": 1.515625, |
| "loss/logits": 0.21256522834300995, |
| "loss/reg": 8.81330680847168, |
| "loss/twn": 0.0, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.005075, |
| "grad_norm": 32.75, |
| "grad_norm_var": 1102.3604166666667, |
| "learning_rate": 0.0001, |
| "loss": 13.1098, |
| "loss/crossentropy": 2.962261199951172, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.17110002040863037, |
| "loss/reg": 8.81234359741211, |
| "loss/twn": 0.0, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.0051, |
| "grad_norm": 25.375, |
| "grad_norm_var": 1120.1874348958333, |
| "learning_rate": 0.0001, |
| "loss": 12.1452, |
| "loss/crossentropy": 1.99165940284729, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.13904325664043427, |
| "loss/reg": 8.811400413513184, |
| "loss/twn": 0.0, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.005125, |
| "grad_norm": 23.75, |
| "grad_norm_var": 1125.0770833333333, |
| "learning_rate": 0.0001, |
| "loss": 12.6833, |
| "loss/crossentropy": 2.6439051628112793, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.16645097732543945, |
| "loss/reg": 8.810476303100586, |
| "loss/twn": 0.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.00515, |
| "grad_norm": 27.125, |
| "grad_norm_var": 1141.0556640625, |
| "learning_rate": 0.0001, |
| "loss": 11.6934, |
| "loss/crossentropy": 1.4352425336837769, |
| "loss/hidden": 1.328125, |
| "loss/logits": 0.12051481008529663, |
| "loss/reg": 8.809564590454102, |
| "loss/twn": 0.0, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.005175, |
| "grad_norm": 50.25, |
| "grad_norm_var": 1134.2119140625, |
| "learning_rate": 0.0001, |
| "loss": 11.5999, |
| "loss/crossentropy": 1.334825038909912, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.06573797762393951, |
| "loss/reg": 8.80868148803711, |
| "loss/twn": 0.0, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0052, |
| "grad_norm": 30.875, |
| "grad_norm_var": 1126.9093098958333, |
| "learning_rate": 0.0001, |
| "loss": 11.4036, |
| "loss/crossentropy": 0.9434633851051331, |
| "loss/hidden": 1.546875, |
| "loss/logits": 0.10541018843650818, |
| "loss/reg": 8.80781364440918, |
| "loss/twn": 0.0, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.005225, |
| "grad_norm": 27.5, |
| "grad_norm_var": 1130.6874348958333, |
| "learning_rate": 0.0001, |
| "loss": 13.0801, |
| "loss/crossentropy": 2.9982831478118896, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.15767033398151398, |
| "loss/reg": 8.806962966918945, |
| "loss/twn": 0.0, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.00525, |
| "grad_norm": 26.5, |
| "grad_norm_var": 410.38795572916666, |
| "learning_rate": 0.0001, |
| "loss": 11.5291, |
| "loss/crossentropy": 1.2577452659606934, |
| "loss/hidden": 1.328125, |
| "loss/logits": 0.13713732361793518, |
| "loss/reg": 8.806113243103027, |
| "loss/twn": 0.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.005275, |
| "grad_norm": 88.5, |
| "grad_norm_var": 581.8854166666666, |
| "learning_rate": 0.0001, |
| "loss": 11.5967, |
| "loss/crossentropy": 1.1915476322174072, |
| "loss/hidden": 1.5234375, |
| "loss/logits": 0.07640685141086578, |
| "loss/reg": 8.805289268493652, |
| "loss/twn": 0.0, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.0053, |
| "grad_norm": 29.0, |
| "grad_norm_var": 578.6791015625, |
| "learning_rate": 0.0001, |
| "loss": 12.6849, |
| "loss/crossentropy": 2.641145944595337, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.16113826632499695, |
| "loss/reg": 8.804474830627441, |
| "loss/twn": 0.0, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.005325, |
| "grad_norm": 29.0, |
| "grad_norm_var": 255.0603515625, |
| "learning_rate": 0.0001, |
| "loss": 12.8455, |
| "loss/crossentropy": 2.67075514793396, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.17574864625930786, |
| "loss/reg": 8.803681373596191, |
| "loss/twn": 0.0, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.00535, |
| "grad_norm": 28.625, |
| "grad_norm_var": 255.34140625, |
| "learning_rate": 0.0001, |
| "loss": 11.9274, |
| "loss/crossentropy": 1.7288811206817627, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.15339060127735138, |
| "loss/reg": 8.802900314331055, |
| "loss/twn": 0.0, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.005375, |
| "grad_norm": 176.0, |
| "grad_norm_var": 1505.94375, |
| "learning_rate": 0.0001, |
| "loss": 12.2937, |
| "loss/crossentropy": 2.09938383102417, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.13438275456428528, |
| "loss/reg": 8.802129745483398, |
| "loss/twn": 0.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0054, |
| "grad_norm": 27.75, |
| "grad_norm_var": 1500.5962890625, |
| "learning_rate": 0.0001, |
| "loss": 12.7792, |
| "loss/crossentropy": 2.651805877685547, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.13851973414421082, |
| "loss/reg": 8.801373481750488, |
| "loss/twn": 0.0, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.005425, |
| "grad_norm": 22.0, |
| "grad_norm_var": 1518.803125, |
| "learning_rate": 0.0001, |
| "loss": 12.8145, |
| "loss/crossentropy": 2.872256278991699, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.14166159927845, |
| "loss/reg": 8.800623893737793, |
| "loss/twn": 0.0, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.00545, |
| "grad_norm": 29.75, |
| "grad_norm_var": 1529.76015625, |
| "learning_rate": 0.0001, |
| "loss": 12.6579, |
| "loss/crossentropy": 2.650221347808838, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.18438857793807983, |
| "loss/reg": 8.79987621307373, |
| "loss/twn": 0.0, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.005475, |
| "grad_norm": 262.0, |
| "grad_norm_var": 4526.4875, |
| "learning_rate": 0.0001, |
| "loss": 11.7506, |
| "loss/crossentropy": 1.4538605213165283, |
| "loss/hidden": 1.3671875, |
| "loss/logits": 0.13036534190177917, |
| "loss/reg": 8.799148559570312, |
| "loss/twn": 0.0, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 28.5, |
| "grad_norm_var": 4514.1291015625, |
| "learning_rate": 0.0001, |
| "loss": 12.6326, |
| "loss/crossentropy": 2.4922804832458496, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.1856255978345871, |
| "loss/reg": 8.798437118530273, |
| "loss/twn": 0.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.005525, |
| "grad_norm": 25.0, |
| "grad_norm_var": 4508.735872395833, |
| "learning_rate": 0.0001, |
| "loss": 12.4029, |
| "loss/crossentropy": 2.312741756439209, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.167415589094162, |
| "loss/reg": 8.797720909118652, |
| "loss/twn": 0.0, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.00555, |
| "grad_norm": 26.25, |
| "grad_norm_var": 4512.242708333333, |
| "learning_rate": 0.0001, |
| "loss": 11.9521, |
| "loss/crossentropy": 1.7981234788894653, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.15380483865737915, |
| "loss/reg": 8.797012329101562, |
| "loss/twn": 0.0, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.005575, |
| "grad_norm": 26.875, |
| "grad_norm_var": 4566.553059895833, |
| "learning_rate": 0.0001, |
| "loss": 11.5449, |
| "loss/crossentropy": 1.3225388526916504, |
| "loss/hidden": 1.3203125, |
| "loss/logits": 0.10575884580612183, |
| "loss/reg": 8.796323776245117, |
| "loss/twn": 0.0, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0056, |
| "grad_norm": 24.625, |
| "grad_norm_var": 4589.3134765625, |
| "learning_rate": 0.0001, |
| "loss": 12.8962, |
| "loss/crossentropy": 2.6721420288085938, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.17063072323799133, |
| "loss/reg": 8.795634269714355, |
| "loss/twn": 0.0, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.005625, |
| "grad_norm": 27.875, |
| "grad_norm_var": 4587.95390625, |
| "learning_rate": 0.0001, |
| "loss": 12.3663, |
| "loss/crossentropy": 2.290670394897461, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.1478402018547058, |
| "loss/reg": 8.794960021972656, |
| "loss/twn": 0.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00565, |
| "grad_norm": 192.0, |
| "grad_norm_var": 5673.358072916667, |
| "learning_rate": 0.0001, |
| "loss": 11.7849, |
| "loss/crossentropy": 1.5705722570419312, |
| "loss/hidden": 1.265625, |
| "loss/logits": 0.15446048974990845, |
| "loss/reg": 8.794285774230957, |
| "loss/twn": 0.0, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.005675, |
| "grad_norm": 37.75, |
| "grad_norm_var": 5676.9, |
| "learning_rate": 0.0001, |
| "loss": 11.3223, |
| "loss/crossentropy": 0.9421064257621765, |
| "loss/hidden": 1.4375, |
| "loss/logits": 0.14906048774719238, |
| "loss/reg": 8.793625831604004, |
| "loss/twn": 0.0, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0057, |
| "grad_norm": 42.0, |
| "grad_norm_var": 5630.154166666666, |
| "learning_rate": 0.0001, |
| "loss": 11.894, |
| "loss/crossentropy": 1.5675556659698486, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.14281445741653442, |
| "loss/reg": 8.79298210144043, |
| "loss/twn": 0.0, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.005725, |
| "grad_norm": 47.25, |
| "grad_norm_var": 5568.54140625, |
| "learning_rate": 0.0001, |
| "loss": 11.7254, |
| "loss/crossentropy": 1.5919896364212036, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.13016732037067413, |
| "loss/reg": 8.79233169555664, |
| "loss/twn": 0.0, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.00575, |
| "grad_norm": 41.75, |
| "grad_norm_var": 5517.3744140625, |
| "learning_rate": 0.0001, |
| "loss": 12.073, |
| "loss/crossentropy": 2.025175094604492, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.12331830710172653, |
| "loss/reg": 8.791699409484863, |
| "loss/twn": 0.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.005775, |
| "grad_norm": 24.25, |
| "grad_norm_var": 4707.4087890625, |
| "learning_rate": 0.0001, |
| "loss": 12.3713, |
| "loss/crossentropy": 2.421731948852539, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.14285139739513397, |
| "loss/reg": 8.791082382202148, |
| "loss/twn": 0.0, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0058, |
| "grad_norm": 26.75, |
| "grad_norm_var": 4711.151497395834, |
| "learning_rate": 0.0001, |
| "loss": 12.6173, |
| "loss/crossentropy": 2.61382794380188, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.13484284281730652, |
| "loss/reg": 8.7904691696167, |
| "loss/twn": 0.0, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.005825, |
| "grad_norm": 41.0, |
| "grad_norm_var": 4649.381705729166, |
| "learning_rate": 0.0001, |
| "loss": 11.6616, |
| "loss/crossentropy": 1.5337637662887573, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.12706667184829712, |
| "loss/reg": 8.789871215820312, |
| "loss/twn": 0.0, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00585, |
| "grad_norm": 28.5, |
| "grad_norm_var": 4653.9337890625, |
| "learning_rate": 0.0001, |
| "loss": 11.8725, |
| "loss/crossentropy": 1.753180980682373, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.1503317952156067, |
| "loss/reg": 8.78927993774414, |
| "loss/twn": 0.0, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.005875, |
| "grad_norm": 47.5, |
| "grad_norm_var": 1649.3697265625, |
| "learning_rate": 0.0001, |
| "loss": 11.8778, |
| "loss/crossentropy": 1.6928982734680176, |
| "loss/hidden": 1.265625, |
| "loss/logits": 0.13060975074768066, |
| "loss/reg": 8.788691520690918, |
| "loss/twn": 0.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0059, |
| "grad_norm": 28.125, |
| "grad_norm_var": 1650.103125, |
| "learning_rate": 0.0001, |
| "loss": 11.9749, |
| "loss/crossentropy": 1.8520106077194214, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.09261026978492737, |
| "loss/reg": 8.788106918334961, |
| "loss/twn": 0.0, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.005925, |
| "grad_norm": 27.625, |
| "grad_norm_var": 1644.2447265625, |
| "learning_rate": 0.0001, |
| "loss": 12.6123, |
| "loss/crossentropy": 2.6141552925109863, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.14809614419937134, |
| "loss/reg": 8.787534713745117, |
| "loss/twn": 0.0, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.00595, |
| "grad_norm": 25.0, |
| "grad_norm_var": 1647.1561848958333, |
| "learning_rate": 0.0001, |
| "loss": 11.6679, |
| "loss/crossentropy": 1.444309949874878, |
| "loss/hidden": 1.34375, |
| "loss/logits": 0.09289486706256866, |
| "loss/reg": 8.786966323852539, |
| "loss/twn": 0.0, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.005975, |
| "grad_norm": 31.0, |
| "grad_norm_var": 1639.3208333333334, |
| "learning_rate": 0.0001, |
| "loss": 10.5623, |
| "loss/crossentropy": 0.3422330319881439, |
| "loss/hidden": 1.3828125, |
| "loss/logits": 0.050805822014808655, |
| "loss/reg": 8.786402702331543, |
| "loss/twn": 0.0, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 28.625, |
| "grad_norm_var": 1630.3541666666667, |
| "learning_rate": 0.0001, |
| "loss": 12.7076, |
| "loss/crossentropy": 2.51440691947937, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.1807696372270584, |
| "loss/reg": 8.785860061645508, |
| "loss/twn": 0.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.006025, |
| "grad_norm": 32.25, |
| "grad_norm_var": 1622.3994140625, |
| "learning_rate": 0.0001, |
| "loss": 11.5458, |
| "loss/crossentropy": 1.3753387928009033, |
| "loss/hidden": 1.265625, |
| "loss/logits": 0.11956524103879929, |
| "loss/reg": 8.785308837890625, |
| "loss/twn": 0.0, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.00605, |
| "grad_norm": 22.375, |
| "grad_norm_var": 69.71223958333333, |
| "learning_rate": 0.0001, |
| "loss": 12.5364, |
| "loss/crossentropy": 2.5992980003356934, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.14453992247581482, |
| "loss/reg": 8.784762382507324, |
| "loss/twn": 0.0, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.006075, |
| "grad_norm": 22.625, |
| "grad_norm_var": 74.90358072916666, |
| "learning_rate": 0.0001, |
| "loss": 11.7046, |
| "loss/crossentropy": 1.60543692111969, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.10400013625621796, |
| "loss/reg": 8.78422737121582, |
| "loss/twn": 0.0, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.0061, |
| "grad_norm": 158.0, |
| "grad_norm_var": 1066.0994140625, |
| "learning_rate": 0.0001, |
| "loss": 12.4219, |
| "loss/crossentropy": 2.382094383239746, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.1779642403125763, |
| "loss/reg": 8.78367805480957, |
| "loss/twn": 0.0, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.006125, |
| "grad_norm": 22.0, |
| "grad_norm_var": 1079.9869140625, |
| "learning_rate": 0.0001, |
| "loss": 12.3096, |
| "loss/crossentropy": 2.332392454147339, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.16278110444545746, |
| "loss/reg": 8.783156394958496, |
| "loss/twn": 0.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.00615, |
| "grad_norm": 24.625, |
| "grad_norm_var": 1089.6643229166666, |
| "learning_rate": 0.0001, |
| "loss": 12.4149, |
| "loss/crossentropy": 2.435525417327881, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.157731294631958, |
| "loss/reg": 8.782625198364258, |
| "loss/twn": 0.0, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.006175, |
| "grad_norm": 190.0, |
| "grad_norm_var": 2527.3729166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.6703, |
| "loss/crossentropy": 1.5230607986450195, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.12289558351039886, |
| "loss/reg": 8.78210735321045, |
| "loss/twn": 0.0, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0062, |
| "grad_norm": 31.25, |
| "grad_norm_var": 2516.3385416666665, |
| "learning_rate": 0.0001, |
| "loss": 12.273, |
| "loss/crossentropy": 2.1257948875427246, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.22504082322120667, |
| "loss/reg": 8.781578063964844, |
| "loss/twn": 0.0, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.006225, |
| "grad_norm": 88.0, |
| "grad_norm_var": 2613.471875, |
| "learning_rate": 0.0001, |
| "loss": 11.5805, |
| "loss/crossentropy": 1.4164283275604248, |
| "loss/hidden": 1.3125, |
| "loss/logits": 0.07047782838344574, |
| "loss/reg": 8.781055450439453, |
| "loss/twn": 0.0, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 28.125, |
| "grad_norm_var": 2614.5791015625, |
| "learning_rate": 0.0001, |
| "loss": 11.4988, |
| "loss/crossentropy": 1.4205645322799683, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.10241450369358063, |
| "loss/reg": 8.78054428100586, |
| "loss/twn": 0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.006275, |
| "grad_norm": 36.0, |
| "grad_norm_var": 2627.360872395833, |
| "learning_rate": 0.0001, |
| "loss": 11.5001, |
| "loss/crossentropy": 1.3272961378097534, |
| "loss/hidden": 1.2890625, |
| "loss/logits": 0.10366377234458923, |
| "loss/reg": 8.780028343200684, |
| "loss/twn": 0.0, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0063, |
| "grad_norm": 25.125, |
| "grad_norm_var": 2636.563997395833, |
| "learning_rate": 0.0001, |
| "loss": 11.1619, |
| "loss/crossentropy": 1.1497949361801147, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.09979332238435745, |
| "loss/reg": 8.77951717376709, |
| "loss/twn": 0.0, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.006325, |
| "grad_norm": 25.375, |
| "grad_norm_var": 2643.454622395833, |
| "learning_rate": 0.0001, |
| "loss": 12.5314, |
| "loss/crossentropy": 2.5820140838623047, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.16261206567287445, |
| "loss/reg": 8.77900505065918, |
| "loss/twn": 0.0, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.00635, |
| "grad_norm": 19.625, |
| "grad_norm_var": 2662.7458333333334, |
| "learning_rate": 0.0001, |
| "loss": 12.6876, |
| "loss/crossentropy": 2.8294739723205566, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.1420997828245163, |
| "loss/reg": 8.77851676940918, |
| "loss/twn": 0.0, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.006375, |
| "grad_norm": 27.375, |
| "grad_norm_var": 2672.297330729167, |
| "learning_rate": 0.0001, |
| "loss": 12.2885, |
| "loss/crossentropy": 2.421785593032837, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.11601483821868896, |
| "loss/reg": 8.778022766113281, |
| "loss/twn": 0.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 21.125, |
| "grad_norm_var": 2696.023893229167, |
| "learning_rate": 0.0001, |
| "loss": 12.4017, |
| "loss/crossentropy": 2.6165196895599365, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.13267028331756592, |
| "loss/reg": 8.777534484863281, |
| "loss/twn": 0.0, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.006425, |
| "grad_norm": 21.875, |
| "grad_norm_var": 2725.046875, |
| "learning_rate": 0.0001, |
| "loss": 12.231, |
| "loss/crossentropy": 2.212022304534912, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.13251882791519165, |
| "loss/reg": 8.777040481567383, |
| "loss/twn": 0.0, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.00645, |
| "grad_norm": 36.5, |
| "grad_norm_var": 2689.7858723958334, |
| "learning_rate": 0.0001, |
| "loss": 11.5296, |
| "loss/crossentropy": 1.4590688943862915, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.11432604491710663, |
| "loss/reg": 8.776566505432129, |
| "loss/twn": 0.0, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.006475, |
| "grad_norm": 35.5, |
| "grad_norm_var": 2655.553125, |
| "learning_rate": 0.0001, |
| "loss": 12.7234, |
| "loss/crossentropy": 2.685680627822876, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.13663552701473236, |
| "loss/reg": 8.776080131530762, |
| "loss/twn": 0.0, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 20.125, |
| "grad_norm_var": 1847.3327473958334, |
| "learning_rate": 0.0001, |
| "loss": 11.7099, |
| "loss/crossentropy": 1.7988297939300537, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.10424751788377762, |
| "loss/reg": 8.775606155395508, |
| "loss/twn": 0.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.006525, |
| "grad_norm": 41.25, |
| "grad_norm_var": 1822.2676432291667, |
| "learning_rate": 0.0001, |
| "loss": 11.8165, |
| "loss/crossentropy": 1.7764484882354736, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.10863903164863586, |
| "loss/reg": 8.775127410888672, |
| "loss/twn": 0.0, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00655, |
| "grad_norm": 45.5, |
| "grad_norm_var": 1801.1643229166666, |
| "learning_rate": 0.0001, |
| "loss": 10.4943, |
| "loss/crossentropy": 0.26185697317123413, |
| "loss/hidden": 1.40625, |
| "loss/logits": 0.05150225758552551, |
| "loss/reg": 8.774664878845215, |
| "loss/twn": 0.0, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.006575, |
| "grad_norm": 31.125, |
| "grad_norm_var": 271.0822265625, |
| "learning_rate": 0.0001, |
| "loss": 12.4311, |
| "loss/crossentropy": 2.5030641555786133, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.16160471737384796, |
| "loss/reg": 8.774203300476074, |
| "loss/twn": 0.0, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0066, |
| "grad_norm": 39.25, |
| "grad_norm_var": 272.8238932291667, |
| "learning_rate": 0.0001, |
| "loss": 11.6719, |
| "loss/crossentropy": 1.5901890993118286, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.10485959053039551, |
| "loss/reg": 8.773754119873047, |
| "loss/twn": 0.0, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.006625, |
| "grad_norm": 38.75, |
| "grad_norm_var": 68.9494140625, |
| "learning_rate": 0.0001, |
| "loss": 11.2611, |
| "loss/crossentropy": 1.2814500331878662, |
| "loss/hidden": 1.1015625, |
| "loss/logits": 0.10478021949529648, |
| "loss/reg": 8.773300170898438, |
| "loss/twn": 0.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.00665, |
| "grad_norm": 24.25, |
| "grad_norm_var": 71.26432291666667, |
| "learning_rate": 0.0001, |
| "loss": 12.5356, |
| "loss/crossentropy": 2.600585699081421, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.16212967038154602, |
| "loss/reg": 8.772849082946777, |
| "loss/twn": 0.0, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.006675, |
| "grad_norm": 52.75, |
| "grad_norm_var": 100.978125, |
| "learning_rate": 0.0001, |
| "loss": 12.0577, |
| "loss/crossentropy": 2.090177536010742, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.1481959968805313, |
| "loss/reg": 8.772411346435547, |
| "loss/twn": 0.0, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.0067, |
| "grad_norm": 32.25, |
| "grad_norm_var": 98.0056640625, |
| "learning_rate": 0.0001, |
| "loss": 12.5629, |
| "loss/crossentropy": 2.689206838607788, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.12127072364091873, |
| "loss/reg": 8.77196216583252, |
| "loss/twn": 0.0, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.006725, |
| "grad_norm": 120.0, |
| "grad_norm_var": 573.5455729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.9037, |
| "loss/crossentropy": 1.6351962089538574, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.10639579594135284, |
| "loss/reg": 8.771524429321289, |
| "loss/twn": 0.0, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.00675, |
| "grad_norm": 23.875, |
| "grad_norm_var": 564.2885416666667, |
| "learning_rate": 0.0001, |
| "loss": 11.826, |
| "loss/crossentropy": 1.8710798025131226, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.12132292985916138, |
| "loss/reg": 8.771089553833008, |
| "loss/twn": 0.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.006775, |
| "grad_norm": 35.75, |
| "grad_norm_var": 556.5634765625, |
| "learning_rate": 0.0001, |
| "loss": 12.4072, |
| "loss/crossentropy": 2.4087181091308594, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.14971445500850677, |
| "loss/reg": 8.770654678344727, |
| "loss/twn": 0.0, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.0068, |
| "grad_norm": 25.625, |
| "grad_norm_var": 547.2587890625, |
| "learning_rate": 0.0001, |
| "loss": 11.6378, |
| "loss/crossentropy": 1.6593881845474243, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.09878332912921906, |
| "loss/reg": 8.770234107971191, |
| "loss/twn": 0.0, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.006825, |
| "grad_norm": 80.0, |
| "grad_norm_var": 625.515625, |
| "learning_rate": 0.0001, |
| "loss": 10.9479, |
| "loss/crossentropy": 0.7638819813728333, |
| "loss/hidden": 1.328125, |
| "loss/logits": 0.08607713133096695, |
| "loss/reg": 8.769810676574707, |
| "loss/twn": 0.0, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.00685, |
| "grad_norm": 34.25, |
| "grad_norm_var": 627.67890625, |
| "learning_rate": 0.0001, |
| "loss": 11.8293, |
| "loss/crossentropy": 1.6784312725067139, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.17050763964653015, |
| "loss/reg": 8.76939868927002, |
| "loss/twn": 0.0, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.006875, |
| "grad_norm": 22.625, |
| "grad_norm_var": 650.0827473958333, |
| "learning_rate": 0.0001, |
| "loss": 12.4934, |
| "loss/crossentropy": 2.5318715572357178, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.15351711213588715, |
| "loss/reg": 8.76897144317627, |
| "loss/twn": 0.0, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0069, |
| "grad_norm": 28.5, |
| "grad_norm_var": 630.3622395833333, |
| "learning_rate": 0.0001, |
| "loss": 13.0061, |
| "loss/crossentropy": 3.016630172729492, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.15056678652763367, |
| "loss/reg": 8.768550872802734, |
| "loss/twn": 0.0, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.006925, |
| "grad_norm": 29.75, |
| "grad_norm_var": 640.1372395833333, |
| "learning_rate": 0.0001, |
| "loss": 12.6228, |
| "loss/crossentropy": 2.6855499744415283, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.16132760047912598, |
| "loss/reg": 8.768143653869629, |
| "loss/twn": 0.0, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.00695, |
| "grad_norm": 31.125, |
| "grad_norm_var": 645.4155598958333, |
| "learning_rate": 0.0001, |
| "loss": 11.638, |
| "loss/crossentropy": 1.5987094640731812, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.12314014881849289, |
| "loss/reg": 8.767732620239258, |
| "loss/twn": 0.0, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.006975, |
| "grad_norm": 25.625, |
| "grad_norm_var": 654.2671223958333, |
| "learning_rate": 0.0001, |
| "loss": 11.8356, |
| "loss/crossentropy": 1.859243392944336, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.12307290732860565, |
| "loss/reg": 8.767321586608887, |
| "loss/twn": 0.0, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 31.875, |
| "grad_norm_var": 658.6729166666667, |
| "learning_rate": 0.0001, |
| "loss": 12.2331, |
| "loss/crossentropy": 2.1102001667022705, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.15288487076759338, |
| "loss/reg": 8.766907691955566, |
| "loss/twn": 0.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.007025, |
| "grad_norm": 29.625, |
| "grad_norm_var": 665.1697265625, |
| "learning_rate": 0.0001, |
| "loss": 10.95, |
| "loss/crossentropy": 0.5632294416427612, |
| "loss/hidden": 1.53125, |
| "loss/logits": 0.08897791802883148, |
| "loss/reg": 8.766514778137207, |
| "loss/twn": 0.0, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.00705, |
| "grad_norm": 35.75, |
| "grad_norm_var": 650.4473307291667, |
| "learning_rate": 0.0001, |
| "loss": 11.36, |
| "loss/crossentropy": 1.0557889938354492, |
| "loss/hidden": 1.4375, |
| "loss/logits": 0.10060230642557144, |
| "loss/reg": 8.766109466552734, |
| "loss/twn": 0.0, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.007075, |
| "grad_norm": 31.5, |
| "grad_norm_var": 642.4343098958333, |
| "learning_rate": 0.0001, |
| "loss": 11.2332, |
| "loss/crossentropy": 1.1803399324417114, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.10748447477817535, |
| "loss/reg": 8.765719413757324, |
| "loss/twn": 0.0, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.0071, |
| "grad_norm": 22.75, |
| "grad_norm_var": 656.1598307291666, |
| "learning_rate": 0.0001, |
| "loss": 12.0096, |
| "loss/crossentropy": 2.2622509002685547, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.11096324026584625, |
| "loss/reg": 8.765324592590332, |
| "loss/twn": 0.0, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.007125, |
| "grad_norm": 90.0, |
| "grad_norm_var": 384.5660807291667, |
| "learning_rate": 0.0001, |
| "loss": 11.7557, |
| "loss/crossentropy": 1.7607800960540771, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.11281895637512207, |
| "loss/reg": 8.764936447143555, |
| "loss/twn": 0.0, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.00715, |
| "grad_norm": 21.875, |
| "grad_norm_var": 388.0931640625, |
| "learning_rate": 0.0001, |
| "loss": 11.5935, |
| "loss/crossentropy": 1.4926972389221191, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.11746557056903839, |
| "loss/reg": 8.76453971862793, |
| "loss/twn": 0.0, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.007175, |
| "grad_norm": 46.5, |
| "grad_norm_var": 394.90149739583336, |
| "learning_rate": 0.0001, |
| "loss": 12.2209, |
| "loss/crossentropy": 2.3141627311706543, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.14649777114391327, |
| "loss/reg": 8.764161109924316, |
| "loss/twn": 0.0, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.0072, |
| "grad_norm": 22.25, |
| "grad_norm_var": 400.6020833333333, |
| "learning_rate": 0.0001, |
| "loss": 12.6423, |
| "loss/crossentropy": 2.647719621658325, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.16048741340637207, |
| "loss/reg": 8.763778686523438, |
| "loss/twn": 0.0, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.007225, |
| "grad_norm": 19.625, |
| "grad_norm_var": 278.2483723958333, |
| "learning_rate": 0.0001, |
| "loss": 12.4311, |
| "loss/crossentropy": 2.551328182220459, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.14375020563602448, |
| "loss/reg": 8.763405799865723, |
| "loss/twn": 0.0, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.00725, |
| "grad_norm": 25.25, |
| "grad_norm_var": 281.4827473958333, |
| "learning_rate": 0.0001, |
| "loss": 12.4556, |
| "loss/crossentropy": 2.5579841136932373, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.150177001953125, |
| "loss/reg": 8.763019561767578, |
| "loss/twn": 0.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.007275, |
| "grad_norm": 61.25, |
| "grad_norm_var": 325.59973958333336, |
| "learning_rate": 0.0001, |
| "loss": 12.4022, |
| "loss/crossentropy": 2.430692195892334, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.14642293751239777, |
| "loss/reg": 8.762632369995117, |
| "loss/twn": 0.0, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.0073, |
| "grad_norm": 37.25, |
| "grad_norm_var": 323.29375, |
| "learning_rate": 0.0001, |
| "loss": 11.7775, |
| "loss/crossentropy": 1.8944785594940186, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.12074927985668182, |
| "loss/reg": 8.762247085571289, |
| "loss/twn": 0.0, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.007325, |
| "grad_norm": 42.0, |
| "grad_norm_var": 323.89348958333335, |
| "learning_rate": 0.0001, |
| "loss": 12.072, |
| "loss/crossentropy": 1.6495473384857178, |
| "loss/hidden": 1.5546875, |
| "loss/logits": 0.10591432452201843, |
| "loss/reg": 8.761868476867676, |
| "loss/twn": 0.0, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.00735, |
| "grad_norm": 56.75, |
| "grad_norm_var": 348.6509765625, |
| "learning_rate": 0.0001, |
| "loss": 10.5072, |
| "loss/crossentropy": 0.5604190230369568, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.04467451944947243, |
| "loss/reg": 8.76148509979248, |
| "loss/twn": 0.0, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.007375, |
| "grad_norm": 22.75, |
| "grad_norm_var": 353.71666666666664, |
| "learning_rate": 0.0001, |
| "loss": 12.209, |
| "loss/crossentropy": 2.1619114875793457, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.1531488001346588, |
| "loss/reg": 8.761101722717285, |
| "loss/twn": 0.0, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.0074, |
| "grad_norm": 61.5, |
| "grad_norm_var": 387.09108072916666, |
| "learning_rate": 0.0001, |
| "loss": 10.8356, |
| "loss/crossentropy": 0.6190763711929321, |
| "loss/hidden": 1.3984375, |
| "loss/logits": 0.057369355112314224, |
| "loss/reg": 8.760721206665039, |
| "loss/twn": 0.0, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.007425, |
| "grad_norm": 68.0, |
| "grad_norm_var": 430.3229166666667, |
| "learning_rate": 0.0001, |
| "loss": 12.109, |
| "loss/crossentropy": 2.233776569366455, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.12656426429748535, |
| "loss/reg": 8.760355949401855, |
| "loss/twn": 0.0, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00745, |
| "grad_norm": 31.75, |
| "grad_norm_var": 434.42291666666665, |
| "learning_rate": 0.0001, |
| "loss": 12.7227, |
| "loss/crossentropy": 2.879485845565796, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.1535433977842331, |
| "loss/reg": 8.759981155395508, |
| "loss/twn": 0.0, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.007475, |
| "grad_norm": 81.5, |
| "grad_norm_var": 525.25625, |
| "learning_rate": 0.0001, |
| "loss": 11.7873, |
| "loss/crossentropy": 1.4932836294174194, |
| "loss/hidden": 1.4453125, |
| "loss/logits": 0.08904070407152176, |
| "loss/reg": 8.759614944458008, |
| "loss/twn": 0.0, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 23.375, |
| "grad_norm_var": 523.4733723958333, |
| "learning_rate": 0.0001, |
| "loss": 11.3579, |
| "loss/crossentropy": 1.3830214738845825, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.09057402610778809, |
| "loss/reg": 8.759257316589355, |
| "loss/twn": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.007525, |
| "grad_norm": 26.875, |
| "grad_norm_var": 389.365625, |
| "learning_rate": 0.0001, |
| "loss": 12.1102, |
| "loss/crossentropy": 2.097208261489868, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.14475323259830475, |
| "loss/reg": 8.75888442993164, |
| "loss/twn": 0.0, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.00755, |
| "grad_norm": 75.0, |
| "grad_norm_var": 433.6087890625, |
| "learning_rate": 0.0001, |
| "loss": 11.4727, |
| "loss/crossentropy": 1.628718614578247, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.10885927081108093, |
| "loss/reg": 8.758538246154785, |
| "loss/twn": 0.0, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.007575, |
| "grad_norm": 57.75, |
| "grad_norm_var": 445.4916015625, |
| "learning_rate": 0.0001, |
| "loss": 10.6563, |
| "loss/crossentropy": 0.6478983163833618, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.07052306830883026, |
| "loss/reg": 8.758191108703613, |
| "loss/twn": 0.0, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.0076, |
| "grad_norm": 27.375, |
| "grad_norm_var": 431.89166666666665, |
| "learning_rate": 0.0001, |
| "loss": 11.466, |
| "loss/crossentropy": 1.43612539768219, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.10010282695293427, |
| "loss/reg": 8.757850646972656, |
| "loss/twn": 0.0, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.007625, |
| "grad_norm": 27.625, |
| "grad_norm_var": 408.9583333333333, |
| "learning_rate": 0.0001, |
| "loss": 12.4937, |
| "loss/crossentropy": 2.69258975982666, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.11779216676950455, |
| "loss/reg": 8.7575101852417, |
| "loss/twn": 0.0, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.00765, |
| "grad_norm": 29.75, |
| "grad_norm_var": 398.1489583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.4398, |
| "loss/crossentropy": 2.6579396724700928, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.1262587159872055, |
| "loss/reg": 8.757180213928223, |
| "loss/twn": 0.0, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.007675, |
| "grad_norm": 334.0, |
| "grad_norm_var": 5614.776822916667, |
| "learning_rate": 0.0001, |
| "loss": 15.3648, |
| "loss/crossentropy": 3.758509874343872, |
| "loss/hidden": 2.703125, |
| "loss/logits": 0.14630486071109772, |
| "loss/reg": 8.756852149963379, |
| "loss/twn": 0.0, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.0077, |
| "grad_norm": 27.25, |
| "grad_norm_var": 5654.964322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.3105, |
| "loss/crossentropy": 2.4272522926330566, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.1540563553571701, |
| "loss/reg": 8.7565279006958, |
| "loss/twn": 0.0, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.007725, |
| "grad_norm": 29.0, |
| "grad_norm_var": 5700.32890625, |
| "learning_rate": 0.0001, |
| "loss": 11.5304, |
| "loss/crossentropy": 1.5074149370193481, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.11834341287612915, |
| "loss/reg": 8.756197929382324, |
| "loss/twn": 0.0, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.00775, |
| "grad_norm": 21.25, |
| "grad_norm_var": 5800.468489583333, |
| "learning_rate": 0.0001, |
| "loss": 12.6405, |
| "loss/crossentropy": 2.693897008895874, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.17511111497879028, |
| "loss/reg": 8.755884170532227, |
| "loss/twn": 0.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.007775, |
| "grad_norm": 26.5, |
| "grad_norm_var": 5783.198958333333, |
| "learning_rate": 0.0001, |
| "loss": 11.6554, |
| "loss/crossentropy": 1.6191575527191162, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.13227222859859467, |
| "loss/reg": 8.755560874938965, |
| "loss/twn": 0.0, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.0078, |
| "grad_norm": 40.5, |
| "grad_norm_var": 5804.548958333334, |
| "learning_rate": 0.0001, |
| "loss": 11.7846, |
| "loss/crossentropy": 1.7809689044952393, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.11560114473104477, |
| "loss/reg": 8.755224227905273, |
| "loss/twn": 0.0, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.007825, |
| "grad_norm": 163.0, |
| "grad_norm_var": 6495.673958333334, |
| "learning_rate": 0.0001, |
| "loss": 11.3885, |
| "loss/crossentropy": 1.3066282272338867, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.10039080679416656, |
| "loss/reg": 8.754886627197266, |
| "loss/twn": 0.0, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.00785, |
| "grad_norm": 45.0, |
| "grad_norm_var": 6449.837239583333, |
| "learning_rate": 0.0001, |
| "loss": 11.5109, |
| "loss/crossentropy": 1.4018994569778442, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.09668240696191788, |
| "loss/reg": 8.754547119140625, |
| "loss/twn": 0.0, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.007875, |
| "grad_norm": 68.5, |
| "grad_norm_var": 6431.339322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.4925, |
| "loss/crossentropy": 2.5819625854492188, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.14064857363700867, |
| "loss/reg": 8.754220962524414, |
| "loss/twn": 0.0, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.0079, |
| "grad_norm": 29.75, |
| "grad_norm_var": 6399.414518229167, |
| "learning_rate": 0.0001, |
| "loss": 11.4931, |
| "loss/crossentropy": 1.4292715787887573, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.1146152913570404, |
| "loss/reg": 8.75389289855957, |
| "loss/twn": 0.0, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.007925, |
| "grad_norm": 23.25, |
| "grad_norm_var": 6418.334375, |
| "learning_rate": 0.0001, |
| "loss": 12.4691, |
| "loss/crossentropy": 2.60555362701416, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.16074895858764648, |
| "loss/reg": 8.753567695617676, |
| "loss/twn": 0.0, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.00795, |
| "grad_norm": 23.625, |
| "grad_norm_var": 6508.5884765625, |
| "learning_rate": 0.0001, |
| "loss": 11.6346, |
| "loss/crossentropy": 1.5361746549606323, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.12646999955177307, |
| "loss/reg": 8.753239631652832, |
| "loss/twn": 0.0, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.007975, |
| "grad_norm": 25.125, |
| "grad_norm_var": 6588.740625, |
| "learning_rate": 0.0001, |
| "loss": 11.3016, |
| "loss/crossentropy": 1.3302582502365112, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.09347252547740936, |
| "loss/reg": 8.752906799316406, |
| "loss/twn": 0.0, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 412.0, |
| "grad_norm_var": 14220.942643229168, |
| "learning_rate": 0.0001, |
| "loss": 12.3581, |
| "loss/crossentropy": 2.4794158935546875, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.15347327291965485, |
| "loss/reg": 8.752571105957031, |
| "loss/twn": 0.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.008025, |
| "grad_norm": 51.5, |
| "grad_norm_var": 14080.664583333333, |
| "learning_rate": 0.0001, |
| "loss": 11.3922, |
| "loss/crossentropy": 0.9147974848747253, |
| "loss/hidden": 1.640625, |
| "loss/logits": 0.0845554918050766, |
| "loss/reg": 8.752232551574707, |
| "loss/twn": 0.0, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.00805, |
| "grad_norm": 26.25, |
| "grad_norm_var": 14106.921875, |
| "learning_rate": 0.0001, |
| "loss": 11.2252, |
| "loss/crossentropy": 1.3013337850570679, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.07819058001041412, |
| "loss/reg": 8.751914978027344, |
| "loss/twn": 0.0, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.008075, |
| "grad_norm": 29.25, |
| "grad_norm_var": 9759.47265625, |
| "learning_rate": 0.0001, |
| "loss": 12.312, |
| "loss/crossentropy": 2.5591752529144287, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.13801756501197815, |
| "loss/reg": 8.75157356262207, |
| "loss/twn": 0.0, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.0081, |
| "grad_norm": 30.25, |
| "grad_norm_var": 9744.89140625, |
| "learning_rate": 0.0001, |
| "loss": 13.038, |
| "loss/crossentropy": 2.984992027282715, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.18459582328796387, |
| "loss/reg": 8.751255989074707, |
| "loss/twn": 0.0, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.008125, |
| "grad_norm": 33.75, |
| "grad_norm_var": 9723.313541666666, |
| "learning_rate": 0.0001, |
| "loss": 12.2076, |
| "loss/crossentropy": 2.3149266242980957, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.10270445048809052, |
| "loss/reg": 8.750934600830078, |
| "loss/twn": 0.0, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.00815, |
| "grad_norm": 86.5, |
| "grad_norm_var": 9603.620572916667, |
| "learning_rate": 0.0001, |
| "loss": 11.0792, |
| "loss/crossentropy": 0.567532479763031, |
| "loss/hidden": 1.6484375, |
| "loss/logits": 0.11262989789247513, |
| "loss/reg": 8.750617027282715, |
| "loss/twn": 0.0, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.008175, |
| "grad_norm": 28.125, |
| "grad_norm_var": 9594.431705729166, |
| "learning_rate": 0.0001, |
| "loss": 11.6055, |
| "loss/crossentropy": 1.5313806533813477, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.1284753978252411, |
| "loss/reg": 8.750301361083984, |
| "loss/twn": 0.0, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0082, |
| "grad_norm": 268.0, |
| "grad_norm_var": 11941.236393229166, |
| "learning_rate": 0.0001, |
| "loss": 12.4394, |
| "loss/crossentropy": 2.4692862033843994, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.1654619425535202, |
| "loss/reg": 8.749983787536621, |
| "loss/twn": 0.0, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.008225, |
| "grad_norm": 172.0, |
| "grad_norm_var": 12041.108268229167, |
| "learning_rate": 0.0001, |
| "loss": 11.5647, |
| "loss/crossentropy": 1.4892176389694214, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.14615678787231445, |
| "loss/reg": 8.749679565429688, |
| "loss/twn": 0.0, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.00825, |
| "grad_norm": 39.0, |
| "grad_norm_var": 12075.002018229166, |
| "learning_rate": 0.0001, |
| "loss": 10.6763, |
| "loss/crossentropy": 0.31126442551612854, |
| "loss/hidden": 1.5625, |
| "loss/logits": 0.05318440496921539, |
| "loss/reg": 8.749366760253906, |
| "loss/twn": 0.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.008275, |
| "grad_norm": 24.625, |
| "grad_norm_var": 12287.041666666666, |
| "learning_rate": 0.0001, |
| "loss": 12.5096, |
| "loss/crossentropy": 2.6848809719085693, |
| "loss/hidden": 0.921875, |
| "loss/logits": 0.1537654995918274, |
| "loss/reg": 8.74905776977539, |
| "loss/twn": 0.0, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.0083, |
| "grad_norm": 34.5, |
| "grad_norm_var": 12255.71640625, |
| "learning_rate": 0.0001, |
| "loss": 11.3512, |
| "loss/crossentropy": 1.4118411540985107, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.10468737035989761, |
| "loss/reg": 8.74875259399414, |
| "loss/twn": 0.0, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.008325, |
| "grad_norm": 25.5, |
| "grad_norm_var": 12238.4875, |
| "learning_rate": 0.0001, |
| "loss": 12.3364, |
| "loss/crossentropy": 2.4116592407226562, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.14506308734416962, |
| "loss/reg": 8.748454093933105, |
| "loss/twn": 0.0, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.00835, |
| "grad_norm": 37.75, |
| "grad_norm_var": 12141.253059895833, |
| "learning_rate": 0.0001, |
| "loss": 12.4524, |
| "loss/crossentropy": 2.5784506797790527, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.14529481530189514, |
| "loss/reg": 8.748153686523438, |
| "loss/twn": 0.0, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.008375, |
| "grad_norm": 22.375, |
| "grad_norm_var": 12162.857747395834, |
| "learning_rate": 0.0001, |
| "loss": 12.7014, |
| "loss/crossentropy": 2.979144334793091, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.14632686972618103, |
| "loss/reg": 8.747849464416504, |
| "loss/twn": 0.0, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0084, |
| "grad_norm": 20.375, |
| "grad_norm_var": 4547.587239583333, |
| "learning_rate": 0.0001, |
| "loss": 11.7995, |
| "loss/crossentropy": 1.765997290611267, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.09842629730701447, |
| "loss/reg": 8.747549057006836, |
| "loss/twn": 0.0, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.008425, |
| "grad_norm": 20.5, |
| "grad_norm_var": 4634.968489583333, |
| "learning_rate": 0.0001, |
| "loss": 12.3439, |
| "loss/crossentropy": 2.6116814613342285, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.12173347175121307, |
| "loss/reg": 8.74724292755127, |
| "loss/twn": 0.0, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.00845, |
| "grad_norm": 53.0, |
| "grad_norm_var": 4572.969791666666, |
| "learning_rate": 0.0001, |
| "loss": 12.098, |
| "loss/crossentropy": 2.3496603965759277, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.1185464859008789, |
| "loss/reg": 8.746953964233398, |
| "loss/twn": 0.0, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.008475, |
| "grad_norm": 42.5, |
| "grad_norm_var": 4533.426822916666, |
| "learning_rate": 0.0001, |
| "loss": 12.3659, |
| "loss/crossentropy": 2.4601008892059326, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.1513727754354477, |
| "loss/reg": 8.746654510498047, |
| "loss/twn": 0.0, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 33.75, |
| "grad_norm_var": 4520.92890625, |
| "learning_rate": 0.0001, |
| "loss": 11.723, |
| "loss/crossentropy": 1.5352638959884644, |
| "loss/hidden": 1.296875, |
| "loss/logits": 0.14454752206802368, |
| "loss/reg": 8.746353149414062, |
| "loss/twn": 0.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.008525, |
| "grad_norm": 21.75, |
| "grad_norm_var": 4570.15390625, |
| "learning_rate": 0.0001, |
| "loss": 11.4675, |
| "loss/crossentropy": 1.5099096298217773, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.11779716610908508, |
| "loss/reg": 8.746051788330078, |
| "loss/twn": 0.0, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.00855, |
| "grad_norm": 26.5, |
| "grad_norm_var": 4568.27890625, |
| "learning_rate": 0.0001, |
| "loss": 11.7061, |
| "loss/crossentropy": 1.8030520677566528, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.10255296528339386, |
| "loss/reg": 8.745758056640625, |
| "loss/twn": 0.0, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.008575, |
| "grad_norm": 37.75, |
| "grad_norm_var": 4540.361393229166, |
| "learning_rate": 0.0001, |
| "loss": 11.5163, |
| "loss/crossentropy": 1.4131925106048584, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.11541002243757248, |
| "loss/reg": 8.745460510253906, |
| "loss/twn": 0.0, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.0086, |
| "grad_norm": 27.375, |
| "grad_norm_var": 1325.13515625, |
| "learning_rate": 0.0001, |
| "loss": 11.6438, |
| "loss/crossentropy": 1.7049617767333984, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.11557280272245407, |
| "loss/reg": 8.745163917541504, |
| "loss/twn": 0.0, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.008625, |
| "grad_norm": 24.25, |
| "grad_norm_var": 88.190625, |
| "learning_rate": 0.0001, |
| "loss": 11.9095, |
| "loss/crossentropy": 2.0258209705352783, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.16229522228240967, |
| "loss/reg": 8.744869232177734, |
| "loss/twn": 0.0, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.00865, |
| "grad_norm": 24.375, |
| "grad_norm_var": 85.4103515625, |
| "learning_rate": 0.0001, |
| "loss": 12.2719, |
| "loss/crossentropy": 2.4812276363372803, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.14763157069683075, |
| "loss/reg": 8.744575500488281, |
| "loss/twn": 0.0, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.008675, |
| "grad_norm": 23.375, |
| "grad_norm_var": 86.3712890625, |
| "learning_rate": 0.0001, |
| "loss": 11.4653, |
| "loss/crossentropy": 1.5430082082748413, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.10773609578609467, |
| "loss/reg": 8.74427604675293, |
| "loss/twn": 0.0, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0087, |
| "grad_norm": 36.0, |
| "grad_norm_var": 87.4666015625, |
| "learning_rate": 0.0001, |
| "loss": 11.3867, |
| "loss/crossentropy": 1.3218666315078735, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.0786326676607132, |
| "loss/reg": 8.74398136138916, |
| "loss/twn": 0.0, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.008725, |
| "grad_norm": 31.75, |
| "grad_norm_var": 86.30774739583333, |
| "learning_rate": 0.0001, |
| "loss": 12.6244, |
| "loss/crossentropy": 2.67627215385437, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.15751376748085022, |
| "loss/reg": 8.74368953704834, |
| "loss/twn": 0.0, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 33.75, |
| "grad_norm_var": 83.2869140625, |
| "learning_rate": 0.0001, |
| "loss": 11.918, |
| "loss/crossentropy": 1.8063514232635498, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.14951637387275696, |
| "loss/reg": 8.74339485168457, |
| "loss/twn": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.008775, |
| "grad_norm": 32.0, |
| "grad_norm_var": 79.34166666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.7796, |
| "loss/crossentropy": 1.755987286567688, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.10084941238164902, |
| "loss/reg": 8.74308967590332, |
| "loss/twn": 0.0, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.0088, |
| "grad_norm": 164.0, |
| "grad_norm_var": 1173.5098307291667, |
| "learning_rate": 0.0001, |
| "loss": 12.2144, |
| "loss/crossentropy": 2.4459450244903564, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.138929545879364, |
| "loss/reg": 8.742794036865234, |
| "loss/twn": 0.0, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.008825, |
| "grad_norm": 28.0, |
| "grad_norm_var": 1157.9863932291667, |
| "learning_rate": 0.0001, |
| "loss": 11.4267, |
| "loss/crossentropy": 1.6805838346481323, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.12466993927955627, |
| "loss/reg": 8.74250316619873, |
| "loss/twn": 0.0, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.00885, |
| "grad_norm": 27.125, |
| "grad_norm_var": 1155.0080729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.8288, |
| "loss/crossentropy": 1.8084262609481812, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.13754084706306458, |
| "loss/reg": 8.742229461669922, |
| "loss/twn": 0.0, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.008875, |
| "grad_norm": 52.0, |
| "grad_norm_var": 1165.85390625, |
| "learning_rate": 0.0001, |
| "loss": 11.3444, |
| "loss/crossentropy": 1.079156756401062, |
| "loss/hidden": 1.453125, |
| "loss/logits": 0.0701700896024704, |
| "loss/reg": 8.741930961608887, |
| "loss/twn": 0.0, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.0089, |
| "grad_norm": 30.25, |
| "grad_norm_var": 1169.0622395833334, |
| "learning_rate": 0.0001, |
| "loss": 12.0921, |
| "loss/crossentropy": 2.062542200088501, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.13943998515605927, |
| "loss/reg": 8.741643905639648, |
| "loss/twn": 0.0, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.008925, |
| "grad_norm": 49.25, |
| "grad_norm_var": 1153.9372395833334, |
| "learning_rate": 0.0001, |
| "loss": 11.8644, |
| "loss/crossentropy": 1.820883870124817, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.12246385216712952, |
| "loss/reg": 8.741365432739258, |
| "loss/twn": 0.0, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00895, |
| "grad_norm": 45.0, |
| "grad_norm_var": 1140.8330729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.1199, |
| "loss/crossentropy": 1.152689814567566, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.07769455760717392, |
| "loss/reg": 8.741079330444336, |
| "loss/twn": 0.0, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.008975, |
| "grad_norm": 61.5, |
| "grad_norm_var": 1163.7666666666667, |
| "learning_rate": 0.0001, |
| "loss": 12.2674, |
| "loss/crossentropy": 2.2917380332946777, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.17231391370296478, |
| "loss/reg": 8.740799903869629, |
| "loss/twn": 0.0, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 32.75, |
| "grad_norm_var": 1154.2848307291667, |
| "learning_rate": 0.0001, |
| "loss": 10.784, |
| "loss/crossentropy": 0.7408412098884583, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.08388422429561615, |
| "loss/reg": 8.740523338317871, |
| "loss/twn": 0.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.009025, |
| "grad_norm": 32.75, |
| "grad_norm_var": 1137.0280598958334, |
| "learning_rate": 0.0001, |
| "loss": 12.1991, |
| "loss/crossentropy": 2.3340628147125244, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.1560674011707306, |
| "loss/reg": 8.740250587463379, |
| "loss/twn": 0.0, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00905, |
| "grad_norm": 26.25, |
| "grad_norm_var": 1132.3434895833334, |
| "learning_rate": 0.0001, |
| "loss": 10.5847, |
| "loss/crossentropy": 0.4726024568080902, |
| "loss/hidden": 1.2890625, |
| "loss/logits": 0.08301784098148346, |
| "loss/reg": 8.739974975585938, |
| "loss/twn": 0.0, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.009075, |
| "grad_norm": 81.5, |
| "grad_norm_var": 1182.8093098958334, |
| "learning_rate": 0.0001, |
| "loss": 10.7335, |
| "loss/crossentropy": 0.29830649495124817, |
| "loss/hidden": 1.65625, |
| "loss/logits": 0.039209138602018356, |
| "loss/reg": 8.739691734313965, |
| "loss/twn": 0.0, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.0091, |
| "grad_norm": 71.0, |
| "grad_norm_var": 1204.5749348958334, |
| "learning_rate": 0.0001, |
| "loss": 11.7107, |
| "loss/crossentropy": 1.6568950414657593, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.08782389760017395, |
| "loss/reg": 8.739431381225586, |
| "loss/twn": 0.0, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.009125, |
| "grad_norm": 54.5, |
| "grad_norm_var": 1181.7775390625, |
| "learning_rate": 0.0001, |
| "loss": 11.0065, |
| "loss/crossentropy": 0.9881642460823059, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.11507290601730347, |
| "loss/reg": 8.739167213439941, |
| "loss/twn": 0.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.00915, |
| "grad_norm": 28.0, |
| "grad_norm_var": 1197.3384765625, |
| "learning_rate": 0.0001, |
| "loss": 11.7973, |
| "loss/crossentropy": 1.7763628959655762, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.12581491470336914, |
| "loss/reg": 8.738903045654297, |
| "loss/twn": 0.0, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.009175, |
| "grad_norm": 26.125, |
| "grad_norm_var": 1214.3729166666667, |
| "learning_rate": 0.0001, |
| "loss": 10.9791, |
| "loss/crossentropy": 0.7930474877357483, |
| "loss/hidden": 1.34375, |
| "loss/logits": 0.10364970564842224, |
| "loss/reg": 8.738633155822754, |
| "loss/twn": 0.0, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.0092, |
| "grad_norm": 28.875, |
| "grad_norm_var": 312.90618489583335, |
| "learning_rate": 0.0001, |
| "loss": 12.7081, |
| "loss/crossentropy": 2.748769998550415, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.158415287733078, |
| "loss/reg": 8.738374710083008, |
| "loss/twn": 0.0, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.009225, |
| "grad_norm": 22.625, |
| "grad_norm_var": 324.87395833333335, |
| "learning_rate": 0.0001, |
| "loss": 11.331, |
| "loss/crossentropy": 1.4759076833724976, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.09354954957962036, |
| "loss/reg": 8.738115310668945, |
| "loss/twn": 0.0, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.00925, |
| "grad_norm": 55.75, |
| "grad_norm_var": 319.90930989583336, |
| "learning_rate": 0.0001, |
| "loss": 11.674, |
| "loss/crossentropy": 1.7559623718261719, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.11767783015966415, |
| "loss/reg": 8.737846374511719, |
| "loss/twn": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.009275, |
| "grad_norm": 83.0, |
| "grad_norm_var": 414.5561848958333, |
| "learning_rate": 0.0001, |
| "loss": 10.6369, |
| "loss/crossentropy": 0.41635945439338684, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.09237731248140335, |
| "loss/reg": 8.737584114074707, |
| "loss/twn": 0.0, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.0093, |
| "grad_norm": 46.0, |
| "grad_norm_var": 397.88743489583334, |
| "learning_rate": 0.0001, |
| "loss": 12.1619, |
| "loss/crossentropy": 2.258561134338379, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.1269446611404419, |
| "loss/reg": 8.737318992614746, |
| "loss/twn": 0.0, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.009325, |
| "grad_norm": 38.75, |
| "grad_norm_var": 401.00462239583334, |
| "learning_rate": 0.0001, |
| "loss": 11.6663, |
| "loss/crossentropy": 1.7213481664657593, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.11412111669778824, |
| "loss/reg": 8.737068176269531, |
| "loss/twn": 0.0, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.00935, |
| "grad_norm": 32.5, |
| "grad_norm_var": 412.26764322916665, |
| "learning_rate": 0.0001, |
| "loss": 12.4385, |
| "loss/crossentropy": 2.6570892333984375, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.14620934426784515, |
| "loss/reg": 8.73680591583252, |
| "loss/twn": 0.0, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.009375, |
| "grad_norm": 50.5, |
| "grad_norm_var": 395.80201822916666, |
| "learning_rate": 0.0001, |
| "loss": 10.4465, |
| "loss/crossentropy": 0.3850350081920624, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.08277378976345062, |
| "loss/reg": 8.73654842376709, |
| "loss/twn": 0.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0094, |
| "grad_norm": 20.125, |
| "grad_norm_var": 425.42473958333335, |
| "learning_rate": 0.0001, |
| "loss": 11.4471, |
| "loss/crossentropy": 1.4705690145492554, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.11524513363838196, |
| "loss/reg": 8.736295700073242, |
| "loss/twn": 0.0, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.009425, |
| "grad_norm": 30.25, |
| "grad_norm_var": 429.44557291666666, |
| "learning_rate": 0.0001, |
| "loss": 12.522, |
| "loss/crossentropy": 2.429779529571533, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.1687065064907074, |
| "loss/reg": 8.736042976379395, |
| "loss/twn": 0.0, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.00945, |
| "grad_norm": 234.0, |
| "grad_norm_var": 2649.5572916666665, |
| "learning_rate": 0.0001, |
| "loss": 11.5714, |
| "loss/crossentropy": 1.490871548652649, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.1650872677564621, |
| "loss/reg": 8.73579216003418, |
| "loss/twn": 0.0, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.009475, |
| "grad_norm": 25.625, |
| "grad_norm_var": 2658.2004557291666, |
| "learning_rate": 0.0001, |
| "loss": 11.7314, |
| "loss/crossentropy": 1.8435747623443604, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.12886002659797668, |
| "loss/reg": 8.735533714294434, |
| "loss/twn": 0.0, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 26.875, |
| "grad_norm_var": 2673.851041666667, |
| "learning_rate": 0.0001, |
| "loss": 11.6939, |
| "loss/crossentropy": 1.7947711944580078, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.11698038130998611, |
| "loss/reg": 8.735271453857422, |
| "loss/twn": 0.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.009525, |
| "grad_norm": 29.875, |
| "grad_norm_var": 2697.6936848958335, |
| "learning_rate": 0.0001, |
| "loss": 12.1068, |
| "loss/crossentropy": 2.1031861305236816, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.12798437476158142, |
| "loss/reg": 8.735011100769043, |
| "loss/twn": 0.0, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.00955, |
| "grad_norm": 25.625, |
| "grad_norm_var": 2704.594791666667, |
| "learning_rate": 0.0001, |
| "loss": 12.4644, |
| "loss/crossentropy": 2.680079221725464, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.1355159878730774, |
| "loss/reg": 8.734759330749512, |
| "loss/twn": 0.0, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.009575, |
| "grad_norm": 51.0, |
| "grad_norm_var": 2668.953580729167, |
| "learning_rate": 0.0001, |
| "loss": 10.3149, |
| "loss/crossentropy": 0.16582134366035461, |
| "loss/hidden": 1.3828125, |
| "loss/logits": 0.03178492933511734, |
| "loss/reg": 8.734502792358398, |
| "loss/twn": 0.0, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 40.0, |
| "grad_norm_var": 2645.226041666667, |
| "learning_rate": 0.0001, |
| "loss": 12.1723, |
| "loss/crossentropy": 2.415682554244995, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.13565900921821594, |
| "loss/reg": 8.734251976013184, |
| "loss/twn": 0.0, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.009625, |
| "grad_norm": 131.0, |
| "grad_norm_var": 2972.439518229167, |
| "learning_rate": 0.0001, |
| "loss": 12.5012, |
| "loss/crossentropy": 2.6481189727783203, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.1307702660560608, |
| "loss/reg": 8.734004974365234, |
| "loss/twn": 0.0, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.00965, |
| "grad_norm": 33.75, |
| "grad_norm_var": 3007.9832682291667, |
| "learning_rate": 0.0001, |
| "loss": 12.505, |
| "loss/crossentropy": 2.526948928833008, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.1739444136619568, |
| "loss/reg": 8.73375129699707, |
| "loss/twn": 0.0, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.009675, |
| "grad_norm": 34.0, |
| "grad_norm_var": 2982.8197265625, |
| "learning_rate": 0.0001, |
| "loss": 11.5609, |
| "loss/crossentropy": 1.483786702156067, |
| "loss/hidden": 1.234375, |
| "loss/logits": 0.10920874774456024, |
| "loss/reg": 8.73349380493164, |
| "loss/twn": 0.0, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.0097, |
| "grad_norm": 26.625, |
| "grad_norm_var": 3024.6677083333334, |
| "learning_rate": 0.0001, |
| "loss": 12.0846, |
| "loss/crossentropy": 2.231854200363159, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.1898089498281479, |
| "loss/reg": 8.733246803283691, |
| "loss/twn": 0.0, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.009725, |
| "grad_norm": 24.5, |
| "grad_norm_var": 3062.355989583333, |
| "learning_rate": 0.0001, |
| "loss": 12.5018, |
| "loss/crossentropy": 2.586677312850952, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.1586349606513977, |
| "loss/reg": 8.733000755310059, |
| "loss/twn": 0.0, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.00975, |
| "grad_norm": 28.125, |
| "grad_norm_var": 3074.353059895833, |
| "learning_rate": 0.0001, |
| "loss": 12.0805, |
| "loss/crossentropy": 2.2190604209899902, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.11307910084724426, |
| "loss/reg": 8.732748985290527, |
| "loss/twn": 0.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.009775, |
| "grad_norm": 22.0, |
| "grad_norm_var": 3126.038997395833, |
| "learning_rate": 0.0001, |
| "loss": 12.4144, |
| "loss/crossentropy": 2.7266995906829834, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.1270523965358734, |
| "loss/reg": 8.732504844665527, |
| "loss/twn": 0.0, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0098, |
| "grad_norm": 20.875, |
| "grad_norm_var": 3123.1905598958333, |
| "learning_rate": 0.0001, |
| "loss": 12.2597, |
| "loss/crossentropy": 2.4047203063964844, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.13441447913646698, |
| "loss/reg": 8.732258796691895, |
| "loss/twn": 0.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.009825, |
| "grad_norm": 44.25, |
| "grad_norm_var": 3100.4259765625, |
| "learning_rate": 0.0001, |
| "loss": 10.6934, |
| "loss/crossentropy": 0.9086162447929382, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.09577716141939163, |
| "loss/reg": 8.732006072998047, |
| "loss/twn": 0.0, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.00985, |
| "grad_norm": 21.875, |
| "grad_norm_var": 705.2916666666666, |
| "learning_rate": 0.0001, |
| "loss": 12.073, |
| "loss/crossentropy": 2.4219603538513184, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.11851858347654343, |
| "loss/reg": 8.731756210327148, |
| "loss/twn": 0.0, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.009875, |
| "grad_norm": 20.375, |
| "grad_norm_var": 714.7143229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.2931, |
| "loss/crossentropy": 2.457286834716797, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.11989138275384903, |
| "loss/reg": 8.731501579284668, |
| "loss/twn": 0.0, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.0099, |
| "grad_norm": 19.75, |
| "grad_norm_var": 726.8379557291667, |
| "learning_rate": 0.0001, |
| "loss": 12.5191, |
| "loss/crossentropy": 2.755459785461426, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.16126662492752075, |
| "loss/reg": 8.731245994567871, |
| "loss/twn": 0.0, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.009925, |
| "grad_norm": 21.0, |
| "grad_norm_var": 738.8330729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.5655, |
| "loss/crossentropy": 1.7406857013702393, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.1094408854842186, |
| "loss/reg": 8.730998039245605, |
| "loss/twn": 0.0, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.00995, |
| "grad_norm": 23.5, |
| "grad_norm_var": 741.8556640625, |
| "learning_rate": 0.0001, |
| "loss": 12.1597, |
| "loss/crossentropy": 2.272913694381714, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.13260604441165924, |
| "loss/reg": 8.730734825134277, |
| "loss/twn": 0.0, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.009975, |
| "grad_norm": 24.625, |
| "grad_norm_var": 729.6434895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.3128, |
| "loss/crossentropy": 2.5484490394592285, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.11595390737056732, |
| "loss/reg": 8.73047161102295, |
| "loss/twn": 0.0, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 24.75, |
| "grad_norm_var": 730.99375, |
| "learning_rate": 0.0001, |
| "loss": 11.765, |
| "loss/crossentropy": 1.8391631841659546, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.11753018945455551, |
| "loss/reg": 8.730217933654785, |
| "loss/twn": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.010025, |
| "grad_norm": 51.5, |
| "grad_norm_var": 82.571875, |
| "learning_rate": 0.0001, |
| "loss": 10.3145, |
| "loss/crossentropy": 0.22619715332984924, |
| "loss/hidden": 1.3125, |
| "loss/logits": 0.04583882540464401, |
| "loss/reg": 8.729976654052734, |
| "loss/twn": 0.0, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.01005, |
| "grad_norm": 30.375, |
| "grad_norm_var": 80.5134765625, |
| "learning_rate": 0.0001, |
| "loss": 11.7034, |
| "loss/crossentropy": 1.9736676216125488, |
| "loss/hidden": 0.890625, |
| "loss/logits": 0.10935711115598679, |
| "loss/reg": 8.729720115661621, |
| "loss/twn": 0.0, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.010075, |
| "grad_norm": 24.125, |
| "grad_norm_var": 77.89557291666667, |
| "learning_rate": 0.0001, |
| "loss": 10.9666, |
| "loss/crossentropy": 1.1371086835861206, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.0843534767627716, |
| "loss/reg": 8.729466438293457, |
| "loss/twn": 0.0, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.0101, |
| "grad_norm": 25.25, |
| "grad_norm_var": 78.03951822916666, |
| "learning_rate": 0.0001, |
| "loss": 10.9232, |
| "loss/crossentropy": 0.8999663591384888, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.06746678054332733, |
| "loss/reg": 8.729215621948242, |
| "loss/twn": 0.0, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.010125, |
| "grad_norm": 23.125, |
| "grad_norm_var": 78.55729166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.307, |
| "loss/crossentropy": 1.4836196899414062, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.08660314977169037, |
| "loss/reg": 8.728958129882812, |
| "loss/twn": 0.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.01015, |
| "grad_norm": 76.5, |
| "grad_norm_var": 234.69264322916666, |
| "learning_rate": 0.0001, |
| "loss": 10.911, |
| "loss/crossentropy": 0.7551215887069702, |
| "loss/hidden": 1.375, |
| "loss/logits": 0.052174702286720276, |
| "loss/reg": 8.72871208190918, |
| "loss/twn": 0.0, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.010175, |
| "grad_norm": 21.875, |
| "grad_norm_var": 234.82057291666666, |
| "learning_rate": 0.0001, |
| "loss": 11.6212, |
| "loss/crossentropy": 1.7415430545806885, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.11990237236022949, |
| "loss/reg": 8.728477478027344, |
| "loss/twn": 0.0, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.0102, |
| "grad_norm": 24.25, |
| "grad_norm_var": 231.60201822916667, |
| "learning_rate": 0.0001, |
| "loss": 12.2806, |
| "loss/crossentropy": 2.5580313205718994, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.1270991712808609, |
| "loss/reg": 8.728238105773926, |
| "loss/twn": 0.0, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.010225, |
| "grad_norm": 22.625, |
| "grad_norm_var": 219.22395833333334, |
| "learning_rate": 0.0001, |
| "loss": 11.5919, |
| "loss/crossentropy": 1.8217298984527588, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.10858465731143951, |
| "loss/reg": 8.727986335754395, |
| "loss/twn": 0.0, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.01025, |
| "grad_norm": 25.125, |
| "grad_norm_var": 217.02682291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.5764, |
| "loss/crossentropy": 1.575069785118103, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.1407238095998764, |
| "loss/reg": 8.727754592895508, |
| "loss/twn": 0.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.010275, |
| "grad_norm": 20.125, |
| "grad_norm_var": 217.30729166666666, |
| "learning_rate": 0.0001, |
| "loss": 12.1643, |
| "loss/crossentropy": 2.376201868057251, |
| "loss/hidden": 0.9453125, |
| "loss/logits": 0.11528852581977844, |
| "loss/reg": 8.727517127990723, |
| "loss/twn": 0.0, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.0103, |
| "grad_norm": 29.75, |
| "grad_norm_var": 211.68229166666666, |
| "learning_rate": 0.0001, |
| "loss": 10.3405, |
| "loss/crossentropy": 0.35947510600090027, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.07406871020793915, |
| "loss/reg": 8.727272033691406, |
| "loss/twn": 0.0, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.010325, |
| "grad_norm": 21.5, |
| "grad_norm_var": 211.14583333333334, |
| "learning_rate": 0.0001, |
| "loss": 10.4168, |
| "loss/crossentropy": 0.4534737765789032, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.07219426333904266, |
| "loss/reg": 8.727028846740723, |
| "loss/twn": 0.0, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.01035, |
| "grad_norm": 24.125, |
| "grad_norm_var": 210.68587239583334, |
| "learning_rate": 0.0001, |
| "loss": 11.7721, |
| "loss/crossentropy": 1.8346529006958008, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.1246715784072876, |
| "loss/reg": 8.726799011230469, |
| "loss/twn": 0.0, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.010375, |
| "grad_norm": 262.0, |
| "grad_norm_var": 3582.7708333333335, |
| "learning_rate": 0.0001, |
| "loss": 10.6145, |
| "loss/crossentropy": 0.16418103873729706, |
| "loss/hidden": 1.6953125, |
| "loss/logits": 0.028506092727184296, |
| "loss/reg": 8.726546287536621, |
| "loss/twn": 0.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.0104, |
| "grad_norm": 25.75, |
| "grad_norm_var": 3580.241666666667, |
| "learning_rate": 0.0001, |
| "loss": 12.4723, |
| "loss/crossentropy": 2.731675624847412, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.14321547746658325, |
| "loss/reg": 8.726308822631836, |
| "loss/twn": 0.0, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.010425, |
| "grad_norm": 24.5, |
| "grad_norm_var": 3599.704166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.3209, |
| "loss/crossentropy": 1.4553873538970947, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.09255130589008331, |
| "loss/reg": 8.726059913635254, |
| "loss/twn": 0.0, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.01045, |
| "grad_norm": 25.375, |
| "grad_norm_var": 3609.391666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.8702, |
| "loss/crossentropy": 2.24129581451416, |
| "loss/hidden": 0.77734375, |
| "loss/logits": 0.12572112679481506, |
| "loss/reg": 8.725821495056152, |
| "loss/twn": 0.0, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.010475, |
| "grad_norm": 26.375, |
| "grad_norm_var": 3604.270572916667, |
| "learning_rate": 0.0001, |
| "loss": 12.3074, |
| "loss/crossentropy": 2.359283924102783, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.1366328001022339, |
| "loss/reg": 8.725577354431152, |
| "loss/twn": 0.0, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.0105, |
| "grad_norm": 102.0, |
| "grad_norm_var": 3797.025, |
| "learning_rate": 0.0001, |
| "loss": 11.7649, |
| "loss/crossentropy": 1.1350619792938232, |
| "loss/hidden": 1.828125, |
| "loss/logits": 0.07637260854244232, |
| "loss/reg": 8.725333213806152, |
| "loss/twn": 0.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.010525, |
| "grad_norm": 29.875, |
| "grad_norm_var": 3778.21640625, |
| "learning_rate": 0.0001, |
| "loss": 12.4283, |
| "loss/crossentropy": 2.615325689315796, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.1543406993150711, |
| "loss/reg": 8.725086212158203, |
| "loss/twn": 0.0, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.01055, |
| "grad_norm": 60.25, |
| "grad_norm_var": 3732.123958333333, |
| "learning_rate": 0.0001, |
| "loss": 11.7589, |
| "loss/crossentropy": 1.2182326316833496, |
| "loss/hidden": 1.7265625, |
| "loss/logits": 0.08926324546337128, |
| "loss/reg": 8.724855422973633, |
| "loss/twn": 0.0, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.010575, |
| "grad_norm": 89.5, |
| "grad_norm_var": 3795.064518229167, |
| "learning_rate": 0.0001, |
| "loss": 10.4779, |
| "loss/crossentropy": 0.38450130820274353, |
| "loss/hidden": 1.3046875, |
| "loss/logits": 0.06404867768287659, |
| "loss/reg": 8.724616050720215, |
| "loss/twn": 0.0, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.0106, |
| "grad_norm": 26.625, |
| "grad_norm_var": 3787.003125, |
| "learning_rate": 0.0001, |
| "loss": 11.5548, |
| "loss/crossentropy": 1.6553500890731812, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.08917971700429916, |
| "loss/reg": 8.724374771118164, |
| "loss/twn": 0.0, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.010625, |
| "grad_norm": 22.25, |
| "grad_norm_var": 3788.4291015625, |
| "learning_rate": 0.0001, |
| "loss": 12.6698, |
| "loss/crossentropy": 2.991757392883301, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.11404159665107727, |
| "loss/reg": 8.724142074584961, |
| "loss/twn": 0.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.01065, |
| "grad_norm": 19.125, |
| "grad_norm_var": 3811.3353515625, |
| "learning_rate": 0.0001, |
| "loss": 11.8012, |
| "loss/crossentropy": 2.072258949279785, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.10655610263347626, |
| "loss/reg": 8.723905563354492, |
| "loss/twn": 0.0, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.010675, |
| "grad_norm": 29.375, |
| "grad_norm_var": 3779.1337890625, |
| "learning_rate": 0.0001, |
| "loss": 12.5645, |
| "loss/crossentropy": 2.7006711959838867, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.18308709561824799, |
| "loss/reg": 8.723679542541504, |
| "loss/twn": 0.0, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.0107, |
| "grad_norm": 24.5, |
| "grad_norm_var": 3795.8353515625, |
| "learning_rate": 0.0001, |
| "loss": 12.1695, |
| "loss/crossentropy": 2.585087776184082, |
| "loss/hidden": 0.75, |
| "loss/logits": 0.11095006763935089, |
| "loss/reg": 8.723454475402832, |
| "loss/twn": 0.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.010725, |
| "grad_norm": 24.75, |
| "grad_norm_var": 3783.7900390625, |
| "learning_rate": 0.0001, |
| "loss": 11.2938, |
| "loss/crossentropy": 1.431530237197876, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.10776036977767944, |
| "loss/reg": 8.723230361938477, |
| "loss/twn": 0.0, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.01075, |
| "grad_norm": 31.0, |
| "grad_norm_var": 3762.0872395833335, |
| "learning_rate": 0.0001, |
| "loss": 11.3079, |
| "loss/crossentropy": 1.3959852457046509, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.11082597076892853, |
| "loss/reg": 8.723002433776855, |
| "loss/twn": 0.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.010775, |
| "grad_norm": 28.0, |
| "grad_norm_var": 615.2747395833334, |
| "learning_rate": 0.0001, |
| "loss": 12.2335, |
| "loss/crossentropy": 2.525698661804199, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.12562698125839233, |
| "loss/reg": 8.72277545928955, |
| "loss/twn": 0.0, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.0108, |
| "grad_norm": 30.25, |
| "grad_norm_var": 609.8934895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.4232, |
| "loss/crossentropy": 2.570929527282715, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.18046987056732178, |
| "loss/reg": 8.722542762756348, |
| "loss/twn": 0.0, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.010825, |
| "grad_norm": 30.75, |
| "grad_norm_var": 601.8270833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.5896, |
| "loss/crossentropy": 1.7935094833374023, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.11279226839542389, |
| "loss/reg": 8.72231674194336, |
| "loss/twn": 0.0, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.01085, |
| "grad_norm": 274.0, |
| "grad_norm_var": 4063.282747395833, |
| "learning_rate": 0.0001, |
| "loss": 12.3639, |
| "loss/crossentropy": 2.7481002807617188, |
| "loss/hidden": 0.78125, |
| "loss/logits": 0.11248621344566345, |
| "loss/reg": 8.722090721130371, |
| "loss/twn": 0.0, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.010875, |
| "grad_norm": 45.5, |
| "grad_norm_var": 4018.1497395833335, |
| "learning_rate": 0.0001, |
| "loss": 11.1306, |
| "loss/crossentropy": 1.3136909008026123, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.07947821915149689, |
| "loss/reg": 8.72185230255127, |
| "loss/twn": 0.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0109, |
| "grad_norm": 27.25, |
| "grad_norm_var": 3891.3083333333334, |
| "learning_rate": 0.0001, |
| "loss": 12.132, |
| "loss/crossentropy": 2.424945116043091, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.11047312617301941, |
| "loss/reg": 8.721626281738281, |
| "loss/twn": 0.0, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.010925, |
| "grad_norm": 19.125, |
| "grad_norm_var": 3926.7497395833334, |
| "learning_rate": 0.0001, |
| "loss": 11.5565, |
| "loss/crossentropy": 1.8753455877304077, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.0808238685131073, |
| "loss/reg": 8.72138786315918, |
| "loss/twn": 0.0, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.01095, |
| "grad_norm": 23.625, |
| "grad_norm_var": 3955.1150390625, |
| "learning_rate": 0.0001, |
| "loss": 11.4874, |
| "loss/crossentropy": 1.6072636842727661, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.12776991724967957, |
| "loss/reg": 8.721161842346191, |
| "loss/twn": 0.0, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.010975, |
| "grad_norm": 27.25, |
| "grad_norm_var": 3841.2494140625, |
| "learning_rate": 0.0001, |
| "loss": 12.3323, |
| "loss/crossentropy": 2.5796213150024414, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.13716325163841248, |
| "loss/reg": 8.720943450927734, |
| "loss/twn": 0.0, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 26.0, |
| "grad_norm_var": 3842.614322916667, |
| "learning_rate": 0.0001, |
| "loss": 11.3436, |
| "loss/crossentropy": 1.6126307249069214, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.09226781129837036, |
| "loss/reg": 8.720734596252441, |
| "loss/twn": 0.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.011025, |
| "grad_norm": 22.5, |
| "grad_norm_var": 3841.9375, |
| "learning_rate": 0.0001, |
| "loss": 12.0387, |
| "loss/crossentropy": 2.2897703647613525, |
| "loss/hidden": 0.91015625, |
| "loss/logits": 0.11826565861701965, |
| "loss/reg": 8.720515251159668, |
| "loss/twn": 0.0, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.01105, |
| "grad_norm": 77.5, |
| "grad_norm_var": 3871.5202473958334, |
| "learning_rate": 0.0001, |
| "loss": 11.1889, |
| "loss/crossentropy": 1.2602814435958862, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.098908931016922, |
| "loss/reg": 8.720290184020996, |
| "loss/twn": 0.0, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.011075, |
| "grad_norm": 101.0, |
| "grad_norm_var": 4030.1770833333335, |
| "learning_rate": 0.0001, |
| "loss": 11.3231, |
| "loss/crossentropy": 1.3954660892486572, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.09037589281797409, |
| "loss/reg": 8.720061302185059, |
| "loss/twn": 0.0, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.0111, |
| "grad_norm": 38.25, |
| "grad_norm_var": 3993.75390625, |
| "learning_rate": 0.0001, |
| "loss": 12.4314, |
| "loss/crossentropy": 2.7403454780578613, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.1353139877319336, |
| "loss/reg": 8.71985149383545, |
| "loss/twn": 0.0, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.011125, |
| "grad_norm": 27.625, |
| "grad_norm_var": 3983.9504557291666, |
| "learning_rate": 0.0001, |
| "loss": 11.8758, |
| "loss/crossentropy": 2.24414324760437, |
| "loss/hidden": 0.81640625, |
| "loss/logits": 0.09559611976146698, |
| "loss/reg": 8.719632148742676, |
| "loss/twn": 0.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.01115, |
| "grad_norm": 61.0, |
| "grad_norm_var": 3956.7942057291666, |
| "learning_rate": 0.0001, |
| "loss": 11.3521, |
| "loss/crossentropy": 1.0313951969146729, |
| "loss/hidden": 1.546875, |
| "loss/logits": 0.05440502613782883, |
| "loss/reg": 8.719411849975586, |
| "loss/twn": 0.0, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.011175, |
| "grad_norm": 37.75, |
| "grad_norm_var": 3929.291080729167, |
| "learning_rate": 0.0001, |
| "loss": 12.2868, |
| "loss/crossentropy": 2.6271612644195557, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.10841761529445648, |
| "loss/reg": 8.719193458557129, |
| "loss/twn": 0.0, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 57.0, |
| "grad_norm_var": 3888.1072265625, |
| "learning_rate": 0.0001, |
| "loss": 12.4931, |
| "loss/crossentropy": 2.6269612312316895, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.1784009337425232, |
| "loss/reg": 8.718981742858887, |
| "loss/twn": 0.0, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.011225, |
| "grad_norm": 94.0, |
| "grad_norm_var": 3925.134830729167, |
| "learning_rate": 0.0001, |
| "loss": 11.6403, |
| "loss/crossentropy": 1.4072850942611694, |
| "loss/hidden": 1.453125, |
| "loss/logits": 0.06110787391662598, |
| "loss/reg": 8.718766212463379, |
| "loss/twn": 0.0, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.01125, |
| "grad_norm": 25.0, |
| "grad_norm_var": 694.1004557291667, |
| "learning_rate": 0.0001, |
| "loss": 12.0448, |
| "loss/crossentropy": 2.477196455001831, |
| "loss/hidden": 0.75390625, |
| "loss/logits": 0.09513802826404572, |
| "loss/reg": 8.718561172485352, |
| "loss/twn": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.011275, |
| "grad_norm": 43.0, |
| "grad_norm_var": 694.1238932291667, |
| "learning_rate": 0.0001, |
| "loss": 11.854, |
| "loss/crossentropy": 2.113325595855713, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.1356305629014969, |
| "loss/reg": 8.718358993530273, |
| "loss/twn": 0.0, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.0113, |
| "grad_norm": 23.25, |
| "grad_norm_var": 704.1863932291667, |
| "learning_rate": 0.0001, |
| "loss": 12.2, |
| "loss/crossentropy": 2.4742965698242188, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.12079896032810211, |
| "loss/reg": 8.718148231506348, |
| "loss/twn": 0.0, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.011325, |
| "grad_norm": 286.0, |
| "grad_norm_var": 4270.720572916666, |
| "learning_rate": 0.0001, |
| "loss": 11.3317, |
| "loss/crossentropy": 1.29776930809021, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.14410004019737244, |
| "loss/reg": 8.717961311340332, |
| "loss/twn": 0.0, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.01135, |
| "grad_norm": 40.25, |
| "grad_norm_var": 4205.8744140625, |
| "learning_rate": 0.0001, |
| "loss": 11.4775, |
| "loss/crossentropy": 1.5997222661972046, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.10535139590501785, |
| "loss/reg": 8.717756271362305, |
| "loss/twn": 0.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.011375, |
| "grad_norm": 26.0, |
| "grad_norm_var": 4211.715559895833, |
| "learning_rate": 0.0001, |
| "loss": 12.7607, |
| "loss/crossentropy": 2.7723307609558105, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.19267109036445618, |
| "loss/reg": 8.71756649017334, |
| "loss/twn": 0.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0114, |
| "grad_norm": 142.0, |
| "grad_norm_var": 4501.5947265625, |
| "learning_rate": 0.0001, |
| "loss": 11.4812, |
| "loss/crossentropy": 1.4258497953414917, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.142679363489151, |
| "loss/reg": 8.717361450195312, |
| "loss/twn": 0.0, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.011425, |
| "grad_norm": 29.0, |
| "grad_norm_var": 4464.0369140625, |
| "learning_rate": 0.0001, |
| "loss": 11.4217, |
| "loss/crossentropy": 1.5250698328018188, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.11695560812950134, |
| "loss/reg": 8.717144012451172, |
| "loss/twn": 0.0, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.01145, |
| "grad_norm": 37.75, |
| "grad_norm_var": 4519.2728515625, |
| "learning_rate": 0.0001, |
| "loss": 12.3109, |
| "loss/crossentropy": 2.4784207344055176, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.1389923095703125, |
| "loss/reg": 8.716940879821777, |
| "loss/twn": 0.0, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.011475, |
| "grad_norm": 38.25, |
| "grad_norm_var": 4479.2697265625, |
| "learning_rate": 0.0001, |
| "loss": 12.4147, |
| "loss/crossentropy": 2.5592427253723145, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.1700257807970047, |
| "loss/reg": 8.716730117797852, |
| "loss/twn": 0.0, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0115, |
| "grad_norm": 134.0, |
| "grad_norm_var": 4737.7947265625, |
| "learning_rate": 0.0001, |
| "loss": 12.5014, |
| "loss/crossentropy": 2.583470106124878, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.17012766003608704, |
| "loss/reg": 8.716526985168457, |
| "loss/twn": 0.0, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.011525, |
| "grad_norm": 29.125, |
| "grad_norm_var": 4729.6869140625, |
| "learning_rate": 0.0001, |
| "loss": 11.451, |
| "loss/crossentropy": 1.5636991262435913, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.10071404278278351, |
| "loss/reg": 8.716313362121582, |
| "loss/twn": 0.0, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.01155, |
| "grad_norm": 27.5, |
| "grad_norm_var": 4835.386393229167, |
| "learning_rate": 0.0001, |
| "loss": 12.145, |
| "loss/crossentropy": 2.3301310539245605, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.11051337420940399, |
| "loss/reg": 8.716113090515137, |
| "loss/twn": 0.0, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.011575, |
| "grad_norm": 26.375, |
| "grad_norm_var": 4887.634375, |
| "learning_rate": 0.0001, |
| "loss": 12.0017, |
| "loss/crossentropy": 2.34748911857605, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.12975379824638367, |
| "loss/reg": 8.715897560119629, |
| "loss/twn": 0.0, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.0116, |
| "grad_norm": 26.375, |
| "grad_norm_var": 4983.640559895834, |
| "learning_rate": 0.0001, |
| "loss": 12.0336, |
| "loss/crossentropy": 2.3001043796539307, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.11935651302337646, |
| "loss/reg": 8.715702056884766, |
| "loss/twn": 0.0, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.011625, |
| "grad_norm": 52.5, |
| "grad_norm_var": 4926.6212890625, |
| "learning_rate": 0.0001, |
| "loss": 12.3187, |
| "loss/crossentropy": 2.5587501525878906, |
| "loss/hidden": 0.890625, |
| "loss/logits": 0.15386971831321716, |
| "loss/reg": 8.715497016906738, |
| "loss/twn": 0.0, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.01165, |
| "grad_norm": 20.5, |
| "grad_norm_var": 4949.8759765625, |
| "learning_rate": 0.0001, |
| "loss": 12.5849, |
| "loss/crossentropy": 2.6686079502105713, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.18541887402534485, |
| "loss/reg": 8.715292930603027, |
| "loss/twn": 0.0, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.011675, |
| "grad_norm": 249.0, |
| "grad_norm_var": 7097.640559895834, |
| "learning_rate": 0.0001, |
| "loss": 12.0919, |
| "loss/crossentropy": 2.338578224182129, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.12023410946130753, |
| "loss/reg": 8.71507740020752, |
| "loss/twn": 0.0, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.0117, |
| "grad_norm": 47.0, |
| "grad_norm_var": 6971.419205729167, |
| "learning_rate": 0.0001, |
| "loss": 12.3001, |
| "loss/crossentropy": 2.491590738296509, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.12098720669746399, |
| "loss/reg": 8.714875221252441, |
| "loss/twn": 0.0, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.011725, |
| "grad_norm": 123.5, |
| "grad_norm_var": 4065.8853515625, |
| "learning_rate": 0.0001, |
| "loss": 12.0627, |
| "loss/crossentropy": 2.3569095134735107, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.14348828792572021, |
| "loss/reg": 8.71466064453125, |
| "loss/twn": 0.0, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.01175, |
| "grad_norm": 36.75, |
| "grad_norm_var": 4078.4671223958335, |
| "learning_rate": 0.0001, |
| "loss": 10.5895, |
| "loss/crossentropy": 0.6432969570159912, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.07552361488342285, |
| "loss/reg": 8.714447975158691, |
| "loss/twn": 0.0, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.011775, |
| "grad_norm": 27.75, |
| "grad_norm_var": 4069.476497395833, |
| "learning_rate": 0.0001, |
| "loss": 11.4347, |
| "loss/crossentropy": 1.3819444179534912, |
| "loss/hidden": 1.2265625, |
| "loss/logits": 0.11192916333675385, |
| "loss/reg": 8.71424674987793, |
| "loss/twn": 0.0, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.0118, |
| "grad_norm": 26.5, |
| "grad_norm_var": 3724.540559895833, |
| "learning_rate": 0.0001, |
| "loss": 12.1273, |
| "loss/crossentropy": 2.2463064193725586, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.13570484519004822, |
| "loss/reg": 8.714032173156738, |
| "loss/twn": 0.0, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.011825, |
| "grad_norm": 23.875, |
| "grad_norm_var": 3746.1643229166666, |
| "learning_rate": 0.0001, |
| "loss": 12.4604, |
| "loss/crossentropy": 2.60668683052063, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.1594240963459015, |
| "loss/reg": 8.713834762573242, |
| "loss/twn": 0.0, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.01185, |
| "grad_norm": 28.75, |
| "grad_norm_var": 3775.433072916667, |
| "learning_rate": 0.0001, |
| "loss": 11.9085, |
| "loss/crossentropy": 2.0397539138793945, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.13953179121017456, |
| "loss/reg": 8.713619232177734, |
| "loss/twn": 0.0, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.011875, |
| "grad_norm": 79.0, |
| "grad_norm_var": 3775.390625, |
| "learning_rate": 0.0001, |
| "loss": 12.3781, |
| "loss/crossentropy": 2.5645852088928223, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.12741456925868988, |
| "loss/reg": 8.713409423828125, |
| "loss/twn": 0.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.0119, |
| "grad_norm": 41.0, |
| "grad_norm_var": 3397.190625, |
| "learning_rate": 0.0001, |
| "loss": 11.1537, |
| "loss/crossentropy": 1.2438526153564453, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.10293398052453995, |
| "loss/reg": 8.713205337524414, |
| "loss/twn": 0.0, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.011925, |
| "grad_norm": 19.5, |
| "grad_norm_var": 3435.023893229167, |
| "learning_rate": 0.0001, |
| "loss": 11.3802, |
| "loss/crossentropy": 1.5274819135665894, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.10069362819194794, |
| "loss/reg": 8.712997436523438, |
| "loss/twn": 0.0, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.01195, |
| "grad_norm": 81.0, |
| "grad_norm_var": 3428.5035807291665, |
| "learning_rate": 0.0001, |
| "loss": 11.0851, |
| "loss/crossentropy": 1.0352659225463867, |
| "loss/hidden": 1.25, |
| "loss/logits": 0.0869978666305542, |
| "loss/reg": 8.712800025939941, |
| "loss/twn": 0.0, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.011975, |
| "grad_norm": 117.0, |
| "grad_norm_var": 3573.7395833333335, |
| "learning_rate": 0.0001, |
| "loss": 10.323, |
| "loss/crossentropy": 0.3703620731830597, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.08379518985748291, |
| "loss/reg": 8.712618827819824, |
| "loss/twn": 0.0, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 24.5, |
| "grad_norm_var": 3582.9905598958335, |
| "learning_rate": 0.0001, |
| "loss": 12.3542, |
| "loss/crossentropy": 2.651298999786377, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.12722676992416382, |
| "loss/reg": 8.71241569519043, |
| "loss/twn": 0.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.012025, |
| "grad_norm": 36.5, |
| "grad_norm_var": 3620.0738932291665, |
| "learning_rate": 0.0001, |
| "loss": 12.2006, |
| "loss/crossentropy": 2.4083526134490967, |
| "loss/hidden": 0.91015625, |
| "loss/logits": 0.16988171637058258, |
| "loss/reg": 8.712209701538086, |
| "loss/twn": 0.0, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.01205, |
| "grad_norm": 24.0, |
| "grad_norm_var": 3601.7608723958333, |
| "learning_rate": 0.0001, |
| "loss": 11.6563, |
| "loss/crossentropy": 1.8607200384140015, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.09915497899055481, |
| "loss/reg": 8.712015151977539, |
| "loss/twn": 0.0, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.012075, |
| "grad_norm": 24.625, |
| "grad_norm_var": 1141.9330729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.5894, |
| "loss/crossentropy": 1.829236388206482, |
| "loss/hidden": 0.94140625, |
| "loss/logits": 0.10693898797035217, |
| "loss/reg": 8.711811065673828, |
| "loss/twn": 0.0, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.0121, |
| "grad_norm": 21.375, |
| "grad_norm_var": 1184.9483723958333, |
| "learning_rate": 0.0001, |
| "loss": 11.9834, |
| "loss/crossentropy": 2.3526086807250977, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.09103663265705109, |
| "loss/reg": 8.711610794067383, |
| "loss/twn": 0.0, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.012125, |
| "grad_norm": 35.5, |
| "grad_norm_var": 759.3400390625, |
| "learning_rate": 0.0001, |
| "loss": 12.4993, |
| "loss/crossentropy": 2.6666743755340576, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.13288551568984985, |
| "loss/reg": 8.711416244506836, |
| "loss/twn": 0.0, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.01215, |
| "grad_norm": 32.25, |
| "grad_norm_var": 762.8416015625, |
| "learning_rate": 0.0001, |
| "loss": 12.2275, |
| "loss/crossentropy": 2.454272747039795, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.12845022976398468, |
| "loss/reg": 8.711220741271973, |
| "loss/twn": 0.0, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.012175, |
| "grad_norm": 25.875, |
| "grad_norm_var": 766.17265625, |
| "learning_rate": 0.0001, |
| "loss": 11.5221, |
| "loss/crossentropy": 1.6709328889846802, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.12450917810201645, |
| "loss/reg": 8.711026191711426, |
| "loss/twn": 0.0, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.0122, |
| "grad_norm": 25.5, |
| "grad_norm_var": 768.0455729166666, |
| "learning_rate": 0.0001, |
| "loss": 12.0402, |
| "loss/crossentropy": 2.1895947456359863, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.15148413181304932, |
| "loss/reg": 8.710831642150879, |
| "loss/twn": 0.0, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.012225, |
| "grad_norm": 24.125, |
| "grad_norm_var": 767.5114583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.093, |
| "loss/crossentropy": 2.4588782787323, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.12274663895368576, |
| "loss/reg": 8.710626602172852, |
| "loss/twn": 0.0, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.01225, |
| "grad_norm": 19.625, |
| "grad_norm_var": 786.4410807291666, |
| "learning_rate": 0.0001, |
| "loss": 12.183, |
| "loss/crossentropy": 2.4481096267700195, |
| "loss/hidden": 0.91015625, |
| "loss/logits": 0.11435995995998383, |
| "loss/reg": 8.710421562194824, |
| "loss/twn": 0.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.012275, |
| "grad_norm": 61.25, |
| "grad_norm_var": 712.5567057291667, |
| "learning_rate": 0.0001, |
| "loss": 12.4142, |
| "loss/crossentropy": 2.686851978302002, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.1577412486076355, |
| "loss/reg": 8.710216522216797, |
| "loss/twn": 0.0, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.0123, |
| "grad_norm": 21.625, |
| "grad_norm_var": 729.1768229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.9983, |
| "loss/crossentropy": 2.3999531269073486, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.11876754462718964, |
| "loss/reg": 8.710010528564453, |
| "loss/twn": 0.0, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.012325, |
| "grad_norm": 29.625, |
| "grad_norm_var": 711.7692057291666, |
| "learning_rate": 0.0001, |
| "loss": 12.8093, |
| "loss/crossentropy": 2.8931031227111816, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.20642593502998352, |
| "loss/reg": 8.709806442260742, |
| "loss/twn": 0.0, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.01235, |
| "grad_norm": 22.0, |
| "grad_norm_var": 589.2827473958333, |
| "learning_rate": 0.0001, |
| "loss": 12.3841, |
| "loss/crossentropy": 2.601548194885254, |
| "loss/hidden": 0.921875, |
| "loss/logits": 0.15107882022857666, |
| "loss/reg": 8.709609985351562, |
| "loss/twn": 0.0, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.012375, |
| "grad_norm": 43.75, |
| "grad_norm_var": 114.83639322916666, |
| "learning_rate": 0.0001, |
| "loss": 12.2726, |
| "loss/crossentropy": 2.4992611408233643, |
| "loss/hidden": 0.9453125, |
| "loss/logits": 0.1186700314283371, |
| "loss/reg": 8.709403991699219, |
| "loss/twn": 0.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.0124, |
| "grad_norm": 24.75, |
| "grad_norm_var": 114.67337239583334, |
| "learning_rate": 0.0001, |
| "loss": 10.6561, |
| "loss/crossentropy": 0.766319215297699, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.07119736075401306, |
| "loss/reg": 8.709196090698242, |
| "loss/twn": 0.0, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.012425, |
| "grad_norm": 37.5, |
| "grad_norm_var": 115.66608072916667, |
| "learning_rate": 0.0001, |
| "loss": 12.1275, |
| "loss/crossentropy": 2.1994762420654297, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.18780240416526794, |
| "loss/reg": 8.709000587463379, |
| "loss/twn": 0.0, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.01245, |
| "grad_norm": 82.0, |
| "grad_norm_var": 282.7181640625, |
| "learning_rate": 0.0001, |
| "loss": 11.6881, |
| "loss/crossentropy": 1.682827353477478, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.12458673864603043, |
| "loss/reg": 8.708788871765137, |
| "loss/twn": 0.0, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.012475, |
| "grad_norm": 29.75, |
| "grad_norm_var": 278.4927083333333, |
| "learning_rate": 0.0001, |
| "loss": 11.7241, |
| "loss/crossentropy": 1.737938642501831, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.14476825296878815, |
| "loss/reg": 8.708579063415527, |
| "loss/twn": 0.0, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 19.625, |
| "grad_norm_var": 281.52057291666665, |
| "learning_rate": 0.0001, |
| "loss": 11.7857, |
| "loss/crossentropy": 2.1153597831726074, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.10259227454662323, |
| "loss/reg": 8.708370208740234, |
| "loss/twn": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.012525, |
| "grad_norm": 25.125, |
| "grad_norm_var": 285.3733723958333, |
| "learning_rate": 0.0001, |
| "loss": 11.9192, |
| "loss/crossentropy": 2.1579036712646484, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.11566457152366638, |
| "loss/reg": 8.708168029785156, |
| "loss/twn": 0.0, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.01255, |
| "grad_norm": 231.0, |
| "grad_norm_var": 2740.3499348958335, |
| "learning_rate": 0.0001, |
| "loss": 11.0974, |
| "loss/crossentropy": 1.0497229099273682, |
| "loss/hidden": 1.234375, |
| "loss/logits": 0.10528069734573364, |
| "loss/reg": 8.707971572875977, |
| "loss/twn": 0.0, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.012575, |
| "grad_norm": 69.0, |
| "grad_norm_var": 2745.4934895833335, |
| "learning_rate": 0.0001, |
| "loss": 12.1588, |
| "loss/crossentropy": 2.3528451919555664, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.1255016028881073, |
| "loss/reg": 8.70776081085205, |
| "loss/twn": 0.0, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.0126, |
| "grad_norm": 40.0, |
| "grad_norm_var": 2715.3455729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.2146, |
| "loss/crossentropy": 1.2129493951797485, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.0832081139087677, |
| "loss/reg": 8.707552909851074, |
| "loss/twn": 0.0, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.012625, |
| "grad_norm": 103.0, |
| "grad_norm_var": 2844.7087890625, |
| "learning_rate": 0.0001, |
| "loss": 11.6204, |
| "loss/crossentropy": 1.8400676250457764, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.10419435799121857, |
| "loss/reg": 8.707341194152832, |
| "loss/twn": 0.0, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.01265, |
| "grad_norm": 31.75, |
| "grad_norm_var": 2798.76640625, |
| "learning_rate": 0.0001, |
| "loss": 12.4942, |
| "loss/crossentropy": 2.7121334075927734, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.13741296529769897, |
| "loss/reg": 8.707144737243652, |
| "loss/twn": 0.0, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.012675, |
| "grad_norm": 280.0, |
| "grad_norm_var": 5986.819791666667, |
| "learning_rate": 0.0001, |
| "loss": 12.5207, |
| "loss/crossentropy": 2.7740046977996826, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.13350710272789001, |
| "loss/reg": 8.706952095031738, |
| "loss/twn": 0.0, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0127, |
| "grad_norm": 50.0, |
| "grad_norm_var": 5861.0978515625, |
| "learning_rate": 0.0001, |
| "loss": 10.3886, |
| "loss/crossentropy": 0.3818214535713196, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.08132871985435486, |
| "loss/reg": 8.706746101379395, |
| "loss/twn": 0.0, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.012725, |
| "grad_norm": 25.875, |
| "grad_norm_var": 5882.1291015625, |
| "learning_rate": 0.0001, |
| "loss": 11.766, |
| "loss/crossentropy": 1.9122241735458374, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.1316186636686325, |
| "loss/reg": 8.706555366516113, |
| "loss/twn": 0.0, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.01275, |
| "grad_norm": 37.5, |
| "grad_norm_var": 5798.5744140625, |
| "learning_rate": 0.0001, |
| "loss": 12.1219, |
| "loss/crossentropy": 2.3736658096313477, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.11605073511600494, |
| "loss/reg": 8.706363677978516, |
| "loss/twn": 0.0, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.012775, |
| "grad_norm": 23.625, |
| "grad_norm_var": 5896.107291666666, |
| "learning_rate": 0.0001, |
| "loss": 11.4842, |
| "loss/crossentropy": 1.5143873691558838, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.09178682416677475, |
| "loss/reg": 8.7061767578125, |
| "loss/twn": 0.0, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 23.375, |
| "grad_norm_var": 5904.412434895833, |
| "learning_rate": 0.0001, |
| "loss": 11.4694, |
| "loss/crossentropy": 1.6677556037902832, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.08782754838466644, |
| "loss/reg": 8.705994606018066, |
| "loss/twn": 0.0, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.012825, |
| "grad_norm": 26.0, |
| "grad_norm_var": 5961.469205729167, |
| "learning_rate": 0.0001, |
| "loss": 10.5775, |
| "loss/crossentropy": 0.7001097798347473, |
| "loss/hidden": 1.1015625, |
| "loss/logits": 0.07007156312465668, |
| "loss/reg": 8.705804824829102, |
| "loss/twn": 0.0, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.01285, |
| "grad_norm": 21.5, |
| "grad_norm_var": 6082.1541015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1993, |
| "loss/crossentropy": 2.4436159133911133, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.13212494552135468, |
| "loss/reg": 8.705628395080566, |
| "loss/twn": 0.0, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.012875, |
| "grad_norm": 23.5, |
| "grad_norm_var": 6113.820768229167, |
| "learning_rate": 0.0001, |
| "loss": 12.3681, |
| "loss/crossentropy": 2.6185691356658936, |
| "loss/hidden": 0.921875, |
| "loss/logits": 0.12216044217348099, |
| "loss/reg": 8.705458641052246, |
| "loss/twn": 0.0, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.0129, |
| "grad_norm": 23.25, |
| "grad_norm_var": 6092.986458333334, |
| "learning_rate": 0.0001, |
| "loss": 12.2262, |
| "loss/crossentropy": 2.3002281188964844, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.13479754328727722, |
| "loss/reg": 8.705265998840332, |
| "loss/twn": 0.0, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.012925, |
| "grad_norm": 18.25, |
| "grad_norm_var": 6132.1775390625, |
| "learning_rate": 0.0001, |
| "loss": 12.4675, |
| "loss/crossentropy": 2.8178443908691406, |
| "loss/hidden": 0.8203125, |
| "loss/logits": 0.12422126531600952, |
| "loss/reg": 8.705097198486328, |
| "loss/twn": 0.0, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.01295, |
| "grad_norm": 102.0, |
| "grad_norm_var": 4303.7369140625, |
| "learning_rate": 0.0001, |
| "loss": 11.6232, |
| "loss/crossentropy": 1.9060938358306885, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.11764118075370789, |
| "loss/reg": 8.704919815063477, |
| "loss/twn": 0.0, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.012975, |
| "grad_norm": 39.0, |
| "grad_norm_var": 4308.6431640625, |
| "learning_rate": 0.0001, |
| "loss": 11.541, |
| "loss/crossentropy": 1.7360622882843018, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.1275629997253418, |
| "loss/reg": 8.704737663269043, |
| "loss/twn": 0.0, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 26.125, |
| "grad_norm_var": 4347.11015625, |
| "learning_rate": 0.0001, |
| "loss": 11.5557, |
| "loss/crossentropy": 1.7143359184265137, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.0978047251701355, |
| "loss/reg": 8.704544067382812, |
| "loss/twn": 0.0, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.013025, |
| "grad_norm": 187.0, |
| "grad_norm_var": 5343.38515625, |
| "learning_rate": 0.0001, |
| "loss": 12.2438, |
| "loss/crossentropy": 2.5768113136291504, |
| "loss/hidden": 0.82421875, |
| "loss/logits": 0.13841256499290466, |
| "loss/reg": 8.704363822937012, |
| "loss/twn": 0.0, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.01305, |
| "grad_norm": 51.25, |
| "grad_norm_var": 5297.15390625, |
| "learning_rate": 0.0001, |
| "loss": 11.0756, |
| "loss/crossentropy": 0.9725221395492554, |
| "loss/hidden": 1.3359375, |
| "loss/logits": 0.06297647953033447, |
| "loss/reg": 8.704182624816895, |
| "loss/twn": 0.0, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.013075, |
| "grad_norm": 30.875, |
| "grad_norm_var": 1864.8082682291667, |
| "learning_rate": 0.0001, |
| "loss": 12.0271, |
| "loss/crossentropy": 2.140617609024048, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.1278156340122223, |
| "loss/reg": 8.703999519348145, |
| "loss/twn": 0.0, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.0131, |
| "grad_norm": 21.625, |
| "grad_norm_var": 1893.64140625, |
| "learning_rate": 0.0001, |
| "loss": 12.4328, |
| "loss/crossentropy": 2.7558345794677734, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.13716721534729004, |
| "loss/reg": 8.703822135925293, |
| "loss/twn": 0.0, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.013125, |
| "grad_norm": 41.25, |
| "grad_norm_var": 1874.2384765625, |
| "learning_rate": 0.0001, |
| "loss": 11.5497, |
| "loss/crossentropy": 1.4624669551849365, |
| "loss/hidden": 1.2890625, |
| "loss/logits": 0.0945538729429245, |
| "loss/reg": 8.703641891479492, |
| "loss/twn": 0.0, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.01315, |
| "grad_norm": 149.0, |
| "grad_norm_var": 2561.937955729167, |
| "learning_rate": 0.0001, |
| "loss": 12.4636, |
| "loss/crossentropy": 2.778942346572876, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.13353455066680908, |
| "loss/reg": 8.703462600708008, |
| "loss/twn": 0.0, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.013175, |
| "grad_norm": 25.875, |
| "grad_norm_var": 2554.1988932291665, |
| "learning_rate": 0.0001, |
| "loss": 11.1972, |
| "loss/crossentropy": 1.4794940948486328, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.08861669898033142, |
| "loss/reg": 8.703286170959473, |
| "loss/twn": 0.0, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.0132, |
| "grad_norm": 120.5, |
| "grad_norm_var": 2790.991666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.0079, |
| "loss/crossentropy": 0.36047661304473877, |
| "loss/hidden": 1.890625, |
| "loss/logits": 0.05367514491081238, |
| "loss/reg": 8.703109741210938, |
| "loss/twn": 0.0, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.013225, |
| "grad_norm": 28.5, |
| "grad_norm_var": 2781.153125, |
| "learning_rate": 0.0001, |
| "loss": 12.1083, |
| "loss/crossentropy": 2.313814640045166, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.1306000053882599, |
| "loss/reg": 8.702930450439453, |
| "loss/twn": 0.0, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.01325, |
| "grad_norm": 33.25, |
| "grad_norm_var": 2734.41015625, |
| "learning_rate": 0.0001, |
| "loss": 11.4673, |
| "loss/crossentropy": 1.5691964626312256, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.11722072958946228, |
| "loss/reg": 8.70274543762207, |
| "loss/twn": 0.0, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.013275, |
| "grad_norm": 23.375, |
| "grad_norm_var": 2734.9791015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1244, |
| "loss/crossentropy": 2.4872567653656006, |
| "loss/hidden": 0.81640625, |
| "loss/logits": 0.11812958121299744, |
| "loss/reg": 8.702580451965332, |
| "loss/twn": 0.0, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.0133, |
| "grad_norm": 23.375, |
| "grad_norm_var": 2734.4080729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.9969, |
| "loss/crossentropy": 2.3968935012817383, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.10464372485876083, |
| "loss/reg": 8.702404975891113, |
| "loss/twn": 0.0, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.013325, |
| "grad_norm": 30.125, |
| "grad_norm_var": 2680.9520182291667, |
| "learning_rate": 0.0001, |
| "loss": 11.2076, |
| "loss/crossentropy": 1.4471901655197144, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.09723718464374542, |
| "loss/reg": 8.702223777770996, |
| "loss/twn": 0.0, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.01335, |
| "grad_norm": 24.375, |
| "grad_norm_var": 2605.469791666667, |
| "learning_rate": 0.0001, |
| "loss": 12.1741, |
| "loss/crossentropy": 2.546072244644165, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.09783760458230972, |
| "loss/reg": 8.70203971862793, |
| "loss/twn": 0.0, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.013375, |
| "grad_norm": 19.75, |
| "grad_norm_var": 2665.76640625, |
| "learning_rate": 0.0001, |
| "loss": 12.1487, |
| "loss/crossentropy": 2.499176263809204, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.11950027197599411, |
| "loss/reg": 8.70186996459961, |
| "loss/twn": 0.0, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.0134, |
| "grad_norm": 17.625, |
| "grad_norm_var": 2699.9080729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.3941, |
| "loss/crossentropy": 1.587836503982544, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.08900366723537445, |
| "loss/reg": 8.701681137084961, |
| "loss/twn": 0.0, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.013425, |
| "grad_norm": 24.125, |
| "grad_norm_var": 1420.4061848958333, |
| "learning_rate": 0.0001, |
| "loss": 11.2736, |
| "loss/crossentropy": 1.4561734199523926, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.10029296576976776, |
| "loss/reg": 8.701506614685059, |
| "loss/twn": 0.0, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.01345, |
| "grad_norm": 48.5, |
| "grad_norm_var": 1417.3238932291667, |
| "learning_rate": 0.0001, |
| "loss": 12.6582, |
| "loss/crossentropy": 2.860567808151245, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.15878960490226746, |
| "loss/reg": 8.70134162902832, |
| "loss/twn": 0.0, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.013475, |
| "grad_norm": 26.25, |
| "grad_norm_var": 1425.140625, |
| "learning_rate": 0.0001, |
| "loss": 10.969, |
| "loss/crossentropy": 0.8018643260002136, |
| "loss/hidden": 1.3828125, |
| "loss/logits": 0.08321318030357361, |
| "loss/reg": 8.701154708862305, |
| "loss/twn": 0.0, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0135, |
| "grad_norm": 31.25, |
| "grad_norm_var": 1405.9457682291666, |
| "learning_rate": 0.0001, |
| "loss": 11.3681, |
| "loss/crossentropy": 1.428029179573059, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.1218872219324112, |
| "loss/reg": 8.700972557067871, |
| "loss/twn": 0.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.013525, |
| "grad_norm": 32.5, |
| "grad_norm_var": 1411.2504557291666, |
| "learning_rate": 0.0001, |
| "loss": 12.4834, |
| "loss/crossentropy": 2.6827263832092285, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.138902485370636, |
| "loss/reg": 8.700789451599121, |
| "loss/twn": 0.0, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.01355, |
| "grad_norm": 26.25, |
| "grad_norm_var": 587.8025390625, |
| "learning_rate": 0.0001, |
| "loss": 11.967, |
| "loss/crossentropy": 2.143488883972168, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.13070911169052124, |
| "loss/reg": 8.70060920715332, |
| "loss/twn": 0.0, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.013575, |
| "grad_norm": 40.75, |
| "grad_norm_var": 586.5552083333333, |
| "learning_rate": 0.0001, |
| "loss": 12.6355, |
| "loss/crossentropy": 2.781853675842285, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.15318265557289124, |
| "loss/reg": 8.700432777404785, |
| "loss/twn": 0.0, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.0136, |
| "grad_norm": 29.375, |
| "grad_norm_var": 59.50149739583333, |
| "learning_rate": 0.0001, |
| "loss": 12.4966, |
| "loss/crossentropy": 2.6565492153167725, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.19062356650829315, |
| "loss/reg": 8.700250625610352, |
| "loss/twn": 0.0, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.013625, |
| "grad_norm": 24.125, |
| "grad_norm_var": 60.82083333333333, |
| "learning_rate": 0.0001, |
| "loss": 11.9314, |
| "loss/crossentropy": 2.328007698059082, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.11813446879386902, |
| "loss/reg": 8.700064659118652, |
| "loss/twn": 0.0, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.01365, |
| "grad_norm": 24.0, |
| "grad_norm_var": 60.233072916666664, |
| "learning_rate": 0.0001, |
| "loss": 10.8885, |
| "loss/crossentropy": 0.9960370659828186, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.06760472059249878, |
| "loss/reg": 8.699880599975586, |
| "loss/twn": 0.0, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.013675, |
| "grad_norm": 38.75, |
| "grad_norm_var": 65.81451822916667, |
| "learning_rate": 0.0001, |
| "loss": 12.266, |
| "loss/crossentropy": 2.311028480529785, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.23187388479709625, |
| "loss/reg": 8.699692726135254, |
| "loss/twn": 0.0, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.0137, |
| "grad_norm": 88.5, |
| "grad_norm_var": 283.61015625, |
| "learning_rate": 0.0001, |
| "loss": 12.3981, |
| "loss/crossentropy": 2.518186569213867, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.133548766374588, |
| "loss/reg": 8.699499130249023, |
| "loss/twn": 0.0, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.013725, |
| "grad_norm": 18.625, |
| "grad_norm_var": 296.11640625, |
| "learning_rate": 0.0001, |
| "loss": 11.9986, |
| "loss/crossentropy": 2.3577754497528076, |
| "loss/hidden": 0.82421875, |
| "loss/logits": 0.11731864511966705, |
| "loss/reg": 8.69931411743164, |
| "loss/twn": 0.0, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.01375, |
| "grad_norm": 35.5, |
| "grad_norm_var": 292.28639322916666, |
| "learning_rate": 0.0001, |
| "loss": 12.4385, |
| "loss/crossentropy": 2.4622979164123535, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.13648264110088348, |
| "loss/reg": 8.699124336242676, |
| "loss/twn": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.013775, |
| "grad_norm": 40.25, |
| "grad_norm_var": 282.69837239583336, |
| "learning_rate": 0.0001, |
| "loss": 10.3123, |
| "loss/crossentropy": 0.25961267948150635, |
| "loss/hidden": 1.296875, |
| "loss/logits": 0.056895509362220764, |
| "loss/reg": 8.698928833007812, |
| "loss/twn": 0.0, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.0138, |
| "grad_norm": 49.5, |
| "grad_norm_var": 275.97473958333336, |
| "learning_rate": 0.0001, |
| "loss": 12.3088, |
| "loss/crossentropy": 2.631776809692383, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.1149827241897583, |
| "loss/reg": 8.698740005493164, |
| "loss/twn": 0.0, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.013825, |
| "grad_norm": 22.5, |
| "grad_norm_var": 278.7431640625, |
| "learning_rate": 0.0001, |
| "loss": 11.8021, |
| "loss/crossentropy": 2.0061707496643066, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.11686154454946518, |
| "loss/reg": 8.698551177978516, |
| "loss/twn": 0.0, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.01385, |
| "grad_norm": 68.0, |
| "grad_norm_var": 334.9072265625, |
| "learning_rate": 0.0001, |
| "loss": 11.309, |
| "loss/crossentropy": 1.4852938652038574, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.10191912949085236, |
| "loss/reg": 8.698369979858398, |
| "loss/twn": 0.0, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.013875, |
| "grad_norm": 28.0, |
| "grad_norm_var": 332.53014322916664, |
| "learning_rate": 0.0001, |
| "loss": 11.9493, |
| "loss/crossentropy": 2.19571852684021, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.14134089648723602, |
| "loss/reg": 8.698185920715332, |
| "loss/twn": 0.0, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.0139, |
| "grad_norm": 21.125, |
| "grad_norm_var": 347.19557291666666, |
| "learning_rate": 0.0001, |
| "loss": 11.1502, |
| "loss/crossentropy": 1.1655678749084473, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.09131427109241486, |
| "loss/reg": 8.697993278503418, |
| "loss/twn": 0.0, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.013925, |
| "grad_norm": 31.375, |
| "grad_norm_var": 347.9098307291667, |
| "learning_rate": 0.0001, |
| "loss": 12.4343, |
| "loss/crossentropy": 2.752174139022827, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.1288745105266571, |
| "loss/reg": 8.69781494140625, |
| "loss/twn": 0.0, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.01395, |
| "grad_norm": 30.75, |
| "grad_norm_var": 342.92701822916666, |
| "learning_rate": 0.0001, |
| "loss": 11.5274, |
| "loss/crossentropy": 1.6675015687942505, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.13104400038719177, |
| "loss/reg": 8.697635650634766, |
| "loss/twn": 0.0, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.013975, |
| "grad_norm": 24.0, |
| "grad_norm_var": 351.9650390625, |
| "learning_rate": 0.0001, |
| "loss": 12.0512, |
| "loss/crossentropy": 2.3672361373901367, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.11149340867996216, |
| "loss/reg": 8.697444915771484, |
| "loss/twn": 0.0, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 40.5, |
| "grad_norm_var": 350.0239583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.5563, |
| "loss/crossentropy": 2.80849289894104, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.12084785103797913, |
| "loss/reg": 8.697265625, |
| "loss/twn": 0.0, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.014025, |
| "grad_norm": 72.0, |
| "grad_norm_var": 413.68274739583336, |
| "learning_rate": 0.0001, |
| "loss": 10.3992, |
| "loss/crossentropy": 0.27454060316085815, |
| "loss/hidden": 1.390625, |
| "loss/logits": 0.036948833614587784, |
| "loss/reg": 8.69708251953125, |
| "loss/twn": 0.0, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.01405, |
| "grad_norm": 26.875, |
| "grad_norm_var": 408.22473958333336, |
| "learning_rate": 0.0001, |
| "loss": 11.5059, |
| "loss/crossentropy": 1.6076158285140991, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.13113059103488922, |
| "loss/reg": 8.696890830993652, |
| "loss/twn": 0.0, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.014075, |
| "grad_norm": 29.0, |
| "grad_norm_var": 415.48645833333336, |
| "learning_rate": 0.0001, |
| "loss": 11.8697, |
| "loss/crossentropy": 2.2221200466156006, |
| "loss/hidden": 0.8203125, |
| "loss/logits": 0.13051408529281616, |
| "loss/reg": 8.696711540222168, |
| "loss/twn": 0.0, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.0141, |
| "grad_norm": 33.75, |
| "grad_norm_var": 242.62473958333334, |
| "learning_rate": 0.0001, |
| "loss": 12.064, |
| "loss/crossentropy": 2.298898935317993, |
| "loss/hidden": 0.953125, |
| "loss/logits": 0.11549080908298492, |
| "loss/reg": 8.696532249450684, |
| "loss/twn": 0.0, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.014125, |
| "grad_norm": 37.75, |
| "grad_norm_var": 221.85618489583334, |
| "learning_rate": 0.0001, |
| "loss": 12.5257, |
| "loss/crossentropy": 2.73765230178833, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.13077020645141602, |
| "loss/reg": 8.696345329284668, |
| "loss/twn": 0.0, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.01415, |
| "grad_norm": 22.625, |
| "grad_norm_var": 234.67083333333332, |
| "learning_rate": 0.0001, |
| "loss": 11.4166, |
| "loss/crossentropy": 1.6072018146514893, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.12108200043439865, |
| "loss/reg": 8.69616413116455, |
| "loss/twn": 0.0, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.014175, |
| "grad_norm": 31.875, |
| "grad_norm_var": 234.44837239583333, |
| "learning_rate": 0.0001, |
| "loss": 12.5681, |
| "loss/crossentropy": 2.7330384254455566, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.1781395524740219, |
| "loss/reg": 8.6959810256958, |
| "loss/twn": 0.0, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.0142, |
| "grad_norm": 25.375, |
| "grad_norm_var": 226.11770833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.513, |
| "loss/crossentropy": 1.5385918617248535, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.10675453394651413, |
| "loss/reg": 8.695802688598633, |
| "loss/twn": 0.0, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.014225, |
| "grad_norm": 23.25, |
| "grad_norm_var": 224.99348958333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1126, |
| "loss/crossentropy": 2.372486114501953, |
| "loss/hidden": 0.890625, |
| "loss/logits": 0.15382985770702362, |
| "loss/reg": 8.695622444152832, |
| "loss/twn": 0.0, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.01425, |
| "grad_norm": 32.25, |
| "grad_norm_var": 143.47604166666667, |
| "learning_rate": 0.0001, |
| "loss": 10.5734, |
| "loss/crossentropy": 0.7162877917289734, |
| "loss/hidden": 1.1015625, |
| "loss/logits": 0.06007448583841324, |
| "loss/reg": 8.695448875427246, |
| "loss/twn": 0.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.014275, |
| "grad_norm": 48.0, |
| "grad_norm_var": 158.059375, |
| "learning_rate": 0.0001, |
| "loss": 10.6877, |
| "loss/crossentropy": 0.8484583497047424, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.08146172761917114, |
| "loss/reg": 8.695268630981445, |
| "loss/twn": 0.0, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.0143, |
| "grad_norm": 36.0, |
| "grad_norm_var": 148.02649739583333, |
| "learning_rate": 0.0001, |
| "loss": 11.8689, |
| "loss/crossentropy": 2.1656343936920166, |
| "loss/hidden": 0.90234375, |
| "loss/logits": 0.10579562187194824, |
| "loss/reg": 8.695083618164062, |
| "loss/twn": 0.0, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.014325, |
| "grad_norm": 21.0, |
| "grad_norm_var": 158.50416666666666, |
| "learning_rate": 0.0001, |
| "loss": 11.7985, |
| "loss/crossentropy": 2.0617969036102295, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.10816241800785065, |
| "loss/reg": 8.694910049438477, |
| "loss/twn": 0.0, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.01435, |
| "grad_norm": 24.0, |
| "grad_norm_var": 163.77057291666668, |
| "learning_rate": 0.0001, |
| "loss": 12.4349, |
| "loss/crossentropy": 2.6965558528900146, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.1373385787010193, |
| "loss/reg": 8.694724082946777, |
| "loss/twn": 0.0, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.014375, |
| "grad_norm": 29.125, |
| "grad_norm_var": 159.25149739583333, |
| "learning_rate": 0.0001, |
| "loss": 11.1658, |
| "loss/crossentropy": 1.3206909894943237, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.09583240002393723, |
| "loss/reg": 8.694555282592773, |
| "loss/twn": 0.0, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 30.25, |
| "grad_norm_var": 156.02701822916666, |
| "learning_rate": 0.0001, |
| "loss": 11.4961, |
| "loss/crossentropy": 1.7263752222061157, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.09877104312181473, |
| "loss/reg": 8.694381713867188, |
| "loss/twn": 0.0, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.014425, |
| "grad_norm": 20.0, |
| "grad_norm_var": 52.51451822916667, |
| "learning_rate": 0.0001, |
| "loss": 12.1905, |
| "loss/crossentropy": 2.4956769943237305, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.12949687242507935, |
| "loss/reg": 8.694199562072754, |
| "loss/twn": 0.0, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.01445, |
| "grad_norm": 25.375, |
| "grad_norm_var": 53.16920572916667, |
| "learning_rate": 0.0001, |
| "loss": 11.0508, |
| "loss/crossentropy": 1.3034043312072754, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.07680986076593399, |
| "loss/reg": 8.694019317626953, |
| "loss/twn": 0.0, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.014475, |
| "grad_norm": 23.25, |
| "grad_norm_var": 55.50514322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.5402, |
| "loss/crossentropy": 2.751810073852539, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.1336486041545868, |
| "loss/reg": 8.693835258483887, |
| "loss/twn": 0.0, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0145, |
| "grad_norm": 23.75, |
| "grad_norm_var": 55.41139322916667, |
| "learning_rate": 0.0001, |
| "loss": 10.9479, |
| "loss/crossentropy": 1.3105086088180542, |
| "loss/hidden": 0.8515625, |
| "loss/logits": 0.09222778677940369, |
| "loss/reg": 8.693650245666504, |
| "loss/twn": 0.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.014525, |
| "grad_norm": 26.125, |
| "grad_norm_var": 49.31432291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.2555, |
| "loss/crossentropy": 1.4206476211547852, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.13361580669879913, |
| "loss/reg": 8.693469047546387, |
| "loss/twn": 0.0, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.01455, |
| "grad_norm": 24.25, |
| "grad_norm_var": 48.392643229166666, |
| "learning_rate": 0.0001, |
| "loss": 10.9372, |
| "loss/crossentropy": 1.1883257627487183, |
| "loss/hidden": 0.921875, |
| "loss/logits": 0.13372007012367249, |
| "loss/reg": 8.693286895751953, |
| "loss/twn": 0.0, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.014575, |
| "grad_norm": 23.0, |
| "grad_norm_var": 48.425, |
| "learning_rate": 0.0001, |
| "loss": 11.1282, |
| "loss/crossentropy": 1.472751498222351, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.0834188461303711, |
| "loss/reg": 8.69310474395752, |
| "loss/twn": 0.0, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.0146, |
| "grad_norm": 107.0, |
| "grad_norm_var": 445.11399739583334, |
| "learning_rate": 0.0001, |
| "loss": 12.3255, |
| "loss/crossentropy": 2.5852210521698, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.1215391531586647, |
| "loss/reg": 8.692915916442871, |
| "loss/twn": 0.0, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.014625, |
| "grad_norm": 36.25, |
| "grad_norm_var": 440.0087890625, |
| "learning_rate": 0.0001, |
| "loss": 12.0515, |
| "loss/crossentropy": 2.2019858360290527, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.10992968082427979, |
| "loss/reg": 8.692720413208008, |
| "loss/twn": 0.0, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.01465, |
| "grad_norm": 22.5, |
| "grad_norm_var": 447.0572265625, |
| "learning_rate": 0.0001, |
| "loss": 12.1669, |
| "loss/crossentropy": 2.5254509449005127, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.10511834174394608, |
| "loss/reg": 8.69253921508789, |
| "loss/twn": 0.0, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.014675, |
| "grad_norm": 20.25, |
| "grad_norm_var": 437.8072265625, |
| "learning_rate": 0.0001, |
| "loss": 11.0366, |
| "loss/crossentropy": 1.4436157941818237, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.08815399557352066, |
| "loss/reg": 8.692355155944824, |
| "loss/twn": 0.0, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.0147, |
| "grad_norm": 31.5, |
| "grad_norm_var": 435.9275390625, |
| "learning_rate": 0.0001, |
| "loss": 12.1994, |
| "loss/crossentropy": 2.47015118598938, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.13078603148460388, |
| "loss/reg": 8.692172050476074, |
| "loss/twn": 0.0, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.014725, |
| "grad_norm": 43.75, |
| "grad_norm_var": 439.5296223958333, |
| "learning_rate": 0.0001, |
| "loss": 10.8588, |
| "loss/crossentropy": 0.8713623285293579, |
| "loss/hidden": 1.25, |
| "loss/logits": 0.045468080788850784, |
| "loss/reg": 8.691998481750488, |
| "loss/twn": 0.0, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.01475, |
| "grad_norm": 24.875, |
| "grad_norm_var": 438.65598958333334, |
| "learning_rate": 0.0001, |
| "loss": 12.0896, |
| "loss/crossentropy": 2.4670262336730957, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.13388851284980774, |
| "loss/reg": 8.691813468933105, |
| "loss/twn": 0.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.014775, |
| "grad_norm": 22.625, |
| "grad_norm_var": 443.74765625, |
| "learning_rate": 0.0001, |
| "loss": 11.258, |
| "loss/crossentropy": 1.4507060050964355, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.07661449909210205, |
| "loss/reg": 8.69162368774414, |
| "loss/twn": 0.0, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.0148, |
| "grad_norm": 59.5, |
| "grad_norm_var": 492.1625, |
| "learning_rate": 0.0001, |
| "loss": 12.4811, |
| "loss/crossentropy": 2.834090232849121, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.12357810139656067, |
| "loss/reg": 8.691445350646973, |
| "loss/twn": 0.0, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.014825, |
| "grad_norm": 27.75, |
| "grad_norm_var": 482.09557291666664, |
| "learning_rate": 0.0001, |
| "loss": 11.4436, |
| "loss/crossentropy": 1.543041706085205, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.115507572889328, |
| "loss/reg": 8.691254615783691, |
| "loss/twn": 0.0, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.01485, |
| "grad_norm": 27.5, |
| "grad_norm_var": 479.97389322916666, |
| "learning_rate": 0.0001, |
| "loss": 11.1017, |
| "loss/crossentropy": 1.1550602912902832, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.09931459277868271, |
| "loss/reg": 8.691068649291992, |
| "loss/twn": 0.0, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.014875, |
| "grad_norm": 31.5, |
| "grad_norm_var": 472.41139322916666, |
| "learning_rate": 0.0001, |
| "loss": 12.3913, |
| "loss/crossentropy": 2.759997844696045, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.1435195505619049, |
| "loss/reg": 8.69089126586914, |
| "loss/twn": 0.0, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.0149, |
| "grad_norm": 29.875, |
| "grad_norm_var": 465.97057291666664, |
| "learning_rate": 0.0001, |
| "loss": 11.1387, |
| "loss/crossentropy": 1.0819698572158813, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.1472449004650116, |
| "loss/reg": 8.690725326538086, |
| "loss/twn": 0.0, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.014925, |
| "grad_norm": 52.75, |
| "grad_norm_var": 479.15826822916665, |
| "learning_rate": 0.0001, |
| "loss": 10.9594, |
| "loss/crossentropy": 1.2910205125808716, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.07161296904087067, |
| "loss/reg": 8.690539360046387, |
| "loss/twn": 0.0, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.01495, |
| "grad_norm": 27.75, |
| "grad_norm_var": 474.18170572916665, |
| "learning_rate": 0.0001, |
| "loss": 11.7085, |
| "loss/crossentropy": 2.0403950214385986, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.10269361734390259, |
| "loss/reg": 8.690367698669434, |
| "loss/twn": 0.0, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.014975, |
| "grad_norm": 25.5, |
| "grad_norm_var": 469.98118489583334, |
| "learning_rate": 0.0001, |
| "loss": 12.2016, |
| "loss/crossentropy": 2.477897882461548, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.135102778673172, |
| "loss/reg": 8.690192222595215, |
| "loss/twn": 0.0, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 22.875, |
| "grad_norm_var": 126.33932291666666, |
| "learning_rate": 0.0001, |
| "loss": 10.7187, |
| "loss/crossentropy": 0.8220002055168152, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.07385924458503723, |
| "loss/reg": 8.690016746520996, |
| "loss/twn": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.015025, |
| "grad_norm": 20.375, |
| "grad_norm_var": 132.39993489583333, |
| "learning_rate": 0.0001, |
| "loss": 12.4427, |
| "loss/crossentropy": 2.7641208171844482, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.12937086820602417, |
| "loss/reg": 8.689846992492676, |
| "loss/twn": 0.0, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.01505, |
| "grad_norm": 92.5, |
| "grad_norm_var": 362.3061848958333, |
| "learning_rate": 0.0001, |
| "loss": 11.4221, |
| "loss/crossentropy": 1.6327121257781982, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.09972336143255234, |
| "loss/reg": 8.689663887023926, |
| "loss/twn": 0.0, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.015075, |
| "grad_norm": 33.75, |
| "grad_norm_var": 347.0483723958333, |
| "learning_rate": 0.0001, |
| "loss": 12.3704, |
| "loss/crossentropy": 2.5541772842407227, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.1189047172665596, |
| "loss/reg": 8.68949031829834, |
| "loss/twn": 0.0, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.0151, |
| "grad_norm": 19.75, |
| "grad_norm_var": 362.5681640625, |
| "learning_rate": 0.0001, |
| "loss": 12.0657, |
| "loss/crossentropy": 2.4061169624328613, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.11483782529830933, |
| "loss/reg": 8.689311027526855, |
| "loss/twn": 0.0, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.015125, |
| "grad_norm": 35.5, |
| "grad_norm_var": 357.3775390625, |
| "learning_rate": 0.0001, |
| "loss": 11.7108, |
| "loss/crossentropy": 1.8135943412780762, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.12213584780693054, |
| "loss/reg": 8.689143180847168, |
| "loss/twn": 0.0, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.01515, |
| "grad_norm": 25.375, |
| "grad_norm_var": 356.7416015625, |
| "learning_rate": 0.0001, |
| "loss": 11.3882, |
| "loss/crossentropy": 1.5847375392913818, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.10667512565851212, |
| "loss/reg": 8.688963890075684, |
| "loss/twn": 0.0, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.015175, |
| "grad_norm": 52.5, |
| "grad_norm_var": 364.50598958333336, |
| "learning_rate": 0.0001, |
| "loss": 12.3388, |
| "loss/crossentropy": 2.5712785720825195, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.12947164475917816, |
| "loss/reg": 8.688798904418945, |
| "loss/twn": 0.0, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.0152, |
| "grad_norm": 94.0, |
| "grad_norm_var": 544.4809895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.5071, |
| "loss/crossentropy": 2.6922049522399902, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.15748482942581177, |
| "loss/reg": 8.68863296508789, |
| "loss/twn": 0.0, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.015225, |
| "grad_norm": 26.5, |
| "grad_norm_var": 546.4041666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.8091, |
| "loss/crossentropy": 1.8931523561477661, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.13371434807777405, |
| "loss/reg": 8.688455581665039, |
| "loss/twn": 0.0, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.01525, |
| "grad_norm": 25.625, |
| "grad_norm_var": 549.4051432291667, |
| "learning_rate": 0.0001, |
| "loss": 10.4281, |
| "loss/crossentropy": 0.5141164660453796, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.06944984942674637, |
| "loss/reg": 8.68829345703125, |
| "loss/twn": 0.0, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.015275, |
| "grad_norm": 27.125, |
| "grad_norm_var": 554.6893229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.2023, |
| "loss/crossentropy": 2.4272189140319824, |
| "loss/hidden": 0.953125, |
| "loss/logits": 0.13384541869163513, |
| "loss/reg": 8.688126564025879, |
| "loss/twn": 0.0, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.0153, |
| "grad_norm": 22.75, |
| "grad_norm_var": 565.8035807291667, |
| "learning_rate": 0.0001, |
| "loss": 11.7214, |
| "loss/crossentropy": 1.9512163400650024, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.10175444185733795, |
| "loss/reg": 8.68795394897461, |
| "loss/twn": 0.0, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.015325, |
| "grad_norm": 21.0, |
| "grad_norm_var": 565.4728515625, |
| "learning_rate": 0.0001, |
| "loss": 11.341, |
| "loss/crossentropy": 1.5439884662628174, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.1131681352853775, |
| "loss/reg": 8.687799453735352, |
| "loss/twn": 0.0, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.01535, |
| "grad_norm": 53.75, |
| "grad_norm_var": 579.7999348958333, |
| "learning_rate": 0.0001, |
| "loss": 12.0181, |
| "loss/crossentropy": 1.9833656549453735, |
| "loss/hidden": 1.1796875, |
| "loss/logits": 0.16742342710494995, |
| "loss/reg": 8.687641143798828, |
| "loss/twn": 0.0, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.015375, |
| "grad_norm": 25.75, |
| "grad_norm_var": 579.4061848958333, |
| "learning_rate": 0.0001, |
| "loss": 11.7056, |
| "loss/crossentropy": 1.8551675081253052, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.10823419690132141, |
| "loss/reg": 8.687470436096191, |
| "loss/twn": 0.0, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.0154, |
| "grad_norm": 35.0, |
| "grad_norm_var": 565.0393229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.8872, |
| "loss/crossentropy": 2.151254892349243, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.16588298976421356, |
| "loss/reg": 8.687288284301758, |
| "loss/twn": 0.0, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.015425, |
| "grad_norm": 86.5, |
| "grad_norm_var": 681.1369140625, |
| "learning_rate": 0.0001, |
| "loss": 12.1077, |
| "loss/crossentropy": 2.4658360481262207, |
| "loss/hidden": 0.8203125, |
| "loss/logits": 0.13440850377082825, |
| "loss/reg": 8.687117576599121, |
| "loss/twn": 0.0, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.01545, |
| "grad_norm": 23.625, |
| "grad_norm_var": 516.9489583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.1248, |
| "loss/crossentropy": 2.5159623622894287, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.1211075410246849, |
| "loss/reg": 8.686941146850586, |
| "loss/twn": 0.0, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.015475, |
| "grad_norm": 26.125, |
| "grad_norm_var": 524.9353515625, |
| "learning_rate": 0.0001, |
| "loss": 12.086, |
| "loss/crossentropy": 2.433162212371826, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.12230488657951355, |
| "loss/reg": 8.686768531799316, |
| "loss/twn": 0.0, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0155, |
| "grad_norm": 21.375, |
| "grad_norm_var": 521.2427083333333, |
| "learning_rate": 0.0001, |
| "loss": 12.1392, |
| "loss/crossentropy": 2.487259864807129, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.1176735907793045, |
| "loss/reg": 8.686601638793945, |
| "loss/twn": 0.0, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.015525, |
| "grad_norm": 32.25, |
| "grad_norm_var": 522.8372395833334, |
| "learning_rate": 0.0001, |
| "loss": 12.265, |
| "loss/crossentropy": 2.570328950881958, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.12933871150016785, |
| "loss/reg": 8.686436653137207, |
| "loss/twn": 0.0, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.01555, |
| "grad_norm": 26.875, |
| "grad_norm_var": 520.5622395833333, |
| "learning_rate": 0.0001, |
| "loss": 11.136, |
| "loss/crossentropy": 1.340998649597168, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.09306982159614563, |
| "loss/reg": 8.686264038085938, |
| "loss/twn": 0.0, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.015575, |
| "grad_norm": 17.625, |
| "grad_norm_var": 527.0468098958333, |
| "learning_rate": 0.0001, |
| "loss": 12.1424, |
| "loss/crossentropy": 2.5924296379089355, |
| "loss/hidden": 0.75, |
| "loss/logits": 0.11391822248697281, |
| "loss/reg": 8.686078071594238, |
| "loss/twn": 0.0, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.0156, |
| "grad_norm": 25.0, |
| "grad_norm_var": 285.18743489583335, |
| "learning_rate": 0.0001, |
| "loss": 12.2211, |
| "loss/crossentropy": 2.437443733215332, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.14070221781730652, |
| "loss/reg": 8.685905456542969, |
| "loss/twn": 0.0, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.015625, |
| "grad_norm": 36.5, |
| "grad_norm_var": 285.36451822916666, |
| "learning_rate": 0.0001, |
| "loss": 12.1984, |
| "loss/crossentropy": 2.3812856674194336, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.1587418168783188, |
| "loss/reg": 8.685735702514648, |
| "loss/twn": 0.0, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.01565, |
| "grad_norm": 104.0, |
| "grad_norm_var": 606.0080729166667, |
| "learning_rate": 0.0001, |
| "loss": 10.4901, |
| "loss/crossentropy": 0.44736507534980774, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.09935866296291351, |
| "loss/reg": 8.685565948486328, |
| "loss/twn": 0.0, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.015675, |
| "grad_norm": 48.75, |
| "grad_norm_var": 607.9791015625, |
| "learning_rate": 0.0001, |
| "loss": 11.5087, |
| "loss/crossentropy": 1.4010777473449707, |
| "loss/hidden": 1.34375, |
| "loss/logits": 0.07844536006450653, |
| "loss/reg": 8.685386657714844, |
| "loss/twn": 0.0, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0157, |
| "grad_norm": 34.5, |
| "grad_norm_var": 592.8264973958334, |
| "learning_rate": 0.0001, |
| "loss": 10.3466, |
| "loss/crossentropy": 0.49556177854537964, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.07986225187778473, |
| "loss/reg": 8.685196876525879, |
| "loss/twn": 0.0, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.015725, |
| "grad_norm": 77.5, |
| "grad_norm_var": 659.2728515625, |
| "learning_rate": 0.0001, |
| "loss": 11.9556, |
| "loss/crossentropy": 1.9556148052215576, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.10404779762029648, |
| "loss/reg": 8.685018539428711, |
| "loss/twn": 0.0, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.01575, |
| "grad_norm": 96.5, |
| "grad_norm_var": 839.3572265625, |
| "learning_rate": 0.0001, |
| "loss": 12.0146, |
| "loss/crossentropy": 2.254323720932007, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.14961354434490204, |
| "loss/reg": 8.684839248657227, |
| "loss/twn": 0.0, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.015775, |
| "grad_norm": 26.625, |
| "grad_norm_var": 837.1747395833333, |
| "learning_rate": 0.0001, |
| "loss": 12.3064, |
| "loss/crossentropy": 2.467432737350464, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.12301573157310486, |
| "loss/reg": 8.684676170349121, |
| "loss/twn": 0.0, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.0158, |
| "grad_norm": 24.75, |
| "grad_norm_var": 857.3010416666667, |
| "learning_rate": 0.0001, |
| "loss": 12.1765, |
| "loss/crossentropy": 2.555069923400879, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.1283564567565918, |
| "loss/reg": 8.684508323669434, |
| "loss/twn": 0.0, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.015825, |
| "grad_norm": 27.375, |
| "grad_norm_var": 742.9619140625, |
| "learning_rate": 0.0001, |
| "loss": 11.6926, |
| "loss/crossentropy": 1.8473200798034668, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.1062941700220108, |
| "loss/reg": 8.684345245361328, |
| "loss/twn": 0.0, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.01585, |
| "grad_norm": 20.125, |
| "grad_norm_var": 751.6426432291667, |
| "learning_rate": 0.0001, |
| "loss": 11.667, |
| "loss/crossentropy": 2.044696807861328, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.1294926106929779, |
| "loss/reg": 8.684167861938477, |
| "loss/twn": 0.0, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.015875, |
| "grad_norm": 24.375, |
| "grad_norm_var": 755.1572265625, |
| "learning_rate": 0.0001, |
| "loss": 12.4344, |
| "loss/crossentropy": 2.7302753925323486, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.18028318881988525, |
| "loss/reg": 8.683996200561523, |
| "loss/twn": 0.0, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.0159, |
| "grad_norm": 83.5, |
| "grad_norm_var": 839.9643229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.9397, |
| "loss/crossentropy": 2.2648117542266846, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.12779515981674194, |
| "loss/reg": 8.683818817138672, |
| "loss/twn": 0.0, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.015925, |
| "grad_norm": 31.75, |
| "grad_norm_var": 840.77265625, |
| "learning_rate": 0.0001, |
| "loss": 12.2932, |
| "loss/crossentropy": 2.4068849086761475, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.20660996437072754, |
| "loss/reg": 8.683658599853516, |
| "loss/twn": 0.0, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.01595, |
| "grad_norm": 118.0, |
| "grad_norm_var": 1150.3603515625, |
| "learning_rate": 0.0001, |
| "loss": 10.269, |
| "loss/crossentropy": 0.13195960223674774, |
| "loss/hidden": 1.4140625, |
| "loss/logits": 0.03953956440091133, |
| "loss/reg": 8.683473587036133, |
| "loss/twn": 0.0, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.015975, |
| "grad_norm": 48.75, |
| "grad_norm_var": 1077.3625, |
| "learning_rate": 0.0001, |
| "loss": 12.25, |
| "loss/crossentropy": 2.435452938079834, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.13128745555877686, |
| "loss/reg": 8.683296203613281, |
| "loss/twn": 0.0, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 84.5, |
| "grad_norm_var": 1086.4114583333333, |
| "learning_rate": 0.0001, |
| "loss": 11.4253, |
| "loss/crossentropy": 1.6488887071609497, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.09724262356758118, |
| "loss/reg": 8.683124542236328, |
| "loss/twn": 0.0, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.016025, |
| "grad_norm": 29.25, |
| "grad_norm_var": 1108.0330729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.2497, |
| "loss/crossentropy": 1.423577070236206, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.12754982709884644, |
| "loss/reg": 8.682952880859375, |
| "loss/twn": 0.0, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.01605, |
| "grad_norm": 22.0, |
| "grad_norm_var": 992.7205729166667, |
| "learning_rate": 0.0001, |
| "loss": 12.4124, |
| "loss/crossentropy": 2.764256715774536, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.11775293201208115, |
| "loss/reg": 8.682784080505371, |
| "loss/twn": 0.0, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.016075, |
| "grad_norm": 100.5, |
| "grad_norm_var": 1152.2291666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.5863, |
| "loss/crossentropy": 1.47548246383667, |
| "loss/hidden": 1.328125, |
| "loss/logits": 0.10010358691215515, |
| "loss/reg": 8.682621002197266, |
| "loss/twn": 0.0, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.0161, |
| "grad_norm": 26.75, |
| "grad_norm_var": 1175.22890625, |
| "learning_rate": 0.0001, |
| "loss": 12.3013, |
| "loss/crossentropy": 2.6313583850860596, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.10862234234809875, |
| "loss/reg": 8.682451248168945, |
| "loss/twn": 0.0, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.016125, |
| "grad_norm": 196.0, |
| "grad_norm_var": 2445.64765625, |
| "learning_rate": 0.0001, |
| "loss": 12.1933, |
| "loss/crossentropy": 2.51356840133667, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.13809403777122498, |
| "loss/reg": 8.68230152130127, |
| "loss/twn": 0.0, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.01615, |
| "grad_norm": 31.625, |
| "grad_norm_var": 2393.3759765625, |
| "learning_rate": 0.0001, |
| "loss": 11.2387, |
| "loss/crossentropy": 1.3238886594772339, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.09201399981975555, |
| "loss/reg": 8.682140350341797, |
| "loss/twn": 0.0, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.016175, |
| "grad_norm": 51.0, |
| "grad_norm_var": 2335.06640625, |
| "learning_rate": 0.0001, |
| "loss": 12.1563, |
| "loss/crossentropy": 2.4814062118530273, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.13353964686393738, |
| "loss/reg": 8.681985855102539, |
| "loss/twn": 0.0, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.0162, |
| "grad_norm": 30.375, |
| "grad_norm_var": 2312.4697265625, |
| "learning_rate": 0.0001, |
| "loss": 10.4744, |
| "loss/crossentropy": 0.505270779132843, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.09204334020614624, |
| "loss/reg": 8.681818008422852, |
| "loss/twn": 0.0, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.016225, |
| "grad_norm": 37.75, |
| "grad_norm_var": 2277.01640625, |
| "learning_rate": 0.0001, |
| "loss": 11.549, |
| "loss/crossentropy": 1.600907564163208, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.11019119620323181, |
| "loss/reg": 8.681657791137695, |
| "loss/twn": 0.0, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.01625, |
| "grad_norm": 28.375, |
| "grad_norm_var": 2239.040625, |
| "learning_rate": 0.0001, |
| "loss": 11.8187, |
| "loss/crossentropy": 1.919603705406189, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.16285988688468933, |
| "loss/reg": 8.681504249572754, |
| "loss/twn": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.016275, |
| "grad_norm": 23.125, |
| "grad_norm_var": 2244.9143229166666, |
| "learning_rate": 0.0001, |
| "loss": 11.693, |
| "loss/crossentropy": 1.965175747871399, |
| "loss/hidden": 0.96484375, |
| "loss/logits": 0.08164883404970169, |
| "loss/reg": 8.681354522705078, |
| "loss/twn": 0.0, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.0163, |
| "grad_norm": 35.75, |
| "grad_norm_var": 2231.1364583333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1487, |
| "loss/crossentropy": 2.4559102058410645, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.1248602345585823, |
| "loss/reg": 8.681204795837402, |
| "loss/twn": 0.0, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.016325, |
| "grad_norm": 27.5, |
| "grad_norm_var": 2245.989322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.2035, |
| "loss/crossentropy": 2.3635737895965576, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.10413461178541183, |
| "loss/reg": 8.68105411529541, |
| "loss/twn": 0.0, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.01635, |
| "grad_norm": 42.0, |
| "grad_norm_var": 1975.7143229166666, |
| "learning_rate": 0.0001, |
| "loss": 11.6703, |
| "loss/crossentropy": 1.8136188983917236, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.0820704996585846, |
| "loss/reg": 8.680907249450684, |
| "loss/twn": 0.0, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.016375, |
| "grad_norm": 27.875, |
| "grad_norm_var": 2009.0817057291667, |
| "learning_rate": 0.0001, |
| "loss": 11.1452, |
| "loss/crossentropy": 1.3319860696792603, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.09343774616718292, |
| "loss/reg": 8.680745124816895, |
| "loss/twn": 0.0, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.0164, |
| "grad_norm": 29.375, |
| "grad_norm_var": 1942.8455729166667, |
| "learning_rate": 0.0001, |
| "loss": 10.5685, |
| "loss/crossentropy": 0.6607128381729126, |
| "loss/hidden": 1.1484375, |
| "loss/logits": 0.07878898084163666, |
| "loss/reg": 8.68058967590332, |
| "loss/twn": 0.0, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.016425, |
| "grad_norm": 29.125, |
| "grad_norm_var": 1943.1291015625, |
| "learning_rate": 0.0001, |
| "loss": 12.4042, |
| "loss/crossentropy": 2.55853271484375, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.17303398251533508, |
| "loss/reg": 8.680437088012695, |
| "loss/twn": 0.0, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.01645, |
| "grad_norm": 34.25, |
| "grad_norm_var": 1912.9889973958334, |
| "learning_rate": 0.0001, |
| "loss": 10.8389, |
| "loss/crossentropy": 0.778729259967804, |
| "loss/hidden": 1.2890625, |
| "loss/logits": 0.09083634614944458, |
| "loss/reg": 8.680303573608398, |
| "loss/twn": 0.0, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.016475, |
| "grad_norm": 21.5, |
| "grad_norm_var": 1739.1067057291666, |
| "learning_rate": 0.0001, |
| "loss": 11.2777, |
| "loss/crossentropy": 1.5125099420547485, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.09283959120512009, |
| "loss/reg": 8.680154800415039, |
| "loss/twn": 0.0, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0165, |
| "grad_norm": 18.0, |
| "grad_norm_var": 1761.7108723958333, |
| "learning_rate": 0.0001, |
| "loss": 12.1729, |
| "loss/crossentropy": 2.6743276119232178, |
| "loss/hidden": 0.7109375, |
| "loss/logits": 0.10764491558074951, |
| "loss/reg": 8.680008888244629, |
| "loss/twn": 0.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.016525, |
| "grad_norm": 149.0, |
| "grad_norm_var": 931.4264973958333, |
| "learning_rate": 0.0001, |
| "loss": 11.796, |
| "loss/crossentropy": 1.4310739040374756, |
| "loss/hidden": 1.6015625, |
| "loss/logits": 0.08350003510713577, |
| "loss/reg": 8.679859161376953, |
| "loss/twn": 0.0, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.01655, |
| "grad_norm": 23.0, |
| "grad_norm_var": 944.0270833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.2672, |
| "loss/crossentropy": 1.3722707033157349, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.12146957218647003, |
| "loss/reg": 8.679713249206543, |
| "loss/twn": 0.0, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.016575, |
| "grad_norm": 33.75, |
| "grad_norm_var": 932.7247395833333, |
| "learning_rate": 0.0001, |
| "loss": 11.1604, |
| "loss/crossentropy": 1.3851617574691772, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.08784488588571548, |
| "loss/reg": 8.67956256866455, |
| "loss/twn": 0.0, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.0166, |
| "grad_norm": 21.0, |
| "grad_norm_var": 946.4014973958333, |
| "learning_rate": 0.0001, |
| "loss": 12.1211, |
| "loss/crossentropy": 2.5266802310943604, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12983056902885437, |
| "loss/reg": 8.679410934448242, |
| "loss/twn": 0.0, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.016625, |
| "grad_norm": 69.5, |
| "grad_norm_var": 1015.3916015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1216, |
| "loss/crossentropy": 2.4692494869232178, |
| "loss/hidden": 0.8515625, |
| "loss/logits": 0.12155141681432724, |
| "loss/reg": 8.679244041442871, |
| "loss/twn": 0.0, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.01665, |
| "grad_norm": 32.75, |
| "grad_norm_var": 1010.7864583333334, |
| "learning_rate": 0.0001, |
| "loss": 12.6161, |
| "loss/crossentropy": 2.6228532791137695, |
| "loss/hidden": 1.15625, |
| "loss/logits": 0.15789055824279785, |
| "loss/reg": 8.679091453552246, |
| "loss/twn": 0.0, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.016675, |
| "grad_norm": 29.875, |
| "grad_norm_var": 999.7122395833334, |
| "learning_rate": 0.0001, |
| "loss": 10.5445, |
| "loss/crossentropy": 0.739525318145752, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.07139641791582108, |
| "loss/reg": 8.678937911987305, |
| "loss/twn": 0.0, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.0167, |
| "grad_norm": 29.0, |
| "grad_norm_var": 1005.4989583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.2021, |
| "loss/crossentropy": 2.478259325027466, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.11538438498973846, |
| "loss/reg": 8.678790092468262, |
| "loss/twn": 0.0, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.016725, |
| "grad_norm": 26.125, |
| "grad_norm_var": 1007.6509765625, |
| "learning_rate": 0.0001, |
| "loss": 11.5081, |
| "loss/crossentropy": 1.804980754852295, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.11036747694015503, |
| "loss/reg": 8.678645133972168, |
| "loss/twn": 0.0, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.01675, |
| "grad_norm": 24.125, |
| "grad_norm_var": 1019.29765625, |
| "learning_rate": 0.0001, |
| "loss": 11.1034, |
| "loss/crossentropy": 1.324500322341919, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.11215756833553314, |
| "loss/reg": 8.678498268127441, |
| "loss/twn": 0.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.016775, |
| "grad_norm": 24.875, |
| "grad_norm_var": 1023.66640625, |
| "learning_rate": 0.0001, |
| "loss": 12.2794, |
| "loss/crossentropy": 2.6409506797790527, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.1281108558177948, |
| "loss/reg": 8.678349494934082, |
| "loss/twn": 0.0, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.0168, |
| "grad_norm": 22.5, |
| "grad_norm_var": 1033.7962890625, |
| "learning_rate": 0.0001, |
| "loss": 12.2544, |
| "loss/crossentropy": 2.643657684326172, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.12006359547376633, |
| "loss/reg": 8.678205490112305, |
| "loss/twn": 0.0, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.016825, |
| "grad_norm": 22.75, |
| "grad_norm_var": 1042.8375, |
| "learning_rate": 0.0001, |
| "loss": 12.0413, |
| "loss/crossentropy": 2.387331485748291, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.1400132179260254, |
| "loss/reg": 8.678048133850098, |
| "loss/twn": 0.0, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.01685, |
| "grad_norm": 23.25, |
| "grad_norm_var": 1053.5166666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.0678, |
| "loss/crossentropy": 1.2755028009414673, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.07538095116615295, |
| "loss/reg": 8.677888870239258, |
| "loss/twn": 0.0, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.016875, |
| "grad_norm": 30.125, |
| "grad_norm_var": 1041.8504557291667, |
| "learning_rate": 0.0001, |
| "loss": 12.1535, |
| "loss/crossentropy": 2.4862682819366455, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.1457032561302185, |
| "loss/reg": 8.677735328674316, |
| "loss/twn": 0.0, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0169, |
| "grad_norm": 24.625, |
| "grad_norm_var": 1028.4934895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.5446, |
| "loss/crossentropy": 2.6862292289733887, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.20027393102645874, |
| "loss/reg": 8.677580833435059, |
| "loss/twn": 0.0, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.016925, |
| "grad_norm": 29.0, |
| "grad_norm_var": 130.74348958333334, |
| "learning_rate": 0.0001, |
| "loss": 12.0161, |
| "loss/crossentropy": 2.3683135509490967, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.12665444612503052, |
| "loss/reg": 8.677411079406738, |
| "loss/twn": 0.0, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.01695, |
| "grad_norm": 163.0, |
| "grad_norm_var": 1241.1184895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.0525, |
| "loss/crossentropy": 2.3668503761291504, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.1255994290113449, |
| "loss/reg": 8.677251815795898, |
| "loss/twn": 0.0, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.016975, |
| "grad_norm": 38.0, |
| "grad_norm_var": 1239.9010416666667, |
| "learning_rate": 0.0001, |
| "loss": 11.0399, |
| "loss/crossentropy": 1.007588267326355, |
| "loss/hidden": 1.2578125, |
| "loss/logits": 0.0973692387342453, |
| "loss/reg": 8.677084922790527, |
| "loss/twn": 0.0, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 21.875, |
| "grad_norm_var": 1237.9473307291667, |
| "learning_rate": 0.0001, |
| "loss": 12.0996, |
| "loss/crossentropy": 2.4121975898742676, |
| "loss/hidden": 0.90234375, |
| "loss/logits": 0.10816814005374908, |
| "loss/reg": 8.676929473876953, |
| "loss/twn": 0.0, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.017025, |
| "grad_norm": 40.0, |
| "grad_norm_var": 1169.2676432291667, |
| "learning_rate": 0.0001, |
| "loss": 11.3948, |
| "loss/crossentropy": 1.5147836208343506, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.07047547399997711, |
| "loss/reg": 8.676769256591797, |
| "loss/twn": 0.0, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.01705, |
| "grad_norm": 28.375, |
| "grad_norm_var": 1172.5739583333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1792, |
| "loss/crossentropy": 2.4957895278930664, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.14354616403579712, |
| "loss/reg": 8.676616668701172, |
| "loss/twn": 0.0, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.017075, |
| "grad_norm": 21.5, |
| "grad_norm_var": 1183.9020182291667, |
| "learning_rate": 0.0001, |
| "loss": 10.9521, |
| "loss/crossentropy": 1.109391212463379, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.1115969568490982, |
| "loss/reg": 8.6764554977417, |
| "loss/twn": 0.0, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.0171, |
| "grad_norm": 122.0, |
| "grad_norm_var": 1642.9926432291666, |
| "learning_rate": 0.0001, |
| "loss": 11.9535, |
| "loss/crossentropy": 1.5267117023468018, |
| "loss/hidden": 1.6484375, |
| "loss/logits": 0.10209629684686661, |
| "loss/reg": 8.676294326782227, |
| "loss/twn": 0.0, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.017125, |
| "grad_norm": 21.75, |
| "grad_norm_var": 1653.0893229166666, |
| "learning_rate": 0.0001, |
| "loss": 10.9633, |
| "loss/crossentropy": 1.0852842330932617, |
| "loss/hidden": 1.1015625, |
| "loss/logits": 0.10026420652866364, |
| "loss/reg": 8.676140785217285, |
| "loss/twn": 0.0, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.01715, |
| "grad_norm": 36.25, |
| "grad_norm_var": 1634.8197265625, |
| "learning_rate": 0.0001, |
| "loss": 11.5949, |
| "loss/crossentropy": 1.8544628620147705, |
| "loss/hidden": 0.9453125, |
| "loss/logits": 0.11912861466407776, |
| "loss/reg": 8.675971031188965, |
| "loss/twn": 0.0, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.017175, |
| "grad_norm": 25.5, |
| "grad_norm_var": 1633.428125, |
| "learning_rate": 0.0001, |
| "loss": 10.6788, |
| "loss/crossentropy": 0.7781402468681335, |
| "loss/hidden": 1.140625, |
| "loss/logits": 0.08424162864685059, |
| "loss/reg": 8.675811767578125, |
| "loss/twn": 0.0, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.0172, |
| "grad_norm": 32.5, |
| "grad_norm_var": 1613.803125, |
| "learning_rate": 0.0001, |
| "loss": 12.2131, |
| "loss/crossentropy": 2.4414634704589844, |
| "loss/hidden": 0.9453125, |
| "loss/logits": 0.15070614218711853, |
| "loss/reg": 8.675655364990234, |
| "loss/twn": 0.0, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.017225, |
| "grad_norm": 30.5, |
| "grad_norm_var": 1597.11640625, |
| "learning_rate": 0.0001, |
| "loss": 12.3484, |
| "loss/crossentropy": 2.6249377727508545, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.11827968060970306, |
| "loss/reg": 8.675497055053711, |
| "loss/twn": 0.0, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.01725, |
| "grad_norm": 33.75, |
| "grad_norm_var": 1576.33515625, |
| "learning_rate": 0.0001, |
| "loss": 11.4304, |
| "loss/crossentropy": 1.702589750289917, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.09154261648654938, |
| "loss/reg": 8.67534065246582, |
| "loss/twn": 0.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.017275, |
| "grad_norm": 270.0, |
| "grad_norm_var": 4739.3119140625, |
| "learning_rate": 0.0001, |
| "loss": 11.5967, |
| "loss/crossentropy": 1.7601555585861206, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.10664980113506317, |
| "loss/reg": 8.675187110900879, |
| "loss/twn": 0.0, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.0173, |
| "grad_norm": 38.0, |
| "grad_norm_var": 4689.789583333333, |
| "learning_rate": 0.0001, |
| "loss": 11.1409, |
| "loss/crossentropy": 1.2822977304458618, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.10546335577964783, |
| "loss/reg": 8.675052642822266, |
| "loss/twn": 0.0, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.017325, |
| "grad_norm": 34.75, |
| "grad_norm_var": 4668.47265625, |
| "learning_rate": 0.0001, |
| "loss": 11.0565, |
| "loss/crossentropy": 1.2594798803329468, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.08306904137134552, |
| "loss/reg": 8.674908638000488, |
| "loss/twn": 0.0, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.01735, |
| "grad_norm": 34.5, |
| "grad_norm_var": 3933.3455729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.9431, |
| "loss/crossentropy": 2.340895652770996, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.11889052391052246, |
| "loss/reg": 8.674761772155762, |
| "loss/twn": 0.0, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.017375, |
| "grad_norm": 26.25, |
| "grad_norm_var": 3963.6385416666667, |
| "learning_rate": 0.0001, |
| "loss": 11.6184, |
| "loss/crossentropy": 1.9524754285812378, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.10850561410188675, |
| "loss/reg": 8.674614906311035, |
| "loss/twn": 0.0, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.0174, |
| "grad_norm": 26.75, |
| "grad_norm_var": 3946.1317057291667, |
| "learning_rate": 0.0001, |
| "loss": 11.3042, |
| "loss/crossentropy": 1.6829349994659424, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.09133752435445786, |
| "loss/reg": 8.674471855163574, |
| "loss/twn": 0.0, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.017425, |
| "grad_norm": 22.0, |
| "grad_norm_var": 3993.7379557291665, |
| "learning_rate": 0.0001, |
| "loss": 12.2908, |
| "loss/crossentropy": 2.5423495769500732, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.13664641976356506, |
| "loss/reg": 8.674322128295898, |
| "loss/twn": 0.0, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.01745, |
| "grad_norm": 28.375, |
| "grad_norm_var": 3993.7379557291665, |
| "learning_rate": 0.0001, |
| "loss": 11.376, |
| "loss/crossentropy": 1.6134560108184814, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.08837807923555374, |
| "loss/reg": 8.674178123474121, |
| "loss/twn": 0.0, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.017475, |
| "grad_norm": 52.5, |
| "grad_norm_var": 3934.8702473958333, |
| "learning_rate": 0.0001, |
| "loss": 12.3206, |
| "loss/crossentropy": 2.4500646591186523, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.13401490449905396, |
| "loss/reg": 8.674036026000977, |
| "loss/twn": 0.0, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 27.5, |
| "grad_norm_var": 3613.6686848958334, |
| "learning_rate": 0.0001, |
| "loss": 11.3885, |
| "loss/crossentropy": 1.6851568222045898, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.11151707172393799, |
| "loss/reg": 8.6738862991333, |
| "loss/twn": 0.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.017525, |
| "grad_norm": 43.0, |
| "grad_norm_var": 3572.3197265625, |
| "learning_rate": 0.0001, |
| "loss": 12.3313, |
| "loss/crossentropy": 2.652559280395508, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.13005965948104858, |
| "loss/reg": 8.673727989196777, |
| "loss/twn": 0.0, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.01755, |
| "grad_norm": 25.0, |
| "grad_norm_var": 3597.3041015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1081, |
| "loss/crossentropy": 2.3341362476348877, |
| "loss/hidden": 0.96484375, |
| "loss/logits": 0.13550812005996704, |
| "loss/reg": 8.67358112335205, |
| "loss/twn": 0.0, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.017575, |
| "grad_norm": 103.0, |
| "grad_norm_var": 3751.2546223958334, |
| "learning_rate": 0.0001, |
| "loss": 12.1784, |
| "loss/crossentropy": 2.1381049156188965, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.12464547902345657, |
| "loss/reg": 8.673436164855957, |
| "loss/twn": 0.0, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 27.875, |
| "grad_norm_var": 3764.4768229166666, |
| "learning_rate": 0.0001, |
| "loss": 11.7685, |
| "loss/crossentropy": 2.2641758918762207, |
| "loss/hidden": 0.7109375, |
| "loss/logits": 0.12013732641935349, |
| "loss/reg": 8.673284530639648, |
| "loss/twn": 0.0, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.017625, |
| "grad_norm": 33.5, |
| "grad_norm_var": 3756.645572916667, |
| "learning_rate": 0.0001, |
| "loss": 11.7974, |
| "loss/crossentropy": 2.1641063690185547, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.12029796838760376, |
| "loss/reg": 8.673141479492188, |
| "loss/twn": 0.0, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.01765, |
| "grad_norm": 38.5, |
| "grad_norm_var": 3746.7052083333333, |
| "learning_rate": 0.0001, |
| "loss": 11.2052, |
| "loss/crossentropy": 1.226467490196228, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.0947582945227623, |
| "loss/reg": 8.67300033569336, |
| "loss/twn": 0.0, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.017675, |
| "grad_norm": 44.75, |
| "grad_norm_var": 369.59765625, |
| "learning_rate": 0.0001, |
| "loss": 9.9471, |
| "loss/crossentropy": 0.2296876758337021, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.03679632022976875, |
| "loss/reg": 8.6728515625, |
| "loss/twn": 0.0, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.0177, |
| "grad_norm": 28.25, |
| "grad_norm_var": 375.396875, |
| "learning_rate": 0.0001, |
| "loss": 11.8264, |
| "loss/crossentropy": 2.170910596847534, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.115595743060112, |
| "loss/reg": 8.672700881958008, |
| "loss/twn": 0.0, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.017725, |
| "grad_norm": 189.0, |
| "grad_norm_var": 1810.40390625, |
| "learning_rate": 0.0001, |
| "loss": 11.1687, |
| "loss/crossentropy": 1.3513436317443848, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.09009671956300735, |
| "loss/reg": 8.67255687713623, |
| "loss/twn": 0.0, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.01775, |
| "grad_norm": 34.0, |
| "grad_norm_var": 1811.24765625, |
| "learning_rate": 0.0001, |
| "loss": 12.1041, |
| "loss/crossentropy": 2.4291117191314697, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.12367990612983704, |
| "loss/reg": 8.672411918640137, |
| "loss/twn": 0.0, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.017775, |
| "grad_norm": 85.0, |
| "grad_norm_var": 1865.2854166666666, |
| "learning_rate": 0.0001, |
| "loss": 11.5099, |
| "loss/crossentropy": 1.4996607303619385, |
| "loss/hidden": 1.2734375, |
| "loss/logits": 0.06449121981859207, |
| "loss/reg": 8.672269821166992, |
| "loss/twn": 0.0, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.0178, |
| "grad_norm": 24.5, |
| "grad_norm_var": 1872.7455729166666, |
| "learning_rate": 0.0001, |
| "loss": 12.0987, |
| "loss/crossentropy": 2.494858980178833, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.13483405113220215, |
| "loss/reg": 8.672136306762695, |
| "loss/twn": 0.0, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.017825, |
| "grad_norm": 27.25, |
| "grad_norm_var": 1854.5729166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.6112, |
| "loss/crossentropy": 2.003628969192505, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.10350976884365082, |
| "loss/reg": 8.671984672546387, |
| "loss/twn": 0.0, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.01785, |
| "grad_norm": 55.0, |
| "grad_norm_var": 1819.4473307291667, |
| "learning_rate": 0.0001, |
| "loss": 11.2997, |
| "loss/crossentropy": 1.4207143783569336, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.08219152688980103, |
| "loss/reg": 8.671839714050293, |
| "loss/twn": 0.0, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.017875, |
| "grad_norm": 25.875, |
| "grad_norm_var": 1863.4479166666667, |
| "learning_rate": 0.0001, |
| "loss": 12.7446, |
| "loss/crossentropy": 3.069246530532837, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.13643398880958557, |
| "loss/reg": 8.671697616577148, |
| "loss/twn": 0.0, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.0179, |
| "grad_norm": 27.25, |
| "grad_norm_var": 1864.2268229166666, |
| "learning_rate": 0.0001, |
| "loss": 11.5884, |
| "loss/crossentropy": 1.9184796810150146, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.11163938045501709, |
| "loss/reg": 8.671571731567383, |
| "loss/twn": 0.0, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.017925, |
| "grad_norm": 124.5, |
| "grad_norm_var": 2195.3205729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.3117, |
| "loss/crossentropy": 1.525429368019104, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.09922030568122864, |
| "loss/reg": 8.671445846557617, |
| "loss/twn": 0.0, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.01795, |
| "grad_norm": 36.0, |
| "grad_norm_var": 2157.668489583333, |
| "learning_rate": 0.0001, |
| "loss": 11.8638, |
| "loss/crossentropy": 1.9758306741714478, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.12289813905954361, |
| "loss/reg": 8.671307563781738, |
| "loss/twn": 0.0, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.017975, |
| "grad_norm": 29.25, |
| "grad_norm_var": 2040.5135416666667, |
| "learning_rate": 0.0001, |
| "loss": 12.6235, |
| "loss/crossentropy": 2.8443222045898438, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.1509847193956375, |
| "loss/reg": 8.671172142028809, |
| "loss/twn": 0.0, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 27.375, |
| "grad_norm_var": 2042.13125, |
| "learning_rate": 0.0001, |
| "loss": 12.4495, |
| "loss/crossentropy": 2.8385610580444336, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.10786540806293488, |
| "loss/reg": 8.671030044555664, |
| "loss/twn": 0.0, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.018025, |
| "grad_norm": 24.25, |
| "grad_norm_var": 2070.14140625, |
| "learning_rate": 0.0001, |
| "loss": 12.2449, |
| "loss/crossentropy": 2.5518176555633545, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.15501756966114044, |
| "loss/reg": 8.670918464660645, |
| "loss/twn": 0.0, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.01805, |
| "grad_norm": 130.0, |
| "grad_norm_var": 2437.28515625, |
| "learning_rate": 0.0001, |
| "loss": 11.2703, |
| "loss/crossentropy": 1.3152107000350952, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.08120039105415344, |
| "loss/reg": 8.670785903930664, |
| "loss/twn": 0.0, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.018075, |
| "grad_norm": 27.0, |
| "grad_norm_var": 2486.0052083333335, |
| "learning_rate": 0.0001, |
| "loss": 11.7594, |
| "loss/crossentropy": 2.0970349311828613, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.09718000143766403, |
| "loss/reg": 8.670642852783203, |
| "loss/twn": 0.0, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.0181, |
| "grad_norm": 23.875, |
| "grad_norm_var": 2503.3343098958335, |
| "learning_rate": 0.0001, |
| "loss": 11.4722, |
| "loss/crossentropy": 1.724533200263977, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.09668336063623428, |
| "loss/reg": 8.670510292053223, |
| "loss/twn": 0.0, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.018125, |
| "grad_norm": 189.0, |
| "grad_norm_var": 2503.3343098958335, |
| "learning_rate": 0.0001, |
| "loss": 12.2812, |
| "loss/crossentropy": 2.6063339710235596, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.14899684488773346, |
| "loss/reg": 8.670373916625977, |
| "loss/twn": 0.0, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.01815, |
| "grad_norm": 36.5, |
| "grad_norm_var": 2496.5139973958335, |
| "learning_rate": 0.0001, |
| "loss": 12.2923, |
| "loss/crossentropy": 2.58054518699646, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.1431220918893814, |
| "loss/reg": 8.670238494873047, |
| "loss/twn": 0.0, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.018175, |
| "grad_norm": 24.75, |
| "grad_norm_var": 2488.7317057291666, |
| "learning_rate": 0.0001, |
| "loss": 12.2062, |
| "loss/crossentropy": 2.6466224193573, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.11991465091705322, |
| "loss/reg": 8.670087814331055, |
| "loss/twn": 0.0, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.0182, |
| "grad_norm": 26.875, |
| "grad_norm_var": 2480.3684895833335, |
| "learning_rate": 0.0001, |
| "loss": 12.0167, |
| "loss/crossentropy": 2.461927652359009, |
| "loss/hidden": 0.76171875, |
| "loss/logits": 0.1231193095445633, |
| "loss/reg": 8.669951438903809, |
| "loss/twn": 0.0, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.018225, |
| "grad_norm": 21.0, |
| "grad_norm_var": 2503.578125, |
| "learning_rate": 0.0001, |
| "loss": 12.1351, |
| "loss/crossentropy": 2.4456393718719482, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.14857999980449677, |
| "loss/reg": 8.669812202453613, |
| "loss/twn": 0.0, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.01825, |
| "grad_norm": 29.75, |
| "grad_norm_var": 2532.589322916667, |
| "learning_rate": 0.0001, |
| "loss": 10.8741, |
| "loss/crossentropy": 0.9910110831260681, |
| "loss/hidden": 1.1328125, |
| "loss/logits": 0.08062282204627991, |
| "loss/reg": 8.66968059539795, |
| "loss/twn": 0.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.018275, |
| "grad_norm": 23.625, |
| "grad_norm_var": 2540.204166666667, |
| "learning_rate": 0.0001, |
| "loss": 10.4027, |
| "loss/crossentropy": 0.645240843296051, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.05671399086713791, |
| "loss/reg": 8.669541358947754, |
| "loss/twn": 0.0, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.0183, |
| "grad_norm": 20.125, |
| "grad_norm_var": 2565.048893229167, |
| "learning_rate": 0.0001, |
| "loss": 12.4039, |
| "loss/crossentropy": 2.713857650756836, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.14172154664993286, |
| "loss/reg": 8.669404983520508, |
| "loss/twn": 0.0, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.018325, |
| "grad_norm": 28.375, |
| "grad_norm_var": 2182.801822916667, |
| "learning_rate": 0.0001, |
| "loss": 12.2232, |
| "loss/crossentropy": 2.4549005031585693, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.16544505953788757, |
| "loss/reg": 8.669259071350098, |
| "loss/twn": 0.0, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.01835, |
| "grad_norm": 32.75, |
| "grad_norm_var": 2186.759375, |
| "learning_rate": 0.0001, |
| "loss": 12.3991, |
| "loss/crossentropy": 2.7525079250335693, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.1337253600358963, |
| "loss/reg": 8.66911506652832, |
| "loss/twn": 0.0, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.018375, |
| "grad_norm": 58.75, |
| "grad_norm_var": 2185.46875, |
| "learning_rate": 0.0001, |
| "loss": 10.2716, |
| "loss/crossentropy": 0.3545558452606201, |
| "loss/hidden": 1.203125, |
| "loss/logits": 0.04493255540728569, |
| "loss/reg": 8.668961524963379, |
| "loss/twn": 0.0, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.0184, |
| "grad_norm": 22.5, |
| "grad_norm_var": 2198.5728515625, |
| "learning_rate": 0.0001, |
| "loss": 10.0198, |
| "loss/crossentropy": 0.23631225526332855, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.05213317275047302, |
| "loss/reg": 8.668821334838867, |
| "loss/twn": 0.0, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.018425, |
| "grad_norm": 26.875, |
| "grad_norm_var": 2191.76015625, |
| "learning_rate": 0.0001, |
| "loss": 10.6726, |
| "loss/crossentropy": 0.8763217926025391, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.08067437261343002, |
| "loss/reg": 8.668697357177734, |
| "loss/twn": 0.0, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.01845, |
| "grad_norm": 30.0, |
| "grad_norm_var": 1684.88515625, |
| "learning_rate": 0.0001, |
| "loss": 10.9188, |
| "loss/crossentropy": 1.3276132345199585, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.0671548917889595, |
| "loss/reg": 8.668547630310059, |
| "loss/twn": 0.0, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.018475, |
| "grad_norm": 22.75, |
| "grad_norm_var": 1692.734375, |
| "learning_rate": 0.0001, |
| "loss": 12.362, |
| "loss/crossentropy": 2.765106201171875, |
| "loss/hidden": 0.81640625, |
| "loss/logits": 0.11211474239826202, |
| "loss/reg": 8.66840934753418, |
| "loss/twn": 0.0, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0185, |
| "grad_norm": 21.125, |
| "grad_norm_var": 1698.60390625, |
| "learning_rate": 0.0001, |
| "loss": 12.1584, |
| "loss/crossentropy": 2.5377485752105713, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.15163663029670715, |
| "loss/reg": 8.668268203735352, |
| "loss/twn": 0.0, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.018525, |
| "grad_norm": 29.875, |
| "grad_norm_var": 86.38587239583333, |
| "learning_rate": 0.0001, |
| "loss": 10.5281, |
| "loss/crossentropy": 0.6243162751197815, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.07158983498811722, |
| "loss/reg": 8.668137550354004, |
| "loss/twn": 0.0, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.01855, |
| "grad_norm": 33.75, |
| "grad_norm_var": 83.9166015625, |
| "learning_rate": 0.0001, |
| "loss": 11.3755, |
| "loss/crossentropy": 1.4815775156021118, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.10873249918222427, |
| "loss/reg": 8.667993545532227, |
| "loss/twn": 0.0, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.018575, |
| "grad_norm": 143.0, |
| "grad_norm_var": 901.8124348958333, |
| "learning_rate": 0.0001, |
| "loss": 11.659, |
| "loss/crossentropy": 1.794521450996399, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.12629684805870056, |
| "loss/reg": 8.667859077453613, |
| "loss/twn": 0.0, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.0186, |
| "grad_norm": 51.25, |
| "grad_norm_var": 910.2802083333333, |
| "learning_rate": 0.0001, |
| "loss": 11.5766, |
| "loss/crossentropy": 1.5883032083511353, |
| "loss/hidden": 1.1953125, |
| "loss/logits": 0.12525807321071625, |
| "loss/reg": 8.667725563049316, |
| "loss/twn": 0.0, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.018625, |
| "grad_norm": 21.25, |
| "grad_norm_var": 909.7434895833334, |
| "learning_rate": 0.0001, |
| "loss": 11.8824, |
| "loss/crossentropy": 2.298858880996704, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.11518649011850357, |
| "loss/reg": 8.66757583618164, |
| "loss/twn": 0.0, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.01865, |
| "grad_norm": 247.0, |
| "grad_norm_var": 3642.79375, |
| "learning_rate": 0.0001, |
| "loss": 10.7809, |
| "loss/crossentropy": 0.9643247723579407, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.11008217185735703, |
| "loss/reg": 8.667430877685547, |
| "loss/twn": 0.0, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.018675, |
| "grad_norm": 31.0, |
| "grad_norm_var": 3619.4587890625, |
| "learning_rate": 0.0001, |
| "loss": 11.2286, |
| "loss/crossentropy": 1.418555498123169, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.10369899123907089, |
| "loss/reg": 8.667282104492188, |
| "loss/twn": 0.0, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.0187, |
| "grad_norm": 23.75, |
| "grad_norm_var": 3605.225, |
| "learning_rate": 0.0001, |
| "loss": 12.061, |
| "loss/crossentropy": 2.45884370803833, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.0991104319691658, |
| "loss/reg": 8.667143821716309, |
| "loss/twn": 0.0, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.018725, |
| "grad_norm": 19.125, |
| "grad_norm_var": 3639.0934895833334, |
| "learning_rate": 0.0001, |
| "loss": 11.9897, |
| "loss/crossentropy": 2.389986515045166, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.12021613866090775, |
| "loss/reg": 8.667019844055176, |
| "loss/twn": 0.0, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.01875, |
| "grad_norm": 38.25, |
| "grad_norm_var": 3627.6580729166667, |
| "learning_rate": 0.0001, |
| "loss": 10.1606, |
| "loss/crossentropy": 0.2755975127220154, |
| "loss/hidden": 1.1640625, |
| "loss/logits": 0.05403953790664673, |
| "loss/reg": 8.666891098022461, |
| "loss/twn": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.018775, |
| "grad_norm": 18.5, |
| "grad_norm_var": 3688.7458333333334, |
| "learning_rate": 0.0001, |
| "loss": 12.2271, |
| "loss/crossentropy": 2.6828043460845947, |
| "loss/hidden": 0.75390625, |
| "loss/logits": 0.12360608577728271, |
| "loss/reg": 8.666756629943848, |
| "loss/twn": 0.0, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.0188, |
| "grad_norm": 23.125, |
| "grad_norm_var": 3686.5827473958334, |
| "learning_rate": 0.0001, |
| "loss": 10.4855, |
| "loss/crossentropy": 0.9198982119560242, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.09819576144218445, |
| "loss/reg": 8.66662311553955, |
| "loss/twn": 0.0, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.018825, |
| "grad_norm": 201.0, |
| "grad_norm_var": 5072.780989583333, |
| "learning_rate": 0.0001, |
| "loss": 10.2403, |
| "loss/crossentropy": 0.2677990198135376, |
| "loss/hidden": 1.265625, |
| "loss/logits": 0.04041082412004471, |
| "loss/reg": 8.666485786437988, |
| "loss/twn": 0.0, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.01885, |
| "grad_norm": 22.75, |
| "grad_norm_var": 5104.748958333334, |
| "learning_rate": 0.0001, |
| "loss": 11.9847, |
| "loss/crossentropy": 2.481069564819336, |
| "loss/hidden": 0.73828125, |
| "loss/logits": 0.09899482131004333, |
| "loss/reg": 8.666351318359375, |
| "loss/twn": 0.0, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.018875, |
| "grad_norm": 51.25, |
| "grad_norm_var": 5016.933333333333, |
| "learning_rate": 0.0001, |
| "loss": 11.6859, |
| "loss/crossentropy": 2.012446403503418, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.10103265941143036, |
| "loss/reg": 8.666210174560547, |
| "loss/twn": 0.0, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.0189, |
| "grad_norm": 157.0, |
| "grad_norm_var": 5448.4072265625, |
| "learning_rate": 0.0001, |
| "loss": 12.6693, |
| "loss/crossentropy": 2.8460886478424072, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.149321049451828, |
| "loss/reg": 8.666069984436035, |
| "loss/twn": 0.0, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.018925, |
| "grad_norm": 99.0, |
| "grad_norm_var": 5381.910416666667, |
| "learning_rate": 0.0001, |
| "loss": 10.9154, |
| "loss/crossentropy": 1.2100906372070312, |
| "loss/hidden": 0.96875, |
| "loss/logits": 0.07061389088630676, |
| "loss/reg": 8.665928840637207, |
| "loss/twn": 0.0, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.01895, |
| "grad_norm": 31.25, |
| "grad_norm_var": 5395.655208333334, |
| "learning_rate": 0.0001, |
| "loss": 11.2015, |
| "loss/crossentropy": 1.4493834972381592, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.09418177604675293, |
| "loss/reg": 8.665790557861328, |
| "loss/twn": 0.0, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.018975, |
| "grad_norm": 176.0, |
| "grad_norm_var": 5768.830208333334, |
| "learning_rate": 0.0001, |
| "loss": 11.2182, |
| "loss/crossentropy": 1.5739831924438477, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.10745346546173096, |
| "loss/reg": 8.665658950805664, |
| "loss/twn": 0.0, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 37.25, |
| "grad_norm_var": 5826.755208333333, |
| "learning_rate": 0.0001, |
| "loss": 12.2451, |
| "loss/crossentropy": 2.6453640460968018, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.1255873739719391, |
| "loss/reg": 8.665525436401367, |
| "loss/twn": 0.0, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.019025, |
| "grad_norm": 31.75, |
| "grad_norm_var": 5758.614583333333, |
| "learning_rate": 0.0001, |
| "loss": 11.2059, |
| "loss/crossentropy": 1.4887776374816895, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.09083248674869537, |
| "loss/reg": 8.66538143157959, |
| "loss/twn": 0.0, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.01905, |
| "grad_norm": 37.0, |
| "grad_norm_var": 3712.8645833333335, |
| "learning_rate": 0.0001, |
| "loss": 12.5074, |
| "loss/crossentropy": 2.7967333793640137, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.1508556306362152, |
| "loss/reg": 8.665255546569824, |
| "loss/twn": 0.0, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.019075, |
| "grad_norm": 22.0, |
| "grad_norm_var": 3755.577083333333, |
| "learning_rate": 0.0001, |
| "loss": 11.2562, |
| "loss/crossentropy": 1.4816889762878418, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.0780913233757019, |
| "loss/reg": 8.665121078491211, |
| "loss/twn": 0.0, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.0191, |
| "grad_norm": 75.5, |
| "grad_norm_var": 3660.324739583333, |
| "learning_rate": 0.0001, |
| "loss": 10.8642, |
| "loss/crossentropy": 0.5276055335998535, |
| "loss/hidden": 1.6015625, |
| "loss/logits": 0.07006247341632843, |
| "loss/reg": 8.664999008178711, |
| "loss/twn": 0.0, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.019125, |
| "grad_norm": 75.0, |
| "grad_norm_var": 3513.3327473958334, |
| "learning_rate": 0.0001, |
| "loss": 11.648, |
| "loss/crossentropy": 1.5407569408416748, |
| "loss/hidden": 1.359375, |
| "loss/logits": 0.08295197784900665, |
| "loss/reg": 8.66486930847168, |
| "loss/twn": 0.0, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.01915, |
| "grad_norm": 25.875, |
| "grad_norm_var": 3572.8809895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.08, |
| "loss/crossentropy": 2.4781246185302734, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.12461626529693604, |
| "loss/reg": 8.664737701416016, |
| "loss/twn": 0.0, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.019175, |
| "grad_norm": 44.75, |
| "grad_norm_var": 3443.5177083333333, |
| "learning_rate": 0.0001, |
| "loss": 10.5939, |
| "loss/crossentropy": 0.8514456152915955, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.07781119644641876, |
| "loss/reg": 8.664610862731934, |
| "loss/twn": 0.0, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 25.625, |
| "grad_norm_var": 3428.48125, |
| "learning_rate": 0.0001, |
| "loss": 11.0077, |
| "loss/crossentropy": 1.2165791988372803, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.09540999680757523, |
| "loss/reg": 8.664481163024902, |
| "loss/twn": 0.0, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.019225, |
| "grad_norm": 27.0, |
| "grad_norm_var": 2271.38125, |
| "learning_rate": 0.0001, |
| "loss": 11.0567, |
| "loss/crossentropy": 1.4144973754882812, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.10281491279602051, |
| "loss/reg": 8.6643648147583, |
| "loss/twn": 0.0, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.01925, |
| "grad_norm": 23.75, |
| "grad_norm_var": 2266.6520833333334, |
| "learning_rate": 0.0001, |
| "loss": 12.2865, |
| "loss/crossentropy": 2.509667158126831, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.15553465485572815, |
| "loss/reg": 8.664229393005371, |
| "loss/twn": 0.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.019275, |
| "grad_norm": 77.0, |
| "grad_norm_var": 2282.3434895833334, |
| "learning_rate": 0.0001, |
| "loss": 12.2053, |
| "loss/crossentropy": 2.4766902923583984, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.14648714661598206, |
| "loss/reg": 8.664107322692871, |
| "loss/twn": 0.0, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.0193, |
| "grad_norm": 31.375, |
| "grad_norm_var": 1649.9655598958334, |
| "learning_rate": 0.0001, |
| "loss": 12.1992, |
| "loss/crossentropy": 2.516207695007324, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.17920638620853424, |
| "loss/reg": 8.663969993591309, |
| "loss/twn": 0.0, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.019325, |
| "grad_norm": 27.875, |
| "grad_norm_var": 1525.2375, |
| "learning_rate": 0.0001, |
| "loss": 11.0817, |
| "loss/crossentropy": 1.3052771091461182, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.09692220389842987, |
| "loss/reg": 8.663853645324707, |
| "loss/twn": 0.0, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.01935, |
| "grad_norm": 25.625, |
| "grad_norm_var": 1539.8244140625, |
| "learning_rate": 0.0001, |
| "loss": 12.4731, |
| "loss/crossentropy": 2.9087717533111572, |
| "loss/hidden": 0.76171875, |
| "loss/logits": 0.13890819251537323, |
| "loss/reg": 8.663716316223145, |
| "loss/twn": 0.0, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.019375, |
| "grad_norm": 28.0, |
| "grad_norm_var": 377.2535807291667, |
| "learning_rate": 0.0001, |
| "loss": 12.2618, |
| "loss/crossentropy": 2.5748989582061768, |
| "loss/hidden": 0.890625, |
| "loss/logits": 0.13267546892166138, |
| "loss/reg": 8.663591384887695, |
| "loss/twn": 0.0, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.0194, |
| "grad_norm": 33.5, |
| "grad_norm_var": 378.7379557291667, |
| "learning_rate": 0.0001, |
| "loss": 12.2313, |
| "loss/crossentropy": 2.596975088119507, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.12325188517570496, |
| "loss/reg": 8.663460731506348, |
| "loss/twn": 0.0, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.019425, |
| "grad_norm": 61.5, |
| "grad_norm_var": 408.36399739583334, |
| "learning_rate": 0.0001, |
| "loss": 9.918, |
| "loss/crossentropy": 0.15500640869140625, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.03719252720475197, |
| "loss/reg": 8.663321495056152, |
| "loss/twn": 0.0, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.01945, |
| "grad_norm": 25.25, |
| "grad_norm_var": 421.8275390625, |
| "learning_rate": 0.0001, |
| "loss": 11.1055, |
| "loss/crossentropy": 1.5023771524429321, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.10400588810443878, |
| "loss/reg": 8.663187026977539, |
| "loss/twn": 0.0, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.019475, |
| "grad_norm": 35.25, |
| "grad_norm_var": 402.14576822916666, |
| "learning_rate": 0.0001, |
| "loss": 11.6919, |
| "loss/crossentropy": 1.9705311059951782, |
| "loss/hidden": 0.96484375, |
| "loss/logits": 0.09343257546424866, |
| "loss/reg": 8.663052558898926, |
| "loss/twn": 0.0, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0195, |
| "grad_norm": 26.875, |
| "grad_norm_var": 320.92682291666665, |
| "learning_rate": 0.0001, |
| "loss": 11.2901, |
| "loss/crossentropy": 1.4957298040390015, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.1002449244260788, |
| "loss/reg": 8.662919044494629, |
| "loss/twn": 0.0, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.019525, |
| "grad_norm": 43.5, |
| "grad_norm_var": 223.93307291666667, |
| "learning_rate": 0.0001, |
| "loss": 12.2481, |
| "loss/crossentropy": 2.633488178253174, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.11971808969974518, |
| "loss/reg": 8.66281795501709, |
| "loss/twn": 0.0, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.01955, |
| "grad_norm": 64.0, |
| "grad_norm_var": 267.51868489583336, |
| "learning_rate": 0.0001, |
| "loss": 11.2361, |
| "loss/crossentropy": 1.3922841548919678, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.06395399570465088, |
| "loss/reg": 8.662681579589844, |
| "loss/twn": 0.0, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.019575, |
| "grad_norm": 28.375, |
| "grad_norm_var": 268.5677083333333, |
| "learning_rate": 0.0001, |
| "loss": 12.51, |
| "loss/crossentropy": 2.8421671390533447, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.16546382009983063, |
| "loss/reg": 8.662555694580078, |
| "loss/twn": 0.0, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.0196, |
| "grad_norm": 26.5, |
| "grad_norm_var": 267.3431640625, |
| "learning_rate": 0.0001, |
| "loss": 12.1781, |
| "loss/crossentropy": 2.5610969066619873, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.15380142629146576, |
| "loss/reg": 8.662429809570312, |
| "loss/twn": 0.0, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.019625, |
| "grad_norm": 29.375, |
| "grad_norm_var": 264.66015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1905, |
| "loss/crossentropy": 2.6605942249298096, |
| "loss/hidden": 0.7578125, |
| "loss/logits": 0.10981670767068863, |
| "loss/reg": 8.662302017211914, |
| "loss/twn": 0.0, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.01965, |
| "grad_norm": 22.625, |
| "grad_norm_var": 266.6869140625, |
| "learning_rate": 0.0001, |
| "loss": 11.2971, |
| "loss/crossentropy": 1.577839970588684, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.10788953304290771, |
| "loss/reg": 8.662187576293945, |
| "loss/twn": 0.0, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.019675, |
| "grad_norm": 26.875, |
| "grad_norm_var": 154.140625, |
| "learning_rate": 0.0001, |
| "loss": 11.621, |
| "loss/crossentropy": 1.9370038509368896, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.11572004854679108, |
| "loss/reg": 8.66205883026123, |
| "loss/twn": 0.0, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.0197, |
| "grad_norm": 28.625, |
| "grad_norm_var": 155.40390625, |
| "learning_rate": 0.0001, |
| "loss": 11.1422, |
| "loss/crossentropy": 1.413337230682373, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.08641524612903595, |
| "loss/reg": 8.66193962097168, |
| "loss/twn": 0.0, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.019725, |
| "grad_norm": 19.875, |
| "grad_norm_var": 165.25390625, |
| "learning_rate": 0.0001, |
| "loss": 12.0959, |
| "loss/crossentropy": 2.6208856105804443, |
| "loss/hidden": 0.70703125, |
| "loss/logits": 0.10612137615680695, |
| "loss/reg": 8.66181468963623, |
| "loss/twn": 0.0, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.01975, |
| "grad_norm": 21.625, |
| "grad_norm_var": 170.11223958333332, |
| "learning_rate": 0.0001, |
| "loss": 10.99, |
| "loss/crossentropy": 1.3227888345718384, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.08751359581947327, |
| "loss/reg": 8.6616792678833, |
| "loss/twn": 0.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.019775, |
| "grad_norm": 33.5, |
| "grad_norm_var": 168.62265625, |
| "learning_rate": 0.0001, |
| "loss": 12.1324, |
| "loss/crossentropy": 2.5565528869628906, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12910045683383942, |
| "loss/reg": 8.661576271057129, |
| "loss/twn": 0.0, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.0198, |
| "grad_norm": 23.375, |
| "grad_norm_var": 174.2916015625, |
| "learning_rate": 0.0001, |
| "loss": 11.7962, |
| "loss/crossentropy": 2.149275779724121, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.09090349823236465, |
| "loss/reg": 8.661446571350098, |
| "loss/twn": 0.0, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.019825, |
| "grad_norm": 42.5, |
| "grad_norm_var": 122.9322265625, |
| "learning_rate": 0.0001, |
| "loss": 11.8434, |
| "loss/crossentropy": 1.995090126991272, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.11663573980331421, |
| "loss/reg": 8.6613187789917, |
| "loss/twn": 0.0, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.01985, |
| "grad_norm": 30.875, |
| "grad_norm_var": 120.49765625, |
| "learning_rate": 0.0001, |
| "loss": 10.6136, |
| "loss/crossentropy": 0.7898469567298889, |
| "loss/hidden": 1.0859375, |
| "loss/logits": 0.07663634419441223, |
| "loss/reg": 8.661187171936035, |
| "loss/twn": 0.0, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.019875, |
| "grad_norm": 22.125, |
| "grad_norm_var": 124.6744140625, |
| "learning_rate": 0.0001, |
| "loss": 11.7256, |
| "loss/crossentropy": 1.9823567867279053, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.1329897791147232, |
| "loss/reg": 8.661052703857422, |
| "loss/twn": 0.0, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.0199, |
| "grad_norm": 40.75, |
| "grad_norm_var": 129.696875, |
| "learning_rate": 0.0001, |
| "loss": 10.0208, |
| "loss/crossentropy": 0.24984504282474518, |
| "loss/hidden": 1.0546875, |
| "loss/logits": 0.055374886840581894, |
| "loss/reg": 8.660916328430176, |
| "loss/twn": 0.0, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.019925, |
| "grad_norm": 35.75, |
| "grad_norm_var": 121.08307291666667, |
| "learning_rate": 0.0001, |
| "loss": 10.6401, |
| "loss/crossentropy": 0.8663193583488464, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.06612465530633926, |
| "loss/reg": 8.660785675048828, |
| "loss/twn": 0.0, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.01995, |
| "grad_norm": 25.0, |
| "grad_norm_var": 44.78932291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.7144, |
| "loss/crossentropy": 2.014570713043213, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.10949444770812988, |
| "loss/reg": 8.660661697387695, |
| "loss/twn": 0.0, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.019975, |
| "grad_norm": 30.0, |
| "grad_norm_var": 44.90358072916667, |
| "learning_rate": 0.0001, |
| "loss": 11.7202, |
| "loss/crossentropy": 2.0093212127685547, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.11674754321575165, |
| "loss/reg": 8.660538673400879, |
| "loss/twn": 0.0, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 21.75, |
| "grad_norm_var": 47.71399739583333, |
| "learning_rate": 0.0001, |
| "loss": 12.0337, |
| "loss/crossentropy": 2.3967394828796387, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.12106085568666458, |
| "loss/reg": 8.660429000854492, |
| "loss/twn": 0.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.020025, |
| "grad_norm": 26.625, |
| "grad_norm_var": 47.834309895833336, |
| "learning_rate": 0.0001, |
| "loss": 11.6457, |
| "loss/crossentropy": 2.150693655014038, |
| "loss/hidden": 0.671875, |
| "loss/logits": 0.1628551334142685, |
| "loss/reg": 8.660301208496094, |
| "loss/twn": 0.0, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.02005, |
| "grad_norm": 34.5, |
| "grad_norm_var": 47.75390625, |
| "learning_rate": 0.0001, |
| "loss": 12.2337, |
| "loss/crossentropy": 2.6707558631896973, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.10593793541193008, |
| "loss/reg": 8.660172462463379, |
| "loss/twn": 0.0, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.020075, |
| "grad_norm": 37.5, |
| "grad_norm_var": 51.8212890625, |
| "learning_rate": 0.0001, |
| "loss": 10.5413, |
| "loss/crossentropy": 0.5667499899864197, |
| "loss/hidden": 1.28125, |
| "loss/logits": 0.033292025327682495, |
| "loss/reg": 8.660040855407715, |
| "loss/twn": 0.0, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.0201, |
| "grad_norm": 20.625, |
| "grad_norm_var": 56.91295572916667, |
| "learning_rate": 0.0001, |
| "loss": 11.8216, |
| "loss/crossentropy": 2.313816547393799, |
| "loss/hidden": 0.75, |
| "loss/logits": 0.09791187942028046, |
| "loss/reg": 8.659912109375, |
| "loss/twn": 0.0, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.020125, |
| "grad_norm": 49.5, |
| "grad_norm_var": 75.13541666666667, |
| "learning_rate": 0.0001, |
| "loss": 11.521, |
| "loss/crossentropy": 1.678953766822815, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.11190509796142578, |
| "loss/reg": 8.659784317016602, |
| "loss/twn": 0.0, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.02015, |
| "grad_norm": 24.375, |
| "grad_norm_var": 72.17057291666667, |
| "learning_rate": 0.0001, |
| "loss": 11.9863, |
| "loss/crossentropy": 2.3805222511291504, |
| "loss/hidden": 0.8203125, |
| "loss/logits": 0.12578201293945312, |
| "loss/reg": 8.659658432006836, |
| "loss/twn": 0.0, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.020175, |
| "grad_norm": 37.25, |
| "grad_norm_var": 74.21354166666667, |
| "learning_rate": 0.0001, |
| "loss": 11.041, |
| "loss/crossentropy": 1.3859362602233887, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.09702669084072113, |
| "loss/reg": 8.65955638885498, |
| "loss/twn": 0.0, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.0202, |
| "grad_norm": 25.0, |
| "grad_norm_var": 72.6384765625, |
| "learning_rate": 0.0001, |
| "loss": 11.454, |
| "loss/crossentropy": 1.6415588855743408, |
| "loss/hidden": 1.0390625, |
| "loss/logits": 0.11392367631196976, |
| "loss/reg": 8.659427642822266, |
| "loss/twn": 0.0, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.020225, |
| "grad_norm": 21.625, |
| "grad_norm_var": 69.27890625, |
| "learning_rate": 0.0001, |
| "loss": 11.9644, |
| "loss/crossentropy": 2.414095401763916, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.09807682782411575, |
| "loss/reg": 8.659307479858398, |
| "loss/twn": 0.0, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.02025, |
| "grad_norm": 26.75, |
| "grad_norm_var": 69.9728515625, |
| "learning_rate": 0.0001, |
| "loss": 11.5712, |
| "loss/crossentropy": 1.924621343612671, |
| "loss/hidden": 0.8984375, |
| "loss/logits": 0.08892874419689178, |
| "loss/reg": 8.659195899963379, |
| "loss/twn": 0.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.020275, |
| "grad_norm": 33.0, |
| "grad_norm_var": 66.025, |
| "learning_rate": 0.0001, |
| "loss": 11.6807, |
| "loss/crossentropy": 1.8394595384597778, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.19389422237873077, |
| "loss/reg": 8.659070014953613, |
| "loss/twn": 0.0, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.0203, |
| "grad_norm": 35.0, |
| "grad_norm_var": 60.32890625, |
| "learning_rate": 0.0001, |
| "loss": 12.1981, |
| "loss/crossentropy": 2.5572285652160645, |
| "loss/hidden": 0.8515625, |
| "loss/logits": 0.13041183352470398, |
| "loss/reg": 8.658943176269531, |
| "loss/twn": 0.0, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.020325, |
| "grad_norm": 26.125, |
| "grad_norm_var": 59.0806640625, |
| "learning_rate": 0.0001, |
| "loss": 12.4174, |
| "loss/crossentropy": 2.8456881046295166, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12778276205062866, |
| "loss/reg": 8.658808708190918, |
| "loss/twn": 0.0, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.02035, |
| "grad_norm": 86.5, |
| "grad_norm_var": 257.2259765625, |
| "learning_rate": 0.0001, |
| "loss": 11.5127, |
| "loss/crossentropy": 1.339324712753296, |
| "loss/hidden": 1.453125, |
| "loss/logits": 0.061573199927806854, |
| "loss/reg": 8.658677101135254, |
| "loss/twn": 0.0, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.020375, |
| "grad_norm": 24.25, |
| "grad_norm_var": 261.98170572916666, |
| "learning_rate": 0.0001, |
| "loss": 12.2015, |
| "loss/crossentropy": 2.644878387451172, |
| "loss/hidden": 0.7890625, |
| "loss/logits": 0.10904749482870102, |
| "loss/reg": 8.658546447753906, |
| "loss/twn": 0.0, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.0204, |
| "grad_norm": 39.0, |
| "grad_norm_var": 254.36295572916666, |
| "learning_rate": 0.0001, |
| "loss": 10.748, |
| "loss/crossentropy": 0.7973951101303101, |
| "loss/hidden": 1.2109375, |
| "loss/logits": 0.08126410096883774, |
| "loss/reg": 8.658411979675293, |
| "loss/twn": 0.0, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.020425, |
| "grad_norm": 21.25, |
| "grad_norm_var": 261.61640625, |
| "learning_rate": 0.0001, |
| "loss": 12.5606, |
| "loss/crossentropy": 2.883641481399536, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.14752310514450073, |
| "loss/reg": 8.65831184387207, |
| "loss/twn": 0.0, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.02045, |
| "grad_norm": 20.625, |
| "grad_norm_var": 272.5212890625, |
| "learning_rate": 0.0001, |
| "loss": 12.1072, |
| "loss/crossentropy": 2.474083185195923, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.1272907555103302, |
| "loss/reg": 8.658188819885254, |
| "loss/twn": 0.0, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.020475, |
| "grad_norm": 20.625, |
| "grad_norm_var": 280.246875, |
| "learning_rate": 0.0001, |
| "loss": 12.3651, |
| "loss/crossentropy": 2.7124438285827637, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.14690741896629333, |
| "loss/reg": 8.658062934875488, |
| "loss/twn": 0.0, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0205, |
| "grad_norm": 24.125, |
| "grad_norm_var": 275.71875, |
| "learning_rate": 0.0001, |
| "loss": 11.5551, |
| "loss/crossentropy": 1.8066730499267578, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.10224303603172302, |
| "loss/reg": 8.657933235168457, |
| "loss/twn": 0.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.020525, |
| "grad_norm": 21.375, |
| "grad_norm_var": 260.2353515625, |
| "learning_rate": 0.0001, |
| "loss": 12.3622, |
| "loss/crossentropy": 2.7731680870056152, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.12260089814662933, |
| "loss/reg": 8.657793998718262, |
| "loss/twn": 0.0, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.02055, |
| "grad_norm": 20.5, |
| "grad_norm_var": 264.3020833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.1854, |
| "loss/crossentropy": 1.5472619533538818, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.09370426088571548, |
| "loss/reg": 8.65766716003418, |
| "loss/twn": 0.0, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.020575, |
| "grad_norm": 20.25, |
| "grad_norm_var": 266.35625, |
| "learning_rate": 0.0001, |
| "loss": 12.0371, |
| "loss/crossentropy": 2.4873194694519043, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.10710816085338593, |
| "loss/reg": 8.657526016235352, |
| "loss/twn": 0.0, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.0206, |
| "grad_norm": 21.0, |
| "grad_norm_var": 269.55625, |
| "learning_rate": 0.0001, |
| "loss": 11.8331, |
| "loss/crossentropy": 2.1790499687194824, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.13340815901756287, |
| "loss/reg": 8.657398223876953, |
| "loss/twn": 0.0, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.020625, |
| "grad_norm": 17.125, |
| "grad_norm_var": 275.171875, |
| "learning_rate": 0.0001, |
| "loss": 11.8571, |
| "loss/crossentropy": 2.425957679748535, |
| "loss/hidden": 0.68359375, |
| "loss/logits": 0.09029695391654968, |
| "loss/reg": 8.657269477844238, |
| "loss/twn": 0.0, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.02065, |
| "grad_norm": 25.875, |
| "grad_norm_var": 275.43483072916666, |
| "learning_rate": 0.0001, |
| "loss": 11.8535, |
| "loss/crossentropy": 2.0549063682556152, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.1414046585559845, |
| "loss/reg": 8.657147407531738, |
| "loss/twn": 0.0, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.020675, |
| "grad_norm": 28.5, |
| "grad_norm_var": 274.0238932291667, |
| "learning_rate": 0.0001, |
| "loss": 11.5906, |
| "loss/crossentropy": 1.8805147409439087, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.09598313271999359, |
| "loss/reg": 8.657021522521973, |
| "loss/twn": 0.0, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.0207, |
| "grad_norm": 78.0, |
| "grad_norm_var": 428.2416015625, |
| "learning_rate": 0.0001, |
| "loss": 11.3108, |
| "loss/crossentropy": 1.494771957397461, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.06535143405199051, |
| "loss/reg": 8.656889915466309, |
| "loss/twn": 0.0, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.020725, |
| "grad_norm": 24.625, |
| "grad_norm_var": 429.3462890625, |
| "learning_rate": 0.0001, |
| "loss": 11.0708, |
| "loss/crossentropy": 1.4614907503128052, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.08147741109132767, |
| "loss/reg": 8.656755447387695, |
| "loss/twn": 0.0, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.02075, |
| "grad_norm": 39.25, |
| "grad_norm_var": 218.2962890625, |
| "learning_rate": 0.0001, |
| "loss": 11.1135, |
| "loss/crossentropy": 1.3715949058532715, |
| "loss/hidden": 0.97265625, |
| "loss/logits": 0.1126452386379242, |
| "loss/reg": 8.656620979309082, |
| "loss/twn": 0.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.020775, |
| "grad_norm": 73.0, |
| "grad_norm_var": 343.1166015625, |
| "learning_rate": 0.0001, |
| "loss": 12.1343, |
| "loss/crossentropy": 2.5513038635253906, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.11400848627090454, |
| "loss/reg": 8.656479835510254, |
| "loss/twn": 0.0, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 45.25, |
| "grad_norm_var": 352.27024739583334, |
| "learning_rate": 0.0001, |
| "loss": 11.4679, |
| "loss/crossentropy": 1.6210163831710815, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.12018288671970367, |
| "loss/reg": 8.656363487243652, |
| "loss/twn": 0.0, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.020825, |
| "grad_norm": 36.25, |
| "grad_norm_var": 346.16087239583334, |
| "learning_rate": 0.0001, |
| "loss": 12.1308, |
| "loss/crossentropy": 2.536836624145508, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.1252354383468628, |
| "loss/reg": 8.656213760375977, |
| "loss/twn": 0.0, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.02085, |
| "grad_norm": 24.625, |
| "grad_norm_var": 340.94837239583336, |
| "learning_rate": 0.0001, |
| "loss": 12.2044, |
| "loss/crossentropy": 2.492039918899536, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.15000879764556885, |
| "loss/reg": 8.656072616577148, |
| "loss/twn": 0.0, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.020875, |
| "grad_norm": 20.5, |
| "grad_norm_var": 341.14765625, |
| "learning_rate": 0.0001, |
| "loss": 12.575, |
| "loss/crossentropy": 2.9490809440612793, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.14187867939472198, |
| "loss/reg": 8.6559476852417, |
| "loss/twn": 0.0, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.0209, |
| "grad_norm": 20.625, |
| "grad_norm_var": 345.82890625, |
| "learning_rate": 0.0001, |
| "loss": 11.937, |
| "loss/crossentropy": 2.459505796432495, |
| "loss/hidden": 0.71484375, |
| "loss/logits": 0.10687898099422455, |
| "loss/reg": 8.655818939208984, |
| "loss/twn": 0.0, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.020925, |
| "grad_norm": 20.375, |
| "grad_norm_var": 347.34765625, |
| "learning_rate": 0.0001, |
| "loss": 11.5187, |
| "loss/crossentropy": 1.8534458875656128, |
| "loss/hidden": 0.91796875, |
| "loss/logits": 0.09165486693382263, |
| "loss/reg": 8.655677795410156, |
| "loss/twn": 0.0, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.02095, |
| "grad_norm": 19.875, |
| "grad_norm_var": 348.3499348958333, |
| "learning_rate": 0.0001, |
| "loss": 12.1346, |
| "loss/crossentropy": 2.554069995880127, |
| "loss/hidden": 0.8046875, |
| "loss/logits": 0.12025219202041626, |
| "loss/reg": 8.65554428100586, |
| "loss/twn": 0.0, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.020975, |
| "grad_norm": 20.375, |
| "grad_norm_var": 348.1518229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.7181, |
| "loss/crossentropy": 1.9642025232315063, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.10242354869842529, |
| "loss/reg": 8.655411720275879, |
| "loss/twn": 0.0, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 27.5, |
| "grad_norm_var": 341.08307291666665, |
| "learning_rate": 0.0001, |
| "loss": 12.4234, |
| "loss/crossentropy": 2.7069919109344482, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.14706647396087646, |
| "loss/reg": 8.655269622802734, |
| "loss/twn": 0.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.021025, |
| "grad_norm": 30.75, |
| "grad_norm_var": 324.5556640625, |
| "learning_rate": 0.0001, |
| "loss": 11.3863, |
| "loss/crossentropy": 1.5859886407852173, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.12951719760894775, |
| "loss/reg": 8.655142784118652, |
| "loss/twn": 0.0, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.02105, |
| "grad_norm": 71.0, |
| "grad_norm_var": 406.1802083333333, |
| "learning_rate": 0.0001, |
| "loss": 12.3537, |
| "loss/crossentropy": 2.761152505874634, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.12504678964614868, |
| "loss/reg": 8.655021667480469, |
| "loss/twn": 0.0, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.021075, |
| "grad_norm": 24.5, |
| "grad_norm_var": 411.33020833333336, |
| "learning_rate": 0.0001, |
| "loss": 11.9629, |
| "loss/crossentropy": 2.4555206298828125, |
| "loss/hidden": 0.7421875, |
| "loss/logits": 0.11026586592197418, |
| "loss/reg": 8.65488052368164, |
| "loss/twn": 0.0, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.0211, |
| "grad_norm": 22.0, |
| "grad_norm_var": 293.9635416666667, |
| "learning_rate": 0.0001, |
| "loss": 12.0296, |
| "loss/crossentropy": 2.3854191303253174, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.1222752183675766, |
| "loss/reg": 8.654757499694824, |
| "loss/twn": 0.0, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.021125, |
| "grad_norm": 21.0, |
| "grad_norm_var": 298.60618489583334, |
| "learning_rate": 0.0001, |
| "loss": 11.9103, |
| "loss/crossentropy": 2.3430182933807373, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.11969567090272903, |
| "loss/reg": 8.654621124267578, |
| "loss/twn": 0.0, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.02115, |
| "grad_norm": 65.0, |
| "grad_norm_var": 363.8931640625, |
| "learning_rate": 0.0001, |
| "loss": 10.7782, |
| "loss/crossentropy": 0.8066241145133972, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.07485105097293854, |
| "loss/reg": 8.654495239257812, |
| "loss/twn": 0.0, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.021175, |
| "grad_norm": 23.875, |
| "grad_norm_var": 258.709375, |
| "learning_rate": 0.0001, |
| "loss": 11.961, |
| "loss/crossentropy": 2.3761634826660156, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.12191573530435562, |
| "loss/reg": 8.654356956481934, |
| "loss/twn": 0.0, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.0212, |
| "grad_norm": 27.25, |
| "grad_norm_var": 244.384375, |
| "learning_rate": 0.0001, |
| "loss": 11.1372, |
| "loss/crossentropy": 1.5516514778137207, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.09539226442575455, |
| "loss/reg": 8.654253959655762, |
| "loss/twn": 0.0, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.021225, |
| "grad_norm": 22.25, |
| "grad_norm_var": 244.44270833333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1022, |
| "loss/crossentropy": 2.4282212257385254, |
| "loss/hidden": 0.88671875, |
| "loss/logits": 0.1331661194562912, |
| "loss/reg": 8.654115676879883, |
| "loss/twn": 0.0, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.02125, |
| "grad_norm": 20.5, |
| "grad_norm_var": 247.82649739583334, |
| "learning_rate": 0.0001, |
| "loss": 11.4654, |
| "loss/crossentropy": 1.9304391145706177, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.08799178898334503, |
| "loss/reg": 8.653984069824219, |
| "loss/twn": 0.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.021275, |
| "grad_norm": 40.0, |
| "grad_norm_var": 250.56868489583334, |
| "learning_rate": 0.0001, |
| "loss": 11.9588, |
| "loss/crossentropy": 2.324852466583252, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.14020408689975739, |
| "loss/reg": 8.653852462768555, |
| "loss/twn": 0.0, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.0213, |
| "grad_norm": 21.25, |
| "grad_norm_var": 249.828125, |
| "learning_rate": 0.0001, |
| "loss": 10.971, |
| "loss/crossentropy": 1.2864391803741455, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.10502032935619354, |
| "loss/reg": 8.65371036529541, |
| "loss/twn": 0.0, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.021325, |
| "grad_norm": 23.125, |
| "grad_norm_var": 246.82890625, |
| "learning_rate": 0.0001, |
| "loss": 11.7116, |
| "loss/crossentropy": 2.2378087043762207, |
| "loss/hidden": 0.7109375, |
| "loss/logits": 0.10925191640853882, |
| "loss/reg": 8.653560638427734, |
| "loss/twn": 0.0, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.02135, |
| "grad_norm": 20.75, |
| "grad_norm_var": 245.69368489583334, |
| "learning_rate": 0.0001, |
| "loss": 11.1165, |
| "loss/crossentropy": 1.5270025730133057, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.09233575314283371, |
| "loss/reg": 8.653440475463867, |
| "loss/twn": 0.0, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.021375, |
| "grad_norm": 48.5, |
| "grad_norm_var": 258.7747395833333, |
| "learning_rate": 0.0001, |
| "loss": 11.1515, |
| "loss/crossentropy": 1.4334684610366821, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.13111481070518494, |
| "loss/reg": 8.653310775756836, |
| "loss/twn": 0.0, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.0214, |
| "grad_norm": 26.125, |
| "grad_norm_var": 259.68639322916664, |
| "learning_rate": 0.0001, |
| "loss": 11.769, |
| "loss/crossentropy": 2.0678930282592773, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.14165261387825012, |
| "loss/reg": 8.65316390991211, |
| "loss/twn": 0.0, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.021425, |
| "grad_norm": 86.0, |
| "grad_norm_var": 443.1624348958333, |
| "learning_rate": 0.0001, |
| "loss": 11.507, |
| "loss/crossentropy": 1.89520263671875, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.11894860863685608, |
| "loss/reg": 8.653019905090332, |
| "loss/twn": 0.0, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.02145, |
| "grad_norm": 27.75, |
| "grad_norm_var": 353.59889322916666, |
| "learning_rate": 0.0001, |
| "loss": 11.3279, |
| "loss/crossentropy": 1.63710355758667, |
| "loss/hidden": 0.93359375, |
| "loss/logits": 0.10432222485542297, |
| "loss/reg": 8.652884483337402, |
| "loss/twn": 0.0, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.021475, |
| "grad_norm": 26.0, |
| "grad_norm_var": 352.1410807291667, |
| "learning_rate": 0.0001, |
| "loss": 10.9987, |
| "loss/crossentropy": 1.349096655845642, |
| "loss/hidden": 0.90625, |
| "loss/logits": 0.0906340628862381, |
| "loss/reg": 8.65274429321289, |
| "loss/twn": 0.0, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.0215, |
| "grad_norm": 47.25, |
| "grad_norm_var": 356.3494140625, |
| "learning_rate": 0.0001, |
| "loss": 10.5167, |
| "loss/crossentropy": 0.6532329320907593, |
| "loss/hidden": 1.125, |
| "loss/logits": 0.08589871972799301, |
| "loss/reg": 8.65259838104248, |
| "loss/twn": 0.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.021525, |
| "grad_norm": 102.5, |
| "grad_norm_var": 628.4405598958333, |
| "learning_rate": 0.0001, |
| "loss": 11.374, |
| "loss/crossentropy": 1.3636287450790405, |
| "loss/hidden": 1.296875, |
| "loss/logits": 0.060998160392045975, |
| "loss/reg": 8.65249252319336, |
| "loss/twn": 0.0, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.02155, |
| "grad_norm": 47.25, |
| "grad_norm_var": 587.2087890625, |
| "learning_rate": 0.0001, |
| "loss": 12.3978, |
| "loss/crossentropy": 2.6217453479766846, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.13936936855316162, |
| "loss/reg": 8.652359962463379, |
| "loss/twn": 0.0, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.021575, |
| "grad_norm": 37.5, |
| "grad_norm_var": 572.88125, |
| "learning_rate": 0.0001, |
| "loss": 12.5873, |
| "loss/crossentropy": 2.912539482116699, |
| "loss/hidden": 0.875, |
| "loss/logits": 0.14757685363292694, |
| "loss/reg": 8.6522216796875, |
| "loss/twn": 0.0, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.0216, |
| "grad_norm": 64.5, |
| "grad_norm_var": 601.2455729166667, |
| "learning_rate": 0.0001, |
| "loss": 12.1316, |
| "loss/crossentropy": 2.4345555305480957, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.11921804398298264, |
| "loss/reg": 8.652088165283203, |
| "loss/twn": 0.0, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.021625, |
| "grad_norm": 34.5, |
| "grad_norm_var": 579.4635416666666, |
| "learning_rate": 0.0001, |
| "loss": 10.9197, |
| "loss/crossentropy": 1.0762388706207275, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.17591643333435059, |
| "loss/reg": 8.651962280273438, |
| "loss/twn": 0.0, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.02165, |
| "grad_norm": 30.0, |
| "grad_norm_var": 557.7520833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.1881, |
| "loss/crossentropy": 1.559984803199768, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.0973486453294754, |
| "loss/reg": 8.651826858520508, |
| "loss/twn": 0.0, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.021675, |
| "grad_norm": 175.0, |
| "grad_norm_var": 1648.4395833333333, |
| "learning_rate": 0.0001, |
| "loss": 11.7371, |
| "loss/crossentropy": 1.77110755443573, |
| "loss/hidden": 1.171875, |
| "loss/logits": 0.14241228997707367, |
| "loss/reg": 8.65168571472168, |
| "loss/twn": 0.0, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.0217, |
| "grad_norm": 130.0, |
| "grad_norm_var": 1954.4122395833333, |
| "learning_rate": 0.0001, |
| "loss": 12.2883, |
| "loss/crossentropy": 2.535989761352539, |
| "loss/hidden": 0.953125, |
| "loss/logits": 0.1476578414440155, |
| "loss/reg": 8.6515474319458, |
| "loss/twn": 0.0, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.021725, |
| "grad_norm": 26.5, |
| "grad_norm_var": 1939.4655598958334, |
| "learning_rate": 0.0001, |
| "loss": 12.0157, |
| "loss/crossentropy": 2.3548765182495117, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.13048355281352997, |
| "loss/reg": 8.651415824890137, |
| "loss/twn": 0.0, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.02175, |
| "grad_norm": 59.75, |
| "grad_norm_var": 1840.1374348958334, |
| "learning_rate": 0.0001, |
| "loss": 11.2163, |
| "loss/crossentropy": 1.3642361164093018, |
| "loss/hidden": 1.078125, |
| "loss/logits": 0.12266632169485092, |
| "loss/reg": 8.65128231048584, |
| "loss/twn": 0.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.021775, |
| "grad_norm": 51.0, |
| "grad_norm_var": 1836.5046223958334, |
| "learning_rate": 0.0001, |
| "loss": 12.2241, |
| "loss/crossentropy": 2.1974077224731445, |
| "loss/hidden": 1.2421875, |
| "loss/logits": 0.1333170384168625, |
| "loss/reg": 8.651155471801758, |
| "loss/twn": 0.0, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.0218, |
| "grad_norm": 23.25, |
| "grad_norm_var": 1850.28515625, |
| "learning_rate": 0.0001, |
| "loss": 11.0161, |
| "loss/crossentropy": 1.3812255859375, |
| "loss/hidden": 0.890625, |
| "loss/logits": 0.09322841465473175, |
| "loss/reg": 8.651025772094727, |
| "loss/twn": 0.0, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.021825, |
| "grad_norm": 33.25, |
| "grad_norm_var": 1845.175, |
| "learning_rate": 0.0001, |
| "loss": 12.3638, |
| "loss/crossentropy": 2.6129558086395264, |
| "loss/hidden": 0.98046875, |
| "loss/logits": 0.11948850750923157, |
| "loss/reg": 8.65089225769043, |
| "loss/twn": 0.0, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.02185, |
| "grad_norm": 33.25, |
| "grad_norm_var": 1825.4322916666667, |
| "learning_rate": 0.0001, |
| "loss": 11.2416, |
| "loss/crossentropy": 1.4775595664978027, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.08985313773155212, |
| "loss/reg": 8.650773048400879, |
| "loss/twn": 0.0, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.021875, |
| "grad_norm": 55.75, |
| "grad_norm_var": 1755.4268229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.3055, |
| "loss/crossentropy": 2.59051775932312, |
| "loss/hidden": 0.90234375, |
| "loss/logits": 0.16192252933979034, |
| "loss/reg": 8.650670051574707, |
| "loss/twn": 0.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.0219, |
| "grad_norm": 24.125, |
| "grad_norm_var": 1826.4759765625, |
| "learning_rate": 0.0001, |
| "loss": 12.4725, |
| "loss/crossentropy": 2.8609981536865234, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.13280364871025085, |
| "loss/reg": 8.650540351867676, |
| "loss/twn": 0.0, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.021925, |
| "grad_norm": 40.25, |
| "grad_norm_var": 1699.3822265625, |
| "learning_rate": 0.0001, |
| "loss": 11.8694, |
| "loss/crossentropy": 2.279768943786621, |
| "loss/hidden": 0.81640625, |
| "loss/logits": 0.1227678433060646, |
| "loss/reg": 8.650419235229492, |
| "loss/twn": 0.0, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.02195, |
| "grad_norm": 41.25, |
| "grad_norm_var": 1707.1259765625, |
| "learning_rate": 0.0001, |
| "loss": 11.2075, |
| "loss/crossentropy": 1.4421278238296509, |
| "loss/hidden": 1.0, |
| "loss/logits": 0.11505263298749924, |
| "loss/reg": 8.650284767150879, |
| "loss/twn": 0.0, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.021975, |
| "grad_norm": 22.5, |
| "grad_norm_var": 1753.6728515625, |
| "learning_rate": 0.0001, |
| "loss": 11.9685, |
| "loss/crossentropy": 2.3770902156829834, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.10924512147903442, |
| "loss/reg": 8.650164604187012, |
| "loss/twn": 0.0, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 58.75, |
| "grad_norm_var": 1746.7728515625, |
| "learning_rate": 0.0001, |
| "loss": 10.5659, |
| "loss/crossentropy": 0.7277314066886902, |
| "loss/hidden": 1.09375, |
| "loss/logits": 0.09439295530319214, |
| "loss/reg": 8.650035858154297, |
| "loss/twn": 0.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.022025, |
| "grad_norm": 34.5, |
| "grad_norm_var": 1746.7728515625, |
| "learning_rate": 0.0001, |
| "loss": 11.4086, |
| "loss/crossentropy": 1.7426364421844482, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.09027338027954102, |
| "loss/reg": 8.649921417236328, |
| "loss/twn": 0.0, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.02205, |
| "grad_norm": 25.0, |
| "grad_norm_var": 1763.2988932291667, |
| "learning_rate": 0.0001, |
| "loss": 10.9653, |
| "loss/crossentropy": 1.242699384689331, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.09623660147190094, |
| "loss/reg": 8.649791717529297, |
| "loss/twn": 0.0, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.022075, |
| "grad_norm": 21.75, |
| "grad_norm_var": 720.5603515625, |
| "learning_rate": 0.0001, |
| "loss": 12.2686, |
| "loss/crossentropy": 2.7663731575012207, |
| "loss/hidden": 0.73046875, |
| "loss/logits": 0.12204307317733765, |
| "loss/reg": 8.649681091308594, |
| "loss/twn": 0.0, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.0221, |
| "grad_norm": 42.5, |
| "grad_norm_var": 178.8806640625, |
| "learning_rate": 0.0001, |
| "loss": 10.4172, |
| "loss/crossentropy": 0.7049916386604309, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.03140311688184738, |
| "loss/reg": 8.64955997467041, |
| "loss/twn": 0.0, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.022125, |
| "grad_norm": 20.125, |
| "grad_norm_var": 190.41875, |
| "learning_rate": 0.0001, |
| "loss": 11.1314, |
| "loss/crossentropy": 1.5635149478912354, |
| "loss/hidden": 0.8203125, |
| "loss/logits": 0.09813685715198517, |
| "loss/reg": 8.649443626403809, |
| "loss/twn": 0.0, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.02215, |
| "grad_norm": 76.5, |
| "grad_norm_var": 259.46015625, |
| "learning_rate": 0.0001, |
| "loss": 11.2006, |
| "loss/crossentropy": 1.4845755100250244, |
| "loss/hidden": 0.98828125, |
| "loss/logits": 0.0783877968788147, |
| "loss/reg": 8.649328231811523, |
| "loss/twn": 0.0, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.022175, |
| "grad_norm": 28.25, |
| "grad_norm_var": 251.56875, |
| "learning_rate": 0.0001, |
| "loss": 11.1521, |
| "loss/crossentropy": 1.4424325227737427, |
| "loss/hidden": 0.953125, |
| "loss/logits": 0.10736413300037384, |
| "loss/reg": 8.649208068847656, |
| "loss/twn": 0.0, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.0222, |
| "grad_norm": 156.0, |
| "grad_norm_var": 1121.77265625, |
| "learning_rate": 0.0001, |
| "loss": 12.3203, |
| "loss/crossentropy": 2.698983907699585, |
| "loss/hidden": 0.8359375, |
| "loss/logits": 0.1363169550895691, |
| "loss/reg": 8.649099349975586, |
| "loss/twn": 0.0, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.022225, |
| "grad_norm": 21.5, |
| "grad_norm_var": 1148.1979166666667, |
| "learning_rate": 0.0001, |
| "loss": 12.1534, |
| "loss/crossentropy": 2.5979976654052734, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12121406197547913, |
| "loss/reg": 8.64898681640625, |
| "loss/twn": 0.0, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.02225, |
| "grad_norm": 27.125, |
| "grad_norm_var": 1159.2197265625, |
| "learning_rate": 0.0001, |
| "loss": 11.1476, |
| "loss/crossentropy": 1.480932354927063, |
| "loss/hidden": 0.91015625, |
| "loss/logits": 0.10764827579259872, |
| "loss/reg": 8.648865699768066, |
| "loss/twn": 0.0, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.022275, |
| "grad_norm": 20.75, |
| "grad_norm_var": 1178.5791015625, |
| "learning_rate": 0.0001, |
| "loss": 11.9907, |
| "loss/crossentropy": 2.447499990463257, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.12492209672927856, |
| "loss/reg": 8.648744583129883, |
| "loss/twn": 0.0, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.0223, |
| "grad_norm": 18.25, |
| "grad_norm_var": 1194.19375, |
| "learning_rate": 0.0001, |
| "loss": 11.643, |
| "loss/crossentropy": 2.143287420272827, |
| "loss/hidden": 0.76171875, |
| "loss/logits": 0.08939171582460403, |
| "loss/reg": 8.64862060546875, |
| "loss/twn": 0.0, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.022325, |
| "grad_norm": 58.0, |
| "grad_norm_var": 1212.2580729166666, |
| "learning_rate": 0.0001, |
| "loss": 11.8774, |
| "loss/crossentropy": 2.3605241775512695, |
| "loss/hidden": 0.75, |
| "loss/logits": 0.11832257360219955, |
| "loss/reg": 8.648508071899414, |
| "loss/twn": 0.0, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.02235, |
| "grad_norm": 100.0, |
| "grad_norm_var": 1421.7385416666666, |
| "learning_rate": 0.0001, |
| "loss": 11.0223, |
| "loss/crossentropy": 1.3405512571334839, |
| "loss/hidden": 0.96484375, |
| "loss/logits": 0.068513423204422, |
| "loss/reg": 8.648395538330078, |
| "loss/twn": 0.0, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.022375, |
| "grad_norm": 55.25, |
| "grad_norm_var": 1387.38515625, |
| "learning_rate": 0.0001, |
| "loss": 11.8076, |
| "loss/crossentropy": 2.273613929748535, |
| "loss/hidden": 0.78125, |
| "loss/logits": 0.10449393093585968, |
| "loss/reg": 8.648283004760742, |
| "loss/twn": 0.0, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 26.625, |
| "grad_norm_var": 1404.8363932291666, |
| "learning_rate": 0.0001, |
| "loss": 10.8252, |
| "loss/crossentropy": 1.2194933891296387, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.07468891143798828, |
| "loss/reg": 8.64816665649414, |
| "loss/twn": 0.0, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.022425, |
| "grad_norm": 91.5, |
| "grad_norm_var": 1522.3395182291667, |
| "learning_rate": 0.0001, |
| "loss": 11.5974, |
| "loss/crossentropy": 1.5322892665863037, |
| "loss/hidden": 1.3359375, |
| "loss/logits": 0.08110789954662323, |
| "loss/reg": 8.648056030273438, |
| "loss/twn": 0.0, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.02245, |
| "grad_norm": 26.0, |
| "grad_norm_var": 1519.1593098958333, |
| "learning_rate": 0.0001, |
| "loss": 11.2098, |
| "loss/crossentropy": 1.5447907447814941, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.09123338758945465, |
| "loss/reg": 8.647948265075684, |
| "loss/twn": 0.0, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.022475, |
| "grad_norm": 23.5, |
| "grad_norm_var": 1512.9030598958334, |
| "learning_rate": 0.0001, |
| "loss": 11.1062, |
| "loss/crossentropy": 1.4178701639175415, |
| "loss/hidden": 0.953125, |
| "loss/logits": 0.08742193132638931, |
| "loss/reg": 8.647828102111816, |
| "loss/twn": 0.0, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 27.125, |
| "grad_norm_var": 1542.0114583333334, |
| "learning_rate": 0.0001, |
| "loss": 11.6402, |
| "loss/crossentropy": 2.2656986713409424, |
| "loss/hidden": 0.62890625, |
| "loss/logits": 0.0979294702410698, |
| "loss/reg": 8.647710800170898, |
| "loss/twn": 0.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.022525, |
| "grad_norm": 26.125, |
| "grad_norm_var": 1521.5364583333333, |
| "learning_rate": 0.0001, |
| "loss": 12.4201, |
| "loss/crossentropy": 2.691460609436035, |
| "loss/hidden": 0.94140625, |
| "loss/logits": 0.1395898461341858, |
| "loss/reg": 8.647601127624512, |
| "loss/twn": 0.0, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.02255, |
| "grad_norm": 17.875, |
| "grad_norm_var": 1520.6509765625, |
| "learning_rate": 0.0001, |
| "loss": 11.8873, |
| "loss/crossentropy": 2.37650203704834, |
| "loss/hidden": 0.734375, |
| "loss/logits": 0.12885737419128418, |
| "loss/reg": 8.647518157958984, |
| "loss/twn": 0.0, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.022575, |
| "grad_norm": 22.5, |
| "grad_norm_var": 1535.7447265625, |
| "learning_rate": 0.0001, |
| "loss": 11.1421, |
| "loss/crossentropy": 1.4534761905670166, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.1271265596151352, |
| "loss/reg": 8.647392272949219, |
| "loss/twn": 0.0, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.0226, |
| "grad_norm": 70.5, |
| "grad_norm_var": 725.8994140625, |
| "learning_rate": 0.0001, |
| "loss": 12.3574, |
| "loss/crossentropy": 2.8316938877105713, |
| "loss/hidden": 0.765625, |
| "loss/logits": 0.11279100179672241, |
| "loss/reg": 8.647268295288086, |
| "loss/twn": 0.0, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.022625, |
| "grad_norm": 50.75, |
| "grad_norm_var": 709.0197265625, |
| "learning_rate": 0.0001, |
| "loss": 12.088, |
| "loss/crossentropy": 2.5325286388397217, |
| "loss/hidden": 0.7890625, |
| "loss/logits": 0.1192476898431778, |
| "loss/reg": 8.647161483764648, |
| "loss/twn": 0.0, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.02265, |
| "grad_norm": 31.875, |
| "grad_norm_var": 701.4098307291666, |
| "learning_rate": 0.0001, |
| "loss": 11.7597, |
| "loss/crossentropy": 1.9708436727523804, |
| "loss/hidden": 1.03125, |
| "loss/logits": 0.11054424941539764, |
| "loss/reg": 8.647050857543945, |
| "loss/twn": 0.0, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.022675, |
| "grad_norm": 36.25, |
| "grad_norm_var": 673.2030598958333, |
| "learning_rate": 0.0001, |
| "loss": 10.1723, |
| "loss/crossentropy": 0.33124569058418274, |
| "loss/hidden": 1.1171875, |
| "loss/logits": 0.07697142660617828, |
| "loss/reg": 8.64693832397461, |
| "loss/twn": 0.0, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.0227, |
| "grad_norm": 24.75, |
| "grad_norm_var": 654.7119140625, |
| "learning_rate": 0.0001, |
| "loss": 10.9677, |
| "loss/crossentropy": 1.3110615015029907, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.08404570817947388, |
| "loss/reg": 8.646824836730957, |
| "loss/twn": 0.0, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.022725, |
| "grad_norm": 50.25, |
| "grad_norm_var": 643.0061848958334, |
| "learning_rate": 0.0001, |
| "loss": 12.0202, |
| "loss/crossentropy": 2.425607442855835, |
| "loss/hidden": 0.8046875, |
| "loss/logits": 0.14318381249904633, |
| "loss/reg": 8.646716117858887, |
| "loss/twn": 0.0, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.02275, |
| "grad_norm": 25.0, |
| "grad_norm_var": 420.1155598958333, |
| "learning_rate": 0.0001, |
| "loss": 11.6331, |
| "loss/crossentropy": 1.9562517404556274, |
| "loss/hidden": 0.9140625, |
| "loss/logits": 0.11617865413427353, |
| "loss/reg": 8.64660358428955, |
| "loss/twn": 0.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.022775, |
| "grad_norm": 38.25, |
| "grad_norm_var": 398.7770182291667, |
| "learning_rate": 0.0001, |
| "loss": 11.0166, |
| "loss/crossentropy": 1.2556953430175781, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.09881006181240082, |
| "loss/reg": 8.646495819091797, |
| "loss/twn": 0.0, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.0228, |
| "grad_norm": 30.875, |
| "grad_norm_var": 394.13743489583334, |
| "learning_rate": 0.0001, |
| "loss": 11.3651, |
| "loss/crossentropy": 1.6619752645492554, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.09971839189529419, |
| "loss/reg": 8.646382331848145, |
| "loss/twn": 0.0, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.022825, |
| "grad_norm": 28.375, |
| "grad_norm_var": 185.06875, |
| "learning_rate": 0.0001, |
| "loss": 12.7538, |
| "loss/crossentropy": 3.1295979022979736, |
| "loss/hidden": 0.85546875, |
| "loss/logits": 0.12245632708072662, |
| "loss/reg": 8.64626693725586, |
| "loss/twn": 0.0, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.02285, |
| "grad_norm": 20.375, |
| "grad_norm_var": 192.3900390625, |
| "learning_rate": 0.0001, |
| "loss": 10.5745, |
| "loss/crossentropy": 1.0429837703704834, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.07681307941675186, |
| "loss/reg": 8.646145820617676, |
| "loss/twn": 0.0, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.022875, |
| "grad_norm": 26.0, |
| "grad_norm_var": 189.68951822916668, |
| "learning_rate": 0.0001, |
| "loss": 11.7933, |
| "loss/crossentropy": 2.2527108192443848, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.12507414817810059, |
| "loss/reg": 8.646031379699707, |
| "loss/twn": 0.0, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.0229, |
| "grad_norm": 32.5, |
| "grad_norm_var": 187.33515625, |
| "learning_rate": 0.0001, |
| "loss": 12.0279, |
| "loss/crossentropy": 2.4536893367767334, |
| "loss/hidden": 0.7890625, |
| "loss/logits": 0.13920220732688904, |
| "loss/reg": 8.645927429199219, |
| "loss/twn": 0.0, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.022925, |
| "grad_norm": 20.25, |
| "grad_norm_var": 195.08587239583332, |
| "learning_rate": 0.0001, |
| "loss": 12.1268, |
| "loss/crossentropy": 2.606616258621216, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.10481689870357513, |
| "loss/reg": 8.6458158493042, |
| "loss/twn": 0.0, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.02295, |
| "grad_norm": 31.625, |
| "grad_norm_var": 179.35930989583332, |
| "learning_rate": 0.0001, |
| "loss": 11.1762, |
| "loss/crossentropy": 1.5746790170669556, |
| "loss/hidden": 0.87109375, |
| "loss/logits": 0.08476681262254715, |
| "loss/reg": 8.64570140838623, |
| "loss/twn": 0.0, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.022975, |
| "grad_norm": 19.75, |
| "grad_norm_var": 183.95983072916667, |
| "learning_rate": 0.0001, |
| "loss": 11.0627, |
| "loss/crossentropy": 1.4162150621414185, |
| "loss/hidden": 0.921875, |
| "loss/logits": 0.0790150836110115, |
| "loss/reg": 8.64559555053711, |
| "loss/twn": 0.0, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 22.0, |
| "grad_norm_var": 92.26451822916667, |
| "learning_rate": 0.0001, |
| "loss": 12.0987, |
| "loss/crossentropy": 2.6432836055755615, |
| "loss/hidden": 0.69140625, |
| "loss/logits": 0.11851957440376282, |
| "loss/reg": 8.64548397064209, |
| "loss/twn": 0.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.023025, |
| "grad_norm": 31.75, |
| "grad_norm_var": 63.66555989583333, |
| "learning_rate": 0.0001, |
| "loss": 10.4844, |
| "loss/crossentropy": 0.7586137652397156, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.07264447957277298, |
| "loss/reg": 8.645378112792969, |
| "loss/twn": 0.0, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.02305, |
| "grad_norm": 20.0, |
| "grad_norm_var": 68.50833333333334, |
| "learning_rate": 0.0001, |
| "loss": 11.8472, |
| "loss/crossentropy": 2.349719524383545, |
| "loss/hidden": 0.73046875, |
| "loss/logits": 0.12170122563838959, |
| "loss/reg": 8.645292282104492, |
| "loss/twn": 0.0, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.023075, |
| "grad_norm": 29.625, |
| "grad_norm_var": 64.51608072916666, |
| "learning_rate": 0.0001, |
| "loss": 11.1953, |
| "loss/crossentropy": 1.421155333518982, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.11335611343383789, |
| "loss/reg": 8.645182609558105, |
| "loss/twn": 0.0, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.0231, |
| "grad_norm": 79.0, |
| "grad_norm_var": 223.42337239583333, |
| "learning_rate": 0.0001, |
| "loss": 11.4426, |
| "loss/crossentropy": 1.488852620124817, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.23840782046318054, |
| "loss/reg": 8.645071983337402, |
| "loss/twn": 0.0, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.023125, |
| "grad_norm": 227.0, |
| "grad_norm_var": 2615.4400390625, |
| "learning_rate": 0.0001, |
| "loss": 12.0388, |
| "loss/crossentropy": 2.4332802295684814, |
| "loss/hidden": 0.81640625, |
| "loss/logits": 0.1441601812839508, |
| "loss/reg": 8.644950866699219, |
| "loss/twn": 0.0, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.02315, |
| "grad_norm": 57.5, |
| "grad_norm_var": 2604.9791015625, |
| "learning_rate": 0.0001, |
| "loss": 11.3147, |
| "loss/crossentropy": 1.5451112985610962, |
| "loss/hidden": 1.0234375, |
| "loss/logits": 0.10130450874567032, |
| "loss/reg": 8.6448392868042, |
| "loss/twn": 0.0, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.023175, |
| "grad_norm": 30.875, |
| "grad_norm_var": 2614.7010416666667, |
| "learning_rate": 0.0001, |
| "loss": 12.2042, |
| "loss/crossentropy": 2.5670032501220703, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.1252429485321045, |
| "loss/reg": 8.644716262817383, |
| "loss/twn": 0.0, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.0232, |
| "grad_norm": 21.5, |
| "grad_norm_var": 2636.8738932291667, |
| "learning_rate": 0.0001, |
| "loss": 11.9817, |
| "loss/crossentropy": 2.4307641983032227, |
| "loss/hidden": 0.7890625, |
| "loss/logits": 0.11731122434139252, |
| "loss/reg": 8.644594192504883, |
| "loss/twn": 0.0, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.023225, |
| "grad_norm": 65.0, |
| "grad_norm_var": 2646.2018229166665, |
| "learning_rate": 0.0001, |
| "loss": 10.5594, |
| "loss/crossentropy": 0.8123658299446106, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.09476789832115173, |
| "loss/reg": 8.6444730758667, |
| "loss/twn": 0.0, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.02325, |
| "grad_norm": 24.125, |
| "grad_norm_var": 2634.3072916666665, |
| "learning_rate": 0.0001, |
| "loss": 12.4071, |
| "loss/crossentropy": 2.7781920433044434, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.1525574028491974, |
| "loss/reg": 8.644346237182617, |
| "loss/twn": 0.0, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.023275, |
| "grad_norm": 32.0, |
| "grad_norm_var": 2620.4322916666665, |
| "learning_rate": 0.0001, |
| "loss": 12.0064, |
| "loss/crossentropy": 2.4318840503692627, |
| "loss/hidden": 0.80859375, |
| "loss/logits": 0.12169232964515686, |
| "loss/reg": 8.644229888916016, |
| "loss/twn": 0.0, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.0233, |
| "grad_norm": 29.625, |
| "grad_norm_var": 2626.3275390625, |
| "learning_rate": 0.0001, |
| "loss": 10.8905, |
| "loss/crossentropy": 1.1798232793807983, |
| "loss/hidden": 0.984375, |
| "loss/logits": 0.08223216980695724, |
| "loss/reg": 8.644112586975098, |
| "loss/twn": 0.0, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.023325, |
| "grad_norm": 29.875, |
| "grad_norm_var": 2598.620572916667, |
| "learning_rate": 0.0001, |
| "loss": 12.066, |
| "loss/crossentropy": 2.453744888305664, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.124543696641922, |
| "loss/reg": 8.643993377685547, |
| "loss/twn": 0.0, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.02335, |
| "grad_norm": 81.5, |
| "grad_norm_var": 2652.158268229167, |
| "learning_rate": 0.0001, |
| "loss": 11.8816, |
| "loss/crossentropy": 2.010385274887085, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.11792879551649094, |
| "loss/reg": 8.643871307373047, |
| "loss/twn": 0.0, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.023375, |
| "grad_norm": 66.0, |
| "grad_norm_var": 2598.8744140625, |
| "learning_rate": 0.0001, |
| "loss": 11.3964, |
| "loss/crossentropy": 1.359242558479309, |
| "loss/hidden": 1.328125, |
| "loss/logits": 0.06525681912899017, |
| "loss/reg": 8.64375114440918, |
| "loss/twn": 0.0, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.0234, |
| "grad_norm": 26.75, |
| "grad_norm_var": 2580.6759765625, |
| "learning_rate": 0.0001, |
| "loss": 10.2786, |
| "loss/crossentropy": 0.5326244831085205, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.11017867922782898, |
| "loss/reg": 8.643632888793945, |
| "loss/twn": 0.0, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.023425, |
| "grad_norm": 28.375, |
| "grad_norm_var": 2591.06640625, |
| "learning_rate": 0.0001, |
| "loss": 11.9216, |
| "loss/crossentropy": 2.2760961055755615, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.11911870539188385, |
| "loss/reg": 8.643540382385254, |
| "loss/twn": 0.0, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.02345, |
| "grad_norm": 48.5, |
| "grad_norm_var": 2516.25390625, |
| "learning_rate": 0.0001, |
| "loss": 10.7898, |
| "loss/crossentropy": 1.1267657279968262, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.07034754008054733, |
| "loss/reg": 8.643426895141602, |
| "loss/twn": 0.0, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.023475, |
| "grad_norm": 27.375, |
| "grad_norm_var": 2524.13125, |
| "learning_rate": 0.0001, |
| "loss": 11.2508, |
| "loss/crossentropy": 1.69661283493042, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.07888500392436981, |
| "loss/reg": 8.64331340789795, |
| "loss/twn": 0.0, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0235, |
| "grad_norm": 30.5, |
| "grad_norm_var": 2513.9260416666666, |
| "learning_rate": 0.0001, |
| "loss": 11.1866, |
| "loss/crossentropy": 1.456539511680603, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.09464754909276962, |
| "loss/reg": 8.643203735351562, |
| "loss/twn": 0.0, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.023525, |
| "grad_norm": 62.25, |
| "grad_norm_var": 358.6184895833333, |
| "learning_rate": 0.0001, |
| "loss": 11.4512, |
| "loss/crossentropy": 1.4735151529312134, |
| "loss/hidden": 1.2734375, |
| "loss/logits": 0.061179954558610916, |
| "loss/reg": 8.64309024810791, |
| "loss/twn": 0.0, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.02355, |
| "grad_norm": 26.5, |
| "grad_norm_var": 351.96640625, |
| "learning_rate": 0.0001, |
| "loss": 12.1573, |
| "loss/crossentropy": 2.5865166187286377, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.13481010496616364, |
| "loss/reg": 8.642961502075195, |
| "loss/twn": 0.0, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.023575, |
| "grad_norm": 23.75, |
| "grad_norm_var": 363.2587890625, |
| "learning_rate": 0.0001, |
| "loss": 11.0244, |
| "loss/crossentropy": 1.3015955686569214, |
| "loss/hidden": 0.9921875, |
| "loss/logits": 0.08772861957550049, |
| "loss/reg": 8.642860412597656, |
| "loss/twn": 0.0, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.0236, |
| "grad_norm": 24.375, |
| "grad_norm_var": 357.07604166666664, |
| "learning_rate": 0.0001, |
| "loss": 10.3601, |
| "loss/crossentropy": 0.6473336815834045, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.06215362995862961, |
| "loss/reg": 8.642752647399902, |
| "loss/twn": 0.0, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.023625, |
| "grad_norm": 25.125, |
| "grad_norm_var": 319.0494140625, |
| "learning_rate": 0.0001, |
| "loss": 12.6172, |
| "loss/crossentropy": 2.994354009628296, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.132510706782341, |
| "loss/reg": 8.642648696899414, |
| "loss/twn": 0.0, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.02365, |
| "grad_norm": 22.375, |
| "grad_norm_var": 322.1666015625, |
| "learning_rate": 0.0001, |
| "loss": 12.2172, |
| "loss/crossentropy": 2.499817132949829, |
| "loss/hidden": 0.9296875, |
| "loss/logits": 0.14518602192401886, |
| "loss/reg": 8.64252758026123, |
| "loss/twn": 0.0, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.023675, |
| "grad_norm": 17.875, |
| "grad_norm_var": 343.2143229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.0342, |
| "loss/crossentropy": 2.475522041320801, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.11936713755130768, |
| "loss/reg": 8.642420768737793, |
| "loss/twn": 0.0, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.0237, |
| "grad_norm": 73.0, |
| "grad_norm_var": 425.83014322916665, |
| "learning_rate": 0.0001, |
| "loss": 11.1549, |
| "loss/crossentropy": 1.4982513189315796, |
| "loss/hidden": 0.94140625, |
| "loss/logits": 0.07289159297943115, |
| "loss/reg": 8.64231014251709, |
| "loss/twn": 0.0, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.023725, |
| "grad_norm": 53.25, |
| "grad_norm_var": 433.4635416666667, |
| "learning_rate": 0.0001, |
| "loss": 11.9696, |
| "loss/crossentropy": 1.539908766746521, |
| "loss/hidden": 1.7109375, |
| "loss/logits": 0.07653222233057022, |
| "loss/reg": 8.642194747924805, |
| "loss/twn": 0.0, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.02375, |
| "grad_norm": 23.75, |
| "grad_norm_var": 321.1518229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.0968, |
| "loss/crossentropy": 1.4083565473556519, |
| "loss/hidden": 0.95703125, |
| "loss/logits": 0.08931989967823029, |
| "loss/reg": 8.642075538635254, |
| "loss/twn": 0.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.023775, |
| "grad_norm": 38.0, |
| "grad_norm_var": 259.0268229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.8587, |
| "loss/crossentropy": 2.4400861263275146, |
| "loss/hidden": 0.6796875, |
| "loss/logits": 0.09696350246667862, |
| "loss/reg": 8.641968727111816, |
| "loss/twn": 0.0, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.0238, |
| "grad_norm": 23.25, |
| "grad_norm_var": 263.4018229166667, |
| "learning_rate": 0.0001, |
| "loss": 11.6612, |
| "loss/crossentropy": 1.9363447427749634, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.12208056449890137, |
| "loss/reg": 8.641850471496582, |
| "loss/twn": 0.0, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.023825, |
| "grad_norm": 46.5, |
| "grad_norm_var": 269.69837239583336, |
| "learning_rate": 0.0001, |
| "loss": 11.8514, |
| "loss/crossentropy": 2.2403106689453125, |
| "loss/hidden": 0.83203125, |
| "loss/logits": 0.1373005509376526, |
| "loss/reg": 8.64173412322998, |
| "loss/twn": 0.0, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.02385, |
| "grad_norm": 21.875, |
| "grad_norm_var": 267.4934895833333, |
| "learning_rate": 0.0001, |
| "loss": 10.824, |
| "loss/crossentropy": 1.2617324590682983, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.07296106219291687, |
| "loss/reg": 8.641622543334961, |
| "loss/twn": 0.0, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.023875, |
| "grad_norm": 115.0, |
| "grad_norm_var": 673.0785807291667, |
| "learning_rate": 0.0001, |
| "loss": 11.6683, |
| "loss/crossentropy": 1.96914803981781, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.0966673493385315, |
| "loss/reg": 8.641509056091309, |
| "loss/twn": 0.0, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.0239, |
| "grad_norm": 29.125, |
| "grad_norm_var": 674.79375, |
| "learning_rate": 0.0001, |
| "loss": 10.9845, |
| "loss/crossentropy": 1.251932144165039, |
| "loss/hidden": 1.0078125, |
| "loss/logits": 0.08337213099002838, |
| "loss/reg": 8.641397476196289, |
| "loss/twn": 0.0, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.023925, |
| "grad_norm": 28.125, |
| "grad_norm_var": 642.3572265625, |
| "learning_rate": 0.0001, |
| "loss": 10.3408, |
| "loss/crossentropy": 0.7025061249732971, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.059452176094055176, |
| "loss/reg": 8.641303062438965, |
| "loss/twn": 0.0, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.02395, |
| "grad_norm": 65.5, |
| "grad_norm_var": 682.8603515625, |
| "learning_rate": 0.0001, |
| "loss": 12.2295, |
| "loss/crossentropy": 2.679429292678833, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12372276186943054, |
| "loss/reg": 8.641189575195312, |
| "loss/twn": 0.0, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.023975, |
| "grad_norm": 71.5, |
| "grad_norm_var": 725.5369140625, |
| "learning_rate": 0.0001, |
| "loss": 10.6519, |
| "loss/crossentropy": 0.8681978583335876, |
| "loss/hidden": 1.0703125, |
| "loss/logits": 0.07227082550525665, |
| "loss/reg": 8.641075134277344, |
| "loss/twn": 0.0, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 26.25, |
| "grad_norm_var": 721.246875, |
| "learning_rate": 0.0001, |
| "loss": 12.5001, |
| "loss/crossentropy": 2.7277987003326416, |
| "loss/hidden": 0.94921875, |
| "loss/logits": 0.18216562271118164, |
| "loss/reg": 8.640965461730957, |
| "loss/twn": 0.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.024025, |
| "grad_norm": 117.5, |
| "grad_norm_var": 1040.1811848958334, |
| "learning_rate": 0.0001, |
| "loss": 11.163, |
| "loss/crossentropy": 1.2693028450012207, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.06537793576717377, |
| "loss/reg": 8.640851020812988, |
| "loss/twn": 0.0, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.02405, |
| "grad_norm": 40.5, |
| "grad_norm_var": 998.05, |
| "learning_rate": 0.0001, |
| "loss": 11.1269, |
| "loss/crossentropy": 1.1471500396728516, |
| "loss/hidden": 1.21875, |
| "loss/logits": 0.12030482292175293, |
| "loss/reg": 8.640732765197754, |
| "loss/twn": 0.0, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.024075, |
| "grad_norm": 37.0, |
| "grad_norm_var": 940.4259765625, |
| "learning_rate": 0.0001, |
| "loss": 11.1483, |
| "loss/crossentropy": 1.3914599418640137, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.06936652958393097, |
| "loss/reg": 8.640617370605469, |
| "loss/twn": 0.0, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.0241, |
| "grad_norm": 27.125, |
| "grad_norm_var": 935.1455729166667, |
| "learning_rate": 0.0001, |
| "loss": 11.3491, |
| "loss/crossentropy": 1.5005178451538086, |
| "loss/hidden": 1.0625, |
| "loss/logits": 0.14557373523712158, |
| "loss/reg": 8.640515327453613, |
| "loss/twn": 0.0, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.024125, |
| "grad_norm": 30.375, |
| "grad_norm_var": 951.1223307291667, |
| "learning_rate": 0.0001, |
| "loss": 10.0736, |
| "loss/crossentropy": 0.2538512349128723, |
| "loss/hidden": 1.109375, |
| "loss/logits": 0.06992866843938828, |
| "loss/reg": 8.640402793884277, |
| "loss/twn": 0.0, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.02415, |
| "grad_norm": 61.75, |
| "grad_norm_var": 926.9369140625, |
| "learning_rate": 0.0001, |
| "loss": 11.6475, |
| "loss/crossentropy": 2.0097732543945312, |
| "loss/hidden": 0.89453125, |
| "loss/logits": 0.10287867486476898, |
| "loss/reg": 8.64028549194336, |
| "loss/twn": 0.0, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.024175, |
| "grad_norm": 22.5, |
| "grad_norm_var": 964.0884765625, |
| "learning_rate": 0.0001, |
| "loss": 12.0763, |
| "loss/crossentropy": 2.494885206222534, |
| "loss/hidden": 0.80078125, |
| "loss/logits": 0.14041492342948914, |
| "loss/reg": 8.640175819396973, |
| "loss/twn": 0.0, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.0242, |
| "grad_norm": 23.625, |
| "grad_norm_var": 962.87265625, |
| "learning_rate": 0.0001, |
| "loss": 11.5983, |
| "loss/crossentropy": 2.0036957263946533, |
| "loss/hidden": 0.84375, |
| "loss/logits": 0.1107679232954979, |
| "loss/reg": 8.640052795410156, |
| "loss/twn": 0.0, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.024225, |
| "grad_norm": 50.5, |
| "grad_norm_var": 963.19765625, |
| "learning_rate": 0.0001, |
| "loss": 10.53, |
| "loss/crossentropy": 0.7671348452568054, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.0760107934474945, |
| "loss/reg": 8.639945030212402, |
| "loss/twn": 0.0, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.02425, |
| "grad_norm": 49.0, |
| "grad_norm_var": 914.6410807291667, |
| "learning_rate": 0.0001, |
| "loss": 11.0574, |
| "loss/crossentropy": 1.3990331888198853, |
| "loss/hidden": 0.9609375, |
| "loss/logits": 0.057638462632894516, |
| "loss/reg": 8.639840126037598, |
| "loss/twn": 0.0, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.024275, |
| "grad_norm": 31.125, |
| "grad_norm_var": 624.1802083333333, |
| "learning_rate": 0.0001, |
| "loss": 10.4489, |
| "loss/crossentropy": 0.7169491648674011, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.07662199437618256, |
| "loss/reg": 8.639738082885742, |
| "loss/twn": 0.0, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.0243, |
| "grad_norm": 22.75, |
| "grad_norm_var": 639.7624348958333, |
| "learning_rate": 0.0001, |
| "loss": 11.1814, |
| "loss/crossentropy": 1.4119840860366821, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.08287733048200607, |
| "loss/reg": 8.639626502990723, |
| "loss/twn": 0.0, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.024325, |
| "grad_norm": 33.0, |
| "grad_norm_var": 630.8833333333333, |
| "learning_rate": 0.0001, |
| "loss": 11.1838, |
| "loss/crossentropy": 1.5832897424697876, |
| "loss/hidden": 0.87890625, |
| "loss/logits": 0.08213374018669128, |
| "loss/reg": 8.639517784118652, |
| "loss/twn": 0.0, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.02435, |
| "grad_norm": 28.5, |
| "grad_norm_var": 612.2291666666666, |
| "learning_rate": 0.0001, |
| "loss": 10.4802, |
| "loss/crossentropy": 0.82733553647995, |
| "loss/hidden": 0.92578125, |
| "loss/logits": 0.08771144598722458, |
| "loss/reg": 8.639411926269531, |
| "loss/twn": 0.0, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.024375, |
| "grad_norm": 29.75, |
| "grad_norm_var": 557.3018229166667, |
| "learning_rate": 0.0001, |
| "loss": 12.3732, |
| "loss/crossentropy": 2.7465858459472656, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.1044565737247467, |
| "loss/reg": 8.63934326171875, |
| "loss/twn": 0.0, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.0244, |
| "grad_norm": 34.75, |
| "grad_norm_var": 546.85390625, |
| "learning_rate": 0.0001, |
| "loss": 10.7326, |
| "loss/crossentropy": 0.8541914820671082, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.05169408768415451, |
| "loss/reg": 8.639236450195312, |
| "loss/twn": 0.0, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.024425, |
| "grad_norm": 37.25, |
| "grad_norm_var": 119.940625, |
| "learning_rate": 0.0001, |
| "loss": 11.8748, |
| "loss/crossentropy": 1.9437530040740967, |
| "loss/hidden": 1.1875, |
| "loss/logits": 0.10437147319316864, |
| "loss/reg": 8.639132499694824, |
| "loss/twn": 0.0, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.02445, |
| "grad_norm": 27.75, |
| "grad_norm_var": 120.69765625, |
| "learning_rate": 0.0001, |
| "loss": 11.9273, |
| "loss/crossentropy": 2.32950496673584, |
| "loss/hidden": 0.83984375, |
| "loss/logits": 0.11892136931419373, |
| "loss/reg": 8.639030456542969, |
| "loss/twn": 0.0, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.024475, |
| "grad_norm": 21.875, |
| "grad_norm_var": 129.29212239583333, |
| "learning_rate": 0.0001, |
| "loss": 12.3453, |
| "loss/crossentropy": 2.770594596862793, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.13894122838974, |
| "loss/reg": 8.638928413391113, |
| "loss/twn": 0.0, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0245, |
| "grad_norm": 47.0, |
| "grad_norm_var": 137.81145833333332, |
| "learning_rate": 0.0001, |
| "loss": 10.1597, |
| "loss/crossentropy": 0.3839435577392578, |
| "loss/hidden": 1.046875, |
| "loss/logits": 0.09004060924053192, |
| "loss/reg": 8.63883113861084, |
| "loss/twn": 0.0, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.024525, |
| "grad_norm": 32.75, |
| "grad_norm_var": 136.86764322916667, |
| "learning_rate": 0.0001, |
| "loss": 12.0792, |
| "loss/crossentropy": 2.524247646331787, |
| "loss/hidden": 0.79296875, |
| "loss/logits": 0.12327229976654053, |
| "loss/reg": 8.638716697692871, |
| "loss/twn": 0.0, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.02455, |
| "grad_norm": 45.0, |
| "grad_norm_var": 93.80618489583334, |
| "learning_rate": 0.0001, |
| "loss": 12.2423, |
| "loss/crossentropy": 2.493361234664917, |
| "loss/hidden": 0.96484375, |
| "loss/logits": 0.14550219476222992, |
| "loss/reg": 8.638609886169434, |
| "loss/twn": 0.0, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.024575, |
| "grad_norm": 29.75, |
| "grad_norm_var": 86.3900390625, |
| "learning_rate": 0.0001, |
| "loss": 11.2087, |
| "loss/crossentropy": 1.4887306690216064, |
| "loss/hidden": 0.9765625, |
| "loss/logits": 0.10494685918092728, |
| "loss/reg": 8.638496398925781, |
| "loss/twn": 0.0, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.0246, |
| "grad_norm": 25.75, |
| "grad_norm_var": 83.72604166666666, |
| "learning_rate": 0.0001, |
| "loss": 11.712, |
| "loss/crossentropy": 1.9325333833694458, |
| "loss/hidden": 1.015625, |
| "loss/logits": 0.12546265125274658, |
| "loss/reg": 8.638388633728027, |
| "loss/twn": 0.0, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.024625, |
| "grad_norm": 57.5, |
| "grad_norm_var": 102.04270833333334, |
| "learning_rate": 0.0001, |
| "loss": 12.0371, |
| "loss/crossentropy": 2.3968796730041504, |
| "loss/hidden": 0.8671875, |
| "loss/logits": 0.13475921750068665, |
| "loss/reg": 8.638289451599121, |
| "loss/twn": 0.0, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.02465, |
| "grad_norm": 37.0, |
| "grad_norm_var": 87.99270833333334, |
| "learning_rate": 0.0001, |
| "loss": 12.1267, |
| "loss/crossentropy": 2.5277347564697266, |
| "loss/hidden": 0.84765625, |
| "loss/logits": 0.11315064132213593, |
| "loss/reg": 8.63818645477295, |
| "loss/twn": 0.0, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.024675, |
| "grad_norm": 111.0, |
| "grad_norm_var": 457.78899739583335, |
| "learning_rate": 0.0001, |
| "loss": 11.89, |
| "loss/crossentropy": 2.256617307662964, |
| "loss/hidden": 0.86328125, |
| "loss/logits": 0.13200129568576813, |
| "loss/reg": 8.638078689575195, |
| "loss/twn": 0.0, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.0247, |
| "grad_norm": 24.125, |
| "grad_norm_var": 454.95807291666665, |
| "learning_rate": 0.0001, |
| "loss": 12.1999, |
| "loss/crossentropy": 2.6181788444519043, |
| "loss/hidden": 0.8125, |
| "loss/logits": 0.1312727928161621, |
| "loss/reg": 8.637969970703125, |
| "loss/twn": 0.0, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.024725, |
| "grad_norm": 31.625, |
| "grad_norm_var": 456.1619140625, |
| "learning_rate": 0.0001, |
| "loss": 11.0299, |
| "loss/crossentropy": 1.4346778392791748, |
| "loss/hidden": 0.8828125, |
| "loss/logits": 0.07450928539037704, |
| "loss/reg": 8.637850761413574, |
| "loss/twn": 0.0, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.02475, |
| "grad_norm": 33.5, |
| "grad_norm_var": 450.8337890625, |
| "learning_rate": 0.0001, |
| "loss": 11.1798, |
| "loss/crossentropy": 1.591082215309143, |
| "loss/hidden": 0.859375, |
| "loss/logits": 0.09157411754131317, |
| "loss/reg": 8.637744903564453, |
| "loss/twn": 0.0, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.024775, |
| "grad_norm": 40.5, |
| "grad_norm_var": 444.5853515625, |
| "learning_rate": 0.0001, |
| "loss": 11.6558, |
| "loss/crossentropy": 1.9533531665802002, |
| "loss/hidden": 0.9375, |
| "loss/logits": 0.12735915184020996, |
| "loss/reg": 8.637635231018066, |
| "loss/twn": 0.0, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.0248, |
| "grad_norm": 60.0, |
| "grad_norm_var": 467.3629557291667, |
| "learning_rate": 0.0001, |
| "loss": 10.2025, |
| "loss/crossentropy": 0.15267397463321686, |
| "loss/hidden": 1.375, |
| "loss/logits": 0.037311747670173645, |
| "loss/reg": 8.637526512145996, |
| "loss/twn": 0.0, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.024825, |
| "grad_norm": 24.625, |
| "grad_norm_var": 484.3080729166667, |
| "learning_rate": 0.0001, |
| "loss": 12.0233, |
| "loss/crossentropy": 2.419618606567383, |
| "loss/hidden": 0.828125, |
| "loss/logits": 0.13810431957244873, |
| "loss/reg": 8.637418746948242, |
| "loss/twn": 0.0, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.02485, |
| "grad_norm": 28.25, |
| "grad_norm_var": 483.46640625, |
| "learning_rate": 0.0001, |
| "loss": 11.8873, |
| "loss/crossentropy": 2.4256045818328857, |
| "loss/hidden": 0.71875, |
| "loss/logits": 0.10561510175466537, |
| "loss/reg": 8.637314796447754, |
| "loss/twn": 0.0, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.024875, |
| "grad_norm": 23.375, |
| "grad_norm_var": 479.85390625, |
| "learning_rate": 0.0001, |
| "loss": 12.6024, |
| "loss/crossentropy": 3.0520832538604736, |
| "loss/hidden": 0.796875, |
| "loss/logits": 0.11623083800077438, |
| "loss/reg": 8.63721752166748, |
| "loss/twn": 0.0, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.0249, |
| "grad_norm": 29.375, |
| "grad_norm_var": 484.5447265625, |
| "learning_rate": 0.0001, |
| "loss": 10.9952, |
| "loss/crossentropy": 1.2772033214569092, |
| "loss/hidden": 0.99609375, |
| "loss/logits": 0.08475629985332489, |
| "loss/reg": 8.637124061584473, |
| "loss/twn": 0.0, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.024925, |
| "grad_norm": 29.875, |
| "grad_norm_var": 487.6997395833333, |
| "learning_rate": 0.0001, |
| "loss": 11.762, |
| "loss/crossentropy": 2.0557544231414795, |
| "loss/hidden": 0.94140625, |
| "loss/logits": 0.1277797818183899, |
| "loss/reg": 8.637055397033691, |
| "loss/twn": 0.0, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.02495, |
| "grad_norm": 42.0, |
| "grad_norm_var": 486.0434895833333, |
| "learning_rate": 0.0001, |
| "loss": 12.1376, |
| "loss/crossentropy": 2.655238389968872, |
| "loss/hidden": 0.73046875, |
| "loss/logits": 0.11490000784397125, |
| "loss/reg": 8.636943817138672, |
| "loss/twn": 0.0, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.024975, |
| "grad_norm": 20.75, |
| "grad_norm_var": 502.5247395833333, |
| "learning_rate": 0.0001, |
| "loss": 11.9536, |
| "loss/crossentropy": 2.431523323059082, |
| "loss/hidden": 0.76953125, |
| "loss/logits": 0.1156713217496872, |
| "loss/reg": 8.636835098266602, |
| "loss/twn": 0.0, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 33.25, |
| "grad_norm_var": 493.0872395833333, |
| "learning_rate": 0.0001, |
| "loss": 12.0058, |
| "loss/crossentropy": 2.4622583389282227, |
| "loss/hidden": 0.78515625, |
| "loss/logits": 0.12161677330732346, |
| "loss/reg": 8.63674259185791, |
| "loss/twn": 0.0, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 40000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0457034088448e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|