Upload folder using huggingface_hub
Browse files
ssl_distil/convnext/checkpoints/epoch=299-step=9300.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35e8ec6863feda1d76792a77bd3e14008b9e1f37ca55a16824f80d43e873219c
|
| 3 |
+
size 235684763
|
ssl_distil/convnext/checkpoints/last.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6216cfdb52658186e56cba5534737153c553ce2a077c6e2453a85f5aa1330c79
|
| 3 |
+
size 235684763
|
ssl_distil/convnext/convnext_distil.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:999cda056c13920535cce5e998f0fb71c4c24110d051c4389cd7ed09750bbd70
|
| 3 |
+
size 114616927
|
ssl_distil/convnext/exported_models/exported_last.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1083b690486f56b4118a33442eb55b1d69238a3532c2119a4a2ec24a41911afc
|
| 3 |
+
size 114616515
|
ssl_distil/convnext/metrics.jsonl
ADDED
|
@@ -0,0 +1,600 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"lr-LARS/params": 0.2598076211353316, "lr-LARS/params_no_weight_decay": 0.2598076211353316, "step": 30}
|
| 2 |
+
{"train_loss": 0.31194546818733215, "profiling/batch_time": 0.4712883234024048, "profiling/data_time": 0.021884361281991005, "epoch": 0, "step": 30}
|
| 3 |
+
{"lr-LARS/params": 0.5196152422706632, "lr-LARS/params_no_weight_decay": 0.5196152422706632, "step": 61}
|
| 4 |
+
{"train_loss": 0.21411606669425964, "profiling/batch_time": 0.46621111035346985, "profiling/data_time": 0.021811336278915405, "epoch": 1, "step": 61}
|
| 5 |
+
{"lr-LARS/params": 0.7794228634059948, "lr-LARS/params_no_weight_decay": 0.7794228634059948, "step": 92}
|
| 6 |
+
{"train_loss": 0.6949778199195862, "profiling/batch_time": 0.468293696641922, "profiling/data_time": 0.0228253360837698, "epoch": 2, "step": 92}
|
| 7 |
+
{"lr-LARS/params": 1.0392304845413265, "lr-LARS/params_no_weight_decay": 1.0392304845413265, "step": 123}
|
| 8 |
+
{"train_loss": 0.1883438229560852, "profiling/batch_time": 0.46862080693244934, "profiling/data_time": 0.025431664660573006, "epoch": 3, "step": 123}
|
| 9 |
+
{"lr-LARS/params": 1.299038105676658, "lr-LARS/params_no_weight_decay": 1.299038105676658, "step": 154}
|
| 10 |
+
{"train_loss": 0.1814563125371933, "profiling/batch_time": 0.4713672399520874, "profiling/data_time": 0.024215664714574814, "epoch": 4, "step": 154}
|
| 11 |
+
{"lr-LARS/params": 1.5588457268119895, "lr-LARS/params_no_weight_decay": 1.5588457268119895, "step": 185}
|
| 12 |
+
{"train_loss": 0.18267196416854858, "profiling/batch_time": 0.46823886036872864, "profiling/data_time": 0.022851664572954178, "epoch": 5, "step": 185}
|
| 13 |
+
{"lr-LARS/params": 1.818653347947321, "lr-LARS/params_no_weight_decay": 1.818653347947321, "step": 216}
|
| 14 |
+
{"train_loss": 0.18083542585372925, "profiling/batch_time": 0.4695097804069519, "profiling/data_time": 0.022794736549258232, "epoch": 6, "step": 216}
|
| 15 |
+
{"lr-LARS/params": 2.078460969082653, "lr-LARS/params_no_weight_decay": 2.078460969082653, "step": 247}
|
| 16 |
+
{"train_loss": 12.949236869812012, "profiling/batch_time": 0.47276806831359863, "profiling/data_time": 0.023699022829532623, "epoch": 7, "step": 247}
|
| 17 |
+
{"lr-LARS/params": 2.3382685902179845, "lr-LARS/params_no_weight_decay": 2.3382685902179845, "step": 278}
|
| 18 |
+
{"train_loss": 14.355443954467773, "profiling/batch_time": 0.46862882375717163, "profiling/data_time": 0.02837073802947998, "epoch": 8, "step": 278}
|
| 19 |
+
{"lr-LARS/params": 2.598076211353316, "lr-LARS/params_no_weight_decay": 2.598076211353316, "step": 309}
|
| 20 |
+
{"train_loss": 373.94317626953125, "profiling/batch_time": 0.47057339549064636, "profiling/data_time": 0.02315656654536724, "epoch": 9, "step": 309}
|
| 21 |
+
{"lr-LARS/params": 2.5980048812424954, "lr-LARS/params_no_weight_decay": 2.5980048812424954, "step": 340}
|
| 22 |
+
{"train_loss": 36.216880798339844, "profiling/batch_time": 0.46941742300987244, "profiling/data_time": 0.0226852186024189, "epoch": 10, "step": 340}
|
| 23 |
+
{"lr-LARS/params": 2.597781309440988, "lr-LARS/params_no_weight_decay": 2.597781309440988, "step": 371}
|
| 24 |
+
{"train_loss": 679.6309204101562, "profiling/batch_time": 0.46974754333496094, "profiling/data_time": 0.02363262139260769, "epoch": 11, "step": 371}
|
| 25 |
+
{"lr-LARS/params": 2.597405442935369, "lr-LARS/params_no_weight_decay": 2.597405442935369, "step": 402}
|
| 26 |
+
{"train_loss": 48.08263397216797, "profiling/batch_time": 0.47047746181488037, "profiling/data_time": 0.022418467327952385, "epoch": 12, "step": 402}
|
| 27 |
+
{"lr-LARS/params": 2.5968773258450537, "lr-LARS/params_no_weight_decay": 2.5968773258450537, "step": 433}
|
| 28 |
+
{"train_loss": 3.6737923622131348, "profiling/batch_time": 0.47189608216285706, "profiling/data_time": 0.02603757753968239, "epoch": 13, "step": 433}
|
| 29 |
+
{"lr-LARS/params": 2.596197020160716, "lr-LARS/params_no_weight_decay": 2.596197020160716, "step": 464}
|
| 30 |
+
{"train_loss": 434.80548095703125, "profiling/batch_time": 0.4708019495010376, "profiling/data_time": 0.023040369153022766, "epoch": 14, "step": 464}
|
| 31 |
+
{"lr-LARS/params": 2.595364605737007, "lr-LARS/params_no_weight_decay": 2.595364605737007, "step": 495}
|
| 32 |
+
{"train_loss": 1805.1368408203125, "profiling/batch_time": 0.4709707200527191, "profiling/data_time": 0.022103408351540565, "epoch": 15, "step": 495}
|
| 33 |
+
{"lr-LARS/params": 2.594380180283186, "lr-LARS/params_no_weight_decay": 2.594380180283186, "step": 526}
|
| 34 |
+
{"train_loss": 39.50508499145508, "profiling/batch_time": 0.4683074951171875, "profiling/data_time": 0.022332806140184402, "epoch": 16, "step": 526}
|
| 35 |
+
{"lr-LARS/params": 2.593243859351649, "lr-LARS/params_no_weight_decay": 2.593243859351649, "step": 557}
|
| 36 |
+
{"train_loss": 14.473100662231445, "profiling/batch_time": 0.46771883964538574, "profiling/data_time": 0.02339872717857361, "epoch": 17, "step": 557}
|
| 37 |
+
{"lr-LARS/params": 2.5919557763243697, "lr-LARS/params_no_weight_decay": 2.5919557763243697, "step": 588}
|
| 38 |
+
{"train_loss": 28.88188934326172, "profiling/batch_time": 0.47216054797172546, "profiling/data_time": 0.02364548295736313, "epoch": 18, "step": 588}
|
| 39 |
+
{"lr-LARS/params": 2.5905160823972344, "lr-LARS/params_no_weight_decay": 2.5905160823972344, "step": 619}
|
| 40 |
+
{"train_loss": 10.441183090209961, "profiling/batch_time": 0.4680430293083191, "profiling/data_time": 0.023268572986125946, "epoch": 19, "step": 619}
|
| 41 |
+
{"lr-LARS/params": 2.5889249465623028, "lr-LARS/params_no_weight_decay": 2.5889249465623028, "step": 650}
|
| 42 |
+
{"train_loss": 633.2659301757812, "profiling/batch_time": 0.4725569486618042, "profiling/data_time": 0.022568566724658012, "epoch": 20, "step": 650}
|
| 43 |
+
{"lr-LARS/params": 2.587182555587967, "lr-LARS/params_no_weight_decay": 2.587182555587967, "step": 681}
|
| 44 |
+
{"train_loss": 6.366476058959961, "profiling/batch_time": 0.4698493182659149, "profiling/data_time": 0.022369032725691795, "epoch": 21, "step": 681}
|
| 45 |
+
{"lr-LARS/params": 2.5852891139970304, "lr-LARS/params_no_weight_decay": 2.5852891139970304, "step": 712}
|
| 46 |
+
{"train_loss": 25.203937530517578, "profiling/batch_time": 0.4697577655315399, "profiling/data_time": 0.023397965356707573, "epoch": 22, "step": 712}
|
| 47 |
+
{"lr-LARS/params": 2.5832448440427, "lr-LARS/params_no_weight_decay": 2.5832448440427, "step": 743}
|
| 48 |
+
{"train_loss": 175.21701049804688, "profiling/batch_time": 0.46950188279151917, "profiling/data_time": 0.024209650233387947, "epoch": 23, "step": 743}
|
| 49 |
+
{"lr-LARS/params": 2.5810499856824984, "lr-LARS/params_no_weight_decay": 2.5810499856824984, "step": 774}
|
| 50 |
+
{"train_loss": 3.678537368774414, "profiling/batch_time": 0.46959489583969116, "profiling/data_time": 0.022325366735458374, "epoch": 24, "step": 774}
|
| 51 |
+
{"lr-LARS/params": 2.578704796550098, "lr-LARS/params_no_weight_decay": 2.578704796550098, "step": 805}
|
| 52 |
+
{"train_loss": 273.7239990234375, "profiling/batch_time": 0.4687223434448242, "profiling/data_time": 0.022188881412148476, "epoch": 25, "step": 805}
|
| 53 |
+
{"lr-LARS/params": 2.5762095519250785, "lr-LARS/params_no_weight_decay": 2.5762095519250785, "step": 836}
|
| 54 |
+
{"train_loss": 50.772857666015625, "profiling/batch_time": 0.4683019518852234, "profiling/data_time": 0.023442016914486885, "epoch": 26, "step": 836}
|
| 55 |
+
{"lr-LARS/params": 2.5735645447006155, "lr-LARS/params_no_weight_decay": 2.5735645447006155, "step": 867}
|
| 56 |
+
{"train_loss": 230.21990966796875, "profiling/batch_time": 0.469540536403656, "profiling/data_time": 0.0220788661390543, "epoch": 27, "step": 867}
|
| 57 |
+
{"lr-LARS/params": 2.5707700853491007, "lr-LARS/params_no_weight_decay": 2.5707700853491007, "step": 898}
|
| 58 |
+
{"train_loss": 745.271728515625, "profiling/batch_time": 0.4677548408508301, "profiling/data_time": 0.023083696141839027, "epoch": 28, "step": 898}
|
| 59 |
+
{"lr-LARS/params": 2.5678265018856963, "lr-LARS/params_no_weight_decay": 2.5678265018856963, "step": 929}
|
| 60 |
+
{"train_loss": 10.371131896972656, "profiling/batch_time": 0.46908554434776306, "profiling/data_time": 0.023055508732795715, "epoch": 29, "step": 929}
|
| 61 |
+
{"lr-LARS/params": 2.5647341398298367, "lr-LARS/params_no_weight_decay": 2.5647341398298367, "step": 960}
|
| 62 |
+
{"train_loss": 16.156049728393555, "profiling/batch_time": 0.47020572423934937, "profiling/data_time": 0.028595363721251488, "epoch": 30, "step": 960}
|
| 63 |
+
{"lr-LARS/params": 2.5614933621646667, "lr-LARS/params_no_weight_decay": 2.5614933621646667, "step": 991}
|
| 64 |
+
{"train_loss": 6.883659839630127, "profiling/batch_time": 0.4708785116672516, "profiling/data_time": 0.022283844649791718, "epoch": 31, "step": 991}
|
| 65 |
+
{"lr-LARS/params": 2.5581045492944376, "lr-LARS/params_no_weight_decay": 2.5581045492944376, "step": 1022}
|
| 66 |
+
{"train_loss": 55.311344146728516, "profiling/batch_time": 0.4709187150001526, "profiling/data_time": 0.023392993956804276, "epoch": 32, "step": 1022}
|
| 67 |
+
{"lr-LARS/params": 2.5545680989998525, "lr-LARS/params_no_weight_decay": 2.5545680989998525, "step": 1053}
|
| 68 |
+
{"train_loss": 7.234062671661377, "profiling/batch_time": 0.4700948894023895, "profiling/data_time": 0.023695729672908783, "epoch": 33, "step": 1053}
|
| 69 |
+
{"lr-LARS/params": 2.550884426391377, "lr-LARS/params_no_weight_decay": 2.550884426391377, "step": 1084}
|
| 70 |
+
{"train_loss": 4.21544075012207, "profiling/batch_time": 0.4696831703186035, "profiling/data_time": 0.022442584857344627, "epoch": 34, "step": 1084}
|
| 71 |
+
{"lr-LARS/params": 2.547053963860512, "lr-LARS/params_no_weight_decay": 2.547053963860512, "step": 1115}
|
| 72 |
+
{"train_loss": 144.6874237060547, "profiling/batch_time": 0.47021734714508057, "profiling/data_time": 0.024488767609000206, "epoch": 35, "step": 1115}
|
| 73 |
+
{"lr-LARS/params": 2.543077161029039, "lr-LARS/params_no_weight_decay": 2.543077161029039, "step": 1146}
|
| 74 |
+
{"train_loss": 2.871257781982422, "profiling/batch_time": 0.4703052043914795, "profiling/data_time": 0.02254762314260006, "epoch": 36, "step": 1146}
|
| 75 |
+
{"lr-LARS/params": 2.538954484696244, "lr-LARS/params_no_weight_decay": 2.538954484696244, "step": 1177}
|
| 76 |
+
{"train_loss": 6.2141900062561035, "profiling/batch_time": 0.4700213372707367, "profiling/data_time": 0.021778080612421036, "epoch": 37, "step": 1177}
|
| 77 |
+
{"lr-LARS/params": 2.5346864187841254, "lr-LARS/params_no_weight_decay": 2.5346864187841254, "step": 1208}
|
| 78 |
+
{"train_loss": 97.45619201660156, "profiling/batch_time": 0.4678700566291809, "profiling/data_time": 0.02312382310628891, "epoch": 38, "step": 1208}
|
| 79 |
+
{"lr-LARS/params": 2.5302734642805884, "lr-LARS/params_no_weight_decay": 2.5302734642805884, "step": 1239}
|
| 80 |
+
{"train_loss": 1.6031452417373657, "profiling/batch_time": 0.4692513942718506, "profiling/data_time": 0.0238487608730793, "epoch": 39, "step": 1239}
|
| 81 |
+
{"lr-LARS/params": 2.5257161391806404, "lr-LARS/params_no_weight_decay": 2.5257161391806404, "step": 1270}
|
| 82 |
+
{"train_loss": 99.60950469970703, "profiling/batch_time": 0.4726502001285553, "profiling/data_time": 0.02309952676296234, "epoch": 40, "step": 1270}
|
| 83 |
+
{"lr-LARS/params": 2.521014978425588, "lr-LARS/params_no_weight_decay": 2.521014978425588, "step": 1301}
|
| 84 |
+
{"train_loss": 2.8074140548706055, "profiling/batch_time": 0.46869608759880066, "profiling/data_time": 0.023540539667010307, "epoch": 41, "step": 1301}
|
| 85 |
+
{"lr-LARS/params": 2.5161705338402474, "lr-LARS/params_no_weight_decay": 2.5161705338402474, "step": 1332}
|
| 86 |
+
{"train_loss": 64.60332489013672, "profiling/batch_time": 0.46702662110328674, "profiling/data_time": 0.02296690084040165, "epoch": 42, "step": 1332}
|
| 87 |
+
{"lr-LARS/params": 2.5111833740681657, "lr-LARS/params_no_weight_decay": 2.5111833740681657, "step": 1363}
|
| 88 |
+
{"train_loss": 0.3012450933456421, "profiling/batch_time": 0.46822068095207214, "profiling/data_time": 0.022337641566991806, "epoch": 43, "step": 1363}
|
| 89 |
+
{"lr-LARS/params": 2.506054084504878, "lr-LARS/params_no_weight_decay": 2.506054084504878, "step": 1394}
|
| 90 |
+
{"train_loss": 807.9827880859375, "profiling/batch_time": 0.4699914753437042, "profiling/data_time": 0.022885942831635475, "epoch": 44, "step": 1394}
|
| 91 |
+
{"lr-LARS/params": 2.5007832672291936, "lr-LARS/params_no_weight_decay": 2.5007832672291936, "step": 1425}
|
| 92 |
+
{"train_loss": 70.48700714111328, "profiling/batch_time": 0.4716077148914337, "profiling/data_time": 0.022688956931233406, "epoch": 45, "step": 1425}
|
| 93 |
+
{"lr-LARS/params": 2.4953715409325197, "lr-LARS/params_no_weight_decay": 2.4953715409325197, "step": 1456}
|
| 94 |
+
{"train_loss": 14.633325576782227, "profiling/batch_time": 0.47024765610694885, "profiling/data_time": 0.027802862226963043, "epoch": 46, "step": 1456}
|
| 95 |
+
{"lr-LARS/params": 2.489819540846241, "lr-LARS/params_no_weight_decay": 2.489819540846241, "step": 1487}
|
| 96 |
+
{"train_loss": 33.042945861816406, "profiling/batch_time": 0.4712398052215576, "profiling/data_time": 0.023220637813210487, "epoch": 47, "step": 1487}
|
| 97 |
+
{"lr-LARS/params": 2.4841279186671574, "lr-LARS/params_no_weight_decay": 2.4841279186671574, "step": 1518}
|
| 98 |
+
{"train_loss": 4.175399303436279, "profiling/batch_time": 0.4710855782032013, "profiling/data_time": 0.02227308601140976, "epoch": 48, "step": 1518}
|
| 99 |
+
{"lr-LARS/params": 2.478297342480987, "lr-LARS/params_no_weight_decay": 2.478297342480987, "step": 1549}
|
| 100 |
+
{"train_loss": 0.5910549163818359, "profiling/batch_time": 0.4703836143016815, "profiling/data_time": 0.02244972251355648, "epoch": 49, "step": 1549}
|
| 101 |
+
{"lr-LARS/params": 2.472328496683943, "lr-LARS/params_no_weight_decay": 2.472328496683943, "step": 1580}
|
| 102 |
+
{"train_loss": 1.203653335571289, "profiling/batch_time": 0.4672217071056366, "profiling/data_time": 0.022451914846897125, "epoch": 50, "step": 1580}
|
| 103 |
+
{"lr-LARS/params": 2.4662220819024014, "lr-LARS/params_no_weight_decay": 2.4662220819024014, "step": 1611}
|
| 104 |
+
{"train_loss": 2.976810932159424, "profiling/batch_time": 0.4698045253753662, "profiling/data_time": 0.023852217942476273, "epoch": 51, "step": 1611}
|
| 105 |
+
{"lr-LARS/params": 2.459978814910663, "lr-LARS/params_no_weight_decay": 2.459978814910663, "step": 1642}
|
| 106 |
+
{"train_loss": 1.902485728263855, "profiling/batch_time": 0.4728449285030365, "profiling/data_time": 0.028130510821938515, "epoch": 52, "step": 1642}
|
| 107 |
+
{"lr-LARS/params": 2.453599428546812, "lr-LARS/params_no_weight_decay": 2.453599428546812, "step": 1673}
|
| 108 |
+
{"train_loss": 0.8719056844711304, "profiling/batch_time": 0.47223368287086487, "profiling/data_time": 0.022841578349471092, "epoch": 53, "step": 1673}
|
| 109 |
+
{"lr-LARS/params": 2.4470846716267016, "lr-LARS/params_no_weight_decay": 2.4470846716267016, "step": 1704}
|
| 110 |
+
{"train_loss": 0.8949082493782043, "profiling/batch_time": 0.4685279428958893, "profiling/data_time": 0.023392828181385994, "epoch": 54, "step": 1704}
|
| 111 |
+
{"lr-LARS/params": 2.440435308856054, "lr-LARS/params_no_weight_decay": 2.440435308856054, "step": 1735}
|
| 112 |
+
{"train_loss": 23.14362335205078, "profiling/batch_time": 0.47284650802612305, "profiling/data_time": 0.024017976596951485, "epoch": 55, "step": 1735}
|
| 113 |
+
{"lr-LARS/params": 2.433652120740699, "lr-LARS/params_no_weight_decay": 2.433652120740699, "step": 1766}
|
| 114 |
+
{"train_loss": 19.518367767333984, "profiling/batch_time": 0.47030460834503174, "profiling/data_time": 0.02424721233546734, "epoch": 56, "step": 1766}
|
| 115 |
+
{"lr-LARS/params": 2.426735903494959, "lr-LARS/params_no_weight_decay": 2.426735903494959, "step": 1797}
|
| 116 |
+
{"train_loss": 16.87958526611328, "profiling/batch_time": 0.4696018099784851, "profiling/data_time": 0.02526361681520939, "epoch": 57, "step": 1797}
|
| 117 |
+
{"lr-LARS/params": 2.4196874689481884, "lr-LARS/params_no_weight_decay": 2.4196874689481884, "step": 1828}
|
| 118 |
+
{"train_loss": 1.131284236907959, "profiling/batch_time": 0.47009196877479553, "profiling/data_time": 0.024905087426304817, "epoch": 58, "step": 1828}
|
| 119 |
+
{"lr-LARS/params": 2.4125076444494793, "lr-LARS/params_no_weight_decay": 2.4125076444494793, "step": 1859}
|
| 120 |
+
{"train_loss": 12.683453559875488, "profiling/batch_time": 0.46810001134872437, "profiling/data_time": 0.023802831768989563, "epoch": 59, "step": 1859}
|
| 121 |
+
{"lr-LARS/params": 2.40519727277055, "lr-LARS/params_no_weight_decay": 2.40519727277055, "step": 1890}
|
| 122 |
+
{"train_loss": 1.3263578414916992, "profiling/batch_time": 0.468555212020874, "profiling/data_time": 0.0221959687769413, "epoch": 60, "step": 1890}
|
| 123 |
+
{"lr-LARS/params": 2.397757212006817, "lr-LARS/params_no_weight_decay": 2.397757212006817, "step": 1921}
|
| 124 |
+
{"train_loss": 0.5049941539764404, "profiling/batch_time": 0.4682908356189728, "profiling/data_time": 0.02284514717757702, "epoch": 61, "step": 1921}
|
| 125 |
+
{"lr-LARS/params": 2.3901883354766715, "lr-LARS/params_no_weight_decay": 2.3901883354766715, "step": 1952}
|
| 126 |
+
{"train_loss": 0.7352291345596313, "profiling/batch_time": 0.46821048855781555, "profiling/data_time": 0.024347366765141487, "epoch": 62, "step": 1952}
|
| 127 |
+
{"lr-LARS/params": 2.3824915316189714, "lr-LARS/params_no_weight_decay": 2.3824915316189714, "step": 1983}
|
| 128 |
+
{"train_loss": 0.47200965881347656, "profiling/batch_time": 0.46723681688308716, "profiling/data_time": 0.028282053768634796, "epoch": 63, "step": 1983}
|
| 129 |
+
{"lr-LARS/params": 2.374667703888753, "lr-LARS/params_no_weight_decay": 2.374667703888753, "step": 2014}
|
| 130 |
+
{"train_loss": 60.06499481201172, "profiling/batch_time": 0.4674076735973358, "profiling/data_time": 0.02362578548491001, "epoch": 64, "step": 2014}
|
| 131 |
+
{"lr-LARS/params": 2.366717770651184, "lr-LARS/params_no_weight_decay": 2.366717770651184, "step": 2045}
|
| 132 |
+
{"train_loss": 0.24544291198253632, "profiling/batch_time": 0.46887892484664917, "profiling/data_time": 0.02281964384019375, "epoch": 65, "step": 2045}
|
| 133 |
+
{"lr-LARS/params": 2.358642665073767, "lr-LARS/params_no_weight_decay": 2.358642665073767, "step": 2076}
|
| 134 |
+
{"train_loss": 108.18089294433594, "profiling/batch_time": 0.47139567136764526, "profiling/data_time": 0.023664427921175957, "epoch": 66, "step": 2076}
|
| 135 |
+
{"lr-LARS/params": 2.350443335016799, "lr-LARS/params_no_weight_decay": 2.350443335016799, "step": 2107}
|
| 136 |
+
{"train_loss": 0.25892946124076843, "profiling/batch_time": 0.46853384375572205, "profiling/data_time": 0.02481815218925476, "epoch": 67, "step": 2107}
|
| 137 |
+
{"lr-LARS/params": 2.3421207429221167, "lr-LARS/params_no_weight_decay": 2.3421207429221167, "step": 2138}
|
| 138 |
+
{"train_loss": 0.7797198295593262, "profiling/batch_time": 0.4700237512588501, "profiling/data_time": 0.023094169795513153, "epoch": 68, "step": 2138}
|
| 139 |
+
{"lr-LARS/params": 2.3336758657001218, "lr-LARS/params_no_weight_decay": 2.3336758657001218, "step": 2169}
|
| 140 |
+
{"train_loss": 47.811195373535156, "profiling/batch_time": 0.4678688049316406, "profiling/data_time": 0.022558994591236115, "epoch": 69, "step": 2169}
|
| 141 |
+
{"lr-LARS/params": 2.32510969461511, "lr-LARS/params_no_weight_decay": 2.32510969461511, "step": 2200}
|
| 142 |
+
{"train_loss": 0.4786320924758911, "profiling/batch_time": 0.4702819287776947, "profiling/data_time": 0.023594040423631668, "epoch": 70, "step": 2200}
|
| 143 |
+
{"lr-LARS/params": 2.316423235168918, "lr-LARS/params_no_weight_decay": 2.316423235168918, "step": 2231}
|
| 144 |
+
{"train_loss": 5.747926712036133, "profiling/batch_time": 0.4688231945037842, "profiling/data_time": 0.023650651797652245, "epoch": 71, "step": 2231}
|
| 145 |
+
{"lr-LARS/params": 2.3076175069828944, "lr-LARS/params_no_weight_decay": 2.3076175069828944, "step": 2262}
|
| 146 |
+
{"train_loss": 0.6255095601081848, "profiling/batch_time": 0.46930524706840515, "profiling/data_time": 0.023454928770661354, "epoch": 72, "step": 2262}
|
| 147 |
+
{"lr-LARS/params": 2.29869354367822, "lr-LARS/params_no_weight_decay": 2.29869354367822, "step": 2293}
|
| 148 |
+
{"train_loss": 310.82220458984375, "profiling/batch_time": 0.46930602192878723, "profiling/data_time": 0.02611592784523964, "epoch": 73, "step": 2293}
|
| 149 |
+
{"lr-LARS/params": 2.2896523927545753, "lr-LARS/params_no_weight_decay": 2.2896523927545753, "step": 2324}
|
| 150 |
+
{"train_loss": 0.5197723507881165, "profiling/batch_time": 0.46976619958877563, "profiling/data_time": 0.02429189719259739, "epoch": 74, "step": 2324}
|
| 151 |
+
{"lr-LARS/params": 2.2804951154671893, "lr-LARS/params_no_weight_decay": 2.2804951154671893, "step": 2355}
|
| 152 |
+
{"train_loss": 0.3360525965690613, "profiling/batch_time": 0.467939555644989, "profiling/data_time": 0.025161702185869217, "epoch": 75, "step": 2355}
|
| 153 |
+
{"lr-LARS/params": 2.271222786702267, "lr-LARS/params_no_weight_decay": 2.271222786702267, "step": 2386}
|
| 154 |
+
{"train_loss": 0.2486790269613266, "profiling/batch_time": 0.46815434098243713, "profiling/data_time": 0.02291480079293251, "epoch": 76, "step": 2386}
|
| 155 |
+
{"lr-LARS/params": 2.2618364948508183, "lr-LARS/params_no_weight_decay": 2.2618364948508183, "step": 2417}
|
| 156 |
+
{"train_loss": 41.96680450439453, "profiling/batch_time": 0.46991580724716187, "profiling/data_time": 0.022785667330026627, "epoch": 77, "step": 2417}
|
| 157 |
+
{"lr-LARS/params": 2.252337341680902, "lr-LARS/params_no_weight_decay": 2.252337341680902, "step": 2448}
|
| 158 |
+
{"train_loss": 0.5459044575691223, "profiling/batch_time": 0.46973517537117004, "profiling/data_time": 0.022286487743258476, "epoch": 78, "step": 2448}
|
| 159 |
+
{"lr-LARS/params": 2.242726442208301, "lr-LARS/params_no_weight_decay": 2.242726442208301, "step": 2479}
|
| 160 |
+
{"train_loss": 0.8291583061218262, "profiling/batch_time": 0.46750885248184204, "profiling/data_time": 0.022359391674399376, "epoch": 79, "step": 2479}
|
| 161 |
+
{"lr-LARS/params": 2.233004924565638, "lr-LARS/params_no_weight_decay": 2.233004924565638, "step": 2510}
|
| 162 |
+
{"train_loss": 0.4035949110984802, "profiling/batch_time": 0.46970829367637634, "profiling/data_time": 0.022599484771490097, "epoch": 80, "step": 2510}
|
| 163 |
+
{"lr-LARS/params": 2.2231739298699607, "lr-LARS/params_no_weight_decay": 2.2231739298699607, "step": 2541}
|
| 164 |
+
{"train_loss": 0.5911986827850342, "profiling/batch_time": 0.46878448128700256, "profiling/data_time": 0.024261871352791786, "epoch": 81, "step": 2541}
|
| 165 |
+
{"lr-LARS/params": 2.213234612088789, "lr-LARS/params_no_weight_decay": 2.213234612088789, "step": 2572}
|
| 166 |
+
{"train_loss": 0.5409368872642517, "profiling/batch_time": 0.46910786628723145, "profiling/data_time": 0.023897871375083923, "epoch": 82, "step": 2572}
|
| 167 |
+
{"lr-LARS/params": 2.2031881379046676, "lr-LARS/params_no_weight_decay": 2.2031881379046676, "step": 2603}
|
| 168 |
+
{"train_loss": 4.227826118469238, "profiling/batch_time": 0.47049134969711304, "profiling/data_time": 0.025586357340216637, "epoch": 83, "step": 2603}
|
| 169 |
+
{"lr-LARS/params": 2.193035686578219, "lr-LARS/params_no_weight_decay": 2.193035686578219, "step": 2634}
|
| 170 |
+
{"train_loss": 2.128704071044922, "profiling/batch_time": 0.46828195452690125, "profiling/data_time": 0.023670224472880363, "epoch": 84, "step": 2634}
|
| 171 |
+
{"lr-LARS/params": 2.1827784498097187, "lr-LARS/params_no_weight_decay": 2.1827784498097187, "step": 2665}
|
| 172 |
+
{"train_loss": 0.3161485493183136, "profiling/batch_time": 0.47038933634757996, "profiling/data_time": 0.024162085726857185, "epoch": 85, "step": 2665}
|
| 173 |
+
{"lr-LARS/params": 2.172417631599216, "lr-LARS/params_no_weight_decay": 2.172417631599216, "step": 2696}
|
| 174 |
+
{"train_loss": 0.4555578827857971, "profiling/batch_time": 0.46870437264442444, "profiling/data_time": 0.024299200624227524, "epoch": 86, "step": 2696}
|
| 175 |
+
{"lr-LARS/params": 2.1619544481052047, "lr-LARS/params_no_weight_decay": 2.1619544481052047, "step": 2727}
|
| 176 |
+
{"train_loss": 2.6594507694244385, "profiling/batch_time": 0.46748900413513184, "profiling/data_time": 0.023228539153933525, "epoch": 87, "step": 2727}
|
| 177 |
+
{"lr-LARS/params": 2.1513901275018736, "lr-LARS/params_no_weight_decay": 2.1513901275018736, "step": 2758}
|
| 178 |
+
{"train_loss": 0.2915334403514862, "profiling/batch_time": 0.47075730562210083, "profiling/data_time": 0.02426259219646454, "epoch": 88, "step": 2758}
|
| 179 |
+
{"lr-LARS/params": 2.1407259098349396, "lr-LARS/params_no_weight_decay": 2.1407259098349396, "step": 2789}
|
| 180 |
+
{"train_loss": 53.6274528503418, "profiling/batch_time": 0.47253361344337463, "profiling/data_time": 0.023108534514904022, "epoch": 89, "step": 2789}
|
| 181 |
+
{"lr-LARS/params": 2.1299630468760906, "lr-LARS/params_no_weight_decay": 2.1299630468760906, "step": 2820}
|
| 182 |
+
{"train_loss": 1.879733681678772, "profiling/batch_time": 0.4722203016281128, "profiling/data_time": 0.02306920289993286, "epoch": 90, "step": 2820}
|
| 183 |
+
{"lr-LARS/params": 2.1191028019760534, "lr-LARS/params_no_weight_decay": 2.1191028019760534, "step": 2851}
|
| 184 |
+
{"train_loss": 0.3740873336791992, "profiling/batch_time": 0.46790310740470886, "profiling/data_time": 0.028366010636091232, "epoch": 91, "step": 2851}
|
| 185 |
+
{"lr-LARS/params": 2.108146449916301, "lr-LARS/params_no_weight_decay": 2.108146449916301, "step": 2882}
|
| 186 |
+
{"train_loss": 209.26174926757812, "profiling/batch_time": 0.4688158631324768, "profiling/data_time": 0.023069186136126518, "epoch": 92, "step": 2882}
|
| 187 |
+
{"lr-LARS/params": 2.097095276759416, "lr-LARS/params_no_weight_decay": 2.097095276759416, "step": 2913}
|
| 188 |
+
{"train_loss": 10.9568510055542, "profiling/batch_time": 0.4684227705001831, "profiling/data_time": 0.02245280146598816, "epoch": 93, "step": 2913}
|
| 189 |
+
{"lr-LARS/params": 2.0859505796981335, "lr-LARS/params_no_weight_decay": 2.0859505796981335, "step": 2944}
|
| 190 |
+
{"train_loss": 0.34533989429473877, "profiling/batch_time": 0.46746405959129333, "profiling/data_time": 0.023341916501522064, "epoch": 94, "step": 2944}
|
| 191 |
+
{"lr-LARS/params": 2.074713666903076, "lr-LARS/params_no_weight_decay": 2.074713666903076, "step": 2975}
|
| 192 |
+
{"train_loss": 0.4546048939228058, "profiling/batch_time": 0.4688769578933716, "profiling/data_time": 0.02556665427982807, "epoch": 95, "step": 2975}
|
| 193 |
+
{"lr-LARS/params": 2.0633858573691986, "lr-LARS/params_no_weight_decay": 2.0633858573691986, "step": 3006}
|
| 194 |
+
{"train_loss": 1.9552316665649414, "profiling/batch_time": 0.47169414162635803, "profiling/data_time": 0.022796370089054108, "epoch": 96, "step": 3006}
|
| 195 |
+
{"lr-LARS/params": 2.051968480760965, "lr-LARS/params_no_weight_decay": 2.051968480760965, "step": 3037}
|
| 196 |
+
{"train_loss": 2.11960768699646, "profiling/batch_time": 0.467564195394516, "profiling/data_time": 0.02524457313120365, "epoch": 97, "step": 3037}
|
| 197 |
+
{"lr-LARS/params": 2.0404628772562714, "lr-LARS/params_no_weight_decay": 2.0404628772562714, "step": 3068}
|
| 198 |
+
{"train_loss": 179.30409240722656, "profiling/batch_time": 0.4676856994628906, "profiling/data_time": 0.02607033960521221, "epoch": 98, "step": 3068}
|
| 199 |
+
{"lr-LARS/params": 2.028870397389136, "lr-LARS/params_no_weight_decay": 2.028870397389136, "step": 3099}
|
| 200 |
+
{"train_loss": 15.288697242736816, "profiling/batch_time": 0.4692170321941376, "profiling/data_time": 0.023462682962417603, "epoch": 99, "step": 3099}
|
| 201 |
+
{"lr-LARS/params": 2.01719240189117, "lr-LARS/params_no_weight_decay": 2.01719240189117, "step": 3130}
|
| 202 |
+
{"train_loss": 13.218351364135742, "profiling/batch_time": 0.46973398327827454, "profiling/data_time": 0.0225273035466671, "epoch": 100, "step": 3130}
|
| 203 |
+
{"lr-LARS/params": 2.005430261531858, "lr-LARS/params_no_weight_decay": 2.005430261531858, "step": 3161}
|
| 204 |
+
{"train_loss": 86.37255859375, "profiling/batch_time": 0.4685228765010834, "profiling/data_time": 0.022206757217645645, "epoch": 101, "step": 3161}
|
| 205 |
+
{"lr-LARS/params": 1.9935853569576516, "lr-LARS/params_no_weight_decay": 1.9935853569576516, "step": 3192}
|
| 206 |
+
{"train_loss": 0.2505952715873718, "profiling/batch_time": 0.46937862038612366, "profiling/data_time": 0.02351229637861252, "epoch": 102, "step": 3192}
|
| 207 |
+
{"lr-LARS/params": 1.9816590785299155, "lr-LARS/params_no_weight_decay": 1.9816590785299155, "step": 3223}
|
| 208 |
+
{"train_loss": 0.2204047441482544, "profiling/batch_time": 0.47039690613746643, "profiling/data_time": 0.02336188592016697, "epoch": 103, "step": 3223}
|
| 209 |
+
{"lr-LARS/params": 1.9696528261617168, "lr-LARS/params_no_weight_decay": 1.9696528261617168, "step": 3254}
|
| 210 |
+
{"train_loss": 0.5175177454948425, "profiling/batch_time": 0.46817687153816223, "profiling/data_time": 0.02400284633040428, "epoch": 104, "step": 3254}
|
| 211 |
+
{"lr-LARS/params": 1.9575680091535104, "lr-LARS/params_no_weight_decay": 1.9575680091535104, "step": 3285}
|
| 212 |
+
{"train_loss": 0.5821739435195923, "profiling/batch_time": 0.46970275044441223, "profiling/data_time": 0.022598253563046455, "epoch": 105, "step": 3285}
|
| 213 |
+
{"lr-LARS/params": 1.9454060460277114, "lr-LARS/params_no_weight_decay": 1.9454060460277114, "step": 3316}
|
| 214 |
+
{"train_loss": 0.9416442513465881, "profiling/batch_time": 0.4684368073940277, "profiling/data_time": 0.022756820544600487, "epoch": 106, "step": 3316}
|
| 215 |
+
{"lr-LARS/params": 1.9331683643621864, "lr-LARS/params_no_weight_decay": 1.9331683643621864, "step": 3347}
|
| 216 |
+
{"train_loss": 0.2287731021642685, "profiling/batch_time": 0.4672001600265503, "profiling/data_time": 0.02272597886621952, "epoch": 107, "step": 3347}
|
| 217 |
+
{"lr-LARS/params": 1.9208564006226876, "lr-LARS/params_no_weight_decay": 1.9208564006226876, "step": 3378}
|
| 218 |
+
{"train_loss": 0.2901693880558014, "profiling/batch_time": 0.4691215753555298, "profiling/data_time": 0.02353905513882637, "epoch": 108, "step": 3378}
|
| 219 |
+
{"lr-LARS/params": 1.9084715999942368, "lr-LARS/params_no_weight_decay": 1.9084715999942368, "step": 3409}
|
| 220 |
+
{"train_loss": 3.9739749431610107, "profiling/batch_time": 0.46706321835517883, "profiling/data_time": 0.023691251873970032, "epoch": 109, "step": 3409}
|
| 221 |
+
{"lr-LARS/params": 1.8960154162114893, "lr-LARS/params_no_weight_decay": 1.8960154162114893, "step": 3440}
|
| 222 |
+
{"train_loss": 0.2285587042570114, "profiling/batch_time": 0.4712910056114197, "profiling/data_time": 0.023628901690244675, "epoch": 110, "step": 3440}
|
| 223 |
+
{"lr-LARS/params": 1.8834893113880937, "lr-LARS/params_no_weight_decay": 1.8834893113880937, "step": 3471}
|
| 224 |
+
{"train_loss": 0.19997327029705048, "profiling/batch_time": 0.46766728162765503, "profiling/data_time": 0.025861116126179695, "epoch": 111, "step": 3471}
|
| 225 |
+
{"lr-LARS/params": 1.8708947558450697, "lr-LARS/params_no_weight_decay": 1.8708947558450697, "step": 3502}
|
| 226 |
+
{"train_loss": 0.222589910030365, "profiling/batch_time": 0.4697688817977905, "profiling/data_time": 0.023146115243434906, "epoch": 112, "step": 3502}
|
| 227 |
+
{"lr-LARS/params": 1.8582332279382185, "lr-LARS/params_no_weight_decay": 1.8582332279382185, "step": 3533}
|
| 228 |
+
{"train_loss": 0.21689438819885254, "profiling/batch_time": 0.4695585370063782, "profiling/data_time": 0.02386208437383175, "epoch": 113, "step": 3533}
|
| 229 |
+
{"lr-LARS/params": 1.8455062138845955, "lr-LARS/params_no_weight_decay": 1.8455062138845955, "step": 3564}
|
| 230 |
+
{"train_loss": 0.21824142336845398, "profiling/batch_time": 0.468971312046051, "profiling/data_time": 0.023700954392552376, "epoch": 114, "step": 3564}
|
| 231 |
+
{"lr-LARS/params": 1.832715207588054, "lr-LARS/params_no_weight_decay": 1.832715207588054, "step": 3595}
|
| 232 |
+
{"train_loss": 0.27698883414268494, "profiling/batch_time": 0.4698481559753418, "profiling/data_time": 0.02267683856189251, "epoch": 115, "step": 3595}
|
| 233 |
+
{"lr-LARS/params": 1.819861710463892, "lr-LARS/params_no_weight_decay": 1.819861710463892, "step": 3626}
|
| 234 |
+
{"train_loss": 0.44556722044944763, "profiling/batch_time": 0.4686325490474701, "profiling/data_time": 0.02484964393079281, "epoch": 116, "step": 3626}
|
| 235 |
+
{"lr-LARS/params": 1.806947231262617, "lr-LARS/params_no_weight_decay": 1.806947231262617, "step": 3657}
|
| 236 |
+
{"train_loss": 0.2024405300617218, "profiling/batch_time": 0.4692056477069855, "profiling/data_time": 0.02586720883846283, "epoch": 117, "step": 3657}
|
| 237 |
+
{"lr-LARS/params": 1.7939732858928428, "lr-LARS/params_no_weight_decay": 1.7939732858928428, "step": 3688}
|
| 238 |
+
{"train_loss": 2.6041512489318848, "profiling/batch_time": 0.46856310963630676, "profiling/data_time": 0.023410534486174583, "epoch": 118, "step": 3688}
|
| 239 |
+
{"lr-LARS/params": 1.7809413972433563, "lr-LARS/params_no_weight_decay": 1.7809413972433563, "step": 3719}
|
| 240 |
+
{"train_loss": 0.2738244831562042, "profiling/batch_time": 0.4674675166606903, "profiling/data_time": 0.02568947710096836, "epoch": 119, "step": 3719}
|
| 241 |
+
{"lr-LARS/params": 1.7678530950043592, "lr-LARS/params_no_weight_decay": 1.7678530950043592, "step": 3750}
|
| 242 |
+
{"train_loss": 1.1977521181106567, "profiling/batch_time": 0.47148001194000244, "profiling/data_time": 0.027329301461577415, "epoch": 120, "step": 3750}
|
| 243 |
+
{"lr-LARS/params": 1.7547099154879087, "lr-LARS/params_no_weight_decay": 1.7547099154879087, "step": 3781}
|
| 244 |
+
{"train_loss": 2.8730945587158203, "profiling/batch_time": 0.47009193897247314, "profiling/data_time": 0.023626748472452164, "epoch": 121, "step": 3781}
|
| 245 |
+
{"lr-LARS/params": 1.7415134014475881, "lr-LARS/params_no_weight_decay": 1.7415134014475881, "step": 3812}
|
| 246 |
+
{"train_loss": 0.32099857926368713, "profiling/batch_time": 0.46900197863578796, "profiling/data_time": 0.02681814879179001, "epoch": 122, "step": 3812}
|
| 247 |
+
{"lr-LARS/params": 1.7282651018974169, "lr-LARS/params_no_weight_decay": 1.7282651018974169, "step": 3843}
|
| 248 |
+
{"train_loss": 0.19669793546199799, "profiling/batch_time": 0.4678991734981537, "profiling/data_time": 0.02354726754128933, "epoch": 123, "step": 3843}
|
| 249 |
+
{"lr-LARS/params": 1.7149665719300244, "lr-LARS/params_no_weight_decay": 1.7149665719300244, "step": 3874}
|
| 250 |
+
{"train_loss": 0.2884298264980316, "profiling/batch_time": 0.4693676233291626, "profiling/data_time": 0.02348741702735424, "epoch": 124, "step": 3874}
|
| 251 |
+
{"lr-LARS/params": 1.7016193725341149, "lr-LARS/params_no_weight_decay": 1.7016193725341149, "step": 3905}
|
| 252 |
+
{"train_loss": 0.23995520174503326, "profiling/batch_time": 0.46935105323791504, "profiling/data_time": 0.023599984124302864, "epoch": 125, "step": 3905}
|
| 253 |
+
{"lr-LARS/params": 1.6882250704112358, "lr-LARS/params_no_weight_decay": 1.6882250704112358, "step": 3936}
|
| 254 |
+
{"train_loss": 9.37893295288086, "profiling/batch_time": 0.4698675870895386, "profiling/data_time": 0.02479972504079342, "epoch": 126, "step": 3936}
|
| 255 |
+
{"lr-LARS/params": 1.6747852377918793, "lr-LARS/params_no_weight_decay": 1.6747852377918793, "step": 3967}
|
| 256 |
+
{"train_loss": 73.7145767211914, "profiling/batch_time": 0.468632310628891, "profiling/data_time": 0.02318849042057991, "epoch": 127, "step": 3967}
|
| 257 |
+
{"lr-LARS/params": 1.6613014522509317, "lr-LARS/params_no_weight_decay": 1.6613014522509317, "step": 3998}
|
| 258 |
+
{"train_loss": 0.22435139119625092, "profiling/batch_time": 0.4691292643547058, "profiling/data_time": 0.02269771508872509, "epoch": 128, "step": 3998}
|
| 259 |
+
{"lr-LARS/params": 1.6477752965224972, "lr-LARS/params_no_weight_decay": 1.6477752965224972, "step": 4029}
|
| 260 |
+
{"train_loss": 0.3428381085395813, "profiling/batch_time": 0.4692525565624237, "profiling/data_time": 0.02292914129793644, "epoch": 129, "step": 4029}
|
| 261 |
+
{"lr-LARS/params": 1.6342083583141152, "lr-LARS/params_no_weight_decay": 1.6342083583141152, "step": 4060}
|
| 262 |
+
{"train_loss": 0.30637326836586, "profiling/batch_time": 0.4676969051361084, "profiling/data_time": 0.024300899356603622, "epoch": 130, "step": 4060}
|
| 263 |
+
{"lr-LARS/params": 1.620602230120396, "lr-LARS/params_no_weight_decay": 1.620602230120396, "step": 4091}
|
| 264 |
+
{"train_loss": 0.20701366662979126, "profiling/batch_time": 0.4691523611545563, "profiling/data_time": 0.024731282144784927, "epoch": 131, "step": 4091}
|
| 265 |
+
{"lr-LARS/params": 1.6069585090360905, "lr-LARS/params_no_weight_decay": 1.6069585090360905, "step": 4122}
|
| 266 |
+
{"train_loss": 0.2418324500322342, "profiling/batch_time": 0.46820154786109924, "profiling/data_time": 0.023264264687895775, "epoch": 132, "step": 4122}
|
| 267 |
+
{"lr-LARS/params": 1.593278796568625, "lr-LARS/params_no_weight_decay": 1.593278796568625, "step": 4153}
|
| 268 |
+
{"train_loss": 0.2066756635904312, "profiling/batch_time": 0.47047385573387146, "profiling/data_time": 0.0255581084638834, "epoch": 133, "step": 4153}
|
| 269 |
+
{"lr-LARS/params": 1.5795646984501124, "lr-LARS/params_no_weight_decay": 1.5795646984501124, "step": 4184}
|
| 270 |
+
{"train_loss": 0.21035292744636536, "profiling/batch_time": 0.4699760675430298, "profiling/data_time": 0.025822002440690994, "epoch": 134, "step": 4184}
|
| 271 |
+
{"lr-LARS/params": 1.5658178244488732, "lr-LARS/params_no_weight_decay": 1.5658178244488732, "step": 4215}
|
| 272 |
+
{"train_loss": 0.22665046155452728, "profiling/batch_time": 0.46866166591644287, "profiling/data_time": 0.02408125065267086, "epoch": 135, "step": 4215}
|
| 273 |
+
{"lr-LARS/params": 1.552039788180479, "lr-LARS/params_no_weight_decay": 1.552039788180479, "step": 4246}
|
| 274 |
+
{"train_loss": 2.7997567653656006, "profiling/batch_time": 0.4675123989582062, "profiling/data_time": 0.02372441440820694, "epoch": 136, "step": 4246}
|
| 275 |
+
{"lr-LARS/params": 1.5382322069183445, "lr-LARS/params_no_weight_decay": 1.5382322069183445, "step": 4277}
|
| 276 |
+
{"train_loss": 0.2233796864748001, "profiling/batch_time": 0.46816176176071167, "profiling/data_time": 0.02331366017460823, "epoch": 137, "step": 4277}
|
| 277 |
+
{"lr-LARS/params": 1.5243967014038924, "lr-LARS/params_no_weight_decay": 1.5243967014038924, "step": 4308}
|
| 278 |
+
{"train_loss": 0.9067608118057251, "profiling/batch_time": 0.4688373804092407, "profiling/data_time": 0.02350773848593235, "epoch": 138, "step": 4308}
|
| 279 |
+
{"lr-LARS/params": 1.5105348956563098, "lr-LARS/params_no_weight_decay": 1.5105348956563098, "step": 4339}
|
| 280 |
+
{"train_loss": 0.1916802078485489, "profiling/batch_time": 0.4673452377319336, "profiling/data_time": 0.02541876584291458, "epoch": 139, "step": 4339}
|
| 281 |
+
{"lr-LARS/params": 1.4966484167819174, "lr-LARS/params_no_weight_decay": 1.4966484167819174, "step": 4370}
|
| 282 |
+
{"train_loss": 0.19645319879055023, "profiling/batch_time": 0.4673255383968353, "profiling/data_time": 0.024213135242462158, "epoch": 140, "step": 4370}
|
| 283 |
+
{"lr-LARS/params": 1.4827388947831845, "lr-LARS/params_no_weight_decay": 1.4827388947831845, "step": 4401}
|
| 284 |
+
{"train_loss": 127.64894104003906, "profiling/batch_time": 0.4684883654117584, "profiling/data_time": 0.02284305728971958, "epoch": 141, "step": 4401}
|
| 285 |
+
{"lr-LARS/params": 1.4688079623673922, "lr-LARS/params_no_weight_decay": 1.4688079623673922, "step": 4432}
|
| 286 |
+
{"train_loss": 0.25028732419013977, "profiling/batch_time": 0.46900200843811035, "profiling/data_time": 0.02325459010899067, "epoch": 142, "step": 4432}
|
| 287 |
+
{"lr-LARS/params": 1.4548572547549883, "lr-LARS/params_no_weight_decay": 1.4548572547549883, "step": 4463}
|
| 288 |
+
{"train_loss": 0.19643022119998932, "profiling/batch_time": 0.4686683118343353, "profiling/data_time": 0.02326754853129387, "epoch": 143, "step": 4463}
|
| 289 |
+
{"lr-LARS/params": 1.4408884094876455, "lr-LARS/params_no_weight_decay": 1.4408884094876455, "step": 4494}
|
| 290 |
+
{"train_loss": 0.2932826280593872, "profiling/batch_time": 0.46919068694114685, "profiling/data_time": 0.023083271458745003, "epoch": 144, "step": 4494}
|
| 291 |
+
{"lr-LARS/params": 1.4269030662360431, "lr-LARS/params_no_weight_decay": 1.4269030662360431, "step": 4525}
|
| 292 |
+
{"train_loss": 0.797976016998291, "profiling/batch_time": 0.46666938066482544, "profiling/data_time": 0.023125160485506058, "epoch": 145, "step": 4525}
|
| 293 |
+
{"lr-LARS/params": 1.4129028666074024, "lr-LARS/params_no_weight_decay": 1.4129028666074024, "step": 4556}
|
| 294 |
+
{"train_loss": 0.18154257535934448, "profiling/batch_time": 0.468300998210907, "profiling/data_time": 0.02381891943514347, "epoch": 146, "step": 4556}
|
| 295 |
+
{"lr-LARS/params": 1.3988894539527952, "lr-LARS/params_no_weight_decay": 1.3988894539527952, "step": 4587}
|
| 296 |
+
{"train_loss": 0.4662235677242279, "profiling/batch_time": 0.4684472680091858, "profiling/data_time": 0.02430916018784046, "epoch": 147, "step": 4587}
|
| 297 |
+
{"lr-LARS/params": 1.3848644731742459, "lr-LARS/params_no_weight_decay": 1.3848644731742459, "step": 4618}
|
| 298 |
+
{"train_loss": 14.96108341217041, "profiling/batch_time": 0.46820706129074097, "profiling/data_time": 0.023396974429488182, "epoch": 148, "step": 4618}
|
| 299 |
+
{"lr-LARS/params": 1.3708295705316498, "lr-LARS/params_no_weight_decay": 1.3708295705316498, "step": 4649}
|
| 300 |
+
{"train_loss": 0.20646654069423676, "profiling/batch_time": 0.4680996537208557, "profiling/data_time": 0.02299371175467968, "epoch": 149, "step": 4649}
|
| 301 |
+
{"lr-LARS/params": 1.3567863934495388, "lr-LARS/params_no_weight_decay": 1.3567863934495388, "step": 4680}
|
| 302 |
+
{"train_loss": 0.18531253933906555, "profiling/batch_time": 0.4688446521759033, "profiling/data_time": 0.02351038157939911, "epoch": 150, "step": 4680}
|
| 303 |
+
{"lr-LARS/params": 1.3427365903236999, "lr-LARS/params_no_weight_decay": 1.3427365903236999, "step": 4711}
|
| 304 |
+
{"train_loss": 1698.1651611328125, "profiling/batch_time": 0.46837708353996277, "profiling/data_time": 0.023236317560076714, "epoch": 151, "step": 4711}
|
| 305 |
+
{"lr-LARS/params": 1.328681810327691, "lr-LARS/params_no_weight_decay": 1.328681810327691, "step": 4742}
|
| 306 |
+
{"train_loss": 0.26438280940055847, "profiling/batch_time": 0.4675687253475189, "profiling/data_time": 0.023975424468517303, "epoch": 152, "step": 4742}
|
| 307 |
+
{"lr-LARS/params": 1.3146237032192571, "lr-LARS/params_no_weight_decay": 1.3146237032192571, "step": 4773}
|
| 308 |
+
{"train_loss": 0.18656469881534576, "profiling/batch_time": 0.46806153655052185, "profiling/data_time": 0.02452305518090725, "epoch": 153, "step": 4773}
|
| 309 |
+
{"lr-LARS/params": 1.3005639191466805, "lr-LARS/params_no_weight_decay": 1.3005639191466805, "step": 4804}
|
| 310 |
+
{"train_loss": 0.194628044962883, "profiling/batch_time": 0.47026383876800537, "profiling/data_time": 0.024226898327469826, "epoch": 154, "step": 4804}
|
| 311 |
+
{"lr-LARS/params": 1.2865041084550883, "lr-LARS/params_no_weight_decay": 1.2865041084550883, "step": 4835}
|
| 312 |
+
{"train_loss": 0.19501325488090515, "profiling/batch_time": 0.46810394525527954, "profiling/data_time": 0.02305176854133606, "epoch": 155, "step": 4835}
|
| 313 |
+
{"lr-LARS/params": 1.2724459214927306, "lr-LARS/params_no_weight_decay": 1.2724459214927306, "step": 4866}
|
| 314 |
+
{"train_loss": 0.1783357709646225, "profiling/batch_time": 0.47103896737098694, "profiling/data_time": 0.02298922836780548, "epoch": 156, "step": 4866}
|
| 315 |
+
{"lr-LARS/params": 1.258391008417264, "lr-LARS/params_no_weight_decay": 1.258391008417264, "step": 4897}
|
| 316 |
+
{"train_loss": 0.17607949674129486, "profiling/batch_time": 0.4691469967365265, "profiling/data_time": 0.023801535367965698, "epoch": 157, "step": 4897}
|
| 317 |
+
{"lr-LARS/params": 1.2443410190020545, "lr-LARS/params_no_weight_decay": 1.2443410190020545, "step": 4928}
|
| 318 |
+
{"train_loss": 0.17614112794399261, "profiling/batch_time": 0.46958133578300476, "profiling/data_time": 0.023428741842508316, "epoch": 158, "step": 4928}
|
| 319 |
+
{"lr-LARS/params": 1.2302976024425256, "lr-LARS/params_no_weight_decay": 1.2302976024425256, "step": 4959}
|
| 320 |
+
{"train_loss": 0.17606651782989502, "profiling/batch_time": 0.46831223368644714, "profiling/data_time": 0.023153886198997498, "epoch": 159, "step": 4959}
|
| 321 |
+
{"lr-LARS/params": 1.2162624071625765, "lr-LARS/params_no_weight_decay": 1.2162624071625765, "step": 4990}
|
| 322 |
+
{"train_loss": 0.1813165247440338, "profiling/batch_time": 0.46791478991508484, "profiling/data_time": 0.022853100672364235, "epoch": 160, "step": 4990}
|
| 323 |
+
{"lr-LARS/params": 1.2022370806210865, "lr-LARS/params_no_weight_decay": 1.2022370806210865, "step": 5021}
|
| 324 |
+
{"train_loss": 0.1810695379972458, "profiling/batch_time": 0.4697805941104889, "profiling/data_time": 0.02336183749139309, "epoch": 161, "step": 5021}
|
| 325 |
+
{"lr-LARS/params": 1.1882232691185384, "lr-LARS/params_no_weight_decay": 1.1882232691185384, "step": 5052}
|
| 326 |
+
{"train_loss": 0.1757485717535019, "profiling/batch_time": 0.4680528938770294, "profiling/data_time": 0.023885276168584824, "epoch": 162, "step": 5052}
|
| 327 |
+
{"lr-LARS/params": 1.1742226176037727, "lr-LARS/params_no_weight_decay": 1.1742226176037727, "step": 5083}
|
| 328 |
+
{"train_loss": 0.1856168806552887, "profiling/batch_time": 0.4703716039657593, "profiling/data_time": 0.023418111726641655, "epoch": 163, "step": 5083}
|
| 329 |
+
{"lr-LARS/params": 1.1602367694809044, "lr-LARS/params_no_weight_decay": 1.1602367694809044, "step": 5114}
|
| 330 |
+
{"train_loss": 0.17429611086845398, "profiling/batch_time": 0.47068116068840027, "profiling/data_time": 0.022876489907503128, "epoch": 164, "step": 5114}
|
| 331 |
+
{"lr-LARS/params": 1.1462673664164165, "lr-LARS/params_no_weight_decay": 1.1462673664164165, "step": 5145}
|
| 332 |
+
{"train_loss": 0.18534240126609802, "profiling/batch_time": 0.471584290266037, "profiling/data_time": 0.030926376581192017, "epoch": 165, "step": 5145}
|
| 333 |
+
{"lr-LARS/params": 1.1323160481464636, "lr-LARS/params_no_weight_decay": 1.1323160481464636, "step": 5176}
|
| 334 |
+
{"train_loss": 0.17165836691856384, "profiling/batch_time": 0.4697147607803345, "profiling/data_time": 0.023714274168014526, "epoch": 166, "step": 5176}
|
| 335 |
+
{"lr-LARS/params": 1.1183844522843966, "lr-LARS/params_no_weight_decay": 1.1183844522843966, "step": 5207}
|
| 336 |
+
{"train_loss": 0.17924055457115173, "profiling/batch_time": 0.46992847323417664, "profiling/data_time": 0.03103003464639187, "epoch": 167, "step": 5207}
|
| 337 |
+
{"lr-LARS/params": 1.1044742141285395, "lr-LARS/params_no_weight_decay": 1.1044742141285395, "step": 5238}
|
| 338 |
+
{"train_loss": 0.17594854533672333, "profiling/batch_time": 0.46869608759880066, "profiling/data_time": 0.023087942972779274, "epoch": 168, "step": 5238}
|
| 339 |
+
{"lr-LARS/params": 1.0905869664702375, "lr-LARS/params_no_weight_decay": 1.0905869664702375, "step": 5269}
|
| 340 |
+
{"train_loss": 0.18001827597618103, "profiling/batch_time": 0.4682839512825012, "profiling/data_time": 0.022815624251961708, "epoch": 169, "step": 5269}
|
| 341 |
+
{"lr-LARS/params": 1.076724339402197, "lr-LARS/params_no_weight_decay": 1.076724339402197, "step": 5300}
|
| 342 |
+
{"train_loss": 0.17039217054843903, "profiling/batch_time": 0.4688928723335266, "profiling/data_time": 0.024359598755836487, "epoch": 170, "step": 5300}
|
| 343 |
+
{"lr-LARS/params": 1.062887960127149, "lr-LARS/params_no_weight_decay": 1.062887960127149, "step": 5331}
|
| 344 |
+
{"train_loss": 0.18095912039279938, "profiling/batch_time": 0.4697173535823822, "profiling/data_time": 0.02420135959982872, "epoch": 171, "step": 5331}
|
| 345 |
+
{"lr-LARS/params": 1.0490794527668417, "lr-LARS/params_no_weight_decay": 1.0490794527668417, "step": 5362}
|
| 346 |
+
{"train_loss": 0.179249107837677, "profiling/batch_time": 0.4707670211791992, "profiling/data_time": 0.02550286427140236, "epoch": 172, "step": 5362}
|
| 347 |
+
{"lr-LARS/params": 1.0353004381714035, "lr-LARS/params_no_weight_decay": 1.0353004381714035, "step": 5393}
|
| 348 |
+
{"train_loss": 0.17689934372901917, "profiling/batch_time": 0.47232910990715027, "profiling/data_time": 0.02399234101176262, "epoch": 173, "step": 5393}
|
| 349 |
+
{"lr-LARS/params": 1.0215525337290867, "lr-LARS/params_no_weight_decay": 1.0215525337290867, "step": 5424}
|
| 350 |
+
{"train_loss": 0.17880438268184662, "profiling/batch_time": 0.4685608744621277, "profiling/data_time": 0.024626320227980614, "epoch": 174, "step": 5424}
|
| 351 |
+
{"lr-LARS/params": 1.0078373531764158, "lr-LARS/params_no_weight_decay": 1.0078373531764158, "step": 5455}
|
| 352 |
+
{"train_loss": 0.1799730509519577, "profiling/batch_time": 0.4692945182323456, "profiling/data_time": 0.024718530476093292, "epoch": 175, "step": 5455}
|
| 353 |
+
{"lr-LARS/params": 0.9941565064087676, "lr-LARS/params_no_weight_decay": 0.9941565064087676, "step": 5486}
|
| 354 |
+
{"train_loss": 0.17893801629543304, "profiling/batch_time": 0.4690098762512207, "profiling/data_time": 0.023999815806746483, "epoch": 176, "step": 5486}
|
| 355 |
+
{"lr-LARS/params": 0.9805115992914009, "lr-LARS/params_no_weight_decay": 0.9805115992914009, "step": 5517}
|
| 356 |
+
{"train_loss": 0.18591029942035675, "profiling/batch_time": 0.46935659646987915, "profiling/data_time": 0.024002157151699066, "epoch": 177, "step": 5517}
|
| 357 |
+
{"lr-LARS/params": 0.9669042334709583, "lr-LARS/params_no_weight_decay": 0.9669042334709583, "step": 5548}
|
| 358 |
+
{"train_loss": 0.17215245962142944, "profiling/batch_time": 0.4706227779388428, "profiling/data_time": 0.02324669435620308, "epoch": 178, "step": 5548}
|
| 359 |
+
{"lr-LARS/params": 0.9533360061874647, "lr-LARS/params_no_weight_decay": 0.9533360061874647, "step": 5579}
|
| 360 |
+
{"train_loss": 0.17308884859085083, "profiling/batch_time": 0.4688417911529541, "profiling/data_time": 0.02328791655600071, "epoch": 179, "step": 5579}
|
| 361 |
+
{"lr-LARS/params": 0.9398085100868415, "lr-LARS/params_no_weight_decay": 0.9398085100868415, "step": 5610}
|
| 362 |
+
{"train_loss": 0.17389556765556335, "profiling/batch_time": 0.46835649013519287, "profiling/data_time": 0.024651531130075455, "epoch": 180, "step": 5610}
|
| 363 |
+
{"lr-LARS/params": 0.9263233330339639, "lr-LARS/params_no_weight_decay": 0.9263233330339639, "step": 5641}
|
| 364 |
+
{"train_loss": 0.17842154204845428, "profiling/batch_time": 0.4684598445892334, "profiling/data_time": 0.02436215616762638, "epoch": 181, "step": 5641}
|
| 365 |
+
{"lr-LARS/params": 0.9128820579262703, "lr-LARS/params_no_weight_decay": 0.9128820579262703, "step": 5672}
|
| 366 |
+
{"train_loss": 0.17309552431106567, "profiling/batch_time": 0.4703254699707031, "profiling/data_time": 0.022909438237547874, "epoch": 182, "step": 5672}
|
| 367 |
+
{"lr-LARS/params": 0.8994862625079686, "lr-LARS/params_no_weight_decay": 0.8994862625079686, "step": 5703}
|
| 368 |
+
{"train_loss": 0.17799238860607147, "profiling/batch_time": 0.4692760109901428, "profiling/data_time": 0.026357440277934074, "epoch": 183, "step": 5703}
|
| 369 |
+
{"lr-LARS/params": 0.886137519184834, "lr-LARS/params_no_weight_decay": 0.886137519184834, "step": 5734}
|
| 370 |
+
{"train_loss": 0.1867348551750183, "profiling/batch_time": 0.4702256917953491, "profiling/data_time": 0.02666258066892624, "epoch": 184, "step": 5734}
|
| 371 |
+
{"lr-LARS/params": 0.8728373948396408, "lr-LARS/params_no_weight_decay": 0.8728373948396408, "step": 5765}
|
| 372 |
+
{"train_loss": 0.17166811227798462, "profiling/batch_time": 0.4688814878463745, "profiling/data_time": 0.023565217852592468, "epoch": 185, "step": 5765}
|
| 373 |
+
{"lr-LARS/params": 0.8595874506482426, "lr-LARS/params_no_weight_decay": 0.8595874506482426, "step": 5796}
|
| 374 |
+
{"train_loss": 0.1762077659368515, "profiling/batch_time": 0.46982041001319885, "profiling/data_time": 0.025917453691363335, "epoch": 186, "step": 5796}
|
| 375 |
+
{"lr-LARS/params": 0.8463892418963186, "lr-LARS/params_no_weight_decay": 0.8463892418963186, "step": 5827}
|
| 376 |
+
{"train_loss": 0.18132489919662476, "profiling/batch_time": 0.4706161916255951, "profiling/data_time": 0.02385239489376545, "epoch": 187, "step": 5827}
|
| 377 |
+
{"lr-LARS/params": 0.8332443177968126, "lr-LARS/params_no_weight_decay": 0.8332443177968126, "step": 5858}
|
| 378 |
+
{"train_loss": 0.17243419587612152, "profiling/batch_time": 0.47214871644973755, "profiling/data_time": 0.031371526420116425, "epoch": 188, "step": 5858}
|
| 379 |
+
{"lr-LARS/params": 0.8201542213080886, "lr-LARS/params_no_weight_decay": 0.8201542213080886, "step": 5889}
|
| 380 |
+
{"train_loss": 0.17244185507297516, "profiling/batch_time": 0.4704183042049408, "profiling/data_time": 0.0264283437281847, "epoch": 189, "step": 5889}
|
| 381 |
+
{"lr-LARS/params": 0.8071204889528153, "lr-LARS/params_no_weight_decay": 0.8071204889528153, "step": 5920}
|
| 382 |
+
{"train_loss": 0.1759347915649414, "profiling/batch_time": 0.4692685902118683, "profiling/data_time": 0.02450292371213436, "epoch": 190, "step": 5920}
|
| 383 |
+
{"lr-LARS/params": 0.7941446506376074, "lr-LARS/params_no_weight_decay": 0.7941446506376074, "step": 5951}
|
| 384 |
+
{"train_loss": 0.17409648001194, "profiling/batch_time": 0.46907100081443787, "profiling/data_time": 0.02345338836312294, "epoch": 191, "step": 5951}
|
| 385 |
+
{"lr-LARS/params": 0.7812282294734473, "lr-LARS/params_no_weight_decay": 0.7812282294734473, "step": 5982}
|
| 386 |
+
{"train_loss": 0.17324259877204895, "profiling/batch_time": 0.4691750407218933, "profiling/data_time": 0.03201591968536377, "epoch": 192, "step": 5982}
|
| 387 |
+
{"lr-LARS/params": 0.7683727415968987, "lr-LARS/params_no_weight_decay": 0.7683727415968987, "step": 6013}
|
| 388 |
+
{"train_loss": 0.18373456597328186, "profiling/batch_time": 0.46880653500556946, "profiling/data_time": 0.023174002766609192, "epoch": 193, "step": 6013}
|
| 389 |
+
{"lr-LARS/params": 0.7555796959921441, "lr-LARS/params_no_weight_decay": 0.7555796959921441, "step": 6044}
|
| 390 |
+
{"train_loss": 0.17009755969047546, "profiling/batch_time": 0.4682861864566803, "profiling/data_time": 0.02313772775232792, "epoch": 194, "step": 6044}
|
| 391 |
+
{"lr-LARS/params": 0.742850594313855, "lr-LARS/params_no_weight_decay": 0.742850594313855, "step": 6075}
|
| 392 |
+
{"train_loss": 0.18050645291805267, "profiling/batch_time": 0.47031712532043457, "profiling/data_time": 0.02319205366075039, "epoch": 195, "step": 6075}
|
| 393 |
+
{"lr-LARS/params": 0.730186930710934, "lr-LARS/params_no_weight_decay": 0.730186930710934, "step": 6106}
|
| 394 |
+
{"train_loss": 0.17486198246479034, "profiling/batch_time": 0.4704400300979614, "profiling/data_time": 0.025110362097620964, "epoch": 196, "step": 6106}
|
| 395 |
+
{"lr-LARS/params": 0.7175901916511243, "lr-LARS/params_no_weight_decay": 0.7175901916511243, "step": 6137}
|
| 396 |
+
{"train_loss": 0.1716179996728897, "profiling/batch_time": 0.470682293176651, "profiling/data_time": 0.023940419778227806, "epoch": 197, "step": 6137}
|
| 397 |
+
{"lr-LARS/params": 0.7050618557465294, "lr-LARS/params_no_weight_decay": 0.7050618557465294, "step": 6168}
|
| 398 |
+
{"train_loss": 0.17658409476280212, "profiling/batch_time": 0.47011345624923706, "profiling/data_time": 0.023757848888635635, "epoch": 198, "step": 6168}
|
| 399 |
+
{"lr-LARS/params": 0.692603393580054, "lr-LARS/params_no_weight_decay": 0.692603393580054, "step": 6199}
|
| 400 |
+
{"train_loss": 0.17314349114894867, "profiling/batch_time": 0.46732988953590393, "profiling/data_time": 0.031480688601732254, "epoch": 199, "step": 6199}
|
| 401 |
+
{"lr-LARS/params": 0.6802162675327853, "lr-LARS/params_no_weight_decay": 0.6802162675327853, "step": 6230}
|
| 402 |
+
{"train_loss": 0.1696050763130188, "profiling/batch_time": 0.47118160128593445, "profiling/data_time": 0.023963479325175285, "epoch": 200, "step": 6230}
|
| 403 |
+
{"lr-LARS/params": 0.667901931612338, "lr-LARS/params_no_weight_decay": 0.667901931612338, "step": 6261}
|
| 404 |
+
{"train_loss": 0.17569397389888763, "profiling/batch_time": 0.4709070920944214, "profiling/data_time": 0.024037225171923637, "epoch": 201, "step": 6261}
|
| 405 |
+
{"lr-LARS/params": 0.6556618312821813, "lr-LARS/params_no_weight_decay": 0.6556618312821813, "step": 6292}
|
| 406 |
+
{"train_loss": 0.17421793937683105, "profiling/batch_time": 0.4687694311141968, "profiling/data_time": 0.023792143911123276, "epoch": 202, "step": 6292}
|
| 407 |
+
{"lr-LARS/params": 0.6434974032919711, "lr-LARS/params_no_weight_decay": 0.6434974032919711, "step": 6323}
|
| 408 |
+
{"train_loss": 0.17294025421142578, "profiling/batch_time": 0.4699268639087677, "profiling/data_time": 0.023539885878562927, "epoch": 203, "step": 6323}
|
| 409 |
+
{"lr-LARS/params": 0.6314100755089015, "lr-LARS/params_no_weight_decay": 0.6314100755089015, "step": 6354}
|
| 410 |
+
{"train_loss": 0.1699761301279068, "profiling/batch_time": 0.47126707434654236, "profiling/data_time": 0.024530822411179543, "epoch": 204, "step": 6354}
|
| 411 |
+
{"lr-LARS/params": 0.619401266750104, "lr-LARS/params_no_weight_decay": 0.619401266750104, "step": 6385}
|
| 412 |
+
{"train_loss": 0.18586432933807373, "profiling/batch_time": 0.47063204646110535, "profiling/data_time": 0.024034913629293442, "epoch": 205, "step": 6385}
|
| 413 |
+
{"lr-LARS/params": 0.6074723866161037, "lr-LARS/params_no_weight_decay": 0.6074723866161037, "step": 6416}
|
| 414 |
+
{"train_loss": 0.1777760088443756, "profiling/batch_time": 0.46967262029647827, "profiling/data_time": 0.023256419226527214, "epoch": 206, "step": 6416}
|
| 415 |
+
{"lr-LARS/params": 0.5956248353253584, "lr-LARS/params_no_weight_decay": 0.5956248353253584, "step": 6447}
|
| 416 |
+
{"train_loss": 0.1858465075492859, "profiling/batch_time": 0.4704241156578064, "profiling/data_time": 0.023957030847668648, "epoch": 207, "step": 6447}
|
| 417 |
+
{"lr-LARS/params": 0.5838600035499037, "lr-LARS/params_no_weight_decay": 0.5838600035499037, "step": 6478}
|
| 418 |
+
{"train_loss": 0.18223711848258972, "profiling/batch_time": 0.4698233902454376, "profiling/data_time": 0.023448243737220764, "epoch": 208, "step": 6478}
|
| 419 |
+
{"lr-LARS/params": 0.5721792722521125, "lr-LARS/params_no_weight_decay": 0.5721792722521125, "step": 6509}
|
| 420 |
+
{"train_loss": 0.1881972998380661, "profiling/batch_time": 0.46963635087013245, "profiling/data_time": 0.023899145424365997, "epoch": 209, "step": 6509}
|
| 421 |
+
{"lr-LARS/params": 0.5605840125225995, "lr-LARS/params_no_weight_decay": 0.5605840125225995, "step": 6540}
|
| 422 |
+
{"train_loss": 0.1725481003522873, "profiling/batch_time": 0.47019556164741516, "profiling/data_time": 0.024432366713881493, "epoch": 210, "step": 6540}
|
| 423 |
+
{"lr-LARS/params": 0.5490755854192773, "lr-LARS/params_no_weight_decay": 0.5490755854192773, "step": 6571}
|
| 424 |
+
{"train_loss": 0.1735512614250183, "profiling/batch_time": 0.4700040817260742, "profiling/data_time": 0.03137827664613724, "epoch": 211, "step": 6571}
|
| 425 |
+
{"lr-LARS/params": 0.5376553418075991, "lr-LARS/params_no_weight_decay": 0.5376553418075991, "step": 6602}
|
| 426 |
+
{"train_loss": 0.18683743476867676, "profiling/batch_time": 0.46941742300987244, "profiling/data_time": 0.023504413664340973, "epoch": 212, "step": 6602}
|
| 427 |
+
{"lr-LARS/params": 0.5263246222019915, "lr-LARS/params_no_weight_decay": 0.5263246222019915, "step": 6633}
|
| 428 |
+
{"train_loss": 0.1774751991033554, "profiling/batch_time": 0.4708287715911865, "profiling/data_time": 0.023830818012356758, "epoch": 213, "step": 6633}
|
| 429 |
+
{"lr-LARS/params": 0.5150847566085045, "lr-LARS/params_no_weight_decay": 0.5150847566085045, "step": 6664}
|
| 430 |
+
{"train_loss": 0.17156733572483063, "profiling/batch_time": 0.47103527188301086, "profiling/data_time": 0.02527937851846218, "epoch": 214, "step": 6664}
|
| 431 |
+
{"lr-LARS/params": 0.5039370643686943, "lr-LARS/params_no_weight_decay": 0.5039370643686943, "step": 6695}
|
| 432 |
+
{"train_loss": 0.17357511818408966, "profiling/batch_time": 0.46874478459358215, "profiling/data_time": 0.02410346083343029, "epoch": 215, "step": 6695}
|
| 433 |
+
{"lr-LARS/params": 0.4928828540047595, "lr-LARS/params_no_weight_decay": 0.4928828540047595, "step": 6726}
|
| 434 |
+
{"train_loss": 0.1726713478565216, "profiling/batch_time": 0.47165775299072266, "profiling/data_time": 0.024016540497541428, "epoch": 216, "step": 6726}
|
| 435 |
+
{"lr-LARS/params": 0.48192342306594294, "lr-LARS/params_no_weight_decay": 0.48192342306594294, "step": 6757}
|
| 436 |
+
{"train_loss": 0.17518627643585205, "profiling/batch_time": 0.47232627868652344, "profiling/data_time": 0.026456331834197044, "epoch": 217, "step": 6757}
|
| 437 |
+
{"lr-LARS/params": 0.47106005797622846, "lr-LARS/params_no_weight_decay": 0.47106005797622846, "step": 6788}
|
| 438 |
+
{"train_loss": 0.18039396405220032, "profiling/batch_time": 0.4705643355846405, "profiling/data_time": 0.02345082350075245, "epoch": 218, "step": 6788}
|
| 439 |
+
{"lr-LARS/params": 0.4602940338833392, "lr-LARS/params_no_weight_decay": 0.4602940338833392, "step": 6819}
|
| 440 |
+
{"train_loss": 0.17665861546993256, "profiling/batch_time": 0.4709289073944092, "profiling/data_time": 0.024473879486322403, "epoch": 219, "step": 6819}
|
| 441 |
+
{"lr-LARS/params": 0.4496266145090563, "lr-LARS/params_no_weight_decay": 0.4496266145090563, "step": 6850}
|
| 442 |
+
{"train_loss": 0.17685608565807343, "profiling/batch_time": 0.4734596312046051, "profiling/data_time": 0.024054864421486855, "epoch": 220, "step": 6850}
|
| 443 |
+
{"lr-LARS/params": 0.439059052000887, "lr-LARS/params_no_weight_decay": 0.439059052000887, "step": 6881}
|
| 444 |
+
{"train_loss": 0.1755453646183014, "profiling/batch_time": 0.4687703847885132, "profiling/data_time": 0.023681266233325005, "epoch": 221, "step": 6881}
|
| 445 |
+
{"lr-LARS/params": 0.428592586785085, "lr-LARS/params_no_weight_decay": 0.428592586785085, "step": 6912}
|
| 446 |
+
{"train_loss": 0.17741039395332336, "profiling/batch_time": 0.4691685140132904, "profiling/data_time": 0.023477407172322273, "epoch": 222, "step": 6912}
|
| 447 |
+
{"lr-LARS/params": 0.41822844742104703, "lr-LARS/params_no_weight_decay": 0.41822844742104703, "step": 6943}
|
| 448 |
+
{"train_loss": 0.17507131397724152, "profiling/batch_time": 0.468911737203598, "profiling/data_time": 0.026731377467513084, "epoch": 223, "step": 6943}
|
| 449 |
+
{"lr-LARS/params": 0.4079678504571082, "lr-LARS/params_no_weight_decay": 0.4079678504571082, "step": 6974}
|
| 450 |
+
{"train_loss": 0.18930186331272125, "profiling/batch_time": 0.47206446528434753, "profiling/data_time": 0.025731287896633148, "epoch": 224, "step": 6974}
|
| 451 |
+
{"lr-LARS/params": 0.3978120002877387, "lr-LARS/params_no_weight_decay": 0.3978120002877387, "step": 7005}
|
| 452 |
+
{"train_loss": 0.17496463656425476, "profiling/batch_time": 0.4707791209220886, "profiling/data_time": 0.023680970072746277, "epoch": 225, "step": 7005}
|
| 453 |
+
{"lr-LARS/params": 0.387762089012172, "lr-LARS/params_no_weight_decay": 0.387762089012172, "step": 7036}
|
| 454 |
+
{"train_loss": 0.16944003105163574, "profiling/batch_time": 0.4691097140312195, "profiling/data_time": 0.026817282661795616, "epoch": 226, "step": 7036}
|
| 455 |
+
{"lr-LARS/params": 0.377819296294478, "lr-LARS/params_no_weight_decay": 0.377819296294478, "step": 7067}
|
| 456 |
+
{"train_loss": 0.17224831879138947, "profiling/batch_time": 0.4705941081047058, "profiling/data_time": 0.027431942522525787, "epoch": 227, "step": 7067}
|
| 457 |
+
{"lr-LARS/params": 0.36798478922509065, "lr-LARS/params_no_weight_decay": 0.36798478922509065, "step": 7098}
|
| 458 |
+
{"train_loss": 0.1711820662021637, "profiling/batch_time": 0.469564825296402, "profiling/data_time": 0.023637978360056877, "epoch": 228, "step": 7098}
|
| 459 |
+
{"lr-LARS/params": 0.35825972218381696, "lr-LARS/params_no_weight_decay": 0.35825972218381696, "step": 7129}
|
| 460 |
+
{"train_loss": 0.18665482103824615, "profiling/batch_time": 0.47249582409858704, "profiling/data_time": 0.023317711427807808, "epoch": 229, "step": 7129}
|
| 461 |
+
{"lr-LARS/params": 0.34864523670433173, "lr-LARS/params_no_weight_decay": 0.34864523670433173, "step": 7160}
|
| 462 |
+
{"train_loss": 0.17504683136940002, "profiling/batch_time": 0.46968162059783936, "profiling/data_time": 0.022987984120845795, "epoch": 230, "step": 7160}
|
| 463 |
+
{"lr-LARS/params": 0.3391424613401869, "lr-LARS/params_no_weight_decay": 0.3391424613401869, "step": 7191}
|
| 464 |
+
{"train_loss": 0.18390890955924988, "profiling/batch_time": 0.4694560170173645, "profiling/data_time": 0.02294323407113552, "epoch": 231, "step": 7191}
|
| 465 |
+
{"lr-LARS/params": 0.32975251153233975, "lr-LARS/params_no_weight_decay": 0.32975251153233975, "step": 7222}
|
| 466 |
+
{"train_loss": 0.17491415143013, "profiling/batch_time": 0.4698273837566376, "profiling/data_time": 0.02227500081062317, "epoch": 232, "step": 7222}
|
| 467 |
+
{"lr-LARS/params": 0.32047648947822277, "lr-LARS/params_no_weight_decay": 0.32047648947822277, "step": 7253}
|
| 468 |
+
{"train_loss": 0.17829033732414246, "profiling/batch_time": 0.46955645084381104, "profiling/data_time": 0.027261799201369286, "epoch": 233, "step": 7253}
|
| 469 |
+
{"lr-LARS/params": 0.31131548400236564, "lr-LARS/params_no_weight_decay": 0.31131548400236564, "step": 7284}
|
| 470 |
+
{"train_loss": 0.17597118020057678, "profiling/batch_time": 0.46957966685295105, "profiling/data_time": 0.024820178747177124, "epoch": 234, "step": 7284}
|
| 471 |
+
{"lr-LARS/params": 0.3022705704285903, "lr-LARS/params_no_weight_decay": 0.3022705704285903, "step": 7315}
|
| 472 |
+
{"train_loss": 0.17364805936813354, "profiling/batch_time": 0.4696313738822937, "profiling/data_time": 0.022559965029358864, "epoch": 235, "step": 7315}
|
| 473 |
+
{"lr-LARS/params": 0.2933428104537867, "lr-LARS/params_no_weight_decay": 0.2933428104537867, "step": 7346}
|
| 474 |
+
{"train_loss": 0.18688234686851501, "profiling/batch_time": 0.46648359298706055, "profiling/data_time": 0.0223576370626688, "epoch": 236, "step": 7346}
|
| 475 |
+
{"lr-LARS/params": 0.28453325202329205, "lr-LARS/params_no_weight_decay": 0.28453325202329205, "step": 7377}
|
| 476 |
+
{"train_loss": 0.17504848539829254, "profiling/batch_time": 0.4676859676837921, "profiling/data_time": 0.02700962871313095, "epoch": 237, "step": 7377}
|
| 477 |
+
{"lr-LARS/params": 0.275842929207883, "lr-LARS/params_no_weight_decay": 0.275842929207883, "step": 7408}
|
| 478 |
+
{"train_loss": 0.17112015187740326, "profiling/batch_time": 0.46838268637657166, "profiling/data_time": 0.024366319179534912, "epoch": 238, "step": 7408}
|
| 479 |
+
{"lr-LARS/params": 0.26727286208239215, "lr-LARS/params_no_weight_decay": 0.26727286208239215, "step": 7439}
|
| 480 |
+
{"train_loss": 0.16941964626312256, "profiling/batch_time": 0.4682299792766571, "profiling/data_time": 0.022226206958293915, "epoch": 239, "step": 7439}
|
| 481 |
+
{"lr-LARS/params": 0.25882405660597557, "lr-LARS/params_no_weight_decay": 0.25882405660597557, "step": 7470}
|
| 482 |
+
{"train_loss": 0.17192833125591278, "profiling/batch_time": 0.46854209899902344, "profiling/data_time": 0.022985657677054405, "epoch": 240, "step": 7470}
|
| 483 |
+
{"lr-LARS/params": 0.25049750450402986, "lr-LARS/params_no_weight_decay": 0.25049750450402986, "step": 7501}
|
| 484 |
+
{"train_loss": 0.17485351860523224, "profiling/batch_time": 0.470010906457901, "profiling/data_time": 0.022896917536854744, "epoch": 241, "step": 7501}
|
| 485 |
+
{"lr-LARS/params": 0.24229418315178436, "lr-LARS/params_no_weight_decay": 0.24229418315178436, "step": 7532}
|
| 486 |
+
{"train_loss": 0.17267051339149475, "profiling/batch_time": 0.4714600443840027, "profiling/data_time": 0.028281528502702713, "epoch": 242, "step": 7532}
|
| 487 |
+
{"lr-LARS/params": 0.2342150554595754, "lr-LARS/params_no_weight_decay": 0.2342150554595754, "step": 7563}
|
| 488 |
+
{"train_loss": 0.17282910645008087, "profiling/batch_time": 0.4686413109302521, "profiling/data_time": 0.023492898792028427, "epoch": 243, "step": 7563}
|
| 489 |
+
{"lr-LARS/params": 0.2262610697598202, "lr-LARS/params_no_weight_decay": 0.2262610697598202, "step": 7594}
|
| 490 |
+
{"train_loss": 0.16973815858364105, "profiling/batch_time": 0.4690682291984558, "profiling/data_time": 0.025534961372613907, "epoch": 244, "step": 7594}
|
| 491 |
+
{"lr-LARS/params": 0.2184331596956995, "lr-LARS/params_no_weight_decay": 0.2184331596956995, "step": 7625}
|
| 492 |
+
{"train_loss": 0.17366011440753937, "profiling/batch_time": 0.47061821818351746, "profiling/data_time": 0.024806160479784012, "epoch": 245, "step": 7625}
|
| 493 |
+
{"lr-LARS/params": 0.21073224411156807, "lr-LARS/params_no_weight_decay": 0.21073224411156807, "step": 7656}
|
| 494 |
+
{"train_loss": 0.18184788525104523, "profiling/batch_time": 0.4697961211204529, "profiling/data_time": 0.02579355798661709, "epoch": 246, "step": 7656}
|
| 495 |
+
{"lr-LARS/params": 0.20315922694509966, "lr-LARS/params_no_weight_decay": 0.20315922694509966, "step": 7687}
|
| 496 |
+
{"train_loss": 0.17811010777950287, "profiling/batch_time": 0.46715620160102844, "profiling/data_time": 0.023613903671503067, "epoch": 247, "step": 7687}
|
| 497 |
+
{"lr-LARS/params": 0.19571499712118162, "lr-LARS/params_no_weight_decay": 0.19571499712118162, "step": 7718}
|
| 498 |
+
{"train_loss": 0.17663444578647614, "profiling/batch_time": 0.47011804580688477, "profiling/data_time": 0.022987840697169304, "epoch": 248, "step": 7718}
|
| 499 |
+
{"lr-LARS/params": 0.1884004284475717, "lr-LARS/params_no_weight_decay": 0.1884004284475717, "step": 7749}
|
| 500 |
+
{"train_loss": 0.17067159712314606, "profiling/batch_time": 0.47019141912460327, "profiling/data_time": 0.022993462160229683, "epoch": 249, "step": 7749}
|
| 501 |
+
{"lr-LARS/params": 0.1812163795123325, "lr-LARS/params_no_weight_decay": 0.1812163795123325, "step": 7780}
|
| 502 |
+
{"train_loss": 0.17344437539577484, "profiling/batch_time": 0.4685094654560089, "profiling/data_time": 0.02382558025419712, "epoch": 250, "step": 7780}
|
| 503 |
+
{"lr-LARS/params": 0.17416369358304803, "lr-LARS/params_no_weight_decay": 0.17416369358304803, "step": 7811}
|
| 504 |
+
{"train_loss": 0.18616145849227905, "profiling/batch_time": 0.4705277383327484, "profiling/data_time": 0.026447905227541924, "epoch": 251, "step": 7811}
|
| 505 |
+
{"lr-LARS/params": 0.1672431985078409, "lr-LARS/params_no_weight_decay": 0.1672431985078409, "step": 7842}
|
| 506 |
+
{"train_loss": 0.17241549491882324, "profiling/batch_time": 0.4699751138687134, "profiling/data_time": 0.025471027940511703, "epoch": 252, "step": 7842}
|
| 507 |
+
{"lr-LARS/params": 0.16045570661819894, "lr-LARS/params_no_weight_decay": 0.16045570661819894, "step": 7873}
|
| 508 |
+
{"train_loss": 0.1905423104763031, "profiling/batch_time": 0.4708714783191681, "profiling/data_time": 0.028189852833747864, "epoch": 253, "step": 7873}
|
| 509 |
+
{"lr-LARS/params": 0.15380201463362436, "lr-LARS/params_no_weight_decay": 0.15380201463362436, "step": 7904}
|
| 510 |
+
{"train_loss": 0.1750146746635437, "profiling/batch_time": 0.4708270728588104, "profiling/data_time": 0.02421695366501808, "epoch": 254, "step": 7904}
|
| 511 |
+
{"lr-LARS/params": 0.1472829035681122, "lr-LARS/params_no_weight_decay": 0.1472829035681122, "step": 7935}
|
| 512 |
+
{"train_loss": 0.17938385903835297, "profiling/batch_time": 0.4684388041496277, "profiling/data_time": 0.023896358907222748, "epoch": 255, "step": 7935}
|
| 513 |
+
{"lr-LARS/params": 0.14089913863847636, "lr-LARS/params_no_weight_decay": 0.14089913863847636, "step": 7966}
|
| 514 |
+
{"train_loss": 0.17377354204654694, "profiling/batch_time": 0.4709418714046478, "profiling/data_time": 0.02328607439994812, "epoch": 256, "step": 7966}
|
| 515 |
+
{"lr-LARS/params": 0.13465146917452847, "lr-LARS/params_no_weight_decay": 0.13465146917452847, "step": 7997}
|
| 516 |
+
{"train_loss": 0.17621932923793793, "profiling/batch_time": 0.4689711034297943, "profiling/data_time": 0.02327880449593067, "epoch": 257, "step": 7997}
|
| 517 |
+
{"lr-LARS/params": 0.12854062853111844, "lr-LARS/params_no_weight_decay": 0.12854062853111844, "step": 8028}
|
| 518 |
+
{"train_loss": 0.17323483526706696, "profiling/batch_time": 0.4690542221069336, "profiling/data_time": 0.024137593805789948, "epoch": 258, "step": 8028}
|
| 519 |
+
{"lr-LARS/params": 0.12256733400205563, "lr-LARS/params_no_weight_decay": 0.12256733400205563, "step": 8059}
|
| 520 |
+
{"train_loss": 0.17152181267738342, "profiling/batch_time": 0.4741074740886688, "profiling/data_time": 0.025246327742934227, "epoch": 259, "step": 8059}
|
| 521 |
+
{"lr-LARS/params": 0.11673228673591102, "lr-LARS/params_no_weight_decay": 0.11673228673591102, "step": 8090}
|
| 522 |
+
{"train_loss": 0.18341588973999023, "profiling/batch_time": 0.47158724069595337, "profiling/data_time": 0.03260882943868637, "epoch": 260, "step": 8090}
|
| 523 |
+
{"lr-LARS/params": 0.11103617165371674, "lr-LARS/params_no_weight_decay": 0.11103617165371674, "step": 8121}
|
| 524 |
+
{"train_loss": 0.18369051814079285, "profiling/batch_time": 0.46973201632499695, "profiling/data_time": 0.024374278262257576, "epoch": 261, "step": 8121}
|
| 525 |
+
{"lr-LARS/params": 0.10547965736856922, "lr-LARS/params_no_weight_decay": 0.10547965736856922, "step": 8152}
|
| 526 |
+
{"train_loss": 0.1768931895494461, "profiling/batch_time": 0.4713733494281769, "profiling/data_time": 0.029736177995800972, "epoch": 262, "step": 8152}
|
| 527 |
+
{"lr-LARS/params": 0.10006339610714717, "lr-LARS/params_no_weight_decay": 0.10006339610714717, "step": 8183}
|
| 528 |
+
{"train_loss": 0.17009064555168152, "profiling/batch_time": 0.46988949179649353, "profiling/data_time": 0.024103861302137375, "epoch": 263, "step": 8183}
|
| 529 |
+
{"lr-LARS/params": 0.09478802363315265, "lr-LARS/params_no_weight_decay": 0.09478802363315265, "step": 8214}
|
| 530 |
+
{"train_loss": 0.1791825294494629, "profiling/batch_time": 0.46998900175094604, "profiling/data_time": 0.024024922400712967, "epoch": 264, "step": 8214}
|
| 531 |
+
{"lr-LARS/params": 0.08965415917268495, "lr-LARS/params_no_weight_decay": 0.08965415917268495, "step": 8245}
|
| 532 |
+
{"train_loss": 0.16830989718437195, "profiling/batch_time": 0.46818381547927856, "profiling/data_time": 0.025359127670526505, "epoch": 265, "step": 8245}
|
| 533 |
+
{"lr-LARS/params": 0.08466240534155624, "lr-LARS/params_no_weight_decay": 0.08466240534155624, "step": 8276}
|
| 534 |
+
{"train_loss": 0.17181190848350525, "profiling/batch_time": 0.4695846140384674, "profiling/data_time": 0.02430625446140766, "epoch": 266, "step": 8276}
|
| 535 |
+
{"lr-LARS/params": 0.07981334807455476, "lr-LARS/params_no_weight_decay": 0.07981334807455476, "step": 8307}
|
| 536 |
+
{"train_loss": 0.17682546377182007, "profiling/batch_time": 0.46950000524520874, "profiling/data_time": 0.023915346711874008, "epoch": 267, "step": 8307}
|
| 537 |
+
{"lr-LARS/params": 0.07510755655666829, "lr-LARS/params_no_weight_decay": 0.07510755655666829, "step": 8338}
|
| 538 |
+
{"train_loss": 0.17384792864322662, "profiling/batch_time": 0.46935296058654785, "profiling/data_time": 0.02563089318573475, "epoch": 268, "step": 8338}
|
| 539 |
+
{"lr-LARS/params": 0.07054558315627286, "lr-LARS/params_no_weight_decay": 0.07054558315627286, "step": 8369}
|
| 540 |
+
{"train_loss": 0.1804978996515274, "profiling/batch_time": 0.4701468348503113, "profiling/data_time": 0.025371229276061058, "epoch": 269, "step": 8369}
|
| 541 |
+
{"lr-LARS/params": 0.06612796336029571, "lr-LARS/params_no_weight_decay": 0.06612796336029571, "step": 8400}
|
| 542 |
+
{"train_loss": 0.17953844368457794, "profiling/batch_time": 0.46890515089035034, "profiling/data_time": 0.02570568583905697, "epoch": 270, "step": 8400}
|
| 543 |
+
{"lr-LARS/params": 0.06185521571135856, "lr-LARS/params_no_weight_decay": 0.06185521571135856, "step": 8431}
|
| 544 |
+
{"train_loss": 0.17598415911197662, "profiling/batch_time": 0.46825990080833435, "profiling/data_time": 0.02364826761186123, "epoch": 271, "step": 8431}
|
| 545 |
+
{"lr-LARS/params": 0.057727841746912145, "lr-LARS/params_no_weight_decay": 0.057727841746912145, "step": 8462}
|
| 546 |
+
{"train_loss": 0.17227326333522797, "profiling/batch_time": 0.4690641164779663, "profiling/data_time": 0.02509116567671299, "epoch": 272, "step": 8462}
|
| 547 |
+
{"lr-LARS/params": 0.05374632594036454, "lr-LARS/params_no_weight_decay": 0.05374632594036454, "step": 8493}
|
| 548 |
+
{"train_loss": 0.18317991495132446, "profiling/batch_time": 0.4689784348011017, "profiling/data_time": 0.023338863626122475, "epoch": 273, "step": 8493}
|
| 549 |
+
{"lr-LARS/params": 0.04991113564421311, "lr-LARS/params_no_weight_decay": 0.04991113564421311, "step": 8524}
|
| 550 |
+
{"train_loss": 0.17789612710475922, "profiling/batch_time": 0.4693887233734131, "profiling/data_time": 0.0251321941614151, "epoch": 274, "step": 8524}
|
| 551 |
+
{"lr-LARS/params": 0.046222721035188034, "lr-LARS/params_no_weight_decay": 0.046222721035188034, "step": 8555}
|
| 552 |
+
{"train_loss": 0.1773565411567688, "profiling/batch_time": 0.47032877802848816, "profiling/data_time": 0.023329760879278183, "epoch": 275, "step": 8555}
|
| 553 |
+
{"lr-LARS/params": 0.04268151506140872, "lr-LARS/params_no_weight_decay": 0.04268151506140872, "step": 8586}
|
| 554 |
+
{"train_loss": 0.17752254009246826, "profiling/batch_time": 0.4684811532497406, "profiling/data_time": 0.02482558973133564, "epoch": 276, "step": 8586}
|
| 555 |
+
{"lr-LARS/params": 0.039287933391564195, "lr-LARS/params_no_weight_decay": 0.039287933391564195, "step": 8617}
|
| 556 |
+
{"train_loss": 0.19069981575012207, "profiling/batch_time": 0.4694057106971741, "profiling/data_time": 0.022813959047198296, "epoch": 277, "step": 8617}
|
| 557 |
+
{"lr-LARS/params": 0.036042374366122766, "lr-LARS/params_no_weight_decay": 0.036042374366122766, "step": 8648}
|
| 558 |
+
{"train_loss": 0.1716260015964508, "profiling/batch_time": 0.4705542027950287, "profiling/data_time": 0.023727353662252426, "epoch": 278, "step": 8648}
|
| 559 |
+
{"lr-LARS/params": 0.03294521895057377, "lr-LARS/params_no_weight_decay": 0.03294521895057377, "step": 8679}
|
| 560 |
+
{"train_loss": 0.18008525669574738, "profiling/batch_time": 0.4697466492652893, "profiling/data_time": 0.023951677605509758, "epoch": 279, "step": 8679}
|
| 561 |
+
{"lr-LARS/params": 0.029996830690709863, "lr-LARS/params_no_weight_decay": 0.029996830690709863, "step": 8710}
|
| 562 |
+
{"train_loss": 0.17219989001750946, "profiling/batch_time": 0.46888473629951477, "profiling/data_time": 0.022902177646756172, "epoch": 280, "step": 8710}
|
| 563 |
+
{"lr-LARS/params": 0.02719755566995376, "lr-LARS/params_no_weight_decay": 0.02719755566995376, "step": 8741}
|
| 564 |
+
{"train_loss": 0.17222197353839874, "profiling/batch_time": 0.47234392166137695, "profiling/data_time": 0.023381344974040985, "epoch": 281, "step": 8741}
|
| 565 |
+
{"lr-LARS/params": 0.02454772246873501, "lr-LARS/params_no_weight_decay": 0.02454772246873501, "step": 8772}
|
| 566 |
+
{"train_loss": 0.1748410016298294, "profiling/batch_time": 0.47060728073120117, "profiling/data_time": 0.02514701709151268, "epoch": 282, "step": 8772}
|
| 567 |
+
{"lr-LARS/params": 0.022047642125920152, "lr-LARS/params_no_weight_decay": 0.022047642125920152, "step": 8803}
|
| 568 |
+
{"train_loss": 0.17345179617404938, "profiling/batch_time": 0.46958237886428833, "profiling/data_time": 0.023087121546268463, "epoch": 283, "step": 8803}
|
| 569 |
+
{"lr-LARS/params": 0.01969760810230426, "lr-LARS/params_no_weight_decay": 0.01969760810230426, "step": 8834}
|
| 570 |
+
{"train_loss": 0.17867422103881836, "profiling/batch_time": 0.4683537483215332, "profiling/data_time": 0.026134122163057327, "epoch": 284, "step": 8834}
|
| 571 |
+
{"lr-LARS/params": 0.01749789624616345, "lr-LARS/params_no_weight_decay": 0.01749789624616345, "step": 8865}
|
| 572 |
+
{"train_loss": 0.17999346554279327, "profiling/batch_time": 0.4701542556285858, "profiling/data_time": 0.023083658888936043, "epoch": 285, "step": 8865}
|
| 573 |
+
{"lr-LARS/params": 0.015448764760875533, "lr-LARS/params_no_weight_decay": 0.015448764760875533, "step": 8896}
|
| 574 |
+
{"train_loss": 0.1751020848751068, "profiling/batch_time": 0.4713783264160156, "profiling/data_time": 0.025269582867622375, "epoch": 286, "step": 8896}
|
| 575 |
+
{"lr-LARS/params": 0.013550454174612337, "lr-LARS/params_no_weight_decay": 0.013550454174612337, "step": 8927}
|
| 576 |
+
{"train_loss": 0.17709775269031525, "profiling/batch_time": 0.4701276123523712, "profiling/data_time": 0.023244669660925865, "epoch": 287, "step": 8927}
|
| 577 |
+
{"lr-LARS/params": 0.011803187312106342, "lr-LARS/params_no_weight_decay": 0.011803187312106342, "step": 8958}
|
| 578 |
+
{"train_loss": 0.17954827845096588, "profiling/batch_time": 0.46981576085090637, "profiling/data_time": 0.024186134338378906, "epoch": 288, "step": 8958}
|
| 579 |
+
{"lr-LARS/params": 0.010207169268495044, "lr-LARS/params_no_weight_decay": 0.010207169268495044, "step": 8989}
|
| 580 |
+
{"train_loss": 0.17205561697483063, "profiling/batch_time": 0.47181233763694763, "profiling/data_time": 0.025601908564567566, "epoch": 289, "step": 8989}
|
| 581 |
+
{"lr-LARS/params": 0.008762587385247678, "lr-LARS/params_no_weight_decay": 0.008762587385247678, "step": 9020}
|
| 582 |
+
{"train_loss": 0.17761465907096863, "profiling/batch_time": 0.4702886641025543, "profiling/data_time": 0.025239525362849236, "epoch": 290, "step": 9020}
|
| 583 |
+
{"lr-LARS/params": 0.007469611228173464, "lr-LARS/params_no_weight_decay": 0.007469611228173464, "step": 9051}
|
| 584 |
+
{"train_loss": 0.1817539930343628, "profiling/batch_time": 0.46833762526512146, "profiling/data_time": 0.027799051254987717, "epoch": 291, "step": 9051}
|
| 585 |
+
{"lr-LARS/params": 0.0063283925675196645, "lr-LARS/params_no_weight_decay": 0.0063283925675196645, "step": 9082}
|
| 586 |
+
{"train_loss": 0.18424828350543976, "profiling/batch_time": 0.4709780812263489, "profiling/data_time": 0.02403721585869789, "epoch": 292, "step": 9082}
|
| 587 |
+
{"lr-LARS/params": 0.005339065360155056, "lr-LARS/params_no_weight_decay": 0.005339065360155056, "step": 9113}
|
| 588 |
+
{"train_loss": 0.16752569377422333, "profiling/batch_time": 0.47176021337509155, "profiling/data_time": 0.02262665331363678, "epoch": 293, "step": 9113}
|
| 589 |
+
{"lr-LARS/params": 0.00450174573384713, "lr-LARS/params_no_weight_decay": 0.00450174573384713, "step": 9144}
|
| 590 |
+
{"train_loss": 0.17096474766731262, "profiling/batch_time": 0.4697352349758148, "profiling/data_time": 0.024586746469140053, "epoch": 294, "step": 9144}
|
| 591 |
+
{"lr-LARS/params": 0.0038165319736305817, "lr-LARS/params_no_weight_decay": 0.0038165319736305817, "step": 9175}
|
| 592 |
+
{"train_loss": 0.16857412457466125, "profiling/batch_time": 0.4673163592815399, "profiling/data_time": 0.023136014118790627, "epoch": 295, "step": 9175}
|
| 593 |
+
{"lr-LARS/params": 0.003283504510270415, "lr-LARS/params_no_weight_decay": 0.003283504510270415, "step": 9206}
|
| 594 |
+
{"train_loss": 0.16997942328453064, "profiling/batch_time": 0.46821701526641846, "profiling/data_time": 0.024742355570197105, "epoch": 296, "step": 9206}
|
| 595 |
+
{"lr-LARS/params": 0.0029027259108212336, "lr-LARS/params_no_weight_decay": 0.0029027259108212336, "step": 9237}
|
| 596 |
+
{"train_loss": 0.17281530797481537, "profiling/batch_time": 0.46862053871154785, "profiling/data_time": 0.023910168558359146, "epoch": 297, "step": 9237}
|
| 597 |
+
{"lr-LARS/params": 0.002674240871282573, "lr-LARS/params_no_weight_decay": 0.002674240871282573, "step": 9268}
|
| 598 |
+
{"train_loss": 0.176463782787323, "profiling/batch_time": 0.47088536620140076, "profiling/data_time": 0.024565089493989944, "epoch": 298, "step": 9268}
|
| 599 |
+
{"lr-LARS/params": 0.002598076211353316, "lr-LARS/params_no_weight_decay": 0.002598076211353316, "step": 9299}
|
| 600 |
+
{"train_loss": 0.18089812994003296, "profiling/batch_time": 0.4701261818408966, "profiling/data_time": 0.024234332144260406, "epoch": 299, "step": 9299}
|