coung21 commited on
Commit
952c544
·
verified ·
1 Parent(s): f2d4dcc

Upload folder using huggingface_hub

Browse files
ssl_distil/convnext/checkpoints/epoch=299-step=9300.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e8ec6863feda1d76792a77bd3e14008b9e1f37ca55a16824f80d43e873219c
3
+ size 235684763
ssl_distil/convnext/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6216cfdb52658186e56cba5534737153c553ce2a077c6e2453a85f5aa1330c79
3
+ size 235684763
ssl_distil/convnext/convnext_distil.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999cda056c13920535cce5e998f0fb71c4c24110d051c4389cd7ed09750bbd70
3
+ size 114616927
ssl_distil/convnext/exported_models/exported_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1083b690486f56b4118a33442eb55b1d69238a3532c2119a4a2ec24a41911afc
3
+ size 114616515
ssl_distil/convnext/metrics.jsonl ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"lr-LARS/params": 0.2598076211353316, "lr-LARS/params_no_weight_decay": 0.2598076211353316, "step": 30}
2
+ {"train_loss": 0.31194546818733215, "profiling/batch_time": 0.4712883234024048, "profiling/data_time": 0.021884361281991005, "epoch": 0, "step": 30}
3
+ {"lr-LARS/params": 0.5196152422706632, "lr-LARS/params_no_weight_decay": 0.5196152422706632, "step": 61}
4
+ {"train_loss": 0.21411606669425964, "profiling/batch_time": 0.46621111035346985, "profiling/data_time": 0.021811336278915405, "epoch": 1, "step": 61}
5
+ {"lr-LARS/params": 0.7794228634059948, "lr-LARS/params_no_weight_decay": 0.7794228634059948, "step": 92}
6
+ {"train_loss": 0.6949778199195862, "profiling/batch_time": 0.468293696641922, "profiling/data_time": 0.0228253360837698, "epoch": 2, "step": 92}
7
+ {"lr-LARS/params": 1.0392304845413265, "lr-LARS/params_no_weight_decay": 1.0392304845413265, "step": 123}
8
+ {"train_loss": 0.1883438229560852, "profiling/batch_time": 0.46862080693244934, "profiling/data_time": 0.025431664660573006, "epoch": 3, "step": 123}
9
+ {"lr-LARS/params": 1.299038105676658, "lr-LARS/params_no_weight_decay": 1.299038105676658, "step": 154}
10
+ {"train_loss": 0.1814563125371933, "profiling/batch_time": 0.4713672399520874, "profiling/data_time": 0.024215664714574814, "epoch": 4, "step": 154}
11
+ {"lr-LARS/params": 1.5588457268119895, "lr-LARS/params_no_weight_decay": 1.5588457268119895, "step": 185}
12
+ {"train_loss": 0.18267196416854858, "profiling/batch_time": 0.46823886036872864, "profiling/data_time": 0.022851664572954178, "epoch": 5, "step": 185}
13
+ {"lr-LARS/params": 1.818653347947321, "lr-LARS/params_no_weight_decay": 1.818653347947321, "step": 216}
14
+ {"train_loss": 0.18083542585372925, "profiling/batch_time": 0.4695097804069519, "profiling/data_time": 0.022794736549258232, "epoch": 6, "step": 216}
15
+ {"lr-LARS/params": 2.078460969082653, "lr-LARS/params_no_weight_decay": 2.078460969082653, "step": 247}
16
+ {"train_loss": 12.949236869812012, "profiling/batch_time": 0.47276806831359863, "profiling/data_time": 0.023699022829532623, "epoch": 7, "step": 247}
17
+ {"lr-LARS/params": 2.3382685902179845, "lr-LARS/params_no_weight_decay": 2.3382685902179845, "step": 278}
18
+ {"train_loss": 14.355443954467773, "profiling/batch_time": 0.46862882375717163, "profiling/data_time": 0.02837073802947998, "epoch": 8, "step": 278}
19
+ {"lr-LARS/params": 2.598076211353316, "lr-LARS/params_no_weight_decay": 2.598076211353316, "step": 309}
20
+ {"train_loss": 373.94317626953125, "profiling/batch_time": 0.47057339549064636, "profiling/data_time": 0.02315656654536724, "epoch": 9, "step": 309}
21
+ {"lr-LARS/params": 2.5980048812424954, "lr-LARS/params_no_weight_decay": 2.5980048812424954, "step": 340}
22
+ {"train_loss": 36.216880798339844, "profiling/batch_time": 0.46941742300987244, "profiling/data_time": 0.0226852186024189, "epoch": 10, "step": 340}
23
+ {"lr-LARS/params": 2.597781309440988, "lr-LARS/params_no_weight_decay": 2.597781309440988, "step": 371}
24
+ {"train_loss": 679.6309204101562, "profiling/batch_time": 0.46974754333496094, "profiling/data_time": 0.02363262139260769, "epoch": 11, "step": 371}
25
+ {"lr-LARS/params": 2.597405442935369, "lr-LARS/params_no_weight_decay": 2.597405442935369, "step": 402}
26
+ {"train_loss": 48.08263397216797, "profiling/batch_time": 0.47047746181488037, "profiling/data_time": 0.022418467327952385, "epoch": 12, "step": 402}
27
+ {"lr-LARS/params": 2.5968773258450537, "lr-LARS/params_no_weight_decay": 2.5968773258450537, "step": 433}
28
+ {"train_loss": 3.6737923622131348, "profiling/batch_time": 0.47189608216285706, "profiling/data_time": 0.02603757753968239, "epoch": 13, "step": 433}
29
+ {"lr-LARS/params": 2.596197020160716, "lr-LARS/params_no_weight_decay": 2.596197020160716, "step": 464}
30
+ {"train_loss": 434.80548095703125, "profiling/batch_time": 0.4708019495010376, "profiling/data_time": 0.023040369153022766, "epoch": 14, "step": 464}
31
+ {"lr-LARS/params": 2.595364605737007, "lr-LARS/params_no_weight_decay": 2.595364605737007, "step": 495}
32
+ {"train_loss": 1805.1368408203125, "profiling/batch_time": 0.4709707200527191, "profiling/data_time": 0.022103408351540565, "epoch": 15, "step": 495}
33
+ {"lr-LARS/params": 2.594380180283186, "lr-LARS/params_no_weight_decay": 2.594380180283186, "step": 526}
34
+ {"train_loss": 39.50508499145508, "profiling/batch_time": 0.4683074951171875, "profiling/data_time": 0.022332806140184402, "epoch": 16, "step": 526}
35
+ {"lr-LARS/params": 2.593243859351649, "lr-LARS/params_no_weight_decay": 2.593243859351649, "step": 557}
36
+ {"train_loss": 14.473100662231445, "profiling/batch_time": 0.46771883964538574, "profiling/data_time": 0.02339872717857361, "epoch": 17, "step": 557}
37
+ {"lr-LARS/params": 2.5919557763243697, "lr-LARS/params_no_weight_decay": 2.5919557763243697, "step": 588}
38
+ {"train_loss": 28.88188934326172, "profiling/batch_time": 0.47216054797172546, "profiling/data_time": 0.02364548295736313, "epoch": 18, "step": 588}
39
+ {"lr-LARS/params": 2.5905160823972344, "lr-LARS/params_no_weight_decay": 2.5905160823972344, "step": 619}
40
+ {"train_loss": 10.441183090209961, "profiling/batch_time": 0.4680430293083191, "profiling/data_time": 0.023268572986125946, "epoch": 19, "step": 619}
41
+ {"lr-LARS/params": 2.5889249465623028, "lr-LARS/params_no_weight_decay": 2.5889249465623028, "step": 650}
42
+ {"train_loss": 633.2659301757812, "profiling/batch_time": 0.4725569486618042, "profiling/data_time": 0.022568566724658012, "epoch": 20, "step": 650}
43
+ {"lr-LARS/params": 2.587182555587967, "lr-LARS/params_no_weight_decay": 2.587182555587967, "step": 681}
44
+ {"train_loss": 6.366476058959961, "profiling/batch_time": 0.4698493182659149, "profiling/data_time": 0.022369032725691795, "epoch": 21, "step": 681}
45
+ {"lr-LARS/params": 2.5852891139970304, "lr-LARS/params_no_weight_decay": 2.5852891139970304, "step": 712}
46
+ {"train_loss": 25.203937530517578, "profiling/batch_time": 0.4697577655315399, "profiling/data_time": 0.023397965356707573, "epoch": 22, "step": 712}
47
+ {"lr-LARS/params": 2.5832448440427, "lr-LARS/params_no_weight_decay": 2.5832448440427, "step": 743}
48
+ {"train_loss": 175.21701049804688, "profiling/batch_time": 0.46950188279151917, "profiling/data_time": 0.024209650233387947, "epoch": 23, "step": 743}
49
+ {"lr-LARS/params": 2.5810499856824984, "lr-LARS/params_no_weight_decay": 2.5810499856824984, "step": 774}
50
+ {"train_loss": 3.678537368774414, "profiling/batch_time": 0.46959489583969116, "profiling/data_time": 0.022325366735458374, "epoch": 24, "step": 774}
51
+ {"lr-LARS/params": 2.578704796550098, "lr-LARS/params_no_weight_decay": 2.578704796550098, "step": 805}
52
+ {"train_loss": 273.7239990234375, "profiling/batch_time": 0.4687223434448242, "profiling/data_time": 0.022188881412148476, "epoch": 25, "step": 805}
53
+ {"lr-LARS/params": 2.5762095519250785, "lr-LARS/params_no_weight_decay": 2.5762095519250785, "step": 836}
54
+ {"train_loss": 50.772857666015625, "profiling/batch_time": 0.4683019518852234, "profiling/data_time": 0.023442016914486885, "epoch": 26, "step": 836}
55
+ {"lr-LARS/params": 2.5735645447006155, "lr-LARS/params_no_weight_decay": 2.5735645447006155, "step": 867}
56
+ {"train_loss": 230.21990966796875, "profiling/batch_time": 0.469540536403656, "profiling/data_time": 0.0220788661390543, "epoch": 27, "step": 867}
57
+ {"lr-LARS/params": 2.5707700853491007, "lr-LARS/params_no_weight_decay": 2.5707700853491007, "step": 898}
58
+ {"train_loss": 745.271728515625, "profiling/batch_time": 0.4677548408508301, "profiling/data_time": 0.023083696141839027, "epoch": 28, "step": 898}
59
+ {"lr-LARS/params": 2.5678265018856963, "lr-LARS/params_no_weight_decay": 2.5678265018856963, "step": 929}
60
+ {"train_loss": 10.371131896972656, "profiling/batch_time": 0.46908554434776306, "profiling/data_time": 0.023055508732795715, "epoch": 29, "step": 929}
61
+ {"lr-LARS/params": 2.5647341398298367, "lr-LARS/params_no_weight_decay": 2.5647341398298367, "step": 960}
62
+ {"train_loss": 16.156049728393555, "profiling/batch_time": 0.47020572423934937, "profiling/data_time": 0.028595363721251488, "epoch": 30, "step": 960}
63
+ {"lr-LARS/params": 2.5614933621646667, "lr-LARS/params_no_weight_decay": 2.5614933621646667, "step": 991}
64
+ {"train_loss": 6.883659839630127, "profiling/batch_time": 0.4708785116672516, "profiling/data_time": 0.022283844649791718, "epoch": 31, "step": 991}
65
+ {"lr-LARS/params": 2.5581045492944376, "lr-LARS/params_no_weight_decay": 2.5581045492944376, "step": 1022}
66
+ {"train_loss": 55.311344146728516, "profiling/batch_time": 0.4709187150001526, "profiling/data_time": 0.023392993956804276, "epoch": 32, "step": 1022}
67
+ {"lr-LARS/params": 2.5545680989998525, "lr-LARS/params_no_weight_decay": 2.5545680989998525, "step": 1053}
68
+ {"train_loss": 7.234062671661377, "profiling/batch_time": 0.4700948894023895, "profiling/data_time": 0.023695729672908783, "epoch": 33, "step": 1053}
69
+ {"lr-LARS/params": 2.550884426391377, "lr-LARS/params_no_weight_decay": 2.550884426391377, "step": 1084}
70
+ {"train_loss": 4.21544075012207, "profiling/batch_time": 0.4696831703186035, "profiling/data_time": 0.022442584857344627, "epoch": 34, "step": 1084}
71
+ {"lr-LARS/params": 2.547053963860512, "lr-LARS/params_no_weight_decay": 2.547053963860512, "step": 1115}
72
+ {"train_loss": 144.6874237060547, "profiling/batch_time": 0.47021734714508057, "profiling/data_time": 0.024488767609000206, "epoch": 35, "step": 1115}
73
+ {"lr-LARS/params": 2.543077161029039, "lr-LARS/params_no_weight_decay": 2.543077161029039, "step": 1146}
74
+ {"train_loss": 2.871257781982422, "profiling/batch_time": 0.4703052043914795, "profiling/data_time": 0.02254762314260006, "epoch": 36, "step": 1146}
75
+ {"lr-LARS/params": 2.538954484696244, "lr-LARS/params_no_weight_decay": 2.538954484696244, "step": 1177}
76
+ {"train_loss": 6.2141900062561035, "profiling/batch_time": 0.4700213372707367, "profiling/data_time": 0.021778080612421036, "epoch": 37, "step": 1177}
77
+ {"lr-LARS/params": 2.5346864187841254, "lr-LARS/params_no_weight_decay": 2.5346864187841254, "step": 1208}
78
+ {"train_loss": 97.45619201660156, "profiling/batch_time": 0.4678700566291809, "profiling/data_time": 0.02312382310628891, "epoch": 38, "step": 1208}
79
+ {"lr-LARS/params": 2.5302734642805884, "lr-LARS/params_no_weight_decay": 2.5302734642805884, "step": 1239}
80
+ {"train_loss": 1.6031452417373657, "profiling/batch_time": 0.4692513942718506, "profiling/data_time": 0.0238487608730793, "epoch": 39, "step": 1239}
81
+ {"lr-LARS/params": 2.5257161391806404, "lr-LARS/params_no_weight_decay": 2.5257161391806404, "step": 1270}
82
+ {"train_loss": 99.60950469970703, "profiling/batch_time": 0.4726502001285553, "profiling/data_time": 0.02309952676296234, "epoch": 40, "step": 1270}
83
+ {"lr-LARS/params": 2.521014978425588, "lr-LARS/params_no_weight_decay": 2.521014978425588, "step": 1301}
84
+ {"train_loss": 2.8074140548706055, "profiling/batch_time": 0.46869608759880066, "profiling/data_time": 0.023540539667010307, "epoch": 41, "step": 1301}
85
+ {"lr-LARS/params": 2.5161705338402474, "lr-LARS/params_no_weight_decay": 2.5161705338402474, "step": 1332}
86
+ {"train_loss": 64.60332489013672, "profiling/batch_time": 0.46702662110328674, "profiling/data_time": 0.02296690084040165, "epoch": 42, "step": 1332}
87
+ {"lr-LARS/params": 2.5111833740681657, "lr-LARS/params_no_weight_decay": 2.5111833740681657, "step": 1363}
88
+ {"train_loss": 0.3012450933456421, "profiling/batch_time": 0.46822068095207214, "profiling/data_time": 0.022337641566991806, "epoch": 43, "step": 1363}
89
+ {"lr-LARS/params": 2.506054084504878, "lr-LARS/params_no_weight_decay": 2.506054084504878, "step": 1394}
90
+ {"train_loss": 807.9827880859375, "profiling/batch_time": 0.4699914753437042, "profiling/data_time": 0.022885942831635475, "epoch": 44, "step": 1394}
91
+ {"lr-LARS/params": 2.5007832672291936, "lr-LARS/params_no_weight_decay": 2.5007832672291936, "step": 1425}
92
+ {"train_loss": 70.48700714111328, "profiling/batch_time": 0.4716077148914337, "profiling/data_time": 0.022688956931233406, "epoch": 45, "step": 1425}
93
+ {"lr-LARS/params": 2.4953715409325197, "lr-LARS/params_no_weight_decay": 2.4953715409325197, "step": 1456}
94
+ {"train_loss": 14.633325576782227, "profiling/batch_time": 0.47024765610694885, "profiling/data_time": 0.027802862226963043, "epoch": 46, "step": 1456}
95
+ {"lr-LARS/params": 2.489819540846241, "lr-LARS/params_no_weight_decay": 2.489819540846241, "step": 1487}
96
+ {"train_loss": 33.042945861816406, "profiling/batch_time": 0.4712398052215576, "profiling/data_time": 0.023220637813210487, "epoch": 47, "step": 1487}
97
+ {"lr-LARS/params": 2.4841279186671574, "lr-LARS/params_no_weight_decay": 2.4841279186671574, "step": 1518}
98
+ {"train_loss": 4.175399303436279, "profiling/batch_time": 0.4710855782032013, "profiling/data_time": 0.02227308601140976, "epoch": 48, "step": 1518}
99
+ {"lr-LARS/params": 2.478297342480987, "lr-LARS/params_no_weight_decay": 2.478297342480987, "step": 1549}
100
+ {"train_loss": 0.5910549163818359, "profiling/batch_time": 0.4703836143016815, "profiling/data_time": 0.02244972251355648, "epoch": 49, "step": 1549}
101
+ {"lr-LARS/params": 2.472328496683943, "lr-LARS/params_no_weight_decay": 2.472328496683943, "step": 1580}
102
+ {"train_loss": 1.203653335571289, "profiling/batch_time": 0.4672217071056366, "profiling/data_time": 0.022451914846897125, "epoch": 50, "step": 1580}
103
+ {"lr-LARS/params": 2.4662220819024014, "lr-LARS/params_no_weight_decay": 2.4662220819024014, "step": 1611}
104
+ {"train_loss": 2.976810932159424, "profiling/batch_time": 0.4698045253753662, "profiling/data_time": 0.023852217942476273, "epoch": 51, "step": 1611}
105
+ {"lr-LARS/params": 2.459978814910663, "lr-LARS/params_no_weight_decay": 2.459978814910663, "step": 1642}
106
+ {"train_loss": 1.902485728263855, "profiling/batch_time": 0.4728449285030365, "profiling/data_time": 0.028130510821938515, "epoch": 52, "step": 1642}
107
+ {"lr-LARS/params": 2.453599428546812, "lr-LARS/params_no_weight_decay": 2.453599428546812, "step": 1673}
108
+ {"train_loss": 0.8719056844711304, "profiling/batch_time": 0.47223368287086487, "profiling/data_time": 0.022841578349471092, "epoch": 53, "step": 1673}
109
+ {"lr-LARS/params": 2.4470846716267016, "lr-LARS/params_no_weight_decay": 2.4470846716267016, "step": 1704}
110
+ {"train_loss": 0.8949082493782043, "profiling/batch_time": 0.4685279428958893, "profiling/data_time": 0.023392828181385994, "epoch": 54, "step": 1704}
111
+ {"lr-LARS/params": 2.440435308856054, "lr-LARS/params_no_weight_decay": 2.440435308856054, "step": 1735}
112
+ {"train_loss": 23.14362335205078, "profiling/batch_time": 0.47284650802612305, "profiling/data_time": 0.024017976596951485, "epoch": 55, "step": 1735}
113
+ {"lr-LARS/params": 2.433652120740699, "lr-LARS/params_no_weight_decay": 2.433652120740699, "step": 1766}
114
+ {"train_loss": 19.518367767333984, "profiling/batch_time": 0.47030460834503174, "profiling/data_time": 0.02424721233546734, "epoch": 56, "step": 1766}
115
+ {"lr-LARS/params": 2.426735903494959, "lr-LARS/params_no_weight_decay": 2.426735903494959, "step": 1797}
116
+ {"train_loss": 16.87958526611328, "profiling/batch_time": 0.4696018099784851, "profiling/data_time": 0.02526361681520939, "epoch": 57, "step": 1797}
117
+ {"lr-LARS/params": 2.4196874689481884, "lr-LARS/params_no_weight_decay": 2.4196874689481884, "step": 1828}
118
+ {"train_loss": 1.131284236907959, "profiling/batch_time": 0.47009196877479553, "profiling/data_time": 0.024905087426304817, "epoch": 58, "step": 1828}
119
+ {"lr-LARS/params": 2.4125076444494793, "lr-LARS/params_no_weight_decay": 2.4125076444494793, "step": 1859}
120
+ {"train_loss": 12.683453559875488, "profiling/batch_time": 0.46810001134872437, "profiling/data_time": 0.023802831768989563, "epoch": 59, "step": 1859}
121
+ {"lr-LARS/params": 2.40519727277055, "lr-LARS/params_no_weight_decay": 2.40519727277055, "step": 1890}
122
+ {"train_loss": 1.3263578414916992, "profiling/batch_time": 0.468555212020874, "profiling/data_time": 0.0221959687769413, "epoch": 60, "step": 1890}
123
+ {"lr-LARS/params": 2.397757212006817, "lr-LARS/params_no_weight_decay": 2.397757212006817, "step": 1921}
124
+ {"train_loss": 0.5049941539764404, "profiling/batch_time": 0.4682908356189728, "profiling/data_time": 0.02284514717757702, "epoch": 61, "step": 1921}
125
+ {"lr-LARS/params": 2.3901883354766715, "lr-LARS/params_no_weight_decay": 2.3901883354766715, "step": 1952}
126
+ {"train_loss": 0.7352291345596313, "profiling/batch_time": 0.46821048855781555, "profiling/data_time": 0.024347366765141487, "epoch": 62, "step": 1952}
127
+ {"lr-LARS/params": 2.3824915316189714, "lr-LARS/params_no_weight_decay": 2.3824915316189714, "step": 1983}
128
+ {"train_loss": 0.47200965881347656, "profiling/batch_time": 0.46723681688308716, "profiling/data_time": 0.028282053768634796, "epoch": 63, "step": 1983}
129
+ {"lr-LARS/params": 2.374667703888753, "lr-LARS/params_no_weight_decay": 2.374667703888753, "step": 2014}
130
+ {"train_loss": 60.06499481201172, "profiling/batch_time": 0.4674076735973358, "profiling/data_time": 0.02362578548491001, "epoch": 64, "step": 2014}
131
+ {"lr-LARS/params": 2.366717770651184, "lr-LARS/params_no_weight_decay": 2.366717770651184, "step": 2045}
132
+ {"train_loss": 0.24544291198253632, "profiling/batch_time": 0.46887892484664917, "profiling/data_time": 0.02281964384019375, "epoch": 65, "step": 2045}
133
+ {"lr-LARS/params": 2.358642665073767, "lr-LARS/params_no_weight_decay": 2.358642665073767, "step": 2076}
134
+ {"train_loss": 108.18089294433594, "profiling/batch_time": 0.47139567136764526, "profiling/data_time": 0.023664427921175957, "epoch": 66, "step": 2076}
135
+ {"lr-LARS/params": 2.350443335016799, "lr-LARS/params_no_weight_decay": 2.350443335016799, "step": 2107}
136
+ {"train_loss": 0.25892946124076843, "profiling/batch_time": 0.46853384375572205, "profiling/data_time": 0.02481815218925476, "epoch": 67, "step": 2107}
137
+ {"lr-LARS/params": 2.3421207429221167, "lr-LARS/params_no_weight_decay": 2.3421207429221167, "step": 2138}
138
+ {"train_loss": 0.7797198295593262, "profiling/batch_time": 0.4700237512588501, "profiling/data_time": 0.023094169795513153, "epoch": 68, "step": 2138}
139
+ {"lr-LARS/params": 2.3336758657001218, "lr-LARS/params_no_weight_decay": 2.3336758657001218, "step": 2169}
140
+ {"train_loss": 47.811195373535156, "profiling/batch_time": 0.4678688049316406, "profiling/data_time": 0.022558994591236115, "epoch": 69, "step": 2169}
141
+ {"lr-LARS/params": 2.32510969461511, "lr-LARS/params_no_weight_decay": 2.32510969461511, "step": 2200}
142
+ {"train_loss": 0.4786320924758911, "profiling/batch_time": 0.4702819287776947, "profiling/data_time": 0.023594040423631668, "epoch": 70, "step": 2200}
143
+ {"lr-LARS/params": 2.316423235168918, "lr-LARS/params_no_weight_decay": 2.316423235168918, "step": 2231}
144
+ {"train_loss": 5.747926712036133, "profiling/batch_time": 0.4688231945037842, "profiling/data_time": 0.023650651797652245, "epoch": 71, "step": 2231}
145
+ {"lr-LARS/params": 2.3076175069828944, "lr-LARS/params_no_weight_decay": 2.3076175069828944, "step": 2262}
146
+ {"train_loss": 0.6255095601081848, "profiling/batch_time": 0.46930524706840515, "profiling/data_time": 0.023454928770661354, "epoch": 72, "step": 2262}
147
+ {"lr-LARS/params": 2.29869354367822, "lr-LARS/params_no_weight_decay": 2.29869354367822, "step": 2293}
148
+ {"train_loss": 310.82220458984375, "profiling/batch_time": 0.46930602192878723, "profiling/data_time": 0.02611592784523964, "epoch": 73, "step": 2293}
149
+ {"lr-LARS/params": 2.2896523927545753, "lr-LARS/params_no_weight_decay": 2.2896523927545753, "step": 2324}
150
+ {"train_loss": 0.5197723507881165, "profiling/batch_time": 0.46976619958877563, "profiling/data_time": 0.02429189719259739, "epoch": 74, "step": 2324}
151
+ {"lr-LARS/params": 2.2804951154671893, "lr-LARS/params_no_weight_decay": 2.2804951154671893, "step": 2355}
152
+ {"train_loss": 0.3360525965690613, "profiling/batch_time": 0.467939555644989, "profiling/data_time": 0.025161702185869217, "epoch": 75, "step": 2355}
153
+ {"lr-LARS/params": 2.271222786702267, "lr-LARS/params_no_weight_decay": 2.271222786702267, "step": 2386}
154
+ {"train_loss": 0.2486790269613266, "profiling/batch_time": 0.46815434098243713, "profiling/data_time": 0.02291480079293251, "epoch": 76, "step": 2386}
155
+ {"lr-LARS/params": 2.2618364948508183, "lr-LARS/params_no_weight_decay": 2.2618364948508183, "step": 2417}
156
+ {"train_loss": 41.96680450439453, "profiling/batch_time": 0.46991580724716187, "profiling/data_time": 0.022785667330026627, "epoch": 77, "step": 2417}
157
+ {"lr-LARS/params": 2.252337341680902, "lr-LARS/params_no_weight_decay": 2.252337341680902, "step": 2448}
158
+ {"train_loss": 0.5459044575691223, "profiling/batch_time": 0.46973517537117004, "profiling/data_time": 0.022286487743258476, "epoch": 78, "step": 2448}
159
+ {"lr-LARS/params": 2.242726442208301, "lr-LARS/params_no_weight_decay": 2.242726442208301, "step": 2479}
160
+ {"train_loss": 0.8291583061218262, "profiling/batch_time": 0.46750885248184204, "profiling/data_time": 0.022359391674399376, "epoch": 79, "step": 2479}
161
+ {"lr-LARS/params": 2.233004924565638, "lr-LARS/params_no_weight_decay": 2.233004924565638, "step": 2510}
162
+ {"train_loss": 0.4035949110984802, "profiling/batch_time": 0.46970829367637634, "profiling/data_time": 0.022599484771490097, "epoch": 80, "step": 2510}
163
+ {"lr-LARS/params": 2.2231739298699607, "lr-LARS/params_no_weight_decay": 2.2231739298699607, "step": 2541}
164
+ {"train_loss": 0.5911986827850342, "profiling/batch_time": 0.46878448128700256, "profiling/data_time": 0.024261871352791786, "epoch": 81, "step": 2541}
165
+ {"lr-LARS/params": 2.213234612088789, "lr-LARS/params_no_weight_decay": 2.213234612088789, "step": 2572}
166
+ {"train_loss": 0.5409368872642517, "profiling/batch_time": 0.46910786628723145, "profiling/data_time": 0.023897871375083923, "epoch": 82, "step": 2572}
167
+ {"lr-LARS/params": 2.2031881379046676, "lr-LARS/params_no_weight_decay": 2.2031881379046676, "step": 2603}
168
+ {"train_loss": 4.227826118469238, "profiling/batch_time": 0.47049134969711304, "profiling/data_time": 0.025586357340216637, "epoch": 83, "step": 2603}
169
+ {"lr-LARS/params": 2.193035686578219, "lr-LARS/params_no_weight_decay": 2.193035686578219, "step": 2634}
170
+ {"train_loss": 2.128704071044922, "profiling/batch_time": 0.46828195452690125, "profiling/data_time": 0.023670224472880363, "epoch": 84, "step": 2634}
171
+ {"lr-LARS/params": 2.1827784498097187, "lr-LARS/params_no_weight_decay": 2.1827784498097187, "step": 2665}
172
+ {"train_loss": 0.3161485493183136, "profiling/batch_time": 0.47038933634757996, "profiling/data_time": 0.024162085726857185, "epoch": 85, "step": 2665}
173
+ {"lr-LARS/params": 2.172417631599216, "lr-LARS/params_no_weight_decay": 2.172417631599216, "step": 2696}
174
+ {"train_loss": 0.4555578827857971, "profiling/batch_time": 0.46870437264442444, "profiling/data_time": 0.024299200624227524, "epoch": 86, "step": 2696}
175
+ {"lr-LARS/params": 2.1619544481052047, "lr-LARS/params_no_weight_decay": 2.1619544481052047, "step": 2727}
176
+ {"train_loss": 2.6594507694244385, "profiling/batch_time": 0.46748900413513184, "profiling/data_time": 0.023228539153933525, "epoch": 87, "step": 2727}
177
+ {"lr-LARS/params": 2.1513901275018736, "lr-LARS/params_no_weight_decay": 2.1513901275018736, "step": 2758}
178
+ {"train_loss": 0.2915334403514862, "profiling/batch_time": 0.47075730562210083, "profiling/data_time": 0.02426259219646454, "epoch": 88, "step": 2758}
179
+ {"lr-LARS/params": 2.1407259098349396, "lr-LARS/params_no_weight_decay": 2.1407259098349396, "step": 2789}
180
+ {"train_loss": 53.6274528503418, "profiling/batch_time": 0.47253361344337463, "profiling/data_time": 0.023108534514904022, "epoch": 89, "step": 2789}
181
+ {"lr-LARS/params": 2.1299630468760906, "lr-LARS/params_no_weight_decay": 2.1299630468760906, "step": 2820}
182
+ {"train_loss": 1.879733681678772, "profiling/batch_time": 0.4722203016281128, "profiling/data_time": 0.02306920289993286, "epoch": 90, "step": 2820}
183
+ {"lr-LARS/params": 2.1191028019760534, "lr-LARS/params_no_weight_decay": 2.1191028019760534, "step": 2851}
184
+ {"train_loss": 0.3740873336791992, "profiling/batch_time": 0.46790310740470886, "profiling/data_time": 0.028366010636091232, "epoch": 91, "step": 2851}
185
+ {"lr-LARS/params": 2.108146449916301, "lr-LARS/params_no_weight_decay": 2.108146449916301, "step": 2882}
186
+ {"train_loss": 209.26174926757812, "profiling/batch_time": 0.4688158631324768, "profiling/data_time": 0.023069186136126518, "epoch": 92, "step": 2882}
187
+ {"lr-LARS/params": 2.097095276759416, "lr-LARS/params_no_weight_decay": 2.097095276759416, "step": 2913}
188
+ {"train_loss": 10.9568510055542, "profiling/batch_time": 0.4684227705001831, "profiling/data_time": 0.02245280146598816, "epoch": 93, "step": 2913}
189
+ {"lr-LARS/params": 2.0859505796981335, "lr-LARS/params_no_weight_decay": 2.0859505796981335, "step": 2944}
190
+ {"train_loss": 0.34533989429473877, "profiling/batch_time": 0.46746405959129333, "profiling/data_time": 0.023341916501522064, "epoch": 94, "step": 2944}
191
+ {"lr-LARS/params": 2.074713666903076, "lr-LARS/params_no_weight_decay": 2.074713666903076, "step": 2975}
192
+ {"train_loss": 0.4546048939228058, "profiling/batch_time": 0.4688769578933716, "profiling/data_time": 0.02556665427982807, "epoch": 95, "step": 2975}
193
+ {"lr-LARS/params": 2.0633858573691986, "lr-LARS/params_no_weight_decay": 2.0633858573691986, "step": 3006}
194
+ {"train_loss": 1.9552316665649414, "profiling/batch_time": 0.47169414162635803, "profiling/data_time": 0.022796370089054108, "epoch": 96, "step": 3006}
195
+ {"lr-LARS/params": 2.051968480760965, "lr-LARS/params_no_weight_decay": 2.051968480760965, "step": 3037}
196
+ {"train_loss": 2.11960768699646, "profiling/batch_time": 0.467564195394516, "profiling/data_time": 0.02524457313120365, "epoch": 97, "step": 3037}
197
+ {"lr-LARS/params": 2.0404628772562714, "lr-LARS/params_no_weight_decay": 2.0404628772562714, "step": 3068}
198
+ {"train_loss": 179.30409240722656, "profiling/batch_time": 0.4676856994628906, "profiling/data_time": 0.02607033960521221, "epoch": 98, "step": 3068}
199
+ {"lr-LARS/params": 2.028870397389136, "lr-LARS/params_no_weight_decay": 2.028870397389136, "step": 3099}
200
+ {"train_loss": 15.288697242736816, "profiling/batch_time": 0.4692170321941376, "profiling/data_time": 0.023462682962417603, "epoch": 99, "step": 3099}
201
+ {"lr-LARS/params": 2.01719240189117, "lr-LARS/params_no_weight_decay": 2.01719240189117, "step": 3130}
202
+ {"train_loss": 13.218351364135742, "profiling/batch_time": 0.46973398327827454, "profiling/data_time": 0.0225273035466671, "epoch": 100, "step": 3130}
203
+ {"lr-LARS/params": 2.005430261531858, "lr-LARS/params_no_weight_decay": 2.005430261531858, "step": 3161}
204
+ {"train_loss": 86.37255859375, "profiling/batch_time": 0.4685228765010834, "profiling/data_time": 0.022206757217645645, "epoch": 101, "step": 3161}
205
+ {"lr-LARS/params": 1.9935853569576516, "lr-LARS/params_no_weight_decay": 1.9935853569576516, "step": 3192}
206
+ {"train_loss": 0.2505952715873718, "profiling/batch_time": 0.46937862038612366, "profiling/data_time": 0.02351229637861252, "epoch": 102, "step": 3192}
207
+ {"lr-LARS/params": 1.9816590785299155, "lr-LARS/params_no_weight_decay": 1.9816590785299155, "step": 3223}
208
+ {"train_loss": 0.2204047441482544, "profiling/batch_time": 0.47039690613746643, "profiling/data_time": 0.02336188592016697, "epoch": 103, "step": 3223}
209
+ {"lr-LARS/params": 1.9696528261617168, "lr-LARS/params_no_weight_decay": 1.9696528261617168, "step": 3254}
210
+ {"train_loss": 0.5175177454948425, "profiling/batch_time": 0.46817687153816223, "profiling/data_time": 0.02400284633040428, "epoch": 104, "step": 3254}
211
+ {"lr-LARS/params": 1.9575680091535104, "lr-LARS/params_no_weight_decay": 1.9575680091535104, "step": 3285}
212
+ {"train_loss": 0.5821739435195923, "profiling/batch_time": 0.46970275044441223, "profiling/data_time": 0.022598253563046455, "epoch": 105, "step": 3285}
213
+ {"lr-LARS/params": 1.9454060460277114, "lr-LARS/params_no_weight_decay": 1.9454060460277114, "step": 3316}
214
+ {"train_loss": 0.9416442513465881, "profiling/batch_time": 0.4684368073940277, "profiling/data_time": 0.022756820544600487, "epoch": 106, "step": 3316}
215
+ {"lr-LARS/params": 1.9331683643621864, "lr-LARS/params_no_weight_decay": 1.9331683643621864, "step": 3347}
216
+ {"train_loss": 0.2287731021642685, "profiling/batch_time": 0.4672001600265503, "profiling/data_time": 0.02272597886621952, "epoch": 107, "step": 3347}
217
+ {"lr-LARS/params": 1.9208564006226876, "lr-LARS/params_no_weight_decay": 1.9208564006226876, "step": 3378}
218
+ {"train_loss": 0.2901693880558014, "profiling/batch_time": 0.4691215753555298, "profiling/data_time": 0.02353905513882637, "epoch": 108, "step": 3378}
219
+ {"lr-LARS/params": 1.9084715999942368, "lr-LARS/params_no_weight_decay": 1.9084715999942368, "step": 3409}
220
+ {"train_loss": 3.9739749431610107, "profiling/batch_time": 0.46706321835517883, "profiling/data_time": 0.023691251873970032, "epoch": 109, "step": 3409}
221
+ {"lr-LARS/params": 1.8960154162114893, "lr-LARS/params_no_weight_decay": 1.8960154162114893, "step": 3440}
222
+ {"train_loss": 0.2285587042570114, "profiling/batch_time": 0.4712910056114197, "profiling/data_time": 0.023628901690244675, "epoch": 110, "step": 3440}
223
+ {"lr-LARS/params": 1.8834893113880937, "lr-LARS/params_no_weight_decay": 1.8834893113880937, "step": 3471}
224
+ {"train_loss": 0.19997327029705048, "profiling/batch_time": 0.46766728162765503, "profiling/data_time": 0.025861116126179695, "epoch": 111, "step": 3471}
225
+ {"lr-LARS/params": 1.8708947558450697, "lr-LARS/params_no_weight_decay": 1.8708947558450697, "step": 3502}
226
+ {"train_loss": 0.222589910030365, "profiling/batch_time": 0.4697688817977905, "profiling/data_time": 0.023146115243434906, "epoch": 112, "step": 3502}
227
+ {"lr-LARS/params": 1.8582332279382185, "lr-LARS/params_no_weight_decay": 1.8582332279382185, "step": 3533}
228
+ {"train_loss": 0.21689438819885254, "profiling/batch_time": 0.4695585370063782, "profiling/data_time": 0.02386208437383175, "epoch": 113, "step": 3533}
229
+ {"lr-LARS/params": 1.8455062138845955, "lr-LARS/params_no_weight_decay": 1.8455062138845955, "step": 3564}
230
+ {"train_loss": 0.21824142336845398, "profiling/batch_time": 0.468971312046051, "profiling/data_time": 0.023700954392552376, "epoch": 114, "step": 3564}
231
+ {"lr-LARS/params": 1.832715207588054, "lr-LARS/params_no_weight_decay": 1.832715207588054, "step": 3595}
232
+ {"train_loss": 0.27698883414268494, "profiling/batch_time": 0.4698481559753418, "profiling/data_time": 0.02267683856189251, "epoch": 115, "step": 3595}
233
+ {"lr-LARS/params": 1.819861710463892, "lr-LARS/params_no_weight_decay": 1.819861710463892, "step": 3626}
234
+ {"train_loss": 0.44556722044944763, "profiling/batch_time": 0.4686325490474701, "profiling/data_time": 0.02484964393079281, "epoch": 116, "step": 3626}
235
+ {"lr-LARS/params": 1.806947231262617, "lr-LARS/params_no_weight_decay": 1.806947231262617, "step": 3657}
236
+ {"train_loss": 0.2024405300617218, "profiling/batch_time": 0.4692056477069855, "profiling/data_time": 0.02586720883846283, "epoch": 117, "step": 3657}
237
+ {"lr-LARS/params": 1.7939732858928428, "lr-LARS/params_no_weight_decay": 1.7939732858928428, "step": 3688}
238
+ {"train_loss": 2.6041512489318848, "profiling/batch_time": 0.46856310963630676, "profiling/data_time": 0.023410534486174583, "epoch": 118, "step": 3688}
239
+ {"lr-LARS/params": 1.7809413972433563, "lr-LARS/params_no_weight_decay": 1.7809413972433563, "step": 3719}
240
+ {"train_loss": 0.2738244831562042, "profiling/batch_time": 0.4674675166606903, "profiling/data_time": 0.02568947710096836, "epoch": 119, "step": 3719}
241
+ {"lr-LARS/params": 1.7678530950043592, "lr-LARS/params_no_weight_decay": 1.7678530950043592, "step": 3750}
242
+ {"train_loss": 1.1977521181106567, "profiling/batch_time": 0.47148001194000244, "profiling/data_time": 0.027329301461577415, "epoch": 120, "step": 3750}
243
+ {"lr-LARS/params": 1.7547099154879087, "lr-LARS/params_no_weight_decay": 1.7547099154879087, "step": 3781}
244
+ {"train_loss": 2.8730945587158203, "profiling/batch_time": 0.47009193897247314, "profiling/data_time": 0.023626748472452164, "epoch": 121, "step": 3781}
245
+ {"lr-LARS/params": 1.7415134014475881, "lr-LARS/params_no_weight_decay": 1.7415134014475881, "step": 3812}
246
+ {"train_loss": 0.32099857926368713, "profiling/batch_time": 0.46900197863578796, "profiling/data_time": 0.02681814879179001, "epoch": 122, "step": 3812}
247
+ {"lr-LARS/params": 1.7282651018974169, "lr-LARS/params_no_weight_decay": 1.7282651018974169, "step": 3843}
248
+ {"train_loss": 0.19669793546199799, "profiling/batch_time": 0.4678991734981537, "profiling/data_time": 0.02354726754128933, "epoch": 123, "step": 3843}
249
+ {"lr-LARS/params": 1.7149665719300244, "lr-LARS/params_no_weight_decay": 1.7149665719300244, "step": 3874}
250
+ {"train_loss": 0.2884298264980316, "profiling/batch_time": 0.4693676233291626, "profiling/data_time": 0.02348741702735424, "epoch": 124, "step": 3874}
251
+ {"lr-LARS/params": 1.7016193725341149, "lr-LARS/params_no_weight_decay": 1.7016193725341149, "step": 3905}
252
+ {"train_loss": 0.23995520174503326, "profiling/batch_time": 0.46935105323791504, "profiling/data_time": 0.023599984124302864, "epoch": 125, "step": 3905}
253
+ {"lr-LARS/params": 1.6882250704112358, "lr-LARS/params_no_weight_decay": 1.6882250704112358, "step": 3936}
254
+ {"train_loss": 9.37893295288086, "profiling/batch_time": 0.4698675870895386, "profiling/data_time": 0.02479972504079342, "epoch": 126, "step": 3936}
255
+ {"lr-LARS/params": 1.6747852377918793, "lr-LARS/params_no_weight_decay": 1.6747852377918793, "step": 3967}
256
+ {"train_loss": 73.7145767211914, "profiling/batch_time": 0.468632310628891, "profiling/data_time": 0.02318849042057991, "epoch": 127, "step": 3967}
257
+ {"lr-LARS/params": 1.6613014522509317, "lr-LARS/params_no_weight_decay": 1.6613014522509317, "step": 3998}
258
+ {"train_loss": 0.22435139119625092, "profiling/batch_time": 0.4691292643547058, "profiling/data_time": 0.02269771508872509, "epoch": 128, "step": 3998}
259
+ {"lr-LARS/params": 1.6477752965224972, "lr-LARS/params_no_weight_decay": 1.6477752965224972, "step": 4029}
260
+ {"train_loss": 0.3428381085395813, "profiling/batch_time": 0.4692525565624237, "profiling/data_time": 0.02292914129793644, "epoch": 129, "step": 4029}
261
+ {"lr-LARS/params": 1.6342083583141152, "lr-LARS/params_no_weight_decay": 1.6342083583141152, "step": 4060}
262
+ {"train_loss": 0.30637326836586, "profiling/batch_time": 0.4676969051361084, "profiling/data_time": 0.024300899356603622, "epoch": 130, "step": 4060}
263
+ {"lr-LARS/params": 1.620602230120396, "lr-LARS/params_no_weight_decay": 1.620602230120396, "step": 4091}
264
+ {"train_loss": 0.20701366662979126, "profiling/batch_time": 0.4691523611545563, "profiling/data_time": 0.024731282144784927, "epoch": 131, "step": 4091}
265
+ {"lr-LARS/params": 1.6069585090360905, "lr-LARS/params_no_weight_decay": 1.6069585090360905, "step": 4122}
266
+ {"train_loss": 0.2418324500322342, "profiling/batch_time": 0.46820154786109924, "profiling/data_time": 0.023264264687895775, "epoch": 132, "step": 4122}
267
+ {"lr-LARS/params": 1.593278796568625, "lr-LARS/params_no_weight_decay": 1.593278796568625, "step": 4153}
268
+ {"train_loss": 0.2066756635904312, "profiling/batch_time": 0.47047385573387146, "profiling/data_time": 0.0255581084638834, "epoch": 133, "step": 4153}
269
+ {"lr-LARS/params": 1.5795646984501124, "lr-LARS/params_no_weight_decay": 1.5795646984501124, "step": 4184}
270
+ {"train_loss": 0.21035292744636536, "profiling/batch_time": 0.4699760675430298, "profiling/data_time": 0.025822002440690994, "epoch": 134, "step": 4184}
271
+ {"lr-LARS/params": 1.5658178244488732, "lr-LARS/params_no_weight_decay": 1.5658178244488732, "step": 4215}
272
+ {"train_loss": 0.22665046155452728, "profiling/batch_time": 0.46866166591644287, "profiling/data_time": 0.02408125065267086, "epoch": 135, "step": 4215}
273
+ {"lr-LARS/params": 1.552039788180479, "lr-LARS/params_no_weight_decay": 1.552039788180479, "step": 4246}
274
+ {"train_loss": 2.7997567653656006, "profiling/batch_time": 0.4675123989582062, "profiling/data_time": 0.02372441440820694, "epoch": 136, "step": 4246}
275
+ {"lr-LARS/params": 1.5382322069183445, "lr-LARS/params_no_weight_decay": 1.5382322069183445, "step": 4277}
276
+ {"train_loss": 0.2233796864748001, "profiling/batch_time": 0.46816176176071167, "profiling/data_time": 0.02331366017460823, "epoch": 137, "step": 4277}
277
+ {"lr-LARS/params": 1.5243967014038924, "lr-LARS/params_no_weight_decay": 1.5243967014038924, "step": 4308}
278
+ {"train_loss": 0.9067608118057251, "profiling/batch_time": 0.4688373804092407, "profiling/data_time": 0.02350773848593235, "epoch": 138, "step": 4308}
279
+ {"lr-LARS/params": 1.5105348956563098, "lr-LARS/params_no_weight_decay": 1.5105348956563098, "step": 4339}
280
+ {"train_loss": 0.1916802078485489, "profiling/batch_time": 0.4673452377319336, "profiling/data_time": 0.02541876584291458, "epoch": 139, "step": 4339}
281
+ {"lr-LARS/params": 1.4966484167819174, "lr-LARS/params_no_weight_decay": 1.4966484167819174, "step": 4370}
282
+ {"train_loss": 0.19645319879055023, "profiling/batch_time": 0.4673255383968353, "profiling/data_time": 0.024213135242462158, "epoch": 140, "step": 4370}
283
+ {"lr-LARS/params": 1.4827388947831845, "lr-LARS/params_no_weight_decay": 1.4827388947831845, "step": 4401}
284
+ {"train_loss": 127.64894104003906, "profiling/batch_time": 0.4684883654117584, "profiling/data_time": 0.02284305728971958, "epoch": 141, "step": 4401}
285
+ {"lr-LARS/params": 1.4688079623673922, "lr-LARS/params_no_weight_decay": 1.4688079623673922, "step": 4432}
286
+ {"train_loss": 0.25028732419013977, "profiling/batch_time": 0.46900200843811035, "profiling/data_time": 0.02325459010899067, "epoch": 142, "step": 4432}
287
+ {"lr-LARS/params": 1.4548572547549883, "lr-LARS/params_no_weight_decay": 1.4548572547549883, "step": 4463}
288
+ {"train_loss": 0.19643022119998932, "profiling/batch_time": 0.4686683118343353, "profiling/data_time": 0.02326754853129387, "epoch": 143, "step": 4463}
289
+ {"lr-LARS/params": 1.4408884094876455, "lr-LARS/params_no_weight_decay": 1.4408884094876455, "step": 4494}
290
+ {"train_loss": 0.2932826280593872, "profiling/batch_time": 0.46919068694114685, "profiling/data_time": 0.023083271458745003, "epoch": 144, "step": 4494}
291
+ {"lr-LARS/params": 1.4269030662360431, "lr-LARS/params_no_weight_decay": 1.4269030662360431, "step": 4525}
292
+ {"train_loss": 0.797976016998291, "profiling/batch_time": 0.46666938066482544, "profiling/data_time": 0.023125160485506058, "epoch": 145, "step": 4525}
293
+ {"lr-LARS/params": 1.4129028666074024, "lr-LARS/params_no_weight_decay": 1.4129028666074024, "step": 4556}
294
+ {"train_loss": 0.18154257535934448, "profiling/batch_time": 0.468300998210907, "profiling/data_time": 0.02381891943514347, "epoch": 146, "step": 4556}
295
+ {"lr-LARS/params": 1.3988894539527952, "lr-LARS/params_no_weight_decay": 1.3988894539527952, "step": 4587}
296
+ {"train_loss": 0.4662235677242279, "profiling/batch_time": 0.4684472680091858, "profiling/data_time": 0.02430916018784046, "epoch": 147, "step": 4587}
297
+ {"lr-LARS/params": 1.3848644731742459, "lr-LARS/params_no_weight_decay": 1.3848644731742459, "step": 4618}
298
+ {"train_loss": 14.96108341217041, "profiling/batch_time": 0.46820706129074097, "profiling/data_time": 0.023396974429488182, "epoch": 148, "step": 4618}
299
+ {"lr-LARS/params": 1.3708295705316498, "lr-LARS/params_no_weight_decay": 1.3708295705316498, "step": 4649}
300
+ {"train_loss": 0.20646654069423676, "profiling/batch_time": 0.4680996537208557, "profiling/data_time": 0.02299371175467968, "epoch": 149, "step": 4649}
301
+ {"lr-LARS/params": 1.3567863934495388, "lr-LARS/params_no_weight_decay": 1.3567863934495388, "step": 4680}
302
+ {"train_loss": 0.18531253933906555, "profiling/batch_time": 0.4688446521759033, "profiling/data_time": 0.02351038157939911, "epoch": 150, "step": 4680}
303
+ {"lr-LARS/params": 1.3427365903236999, "lr-LARS/params_no_weight_decay": 1.3427365903236999, "step": 4711}
304
+ {"train_loss": 1698.1651611328125, "profiling/batch_time": 0.46837708353996277, "profiling/data_time": 0.023236317560076714, "epoch": 151, "step": 4711}
305
+ {"lr-LARS/params": 1.328681810327691, "lr-LARS/params_no_weight_decay": 1.328681810327691, "step": 4742}
306
+ {"train_loss": 0.26438280940055847, "profiling/batch_time": 0.4675687253475189, "profiling/data_time": 0.023975424468517303, "epoch": 152, "step": 4742}
307
+ {"lr-LARS/params": 1.3146237032192571, "lr-LARS/params_no_weight_decay": 1.3146237032192571, "step": 4773}
308
+ {"train_loss": 0.18656469881534576, "profiling/batch_time": 0.46806153655052185, "profiling/data_time": 0.02452305518090725, "epoch": 153, "step": 4773}
309
+ {"lr-LARS/params": 1.3005639191466805, "lr-LARS/params_no_weight_decay": 1.3005639191466805, "step": 4804}
310
+ {"train_loss": 0.194628044962883, "profiling/batch_time": 0.47026383876800537, "profiling/data_time": 0.024226898327469826, "epoch": 154, "step": 4804}
311
+ {"lr-LARS/params": 1.2865041084550883, "lr-LARS/params_no_weight_decay": 1.2865041084550883, "step": 4835}
312
+ {"train_loss": 0.19501325488090515, "profiling/batch_time": 0.46810394525527954, "profiling/data_time": 0.02305176854133606, "epoch": 155, "step": 4835}
313
+ {"lr-LARS/params": 1.2724459214927306, "lr-LARS/params_no_weight_decay": 1.2724459214927306, "step": 4866}
314
+ {"train_loss": 0.1783357709646225, "profiling/batch_time": 0.47103896737098694, "profiling/data_time": 0.02298922836780548, "epoch": 156, "step": 4866}
315
+ {"lr-LARS/params": 1.258391008417264, "lr-LARS/params_no_weight_decay": 1.258391008417264, "step": 4897}
316
+ {"train_loss": 0.17607949674129486, "profiling/batch_time": 0.4691469967365265, "profiling/data_time": 0.023801535367965698, "epoch": 157, "step": 4897}
317
+ {"lr-LARS/params": 1.2443410190020545, "lr-LARS/params_no_weight_decay": 1.2443410190020545, "step": 4928}
318
+ {"train_loss": 0.17614112794399261, "profiling/batch_time": 0.46958133578300476, "profiling/data_time": 0.023428741842508316, "epoch": 158, "step": 4928}
319
+ {"lr-LARS/params": 1.2302976024425256, "lr-LARS/params_no_weight_decay": 1.2302976024425256, "step": 4959}
320
+ {"train_loss": 0.17606651782989502, "profiling/batch_time": 0.46831223368644714, "profiling/data_time": 0.023153886198997498, "epoch": 159, "step": 4959}
321
+ {"lr-LARS/params": 1.2162624071625765, "lr-LARS/params_no_weight_decay": 1.2162624071625765, "step": 4990}
322
+ {"train_loss": 0.1813165247440338, "profiling/batch_time": 0.46791478991508484, "profiling/data_time": 0.022853100672364235, "epoch": 160, "step": 4990}
323
+ {"lr-LARS/params": 1.2022370806210865, "lr-LARS/params_no_weight_decay": 1.2022370806210865, "step": 5021}
324
+ {"train_loss": 0.1810695379972458, "profiling/batch_time": 0.4697805941104889, "profiling/data_time": 0.02336183749139309, "epoch": 161, "step": 5021}
325
+ {"lr-LARS/params": 1.1882232691185384, "lr-LARS/params_no_weight_decay": 1.1882232691185384, "step": 5052}
326
+ {"train_loss": 0.1757485717535019, "profiling/batch_time": 0.4680528938770294, "profiling/data_time": 0.023885276168584824, "epoch": 162, "step": 5052}
327
+ {"lr-LARS/params": 1.1742226176037727, "lr-LARS/params_no_weight_decay": 1.1742226176037727, "step": 5083}
328
+ {"train_loss": 0.1856168806552887, "profiling/batch_time": 0.4703716039657593, "profiling/data_time": 0.023418111726641655, "epoch": 163, "step": 5083}
329
+ {"lr-LARS/params": 1.1602367694809044, "lr-LARS/params_no_weight_decay": 1.1602367694809044, "step": 5114}
330
+ {"train_loss": 0.17429611086845398, "profiling/batch_time": 0.47068116068840027, "profiling/data_time": 0.022876489907503128, "epoch": 164, "step": 5114}
331
+ {"lr-LARS/params": 1.1462673664164165, "lr-LARS/params_no_weight_decay": 1.1462673664164165, "step": 5145}
332
+ {"train_loss": 0.18534240126609802, "profiling/batch_time": 0.471584290266037, "profiling/data_time": 0.030926376581192017, "epoch": 165, "step": 5145}
333
+ {"lr-LARS/params": 1.1323160481464636, "lr-LARS/params_no_weight_decay": 1.1323160481464636, "step": 5176}
334
+ {"train_loss": 0.17165836691856384, "profiling/batch_time": 0.4697147607803345, "profiling/data_time": 0.023714274168014526, "epoch": 166, "step": 5176}
335
+ {"lr-LARS/params": 1.1183844522843966, "lr-LARS/params_no_weight_decay": 1.1183844522843966, "step": 5207}
336
+ {"train_loss": 0.17924055457115173, "profiling/batch_time": 0.46992847323417664, "profiling/data_time": 0.03103003464639187, "epoch": 167, "step": 5207}
337
+ {"lr-LARS/params": 1.1044742141285395, "lr-LARS/params_no_weight_decay": 1.1044742141285395, "step": 5238}
338
+ {"train_loss": 0.17594854533672333, "profiling/batch_time": 0.46869608759880066, "profiling/data_time": 0.023087942972779274, "epoch": 168, "step": 5238}
339
+ {"lr-LARS/params": 1.0905869664702375, "lr-LARS/params_no_weight_decay": 1.0905869664702375, "step": 5269}
340
+ {"train_loss": 0.18001827597618103, "profiling/batch_time": 0.4682839512825012, "profiling/data_time": 0.022815624251961708, "epoch": 169, "step": 5269}
341
+ {"lr-LARS/params": 1.076724339402197, "lr-LARS/params_no_weight_decay": 1.076724339402197, "step": 5300}
342
+ {"train_loss": 0.17039217054843903, "profiling/batch_time": 0.4688928723335266, "profiling/data_time": 0.024359598755836487, "epoch": 170, "step": 5300}
343
+ {"lr-LARS/params": 1.062887960127149, "lr-LARS/params_no_weight_decay": 1.062887960127149, "step": 5331}
344
+ {"train_loss": 0.18095912039279938, "profiling/batch_time": 0.4697173535823822, "profiling/data_time": 0.02420135959982872, "epoch": 171, "step": 5331}
345
+ {"lr-LARS/params": 1.0490794527668417, "lr-LARS/params_no_weight_decay": 1.0490794527668417, "step": 5362}
346
+ {"train_loss": 0.179249107837677, "profiling/batch_time": 0.4707670211791992, "profiling/data_time": 0.02550286427140236, "epoch": 172, "step": 5362}
347
+ {"lr-LARS/params": 1.0353004381714035, "lr-LARS/params_no_weight_decay": 1.0353004381714035, "step": 5393}
348
+ {"train_loss": 0.17689934372901917, "profiling/batch_time": 0.47232910990715027, "profiling/data_time": 0.02399234101176262, "epoch": 173, "step": 5393}
349
+ {"lr-LARS/params": 1.0215525337290867, "lr-LARS/params_no_weight_decay": 1.0215525337290867, "step": 5424}
350
+ {"train_loss": 0.17880438268184662, "profiling/batch_time": 0.4685608744621277, "profiling/data_time": 0.024626320227980614, "epoch": 174, "step": 5424}
351
+ {"lr-LARS/params": 1.0078373531764158, "lr-LARS/params_no_weight_decay": 1.0078373531764158, "step": 5455}
352
+ {"train_loss": 0.1799730509519577, "profiling/batch_time": 0.4692945182323456, "profiling/data_time": 0.024718530476093292, "epoch": 175, "step": 5455}
353
+ {"lr-LARS/params": 0.9941565064087676, "lr-LARS/params_no_weight_decay": 0.9941565064087676, "step": 5486}
354
+ {"train_loss": 0.17893801629543304, "profiling/batch_time": 0.4690098762512207, "profiling/data_time": 0.023999815806746483, "epoch": 176, "step": 5486}
355
+ {"lr-LARS/params": 0.9805115992914009, "lr-LARS/params_no_weight_decay": 0.9805115992914009, "step": 5517}
356
+ {"train_loss": 0.18591029942035675, "profiling/batch_time": 0.46935659646987915, "profiling/data_time": 0.024002157151699066, "epoch": 177, "step": 5517}
357
+ {"lr-LARS/params": 0.9669042334709583, "lr-LARS/params_no_weight_decay": 0.9669042334709583, "step": 5548}
358
+ {"train_loss": 0.17215245962142944, "profiling/batch_time": 0.4706227779388428, "profiling/data_time": 0.02324669435620308, "epoch": 178, "step": 5548}
359
+ {"lr-LARS/params": 0.9533360061874647, "lr-LARS/params_no_weight_decay": 0.9533360061874647, "step": 5579}
360
+ {"train_loss": 0.17308884859085083, "profiling/batch_time": 0.4688417911529541, "profiling/data_time": 0.02328791655600071, "epoch": 179, "step": 5579}
361
+ {"lr-LARS/params": 0.9398085100868415, "lr-LARS/params_no_weight_decay": 0.9398085100868415, "step": 5610}
362
+ {"train_loss": 0.17389556765556335, "profiling/batch_time": 0.46835649013519287, "profiling/data_time": 0.024651531130075455, "epoch": 180, "step": 5610}
363
+ {"lr-LARS/params": 0.9263233330339639, "lr-LARS/params_no_weight_decay": 0.9263233330339639, "step": 5641}
364
+ {"train_loss": 0.17842154204845428, "profiling/batch_time": 0.4684598445892334, "profiling/data_time": 0.02436215616762638, "epoch": 181, "step": 5641}
365
+ {"lr-LARS/params": 0.9128820579262703, "lr-LARS/params_no_weight_decay": 0.9128820579262703, "step": 5672}
366
+ {"train_loss": 0.17309552431106567, "profiling/batch_time": 0.4703254699707031, "profiling/data_time": 0.022909438237547874, "epoch": 182, "step": 5672}
367
+ {"lr-LARS/params": 0.8994862625079686, "lr-LARS/params_no_weight_decay": 0.8994862625079686, "step": 5703}
368
+ {"train_loss": 0.17799238860607147, "profiling/batch_time": 0.4692760109901428, "profiling/data_time": 0.026357440277934074, "epoch": 183, "step": 5703}
369
+ {"lr-LARS/params": 0.886137519184834, "lr-LARS/params_no_weight_decay": 0.886137519184834, "step": 5734}
370
+ {"train_loss": 0.1867348551750183, "profiling/batch_time": 0.4702256917953491, "profiling/data_time": 0.02666258066892624, "epoch": 184, "step": 5734}
371
+ {"lr-LARS/params": 0.8728373948396408, "lr-LARS/params_no_weight_decay": 0.8728373948396408, "step": 5765}
372
+ {"train_loss": 0.17166811227798462, "profiling/batch_time": 0.4688814878463745, "profiling/data_time": 0.023565217852592468, "epoch": 185, "step": 5765}
373
+ {"lr-LARS/params": 0.8595874506482426, "lr-LARS/params_no_weight_decay": 0.8595874506482426, "step": 5796}
374
+ {"train_loss": 0.1762077659368515, "profiling/batch_time": 0.46982041001319885, "profiling/data_time": 0.025917453691363335, "epoch": 186, "step": 5796}
375
+ {"lr-LARS/params": 0.8463892418963186, "lr-LARS/params_no_weight_decay": 0.8463892418963186, "step": 5827}
376
+ {"train_loss": 0.18132489919662476, "profiling/batch_time": 0.4706161916255951, "profiling/data_time": 0.02385239489376545, "epoch": 187, "step": 5827}
377
+ {"lr-LARS/params": 0.8332443177968126, "lr-LARS/params_no_weight_decay": 0.8332443177968126, "step": 5858}
378
+ {"train_loss": 0.17243419587612152, "profiling/batch_time": 0.47214871644973755, "profiling/data_time": 0.031371526420116425, "epoch": 188, "step": 5858}
379
+ {"lr-LARS/params": 0.8201542213080886, "lr-LARS/params_no_weight_decay": 0.8201542213080886, "step": 5889}
380
+ {"train_loss": 0.17244185507297516, "profiling/batch_time": 0.4704183042049408, "profiling/data_time": 0.0264283437281847, "epoch": 189, "step": 5889}
381
+ {"lr-LARS/params": 0.8071204889528153, "lr-LARS/params_no_weight_decay": 0.8071204889528153, "step": 5920}
382
+ {"train_loss": 0.1759347915649414, "profiling/batch_time": 0.4692685902118683, "profiling/data_time": 0.02450292371213436, "epoch": 190, "step": 5920}
383
+ {"lr-LARS/params": 0.7941446506376074, "lr-LARS/params_no_weight_decay": 0.7941446506376074, "step": 5951}
384
+ {"train_loss": 0.17409648001194, "profiling/batch_time": 0.46907100081443787, "profiling/data_time": 0.02345338836312294, "epoch": 191, "step": 5951}
385
+ {"lr-LARS/params": 0.7812282294734473, "lr-LARS/params_no_weight_decay": 0.7812282294734473, "step": 5982}
386
+ {"train_loss": 0.17324259877204895, "profiling/batch_time": 0.4691750407218933, "profiling/data_time": 0.03201591968536377, "epoch": 192, "step": 5982}
387
+ {"lr-LARS/params": 0.7683727415968987, "lr-LARS/params_no_weight_decay": 0.7683727415968987, "step": 6013}
388
+ {"train_loss": 0.18373456597328186, "profiling/batch_time": 0.46880653500556946, "profiling/data_time": 0.023174002766609192, "epoch": 193, "step": 6013}
389
+ {"lr-LARS/params": 0.7555796959921441, "lr-LARS/params_no_weight_decay": 0.7555796959921441, "step": 6044}
390
+ {"train_loss": 0.17009755969047546, "profiling/batch_time": 0.4682861864566803, "profiling/data_time": 0.02313772775232792, "epoch": 194, "step": 6044}
391
+ {"lr-LARS/params": 0.742850594313855, "lr-LARS/params_no_weight_decay": 0.742850594313855, "step": 6075}
392
+ {"train_loss": 0.18050645291805267, "profiling/batch_time": 0.47031712532043457, "profiling/data_time": 0.02319205366075039, "epoch": 195, "step": 6075}
393
+ {"lr-LARS/params": 0.730186930710934, "lr-LARS/params_no_weight_decay": 0.730186930710934, "step": 6106}
394
+ {"train_loss": 0.17486198246479034, "profiling/batch_time": 0.4704400300979614, "profiling/data_time": 0.025110362097620964, "epoch": 196, "step": 6106}
395
+ {"lr-LARS/params": 0.7175901916511243, "lr-LARS/params_no_weight_decay": 0.7175901916511243, "step": 6137}
396
+ {"train_loss": 0.1716179996728897, "profiling/batch_time": 0.470682293176651, "profiling/data_time": 0.023940419778227806, "epoch": 197, "step": 6137}
397
+ {"lr-LARS/params": 0.7050618557465294, "lr-LARS/params_no_weight_decay": 0.7050618557465294, "step": 6168}
398
+ {"train_loss": 0.17658409476280212, "profiling/batch_time": 0.47011345624923706, "profiling/data_time": 0.023757848888635635, "epoch": 198, "step": 6168}
399
+ {"lr-LARS/params": 0.692603393580054, "lr-LARS/params_no_weight_decay": 0.692603393580054, "step": 6199}
400
+ {"train_loss": 0.17314349114894867, "profiling/batch_time": 0.46732988953590393, "profiling/data_time": 0.031480688601732254, "epoch": 199, "step": 6199}
401
+ {"lr-LARS/params": 0.6802162675327853, "lr-LARS/params_no_weight_decay": 0.6802162675327853, "step": 6230}
402
+ {"train_loss": 0.1696050763130188, "profiling/batch_time": 0.47118160128593445, "profiling/data_time": 0.023963479325175285, "epoch": 200, "step": 6230}
403
+ {"lr-LARS/params": 0.667901931612338, "lr-LARS/params_no_weight_decay": 0.667901931612338, "step": 6261}
404
+ {"train_loss": 0.17569397389888763, "profiling/batch_time": 0.4709070920944214, "profiling/data_time": 0.024037225171923637, "epoch": 201, "step": 6261}
405
+ {"lr-LARS/params": 0.6556618312821813, "lr-LARS/params_no_weight_decay": 0.6556618312821813, "step": 6292}
406
+ {"train_loss": 0.17421793937683105, "profiling/batch_time": 0.4687694311141968, "profiling/data_time": 0.023792143911123276, "epoch": 202, "step": 6292}
407
+ {"lr-LARS/params": 0.6434974032919711, "lr-LARS/params_no_weight_decay": 0.6434974032919711, "step": 6323}
408
+ {"train_loss": 0.17294025421142578, "profiling/batch_time": 0.4699268639087677, "profiling/data_time": 0.023539885878562927, "epoch": 203, "step": 6323}
409
+ {"lr-LARS/params": 0.6314100755089015, "lr-LARS/params_no_weight_decay": 0.6314100755089015, "step": 6354}
410
+ {"train_loss": 0.1699761301279068, "profiling/batch_time": 0.47126707434654236, "profiling/data_time": 0.024530822411179543, "epoch": 204, "step": 6354}
411
+ {"lr-LARS/params": 0.619401266750104, "lr-LARS/params_no_weight_decay": 0.619401266750104, "step": 6385}
412
+ {"train_loss": 0.18586432933807373, "profiling/batch_time": 0.47063204646110535, "profiling/data_time": 0.024034913629293442, "epoch": 205, "step": 6385}
413
+ {"lr-LARS/params": 0.6074723866161037, "lr-LARS/params_no_weight_decay": 0.6074723866161037, "step": 6416}
414
+ {"train_loss": 0.1777760088443756, "profiling/batch_time": 0.46967262029647827, "profiling/data_time": 0.023256419226527214, "epoch": 206, "step": 6416}
415
+ {"lr-LARS/params": 0.5956248353253584, "lr-LARS/params_no_weight_decay": 0.5956248353253584, "step": 6447}
416
+ {"train_loss": 0.1858465075492859, "profiling/batch_time": 0.4704241156578064, "profiling/data_time": 0.023957030847668648, "epoch": 207, "step": 6447}
417
+ {"lr-LARS/params": 0.5838600035499037, "lr-LARS/params_no_weight_decay": 0.5838600035499037, "step": 6478}
418
+ {"train_loss": 0.18223711848258972, "profiling/batch_time": 0.4698233902454376, "profiling/data_time": 0.023448243737220764, "epoch": 208, "step": 6478}
419
+ {"lr-LARS/params": 0.5721792722521125, "lr-LARS/params_no_weight_decay": 0.5721792722521125, "step": 6509}
420
+ {"train_loss": 0.1881972998380661, "profiling/batch_time": 0.46963635087013245, "profiling/data_time": 0.023899145424365997, "epoch": 209, "step": 6509}
421
+ {"lr-LARS/params": 0.5605840125225995, "lr-LARS/params_no_weight_decay": 0.5605840125225995, "step": 6540}
422
+ {"train_loss": 0.1725481003522873, "profiling/batch_time": 0.47019556164741516, "profiling/data_time": 0.024432366713881493, "epoch": 210, "step": 6540}
423
+ {"lr-LARS/params": 0.5490755854192773, "lr-LARS/params_no_weight_decay": 0.5490755854192773, "step": 6571}
424
+ {"train_loss": 0.1735512614250183, "profiling/batch_time": 0.4700040817260742, "profiling/data_time": 0.03137827664613724, "epoch": 211, "step": 6571}
425
+ {"lr-LARS/params": 0.5376553418075991, "lr-LARS/params_no_weight_decay": 0.5376553418075991, "step": 6602}
426
+ {"train_loss": 0.18683743476867676, "profiling/batch_time": 0.46941742300987244, "profiling/data_time": 0.023504413664340973, "epoch": 212, "step": 6602}
427
+ {"lr-LARS/params": 0.5263246222019915, "lr-LARS/params_no_weight_decay": 0.5263246222019915, "step": 6633}
428
+ {"train_loss": 0.1774751991033554, "profiling/batch_time": 0.4708287715911865, "profiling/data_time": 0.023830818012356758, "epoch": 213, "step": 6633}
429
+ {"lr-LARS/params": 0.5150847566085045, "lr-LARS/params_no_weight_decay": 0.5150847566085045, "step": 6664}
430
+ {"train_loss": 0.17156733572483063, "profiling/batch_time": 0.47103527188301086, "profiling/data_time": 0.02527937851846218, "epoch": 214, "step": 6664}
431
+ {"lr-LARS/params": 0.5039370643686943, "lr-LARS/params_no_weight_decay": 0.5039370643686943, "step": 6695}
432
+ {"train_loss": 0.17357511818408966, "profiling/batch_time": 0.46874478459358215, "profiling/data_time": 0.02410346083343029, "epoch": 215, "step": 6695}
433
+ {"lr-LARS/params": 0.4928828540047595, "lr-LARS/params_no_weight_decay": 0.4928828540047595, "step": 6726}
434
+ {"train_loss": 0.1726713478565216, "profiling/batch_time": 0.47165775299072266, "profiling/data_time": 0.024016540497541428, "epoch": 216, "step": 6726}
435
+ {"lr-LARS/params": 0.48192342306594294, "lr-LARS/params_no_weight_decay": 0.48192342306594294, "step": 6757}
436
+ {"train_loss": 0.17518627643585205, "profiling/batch_time": 0.47232627868652344, "profiling/data_time": 0.026456331834197044, "epoch": 217, "step": 6757}
437
+ {"lr-LARS/params": 0.47106005797622846, "lr-LARS/params_no_weight_decay": 0.47106005797622846, "step": 6788}
438
+ {"train_loss": 0.18039396405220032, "profiling/batch_time": 0.4705643355846405, "profiling/data_time": 0.02345082350075245, "epoch": 218, "step": 6788}
439
+ {"lr-LARS/params": 0.4602940338833392, "lr-LARS/params_no_weight_decay": 0.4602940338833392, "step": 6819}
440
+ {"train_loss": 0.17665861546993256, "profiling/batch_time": 0.4709289073944092, "profiling/data_time": 0.024473879486322403, "epoch": 219, "step": 6819}
441
+ {"lr-LARS/params": 0.4496266145090563, "lr-LARS/params_no_weight_decay": 0.4496266145090563, "step": 6850}
442
+ {"train_loss": 0.17685608565807343, "profiling/batch_time": 0.4734596312046051, "profiling/data_time": 0.024054864421486855, "epoch": 220, "step": 6850}
443
+ {"lr-LARS/params": 0.439059052000887, "lr-LARS/params_no_weight_decay": 0.439059052000887, "step": 6881}
444
+ {"train_loss": 0.1755453646183014, "profiling/batch_time": 0.4687703847885132, "profiling/data_time": 0.023681266233325005, "epoch": 221, "step": 6881}
445
+ {"lr-LARS/params": 0.428592586785085, "lr-LARS/params_no_weight_decay": 0.428592586785085, "step": 6912}
446
+ {"train_loss": 0.17741039395332336, "profiling/batch_time": 0.4691685140132904, "profiling/data_time": 0.023477407172322273, "epoch": 222, "step": 6912}
447
+ {"lr-LARS/params": 0.41822844742104703, "lr-LARS/params_no_weight_decay": 0.41822844742104703, "step": 6943}
448
+ {"train_loss": 0.17507131397724152, "profiling/batch_time": 0.468911737203598, "profiling/data_time": 0.026731377467513084, "epoch": 223, "step": 6943}
449
+ {"lr-LARS/params": 0.4079678504571082, "lr-LARS/params_no_weight_decay": 0.4079678504571082, "step": 6974}
450
+ {"train_loss": 0.18930186331272125, "profiling/batch_time": 0.47206446528434753, "profiling/data_time": 0.025731287896633148, "epoch": 224, "step": 6974}
451
+ {"lr-LARS/params": 0.3978120002877387, "lr-LARS/params_no_weight_decay": 0.3978120002877387, "step": 7005}
452
+ {"train_loss": 0.17496463656425476, "profiling/batch_time": 0.4707791209220886, "profiling/data_time": 0.023680970072746277, "epoch": 225, "step": 7005}
453
+ {"lr-LARS/params": 0.387762089012172, "lr-LARS/params_no_weight_decay": 0.387762089012172, "step": 7036}
454
+ {"train_loss": 0.16944003105163574, "profiling/batch_time": 0.4691097140312195, "profiling/data_time": 0.026817282661795616, "epoch": 226, "step": 7036}
455
+ {"lr-LARS/params": 0.377819296294478, "lr-LARS/params_no_weight_decay": 0.377819296294478, "step": 7067}
456
+ {"train_loss": 0.17224831879138947, "profiling/batch_time": 0.4705941081047058, "profiling/data_time": 0.027431942522525787, "epoch": 227, "step": 7067}
457
+ {"lr-LARS/params": 0.36798478922509065, "lr-LARS/params_no_weight_decay": 0.36798478922509065, "step": 7098}
458
+ {"train_loss": 0.1711820662021637, "profiling/batch_time": 0.469564825296402, "profiling/data_time": 0.023637978360056877, "epoch": 228, "step": 7098}
459
+ {"lr-LARS/params": 0.35825972218381696, "lr-LARS/params_no_weight_decay": 0.35825972218381696, "step": 7129}
460
+ {"train_loss": 0.18665482103824615, "profiling/batch_time": 0.47249582409858704, "profiling/data_time": 0.023317711427807808, "epoch": 229, "step": 7129}
461
+ {"lr-LARS/params": 0.34864523670433173, "lr-LARS/params_no_weight_decay": 0.34864523670433173, "step": 7160}
462
+ {"train_loss": 0.17504683136940002, "profiling/batch_time": 0.46968162059783936, "profiling/data_time": 0.022987984120845795, "epoch": 230, "step": 7160}
463
+ {"lr-LARS/params": 0.3391424613401869, "lr-LARS/params_no_weight_decay": 0.3391424613401869, "step": 7191}
464
+ {"train_loss": 0.18390890955924988, "profiling/batch_time": 0.4694560170173645, "profiling/data_time": 0.02294323407113552, "epoch": 231, "step": 7191}
465
+ {"lr-LARS/params": 0.32975251153233975, "lr-LARS/params_no_weight_decay": 0.32975251153233975, "step": 7222}
466
+ {"train_loss": 0.17491415143013, "profiling/batch_time": 0.4698273837566376, "profiling/data_time": 0.02227500081062317, "epoch": 232, "step": 7222}
467
+ {"lr-LARS/params": 0.32047648947822277, "lr-LARS/params_no_weight_decay": 0.32047648947822277, "step": 7253}
468
+ {"train_loss": 0.17829033732414246, "profiling/batch_time": 0.46955645084381104, "profiling/data_time": 0.027261799201369286, "epoch": 233, "step": 7253}
469
+ {"lr-LARS/params": 0.31131548400236564, "lr-LARS/params_no_weight_decay": 0.31131548400236564, "step": 7284}
470
+ {"train_loss": 0.17597118020057678, "profiling/batch_time": 0.46957966685295105, "profiling/data_time": 0.024820178747177124, "epoch": 234, "step": 7284}
471
+ {"lr-LARS/params": 0.3022705704285903, "lr-LARS/params_no_weight_decay": 0.3022705704285903, "step": 7315}
472
+ {"train_loss": 0.17364805936813354, "profiling/batch_time": 0.4696313738822937, "profiling/data_time": 0.022559965029358864, "epoch": 235, "step": 7315}
473
+ {"lr-LARS/params": 0.2933428104537867, "lr-LARS/params_no_weight_decay": 0.2933428104537867, "step": 7346}
474
+ {"train_loss": 0.18688234686851501, "profiling/batch_time": 0.46648359298706055, "profiling/data_time": 0.0223576370626688, "epoch": 236, "step": 7346}
475
+ {"lr-LARS/params": 0.28453325202329205, "lr-LARS/params_no_weight_decay": 0.28453325202329205, "step": 7377}
476
+ {"train_loss": 0.17504848539829254, "profiling/batch_time": 0.4676859676837921, "profiling/data_time": 0.02700962871313095, "epoch": 237, "step": 7377}
477
+ {"lr-LARS/params": 0.275842929207883, "lr-LARS/params_no_weight_decay": 0.275842929207883, "step": 7408}
478
+ {"train_loss": 0.17112015187740326, "profiling/batch_time": 0.46838268637657166, "profiling/data_time": 0.024366319179534912, "epoch": 238, "step": 7408}
479
+ {"lr-LARS/params": 0.26727286208239215, "lr-LARS/params_no_weight_decay": 0.26727286208239215, "step": 7439}
480
+ {"train_loss": 0.16941964626312256, "profiling/batch_time": 0.4682299792766571, "profiling/data_time": 0.022226206958293915, "epoch": 239, "step": 7439}
481
+ {"lr-LARS/params": 0.25882405660597557, "lr-LARS/params_no_weight_decay": 0.25882405660597557, "step": 7470}
482
+ {"train_loss": 0.17192833125591278, "profiling/batch_time": 0.46854209899902344, "profiling/data_time": 0.022985657677054405, "epoch": 240, "step": 7470}
483
+ {"lr-LARS/params": 0.25049750450402986, "lr-LARS/params_no_weight_decay": 0.25049750450402986, "step": 7501}
484
+ {"train_loss": 0.17485351860523224, "profiling/batch_time": 0.470010906457901, "profiling/data_time": 0.022896917536854744, "epoch": 241, "step": 7501}
485
+ {"lr-LARS/params": 0.24229418315178436, "lr-LARS/params_no_weight_decay": 0.24229418315178436, "step": 7532}
486
+ {"train_loss": 0.17267051339149475, "profiling/batch_time": 0.4714600443840027, "profiling/data_time": 0.028281528502702713, "epoch": 242, "step": 7532}
487
+ {"lr-LARS/params": 0.2342150554595754, "lr-LARS/params_no_weight_decay": 0.2342150554595754, "step": 7563}
488
+ {"train_loss": 0.17282910645008087, "profiling/batch_time": 0.4686413109302521, "profiling/data_time": 0.023492898792028427, "epoch": 243, "step": 7563}
489
+ {"lr-LARS/params": 0.2262610697598202, "lr-LARS/params_no_weight_decay": 0.2262610697598202, "step": 7594}
490
+ {"train_loss": 0.16973815858364105, "profiling/batch_time": 0.4690682291984558, "profiling/data_time": 0.025534961372613907, "epoch": 244, "step": 7594}
491
+ {"lr-LARS/params": 0.2184331596956995, "lr-LARS/params_no_weight_decay": 0.2184331596956995, "step": 7625}
492
+ {"train_loss": 0.17366011440753937, "profiling/batch_time": 0.47061821818351746, "profiling/data_time": 0.024806160479784012, "epoch": 245, "step": 7625}
493
+ {"lr-LARS/params": 0.21073224411156807, "lr-LARS/params_no_weight_decay": 0.21073224411156807, "step": 7656}
494
+ {"train_loss": 0.18184788525104523, "profiling/batch_time": 0.4697961211204529, "profiling/data_time": 0.02579355798661709, "epoch": 246, "step": 7656}
495
+ {"lr-LARS/params": 0.20315922694509966, "lr-LARS/params_no_weight_decay": 0.20315922694509966, "step": 7687}
496
+ {"train_loss": 0.17811010777950287, "profiling/batch_time": 0.46715620160102844, "profiling/data_time": 0.023613903671503067, "epoch": 247, "step": 7687}
497
+ {"lr-LARS/params": 0.19571499712118162, "lr-LARS/params_no_weight_decay": 0.19571499712118162, "step": 7718}
498
+ {"train_loss": 0.17663444578647614, "profiling/batch_time": 0.47011804580688477, "profiling/data_time": 0.022987840697169304, "epoch": 248, "step": 7718}
499
+ {"lr-LARS/params": 0.1884004284475717, "lr-LARS/params_no_weight_decay": 0.1884004284475717, "step": 7749}
500
+ {"train_loss": 0.17067159712314606, "profiling/batch_time": 0.47019141912460327, "profiling/data_time": 0.022993462160229683, "epoch": 249, "step": 7749}
501
+ {"lr-LARS/params": 0.1812163795123325, "lr-LARS/params_no_weight_decay": 0.1812163795123325, "step": 7780}
502
+ {"train_loss": 0.17344437539577484, "profiling/batch_time": 0.4685094654560089, "profiling/data_time": 0.02382558025419712, "epoch": 250, "step": 7780}
503
+ {"lr-LARS/params": 0.17416369358304803, "lr-LARS/params_no_weight_decay": 0.17416369358304803, "step": 7811}
504
+ {"train_loss": 0.18616145849227905, "profiling/batch_time": 0.4705277383327484, "profiling/data_time": 0.026447905227541924, "epoch": 251, "step": 7811}
505
+ {"lr-LARS/params": 0.1672431985078409, "lr-LARS/params_no_weight_decay": 0.1672431985078409, "step": 7842}
506
+ {"train_loss": 0.17241549491882324, "profiling/batch_time": 0.4699751138687134, "profiling/data_time": 0.025471027940511703, "epoch": 252, "step": 7842}
507
+ {"lr-LARS/params": 0.16045570661819894, "lr-LARS/params_no_weight_decay": 0.16045570661819894, "step": 7873}
508
+ {"train_loss": 0.1905423104763031, "profiling/batch_time": 0.4708714783191681, "profiling/data_time": 0.028189852833747864, "epoch": 253, "step": 7873}
509
+ {"lr-LARS/params": 0.15380201463362436, "lr-LARS/params_no_weight_decay": 0.15380201463362436, "step": 7904}
510
+ {"train_loss": 0.1750146746635437, "profiling/batch_time": 0.4708270728588104, "profiling/data_time": 0.02421695366501808, "epoch": 254, "step": 7904}
511
+ {"lr-LARS/params": 0.1472829035681122, "lr-LARS/params_no_weight_decay": 0.1472829035681122, "step": 7935}
512
+ {"train_loss": 0.17938385903835297, "profiling/batch_time": 0.4684388041496277, "profiling/data_time": 0.023896358907222748, "epoch": 255, "step": 7935}
513
+ {"lr-LARS/params": 0.14089913863847636, "lr-LARS/params_no_weight_decay": 0.14089913863847636, "step": 7966}
514
+ {"train_loss": 0.17377354204654694, "profiling/batch_time": 0.4709418714046478, "profiling/data_time": 0.02328607439994812, "epoch": 256, "step": 7966}
515
+ {"lr-LARS/params": 0.13465146917452847, "lr-LARS/params_no_weight_decay": 0.13465146917452847, "step": 7997}
516
+ {"train_loss": 0.17621932923793793, "profiling/batch_time": 0.4689711034297943, "profiling/data_time": 0.02327880449593067, "epoch": 257, "step": 7997}
517
+ {"lr-LARS/params": 0.12854062853111844, "lr-LARS/params_no_weight_decay": 0.12854062853111844, "step": 8028}
518
+ {"train_loss": 0.17323483526706696, "profiling/batch_time": 0.4690542221069336, "profiling/data_time": 0.024137593805789948, "epoch": 258, "step": 8028}
519
+ {"lr-LARS/params": 0.12256733400205563, "lr-LARS/params_no_weight_decay": 0.12256733400205563, "step": 8059}
520
+ {"train_loss": 0.17152181267738342, "profiling/batch_time": 0.4741074740886688, "profiling/data_time": 0.025246327742934227, "epoch": 259, "step": 8059}
521
+ {"lr-LARS/params": 0.11673228673591102, "lr-LARS/params_no_weight_decay": 0.11673228673591102, "step": 8090}
522
+ {"train_loss": 0.18341588973999023, "profiling/batch_time": 0.47158724069595337, "profiling/data_time": 0.03260882943868637, "epoch": 260, "step": 8090}
523
+ {"lr-LARS/params": 0.11103617165371674, "lr-LARS/params_no_weight_decay": 0.11103617165371674, "step": 8121}
524
+ {"train_loss": 0.18369051814079285, "profiling/batch_time": 0.46973201632499695, "profiling/data_time": 0.024374278262257576, "epoch": 261, "step": 8121}
525
+ {"lr-LARS/params": 0.10547965736856922, "lr-LARS/params_no_weight_decay": 0.10547965736856922, "step": 8152}
526
+ {"train_loss": 0.1768931895494461, "profiling/batch_time": 0.4713733494281769, "profiling/data_time": 0.029736177995800972, "epoch": 262, "step": 8152}
527
+ {"lr-LARS/params": 0.10006339610714717, "lr-LARS/params_no_weight_decay": 0.10006339610714717, "step": 8183}
528
+ {"train_loss": 0.17009064555168152, "profiling/batch_time": 0.46988949179649353, "profiling/data_time": 0.024103861302137375, "epoch": 263, "step": 8183}
529
+ {"lr-LARS/params": 0.09478802363315265, "lr-LARS/params_no_weight_decay": 0.09478802363315265, "step": 8214}
530
+ {"train_loss": 0.1791825294494629, "profiling/batch_time": 0.46998900175094604, "profiling/data_time": 0.024024922400712967, "epoch": 264, "step": 8214}
531
+ {"lr-LARS/params": 0.08965415917268495, "lr-LARS/params_no_weight_decay": 0.08965415917268495, "step": 8245}
532
+ {"train_loss": 0.16830989718437195, "profiling/batch_time": 0.46818381547927856, "profiling/data_time": 0.025359127670526505, "epoch": 265, "step": 8245}
533
+ {"lr-LARS/params": 0.08466240534155624, "lr-LARS/params_no_weight_decay": 0.08466240534155624, "step": 8276}
534
+ {"train_loss": 0.17181190848350525, "profiling/batch_time": 0.4695846140384674, "profiling/data_time": 0.02430625446140766, "epoch": 266, "step": 8276}
535
+ {"lr-LARS/params": 0.07981334807455476, "lr-LARS/params_no_weight_decay": 0.07981334807455476, "step": 8307}
536
+ {"train_loss": 0.17682546377182007, "profiling/batch_time": 0.46950000524520874, "profiling/data_time": 0.023915346711874008, "epoch": 267, "step": 8307}
537
+ {"lr-LARS/params": 0.07510755655666829, "lr-LARS/params_no_weight_decay": 0.07510755655666829, "step": 8338}
538
+ {"train_loss": 0.17384792864322662, "profiling/batch_time": 0.46935296058654785, "profiling/data_time": 0.02563089318573475, "epoch": 268, "step": 8338}
539
+ {"lr-LARS/params": 0.07054558315627286, "lr-LARS/params_no_weight_decay": 0.07054558315627286, "step": 8369}
540
+ {"train_loss": 0.1804978996515274, "profiling/batch_time": 0.4701468348503113, "profiling/data_time": 0.025371229276061058, "epoch": 269, "step": 8369}
541
+ {"lr-LARS/params": 0.06612796336029571, "lr-LARS/params_no_weight_decay": 0.06612796336029571, "step": 8400}
542
+ {"train_loss": 0.17953844368457794, "profiling/batch_time": 0.46890515089035034, "profiling/data_time": 0.02570568583905697, "epoch": 270, "step": 8400}
543
+ {"lr-LARS/params": 0.06185521571135856, "lr-LARS/params_no_weight_decay": 0.06185521571135856, "step": 8431}
544
+ {"train_loss": 0.17598415911197662, "profiling/batch_time": 0.46825990080833435, "profiling/data_time": 0.02364826761186123, "epoch": 271, "step": 8431}
545
+ {"lr-LARS/params": 0.057727841746912145, "lr-LARS/params_no_weight_decay": 0.057727841746912145, "step": 8462}
546
+ {"train_loss": 0.17227326333522797, "profiling/batch_time": 0.4690641164779663, "profiling/data_time": 0.02509116567671299, "epoch": 272, "step": 8462}
547
+ {"lr-LARS/params": 0.05374632594036454, "lr-LARS/params_no_weight_decay": 0.05374632594036454, "step": 8493}
548
+ {"train_loss": 0.18317991495132446, "profiling/batch_time": 0.4689784348011017, "profiling/data_time": 0.023338863626122475, "epoch": 273, "step": 8493}
549
+ {"lr-LARS/params": 0.04991113564421311, "lr-LARS/params_no_weight_decay": 0.04991113564421311, "step": 8524}
550
+ {"train_loss": 0.17789612710475922, "profiling/batch_time": 0.4693887233734131, "profiling/data_time": 0.0251321941614151, "epoch": 274, "step": 8524}
551
+ {"lr-LARS/params": 0.046222721035188034, "lr-LARS/params_no_weight_decay": 0.046222721035188034, "step": 8555}
552
+ {"train_loss": 0.1773565411567688, "profiling/batch_time": 0.47032877802848816, "profiling/data_time": 0.023329760879278183, "epoch": 275, "step": 8555}
553
+ {"lr-LARS/params": 0.04268151506140872, "lr-LARS/params_no_weight_decay": 0.04268151506140872, "step": 8586}
554
+ {"train_loss": 0.17752254009246826, "profiling/batch_time": 0.4684811532497406, "profiling/data_time": 0.02482558973133564, "epoch": 276, "step": 8586}
555
+ {"lr-LARS/params": 0.039287933391564195, "lr-LARS/params_no_weight_decay": 0.039287933391564195, "step": 8617}
556
+ {"train_loss": 0.19069981575012207, "profiling/batch_time": 0.4694057106971741, "profiling/data_time": 0.022813959047198296, "epoch": 277, "step": 8617}
557
+ {"lr-LARS/params": 0.036042374366122766, "lr-LARS/params_no_weight_decay": 0.036042374366122766, "step": 8648}
558
+ {"train_loss": 0.1716260015964508, "profiling/batch_time": 0.4705542027950287, "profiling/data_time": 0.023727353662252426, "epoch": 278, "step": 8648}
559
+ {"lr-LARS/params": 0.03294521895057377, "lr-LARS/params_no_weight_decay": 0.03294521895057377, "step": 8679}
560
+ {"train_loss": 0.18008525669574738, "profiling/batch_time": 0.4697466492652893, "profiling/data_time": 0.023951677605509758, "epoch": 279, "step": 8679}
561
+ {"lr-LARS/params": 0.029996830690709863, "lr-LARS/params_no_weight_decay": 0.029996830690709863, "step": 8710}
562
+ {"train_loss": 0.17219989001750946, "profiling/batch_time": 0.46888473629951477, "profiling/data_time": 0.022902177646756172, "epoch": 280, "step": 8710}
563
+ {"lr-LARS/params": 0.02719755566995376, "lr-LARS/params_no_weight_decay": 0.02719755566995376, "step": 8741}
564
+ {"train_loss": 0.17222197353839874, "profiling/batch_time": 0.47234392166137695, "profiling/data_time": 0.023381344974040985, "epoch": 281, "step": 8741}
565
+ {"lr-LARS/params": 0.02454772246873501, "lr-LARS/params_no_weight_decay": 0.02454772246873501, "step": 8772}
566
+ {"train_loss": 0.1748410016298294, "profiling/batch_time": 0.47060728073120117, "profiling/data_time": 0.02514701709151268, "epoch": 282, "step": 8772}
567
+ {"lr-LARS/params": 0.022047642125920152, "lr-LARS/params_no_weight_decay": 0.022047642125920152, "step": 8803}
568
+ {"train_loss": 0.17345179617404938, "profiling/batch_time": 0.46958237886428833, "profiling/data_time": 0.023087121546268463, "epoch": 283, "step": 8803}
569
+ {"lr-LARS/params": 0.01969760810230426, "lr-LARS/params_no_weight_decay": 0.01969760810230426, "step": 8834}
570
+ {"train_loss": 0.17867422103881836, "profiling/batch_time": 0.4683537483215332, "profiling/data_time": 0.026134122163057327, "epoch": 284, "step": 8834}
571
+ {"lr-LARS/params": 0.01749789624616345, "lr-LARS/params_no_weight_decay": 0.01749789624616345, "step": 8865}
572
+ {"train_loss": 0.17999346554279327, "profiling/batch_time": 0.4701542556285858, "profiling/data_time": 0.023083658888936043, "epoch": 285, "step": 8865}
573
+ {"lr-LARS/params": 0.015448764760875533, "lr-LARS/params_no_weight_decay": 0.015448764760875533, "step": 8896}
574
+ {"train_loss": 0.1751020848751068, "profiling/batch_time": 0.4713783264160156, "profiling/data_time": 0.025269582867622375, "epoch": 286, "step": 8896}
575
+ {"lr-LARS/params": 0.013550454174612337, "lr-LARS/params_no_weight_decay": 0.013550454174612337, "step": 8927}
576
+ {"train_loss": 0.17709775269031525, "profiling/batch_time": 0.4701276123523712, "profiling/data_time": 0.023244669660925865, "epoch": 287, "step": 8927}
577
+ {"lr-LARS/params": 0.011803187312106342, "lr-LARS/params_no_weight_decay": 0.011803187312106342, "step": 8958}
578
+ {"train_loss": 0.17954827845096588, "profiling/batch_time": 0.46981576085090637, "profiling/data_time": 0.024186134338378906, "epoch": 288, "step": 8958}
579
+ {"lr-LARS/params": 0.010207169268495044, "lr-LARS/params_no_weight_decay": 0.010207169268495044, "step": 8989}
580
+ {"train_loss": 0.17205561697483063, "profiling/batch_time": 0.47181233763694763, "profiling/data_time": 0.025601908564567566, "epoch": 289, "step": 8989}
581
+ {"lr-LARS/params": 0.008762587385247678, "lr-LARS/params_no_weight_decay": 0.008762587385247678, "step": 9020}
582
+ {"train_loss": 0.17761465907096863, "profiling/batch_time": 0.4702886641025543, "profiling/data_time": 0.025239525362849236, "epoch": 290, "step": 9020}
583
+ {"lr-LARS/params": 0.007469611228173464, "lr-LARS/params_no_weight_decay": 0.007469611228173464, "step": 9051}
584
+ {"train_loss": 0.1817539930343628, "profiling/batch_time": 0.46833762526512146, "profiling/data_time": 0.027799051254987717, "epoch": 291, "step": 9051}
585
+ {"lr-LARS/params": 0.0063283925675196645, "lr-LARS/params_no_weight_decay": 0.0063283925675196645, "step": 9082}
586
+ {"train_loss": 0.18424828350543976, "profiling/batch_time": 0.4709780812263489, "profiling/data_time": 0.02403721585869789, "epoch": 292, "step": 9082}
587
+ {"lr-LARS/params": 0.005339065360155056, "lr-LARS/params_no_weight_decay": 0.005339065360155056, "step": 9113}
588
+ {"train_loss": 0.16752569377422333, "profiling/batch_time": 0.47176021337509155, "profiling/data_time": 0.02262665331363678, "epoch": 293, "step": 9113}
589
+ {"lr-LARS/params": 0.00450174573384713, "lr-LARS/params_no_weight_decay": 0.00450174573384713, "step": 9144}
590
+ {"train_loss": 0.17096474766731262, "profiling/batch_time": 0.4697352349758148, "profiling/data_time": 0.024586746469140053, "epoch": 294, "step": 9144}
591
+ {"lr-LARS/params": 0.0038165319736305817, "lr-LARS/params_no_weight_decay": 0.0038165319736305817, "step": 9175}
592
+ {"train_loss": 0.16857412457466125, "profiling/batch_time": 0.4673163592815399, "profiling/data_time": 0.023136014118790627, "epoch": 295, "step": 9175}
593
+ {"lr-LARS/params": 0.003283504510270415, "lr-LARS/params_no_weight_decay": 0.003283504510270415, "step": 9206}
594
+ {"train_loss": 0.16997942328453064, "profiling/batch_time": 0.46821701526641846, "profiling/data_time": 0.024742355570197105, "epoch": 296, "step": 9206}
595
+ {"lr-LARS/params": 0.0029027259108212336, "lr-LARS/params_no_weight_decay": 0.0029027259108212336, "step": 9237}
596
+ {"train_loss": 0.17281530797481537, "profiling/batch_time": 0.46862053871154785, "profiling/data_time": 0.023910168558359146, "epoch": 297, "step": 9237}
597
+ {"lr-LARS/params": 0.002674240871282573, "lr-LARS/params_no_weight_decay": 0.002674240871282573, "step": 9268}
598
+ {"train_loss": 0.176463782787323, "profiling/batch_time": 0.47088536620140076, "profiling/data_time": 0.024565089493989944, "epoch": 298, "step": 9268}
599
+ {"lr-LARS/params": 0.002598076211353316, "lr-LARS/params_no_weight_decay": 0.002598076211353316, "step": 9299}
600
+ {"train_loss": 0.18089812994003296, "profiling/batch_time": 0.4701261818408966, "profiling/data_time": 0.024234332144260406, "epoch": 299, "step": 9299}