coung21 commited on
Commit
6f15210
·
verified ·
1 Parent(s): 600f7f4

Upload folder using huggingface_hub

Browse files
ssl_distil/swin/checkpoints/epoch=299-step=9300.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f17cdeeb6c9f4f626ec06754e8c3769016fbb718d0f9a0adcffc4b6dad865b0
3
+ size 235946087
ssl_distil/swin/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37cf9ab7058d6219fad5a3fa21a1eea2cc1a5926bfd8b14c7750478a74af592d
3
+ size 235946087
ssl_distil/swin/exported_models/exported_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95d60b376e9c5217510286fdc919f23f5504377bb8dd10191113123ec2958bf
3
+ size 113380887
ssl_distil/swin/metrics.jsonl ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"lr-LARS/params": 0.2598076211353316, "lr-LARS/params_no_weight_decay": 0.2598076211353316, "step": 30}
2
+ {"train_loss": 0.22584673762321472, "profiling/batch_time": 0.2770966589450836, "profiling/data_time": 0.022097904235124588, "epoch": 0, "step": 30}
3
+ {"lr-LARS/params": 0.5196152422706632, "lr-LARS/params_no_weight_decay": 0.5196152422706632, "step": 61}
4
+ {"train_loss": 0.21147510409355164, "profiling/batch_time": 0.2790742814540863, "profiling/data_time": 0.023040922358632088, "epoch": 1, "step": 61}
5
+ {"lr-LARS/params": 0.7794228634059948, "lr-LARS/params_no_weight_decay": 0.7794228634059948, "step": 92}
6
+ {"train_loss": 0.20014415681362152, "profiling/batch_time": 0.2778811752796173, "profiling/data_time": 0.02373834326863289, "epoch": 2, "step": 92}
7
+ {"lr-LARS/params": 1.0392304845413265, "lr-LARS/params_no_weight_decay": 1.0392304845413265, "step": 123}
8
+ {"train_loss": 0.17293605208396912, "profiling/batch_time": 0.2794252932071686, "profiling/data_time": 0.02198789082467556, "epoch": 3, "step": 123}
9
+ {"lr-LARS/params": 1.299038105676658, "lr-LARS/params_no_weight_decay": 1.299038105676658, "step": 154}
10
+ {"train_loss": 0.16217824816703796, "profiling/batch_time": 0.2790820002555847, "profiling/data_time": 0.026488587260246277, "epoch": 4, "step": 154}
11
+ {"lr-LARS/params": 1.5588457268119895, "lr-LARS/params_no_weight_decay": 1.5588457268119895, "step": 185}
12
+ {"train_loss": 0.16016711294651031, "profiling/batch_time": 0.2812521755695343, "profiling/data_time": 0.023425640538334846, "epoch": 5, "step": 185}
13
+ {"lr-LARS/params": 1.818653347947321, "lr-LARS/params_no_weight_decay": 1.818653347947321, "step": 216}
14
+ {"train_loss": 0.15671734511852264, "profiling/batch_time": 0.2799358665943146, "profiling/data_time": 0.02317841909825802, "epoch": 6, "step": 216}
15
+ {"lr-LARS/params": 2.078460969082653, "lr-LARS/params_no_weight_decay": 2.078460969082653, "step": 247}
16
+ {"train_loss": 0.1538063883781433, "profiling/batch_time": 0.2806060314178467, "profiling/data_time": 0.023176487535238266, "epoch": 7, "step": 247}
17
+ {"lr-LARS/params": 2.3382685902179845, "lr-LARS/params_no_weight_decay": 2.3382685902179845, "step": 278}
18
+ {"train_loss": 0.15089641511440277, "profiling/batch_time": 0.2811547815799713, "profiling/data_time": 0.024045055732131004, "epoch": 8, "step": 278}
19
+ {"lr-LARS/params": 2.598076211353316, "lr-LARS/params_no_weight_decay": 2.598076211353316, "step": 309}
20
+ {"train_loss": 0.14579689502716064, "profiling/batch_time": 0.27912092208862305, "profiling/data_time": 0.022898146882653236, "epoch": 9, "step": 309}
21
+ {"lr-LARS/params": 2.5980048812424954, "lr-LARS/params_no_weight_decay": 2.5980048812424954, "step": 340}
22
+ {"train_loss": 0.14176174998283386, "profiling/batch_time": 0.2808430790901184, "profiling/data_time": 0.02337420918047428, "epoch": 10, "step": 340}
23
+ {"lr-LARS/params": 2.597781309440988, "lr-LARS/params_no_weight_decay": 2.597781309440988, "step": 371}
24
+ {"train_loss": 0.13893607258796692, "profiling/batch_time": 0.28029969334602356, "profiling/data_time": 0.023509014397859573, "epoch": 11, "step": 371}
25
+ {"lr-LARS/params": 2.597405442935369, "lr-LARS/params_no_weight_decay": 2.597405442935369, "step": 402}
26
+ {"train_loss": 0.13866105675697327, "profiling/batch_time": 0.28151237964630127, "profiling/data_time": 0.023238487541675568, "epoch": 12, "step": 402}
27
+ {"lr-LARS/params": 2.5968773258450537, "lr-LARS/params_no_weight_decay": 2.5968773258450537, "step": 433}
28
+ {"train_loss": 0.1358853280544281, "profiling/batch_time": 0.28192371129989624, "profiling/data_time": 0.02395440638065338, "epoch": 13, "step": 433}
29
+ {"lr-LARS/params": 2.596197020160716, "lr-LARS/params_no_weight_decay": 2.596197020160716, "step": 464}
30
+ {"train_loss": 0.1338852345943451, "profiling/batch_time": 0.28087782859802246, "profiling/data_time": 0.022863736376166344, "epoch": 14, "step": 464}
31
+ {"lr-LARS/params": 2.595364605737007, "lr-LARS/params_no_weight_decay": 2.595364605737007, "step": 495}
32
+ {"train_loss": 0.1353781372308731, "profiling/batch_time": 0.2807155251502991, "profiling/data_time": 0.02339656837284565, "epoch": 15, "step": 495}
33
+ {"lr-LARS/params": 2.594380180283186, "lr-LARS/params_no_weight_decay": 2.594380180283186, "step": 526}
34
+ {"train_loss": 0.1319398432970047, "profiling/batch_time": 0.27958935499191284, "profiling/data_time": 0.023036161437630653, "epoch": 16, "step": 526}
35
+ {"lr-LARS/params": 2.593243859351649, "lr-LARS/params_no_weight_decay": 2.593243859351649, "step": 557}
36
+ {"train_loss": 0.13222238421440125, "profiling/batch_time": 0.2795141637325287, "profiling/data_time": 0.023802582174539566, "epoch": 17, "step": 557}
37
+ {"lr-LARS/params": 2.5919557763243697, "lr-LARS/params_no_weight_decay": 2.5919557763243697, "step": 588}
38
+ {"train_loss": 0.13191896677017212, "profiling/batch_time": 0.27987730503082275, "profiling/data_time": 0.02559501677751541, "epoch": 18, "step": 588}
39
+ {"lr-LARS/params": 2.5905160823972344, "lr-LARS/params_no_weight_decay": 2.5905160823972344, "step": 619}
40
+ {"train_loss": 0.12764722108840942, "profiling/batch_time": 0.28008851408958435, "profiling/data_time": 0.025319548323750496, "epoch": 19, "step": 619}
41
+ {"lr-LARS/params": 2.5889249465623028, "lr-LARS/params_no_weight_decay": 2.5889249465623028, "step": 650}
42
+ {"train_loss": 0.1321018636226654, "profiling/batch_time": 0.2800644040107727, "profiling/data_time": 0.024681445211172104, "epoch": 20, "step": 650}
43
+ {"lr-LARS/params": 2.587182555587967, "lr-LARS/params_no_weight_decay": 2.587182555587967, "step": 681}
44
+ {"train_loss": 0.12597182393074036, "profiling/batch_time": 0.2792363166809082, "profiling/data_time": 0.025746271014213562, "epoch": 21, "step": 681}
45
+ {"lr-LARS/params": 2.5852891139970304, "lr-LARS/params_no_weight_decay": 2.5852891139970304, "step": 712}
46
+ {"train_loss": 0.12496745586395264, "profiling/batch_time": 0.27886590361595154, "profiling/data_time": 0.02359061688184738, "epoch": 22, "step": 712}
47
+ {"lr-LARS/params": 2.5832448440427, "lr-LARS/params_no_weight_decay": 2.5832448440427, "step": 743}
48
+ {"train_loss": 0.12794452905654907, "profiling/batch_time": 0.2783857583999634, "profiling/data_time": 0.022286171093583107, "epoch": 23, "step": 743}
49
+ {"lr-LARS/params": 2.5810499856824984, "lr-LARS/params_no_weight_decay": 2.5810499856824984, "step": 774}
50
+ {"train_loss": 0.12504149973392487, "profiling/batch_time": 0.27884066104888916, "profiling/data_time": 0.022135062143206596, "epoch": 24, "step": 774}
51
+ {"lr-LARS/params": 2.578704796550098, "lr-LARS/params_no_weight_decay": 2.578704796550098, "step": 805}
52
+ {"train_loss": 0.12440083175897598, "profiling/batch_time": 0.27857354283332825, "profiling/data_time": 0.021730415523052216, "epoch": 25, "step": 805}
53
+ {"lr-LARS/params": 2.5762095519250785, "lr-LARS/params_no_weight_decay": 2.5762095519250785, "step": 836}
54
+ {"train_loss": 0.12585747241973877, "profiling/batch_time": 0.2788260579109192, "profiling/data_time": 0.0288446843624115, "epoch": 26, "step": 836}
55
+ {"lr-LARS/params": 2.5735645447006155, "lr-LARS/params_no_weight_decay": 2.5735645447006155, "step": 867}
56
+ {"train_loss": 0.12297409772872925, "profiling/batch_time": 0.27876409888267517, "profiling/data_time": 0.022233236581087112, "epoch": 27, "step": 867}
57
+ {"lr-LARS/params": 2.5707700853491007, "lr-LARS/params_no_weight_decay": 2.5707700853491007, "step": 898}
58
+ {"train_loss": 0.1212865561246872, "profiling/batch_time": 0.2785065174102783, "profiling/data_time": 0.022819457575678825, "epoch": 28, "step": 898}
59
+ {"lr-LARS/params": 2.5678265018856963, "lr-LARS/params_no_weight_decay": 2.5678265018856963, "step": 929}
60
+ {"train_loss": 0.12433307617902756, "profiling/batch_time": 0.2788432538509369, "profiling/data_time": 0.022183503955602646, "epoch": 29, "step": 929}
61
+ {"lr-LARS/params": 2.5647341398298367, "lr-LARS/params_no_weight_decay": 2.5647341398298367, "step": 960}
62
+ {"train_loss": 0.12567129731178284, "profiling/batch_time": 0.2801804542541504, "profiling/data_time": 0.02350696548819542, "epoch": 30, "step": 960}
63
+ {"lr-LARS/params": 2.5614933621646667, "lr-LARS/params_no_weight_decay": 2.5614933621646667, "step": 991}
64
+ {"train_loss": 0.12307902425527573, "profiling/batch_time": 0.2788792848587036, "profiling/data_time": 0.023217208683490753, "epoch": 31, "step": 991}
65
+ {"lr-LARS/params": 2.5581045492944376, "lr-LARS/params_no_weight_decay": 2.5581045492944376, "step": 1022}
66
+ {"train_loss": 0.12495798617601395, "profiling/batch_time": 0.2788478434085846, "profiling/data_time": 0.025806834921240807, "epoch": 32, "step": 1022}
67
+ {"lr-LARS/params": 2.5545680989998525, "lr-LARS/params_no_weight_decay": 2.5545680989998525, "step": 1053}
68
+ {"train_loss": 0.12073039263486862, "profiling/batch_time": 0.27889037132263184, "profiling/data_time": 0.021867675706744194, "epoch": 33, "step": 1053}
69
+ {"lr-LARS/params": 2.550884426391377, "lr-LARS/params_no_weight_decay": 2.550884426391377, "step": 1084}
70
+ {"train_loss": 0.12488101422786713, "profiling/batch_time": 0.27926313877105713, "profiling/data_time": 0.02363508939743042, "epoch": 34, "step": 1084}
71
+ {"lr-LARS/params": 2.547053963860512, "lr-LARS/params_no_weight_decay": 2.547053963860512, "step": 1115}
72
+ {"train_loss": 0.12393435090780258, "profiling/batch_time": 0.27925699949264526, "profiling/data_time": 0.02224188670516014, "epoch": 35, "step": 1115}
73
+ {"lr-LARS/params": 2.543077161029039, "lr-LARS/params_no_weight_decay": 2.543077161029039, "step": 1146}
74
+ {"train_loss": 0.12077713757753372, "profiling/batch_time": 0.2798973321914673, "profiling/data_time": 0.02240137569606304, "epoch": 36, "step": 1146}
75
+ {"lr-LARS/params": 2.538954484696244, "lr-LARS/params_no_weight_decay": 2.538954484696244, "step": 1177}
76
+ {"train_loss": 0.12328200787305832, "profiling/batch_time": 0.2794276773929596, "profiling/data_time": 0.02772757224738598, "epoch": 37, "step": 1177}
77
+ {"lr-LARS/params": 2.5346864187841254, "lr-LARS/params_no_weight_decay": 2.5346864187841254, "step": 1208}
78
+ {"train_loss": 0.12256190925836563, "profiling/batch_time": 0.2802460789680481, "profiling/data_time": 0.0233388002961874, "epoch": 38, "step": 1208}
79
+ {"lr-LARS/params": 2.5302734642805884, "lr-LARS/params_no_weight_decay": 2.5302734642805884, "step": 1239}
80
+ {"train_loss": 0.11945371329784393, "profiling/batch_time": 0.2793503999710083, "profiling/data_time": 0.021959848701953888, "epoch": 39, "step": 1239}
81
+ {"lr-LARS/params": 2.5257161391806404, "lr-LARS/params_no_weight_decay": 2.5257161391806404, "step": 1270}
82
+ {"train_loss": 0.12146937847137451, "profiling/batch_time": 0.27939745783805847, "profiling/data_time": 0.022585049271583557, "epoch": 40, "step": 1270}
83
+ {"lr-LARS/params": 2.521014978425588, "lr-LARS/params_no_weight_decay": 2.521014978425588, "step": 1301}
84
+ {"train_loss": 0.11996721476316452, "profiling/batch_time": 0.2789105772972107, "profiling/data_time": 0.02342303656041622, "epoch": 41, "step": 1301}
85
+ {"lr-LARS/params": 2.5161705338402474, "lr-LARS/params_no_weight_decay": 2.5161705338402474, "step": 1332}
86
+ {"train_loss": 0.11984992027282715, "profiling/batch_time": 0.27857643365859985, "profiling/data_time": 0.022316759452223778, "epoch": 42, "step": 1332}
87
+ {"lr-LARS/params": 2.5111833740681657, "lr-LARS/params_no_weight_decay": 2.5111833740681657, "step": 1363}
88
+ {"train_loss": 0.11748221516609192, "profiling/batch_time": 0.2788434624671936, "profiling/data_time": 0.02484634518623352, "epoch": 43, "step": 1363}
89
+ {"lr-LARS/params": 2.506054084504878, "lr-LARS/params_no_weight_decay": 2.506054084504878, "step": 1394}
90
+ {"train_loss": 0.12205217033624649, "profiling/batch_time": 0.2789729833602905, "profiling/data_time": 0.022893080487847328, "epoch": 44, "step": 1394}
91
+ {"lr-LARS/params": 2.5007832672291936, "lr-LARS/params_no_weight_decay": 2.5007832672291936, "step": 1425}
92
+ {"train_loss": 0.12016452103853226, "profiling/batch_time": 0.27985966205596924, "profiling/data_time": 0.026873497292399406, "epoch": 45, "step": 1425}
93
+ {"lr-LARS/params": 2.4953715409325197, "lr-LARS/params_no_weight_decay": 2.4953715409325197, "step": 1456}
94
+ {"train_loss": 0.12055418640375137, "profiling/batch_time": 0.27913662791252136, "profiling/data_time": 0.022502025589346886, "epoch": 46, "step": 1456}
95
+ {"lr-LARS/params": 2.489819540846241, "lr-LARS/params_no_weight_decay": 2.489819540846241, "step": 1487}
96
+ {"train_loss": 0.11906570196151733, "profiling/batch_time": 0.2790326178073883, "profiling/data_time": 0.022258497774600983, "epoch": 47, "step": 1487}
97
+ {"lr-LARS/params": 2.4841279186671574, "lr-LARS/params_no_weight_decay": 2.4841279186671574, "step": 1518}
98
+ {"train_loss": 0.12310885637998581, "profiling/batch_time": 0.2798037827014923, "profiling/data_time": 0.0221890676766634, "epoch": 48, "step": 1518}
99
+ {"lr-LARS/params": 2.478297342480987, "lr-LARS/params_no_weight_decay": 2.478297342480987, "step": 1549}
100
+ {"train_loss": 0.11961046606302261, "profiling/batch_time": 0.2789289057254791, "profiling/data_time": 0.022111238911747932, "epoch": 49, "step": 1549}
101
+ {"lr-LARS/params": 2.472328496683943, "lr-LARS/params_no_weight_decay": 2.472328496683943, "step": 1580}
102
+ {"train_loss": 0.1191822960972786, "profiling/batch_time": 0.2789729833602905, "profiling/data_time": 0.022275205701589584, "epoch": 50, "step": 1580}
103
+ {"lr-LARS/params": 2.4662220819024014, "lr-LARS/params_no_weight_decay": 2.4662220819024014, "step": 1611}
104
+ {"train_loss": 0.12063232809305191, "profiling/batch_time": 0.2788935601711273, "profiling/data_time": 0.0229237861931324, "epoch": 51, "step": 1611}
105
+ {"lr-LARS/params": 2.459978814910663, "lr-LARS/params_no_weight_decay": 2.459978814910663, "step": 1642}
106
+ {"train_loss": 0.1168585792183876, "profiling/batch_time": 0.278995156288147, "profiling/data_time": 0.0273948535323143, "epoch": 52, "step": 1642}
107
+ {"lr-LARS/params": 2.453599428546812, "lr-LARS/params_no_weight_decay": 2.453599428546812, "step": 1673}
108
+ {"train_loss": 0.1194971576333046, "profiling/batch_time": 0.2786655128002167, "profiling/data_time": 0.022007567808032036, "epoch": 53, "step": 1673}
109
+ {"lr-LARS/params": 2.4470846716267016, "lr-LARS/params_no_weight_decay": 2.4470846716267016, "step": 1704}
110
+ {"train_loss": 0.11743708699941635, "profiling/batch_time": 0.2796483337879181, "profiling/data_time": 0.023972559720277786, "epoch": 54, "step": 1704}
111
+ {"lr-LARS/params": 2.440435308856054, "lr-LARS/params_no_weight_decay": 2.440435308856054, "step": 1735}
112
+ {"train_loss": 0.11674682050943375, "profiling/batch_time": 0.28049007058143616, "profiling/data_time": 0.02774105779826641, "epoch": 55, "step": 1735}
113
+ {"lr-LARS/params": 2.433652120740699, "lr-LARS/params_no_weight_decay": 2.433652120740699, "step": 1766}
114
+ {"train_loss": 0.12133724242448807, "profiling/batch_time": 0.28011202812194824, "profiling/data_time": 0.02439241111278534, "epoch": 56, "step": 1766}
115
+ {"lr-LARS/params": 2.426735903494959, "lr-LARS/params_no_weight_decay": 2.426735903494959, "step": 1797}
116
+ {"train_loss": 0.11911539733409882, "profiling/batch_time": 0.2796039879322052, "profiling/data_time": 0.025554845109581947, "epoch": 57, "step": 1797}
117
+ {"lr-LARS/params": 2.4196874689481884, "lr-LARS/params_no_weight_decay": 2.4196874689481884, "step": 1828}
118
+ {"train_loss": 0.116547591984272, "profiling/batch_time": 0.28034064173698425, "profiling/data_time": 0.024206453934311867, "epoch": 58, "step": 1828}
119
+ {"lr-LARS/params": 2.4125076444494793, "lr-LARS/params_no_weight_decay": 2.4125076444494793, "step": 1859}
120
+ {"train_loss": 0.11996527016162872, "profiling/batch_time": 0.28123706579208374, "profiling/data_time": 0.023192957043647766, "epoch": 59, "step": 1859}
121
+ {"lr-LARS/params": 2.40519727277055, "lr-LARS/params_no_weight_decay": 2.40519727277055, "step": 1890}
122
+ {"train_loss": 0.11859787255525589, "profiling/batch_time": 0.2804379165172577, "profiling/data_time": 0.023767519742250443, "epoch": 60, "step": 1890}
123
+ {"lr-LARS/params": 2.397757212006817, "lr-LARS/params_no_weight_decay": 2.397757212006817, "step": 1921}
124
+ {"train_loss": 0.11801590770483017, "profiling/batch_time": 0.28056225180625916, "profiling/data_time": 0.022782791405916214, "epoch": 61, "step": 1921}
125
+ {"lr-LARS/params": 2.3901883354766715, "lr-LARS/params_no_weight_decay": 2.3901883354766715, "step": 1952}
126
+ {"train_loss": 0.11956115812063217, "profiling/batch_time": 0.2796354591846466, "profiling/data_time": 0.023022066801786423, "epoch": 62, "step": 1952}
127
+ {"lr-LARS/params": 2.3824915316189714, "lr-LARS/params_no_weight_decay": 2.3824915316189714, "step": 1983}
128
+ {"train_loss": 0.11804978549480438, "profiling/batch_time": 0.2799606919288635, "profiling/data_time": 0.02406536228954792, "epoch": 63, "step": 1983}
129
+ {"lr-LARS/params": 2.374667703888753, "lr-LARS/params_no_weight_decay": 2.374667703888753, "step": 2014}
130
+ {"train_loss": 0.11880285292863846, "profiling/batch_time": 0.2797699272632599, "profiling/data_time": 0.024168381467461586, "epoch": 64, "step": 2014}
131
+ {"lr-LARS/params": 2.366717770651184, "lr-LARS/params_no_weight_decay": 2.366717770651184, "step": 2045}
132
+ {"train_loss": 0.1192813515663147, "profiling/batch_time": 0.2799569070339203, "profiling/data_time": 0.02423913963139057, "epoch": 65, "step": 2045}
133
+ {"lr-LARS/params": 2.358642665073767, "lr-LARS/params_no_weight_decay": 2.358642665073767, "step": 2076}
134
+ {"train_loss": 0.1185673251748085, "profiling/batch_time": 0.2794836759567261, "profiling/data_time": 0.023824365809559822, "epoch": 66, "step": 2076}
135
+ {"lr-LARS/params": 2.350443335016799, "lr-LARS/params_no_weight_decay": 2.350443335016799, "step": 2107}
136
+ {"train_loss": 0.1158844381570816, "profiling/batch_time": 0.2805968225002289, "profiling/data_time": 0.024680981412529945, "epoch": 67, "step": 2107}
137
+ {"lr-LARS/params": 2.3421207429221167, "lr-LARS/params_no_weight_decay": 2.3421207429221167, "step": 2138}
138
+ {"train_loss": 0.11455993354320526, "profiling/batch_time": 0.27932894229888916, "profiling/data_time": 0.024037400260567665, "epoch": 68, "step": 2138}
139
+ {"lr-LARS/params": 2.3336758657001218, "lr-LARS/params_no_weight_decay": 2.3336758657001218, "step": 2169}
140
+ {"train_loss": 0.11302853375673294, "profiling/batch_time": 0.2805849313735962, "profiling/data_time": 0.02310911752283573, "epoch": 69, "step": 2169}
141
+ {"lr-LARS/params": 2.32510969461511, "lr-LARS/params_no_weight_decay": 2.32510969461511, "step": 2200}
142
+ {"train_loss": 0.11800353229045868, "profiling/batch_time": 0.28137290477752686, "profiling/data_time": 0.024821601808071136, "epoch": 70, "step": 2200}
143
+ {"lr-LARS/params": 2.316423235168918, "lr-LARS/params_no_weight_decay": 2.316423235168918, "step": 2231}
144
+ {"train_loss": 0.11907719820737839, "profiling/batch_time": 0.27940839529037476, "profiling/data_time": 0.024574119597673416, "epoch": 71, "step": 2231}
145
+ {"lr-LARS/params": 2.3076175069828944, "lr-LARS/params_no_weight_decay": 2.3076175069828944, "step": 2262}
146
+ {"train_loss": 0.11701899021863937, "profiling/batch_time": 0.28050732612609863, "profiling/data_time": 0.02425074763596058, "epoch": 72, "step": 2262}
147
+ {"lr-LARS/params": 2.29869354367822, "lr-LARS/params_no_weight_decay": 2.29869354367822, "step": 2293}
148
+ {"train_loss": 0.11339728534221649, "profiling/batch_time": 0.27998441457748413, "profiling/data_time": 0.023968074470758438, "epoch": 73, "step": 2293}
149
+ {"lr-LARS/params": 2.2896523927545753, "lr-LARS/params_no_weight_decay": 2.2896523927545753, "step": 2324}
150
+ {"train_loss": 0.11264032870531082, "profiling/batch_time": 0.28017228841781616, "profiling/data_time": 0.023805856704711914, "epoch": 74, "step": 2324}
151
+ {"lr-LARS/params": 2.2804951154671893, "lr-LARS/params_no_weight_decay": 2.2804951154671893, "step": 2355}
152
+ {"train_loss": 0.11792505532503128, "profiling/batch_time": 0.2795029282569885, "profiling/data_time": 0.02383408509194851, "epoch": 75, "step": 2355}
153
+ {"lr-LARS/params": 2.271222786702267, "lr-LARS/params_no_weight_decay": 2.271222786702267, "step": 2386}
154
+ {"train_loss": 0.11773707717657089, "profiling/batch_time": 0.2796958088874817, "profiling/data_time": 0.023966997861862183, "epoch": 76, "step": 2386}
155
+ {"lr-LARS/params": 2.2618364948508183, "lr-LARS/params_no_weight_decay": 2.2618364948508183, "step": 2417}
156
+ {"train_loss": 0.11778430640697479, "profiling/batch_time": 0.27950072288513184, "profiling/data_time": 0.023001212626695633, "epoch": 77, "step": 2417}
157
+ {"lr-LARS/params": 2.252337341680902, "lr-LARS/params_no_weight_decay": 2.252337341680902, "step": 2448}
158
+ {"train_loss": 0.11588811129331589, "profiling/batch_time": 0.27974843978881836, "profiling/data_time": 0.023983292281627655, "epoch": 78, "step": 2448}
159
+ {"lr-LARS/params": 2.242726442208301, "lr-LARS/params_no_weight_decay": 2.242726442208301, "step": 2479}
160
+ {"train_loss": 0.11309097707271576, "profiling/batch_time": 0.28042975068092346, "profiling/data_time": 0.023804064840078354, "epoch": 79, "step": 2479}
161
+ {"lr-LARS/params": 2.233004924565638, "lr-LARS/params_no_weight_decay": 2.233004924565638, "step": 2510}
162
+ {"train_loss": 0.11501632630825043, "profiling/batch_time": 0.2800535261631012, "profiling/data_time": 0.02348131127655506, "epoch": 80, "step": 2510}
163
+ {"lr-LARS/params": 2.2231739298699607, "lr-LARS/params_no_weight_decay": 2.2231739298699607, "step": 2541}
164
+ {"train_loss": 0.11184676736593246, "profiling/batch_time": 0.2800959050655365, "profiling/data_time": 0.023721329867839813, "epoch": 81, "step": 2541}
165
+ {"lr-LARS/params": 2.213234612088789, "lr-LARS/params_no_weight_decay": 2.213234612088789, "step": 2572}
166
+ {"train_loss": 0.11183136701583862, "profiling/batch_time": 0.27969780564308167, "profiling/data_time": 0.027246838435530663, "epoch": 82, "step": 2572}
167
+ {"lr-LARS/params": 2.2031881379046676, "lr-LARS/params_no_weight_decay": 2.2031881379046676, "step": 2603}
168
+ {"train_loss": 0.11678218096494675, "profiling/batch_time": 0.2809027433395386, "profiling/data_time": 0.024599457159638405, "epoch": 83, "step": 2603}
169
+ {"lr-LARS/params": 2.193035686578219, "lr-LARS/params_no_weight_decay": 2.193035686578219, "step": 2634}
170
+ {"train_loss": 0.11252269148826599, "profiling/batch_time": 0.2809029519557953, "profiling/data_time": 0.023043807595968246, "epoch": 84, "step": 2634}
171
+ {"lr-LARS/params": 2.1827784498097187, "lr-LARS/params_no_weight_decay": 2.1827784498097187, "step": 2665}
172
+ {"train_loss": 0.11494652926921844, "profiling/batch_time": 0.2817230820655823, "profiling/data_time": 0.024259179830551147, "epoch": 85, "step": 2665}
173
+ {"lr-LARS/params": 2.172417631599216, "lr-LARS/params_no_weight_decay": 2.172417631599216, "step": 2696}
174
+ {"train_loss": 0.11659146845340729, "profiling/batch_time": 0.2795451879501343, "profiling/data_time": 0.023267831653356552, "epoch": 86, "step": 2696}
175
+ {"lr-LARS/params": 2.1619544481052047, "lr-LARS/params_no_weight_decay": 2.1619544481052047, "step": 2727}
176
+ {"train_loss": 0.1149899885058403, "profiling/batch_time": 0.28092753887176514, "profiling/data_time": 0.022908177226781845, "epoch": 87, "step": 2727}
177
+ {"lr-LARS/params": 2.1513901275018736, "lr-LARS/params_no_weight_decay": 2.1513901275018736, "step": 2758}
178
+ {"train_loss": 0.11035887897014618, "profiling/batch_time": 0.2804507911205292, "profiling/data_time": 0.023928705602884293, "epoch": 88, "step": 2758}
179
+ {"lr-LARS/params": 2.1407259098349396, "lr-LARS/params_no_weight_decay": 2.1407259098349396, "step": 2789}
180
+ {"train_loss": 0.11495530605316162, "profiling/batch_time": 0.2803969085216522, "profiling/data_time": 0.023896412923932076, "epoch": 89, "step": 2789}
181
+ {"lr-LARS/params": 2.1299630468760906, "lr-LARS/params_no_weight_decay": 2.1299630468760906, "step": 2820}
182
+ {"train_loss": 0.1140855923295021, "profiling/batch_time": 0.28215476870536804, "profiling/data_time": 0.02504182793200016, "epoch": 90, "step": 2820}
183
+ {"lr-LARS/params": 2.1191028019760534, "lr-LARS/params_no_weight_decay": 2.1191028019760534, "step": 2851}
184
+ {"train_loss": 0.11240542680025101, "profiling/batch_time": 0.28048568964004517, "profiling/data_time": 0.025712335482239723, "epoch": 91, "step": 2851}
185
+ {"lr-LARS/params": 2.108146449916301, "lr-LARS/params_no_weight_decay": 2.108146449916301, "step": 2882}
186
+ {"train_loss": 0.11606117337942123, "profiling/batch_time": 0.2813667953014374, "profiling/data_time": 0.024673476815223694, "epoch": 92, "step": 2882}
187
+ {"lr-LARS/params": 2.097095276759416, "lr-LARS/params_no_weight_decay": 2.097095276759416, "step": 2913}
188
+ {"train_loss": 0.10965649038553238, "profiling/batch_time": 0.2798430025577545, "profiling/data_time": 0.02287481725215912, "epoch": 93, "step": 2913}
189
+ {"lr-LARS/params": 2.0859505796981335, "lr-LARS/params_no_weight_decay": 2.0859505796981335, "step": 2944}
190
+ {"train_loss": 0.11362116783857346, "profiling/batch_time": 0.2804887294769287, "profiling/data_time": 0.025163214653730392, "epoch": 94, "step": 2944}
191
+ {"lr-LARS/params": 2.074713666903076, "lr-LARS/params_no_weight_decay": 2.074713666903076, "step": 2975}
192
+ {"train_loss": 0.11318245530128479, "profiling/batch_time": 0.2819797694683075, "profiling/data_time": 0.026017578318715096, "epoch": 95, "step": 2975}
193
+ {"lr-LARS/params": 2.0633858573691986, "lr-LARS/params_no_weight_decay": 2.0633858573691986, "step": 3006}
194
+ {"train_loss": 0.11381667852401733, "profiling/batch_time": 0.2809104025363922, "profiling/data_time": 0.02348092570900917, "epoch": 96, "step": 3006}
195
+ {"lr-LARS/params": 2.051968480760965, "lr-LARS/params_no_weight_decay": 2.051968480760965, "step": 3037}
196
+ {"train_loss": 0.11665935814380646, "profiling/batch_time": 0.28174927830696106, "profiling/data_time": 0.024554865434765816, "epoch": 97, "step": 3037}
197
+ {"lr-LARS/params": 2.0404628772562714, "lr-LARS/params_no_weight_decay": 2.0404628772562714, "step": 3068}
198
+ {"train_loss": 0.11586707085371017, "profiling/batch_time": 0.2813439667224884, "profiling/data_time": 0.02472817339003086, "epoch": 98, "step": 3068}
199
+ {"lr-LARS/params": 2.028870397389136, "lr-LARS/params_no_weight_decay": 2.028870397389136, "step": 3099}
200
+ {"train_loss": 0.11626888066530228, "profiling/batch_time": 0.279693603515625, "profiling/data_time": 0.02584151178598404, "epoch": 99, "step": 3099}
201
+ {"lr-LARS/params": 2.01719240189117, "lr-LARS/params_no_weight_decay": 2.01719240189117, "step": 3130}
202
+ {"train_loss": 0.1142210140824318, "profiling/batch_time": 0.2801811397075653, "profiling/data_time": 0.02683265134692192, "epoch": 100, "step": 3130}
203
+ {"lr-LARS/params": 2.005430261531858, "lr-LARS/params_no_weight_decay": 2.005430261531858, "step": 3161}
204
+ {"train_loss": 0.11694065481424332, "profiling/batch_time": 0.28030669689178467, "profiling/data_time": 0.0217280276119709, "epoch": 101, "step": 3161}
205
+ {"lr-LARS/params": 1.9935853569576516, "lr-LARS/params_no_weight_decay": 1.9935853569576516, "step": 3192}
206
+ {"train_loss": 0.11153819411993027, "profiling/batch_time": 0.2791234850883484, "profiling/data_time": 0.02172956056892872, "epoch": 102, "step": 3192}
207
+ {"lr-LARS/params": 1.9816590785299155, "lr-LARS/params_no_weight_decay": 1.9816590785299155, "step": 3223}
208
+ {"train_loss": 0.11557798087596893, "profiling/batch_time": 0.27914726734161377, "profiling/data_time": 0.021799281239509583, "epoch": 103, "step": 3223}
209
+ {"lr-LARS/params": 1.9696528261617168, "lr-LARS/params_no_weight_decay": 1.9696528261617168, "step": 3254}
210
+ {"train_loss": 0.11937078833580017, "profiling/batch_time": 0.28029143810272217, "profiling/data_time": 0.028992079198360443, "epoch": 104, "step": 3254}
211
+ {"lr-LARS/params": 1.9575680091535104, "lr-LARS/params_no_weight_decay": 1.9575680091535104, "step": 3285}
212
+ {"train_loss": 0.11346034705638885, "profiling/batch_time": 0.2792639136314392, "profiling/data_time": 0.021804887801408768, "epoch": 105, "step": 3285}
213
+ {"lr-LARS/params": 1.9454060460277114, "lr-LARS/params_no_weight_decay": 1.9454060460277114, "step": 3316}
214
+ {"train_loss": 0.11519848555326462, "profiling/batch_time": 0.27918264269828796, "profiling/data_time": 0.02224489115178585, "epoch": 106, "step": 3316}
215
+ {"lr-LARS/params": 1.9331683643621864, "lr-LARS/params_no_weight_decay": 1.9331683643621864, "step": 3347}
216
+ {"train_loss": 0.11358032375574112, "profiling/batch_time": 0.279863178730011, "profiling/data_time": 0.021918652579188347, "epoch": 107, "step": 3347}
217
+ {"lr-LARS/params": 1.9208564006226876, "lr-LARS/params_no_weight_decay": 1.9208564006226876, "step": 3378}
218
+ {"train_loss": 0.11448623985052109, "profiling/batch_time": 0.28021955490112305, "profiling/data_time": 0.029736308380961418, "epoch": 108, "step": 3378}
219
+ {"lr-LARS/params": 1.9084715999942368, "lr-LARS/params_no_weight_decay": 1.9084715999942368, "step": 3409}
220
+ {"train_loss": 0.1174163892865181, "profiling/batch_time": 0.27979856729507446, "profiling/data_time": 0.023042313754558563, "epoch": 109, "step": 3409}
221
+ {"lr-LARS/params": 1.8960154162114893, "lr-LARS/params_no_weight_decay": 1.8960154162114893, "step": 3440}
222
+ {"train_loss": 0.11634480953216553, "profiling/batch_time": 0.2805279493331909, "profiling/data_time": 0.024054743349552155, "epoch": 110, "step": 3440}
223
+ {"lr-LARS/params": 1.8834893113880937, "lr-LARS/params_no_weight_decay": 1.8834893113880937, "step": 3471}
224
+ {"train_loss": 0.11507729440927505, "profiling/batch_time": 0.2807023823261261, "profiling/data_time": 0.02329021319746971, "epoch": 111, "step": 3471}
225
+ {"lr-LARS/params": 1.8708947558450697, "lr-LARS/params_no_weight_decay": 1.8708947558450697, "step": 3502}
226
+ {"train_loss": 0.11220823973417282, "profiling/batch_time": 0.2806653678417206, "profiling/data_time": 0.026713302358984947, "epoch": 112, "step": 3502}
227
+ {"lr-LARS/params": 1.8582332279382185, "lr-LARS/params_no_weight_decay": 1.8582332279382185, "step": 3533}
228
+ {"train_loss": 0.11043758690357208, "profiling/batch_time": 0.28078269958496094, "profiling/data_time": 0.024317875504493713, "epoch": 113, "step": 3533}
229
+ {"lr-LARS/params": 1.8455062138845955, "lr-LARS/params_no_weight_decay": 1.8455062138845955, "step": 3564}
230
+ {"train_loss": 0.11114963889122009, "profiling/batch_time": 0.2796972393989563, "profiling/data_time": 0.024956246837973595, "epoch": 114, "step": 3564}
231
+ {"lr-LARS/params": 1.832715207588054, "lr-LARS/params_no_weight_decay": 1.832715207588054, "step": 3595}
232
+ {"train_loss": 0.11345978826284409, "profiling/batch_time": 0.27959296107292175, "profiling/data_time": 0.023353492841124535, "epoch": 115, "step": 3595}
233
+ {"lr-LARS/params": 1.819861710463892, "lr-LARS/params_no_weight_decay": 1.819861710463892, "step": 3626}
234
+ {"train_loss": 0.11729983985424042, "profiling/batch_time": 0.2797854244709015, "profiling/data_time": 0.024042271077632904, "epoch": 116, "step": 3626}
235
+ {"lr-LARS/params": 1.806947231262617, "lr-LARS/params_no_weight_decay": 1.806947231262617, "step": 3657}
236
+ {"train_loss": 0.11625169962644577, "profiling/batch_time": 0.28012415766716003, "profiling/data_time": 0.02490462362766266, "epoch": 117, "step": 3657}
237
+ {"lr-LARS/params": 1.7939732858928428, "lr-LARS/params_no_weight_decay": 1.7939732858928428, "step": 3688}
238
+ {"train_loss": 0.11096959561109543, "profiling/batch_time": 0.27975067496299744, "profiling/data_time": 0.023997657001018524, "epoch": 118, "step": 3688}
239
+ {"lr-LARS/params": 1.7809413972433563, "lr-LARS/params_no_weight_decay": 1.7809413972433563, "step": 3719}
240
+ {"train_loss": 0.11312054842710495, "profiling/batch_time": 0.28097522258758545, "profiling/data_time": 0.025083186104893684, "epoch": 119, "step": 3719}
241
+ {"lr-LARS/params": 1.7678530950043592, "lr-LARS/params_no_weight_decay": 1.7678530950043592, "step": 3750}
242
+ {"train_loss": 0.11087493598461151, "profiling/batch_time": 0.28057393431663513, "profiling/data_time": 0.024713095277547836, "epoch": 120, "step": 3750}
243
+ {"lr-LARS/params": 1.7547099154879087, "lr-LARS/params_no_weight_decay": 1.7547099154879087, "step": 3781}
244
+ {"train_loss": 0.11372316628694534, "profiling/batch_time": 0.2792816758155823, "profiling/data_time": 0.024723919108510017, "epoch": 121, "step": 3781}
245
+ {"lr-LARS/params": 1.7415134014475881, "lr-LARS/params_no_weight_decay": 1.7415134014475881, "step": 3812}
246
+ {"train_loss": 0.11350000649690628, "profiling/batch_time": 0.2794714868068695, "profiling/data_time": 0.0278257355093956, "epoch": 122, "step": 3812}
247
+ {"lr-LARS/params": 1.7282651018974169, "lr-LARS/params_no_weight_decay": 1.7282651018974169, "step": 3843}
248
+ {"train_loss": 0.11132476478815079, "profiling/batch_time": 0.279266357421875, "profiling/data_time": 0.022368671372532845, "epoch": 123, "step": 3843}
249
+ {"lr-LARS/params": 1.7149665719300244, "lr-LARS/params_no_weight_decay": 1.7149665719300244, "step": 3874}
250
+ {"train_loss": 0.11444288492202759, "profiling/batch_time": 0.2827889621257782, "profiling/data_time": 0.022634807974100113, "epoch": 124, "step": 3874}
251
+ {"lr-LARS/params": 1.7016193725341149, "lr-LARS/params_no_weight_decay": 1.7016193725341149, "step": 3905}
252
+ {"train_loss": 0.11356859654188156, "profiling/batch_time": 0.27944839000701904, "profiling/data_time": 0.023521162569522858, "epoch": 125, "step": 3905}
253
+ {"lr-LARS/params": 1.6882250704112358, "lr-LARS/params_no_weight_decay": 1.6882250704112358, "step": 3936}
254
+ {"train_loss": 0.11041974276304245, "profiling/batch_time": 0.27986159920692444, "profiling/data_time": 0.02258412353694439, "epoch": 126, "step": 3936}
255
+ {"lr-LARS/params": 1.6747852377918793, "lr-LARS/params_no_weight_decay": 1.6747852377918793, "step": 3967}
256
+ {"train_loss": 0.1118711307644844, "profiling/batch_time": 0.2793775200843811, "profiling/data_time": 0.02298813685774803, "epoch": 127, "step": 3967}
257
+ {"lr-LARS/params": 1.6613014522509317, "lr-LARS/params_no_weight_decay": 1.6613014522509317, "step": 3998}
258
+ {"train_loss": 0.11193256080150604, "profiling/batch_time": 0.28001776337623596, "profiling/data_time": 0.03131181746721268, "epoch": 128, "step": 3998}
259
+ {"lr-LARS/params": 1.6477752965224972, "lr-LARS/params_no_weight_decay": 1.6477752965224972, "step": 4029}
260
+ {"train_loss": 0.11254480481147766, "profiling/batch_time": 0.2791793644428253, "profiling/data_time": 0.022539880126714706, "epoch": 129, "step": 4029}
261
+ {"lr-LARS/params": 1.6342083583141152, "lr-LARS/params_no_weight_decay": 1.6342083583141152, "step": 4060}
262
+ {"train_loss": 0.11519159376621246, "profiling/batch_time": 0.2788352370262146, "profiling/data_time": 0.02334810607135296, "epoch": 130, "step": 4060}
263
+ {"lr-LARS/params": 1.620602230120396, "lr-LARS/params_no_weight_decay": 1.620602230120396, "step": 4091}
264
+ {"train_loss": 0.10891342163085938, "profiling/batch_time": 0.27905794978141785, "profiling/data_time": 0.023176871240139008, "epoch": 131, "step": 4091}
265
+ {"lr-LARS/params": 1.6069585090360905, "lr-LARS/params_no_weight_decay": 1.6069585090360905, "step": 4122}
266
+ {"train_loss": 0.10874666273593903, "profiling/batch_time": 0.2788187265396118, "profiling/data_time": 0.022328589111566544, "epoch": 132, "step": 4122}
267
+ {"lr-LARS/params": 1.593278796568625, "lr-LARS/params_no_weight_decay": 1.593278796568625, "step": 4153}
268
+ {"train_loss": 0.11160208284854889, "profiling/batch_time": 0.2794421315193176, "profiling/data_time": 0.02379078045487404, "epoch": 133, "step": 4153}
269
+ {"lr-LARS/params": 1.5795646984501124, "lr-LARS/params_no_weight_decay": 1.5795646984501124, "step": 4184}
270
+ {"train_loss": 0.11749222874641418, "profiling/batch_time": 0.27883976697921753, "profiling/data_time": 0.022932840511202812, "epoch": 134, "step": 4184}
271
+ {"lr-LARS/params": 1.5658178244488732, "lr-LARS/params_no_weight_decay": 1.5658178244488732, "step": 4215}
272
+ {"train_loss": 0.11406457424163818, "profiling/batch_time": 0.27961429953575134, "profiling/data_time": 0.022955691441893578, "epoch": 135, "step": 4215}
273
+ {"lr-LARS/params": 1.552039788180479, "lr-LARS/params_no_weight_decay": 1.552039788180479, "step": 4246}
274
+ {"train_loss": 0.11043763905763626, "profiling/batch_time": 0.27883902192115784, "profiling/data_time": 0.02357552945613861, "epoch": 136, "step": 4246}
275
+ {"lr-LARS/params": 1.5382322069183445, "lr-LARS/params_no_weight_decay": 1.5382322069183445, "step": 4277}
276
+ {"train_loss": 0.10592087358236313, "profiling/batch_time": 0.2795879542827606, "profiling/data_time": 0.02271079830825329, "epoch": 137, "step": 4277}
277
+ {"lr-LARS/params": 1.5243967014038924, "lr-LARS/params_no_weight_decay": 1.5243967014038924, "step": 4308}
278
+ {"train_loss": 0.11099448055028915, "profiling/batch_time": 0.2798709273338318, "profiling/data_time": 0.024211451411247253, "epoch": 138, "step": 4308}
279
+ {"lr-LARS/params": 1.5105348956563098, "lr-LARS/params_no_weight_decay": 1.5105348956563098, "step": 4339}
280
+ {"train_loss": 0.11258246004581451, "profiling/batch_time": 0.2791202664375305, "profiling/data_time": 0.025545766577124596, "epoch": 139, "step": 4339}
281
+ {"lr-LARS/params": 1.4966484167819174, "lr-LARS/params_no_weight_decay": 1.4966484167819174, "step": 4370}
282
+ {"train_loss": 0.110165536403656, "profiling/batch_time": 0.28014883399009705, "profiling/data_time": 0.021926727145910263, "epoch": 140, "step": 4370}
283
+ {"lr-LARS/params": 1.4827388947831845, "lr-LARS/params_no_weight_decay": 1.4827388947831845, "step": 4401}
284
+ {"train_loss": 0.1130433976650238, "profiling/batch_time": 0.28140923380851746, "profiling/data_time": 0.022338470444083214, "epoch": 141, "step": 4401}
285
+ {"lr-LARS/params": 1.4688079623673922, "lr-LARS/params_no_weight_decay": 1.4688079623673922, "step": 4432}
286
+ {"train_loss": 0.11289647221565247, "profiling/batch_time": 0.2796103060245514, "profiling/data_time": 0.024212146177887917, "epoch": 142, "step": 4432}
287
+ {"lr-LARS/params": 1.4548572547549883, "lr-LARS/params_no_weight_decay": 1.4548572547549883, "step": 4463}
288
+ {"train_loss": 0.11163585633039474, "profiling/batch_time": 0.2801172435283661, "profiling/data_time": 0.022797714918851852, "epoch": 143, "step": 4463}
289
+ {"lr-LARS/params": 1.4408884094876455, "lr-LARS/params_no_weight_decay": 1.4408884094876455, "step": 4494}
290
+ {"train_loss": 0.11021599918603897, "profiling/batch_time": 0.2802903950214386, "profiling/data_time": 0.029793089255690575, "epoch": 144, "step": 4494}
291
+ {"lr-LARS/params": 1.4269030662360431, "lr-LARS/params_no_weight_decay": 1.4269030662360431, "step": 4525}
292
+ {"train_loss": 0.11261448264122009, "profiling/batch_time": 0.28061720728874207, "profiling/data_time": 0.02532568760216236, "epoch": 145, "step": 4525}
293
+ {"lr-LARS/params": 1.4129028666074024, "lr-LARS/params_no_weight_decay": 1.4129028666074024, "step": 4556}
294
+ {"train_loss": 0.11382752656936646, "profiling/batch_time": 0.28014442324638367, "profiling/data_time": 0.026404008269309998, "epoch": 146, "step": 4556}
295
+ {"lr-LARS/params": 1.3988894539527952, "lr-LARS/params_no_weight_decay": 1.3988894539527952, "step": 4587}
296
+ {"train_loss": 0.11258964240550995, "profiling/batch_time": 0.2802785038948059, "profiling/data_time": 0.025495657697319984, "epoch": 147, "step": 4587}
297
+ {"lr-LARS/params": 1.3848644731742459, "lr-LARS/params_no_weight_decay": 1.3848644731742459, "step": 4618}
298
+ {"train_loss": 0.11171763390302658, "profiling/batch_time": 0.28048452734947205, "profiling/data_time": 0.02440386824309826, "epoch": 148, "step": 4618}
299
+ {"lr-LARS/params": 1.3708295705316498, "lr-LARS/params_no_weight_decay": 1.3708295705316498, "step": 4649}
300
+ {"train_loss": 0.11301577836275101, "profiling/batch_time": 0.2809346616268158, "profiling/data_time": 0.02404748648405075, "epoch": 149, "step": 4649}
301
+ {"lr-LARS/params": 1.3567863934495388, "lr-LARS/params_no_weight_decay": 1.3567863934495388, "step": 4680}
302
+ {"train_loss": 0.1107664555311203, "profiling/batch_time": 0.2806987762451172, "profiling/data_time": 0.02451048046350479, "epoch": 150, "step": 4680}
303
+ {"lr-LARS/params": 1.3427365903236999, "lr-LARS/params_no_weight_decay": 1.3427365903236999, "step": 4711}
304
+ {"train_loss": 0.11337578296661377, "profiling/batch_time": 0.27973559498786926, "profiling/data_time": 0.024695387110114098, "epoch": 151, "step": 4711}
305
+ {"lr-LARS/params": 1.328681810327691, "lr-LARS/params_no_weight_decay": 1.328681810327691, "step": 4742}
306
+ {"train_loss": 0.11205244809389114, "profiling/batch_time": 0.2795793116092682, "profiling/data_time": 0.024559931829571724, "epoch": 152, "step": 4742}
307
+ {"lr-LARS/params": 1.3146237032192571, "lr-LARS/params_no_weight_decay": 1.3146237032192571, "step": 4773}
308
+ {"train_loss": 0.1116478443145752, "profiling/batch_time": 0.28038346767425537, "profiling/data_time": 0.025014802813529968, "epoch": 153, "step": 4773}
309
+ {"lr-LARS/params": 1.3005639191466805, "lr-LARS/params_no_weight_decay": 1.3005639191466805, "step": 4804}
310
+ {"train_loss": 0.10929753631353378, "profiling/batch_time": 0.28001126646995544, "profiling/data_time": 0.02466484159231186, "epoch": 154, "step": 4804}
311
+ {"lr-LARS/params": 1.2865041084550883, "lr-LARS/params_no_weight_decay": 1.2865041084550883, "step": 4835}
312
+ {"train_loss": 0.10962782055139542, "profiling/batch_time": 0.28087061643600464, "profiling/data_time": 0.024545583873987198, "epoch": 155, "step": 4835}
313
+ {"lr-LARS/params": 1.2724459214927306, "lr-LARS/params_no_weight_decay": 1.2724459214927306, "step": 4866}
314
+ {"train_loss": 0.11105086654424667, "profiling/batch_time": 0.2803313136100769, "profiling/data_time": 0.025040775537490845, "epoch": 156, "step": 4866}
315
+ {"lr-LARS/params": 1.258391008417264, "lr-LARS/params_no_weight_decay": 1.258391008417264, "step": 4897}
316
+ {"train_loss": 0.11162572354078293, "profiling/batch_time": 0.2796148359775543, "profiling/data_time": 0.025213032960891724, "epoch": 157, "step": 4897}
317
+ {"lr-LARS/params": 1.2443410190020545, "lr-LARS/params_no_weight_decay": 1.2443410190020545, "step": 4928}
318
+ {"train_loss": 0.11522666364908218, "profiling/batch_time": 0.27970975637435913, "profiling/data_time": 0.024210063740611076, "epoch": 158, "step": 4928}
319
+ {"lr-LARS/params": 1.2302976024425256, "lr-LARS/params_no_weight_decay": 1.2302976024425256, "step": 4959}
320
+ {"train_loss": 0.11127370595932007, "profiling/batch_time": 0.27953290939331055, "profiling/data_time": 0.024091646075248718, "epoch": 159, "step": 4959}
321
+ {"lr-LARS/params": 1.2162624071625765, "lr-LARS/params_no_weight_decay": 1.2162624071625765, "step": 4990}
322
+ {"train_loss": 0.10953210294246674, "profiling/batch_time": 0.2796824872493744, "profiling/data_time": 0.023790614679455757, "epoch": 160, "step": 4990}
323
+ {"lr-LARS/params": 1.2022370806210865, "lr-LARS/params_no_weight_decay": 1.2022370806210865, "step": 5021}
324
+ {"train_loss": 0.10671691596508026, "profiling/batch_time": 0.2798407971858978, "profiling/data_time": 0.024221135303378105, "epoch": 161, "step": 5021}
325
+ {"lr-LARS/params": 1.1882232691185384, "lr-LARS/params_no_weight_decay": 1.1882232691185384, "step": 5052}
326
+ {"train_loss": 0.11285724490880966, "profiling/batch_time": 0.280573308467865, "profiling/data_time": 0.023797934874892235, "epoch": 162, "step": 5052}
327
+ {"lr-LARS/params": 1.1742226176037727, "lr-LARS/params_no_weight_decay": 1.1742226176037727, "step": 5083}
328
+ {"train_loss": 0.10407628118991852, "profiling/batch_time": 0.2797095775604248, "profiling/data_time": 0.0241866372525692, "epoch": 163, "step": 5083}
329
+ {"lr-LARS/params": 1.1602367694809044, "lr-LARS/params_no_weight_decay": 1.1602367694809044, "step": 5114}
330
+ {"train_loss": 0.10838241130113602, "profiling/batch_time": 0.2803942561149597, "profiling/data_time": 0.02429983578622341, "epoch": 164, "step": 5114}
331
+ {"lr-LARS/params": 1.1462673664164165, "lr-LARS/params_no_weight_decay": 1.1462673664164165, "step": 5145}
332
+ {"train_loss": 0.10616401582956314, "profiling/batch_time": 0.2798672616481781, "profiling/data_time": 0.031438104808330536, "epoch": 165, "step": 5145}
333
+ {"lr-LARS/params": 1.1323160481464636, "lr-LARS/params_no_weight_decay": 1.1323160481464636, "step": 5176}
334
+ {"train_loss": 0.1105101928114891, "profiling/batch_time": 0.28067469596862793, "profiling/data_time": 0.02417870983481407, "epoch": 166, "step": 5176}
335
+ {"lr-LARS/params": 1.1183844522843966, "lr-LARS/params_no_weight_decay": 1.1183844522843966, "step": 5207}
336
+ {"train_loss": 0.10715816915035248, "profiling/batch_time": 0.2798115611076355, "profiling/data_time": 0.025517946109175682, "epoch": 167, "step": 5207}
337
+ {"lr-LARS/params": 1.1044742141285395, "lr-LARS/params_no_weight_decay": 1.1044742141285395, "step": 5238}
338
+ {"train_loss": 0.10740532726049423, "profiling/batch_time": 0.2810477018356323, "profiling/data_time": 0.02467821165919304, "epoch": 168, "step": 5238}
339
+ {"lr-LARS/params": 1.0905869664702375, "lr-LARS/params_no_weight_decay": 1.0905869664702375, "step": 5269}
340
+ {"train_loss": 0.10599146783351898, "profiling/batch_time": 0.2801041603088379, "profiling/data_time": 0.02443840727210045, "epoch": 169, "step": 5269}
341
+ {"lr-LARS/params": 1.076724339402197, "lr-LARS/params_no_weight_decay": 1.076724339402197, "step": 5300}
342
+ {"train_loss": 0.11027105897665024, "profiling/batch_time": 0.2793063819408417, "profiling/data_time": 0.025871600955724716, "epoch": 170, "step": 5300}
343
+ {"lr-LARS/params": 1.062887960127149, "lr-LARS/params_no_weight_decay": 1.062887960127149, "step": 5331}
344
+ {"train_loss": 0.10455434769392014, "profiling/batch_time": 0.28001296520233154, "profiling/data_time": 0.03087146021425724, "epoch": 171, "step": 5331}
345
+ {"lr-LARS/params": 1.0490794527668417, "lr-LARS/params_no_weight_decay": 1.0490794527668417, "step": 5362}
346
+ {"train_loss": 0.10968224704265594, "profiling/batch_time": 0.2795231342315674, "profiling/data_time": 0.02650510147213936, "epoch": 172, "step": 5362}
347
+ {"lr-LARS/params": 1.0353004381714035, "lr-LARS/params_no_weight_decay": 1.0353004381714035, "step": 5393}
348
+ {"train_loss": 0.1075165644288063, "profiling/batch_time": 0.2797834873199463, "profiling/data_time": 0.02427809312939644, "epoch": 173, "step": 5393}
349
+ {"lr-LARS/params": 1.0215525337290867, "lr-LARS/params_no_weight_decay": 1.0215525337290867, "step": 5424}
350
+ {"train_loss": 0.10698486864566803, "profiling/batch_time": 0.2806786596775055, "profiling/data_time": 0.02402956783771515, "epoch": 174, "step": 5424}
351
+ {"lr-LARS/params": 1.0078373531764158, "lr-LARS/params_no_weight_decay": 1.0078373531764158, "step": 5455}
352
+ {"train_loss": 0.10829202830791473, "profiling/batch_time": 0.2792050838470459, "profiling/data_time": 0.02446657046675682, "epoch": 175, "step": 5455}
353
+ {"lr-LARS/params": 0.9941565064087676, "lr-LARS/params_no_weight_decay": 0.9941565064087676, "step": 5486}
354
+ {"train_loss": 0.1052781268954277, "profiling/batch_time": 0.2794559597969055, "profiling/data_time": 0.024944152683019638, "epoch": 176, "step": 5486}
355
+ {"lr-LARS/params": 0.9805115992914009, "lr-LARS/params_no_weight_decay": 0.9805115992914009, "step": 5517}
356
+ {"train_loss": 0.10341408848762512, "profiling/batch_time": 0.2797638177871704, "profiling/data_time": 0.024181656539440155, "epoch": 177, "step": 5517}
357
+ {"lr-LARS/params": 0.9669042334709583, "lr-LARS/params_no_weight_decay": 0.9669042334709583, "step": 5548}
358
+ {"train_loss": 0.10993483662605286, "profiling/batch_time": 0.2794544994831085, "profiling/data_time": 0.02900850959122181, "epoch": 178, "step": 5548}
359
+ {"lr-LARS/params": 0.9533360061874647, "lr-LARS/params_no_weight_decay": 0.9533360061874647, "step": 5579}
360
+ {"train_loss": 0.10785074532032013, "profiling/batch_time": 0.2799595892429352, "profiling/data_time": 0.023918893188238144, "epoch": 179, "step": 5579}
361
+ {"lr-LARS/params": 0.9398085100868415, "lr-LARS/params_no_weight_decay": 0.9398085100868415, "step": 5610}
362
+ {"train_loss": 0.10896032303571701, "profiling/batch_time": 0.28074851632118225, "profiling/data_time": 0.025415265932679176, "epoch": 180, "step": 5610}
363
+ {"lr-LARS/params": 0.9263233330339639, "lr-LARS/params_no_weight_decay": 0.9263233330339639, "step": 5641}
364
+ {"train_loss": 0.10561589896678925, "profiling/batch_time": 0.27956709265708923, "profiling/data_time": 0.024710938334465027, "epoch": 181, "step": 5641}
365
+ {"lr-LARS/params": 0.9128820579262703, "lr-LARS/params_no_weight_decay": 0.9128820579262703, "step": 5672}
366
+ {"train_loss": 0.1084352359175682, "profiling/batch_time": 0.2799287736415863, "profiling/data_time": 0.027271712198853493, "epoch": 182, "step": 5672}
367
+ {"lr-LARS/params": 0.8994862625079686, "lr-LARS/params_no_weight_decay": 0.8994862625079686, "step": 5703}
368
+ {"train_loss": 0.1046057865023613, "profiling/batch_time": 0.27974942326545715, "profiling/data_time": 0.026174552738666534, "epoch": 183, "step": 5703}
369
+ {"lr-LARS/params": 0.886137519184834, "lr-LARS/params_no_weight_decay": 0.886137519184834, "step": 5734}
370
+ {"train_loss": 0.10447362065315247, "profiling/batch_time": 0.2805032730102539, "profiling/data_time": 0.026357440277934074, "epoch": 184, "step": 5734}
371
+ {"lr-LARS/params": 0.8728373948396408, "lr-LARS/params_no_weight_decay": 0.8728373948396408, "step": 5765}
372
+ {"train_loss": 0.1111631840467453, "profiling/batch_time": 0.2822023034095764, "profiling/data_time": 0.02411731146275997, "epoch": 185, "step": 5765}
373
+ {"lr-LARS/params": 0.8595874506482426, "lr-LARS/params_no_weight_decay": 0.8595874506482426, "step": 5796}
374
+ {"train_loss": 0.1070699393749237, "profiling/batch_time": 0.2802458107471466, "profiling/data_time": 0.024361707270145416, "epoch": 186, "step": 5796}
375
+ {"lr-LARS/params": 0.8463892418963186, "lr-LARS/params_no_weight_decay": 0.8463892418963186, "step": 5827}
376
+ {"train_loss": 0.10436796396970749, "profiling/batch_time": 0.28017497062683105, "profiling/data_time": 0.02443641610443592, "epoch": 187, "step": 5827}
377
+ {"lr-LARS/params": 0.8332443177968126, "lr-LARS/params_no_weight_decay": 0.8332443177968126, "step": 5858}
378
+ {"train_loss": 0.10842223465442657, "profiling/batch_time": 0.279675155878067, "profiling/data_time": 0.024135712534189224, "epoch": 188, "step": 5858}
379
+ {"lr-LARS/params": 0.8201542213080886, "lr-LARS/params_no_weight_decay": 0.8201542213080886, "step": 5889}
380
+ {"train_loss": 0.11187343299388885, "profiling/batch_time": 0.28037938475608826, "profiling/data_time": 0.024441946297883987, "epoch": 189, "step": 5889}
381
+ {"lr-LARS/params": 0.8071204889528153, "lr-LARS/params_no_weight_decay": 0.8071204889528153, "step": 5920}
382
+ {"train_loss": 0.11004392057657242, "profiling/batch_time": 0.2797994613647461, "profiling/data_time": 0.027311015874147415, "epoch": 190, "step": 5920}
383
+ {"lr-LARS/params": 0.7941446506376074, "lr-LARS/params_no_weight_decay": 0.7941446506376074, "step": 5951}
384
+ {"train_loss": 0.10897886753082275, "profiling/batch_time": 0.28009214997291565, "profiling/data_time": 0.025476699694991112, "epoch": 191, "step": 5951}
385
+ {"lr-LARS/params": 0.7812282294734473, "lr-LARS/params_no_weight_decay": 0.7812282294734473, "step": 5982}
386
+ {"train_loss": 0.11034613847732544, "profiling/batch_time": 0.27959367632865906, "profiling/data_time": 0.024951038882136345, "epoch": 192, "step": 5982}
387
+ {"lr-LARS/params": 0.7683727415968987, "lr-LARS/params_no_weight_decay": 0.7683727415968987, "step": 6013}
388
+ {"train_loss": 0.10400903970003128, "profiling/batch_time": 0.27988317608833313, "profiling/data_time": 0.024217821657657623, "epoch": 193, "step": 6013}
389
+ {"lr-LARS/params": 0.7555796959921441, "lr-LARS/params_no_weight_decay": 0.7555796959921441, "step": 6044}
390
+ {"train_loss": 0.10714641213417053, "profiling/batch_time": 0.2801665663719177, "profiling/data_time": 0.0240265354514122, "epoch": 194, "step": 6044}
391
+ {"lr-LARS/params": 0.742850594313855, "lr-LARS/params_no_weight_decay": 0.742850594313855, "step": 6075}
392
+ {"train_loss": 0.10468653589487076, "profiling/batch_time": 0.2797994613647461, "profiling/data_time": 0.02792484685778618, "epoch": 195, "step": 6075}
393
+ {"lr-LARS/params": 0.730186930710934, "lr-LARS/params_no_weight_decay": 0.730186930710934, "step": 6106}
394
+ {"train_loss": 0.10558204352855682, "profiling/batch_time": 0.27974480390548706, "profiling/data_time": 0.0246877558529377, "epoch": 196, "step": 6106}
395
+ {"lr-LARS/params": 0.7175901916511243, "lr-LARS/params_no_weight_decay": 0.7175901916511243, "step": 6137}
396
+ {"train_loss": 0.11105310171842575, "profiling/batch_time": 0.28154537081718445, "profiling/data_time": 0.02828759141266346, "epoch": 197, "step": 6137}
397
+ {"lr-LARS/params": 0.7050618557465294, "lr-LARS/params_no_weight_decay": 0.7050618557465294, "step": 6168}
398
+ {"train_loss": 0.10628894716501236, "profiling/batch_time": 0.27974024415016174, "profiling/data_time": 0.02499591000378132, "epoch": 198, "step": 6168}
399
+ {"lr-LARS/params": 0.692603393580054, "lr-LARS/params_no_weight_decay": 0.692603393580054, "step": 6199}
400
+ {"train_loss": 0.11239251494407654, "profiling/batch_time": 0.28341197967529297, "profiling/data_time": 0.025149688124656677, "epoch": 199, "step": 6199}
401
+ {"lr-LARS/params": 0.6802162675327853, "lr-LARS/params_no_weight_decay": 0.6802162675327853, "step": 6230}
402
+ {"train_loss": 0.10901231318712234, "profiling/batch_time": 0.28108668327331543, "profiling/data_time": 0.024604788050055504, "epoch": 200, "step": 6230}
403
+ {"lr-LARS/params": 0.667901931612338, "lr-LARS/params_no_weight_decay": 0.667901931612338, "step": 6261}
404
+ {"train_loss": 0.10621606558561325, "profiling/batch_time": 0.27949145436286926, "profiling/data_time": 0.025541050359606743, "epoch": 201, "step": 6261}
405
+ {"lr-LARS/params": 0.6556618312821813, "lr-LARS/params_no_weight_decay": 0.6556618312821813, "step": 6292}
406
+ {"train_loss": 0.10690736770629883, "profiling/batch_time": 0.2798719108104706, "profiling/data_time": 0.024246174842119217, "epoch": 202, "step": 6292}
407
+ {"lr-LARS/params": 0.6434974032919711, "lr-LARS/params_no_weight_decay": 0.6434974032919711, "step": 6323}
408
+ {"train_loss": 0.11092428117990494, "profiling/batch_time": 0.2790978252887726, "profiling/data_time": 0.026412107050418854, "epoch": 203, "step": 6323}
409
+ {"lr-LARS/params": 0.6314100755089015, "lr-LARS/params_no_weight_decay": 0.6314100755089015, "step": 6354}
410
+ {"train_loss": 0.1081099808216095, "profiling/batch_time": 0.27972275018692017, "profiling/data_time": 0.022228671237826347, "epoch": 204, "step": 6354}
411
+ {"lr-LARS/params": 0.619401266750104, "lr-LARS/params_no_weight_decay": 0.619401266750104, "step": 6385}
412
+ {"train_loss": 0.10217059403657913, "profiling/batch_time": 0.27919283509254456, "profiling/data_time": 0.021864447742700577, "epoch": 205, "step": 6385}
413
+ {"lr-LARS/params": 0.6074723866161037, "lr-LARS/params_no_weight_decay": 0.6074723866161037, "step": 6416}
414
+ {"train_loss": 0.10689131170511246, "profiling/batch_time": 0.2792072594165802, "profiling/data_time": 0.022109003737568855, "epoch": 206, "step": 6416}
415
+ {"lr-LARS/params": 0.5956248353253584, "lr-LARS/params_no_weight_decay": 0.5956248353253584, "step": 6447}
416
+ {"train_loss": 0.10284242033958435, "profiling/batch_time": 0.2787727415561676, "profiling/data_time": 0.02166104130446911, "epoch": 207, "step": 6447}
417
+ {"lr-LARS/params": 0.5838600035499037, "lr-LARS/params_no_weight_decay": 0.5838600035499037, "step": 6478}
418
+ {"train_loss": 0.10242690145969391, "profiling/batch_time": 0.279057115316391, "profiling/data_time": 0.022970888763666153, "epoch": 208, "step": 6478}
419
+ {"lr-LARS/params": 0.5721792722521125, "lr-LARS/params_no_weight_decay": 0.5721792722521125, "step": 6509}
420
+ {"train_loss": 0.10290662944316864, "profiling/batch_time": 0.2798173725605011, "profiling/data_time": 0.026463206857442856, "epoch": 209, "step": 6509}
421
+ {"lr-LARS/params": 0.5605840125225995, "lr-LARS/params_no_weight_decay": 0.5605840125225995, "step": 6540}
422
+ {"train_loss": 0.10779617726802826, "profiling/batch_time": 0.2796464264392853, "profiling/data_time": 0.025163620710372925, "epoch": 210, "step": 6540}
423
+ {"lr-LARS/params": 0.5490755854192773, "lr-LARS/params_no_weight_decay": 0.5490755854192773, "step": 6571}
424
+ {"train_loss": 0.10780063271522522, "profiling/batch_time": 0.2795703113079071, "profiling/data_time": 0.024826467037200928, "epoch": 211, "step": 6571}
425
+ {"lr-LARS/params": 0.5376553418075991, "lr-LARS/params_no_weight_decay": 0.5376553418075991, "step": 6602}
426
+ {"train_loss": 0.10240733623504639, "profiling/batch_time": 0.27974411845207214, "profiling/data_time": 0.023482220247387886, "epoch": 212, "step": 6602}
427
+ {"lr-LARS/params": 0.5263246222019915, "lr-LARS/params_no_weight_decay": 0.5263246222019915, "step": 6633}
428
+ {"train_loss": 0.10539387911558151, "profiling/batch_time": 0.2793790400028229, "profiling/data_time": 0.024944456294178963, "epoch": 213, "step": 6633}
429
+ {"lr-LARS/params": 0.5150847566085045, "lr-LARS/params_no_weight_decay": 0.5150847566085045, "step": 6664}
430
+ {"train_loss": 0.10934224724769592, "profiling/batch_time": 0.2797708809375763, "profiling/data_time": 0.024768628180027008, "epoch": 214, "step": 6664}
431
+ {"lr-LARS/params": 0.5039370643686943, "lr-LARS/params_no_weight_decay": 0.5039370643686943, "step": 6695}
432
+ {"train_loss": 0.1055927500128746, "profiling/batch_time": 0.27969250082969666, "profiling/data_time": 0.024058440700173378, "epoch": 215, "step": 6695}
433
+ {"lr-LARS/params": 0.4928828540047595, "lr-LARS/params_no_weight_decay": 0.4928828540047595, "step": 6726}
434
+ {"train_loss": 0.10599054396152496, "profiling/batch_time": 0.27929455041885376, "profiling/data_time": 0.025665631517767906, "epoch": 216, "step": 6726}
435
+ {"lr-LARS/params": 0.48192342306594294, "lr-LARS/params_no_weight_decay": 0.48192342306594294, "step": 6757}
436
+ {"train_loss": 0.10307373106479645, "profiling/batch_time": 0.2789112329483032, "profiling/data_time": 0.02806794084608555, "epoch": 217, "step": 6757}
437
+ {"lr-LARS/params": 0.47106005797622846, "lr-LARS/params_no_weight_decay": 0.47106005797622846, "step": 6788}
438
+ {"train_loss": 0.10388215631246567, "profiling/batch_time": 0.27859944105148315, "profiling/data_time": 0.022090887650847435, "epoch": 218, "step": 6788}
439
+ {"lr-LARS/params": 0.4602940338833392, "lr-LARS/params_no_weight_decay": 0.4602940338833392, "step": 6819}
440
+ {"train_loss": 0.10343452543020248, "profiling/batch_time": 0.27893468737602234, "profiling/data_time": 0.022926080971956253, "epoch": 219, "step": 6819}
441
+ {"lr-LARS/params": 0.4496266145090563, "lr-LARS/params_no_weight_decay": 0.4496266145090563, "step": 6850}
442
+ {"train_loss": 0.10473000258207321, "profiling/batch_time": 0.2796981930732727, "profiling/data_time": 0.02242247946560383, "epoch": 220, "step": 6850}
443
+ {"lr-LARS/params": 0.439059052000887, "lr-LARS/params_no_weight_decay": 0.439059052000887, "step": 6881}
444
+ {"train_loss": 0.10553184151649475, "profiling/batch_time": 0.2788766026496887, "profiling/data_time": 0.02215990051627159, "epoch": 221, "step": 6881}
445
+ {"lr-LARS/params": 0.428592586785085, "lr-LARS/params_no_weight_decay": 0.428592586785085, "step": 6912}
446
+ {"train_loss": 0.10349450260400772, "profiling/batch_time": 0.27856603264808655, "profiling/data_time": 0.022167639806866646, "epoch": 222, "step": 6912}
447
+ {"lr-LARS/params": 0.41822844742104703, "lr-LARS/params_no_weight_decay": 0.41822844742104703, "step": 6943}
448
+ {"train_loss": 0.10581092536449432, "profiling/batch_time": 0.2788611352443695, "profiling/data_time": 0.022399822250008583, "epoch": 223, "step": 6943}
449
+ {"lr-LARS/params": 0.4079678504571082, "lr-LARS/params_no_weight_decay": 0.4079678504571082, "step": 6974}
450
+ {"train_loss": 0.10052903741598129, "profiling/batch_time": 0.27953657507896423, "profiling/data_time": 0.02233986184000969, "epoch": 224, "step": 6974}
451
+ {"lr-LARS/params": 0.3978120002877387, "lr-LARS/params_no_weight_decay": 0.3978120002877387, "step": 7005}
452
+ {"train_loss": 0.10502449423074722, "profiling/batch_time": 0.27834928035736084, "profiling/data_time": 0.028071941807866096, "epoch": 225, "step": 7005}
453
+ {"lr-LARS/params": 0.387762089012172, "lr-LARS/params_no_weight_decay": 0.387762089012172, "step": 7036}
454
+ {"train_loss": 0.10731479525566101, "profiling/batch_time": 0.2787128686904907, "profiling/data_time": 0.022836295887827873, "epoch": 226, "step": 7036}
455
+ {"lr-LARS/params": 0.377819296294478, "lr-LARS/params_no_weight_decay": 0.377819296294478, "step": 7067}
456
+ {"train_loss": 0.10865151137113571, "profiling/batch_time": 0.2795090079307556, "profiling/data_time": 0.023924874141812325, "epoch": 227, "step": 7067}
457
+ {"lr-LARS/params": 0.36798478922509065, "lr-LARS/params_no_weight_decay": 0.36798478922509065, "step": 7098}
458
+ {"train_loss": 0.10661354660987854, "profiling/batch_time": 0.2788792550563812, "profiling/data_time": 0.02336982637643814, "epoch": 228, "step": 7098}
459
+ {"lr-LARS/params": 0.35825972218381696, "lr-LARS/params_no_weight_decay": 0.35825972218381696, "step": 7129}
460
+ {"train_loss": 0.10329631716012955, "profiling/batch_time": 0.2789275348186493, "profiling/data_time": 0.023515548557043076, "epoch": 229, "step": 7129}
461
+ {"lr-LARS/params": 0.34864523670433173, "lr-LARS/params_no_weight_decay": 0.34864523670433173, "step": 7160}
462
+ {"train_loss": 0.10127069056034088, "profiling/batch_time": 0.2792460322380066, "profiling/data_time": 0.022675400599837303, "epoch": 230, "step": 7160}
463
+ {"lr-LARS/params": 0.3391424613401869, "lr-LARS/params_no_weight_decay": 0.3391424613401869, "step": 7191}
464
+ {"train_loss": 0.09880557656288147, "profiling/batch_time": 0.2795707583427429, "profiling/data_time": 0.02976328507065773, "epoch": 231, "step": 7191}
465
+ {"lr-LARS/params": 0.32975251153233975, "lr-LARS/params_no_weight_decay": 0.32975251153233975, "step": 7222}
466
+ {"train_loss": 0.10543648898601532, "profiling/batch_time": 0.27950042486190796, "profiling/data_time": 0.021859269589185715, "epoch": 232, "step": 7222}
467
+ {"lr-LARS/params": 0.32047648947822277, "lr-LARS/params_no_weight_decay": 0.32047648947822277, "step": 7253}
468
+ {"train_loss": 0.10199959576129913, "profiling/batch_time": 0.2792081832885742, "profiling/data_time": 0.023618735373020172, "epoch": 233, "step": 7253}
469
+ {"lr-LARS/params": 0.31131548400236564, "lr-LARS/params_no_weight_decay": 0.31131548400236564, "step": 7284}
470
+ {"train_loss": 0.10432765632867813, "profiling/batch_time": 0.2799839377403259, "profiling/data_time": 0.02347646839916706, "epoch": 234, "step": 7284}
471
+ {"lr-LARS/params": 0.3022705704285903, "lr-LARS/params_no_weight_decay": 0.3022705704285903, "step": 7315}
472
+ {"train_loss": 0.10671383142471313, "profiling/batch_time": 0.2789658010005951, "profiling/data_time": 0.0249751266092062, "epoch": 235, "step": 7315}
473
+ {"lr-LARS/params": 0.2933428104537867, "lr-LARS/params_no_weight_decay": 0.2933428104537867, "step": 7346}
474
+ {"train_loss": 0.09918428212404251, "profiling/batch_time": 0.28018897771835327, "profiling/data_time": 0.026574701070785522, "epoch": 236, "step": 7346}
475
+ {"lr-LARS/params": 0.28453325202329205, "lr-LARS/params_no_weight_decay": 0.28453325202329205, "step": 7377}
476
+ {"train_loss": 0.10384354740381241, "profiling/batch_time": 0.2799147367477417, "profiling/data_time": 0.023782547563314438, "epoch": 237, "step": 7377}
477
+ {"lr-LARS/params": 0.275842929207883, "lr-LARS/params_no_weight_decay": 0.275842929207883, "step": 7408}
478
+ {"train_loss": 0.10953808575868607, "profiling/batch_time": 0.28124770522117615, "profiling/data_time": 0.024104468524456024, "epoch": 238, "step": 7408}
479
+ {"lr-LARS/params": 0.26727286208239215, "lr-LARS/params_no_weight_decay": 0.26727286208239215, "step": 7439}
480
+ {"train_loss": 0.10673543810844421, "profiling/batch_time": 0.28080812096595764, "profiling/data_time": 0.030370280146598816, "epoch": 239, "step": 7439}
481
+ {"lr-LARS/params": 0.25882405660597557, "lr-LARS/params_no_weight_decay": 0.25882405660597557, "step": 7470}
482
+ {"train_loss": 0.10682955384254456, "profiling/batch_time": 0.28004190325737, "profiling/data_time": 0.02368774637579918, "epoch": 240, "step": 7470}
483
+ {"lr-LARS/params": 0.25049750450402986, "lr-LARS/params_no_weight_decay": 0.25049750450402986, "step": 7501}
484
+ {"train_loss": 0.10517997294664383, "profiling/batch_time": 0.28016048669815063, "profiling/data_time": 0.023830439895391464, "epoch": 241, "step": 7501}
485
+ {"lr-LARS/params": 0.24229418315178436, "lr-LARS/params_no_weight_decay": 0.24229418315178436, "step": 7532}
486
+ {"train_loss": 0.10420355200767517, "profiling/batch_time": 0.2810109257698059, "profiling/data_time": 0.02518489584326744, "epoch": 242, "step": 7532}
487
+ {"lr-LARS/params": 0.2342150554595754, "lr-LARS/params_no_weight_decay": 0.2342150554595754, "step": 7563}
488
+ {"train_loss": 0.10687050968408585, "profiling/batch_time": 0.2803668975830078, "profiling/data_time": 0.024442004039883614, "epoch": 243, "step": 7563}
489
+ {"lr-LARS/params": 0.2262610697598202, "lr-LARS/params_no_weight_decay": 0.2262610697598202, "step": 7594}
490
+ {"train_loss": 0.10697708278894424, "profiling/batch_time": 0.28082120418548584, "profiling/data_time": 0.02415456995368004, "epoch": 244, "step": 7594}
491
+ {"lr-LARS/params": 0.2184331596956995, "lr-LARS/params_no_weight_decay": 0.2184331596956995, "step": 7625}
492
+ {"train_loss": 0.10329428315162659, "profiling/batch_time": 0.2805176377296448, "profiling/data_time": 0.023775145411491394, "epoch": 245, "step": 7625}
493
+ {"lr-LARS/params": 0.21073224411156807, "lr-LARS/params_no_weight_decay": 0.21073224411156807, "step": 7656}
494
+ {"train_loss": 0.10155832022428513, "profiling/batch_time": 0.28001266717910767, "profiling/data_time": 0.029572762548923492, "epoch": 246, "step": 7656}
495
+ {"lr-LARS/params": 0.20315922694509966, "lr-LARS/params_no_weight_decay": 0.20315922694509966, "step": 7687}
496
+ {"train_loss": 0.10405521094799042, "profiling/batch_time": 0.2799651026725769, "profiling/data_time": 0.022379886358976364, "epoch": 247, "step": 7687}
497
+ {"lr-LARS/params": 0.19571499712118162, "lr-LARS/params_no_weight_decay": 0.19571499712118162, "step": 7718}
498
+ {"train_loss": 0.10281528532505035, "profiling/batch_time": 0.2794579267501831, "profiling/data_time": 0.023105956614017487, "epoch": 248, "step": 7718}
499
+ {"lr-LARS/params": 0.1884004284475717, "lr-LARS/params_no_weight_decay": 0.1884004284475717, "step": 7749}
500
+ {"train_loss": 0.10590873658657074, "profiling/batch_time": 0.2795872390270233, "profiling/data_time": 0.021890848875045776, "epoch": 249, "step": 7749}
501
+ {"lr-LARS/params": 0.1812163795123325, "lr-LARS/params_no_weight_decay": 0.1812163795123325, "step": 7780}
502
+ {"train_loss": 0.10566463321447372, "profiling/batch_time": 0.28098824620246887, "profiling/data_time": 0.025970134884119034, "epoch": 250, "step": 7780}
503
+ {"lr-LARS/params": 0.17416369358304803, "lr-LARS/params_no_weight_decay": 0.17416369358304803, "step": 7811}
504
+ {"train_loss": 0.10199066996574402, "profiling/batch_time": 0.28028181195259094, "profiling/data_time": 0.024214040488004684, "epoch": 251, "step": 7811}
505
+ {"lr-LARS/params": 0.1672431985078409, "lr-LARS/params_no_weight_decay": 0.1672431985078409, "step": 7842}
506
+ {"train_loss": 0.10597014427185059, "profiling/batch_time": 0.28019487857818604, "profiling/data_time": 0.024268586188554764, "epoch": 252, "step": 7842}
507
+ {"lr-LARS/params": 0.16045570661819894, "lr-LARS/params_no_weight_decay": 0.16045570661819894, "step": 7873}
508
+ {"train_loss": 0.09999129921197891, "profiling/batch_time": 0.28018519282341003, "profiling/data_time": 0.025063375011086464, "epoch": 253, "step": 7873}
509
+ {"lr-LARS/params": 0.15380201463362436, "lr-LARS/params_no_weight_decay": 0.15380201463362436, "step": 7904}
510
+ {"train_loss": 0.10401962697505951, "profiling/batch_time": 0.28187599778175354, "profiling/data_time": 0.027569890022277832, "epoch": 254, "step": 7904}
511
+ {"lr-LARS/params": 0.1472829035681122, "lr-LARS/params_no_weight_decay": 0.1472829035681122, "step": 7935}
512
+ {"train_loss": 0.10317131131887436, "profiling/batch_time": 0.2805408239364624, "profiling/data_time": 0.022582566365599632, "epoch": 255, "step": 7935}
513
+ {"lr-LARS/params": 0.14089913863847636, "lr-LARS/params_no_weight_decay": 0.14089913863847636, "step": 7966}
514
+ {"train_loss": 0.1031603142619133, "profiling/batch_time": 0.2797207534313202, "profiling/data_time": 0.023183738812804222, "epoch": 256, "step": 7966}
515
+ {"lr-LARS/params": 0.13465146917452847, "lr-LARS/params_no_weight_decay": 0.13465146917452847, "step": 7997}
516
+ {"train_loss": 0.10134363174438477, "profiling/batch_time": 0.2793918550014496, "profiling/data_time": 0.02210284397006035, "epoch": 257, "step": 7997}
517
+ {"lr-LARS/params": 0.12854062853111844, "lr-LARS/params_no_weight_decay": 0.12854062853111844, "step": 8028}
518
+ {"train_loss": 0.10225842148065567, "profiling/batch_time": 0.2793455719947815, "profiling/data_time": 0.02300420217216015, "epoch": 258, "step": 8028}
519
+ {"lr-LARS/params": 0.12256733400205563, "lr-LARS/params_no_weight_decay": 0.12256733400205563, "step": 8059}
520
+ {"train_loss": 0.10843018442392349, "profiling/batch_time": 0.28073209524154663, "profiling/data_time": 0.024154726415872574, "epoch": 259, "step": 8059}
521
+ {"lr-LARS/params": 0.11673228673591102, "lr-LARS/params_no_weight_decay": 0.11673228673591102, "step": 8090}
522
+ {"train_loss": 0.1020282506942749, "profiling/batch_time": 0.27941468358039856, "profiling/data_time": 0.02341696061193943, "epoch": 260, "step": 8090}
523
+ {"lr-LARS/params": 0.11103617165371674, "lr-LARS/params_no_weight_decay": 0.11103617165371674, "step": 8121}
524
+ {"train_loss": 0.10159091651439667, "profiling/batch_time": 0.2794548571109772, "profiling/data_time": 0.02334589883685112, "epoch": 261, "step": 8121}
525
+ {"lr-LARS/params": 0.10547965736856922, "lr-LARS/params_no_weight_decay": 0.10547965736856922, "step": 8152}
526
+ {"train_loss": 0.10081281512975693, "profiling/batch_time": 0.2789771556854248, "profiling/data_time": 0.022173026576638222, "epoch": 262, "step": 8152}
527
+ {"lr-LARS/params": 0.10006339610714717, "lr-LARS/params_no_weight_decay": 0.10006339610714717, "step": 8183}
528
+ {"train_loss": 0.10561040788888931, "profiling/batch_time": 0.27898165583610535, "profiling/data_time": 0.023256411775946617, "epoch": 263, "step": 8183}
529
+ {"lr-LARS/params": 0.09478802363315265, "lr-LARS/params_no_weight_decay": 0.09478802363315265, "step": 8214}
530
+ {"train_loss": 0.10167889297008514, "profiling/batch_time": 0.2789912521839142, "profiling/data_time": 0.031061941757798195, "epoch": 264, "step": 8214}
531
+ {"lr-LARS/params": 0.08965415917268495, "lr-LARS/params_no_weight_decay": 0.08965415917268495, "step": 8245}
532
+ {"train_loss": 0.10551893711090088, "profiling/batch_time": 0.27935951948165894, "profiling/data_time": 0.022684741765260696, "epoch": 265, "step": 8245}
533
+ {"lr-LARS/params": 0.08466240534155624, "lr-LARS/params_no_weight_decay": 0.08466240534155624, "step": 8276}
534
+ {"train_loss": 0.10585569590330124, "profiling/batch_time": 0.2798309624195099, "profiling/data_time": 0.02658417820930481, "epoch": 266, "step": 8276}
535
+ {"lr-LARS/params": 0.07981334807455476, "lr-LARS/params_no_weight_decay": 0.07981334807455476, "step": 8307}
536
+ {"train_loss": 0.10160145163536072, "profiling/batch_time": 0.2792998254299164, "profiling/data_time": 0.02456248551607132, "epoch": 267, "step": 8307}
537
+ {"lr-LARS/params": 0.07510755655666829, "lr-LARS/params_no_weight_decay": 0.07510755655666829, "step": 8338}
538
+ {"train_loss": 0.10741127282381058, "profiling/batch_time": 0.2794579267501831, "profiling/data_time": 0.023318085819482803, "epoch": 268, "step": 8338}
539
+ {"lr-LARS/params": 0.07054558315627286, "lr-LARS/params_no_weight_decay": 0.07054558315627286, "step": 8369}
540
+ {"train_loss": 0.1034298688173294, "profiling/batch_time": 0.2790650427341461, "profiling/data_time": 0.024248672649264336, "epoch": 269, "step": 8369}
541
+ {"lr-LARS/params": 0.06612796336029571, "lr-LARS/params_no_weight_decay": 0.06612796336029571, "step": 8400}
542
+ {"train_loss": 0.10081786662340164, "profiling/batch_time": 0.27921435236930847, "profiling/data_time": 0.023698994889855385, "epoch": 270, "step": 8400}
543
+ {"lr-LARS/params": 0.06185521571135856, "lr-LARS/params_no_weight_decay": 0.06185521571135856, "step": 8431}
544
+ {"train_loss": 0.10399844497442245, "profiling/batch_time": 0.2790531814098358, "profiling/data_time": 0.030994962900877, "epoch": 271, "step": 8431}
545
+ {"lr-LARS/params": 0.057727841746912145, "lr-LARS/params_no_weight_decay": 0.057727841746912145, "step": 8462}
546
+ {"train_loss": 0.10392473638057709, "profiling/batch_time": 0.2791461646556854, "profiling/data_time": 0.0274554081261158, "epoch": 272, "step": 8462}
547
+ {"lr-LARS/params": 0.05374632594036454, "lr-LARS/params_no_weight_decay": 0.05374632594036454, "step": 8493}
548
+ {"train_loss": 0.09866714477539062, "profiling/batch_time": 0.27925145626068115, "profiling/data_time": 0.02266130968928337, "epoch": 273, "step": 8493}
549
+ {"lr-LARS/params": 0.04991113564421311, "lr-LARS/params_no_weight_decay": 0.04991113564421311, "step": 8524}
550
+ {"train_loss": 0.10168415307998657, "profiling/batch_time": 0.27962884306907654, "profiling/data_time": 0.026800980791449547, "epoch": 274, "step": 8524}
551
+ {"lr-LARS/params": 0.046222721035188034, "lr-LARS/params_no_weight_decay": 0.046222721035188034, "step": 8555}
552
+ {"train_loss": 0.10107779502868652, "profiling/batch_time": 0.28065225481987, "profiling/data_time": 0.02369886264204979, "epoch": 275, "step": 8555}
553
+ {"lr-LARS/params": 0.04268151506140872, "lr-LARS/params_no_weight_decay": 0.04268151506140872, "step": 8586}
554
+ {"train_loss": 0.1007361114025116, "profiling/batch_time": 0.2794862389564514, "profiling/data_time": 0.025046564638614655, "epoch": 276, "step": 8586}
555
+ {"lr-LARS/params": 0.039287933391564195, "lr-LARS/params_no_weight_decay": 0.039287933391564195, "step": 8617}
556
+ {"train_loss": 0.09812585264444351, "profiling/batch_time": 0.27995210886001587, "profiling/data_time": 0.024096312001347542, "epoch": 277, "step": 8617}
557
+ {"lr-LARS/params": 0.036042374366122766, "lr-LARS/params_no_weight_decay": 0.036042374366122766, "step": 8648}
558
+ {"train_loss": 0.10638859122991562, "profiling/batch_time": 0.28024885058403015, "profiling/data_time": 0.0245086457580328, "epoch": 278, "step": 8648}
559
+ {"lr-LARS/params": 0.03294521895057377, "lr-LARS/params_no_weight_decay": 0.03294521895057377, "step": 8679}
560
+ {"train_loss": 0.10017502307891846, "profiling/batch_time": 0.2800958752632141, "profiling/data_time": 0.02573142759501934, "epoch": 279, "step": 8679}
561
+ {"lr-LARS/params": 0.029996830690709863, "lr-LARS/params_no_weight_decay": 0.029996830690709863, "step": 8710}
562
+ {"train_loss": 0.10346097499132156, "profiling/batch_time": 0.2802504003047943, "profiling/data_time": 0.02819281630218029, "epoch": 280, "step": 8710}
563
+ {"lr-LARS/params": 0.02719755566995376, "lr-LARS/params_no_weight_decay": 0.02719755566995376, "step": 8741}
564
+ {"train_loss": 0.10590995103120804, "profiling/batch_time": 0.2799489200115204, "profiling/data_time": 0.023724747821688652, "epoch": 281, "step": 8741}
565
+ {"lr-LARS/params": 0.02454772246873501, "lr-LARS/params_no_weight_decay": 0.02454772246873501, "step": 8772}
566
+ {"train_loss": 0.10326403379440308, "profiling/batch_time": 0.2797428071498871, "profiling/data_time": 0.024985121563076973, "epoch": 282, "step": 8772}
567
+ {"lr-LARS/params": 0.022047642125920152, "lr-LARS/params_no_weight_decay": 0.022047642125920152, "step": 8803}
568
+ {"train_loss": 0.10495169460773468, "profiling/batch_time": 0.279949426651001, "profiling/data_time": 0.023540405556559563, "epoch": 283, "step": 8803}
569
+ {"lr-LARS/params": 0.01969760810230426, "lr-LARS/params_no_weight_decay": 0.01969760810230426, "step": 8834}
570
+ {"train_loss": 0.10191565752029419, "profiling/batch_time": 0.28019979596138, "profiling/data_time": 0.025011485442519188, "epoch": 284, "step": 8834}
571
+ {"lr-LARS/params": 0.01749789624616345, "lr-LARS/params_no_weight_decay": 0.01749789624616345, "step": 8865}
572
+ {"train_loss": 0.098722904920578, "profiling/batch_time": 0.28012707829475403, "profiling/data_time": 0.02371852844953537, "epoch": 285, "step": 8865}
573
+ {"lr-LARS/params": 0.015448764760875533, "lr-LARS/params_no_weight_decay": 0.015448764760875533, "step": 8896}
574
+ {"train_loss": 0.10278169810771942, "profiling/batch_time": 0.2800341248512268, "profiling/data_time": 0.025719553232192993, "epoch": 286, "step": 8896}
575
+ {"lr-LARS/params": 0.013550454174612337, "lr-LARS/params_no_weight_decay": 0.013550454174612337, "step": 8927}
576
+ {"train_loss": 0.10134021192789078, "profiling/batch_time": 0.28167790174484253, "profiling/data_time": 0.029842698946595192, "epoch": 287, "step": 8927}
577
+ {"lr-LARS/params": 0.011803187312106342, "lr-LARS/params_no_weight_decay": 0.011803187312106342, "step": 8958}
578
+ {"train_loss": 0.100835420191288, "profiling/batch_time": 0.2798146903514862, "profiling/data_time": 0.027598684653639793, "epoch": 288, "step": 8958}
579
+ {"lr-LARS/params": 0.010207169268495044, "lr-LARS/params_no_weight_decay": 0.010207169268495044, "step": 8989}
580
+ {"train_loss": 0.10395936667919159, "profiling/batch_time": 0.28000032901763916, "profiling/data_time": 0.023529784753918648, "epoch": 289, "step": 8989}
581
+ {"lr-LARS/params": 0.008762587385247678, "lr-LARS/params_no_weight_decay": 0.008762587385247678, "step": 9020}
582
+ {"train_loss": 0.09977392852306366, "profiling/batch_time": 0.2798425853252411, "profiling/data_time": 0.021746685728430748, "epoch": 290, "step": 9020}
583
+ {"lr-LARS/params": 0.007469611228173464, "lr-LARS/params_no_weight_decay": 0.007469611228173464, "step": 9051}
584
+ {"train_loss": 0.09984744340181351, "profiling/batch_time": 0.2807977497577667, "profiling/data_time": 0.027246803045272827, "epoch": 291, "step": 9051}
585
+ {"lr-LARS/params": 0.0063283925675196645, "lr-LARS/params_no_weight_decay": 0.0063283925675196645, "step": 9082}
586
+ {"train_loss": 0.10013933479785919, "profiling/batch_time": 0.28046876192092896, "profiling/data_time": 0.024583306163549423, "epoch": 292, "step": 9082}
587
+ {"lr-LARS/params": 0.005339065360155056, "lr-LARS/params_no_weight_decay": 0.005339065360155056, "step": 9113}
588
+ {"train_loss": 0.10468168556690216, "profiling/batch_time": 0.28117406368255615, "profiling/data_time": 0.023828603327274323, "epoch": 293, "step": 9113}
589
+ {"lr-LARS/params": 0.00450174573384713, "lr-LARS/params_no_weight_decay": 0.00450174573384713, "step": 9144}
590
+ {"train_loss": 0.10621758550405502, "profiling/batch_time": 0.2810419797897339, "profiling/data_time": 0.024311896413564682, "epoch": 294, "step": 9144}
591
+ {"lr-LARS/params": 0.0038165319736305817, "lr-LARS/params_no_weight_decay": 0.0038165319736305817, "step": 9175}
592
+ {"train_loss": 0.10430607944726944, "profiling/batch_time": 0.28081008791923523, "profiling/data_time": 0.02483076974749565, "epoch": 295, "step": 9175}
593
+ {"lr-LARS/params": 0.003283504510270415, "lr-LARS/params_no_weight_decay": 0.003283504510270415, "step": 9206}
594
+ {"train_loss": 0.10369907319545746, "profiling/batch_time": 0.2803572714328766, "profiling/data_time": 0.027543842792510986, "epoch": 296, "step": 9206}
595
+ {"lr-LARS/params": 0.0029027259108212336, "lr-LARS/params_no_weight_decay": 0.0029027259108212336, "step": 9237}
596
+ {"train_loss": 0.105305016040802, "profiling/batch_time": 0.2802669405937195, "profiling/data_time": 0.025652233511209488, "epoch": 297, "step": 9237}
597
+ {"lr-LARS/params": 0.002674240871282573, "lr-LARS/params_no_weight_decay": 0.002674240871282573, "step": 9268}
598
+ {"train_loss": 0.10315429419279099, "profiling/batch_time": 0.28151655197143555, "profiling/data_time": 0.024249540641903877, "epoch": 298, "step": 9268}
599
+ {"lr-LARS/params": 0.002598076211353316, "lr-LARS/params_no_weight_decay": 0.002598076211353316, "step": 9299}
600
+ {"train_loss": 0.10100086778402328, "profiling/batch_time": 0.28025883436203003, "profiling/data_time": 0.02447395585477352, "epoch": 299, "step": 9299}
ssl_distil/swin/swin_distil.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39f9a127449d250fbd9c352c259abe283a9ade5a081a6a0580f5f072820e8633
3
+ size 113380529