Conna committed on
Commit
218862a
·
verified ·
1 Parent(s): 438a69e

Upload 18 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ figs/AVF-MAE++_v6_0315.png filter=lfs diff=lfs merge=lfs -text
37
+ figs/CEA-DEA.jpg filter=lfs diff=lfs merge=lfs -text
38
+ figs/MAFW-Fold5-0315.png filter=lfs diff=lfs merge=lfs -text
39
+ figs/overall_reconstruction-0317.png filter=lfs diff=lfs merge=lfs -text
40
+ figs/radar_1030.png filter=lfs diff=lfs merge=lfs -text
figs/AVF-MAE++_v6_0315.png ADDED

Git LFS Details

  • SHA256: a81203a6e7087e6796f6ba2e41a0d832610e7fcc0b804e17f19875c3a6978628
  • Pointer size: 132 Bytes
  • Size of remote file: 2.49 MB
figs/CEA-DEA.jpg ADDED

Git LFS Details

  • SHA256: c128e4aa2c6132891b18236a0e0b4867618348b1ff2f346980eeda3f20054502
  • Pointer size: 131 Bytes
  • Size of remote file: 313 kB
figs/MAFW-Fold5-0315.png ADDED

Git LFS Details

  • SHA256: 976bff4817c529acc51962f7720ab4cfd19f286dc8b4b7ab6c9543a182a30ec2
  • Pointer size: 132 Bytes
  • Size of remote file: 4.24 MB
figs/MER.jpg ADDED
figs/overall_reconstruction-0317.png ADDED

Git LFS Details

  • SHA256: 95dbfece0166f8e37d30f0def9243f2e1f801bf393cbb086b6ef91b6413a0b45
  • Pointer size: 132 Bytes
  • Size of remote file: 7.13 MB
figs/radar_1030.png ADDED

Git LFS Details

  • SHA256: cbb313e9ccad86a535a6c4e92ef16fc2e6b616dd8e1a4cdc472a0d58896098e6
  • Pointer size: 131 Bytes
  • Size of remote file: 131 kB
figs/title_final.jpg ADDED
logs/AVF-MAE++_huge-MAFW (11-class)/eval_split01/log.txt ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.580357142857142e-06, "train_min_lr": 3.1460435902122946e-08, "train_loss": 1.7237481711917977, "train_loss_scale": 8516.435643564357, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.58385003898658, "val_acc1": 49.07407443663653, "val_acc5": 88.50762543023801, "val_uar": 0.38211505171816285, "val_war": 0.49074074074074076, "val_weighted_f1": 0.4843445785387134, "val_micro_f1": 0.49074074074074076, "val_macro_f1": 0.3828846412661885, "epoch": 0, "n_parameters": 521309229}
2
+ {"train_lr": 1.6852678571428577e-05, "train_min_lr": 9.501051642441133e-08, "train_loss": 1.691055551515554, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.181818940851948, "val_loss": 1.600970097616607, "val_acc1": 48.42047999419418, "val_acc5": 88.67102409811581, "val_uar": 0.3624778506601405, "val_war": 0.4842047930283224, "val_weighted_f1": 0.47012279796936096, "val_micro_f1": 0.4842047930283224, "val_macro_f1": 0.3556997943025081, "epoch": 1, "n_parameters": 521309229}
3
+ {"train_lr": 2.8125000000000006e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.694991302470563, "train_loss_scale": 7583.683168316832, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.5869658939978655, "val_acc1": 49.074074460010905, "val_acc5": 88.61655800015319, "val_uar": 0.3743537305930727, "val_war": 0.49074074074074076, "val_weighted_f1": 0.48303628719296854, "val_micro_f1": 0.49074074074074076, "val_macro_f1": 0.37325986816133677, "epoch": 2, "n_parameters": 521309229}
4
+ {"train_lr": 3.939732142857144e-05, "train_min_lr": 2.2211067746898805e-07, "train_loss": 1.7086083244765946, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.183026821306436, "val_loss": 1.6002308326024635, "val_acc1": 49.12854071691925, "val_acc5": 87.96296302945007, "val_uar": 0.35543735597173526, "val_war": 0.4912854030501089, "val_weighted_f1": 0.46778177429663975, "val_micro_f1": 0.4912854030501089, "val_macro_f1": 0.34881551980712905, "epoch": 3, "n_parameters": 521309229}
5
+ {"train_lr": 5.0669642857142856e-05, "train_min_lr": 2.856607579912764e-07, "train_loss": 1.7127124952404413, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.099005984787894, "val_loss": 1.593409547910971, "val_acc1": 47.603486243416285, "val_acc5": 88.07189593595616, "val_uar": 0.3605862490646585, "val_war": 0.4760348583877996, "val_weighted_f1": 0.4619153717752548, "val_micro_f1": 0.4760348583877996, "val_macro_f1": 0.35435368486592833, "epoch": 4, "n_parameters": 521309229}
6
+ {"train_lr": 5.624497522407655e-05, "train_min_lr": 3.170928656633569e-07, "train_loss": 1.7084255413253708, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.074381903846665, "val_loss": 1.54363729205786, "val_acc1": 49.509804225435445, "val_acc5": 89.76034878749473, "val_uar": 0.37475539161313404, "val_war": 0.4950980392156863, "val_weighted_f1": 0.48969253059084444, "val_micro_f1": 0.4950980392156863, "val_macro_f1": 0.37841457618646657, "epoch": 5, "n_parameters": 521309229}
7
+ {"train_lr": 5.621453170303479e-05, "train_min_lr": 3.169212339168842e-07, "train_loss": 1.6911084902758646, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9718827327879347, "val_loss": 1.601149216002109, "val_acc1": 48.148148747051465, "val_acc5": 89.32461891922296, "val_uar": 0.34808564720459395, "val_war": 0.48148148148148145, "val_weighted_f1": 0.4640206173522509, "val_micro_f1": 0.48148148148148145, "val_macro_f1": 0.34375067657574443, "epoch": 6, "n_parameters": 521309229}
8
+ {"train_lr": 5.615352646285501e-05, "train_min_lr": 3.1657730405733625e-07, "train_loss": 1.6812912124415043, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.121284111891643, "val_loss": 1.6106983218707291, "val_acc1": 48.093682677138084, "val_acc5": 87.58169967052983, "val_uar": 0.3636888312933551, "val_war": 0.4809368191721133, "val_weighted_f1": 0.4704242749893072, "val_micro_f1": 0.4809368191721133, "val_macro_f1": 0.35925916995034635, "epoch": 7, "n_parameters": 521309229}
9
+ {"train_lr": 5.60620262118716e-05, "train_min_lr": 3.1606145216696487e-07, "train_loss": 1.6826378216247748, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.007796169507621, "val_loss": 1.5971511286847733, "val_acc1": 48.202614751516606, "val_acc5": 88.23529460383396, "val_uar": 0.36576368042890467, "val_war": 0.4820261437908497, "val_weighted_f1": 0.4754846161586723, "val_micro_f1": 0.4820261437908497, "val_macro_f1": 0.3680992368897665, "epoch": 8, "n_parameters": 521309229}
10
+ {"train_lr": 5.5940131004265686e-05, "train_min_lr": 3.1537424232223837e-07, "train_loss": 1.6753871908085574, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.045796446280904, "val_loss": 1.595664691691305, "val_acc1": 49.400871688244386, "val_acc5": 87.7995647355622, "val_uar": 0.3901121162095245, "val_war": 0.4940087145969499, "val_weighted_f1": 0.4927368307694971, "val_micro_f1": 0.4940087145969499, "val_macro_f1": 0.397271601692481, "epoch": 9, "n_parameters": 521309229}
11
+ {"train_lr": 5.57879741306571e-05, "train_min_lr": 3.1451642597703227e-07, "train_loss": 1.6825643116884892, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.033019030448234, "val_loss": 1.5393371608327417, "val_acc1": 50.8169938583, "val_acc5": 88.50762565463197, "val_uar": 0.37126658403605367, "val_war": 0.5081699346405228, "val_weighted_f1": 0.488636472476857, "val_micro_f1": 0.5081699346405228, "val_macro_f1": 0.36283414621828125, "epoch": 10, "n_parameters": 521309229}
12
+ {"train_lr": 5.5605721972353206e-05, "train_min_lr": 3.134889411409257e-07, "train_loss": 1.668048356035755, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.098181101355222, "val_loss": 1.5873998935316123, "val_acc1": 49.50980436100679, "val_acc5": 87.2004357506247, "val_uar": 0.37281107415009107, "val_war": 0.4950980392156863, "val_weighted_f1": 0.4742162647397975, "val_micro_f1": 0.4950980392156863, "val_macro_f1": 0.37114473866076314, "epoch": 11, "n_parameters": 521309229}
13
+ {"train_lr": 5.5393573819413314e-05, "train_min_lr": 3.1229291135350213e-07, "train_loss": 1.6526003038332406, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.138172501384622, "val_loss": 1.564542821224998, "val_acc1": 50.000000299191946, "val_acc5": 88.45315907048244, "val_uar": 0.384538495284177, "val_war": 0.5, "val_weighted_f1": 0.4921018140265161, "val_micro_f1": 0.5, "val_macro_f1": 0.38643808311077665, "epoch": 12, "n_parameters": 521309229}
14
+ {"train_lr": 5.5151761652727875e-05, "train_min_lr": 3.109296444557738e-07, "train_loss": 1.6529369197859622, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.888830652331362, "val_loss": 1.5671642732386495, "val_acc1": 50.49019654124391, "val_acc5": 88.34422706155216, "val_uar": 0.37752401312684963, "val_war": 0.5049019607843137, "val_weighted_f1": 0.4919096772146955, "val_micro_f1": 0.5049019607843137, "val_macro_f1": 0.377953628357427, "epoch": 13, "n_parameters": 521309229}
15
+ {"train_lr": 5.4880549890350996e-05, "train_min_lr": 3.094006311600778e-07, "train_loss": 1.646852806751484, "train_loss_scale": 2717.1485148514853, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.5982910190142838, "val_acc1": 49.78213536505606, "val_acc5": 87.14597002665202, "val_uar": 0.3847760570410927, "val_war": 0.49782135076252726, "val_weighted_f1": 0.4895111883732106, "val_micro_f1": 0.49782135076252726, "val_macro_f1": 0.3838978609089993, "epoch": 14, "n_parameters": 521309229}
16
+ {"train_lr": 5.458023509836289e-05, "train_min_lr": 3.077075434200046e-07, "train_loss": 1.6467297920102726, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.041096352114536, "val_loss": 1.5774279780247633, "val_acc1": 49.673203103682575, "val_acc5": 89.05228753183403, "val_uar": 0.3754471900585384, "val_war": 0.49673202614379086, "val_weighted_f1": 0.4856168934568151, "val_micro_f1": 0.49673202614379086, "val_macro_f1": 0.3800162981340792, "epoch": 15, "n_parameters": 521309229}
17
+ {"train_lr": 5.425114566657945e-05, "train_min_lr": 3.058522326021405e-07, "train_loss": 1.6286095392585982, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9685346537297317, "val_loss": 1.5641005728759019, "val_acc1": 50.653595260545316, "val_acc5": 88.45315922007842, "val_uar": 0.373700603734861, "val_war": 0.5065359477124183, "val_weighted_f1": 0.4895117267662733, "val_micro_f1": 0.5065359477124183, "val_macro_f1": 0.37105672924321187, "epoch": 16, "n_parameters": 521309229}
18
+ {"train_lr": 5.389364144946269e-05, "train_min_lr": 3.0383672746162495e-07, "train_loss": 1.6354687772568304, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.017633383817012, "val_loss": 1.598174844302383, "val_acc1": 50.54466254570905, "val_acc5": 87.7995646607642, "val_uar": 0.38086889566297205, "val_war": 0.5054466230936819, "val_weighted_f1": 0.49614212773726935, "val_micro_f1": 0.5054466230936819, "val_macro_f1": 0.38236266915645006, "epoch": 17, "n_parameters": 521309229}
19
+ {"train_lr": 5.3508113372625154e-05, "train_min_lr": 3.0166323192373806e-07, "train_loss": 1.6465716860672035, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.07902454857779, "val_loss": 1.5753445759707807, "val_acc1": 50.05446671037113, "val_acc5": 87.09150362949745, "val_uar": 0.3747110292885353, "val_war": 0.5005446623093682, "val_weighted_f1": 0.48621689277971414, "val_micro_f1": 0.5005446623093682, "val_macro_f1": 0.3744725041190397, "epoch": 18, "n_parameters": 521309229}
20
+ {"train_lr": 5.309498300535867e-05, "train_min_lr": 2.993341226739393e-07, "train_loss": 1.6412286574494328, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.942697768164153, "val_loss": 1.5873478239073473, "val_acc1": 48.52941225089279, "val_acc5": 87.9084966322955, "val_uar": 0.3642026041774896, "val_war": 0.4852941176470588, "val_weighted_f1": 0.4703597271165728, "val_micro_f1": 0.4852941176470588, "val_macro_f1": 0.3624439707542557, "epoch": 19, "n_parameters": 521309229}
21
+ {"train_lr": 5.265470209965427e-05, "train_min_lr": 2.9685194655899794e-07, "train_loss": 1.6350250474297174, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0666176375776235, "val_loss": 1.586846705160889, "val_acc1": 50.27233154633466, "val_acc5": 88.07189556196624, "val_uar": 0.3839686266224093, "val_war": 0.5027233115468409, "val_weighted_f1": 0.4892211868948065, "val_micro_f1": 0.5027233115468409, "val_macro_f1": 0.3835595948515779, "epoch": 20, "n_parameters": 521309229}
22
+ {"train_lr": 5.218775209621823e-05, "train_min_lr": 2.9421941780205285e-07, "train_loss": 1.6164964271260567, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.955448872972243, "val_loss": 1.5838021627243828, "val_acc1": 49.400871940687594, "val_acc5": 88.01742983799355, "val_uar": 0.37585212003110613, "val_war": 0.4940087145969499, "val_weighted_f1": 0.48502558925577577, "val_micro_f1": 0.4940087145969499, "val_macro_f1": 0.3734376808440088, "epoch": 21, "n_parameters": 521309229}
23
+ {"train_lr": 5.1694643598023545e-05, "train_min_lr": 2.9143941503464896e-07, "train_loss": 1.6180363954490562, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0293231175677615, "val_loss": 1.5831120020034266, "val_acc1": 49.18300686630548, "val_acc5": 88.23529445423799, "val_uar": 0.35677202982514544, "val_war": 0.4918300653594771, "val_weighted_f1": 0.4718109436134653, "val_micro_f1": 0.4918300653594771, "val_macro_f1": 0.35229493010016816, "epoch": 22, "n_parameters": 521309229}
24
+ {"train_lr": 5.117591581197337e-05, "train_min_lr": 2.88514978148997e-07, "train_loss": 1.5986210453038168, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9412622782263425, "val_loss": 1.6039145454472186, "val_acc1": 48.80174332974004, "val_acc5": 88.01742938920563, "val_uar": 0.3699574098565443, "val_war": 0.4880174291938998, "val_weighted_f1": 0.47781770539549207, "val_micro_f1": 0.4880174291938998, "val_macro_f1": 0.3717635386480641, "epoch": 23, "n_parameters": 521309229}
25
+ {"train_lr": 5.0632135959285794e-05, "train_min_lr": 2.854493049738955e-07, "train_loss": 1.603781778426847, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9713110073958293, "val_loss": 1.5770907255948758, "val_acc1": 49.8910679583456, "val_acc5": 87.9084976420683, "val_uar": 0.37877131883657017, "val_war": 0.4989106753812636, "val_weighted_f1": 0.48580571964728836, "val_micro_f1": 0.4989106753812636, "val_macro_f1": 0.3789440030861241, "epoch": 24, "n_parameters": 521309229}
26
+ {"train_lr": 5.0063898655246014e-05, "train_min_lr": 2.822457477779514e-07, "train_loss": 1.61152851994675, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9388205721826837, "val_loss": 1.5803729956056558, "val_acc1": 49.782135626849005, "val_acc5": 87.90849745507334, "val_uar": 0.3903614224001146, "val_war": 0.49782135076252726, "val_weighted_f1": 0.49295925704013543, "val_micro_f1": 0.49782135076252726, "val_macro_f1": 0.39365544167185945, "epoch": 25, "n_parameters": 521309229}
27
+ {"train_lr": 4.9471825259003246e-05, "train_min_lr": 2.789078096039244e-07, "train_loss": 1.5987602676888897, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9339909081411832, "val_loss": 1.6038791093171811, "val_acc1": 48.80174334376466, "val_acc5": 87.14596983965706, "val_uar": 0.3755683584741246, "val_war": 0.4880174291938998, "val_weighted_f1": 0.48125589229117627, "val_micro_f1": 0.4880174291938998, "val_macro_f1": 0.377757033190917, "epoch": 26, "n_parameters": 521309229}
28
+ {"train_lr": 4.885656319412359e-05, "train_min_lr": 2.754391404382005e-07, "train_loss": 1.5904297091976645, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.034430114349516, "val_loss": 1.5711724521482693, "val_acc1": 50.7625275032193, "val_acc5": 87.90849738027535, "val_uar": 0.38898242785396153, "val_war": 0.5076252723311547, "val_weighted_f1": 0.4913223031548277, "val_micro_f1": 0.5076252723311547, "val_macro_f1": 0.389494944074443, "epoch": 27, "n_parameters": 521309229}
29
+ {"train_lr": 4.821878524064173e-05, "train_min_lr": 2.7184353321958644e-07, "train_loss": 1.6160709177306776, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.951175895067725, "val_loss": 1.569350997022554, "val_acc1": 49.78213519676059, "val_acc5": 86.98257124657725, "val_uar": 0.3813017268472478, "val_war": 0.49782135076252726, "val_weighted_f1": 0.4824257173679099, "val_micro_f1": 0.49782135076252726, "val_macro_f1": 0.3770418048568816, "epoch": 28, "n_parameters": 521309229}
30
+ {"train_lr": 4.7559188799386115e-05, "train_min_lr": 2.6812491969178504e-07, "train_loss": 1.592667848560283, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9167868878581737, "val_loss": 1.5495195219329758, "val_acc1": 50.381263947954366, "val_acc5": 87.96296321644502, "val_uar": 0.38281384555019005, "val_war": 0.5038126361655774, "val_weighted_f1": 0.4889424389333931, "val_micro_f1": 0.5038126361655774, "val_macro_f1": 0.37996398697530026, "epoch": 29, "n_parameters": 521309229}
31
+ {"train_lr": 4.6878495129381106e-05, "train_min_lr": 2.6428736610409306e-07, "train_loss": 1.5952794847905438, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8901607282090893, "val_loss": 1.6020009187506694, "val_acc1": 49.23747325878517, "val_acc5": 86.87363863926308, "val_uar": 0.3758822059026724, "val_war": 0.4918300653594771, "val_weighted_f1": 0.4826941276042173, "val_micro_f1": 0.4918300653594771, "val_macro_f1": 0.3747883071993031, "epoch": 30, "n_parameters": 521309229}
32
+ {"train_lr": 4.6177448559161015e-05, "train_min_lr": 2.603350687650165e-07, "train_loss": 1.5809429204306586, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9122225671711535, "val_loss": 1.5942358918049757, "val_acc1": 49.50980439373091, "val_acc5": 87.69063209084904, "val_uar": 0.36354525637049034, "val_war": 0.4950980392156863, "val_weighted_f1": 0.47377105914912543, "val_micro_f1": 0.4950980392156863, "val_macro_f1": 0.3574525752345108, "epoch": 31, "n_parameters": 521309229}
33
+ {"train_lr": 4.545681567285774e-05, "train_min_lr": 2.5627234945366804e-07, "train_loss": 1.580500968120279, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9861982458888896, "val_loss": 1.6358817430103527, "val_acc1": 49.23747316996256, "val_acc5": 86.05664537467209, "val_uar": 0.37894629442391575, "val_war": 0.4923747276688453, "val_weighted_f1": 0.4809522733721267, "val_micro_f1": 0.4923747276688453, "val_macro_f1": 0.3769789825088992, "epoch": 32, "n_parameters": 521309229}
34
+ {"train_lr": 4.47173844719522e-05, "train_min_lr": 2.5210365069396683e-07, "train_loss": 1.5798233498637826, "train_loss_scale": 2048.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.017995251287328, "val_loss": 1.622238654716342, "val_acc1": 48.58387845637751, "val_acc5": 87.58169996972178, "val_uar": 0.3786267813387157, "val_war": 0.485838779956427, "val_weighted_f1": 0.48413366493524634, "val_micro_f1": 0.485838779956427, "val_macro_f1": 0.38464356561038815, "epoch": 33, "n_parameters": 521309229}
35
+ {"train_lr": 4.395996351360636e-05, "train_min_lr": 2.478335308968022e-07, "train_loss": 1.589917917161098, "train_loss_scale": 3832.3960396039606, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1305559011969235, "val_loss": 1.5653295280302273, "val_acc1": 50.7625275219188, "val_acc5": 87.09150385389141, "val_uar": 0.38910496142179946, "val_war": 0.5076252723311547, "val_weighted_f1": 0.49720910327534157, "val_micro_f1": 0.5076252723311547, "val_macro_f1": 0.3925277187106488, "epoch": 34, "n_parameters": 521309229}
36
+ {"train_lr": 4.318538102651787e-05, "train_min_lr": 2.434666593754791e-07, "train_loss": 1.5742835491010458, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.070973660686229, "val_loss": 1.593629527910083, "val_acc1": 49.9455343134263, "val_acc5": 87.58169993232278, "val_uar": 0.39956408269367755, "val_war": 0.49945533769063183, "val_weighted_f1": 0.5016781122607962, "val_micro_f1": 0.49945533769063183, "val_macro_f1": 0.4045583686881475, "epoch": 35, "n_parameters": 521309229}
37
+ {"train_lr": 4.2394484005264044e-05, "train_min_lr": 2.390078112398925e-07, "train_loss": 1.57931388211329, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9525272515740726, "val_loss": 1.6005994987254049, "val_acc1": 49.12854075431824, "val_acc5": 86.92810499901866, "val_uar": 0.3732278854748711, "val_war": 0.4912854030501089, "val_weighted_f1": 0.4776016309557632, "val_micro_f1": 0.4912854030501089, "val_macro_f1": 0.37674269755340223, "epoch": 36, "n_parameters": 521309229}
38
+ {"train_lr": 4.158813728412575e-05, "train_min_lr": 2.3446186217501424e-07, "train_loss": 1.5862403796254212, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.946562132032791, "val_loss": 1.613977431082258, "val_acc1": 48.47494605475781, "val_acc5": 87.09150370429543, "val_uar": 0.3865087801839715, "val_war": 0.48474945533769065, "val_weighted_f1": 0.48447293780368994, "val_micro_f1": 0.48474945533769065, "val_macro_f1": 0.38401898776583904, "epoch": 37, "n_parameters": 521309229}
39
+ {"train_lr": 4.0767222591403925e-05, "train_min_lr": 2.2983378310940412e-07, "train_loss": 1.5786094099971721, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.019567709157963, "val_loss": 1.5997863902765161, "val_acc1": 49.836601757535746, "val_acc5": 87.14597010145, "val_uar": 0.3742130460876895, "val_war": 0.49836601307189543, "val_weighted_f1": 0.49071913572520826, "val_micro_f1": 0.49836601307189543, "val_macro_f1": 0.37208293443085727, "epoch": 38, "n_parameters": 521309229}
40
+ {"train_lr": 3.993263758526252e-05, "train_min_lr": 2.251286347795713e-07, "train_loss": 1.5544793981530092, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9849990240418083, "val_loss": 1.621737082507096, "val_acc1": 47.9302838082407, "val_acc5": 86.92810488682167, "val_uar": 0.3672602572019136, "val_war": 0.4793028322440087, "val_weighted_f1": 0.4710611147934126, "val_micro_f1": 0.4793028322440087, "val_macro_f1": 0.3699211240346962, "epoch": 39, "n_parameters": 521309229}
41
+ {"train_lr": 3.9085294872152475e-05, "train_min_lr": 2.203515621961343e-07, "train_loss": 1.5573609408372306, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9180067482561167, "val_loss": 1.6019039747177386, "val_acc1": 49.782135715671615, "val_acc5": 88.07189574896121, "val_uar": 0.37377462101232944, "val_war": 0.49782135076252726, "val_weighted_f1": 0.4860725881463331, "val_micro_f1": 0.49782135076252726, "val_macro_f1": 0.3743455516942278, "epoch": 40, "n_parameters": 521309229}
42
+ {"train_lr": 3.822612100889004e-05, "train_min_lr": 2.1550778901782692e-07, "train_loss": 1.557274338239097, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9737784036315316, "val_loss": 1.613198874335663, "val_acc1": 49.34640573987774, "val_acc5": 87.09150366689644, "val_uar": 0.37770860968537273, "val_war": 0.4934640522875817, "val_weighted_f1": 0.48450987289654474, "val_micro_f1": 0.4934640522875817, "val_macro_f1": 0.37507120883426925, "epoch": 41, "n_parameters": 521309229}
43
+ {"train_lr": 3.73560554894804e-05, "train_min_lr": 2.1060261183950398e-07, "train_loss": 1.5473738799590875, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.849185384146058, "val_loss": 1.6163523530258852, "val_acc1": 49.23747325878517, "val_acc5": 86.76470621894387, "val_uar": 0.3763810392911007, "val_war": 0.4923747276688453, "val_weighted_f1": 0.4811755013376228, "val_micro_f1": 0.4923747276688453, "val_macro_f1": 0.37849451026012426, "epoch": 42, "n_parameters": 521309229}
44
+ {"train_lr": 3.647604971779486e-05, "train_min_lr": 2.056413944003928e-07, "train_loss": 1.5439124816518413, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.981139626833472, "val_loss": 1.624489687237085, "val_acc1": 47.549020098704915, "val_acc5": 86.98257113438027, "val_uar": 0.3647417835645039, "val_war": 0.47549019607843135, "val_weighted_f1": 0.46726754254160724, "val_micro_f1": 0.47549019607843135, "val_macro_f1": 0.36506505749467727, "epoch": 43, "n_parameters": 521309229}
45
+ {"train_lr": 3.558706596722466e-05, "train_min_lr": 2.006295617189234e-07, "train_loss": 1.5409105533223735, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9688412175320162, "val_loss": 1.599407951037089, "val_acc1": 48.965142161238425, "val_acc5": 87.58169996972178, "val_uar": 0.36112765836910626, "val_war": 0.48965141612200436, "val_weighted_f1": 0.4756601792776614, "val_micro_f1": 0.48965141612200436, "val_macro_f1": 0.3588270846335396, "epoch": 44, "n_parameters": 521309229}
46
+ {"train_lr": 3.469007632844911e-05, "train_min_lr": 1.9557259416054976e-07, "train_loss": 1.5468214345057019, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9593726974902768, "val_loss": 1.5968213659875534, "val_acc1": 48.856209764293595, "val_acc5": 86.9281047746247, "val_uar": 0.36857296881755774, "val_war": 0.48856209150326796, "val_weighted_f1": 0.47697896989855376, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.3691252544123175, "epoch": 45, "n_parameters": 521309229}
47
+ {"train_lr": 3.378606164646873e-05, "train_min_lr": 1.9047602144505153e-07, "train_loss": 1.5275318181947513, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.926677958800061, "val_loss": 1.600926781986274, "val_acc1": 49.23747320268669, "val_acc5": 87.36383486729042, "val_uar": 0.37859565083529206, "val_war": 0.4923747276688453, "val_weighted_f1": 0.48433897033859175, "val_micro_f1": 0.4923747276688453, "val_macro_f1": 0.37846624917593813, "epoch": 46, "n_parameters": 521309229}
48
+ {"train_lr": 3.28760104480657e-05, "train_min_lr": 1.85345416599866e-07, "train_loss": 1.5424154443119225, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.926234781151951, "val_loss": 1.596313403517592, "val_acc1": 49.40087183784036, "val_acc5": 86.98257098478429, "val_uar": 0.3736086745847085, "val_war": 0.4940087145969499, "val_weighted_f1": 0.4829349078091435, "val_micro_f1": 0.4940087145969499, "val_macro_f1": 0.37515183221477644, "epoch": 47, "n_parameters": 521309229}
49
+ {"train_lr": 3.1960917860864196e-05, "train_min_lr": 1.8018638986606443e-07, "train_loss": 1.5167197791656646, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.94800331568954, "val_loss": 1.5669558340427923, "val_acc1": 48.91067596042858, "val_acc5": 87.5272336099662, "val_uar": 0.37602604000617285, "val_war": 0.4891067538126362, "val_weighted_f1": 0.47910830417332495, "val_micro_f1": 0.4891067538126362, "val_macro_f1": 0.37685046324635557, "epoch": 48, "n_parameters": 521309229}
50
+ {"train_lr": 3.104178452517305e-05, "train_min_lr": 1.750045825636361e-07, "train_loss": 1.5427957262733194, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9609036563646676, "val_loss": 1.6262462659209382, "val_acc1": 48.63834474600998, "val_acc5": 86.38344233643775, "val_uar": 0.3742349092846318, "val_war": 0.4863834422657952, "val_weighted_f1": 0.47869881838733663, "val_micro_f1": 0.4863834422657952, "val_macro_f1": 0.3721516513026927, "epoch": 49, "n_parameters": 521309229}
51
+ {"train_lr": 3.011961549980036e-05, "train_min_lr": 1.698056609227879e-07, "train_loss": 1.5268682586674642, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.000230262775232, "val_loss": 1.605160243955313, "val_acc1": 48.58387854052525, "val_acc5": 87.03703726974189, "val_uar": 0.37778877712077974, "val_war": 0.485838779956427, "val_weighted_f1": 0.47792910031810415, "val_micro_f1": 0.485838779956427, "val_macro_f1": 0.38104386329505296, "epoch": 50, "n_parameters": 521309229}
52
+ {"train_lr": 2.919541916303608e-05, "train_min_lr": 1.6459530988800415e-07, "train_loss": 1.5273499726855715, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9863422719558868, "val_loss": 1.6273040081940444, "val_acc1": 48.474945923861334, "val_acc5": 87.63616573109346, "val_uar": 0.3676933828712394, "val_war": 0.48474945533769065, "val_weighted_f1": 0.4740529584650553, "val_micro_f1": 0.48474945533769065, "val_macro_f1": 0.3683274316005572, "epoch": 51, "n_parameters": 521309229}
53
+ {"train_lr": 2.8270206110005638e-05, "train_min_lr": 1.593792269016439e-07, "train_loss": 1.525142254805801, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.033121810101046, "val_loss": 1.626023037176506, "val_acc1": 47.87581751393337, "val_acc5": 86.54684134090648, "val_uar": 0.36732764300750376, "val_war": 0.47875816993464054, "val_weighted_f1": 0.4711974481466342, "val_micro_f1": 0.47875816993464054, "val_macro_f1": 0.37070714593791454, "epoch": 52, "n_parameters": 521309229}
54
+ {"train_lr": 2.7344988047598695e-05, "train_min_lr": 1.541631156738708e-07, "train_loss": 1.507046511562744, "train_loss_scale": 4379.881188118812, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8993440500580436, "val_loss": 1.596672512736975, "val_acc1": 49.40087192198809, "val_acc5": 87.74509837580662, "val_uar": 0.37389376308284455, "val_war": 0.4940087145969499, "val_weighted_f1": 0.4865762586153497, "val_micro_f1": 0.4940087145969499, "val_macro_f1": 0.3796035795867958, "epoch": 53, "n_parameters": 521309229}
55
+ {"train_lr": 2.6420776688182537e-05, "train_min_lr": 1.4895267994573047e-07, "train_loss": 1.5255950535878096, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9187713920479954, "val_loss": 1.6103702441734427, "val_acc1": 48.63834466186224, "val_acc5": 87.690632315243, "val_uar": 0.364627229694958, "val_war": 0.4863834422657952, "val_weighted_f1": 0.4782288795713157, "val_micro_f1": 0.4863834422657952, "val_macro_f1": 0.36639506908186054, "epoch": 54, "n_parameters": 521309229}
56
+ {"train_lr": 2.549858264330953e-05, "train_min_lr": 1.4375361725219248e-07, "train_loss": 1.5355718961053162, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.957430275359956, "val_loss": 1.5989902428552216, "val_acc1": 48.36601342406927, "val_acc5": 86.87363890105603, "val_uar": 0.371723819067332, "val_war": 0.48366013071895425, "val_weighted_f1": 0.4717602591853631, "val_micro_f1": 0.48366013071895425, "val_macro_f1": 0.37014169374447387, "epoch": 55, "n_parameters": 521309229}
57
+ {"train_lr": 2.4579414318628305e-05, "train_min_lr": 1.3857161269198084e-07, "train_loss": 1.5323928985068507, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8217687016666524, "val_loss": 1.614322101952983, "val_acc1": 48.856209600673004, "val_acc5": 86.6013074388691, "val_uar": 0.36897703007086086, "val_war": 0.48856209150326796, "val_weighted_f1": 0.47227941886436736, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.3692513424084905, "epoch": 56, "n_parameters": 521309229}
58
+ {"train_lr": 2.3664276811206737e-05, "train_min_lr": 1.3341233271100021e-07, "train_loss": 1.514155222736176, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8355917104400032, "val_loss": 1.6121776185783685, "val_acc1": 50.05446699553845, "val_acc5": 87.30936888152478, "val_uar": 0.3847165507246827, "val_war": 0.5005446623093682, "val_weighted_f1": 0.48943355252139914, "val_micro_f1": 0.5005446623093682, "val_macro_f1": 0.38610623496237895, "epoch": 57, "n_parameters": 521309229}
59
+ {"train_lr": 2.2754170810473282e-05, "train_min_lr": 1.282814189061623e-07, "train_loss": 1.5137112065903817, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9392449005995647, "val_loss": 1.6068145837269576, "val_acc1": 49.23747302036659, "val_acc5": 87.20043608721565, "val_uar": 0.3689990701459997, "val_war": 0.4923747276688453, "val_weighted_f1": 0.4825343778579092, "val_micro_f1": 0.4923747276688453, "val_macro_f1": 0.3695405079016661, "epoch": 58, "n_parameters": 521309229}
60
+ {"train_lr": 2.1850091503977748e-05, "train_min_lr": 1.2318448185638122e-07, "train_loss": 1.5320204526874492, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9484755686014004, "val_loss": 1.6268289997881533, "val_acc1": 48.583878577924246, "val_acc5": 86.71024004618327, "val_uar": 0.3639966608788346, "val_war": 0.485838779956427, "val_weighted_f1": 0.4726578511975902, "val_micro_f1": 0.485838779956427, "val_macro_f1": 0.36487894196692794, "epoch": 59, "n_parameters": 521309229}
61
+ {"train_lr": 2.0953027489168203e-05, "train_min_lr": 1.1812709498748855e-07, "train_loss": 1.5174590547879536, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9034676764271046, "val_loss": 1.6030322646393496, "val_acc1": 48.85620971286998, "val_acc5": 87.30936858233284, "val_uar": 0.3709834797756037, "val_war": 0.48856209150326796, "val_weighted_f1": 0.47637080543465093, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.3692175372868383, "epoch": 60, "n_parameters": 521309229}
62
+ {"train_lr": 2.006395969237425e-05, "train_min_lr": 1.1311478847777361e-07, "train_loss": 1.508485120023438, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9398216304212514, "val_loss": 1.5868184884973602, "val_acc1": 49.34640559495664, "val_acc5": 86.92810481202369, "val_uar": 0.3727110340560209, "val_war": 0.4934640522875817, "val_weighted_f1": 0.4810406356541938, "val_micro_f1": 0.4934640522875817, "val_macro_f1": 0.37334665558882696, "epoch": 61, "n_parameters": 521309229}
63
+ {"train_lr": 1.918386029617857e-05, "train_min_lr": 1.0815304321081477e-07, "train_loss": 1.5265002146412436, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9682552932512642, "val_loss": 1.6017626915492265, "val_acc1": 48.85620956794888, "val_acc5": 86.27450987871956, "val_uar": 0.36359692119777276, "val_war": 0.48856209150326796, "val_weighted_f1": 0.47502452382012694, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.36242695422741, "epoch": 62, "n_parameters": 521309229}
64
+ {"train_lr": 1.831369167634938e-05, "train_min_lr": 1.0324728478221384e-07, "train_loss": 1.492556102795176, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8049586692658983, "val_loss": 1.6213369308149113, "val_acc1": 47.712418537513884, "val_acc5": 86.38344278522567, "val_uar": 0.3599039450593115, "val_war": 0.477124183006536, "val_weighted_f1": 0.4694286837322739, "val_micro_f1": 0.477124183006536, "val_macro_f1": 0.3612202546017008, "epoch": 63, "n_parameters": 521309229}
65
+ {"train_lr": 1.74544053494968e-05, "train_min_lr": 9.840287756678695e-08, "train_loss": 1.5041335985408757, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9845643539239863, "val_loss": 1.6029472321856255, "val_acc1": 49.78213559879976, "val_acc5": 86.4379088457893, "val_uar": 0.37695027463316044, "val_war": 0.49782135076252726, "val_weighted_f1": 0.48498753303921827, "val_micro_f1": 0.49782135076252726, "val_macro_f1": 0.37625874110692753, "epoch": 64, "n_parameters": 521309229}
66
+ {"train_lr": 1.6606940932603314e-05, "train_min_lr": 9.36251188526991e-08, "train_loss": 1.515146900226574, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9977022610088384, "val_loss": 1.6000202432567, "val_acc1": 48.80174327364155, "val_acc5": 86.49237513074688, "val_uar": 0.3753316607489756, "val_war": 0.4880174291938998, "val_weighted_f1": 0.48289204749857273, "val_micro_f1": 0.4880174291938998, "val_macro_f1": 0.3793646288566427, "epoch": 65, "n_parameters": 521309229}
67
+ {"train_lr": 1.5772225115566454e-05, "train_min_lr": 8.891923304895814e-08, "train_loss": 1.5293972162720393, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.965795630275613, "val_loss": 1.5956799878793604, "val_acc1": 48.63834472263561, "val_acc5": 86.92810544780656, "val_uar": 0.3696336843259759, "val_war": 0.4863834422657952, "val_weighted_f1": 0.4734435538928921, "val_micro_f1": 0.4863834422657952, "val_macro_f1": 0.3685792260377852, "epoch": 66, "n_parameters": 521309229}
68
+ {"train_lr": 1.4951170647876973e-05, "train_min_lr": 8.429036597259903e-08, "train_loss": 1.5052895369899548, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9806617840681926, "val_loss": 1.6111338506726658, "val_acc1": 47.44008763163698, "val_acc5": 86.98257076039033, "val_uar": 0.36091191154362096, "val_war": 0.474400871459695, "val_weighted_f1": 0.4649290006312399, "val_micro_f1": 0.474400871459695, "val_macro_f1": 0.36073329754130024, "epoch": 67, "n_parameters": 521309229}
69
+ {"train_lr": 1.4144675340540693e-05, "train_min_lr": 7.974357922180958e-08, "train_loss": 1.5040642482416071, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9013781925239184, "val_loss": 1.63357783620264, "val_acc1": 48.2570809990752, "val_acc5": 86.71023970959233, "val_uar": 0.36501437250419455, "val_war": 0.48257080610021785, "val_weighted_f1": 0.4722652416745176, "val_micro_f1": 0.48257080610021785, "val_macro_f1": 0.36577145398159566, "epoch": 68, "n_parameters": 521309229}
70
+ {"train_lr": 1.3353621084335341e-05, "train_min_lr": 7.528384464114652e-08, "train_loss": 1.497229673189692, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.865208894899576, "val_loss": 1.6222526165200215, "val_acc1": 48.20261482631459, "val_acc5": 86.27451021531049, "val_uar": 0.3684465231357116, "val_war": 0.4820261437908497, "val_weighted_f1": 0.4736702787423237, "val_micro_f1": 0.4820261437908497, "val_macro_f1": 0.37015262333468957, "epoch": 69, "n_parameters": 521309229}
71
+ {"train_lr": 1.2578872885475928e-05, "train_min_lr": 7.091603888489658e-08, "train_loss": 1.4941109438540519, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.884156352222556, "val_loss": 1.6226853178996665, "val_acc1": 49.727669323191925, "val_acc5": 86.00217920191147, "val_uar": 0.37494774273468506, "val_war": 0.49727668845315903, "val_weighted_f1": 0.4829867559078704, "val_micro_f1": 0.49727668845315903, "val_macro_f1": 0.37441552322067384, "epoch": 70, "n_parameters": 521309229}
72
+ {"train_lr": 1.1821277919743169e-05, "train_min_lr": 6.664493808452678e-08, "train_loss": 1.5005152543189109, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9283981417665386, "val_loss": 1.6236987137327008, "val_acc1": 48.583878577924246, "val_acc5": 87.09150325550752, "val_uar": 0.3685242246989328, "val_war": 0.485838779956427, "val_weighted_f1": 0.4751869442914292, "val_micro_f1": 0.485838779956427, "val_macro_f1": 0.3719072253847506, "epoch": 71, "n_parameters": 521309229}
73
+ {"train_lr": 1.1081664606109202e-05, "train_min_lr": 6.247521262605469e-08, "train_loss": 1.517581267325398, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8289963514497964, "val_loss": 1.6154740572560067, "val_acc1": 49.128540702894625, "val_acc5": 86.65577391082165, "val_uar": 0.3637063742676386, "val_war": 0.4912854030501089, "val_weighted_f1": 0.48082286387467493, "val_micro_f1": 0.4912854030501089, "val_macro_f1": 0.3632430198541205, "epoch": 72, "n_parameters": 521309229}
74
+ {"train_lr": 1.0360841700873597e-05, "train_min_lr": 5.841142204304979e-08, "train_loss": 1.4769996269898054, "train_loss_scale": 8435.326732673268, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.616112136957692, "val_acc1": 48.747277049457324, "val_acc5": 86.8191728404924, "val_uar": 0.376013913748676, "val_war": 0.4874727668845316, "val_weighted_f1": 0.47885911344184906, "val_micro_f1": 0.4874727668845316, "val_macro_f1": 0.378001222821409, "epoch": 73, "n_parameters": 521309229}
75
+ {"train_lr": 9.659597413300203e-06, "train_min_lr": 5.445801003085074e-08, "train_loss": 1.4981511698500944, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.93027390583907, "val_loss": 1.6116667725876266, "val_acc1": 49.18300695045322, "val_acc5": 86.4923750559489, "val_uar": 0.37493377086800067, "val_war": 0.4918300653594771, "val_weighted_f1": 0.4825005952643354, "val_micro_f1": 0.4918300653594771, "val_macro_f1": 0.3794825651399045, "epoch": 74, "n_parameters": 521309229}
76
+ {"train_lr": 8.978698543721923e-06, "train_min_lr": 5.0619299587449256e-08, "train_loss": 1.4980626662965655, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8383050720290384, "val_loss": 1.6038304733879425, "val_acc1": 48.9651421659133, "val_acc5": 87.36383460549747, "val_uar": 0.3664501104673182, "val_war": 0.48965141612200436, "val_weighted_f1": 0.47614361417739026, "val_micro_f1": 0.48965141612200436, "val_macro_f1": 0.3651156577864224, "epoch": 75, "n_parameters": 521309229}
77
+ {"train_lr": 8.318889645055782e-06, "train_min_lr": 4.689948828635602e-08, "train_loss": 1.463402648471763, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8741652989151456, "val_loss": 1.589971142656663, "val_acc1": 49.1830068990296, "val_acc5": 87.52723342297124, "val_uar": 0.3684490462975059, "val_war": 0.4918300653594771, "val_weighted_f1": 0.47918472119662364, "val_micro_f1": 0.4918300653594771, "val_macro_f1": 0.3661462905011329, "epoch": 76, "n_parameters": 521309229}
78
+ {"train_lr": 7.680892208645253e-06, "train_min_lr": 4.3302643686614975e-08, "train_loss": 1.4931411705788213, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9431480747638363, "val_loss": 1.6076824083632113, "val_acc1": 48.47494585373823, "val_acc5": 87.25490237217323, "val_uar": 0.3625178286377832, "val_war": 0.48474945533769065, "val_weighted_f1": 0.47240665928304865, "val_micro_f1": 0.48474945533769065, "val_macro_f1": 0.3613554567490874, "epoch": 77, "n_parameters": 521309229}
79
+ {"train_lr": 7.065403875320073e-06, "train_min_lr": 3.983269888498758e-08, "train_loss": 1.4787895357844854, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.893576709350737, "val_loss": 1.6104205689009499, "val_acc1": 48.85620959132326, "val_acc5": 86.54684104171454, "val_uar": 0.3615774522643387, "val_war": 0.48856209150326796, "val_weighted_f1": 0.4737533551803378, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.3610713024159458, "epoch": 78, "n_parameters": 521309229}
80
+ {"train_lr": 6.473097672536222e-06, "train_min_lr": 3.649344821516926e-08, "train_loss": 1.4955433470384516, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8473851161428017, "val_loss": 1.6176820309723126, "val_acc1": 48.91067601185219, "val_acc5": 87.20043619941262, "val_uar": 0.3665565624140522, "val_war": 0.4891067538126362, "val_weighted_f1": 0.47479946869338335, "val_micro_f1": 0.4891067538126362, "val_macro_f1": 0.36635300270269655, "epoch": 79, "n_parameters": 521309229}
81
+ {"train_lr": 5.904621278430291e-06, "train_min_lr": 3.3288543098741024e-08, "train_loss": 1.475250655963476, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8957358468877206, "val_loss": 1.6088236637559592, "val_acc1": 49.29193951101864, "val_acc5": 86.1111115474327, "val_uar": 0.3782739218199997, "val_war": 0.4929193899782135, "val_weighted_f1": 0.48146455747688505, "val_micro_f1": 0.4929193899782135, "val_macro_f1": 0.37997313752735346, "epoch": 80, "n_parameters": 521309229}
82
+ {"train_lr": 5.3605963135929456e-06, "train_min_lr": 3.0221488052393765e-08, "train_loss": 1.4680663684610487, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8320570723845226, "val_loss": 1.6158911378360261, "val_acc1": 48.965142123839435, "val_acc5": 86.54684156530044, "val_uar": 0.37022004893632465, "val_war": 0.4891067538126362, "val_weighted_f1": 0.47795063448460356, "val_micro_f1": 0.4891067538126362, "val_macro_f1": 0.37092714564509444, "epoch": 81, "n_parameters": 521309229}
83
+ {"train_lr": 4.8416176613359425e-06, "train_min_lr": 2.7295636855790628e-08, "train_loss": 1.512200020425784, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8985453737844336, "val_loss": 1.6052208887595756, "val_acc1": 48.63834466186224, "val_acc5": 86.54684134090648, "val_uar": 0.3683802524803488, "val_war": 0.4863834422657952, "val_weighted_f1": 0.4746886784711306, "val_micro_f1": 0.4863834422657952, "val_macro_f1": 0.3701986525945733, "epoch": 82, "n_parameters": 521309229}
84
+ {"train_lr": 4.3482528171959625e-06, "train_min_lr": 2.4514188884258143e-08, "train_loss": 1.4762192386211734, "train_loss_scale": 6488.712871287129, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.6061948847536947, "val_acc1": 49.12854064679613, "val_acc5": 87.30936854493385, "val_uar": 0.3694564329574896, "val_war": 0.4912854030501089, "val_weighted_f1": 0.4788376996245773, "val_micro_f1": 0.4912854030501089, "val_macro_f1": 0.37198448510319065, "epoch": 83, "n_parameters": 521309229}
85
+ {"train_lr": 3.88104126838656e-06, "train_min_lr": 2.188018561031641e-08, "train_loss": 1.4929520242875165, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8636497294548713, "val_loss": 1.5946830774054808, "val_acc1": 49.01960816102869, "val_acc5": 87.14596995185403, "val_uar": 0.36985442138938085, "val_war": 0.49019607843137253, "val_weighted_f1": 0.47939380810124727, "val_micro_f1": 0.49019607843137253, "val_macro_f1": 0.373550816688426, "epoch": 84, "n_parameters": 521309229}
86
+ {"train_lr": 3.4404939038768243e-06, "train_min_lr": 1.9396507277873433e-08, "train_loss": 1.4832019544277255, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8253560113434744, "val_loss": 1.577489513392542, "val_acc1": 49.6732029727861, "val_acc5": 87.52723353516822, "val_uar": 0.3791304338897067, "val_war": 0.49673202614379086, "val_weighted_f1": 0.48858518080354313, "val_micro_f1": 0.49673202614379086, "val_macro_f1": 0.3829710749041437, "epoch": 85, "n_parameters": 521309229}
87
+ {"train_lr": 3.027092455741798e-06, "train_min_lr": 1.7065869752720735e-08, "train_loss": 1.4917065699895222, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.948688226171059, "val_loss": 1.6004070253933178, "val_acc1": 49.29193938479704, "val_acc5": 87.1459696152631, "val_uar": 0.36632000888728056, "val_war": 0.4929193899782135, "val_weighted_f1": 0.4798188863167606, "val_micro_f1": 0.4929193899782135, "val_macro_f1": 0.36649298877035036, "epoch": 86, "n_parameters": 521309229}
88
+ {"train_lr": 2.641288972395518e-06, "train_min_lr": 1.4890821552773996e-08, "train_loss": 1.4910397390917975, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9039639222739946, "val_loss": 1.6037253506627738, "val_acc1": 49.18300695980297, "val_acc5": 86.81917205511355, "val_uar": 0.3633767539265033, "val_war": 0.4918300653594771, "val_weighted_f1": 0.47637384721028686, "val_micro_f1": 0.4918300653594771, "val_macro_f1": 0.361187226061764, "epoch": 87, "n_parameters": 521309229}
89
+ {"train_lr": 2.2835053242827143e-06, "train_min_lr": 1.2873741061306107e-08, "train_loss": 1.4493111667066518, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8568904824776227, "val_loss": 1.6249047885922825, "val_acc1": 48.420479872647455, "val_acc5": 87.0370371575449, "val_uar": 0.36178163766952776, "val_war": 0.4842047930283224, "val_weighted_f1": 0.4719874897451514, "val_micro_f1": 0.4842047930283224, "val_macro_f1": 0.3632430810174333, "epoch": 88, "n_parameters": 521309229}
90
+ {"train_lr": 1.954132742569673e-06, "train_min_lr": 1.1016833926220049e-08, "train_loss": 1.5141466390771834, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.84987739525219, "val_loss": 1.5899019971782087, "val_acc1": 50.054466677647014, "val_acc5": 87.30936832053989, "val_uar": 0.37355922805867536, "val_war": 0.5005446623093682, "val_weighted_f1": 0.48584831409763923, "val_micro_f1": 0.5005446623093682, "val_macro_f1": 0.3737875685978429, "epoch": 89, "n_parameters": 521309229}
91
+ {"train_lr": 1.6535313913386933e-06, "train_min_lr": 9.322130648205152e-09, "train_loss": 1.475954671995868, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.841396619777868, "val_loss": 1.6177038377990909, "val_acc1": 48.420479760450476, "val_acc5": 87.20043619941262, "val_uar": 0.3607294401528622, "val_war": 0.4842047930283224, "val_weighted_f1": 0.47125471704116684, "val_micro_f1": 0.4842047930283224, "val_macro_f1": 0.35851541927908753, "epoch": 90, "n_parameters": 521309229}
92
+ {"train_lr": 1.3820299737539636e-06, "train_min_lr": 7.791484360414586e-09, "train_loss": 1.4812117698955851, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.817079197062124, "val_loss": 1.585795219038047, "val_acc1": 49.8910680845672, "val_acc5": 87.7450985254026, "val_uar": 0.3683988608244649, "val_war": 0.4989106753812636, "val_weighted_f1": 0.4849795299050937, "val_micro_f1": 0.4989106753812636, "val_macro_f1": 0.36666486276442295, "epoch": 91, "n_parameters": 521309229}
93
+ {"train_lr": 1.139925372629472e-06, "train_min_lr": 6.426568802091309e-09, "train_loss": 1.4679873779465262, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8640128267873632, "val_loss": 1.601357518457899, "val_acc1": 49.29193957179201, "val_acc5": 86.71023997138528, "val_uar": 0.36959465820636295, "val_war": 0.4929193899782135, "val_weighted_f1": 0.47814718813125295, "val_micro_f1": 0.4929193899782135, "val_macro_f1": 0.36775461243042484, "epoch": 92, "n_parameters": 521309229}
94
+ {"train_lr": 9.274823257919996e-07, "train_min_lr": 5.22887648835886e-09, "train_loss": 1.469045978863247, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8330779712979157, "val_loss": 1.625100602998453, "val_acc1": 48.801743484010885, "val_acc5": 87.03703741933785, "val_uar": 0.37167228059465235, "val_war": 0.4880174291938998, "val_weighted_f1": 0.47780186778259714, "val_micro_f1": 0.4880174291938998, "val_macro_f1": 0.37286818692410334, "epoch": 93, "n_parameters": 521309229}
95
+ {"train_lr": 7.449331365942088e-07, "train_min_lr": 4.1997170781779625e-09, "train_loss": 1.4856834542436568, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.812819478535416, "val_loss": 1.5906552906129874, "val_acc1": 48.91067605392605, "val_acc5": 88.12636192172181, "val_uar": 0.36712356566263793, "val_war": 0.4891067538126362, "val_weighted_f1": 0.4772121171768467, "val_micro_f1": 0.4891067538126362, "val_macro_f1": 0.36694803417092026, "epoch": 94, "n_parameters": 521309229}
96
+ {"train_lr": 5.924774198943271e-07, "train_min_lr": 3.340215942253686e-09, "train_loss": 1.4852645459151503, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.922587845585134, "val_loss": 1.6136191008137721, "val_acc1": 48.96514211916456, "val_acc5": 85.94771306654987, "val_uar": 0.37147889971833026, "val_war": 0.48965141612200436, "val_weighted_f1": 0.47952702804312125, "val_micro_f1": 0.48965141612200436, "val_macro_f1": 0.37266429010753704, "epoch": 95, "n_parameters": 521309229}
97
+ {"train_lr": 4.7028188378023254e-07, "train_min_lr": 2.6513129324590976e-09, "train_loss": 1.4898376617297875, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.885425860338872, "val_loss": 1.5901767526771509, "val_acc1": 49.128540866515216, "val_acc5": 86.98257139617321, "val_uar": 0.3633195979042741, "val_war": 0.4912854030501089, "val_weighted_f1": 0.47782535472931287, "val_micro_f1": 0.4912854030501089, "val_macro_f1": 0.36166255625706883, "epoch": 96, "n_parameters": 521309229}
98
+ {"train_lr": 3.7848014727660956e-07, "train_min_lr": 2.1337613541210327e-09, "train_loss": 1.476600962896945, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8223320446392095, "val_loss": 1.6064413622313856, "val_acc1": 48.69281092344546, "val_acc5": 87.30936854493385, "val_uar": 0.36937563887359054, "val_war": 0.4869281045751634, "val_weighted_f1": 0.47618427439922323, "val_micro_f1": 0.4869281045751634, "val_macro_f1": 0.3720267688273774, "epoch": 97, "n_parameters": 521309229}
99
+ {"train_lr": 3.171725942345054e-07, "train_min_lr": 1.7881271422917885e-09, "train_loss": 1.4797241456634533, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8523200242826254, "val_loss": 1.6110170325812172, "val_acc1": 48.85620956794888, "val_acc5": 86.76470588235294, "val_uar": 0.3685026301649781, "val_war": 0.48856209150326796, "val_weighted_f1": 0.47952276621913675, "val_micro_f1": 0.48856209150326796, "val_macro_f1": 0.3711016717206285, "epoch": 98, "n_parameters": 521309229}
100
+ {"train_lr": 2.8642626356306314e-07, "train_min_lr": 1.6147882429074497e-09, "train_loss": 1.4923429390778242, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.7964530109178902, "val_loss": 1.6033909595480151, "val_acc1": 48.58387835353029, "val_acc5": 87.03703734453987, "val_uar": 0.3634821050801724, "val_war": 0.485838779956427, "val_weighted_f1": 0.4736954357787687, "val_micro_f1": 0.485838779956427, "val_macro_f1": 0.3636000707564832, "epoch": 99, "n_parameters": 521309229}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 50.98039215686274, "Final Top-5 (best epoch)": 88.34422657952071}
103
+ Final UAR: 37.56%, Final WAR: 50.98%
104
+ Final Confusion Matrix:
105
+ [[170 14 0 6 62 7 6 0 12 0 0]
106
+ [ 17 40 0 7 44 5 1 0 13 1 0]
107
+ [ 12 2 22 1 18 19 41 0 10 0 0]
108
+ [ 3 4 0 208 26 4 2 1 1 0 0]
109
+ [ 16 2 0 5 170 5 11 0 17 1 1]
110
+ [ 7 10 5 8 52 158 6 0 45 2 1]
111
+ [ 19 10 8 13 39 10 97 0 16 1 0]
112
+ [ 5 1 1 8 26 1 1 0 4 1 0]
113
+ [ 25 13 0 3 46 13 14 1 67 1 1]
114
+ [ 4 3 0 2 18 5 2 2 14 2 1]
115
+ [ 0 1 0 2 13 6 2 0 10 1 2]]
116
+ Final Class Accuracies: ['61.37%', '31.25%', '17.60%', '83.53%', '74.56%', '53.74%', '45.54%', '0.00%', '36.41%', '3.77%', '5.41%']
117
+ Final Weighted F1: 0.4933, Final Micro F1: 0.5098, Final Macro F1: 0.3722
logs/AVF-MAE++_huge-MAFW (11-class)/eval_split02/log.txt ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.580357142857142e-06, "train_min_lr": 3.1460435902122946e-08, "train_loss": 1.7282040252937343, "train_loss_scale": 8516.435643564357, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.4721026812113969, "val_acc1": 53.267974273831236, "val_acc5": 90.03267998788871, "val_uar": 0.41502369056670096, "val_war": 0.5326797385620915, "val_weighted_f1": 0.5176303735969595, "val_micro_f1": 0.5326797385620915, "val_macro_f1": 0.41026483927078494, "epoch": 0, "n_parameters": 521309229}
2
+ {"train_lr": 1.6852678571428577e-05, "train_min_lr": 9.501051642441133e-08, "train_loss": 1.6971802047573694, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.261071958164178, "val_loss": 1.4560740368039, "val_acc1": 54.684096551408956, "val_acc5": 90.3594773236443, "val_uar": 0.4213181890015195, "val_war": 0.5468409586056645, "val_weighted_f1": 0.5271105486721167, "val_micro_f1": 0.5468409586056645, "val_macro_f1": 0.4177654007930736, "epoch": 1, "n_parameters": 521309229}
3
+ {"train_lr": 2.8125000000000006e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.696722159094543, "train_loss_scale": 7908.118811881188, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.4838203594965094, "val_acc1": 52.668845826504274, "val_acc5": 89.43355156393612, "val_uar": 0.3974571884135275, "val_war": 0.5266884531590414, "val_weighted_f1": 0.5036880108588325, "val_micro_f1": 0.5266884531590414, "val_macro_f1": 0.39291083695760726, "epoch": 2, "n_parameters": 521309229}
4
+ {"train_lr": 3.939732142857144e-05, "train_min_lr": 2.2211067746898805e-07, "train_loss": 1.704375550396765, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.191606328038886, "val_loss": 1.4619728303423114, "val_acc1": 53.92156906688915, "val_acc5": 89.8148152220483, "val_uar": 0.4201380317852923, "val_war": 0.5392156862745098, "val_weighted_f1": 0.5192237682452528, "val_micro_f1": 0.5392156862745098, "val_macro_f1": 0.41569132420316174, "epoch": 3, "n_parameters": 521309229}
5
+ {"train_lr": 5.0669642857142856e-05, "train_min_lr": 2.856607579912764e-07, "train_loss": 1.716554695918615, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.224815328522484, "val_loss": 1.4834713024251602, "val_acc1": 53.59477157686271, "val_acc5": 90.19607884276147, "val_uar": 0.4111402746374153, "val_war": 0.5359477124183006, "val_weighted_f1": 0.5162108728421021, "val_micro_f1": 0.5359477124183006, "val_macro_f1": 0.4002854937131861, "epoch": 4, "n_parameters": 521309229}
6
+ {"train_lr": 5.624497522407655e-05, "train_min_lr": 3.170928656633569e-07, "train_loss": 1.704501446127498, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.222457737025648, "val_loss": 1.4625361143958335, "val_acc1": 54.52069766848695, "val_acc5": 90.19607888016046, "val_uar": 0.4156519938432599, "val_war": 0.5452069716775599, "val_weighted_f1": 0.516522472105803, "val_micro_f1": 0.5452069716775599, "val_macro_f1": 0.4038915115465693, "epoch": 5, "n_parameters": 521309229}
7
+ {"train_lr": 5.621453170303479e-05, "train_min_lr": 3.169212339168842e-07, "train_loss": 1.6924676153526055, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.202953579402206, "val_loss": 1.4692227998200584, "val_acc1": 52.39651474765703, "val_acc5": 90.63180856143727, "val_uar": 0.39865114664047635, "val_war": 0.5239651416122004, "val_weighted_f1": 0.5092786965955257, "val_micro_f1": 0.5239651416122004, "val_macro_f1": 0.40019922672470515, "epoch": 6, "n_parameters": 521309229}
8
+ {"train_lr": 5.615352646285501e-05, "train_min_lr": 3.1657730405733625e-07, "train_loss": 1.6801205060072857, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.160629702086496, "val_loss": 1.4408476229976206, "val_acc1": 53.97603561363968, "val_acc5": 90.90413987402822, "val_uar": 0.41994643255949754, "val_war": 0.539760348583878, "val_weighted_f1": 0.5311457721409134, "val_micro_f1": 0.539760348583878, "val_macro_f1": 0.4224681540535826, "epoch": 7, "n_parameters": 521309229}
9
+ {"train_lr": 5.60620262118716e-05, "train_min_lr": 3.1606145216696487e-07, "train_loss": 1.6736505051257193, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.260123026252973, "val_loss": 1.4648316812281514, "val_acc1": 53.48583937626259, "val_acc5": 90.57734250087364, "val_uar": 0.41087457566909563, "val_war": 0.5348583877995643, "val_weighted_f1": 0.5162510025016657, "val_micro_f1": 0.5348583877995643, "val_macro_f1": 0.4059471255713185, "epoch": 8, "n_parameters": 521309229}
10
+ {"train_lr": 5.5940131004265686e-05, "train_min_lr": 3.1537424232223837e-07, "train_loss": 1.6822078170949477, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.244682512661018, "val_loss": 1.4378680709530325, "val_acc1": 53.37690687647053, "val_acc5": 91.39433572806564, "val_uar": 0.41282818650095826, "val_war": 0.5337690631808278, "val_weighted_f1": 0.5139430275248599, "val_micro_f1": 0.5337690631808278, "val_macro_f1": 0.41122029937473015, "epoch": 9, "n_parameters": 521309229}
11
+ {"train_lr": 5.57879741306571e-05, "train_min_lr": 3.1451642597703227e-07, "train_loss": 1.6745407917711994, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.075029606866364, "val_loss": 1.4707389021036672, "val_acc1": 53.376906825046916, "val_acc5": 89.43355130214317, "val_uar": 0.41412753789498163, "val_war": 0.5337690631808278, "val_weighted_f1": 0.5188334085570653, "val_micro_f1": 0.5337690631808278, "val_macro_f1": 0.4133991385410336, "epoch": 10, "n_parameters": 521309229}
12
+ {"train_lr": 5.5605721972353206e-05, "train_min_lr": 3.134889411409257e-07, "train_loss": 1.6561326134716323, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.00739057229297, "val_loss": 1.4815734963791043, "val_acc1": 54.35729908475689, "val_acc5": 90.08714634764428, "val_uar": 0.4063972953784992, "val_war": 0.5435729847494554, "val_weighted_f1": 0.5182244858835883, "val_micro_f1": 0.5435729847494554, "val_macro_f1": 0.3989209619513736, "epoch": 11, "n_parameters": 521309229}
13
+ {"train_lr": 5.5393573819413314e-05, "train_min_lr": 3.1229291135350213e-07, "train_loss": 1.6410265748847042, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.028602227126018, "val_loss": 1.4660400175580792, "val_acc1": 52.83224481227351, "val_acc5": 90.1960786557665, "val_uar": 0.40380619748418345, "val_war": 0.528322440087146, "val_weighted_f1": 0.5073676601157237, "val_micro_f1": 0.528322440087146, "val_macro_f1": 0.40192714323106316, "epoch": 12, "n_parameters": 521309229}
14
+ {"train_lr": 5.5151761652727875e-05, "train_min_lr": 3.109296444557738e-07, "train_loss": 1.6465361115365926, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9808070258338852, "val_loss": 1.456463681716545, "val_acc1": 54.03050168355306, "val_acc5": 91.23093717238483, "val_uar": 0.40463932790950397, "val_war": 0.5403050108932462, "val_weighted_f1": 0.5213543450386386, "val_micro_f1": 0.5403050108932462, "val_macro_f1": 0.3996246521242298, "epoch": 13, "n_parameters": 521309229}
15
+ {"train_lr": 5.4880549890350996e-05, "train_min_lr": 3.094006311600778e-07, "train_loss": 1.647027848183912, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.116349373713578, "val_loss": 1.424319898088773, "val_acc1": 54.84749516318826, "val_acc5": 90.35947769763423, "val_uar": 0.4153463366066779, "val_war": 0.5484749455337691, "val_weighted_f1": 0.5285056714224694, "val_micro_f1": 0.5484749455337691, "val_macro_f1": 0.4107204840346346, "epoch": 14, "n_parameters": 521309229}
16
+ {"train_lr": 5.458023509836289e-05, "train_min_lr": 3.077075434200046e-07, "train_loss": 1.6304935047728788, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1102721053775, "val_loss": 1.4669889851528055, "val_acc1": 53.48583927341536, "val_acc5": 89.7058827643301, "val_uar": 0.40330824536332627, "val_war": 0.5348583877995643, "val_weighted_f1": 0.5151043142569329, "val_micro_f1": 0.5348583877995643, "val_macro_f1": 0.3980821382771474, "epoch": 15, "n_parameters": 521309229}
17
+ {"train_lr": 5.425114566657945e-05, "train_min_lr": 3.058522326021405e-07, "train_loss": 1.6416685286134776, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.053899071004131, "val_loss": 1.4386905864173292, "val_acc1": 55.119826279434506, "val_acc5": 90.90413987402822, "val_uar": 0.42397974042630715, "val_war": 0.5517429193899782, "val_weighted_f1": 0.5411397908412142, "val_micro_f1": 0.5517429193899782, "val_macro_f1": 0.4231735316549317, "epoch": 16, "n_parameters": 521309229}
18
+ {"train_lr": 5.389364144946269e-05, "train_min_lr": 3.0383672746162495e-07, "train_loss": 1.6356125137593487, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.254091274620283, "val_loss": 1.4521329811975068, "val_acc1": 54.90196142477148, "val_acc5": 89.76034863789876, "val_uar": 0.4236431244774751, "val_war": 0.5490196078431373, "val_weighted_f1": 0.5274424578058537, "val_micro_f1": 0.5490196078431373, "val_macro_f1": 0.41989191367799356, "epoch": 17, "n_parameters": 521309229}
19
+ {"train_lr": 5.3508113372625154e-05, "train_min_lr": 3.0166323192373806e-07, "train_loss": 1.6151320576274355, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.070639206631349, "val_loss": 1.4895524487775915, "val_acc1": 52.178649734048285, "val_acc5": 89.92374775456447, "val_uar": 0.3953513209633409, "val_war": 0.5217864923747276, "val_weighted_f1": 0.5091896644849797, "val_micro_f1": 0.5217864923747276, "val_macro_f1": 0.39126033430624485, "epoch": 18, "n_parameters": 521309229}
20
+ {"train_lr": 5.309498300535867e-05, "train_min_lr": 2.993341226739393e-07, "train_loss": 1.6340550507846052, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.07582694941228, "val_loss": 1.4477235575517018, "val_acc1": 52.99564320433374, "val_acc5": 90.14161285699582, "val_uar": 0.40089715298193174, "val_war": 0.5299564270152506, "val_weighted_f1": 0.5122407127385835, "val_micro_f1": 0.5299564270152506, "val_macro_f1": 0.3971598726525972, "epoch": 19, "n_parameters": 521309229}
21
+ {"train_lr": 5.265470209965427e-05, "train_min_lr": 2.9685194655899794e-07, "train_loss": 1.6247020517245379, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.12526549915276, "val_loss": 1.477826518755333, "val_acc1": 53.26797425513174, "val_acc5": 89.10675400378658, "val_uar": 0.399102458446389, "val_war": 0.5326797385620915, "val_weighted_f1": 0.5037271165273906, "val_micro_f1": 0.5326797385620915, "val_macro_f1": 0.38724619005217653, "epoch": 20, "n_parameters": 521309229}
22
+ {"train_lr": 5.218775209621823e-05, "train_min_lr": 2.9421941780205285e-07, "train_loss": 1.6201439225437617, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.153902648699166, "val_loss": 1.472201742085756, "val_acc1": 53.97603547339346, "val_acc5": 89.27015278386135, "val_uar": 0.400374744371503, "val_war": 0.539760348583878, "val_weighted_f1": 0.5188992199602245, "val_micro_f1": 0.539760348583878, "val_macro_f1": 0.3943907783257322, "epoch": 21, "n_parameters": 521309229}
23
+ {"train_lr": 5.1694643598023545e-05, "train_min_lr": 2.9143941503464896e-07, "train_loss": 1.6164924526962117, "train_loss_scale": 5190.970297029703, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.128592337712203, "val_loss": 1.463372328117782, "val_acc1": 54.7930289810779, "val_acc5": 89.48801781149471, "val_uar": 0.4206076186145055, "val_war": 0.5479302832244008, "val_weighted_f1": 0.5268819836528124, "val_micro_f1": 0.5479302832244008, "val_macro_f1": 0.4190149697161888, "epoch": 22, "n_parameters": 521309229}
24
+ {"train_lr": 5.117591581197337e-05, "train_min_lr": 2.88514978148997e-07, "train_loss": 1.6082892998216962, "train_loss_scale": 4704.316831683168, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.4685249451328726, "val_acc1": 54.411765336990356, "val_acc5": 89.54248413385129, "val_uar": 0.41253654260649675, "val_war": 0.5441176470588235, "val_weighted_f1": 0.5250189656436258, "val_micro_f1": 0.5441176470588235, "val_macro_f1": 0.40688147906363764, "epoch": 23, "n_parameters": 521309229}
25
+ {"train_lr": 5.0632135959285794e-05, "train_min_lr": 2.854493049738955e-07, "train_loss": 1.6031167083250808, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.05484140745484, "val_loss": 1.4845982240695579, "val_acc1": 52.45098080354578, "val_acc5": 89.54248394685634, "val_uar": 0.40050512941917293, "val_war": 0.5245098039215687, "val_weighted_f1": 0.5127896635945847, "val_micro_f1": 0.5245098039215687, "val_macro_f1": 0.3960992868355536, "epoch": 24, "n_parameters": 521309229}
26
+ {"train_lr": 5.0063898655246014e-05, "train_min_lr": 2.822457477779514e-07, "train_loss": 1.6135899812081467, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.064863870639612, "val_loss": 1.4814327508211136, "val_acc1": 53.48583947443495, "val_acc5": 88.50762599122291, "val_uar": 0.4074055710328197, "val_war": 0.5348583877995643, "val_weighted_f1": 0.5193763704009587, "val_micro_f1": 0.5348583877995643, "val_macro_f1": 0.40572932810425455, "epoch": 25, "n_parameters": 521309229}
27
+ {"train_lr": 4.9471825259003246e-05, "train_min_lr": 2.789078096039244e-07, "train_loss": 1.6033517373080302, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.092233856125634, "val_loss": 1.4765800579505808, "val_acc1": 53.540305474225214, "val_acc5": 88.9433552611108, "val_uar": 0.40388821049024726, "val_war": 0.5354030501089324, "val_weighted_f1": 0.5097582640044817, "val_micro_f1": 0.5354030501089324, "val_macro_f1": 0.39879343554636065, "epoch": 26, "n_parameters": 521309229}
28
+ {"train_lr": 4.885656319412359e-05, "train_min_lr": 2.754391404382005e-07, "train_loss": 1.5863063918678675, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.153071606513297, "val_loss": 1.476696214839524, "val_acc1": 52.34204836920196, "val_acc5": 90.08714612325032, "val_uar": 0.39296926582685954, "val_war": 0.5234204793028322, "val_weighted_f1": 0.5061348029901567, "val_micro_f1": 0.5234204793028322, "val_macro_f1": 0.3884038084396307, "epoch": 27, "n_parameters": 521309229}
29
+ {"train_lr": 4.821878524064173e-05, "train_min_lr": 2.7184353321958644e-07, "train_loss": 1.6031758261985904, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.009319522593281, "val_loss": 1.5291305272018207, "val_acc1": 50.27233140608843, "val_acc5": 89.54248394685634, "val_uar": 0.3776592558592647, "val_war": 0.5027233115468409, "val_weighted_f1": 0.49214097453542066, "val_micro_f1": 0.5027233115468409, "val_macro_f1": 0.37462558836226706, "epoch": 28, "n_parameters": 521309229}
30
+ {"train_lr": 4.7559188799386115e-05, "train_min_lr": 2.6812491969178504e-07, "train_loss": 1.5970683738146678, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.009418272736049, "val_loss": 1.4868771772758633, "val_acc1": 52.34204863099491, "val_acc5": 90.35947766023524, "val_uar": 0.40343718353092856, "val_war": 0.5234204793028322, "val_weighted_f1": 0.5118152955403649, "val_micro_f1": 0.5234204793028322, "val_macro_f1": 0.4048661441328058, "epoch": 29, "n_parameters": 521309229}
31
+ {"train_lr": 4.6878495129381106e-05, "train_min_lr": 2.6428736610409306e-07, "train_loss": 1.6021570864290293, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.035239597358326, "val_loss": 1.4593520538479674, "val_acc1": 53.81263669331869, "val_acc5": 90.08714638504327, "val_uar": 0.4051077499219269, "val_war": 0.5381263616557734, "val_weighted_f1": 0.5197211308807291, "val_micro_f1": 0.5381263616557734, "val_macro_f1": 0.3970410519653748, "epoch": 30, "n_parameters": 521309229}
32
+ {"train_lr": 4.6177448559161015e-05, "train_min_lr": 2.603350687650165e-07, "train_loss": 1.5833380958034653, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.103228358939143, "val_loss": 1.4654878074047613, "val_acc1": 53.267974400052836, "val_acc5": 90.19607880536248, "val_uar": 0.41056024011010106, "val_war": 0.5326797385620915, "val_weighted_f1": 0.5181095485228675, "val_micro_f1": 0.5326797385620915, "val_macro_f1": 0.4101106483196485, "epoch": 31, "n_parameters": 521309229}
33
+ {"train_lr": 4.545681567285774e-05, "train_min_lr": 2.5627234945366804e-07, "train_loss": 1.5813495818734562, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9766245006334664, "val_loss": 1.4995979812799716, "val_acc1": 52.01525078567804, "val_acc5": 89.43355137694115, "val_uar": 0.40279594375264577, "val_war": 0.5201525054466231, "val_weighted_f1": 0.5096043871545198, "val_micro_f1": 0.5201525054466231, "val_macro_f1": 0.3961958858191804, "epoch": 32, "n_parameters": 521309229}
34
+ {"train_lr": 4.47173844719522e-05, "train_min_lr": 2.5210365069396683e-07, "train_loss": 1.5854418081025479, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1094513057482125, "val_loss": 1.5059487618067686, "val_acc1": 51.68845370236565, "val_acc5": 88.72549045787139, "val_uar": 0.39434017784909253, "val_war": 0.5168845315904139, "val_weighted_f1": 0.5014991498652295, "val_micro_f1": 0.5168845315904139, "val_macro_f1": 0.38738672552455417, "epoch": 33, "n_parameters": 521309229}
35
+ {"train_lr": 4.395996351360636e-05, "train_min_lr": 2.478335308968022e-07, "train_loss": 1.5899681652536486, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.03007693337922, "val_loss": 1.4770796369103825, "val_acc1": 53.322440516714956, "val_acc5": 89.32461895662196, "val_uar": 0.41472692426107804, "val_war": 0.5332244008714597, "val_weighted_f1": 0.5201907984783827, "val_micro_f1": 0.5332244008714597, "val_macro_f1": 0.41226609943436937, "epoch": 34, "n_parameters": 521309229}
36
+ {"train_lr": 4.318538102651787e-05, "train_min_lr": 2.434666593754791e-07, "train_loss": 1.5817123489214642, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.071141797717255, "val_loss": 1.4960230404839796, "val_acc1": 52.23311564034107, "val_acc5": 88.45315933227539, "val_uar": 0.3996194586716783, "val_war": 0.5223311546840959, "val_weighted_f1": 0.5051965439302118, "val_micro_f1": 0.5223311546840959, "val_macro_f1": 0.39097145850112397, "epoch": 35, "n_parameters": 521309229}
37
+ {"train_lr": 4.2394484005264044e-05, "train_min_lr": 2.390078112398925e-07, "train_loss": 1.579787764218774, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9938392875218156, "val_loss": 1.5035384735640358, "val_acc1": 53.050109250872744, "val_acc5": 89.16122002695121, "val_uar": 0.4074650758021692, "val_war": 0.5305010893246187, "val_weighted_f1": 0.5171387704681529, "val_micro_f1": 0.5305010893246187, "val_macro_f1": 0.402306084573387, "epoch": 36, "n_parameters": 521309229}
38
+ {"train_lr": 4.158813728412575e-05, "train_min_lr": 2.3446186217501424e-07, "train_loss": 1.5741734342409832, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9825759028444194, "val_loss": 1.5038349765772914, "val_acc1": 51.85185220194798, "val_acc5": 88.94335552290374, "val_uar": 0.39183656007971546, "val_war": 0.5185185185185185, "val_weighted_f1": 0.49686264411020076, "val_micro_f1": 0.5185185185185185, "val_macro_f1": 0.38141101344489836, "epoch": 37, "n_parameters": 521309229}
39
+ {"train_lr": 4.0767222591403925e-05, "train_min_lr": 2.2983378310940412e-07, "train_loss": 1.5691576627614867, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0837100307540135, "val_loss": 1.474099284293605, "val_acc1": 53.26797421773275, "val_acc5": 88.99782173306335, "val_uar": 0.3994000004005551, "val_war": 0.5326797385620915, "val_weighted_f1": 0.5149880695993483, "val_micro_f1": 0.5326797385620915, "val_macro_f1": 0.391780402847753, "epoch": 38, "n_parameters": 521309229}
40
+ {"train_lr": 3.993263758526252e-05, "train_min_lr": 2.251286347795713e-07, "train_loss": 1.5520710502520647, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.135250979130811, "val_loss": 1.4585137782143611, "val_acc1": 53.86710306709888, "val_acc5": 89.8148152220483, "val_uar": 0.40723501920785293, "val_war": 0.5386710239651417, "val_weighted_f1": 0.5183625619427727, "val_micro_f1": 0.5386710239651417, "val_macro_f1": 0.3992167088284888, "epoch": 39, "n_parameters": 521309229}
41
+ {"train_lr": 3.9085294872152475e-05, "train_min_lr": 2.203515621961343e-07, "train_loss": 1.5451874896244642, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.038251692705815, "val_loss": 1.523034890492757, "val_acc1": 51.03485889528312, "val_acc5": 88.83442351397346, "val_uar": 0.39680610324185406, "val_war": 0.5103485838779956, "val_weighted_f1": 0.5058454836393402, "val_micro_f1": 0.5103485838779956, "val_macro_f1": 0.3956613513187055, "epoch": 40, "n_parameters": 521309229}
42
+ {"train_lr": 3.822612100889004e-05, "train_min_lr": 2.1550778901782692e-07, "train_loss": 1.554201501332494, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.043004932970104, "val_loss": 1.4958549691181557, "val_acc1": 52.886710751290416, "val_acc5": 89.76034875009574, "val_uar": 0.3951735318339587, "val_war": 0.5288671023965141, "val_weighted_f1": 0.5150001507536323, "val_micro_f1": 0.5288671023965141, "val_macro_f1": 0.39075363993206186, "epoch": 41, "n_parameters": 521309229}
43
+ {"train_lr": 3.73560554894804e-05, "train_min_lr": 2.1060261183950398e-07, "train_loss": 1.562386593114425, "train_loss_scale": 4298.772277227723, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0214167868736945, "val_loss": 1.483561661897921, "val_acc1": 52.99564314356037, "val_acc5": 89.54248413385129, "val_uar": 0.4070577556322352, "val_war": 0.5299564270152506, "val_weighted_f1": 0.5156201959714086, "val_micro_f1": 0.5299564270152506, "val_macro_f1": 0.40368118315590185, "epoch": 42, "n_parameters": 521309229}
44
+ {"train_lr": 3.647604971779486e-05, "train_min_lr": 2.056413944003928e-07, "train_loss": 1.5516659144324438, "train_loss_scale": 5637.069306930693, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.492272417627129, "val_acc1": 52.94117696145002, "val_acc5": 89.59695053100586, "val_uar": 0.41518987500708204, "val_war": 0.5294117647058824, "val_weighted_f1": 0.5171776366579507, "val_micro_f1": 0.5294117647058824, "val_macro_f1": 0.40758815444769464, "epoch": 43, "n_parameters": 521309229}
45
+ {"train_lr": 3.558706596722466e-05, "train_min_lr": 2.006295617189234e-07, "train_loss": 1.5174667961133195, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9889816506074207, "val_loss": 1.501658990687015, "val_acc1": 53.15904178338892, "val_acc5": 88.9433551863128, "val_uar": 0.4039801984207557, "val_war": 0.5315904139433552, "val_weighted_f1": 0.516133476588931, "val_micro_f1": 0.5315904139433552, "val_macro_f1": 0.3998747376631625, "epoch": 44, "n_parameters": 521309229}
46
+ {"train_lr": 3.469007632844911e-05, "train_min_lr": 1.9557259416054976e-07, "train_loss": 1.5469555696441788, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9652609754316877, "val_loss": 1.4965818325678508, "val_acc1": 52.83224447568258, "val_acc5": 90.25054497812309, "val_uar": 0.4092066371240946, "val_war": 0.528322440087146, "val_weighted_f1": 0.5200695435728807, "val_micro_f1": 0.528322440087146, "val_macro_f1": 0.40677455854055417, "epoch": 45, "n_parameters": 521309229}
47
+ {"train_lr": 3.378606164646873e-05, "train_min_lr": 1.9047602144505153e-07, "train_loss": 1.5403240686005886, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.050620957176284, "val_loss": 1.510618961909238, "val_acc1": 52.50544712122749, "val_acc5": 89.16122036354214, "val_uar": 0.4028629110671314, "val_war": 0.5250544662309368, "val_weighted_f1": 0.5117809499061942, "val_micro_f1": 0.5250544662309368, "val_macro_f1": 0.39891071104860093, "epoch": 46, "n_parameters": 521309229}
48
+ {"train_lr": 3.28760104480657e-05, "train_min_lr": 1.85345416599866e-07, "train_loss": 1.5499587279341795, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.063253962167419, "val_loss": 1.5064742822273105, "val_acc1": 52.8322444523082, "val_acc5": 90.19607861836751, "val_uar": 0.41510964948605705, "val_war": 0.528322440087146, "val_weighted_f1": 0.5190029020786299, "val_micro_f1": 0.528322440087146, "val_macro_f1": 0.4115460354151009, "epoch": 47, "n_parameters": 521309229}
49
+ {"train_lr": 3.1960917860864196e-05, "train_min_lr": 1.8018638986606443e-07, "train_loss": 1.514361062459033, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9574131257463208, "val_loss": 1.530127868348477, "val_acc1": 51.36165618428997, "val_acc5": 88.7799570794199, "val_uar": 0.39366283865547713, "val_war": 0.5136165577342048, "val_weighted_f1": 0.5052505056461278, "val_micro_f1": 0.5136165577342048, "val_macro_f1": 0.3933352162874397, "epoch": 48, "n_parameters": 521309229}
50
+ {"train_lr": 3.104178452517305e-05, "train_min_lr": 1.750045825636361e-07, "train_loss": 1.5332447596312355, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.981385403340406, "val_loss": 1.5165421927676481, "val_acc1": 52.61437954154669, "val_acc5": 89.43355145173915, "val_uar": 0.4050349115238327, "val_war": 0.5261437908496732, "val_weighted_f1": 0.5101121615805151, "val_micro_f1": 0.5261437908496732, "val_macro_f1": 0.402248501690469, "epoch": 49, "n_parameters": 521309229}
51
+ {"train_lr": 3.011961549980036e-05, "train_min_lr": 1.698056609227879e-07, "train_loss": 1.5077069841399051, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9886708590063717, "val_loss": 1.5239602152039022, "val_acc1": 51.52505473062104, "val_acc5": 89.10675430297852, "val_uar": 0.39257058613068146, "val_war": 0.5147058823529411, "val_weighted_f1": 0.5060847823720419, "val_micro_f1": 0.5147058823529411, "val_macro_f1": 0.39173179217689147, "epoch": 50, "n_parameters": 521309229}
52
+ {"train_lr": 2.919541916303608e-05, "train_min_lr": 1.6459530988800415e-07, "train_loss": 1.5319235022902096, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.040864845313648, "val_loss": 1.5325718001992095, "val_acc1": 50.70806134448332, "val_acc5": 88.99782184526032, "val_uar": 0.39031555781101696, "val_war": 0.5070806100217865, "val_weighted_f1": 0.4967877944873571, "val_micro_f1": 0.5070806100217865, "val_macro_f1": 0.3849626646137202, "epoch": 51, "n_parameters": 521309229}
53
+ {"train_lr": 2.8270206110005638e-05, "train_min_lr": 1.593792269016439e-07, "train_loss": 1.5234482780148093, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.986093530560484, "val_loss": 1.5121194696893878, "val_acc1": 51.96078482796164, "val_acc5": 89.43355186312806, "val_uar": 0.38895368901790334, "val_war": 0.5190631808278867, "val_weighted_f1": 0.5001277501975985, "val_micro_f1": 0.5190631808278867, "val_macro_f1": 0.38357654502973537, "epoch": 52, "n_parameters": 521309229}
54
+ {"train_lr": 2.7344988047598695e-05, "train_min_lr": 1.541631156738708e-07, "train_loss": 1.5098785858343142, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9087551867607795, "val_loss": 1.4937871516335244, "val_acc1": 53.050109176074756, "val_acc5": 89.37908531637753, "val_uar": 0.40234634969833827, "val_war": 0.5305010893246187, "val_weighted_f1": 0.5134566314395688, "val_micro_f1": 0.5305010893246187, "val_macro_f1": 0.3988904441604313, "epoch": 53, "n_parameters": 521309229}
55
+ {"train_lr": 2.6420776688182537e-05, "train_min_lr": 1.4895267994573047e-07, "train_loss": 1.5201869007974569, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9996302765194733, "val_loss": 1.5119232336680095, "val_acc1": 51.90631835600909, "val_acc5": 88.94335578469669, "val_uar": 0.39077662033363647, "val_war": 0.5190631808278867, "val_weighted_f1": 0.5014843682383663, "val_micro_f1": 0.5190631808278867, "val_macro_f1": 0.38671371895922907, "epoch": 54, "n_parameters": 521309229}
56
+ {"train_lr": 2.549858264330953e-05, "train_min_lr": 1.4375361725219248e-07, "train_loss": 1.5411779542173882, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.969173620242884, "val_loss": 1.5037241449543075, "val_acc1": 51.85185203365251, "val_acc5": 89.81481525944729, "val_uar": 0.3915745495679473, "val_war": 0.5185185185185185, "val_weighted_f1": 0.505503296292651, "val_micro_f1": 0.5185185185185185, "val_macro_f1": 0.39068662270676763, "epoch": 55, "n_parameters": 521309229}
57
+ {"train_lr": 2.4579414318628305e-05, "train_min_lr": 1.3857161269198084e-07, "train_loss": 1.5311495594852436, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9634254734114847, "val_loss": 1.4956386504220027, "val_acc1": 52.8867106811673, "val_acc5": 88.9978219200583, "val_uar": 0.4065761854475687, "val_war": 0.5288671023965141, "val_weighted_f1": 0.5210167333030935, "val_micro_f1": 0.5288671023965141, "val_macro_f1": 0.40798722892091815, "epoch": 56, "n_parameters": 521309229}
58
+ {"train_lr": 2.3664276811206737e-05, "train_min_lr": 1.3341233271100021e-07, "train_loss": 1.5221961299971778, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.019514400180023, "val_loss": 1.5253236095110576, "val_acc1": 51.68845364626716, "val_acc5": 88.88888976153206, "val_uar": 0.388364489555779, "val_war": 0.5168845315904139, "val_weighted_f1": 0.5019874559191844, "val_micro_f1": 0.5168845315904139, "val_macro_f1": 0.38511525582325123, "epoch": 57, "n_parameters": 521309229}
59
+ {"train_lr": 2.2754170810473282e-05, "train_min_lr": 1.282814189061623e-07, "train_loss": 1.5257962706065413, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9958255857524305, "val_loss": 1.4894838999299442, "val_acc1": 52.7777784057692, "val_acc5": 89.54248402165432, "val_uar": 0.3974530332614932, "val_war": 0.5277777777777778, "val_weighted_f1": 0.5110240808889059, "val_micro_f1": 0.5277777777777778, "val_macro_f1": 0.39426186364014343, "epoch": 58, "n_parameters": 521309229}
60
+ {"train_lr": 2.1850091503977748e-05, "train_min_lr": 1.2318448185638122e-07, "train_loss": 1.5299084965545353, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8609496768158262, "val_loss": 1.5242076574587355, "val_acc1": 51.96078483731139, "val_acc5": 88.77995692982392, "val_uar": 0.3903897699380492, "val_war": 0.5196078431372549, "val_weighted_f1": 0.5008451716367058, "val_micro_f1": 0.5196078431372549, "val_macro_f1": 0.3876798650013521, "epoch": 59, "n_parameters": 521309229}
61
+ {"train_lr": 2.0953027489168203e-05, "train_min_lr": 1.1812709498748855e-07, "train_loss": 1.521294754428832, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9400054440639987, "val_loss": 1.483980764945348, "val_acc1": 53.376906937243895, "val_acc5": 89.21568672329772, "val_uar": 0.41991799948926556, "val_war": 0.5337690631808278, "val_weighted_f1": 0.5189079220068797, "val_micro_f1": 0.5337690631808278, "val_macro_f1": 0.4153130213716983, "epoch": 60, "n_parameters": 521309229}
62
+ {"train_lr": 2.006395969237425e-05, "train_min_lr": 1.1311478847777361e-07, "train_loss": 1.5011553048300665, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.04911197530161, "val_loss": 1.4948695266948027, "val_acc1": 52.450980681999056, "val_acc5": 89.32461888182397, "val_uar": 0.40979523135607177, "val_war": 0.5245098039215687, "val_weighted_f1": 0.5060115955581567, "val_micro_f1": 0.5245098039215687, "val_macro_f1": 0.4032264959899138, "epoch": 61, "n_parameters": 521309229}
63
+ {"train_lr": 1.918386029617857e-05, "train_min_lr": 1.0815304321081477e-07, "train_loss": 1.5270007547765676, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.015500394424589, "val_loss": 1.5209805328471988, "val_acc1": 52.668845690932926, "val_acc5": 88.12636222091376, "val_uar": 0.40074259883531393, "val_war": 0.5266884531590414, "val_weighted_f1": 0.5073375569727536, "val_micro_f1": 0.5266884531590414, "val_macro_f1": 0.39724937125709986, "epoch": 62, "n_parameters": 521309229}
64
+ {"train_lr": 1.831369167634938e-05, "train_min_lr": 1.0324728478221384e-07, "train_loss": 1.4932062583591286, "train_loss_scale": 7462.019801980198, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.943150784709666, "val_loss": 1.5279828590505264, "val_acc1": 51.19825731539259, "val_acc5": 88.12636210871678, "val_uar": 0.3897691472935867, "val_war": 0.5119825708061002, "val_weighted_f1": 0.49716386849686245, "val_micro_f1": 0.5119825708061002, "val_macro_f1": 0.38682584521212504, "epoch": 63, "n_parameters": 521309229}
65
+ {"train_lr": 1.74544053494968e-05, "train_min_lr": 9.840287756678695e-08, "train_loss": 1.506744764327216, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9401989172000698, "val_loss": 1.5271229060257183, "val_acc1": 51.52505477736978, "val_acc5": 88.61655811235016, "val_uar": 0.396627332857181, "val_war": 0.5152505446623094, "val_weighted_f1": 0.5026594229530313, "val_micro_f1": 0.5152505446623094, "val_macro_f1": 0.39250140022446245, "epoch": 64, "n_parameters": 521309229}
66
+ {"train_lr": 1.6606940932603314e-05, "train_min_lr": 9.36251188526991e-08, "train_loss": 1.505619704526643, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9497925597842376, "val_loss": 1.5153199597900988, "val_acc1": 52.99564331653071, "val_acc5": 88.88888916314816, "val_uar": 0.4030900131404348, "val_war": 0.5299564270152506, "val_weighted_f1": 0.5113905278523432, "val_micro_f1": 0.5299564270152506, "val_macro_f1": 0.3989845428073718, "epoch": 65, "n_parameters": 521309229}
67
+ {"train_lr": 1.5772225115566454e-05, "train_min_lr": 8.891923304895814e-08, "train_loss": 1.5290625324540406, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.986321739631124, "val_loss": 1.5006243174566942, "val_acc1": 52.34204856554667, "val_acc5": 89.3246192558139, "val_uar": 0.3970956472623789, "val_war": 0.5234204793028322, "val_weighted_f1": 0.5088522876654963, "val_micro_f1": 0.5234204793028322, "val_macro_f1": 0.39448230746229607, "epoch": 66, "n_parameters": 521309229}
68
+ {"train_lr": 1.4951170647876973e-05, "train_min_lr": 8.429036597259903e-08, "train_loss": 1.5022546049982015, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.937774164841907, "val_loss": 1.5085265718254388, "val_acc1": 52.886710737265794, "val_acc5": 88.23529445423799, "val_uar": 0.4004670417475024, "val_war": 0.5288671023965141, "val_weighted_f1": 0.513816228173049, "val_micro_f1": 0.5288671023965141, "val_macro_f1": 0.3943589329016991, "epoch": 67, "n_parameters": 521309229}
69
+ {"train_lr": 1.4144675340540693e-05, "train_min_lr": 7.974357922180958e-08, "train_loss": 1.5151000773355905, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9108762552242466, "val_loss": 1.500252519752465, "val_acc1": 53.37690681102229, "val_acc5": 88.67102454690372, "val_uar": 0.40233295879508885, "val_war": 0.5337690631808278, "val_weighted_f1": 0.5111204074228514, "val_micro_f1": 0.5337690631808278, "val_macro_f1": 0.3995702991434115, "epoch": 68, "n_parameters": 521309229}
70
+ {"train_lr": 1.3353621084335341e-05, "train_min_lr": 7.528384464114652e-08, "train_loss": 1.4840477698116805, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8990209008207417, "val_loss": 1.5096527311731787, "val_acc1": 52.45098063992519, "val_acc5": 89.21568691029267, "val_uar": 0.40832678726282784, "val_war": 0.5245098039215687, "val_weighted_f1": 0.5118637208220187, "val_micro_f1": 0.5245098039215687, "val_macro_f1": 0.40676611385644207, "epoch": 69, "n_parameters": 521309229}
71
+ {"train_lr": 1.2578872885475928e-05, "train_min_lr": 7.091603888489658e-08, "train_loss": 1.490687344333913, "train_loss_scale": 5028.752475247525, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.5053469930209367, "val_acc1": 52.61437951349745, "val_acc5": 89.43355190052705, "val_uar": 0.40361629826407347, "val_war": 0.5261437908496732, "val_weighted_f1": 0.5116529720935864, "val_micro_f1": 0.5261437908496732, "val_macro_f1": 0.3991981972333397, "epoch": 70, "n_parameters": 521309229}
72
+ {"train_lr": 1.1821277919743169e-05, "train_min_lr": 6.664493808452678e-08, "train_loss": 1.5088082852930125, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9710461455996673, "val_loss": 1.5095390680373884, "val_acc1": 52.941176891326904, "val_acc5": 88.8344232895795, "val_uar": 0.4032129226856983, "val_war": 0.5294117647058824, "val_weighted_f1": 0.511988676843952, "val_micro_f1": 0.5294117647058824, "val_macro_f1": 0.3997911612145497, "epoch": 71, "n_parameters": 521309229}
73
+ {"train_lr": 1.1081664606109202e-05, "train_min_lr": 6.247521262605469e-08, "train_loss": 1.512473192545447, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.899807875699336, "val_loss": 1.5074619145369996, "val_acc1": 52.941177040922874, "val_acc5": 88.18082828147739, "val_uar": 0.39780213889985455, "val_war": 0.5294117647058824, "val_weighted_f1": 0.5097214626689179, "val_micro_f1": 0.5294117647058824, "val_macro_f1": 0.3913804765934518, "epoch": 72, "n_parameters": 521309229}
74
+ {"train_lr": 1.0360841700873597e-05, "train_min_lr": 5.841142204304979e-08, "train_loss": 1.480191003863174, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.019262306761034, "val_loss": 1.5212328211933959, "val_acc1": 52.28758214034286, "val_acc5": 88.83442310258454, "val_uar": 0.3973678765073506, "val_war": 0.5228758169934641, "val_weighted_f1": 0.5090464010786926, "val_micro_f1": 0.5228758169934641, "val_macro_f1": 0.3973728031482262, "epoch": 73, "n_parameters": 521309229}
75
+ {"train_lr": 9.659597413300203e-06, "train_min_lr": 5.445801003085074e-08, "train_loss": 1.510677424987944, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.029099502185784, "val_loss": 1.5165524284044902, "val_acc1": 52.015250972673, "val_acc5": 88.4531592948764, "val_uar": 0.3902097993187944, "val_war": 0.5201525054466231, "val_weighted_f1": 0.5013946763020574, "val_micro_f1": 0.5201525054466231, "val_macro_f1": 0.38363617490019153, "epoch": 74, "n_parameters": 521309229}
76
+ {"train_lr": 8.978698543721923e-06, "train_min_lr": 5.0619299587449256e-08, "train_loss": 1.492189730256304, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8258464289183665, "val_loss": 1.5246781122450734, "val_acc1": 52.233116187301334, "val_acc5": 88.6710246217017, "val_uar": 0.39344575685896843, "val_war": 0.5223311546840959, "val_weighted_f1": 0.5044811301213106, "val_micro_f1": 0.5223311546840959, "val_macro_f1": 0.39025812841586194, "epoch": 75, "n_parameters": 521309229}
77
+ {"train_lr": 8.318889645055782e-06, "train_min_lr": 4.689948828635602e-08, "train_loss": 1.4647132884157765, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9422648802842244, "val_loss": 1.5250349775248884, "val_acc1": 51.797385907640646, "val_acc5": 89.10675419078154, "val_uar": 0.4013063060058766, "val_war": 0.5179738562091504, "val_weighted_f1": 0.5044501834441978, "val_micro_f1": 0.5179738562091504, "val_macro_f1": 0.396074298149178, "epoch": 76, "n_parameters": 521309229}
78
+ {"train_lr": 7.680892208645253e-06, "train_min_lr": 4.3302643686614975e-08, "train_loss": 1.4930726087132695, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.04241668587864, "val_loss": 1.5266013773632985, "val_acc1": 51.742919837727264, "val_acc5": 89.37908554077148, "val_uar": 0.3934199958667574, "val_war": 0.5174291938997821, "val_weighted_f1": 0.5043786132657542, "val_micro_f1": 0.5174291938997821, "val_macro_f1": 0.38897331210230307, "epoch": 77, "n_parameters": 521309229}
79
+ {"train_lr": 7.065403875320073e-06, "train_min_lr": 3.983269888498758e-08, "train_loss": 1.4815577772190862, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9363704742771564, "val_loss": 1.5121921371011173, "val_acc1": 51.96078455214407, "val_acc5": 88.83442317738252, "val_uar": 0.4027106517937186, "val_war": 0.5196078431372549, "val_weighted_f1": 0.5055273962362478, "val_micro_f1": 0.5196078431372549, "val_macro_f1": 0.3991925161716462, "epoch": 78, "n_parameters": 521309229}
80
+ {"train_lr": 6.473097672536222e-06, "train_min_lr": 3.649344821516926e-08, "train_loss": 1.4989140181258174, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.826379665053717, "val_loss": 1.525437896158181, "val_acc1": 52.34204868241852, "val_acc5": 88.23529452903598, "val_uar": 0.4015435221579485, "val_war": 0.5234204793028322, "val_weighted_f1": 0.5062894002475595, "val_micro_f1": 0.5234204793028322, "val_macro_f1": 0.39646533529636546, "epoch": 79, "n_parameters": 521309229}
81
+ {"train_lr": 5.904621278430291e-06, "train_min_lr": 3.3288543098741024e-08, "train_loss": 1.4750324314576957, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9265735314624144, "val_loss": 1.5164452878867878, "val_acc1": 52.995643185634236, "val_acc5": 88.99782162086636, "val_uar": 0.406704459462344, "val_war": 0.5299564270152506, "val_weighted_f1": 0.51103396377431, "val_micro_f1": 0.5299564270152506, "val_macro_f1": 0.40286221817492956, "epoch": 80, "n_parameters": 521309229}
82
+ {"train_lr": 5.3605963135929456e-06, "train_min_lr": 3.0221488052393765e-08, "train_loss": 1.4646711148838005, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.892102623930072, "val_loss": 1.5081058422140046, "val_acc1": 53.4313733624477, "val_acc5": 88.67102402331783, "val_uar": 0.4073455002017685, "val_war": 0.5343137254901961, "val_weighted_f1": 0.5156980029412999, "val_micro_f1": 0.5343137254901961, "val_macro_f1": 0.404645007049113, "epoch": 81, "n_parameters": 521309229}
83
+ {"train_lr": 4.8416176613359425e-06, "train_min_lr": 2.7295636855790628e-08, "train_loss": 1.4926940688599062, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.965660767980141, "val_loss": 1.494349693550783, "val_acc1": 52.832244494382074, "val_acc5": 89.59695019441493, "val_uar": 0.4101520490196628, "val_war": 0.528322440087146, "val_weighted_f1": 0.5162476474859918, "val_micro_f1": 0.528322440087146, "val_macro_f1": 0.408914811532429, "epoch": 82, "n_parameters": 521309229}
84
+ {"train_lr": 4.3482528171959625e-06, "train_min_lr": 2.4514188884258143e-08, "train_loss": 1.480787029459138, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.877588859879144, "val_loss": 1.5269157722884534, "val_acc1": 52.50544705577925, "val_acc5": 89.70588272693111, "val_uar": 0.40091680807127783, "val_war": 0.5250544662309368, "val_weighted_f1": 0.5048861867376878, "val_micro_f1": 0.5250544662309368, "val_macro_f1": 0.39504389512286114, "epoch": 83, "n_parameters": 521309229}
85
+ {"train_lr": 3.88104126838656e-06, "train_min_lr": 2.188018561031641e-08, "train_loss": 1.4935340510539883, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9085198935895864, "val_loss": 1.5037702526531966, "val_acc1": 51.633987365984446, "val_acc5": 90.03267980089375, "val_uar": 0.3974812790854862, "val_war": 0.5163398692810458, "val_weighted_f1": 0.5023683414161079, "val_micro_f1": 0.5163398692810458, "val_macro_f1": 0.3954272487115961, "epoch": 84, "n_parameters": 521309229}
86
+ {"train_lr": 3.4404939038768243e-06, "train_min_lr": 1.9396507277873433e-08, "train_loss": 1.4824824753177441, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.996995758302141, "val_loss": 1.5013100294505848, "val_acc1": 53.05010961551292, "val_acc5": 89.05228798062194, "val_uar": 0.40658746359649023, "val_war": 0.5305010893246187, "val_weighted_f1": 0.5153685682075342, "val_micro_f1": 0.5305010893246187, "val_macro_f1": 0.40417098242621485, "epoch": 85, "n_parameters": 521309229}
87
+ {"train_lr": 3.027092455741798e-06, "train_min_lr": 1.7065869752720735e-08, "train_loss": 1.4838672439650733, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8750367400669816, "val_loss": 1.498635050128488, "val_acc1": 52.94117698949926, "val_acc5": 88.61655833674412, "val_uar": 0.4023117547280975, "val_war": 0.5294117647058824, "val_weighted_f1": 0.5118388323268827, "val_micro_f1": 0.5294117647058824, "val_macro_f1": 0.39632544085645505, "epoch": 86, "n_parameters": 521309229}
88
+ {"train_lr": 2.641288972395518e-06, "train_min_lr": 1.4890821552773996e-08, "train_loss": 1.4875099576345765, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.972120658005818, "val_loss": 1.511965036392212, "val_acc1": 52.50544709317825, "val_acc5": 88.99782188265931, "val_uar": 0.4061608148830373, "val_war": 0.5250544662309368, "val_weighted_f1": 0.510197770309286, "val_micro_f1": 0.5250544662309368, "val_macro_f1": 0.4032323318398629, "epoch": 87, "n_parameters": 521309229}
89
+ {"train_lr": 2.2835053242827143e-06, "train_min_lr": 1.2873741061306107e-08, "train_loss": 1.4610630257098194, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9302170607123044, "val_loss": 1.5311283323110318, "val_acc1": 51.579521235297705, "val_acc5": 88.50762599122291, "val_uar": 0.3869955203923134, "val_war": 0.5157952069716776, "val_weighted_f1": 0.5005837437680186, "val_micro_f1": 0.5157952069716776, "val_macro_f1": 0.38253834178533763, "epoch": 88, "n_parameters": 521309229}
90
+ {"train_lr": 1.954132742569673e-06, "train_min_lr": 1.1016833926220049e-08, "train_loss": 1.4956607366159018, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8698052085272154, "val_loss": 1.5082759483187806, "val_acc1": 52.1241834303912, "val_acc5": 89.4335516761331, "val_uar": 0.3922102169234461, "val_war": 0.5212418300653595, "val_weighted_f1": 0.502452138727778, "val_micro_f1": 0.5212418300653595, "val_macro_f1": 0.3867781950686286, "epoch": 89, "n_parameters": 521309229}
91
+ {"train_lr": 1.6535313913386933e-06, "train_min_lr": 9.322130648205152e-09, "train_loss": 1.480898133008787, "train_loss_scale": 8070.336633663366, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.041238414178981, "val_loss": 1.5112798365892148, "val_acc1": 52.39651455598719, "val_acc5": 88.1808282440784, "val_uar": 0.4059830780099663, "val_war": 0.5239651416122004, "val_weighted_f1": 0.5099185376536083, "val_micro_f1": 0.5239651416122004, "val_macro_f1": 0.40284480261374056, "epoch": 90, "n_parameters": 521309229}
92
+ {"train_lr": 1.3820299737539636e-06, "train_min_lr": 7.791484360414586e-09, "train_loss": 1.4849403051063172, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8227217905592212, "val_loss": 1.4947492483199811, "val_acc1": 52.12418353323843, "val_acc5": 89.27015330744725, "val_uar": 0.3955301629509942, "val_war": 0.5212418300653595, "val_weighted_f1": 0.5064100498606472, "val_micro_f1": 0.5212418300653595, "val_macro_f1": 0.3919986609051816, "epoch": 91, "n_parameters": 521309229}
93
+ {"train_lr": 1.139925372629472e-06, "train_min_lr": 6.426568802091309e-09, "train_loss": 1.465115751370345, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8967034557078146, "val_loss": 1.501269568415249, "val_acc1": 52.39651438301685, "val_acc5": 89.10675411598355, "val_uar": 0.40497538148233686, "val_war": 0.5239651416122004, "val_weighted_f1": 0.5117442477763743, "val_micro_f1": 0.5239651416122004, "val_macro_f1": 0.4023349492863507, "epoch": 92, "n_parameters": 521309229}
94
+ {"train_lr": 9.274823257919996e-07, "train_min_lr": 5.22887648835886e-09, "train_loss": 1.4675942861010927, "train_loss_scale": 6367.049504950495, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.5090112589737947, "val_acc1": 53.37690694659364, "val_acc5": 89.10675452737247, "val_uar": 0.40788111202083516, "val_war": 0.5337690631808278, "val_weighted_f1": 0.5177410536483147, "val_micro_f1": 0.5337690631808278, "val_macro_f1": 0.4037767141554875, "epoch": 93, "n_parameters": 521309229}
95
+ {"train_lr": 7.449331365942088e-07, "train_min_lr": 4.1997170781779625e-09, "train_loss": 1.4909887979723047, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9702361739507994, "val_loss": 1.484656637205797, "val_acc1": 52.66884567223343, "val_acc5": 89.16122073753208, "val_uar": 0.39782300475629684, "val_war": 0.5266884531590414, "val_weighted_f1": 0.5086639970483521, "val_micro_f1": 0.5266884531590414, "val_macro_f1": 0.3950122443230801, "epoch": 94, "n_parameters": 521309229}
96
+ {"train_lr": 5.924774198943271e-07, "train_min_lr": 3.340215942253686e-09, "train_loss": 1.468627820412318, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.007261783769815, "val_loss": 1.5061639249324799, "val_acc1": 52.88671055027083, "val_acc5": 88.94335548550475, "val_uar": 0.4073106141920303, "val_war": 0.5288671023965141, "val_weighted_f1": 0.5141005649250575, "val_micro_f1": 0.5288671023965141, "val_macro_f1": 0.4055353817863865, "epoch": 95, "n_parameters": 521309229}
97
+ {"train_lr": 4.7028188378023254e-07, "train_min_lr": 2.6513129324590976e-09, "train_loss": 1.4881552098804574, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8998706553241993, "val_loss": 1.4798014520430098, "val_acc1": 53.43137297443315, "val_acc5": 89.43355133954216, "val_uar": 0.4108828427142657, "val_war": 0.5343137254901961, "val_weighted_f1": 0.5204524684950536, "val_micro_f1": 0.5343137254901961, "val_macro_f1": 0.4086341010263084, "epoch": 96, "n_parameters": 521309229}
98
+ {"train_lr": 3.7848014727660956e-07, "train_min_lr": 2.1337613541210327e-09, "train_loss": 1.4708075937461538, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.823480018294684, "val_loss": 1.5151397431013631, "val_acc1": 51.198257623934275, "val_acc5": 88.72549079446232, "val_uar": 0.3865326978311416, "val_war": 0.5119825708061002, "val_weighted_f1": 0.49570935157632534, "val_micro_f1": 0.5119825708061002, "val_macro_f1": 0.381889364407315, "epoch": 97, "n_parameters": 521309229}
99
+ {"train_lr": 3.171725942345054e-07, "train_min_lr": 1.7881271422917885e-09, "train_loss": 1.476774018786528, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8706564667201278, "val_loss": 1.507651566290388, "val_acc1": 52.66884578443041, "val_acc5": 89.32461918101592, "val_uar": 0.401592480952707, "val_war": 0.5266884531590414, "val_weighted_f1": 0.5145882356374072, "val_micro_f1": 0.5266884531590414, "val_macro_f1": 0.4018121564199534, "epoch": 98, "n_parameters": 521309229}
100
+ {"train_lr": 2.8642626356306314e-07, "train_min_lr": 1.6147882429074497e-09, "train_loss": 1.4804731095978136, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.862370299820853, "val_loss": 1.5088735336766523, "val_acc1": 52.88671076531504, "val_acc5": 89.65141659156949, "val_uar": 0.4055259807149692, "val_war": 0.5288671023965141, "val_weighted_f1": 0.5143190808793948, "val_micro_f1": 0.5288671023965141, "val_macro_f1": 0.40367390741147235, "epoch": 99, "n_parameters": 521309229}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 55.91925804691762, "Final Top-5 (best epoch)": 91.1074740861975}
103
+ Final UAR: 42.41%, Final WAR: 55.92%
104
+ Final Confusion Matrix:
105
+ [[174 21 2 8 17 10 14 3 24 2 2]
106
+ [ 6 31 0 20 26 8 6 10 16 2 3]
107
+ [ 9 4 39 0 1 18 35 0 19 0 0]
108
+ [ 1 3 0 212 17 9 2 2 2 1 0]
109
+ [ 10 1 3 21 151 8 2 5 21 3 3]
110
+ [ 8 9 3 7 16 215 7 0 21 3 2]
111
+ [ 8 8 12 32 14 8 118 2 11 1 1]
112
+ [ 5 2 0 9 19 3 0 4 1 3 1]
113
+ [ 5 19 4 8 23 18 18 3 72 9 4]
114
+ [ 1 2 1 9 22 8 1 0 3 5 1]
115
+ [ 1 2 0 3 12 3 2 0 5 5 4]]
116
+ Final Class Accuracies: ['62.82%', '24.22%', '31.20%', '85.14%', '66.23%', '73.88%', '54.88%', '8.51%', '39.34%', '9.43%', '10.81%']
117
+ Final Weighted F1: 0.5463, Final Micro F1: 0.5592, Final Macro F1: 0.4252
logs/AVF-MAE++_huge-MAFW (11-class)/eval_split03/log.txt ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.580357142857142e-06, "train_min_lr": 3.1460435902122946e-08, "train_loss": 1.8841293674491026, "train_loss_scale": 5231.524752475248, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.240844666373496, "val_acc1": 61.42076544735601, "val_acc5": 93.66120246511991, "val_uar": 0.47052599800358896, "val_war": 0.614207650273224, "val_weighted_f1": 0.6021614736418472, "val_micro_f1": 0.614207650273224, "val_macro_f1": 0.4679769212583498, "epoch": 0, "n_parameters": 521309229}
2
+ {"train_lr": 1.6852678571428577e-05, "train_min_lr": 9.501051642441133e-08, "train_loss": 1.8267456826203727, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.017457001280077, "val_loss": 1.2269553480779423, "val_acc1": 62.07650338886865, "val_acc5": 93.27868845788508, "val_uar": 0.47017188318905656, "val_war": 0.6207650273224044, "val_weighted_f1": 0.5981818899277378, "val_micro_f1": 0.6207650273224044, "val_macro_f1": 0.4619388082470388, "epoch": 1, "n_parameters": 521309229}
3
+ {"train_lr": 2.8125000000000006e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.8372134114846144, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9929783556720997, "val_loss": 1.2247945268948872, "val_acc1": 62.240437653025644, "val_acc5": 93.60655752922017, "val_uar": 0.47027695870083636, "val_war": 0.6218579234972678, "val_weighted_f1": 0.5948780071188359, "val_micro_f1": 0.6218579234972678, "val_macro_f1": 0.46197785344227554, "epoch": 2, "n_parameters": 521309229}
4
+ {"train_lr": 3.939732142857144e-05, "train_min_lr": 2.2211067746898805e-07, "train_loss": 1.8340060714841282, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0692702236742075, "val_loss": 1.2242166788554658, "val_acc1": 62.89617561017229, "val_acc5": 93.98907146558084, "val_uar": 0.4838165662896804, "val_war": 0.6289617486338798, "val_weighted_f1": 0.613251700812865, "val_micro_f1": 0.6289617486338798, "val_macro_f1": 0.48429063704292596, "epoch": 3, "n_parameters": 521309229}
5
+ {"train_lr": 5.0669642857142856e-05, "train_min_lr": 2.856607579912764e-07, "train_loss": 1.839762243894067, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.030015140476793, "val_loss": 1.2401446881247502, "val_acc1": 63.33333394201727, "val_acc5": 93.27868864132407, "val_uar": 0.49134807163156025, "val_war": 0.6333333333333333, "val_weighted_f1": 0.6093434522345685, "val_micro_f1": 0.6333333333333333, "val_macro_f1": 0.4773744722579608, "epoch": 4, "n_parameters": 521309229}
6
+ {"train_lr": 5.624497522407655e-05, "train_min_lr": 3.170928656633569e-07, "train_loss": 1.8288746575514476, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9710381904450975, "val_loss": 1.2606984867769129, "val_acc1": 60.98360703890441, "val_acc5": 93.71584684444908, "val_uar": 0.45850852344743376, "val_war": 0.6098360655737705, "val_weighted_f1": 0.5842001069185897, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.4537449724893748, "epoch": 5, "n_parameters": 521309229}
7
+ {"train_lr": 5.621453170303479e-05, "train_min_lr": 3.169212339168842e-07, "train_loss": 1.81914686851769, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9335663365845632, "val_loss": 1.2441148816370498, "val_acc1": 61.42076522743767, "val_acc5": 93.44262320304829, "val_uar": 0.48394420905508656, "val_war": 0.614207650273224, "val_weighted_f1": 0.5991474718582438, "val_micro_f1": 0.614207650273224, "val_macro_f1": 0.4780906555242759, "epoch": 6, "n_parameters": 521309229}
8
+ {"train_lr": 5.615352646285501e-05, "train_min_lr": 3.1657730405733625e-07, "train_loss": 1.8044571108353806, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1097252250898, "val_loss": 1.2413019640188592, "val_acc1": 62.13114815070981, "val_acc5": 93.38797830675469, "val_uar": 0.4824801461928901, "val_war": 0.6213114754098361, "val_weighted_f1": 0.602506907433017, "val_micro_f1": 0.6213114754098361, "val_macro_f1": 0.47707151922047697, "epoch": 7, "n_parameters": 521309229}
9
+ {"train_lr": 5.60620262118716e-05, "train_min_lr": 3.1606145216696487e-07, "train_loss": 1.7996535893320644, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.046770782753972, "val_loss": 1.2764071018672456, "val_acc1": 60.65573796756932, "val_acc5": 93.27868888104548, "val_uar": 0.47199439720846886, "val_war": 0.6065573770491803, "val_weighted_f1": 0.5950406585051089, "val_micro_f1": 0.6065573770491803, "val_macro_f1": 0.47239624062560986, "epoch": 8, "n_parameters": 521309229}
10
+ {"train_lr": 5.5940131004265686e-05, "train_min_lr": 3.1537424232223837e-07, "train_loss": 1.7996203441037597, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9414267988488225, "val_loss": 1.2383344094542896, "val_acc1": 60.437158645567344, "val_acc5": 94.26229517785578, "val_uar": 0.4743756358609645, "val_war": 0.6049180327868853, "val_weighted_f1": 0.5986051969783673, "val_micro_f1": 0.6049180327868853, "val_macro_f1": 0.4742228104381139, "epoch": 9, "n_parameters": 521309229}
11
+ {"train_lr": 5.57879741306571e-05, "train_min_lr": 3.1451642597703227e-07, "train_loss": 1.7952621565400178, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.906264361768666, "val_loss": 1.2291898020342285, "val_acc1": 62.45901668944646, "val_acc5": 93.27868828278422, "val_uar": 0.48637379048040064, "val_war": 0.6245901639344262, "val_weighted_f1": 0.6099960720399625, "val_micro_f1": 0.6245901639344262, "val_macro_f1": 0.4810581135021951, "epoch": 10, "n_parameters": 521309229}
12
+ {"train_lr": 5.5605721972353206e-05, "train_min_lr": 3.134889411409257e-07, "train_loss": 1.8028695197782107, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9713154098775125, "val_loss": 1.262620031541469, "val_acc1": 62.67759619332402, "val_acc5": 92.67759537827122, "val_uar": 0.4766355194315298, "val_war": 0.6262295081967213, "val_weighted_f1": 0.6079531277102201, "val_micro_f1": 0.6262295081967213, "val_macro_f1": 0.4693038939254825, "epoch": 11, "n_parameters": 521309229}
13
+ {"train_lr": 5.5393573819413314e-05, "train_min_lr": 3.1229291135350213e-07, "train_loss": 1.7690485030314316, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.010996405440982, "val_loss": 1.2371016024958854, "val_acc1": 61.311475693332696, "val_acc5": 93.11475395452781, "val_uar": 0.4730274325044767, "val_war": 0.6131147540983607, "val_weighted_f1": 0.6064379578949884, "val_micro_f1": 0.6131147540983607, "val_macro_f1": 0.46945532950165936, "epoch": 12, "n_parameters": 521309229}
14
+ {"train_lr": 5.5151761652727875e-05, "train_min_lr": 3.109296444557738e-07, "train_loss": 1.7752690427374131, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.982568075161169, "val_loss": 1.2162585585725074, "val_acc1": 63.60655778561785, "val_acc5": 92.89617467994898, "val_uar": 0.48642481851446107, "val_war": 0.6360655737704918, "val_weighted_f1": 0.6112166557673441, "val_micro_f1": 0.6360655737704918, "val_macro_f1": 0.4768501046493276, "epoch": 13, "n_parameters": 521309229}
15
+ {"train_lr": 5.4880549890350996e-05, "train_min_lr": 3.094006311600778e-07, "train_loss": 1.7594793665133688, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.090661605986038, "val_loss": 1.2263409571320403, "val_acc1": 61.420765593273394, "val_acc5": 93.27868869135288, "val_uar": 0.4689777350589629, "val_war": 0.614207650273224, "val_weighted_f1": 0.6036809023128045, "val_micro_f1": 0.614207650273224, "val_macro_f1": 0.4672628060793187, "epoch": 14, "n_parameters": 521309229}
16
+ {"train_lr": 5.458023509836289e-05, "train_min_lr": 3.077075434200046e-07, "train_loss": 1.7591576333289887, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.037068768302993, "val_loss": 1.2121108592140908, "val_acc1": 61.91256853947874, "val_acc5": 93.33333340629203, "val_uar": 0.47608663880913965, "val_war": 0.6191256830601093, "val_weighted_f1": 0.6003355154829025, "val_micro_f1": 0.6191256830601093, "val_macro_f1": 0.4746108429479594, "epoch": 15, "n_parameters": 521309229}
17
+ {"train_lr": 5.425114566657945e-05, "train_min_lr": 3.058522326021405e-07, "train_loss": 1.757680104904049, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9388741998389216, "val_loss": 1.2189228861939674, "val_acc1": 61.91256852384473, "val_acc5": 93.11475429222232, "val_uar": 0.4846680888845132, "val_war": 0.6191256830601093, "val_weighted_f1": 0.6085744654069856, "val_micro_f1": 0.6191256830601093, "val_macro_f1": 0.4798468406565102, "epoch": 16, "n_parameters": 521309229}
18
+ {"train_lr": 5.389364144946269e-05, "train_min_lr": 3.0383672746162495e-07, "train_loss": 1.7642188492191113, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9893256296025643, "val_loss": 1.2629217695371777, "val_acc1": 62.6229514445112, "val_acc5": 92.29508194011417, "val_uar": 0.47588211918770384, "val_war": 0.6256830601092896, "val_weighted_f1": 0.6058538635899008, "val_micro_f1": 0.6256830601092896, "val_macro_f1": 0.47419013788296954, "epoch": 17, "n_parameters": 521309229}
19
+ {"train_lr": 5.3508113372625154e-05, "train_min_lr": 3.0166323192373806e-07, "train_loss": 1.749211367207392, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9790915970755094, "val_loss": 1.2338425397288566, "val_acc1": 61.25683115870575, "val_acc5": 92.84153035690224, "val_uar": 0.4822012898659311, "val_war": 0.6125683060109289, "val_weighted_f1": 0.5979815762554939, "val_micro_f1": 0.6125683060109289, "val_macro_f1": 0.47537183528499155, "epoch": 18, "n_parameters": 521309229}
20
+ {"train_lr": 5.309498300535867e-05, "train_min_lr": 2.993341226739393e-07, "train_loss": 1.745936911491671, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9073386357562376, "val_loss": 1.243311541337593, "val_acc1": 61.803279354012076, "val_acc5": 93.93442633581944, "val_uar": 0.480786732916555, "val_war": 0.6180327868852459, "val_weighted_f1": 0.602753821723393, "val_micro_f1": 0.6180327868852459, "val_macro_f1": 0.47813810001575763, "epoch": 19, "n_parameters": 521309229}
21
+ {"train_lr": 5.265470209965427e-05, "train_min_lr": 2.9685194655899794e-07, "train_loss": 1.7525786242075878, "train_loss_scale": 4258.217821782178, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2192071349013085, "val_acc1": 62.78688549031325, "val_acc5": 93.71584721966519, "val_uar": 0.4831224356969436, "val_war": 0.6278688524590164, "val_weighted_f1": 0.6074520570330519, "val_micro_f1": 0.6278688524590164, "val_macro_f1": 0.48190188755563196, "epoch": 20, "n_parameters": 521309229}
22
+ {"train_lr": 5.218775209621823e-05, "train_min_lr": 2.9421941780205285e-07, "train_loss": 1.7325017640102816, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.023285849259631, "val_loss": 1.2615994872415768, "val_acc1": 59.78142133514738, "val_acc5": 93.66120222122943, "val_uar": 0.4618046804359502, "val_war": 0.5978142076502733, "val_weighted_f1": 0.5855645317595034, "val_micro_f1": 0.5978142076502733, "val_macro_f1": 0.45963884547802797, "epoch": 21, "n_parameters": 521309229}
23
+ {"train_lr": 5.1694643598023545e-05, "train_min_lr": 2.9143941503464896e-07, "train_loss": 1.7321524529567254, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9775925485214385, "val_loss": 1.2248252358506708, "val_acc1": 62.349727591009085, "val_acc5": 93.22404370959339, "val_uar": 0.4809053699834643, "val_war": 0.6234972677595628, "val_weighted_f1": 0.6040684107609277, "val_micro_f1": 0.6234972677595628, "val_macro_f1": 0.4783406569298521, "epoch": 22, "n_parameters": 521309229}
24
+ {"train_lr": 5.117591581197337e-05, "train_min_lr": 2.88514978148997e-07, "train_loss": 1.7237952230590405, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.969669722094394, "val_loss": 1.2338297513185763, "val_acc1": 61.20218605708555, "val_acc5": 93.71584677357491, "val_uar": 0.4758439762762189, "val_war": 0.6120218579234973, "val_weighted_f1": 0.6029557390625235, "val_micro_f1": 0.6120218579234973, "val_macro_f1": 0.47587635799809225, "epoch": 23, "n_parameters": 521309229}
25
+ {"train_lr": 5.0632135959285794e-05, "train_min_lr": 2.854493049738955e-07, "train_loss": 1.7139217249631096, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.069944372271547, "val_loss": 1.2260145945291894, "val_acc1": 62.02185839355969, "val_acc5": 93.33333359806916, "val_uar": 0.48510982475263287, "val_war": 0.6202185792349727, "val_weighted_f1": 0.6061440268243894, "val_micro_f1": 0.6202185792349727, "val_macro_f1": 0.482618262917188, "epoch": 24, "n_parameters": 521309229}
26
+ {"train_lr": 5.0063898655246014e-05, "train_min_lr": 2.822457477779514e-07, "train_loss": 1.730964486355042, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.969215501653086, "val_loss": 1.2762202138409895, "val_acc1": 59.562842035033015, "val_acc5": 93.60655719777925, "val_uar": 0.46245238211600875, "val_war": 0.5956284153005464, "val_weighted_f1": 0.5832064678440297, "val_micro_f1": 0.5956284153005464, "val_macro_f1": 0.4628868996714445, "epoch": 25, "n_parameters": 521309229}
27
+ {"train_lr": 4.9471825259003246e-05, "train_min_lr": 2.789078096039244e-07, "train_loss": 1.7224678014567976, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.024707185159816, "val_loss": 1.244065420008173, "val_acc1": 62.076503381572785, "val_acc5": 92.73224051011717, "val_uar": 0.4744304636551269, "val_war": 0.6207650273224044, "val_weighted_f1": 0.6002432491297383, "val_micro_f1": 0.6207650273224044, "val_macro_f1": 0.46872883483579797, "epoch": 26, "n_parameters": 521309229}
28
+ {"train_lr": 4.885656319412359e-05, "train_min_lr": 2.754391404382005e-07, "train_loss": 1.697869453788197, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9442325417358095, "val_loss": 1.2328375484429153, "val_acc1": 61.25683136872255, "val_acc5": 92.84153020681579, "val_uar": 0.4789637040318847, "val_war": 0.6125683060109289, "val_weighted_f1": 0.6043879047947841, "val_micro_f1": 0.6125683060109289, "val_macro_f1": 0.4757369369892245, "epoch": 27, "n_parameters": 521309229}
29
+ {"train_lr": 4.821878524064173e-05, "train_min_lr": 2.7184353321958644e-07, "train_loss": 1.719553029969974, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.015247132518504, "val_loss": 1.234959703742289, "val_acc1": 61.58469975476708, "val_acc5": 94.15300570837135, "val_uar": 0.4751189012074422, "val_war": 0.6158469945355192, "val_weighted_f1": 0.6066090387267578, "val_micro_f1": 0.6158469945355192, "val_macro_f1": 0.47145653255699865, "epoch": 28, "n_parameters": 521309229}
30
+ {"train_lr": 4.7559188799386115e-05, "train_min_lr": 2.6812491969178504e-07, "train_loss": 1.6966046584714758, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.974958889555223, "val_loss": 1.2470455344985514, "val_acc1": 61.31147608366169, "val_acc5": 94.15300538109952, "val_uar": 0.47715270848474434, "val_war": 0.6131147540983607, "val_weighted_f1": 0.6001744931117853, "val_micro_f1": 0.6131147540983607, "val_macro_f1": 0.4701877107648484, "epoch": 29, "n_parameters": 521309229}
31
+ {"train_lr": 4.6878495129381106e-05, "train_min_lr": 2.6428736610409306e-07, "train_loss": 1.7098336786326795, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9198257592645023, "val_loss": 1.2323057239546495, "val_acc1": 61.85792398609099, "val_acc5": 93.44262313217413, "val_uar": 0.4730778746828052, "val_war": 0.6185792349726776, "val_weighted_f1": 0.6063515532590842, "val_micro_f1": 0.6185792349726776, "val_macro_f1": 0.4724132154605382, "epoch": 30, "n_parameters": 521309229}
32
+ {"train_lr": 4.6177448559161015e-05, "train_min_lr": 2.603350687650165e-07, "train_loss": 1.6915409427271424, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.084746825813067, "val_loss": 1.2752848188666737, "val_acc1": 60.76502806137168, "val_acc5": 92.73224036419978, "val_uar": 0.47003232374743464, "val_war": 0.6076502732240437, "val_weighted_f1": 0.594139037682158, "val_micro_f1": 0.6076502732240437, "val_macro_f1": 0.46859548469690454, "epoch": 31, "n_parameters": 521309229}
33
+ {"train_lr": 4.545681567285774e-05, "train_min_lr": 2.5627234945366804e-07, "train_loss": 1.700578356635059, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.931164344938675, "val_loss": 1.2590893927742453, "val_acc1": 60.32786916722365, "val_acc5": 93.71584706957874, "val_uar": 0.4697812763142449, "val_war": 0.6032786885245902, "val_weighted_f1": 0.5958359872329497, "val_micro_f1": 0.6032786885245902, "val_macro_f1": 0.47203417785929797, "epoch": 32, "n_parameters": 521309229}
34
+ {"train_lr": 4.47173844719522e-05, "train_min_lr": 2.5210365069396683e-07, "train_loss": 1.6910566927576223, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.06791628469335, "val_loss": 1.2828442527382982, "val_acc1": 59.56284201679334, "val_acc5": 92.45901627253964, "val_uar": 0.46864691824605603, "val_war": 0.5956284153005464, "val_weighted_f1": 0.5902144940215976, "val_micro_f1": 0.5956284153005464, "val_macro_f1": 0.4698593313372236, "epoch": 33, "n_parameters": 521309229}
35
+ {"train_lr": 4.395996351360636e-05, "train_min_lr": 2.478335308968022e-07, "train_loss": 1.6906210790963063, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.964983047825275, "val_loss": 1.2587656749814164, "val_acc1": 62.18579279894386, "val_acc5": 92.89617480710555, "val_uar": 0.4954133960992615, "val_war": 0.6218579234972678, "val_weighted_f1": 0.6149812911851052, "val_micro_f1": 0.6218579234972678, "val_macro_f1": 0.49158333086385325, "epoch": 34, "n_parameters": 521309229}
36
+ {"train_lr": 4.318538102651787e-05, "train_min_lr": 2.434666593754791e-07, "train_loss": 1.6864031957517756, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.064868214106796, "val_loss": 1.2432298774228376, "val_acc1": 61.147541348921145, "val_acc5": 92.45901646223224, "val_uar": 0.47242066020509893, "val_war": 0.6120218579234973, "val_weighted_f1": 0.6022365303320327, "val_micro_f1": 0.6120218579234973, "val_macro_f1": 0.4703255758956987, "epoch": 35, "n_parameters": 521309229}
37
+ {"train_lr": 4.2394484005264044e-05, "train_min_lr": 2.390078112398925e-07, "train_loss": 1.6879878141502342, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.005022405397774, "val_loss": 1.2467925376167484, "val_acc1": 61.6393448225136, "val_acc5": 92.40437177856111, "val_uar": 0.47500820619409245, "val_war": 0.6163934426229508, "val_weighted_f1": 0.6070393556550637, "val_micro_f1": 0.6163934426229508, "val_macro_f1": 0.4745592353572307, "epoch": 36, "n_parameters": 521309229}
38
+ {"train_lr": 4.158813728412575e-05, "train_min_lr": 2.3446186217501424e-07, "train_loss": 1.6826343968166377, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.036161203195553, "val_loss": 1.2683094149711085, "val_acc1": 60.491803534043946, "val_acc5": 93.22404383466544, "val_uar": 0.47431468382183334, "val_war": 0.6049180327868853, "val_weighted_f1": 0.6006043526179047, "val_micro_f1": 0.6049180327868853, "val_macro_f1": 0.4702091425948481, "epoch": 37, "n_parameters": 521309229}
39
+ {"train_lr": 4.0767222591403925e-05, "train_min_lr": 2.2983378310940412e-07, "train_loss": 1.6764004382953392, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0058319002094835, "val_loss": 1.2600377920795889, "val_acc1": 61.256831113367134, "val_acc5": 92.56830645285018, "val_uar": 0.47971011325555923, "val_war": 0.6125683060109289, "val_weighted_f1": 0.605042664878264, "val_micro_f1": 0.6125683060109289, "val_macro_f1": 0.4788451868674297, "epoch": 38, "n_parameters": 521309229}
40
+ {"train_lr": 3.993263758526252e-05, "train_min_lr": 2.251286347795713e-07, "train_loss": 1.6503862642218965, "train_loss_scale": 4744.8712871287125, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.014832628835546, "val_loss": 1.2503548883924298, "val_acc1": 60.874317305205295, "val_acc5": 92.89617506767232, "val_uar": 0.47390069900004067, "val_war": 0.6087431693989072, "val_weighted_f1": 0.600675984404665, "val_micro_f1": 0.6087431693989072, "val_macro_f1": 0.4694541127966108, "epoch": 39, "n_parameters": 521309229}
41
+ {"train_lr": 3.9085294872152475e-05, "train_min_lr": 2.203515621961343e-07, "train_loss": 1.6761035547398104, "train_loss_scale": 7786.455445544554, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2879653917808158, "val_acc1": 60.16393472483901, "val_acc5": 93.00546460177729, "val_uar": 0.47502972077889, "val_war": 0.601639344262295, "val_weighted_f1": 0.59504210184531, "val_micro_f1": 0.601639344262295, "val_macro_f1": 0.4724525683263968, "epoch": 40, "n_parameters": 521309229}
42
+ {"train_lr": 3.822612100889004e-05, "train_min_lr": 2.1550778901782692e-07, "train_loss": 1.6584093791030028, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.030815861012676, "val_loss": 1.2481004353247436, "val_acc1": 61.91256874428421, "val_acc5": 92.84153001295412, "val_uar": 0.4774706505190288, "val_war": 0.6191256830601093, "val_weighted_f1": 0.6104815751468179, "val_micro_f1": 0.6191256830601093, "val_macro_f1": 0.4759033002752341, "epoch": 41, "n_parameters": 521309229}
43
+ {"train_lr": 3.73560554894804e-05, "train_min_lr": 2.1060261183950398e-07, "train_loss": 1.6507156260139477, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.006289689847739, "val_loss": 1.291508704716084, "val_acc1": 59.016393797514866, "val_acc5": 92.34972700525503, "val_uar": 0.4680718318418167, "val_war": 0.5901639344262295, "val_weighted_f1": 0.587193406953556, "val_micro_f1": 0.5901639344262295, "val_macro_f1": 0.4720637050869402, "epoch": 42, "n_parameters": 521309229}
44
+ {"train_lr": 3.647604971779486e-05, "train_min_lr": 2.056413944003928e-07, "train_loss": 1.6635446794355664, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.198747705705095, "val_loss": 1.272167269797886, "val_acc1": 60.1092902032404, "val_acc5": 92.78688515001308, "val_uar": 0.46798810267119245, "val_war": 0.6010928961748634, "val_weighted_f1": 0.5894440105984103, "val_micro_f1": 0.6010928961748634, "val_macro_f1": 0.46676281199101927, "epoch": 43, "n_parameters": 521309229}
45
+ {"train_lr": 3.558706596722466e-05, "train_min_lr": 2.006295617189234e-07, "train_loss": 1.6507946687563024, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.013253872937495, "val_loss": 1.2885591098490883, "val_acc1": 59.39890724453118, "val_acc5": 93.00546460803089, "val_uar": 0.47007073981098335, "val_war": 0.5939890710382514, "val_weighted_f1": 0.5925363498145945, "val_micro_f1": 0.5939890710382514, "val_macro_f1": 0.46805035715997173, "epoch": 44, "n_parameters": 521309229}
46
+ {"train_lr": 3.469007632844911e-05, "train_min_lr": 1.9557259416054976e-07, "train_loss": 1.6542636264275403, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.043106492203061, "val_loss": 1.2616615929439956, "val_acc1": 60.7103829180608, "val_acc5": 92.95081963460953, "val_uar": 0.4710224096012339, "val_war": 0.607103825136612, "val_weighted_f1": 0.596978499580765, "val_micro_f1": 0.607103825136612, "val_macro_f1": 0.470458428813785, "epoch": 45, "n_parameters": 521309229}
47
+ {"train_lr": 3.378606164646873e-05, "train_min_lr": 1.9047602144505153e-07, "train_loss": 1.6465780861897044, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.094678543581821, "val_loss": 1.23401756058721, "val_acc1": 61.03825185911251, "val_acc5": 92.56830584625078, "val_uar": 0.4729424273227367, "val_war": 0.6103825136612022, "val_weighted_f1": 0.6016134941953455, "val_micro_f1": 0.6103825136612022, "val_macro_f1": 0.4711679170787347, "epoch": 46, "n_parameters": 521309229}
48
+ {"train_lr": 3.28760104480657e-05, "train_min_lr": 1.85345416599866e-07, "train_loss": 1.6565587839277665, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9526585257879576, "val_loss": 1.2772982360101213, "val_acc1": 60.98360700034053, "val_acc5": 91.53005480323333, "val_uar": 0.47492905856823137, "val_war": 0.6098360655737705, "val_weighted_f1": 0.5969421930008555, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.4708172444188532, "epoch": 47, "n_parameters": 521309229}
49
+ {"train_lr": 3.1960917860864196e-05, "train_min_lr": 1.8018638986606443e-07, "train_loss": 1.636345740690483, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.031851520632753, "val_loss": 1.265824575342384, "val_acc1": 60.60109315830502, "val_acc5": 92.1857927437037, "val_uar": 0.47249281457140313, "val_war": 0.6060109289617487, "val_weighted_f1": 0.5948923669381943, "val_micro_f1": 0.6060109289617487, "val_macro_f1": 0.46602941062661474, "epoch": 48, "n_parameters": 521309229}
50
+ {"train_lr": 3.104178452517305e-05, "train_min_lr": 1.750045825636361e-07, "train_loss": 1.6486781518844882, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.035790979272068, "val_loss": 1.2675192794379067, "val_acc1": 60.655738248981415, "val_acc5": 92.24043738151508, "val_uar": 0.47045267774177046, "val_war": 0.6065573770491803, "val_weighted_f1": 0.597116342267431, "val_micro_f1": 0.6065573770491803, "val_macro_f1": 0.468573228758627, "epoch": 49, "n_parameters": 521309229}
51
+ {"train_lr": 3.011961549980036e-05, "train_min_lr": 1.698056609227879e-07, "train_loss": 1.6211004188351898, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9829696405051958, "val_loss": 1.2708110847309524, "val_acc1": 60.49180336363329, "val_acc5": 92.5136611354807, "val_uar": 0.4688860777156528, "val_war": 0.6049180327868853, "val_weighted_f1": 0.5920877695922099, "val_micro_f1": 0.6049180327868853, "val_macro_f1": 0.46164799515427846, "epoch": 50, "n_parameters": 521309229}
52
+ {"train_lr": 2.919541916303608e-05, "train_min_lr": 1.6459530988800415e-07, "train_loss": 1.6322163014915516, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.057789974873609, "val_loss": 1.2700819598109114, "val_acc1": 60.38251402860131, "val_acc5": 92.34972684891497, "val_uar": 0.4715636118248595, "val_war": 0.6038251366120219, "val_weighted_f1": 0.592602420831967, "val_micro_f1": 0.6038251366120219, "val_macro_f1": 0.4727541621951762, "epoch": 51, "n_parameters": 521309229}
53
+ {"train_lr": 2.8270206110005638e-05, "train_min_lr": 1.593792269016439e-07, "train_loss": 1.629379390096507, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.981119231422349, "val_loss": 1.260360162047779, "val_acc1": 61.53005521284427, "val_acc5": 92.40437156802318, "val_uar": 0.48000800522583936, "val_war": 0.6153005464480874, "val_weighted_f1": 0.603723150279344, "val_micro_f1": 0.6153005464480874, "val_macro_f1": 0.4789250937313981, "epoch": 52, "n_parameters": 521309229}
54
+ {"train_lr": 2.7344988047598695e-05, "train_min_lr": 1.541631156738708e-07, "train_loss": 1.6230855432674043, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9215477669593133, "val_loss": 1.3135156023736094, "val_acc1": 59.50819701054057, "val_acc5": 92.24043731272546, "val_uar": 0.4718689896646611, "val_war": 0.5950819672131148, "val_weighted_f1": 0.5902173595572359, "val_micro_f1": 0.5950819672131148, "val_macro_f1": 0.46692855158463015, "epoch": 53, "n_parameters": 521309229}
55
+ {"train_lr": 2.6420776688182537e-05, "train_min_lr": 1.4895267994573047e-07, "train_loss": 1.6270820558464567, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.91756806043115, "val_loss": 1.282540035014059, "val_acc1": 60.65573818071292, "val_acc5": 92.34972696147982, "val_uar": 0.474945461896943, "val_war": 0.6065573770491803, "val_weighted_f1": 0.5988463631058741, "val_micro_f1": 0.6065573770491803, "val_macro_f1": 0.4716229055488503, "epoch": 54, "n_parameters": 521309229}
56
+ {"train_lr": 2.549858264330953e-05, "train_min_lr": 1.4375361725219248e-07, "train_loss": 1.6274899902320143, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9157007779225266, "val_loss": 1.2797849429588692, "val_acc1": 61.63934464272254, "val_acc5": 92.62295086761641, "val_uar": 0.4868871317755489, "val_war": 0.6169398907103825, "val_weighted_f1": 0.6061902008289848, "val_micro_f1": 0.6169398907103825, "val_macro_f1": 0.48713070638310246, "epoch": 55, "n_parameters": 521309229}
57
+ {"train_lr": 2.4579414318628305e-05, "train_min_lr": 1.3857161269198084e-07, "train_loss": 1.6312867481716395, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0290170518478545, "val_loss": 1.281690671747806, "val_acc1": 60.05464520219897, "val_acc5": 92.84153012343444, "val_uar": 0.45892014891440885, "val_war": 0.6005464480874317, "val_weighted_f1": 0.58679539350013, "val_micro_f1": 0.6005464480874317, "val_macro_f1": 0.45618413950824527, "epoch": 56, "n_parameters": 521309229}
58
+ {"train_lr": 2.3664276811206737e-05, "train_min_lr": 1.3341233271100021e-07, "train_loss": 1.623121147698695, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.053906636663003, "val_loss": 1.2493692937434888, "val_acc1": 60.983607061834284, "val_acc5": 92.62295112609863, "val_uar": 0.4820658310057148, "val_war": 0.6098360655737705, "val_weighted_f1": 0.6016799992756787, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.47814757409223807, "epoch": 57, "n_parameters": 521309229}
59
+ {"train_lr": 2.2754170810473282e-05, "train_min_lr": 1.282814189061623e-07, "train_loss": 1.616368287664042, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.96321167096053, "val_loss": 1.2744934269610573, "val_acc1": 60.71038269918473, "val_acc5": 92.84153035273317, "val_uar": 0.48734789025445135, "val_war": 0.607103825136612, "val_weighted_f1": 0.5997781536609385, "val_micro_f1": 0.607103825136612, "val_macro_f1": 0.48129277905913476, "epoch": 58, "n_parameters": 521309229}
60
+ {"train_lr": 2.1850091503977748e-05, "train_min_lr": 1.2318448185638122e-07, "train_loss": 1.6314282706468413, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.023095893387747, "val_loss": 1.2470943191472221, "val_acc1": 61.14754133380827, "val_acc5": 92.95081981387946, "val_uar": 0.4781771958768998, "val_war": 0.6114754098360655, "val_weighted_f1": 0.6024713476377066, "val_micro_f1": 0.6114754098360655, "val_macro_f1": 0.4762855650901348, "epoch": 59, "n_parameters": 521309229}
61
+ {"train_lr": 2.0953027489168203e-05, "train_min_lr": 1.1812709498748855e-07, "train_loss": 1.614021887676944, "train_loss_scale": 5312.633663366337, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9975876642925905, "val_loss": 1.2829455061286104, "val_acc1": 60.819672738268075, "val_acc5": 92.18579290004377, "val_uar": 0.486078099329397, "val_war": 0.6081967213114754, "val_weighted_f1": 0.6030462862003282, "val_micro_f1": 0.6081967213114754, "val_macro_f1": 0.484605698970443, "epoch": 60, "n_parameters": 521309229}
62
+ {"train_lr": 2.006395969237425e-05, "train_min_lr": 1.1311478847777361e-07, "train_loss": 1.603592096972387, "train_loss_scale": 5637.069306930693, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.288406733788696, "val_acc1": 60.327869406423936, "val_acc5": 92.5683064174131, "val_uar": 0.4883545335182888, "val_war": 0.6032786885245902, "val_weighted_f1": 0.595861597307244, "val_micro_f1": 0.6032786885245902, "val_macro_f1": 0.48303997738496224, "epoch": 61, "n_parameters": 521309229}
63
+ {"train_lr": 1.918386029617857e-05, "train_min_lr": 1.0815304321081477e-07, "train_loss": 1.613232543661256, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.998820160875226, "val_loss": 1.2820479744205289, "val_acc1": 60.601093457956786, "val_acc5": 92.89617473831593, "val_uar": 0.4828994617226186, "val_war": 0.6060109289617487, "val_weighted_f1": 0.6006310486097088, "val_micro_f1": 0.6060109289617487, "val_macro_f1": 0.48168792937144483, "epoch": 62, "n_parameters": 521309229}
64
+ {"train_lr": 1.831369167634938e-05, "train_min_lr": 1.0324728478221384e-07, "train_loss": 1.5916021217410714, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.986254328548318, "val_loss": 1.2609468271919326, "val_acc1": 60.491803571565555, "val_acc5": 92.89617484254264, "val_uar": 0.48049990466334497, "val_war": 0.6049180327868853, "val_weighted_f1": 0.6024263277327393, "val_micro_f1": 0.6049180327868853, "val_macro_f1": 0.4820041670123033, "epoch": 63, "n_parameters": 521309229}
65
+ {"train_lr": 1.74544053494968e-05, "train_min_lr": 9.840287756678695e-08, "train_loss": 1.6044391260878874, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.930753509597023, "val_loss": 1.2861415019222335, "val_acc1": 61.4207653306221, "val_acc5": 91.63934434567645, "val_uar": 0.4843661068169045, "val_war": 0.614207650273224, "val_weighted_f1": 0.6053611968693566, "val_micro_f1": 0.614207650273224, "val_macro_f1": 0.48108354824502314, "epoch": 64, "n_parameters": 521309229}
66
+ {"train_lr": 1.6606940932603314e-05, "train_min_lr": 9.36251188526991e-08, "train_loss": 1.6064305687108056, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.876652259637814, "val_loss": 1.2825187316127853, "val_acc1": 60.43715885245735, "val_acc5": 92.67759577016362, "val_uar": 0.4754147834138775, "val_war": 0.6043715846994535, "val_weighted_f1": 0.5993651959767269, "val_micro_f1": 0.6043715846994535, "val_macro_f1": 0.47736876976131737, "epoch": 65, "n_parameters": 521309229}
67
+ {"train_lr": 1.5772225115566454e-05, "train_min_lr": 8.891923304895814e-08, "train_loss": 1.627727469005207, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.96729575761474, "val_loss": 1.2716021201774186, "val_acc1": 60.71038279090423, "val_acc5": 92.0218578943138, "val_uar": 0.4825119924399439, "val_war": 0.607103825136612, "val_weighted_f1": 0.6010995169634685, "val_micro_f1": 0.607103825136612, "val_macro_f1": 0.4803440908752758, "epoch": 66, "n_parameters": 521309229}
68
+ {"train_lr": 1.4951170647876973e-05, "train_min_lr": 8.429036597259903e-08, "train_loss": 1.6010109225515485, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.98208750356542, "val_loss": 1.2786293777765012, "val_acc1": 60.8743173098955, "val_acc5": 92.13114769732366, "val_uar": 0.47717092374172726, "val_war": 0.6087431693989072, "val_weighted_f1": 0.6002955641646839, "val_micro_f1": 0.6087431693989072, "val_macro_f1": 0.4748804468101031, "epoch": 67, "n_parameters": 521309229}
69
+ {"train_lr": 1.4144675340540693e-05, "train_min_lr": 7.974357922180958e-08, "train_loss": 1.6150833196175767, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.007660943682831, "val_loss": 1.2916383664397633, "val_acc1": 60.983607053496144, "val_acc5": 92.84153001295412, "val_uar": 0.4792063727626668, "val_war": 0.6098360655737705, "val_weighted_f1": 0.6017700325264338, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.48011550886105414, "epoch": 68, "n_parameters": 521309229}
70
+ {"train_lr": 1.3353621084335341e-05, "train_min_lr": 7.528384464114652e-08, "train_loss": 1.6003932569286612, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9598784824409106, "val_loss": 1.2878040811594795, "val_acc1": 60.382514271970656, "val_acc5": 93.00546449546606, "val_uar": 0.48352888408514155, "val_war": 0.6038251366120219, "val_weighted_f1": 0.5983260981260824, "val_micro_f1": 0.6038251366120219, "val_macro_f1": 0.4826212403489641, "epoch": 69, "n_parameters": 521309229}
71
+ {"train_lr": 1.2578872885475928e-05, "train_min_lr": 7.091603888489658e-08, "train_loss": 1.5975581436070672, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.007372808928537, "val_loss": 1.2781740307223564, "val_acc1": 61.31147602894267, "val_acc5": 92.2404374252903, "val_uar": 0.48805341797877805, "val_war": 0.6131147540983607, "val_weighted_f1": 0.6075605752369037, "val_micro_f1": 0.6131147540983607, "val_macro_f1": 0.4884284027091189, "epoch": 70, "n_parameters": 521309229}
72
+ {"train_lr": 1.1821277919743169e-05, "train_min_lr": 6.664493808452678e-08, "train_loss": 1.600418604738248, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9444082987190474, "val_loss": 1.2748822114046883, "val_acc1": 61.20218635934298, "val_acc5": 92.29508213606037, "val_uar": 0.4831058036681486, "val_war": 0.6120218579234973, "val_weighted_f1": 0.6024476183639653, "val_micro_f1": 0.6120218579234973, "val_macro_f1": 0.48506675580078673, "epoch": 71, "n_parameters": 521309229}
73
+ {"train_lr": 1.1081664606109202e-05, "train_min_lr": 6.247521262605469e-08, "train_loss": 1.6177871217428654, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.856133517652455, "val_loss": 1.313387428720792, "val_acc1": 60.273224641455975, "val_acc5": 92.34972681556243, "val_uar": 0.4749817300342908, "val_war": 0.6027322404371585, "val_weighted_f1": 0.5946680202892731, "val_micro_f1": 0.6027322404371585, "val_macro_f1": 0.476058998141475, "epoch": 72, "n_parameters": 521309229}
74
+ {"train_lr": 1.0360841700873597e-05, "train_min_lr": 5.841142204304979e-08, "train_loss": 1.5839445358849202, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.048251579303552, "val_loss": 1.2829863738779927, "val_acc1": 59.89071090052037, "val_acc5": 92.56830619228342, "val_uar": 0.48048834868218704, "val_war": 0.5989071038251366, "val_weighted_f1": 0.5926722914747615, "val_micro_f1": 0.5989071038251366, "val_macro_f1": 0.477656297753474, "epoch": 73, "n_parameters": 521309229}
75
+ {"train_lr": 9.659597413300203e-06, "train_min_lr": 5.445801003085074e-08, "train_loss": 1.6052249186109788, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.006261008800847, "val_loss": 1.2858275344558792, "val_acc1": 60.98360714365224, "val_acc5": 92.07650283646714, "val_uar": 0.4906048041715452, "val_war": 0.6098360655737705, "val_weighted_f1": 0.6013045396781828, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.4881499109853645, "epoch": 74, "n_parameters": 521309229}
76
+ {"train_lr": 8.978698543721923e-06, "train_min_lr": 5.0619299587449256e-08, "train_loss": 1.5906717731024171, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.968237071934313, "val_loss": 1.2991721749890084, "val_acc1": 60.71038286386292, "val_acc5": 91.47541013415393, "val_uar": 0.4821048559083615, "val_war": 0.607103825136612, "val_weighted_f1": 0.5994828205707309, "val_micro_f1": 0.607103825136612, "val_macro_f1": 0.4802199807857623, "epoch": 75, "n_parameters": 521309229}
77
+ {"train_lr": 8.318889645055782e-06, "train_min_lr": 4.689948828635602e-08, "train_loss": 1.5779690589054975, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.078629713247318, "val_loss": 1.2908197425159753, "val_acc1": 60.655738269826756, "val_acc5": 92.24043712303286, "val_uar": 0.48951247997405584, "val_war": 0.6065573770491803, "val_weighted_f1": 0.6013116476538899, "val_micro_f1": 0.6065573770491803, "val_macro_f1": 0.4916132424656421, "epoch": 76, "n_parameters": 521309229}
78
+ {"train_lr": 7.680892208645253e-06, "train_min_lr": 4.3302643686614975e-08, "train_loss": 1.5915376500721419, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9010748343892616, "val_loss": 1.2973519096187516, "val_acc1": 59.83606594325415, "val_acc5": 91.47541024671878, "val_uar": 0.4699899201842378, "val_war": 0.5983606557377049, "val_weighted_f1": 0.5915658886943022, "val_micro_f1": 0.5983606557377049, "val_macro_f1": 0.47001326129452803, "epoch": 77, "n_parameters": 521309229}
79
+ {"train_lr": 7.065403875320073e-06, "train_min_lr": 3.983269888498758e-08, "train_loss": 1.5823462437285054, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.97984779471218, "val_loss": 1.2785284574709685, "val_acc1": 60.87431734533258, "val_acc5": 92.51366110004362, "val_uar": 0.4824597408511757, "val_war": 0.6087431693989072, "val_weighted_f1": 0.6004374743043928, "val_micro_f1": 0.6087431693989072, "val_macro_f1": 0.48007327001784583, "epoch": 78, "n_parameters": 521309229}
80
+ {"train_lr": 6.473097672536222e-06, "train_min_lr": 3.649344821516926e-08, "train_loss": 1.592567417586204, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.96492596427993, "val_loss": 1.2680656474594976, "val_acc1": 59.94535568008006, "val_acc5": 92.3497268509995, "val_uar": 0.4694380669820508, "val_war": 0.5994535519125683, "val_weighted_f1": 0.5905407147046156, "val_micro_f1": 0.5994535519125683, "val_macro_f1": 0.46998456711143705, "epoch": 79, "n_parameters": 521309229}
81
+ {"train_lr": 5.904621278430291e-06, "train_min_lr": 3.3288543098741024e-08, "train_loss": 1.5674362394085812, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9697206185595824, "val_loss": 1.2577168853843914, "val_acc1": 60.98360683722574, "val_acc5": 92.62295109274609, "val_uar": 0.48743930470822705, "val_war": 0.6098360655737705, "val_weighted_f1": 0.6052390707422521, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.48577672566093255, "epoch": 80, "n_parameters": 521309229}
82
+ {"train_lr": 5.3605963135929456e-06, "train_min_lr": 3.0221488052393765e-08, "train_loss": 1.570763194423304, "train_loss_scale": 7462.019801980198, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.88395890386978, "val_loss": 1.2633218768180585, "val_acc1": 60.16393464875352, "val_acc5": 92.51366140230105, "val_uar": 0.4676941417145574, "val_war": 0.601639344262295, "val_weighted_f1": 0.5957442655002861, "val_micro_f1": 0.601639344262295, "val_macro_f1": 0.46908214423841177, "epoch": 81, "n_parameters": 521309229}
83
+ {"train_lr": 4.8416176613359425e-06, "train_min_lr": 2.7295636855790628e-08, "train_loss": 1.599499089194603, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8693113374237966, "val_loss": 1.278671715189429, "val_acc1": 60.81967251053273, "val_acc5": 92.78688530635313, "val_uar": 0.47372157266437676, "val_war": 0.6081967213114754, "val_weighted_f1": 0.6035628527575319, "val_micro_f1": 0.6081967213114754, "val_macro_f1": 0.47462754952741154, "epoch": 82, "n_parameters": 521309229}
84
+ {"train_lr": 4.3482528171959625e-06, "train_min_lr": 2.4514188884258143e-08, "train_loss": 1.5781233154311038, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9876535623380454, "val_loss": 1.2634020967810762, "val_acc1": 61.36612061359843, "val_acc5": 92.34972692812727, "val_uar": 0.49034007057197104, "val_war": 0.6136612021857923, "val_weighted_f1": 0.6070324273937949, "val_micro_f1": 0.6136612021857923, "val_macro_f1": 0.4906113028843252, "epoch": 83, "n_parameters": 521309229}
85
+ {"train_lr": 3.88104126838656e-06, "train_min_lr": 2.188018561031641e-08, "train_loss": 1.59383491793088, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.961980666264449, "val_loss": 1.2773804255560333, "val_acc1": 60.27322452889114, "val_acc5": 92.18579267282955, "val_uar": 0.4771639275722274, "val_war": 0.6027322404371585, "val_weighted_f1": 0.5932484803680037, "val_micro_f1": 0.6027322404371585, "val_macro_f1": 0.4746776271501456, "epoch": 84, "n_parameters": 521309229}
86
+ {"train_lr": 3.4404939038768243e-06, "train_min_lr": 1.9396507277873433e-08, "train_loss": 1.5879469407667028, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.990523128226252, "val_loss": 1.273301915210836, "val_acc1": 60.4371591005169, "val_acc5": 92.29508209853876, "val_uar": 0.4728119865299076, "val_war": 0.6043715846994535, "val_weighted_f1": 0.5990317188665987, "val_micro_f1": 0.6043715846994535, "val_macro_f1": 0.47604500891284335, "epoch": 85, "n_parameters": 521309229}
87
+ {"train_lr": 3.027092455741798e-06, "train_min_lr": 1.7065869752720735e-08, "train_loss": 1.582678915545492, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.990185201758205, "val_loss": 1.269066426391695, "val_acc1": 60.81967255587135, "val_acc5": 92.40437194115478, "val_uar": 0.4772745645707735, "val_war": 0.6081967213114754, "val_weighted_f1": 0.5991351701997311, "val_micro_f1": 0.6081967213114754, "val_macro_f1": 0.4758977361468869, "epoch": 86, "n_parameters": 521309229}
88
+ {"train_lr": 2.641288972395518e-06, "train_min_lr": 1.4890821552773996e-08, "train_loss": 1.5779479481402796, "train_loss_scale": 7664.792079207921, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2544230056159638, "val_acc1": 60.983606809084534, "val_acc5": 93.1147540337401, "val_uar": 0.4804510306533013, "val_war": 0.6098360655737705, "val_weighted_f1": 0.6041086317667865, "val_micro_f1": 0.6098360655737705, "val_macro_f1": 0.4829557785423314, "epoch": 87, "n_parameters": 521309229}
89
+ {"train_lr": 2.2835053242827143e-06, "train_min_lr": 1.2873741061306107e-08, "train_loss": 1.5521520988972668, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9124350358944127, "val_loss": 1.2702556883587557, "val_acc1": 60.54644840740767, "val_acc5": 92.62295105313994, "val_uar": 0.4828537935132437, "val_war": 0.6054644808743169, "val_weighted_f1": 0.6007386237888207, "val_micro_f1": 0.6054644808743169, "val_macro_f1": 0.4826990474891887, "epoch": 88, "n_parameters": 521309229}
90
+ {"train_lr": 1.954132742569673e-06, "train_min_lr": 1.1016833926220049e-08, "train_loss": 1.6193033759743467, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9285644894779317, "val_loss": 1.292723825749229, "val_acc1": 61.311475718347104, "val_acc5": 92.18579278539438, "val_uar": 0.48063768456476275, "val_war": 0.6131147540983607, "val_weighted_f1": 0.6024824934168326, "val_micro_f1": 0.6131147540983607, "val_macro_f1": 0.4792682298625456, "epoch": 89, "n_parameters": 521309229}
91
+ {"train_lr": 1.6535313913386933e-06, "train_min_lr": 9.322130648205152e-09, "train_loss": 1.5759701480959902, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9603578241744843, "val_loss": 1.2607293611063677, "val_acc1": 61.36612092679967, "val_acc5": 92.34972681556243, "val_uar": 0.48799559844350604, "val_war": 0.6131147540983607, "val_weighted_f1": 0.6065514770979934, "val_micro_f1": 0.6131147540983607, "val_macro_f1": 0.4856363505122578, "epoch": 90, "n_parameters": 521309229}
92
+ {"train_lr": 1.3820299737539636e-06, "train_min_lr": 7.791484360414586e-09, "train_loss": 1.5860725090448613, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.000150609724592, "val_loss": 1.273342407801572, "val_acc1": 60.163934599245835, "val_acc5": 93.00546445377537, "val_uar": 0.46861557742397597, "val_war": 0.601639344262295, "val_weighted_f1": 0.5940822761431741, "val_micro_f1": 0.601639344262295, "val_macro_f1": 0.46966151037994874, "epoch": 91, "n_parameters": 521309229}
93
+ {"train_lr": 1.139925372629472e-06, "train_min_lr": 6.426568802091309e-09, "train_loss": 1.560313487308647, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9267343861041684, "val_loss": 1.2815183383577011, "val_acc1": 61.42076573137377, "val_acc5": 92.07650291151036, "val_uar": 0.4784875487634808, "val_war": 0.614207650273224, "val_weighted_f1": 0.6063045100494109, "val_micro_f1": 0.614207650273224, "val_macro_f1": 0.47932307707803246, "epoch": 92, "n_parameters": 521309229}
94
+ {"train_lr": 9.274823257919996e-07, "train_min_lr": 5.22887648835886e-09, "train_loss": 1.564683206207288, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9740136849998247, "val_loss": 1.2624485092420203, "val_acc1": 60.43715882587954, "val_acc5": 92.34972692604273, "val_uar": 0.47616103034350515, "val_war": 0.6043715846994535, "val_weighted_f1": 0.5993418213819048, "val_micro_f1": 0.6043715846994535, "val_macro_f1": 0.47807731912299356, "epoch": 93, "n_parameters": 521309229}
95
+ {"train_lr": 7.449331365942088e-07, "train_min_lr": 4.1997170781779625e-09, "train_loss": 1.5811712497925208, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.968408985893325, "val_loss": 1.285093213413276, "val_acc1": 60.60109328806726, "val_acc5": 91.69398928366073, "val_uar": 0.47864015111201286, "val_war": 0.6060109289617487, "val_weighted_f1": 0.5993675805788443, "val_micro_f1": 0.6060109289617487, "val_macro_f1": 0.47847349020432234, "epoch": 94, "n_parameters": 521309229}
96
+ {"train_lr": 5.924774198943271e-07, "train_min_lr": 3.340215942253686e-09, "train_loss": 1.5740775435474446, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9504562887814965, "val_loss": 1.2695201021783493, "val_acc1": 60.437158746667244, "val_acc5": 92.73224059141398, "val_uar": 0.46983817966547164, "val_war": 0.6043715846994535, "val_weighted_f1": 0.5966492633780238, "val_micro_f1": 0.6043715846994535, "val_macro_f1": 0.47041096522996917, "epoch": 95, "n_parameters": 521309229}
97
+ {"train_lr": 4.7028188378023254e-07, "train_min_lr": 2.6513129324590976e-09, "train_loss": 1.5815448203299305, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.878306733499659, "val_loss": 1.2730876102167017, "val_acc1": 60.54644850016943, "val_acc5": 92.45901650392292, "val_uar": 0.48077159689556864, "val_war": 0.6060109289617487, "val_weighted_f1": 0.601497916485124, "val_micro_f1": 0.6060109289617487, "val_macro_f1": 0.47974686609182127, "epoch": 96, "n_parameters": 521309229}
98
+ {"train_lr": 3.7848014727660956e-07, "train_min_lr": 2.1337613541210327e-09, "train_loss": 1.5839505657897923, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9580493752319033, "val_loss": 1.2773845710006415, "val_acc1": 60.382514369943756, "val_acc5": 92.2950821381449, "val_uar": 0.4737871159377967, "val_war": 0.6038251366120219, "val_weighted_f1": 0.5951341121399828, "val_micro_f1": 0.6038251366120219, "val_macro_f1": 0.4722525508172745, "epoch": 97, "n_parameters": 521309229}
99
+ {"train_lr": 3.171725942345054e-07, "train_min_lr": 1.7881271422917885e-09, "train_loss": 1.575606608744895, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9738029725480786, "val_loss": 1.2609659211308348, "val_acc1": 60.60109328806726, "val_acc5": 92.67759573055747, "val_uar": 0.4821856449712659, "val_war": 0.6060109289617487, "val_weighted_f1": 0.6008210668386866, "val_micro_f1": 0.6060109289617487, "val_macro_f1": 0.4815089072672193, "epoch": 98, "n_parameters": 521309229}
100
+ {"train_lr": 2.8642626356306314e-07, "train_min_lr": 1.6147882429074497e-09, "train_loss": 1.5832457799132507, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.87138027719932, "val_loss": 1.2771498917364608, "val_acc1": 61.09289674811024, "val_acc5": 92.13114758892789, "val_uar": 0.48575298985649723, "val_war": 0.6109289617486339, "val_weighted_f1": 0.6028302128939004, "val_micro_f1": 0.6109289617486339, "val_macro_f1": 0.48553352216451207, "epoch": 99, "n_parameters": 521309229}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 65.24356869184456, "Final Top-5 (best epoch)": 93.70552818828682}
103
+ Final UAR: 49.83%, Final WAR: 65.24%
104
+ Final Confusion Matrix:
105
+ [[204 24 0 3 13 9 12 0 12 1 0]
106
+ [ 16 73 2 6 8 6 7 2 6 2 0]
107
+ [ 3 1 67 1 1 19 32 0 1 0 0]
108
+ [ 4 0 0 217 19 2 0 0 0 0 0]
109
+ [ 12 5 1 15 166 10 9 2 7 0 1]
110
+ [ 18 8 4 7 3 239 4 1 9 0 1]
111
+ [ 8 7 3 10 6 12 162 1 4 0 1]
112
+ [ 2 5 0 19 20 0 0 1 0 0 0]
113
+ [ 24 28 4 3 9 30 20 0 59 6 0]
114
+ [ 0 5 0 13 9 7 4 2 8 1 3]
115
+ [ 2 7 0 1 3 7 1 0 8 4 3]]
116
+ Final Class Accuracies: ['73.38%', '57.03%', '53.60%', '89.67%', '72.81%', '81.29%', '75.70%', '2.13%', '32.24%', '1.92%', '8.33%']
117
+ Final Weighted F1: 0.6278, Final Micro F1: 0.6524, Final Macro F1: 0.4914
logs/AVF-MAE++_huge-MAFW (11-class)/eval_split04/log.txt ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.580357142857142e-06, "train_min_lr": 3.1460435902122946e-08, "train_loss": 1.7559320664051736, "train_loss_scale": 8597.544554455446, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.1889529587591396, "val_acc1": 62.67759585823518, "val_acc5": 95.02732244189319, "val_uar": 0.5061489083365004, "val_war": 0.6267759562841531, "val_weighted_f1": 0.6157512326311971, "val_micro_f1": 0.6267759562841531, "val_macro_f1": 0.5040391680706227, "epoch": 0, "n_parameters": 521309229}
2
+ {"train_lr": 1.6852678571428577e-05, "train_min_lr": 9.501051642441133e-08, "train_loss": 1.722956497972161, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.4534769601160935, "val_loss": 1.1586195551881604, "val_acc1": 63.82513680692579, "val_acc5": 94.20765009186958, "val_uar": 0.5070649914331976, "val_war": 0.6382513661202186, "val_weighted_f1": 0.6193473703031384, "val_micro_f1": 0.6382513661202186, "val_macro_f1": 0.5086333927731851, "epoch": 1, "n_parameters": 521309229}
3
+ {"train_lr": 2.8125000000000006e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.7457209018000674, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.3241234203376395, "val_loss": 1.1692189054161894, "val_acc1": 64.31694009812152, "val_acc5": 94.86338811572132, "val_uar": 0.516078362299048, "val_war": 0.6431693989071038, "val_weighted_f1": 0.6253084945592507, "val_micro_f1": 0.6431693989071038, "val_macro_f1": 0.5161785216688803, "epoch": 2, "n_parameters": 521309229}
4
+ {"train_lr": 3.939732142857144e-05, "train_min_lr": 2.2211067746898805e-07, "train_loss": 1.7445343879386537, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.242052647146848, "val_loss": 1.1240462155318727, "val_acc1": 64.8633884403875, "val_acc5": 94.20765054212893, "val_uar": 0.5120015664244529, "val_war": 0.6486338797814207, "val_weighted_f1": 0.623827197717058, "val_micro_f1": 0.6486338797814207, "val_macro_f1": 0.5122959566491044, "epoch": 3, "n_parameters": 521309229}
5
+ {"train_lr": 5.0669642857142856e-05, "train_min_lr": 2.856607579912764e-07, "train_loss": 1.7380291435978201, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.2762540189346465, "val_loss": 1.1509395411201553, "val_acc1": 64.31694012209365, "val_acc5": 94.15300549366435, "val_uar": 0.526542492467497, "val_war": 0.6431693989071038, "val_weighted_f1": 0.6243428349747808, "val_micro_f1": 0.6431693989071038, "val_macro_f1": 0.509019621955865, "epoch": 4, "n_parameters": 521309229}
6
+ {"train_lr": 5.624497522407655e-05, "train_min_lr": 3.170928656633569e-07, "train_loss": 1.7616050960403857, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.37814204999716, "val_loss": 1.1586979870118348, "val_acc1": 64.64480900477842, "val_acc5": 94.91803279105432, "val_uar": 0.5006262626442302, "val_war": 0.646448087431694, "val_weighted_f1": 0.6299217467422384, "val_micro_f1": 0.646448087431694, "val_macro_f1": 0.5029222083513042, "epoch": 5, "n_parameters": 521309229}
7
+ {"train_lr": 5.621453170303479e-05, "train_min_lr": 3.169212339168842e-07, "train_loss": 1.7160239313105152, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.374257819487317, "val_loss": 1.1564005122465246, "val_acc1": 63.77049203153516, "val_acc5": 94.04371595955938, "val_uar": 0.5132143300996226, "val_war": 0.6377049180327868, "val_weighted_f1": 0.6204556721397395, "val_micro_f1": 0.6377049180327868, "val_macro_f1": 0.5065865390284363, "epoch": 6, "n_parameters": 521309229}
8
+ {"train_lr": 5.615352646285501e-05, "train_min_lr": 3.1657730405733625e-07, "train_loss": 1.7149483937241456, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.268280088311375, "val_loss": 1.1338739143867118, "val_acc1": 64.26229535035097, "val_acc5": 94.69945342267145, "val_uar": 0.5179652390605696, "val_war": 0.6426229508196721, "val_weighted_f1": 0.6224371081070418, "val_micro_f1": 0.6426229508196721, "val_macro_f1": 0.5075852075836709, "epoch": 7, "n_parameters": 521309229}
9
+ {"train_lr": 5.60620262118716e-05, "train_min_lr": 3.1606145216696487e-07, "train_loss": 1.7077526621299215, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.2782002770074525, "val_loss": 1.1623963576321508, "val_acc1": 64.59016416685178, "val_acc5": 94.75409816054047, "val_uar": 0.5092506109740345, "val_war": 0.6459016393442623, "val_weighted_f1": 0.6258474168348, "val_micro_f1": 0.6459016393442623, "val_macro_f1": 0.5055678385050805, "epoch": 8, "n_parameters": 521309229}
10
+ {"train_lr": 5.5940131004265686e-05, "train_min_lr": 3.1537424232223837e-07, "train_loss": 1.702571807816477, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.253167324727125, "val_loss": 1.1670214279609568, "val_acc1": 63.606557848675, "val_acc5": 95.35519146945308, "val_uar": 0.5058313598781295, "val_war": 0.6360655737704918, "val_weighted_f1": 0.6188571932111068, "val_micro_f1": 0.6360655737704918, "val_macro_f1": 0.5055453963010426, "epoch": 9, "n_parameters": 521309229}
11
+ {"train_lr": 5.57879741306571e-05, "train_min_lr": 3.1451642597703227e-07, "train_loss": 1.7121321559935907, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.221428000100769, "val_loss": 1.1394127604423785, "val_acc1": 64.37158497513317, "val_acc5": 95.19125661797862, "val_uar": 0.4917486834688072, "val_war": 0.6437158469945355, "val_weighted_f1": 0.6219357989600723, "val_micro_f1": 0.6437158469945355, "val_macro_f1": 0.490736941751602, "epoch": 10, "n_parameters": 521309229}
12
+ {"train_lr": 5.5605721972353206e-05, "train_min_lr": 3.134889411409257e-07, "train_loss": 1.7053349401691171, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.24576222778547, "val_loss": 1.1515670182073818, "val_acc1": 64.37158491520282, "val_acc5": 94.15300523101307, "val_uar": 0.49061522008166975, "val_war": 0.6437158469945355, "val_weighted_f1": 0.619636527387167, "val_micro_f1": 0.6437158469945355, "val_macro_f1": 0.4901770313080672, "epoch": 11, "n_parameters": 521309229}
13
+ {"train_lr": 5.5393573819413314e-05, "train_min_lr": 3.1229291135350213e-07, "train_loss": 1.6914995816281133, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.282404779207589, "val_loss": 1.1616083038788216, "val_acc1": 63.44262303941237, "val_acc5": 95.1366122469876, "val_uar": 0.5177720651943997, "val_war": 0.6344262295081967, "val_weighted_f1": 0.6135281541842168, "val_micro_f1": 0.6344262295081967, "val_macro_f1": 0.510014132366045, "epoch": 12, "n_parameters": 521309229}
14
+ {"train_lr": 5.5151761652727875e-05, "train_min_lr": 3.109296444557738e-07, "train_loss": 1.688453719757571, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.2793339667934, "val_loss": 1.1415013331992954, "val_acc1": 65.13661245961006, "val_acc5": 94.26229521954646, "val_uar": 0.513500910317323, "val_war": 0.6513661202185792, "val_weighted_f1": 0.6347871539381702, "val_micro_f1": 0.6513661202185792, "val_macro_f1": 0.5168589451078804, "epoch": 13, "n_parameters": 521309229}
15
+ {"train_lr": 5.4880549890350996e-05, "train_min_lr": 3.094006311600778e-07, "train_loss": 1.6915760227162453, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.266283363398939, "val_loss": 1.1632920737360037, "val_acc1": 64.64480915330147, "val_acc5": 94.80874305891861, "val_uar": 0.506458919580375, "val_war": 0.646448087431694, "val_weighted_f1": 0.6278708205303529, "val_micro_f1": 0.646448087431694, "val_macro_f1": 0.5020486353525938, "epoch": 14, "n_parameters": 521309229}
16
+ {"train_lr": 5.458023509836289e-05, "train_min_lr": 3.077075434200046e-07, "train_loss": 1.6651931735548642, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.180561044428608, "val_loss": 1.1673594944032968, "val_acc1": 64.09836094392453, "val_acc5": 94.26229510281256, "val_uar": 0.49665461471488115, "val_war": 0.6409836065573771, "val_weighted_f1": 0.6163032335349221, "val_micro_f1": 0.6409836065573771, "val_macro_f1": 0.4945098411538147, "epoch": 15, "n_parameters": 521309229}
17
+ {"train_lr": 5.425114566657945e-05, "train_min_lr": 3.058522326021405e-07, "train_loss": 1.6826864761291165, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.206670057655561, "val_loss": 1.171665359361499, "val_acc1": 63.11475416089667, "val_acc5": 94.20765027947765, "val_uar": 0.49102353789062636, "val_war": 0.6311475409836066, "val_weighted_f1": 0.611695623052317, "val_micro_f1": 0.6311475409836066, "val_macro_f1": 0.48584126430531194, "epoch": 16, "n_parameters": 521309229}
18
+ {"train_lr": 5.389364144946269e-05, "train_min_lr": 3.0383672746162495e-07, "train_loss": 1.6785975691705648, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.071134453952903, "val_loss": 1.2208479613650078, "val_acc1": 63.060109589790386, "val_acc5": 94.26229510698163, "val_uar": 0.49140920903771124, "val_war": 0.6306010928961748, "val_weighted_f1": 0.6166799798511736, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.4899628722906542, "epoch": 17, "n_parameters": 521309229}
19
+ {"train_lr": 5.3508113372625154e-05, "train_min_lr": 3.0166323192373806e-07, "train_loss": 1.6737897318385222, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.18303909868297, "val_loss": 1.1421065295443815, "val_acc1": 63.333333397953886, "val_acc5": 94.75409831062692, "val_uar": 0.4911739575514879, "val_war": 0.6333333333333333, "val_weighted_f1": 0.6175151275832066, "val_micro_f1": 0.6333333333333333, "val_macro_f1": 0.48771328453747537, "epoch": 18, "n_parameters": 521309229}
20
+ {"train_lr": 5.309498300535867e-05, "train_min_lr": 2.993341226739393e-07, "train_loss": 1.6765941229983918, "train_loss_scale": 8435.326732673268, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.1958216782294067, "val_acc1": 63.38797830102222, "val_acc5": 93.98907098405348, "val_uar": 0.5144689015940584, "val_war": 0.6338797814207651, "val_weighted_f1": 0.622491350837422, "val_micro_f1": 0.6338797814207651, "val_macro_f1": 0.5052111993283305, "epoch": 19, "n_parameters": 521309229}
21
+ {"train_lr": 5.265470209965427e-05, "train_min_lr": 2.9685194655899794e-07, "train_loss": 1.6745965006721295, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1672601156895706, "val_loss": 1.1644434914285062, "val_acc1": 63.71584710918489, "val_acc5": 94.42622962076156, "val_uar": 0.5077412904572818, "val_war": 0.6371584699453552, "val_weighted_f1": 0.6256033413903452, "val_micro_f1": 0.6371584699453552, "val_macro_f1": 0.5017799040371574, "epoch": 20, "n_parameters": 521309229}
22
+ {"train_lr": 5.218775209621823e-05, "train_min_lr": 2.9421941780205285e-07, "train_loss": 1.6594243226665082, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.229989365775986, "val_loss": 1.1645209388990028, "val_acc1": 64.04371601688406, "val_acc5": 94.64480880778996, "val_uar": 0.4870060959645905, "val_war": 0.6404371584699453, "val_weighted_f1": 0.6166684840437688, "val_micro_f1": 0.6404371584699453, "val_macro_f1": 0.48687326937310754, "epoch": 21, "n_parameters": 521309229}
23
+ {"train_lr": 5.1694643598023545e-05, "train_min_lr": 2.9143941503464896e-07, "train_loss": 1.6499933161751272, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.276027306471721, "val_loss": 1.191012574469342, "val_acc1": 63.278688836228, "val_acc5": 93.87978158961228, "val_uar": 0.5002093062058027, "val_war": 0.6327868852459017, "val_weighted_f1": 0.6124742567180886, "val_micro_f1": 0.6327868852459017, "val_macro_f1": 0.49919066232448744, "epoch": 22, "n_parameters": 521309229}
24
+ {"train_lr": 5.117591581197337e-05, "train_min_lr": 2.88514978148997e-07, "train_loss": 1.6285497986837583, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.181794872378359, "val_loss": 1.1709421449432187, "val_acc1": 63.00546474248334, "val_acc5": 94.31693963222817, "val_uar": 0.4972784108886429, "val_war": 0.6300546448087432, "val_weighted_f1": 0.6156073272553098, "val_micro_f1": 0.6300546448087432, "val_macro_f1": 0.49610360942619974, "epoch": 23, "n_parameters": 521309229}
25
+ {"train_lr": 5.0632135959285794e-05, "train_min_lr": 2.854493049738955e-07, "train_loss": 1.6370501412023413, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.198240657844166, "val_loss": 1.1734455438221203, "val_acc1": 64.04371615863238, "val_acc5": 94.3715847202989, "val_uar": 0.5107432508647395, "val_war": 0.6404371584699453, "val_weighted_f1": 0.6269243818173234, "val_micro_f1": 0.6404371584699453, "val_macro_f1": 0.5093580714645616, "epoch": 24, "n_parameters": 521309229}
26
+ {"train_lr": 5.0063898655246014e-05, "train_min_lr": 2.822457477779514e-07, "train_loss": 1.6538471420212548, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.241775765277372, "val_loss": 1.1831691200826682, "val_acc1": 62.677596032814904, "val_acc5": 94.09836055359554, "val_uar": 0.48130834022929936, "val_war": 0.6267759562841531, "val_weighted_f1": 0.6059551611894303, "val_micro_f1": 0.6267759562841531, "val_macro_f1": 0.47891932396159537, "epoch": 25, "n_parameters": 521309229}
27
+ {"train_lr": 4.9471825259003246e-05, "train_min_lr": 2.789078096039244e-07, "train_loss": 1.6355376942913131, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.191488681453289, "val_loss": 1.1647712304895999, "val_acc1": 65.24590229701475, "val_acc5": 93.5519126350111, "val_uar": 0.5180007136843774, "val_war": 0.6524590163934426, "val_weighted_f1": 0.632021847317083, "val_micro_f1": 0.6524590163934426, "val_macro_f1": 0.5174780700869774, "epoch": 26, "n_parameters": 521309229}
28
+ {"train_lr": 4.885656319412359e-05, "train_min_lr": 2.754391404382005e-07, "train_loss": 1.6244253683404954, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.289500484372129, "val_loss": 1.184884520430191, "val_acc1": 62.40437206779021, "val_acc5": 93.93442641294719, "val_uar": 0.4834117102704831, "val_war": 0.6240437158469946, "val_weighted_f1": 0.5970160261830982, "val_micro_f1": 0.6240437158469946, "val_macro_f1": 0.4812503407127346, "epoch": 27, "n_parameters": 521309229}
29
+ {"train_lr": 4.821878524064173e-05, "train_min_lr": 2.7184353321958644e-07, "train_loss": 1.6496203253568202, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.226252534601948, "val_loss": 1.198185933279056, "val_acc1": 62.73224094786931, "val_acc5": 93.98907086731958, "val_uar": 0.5030469534391339, "val_war": 0.6273224043715847, "val_weighted_f1": 0.6105991070108787, "val_micro_f1": 0.6273224043715847, "val_macro_f1": 0.49165204657820055, "epoch": 28, "n_parameters": 521309229}
30
+ {"train_lr": 4.7559188799386115e-05, "train_min_lr": 2.6812491969178504e-07, "train_loss": 1.623557572415953, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.207994831670629, "val_loss": 1.1710960596215492, "val_acc1": 62.896174705484526, "val_acc5": 94.37158461190312, "val_uar": 0.49536688817200897, "val_war": 0.6289617486338798, "val_weighted_f1": 0.6124447661872162, "val_micro_f1": 0.6289617486338798, "val_macro_f1": 0.4919569445980117, "epoch": 29, "n_parameters": 521309229}
31
+ {"train_lr": 4.6878495129381106e-05, "train_min_lr": 2.6428736610409306e-07, "train_loss": 1.6417094029412411, "train_loss_scale": 4744.8712871287125, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.2055240156603795, "val_acc1": 63.38797826714855, "val_acc5": 93.00546445377537, "val_uar": 0.4887980223473255, "val_war": 0.6338797814207651, "val_weighted_f1": 0.614228190206114, "val_micro_f1": 0.6338797814207651, "val_macro_f1": 0.48716427371879795, "epoch": 30, "n_parameters": 521309229}
32
+ {"train_lr": 4.6177448559161015e-05, "train_min_lr": 2.603350687650165e-07, "train_loss": 1.6168324683759079, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.157914076701249, "val_loss": 1.174209758347156, "val_acc1": 63.00546489256978, "val_acc5": 94.42622939354735, "val_uar": 0.4955721013212894, "val_war": 0.6300546448087432, "val_weighted_f1": 0.616097994434496, "val_micro_f1": 0.6300546448087432, "val_macro_f1": 0.4955236010691865, "epoch": 31, "n_parameters": 521309229}
33
+ {"train_lr": 4.545681567285774e-05, "train_min_lr": 2.5627234945366804e-07, "train_loss": 1.6221515946459062, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.27422027068563, "val_loss": 1.1848889294208265, "val_acc1": 63.93442656511817, "val_acc5": 93.1147541066988, "val_uar": 0.490131165089661, "val_war": 0.639344262295082, "val_weighted_f1": 0.6162999961366353, "val_micro_f1": 0.639344262295082, "val_macro_f1": 0.4894323634134121, "epoch": 32, "n_parameters": 521309229}
34
+ {"train_lr": 4.47173844719522e-05, "train_min_lr": 2.5210365069396683e-07, "train_loss": 1.6151603261629741, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.173089999963741, "val_loss": 1.1834792009755677, "val_acc1": 63.825137044562666, "val_acc5": 94.15300538109952, "val_uar": 0.4984239259575914, "val_war": 0.6382513661202186, "val_weighted_f1": 0.6221836941428454, "val_micro_f1": 0.6382513661202186, "val_macro_f1": 0.4986658050252985, "epoch": 33, "n_parameters": 521309229}
35
+ {"train_lr": 4.395996351360636e-05, "train_min_lr": 2.478335308968022e-07, "train_loss": 1.6108138050773355, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.180067661965247, "val_loss": 1.2158120064174427, "val_acc1": 63.606557840858, "val_acc5": 93.9890707526702, "val_uar": 0.4964459588099212, "val_war": 0.6360655737704918, "val_weighted_f1": 0.6109686820366507, "val_micro_f1": 0.6360655737704918, "val_macro_f1": 0.49624384352842243, "epoch": 34, "n_parameters": 521309229}
36
+ {"train_lr": 4.318538102651787e-05, "train_min_lr": 2.434666593754791e-07, "train_loss": 1.6059092305871125, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.190589607352077, "val_loss": 1.199692680847411, "val_acc1": 62.459016853082375, "val_acc5": 94.20765005226343, "val_uar": 0.48755041383611786, "val_war": 0.6245901639344262, "val_weighted_f1": 0.599636138987233, "val_micro_f1": 0.6245901639344262, "val_macro_f1": 0.4797311908932047, "epoch": 35, "n_parameters": 521309229}
37
+ {"train_lr": 4.2394484005264044e-05, "train_min_lr": 2.390078112398925e-07, "train_loss": 1.6101756863074728, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.23281722021575, "val_loss": 1.2175143992199617, "val_acc1": 62.349726775956285, "val_acc5": 93.7704915281202, "val_uar": 0.4807078369440325, "val_war": 0.6234972677595628, "val_weighted_f1": 0.6044189158665126, "val_micro_f1": 0.6234972677595628, "val_macro_f1": 0.48247989750814163, "epoch": 36, "n_parameters": 521309229}
38
+ {"train_lr": 4.158813728412575e-05, "train_min_lr": 2.3446186217501424e-07, "train_loss": 1.6018125715822276, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.213456191638909, "val_loss": 1.1760121744637395, "val_acc1": 63.06010929534995, "val_acc5": 94.20764994803673, "val_uar": 0.508602843314666, "val_war": 0.6306010928961748, "val_weighted_f1": 0.616552674938224, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.5084968335967003, "epoch": 37, "n_parameters": 521309229}
39
+ {"train_lr": 4.0767222591403925e-05, "train_min_lr": 2.2983378310940412e-07, "train_loss": 1.6060426376833774, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.164150875393707, "val_loss": 1.1756736793938805, "val_acc1": 64.37158518723452, "val_acc5": 93.98907079227635, "val_uar": 0.5166153380936028, "val_war": 0.6437158469945355, "val_weighted_f1": 0.6330381015541333, "val_micro_f1": 0.6437158469945355, "val_macro_f1": 0.5209459105679476, "epoch": 38, "n_parameters": 521309229}
40
+ {"train_lr": 3.993263758526252e-05, "train_min_lr": 2.251286347795713e-07, "train_loss": 1.593926758262584, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.177579735765363, "val_loss": 1.1792752137955498, "val_acc1": 64.48087472759309, "val_acc5": 93.87978140408876, "val_uar": 0.5103251225233345, "val_war": 0.644808743169399, "val_weighted_f1": 0.628253553492932, "val_micro_f1": 0.644808743169399, "val_macro_f1": 0.5107220341662341, "epoch": 39, "n_parameters": 521309229}
41
+ {"train_lr": 3.9085294872152475e-05, "train_min_lr": 2.203515621961343e-07, "train_loss": 1.5956436707438415, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.329126176267567, "val_loss": 1.1835701173427058, "val_acc1": 64.04371631080336, "val_acc5": 94.31693974270847, "val_uar": 0.503311119805817, "val_war": 0.6404371584699453, "val_weighted_f1": 0.6261740142599206, "val_micro_f1": 0.6404371584699453, "val_macro_f1": 0.5049140736112709, "epoch": 40, "n_parameters": 521309229}
42
+ {"train_lr": 3.822612100889004e-05, "train_min_lr": 2.1550778901782692e-07, "train_loss": 1.5982704770447005, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.180764368264982, "val_loss": 1.203519345790732, "val_acc1": 63.06010930942056, "val_acc5": 93.82513649528795, "val_uar": 0.5083385021926017, "val_war": 0.6306010928961748, "val_weighted_f1": 0.6168126222408697, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.5109286556061307, "epoch": 41, "n_parameters": 521309229}
43
+ {"train_lr": 3.73560554894804e-05, "train_min_lr": 2.1060261183950398e-07, "train_loss": 1.5915228271051602, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.189342623890036, "val_loss": 1.2073613069221085, "val_acc1": 61.912568469125716, "val_acc5": 93.27868847456135, "val_uar": 0.49165970598358244, "val_war": 0.6191256830601093, "val_weighted_f1": 0.5996202958879989, "val_micro_f1": 0.6191256830601093, "val_macro_f1": 0.4838257267453936, "epoch": 42, "n_parameters": 521309229}
44
+ {"train_lr": 3.647604971779486e-05, "train_min_lr": 2.056413944003928e-07, "train_loss": 1.5930187576281356, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1694619348733735, "val_loss": 1.261495911023196, "val_acc1": 60.43715885037282, "val_acc5": 92.29508184005654, "val_uar": 0.472930902542236, "val_war": 0.6043715846994535, "val_weighted_f1": 0.5742378203492652, "val_micro_f1": 0.6043715846994535, "val_macro_f1": 0.46427543305549307, "epoch": 43, "n_parameters": 521309229}
45
+ {"train_lr": 3.558706596722466e-05, "train_min_lr": 2.006295617189234e-07, "train_loss": 1.565823487126001, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.09110954492399, "val_loss": 1.1765039288530164, "val_acc1": 63.98907127901504, "val_acc5": 93.49726788671941, "val_uar": 0.5199777337698747, "val_war": 0.6398907103825137, "val_weighted_f1": 0.6283187858449863, "val_micro_f1": 0.6398907103825137, "val_macro_f1": 0.5182752049587038, "epoch": 44, "n_parameters": 521309229}
46
+ {"train_lr": 3.469007632844911e-05, "train_min_lr": 1.9557259416054976e-07, "train_loss": 1.5866007294395181, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.312940293019361, "val_loss": 1.2086996719533323, "val_acc1": 62.896175124997, "val_acc5": 93.71584714670651, "val_uar": 0.4832410860542599, "val_war": 0.6289617486338798, "val_weighted_f1": 0.6101822086711677, "val_micro_f1": 0.6289617486338798, "val_macro_f1": 0.4853831297606062, "epoch": 45, "n_parameters": 521309229}
47
+ {"train_lr": 3.378606164646873e-05, "train_min_lr": 1.9047602144505153e-07, "train_loss": 1.5691035564976556, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.242805622591831, "val_loss": 1.198428681960293, "val_acc1": 62.56830625481944, "val_acc5": 93.60655742290893, "val_uar": 0.4875473916252203, "val_war": 0.6256830601092896, "val_weighted_f1": 0.612146861678895, "val_micro_f1": 0.6256830601092896, "val_macro_f1": 0.48337106446396805, "epoch": 46, "n_parameters": 521309229}
48
+ {"train_lr": 3.28760104480657e-05, "train_min_lr": 1.85345416599866e-07, "train_loss": 1.5726212684077399, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0879688758661255, "val_loss": 1.2086783483916639, "val_acc1": 62.56830614746594, "val_acc5": 93.66120225249743, "val_uar": 0.49230892641892826, "val_war": 0.6256830601092896, "val_weighted_f1": 0.6119287526570661, "val_micro_f1": 0.6256830601092896, "val_macro_f1": 0.495994650905857, "epoch": 47, "n_parameters": 521309229}
49
+ {"train_lr": 3.1960917860864196e-05, "train_min_lr": 1.8018638986606443e-07, "train_loss": 1.5399573527940429, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.080031479939376, "val_loss": 1.1805134246162339, "val_acc1": 64.04371599186965, "val_acc5": 93.87978128735485, "val_uar": 0.5012457062790308, "val_war": 0.6404371584699453, "val_weighted_f1": 0.6223668206731999, "val_micro_f1": 0.6404371584699453, "val_macro_f1": 0.5025872163762339, "epoch": 48, "n_parameters": 521309229}
50
+ {"train_lr": 3.104178452517305e-05, "train_min_lr": 1.750045825636361e-07, "train_loss": 1.5734995669460927, "train_loss_scale": 4258.217821782178, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.065998183618678, "val_loss": 1.2077203352077335, "val_acc1": 62.950819907683496, "val_acc5": 93.93442599604039, "val_uar": 0.49268526292450315, "val_war": 0.6295081967213115, "val_weighted_f1": 0.614633093744219, "val_micro_f1": 0.6295081967213115, "val_macro_f1": 0.49406911414497173, "epoch": 49, "n_parameters": 521309229}
51
+ {"train_lr": 3.011961549980036e-05, "train_min_lr": 1.698056609227879e-07, "train_loss": 1.5472727106152588, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.148596513389361, "val_loss": 1.2296593352860095, "val_acc1": 61.85792374636958, "val_acc5": 93.6065575375583, "val_uar": 0.4874685372278788, "val_war": 0.6185792349726776, "val_weighted_f1": 0.6009939133126593, "val_micro_f1": 0.6185792349726776, "val_macro_f1": 0.48454456459879724, "epoch": 50, "n_parameters": 521309229}
52
+ {"train_lr": 2.919541916303608e-05, "train_min_lr": 1.6459530988800415e-07, "train_loss": 1.56141929195659, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.126638983735944, "val_loss": 1.1871724213455237, "val_acc1": 62.677596040631904, "val_acc5": 93.66120217745421, "val_uar": 0.4928862232551686, "val_war": 0.6267759562841531, "val_weighted_f1": 0.6089219386549177, "val_micro_f1": 0.6267759562841531, "val_macro_f1": 0.4885593949594596, "epoch": 51, "n_parameters": 521309229}
53
+ {"train_lr": 2.8270206110005638e-05, "train_min_lr": 1.593792269016439e-07, "train_loss": 1.571902870148322, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.107272811455302, "val_loss": 1.2205773270597644, "val_acc1": 63.87978210344992, "val_acc5": 93.87978136448261, "val_uar": 0.5109105245934716, "val_war": 0.6387978142076502, "val_weighted_f1": 0.6237529625003214, "val_micro_f1": 0.6387978142076502, "val_macro_f1": 0.5093217994775971, "epoch": 52, "n_parameters": 521309229}
54
+ {"train_lr": 2.7344988047598695e-05, "train_min_lr": 1.541631156738708e-07, "train_loss": 1.55233170913391, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1340192733424725, "val_loss": 1.192721645913872, "val_acc1": 62.95081965337034, "val_acc5": 94.20765005226343, "val_uar": 0.4911452102098656, "val_war": 0.6295081967213115, "val_weighted_f1": 0.6098797639867239, "val_micro_f1": 0.6295081967213115, "val_macro_f1": 0.4869856663199897, "epoch": 53, "n_parameters": 521309229}
55
+ {"train_lr": 2.6420776688182537e-05, "train_min_lr": 1.4895267994573047e-07, "train_loss": 1.5549141698741282, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.054456316598571, "val_loss": 1.189897164994595, "val_acc1": 63.005464882147116, "val_acc5": 94.48087406888034, "val_uar": 0.497183078174888, "val_war": 0.6300546448087432, "val_weighted_f1": 0.6167275798527491, "val_micro_f1": 0.6300546448087432, "val_macro_f1": 0.49645336753193037, "epoch": 54, "n_parameters": 521309229}
56
+ {"train_lr": 2.549858264330953e-05, "train_min_lr": 1.4375361725219248e-07, "train_loss": 1.5746408962573941, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.05757205321057, "val_loss": 1.204094776628064, "val_acc1": 64.04371626911268, "val_acc5": 94.04371583657186, "val_uar": 0.5082919129860487, "val_war": 0.6404371584699453, "val_weighted_f1": 0.623392028823687, "val_micro_f1": 0.6404371584699453, "val_macro_f1": 0.5094053220512773, "epoch": 55, "n_parameters": 521309229}
57
+ {"train_lr": 2.4579414318628305e-05, "train_min_lr": 1.3857161269198084e-07, "train_loss": 1.560842884944217, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.147138708888894, "val_loss": 1.2081995945350796, "val_acc1": 63.44262328330285, "val_acc5": 93.49726735933231, "val_uar": 0.504334399996843, "val_war": 0.6349726775956284, "val_weighted_f1": 0.6163120299556947, "val_micro_f1": 0.6349726775956284, "val_macro_f1": 0.50236120122089, "epoch": 56, "n_parameters": 521309229}
58
+ {"train_lr": 2.3664276811206737e-05, "train_min_lr": 1.3341233271100021e-07, "train_loss": 1.5477774356654768, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.151355405845265, "val_loss": 1.193540192117878, "val_acc1": 62.78688553252507, "val_acc5": 94.09836055568007, "val_uar": 0.5073642917765454, "val_war": 0.6278688524590164, "val_weighted_f1": 0.6200884340549733, "val_micro_f1": 0.6278688524590164, "val_macro_f1": 0.5044439042798355, "epoch": 57, "n_parameters": 521309229}
59
+ {"train_lr": 2.2754170810473282e-05, "train_min_lr": 1.282814189061623e-07, "train_loss": 1.547573801216119, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1601473128441535, "val_loss": 1.2033225905661489, "val_acc1": 64.1530057511043, "val_acc5": 93.33333307485111, "val_uar": 0.5050449326774359, "val_war": 0.6415300546448087, "val_weighted_f1": 0.6256996854356652, "val_micro_f1": 0.6415300546448087, "val_macro_f1": 0.5028877645310992, "epoch": 58, "n_parameters": 521309229}
60
+ {"train_lr": 2.1850091503977748e-05, "train_min_lr": 1.2318448185638122e-07, "train_loss": 1.5623788168721466, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1615022550715075, "val_loss": 1.1741273753783281, "val_acc1": 64.75409876505533, "val_acc5": 94.53551897142754, "val_uar": 0.5195132898038881, "val_war": 0.6475409836065574, "val_weighted_f1": 0.6335451113658694, "val_micro_f1": 0.6475409836065574, "val_macro_f1": 0.5216929234143209, "epoch": 59, "n_parameters": 521309229}
61
+ {"train_lr": 2.0953027489168203e-05, "train_min_lr": 1.1812709498748855e-07, "train_loss": 1.5527797320691665, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.039641748560538, "val_loss": 1.191617089451528, "val_acc1": 62.84153035585997, "val_acc5": 93.98907116540794, "val_uar": 0.49727951938277, "val_war": 0.6284153005464481, "val_weighted_f1": 0.6109211900023619, "val_micro_f1": 0.6284153005464481, "val_macro_f1": 0.4932890556801984, "epoch": 60, "n_parameters": 521309229}
62
+ {"train_lr": 2.006395969237425e-05, "train_min_lr": 1.1311478847777361e-07, "train_loss": 1.5266082320276266, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.014644072787596, "val_loss": 1.1731192834821402, "val_acc1": 63.825136769404175, "val_acc5": 94.75409830645785, "val_uar": 0.5068094042590786, "val_war": 0.6382513661202186, "val_weighted_f1": 0.6258188391114283, "val_micro_f1": 0.6382513661202186, "val_macro_f1": 0.5054998392763006, "epoch": 61, "n_parameters": 521309229}
63
+ {"train_lr": 1.918386029617857e-05, "train_min_lr": 1.0815304321081477e-07, "train_loss": 1.5532547834486063, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.287060593614484, "val_loss": 1.185653543647598, "val_acc1": 64.91803307976228, "val_acc5": 94.09836048063684, "val_uar": 0.5277564025587768, "val_war": 0.6491803278688525, "val_weighted_f1": 0.6387693106593432, "val_micro_f1": 0.6491803278688525, "val_macro_f1": 0.5283602132942918, "epoch": 62, "n_parameters": 521309229}
64
+ {"train_lr": 1.831369167634938e-05, "train_min_lr": 1.0324728478221384e-07, "train_loss": 1.5154787393489686, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.024376543441622, "val_loss": 1.2238127121738358, "val_acc1": 61.967213224192136, "val_acc5": 93.71584707791688, "val_uar": 0.5012505165693871, "val_war": 0.6196721311475409, "val_weighted_f1": 0.606523070477968, "val_micro_f1": 0.6196721311475409, "val_macro_f1": 0.49132439828398355, "epoch": 63, "n_parameters": 521309229}
65
+ {"train_lr": 1.74544053494968e-05, "train_min_lr": 9.840287756678695e-08, "train_loss": 1.5451680793620572, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.074304462659477, "val_loss": 1.203712778932908, "val_acc1": 62.67759573159974, "val_acc5": 93.66120190854933, "val_uar": 0.49613340735981015, "val_war": 0.6267759562841531, "val_weighted_f1": 0.6145457825586509, "val_micro_f1": 0.6267759562841531, "val_macro_f1": 0.4962201371197315, "epoch": 64, "n_parameters": 521309229}
66
+ {"train_lr": 1.6606940932603314e-05, "train_min_lr": 9.36251188526991e-08, "train_loss": 1.530171095242988, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.070512415158866, "val_loss": 1.1882517106977164, "val_acc1": 63.169399131191234, "val_acc5": 93.93442618990206, "val_uar": 0.5146718097218798, "val_war": 0.6316939890710382, "val_weighted_f1": 0.6194006905664913, "val_micro_f1": 0.6316939890710382, "val_macro_f1": 0.5178935878223323, "epoch": 65, "n_parameters": 521309229}
67
+ {"train_lr": 1.5772225115566454e-05, "train_min_lr": 8.891923304895814e-08, "train_loss": 1.560900739415644, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9605581429925296, "val_loss": 1.1911151581535153, "val_acc1": 62.13114752430734, "val_acc5": 94.37158468277728, "val_uar": 0.49307767548477205, "val_war": 0.6213114754098361, "val_weighted_f1": 0.6088617620010054, "val_micro_f1": 0.6213114754098361, "val_macro_f1": 0.48907478707434116, "epoch": 66, "n_parameters": 521309229}
68
+ {"train_lr": 1.4951170647876973e-05, "train_min_lr": 8.429036597259903e-08, "train_loss": 1.5360266649880425, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.140418142375379, "val_loss": 1.2013149948096742, "val_acc1": 63.06010950640903, "val_acc5": 94.31693981983622, "val_uar": 0.5014044308624469, "val_war": 0.6306010928961748, "val_weighted_f1": 0.6181993434197014, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.5035366276573742, "epoch": 67, "n_parameters": 521309229}
69
+ {"train_lr": 1.4144675340540693e-05, "train_min_lr": 7.974357922180958e-08, "train_loss": 1.5423205101450679, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.3931095977820975, "val_loss": 1.212714168955298, "val_acc1": 63.00546492904913, "val_acc5": 93.60655731242862, "val_uar": 0.49542416604387346, "val_war": 0.6300546448087432, "val_weighted_f1": 0.6132341770707712, "val_micro_f1": 0.6300546448087432, "val_macro_f1": 0.495918720203686, "epoch": 68, "n_parameters": 521309229}
70
+ {"train_lr": 1.3353621084335341e-05, "train_min_lr": 7.528384464114652e-08, "train_loss": 1.540855470368571, "train_loss_scale": 9246.415841584158, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2035948613695069, "val_acc1": 63.38797839743192, "val_acc5": 93.44262268816838, "val_uar": 0.4970993874990036, "val_war": 0.6338797814207651, "val_weighted_f1": 0.6155012587328725, "val_micro_f1": 0.6338797814207651, "val_macro_f1": 0.4936390605471225, "epoch": 69, "n_parameters": 521309229}
71
+ {"train_lr": 1.2578872885475928e-05, "train_min_lr": 7.091603888489658e-08, "train_loss": 1.5331838214554803, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.057976859630925, "val_loss": 1.1951269790822385, "val_acc1": 62.841530376705315, "val_acc5": 93.66120203153683, "val_uar": 0.5075188397213678, "val_war": 0.6284153005464481, "val_weighted_f1": 0.6162393743934268, "val_micro_f1": 0.6284153005464481, "val_macro_f1": 0.5077289526121244, "epoch": 70, "n_parameters": 521309229}
72
+ {"train_lr": 1.1821277919743169e-05, "train_min_lr": 6.664493808452678e-08, "train_loss": 1.5269721435635004, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.997482455602967, "val_loss": 1.195132297628066, "val_acc1": 62.95081980866813, "val_acc5": 93.33333333541786, "val_uar": 0.5012088891622671, "val_war": 0.6295081967213115, "val_weighted_f1": 0.6139207591282475, "val_micro_f1": 0.6295081967213115, "val_macro_f1": 0.49715874754103917, "epoch": 71, "n_parameters": 521309229}
73
+ {"train_lr": 1.1081664606109202e-05, "train_min_lr": 6.247521262605469e-08, "train_loss": 1.5477345579921609, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.031370637440445, "val_loss": 1.2176808443139582, "val_acc1": 63.06010966900268, "val_acc5": 93.77049175116534, "val_uar": 0.5085853351881315, "val_war": 0.6306010928961748, "val_weighted_f1": 0.6143241303760651, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.5090970152146678, "epoch": 72, "n_parameters": 521309229}
74
+ {"train_lr": 1.0360841700873597e-05, "train_min_lr": 5.841142204304979e-08, "train_loss": 1.5077361578595128, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1121663858394815, "val_loss": 1.2040918016550588, "val_acc1": 62.622951129225434, "val_acc5": 93.82513646401995, "val_uar": 0.5044856012763764, "val_war": 0.6262295081967213, "val_weighted_f1": 0.6127903209732758, "val_micro_f1": 0.6262295081967213, "val_macro_f1": 0.5045702399418656, "epoch": 73, "n_parameters": 521309229}
75
+ {"train_lr": 9.659597413300203e-06, "train_min_lr": 5.445801003085074e-08, "train_loss": 1.5327539935757224, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.126006799169106, "val_loss": 1.2121710943825104, "val_acc1": 62.67759593901087, "val_acc5": 94.04371557808965, "val_uar": 0.4902496001880173, "val_war": 0.6267759562841531, "val_weighted_f1": 0.6103933763949063, "val_micro_f1": 0.6267759562841531, "val_macro_f1": 0.48934527878015804, "epoch": 74, "n_parameters": 521309229}
76
+ {"train_lr": 8.978698543721923e-06, "train_min_lr": 5.0619299587449256e-08, "train_loss": 1.5474555262244574, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.997973364178497, "val_loss": 1.197301567769518, "val_acc1": 63.06010951943736, "val_acc5": 93.77049197421047, "val_uar": 0.5044397154216278, "val_war": 0.6306010928961748, "val_weighted_f1": 0.6160180422000103, "val_micro_f1": 0.6306010928961748, "val_macro_f1": 0.5028845163604007, "epoch": 75, "n_parameters": 521309229}
77
+ {"train_lr": 8.318889645055782e-06, "train_min_lr": 4.689948828635602e-08, "train_loss": 1.502774434121135, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.130060056648632, "val_loss": 1.1928608966808694, "val_acc1": 63.71584718631265, "val_acc5": 93.71584677357491, "val_uar": 0.510162639371741, "val_war": 0.6371584699453552, "val_weighted_f1": 0.6218715556855327, "val_micro_f1": 0.6371584699453552, "val_macro_f1": 0.5109602623241167, "epoch": 76, "n_parameters": 521309229}
78
+ {"train_lr": 7.680892208645253e-06, "train_min_lr": 4.3302643686614975e-08, "train_loss": 1.525367778716701, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.139192425378479, "val_loss": 1.1918011795656354, "val_acc1": 63.33333364184437, "val_acc5": 93.82513664954347, "val_uar": 0.5117724290023697, "val_war": 0.6333333333333333, "val_weighted_f1": 0.621430681547869, "val_micro_f1": 0.6333333333333333, "val_macro_f1": 0.5138430465374937, "epoch": 77, "n_parameters": 521309229}
79
+ {"train_lr": 7.065403875320073e-06, "train_min_lr": 3.983269888498758e-08, "train_loss": 1.5087236667623614, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.077566897515023, "val_loss": 1.1847669382890065, "val_acc1": 63.66120233170973, "val_acc5": 93.93442611485882, "val_uar": 0.505573553366814, "val_war": 0.6366120218579235, "val_weighted_f1": 0.6214398088352673, "val_micro_f1": 0.6366120218579235, "val_macro_f1": 0.5051809009901778, "epoch": 78, "n_parameters": 521309229}
80
+ {"train_lr": 6.473097672536222e-06, "train_min_lr": 3.649344821516926e-08, "train_loss": 1.5043983740775104, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9628835763081467, "val_loss": 1.1780525659229242, "val_acc1": 64.37158487768122, "val_acc5": 93.77049179077149, "val_uar": 0.5198148617552358, "val_war": 0.6437158469945355, "val_weighted_f1": 0.6289947499829459, "val_micro_f1": 0.6437158469945355, "val_macro_f1": 0.5223026080905795, "epoch": 79, "n_parameters": 521309229}
81
+ {"train_lr": 5.904621278430291e-06, "train_min_lr": 3.3288543098741024e-08, "train_loss": 1.503765761950622, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1304769209115815, "val_loss": 1.181772736942067, "val_acc1": 63.224044050414705, "val_acc5": 94.97267753726146, "val_uar": 0.5120293668824208, "val_war": 0.6322404371584699, "val_weighted_f1": 0.6217245642936367, "val_micro_f1": 0.6322404371584699, "val_macro_f1": 0.5130205393140347, "epoch": 80, "n_parameters": 521309229}
82
+ {"train_lr": 5.3605963135929456e-06, "train_min_lr": 3.0221488052393765e-08, "train_loss": 1.4962558634210341, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0431952358472465, "val_loss": 1.1885460650219637, "val_acc1": 64.15300574380844, "val_acc5": 93.87978140408876, "val_uar": 0.5138171811498217, "val_war": 0.6415300546448087, "val_weighted_f1": 0.628703435719314, "val_micro_f1": 0.6415300546448087, "val_macro_f1": 0.5137104557391431, "epoch": 81, "n_parameters": 521309229}
83
+ {"train_lr": 4.8416176613359425e-06, "train_min_lr": 2.7295636855790628e-08, "train_loss": 1.5261539953573309, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.073392060723635, "val_loss": 1.1973219443770016, "val_acc1": 63.98907129881812, "val_acc5": 93.9890707526702, "val_uar": 0.502570812609512, "val_war": 0.6398907103825137, "val_weighted_f1": 0.6237460053303464, "val_micro_f1": 0.6398907103825137, "val_macro_f1": 0.5017980774690198, "epoch": 82, "n_parameters": 521309229}
84
+ {"train_lr": 4.3482528171959625e-06, "train_min_lr": 2.4514188884258143e-08, "train_loss": 1.5093669228427875, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.111716969178454, "val_loss": 1.187333418458116, "val_acc1": 62.13114775360608, "val_acc5": 94.20765016691281, "val_uar": 0.4923486219356153, "val_war": 0.6213114754098361, "val_weighted_f1": 0.6060862511081248, "val_micro_f1": 0.6213114754098361, "val_macro_f1": 0.4903961670305088, "epoch": 83, "n_parameters": 521309229}
85
+ {"train_lr": 3.88104126838656e-06, "train_min_lr": 2.188018561031641e-08, "train_loss": 1.5296236853001535, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.139779515785746, "val_loss": 1.182542431880446, "val_acc1": 63.98907130507172, "val_acc5": 93.87978106014064, "val_uar": 0.5139424928763304, "val_war": 0.6398907103825137, "val_weighted_f1": 0.6232739384785294, "val_micro_f1": 0.6398907103825137, "val_macro_f1": 0.5134975039848492, "epoch": 84, "n_parameters": 521309229}
86
+ {"train_lr": 3.4404939038768243e-06, "train_min_lr": 1.9396507277873433e-08, "train_loss": 1.5002883436656234, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9957855668398414, "val_loss": 1.1799656497497184, "val_acc1": 63.224044047287904, "val_acc5": 94.3715847578205, "val_uar": 0.5008935075107332, "val_war": 0.6322404371584699, "val_weighted_f1": 0.6182258274289429, "val_micro_f1": 0.6322404371584699, "val_macro_f1": 0.5018766581423143, "epoch": 85, "n_parameters": 521309229}
87
+ {"train_lr": 3.027092455741798e-06, "train_min_lr": 1.7065869752720735e-08, "train_loss": 1.5194592149344215, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.080923087526076, "val_loss": 1.2053646807577096, "val_acc1": 63.934426459849206, "val_acc5": 93.06010923958867, "val_uar": 0.5142793436997644, "val_war": 0.639344262295082, "val_weighted_f1": 0.6274378804281658, "val_micro_f1": 0.639344262295082, "val_macro_f1": 0.5173775828386313, "epoch": 86, "n_parameters": 521309229}
88
+ {"train_lr": 2.641288972395518e-06, "train_min_lr": 1.4890821552773996e-08, "train_loss": 1.5029161748516284, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9913841167298876, "val_loss": 1.208089872025976, "val_acc1": 62.45901640803436, "val_acc5": 93.71584700287366, "val_uar": 0.49706007346920045, "val_war": 0.6245901639344262, "val_weighted_f1": 0.6106553395616129, "val_micro_f1": 0.6245901639344262, "val_macro_f1": 0.5008615172790484, "epoch": 87, "n_parameters": 521309229}
89
+ {"train_lr": 2.2835053242827143e-06, "train_min_lr": 1.2873741061306107e-08, "train_loss": 1.4910667265209034, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.011695670609427, "val_loss": 1.1850197420400732, "val_acc1": 63.224043841961304, "val_acc5": 93.98907090067212, "val_uar": 0.510990236736202, "val_war": 0.6322404371584699, "val_weighted_f1": 0.6175661839076751, "val_micro_f1": 0.6322404371584699, "val_macro_f1": 0.5133183992078193, "epoch": 88, "n_parameters": 521309229}
90
+ {"train_lr": 1.954132742569673e-06, "train_min_lr": 1.1016833926220049e-08, "train_loss": 1.5420554693382564, "train_loss_scale": 10706.376237623763, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.103642130842303, "val_loss": 1.2135984444735097, "val_acc1": 62.95081987224641, "val_acc5": 94.20765016899735, "val_uar": 0.4966837509148887, "val_war": 0.6295081967213115, "val_weighted_f1": 0.6127753468265025, "val_micro_f1": 0.6295081967213115, "val_macro_f1": 0.49813930647034543, "epoch": 89, "n_parameters": 521309229}
91
+ {"train_lr": 1.6535313913386933e-06, "train_min_lr": 9.322130648205152e-09, "train_loss": 1.506945277991468, "train_loss_scale": 9976.39603960396, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2123802482497459, "val_acc1": 62.1311478192689, "val_acc5": 93.44262321347095, "val_uar": 0.49834416447651575, "val_war": 0.6213114754098361, "val_weighted_f1": 0.6075704718128757, "val_micro_f1": 0.6213114754098361, "val_macro_f1": 0.4939637949355929, "epoch": 90, "n_parameters": 521309229}
92
+ {"train_lr": 1.3820299737539636e-06, "train_min_lr": 7.791484360414586e-09, "train_loss": 1.51509560393815, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0612817211906505, "val_loss": 1.2031866399680866, "val_acc1": 62.841530382958915, "val_acc5": 93.6065572373854, "val_uar": 0.5074676493664442, "val_war": 0.6284153005464481, "val_weighted_f1": 0.6165941590874061, "val_micro_f1": 0.6284153005464481, "val_macro_f1": 0.5103604526981343, "epoch": 91, "n_parameters": 521309229}
93
+ {"train_lr": 1.139925372629472e-06, "train_min_lr": 6.426568802091309e-09, "train_loss": 1.5015546263444541, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.065088885845524, "val_loss": 1.2015120781519835, "val_acc1": 64.20765038161981, "val_acc5": 94.09836055568007, "val_uar": 0.5112891417003128, "val_war": 0.6420765027322405, "val_weighted_f1": 0.62612750365353, "val_micro_f1": 0.6420765027322405, "val_macro_f1": 0.5145500188808516, "epoch": 92, "n_parameters": 521309229}
94
+ {"train_lr": 9.274823257919996e-07, "train_min_lr": 5.22887648835886e-09, "train_loss": 1.4811865176894876, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.990803664273555, "val_loss": 1.1962744243004744, "val_acc1": 62.89617495406521, "val_acc5": 93.49726765950521, "val_uar": 0.4986358020945847, "val_war": 0.6289617486338798, "val_weighted_f1": 0.6137519429087593, "val_micro_f1": 0.6289617486338798, "val_macro_f1": 0.5001838330874289, "epoch": 93, "n_parameters": 521309229}
95
+ {"train_lr": 7.449331365942088e-07, "train_min_lr": 4.1997170781779625e-09, "train_loss": 1.5100022620297109, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.969932759162223, "val_loss": 1.1810869317428738, "val_acc1": 63.16939907282428, "val_acc5": 94.15300530605629, "val_uar": 0.5021229944576877, "val_war": 0.6316939890710382, "val_weighted_f1": 0.6155902402634872, "val_micro_f1": 0.6316939890710382, "val_macro_f1": 0.5061919742604649, "epoch": 94, "n_parameters": 521309229}
96
+ {"train_lr": 5.924774198943271e-07, "train_min_lr": 3.340215942253686e-09, "train_loss": 1.5041430742040325, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.114081795852964, "val_loss": 1.2108579342271768, "val_acc1": 61.74863404341734, "val_acc5": 93.77049193668887, "val_uar": 0.49870415390434936, "val_war": 0.6180327868852459, "val_weighted_f1": 0.6019801481568056, "val_micro_f1": 0.6180327868852459, "val_macro_f1": 0.49474437150737266, "epoch": 95, "n_parameters": 521309229}
97
+ {"train_lr": 4.7028188378023254e-07, "train_min_lr": 2.6513129324590976e-09, "train_loss": 1.5113116258835242, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0325003260433085, "val_loss": 1.1829310272838556, "val_acc1": 63.87978153958347, "val_acc5": 93.44262286952285, "val_uar": 0.51032136203537, "val_war": 0.6387978142076502, "val_weighted_f1": 0.6261792126842809, "val_micro_f1": 0.6387978142076502, "val_macro_f1": 0.5113817592729959, "epoch": 96, "n_parameters": 521309229}
98
+ {"train_lr": 3.7848014727660956e-07, "train_min_lr": 2.1337613541210327e-09, "train_loss": 1.5071686615448188, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.953298528595726, "val_loss": 1.203605141417653, "val_acc1": 62.40437175667351, "val_acc5": 94.42622939354735, "val_uar": 0.49619051812921927, "val_war": 0.6240437158469946, "val_weighted_f1": 0.6112827175809766, "val_micro_f1": 0.6240437158469946, "val_macro_f1": 0.4928728524103685, "epoch": 97, "n_parameters": 521309229}
99
+ {"train_lr": 3.171725942345054e-07, "train_min_lr": 1.7881271422917885e-09, "train_loss": 1.5120015593841918, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.024650616220908, "val_loss": 1.2056276111626159, "val_acc1": 62.07650286252381, "val_acc5": 94.09836063072329, "val_uar": 0.4962703097812459, "val_war": 0.6207650273224044, "val_weighted_f1": 0.608746363809399, "val_micro_f1": 0.6207650273224044, "val_macro_f1": 0.4963095730144606, "epoch": 98, "n_parameters": 521309229}
100
+ {"train_lr": 2.8642626356306314e-07, "train_min_lr": 1.6147882429074497e-09, "train_loss": 1.5231578353017863, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.036374486318909, "val_loss": 1.2083441238192951, "val_acc1": 62.0765027301559, "val_acc5": 93.71584692157683, "val_uar": 0.493969460224504, "val_war": 0.6207650273224044, "val_weighted_f1": 0.6067067243651304, "val_micro_f1": 0.6207650273224044, "val_macro_f1": 0.4929458086759394, "epoch": 99, "n_parameters": 521309229}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 66.64844177145982, "Final Top-5 (best epoch)": 94.6965554948059}
103
+ Final UAR: 52.96%, Final WAR: 66.65%
104
+ Final Confusion Matrix:
105
+ [[232 7 4 1 1 10 14 1 6 0 2]
106
+ [ 23 45 0 2 4 18 12 0 21 2 1]
107
+ [ 4 0 74 0 1 16 26 0 4 0 0]
108
+ [ 3 3 0 206 27 0 6 1 1 1 0]
109
+ [ 9 6 0 24 168 4 4 2 4 1 3]
110
+ [ 7 2 10 0 5 253 4 0 12 0 0]
111
+ [ 5 1 29 0 4 9 160 0 4 1 1]
112
+ [ 2 8 0 4 12 5 9 3 1 0 3]
113
+ [ 23 6 12 1 3 50 25 0 58 4 1]
114
+ [ 2 4 0 0 3 7 2 4 8 9 13]
115
+ [ 0 1 0 0 4 7 3 0 3 7 11]]
116
+ Final Class Accuracies: ['83.45%', '35.16%', '59.20%', '83.06%', '74.67%', '86.35%', '74.77%', '6.38%', '31.69%', '17.31%', '30.56%']
117
+ Final Weighted F1: 0.6473, Final Micro F1: 0.6665, Final Macro F1: 0.5300
logs/AVF-MAE++_huge-MAFW (11-class)/eval_split05/log.txt ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.580357142857142e-06, "train_min_lr": 3.1460435902122946e-08, "train_loss": 1.7658696511987806, "train_loss_scale": 5839.841584158416, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.3041091257450628, "val_acc1": 59.64052350848329, "val_acc5": 92.10239709592334, "val_uar": 0.44956262327776486, "val_war": 0.5964052287581699, "val_weighted_f1": 0.5769667461818858, "val_micro_f1": 0.5964052287581699, "val_macro_f1": 0.4423816217141358, "epoch": 0, "n_parameters": 521309229}
2
+ {"train_lr": 1.6852678571428577e-05, "train_min_lr": 9.501051642441133e-08, "train_loss": 1.724972432792777, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.311159176401572, "val_loss": 1.2735539134226592, "val_acc1": 59.53159093389324, "val_acc5": 92.75599154304055, "val_uar": 0.45187675051707704, "val_war": 0.5953159041394336, "val_weighted_f1": 0.5770928479949653, "val_micro_f1": 0.5953159041394336, "val_macro_f1": 0.4456271484646228, "epoch": 1, "n_parameters": 521309229}
3
+ {"train_lr": 2.8125000000000006e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.7400294802959997, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.212538919826545, "val_loss": 1.2982290729003794, "val_acc1": 59.36819236886267, "val_acc5": 92.21132944144455, "val_uar": 0.4394977169631436, "val_war": 0.593681917211329, "val_weighted_f1": 0.5665345282395093, "val_micro_f1": 0.593681917211329, "val_macro_f1": 0.4271575213937261, "epoch": 2, "n_parameters": 521309229}
4
+ {"train_lr": 3.939732142857144e-05, "train_min_lr": 2.2211067746898805e-07, "train_loss": 1.7393560967232922, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.145647242517755, "val_loss": 1.2686221357069762, "val_acc1": 60.02178691415226, "val_acc5": 92.04793036217784, "val_uar": 0.45509877722226777, "val_war": 0.6002178649237473, "val_weighted_f1": 0.5822377872499725, "val_micro_f1": 0.6002178649237473, "val_macro_f1": 0.4451638206501565, "epoch": 3, "n_parameters": 521309229}
5
+ {"train_lr": 5.0669642857142856e-05, "train_min_lr": 2.856607579912764e-07, "train_loss": 1.74848208136291, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.169458240565687, "val_loss": 1.3141510688206728, "val_acc1": 58.44226635203642, "val_acc5": 91.72113306382123, "val_uar": 0.4349253810962302, "val_war": 0.5844226579520697, "val_weighted_f1": 0.555888562536713, "val_micro_f1": 0.5844226579520697, "val_macro_f1": 0.42125627506334884, "epoch": 4, "n_parameters": 521309229}
6
+ {"train_lr": 5.624497522407655e-05, "train_min_lr": 3.170928656633569e-07, "train_loss": 1.7414702782929927, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.313244222414376, "val_loss": 1.2830232069188474, "val_acc1": 61.27451038828083, "val_acc5": 91.77559916178386, "val_uar": 0.45852568741515826, "val_war": 0.6127450980392157, "val_weighted_f1": 0.5883197663368804, "val_micro_f1": 0.6127450980392157, "val_macro_f1": 0.44780925854760384, "epoch": 5, "n_parameters": 521309229}
7
+ {"train_lr": 5.621453170303479e-05, "train_min_lr": 3.169212339168842e-07, "train_loss": 1.7126924985706216, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.105388825482661, "val_loss": 1.3447147894139384, "val_acc1": 58.49673257154577, "val_acc5": 90.84967343947467, "val_uar": 0.4349649327694657, "val_war": 0.5849673202614379, "val_weighted_f1": 0.5616889452256948, "val_micro_f1": 0.5849673202614379, "val_macro_f1": 0.4207816334144133, "epoch": 6, "n_parameters": 521309229}
8
+ {"train_lr": 5.615352646285501e-05, "train_min_lr": 3.1657730405733625e-07, "train_loss": 1.7216858477285593, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.096101871811517, "val_loss": 1.2850848725613426, "val_acc1": 60.294118310890944, "val_acc5": 91.5577345455394, "val_uar": 0.44842371692655875, "val_war": 0.6029411764705882, "val_weighted_f1": 0.5804238165852896, "val_micro_f1": 0.6029411764705882, "val_macro_f1": 0.43843061476076295, "epoch": 7, "n_parameters": 521309229}
9
+ {"train_lr": 5.60620262118716e-05, "train_min_lr": 3.1606145216696487e-07, "train_loss": 1.7017982166395722, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.171187849328069, "val_loss": 1.2844446733886121, "val_acc1": 60.29411844646229, "val_acc5": 92.42919431948194, "val_uar": 0.4658321733241043, "val_war": 0.6029411764705882, "val_weighted_f1": 0.578270746802023, "val_micro_f1": 0.6029411764705882, "val_macro_f1": 0.4521662381804395, "epoch": 8, "n_parameters": 521309229}
10
+ {"train_lr": 5.5940131004265686e-05, "train_min_lr": 3.1537424232223837e-07, "train_loss": 1.6993187506600183, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0877472716983005, "val_loss": 1.3270105456604677, "val_acc1": 58.76906374856537, "val_acc5": 92.10239664713542, "val_uar": 0.4404781664409577, "val_war": 0.5876906318082789, "val_weighted_f1": 0.5589449313409994, "val_micro_f1": 0.5876906318082789, "val_macro_f1": 0.42511595171972616, "epoch": 9, "n_parameters": 521309229}
11
+ {"train_lr": 5.57879741306571e-05, "train_min_lr": 3.1451642597703227e-07, "train_loss": 1.7048560122255445, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.07398527919656, "val_loss": 1.3307090942181794, "val_acc1": 58.60566483759413, "val_acc5": 91.17647077523026, "val_uar": 0.4383523944443005, "val_war": 0.5860566448801743, "val_weighted_f1": 0.5606617941537122, "val_micro_f1": 0.5860566448801743, "val_macro_f1": 0.4246940262405819, "epoch": 10, "n_parameters": 521309229}
12
+ {"train_lr": 5.5605721972353206e-05, "train_min_lr": 3.134889411409257e-07, "train_loss": 1.6985368185704297, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.185714426607189, "val_loss": 1.3352473635299533, "val_acc1": 58.932462304246194, "val_acc5": 90.41394383299584, "val_uar": 0.4240721925466908, "val_war": 0.5893246187363834, "val_weighted_f1": 0.5605785870899941, "val_micro_f1": 0.5893246187363834, "val_macro_f1": 0.4137373800610057, "epoch": 11, "n_parameters": 521309229}
13
+ {"train_lr": 5.5393573819413314e-05, "train_min_lr": 3.1229291135350213e-07, "train_loss": 1.6776498244343812, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.165417503602434, "val_loss": 1.3039026871031405, "val_acc1": 59.204793392443186, "val_acc5": 92.37472769793342, "val_uar": 0.44320079774137305, "val_war": 0.5920479302832244, "val_weighted_f1": 0.5725414896089491, "val_micro_f1": 0.5920479302832244, "val_macro_f1": 0.4318570035079899, "epoch": 12, "n_parameters": 521309229}
14
+ {"train_lr": 5.5151761652727875e-05, "train_min_lr": 3.109296444557738e-07, "train_loss": 1.6811503638135326, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1824922278375904, "val_loss": 1.31155741360842, "val_acc1": 60.29411817999447, "val_acc5": 92.04793062397079, "val_uar": 0.4514491407819957, "val_war": 0.6029411764705882, "val_weighted_f1": 0.5795846397074121, "val_micro_f1": 0.6029411764705882, "val_macro_f1": 0.44216721337394466, "epoch": 13, "n_parameters": 521309229}
15
+ {"train_lr": 5.4880549890350996e-05, "train_min_lr": 3.094006311600778e-07, "train_loss": 1.6698853363691777, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.082694346361821, "val_loss": 1.2880195826292038, "val_acc1": 59.74945595685173, "val_acc5": 91.83006548414043, "val_uar": 0.4342410355621344, "val_war": 0.5974945533769063, "val_weighted_f1": 0.5645306527236915, "val_micro_f1": 0.5974945533769063, "val_macro_f1": 0.4230006499125188, "epoch": 14, "n_parameters": 521309229}
16
+ {"train_lr": 5.458023509836289e-05, "train_min_lr": 3.077075434200046e-07, "train_loss": 1.6676817936669088, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.263041779546454, "val_loss": 1.2830875445814693, "val_acc1": 60.620915403553084, "val_acc5": 92.2113291796516, "val_uar": 0.45324604981716066, "val_war": 0.6062091503267973, "val_weighted_f1": 0.5899054407007335, "val_micro_f1": 0.6062091503267973, "val_macro_f1": 0.448555678506194, "epoch": 15, "n_parameters": 521309229}
17
+ {"train_lr": 5.425114566657945e-05, "train_min_lr": 3.058522326021405e-07, "train_loss": 1.6803867522836125, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.132800168330126, "val_loss": 1.3090802647319495, "val_acc1": 59.36819227069032, "val_acc5": 91.1220048268636, "val_uar": 0.44792664480363453, "val_war": 0.593681917211329, "val_weighted_f1": 0.5739455236304164, "val_micro_f1": 0.593681917211329, "val_macro_f1": 0.4376745081640973, "epoch": 16, "n_parameters": 521309229}
18
+ {"train_lr": 5.389364144946269e-05, "train_min_lr": 3.0383672746162495e-07, "train_loss": 1.6548957898475156, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0891356373777485, "val_loss": 1.3374180346727371, "val_acc1": 58.76906360364428, "val_acc5": 91.12200505125756, "val_uar": 0.4448754145729265, "val_war": 0.5876906318082789, "val_weighted_f1": 0.5739525661733044, "val_micro_f1": 0.5876906318082789, "val_macro_f1": 0.4386004555788895, "epoch": 17, "n_parameters": 521309229}
19
+ {"train_lr": 5.3508113372625154e-05, "train_min_lr": 3.0166323192373806e-07, "train_loss": 1.6588614073523593, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.193729476173325, "val_loss": 1.3045619778773363, "val_acc1": 59.150327289805695, "val_acc5": 91.72113336301318, "val_uar": 0.4317451238491045, "val_war": 0.5915032679738562, "val_weighted_f1": 0.5645938087397492, "val_micro_f1": 0.5915032679738562, "val_macro_f1": 0.4188741921668831, "epoch": 18, "n_parameters": 521309229}
20
+ {"train_lr": 5.309498300535867e-05, "train_min_lr": 2.993341226739393e-07, "train_loss": 1.6661262138448534, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.101814895573229, "val_loss": 1.287042487485736, "val_acc1": 60.511982576519834, "val_acc5": 92.42919379589604, "val_uar": 0.44574578618163957, "val_war": 0.605119825708061, "val_weighted_f1": 0.5825358672326711, "val_micro_f1": 0.605119825708061, "val_macro_f1": 0.4371826463505826, "epoch": 19, "n_parameters": 521309229}
21
+ {"train_lr": 5.265470209965427e-05, "train_min_lr": 2.9685194655899794e-07, "train_loss": 1.670188416643898, "train_loss_scale": 7583.683168316832, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.046204356863948, "val_loss": 1.2972479287315817, "val_acc1": 59.095861131069704, "val_acc5": 91.28540360693837, "val_uar": 0.44367860025748346, "val_war": 0.590958605664488, "val_weighted_f1": 0.5686111333825029, "val_micro_f1": 0.590958605664488, "val_macro_f1": 0.43649885192859433, "epoch": 20, "n_parameters": 521309229}
22
+ {"train_lr": 5.218775209621823e-05, "train_min_lr": 2.9421941780205285e-07, "train_loss": 1.6513306978512126, "train_loss_scale": 4825.980198019802, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.2689970211655486, "val_acc1": 60.67538185213127, "val_acc5": 92.53812670240215, "val_uar": 0.4526857224215423, "val_war": 0.6067538126361656, "val_weighted_f1": 0.5815062138652046, "val_micro_f1": 0.6067538126361656, "val_macro_f1": 0.4435196721279054, "epoch": 21, "n_parameters": 521309229}
23
+ {"train_lr": 5.1694643598023545e-05, "train_min_lr": 2.9143941503464896e-07, "train_loss": 1.648804923587113, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.176869021783961, "val_loss": 1.3308690166940875, "val_acc1": 57.625273166918284, "val_acc5": 90.90414006102318, "val_uar": 0.421820306682515, "val_war": 0.5762527233115469, "val_weighted_f1": 0.5500410212566109, "val_micro_f1": 0.5762527233115469, "val_macro_f1": 0.4118878367086761, "epoch": 22, "n_parameters": 521309229}
24
+ {"train_lr": 5.117591581197337e-05, "train_min_lr": 2.88514978148997e-07, "train_loss": 1.633916844924291, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.169297468544233, "val_loss": 1.3031004141358768, "val_acc1": 59.85838829769808, "val_acc5": 91.44880227481617, "val_uar": 0.4460280922361996, "val_war": 0.5985838779956427, "val_weighted_f1": 0.5770183266329244, "val_micro_f1": 0.5985838779956427, "val_macro_f1": 0.4390423682927222, "epoch": 23, "n_parameters": 521309229}
25
+ {"train_lr": 5.0632135959285794e-05, "train_min_lr": 2.854493049738955e-07, "train_loss": 1.64345780437929, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.145717080276792, "val_loss": 1.3614872977429746, "val_acc1": 56.917211911257574, "val_acc5": 91.17647114922019, "val_uar": 0.41849325859514636, "val_war": 0.5691721132897604, "val_weighted_f1": 0.5480106367542961, "val_micro_f1": 0.5691721132897604, "val_macro_f1": 0.40997268973995926, "epoch": 24, "n_parameters": 521309229}
26
+ {"train_lr": 5.0063898655246014e-05, "train_min_lr": 2.822457477779514e-07, "train_loss": 1.6405703809985233, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.041613774724526, "val_loss": 1.3165716350662942, "val_acc1": 59.42265854629816, "val_acc5": 92.26579576380112, "val_uar": 0.44399516965077257, "val_war": 0.5942265795206971, "val_weighted_f1": 0.579561881232296, "val_micro_f1": 0.5942265795206971, "val_macro_f1": 0.4400020330964258, "epoch": 25, "n_parameters": 521309229}
27
+ {"train_lr": 4.9471825259003246e-05, "train_min_lr": 2.789078096039244e-07, "train_loss": 1.6314663360811303, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.019859592513282, "val_loss": 1.3190332841055066, "val_acc1": 59.47712483125574, "val_acc5": 91.55773488213035, "val_uar": 0.4352346155144539, "val_war": 0.5947712418300654, "val_weighted_f1": 0.5720042540059038, "val_micro_f1": 0.5947712418300654, "val_macro_f1": 0.4307775776815808, "epoch": 26, "n_parameters": 521309229}
28
+ {"train_lr": 4.885656319412359e-05, "train_min_lr": 2.754391404382005e-07, "train_loss": 1.622904009649856, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.1792916189325915, "val_loss": 1.3077596376339595, "val_acc1": 59.09586128066568, "val_acc5": 92.04793058657178, "val_uar": 0.4404701772950905, "val_war": 0.590958605664488, "val_weighted_f1": 0.5667718757415003, "val_micro_f1": 0.590958605664488, "val_macro_f1": 0.4307609434009193, "epoch": 27, "n_parameters": 521309229}
29
+ {"train_lr": 4.821878524064173e-05, "train_min_lr": 2.7184353321958644e-07, "train_loss": 1.6281365101093506, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.094857156866848, "val_loss": 1.3480379458735972, "val_acc1": 58.1154688059115, "val_acc5": 91.28540375653435, "val_uar": 0.4399369140081937, "val_war": 0.5811546840958606, "val_weighted_f1": 0.5658515560089507, "val_micro_f1": 0.5811546840958606, "val_macro_f1": 0.4349261511865123, "epoch": 28, "n_parameters": 521309229}
30
+ {"train_lr": 4.7559188799386115e-05, "train_min_lr": 2.6812491969178504e-07, "train_loss": 1.608238253361321, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9775558769112767, "val_loss": 1.349547873232879, "val_acc1": 58.87799608006197, "val_acc5": 91.66666689106063, "val_uar": 0.4377094623879012, "val_war": 0.5887799564270153, "val_weighted_f1": 0.5689269657311107, "val_micro_f1": 0.5887799564270153, "val_macro_f1": 0.43207872700154687, "epoch": 29, "n_parameters": 521309229}
31
+ {"train_lr": 4.6878495129381106e-05, "train_min_lr": 2.6428736610409306e-07, "train_loss": 1.6231093006362223, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.124037305907447, "val_loss": 1.3282557758046132, "val_acc1": 58.44226635203642, "val_acc5": 91.01307229434742, "val_uar": 0.4410937209679556, "val_war": 0.5844226579520697, "val_weighted_f1": 0.5672088015968852, "val_micro_f1": 0.5844226579520697, "val_macro_f1": 0.43479005908854956, "epoch": 30, "n_parameters": 521309229}
32
+ {"train_lr": 4.6177448559161015e-05, "train_min_lr": 2.603350687650165e-07, "train_loss": 1.6043008171489137, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.093554222937858, "val_loss": 1.3382425056953056, "val_acc1": 57.78867140003279, "val_acc5": 91.88453206828996, "val_uar": 0.4283925715756902, "val_war": 0.5773420479302832, "val_weighted_f1": 0.5615491579685639, "val_micro_f1": 0.5773420479302832, "val_macro_f1": 0.4244582962837468, "epoch": 31, "n_parameters": 521309229}
33
+ {"train_lr": 4.545681567285774e-05, "train_min_lr": 2.5627234945366804e-07, "train_loss": 1.608302503037374, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.125799091735689, "val_loss": 1.3265230600156037, "val_acc1": 58.27886755326215, "val_acc5": 91.39433598985859, "val_uar": 0.43366999801494543, "val_war": 0.5827886710239651, "val_weighted_f1": 0.5621850026727802, "val_micro_f1": 0.5827886710239651, "val_macro_f1": 0.42744802250370173, "epoch": 32, "n_parameters": 521309229}
34
+ {"train_lr": 4.47173844719522e-05, "train_min_lr": 2.5210365069396683e-07, "train_loss": 1.6104830999185544, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.169062999215456, "val_loss": 1.3509948417252184, "val_acc1": 57.08061052303688, "val_acc5": 91.55773439594344, "val_uar": 0.4376914320306689, "val_war": 0.5708061002178649, "val_weighted_f1": 0.5514430432744756, "val_micro_f1": 0.5708061002178649, "val_macro_f1": 0.42715684085031197, "epoch": 33, "n_parameters": 521309229}
35
+ {"train_lr": 4.395996351360636e-05, "train_min_lr": 2.478335308968022e-07, "train_loss": 1.6219489587218847, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.032952580121484, "val_loss": 1.3173740351316976, "val_acc1": 59.749455807255764, "val_acc5": 92.04793032477883, "val_uar": 0.44489289456843606, "val_war": 0.5974945533769063, "val_weighted_f1": 0.5778397168512563, "val_micro_f1": 0.5974945533769063, "val_macro_f1": 0.43863542738657146, "epoch": 34, "n_parameters": 521309229}
36
+ {"train_lr": 4.318538102651787e-05, "train_min_lr": 2.434666593754791e-07, "train_loss": 1.6017357180810998, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.029905729954786, "val_loss": 1.283230648905623, "val_acc1": 60.23965213813034, "val_acc5": 92.48366049224255, "val_uar": 0.4545895847226624, "val_war": 0.6023965141612201, "val_weighted_f1": 0.581160788724584, "val_micro_f1": 0.6023965141612201, "val_macro_f1": 0.4468986969062332, "epoch": 35, "n_parameters": 521309229}
37
+ {"train_lr": 4.2394484005264044e-05, "train_min_lr": 2.390078112398925e-07, "train_loss": 1.6117617436761509, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.085686893746404, "val_loss": 1.3336289714948804, "val_acc1": 58.00653623599632, "val_acc5": 91.44880190082625, "val_uar": 0.4277672588700335, "val_war": 0.5800653594771242, "val_weighted_f1": 0.5582992221889328, "val_micro_f1": 0.5800653594771242, "val_macro_f1": 0.41931748238183075, "epoch": 36, "n_parameters": 521309229}
38
+ {"train_lr": 4.158813728412575e-05, "train_min_lr": 2.3446186217501424e-07, "train_loss": 1.5901667816017326, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.046848245186381, "val_loss": 1.3254080709873461, "val_acc1": 59.42265852292379, "val_acc5": 91.66666666666667, "val_uar": 0.443505201857228, "val_war": 0.5942265795206971, "val_weighted_f1": 0.5757958681612135, "val_micro_f1": 0.5942265795206971, "val_macro_f1": 0.4395804496812842, "epoch": 37, "n_parameters": 521309229}
39
+ {"train_lr": 4.0767222591403925e-05, "train_min_lr": 2.2983378310940412e-07, "train_loss": 1.5919315933000924, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.163994508214516, "val_loss": 1.3024614091012992, "val_acc1": 59.694989564372044, "val_acc5": 92.48366019305061, "val_uar": 0.45754100034351014, "val_war": 0.5974945533769063, "val_weighted_f1": 0.5804522702650963, "val_micro_f1": 0.5974945533769063, "val_macro_f1": 0.4512157799287709, "epoch": 38, "n_parameters": 521309229}
40
+ {"train_lr": 3.993263758526252e-05, "train_min_lr": 2.251286347795713e-07, "train_loss": 1.5780445252708082, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.102370474598195, "val_loss": 1.281378017628894, "val_acc1": 61.76470634516548, "val_acc5": 91.44880234961416, "val_uar": 0.4766667845207951, "val_war": 0.6176470588235294, "val_weighted_f1": 0.6092520858112886, "val_micro_f1": 0.6176470588235294, "val_macro_f1": 0.47268631208074935, "epoch": 39, "n_parameters": 521309229}
41
+ {"train_lr": 3.9085294872152475e-05, "train_min_lr": 2.203515621961343e-07, "train_loss": 1.5831747703229633, "train_loss_scale": 4177.108910891089, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.162505992568366, "val_loss": 1.3056371638587876, "val_acc1": 59.586057242225195, "val_acc5": 91.77559916178386, "val_uar": 0.44596820271184023, "val_war": 0.5958605664488017, "val_weighted_f1": 0.5785060532937976, "val_micro_f1": 0.5958605664488017, "val_macro_f1": 0.43784861662839825, "epoch": 40, "n_parameters": 521309229}
42
+ {"train_lr": 3.822612100889004e-05, "train_min_lr": 2.1550778901782692e-07, "train_loss": 1.5811737907601662, "train_loss_scale": 7867.564356435643, "train_weight_decay": 0.0499999999999999, "train_grad_norm": Infinity, "val_loss": 1.364890332607662, "val_acc1": 57.08061033136704, "val_acc5": 90.84967358907063, "val_uar": 0.4281649722119298, "val_war": 0.5708061002178649, "val_weighted_f1": 0.5555798589131746, "val_micro_f1": 0.5708061002178649, "val_macro_f1": 0.4228480823446165, "epoch": 41, "n_parameters": 521309229}
43
+ {"train_lr": 3.73560554894804e-05, "train_min_lr": 2.1060261183950398e-07, "train_loss": 1.5833678655695207, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.044743511936452, "val_loss": 1.299554196935074, "val_acc1": 59.150327528224274, "val_acc5": 93.02832281823252, "val_uar": 0.4410113185670297, "val_war": 0.5915032679738562, "val_weighted_f1": 0.5765318372524586, "val_micro_f1": 0.5915032679738562, "val_macro_f1": 0.4339271493175373, "epoch": 42, "n_parameters": 521309229}
44
+ {"train_lr": 3.647604971779486e-05, "train_min_lr": 2.056413944003928e-07, "train_loss": 1.5886229570746029, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.088893033490322, "val_loss": 1.3440663665533066, "val_acc1": 57.35294168135699, "val_acc5": 91.55773450814041, "val_uar": 0.4310045873103595, "val_war": 0.5735294117647058, "val_weighted_f1": 0.5587372173162622, "val_micro_f1": 0.5735294117647058, "val_macro_f1": 0.4217606317217586, "epoch": 43, "n_parameters": 521309229}
45
+ {"train_lr": 3.558706596722466e-05, "train_min_lr": 2.006295617189234e-07, "train_loss": 1.5605002783312656, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.081770009333544, "val_loss": 1.325857385116465, "val_acc1": 58.551198604060154, "val_acc5": 91.66666707805558, "val_uar": 0.4340419380883445, "val_war": 0.585511982570806, "val_weighted_f1": 0.565509490104206, "val_micro_f1": 0.585511982570806, "val_macro_f1": 0.4273984042508029, "epoch": 44, "n_parameters": 521309229}
46
+ {"train_lr": 3.469007632844911e-05, "train_min_lr": 1.9557259416054976e-07, "train_loss": 1.5775527757386563, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.181098022083245, "val_loss": 1.3769049556816326, "val_acc1": 57.02614387343912, "val_acc5": 90.63180871103324, "val_uar": 0.432835918994809, "val_war": 0.5702614379084967, "val_weighted_f1": 0.5554657486966513, "val_micro_f1": 0.5702614379084967, "val_macro_f1": 0.42417301200015256, "epoch": 45, "n_parameters": 521309229}
47
+ {"train_lr": 3.378606164646873e-05, "train_min_lr": 1.9047602144505153e-07, "train_loss": 1.5690804353051453, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.134107880073018, "val_loss": 1.3383325350050832, "val_acc1": 58.11546903498032, "val_acc5": 91.39433569066665, "val_uar": 0.43883916776168375, "val_war": 0.5811546840958606, "val_weighted_f1": 0.5624471712645593, "val_micro_f1": 0.5811546840958606, "val_macro_f1": 0.4355543508287877, "epoch": 46, "n_parameters": 521309229}
48
+ {"train_lr": 3.28760104480657e-05, "train_min_lr": 1.85345416599866e-07, "train_loss": 1.5756121124371443, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.04572586493917, "val_loss": 1.3261303273486156, "val_acc1": 58.82352966420791, "val_acc5": 92.156862932093, "val_uar": 0.4417086717571846, "val_war": 0.5887799564270153, "val_weighted_f1": 0.5730829618464215, "val_micro_f1": 0.5887799564270153, "val_macro_f1": 0.4369776905300052, "epoch": 47, "n_parameters": 521309229}
49
+ {"train_lr": 3.1960917860864196e-05, "train_min_lr": 1.8018638986606443e-07, "train_loss": 1.5396520641967408, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.999341183369703, "val_loss": 1.319131552588706, "val_acc1": 58.38780003902959, "val_acc5": 92.04793069876877, "val_uar": 0.4374458159540151, "val_war": 0.5838779956427015, "val_weighted_f1": 0.5681340861625994, "val_micro_f1": 0.5838779956427015, "val_macro_f1": 0.4341235245094745, "epoch": 48, "n_parameters": 521309229}
50
+ {"train_lr": 3.104178452517305e-05, "train_min_lr": 1.750045825636361e-07, "train_loss": 1.5594235886835028, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.115052910134343, "val_loss": 1.3547167801389508, "val_acc1": 57.62527280695298, "val_acc5": 90.52287614111806, "val_uar": 0.43587618896460717, "val_war": 0.5757080610021786, "val_weighted_f1": 0.5582824415765577, "val_micro_f1": 0.5757080610021786, "val_macro_f1": 0.42952879026966084, "epoch": 49, "n_parameters": 521309229}
51
+ {"train_lr": 3.011961549980036e-05, "train_min_lr": 1.698056609227879e-07, "train_loss": 1.5404999063353333, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.030137892996911, "val_loss": 1.3324710957559884, "val_acc1": 57.3529416252585, "val_acc5": 91.33986989189597, "val_uar": 0.4283909531027455, "val_war": 0.5735294117647058, "val_weighted_f1": 0.5596875782206581, "val_micro_f1": 0.5735294117647058, "val_macro_f1": 0.42433339204689163, "epoch": 50, "n_parameters": 521309229}
52
+ {"train_lr": 2.919541916303608e-05, "train_min_lr": 1.6459530988800415e-07, "train_loss": 1.553795057358128, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.007778838129327, "val_loss": 1.3486284280524534, "val_acc1": 57.67973923215679, "val_acc5": 91.61220045650707, "val_uar": 0.4326100201235046, "val_war": 0.576797385620915, "val_weighted_f1": 0.5579964371175596, "val_micro_f1": 0.576797385620915, "val_macro_f1": 0.4239769322147891, "epoch": 51, "n_parameters": 521309229}
53
+ {"train_lr": 2.8270206110005638e-05, "train_min_lr": 1.593792269016439e-07, "train_loss": 1.5480145034026784, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.044885607049016, "val_loss": 1.358431674393953, "val_acc1": 58.061002745347864, "val_acc5": 90.68627499599083, "val_uar": 0.428941519045124, "val_war": 0.5806100217864923, "val_weighted_f1": 0.5607605965682191, "val_micro_f1": 0.5806100217864923, "val_macro_f1": 0.422946791718484, "epoch": 52, "n_parameters": 521309229}
54
+ {"train_lr": 2.7344988047598695e-05, "train_min_lr": 1.541631156738708e-07, "train_loss": 1.5463328085126657, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.147689647013598, "val_loss": 1.3628070661250282, "val_acc1": 57.95206992298949, "val_acc5": 90.14161293179382, "val_uar": 0.43834852506098043, "val_war": 0.579520697167756, "val_weighted_f1": 0.562193413510976, "val_micro_f1": 0.579520697167756, "val_macro_f1": 0.4330892653959697, "epoch": 53, "n_parameters": 521309229}
55
+ {"train_lr": 2.6420776688182537e-05, "train_min_lr": 1.4895267994573047e-07, "train_loss": 1.544270534621607, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.031217478289463, "val_loss": 1.3526935732247782, "val_acc1": 56.69934685090009, "val_acc5": 91.28540383133233, "val_uar": 0.42235036485089644, "val_war": 0.5669934640522876, "val_weighted_f1": 0.551167440853703, "val_micro_f1": 0.5669934640522876, "val_macro_f1": 0.41307935711891813, "epoch": 54, "n_parameters": 521309229}
56
+ {"train_lr": 2.549858264330953e-05, "train_min_lr": 1.4375361725219248e-07, "train_loss": 1.56525982055727, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.033548433001679, "val_loss": 1.3430623242668076, "val_acc1": 59.586057485318655, "val_acc5": 90.57734205208573, "val_uar": 0.44395186495208433, "val_war": 0.5958605664488017, "val_weighted_f1": 0.5778789558773418, "val_micro_f1": 0.5958605664488017, "val_macro_f1": 0.43810149931583703, "epoch": 55, "n_parameters": 521309229}
57
+ {"train_lr": 2.4579414318628305e-05, "train_min_lr": 1.3857161269198084e-07, "train_loss": 1.5439079334043433, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.020304226639247, "val_loss": 1.3564122398109997, "val_acc1": 58.27886766545913, "val_acc5": 90.84967366386863, "val_uar": 0.4346300182791046, "val_war": 0.5827886710239651, "val_weighted_f1": 0.5619980290450666, "val_micro_f1": 0.5827886710239651, "val_macro_f1": 0.4293455585055634, "epoch": 56, "n_parameters": 521309229}
58
+ {"train_lr": 2.3664276811206737e-05, "train_min_lr": 1.3341233271100021e-07, "train_loss": 1.5355559841240987, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.059409075444288, "val_loss": 1.3639367102992301, "val_acc1": 57.679738946989474, "val_acc5": 90.79520760330499, "val_uar": 0.43500723535161573, "val_war": 0.576797385620915, "val_weighted_f1": 0.5604695994465015, "val_micro_f1": 0.576797385620915, "val_macro_f1": 0.42931694185277586, "epoch": 57, "n_parameters": 521309229}
59
+ {"train_lr": 2.2754170810473282e-05, "train_min_lr": 1.282814189061623e-07, "train_loss": 1.5371090391091389, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.082540261863482, "val_loss": 1.3702958717065699, "val_acc1": 58.44226627256356, "val_acc5": 89.97821396472408, "val_uar": 0.4386930185078883, "val_war": 0.5844226579520697, "val_weighted_f1": 0.5644542588142157, "val_micro_f1": 0.5844226579520697, "val_macro_f1": 0.43239454119016846, "epoch": 58, "n_parameters": 521309229}
60
+ {"train_lr": 2.1850091503977748e-05, "train_min_lr": 1.2318448185638122e-07, "train_loss": 1.5550420056081842, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.012133487380377, "val_loss": 1.3549124174842648, "val_acc1": 57.18954266753851, "val_acc5": 91.12200467726764, "val_uar": 0.4273948514665781, "val_war": 0.5718954248366013, "val_weighted_f1": 0.5546927156090071, "val_micro_f1": 0.5718954248366013, "val_macro_f1": 0.4193220800706756, "epoch": 59, "n_parameters": 521309229}
61
+ {"train_lr": 2.0953027489168203e-05, "train_min_lr": 1.1812709498748855e-07, "train_loss": 1.5439124844058512, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.98922070654312, "val_loss": 1.3642895917097728, "val_acc1": 57.080610364091164, "val_acc5": 90.30501133787865, "val_uar": 0.4253483407750657, "val_war": 0.5708061002178649, "val_weighted_f1": 0.5545422897217579, "val_micro_f1": 0.5708061002178649, "val_macro_f1": 0.41623753385207485, "epoch": 60, "n_parameters": 521309229}
62
+ {"train_lr": 2.006395969237425e-05, "train_min_lr": 1.1311478847777361e-07, "train_loss": 1.5320339161570709, "train_loss_scale": 5231.524752475248, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.986011278511274, "val_loss": 1.327697821984104, "val_acc1": 57.84313765694113, "val_acc5": 90.74074109395345, "val_uar": 0.42751480506793893, "val_war": 0.5784313725490197, "val_weighted_f1": 0.5584909174550146, "val_micro_f1": 0.5784313725490197, "val_macro_f1": 0.41938831879312266, "epoch": 61, "n_parameters": 521309229}
63
+ {"train_lr": 1.918386029617857e-05, "train_min_lr": 1.0815304321081477e-07, "train_loss": 1.5463618943006685, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.035362349878444, "val_loss": 1.3418081741706998, "val_acc1": 58.33333383821974, "val_acc5": 91.1764710744222, "val_uar": 0.43465004008878094, "val_war": 0.5833333333333334, "val_weighted_f1": 0.5638193990283038, "val_micro_f1": 0.5833333333333334, "val_macro_f1": 0.427768212695863, "epoch": 62, "n_parameters": 521309229}
64
+ {"train_lr": 1.831369167634938e-05, "train_min_lr": 1.0324728478221384e-07, "train_loss": 1.5093533165580761, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.036129788597031, "val_loss": 1.3663750325932222, "val_acc1": 56.86274569174822, "val_acc5": 90.41394383299584, "val_uar": 0.426912222602889, "val_war": 0.5686274509803921, "val_weighted_f1": 0.5528128158106084, "val_micro_f1": 0.5686274509803921, "val_macro_f1": 0.4177397353297995, "epoch": 63, "n_parameters": 521309229}
65
+ {"train_lr": 1.74544053494968e-05, "train_min_lr": 9.840287756678695e-08, "train_loss": 1.5322800678978659, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.12614907368575, "val_loss": 1.3473035690246844, "val_acc1": 57.18954280778473, "val_acc5": 91.01307229434742, "val_uar": 0.43342619463350734, "val_war": 0.5718954248366013, "val_weighted_f1": 0.5557288234174222, "val_micro_f1": 0.5718954248366013, "val_macro_f1": 0.4272970209571155, "epoch": 64, "n_parameters": 521309229}
66
+ {"train_lr": 1.6606940932603314e-05, "train_min_lr": 9.36251188526991e-08, "train_loss": 1.5323043977073316, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0051519847152255, "val_loss": 1.3663672144506491, "val_acc1": 57.35294141021429, "val_acc5": 90.03267991309072, "val_uar": 0.4294065916025802, "val_war": 0.5735294117647058, "val_weighted_f1": 0.5531194769329951, "val_micro_f1": 0.5735294117647058, "val_macro_f1": 0.419657559208098, "epoch": 65, "n_parameters": 521309229}
67
+ {"train_lr": 1.5772225115566454e-05, "train_min_lr": 8.891923304895814e-08, "train_loss": 1.557515161560707, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.956451595419704, "val_loss": 1.3538884143618977, "val_acc1": 57.40740778866936, "val_acc5": 90.74074098175647, "val_uar": 0.43471304163015634, "val_war": 0.5740740740740741, "val_weighted_f1": 0.5561315333924849, "val_micro_f1": 0.5740740740740741, "val_macro_f1": 0.42956921214823546, "epoch": 66, "n_parameters": 521309229}
68
+ {"train_lr": 1.4951170647876973e-05, "train_min_lr": 8.429036597259903e-08, "train_loss": 1.53793599326225, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.082472286602058, "val_loss": 1.3427922538682526, "val_acc1": 58.932462407093425, "val_acc5": 90.57734227647968, "val_uar": 0.4450972607138223, "val_war": 0.5893246187363834, "val_weighted_f1": 0.5706492002512731, "val_micro_f1": 0.5893246187363834, "val_macro_f1": 0.4373977636449702, "epoch": 67, "n_parameters": 521309229}
69
+ {"train_lr": 1.4144675340540693e-05, "train_min_lr": 7.974357922180958e-08, "train_loss": 1.524617180277412, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0214417169589805, "val_loss": 1.3274694353926415, "val_acc1": 58.93246239774368, "val_acc5": 90.90413972443226, "val_uar": 0.43786391325469226, "val_war": 0.5893246187363834, "val_weighted_f1": 0.5699162151765659, "val_micro_f1": 0.5893246187363834, "val_macro_f1": 0.4297549376965683, "epoch": 68, "n_parameters": 521309229}
70
+ {"train_lr": 1.3353621084335341e-05, "train_min_lr": 7.528384464114652e-08, "train_loss": 1.5210661326501236, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.000446520229377, "val_loss": 1.3485616352043899, "val_acc1": 57.73420519922294, "val_acc5": 90.25054482852711, "val_uar": 0.43200963570427553, "val_war": 0.5773420479302832, "val_weighted_f1": 0.5599080658694578, "val_micro_f1": 0.5773420479302832, "val_macro_f1": 0.42665884400146076, "epoch": 69, "n_parameters": 521309229}
71
+ {"train_lr": 1.2578872885475928e-05, "train_min_lr": 7.091603888489658e-08, "train_loss": 1.5272184620202571, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.042517919351559, "val_loss": 1.3666994907692367, "val_acc1": 57.461873989479216, "val_acc5": 90.52287632811303, "val_uar": 0.42636320623596125, "val_war": 0.5746187363834423, "val_weighted_f1": 0.5546633513882594, "val_micro_f1": 0.5746187363834423, "val_macro_f1": 0.4192369399528222, "epoch": 70, "n_parameters": 521309229}
72
+ {"train_lr": 1.1821277919743169e-05, "train_min_lr": 6.664493808452678e-08, "train_loss": 1.5282121231847077, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.050420152078761, "val_loss": 1.371648383783359, "val_acc1": 57.625272905125335, "val_acc5": 90.08714612325032, "val_uar": 0.4333873178914497, "val_war": 0.5762527233115469, "val_weighted_f1": 0.5574718236686959, "val_micro_f1": 0.5762527233115469, "val_macro_f1": 0.42891110451977116, "epoch": 71, "n_parameters": 521309229}
73
+ {"train_lr": 1.1081664606109202e-05, "train_min_lr": 6.247521262605469e-08, "train_loss": 1.54652003280007, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.947941078998075, "val_loss": 1.35255920419506, "val_acc1": 58.986928664001766, "val_acc5": 91.5577345455394, "val_uar": 0.4398448466235288, "val_war": 0.5898692810457516, "val_weighted_f1": 0.5748625694646876, "val_micro_f1": 0.5898692810457516, "val_macro_f1": 0.4361298203620055, "epoch": 72, "n_parameters": 521309229}
74
+ {"train_lr": 1.0360841700873597e-05, "train_min_lr": 5.841142204304979e-08, "train_loss": 1.5164908019229524, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.047598609829893, "val_loss": 1.3596662499156653, "val_acc1": 57.40740777464474, "val_acc5": 90.35947762283624, "val_uar": 0.4340998434940592, "val_war": 0.5740740740740741, "val_weighted_f1": 0.5578937764432943, "val_micro_f1": 0.5740740740740741, "val_macro_f1": 0.4302831366554184, "epoch": 73, "n_parameters": 521309229}
75
+ {"train_lr": 9.659597413300203e-06, "train_min_lr": 5.445801003085074e-08, "train_loss": 1.5301178030841815, "train_loss_scale": 5109.861386138614, "train_weight_decay": 0.0499999999999999, "train_grad_norm": NaN, "val_loss": 1.3671489355026507, "val_acc1": 56.80827966390871, "val_acc5": 90.25054531471402, "val_uar": 0.42387835220862086, "val_war": 0.568082788671024, "val_weighted_f1": 0.549647173079013, "val_micro_f1": 0.568082788671024, "val_macro_f1": 0.41514254080334895, "epoch": 74, "n_parameters": 521309229}
76
+ {"train_lr": 8.978698543721923e-06, "train_min_lr": 5.0619299587449256e-08, "train_loss": 1.5227838735966006, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8618823939030715, "val_loss": 1.3404422636125601, "val_acc1": 58.224401427250285, "val_acc5": 91.0675380931181, "val_uar": 0.4351947514293319, "val_war": 0.5822440087145969, "val_weighted_f1": 0.5657295680155228, "val_micro_f1": 0.5822440087145969, "val_macro_f1": 0.4298855418599811, "epoch": 75, "n_parameters": 521309229}
77
+ {"train_lr": 8.318889645055782e-06, "train_min_lr": 4.689948828635602e-08, "train_loss": 1.4891313025266817, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9720690793330125, "val_loss": 1.337769286013117, "val_acc1": 58.33333371667301, "val_acc5": 90.79520726671406, "val_uar": 0.43114097906232307, "val_war": 0.5833333333333334, "val_weighted_f1": 0.5641756176089839, "val_micro_f1": 0.5833333333333334, "val_macro_f1": 0.424110636005223, "epoch": 76, "n_parameters": 521309229}
78
+ {"train_lr": 7.680892208645253e-06, "train_min_lr": 4.3302643686614975e-08, "train_loss": 1.5201239780624314, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.946642233593629, "val_loss": 1.368334342159477, "val_acc1": 57.62527321366703, "val_acc5": 89.92374775456447, "val_uar": 0.4253886408877901, "val_war": 0.5762527233115469, "val_weighted_f1": 0.5532186231737183, "val_micro_f1": 0.5762527233115469, "val_macro_f1": 0.4163481558708908, "epoch": 77, "n_parameters": 521309229}
79
+ {"train_lr": 7.065403875320073e-06, "train_min_lr": 3.983269888498758e-08, "train_loss": 1.5113743727553401, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.032277477849828, "val_loss": 1.3565542849839902, "val_acc1": 57.734205330119416, "val_acc5": 90.41394375819786, "val_uar": 0.4391911181555827, "val_war": 0.5773420479302832, "val_weighted_f1": 0.5618619258344685, "val_micro_f1": 0.5773420479302832, "val_macro_f1": 0.4354933372669231, "epoch": 78, "n_parameters": 521309229}
80
+ {"train_lr": 6.473097672536222e-06, "train_min_lr": 3.649344821516926e-08, "train_loss": 1.5181781597460065, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.034651638257621, "val_loss": 1.3656893428049834, "val_acc1": 58.278867833754596, "val_acc5": 90.08714619804832, "val_uar": 0.4375661907364323, "val_war": 0.5827886710239651, "val_weighted_f1": 0.5642337968656307, "val_micro_f1": 0.5827886710239651, "val_macro_f1": 0.43155816826642945, "epoch": 79, "n_parameters": 521309229}
81
+ {"train_lr": 5.904621278430291e-06, "train_min_lr": 3.3288543098741024e-08, "train_loss": 1.4927326338519358, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.052402111563352, "val_loss": 1.3579569949823267, "val_acc1": 56.808279402115765, "val_acc5": 90.79520696752212, "val_uar": 0.4278880424513918, "val_war": 0.568082788671024, "val_weighted_f1": 0.5505963274895828, "val_micro_f1": 0.568082788671024, "val_macro_f1": 0.4191911840303083, "epoch": 80, "n_parameters": 521309229}
82
+ {"train_lr": 5.3605963135929456e-06, "train_min_lr": 3.0221488052393765e-08, "train_loss": 1.4953454100456174, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9807940568074143, "val_loss": 1.3612420190783108, "val_acc1": 58.061002483554915, "val_acc5": 91.01307229434742, "val_uar": 0.4302523630300474, "val_war": 0.5806100217864923, "val_weighted_f1": 0.5600168056460804, "val_micro_f1": 0.5806100217864923, "val_macro_f1": 0.42403925531716563, "epoch": 81, "n_parameters": 521309229}
83
+ {"train_lr": 4.8416176613359425e-06, "train_min_lr": 2.7295636855790628e-08, "train_loss": 1.511178184263777, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9526779604430247, "val_loss": 1.3370483926698273, "val_acc1": 58.496732716466866, "val_acc5": 90.95860612158681, "val_uar": 0.44079376001736686, "val_war": 0.5849673202614379, "val_weighted_f1": 0.5657510338982471, "val_micro_f1": 0.5849673202614379, "val_macro_f1": 0.43406720303437674, "epoch": 82, "n_parameters": 521309229}
84
+ {"train_lr": 4.3482528171959625e-06, "train_min_lr": 2.4514188884258143e-08, "train_loss": 1.5023666240594569, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.996548841495325, "val_loss": 1.3524708131364747, "val_acc1": 57.57080626955219, "val_acc5": 91.12200467726764, "val_uar": 0.4372820294628098, "val_war": 0.5757080610021786, "val_weighted_f1": 0.5593647102400616, "val_micro_f1": 0.5757080610021786, "val_macro_f1": 0.4287752283334719, "epoch": 83, "n_parameters": 521309229}
85
+ {"train_lr": 3.88104126838656e-06, "train_min_lr": 2.188018561031641e-08, "train_loss": 1.52771330184669, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.0453623757504005, "val_loss": 1.339083895963781, "val_acc1": 58.605665099387075, "val_acc5": 91.17647096222522, "val_uar": 0.4394173899864465, "val_war": 0.5860566448801743, "val_weighted_f1": 0.5695677798039864, "val_micro_f1": 0.5860566448801743, "val_macro_f1": 0.4329330397666262, "epoch": 84, "n_parameters": 521309229}
86
+ {"train_lr": 3.4404939038768243e-06, "train_min_lr": 1.9396507277873433e-08, "train_loss": 1.495341131789456, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.99566120440417, "val_loss": 1.3340439203323102, "val_acc1": 58.06100278742173, "val_acc5": 91.17647099962421, "val_uar": 0.4374581207248078, "val_war": 0.5806100217864923, "val_weighted_f1": 0.5648957077962702, "val_micro_f1": 0.5806100217864923, "val_macro_f1": 0.43238047728927614, "epoch": 85, "n_parameters": 521309229}
87
+ {"train_lr": 3.027092455741798e-06, "train_min_lr": 1.7065869752720735e-08, "train_loss": 1.510979443493456, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9975204727437235, "val_loss": 1.3431351593896454, "val_acc1": 58.60566491706699, "val_acc5": 90.4684097813625, "val_uar": 0.4423845427093573, "val_war": 0.5860566448801743, "val_weighted_f1": 0.5699060914021232, "val_micro_f1": 0.5860566448801743, "val_macro_f1": 0.4353316021502882, "epoch": 86, "n_parameters": 521309229}
88
+ {"train_lr": 2.641288972395518e-06, "train_min_lr": 1.4890821552773996e-08, "train_loss": 1.498776782955667, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9697381529477562, "val_loss": 1.3510292096465242, "val_acc1": 57.625272694756006, "val_acc5": 90.74074105655446, "val_uar": 0.4318567210046025, "val_war": 0.5762527233115469, "val_weighted_f1": 0.558251812682949, "val_micro_f1": 0.5762527233115469, "val_macro_f1": 0.4255730586105611, "epoch": 87, "n_parameters": 521309229}
89
+ {"train_lr": 2.2835053242827143e-06, "train_min_lr": 1.2873741061306107e-08, "train_loss": 1.4823700538759579, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.022941917476087, "val_loss": 1.345278475506633, "val_acc1": 57.78867152625439, "val_acc5": 91.01307173336254, "val_uar": 0.4359149557443681, "val_war": 0.5778867102396514, "val_weighted_f1": 0.5635073860236294, "val_micro_f1": 0.5778867102396514, "val_macro_f1": 0.43193871144392404, "epoch": 88, "n_parameters": 521309229}
90
+ {"train_lr": 1.954132742569673e-06, "train_min_lr": 1.1016833926220049e-08, "train_loss": 1.5392223120522577, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9908310446408715, "val_loss": 1.3489531442230822, "val_acc1": 57.40740795696483, "val_acc5": 91.06753828011307, "val_uar": 0.4285169512800005, "val_war": 0.5740740740740741, "val_weighted_f1": 0.557424516945951, "val_micro_f1": 0.5740740740740741, "val_macro_f1": 0.4221590151169025, "epoch": 89, "n_parameters": 521309229}
91
+ {"train_lr": 1.6535313913386933e-06, "train_min_lr": 9.322130648205152e-09, "train_loss": 1.490773089057935, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.8710437481946283, "val_loss": 1.337188057455362, "val_acc1": 58.00653629209481, "val_acc5": 91.28540330774644, "val_uar": 0.43551376808158293, "val_war": 0.579520697167756, "val_weighted_f1": 0.5654567669663245, "val_micro_f1": 0.579520697167756, "val_macro_f1": 0.42950080221942855, "epoch": 90, "n_parameters": 521309229}
92
+ {"train_lr": 1.3820299737539636e-06, "train_min_lr": 7.791484360414586e-09, "train_loss": 1.5142409638209704, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9372925687544416, "val_loss": 1.360941330299658, "val_acc1": 57.625272891100714, "val_acc5": 90.30501137527764, "val_uar": 0.42816098566395566, "val_war": 0.5757080610021786, "val_weighted_f1": 0.5587334537686997, "val_micro_f1": 0.5757080610021786, "val_macro_f1": 0.4208740376690486, "epoch": 91, "n_parameters": 521309229}
93
+ {"train_lr": 1.139925372629472e-06, "train_min_lr": 6.426568802091309e-09, "train_loss": 1.4952828391353683, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.932713544014657, "val_loss": 1.3509400142174142, "val_acc1": 58.38779994553211, "val_acc5": 90.63180826224533, "val_uar": 0.44506081677984344, "val_war": 0.5838779956427015, "val_weighted_f1": 0.5689085320686983, "val_micro_f1": 0.5838779956427015, "val_macro_f1": 0.44062274565924575, "epoch": 92, "n_parameters": 521309229}
94
+ {"train_lr": 9.274823257919996e-07, "train_min_lr": 5.22887648835886e-09, "train_loss": 1.499029539205847, "train_loss_scale": 4096.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.044335115073931, "val_loss": 1.336291376282187, "val_acc1": 58.38780014655169, "val_acc5": 91.01307218215045, "val_uar": 0.43412397595047064, "val_war": 0.5838779956427015, "val_weighted_f1": 0.565762632673642, "val_micro_f1": 0.5838779956427015, "val_macro_f1": 0.4254058750548293, "epoch": 93, "n_parameters": 521309229}
95
+ {"train_lr": 7.449331365942088e-07, "train_min_lr": 4.1997170781779625e-09, "train_loss": 1.5037742494159798, "train_loss_scale": 7989.227722772277, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9718132184283568, "val_loss": 1.337912893470596, "val_acc1": 59.150327350579055, "val_acc5": 90.84967358907063, "val_uar": 0.445588209238493, "val_war": 0.5915032679738562, "val_weighted_f1": 0.575085363617963, "val_micro_f1": 0.5915032679738562, "val_macro_f1": 0.44048322738313817, "epoch": 94, "n_parameters": 521309229}
96
+ {"train_lr": 5.924774198943271e-07, "train_min_lr": 3.340215942253686e-09, "train_loss": 1.501054585176726, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.959398720524099, "val_loss": 1.3622637645286673, "val_acc1": 57.78867168052524, "val_acc5": 90.6318083744423, "val_uar": 0.4377619222293843, "val_war": 0.5778867102396514, "val_weighted_f1": 0.562495266012681, "val_micro_f1": 0.5778867102396514, "val_macro_f1": 0.4300921960906101, "epoch": 95, "n_parameters": 521309229}
97
+ {"train_lr": 4.7028188378023254e-07, "train_min_lr": 2.6513129324590976e-09, "train_loss": 1.5046819835999619, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9330007699456546, "val_loss": 1.339775533652773, "val_acc1": 58.33333385691923, "val_acc5": 90.95860608418782, "val_uar": 0.4423937558479443, "val_war": 0.5833333333333334, "val_weighted_f1": 0.5664246073788767, "val_micro_f1": 0.5833333333333334, "val_macro_f1": 0.43587696352700883, "epoch": 96, "n_parameters": 521309229}
98
+ {"train_lr": 3.7848014727660956e-07, "train_min_lr": 2.1337613541210327e-09, "train_loss": 1.5088302095731099, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.009826851363229, "val_loss": 1.3537456066000695, "val_acc1": 58.169935436809766, "val_acc5": 90.79520700492111, "val_uar": 0.4351117205324494, "val_war": 0.5816993464052288, "val_weighted_f1": 0.5665691428737146, "val_micro_f1": 0.5816993464052288, "val_macro_f1": 0.4299063249105868, "epoch": 97, "n_parameters": 521309229}
99
+ {"train_lr": 3.171725942345054e-07, "train_min_lr": 1.7881271422917885e-09, "train_loss": 1.509034376431613, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 4.053849968579736, "val_loss": 1.33713632617511, "val_acc1": 58.27886765610938, "val_acc5": 91.0130718829585, "val_uar": 0.43638415857637947, "val_war": 0.5827886710239651, "val_weighted_f1": 0.5679869765199171, "val_micro_f1": 0.5827886710239651, "val_macro_f1": 0.4319599706586528, "epoch": 98, "n_parameters": 521309229}
100
+ {"train_lr": 2.8642626356306314e-07, "train_min_lr": 1.6147882429074497e-09, "train_loss": 1.5173663297305406, "train_loss_scale": 8192.0, "train_weight_decay": 0.0499999999999999, "train_grad_norm": 3.9466164584207064, "val_loss": 1.358587194014998, "val_acc1": 58.6056650947122, "val_acc5": 90.14161252040489, "val_uar": 0.4403350591532785, "val_war": 0.5860566448801743, "val_weighted_f1": 0.5683614187839255, "val_micro_f1": 0.5860566448801743, "val_macro_f1": 0.43295597576675454, "epoch": 99, "n_parameters": 521309229}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 62.42490442381212, "Final Top-5 (best epoch)": 93.66466411796833}
103
+ Final UAR: 47.50%, Final WAR: 62.42%
104
+ Final Confusion Matrix:
105
+ [[218 8 3 1 8 12 13 1 12 0 2]
106
+ [ 20 55 1 4 7 6 11 0 21 1 1]
107
+ [ 3 2 53 1 0 31 27 0 7 0 1]
108
+ [ 2 14 0 198 10 0 9 5 5 4 1]
109
+ [ 4 26 0 25 123 7 8 12 11 9 2]
110
+ [ 6 3 8 0 2 255 3 0 16 0 1]
111
+ [ 11 6 23 2 7 0 155 0 9 0 1]
112
+ [ 6 10 0 6 4 0 5 1 6 4 5]
113
+ [ 11 9 7 2 22 19 29 4 76 1 3]
114
+ [ 2 6 0 3 5 10 3 1 7 4 11]
115
+ [ 1 2 0 0 7 5 4 1 9 2 5]]
116
+ Final Class Accuracies: ['78.42%', '43.31%', '42.40%', '79.84%', '54.19%', '86.73%', '72.43%', '2.13%', '41.53%', '7.69%', '13.89%']
117
+ Final Weighted F1: 0.6128, Final Micro F1: 0.6242, Final Macro F1: 0.4726
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split01/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.536417322834646e-06, "train_min_lr": 3.121271593438969e-08, "train_loss": 2.15654093455645, "train_loss_scale": 3132.235294117647, "train_weight_decay": 0.04999999999999998, "train_grad_norm": Infinity, "val_loss": 1.2753344153364499, "val_acc1": 50.520834535360336, "val_acc5": 100.0, "val_uar": 0.3485780297136743, "val_war": 0.5052083333333334, "val_weighted_f1": 0.4937938165849598, "val_micro_f1": 0.5052083333333334, "val_macro_f1": 0.3436165352801402, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.6830708661417322e-05, "train_min_lr": 9.488665644054467e-08, "train_loss": 1.2987257801239787, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 4.53997150589438, "val_loss": 1.0370113849639893, "val_acc1": 59.89583365122477, "val_acc5": 100.0, "val_uar": 0.40900506357179583, "val_war": 0.5989583333333334, "val_weighted_f1": 0.561398885379384, "val_micro_f1": 0.5989583333333334, "val_macro_f1": 0.42772445901933126, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8125000000000003e-05, "train_min_lr": 1.5856059694669964e-07, "train_loss": 1.2031303574057186, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.6524307166828827, "val_loss": 0.959723636507988, "val_acc1": 64.84375051657359, "val_acc5": 100.0, "val_uar": 0.45228762503899295, "val_war": 0.6484375, "val_weighted_f1": 0.6110770555768344, "val_micro_f1": 0.6484375, "val_macro_f1": 0.482374983150535, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.9419291338582704e-05, "train_min_lr": 2.2223453745285467e-07, "train_loss": 1.1630170238173865, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.267381829373977, "val_loss": 0.9880035122235616, "val_acc1": 63.02083367109299, "val_acc5": 100.0, "val_uar": 0.5383121482022816, "val_war": 0.6302083333333334, "val_weighted_f1": 0.6322214820103662, "val_micro_f1": 0.6302083333333334, "val_macro_f1": 0.5443457891376325, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.071358267716536e-05, "train_min_lr": 2.859084779590097e-07, "train_loss": 1.1532561358283548, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.4761623003903557, "val_loss": 0.8785776247580847, "val_acc1": 69.27083383003871, "val_acc5": 100.0, "val_uar": 0.5427335217348898, "val_war": 0.6927083333333334, "val_weighted_f1": 0.6679995008123676, "val_micro_f1": 0.6927083333333334, "val_macro_f1": 0.5725964941939431, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.6245048752361314e-05, "train_min_lr": 3.1709328019451135e-07, "train_loss": 1.1403891609774695, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.6808822973101747, "val_loss": 0.9362048730254173, "val_acc1": 69.53125027815501, "val_acc5": 100.0, "val_uar": 0.6034774359500762, "val_war": 0.6953125, "val_weighted_f1": 0.695160776199233, "val_micro_f1": 0.6953125, "val_macro_f1": 0.6070255849414229, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.621475366821118e-05, "train_min_lr": 3.1692248529220416e-07, "train_loss": 1.1413571180081834, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.987880540829079, "val_loss": 0.9831408485770226, "val_acc1": 60.677083353201546, "val_acc5": 100.0, "val_uar": 0.6007388189042173, "val_war": 0.6067708333333334, "val_weighted_f1": 0.6255905002596869, "val_micro_f1": 0.6067708333333334, "val_macro_f1": 0.5638716607156126, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.615389662220735e-05, "train_min_lr": 3.165793909084607e-07, "train_loss": 1.1143612670742609, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.223225799261355, "val_loss": 0.9012181883056959, "val_acc1": 70.83333367109299, "val_acc5": 100.0, "val_uar": 0.6428657301177821, "val_war": 0.7083333333333334, "val_weighted_f1": 0.7094537947262244, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6354775404070209, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.606254416063613e-05, "train_min_lr": 3.160643722119527e-07, "train_loss": 1.134577787195156, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.5481564694759893, "val_loss": 0.9342450772722563, "val_acc1": 66.6666668454806, "val_acc5": 100.0, "val_uar": 0.6244140630832663, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6773710940088994, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.6036722151123437, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.5940796176073e-05, "train_min_lr": 3.1537799236806353e-07, "train_loss": 1.1064306415763556, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2363609963772344, "val_loss": 0.825651745001475, "val_acc1": 74.73958398898442, "val_acc5": 100.0, "val_uar": 0.6626048456043754, "val_war": 0.7473958333333334, "val_weighted_f1": 0.7390521624308256, "val_micro_f1": 0.7473958333333334, "val_macro_f1": 0.6729407823962964, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.578878579815155e-05, "train_min_lr": 3.145210019230746e-07, "train_loss": 1.1101326445738475, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.366928776105245, "val_loss": 0.7918396070599556, "val_acc1": 71.87500065565109, "val_acc5": 100.0, "val_uar": 0.6193851225518036, "val_war": 0.71875, "val_weighted_f1": 0.7059413573451772, "val_micro_f1": 0.71875, "val_macro_f1": 0.6364447680039149, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.560667924798835e-05, "train_min_lr": 3.134943379834556e-07, "train_loss": 1.10266999929559, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0213250552906707, "val_loss": 0.8170952176054319, "val_acc1": 72.65625019868214, "val_acc5": 100.0, "val_uar": 0.6333275461064877, "val_war": 0.7265625, "val_weighted_f1": 0.7152933123399997, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6513954136513913, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.539467565642228e-05, "train_min_lr": 3.122991231911512e-07, "train_loss": 1.0945767873252918, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.4636376511816884, "val_loss": 0.8096626028418541, "val_acc1": 73.43750025828679, "val_acc5": 100.0, "val_uar": 0.6281847392740006, "val_war": 0.734375, "val_weighted_f1": 0.7211928163697033, "val_micro_f1": 0.734375, "val_macro_f1": 0.6483464067595325, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.5153006846268126e-05, "train_min_lr": 3.10936664495989e-07, "train_loss": 1.0701473244265014, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1930115947536395, "val_loss": 0.7809611037373543, "val_acc1": 73.69791762034099, "val_acc5": 100.0, "val_uar": 0.6594548123318643, "val_war": 0.7369791666666666, "val_weighted_f1": 0.7308710236339814, "val_micro_f1": 0.7369791666666666, "val_macro_f1": 0.6621492721238041, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.48819370788216e-05, "train_min_lr": 3.094084517265481e-07, "train_loss": 1.0816298353126625, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3296413678748933, "val_loss": 0.8457264776031176, "val_acc1": 72.39583347241084, "val_acc5": 100.0, "val_uar": 0.7043400430872645, "val_war": 0.7239583333333334, "val_weighted_f1": 0.7270731876477504, "val_micro_f1": 0.7239583333333334, "val_macro_f1": 0.6710288409599237, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.458176276489367e-05, "train_min_lr": 3.077161559610556e-07, "train_loss": 1.0647925187170115, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.284204674702065, "val_loss": 0.7465464832882086, "val_acc1": 74.47916714350383, "val_acc5": 100.0, "val_uar": 0.6309698263069073, "val_war": 0.7447916666666666, "val_weighted_f1": 0.733347836044039, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6565544675252746, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.425281214068948e-05, "train_min_lr": 3.058616277000846e-07, "train_loss": 1.069381257287817, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1506786790548587, "val_loss": 0.7659415379166603, "val_acc1": 73.95833365122478, "val_acc5": 100.0, "val_uar": 0.6377995906061056, "val_war": 0.7395833333333334, "val_weighted_f1": 0.7323567496754803, "val_micro_f1": 0.7395833333333334, "val_macro_f1": 0.6519624806620002, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.389544490888697e-05, "train_min_lr": 3.0384689484306075e-07, "train_loss": 1.0604490441044951, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1119075639575136, "val_loss": 0.7872250067691008, "val_acc1": 74.47916756073634, "val_acc5": 100.0, "val_uar": 0.6768438464734207, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7410420916771772, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6766836241688172, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.351005184530733e-05, "train_min_lr": 3.0167416047078327e-07, "train_loss": 1.0671963621588314, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2987373015459847, "val_loss": 0.8018969222903252, "val_acc1": 72.91666748126347, "val_acc5": 100.0, "val_uar": 0.6539050375971117, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7240734746155159, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6603162533195577, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.309705437160725e-05, "train_min_lr": 2.993458004363883e-07, "train_loss": 1.0524598439924078, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.446660969771591, "val_loss": 0.7507275926570097, "val_acc1": 75.52083450555801, "val_acc5": 100.0, "val_uar": 0.6549834031376868, "val_war": 0.7552083333333334, "val_weighted_f1": 0.7423250036924967, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6715618927791636, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.2656904094460706e-05, "train_min_lr": 2.9686436076738873e-07, "train_loss": 1.0648366769934012, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2047308099036123, "val_loss": 0.819282648464044, "val_acc1": 71.09375027815501, "val_acc5": 100.0, "val_uar": 0.6757057186840875, "val_war": 0.7109375, "val_weighted_f1": 0.7122218102791291, "val_micro_f1": 0.7109375, "val_macro_f1": 0.6498012446411755, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.219008231173354e-05, "train_min_lr": 2.942325548816307e-07, "train_loss": 1.050915613283519, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3189455761628994, "val_loss": 0.767150508860747, "val_acc1": 74.73958454529445, "val_acc5": 100.0, "val_uar": 0.6292804895885008, "val_war": 0.7473958333333334, "val_weighted_f1": 0.7320585850310014, "val_micro_f1": 0.7473958333333334, "val_macro_f1": 0.6660965569780132, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.169709948619132e-05, "train_min_lr": 2.9145326062021033e-07, "train_loss": 1.0430098361049602, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2475033788120045, "val_loss": 0.8143269568681717, "val_acc1": 71.09375015894572, "val_acc5": 100.0, "val_uar": 0.6770151276435475, "val_war": 0.7109375, "val_weighted_f1": 0.7148007950424731, "val_micro_f1": 0.7109375, "val_macro_f1": 0.6498981075331233, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.1178494687315915e-05, "train_min_lr": 2.8852951710059744e-07, "train_loss": 1.0274121287991018, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.257134049546485, "val_loss": 0.7756470280388991, "val_acc1": 74.47916708389918, "val_acc5": 100.0, "val_uar": 0.6505341446549979, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7358601702255226, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6616632793168119, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063483500184073e-05, "train_min_lr": 2.8546452139340483e-07, "train_loss": 1.0355763969078562, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3306826025831935, "val_loss": 0.8029690707723299, "val_acc1": 70.05208371082942, "val_acc5": 100.0, "val_uar": 0.611627564253462, "val_war": 0.7005208333333334, "val_weighted_f1": 0.6933033451629788, "val_micro_f1": 0.7005208333333334, "val_macro_f1": 0.6246372167125113, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.006671491365001e-05, "train_min_lr": 2.822616250264383e-07, "train_loss": 1.0297043469606662, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0608711382922005, "val_loss": 0.742065874238809, "val_acc1": 75.52083373069763, "val_acc5": 100.0, "val_uar": 0.6667399908240794, "val_war": 0.7552083333333334, "val_weighted_f1": 0.7448575709418034, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6798581095245729, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.947475565371907e-05, "train_min_lr": 2.7892433031985075e-07, "train_loss": 1.0311690674108618, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.4606922187057196, "val_loss": 0.768451968828837, "val_acc1": 73.6979172428449, "val_acc5": 100.0, "val_uar": 0.6358696420555175, "val_war": 0.7369791666666666, "val_weighted_f1": 0.7214273902529963, "val_micro_f1": 0.7369791666666666, "val_macro_f1": 0.6508006914262217, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.885960452080762e-05, "train_min_lr": 2.754562865564062e-07, "train_loss": 1.016382073654848, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.302709822561227, "val_loss": 0.7507294490933418, "val_acc1": 75.52083412806194, "val_acc5": 100.0, "val_uar": 0.6645593846592052, "val_war": 0.7552083333333334, "val_weighted_f1": 0.7477330619015045, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6778411626303895, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.822193417364793e-05, "train_min_lr": 2.718612859910426e-07, "train_loss": 1.0085380921761196, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.193774288775874, "val_loss": 0.7681974284350872, "val_acc1": 72.3958330353101, "val_acc5": 100.0, "val_uar": 0.5717410223630298, "val_war": 0.7239583333333334, "val_weighted_f1": 0.6970430745509556, "val_micro_f1": 0.7239583333333334, "val_macro_f1": 0.6181333704822566, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.756244189540243e-05, "train_min_lr": 2.6814325970409697e-07, "train_loss": 1.0110053879762786, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0203353937934425, "val_loss": 0.7454976662993431, "val_acc1": 74.73958375056584, "val_acc5": 100.0, "val_uar": 0.64583144845414, "val_war": 0.7473958333333334, "val_weighted_f1": 0.7321121034138224, "val_micro_f1": 0.7473958333333334, "val_macro_f1": 0.6660663217526502, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.688184883119482e-05, "train_min_lr": 2.643062733027266e-07, "train_loss": 1.0007111922977796, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0718990985084984, "val_loss": 0.7551400922238827, "val_acc1": 76.56250051657359, "val_acc5": 100.0, "val_uar": 0.7018395109665629, "val_war": 0.765625, "val_weighted_f1": 0.7631164174029844, "val_micro_f1": 0.765625, "val_macro_f1": 0.6936891058204268, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.618089919954843e-05, "train_min_lr": 2.603545224752272e-07, "train_loss": 1.0048288529604867, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.398711237252927, "val_loss": 0.7311550316711267, "val_acc1": 76.30208432674408, "val_acc5": 100.0, "val_uar": 0.6719668951564471, "val_war": 0.7630208333333334, "val_weighted_f1": 0.7560405459944818, "val_micro_f1": 0.7630208333333334, "val_macro_f1": 0.6973104541025602, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.546035947859425e-05, "train_min_lr": 2.562923284031098e-07, "train_loss": 0.9935583041773902, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.5021441006192973, "val_loss": 0.7396409412225088, "val_acc1": 75.78125071525574, "val_acc5": 100.0, "val_uar": 0.6524300167361897, "val_war": 0.7552083333333334, "val_weighted_f1": 0.7457777839312657, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6813354730073294, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.4721017567938275e-05, "train_min_lr": 2.5212413303595154e-07, "train_loss": 1.0027332885397806, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.274463452544867, "val_loss": 0.7941990507145723, "val_acc1": 71.6145835518837, "val_acc5": 100.0, "val_uar": 0.6646388220794001, "val_war": 0.7161458333333334, "val_weighted_f1": 0.7199626190403497, "val_micro_f1": 0.7161458333333334, "val_macro_f1": 0.6448689879610584, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.39636819271049e-05, "train_min_lr": 2.478544942341897e-07, "train_loss": 0.9738571162706886, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.015240715999229, "val_loss": 0.7726200222969055, "val_acc1": 72.1354169845581, "val_acc5": 100.0, "val_uar": 0.6398249704486879, "val_war": 0.7213541666666666, "val_weighted_f1": 0.7146614981370871, "val_micro_f1": 0.7213541666666666, "val_macro_f1": 0.643667561852658, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.31891806914983e-05, "train_min_lr": 2.434880807851677e-07, "train_loss": 1.0101859228673324, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9843873580296836, "val_loss": 0.747679507980744, "val_acc1": 74.47916700442632, "val_acc5": 100.0, "val_uar": 0.6594995926263026, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7384076509252341, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6736736688703916, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.239836076684844e-05, "train_min_lr": 2.3902966729788513e-07, "train_loss": 0.9944839746344323, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.987766254181955, "val_loss": 0.777788353463014, "val_acc1": 75.00000077486038, "val_acc5": 100.0, "val_uar": 0.6379367638697752, "val_war": 0.75, "val_weighted_f1": 0.7369176345235684, "val_micro_f1": 0.75, "val_macro_f1": 0.664201393263509, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.1592086903132284e-05, "train_min_lr": 2.344841289820325e-07, "train_loss": 0.984906843577335, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.992134921690997, "val_loss": 0.779035184532404, "val_acc1": 72.13541726271312, "val_acc5": 100.0, "val_uar": 0.6226945813866557, "val_war": 0.7213541666666666, "val_weighted_f1": 0.7100286627284397, "val_micro_f1": 0.7213541666666666, "val_macro_f1": 0.6346180358976565, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.07712407489822e-05, "train_min_lr": 2.2985643631702136e-07, "train_loss": 0.9730742127677194, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.909821486940571, "val_loss": 0.7752934793631235, "val_acc1": 72.65625081459682, "val_acc5": 100.0, "val_uar": 0.6394357973466215, "val_war": 0.7265625, "val_weighted_f1": 0.7199567597402372, "val_micro_f1": 0.7265625, "val_macro_f1": 0.660519972540143, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993671988761623e-05, "train_min_lr": 2.251516496168376e-07, "train_loss": 1.002591182788213, "train_loss_scale": 3453.4901960784314, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8680756793302646, "val_loss": 0.8021030003825823, "val_acc1": 72.91666718324025, "val_acc5": 100.0, "val_uar": 0.6379966697799718, "val_war": 0.7291666666666666, "val_weighted_f1": 0.720774484294437, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6498673125897038, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908943685534389e-05, "train_min_lr": 2.203749134966631e-07, "train_loss": 0.9657601387866961, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1030593876745187, "val_loss": 0.8032776936888695, "val_acc1": 73.17708402872086, "val_acc5": 100.0, "val_uar": 0.6175739709779124, "val_war": 0.7317708333333334, "val_weighted_f1": 0.720063527425974, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6507110368205012, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.823031814372137e-05, "train_min_lr": 2.1553145124731387e-07, "train_loss": 0.9777187343711168, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2538329806982302, "val_loss": 0.7444020273784796, "val_acc1": 75.00000129143397, "val_acc5": 100.0, "val_uar": 0.6678037873628248, "val_war": 0.75, "val_weighted_f1": 0.7443798621908866, "val_micro_f1": 0.75, "val_macro_f1": 0.675486128002667, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.736030318644632e-05, "train_min_lr": 2.1062655912364843e-07, "train_loss": 0.9604086725929983, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6670334853377997, "val_loss": 0.7448610626161098, "val_acc1": 75.52083424727122, "val_acc5": 100.0, "val_uar": 0.6543888094824313, "val_war": 0.7552083333333334, "val_weighted_f1": 0.7460265057266215, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6843029722239021, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.648034333210112e-05, "train_min_lr": 2.0566560055319138e-07, "train_loss": 0.9725561680552227, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.139049637551401, "val_loss": 0.7555768589178721, "val_acc1": 74.2187506755193, "val_acc5": 100.0, "val_uar": 0.648194751001266, "val_war": 0.7421875, "val_weighted_f1": 0.734958898561029, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6731214108455809, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.5591400803867063e-05, "train_min_lr": 2.0065400027130337e-07, "train_loss": 0.9532668016899645, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7755207945318783, "val_loss": 0.7677972912788391, "val_acc1": 73.17708432674408, "val_acc5": 100.0, "val_uar": 0.6692948427066928, "val_war": 0.7317708333333334, "val_weighted_f1": 0.7311852571675538, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6679964167136211, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.469444764734741e-05, "train_min_lr": 1.9559723838931282e-07, "train_loss": 0.9642176517084533, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.95563075355455, "val_loss": 0.782885055989027, "val_acc1": 72.65625037749608, "val_acc5": 100.0, "val_uar": 0.5915752970825218, "val_war": 0.7265625, "val_weighted_f1": 0.7051815308189499, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6267523525956892, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.379046466764991e-05, "train_min_lr": 1.905008444020954e-07, "train_loss": 0.95510362565907, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.007989252314848, "val_loss": 0.7588942361374696, "val_acc1": 74.7395846247673, "val_acc5": 100.0, "val_uar": 0.6576697871307139, "val_war": 0.7473958333333334, "val_weighted_f1": 0.7393195262781456, "val_micro_f1": 0.7473958333333334, "val_macro_f1": 0.6810337298171208, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.288044035689072e-05, "train_min_lr": 1.8537039114165132e-07, "train_loss": 0.9791481505616818, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8775262482026043, "val_loss": 0.7836711468795935, "val_acc1": 72.65625063578288, "val_acc5": 100.0, "val_uar": 0.6053750383745681, "val_war": 0.7265625, "val_weighted_f1": 0.7092492370599999, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6428571077348143, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.19653698132928e-05, "train_min_lr": 1.8021148868329666e-07, "train_loss": 0.9494384249635771, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.08924177347445, "val_loss": 0.746081513663133, "val_acc1": 76.04166728258133, "val_acc5": 100.0, "val_uar": 0.6886897186561601, "val_war": 0.7604166666666666, "val_weighted_f1": 0.756060663948115, "val_micro_f1": 0.7604166666666666, "val_macro_f1": 0.6926477590811965, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.1046253653060647e-05, "train_min_lr": 1.750297782111272e-07, "train_loss": 0.9494578947428784, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3563793009402705, "val_loss": 0.7826471428076426, "val_acc1": 73.69791754086812, "val_acc5": 100.0, "val_uar": 0.6449390907848928, "val_war": 0.7369791666666666, "val_weighted_f1": 0.7273981458758602, "val_micro_f1": 0.7369791666666666, "val_macro_f1": 0.6530674566132638, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.012409691622117e-05, "train_min_lr": 1.6983092584946713e-07, "train_loss": 0.9729235166428136, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.942185233621036, "val_loss": 0.7560352757573128, "val_acc1": 74.21875079472859, "val_acc5": 100.0, "val_uar": 0.6513364214223483, "val_war": 0.7421875, "val_weighted_f1": 0.7345163911773888, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6800967242512758, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9199907967627092e-05, "train_min_lr": 1.6462061646704509e-07, "train_loss": 0.9630369960872176, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.772491836080364, "val_loss": 0.76610491797328, "val_acc1": 72.65625049670537, "val_acc5": 100.0, "val_uar": 0.621866239024883, "val_war": 0.7265625, "val_weighted_f1": 0.7178280280083729, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6449723203283014, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8274697394324934e-05, "train_min_lr": 1.5940454746067394e-07, "train_loss": 0.9381165210328071, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8533251659542906, "val_loss": 0.7578220218420029, "val_acc1": 72.65625049670537, "val_acc5": 100.0, "val_uar": 0.6455383746915039, "val_war": 0.7265625, "val_weighted_f1": 0.7228571647856309, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6499217355240514, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.734947690049276e-05, "train_min_lr": 1.5418842252523044e-07, "train_loss": 0.9589143450938019, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.918565684673833, "val_loss": 0.7945439095298449, "val_acc1": 70.05208361148834, "val_acc5": 100.0, "val_uar": 0.5931143893631069, "val_war": 0.7005208333333334, "val_weighted_f1": 0.6823199734951482, "val_micro_f1": 0.7005208333333334, "val_macro_f1": 0.6046869501238682, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.6425258201156672e-05, "train_min_lr": 1.4897794541674922e-07, "train_loss": 0.9347763599133959, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7749400699839875, "val_loss": 0.8006525908907255, "val_acc1": 70.05208379030228, "val_acc5": 100.0, "val_uar": 0.6068167619702333, "val_war": 0.7005208333333334, "val_weighted_f1": 0.6924436329634371, "val_micro_f1": 0.7005208333333334, "val_macro_f1": 0.6306999051154721, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.5503051915895116e-05, "train_min_lr": 1.437788137154489e-07, "train_loss": 0.9395227046573863, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.904709956225227, "val_loss": 0.7573946801324686, "val_acc1": 73.43750001986821, "val_acc5": 100.0, "val_uar": 0.6295544333260219, "val_war": 0.734375, "val_weighted_f1": 0.726653590061812, "val_micro_f1": 0.734375, "val_macro_f1": 0.6652113674184854, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.458386646374134e-05, "train_min_lr": 1.3859671259551204e-07, "train_loss": 0.9709009056776957, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9325629612978767, "val_loss": 0.7777895554900169, "val_acc1": 72.65625057617824, "val_acc5": 100.0, "val_uar": 0.6157505006235342, "val_war": 0.7265625, "val_weighted_f1": 0.7171101268024422, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6415181035969978, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3668706960491932e-05, "train_min_lr": 1.334373086084304e-07, "train_loss": 0.9411414944463306, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6783611119962205, "val_loss": 0.7610190808773041, "val_acc1": 75.2604169845581, "val_acc5": 100.0, "val_uar": 0.6416713105730719, "val_war": 0.7526041666666666, "val_weighted_f1": 0.741628969930657, "val_micro_f1": 0.7526041666666666, "val_macro_f1": 0.6765815814510064, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.275857411962744e-05, "train_min_lr": 1.2830624348671418e-07, "train_loss": 0.9450426507814258, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9757400003134036, "val_loss": 0.7809199094772339, "val_acc1": 73.17708430687587, "val_acc5": 100.0, "val_uar": 0.6305899769626685, "val_war": 0.7317708333333334, "val_weighted_f1": 0.7240565051653479, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6625037229032466, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.1854463158046933e-05, "train_min_lr": 1.232091279747406e-07, "train_loss": 0.953262268230806, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7014853323207184, "val_loss": 0.8071655482053757, "val_acc1": 69.53125085433324, "val_acc5": 100.0, "val_uar": 0.6180076170030001, "val_war": 0.6953125, "val_weighted_f1": 0.6891271807372298, "val_micro_f1": 0.6953125, "val_macro_f1": 0.6285253458641556, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.0957362707812887e-05, "train_min_lr": 1.1815153569348674e-07, "train_loss": 0.9384882610802557, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.973921296643276, "val_loss": 0.8184739301602045, "val_acc1": 71.35416760047276, "val_acc5": 100.0, "val_uar": 0.6424371435219162, "val_war": 0.7109375, "val_weighted_f1": 0.7077296725542475, "val_micro_f1": 0.7109375, "val_macro_f1": 0.6550842515504768, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.0068253735096514e-05, "train_min_lr": 1.1313899704585739e-07, "train_loss": 0.9491340979641559, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.653597602657243, "val_loss": 0.811932715276877, "val_acc1": 71.61458436648051, "val_acc5": 100.0, "val_uar": 0.6119061967792303, "val_war": 0.7161458333333334, "val_weighted_f1": 0.7038917743853217, "val_micro_f1": 0.7161458333333334, "val_macro_f1": 0.6402868056697643, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.9188108467505722e-05, "train_min_lr": 1.0817699316927043e-07, "train_loss": 0.9331524703237746, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6588118497063133, "val_loss": 0.8147495972613493, "val_acc1": 70.05208361148834, "val_acc5": 100.0, "val_uar": 0.5666610235362373, "val_war": 0.7005208333333334, "val_weighted_f1": 0.6759211288183403, "val_micro_f1": 0.7005208333333334, "val_macro_f1": 0.5856164503933609, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.831788933096846e-05, "train_min_lr": 1.0327094994211341e-07, "train_loss": 0.9301849631896986, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8209824982811424, "val_loss": 0.7858394781748453, "val_acc1": 72.39583321412404, "val_acc5": 100.0, "val_uar": 0.6068532062237174, "val_war": 0.7239583333333334, "val_weighted_f1": 0.70622577858414, "val_micro_f1": 0.7239583333333334, "val_macro_f1": 0.6363432789429312, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.7458547897334095e-05, "train_min_lr": 9.842623205062551e-08, "train_loss": 0.9448087294312084, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.700501081990261, "val_loss": 0.7645136614640554, "val_acc1": 74.21875097354253, "val_acc5": 100.0, "val_uar": 0.6264565773895886, "val_war": 0.7421875, "val_weighted_f1": 0.7291540649101601, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6593644421624297, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6611023843843597e-05, "train_min_lr": 9.364813712269166e-08, "train_loss": 0.9209117517362233, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6930422011543724, "val_loss": 0.7594487220048904, "val_acc1": 73.17708381017049, "val_acc5": 100.0, "val_uar": 0.6278308556373707, "val_war": 0.7317708333333334, "val_weighted_f1": 0.7206809336940473, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6552717843785126, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.5776243925606218e-05, "train_min_lr": 8.894188993496415e-08, "train_loss": 0.9375692293340084, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.869971717105192, "val_loss": 0.7568278511365255, "val_acc1": 73.69791748126347, "val_acc5": 100.0, "val_uar": 0.631846966500908, "val_war": 0.7369791666666666, "val_weighted_f1": 0.7235850237400915, "val_micro_f1": 0.7369791666666666, "val_macro_f1": 0.6543435210693078, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.495512096220645e-05, "train_min_lr": 8.431263669964643e-08, "train_loss": 0.9398112111037074, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.696271877662808, "val_loss": 0.75521087149779, "val_acc1": 73.958333949248, "val_acc5": 100.0, "val_uar": 0.6501312988830089, "val_war": 0.7395833333333334, "val_weighted_f1": 0.7318964240499083, "val_micro_f1": 0.7395833333333334, "val_macro_f1": 0.6670592337109676, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.4148552839549153e-05, "train_min_lr": 7.976543943718531e-08, "train_loss": 0.9376885260826622, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.695151261254853, "val_loss": 0.7867209352552891, "val_acc1": 71.35416730244954, "val_acc5": 100.0, "val_uar": 0.5865061587520121, "val_war": 0.7135416666666666, "val_weighted_f1": 0.6940645545163515, "val_micro_f1": 0.7135416666666666, "val_macro_f1": 0.6126736891609453, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3357421528034545e-05, "train_min_lr": 7.530527044102595e-08, "train_loss": 0.9416443833143883, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.72232449054718, "val_loss": 0.7655087436238924, "val_acc1": 73.95833400885265, "val_acc5": 100.0, "val_uar": 0.633673508987719, "val_war": 0.7395833333333334, "val_weighted_f1": 0.7281176285940968, "val_micro_f1": 0.7395833333333334, "val_macro_f1": 0.6630299079945967, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582592118136499e-05, "train_min_lr": 7.093700684048218e-08, "train_loss": 0.933198880818155, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5495076109381283, "val_loss": 0.7685251521567503, "val_acc1": 74.21875045696895, "val_acc5": 100.0, "val_uar": 0.6396547299046017, "val_war": 0.7421875, "val_weighted_f1": 0.7277653582599828, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6550912600349676, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.18249118744388e-05, "train_min_lr": 6.666542526766691e-08, "train_loss": 0.9259655175645367, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.681867340031792, "val_loss": 0.7912199807663759, "val_acc1": 70.83333377043407, "val_acc5": 100.0, "val_uar": 0.6235913362110354, "val_war": 0.7083333333333334, "val_weighted_f1": 0.7012297139495676, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6297916534413623, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1085209309163785e-05, "train_min_lr": 6.249519663431545e-08, "train_loss": 0.9257161557869195, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.590961157106886, "val_loss": 0.7902270915607611, "val_acc1": 72.13541670640309, "val_acc5": 100.0, "val_uar": 0.6168556250742468, "val_war": 0.7213541666666666, "val_weighted_f1": 0.707716831945414, "val_micro_f1": 0.7213541666666666, "val_macro_f1": 0.640131654159329, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0364293276206376e-05, "train_min_lr": 5.8430881024211504e-08, "train_loss": 0.9284936998015135, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.692507577877419, "val_loss": 0.7885697558522224, "val_acc1": 71.87500087420146, "val_acc5": 100.0, "val_uar": 0.5959456118848219, "val_war": 0.71875, "val_weighted_f1": 0.7034374528068522, "val_micro_f1": 0.71875, "val_macro_f1": 0.6248081791979251, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.66295208666423e-06, "train_min_lr": 5.447692270680312e-08, "train_loss": 0.9321156933027155, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.573897674971936, "val_loss": 0.7357786347468694, "val_acc1": 75.5208346247673, "val_acc5": 100.0, "val_uar": 0.6556036825723043, "val_war": 0.7552083333333334, "val_weighted_f1": 0.746040904951711, "val_micro_f1": 0.7552083333333334, "val_macro_f1": 0.6742404810636766, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.981952646831133e-06, "train_min_lr": 5.063764527745896e-08, "train_loss": 0.9279865428707958, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7140867990605972, "val_loss": 0.7491406984627247, "val_acc1": 73.4375009338061, "val_acc5": 100.0, "val_uar": 0.6210358198794416, "val_war": 0.734375, "val_weighted_f1": 0.7210656258475282, "val_micro_f1": 0.734375, "val_macro_f1": 0.6477507404669199, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.322039619596193e-06, "train_min_lr": 4.69172469296803e-08, "train_loss": 0.9387294363741782, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6064211784624587, "val_loss": 0.7681918715437254, "val_acc1": 72.65625077486038, "val_acc5": 100.0, "val_uar": 0.6134647780605801, "val_war": 0.7265625, "val_weighted_f1": 0.710278673901083, "val_micro_f1": 0.7265625, "val_macro_f1": 0.6393656795737107, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.683934610165963e-06, "train_min_lr": 4.3319795864438076e-08, "train_loss": 0.9221527529697792, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.527185045036615, "val_loss": 0.7714922105272611, "val_acc1": 72.91666706403096, "val_acc5": 100.0, "val_uar": 0.5909499135741013, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7096256601618302, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6321426580309676, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.06833537699968e-06, "train_min_lr": 3.9849225841654914e-08, "train_loss": 0.9286882455637252, "train_loss_scale": 6023.529411764706, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5864350468504664, "val_loss": 0.7543615537385145, "val_acc1": 74.21875027815501, "val_acc5": 100.0, "val_uar": 0.6399656707133622, "val_war": 0.7421875, "val_weighted_f1": 0.7282658991777642, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6712728586808016, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.475915068820675e-06, "train_min_lr": 3.650933187869632e-08, "train_loss": 0.9276821073753382, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5464906271766212, "val_loss": 0.7886519283056259, "val_acc1": 72.13541728258133, "val_acc5": 100.0, "val_uar": 0.5921821633710314, "val_war": 0.7213541666666666, "val_weighted_f1": 0.700506526700492, "val_micro_f1": 0.7213541666666666, "val_macro_f1": 0.6269070539267574, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.907321488538122e-06, "train_min_lr": 3.330376610057497e-08, "train_loss": 0.9239224727247276, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7179609233257818, "val_loss": 0.7650442644953728, "val_acc1": 73.43750037749608, "val_acc5": 100.0, "val_uar": 0.6288069839006057, "val_war": 0.734375, "val_weighted_f1": 0.7230909214571186, "val_micro_f1": 0.734375, "val_macro_f1": 0.660716742601405, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.363176384884051e-06, "train_min_lr": 3.02360337464056e-08, "train_loss": 0.9295383571020139, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5468293311549166, "val_loss": 0.7704524857302507, "val_acc1": 72.13541682561238, "val_acc5": 100.0, "val_uar": 0.6130132120406574, "val_war": 0.7213541666666666, "val_weighted_f1": 0.7095684111956312, "val_micro_f1": 0.7213541666666666, "val_macro_f1": 0.6400817617757939, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.844074772540244e-06, "train_min_lr": 2.7309489336477486e-08, "train_loss": 0.9154611106207168, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5602385319915473, "val_loss": 0.7801660858094692, "val_acc1": 71.8750013311704, "val_acc5": 100.0, "val_uar": 0.6108287067311519, "val_war": 0.71875, "val_weighted_f1": 0.7066584143472515, "val_micro_f1": 0.71875, "val_macro_f1": 0.6377818269325934, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350584281498429e-06, "train_min_lr": 2.4527333004135813e-08, "train_loss": 0.9301250666574715, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.704746606303196, "val_loss": 0.7951468005776405, "val_acc1": 69.7916669845581, "val_acc5": 100.0, "val_uar": 0.5727714395838113, "val_war": 0.6979166666666666, "val_weighted_f1": 0.6789508951605817, "val_micro_f1": 0.6979166666666666, "val_macro_f1": 0.6029188016272181, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.883244536365208e-06, "train_min_lr": 2.189260699648274e-08, "train_loss": 0.9329649590783649, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.591701156952802, "val_loss": 0.7720692480603853, "val_acc1": 71.87500041723251, "val_acc5": 100.0, "val_uar": 0.592088448779839, "val_war": 0.71875, "val_weighted_f1": 0.7005424163372638, "val_micro_f1": 0.71875, "val_macro_f1": 0.6209744926228031, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.4425665662904687e-06, "train_min_lr": 1.9408192347725037e-08, "train_loss": 0.9107328673788145, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6769594024209415, "val_loss": 0.7548149960736433, "val_acc1": 74.739584227403, "val_acc5": 100.0, "val_uar": 0.6478395098372013, "val_war": 0.7473958333333334, "val_weighted_f1": 0.7376892799060465, "val_micro_f1": 0.7473958333333334, "val_macro_f1": 0.6763971712094777, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0290322461645205e-06, "train_min_lr": 1.7076805728805288e-08, "train_loss": 0.9235909117592705, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6093138993955125, "val_loss": 0.7367887993653616, "val_acc1": 73.17708371082942, "val_acc5": 100.0, "val_uar": 0.6271416972062492, "val_war": 0.7317708333333334, "val_weighted_f1": 0.7199523371715308, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6546276260424865, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.64309376969497e-06, "train_min_lr": 1.4900996476762211e-08, "train_loss": 0.9110328038923101, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6629607630710974, "val_loss": 0.7660736987988154, "val_acc1": 74.47916754086812, "val_acc5": 100.0, "val_uar": 0.6365810923287839, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7329053489847558, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.667660037441811, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.285173154939547e-06, "train_min_lr": 1.2883143807067995e-08, "train_loss": 0.9085562288566352, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6981604356391755, "val_loss": 0.7492166919012865, "val_acc1": 74.47916752099991, "val_acc5": 100.0, "val_uar": 0.6460668692564212, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7358806707142218, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6707592992499669, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.955661782835546e-06, "train_min_lr": 1.1025454211991155e-08, "train_loss": 0.9244694131262162, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.510657693825516, "val_loss": 0.8022048957645893, "val_acc1": 71.09375089406967, "val_acc5": 100.0, "val_uar": 0.5801881758501544, "val_war": 0.7109375, "val_weighted_f1": 0.6909180695864497, "val_micro_f1": 0.7109375, "val_macro_f1": 0.609797252647683, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.6549199692305295e-06, "train_min_lr": 9.329959047829568e-09, "train_loss": 0.9172819577206194, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7072653910693, "val_loss": 0.7700033684571584, "val_acc1": 73.95833351214726, "val_acc5": 100.0, "val_uar": 0.6304939588059885, "val_war": 0.7395833333333334, "val_weighted_f1": 0.7252329161140995, "val_micro_f1": 0.7395833333333334, "val_macro_f1": 0.6561601294010942, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.3832765708822247e-06, "train_min_lr": 7.798512313652238e-09, "train_loss": 0.9277829008749108, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.631234150306851, "val_loss": 0.8096112087368965, "val_acc1": 70.05208335320155, "val_acc5": 100.0, "val_uar": 0.5784156343257746, "val_war": 0.7005208333333334, "val_weighted_f1": 0.6806241665761346, "val_micro_f1": 0.7005208333333334, "val_macro_f1": 0.6025848301787543, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.1410286258584985e-06, "train_min_lr": 6.4327886239785226e-09, "train_loss": 0.9063912332057953, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6548587901919496, "val_loss": 0.7291603100796541, "val_acc1": 76.30208468437195, "val_acc5": 100.0, "val_uar": 0.6694519003267719, "val_war": 0.7630208333333334, "val_weighted_f1": 0.7526711637667703, "val_micro_f1": 0.7630208333333334, "val_macro_f1": 0.6962984898341916, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.284410287305858e-07, "train_min_lr": 5.234281377611722e-09, "train_loss": 0.9235577076868294, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.819288688547471, "val_loss": 0.737181064983209, "val_acc1": 75.00000087420146, "val_acc5": 100.0, "val_uar": 0.6448061219019239, "val_war": 0.75, "val_weighted_f1": 0.7374297724594517, "val_micro_f1": 0.75, "val_macro_f1": 0.6693721746841171, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.457462409147661e-07, "train_min_lr": 4.204301124629427e-09, "train_loss": 0.9264553115258809, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.511313838117263, "val_loss": 0.7512822474042574, "val_acc1": 75.26041726271312, "val_acc5": 100.0, "val_uar": 0.6525203943327661, "val_war": 0.7526041666666666, "val_weighted_f1": 0.7423081456598107, "val_micro_f1": 0.7526041666666666, "val_macro_f1": 0.6710634236983489, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.931440364792185e-07, "train_min_lr": 3.343974133316262e-09, "train_loss": 0.911232183376948, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6057872070985684, "val_loss": 0.7540426440536976, "val_acc1": 74.47916726271312, "val_acc5": 100.0, "val_uar": 0.6338287809827224, "val_war": 0.7447916666666666, "val_weighted_f1": 0.7311653185732182, "val_micro_f1": 0.7447916666666666, "val_macro_f1": 0.6689644559351985, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.7080128369400994e-07, "train_min_lr": 2.6542411586060335e-09, "train_loss": 0.9181275506035175, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.574301331650977, "val_loss": 0.754668996979793, "val_acc1": 73.69791728258133, "val_acc5": 100.0, "val_uar": 0.618776914276444, "val_war": 0.7369791666666666, "val_weighted_f1": 0.7230671155888632, "val_micro_f1": 0.7369791666666666, "val_macro_f1": 0.651992032606744, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.788517625630929e-07, "train_min_lr": 2.1358564133800288e-09, "train_loss": 0.9310309497359531, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7530226613961015, "val_loss": 0.7521407815317313, "val_acc1": 73.17708388964336, "val_acc5": 100.0, "val_uar": 0.6113840218544818, "val_war": 0.7317708333333334, "val_weighted_f1": 0.7160431234103718, "val_micro_f1": 0.7317708333333334, "val_macro_f1": 0.6405978130173532, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1739601853783493e-07, "train_min_lr": 1.789386743746306e-09, "train_loss": 0.9115871272835077, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.533676921152601, "val_loss": 0.7640266964832941, "val_acc1": 75.00000077486038, "val_acc5": 100.0, "val_uar": 0.6495429483513016, "val_war": 0.75, "val_weighted_f1": 0.740864848762863, "val_micro_f1": 0.75, "val_macro_f1": 0.6756110273172995, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.86501252572043e-07, "train_min_lr": 1.6152110092017891e-09, "train_loss": 0.9039644850235359, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5929427731270884, "val_loss": 0.7528653492530187, "val_acc1": 74.21875071525574, "val_acc5": 100.0, "val_uar": 0.637615106898409, "val_war": 0.7421875, "val_weighted_f1": 0.7287739276731563, "val_micro_f1": 0.7421875, "val_macro_f1": 0.6586935917951419, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 76.84210526315789, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 69.91%, Final WAR: 76.84%
104
+ Final Confusion Matrix:
105
+ [[ 36 1 14 9]
106
+ [ 4 32 22 0]
107
+ [ 6 6 187 15]
108
+ [ 1 0 10 37]]
109
+ Final Class Accuracies: ['60.00%', '55.17%', '87.38%', '77.08%']
110
+ Final Weighted F1: 0.7639, Final Micro F1: 0.7684, Final Macro F1: 0.7121
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split02/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.534638554216868e-06, "train_min_lr": 3.1202687752563384e-08, "train_loss": 2.1439891946315766, "train_loss_scale": 3112.96, "train_weight_decay": 0.04999999999999999, "train_grad_norm": NaN, "val_loss": 1.4161386052767435, "val_acc1": 45.20833395322164, "val_acc5": 100.0, "val_uar": 0.2999741753817841, "val_war": 0.45208333333333334, "val_weighted_f1": 0.408512224680263, "val_micro_f1": 0.45208333333333334, "val_macro_f1": 0.2885463689597856, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.6829819277108436e-05, "train_min_lr": 9.488164234963154e-08, "train_loss": 1.3074590665102006, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 4.600485644340515, "val_loss": 1.0437476277351379, "val_acc1": 59.166666746139526, "val_acc5": 100.0, "val_uar": 0.3665549919082528, "val_war": 0.5916666666666667, "val_weighted_f1": 0.5247783094397876, "val_micro_f1": 0.5916666666666667, "val_macro_f1": 0.36652430434720146, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8124999999999996e-05, "train_min_lr": 1.5856059694669964e-07, "train_loss": 1.2127099722623824, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.7524986362457273, "val_loss": 0.9801374077796936, "val_acc1": 62.9166667620341, "val_acc5": 100.0, "val_uar": 0.44157927117166246, "val_war": 0.6291666666666667, "val_weighted_f1": 0.5909799155042452, "val_micro_f1": 0.6291666666666667, "val_macro_f1": 0.4605867345399773, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.942018072289158e-05, "train_min_lr": 2.2223955154376786e-07, "train_loss": 1.1596066051721572, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.7687974262237547, "val_loss": 0.9054569403330485, "val_acc1": 64.79166677792867, "val_acc5": 100.0, "val_uar": 0.44260796470035596, "val_war": 0.6479166666666667, "val_weighted_f1": 0.5953019959659609, "val_micro_f1": 0.6479166666666667, "val_macro_f1": 0.4623553017269068, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.0715361445783146e-05, "train_min_lr": 2.85918506140836e-07, "train_loss": 1.1485963133970896, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.394153733253479, "val_loss": 0.8843692421913147, "val_acc1": 63.95833311080933, "val_acc5": 100.0, "val_uar": 0.44785058339406164, "val_war": 0.6395833333333333, "val_weighted_f1": 0.5889320380756018, "val_micro_f1": 0.6395833333333333, "val_macro_f1": 0.4622197867299343, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.624505171280574e-05, "train_min_lr": 3.170932968846391e-07, "train_loss": 1.1493859299023945, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3065748500823973, "val_loss": 0.8506525754928589, "val_acc1": 66.66666655540466, "val_acc5": 100.0, "val_uar": 0.5089405914134175, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6411832474534384, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.5385611776723904, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.62147626255171e-05, "train_min_lr": 3.1692253579089827e-07, "train_loss": 1.1281838911771773, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2373706674575806, "val_loss": 0.8013377130031586, "val_acc1": 68.54166626930237, "val_acc5": 100.0, "val_uar": 0.5185988990880295, "val_war": 0.6854166666666667, "val_weighted_f1": 0.6590127226482105, "val_micro_f1": 0.6854166666666667, "val_macro_f1": 0.5500585522721485, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.6153911566580055e-05, "train_min_lr": 3.1657947516050194e-07, "train_loss": 1.123606501619021, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3095980429649354, "val_loss": 0.838248469432195, "val_acc1": 67.5, "val_acc5": 100.0, "val_uar": 0.5020783614533614, "val_war": 0.675, "val_weighted_f1": 0.63968296170781, "val_micro_f1": 0.675, "val_macro_f1": 0.540118224359552, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.606256507573413e-05, "train_min_lr": 3.1606449012521245e-07, "train_loss": 1.1218776977062226, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.180547502040863, "val_loss": 0.8369138340155283, "val_acc1": 69.58333296775818, "val_acc5": 100.0, "val_uar": 0.5782769605595692, "val_war": 0.6958333333333333, "val_weighted_f1": 0.6815226946454768, "val_micro_f1": 0.6958333333333333, "val_macro_f1": 0.6214357594073283, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.594082303902597e-05, "train_min_lr": 3.1537814381360526e-07, "train_loss": 1.1121824876467388, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3999711894989013, "val_loss": 0.8136643032232921, "val_acc1": 67.08333326975504, "val_acc5": 100.0, "val_uar": 0.5111921120345034, "val_war": 0.6708333333333333, "val_weighted_f1": 0.6372474393829269, "val_micro_f1": 0.6708333333333333, "val_macro_f1": 0.5500997608947419, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.5788818579585316e-05, "train_min_lr": 3.1452118673529506e-07, "train_loss": 1.1016174298524857, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3445048713684082, "val_loss": 0.7858907322088877, "val_acc1": 69.58333282470703, "val_acc5": 100.0, "val_uar": 0.5682847456488761, "val_war": 0.6958333333333333, "val_weighted_f1": 0.6844228673852369, "val_micro_f1": 0.6958333333333333, "val_macro_f1": 0.6013356132019492, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.560671791205679e-05, "train_min_lr": 3.134945559602653e-07, "train_loss": 1.1051474513610204, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3410011863708498, "val_loss": 0.7783296366532644, "val_acc1": 70.83333344459534, "val_acc5": 100.0, "val_uar": 0.5733316170544431, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6845621691345082, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6015933162992246, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.539472016084685e-05, "train_min_lr": 3.122993740941958e-07, "train_loss": 1.0928680052359898, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3334229016304016, "val_loss": 0.7612645248572032, "val_acc1": 69.99999976158142, "val_acc5": 100.0, "val_uar": 0.5448284827089175, "val_war": 0.7, "val_weighted_f1": 0.676602330397662, "val_micro_f1": 0.7, "val_macro_f1": 0.579550268125208, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.5153057142383875e-05, "train_min_lr": 3.109369480509093e-07, "train_loss": 1.089852720896403, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.25141761302948, "val_loss": 0.7744999627272288, "val_acc1": 67.29166642824809, "val_acc5": 100.0, "val_uar": 0.5003044700870788, "val_war": 0.6729166666666667, "val_weighted_f1": 0.6421251831886775, "val_micro_f1": 0.6729166666666667, "val_macro_f1": 0.5371955341003576, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.4881993111630514e-05, "train_min_lr": 3.0940876762328156e-07, "train_loss": 1.0873611688613891, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3721104001998903, "val_loss": 0.7529109835624694, "val_acc1": 69.58333326975504, "val_acc5": 100.0, "val_uar": 0.5219635034852427, "val_war": 0.6958333333333333, "val_weighted_f1": 0.6637926926241952, "val_micro_f1": 0.6958333333333333, "val_macro_f1": 0.5525749774613337, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.458182447312465e-05, "train_min_lr": 3.077165038541733e-07, "train_loss": 1.075517100294431, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9623456978797913, "val_loss": 0.7408452888329824, "val_acc1": 69.99999998410543, "val_acc5": 100.0, "val_uar": 0.5373353508407858, "val_war": 0.7, "val_weighted_f1": 0.6737740417505206, "val_micro_f1": 0.7, "val_macro_f1": 0.5723053212051009, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.425287945686548e-05, "train_min_lr": 3.0586200720917075e-07, "train_loss": 1.0717074928681056, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.5410009479522704, "val_loss": 0.7319954137007395, "val_acc1": 71.875, "val_acc5": 100.0, "val_uar": 0.5642254404210925, "val_war": 0.71875, "val_weighted_f1": 0.6980022988345521, "val_micro_f1": 0.71875, "val_macro_f1": 0.6003734992359504, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.3895517759398755e-05, "train_min_lr": 3.0384730555312774e-07, "train_loss": 1.059155861934026, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3811810493469237, "val_loss": 0.7121886312961578, "val_acc1": 73.12499958674113, "val_acc5": 100.0, "val_uar": 0.5726863374145983, "val_war": 0.73125, "val_weighted_f1": 0.7097547259230217, "val_micro_f1": 0.73125, "val_macro_f1": 0.6133802565471859, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.351013015049392e-05, "train_min_lr": 3.0167460193272546e-07, "train_loss": 1.069292402068774, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3652750730514525, "val_loss": 0.7372211039066314, "val_acc1": 72.08333315849305, "val_acc5": 100.0, "val_uar": 0.5806621689502124, "val_war": 0.7208333333333333, "val_weighted_f1": 0.705219044695159, "val_micro_f1": 0.7208333333333333, "val_macro_f1": 0.6280719996243006, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.309713804584307e-05, "train_min_lr": 2.993462721674737e-07, "train_loss": 1.049097421169281, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2866873955726623, "val_loss": 0.7386259297529857, "val_acc1": 69.58333292007447, "val_acc5": 100.0, "val_uar": 0.5289877322757758, "val_war": 0.6958333333333333, "val_weighted_f1": 0.6694553541477668, "val_micro_f1": 0.6958333333333333, "val_macro_f1": 0.5478930478088013, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.265699304624922e-05, "train_min_lr": 2.968648622517863e-07, "train_loss": 1.044680189092954, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1868800783157347, "val_loss": 0.7279674967130025, "val_acc1": 71.45833347638448, "val_acc5": 100.0, "val_uar": 0.5476304150760672, "val_war": 0.7145833333333333, "val_weighted_f1": 0.6871842618025892, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.572361727732843, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.2190176443807244e-05, "train_min_lr": 2.9423308557097456e-07, "train_loss": 1.0511289199193319, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2799708008766175, "val_loss": 0.7080342054367066, "val_acc1": 73.33333326975504, "val_acc5": 100.0, "val_uar": 0.6131456667326233, "val_war": 0.7333333333333333, "val_weighted_f1": 0.7201056420938782, "val_micro_f1": 0.7333333333333333, "val_macro_f1": 0.6385315468802844, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.1697198695618223e-05, "train_min_lr": 2.914538199341998e-07, "train_loss": 1.0432984292507173, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2807287096977236, "val_loss": 0.7164633015791575, "val_acc1": 72.70833293596904, "val_acc5": 100.0, "val_uar": 0.5759705441498919, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7030582134424821, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6013421668176898, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.117859886561189e-05, "train_min_lr": 2.885301044276305e-07, "train_loss": 1.0362503085533779, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.461385989189148, "val_loss": 0.7477144042650858, "val_acc1": 70.41666673024496, "val_acc5": 100.0, "val_uar": 0.6175581342972646, "val_war": 0.7041666666666667, "val_weighted_f1": 0.6956523127883674, "val_micro_f1": 0.7041666666666667, "val_macro_f1": 0.6484961985254336, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063494403508845e-05, "train_min_lr": 2.854651360912481e-07, "train_loss": 1.0271447587013245, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.983648295402527, "val_loss": 0.7268067598342896, "val_acc1": 71.87500012715658, "val_acc5": 100.0, "val_uar": 0.5640041017758409, "val_war": 0.71875, "val_weighted_f1": 0.697201570547847, "val_micro_f1": 0.71875, "val_macro_f1": 0.5986415980003505, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.0066828682623144e-05, "train_min_lr": 2.822622664229288e-07, "train_loss": 1.0398089681069056, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2474295568466185, "val_loss": 0.7183574537436167, "val_acc1": 72.70833314259848, "val_acc5": 100.0, "val_uar": 0.5770977070433592, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7093458254132604, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6054434239127172, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.947487403401295e-05, "train_min_lr": 2.7892499771363074e-07, "train_loss": 1.0116180143753688, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.25725022315979, "val_loss": 0.7190751810868581, "val_acc1": 72.91666690508525, "val_acc5": 100.0, "val_uar": 0.6013997586551934, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7166909577323178, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6449756353419025, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.885972738297512e-05, "train_min_lr": 2.7545697921769044e-07, "train_loss": 1.010276930630207, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.275946571826935, "val_loss": 0.733767302831014, "val_acc1": 70.41666700045268, "val_acc5": 100.0, "val_uar": 0.595728927875667, "val_war": 0.7041666666666667, "val_weighted_f1": 0.694494303880191, "val_micro_f1": 0.7041666666666667, "val_macro_f1": 0.6245433431057557, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.822206138334105e-05, "train_min_lr": 2.718620031624162e-07, "train_loss": 1.0155742063124975, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1117492079734803, "val_loss": 0.7140354017416636, "val_acc1": 72.29166615804037, "val_acc5": 100.0, "val_uar": 0.5887684899641421, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7082685270103194, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.627986586697051, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.7562573313519254e-05, "train_min_lr": 2.6814400060134355e-07, "train_loss": 0.9937466233968735, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.060050368309021, "val_loss": 0.7352464139461518, "val_acc1": 70.41666671435038, "val_acc5": 100.0, "val_uar": 0.5612810378027769, "val_war": 0.7041666666666667, "val_weighted_f1": 0.6874152766918578, "val_micro_f1": 0.7041666666666667, "val_macro_f1": 0.6051181599091192, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.6881984314031596e-05, "train_min_lr": 2.643070371156856e-07, "train_loss": 1.0082397441069284, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0292462944984435, "val_loss": 0.7139395276705424, "val_acc1": 72.08333282470703, "val_acc5": 100.0, "val_uar": 0.6141820548614026, "val_war": 0.7208333333333333, "val_weighted_f1": 0.7091108882258524, "val_micro_f1": 0.7208333333333333, "val_macro_f1": 0.6401122506953443, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.618103859895663e-05, "train_min_lr": 2.603553083686807e-07, "train_loss": 0.9931151429812114, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0883442687988283, "val_loss": 0.7427766581376394, "val_acc1": 72.08333309491475, "val_acc5": 100.0, "val_uar": 0.640241848529892, "val_war": 0.7208333333333333, "val_weighted_f1": 0.7129930923898623, "val_micro_f1": 0.7208333333333333, "val_macro_f1": 0.6564548445142864, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.54605026421427e-05, "train_min_lr": 2.5629313551769454e-07, "train_loss": 0.9951733401417733, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.374349057674408, "val_loss": 0.7275593479474386, "val_acc1": 70.62499969800314, "val_acc5": 100.0, "val_uar": 0.553289888452932, "val_war": 0.70625, "val_weighted_f1": 0.685865124352896, "val_micro_f1": 0.70625, "val_macro_f1": 0.5879602497907316, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.472116433907969e-05, "train_min_lr": 2.521249604890999e-07, "train_loss": 1.0095389261841774, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.071054437160492, "val_loss": 0.7777563631534576, "val_acc1": 68.95833320617676, "val_acc5": 100.0, "val_uar": 0.5020074782303044, "val_war": 0.6895833333333333, "val_weighted_f1": 0.6598102078247386, "val_micro_f1": 0.6895833333333333, "val_macro_f1": 0.5348460582200106, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.396383214534723e-05, "train_min_lr": 2.4785534112109367e-07, "train_loss": 0.9669511471192042, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.951993942260742, "val_loss": 0.739057699839274, "val_acc1": 70.83333349227905, "val_acc5": 100.0, "val_uar": 0.536898423637554, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6804169628324995, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.5732156740748482, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.3189334192580065e-05, "train_min_lr": 2.434889461797689e-07, "train_loss": 0.9852156601349513, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0651321268081664, "val_loss": 0.713111142317454, "val_acc1": 71.24999965031942, "val_acc5": 100.0, "val_uar": 0.5693661008878401, "val_war": 0.7125, "val_weighted_f1": 0.6931114094803904, "val_micro_f1": 0.7125, "val_macro_f1": 0.6040452838030007, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.239851738291851e-05, "train_min_lr": 2.390305502538873e-07, "train_loss": 0.9874759773413341, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.099198317527771, "val_loss": 0.7063378810882568, "val_acc1": 72.91666669845581, "val_acc5": 100.0, "val_uar": 0.5974977146444538, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7137533405335303, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6304802587005167, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.1592246462933264e-05, "train_min_lr": 2.344850285339361e-07, "train_loss": 0.9969244120518367, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.940276608467102, "val_loss": 0.7173435290654501, "val_acc1": 72.70833322207133, "val_acc5": 100.0, "val_uar": 0.6372895522352043, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7181586301168683, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6619376547286071, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.0771403078037764e-05, "train_min_lr": 2.2985735148117964e-07, "train_loss": 0.972076964477698, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.919877016544342, "val_loss": 0.7222184936205546, "val_acc1": 69.3750001748403, "val_acc5": 100.0, "val_uar": 0.5475070601701035, "val_war": 0.69375, "val_weighted_f1": 0.6731087843020924, "val_micro_f1": 0.69375, "val_macro_f1": 0.5692936415076758, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993688480842195e-05, "train_min_lr": 2.251525793925321e-07, "train_loss": 0.9819325158993403, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8109434509277342, "val_loss": 0.7117080688476562, "val_acc1": 72.70833322207133, "val_acc5": 100.0, "val_uar": 0.6167164618795054, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7165178227347834, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6526796889324522, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908960418756133e-05, "train_min_lr": 2.2037585686719765e-07, "train_loss": 0.9920976442098618, "train_loss_scale": 3932.16, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0086033582687377, "val_loss": 0.7255602836608886, "val_acc1": 70.4166667620341, "val_acc5": 100.0, "val_uar": 0.6024438705416966, "val_war": 0.7041666666666667, "val_weighted_f1": 0.6938443330238733, "val_micro_f1": 0.7041666666666667, "val_macro_f1": 0.6321055596380951, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.823048770437516e-05, "train_min_lr": 2.1553240718112658e-07, "train_loss": 0.9555754195650419, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9676862263679507, "val_loss": 0.7149846116701762, "val_acc1": 72.0833335240682, "val_acc5": 100.0, "val_uar": 0.6041062510355989, "val_war": 0.7208333333333333, "val_weighted_f1": 0.7057232617825827, "val_micro_f1": 0.7208333333333333, "val_macro_f1": 0.6253723047660503, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.736047479012439e-05, "train_min_lr": 2.1062752657544e-07, "train_loss": 0.9660384351015091, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.946704704761505, "val_loss": 0.6889751295248667, "val_acc1": 71.45833328564962, "val_acc5": 100.0, "val_uar": 0.6051996885964278, "val_war": 0.7145833333333333, "val_weighted_f1": 0.7050369012390515, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.6370840786817376, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.648051679115738e-05, "train_min_lr": 2.0566657846506721e-07, "train_loss": 0.9637825634082159, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1525715970993042, "val_loss": 0.7076750576496125, "val_acc1": 71.66666666666667, "val_acc5": 100.0, "val_uar": 0.5819694435998782, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7025776557170287, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6191180980918161, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.559157592862656e-05, "train_min_lr": 2.0065498757393113e-07, "train_loss": 0.9548411321640015, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0832430720329285, "val_loss": 0.7023991604646047, "val_acc1": 71.87499984105428, "val_acc5": 100.0, "val_uar": 0.590151235640366, "val_war": 0.71875, "val_weighted_f1": 0.7036524741009024, "val_micro_f1": 0.71875, "val_macro_f1": 0.6226098111160413, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.46946242463138e-05, "train_min_lr": 1.955982340030917e-07, "train_loss": 0.9624911100665728, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1153389191627503, "val_loss": 0.7021354953447978, "val_acc1": 71.45833292007447, "val_acc5": 100.0, "val_uar": 0.6493807882938317, "val_war": 0.7145833333333333, "val_weighted_f1": 0.7108673144192583, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.6524076493069302, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.3790642547714803e-05, "train_min_lr": 1.9050184723833633e-07, "train_loss": 0.966951310634613, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.151498911380768, "val_loss": 0.7261670728524526, "val_acc1": 71.45833261807759, "val_acc5": 100.0, "val_uar": 0.6004567895872243, "val_war": 0.7145833333333333, "val_weighted_f1": 0.7036671722403227, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.6263098477706648, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.28806193235449e-05, "train_min_lr": 1.853714001037677e-07, "train_loss": 0.9446447539329529, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7098976373672485, "val_loss": 0.7445690592130025, "val_acc1": 69.37499985694885, "val_acc5": 100.0, "val_uar": 0.570312404986318, "val_war": 0.69375, "val_weighted_f1": 0.6789225007612149, "val_micro_f1": 0.69375, "val_macro_f1": 0.6059462902214606, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.196554967083885e-05, "train_min_lr": 1.8021250266800308e-07, "train_loss": 0.9617807240287463, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.901704535484314, "val_loss": 0.74134414990743, "val_acc1": 71.04166626930237, "val_acc5": 100.0, "val_uar": 0.5771431265996483, "val_war": 0.7104166666666667, "val_weighted_f1": 0.6948869197358204, "val_micro_f1": 0.7104166666666667, "val_macro_f1": 0.617111031840641, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.104643420482698e-05, "train_min_lr": 1.7503079610964627e-07, "train_loss": 0.9418039154012998, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.910436053276062, "val_loss": 0.7099077045917511, "val_acc1": 71.24999979337056, "val_acc5": 100.0, "val_uar": 0.5812010915815263, "val_war": 0.7125, "val_weighted_f1": 0.6982047833265603, "val_micro_f1": 0.7125, "val_macro_f1": 0.6115115911141594, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.0124277964777072e-05, "train_min_lr": 1.6983194654874177e-07, "train_loss": 0.9546687109271685, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.797090618610382, "val_loss": 0.7414146165053049, "val_acc1": 71.24999980926513, "val_acc5": 100.0, "val_uar": 0.6038688867493215, "val_war": 0.7125, "val_weighted_f1": 0.7012772414939868, "val_micro_f1": 0.7125, "val_macro_f1": 0.6306372615377106, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9200089314998646e-05, "train_min_lr": 1.6462163885095578e-07, "train_loss": 0.964675439397494, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9482242894172668, "val_loss": 0.7321940680344899, "val_acc1": 71.6666666507721, "val_acc5": 100.0, "val_uar": 0.5801338054055446, "val_war": 0.7166666666666667, "val_weighted_f1": 0.699707946740627, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6106989922566941, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8274878842211425e-05, "train_min_lr": 1.5940557041125874e-07, "train_loss": 0.9684689784049988, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.995716254711151, "val_loss": 0.726820319890976, "val_acc1": 70.83333344459534, "val_acc5": 100.0, "val_uar": 0.5740516930734322, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6937457584806003, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6050851556794339, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.7349658250483626e-05, "train_min_lr": 1.54189444923908e-07, "train_loss": 0.9324715499083202, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7671805095672606, "val_loss": 0.7253347913424174, "val_acc1": 71.87499979337056, "val_acc5": 100.0, "val_uar": 0.5760899848943327, "val_war": 0.71875, "val_weighted_f1": 0.7024439911939913, "val_micro_f1": 0.71875, "val_macro_f1": 0.6036870401564551, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.642543925494833e-05, "train_min_lr": 1.4897896614554152e-07, "train_loss": 0.9487365340193112, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6597850131988525, "val_loss": 0.7329872985680898, "val_acc1": 71.04166652361552, "val_acc5": 100.0, "val_uar": 0.578457356854096, "val_war": 0.7104166666666667, "val_weighted_f1": 0.6994194051487115, "val_micro_f1": 0.7104166666666667, "val_macro_f1": 0.6143375979852934, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.5503232475507892e-05, "train_min_lr": 1.4377983165820403e-07, "train_loss": 0.9406164247790972, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9483313179016113, "val_loss": 0.7178422172864278, "val_acc1": 72.70833309491475, "val_acc5": 100.0, "val_uar": 0.6231678546895938, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7151010739586117, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6523916009979582, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.458404633173596e-05, "train_min_lr": 1.3859772663912447e-07, "train_loss": 0.9391853309671084, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6872464418411255, "val_loss": 0.7253218193848928, "val_acc1": 71.8749994913737, "val_acc5": 100.0, "val_uar": 0.6304256954800432, "val_war": 0.71875, "val_weighted_f1": 0.7124898299289494, "val_micro_f1": 0.71875, "val_macro_f1": 0.6612214662108951, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3668885940185398e-05, "train_min_lr": 1.3343831764405833e-07, "train_loss": 0.9579837396740913, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8451045155525208, "val_loss": 0.7235785285631816, "val_acc1": 71.45833306312561, "val_acc5": 100.0, "val_uar": 0.5936539718333196, "val_war": 0.7145833333333333, "val_weighted_f1": 0.7030271747062289, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.6305940782356761, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.275875201530809e-05, "train_min_lr": 1.2830724641099214e-07, "train_loss": 0.929708745777607, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7177775716781616, "val_loss": 0.7154483377933503, "val_acc1": 73.12499980926513, "val_acc5": 100.0, "val_uar": 0.6599313950944385, "val_war": 0.73125, "val_weighted_f1": 0.7263989067184523, "val_micro_f1": 0.73125, "val_macro_f1": 0.6754244657834505, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.185463977518844e-05, "train_min_lr": 1.232101236909854e-07, "train_loss": 0.9379495118061701, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.724037847518921, "val_loss": 0.7144957900047302, "val_acc1": 71.04166584014892, "val_acc5": 100.0, "val_uar": 0.5929589140730445, "val_war": 0.7104166666666667, "val_weighted_f1": 0.6993676224932106, "val_micro_f1": 0.7104166666666667, "val_macro_f1": 0.6243499793896341, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.0957537853286983e-05, "train_min_lr": 1.1815252311289737e-07, "train_loss": 0.9325537743171056, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7579709935188292, "val_loss": 0.7095620592435201, "val_acc1": 73.12499952316284, "val_acc5": 100.0, "val_uar": 0.6320330866798258, "val_war": 0.73125, "val_weighted_f1": 0.7233946127329938, "val_micro_f1": 0.73125, "val_macro_f1": 0.6557183472058766, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.0068427217384223e-05, "train_min_lr": 1.1313997508870528e-07, "train_loss": 0.9513823621471723, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8070922899246216, "val_loss": 0.7203496297200521, "val_acc1": 71.66666644414266, "val_acc5": 100.0, "val_uar": 0.6102979870371176, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7076972661055759, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6394577520142619, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.918828009690669e-05, "train_min_lr": 1.0817796076607999e-07, "train_loss": 0.9300014374653498, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.700849211215973, "val_loss": 0.7233891864617665, "val_acc1": 72.29166657129923, "val_acc5": 100.0, "val_uar": 0.5935685681066115, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7102013242387053, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.6256454353488741, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.8318058919808455e-05, "train_min_lr": 1.0327190603483182e-07, "train_loss": 0.9242552456259727, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.687107937335968, "val_loss": 0.7011552552382151, "val_acc1": 72.91666620572408, "val_acc5": 100.0, "val_uar": 0.6669190199896721, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7252492864687933, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.672959061378334, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.7458715260170226e-05, "train_min_lr": 9.84271755937794e-08, "train_loss": 0.939841120839119, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.811205551624298, "val_loss": 0.6896425108114879, "val_acc1": 71.66666633288065, "val_acc5": 100.0, "val_uar": 0.6085104317984753, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7051293956051462, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6344414572892904, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6611188797667042e-05, "train_min_lr": 9.36490670845304e-08, "train_loss": 0.9461074270804724, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7504291677474977, "val_loss": 0.7351758599281311, "val_acc1": 72.29166641235352, "val_acc5": 100.0, "val_uar": 0.6112205286661808, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7136994783802761, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.6410794538716442, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.577640629004241e-05, "train_min_lr": 8.894280529858815e-08, "train_loss": 0.9311703752477963, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.658975455760956, "val_loss": 0.7216008563836416, "val_acc1": 70.62499992052715, "val_acc5": 100.0, "val_uar": 0.6037134785504351, "val_war": 0.70625, "val_weighted_f1": 0.6991599436827286, "val_micro_f1": 0.70625, "val_macro_f1": 0.6213526958276866, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.495528055971225e-05, "train_min_lr": 8.431353646411905e-08, "train_loss": 0.9452134019136429, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8207333755493162, "val_loss": 0.7471796075503031, "val_acc1": 70.41666674613953, "val_acc5": 100.0, "val_uar": 0.5604643052197401, "val_war": 0.7041666666666667, "val_weighted_f1": 0.6881214378842393, "val_micro_f1": 0.7041666666666667, "val_macro_f1": 0.6014523855217762, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.4148709495607045e-05, "train_min_lr": 7.976632261862724e-08, "train_loss": 0.9262746927142144, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7074118185043337, "val_loss": 0.7351492424805959, "val_acc1": 71.24999987284342, "val_acc5": 100.0, "val_uar": 0.5876790842008234, "val_war": 0.7125, "val_weighted_f1": 0.6995421998807244, "val_micro_f1": 0.7125, "val_macro_f1": 0.6197795878215084, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3357575071343419e-05, "train_min_lr": 7.53061360736913e-08, "train_loss": 0.9257999484737715, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5935407185554507, "val_loss": 0.710469228029251, "val_acc1": 71.6666662534078, "val_acc5": 100.0, "val_uar": 0.6034916743884136, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7048595377100765, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6263731091347171, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582742380799007e-05, "train_min_lr": 7.093785397781427e-08, "train_loss": 0.9422976590196291, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.795968568325043, "val_loss": 0.7144898037115733, "val_acc1": 71.6666663646698, "val_acc5": 100.0, "val_uar": 0.6242700455743935, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7112079024055807, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6494774597984627, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.1825058692144917e-05, "train_min_lr": 6.666625298333363e-08, "train_loss": 0.9392275619506836, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7843172097206117, "val_loss": 0.7247319598992665, "val_acc1": 72.91666639645895, "val_acc5": 100.0, "val_uar": 0.6301844753475189, "val_war": 0.7291666666666666, "val_weighted_f1": 0.7214018866408675, "val_micro_f1": 0.7291666666666666, "val_macro_f1": 0.6687236632051081, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1085352521370515e-05, "train_min_lr": 6.249600402322172e-08, "train_loss": 0.9186914934714635, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7418998169898985, "val_loss": 0.7240898430347442, "val_acc1": 69.99999974568685, "val_acc5": 100.0, "val_uar": 0.606427164823904, "val_war": 0.7, "val_weighted_f1": 0.6927660586129671, "val_micro_f1": 0.7, "val_macro_f1": 0.6219445004001458, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0364432726313269e-05, "train_min_lr": 5.843166720348948e-08, "train_loss": 0.9421690013011297, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8167037415504455, "val_loss": 0.7126811007658641, "val_acc1": 72.70833311080932, "val_acc5": 100.0, "val_uar": 0.6566227130357565, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7241666455292576, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6674812172443624, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.663087622184646e-06, "train_min_lr": 5.44776868167773e-08, "train_loss": 0.9281641046206156, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7349632716178895, "val_loss": 0.7298963228861491, "val_acc1": 71.04166626930237, "val_acc5": 100.0, "val_uar": 0.6186799120766512, "val_war": 0.7104166666666667, "val_weighted_f1": 0.7051847498826771, "val_micro_f1": 0.7104166666666667, "val_macro_f1": 0.6432036513914385, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.982084119558961e-06, "train_min_lr": 5.06383864825863e-08, "train_loss": 0.9321223678191503, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.728024945259094, "val_loss": 0.7038864692052206, "val_acc1": 72.29166622161866, "val_acc5": 100.0, "val_uar": 0.6233610471653949, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7144054192546978, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.643007540099584, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.322166885767945e-06, "train_min_lr": 4.69179644194639e-08, "train_loss": 0.936270577609539, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6903075623512267, "val_loss": 0.7086379965146382, "val_acc1": 72.2916663646698, "val_acc5": 100.0, "val_uar": 0.623969596225031, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7148977552387389, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.6490395119661813, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.684057530617942e-06, "train_min_lr": 4.3320488854313384e-08, "train_loss": 0.9250731374820074, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.565261013507843, "val_loss": 0.728853581349055, "val_acc1": 70.83333350817362, "val_acc5": 100.0, "val_uar": 0.5781377660182007, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6948290935811248, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6107934875709977, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.068453817320182e-06, "train_min_lr": 3.984989357384769e-08, "train_loss": 0.9210477298498154, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.4358600187301636, "val_loss": 0.7351715107758839, "val_acc1": 71.24999950726827, "val_acc5": 100.0, "val_uar": 0.5969288481245004, "val_war": 0.7125, "val_weighted_f1": 0.7012698104107992, "val_micro_f1": 0.7125, "val_macro_f1": 0.6242473043866923, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.476028899496948e-06, "train_min_lr": 3.6509973623051173e-08, "train_loss": 0.9204510618249575, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6866182017326357, "val_loss": 0.7110623379548391, "val_acc1": 73.12500015894572, "val_acc5": 100.0, "val_uar": 0.616415888861541, "val_war": 0.73125, "val_weighted_f1": 0.719928610316681, "val_micro_f1": 0.73125, "val_macro_f1": 0.6474776211423267, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.907430585097989e-06, "train_min_lr": 3.330438115535389e-08, "train_loss": 0.9317578200499217, "train_loss_scale": 7864.32, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.682894425392151, "val_loss": 0.7022555828094482, "val_acc1": 71.875, "val_acc5": 100.0, "val_uar": 0.6047864997321518, "val_war": 0.71875, "val_weighted_f1": 0.7083411631794169, "val_micro_f1": 0.71875, "val_macro_f1": 0.6276475712549706, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.3632806280320225e-06, "train_min_lr": 3.0236621439055217e-08, "train_loss": 0.9335366848111153, "train_loss_scale": 4587.52, "train_weight_decay": 0.04999999999999999, "train_grad_norm": Infinity, "val_loss": 0.7522397041320801, "val_acc1": 71.0416664759318, "val_acc5": 100.0, "val_uar": 0.5784247073377508, "val_war": 0.7104166666666667, "val_weighted_f1": 0.698494505389247, "val_micro_f1": 0.7104166666666667, "val_macro_f1": 0.6176034713868769, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.844174048287975e-06, "train_min_lr": 2.73100490243645e-08, "train_loss": 0.9242199921607971, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.558112633228302, "val_loss": 0.6966041247049968, "val_acc1": 73.33333320617676, "val_acc5": 100.0, "val_uar": 0.6400216812173334, "val_war": 0.7333333333333333, "val_weighted_f1": 0.7284048027846287, "val_micro_f1": 0.7333333333333333, "val_macro_f1": 0.6710166006017928, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350678481289343e-06, "train_min_lr": 2.4527864075249745e-08, "train_loss": 0.9274887452522914, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6026529550552366, "val_loss": 0.704149204492569, "val_acc1": 72.29166644414266, "val_acc5": 100.0, "val_uar": 0.6127380329010764, "val_war": 0.7229166666666667, "val_weighted_f1": 0.7123457171698651, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.645782248264671, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.883333557193218e-06, "train_min_lr": 2.1893108870105166e-08, "train_loss": 0.9311613736550013, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5058203101158143, "val_loss": 0.7272949238618215, "val_acc1": 71.25000003178914, "val_acc5": 100.0, "val_uar": 0.5881375900669379, "val_war": 0.7125, "val_weighted_f1": 0.7018268059321687, "val_micro_f1": 0.7125, "val_macro_f1": 0.6198557408135834, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.4426503108126066e-06, "train_min_lr": 1.940866447506447e-08, "train_loss": 0.9441159003973008, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.467566719055176, "val_loss": 0.7662301580111186, "val_acc1": 70.83333339691163, "val_acc5": 100.0, "val_uar": 0.561473903511947, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6918409245796703, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.5961600578236702, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0291106228073814e-06, "train_min_lr": 1.7077247593597424e-08, "train_loss": 0.9059769891699155, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.4963933873176574, "val_loss": 0.7184066156546275, "val_acc1": 73.12500001589457, "val_acc5": 100.0, "val_uar": 0.6353908229451708, "val_war": 0.73125, "val_weighted_f1": 0.7255357159962992, "val_micro_f1": 0.73125, "val_macro_f1": 0.6660264891319526, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.6431666927548434e-06, "train_min_lr": 1.4901407595834388e-08, "train_loss": 0.9208654644091924, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.704305729866028, "val_loss": 0.6967436681191127, "val_acc1": 71.66666641235352, "val_acc5": 100.0, "val_uar": 0.5843346800140278, "val_war": 0.7166666666666667, "val_weighted_f1": 0.704289168492695, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6143444829793873, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.28524054467614e-06, "train_min_lr": 1.2883523730867555e-08, "train_loss": 0.9381396435697873, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.615367548465729, "val_loss": 0.7055231690406799, "val_acc1": 72.29166615804037, "val_acc5": 100.0, "val_uar": 0.6241290784497306, "val_war": 0.7229166666666667, "val_weighted_f1": 0.714250640468992, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.6509726299624157, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.955723565559173e-06, "train_min_lr": 1.1025802525076985e-08, "train_loss": 0.9333841209610303, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6112648248672485, "val_loss": 0.7026071031888326, "val_acc1": 72.70833346048991, "val_acc5": 100.0, "val_uar": 0.6215457529587964, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7184422427943971, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.650014054420006, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.6549760773826848e-06, "train_min_lr": 9.330275369326432e-09, "train_loss": 0.9185120380918185, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5399337363243104, "val_loss": 0.7207277635733287, "val_acc1": 70.624999888738, "val_acc5": 100.0, "val_uar": 0.5793102770820162, "val_war": 0.70625, "val_weighted_f1": 0.6941193304600958, "val_micro_f1": 0.70625, "val_macro_f1": 0.6211201900890481, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.3833269431094697e-06, "train_min_lr": 7.798796297667227e-09, "train_loss": 0.9278154666225116, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5209377551078798, "val_loss": 0.7194820900758108, "val_acc1": 71.66666660308837, "val_acc5": 100.0, "val_uar": 0.5911904138534573, "val_war": 0.7166666666666667, "val_weighted_f1": 0.7048581942682955, "val_micro_f1": 0.7166666666666667, "val_macro_f1": 0.6254419726017332, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.141073207079542e-06, "train_min_lr": 6.433039959979276e-09, "train_loss": 0.9283614713946978, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.4740203738212587, "val_loss": 0.6908862074216207, "val_acc1": 72.49999961853027, "val_acc5": 100.0, "val_uar": 0.6269825816293207, "val_war": 0.725, "val_weighted_f1": 0.7161045690235647, "val_micro_f1": 0.725, "val_macro_f1": 0.6542809408003254, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.284797701965165e-07, "train_min_lr": 5.234499790766015e-09, "train_loss": 0.9181889751553536, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5603473448753356, "val_loss": 0.7235601882139842, "val_acc1": 70.83333315849305, "val_acc5": 100.0, "val_uar": 0.5859020679401115, "val_war": 0.7083333333333334, "val_weighted_f1": 0.6950906402195077, "val_micro_f1": 0.7083333333333334, "val_macro_f1": 0.6092505959593999, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.4577910026236e-07, "train_min_lr": 4.204486376105685e-09, "train_loss": 0.911076367199421, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6077650427818297, "val_loss": 0.6982558349768321, "val_acc1": 72.29166644414266, "val_acc5": 100.0, "val_uar": 0.6327533446826925, "val_war": 0.7229166666666667, "val_weighted_f1": 0.716391529274412, "val_micro_f1": 0.7229166666666667, "val_macro_f1": 0.6567361346833795, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.931709777772621e-07, "train_min_lr": 3.344126020544719e-09, "train_loss": 0.9124541458487511, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6902076840400695, "val_loss": 0.7059181451797485, "val_acc1": 72.70833309491475, "val_acc5": 100.0, "val_uar": 0.6211676186132709, "val_war": 0.7270833333333333, "val_weighted_f1": 0.7184142537448179, "val_micro_f1": 0.7270833333333333, "val_macro_f1": 0.6474274141226846, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.7082227748258895e-07, "train_min_lr": 2.6543595155002304e-09, "train_loss": 0.9134137310584386, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.523417265415192, "val_loss": 0.7271405796209971, "val_acc1": 71.87500012715658, "val_acc5": 100.0, "val_uar": 0.6038046776090255, "val_war": 0.71875, "val_weighted_f1": 0.709335069574043, "val_micro_f1": 0.71875, "val_macro_f1": 0.6366080264381028, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.7886678588580804e-07, "train_min_lr": 2.1359411105184414e-09, "train_loss": 0.9020192401607832, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6040924286842344, "val_loss": 0.7112531304359436, "val_acc1": 71.87499979337056, "val_acc5": 100.0, "val_uar": 0.5907220919177442, "val_war": 0.71875, "val_weighted_f1": 0.7072033994580785, "val_micro_f1": 0.71875, "val_macro_f1": 0.6200459941752449, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1740505496690313e-07, "train_min_lr": 1.789437688513856e-09, "train_loss": 0.9225900616248449, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6218053817749025, "val_loss": 0.7084759175777435, "val_acc1": 71.4583328406016, "val_acc5": 100.0, "val_uar": 0.5824845856639336, "val_war": 0.7145833333333333, "val_weighted_f1": 0.7011745154212945, "val_micro_f1": 0.7145833333333333, "val_macro_f1": 0.6185190985144159, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.86504292226262e-07, "train_min_lr": 1.6152281458911214e-09, "train_loss": 0.9234192057450612, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.538338634967804, "val_loss": 0.6801577548185984, "val_acc1": 72.49999977747599, "val_acc5": 100.0, "val_uar": 0.6182654020697499, "val_war": 0.725, "val_weighted_f1": 0.7152828580053076, "val_micro_f1": 0.725, "val_macro_f1": 0.6459865200459435, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 73.43096234309623, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 60.87%, Final WAR: 73.43%
104
+ Final Confusion Matrix:
105
+ [[ 18 6 18 0]
106
+ [ 2 101 34 0]
107
+ [ 5 23 210 7]
108
+ [ 0 2 30 22]]
109
+ Final Class Accuracies: ['42.86%', '73.72%', '85.71%', '40.74%']
110
+ Final Weighted F1: 0.7233, Final Micro F1: 0.7343, Final Macro F1: 0.6505
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split03/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.532786885245902e-06, "train_min_lr": 3.119224857967862e-08, "train_loss": 2.0239849982618474, "train_loss_scale": 3134.6938775510203, "train_weight_decay": 0.04999999999999999, "train_grad_norm": NaN, "val_loss": 1.711304924885432, "val_acc1": 29.919679249146856, "val_acc5": 100.0, "val_uar": 0.29732399469521004, "val_war": 0.2991967871485944, "val_weighted_f1": 0.3263859814431761, "val_micro_f1": 0.2991967871485944, "val_macro_f1": 0.2787076950208196, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.6828893442622953e-05, "train_min_lr": 9.487642276318914e-08, "train_loss": 1.306159909079675, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 4.09130293495801, "val_loss": 1.0299682378768922, "val_acc1": 59.03614506855547, "val_acc5": 100.0, "val_uar": 0.48198853130203, "val_war": 0.5903614457831325, "val_weighted_f1": 0.5610980604400473, "val_micro_f1": 0.5903614457831325, "val_macro_f1": 0.46266126174946187, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8125e-05, "train_min_lr": 1.5856059694669972e-07, "train_loss": 1.1945983580991524, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.328241625610663, "val_loss": 0.9029343644777934, "val_acc1": 64.85943750588291, "val_acc5": 100.0, "val_uar": 0.5158336810560318, "val_war": 0.6485943775100401, "val_weighted_f1": 0.6112006118628979, "val_micro_f1": 0.6485943775100401, "val_macro_f1": 0.5000119862161233, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.942110655737705e-05, "train_min_lr": 2.2224477113021017e-07, "train_loss": 1.1637469284793958, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.8786180238334502, "val_loss": 0.8416775345802308, "val_acc1": 66.66666653644607, "val_acc5": 100.0, "val_uar": 0.5300354868947174, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6302604505718953, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.5171077404472024, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.07172131147541e-05, "train_min_lr": 2.859289453137207e-07, "train_loss": 1.1596847348878172, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3592994699672776, "val_loss": 0.8121648649374644, "val_acc1": 66.66666675092705, "val_acc5": 100.0, "val_uar": 0.5493549836111863, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6391740530326946, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.540274049189879, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.624505479325177e-05, "train_min_lr": 3.170933142513005e-07, "train_loss": 1.1532989038902075, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1979524943293356, "val_loss": 0.8313684463500977, "val_acc1": 67.26907641246137, "val_acc5": 100.0, "val_uar": 0.5885452072373855, "val_war": 0.6726907630522089, "val_weighted_f1": 0.6539682201451028, "val_micro_f1": 0.6726907630522089, "val_macro_f1": 0.5777715346154312, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.621477194759532e-05, "train_min_lr": 3.1692258834607297e-07, "train_loss": 1.1336367914871293, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.872346977798306, "val_loss": 0.8131861348946889, "val_acc1": 68.87550235273369, "val_acc5": 100.0, "val_uar": 0.6408428752762226, "val_war": 0.6887550200803213, "val_weighted_f1": 0.6809728262222351, "val_micro_f1": 0.6887550200803213, "val_macro_f1": 0.6337594629596267, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.615392712009694e-05, "train_min_lr": 3.165795628467214e-07, "train_loss": 1.1401367069912605, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.663177490234375, "val_loss": 0.8178274045387904, "val_acc1": 68.07228961622859, "val_acc5": 100.0, "val_uar": 0.6173860743709356, "val_war": 0.6807228915662651, "val_weighted_f1": 0.6702151471814761, "val_micro_f1": 0.6807228915662651, "val_macro_f1": 0.6099368038249856, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.6062586843682176e-05, "train_min_lr": 3.1606461284659327e-07, "train_loss": 1.1260133673544643, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0342649853959376, "val_loss": 0.8182831943035126, "val_acc1": 66.66666669730681, "val_acc5": 100.0, "val_uar": 0.6066285865239819, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6568407609683385, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.6006470242358741, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.59408509976022e-05, "train_min_lr": 3.1537830143595323e-07, "train_loss": 1.1185492859000252, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.376669764518738, "val_loss": 0.7781811217466991, "val_acc1": 70.28112444437173, "val_acc5": 100.0, "val_uar": 0.6379200850773601, "val_war": 0.7028112449799196, "val_weighted_f1": 0.6942863345375441, "val_micro_f1": 0.7028112449799196, "val_macro_f1": 0.6310619620615107, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.578885269821741e-05, "train_min_lr": 3.1452137908625264e-07, "train_loss": 1.1112735401205465, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.4162019977764206, "val_loss": 0.8013735791047414, "val_acc1": 69.67871544926042, "val_acc5": 100.0, "val_uar": 0.6496323159359325, "val_war": 0.6967871485943775, "val_weighted_f1": 0.6964851568843011, "val_micro_f1": 0.6967871485943775, "val_macro_f1": 0.6424791871547305, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.5606758153436626e-05, "train_min_lr": 3.1349478282949933e-07, "train_loss": 1.1016436505074403, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.4312705385441684, "val_loss": 0.7841413001219432, "val_acc1": 69.47791153934585, "val_acc5": 100.0, "val_uar": 0.656094938261675, "val_war": 0.6947791164658634, "val_weighted_f1": 0.6891100119528331, "val_micro_f1": 0.6947791164658634, "val_macro_f1": 0.6443107943780627, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.5394766480971004e-05, "train_min_lr": 3.122996352336282e-07, "train_loss": 1.0941756495407648, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.378127944712736, "val_loss": 0.8004937758048375, "val_acc1": 69.0763049910825, "val_acc5": 100.0, "val_uar": 0.6453009064978157, "val_war": 0.6907630522088354, "val_weighted_f1": 0.683748446377541, "val_micro_f1": 0.6907630522088354, "val_macro_f1": 0.6249216672706136, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.5153109490601995e-05, "train_min_lr": 3.1093724317498807e-07, "train_loss": 1.094802550920824, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.5038503092162463, "val_loss": 0.768238565325737, "val_acc1": 70.48192801724475, "val_acc5": 100.0, "val_uar": 0.6534857028675363, "val_war": 0.7048192771084337, "val_weighted_f1": 0.699057688298931, "val_micro_f1": 0.7048192771084337, "val_macro_f1": 0.6501292016090441, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.488205143070056e-05, "train_min_lr": 3.0940909640929253e-07, "train_loss": 1.0824636152001466, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2866630383900235, "val_loss": 0.8009533325831095, "val_acc1": 66.26506003414292, "val_acc5": 100.0, "val_uar": 0.6358221721962259, "val_war": 0.6626506024096386, "val_weighted_f1": 0.6546415026944115, "val_micro_f1": 0.6626506024096386, "val_macro_f1": 0.6153289148933923, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.4581888699275606e-05, "train_min_lr": 3.0771686594259405e-07, "train_loss": 1.071081054453947, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.11204900790234, "val_loss": 0.7654047052065531, "val_acc1": 69.8795185012511, "val_acc5": 100.0, "val_uar": 0.6872747479628261, "val_war": 0.6987951807228916, "val_weighted_f1": 0.6977346363315866, "val_micro_f1": 0.6987951807228916, "val_macro_f1": 0.6658633362990605, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.425294951986694e-05, "train_min_lr": 3.0586240220406283e-07, "train_loss": 1.0690687025079921, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.791505806300105, "val_loss": 0.7465198705593745, "val_acc1": 71.88755055316481, "val_acc5": 100.0, "val_uar": 0.6550292898074648, "val_war": 0.7188755020080321, "val_weighted_f1": 0.7095124497307445, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6555284031734291, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.389559358263791e-05, "train_min_lr": 3.0384773302257e-07, "train_loss": 1.0637943385815134, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1542994878729997, "val_loss": 0.7448896865049998, "val_acc1": 71.0843375945187, "val_acc5": 100.0, "val_uar": 0.6551709526777861, "val_war": 0.7108433734939759, "val_weighted_f1": 0.704244262183115, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.6582941538772791, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.351021165105915e-05, "train_min_lr": 3.016750614092864e-07, "train_loss": 1.0651451612816376, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2607433747272103, "val_loss": 0.7569838047027588, "val_acc1": 70.88353473402888, "val_acc5": 100.0, "val_uar": 0.647484706232078, "val_war": 0.7088353413654619, "val_weighted_f1": 0.706313145655059, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.659280247408547, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.3097225134614724e-05, "train_min_lr": 2.993467631487224e-07, "train_loss": 1.0528677620855318, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.877686571101753, "val_loss": 0.7203603237867355, "val_acc1": 72.48996037555986, "val_acc5": 100.0, "val_uar": 0.6412995386339583, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7179113639330469, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6644094704243366, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.265708562799699e-05, "train_min_lr": 2.9686538420084207e-07, "train_loss": 1.0442277083591538, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.063379243928559, "val_loss": 0.7329648504654567, "val_acc1": 72.48996001553823, "val_acc5": 100.0, "val_uar": 0.6833458955031706, "val_war": 0.7248995983935743, "val_weighted_f1": 0.7242035647261562, "val_micro_f1": 0.7248995983935743, "val_macro_f1": 0.6798066110379385, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.219027441729437e-05, "train_min_lr": 2.942336379170938e-07, "train_loss": 1.0487079172312808, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2630315702788684, "val_loss": 0.801648743947347, "val_acc1": 68.67469835855874, "val_acc5": 100.0, "val_uar": 0.6646321323171617, "val_war": 0.6867469879518072, "val_weighted_f1": 0.683433204169034, "val_micro_f1": 0.6867469879518072, "val_macro_f1": 0.6479362802256858, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.16973019537121e-05, "train_min_lr": 2.9145440207339997e-07, "train_loss": 1.0378556912448131, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1276833719136765, "val_loss": 0.7599503020445506, "val_acc1": 72.08835332939424, "val_acc5": 100.0, "val_uar": 0.669376485857247, "val_war": 0.7208835341365462, "val_weighted_f1": 0.717120613531383, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.671951435570538, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.1178707295401356e-05, "train_min_lr": 2.8853071572335116e-07, "train_loss": 1.046331591346637, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1627586715075435, "val_loss": 0.7647087961435318, "val_acc1": 71.48594364487981, "val_acc5": 100.0, "val_uar": 0.672019025883618, "val_war": 0.714859437751004, "val_weighted_f1": 0.7155793234989443, "val_micro_f1": 0.714859437751004, "val_macro_f1": 0.653991513064957, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063505751800703e-05, "train_min_lr": 2.85465775875046e-07, "train_loss": 1.0253838542975535, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1997649815617777, "val_loss": 0.7508092274268469, "val_acc1": 69.87951764332722, "val_acc5": 100.0, "val_uar": 0.6392082426202486, "val_war": 0.6987951807228916, "val_weighted_f1": 0.6947225866837224, "val_micro_f1": 0.6987951807228916, "val_macro_f1": 0.6382413014367451, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.0066947094579e-05, "train_min_lr": 2.8226293399520997e-07, "train_loss": 1.0164439148035178, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.98541230328229, "val_loss": 0.7361393501361211, "val_acc1": 71.48594405852168, "val_acc5": 100.0, "val_uar": 0.6247931339978228, "val_war": 0.714859437751004, "val_weighted_f1": 0.7046101744234682, "val_micro_f1": 0.714859437751004, "val_macro_f1": 0.6372439127170627, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.9474997245524346e-05, "train_min_lr": 2.789256923444146e-07, "train_loss": 1.0150501350967251, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1805708043429317, "val_loss": 0.7472546885410944, "val_acc1": 71.88755002462241, "val_acc5": 100.0, "val_uar": 0.6850529780686425, "val_war": 0.7188755020080321, "val_weighted_f1": 0.7179258779220024, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6720369055750698, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.8859855259312076e-05, "train_min_lr": 2.754577001474086e-07, "train_loss": 1.0163949553658362, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0961529527391707, "val_loss": 0.7200793961683909, "val_acc1": 71.68674727903313, "val_acc5": 100.0, "val_uar": 0.6692920332727943, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7126508420384339, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.6646123900628854, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.8222193784672675e-05, "train_min_lr": 2.7186274960274247e-07, "train_loss": 1.0115032550834475, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.08267114843641, "val_loss": 0.7549964000781377, "val_acc1": 72.08835374303611, "val_acc5": 100.0, "val_uar": 0.677225498090721, "val_war": 0.7208835341365462, "val_weighted_f1": 0.7171042309261474, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.6715649018968637, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.756271009506661e-05, "train_min_lr": 2.681447717360563e-07, "train_loss": 1.0162020218210155, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.238824798136341, "val_loss": 0.7622426519791285, "val_acc1": 70.08032149962153, "val_acc5": 100.0, "val_uar": 0.6418636900470375, "val_war": 0.7008032128514057, "val_weighted_f1": 0.6942664386029725, "val_micro_f1": 0.7008032128514057, "val_macro_f1": 0.6366749830022204, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.688212532622608e-05, "train_min_lr": 2.643078321015605e-07, "train_loss": 0.9941990488037771, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2246296162507972, "val_loss": 0.7159925560156505, "val_acc1": 71.68674727903313, "val_acc5": 100.0, "val_uar": 0.6498004028113363, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7061341569626641, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.6526084957388006, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.618118368760349e-05, "train_min_lr": 2.603561263364124e-07, "train_loss": 0.9863301829821398, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.932584592274257, "val_loss": 0.726362877090772, "val_acc1": 70.0803211395999, "val_acc5": 100.0, "val_uar": 0.6520935440831362, "val_war": 0.7008032128514057, "val_weighted_f1": 0.696939325088695, "val_micro_f1": 0.7008032128514057, "val_macro_f1": 0.6522922917373101, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.546065164858958e-05, "train_min_lr": 2.5629397557284737e-07, "train_loss": 1.0144928536650275, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9817191873277937, "val_loss": 0.7261929959058762, "val_acc1": 72.28915663416606, "val_acc5": 100.0, "val_uar": 0.6640206080013692, "val_war": 0.7228915662650602, "val_weighted_f1": 0.7165108559250606, "val_micro_f1": 0.7228915662650602, "val_macro_f1": 0.6649160863492297, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.472131710039029e-05, "train_min_lr": 2.521258217130857e-07, "train_loss": 0.9945941142687181, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9835164182040157, "val_loss": 0.7326351791620255, "val_acc1": 70.68273063261346, "val_acc5": 100.0, "val_uar": 0.6762879010108825, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7021442929634812, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.657558548568772, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.3963988494479276e-05, "train_min_lr": 2.4785622257217664e-07, "train_loss": 0.9978398184995262, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.992370676021187, "val_loss": 0.7150137255589167, "val_acc1": 71.48594378276043, "val_acc5": 100.0, "val_uar": 0.6881091560638448, "val_war": 0.714859437751004, "val_weighted_f1": 0.7124420705230181, "val_micro_f1": 0.714859437751004, "val_macro_f1": 0.673194207429385, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.3189493958568096e-05, "train_min_lr": 2.4348984689409546e-07, "train_loss": 0.969098004658206, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8302175293163376, "val_loss": 0.7212718516588211, "val_acc1": 70.88353349310327, "val_acc5": 100.0, "val_uar": 0.6667599832454751, "val_war": 0.7088353413654619, "val_weighted_f1": 0.7061036449539072, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.6623178483379434, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.239868039106072e-05, "train_min_lr": 2.390314692465394e-07, "train_loss": 1.002222704238632, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.070917326576856, "val_loss": 0.7212629834810893, "val_acc1": 74.49799231257305, "val_acc5": 100.0, "val_uar": 0.6994502051335524, "val_war": 0.7449799196787149, "val_weighted_f1": 0.7400960335670392, "val_micro_f1": 0.7449799196787149, "val_macro_f1": 0.694673825783609, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.1592412534982673e-05, "train_min_lr": 2.3448596480000886e-07, "train_loss": 0.9913320118675426, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.884255579539708, "val_loss": 0.7223457008600235, "val_acc1": 71.48594359125957, "val_acc5": 100.0, "val_uar": 0.6584077697208479, "val_war": 0.714859437751004, "val_weighted_f1": 0.7118091171447241, "val_micro_f1": 0.714859437751004, "val_macro_f1": 0.6589271870734741, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.077157203239706e-05, "train_min_lr": 2.2985830399687965e-07, "train_loss": 0.9969105299840979, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.920316216896991, "val_loss": 0.6950763752063115, "val_acc1": 72.28915641202505, "val_acc5": 100.0, "val_uar": 0.6873066412874024, "val_war": 0.7228915662650602, "val_weighted_f1": 0.7198488784215392, "val_micro_f1": 0.7228915662650602, "val_macro_f1": 0.6826839177020264, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993705646034195e-05, "train_min_lr": 2.2515354711629752e-07, "train_loss": 0.9836475013875637, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0608728978098654, "val_loss": 0.7176713178555171, "val_acc1": 73.49397638619664, "val_acc5": 100.0, "val_uar": 0.6620301031097919, "val_war": 0.7349397590361446, "val_weighted_f1": 0.7293008692490268, "val_micro_f1": 0.7349397590361446, "val_macro_f1": 0.6699612390308463, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908977834934321e-05, "train_min_lr": 2.2037683874083684e-07, "train_loss": 0.9807263473872425, "train_loss_scale": 2256.9795918367345, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9642279513028202, "val_loss": 0.7167639096577962, "val_acc1": 70.48192779510376, "val_acc5": 100.0, "val_uar": 0.6548227120409625, "val_war": 0.7048192771084337, "val_weighted_f1": 0.7034631340312846, "val_micro_f1": 0.7048192771084337, "val_macro_f1": 0.6518622649729152, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.823066418557558e-05, "train_min_lr": 2.1553340213097517e-07, "train_loss": 0.9904214693170016, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.885520047071029, "val_loss": 0.699773574868838, "val_acc1": 73.29317308908486, "val_acc5": 100.0, "val_uar": 0.6762821108268964, "val_war": 0.7329317269076305, "val_weighted_f1": 0.7281044916329406, "val_micro_f1": 0.7329317269076305, "val_macro_f1": 0.6931553661983422, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.7360653397763746e-05, "train_min_lr": 2.1062853351353466e-07, "train_loss": 0.9689801140707366, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8988633082837474, "val_loss": 0.719198289513588, "val_acc1": 72.4899599006377, "val_acc5": 100.0, "val_uar": 0.6807812681719325, "val_war": 0.7248995983935743, "val_weighted_f1": 0.7225553662274867, "val_micro_f1": 0.7248995983935743, "val_macro_f1": 0.6813118479565551, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.6480697329930815e-05, "train_min_lr": 2.0566759629033616e-07, "train_loss": 0.9716075065792823, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6868540364868787, "val_loss": 0.7239819477001826, "val_acc1": 72.69076378757217, "val_acc5": 100.0, "val_uar": 0.7000757255351452, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7250863304394971, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6907009542150361, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.559175820111761e-05, "train_min_lr": 2.006560151733973e-07, "train_loss": 0.9824590414357023, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7488661225961177, "val_loss": 0.692147895693779, "val_acc1": 74.69879583182583, "val_acc5": 100.0, "val_uar": 0.6595699316147172, "val_war": 0.7469879518072289, "val_weighted_f1": 0.7383952265707949, "val_micro_f1": 0.7469879518072289, "val_macro_f1": 0.6782788194854672, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.4694808053210136e-05, "train_min_lr": 1.9559927025309032e-07, "train_loss": 0.9669792595363799, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.906980947572358, "val_loss": 0.7111733506123225, "val_acc1": 72.08835321449372, "val_acc5": 100.0, "val_uar": 0.7029034967728197, "val_war": 0.7208835341365462, "val_weighted_f1": 0.7184281545270962, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.6867066333847337, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.379082768802634e-05, "train_min_lr": 1.90502891005743e-07, "train_loss": 0.971811642654899, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.766306991479835, "val_loss": 0.7093991031249364, "val_acc1": 73.69477940754719, "val_acc5": 100.0, "val_uar": 0.689677609735957, "val_war": 0.7369477911646586, "val_weighted_f1": 0.7334965457814112, "val_micro_f1": 0.7369477911646586, "val_macro_f1": 0.6893517030388755, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.2880805594823364e-05, "train_min_lr": 1.8537245024723822e-07, "train_loss": 0.9585555677308517, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.871701165121429, "val_loss": 0.6952813227971395, "val_acc1": 73.29317269076306, "val_acc5": 100.0, "val_uar": 0.6732493801264828, "val_war": 0.7329317269076305, "val_weighted_f1": 0.7287789448455311, "val_micro_f1": 0.7329317269076305, "val_macro_f1": 0.6797430580712923, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.196573686939935e-05, "train_min_lr": 1.8021355803922087e-07, "train_loss": 0.956686037738307, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8626610040664673, "val_loss": 0.6690589269002278, "val_acc1": 74.8995989450968, "val_acc5": 100.0, "val_uar": 0.6837421795322343, "val_war": 0.748995983935743, "val_weighted_f1": 0.7440808984812438, "val_micro_f1": 0.748995983935743, "val_macro_f1": 0.6983965240867408, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.104662212597067e-05, "train_min_lr": 1.7503185555457868e-07, "train_loss": 0.9670059034208052, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.842497022784486, "val_loss": 0.6921571075916291, "val_acc1": 74.69879550244434, "val_acc5": 100.0, "val_uar": 0.6738068002313166, "val_war": 0.7469879518072289, "val_weighted_f1": 0.7388945243015982, "val_micro_f1": 0.7469879518072289, "val_macro_f1": 0.695115735705147, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.0124466403014898e-05, "train_min_lr": 1.6983300890890101e-07, "train_loss": 0.9602285496839861, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7879892101093215, "val_loss": 0.7128395487864813, "val_acc1": 70.88353382248476, "val_acc5": 100.0, "val_uar": 0.6717546103437609, "val_war": 0.7088353413654619, "val_weighted_f1": 0.7059287100042744, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.6692487551905958, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9200278064276193e-05, "train_min_lr": 1.6462270296466667e-07, "train_loss": 0.9566191875812958, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.734388910994238, "val_loss": 0.7176318397124608, "val_acc1": 72.28915690226727, "val_acc5": 100.0, "val_uar": 0.6894694170887401, "val_war": 0.7228915662650602, "val_weighted_f1": 0.7200710716563673, "val_micro_f1": 0.7228915662650602, "val_macro_f1": 0.688160657785136, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8275067696134105e-05, "train_min_lr": 1.5940663511492863e-07, "train_loss": 0.979191391747825, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6086851090801004, "val_loss": 0.6924992750088373, "val_acc1": 71.48594364487981, "val_acc5": 100.0, "val_uar": 0.7007047024750979, "val_war": 0.714859437751004, "val_weighted_f1": 0.7137512501765326, "val_micro_f1": 0.714859437751004, "val_macro_f1": 0.6834815073675939, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.734984700254245e-05, "train_min_lr": 1.5419050905329905e-07, "train_loss": 0.96568686733035, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6055903994307226, "val_loss": 0.7230353961388271, "val_acc1": 71.0843375945187, "val_acc5": 100.0, "val_uar": 0.6681276013719243, "val_war": 0.7108433734939759, "val_weighted_f1": 0.7066284495537759, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.668805471165301, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.6425627698745702e-05, "train_min_lr": 1.4898002853704393e-07, "train_loss": 0.9403819795571217, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.744994032139681, "val_loss": 0.7108561068773269, "val_acc1": 71.88755038464404, "val_acc5": 100.0, "val_uar": 0.6866401897720232, "val_war": 0.7188755020080321, "val_weighted_f1": 0.7166361312903812, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6894440484309574, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.5503420404983286e-05, "train_min_lr": 1.4378089115010817e-07, "train_loss": 0.959576123020276, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.609682109891152, "val_loss": 0.6725414315859477, "val_acc1": 73.895582712319, "val_acc5": 100.0, "val_uar": 0.7031706068065404, "val_war": 0.7389558232931727, "val_weighted_f1": 0.7367932395038623, "val_micro_f1": 0.7389558232931727, "val_macro_f1": 0.6988672587478075, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.4584233541391267e-05, "train_min_lr": 1.3859878207289157e-07, "train_loss": 0.9493810239495063, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.870774522119639, "val_loss": 0.7400975227355957, "val_acc1": 70.88353415186626, "val_acc5": 100.0, "val_uar": 0.6478669610550393, "val_war": 0.7088353413654619, "val_weighted_f1": 0.7029797745191345, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.6634885568300772, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3669072225309575e-05, "train_min_lr": 1.3343936786558703e-07, "train_loss": 0.9494648733917548, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8380272680399368, "val_loss": 0.7494661708672842, "val_acc1": 69.27710845671504, "val_acc5": 100.0, "val_uar": 0.6394562597479965, "val_war": 0.6927710843373494, "val_weighted_f1": 0.6870169247459983, "val_micro_f1": 0.6927710843373494, "val_macro_f1": 0.6442009969794517, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.2758937172201118e-05, "train_min_lr": 1.2830829027188056e-07, "train_loss": 0.9590980076060003, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8431383274039446, "val_loss": 0.7135879258314769, "val_acc1": 71.28514020222737, "val_acc5": 100.0, "val_uar": 0.6661708191349696, "val_war": 0.7128514056224899, "val_weighted_f1": 0.7094001078930329, "val_micro_f1": 0.7128514056224899, "val_macro_f1": 0.6599938213030379, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.1854823601383966e-05, "train_min_lr": 1.2321116004978712e-07, "train_loss": 0.9507531892685663, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6049430686600354, "val_loss": 0.7049649029970169, "val_acc1": 72.69076309816904, "val_acc5": 100.0, "val_uar": 0.7050398034565403, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7248636619518684, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6832399405721193, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.095772014777377e-05, "train_min_lr": 1.181535508363692e-07, "train_loss": 0.9610437408191006, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6540098895831985, "val_loss": 0.7274702350298564, "val_acc1": 71.68674697263174, "val_acc5": 100.0, "val_uar": 0.65276331504149, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7119550065104973, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.6653222892544084, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.00686077808259e-05, "train_min_lr": 1.1314099305304662e-07, "train_loss": 0.9460259192452138, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.560588398758246, "val_loss": 0.7038639485836029, "val_acc1": 72.08835332939424, "val_acc5": 100.0, "val_uar": 0.6492612848883874, "val_war": 0.7208835341365462, "val_weighted_f1": 0.7142716146877757, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.6593173487537404, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.9188458731859813e-05, "train_min_lr": 1.0817896785816185e-07, "train_loss": 0.9610169309945333, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5986908017372596, "val_loss": 0.7254226942857106, "val_acc1": 71.08433754089846, "val_acc5": 100.0, "val_uar": 0.65424673033115, "val_war": 0.7108433734939759, "val_weighted_f1": 0.7057183402899893, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.6645145449972695, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.8318235430938328e-05, "train_min_lr": 1.0327290115341384e-07, "train_loss": 0.9410701998237039, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.714637642004052, "val_loss": 0.7003664582967758, "val_acc1": 73.49397630193626, "val_acc5": 100.0, "val_uar": 0.6996687320031516, "val_war": 0.7349397590361446, "val_weighted_f1": 0.731765261222706, "val_micro_f1": 0.7349397590361446, "val_macro_f1": 0.6950796594162129, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.7458889454464492e-05, "train_min_lr": 9.842815765071394e-08, "train_loss": 0.954797877645006, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6748236393441958, "val_loss": 0.7118753870328267, "val_acc1": 72.08835338301448, "val_acc5": 100.0, "val_uar": 0.6731981438862219, "val_war": 0.7208835341365462, "val_weighted_f1": 0.7175508740110903, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.6715413261350325, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6611360484646817e-05, "train_min_lr": 9.365003500595258e-08, "train_loss": 0.9441839357217153, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.777032044469094, "val_loss": 0.7047788272301356, "val_acc1": 71.68674688837136, "val_acc5": 100.0, "val_uar": 0.6730757962638745, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7120405449738303, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.6713738936375596, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.5776575281970498e-05, "train_min_lr": 8.894375802609026e-08, "train_loss": 0.9354788146051419, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.631045828060228, "val_loss": 0.7232988486687343, "val_acc1": 70.28112468949284, "val_acc5": 100.0, "val_uar": 0.6545132962274468, "val_war": 0.7028112449799196, "val_weighted_f1": 0.6960979910266281, "val_micro_f1": 0.7028112449799196, "val_macro_f1": 0.6609633350698989, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.4955446671798454e-05, "train_min_lr": 8.43144729559074e-08, "train_loss": 0.9596940327663811, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.664461610268573, "val_loss": 0.7054544041554133, "val_acc1": 72.69076334329016, "val_acc5": 100.0, "val_uar": 0.6839328566683487, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7235016819721312, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6877245920644958, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.4148872546210236e-05, "train_min_lr": 7.976724185066186e-08, "train_loss": 0.9373290677662609, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6998225109917775, "val_loss": 0.71120518942674, "val_acc1": 70.68273068623371, "val_acc5": 100.0, "val_uar": 0.6555445714050635, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7034519057892515, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.6597592597421117, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3357734882170158e-05, "train_min_lr": 7.530703704080548e-08, "train_loss": 0.9457093052515367, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.845965691975185, "val_loss": 0.7012256542841594, "val_acc1": 71.28514034010799, "val_acc5": 100.0, "val_uar": 0.6737463156843939, "val_war": 0.7128514056224899, "val_weighted_f1": 0.7096624855256142, "val_micro_f1": 0.7128514056224899, "val_macro_f1": 0.6700766736582586, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582898777098504e-05, "train_min_lr": 7.093873569481369e-08, "train_loss": 0.9614136259572036, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6024684614064744, "val_loss": 0.7108449677626292, "val_acc1": 72.28915660352592, "val_acc5": 100.0, "val_uar": 0.682825082202185, "val_war": 0.7228915662650602, "val_weighted_f1": 0.7206954951796969, "val_micro_f1": 0.7228915662650602, "val_macro_f1": 0.6834228415402959, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.1825211502900117e-05, "train_min_lr": 6.666711448607367e-08, "train_loss": 0.9521796992238687, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.819759582986637, "val_loss": 0.7254287173350652, "val_acc1": 70.88353426676677, "val_acc5": 100.0, "val_uar": 0.6659482016362799, "val_war": 0.7088353413654619, "val_weighted_f1": 0.7055840546381781, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.6664009123370314, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1085501579485099e-05, "train_min_lr": 6.249684436966185e-08, "train_loss": 0.9448542957808695, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7642822022340736, "val_loss": 0.7211444447437922, "val_acc1": 70.68273077049409, "val_acc5": 100.0, "val_uar": 0.6761043892971983, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7041701673527507, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.6685521877533823, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0364577868794378e-05, "train_min_lr": 5.843248547472314e-08, "train_loss": 0.9514038008289273, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.718743243995978, "val_loss": 0.7121884435415268, "val_acc1": 70.8835341825064, "val_acc5": 100.0, "val_uar": 0.6566388373269156, "val_war": 0.7088353413654619, "val_weighted_f1": 0.703784664729789, "val_micro_f1": 0.7088353413654619, "val_macro_f1": 0.6631598502086848, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.663228690321104e-06, "train_min_lr": 5.4478482118036954e-08, "train_loss": 0.9293074849106017, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.487313808227072, "val_loss": 0.6877616782983144, "val_acc1": 72.48996001553823, "val_acc5": 100.0, "val_uar": 0.6736680438988051, "val_war": 0.7248995983935743, "val_weighted_f1": 0.7217919300442883, "val_micro_f1": 0.7248995983935743, "val_macro_f1": 0.6777793088616242, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.982220959094839e-06, "train_min_lr": 5.063915794422167e-08, "train_loss": 0.9495250255274935, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.600127962170815, "val_loss": 0.6993046681086222, "val_acc1": 71.28514042436838, "val_acc5": 100.0, "val_uar": 0.6838703956407909, "val_war": 0.7128514056224899, "val_weighted_f1": 0.7104020033357513, "val_micro_f1": 0.7128514056224899, "val_macro_f1": 0.6780093984932148, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.322299347071217e-06, "train_min_lr": 4.6918711197893004e-08, "train_loss": 0.9426066299685004, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7365904559894485, "val_loss": 0.6966467062632243, "val_acc1": 74.29718926058237, "val_acc5": 100.0, "val_uar": 0.678102604349976, "val_war": 0.7429718875502008, "val_weighted_f1": 0.7383233653683667, "val_micro_f1": 0.7429718875502008, "val_macro_f1": 0.6926991446768898, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.684185468844116e-06, "train_min_lr": 4.3321210132944986e-08, "train_loss": 0.9420481180252672, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6169746019402327, "val_loss": 0.7063381602366765, "val_acc1": 72.89156606589934, "val_acc5": 100.0, "val_uar": 0.667517982352402, "val_war": 0.7289156626506024, "val_weighted_f1": 0.725475506649495, "val_micro_f1": 0.7289156626506024, "val_macro_f1": 0.6715007239021415, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.06857709257068e-06, "train_min_lr": 3.985058856397419e-08, "train_loss": 0.9504144185659836, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6033335534893736, "val_loss": 0.7033643384774526, "val_acc1": 72.48996006915847, "val_acc5": 100.0, "val_uar": 0.6741569600127211, "val_war": 0.7248995983935743, "val_weighted_f1": 0.7204458961503715, "val_micro_f1": 0.7248995983935743, "val_macro_f1": 0.6776855585028334, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.4761473769720995e-06, "train_min_lr": 3.6510641564711105e-08, "train_loss": 0.9459128171002784, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.627236247062683, "val_loss": 0.6891743868589402, "val_acc1": 73.4939761104354, "val_acc5": 100.0, "val_uar": 0.667428561913423, "val_war": 0.7349397590361446, "val_weighted_f1": 0.7283993116841525, "val_micro_f1": 0.7349397590361446, "val_macro_f1": 0.674397650289045, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.90754413524442e-06, "train_min_lr": 3.330502131816283e-08, "train_loss": 0.9154646630798068, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.510273527125923, "val_loss": 0.6855277419090271, "val_acc1": 73.69477943818733, "val_acc5": 100.0, "val_uar": 0.691669538430472, "val_war": 0.7369477911646586, "val_weighted_f1": 0.7336186038915699, "val_micro_f1": 0.7369477911646586, "val_macro_f1": 0.6902706172067257, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.36338912668431e-06, "train_min_lr": 3.023723312300452e-08, "train_loss": 0.935303310147759, "train_loss_scale": 5266.285714285715, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6957982705563914, "val_loss": 0.7023439188798268, "val_acc1": 72.89156620377996, "val_acc5": 100.0, "val_uar": 0.6734831832613583, "val_war": 0.7289156626506024, "val_weighted_f1": 0.7242231170352512, "val_micro_f1": 0.7289156626506024, "val_macro_f1": 0.6810675297219181, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.844277376804433e-06, "train_min_lr": 2.731063156058678e-08, "train_loss": 0.9379846541857233, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5697629549065413, "val_loss": 0.696881060798963, "val_acc1": 72.69076315178928, "val_acc5": 100.0, "val_uar": 0.670503813687161, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7223839659061263, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6748463289259522, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350776526681761e-06, "train_min_lr": 2.4528416826750154e-08, "train_loss": 0.9347670429012402, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.531805826693165, "val_loss": 0.7071267207463582, "val_acc1": 71.08433767877908, "val_acc5": 100.0, "val_uar": 0.6632620299714496, "val_war": 0.7108433734939759, "val_weighted_f1": 0.7062975269728824, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.6639005649242531, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.883426212250393e-06, "train_min_lr": 2.1893631232458076e-08, "train_loss": 0.954091991494302, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5669649133876877, "val_loss": 0.7294396917025249, "val_acc1": 70.28112460523245, "val_acc5": 100.0, "val_uar": 0.6613979302110083, "val_war": 0.7028112449799196, "val_weighted_f1": 0.6986788001386726, "val_micro_f1": 0.7028112449799196, "val_macro_f1": 0.6575616557582642, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.442737474217598e-06, "train_min_lr": 1.9409155877074322e-08, "train_loss": 0.9211614489758095, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5304113091254723, "val_loss": 0.702696277697881, "val_acc1": 72.08835332939424, "val_acc5": 100.0, "val_uar": 0.6669540957109291, "val_war": 0.7208835341365462, "val_weighted_f1": 0.7150886565286855, "val_micro_f1": 0.7208835341365462, "val_macro_f1": 0.6694700870878797, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0291921992482826e-06, "train_min_lr": 1.707770749792333e-08, "train_loss": 0.9388425253889188, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.45843551840101, "val_loss": 0.7119330296913783, "val_acc1": 71.08433726513721, "val_acc5": 100.0, "val_uar": 0.6691032110412892, "val_war": 0.7108433734939759, "val_weighted_f1": 0.7077233419950701, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.6686804957164286, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.6432425930290183e-06, "train_min_lr": 1.4901835499577726e-08, "train_loss": 0.9361335635185242, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.578788085859649, "val_loss": 0.7237055093050003, "val_acc1": 70.48192804788489, "val_acc5": 100.0, "val_uar": 0.6428737452644098, "val_war": 0.7048192771084337, "val_weighted_f1": 0.6969412119700152, "val_micro_f1": 0.7048192771084337, "val_macro_f1": 0.6511075712958143, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.285310685787749e-06, "train_min_lr": 1.2883919166121856e-08, "train_loss": 0.927363147236863, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.4928829013084877, "val_loss": 0.7120852132638296, "val_acc1": 71.88755010888279, "val_acc5": 100.0, "val_uar": 0.6778167893537952, "val_war": 0.7188755020080321, "val_weighted_f1": 0.715201895911064, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6806193994082965, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.9557878708099403e-06, "train_min_lr": 1.1026165059439608e-08, "train_loss": 0.9374276254655552, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6646462776223006, "val_loss": 0.7233652323484421, "val_acc1": 70.68273082411433, "val_acc5": 100.0, "val_uar": 0.641501397642062, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7004964227371763, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.6523374599034286, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.6550344764557655e-06, "train_min_lr": 9.33060460637137e-09, "train_loss": 0.9394971477944835, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.669781118023152, "val_loss": 0.7252326269944509, "val_acc1": 71.88755019314318, "val_acc5": 100.0, "val_uar": 0.6640735330590251, "val_war": 0.7188755020080321, "val_weighted_f1": 0.7137343801034476, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6703938572813363, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.383379372146334e-06, "train_min_lr": 7.799091877378604e-09, "train_loss": 0.9214407386017495, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.56599794115339, "val_loss": 0.6914811988671621, "val_acc1": 72.28915663416606, "val_acc5": 100.0, "val_uar": 0.683692455793486, "val_war": 0.7228915662650602, "val_weighted_f1": 0.7203549631668289, "val_micro_f1": 0.7228915662650602, "val_macro_f1": 0.6907468043822501, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.1411196087498069e-06, "train_min_lr": 6.433301559145023e-09, "train_loss": 0.9327353986144877, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6780712945120677, "val_loss": 0.7007880369822185, "val_acc1": 72.69076325902977, "val_acc5": 100.0, "val_uar": 0.6929930795348163, "val_war": 0.7269076305220884, "val_weighted_f1": 0.7240370799526008, "val_micro_f1": 0.7269076305220884, "val_macro_f1": 0.6934852137107126, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.285200937606378e-07, "train_min_lr": 5.234727123331281e-09, "train_loss": 0.9334045406304249, "train_loss_scale": 4513.959183673469, "train_weight_decay": 0.04999999999999999, "train_grad_norm": Infinity, "val_loss": 0.6992040594418844, "val_acc1": 71.88755060678506, "val_acc5": 100.0, "val_uar": 0.6754302625969993, "val_war": 0.7188755020080321, "val_weighted_f1": 0.7148200313271451, "val_micro_f1": 0.7188755020080321, "val_macro_f1": 0.6735381574086842, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.458133016271127e-07, "train_min_lr": 4.204679193485652e-09, "train_loss": 0.9292425613622276, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.46202167686151, "val_loss": 0.7062804321448009, "val_acc1": 71.68674681177102, "val_acc5": 100.0, "val_uar": 0.6801127954045322, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7146274223113954, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.67913991434184, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.931990195439552e-07, "train_min_lr": 3.3442841118964074e-09, "train_loss": 0.9470461022083451, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.717301899073075, "val_loss": 0.6849892357985179, "val_acc1": 71.68674686539126, "val_acc5": 100.0, "val_uar": 0.6776432894136846, "val_war": 0.7168674698795181, "val_weighted_f1": 0.7127152321465364, "val_micro_f1": 0.7168674698795181, "val_macro_f1": 0.6723970912451861, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.708441289879621e-07, "train_min_lr": 2.6544827079530713e-09, "train_loss": 0.9195850343525815, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.560047621629676, "val_loss": 0.7013196567694346, "val_acc1": 70.68273049473284, "val_acc5": 100.0, "val_uar": 0.6689510244666887, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7038806485878785, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.6719204263057372, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.788824232355611e-07, "train_min_lr": 2.1360292693632942e-09, "train_loss": 0.9444693320867966, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.542043608062121, "val_loss": 0.7054414182901383, "val_acc1": 70.68273054835308, "val_acc5": 100.0, "val_uar": 0.6586806304462949, "val_war": 0.7068273092369478, "val_weighted_f1": 0.7027767340039193, "val_micro_f1": 0.7068273092369478, "val_macro_f1": 0.6593591406138035, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1741446106182373e-07, "train_min_lr": 1.78949071735032e-09, "train_loss": 0.9403249279779642, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.608859303046246, "val_loss": 0.7076957106590271, "val_acc1": 71.08433709661645, "val_acc5": 100.0, "val_uar": 0.6583783186138106, "val_war": 0.7108433734939759, "val_weighted_f1": 0.7057973033200658, "val_micro_f1": 0.7108433734939759, "val_macro_f1": 0.6613238927504106, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.865074567809239e-07, "train_min_lr": 1.6152459867329442e-09, "train_loss": 0.9268374075086749, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.557140834477483, "val_loss": 0.7027449538310369, "val_acc1": 71.28514042436838, "val_acc5": 100.0, "val_uar": 0.6742153353607306, "val_war": 0.7128514056224899, "val_weighted_f1": 0.7088669404610659, "val_micro_f1": 0.7128514056224899, "val_macro_f1": 0.6688210103119917, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 74.84787018255578, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 67.95%, Final WAR: 74.85%
104
+ Final Confusion Matrix:
105
+ [[ 29 5 15 4]
106
+ [ 8 128 25 0]
107
+ [ 8 26 182 7]
108
+ [ 0 4 22 30]]
109
+ Final Class Accuracies: ['54.72%', '79.50%', '81.61%', '53.57%']
110
+ Final Weighted F1: 0.7445, Final Micro F1: 0.7485, Final Macro F1: 0.6950
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split04/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.54135687732342e-06, "train_min_lr": 3.1240563710688044e-08, "train_loss": 1.9933019232602767, "train_loss_scale": 4930.37037037037, "train_weight_decay": 0.049999999999999975, "train_grad_norm": NaN, "val_loss": 1.5430531832906935, "val_acc1": 39.492753844330274, "val_acc5": 100.0, "val_uar": 0.2602219788510423, "val_war": 0.39492753623188404, "val_weighted_f1": 0.36931240141408606, "val_micro_f1": 0.39492753623188404, "val_macro_f1": 0.24816879161060412, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.683317843866171e-05, "train_min_lr": 9.490058032869387e-08, "train_loss": 1.2856957095640678, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 4.555346837750188, "val_loss": 1.0897060467137232, "val_acc1": 57.246376894522406, "val_acc5": 100.0, "val_uar": 0.3772389248250317, "val_war": 0.572463768115942, "val_weighted_f1": 0.5011203525646907, "val_micro_f1": 0.572463768115942, "val_macro_f1": 0.32995019264416114, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8124999999999992e-05, "train_min_lr": 1.585605969466997e-07, "train_loss": 1.1818445335935663, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.366390109062195, "val_loss": 0.9953418109152052, "val_acc1": 60.50724701259447, "val_acc5": 100.0, "val_uar": 0.40738304720019125, "val_war": 0.605072463768116, "val_weighted_f1": 0.5387640737316963, "val_micro_f1": 0.605072463768116, "val_macro_f1": 0.3667202299183682, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.941682156133828e-05, "train_min_lr": 2.2222061356470555e-07, "train_loss": 1.1575357419105223, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.4070976222002947, "val_loss": 0.9712180064784156, "val_acc1": 60.50724676380987, "val_acc5": 100.0, "val_uar": 0.4030071951898577, "val_war": 0.605072463768116, "val_weighted_f1": 0.53331534984761, "val_micro_f1": 0.605072463768116, "val_macro_f1": 0.3574656804248582, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.0708643122676556e-05, "train_min_lr": 2.8588063018271133e-07, "train_loss": 1.1394322394956777, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.589864949385325, "val_loss": 0.928487112124761, "val_acc1": 63.04347881372424, "val_acc5": 100.0, "val_uar": 0.44712861116208646, "val_war": 0.6304347826086957, "val_weighted_f1": 0.5722315848322514, "val_micro_f1": 0.6304347826086957, "val_macro_f1": 0.4239072911978944, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.624504052478958e-05, "train_min_lr": 3.170932338098471e-07, "train_loss": 1.146375658703439, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.438958545525869, "val_loss": 0.9108051061630249, "val_acc1": 64.49275348497474, "val_acc5": 100.0, "val_uar": 0.48139981632144935, "val_war": 0.644927536231884, "val_weighted_f1": 0.6043277588349583, "val_micro_f1": 0.644927536231884, "val_macro_f1": 0.4848652289910384, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.6214728782695434e-05, "train_min_lr": 3.1692234499488037e-07, "train_loss": 1.132199165261822, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.6735345125198364, "val_loss": 0.8977065020137363, "val_acc1": 61.59420287090799, "val_acc5": 100.0, "val_uar": 0.45415997347595644, "val_war": 0.6159420289855072, "val_weighted_f1": 0.574426463070694, "val_micro_f1": 0.6159420289855072, "val_macro_f1": 0.4483553608428304, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.6153855105959554e-05, "train_min_lr": 3.165791568518906e-07, "train_loss": 1.1273858631834572, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.47480680986687, "val_loss": 0.8961966103977628, "val_acc1": 61.59420287090799, "val_acc5": 100.0, "val_uar": 0.4547297636645547, "val_war": 0.6159420289855072, "val_weighted_f1": 0.5806586578712745, "val_micro_f1": 0.6159420289855072, "val_macro_f1": 0.4456510370256621, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.606248605905366e-05, "train_min_lr": 3.1606404465207385e-07, "train_loss": 1.1181260487179698, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3854919053890087, "val_loss": 0.867624991469913, "val_acc1": 64.85507285076639, "val_acc5": 100.0, "val_uar": 0.4804324753047663, "val_war": 0.6485507246376812, "val_weighted_f1": 0.609844440393709, "val_micro_f1": 0.6485507246376812, "val_macro_f1": 0.47694259948697615, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.594072155268909e-05, "train_min_lr": 3.153775716630573e-07, "train_loss": 1.1095119252984906, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.5581483951321355, "val_loss": 0.8862229718102349, "val_acc1": 63.76811599731445, "val_acc5": 100.0, "val_uar": 0.495545872471182, "val_war": 0.6376811594202898, "val_weighted_f1": 0.6138393928411444, "val_micro_f1": 0.6376811594202898, "val_macro_f1": 0.5009838990392197, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.578869473456582e-05, "train_min_lr": 3.145204885329762e-07, "train_loss": 1.0898590652663032, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.5762061785768577, "val_loss": 0.8957691325081719, "val_acc1": 62.318840303282805, "val_acc5": 100.0, "val_uar": 0.5951055169031174, "val_war": 0.6231884057971014, "val_weighted_f1": 0.6259769044283693, "val_micro_f1": 0.6231884057971014, "val_macro_f1": 0.556761019722089, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.560657184377744e-05, "train_min_lr": 3.1349373246964946e-07, "train_loss": 1.092984578123799, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3887743022706776, "val_loss": 0.8499677644835578, "val_acc1": 66.30434713501862, "val_acc5": 100.0, "val_uar": 0.5186410285191245, "val_war": 0.6630434782608695, "val_weighted_f1": 0.6409667717516411, "val_micro_f1": 0.6630434782608695, "val_macro_f1": 0.515917279740156, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.5394552029031155e-05, "train_min_lr": 3.1229842621575784e-07, "train_loss": 1.092326365871194, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.374132454395294, "val_loss": 0.8550257914596133, "val_acc1": 64.4927533744038, "val_acc5": 100.0, "val_uar": 0.5237547567027057, "val_war": 0.644927536231884, "val_weighted_f1": 0.6297892937260509, "val_micro_f1": 0.644927536231884, "val_macro_f1": 0.5373043623309308, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.515286713088147e-05, "train_min_lr": 3.109358768211404e-07, "train_loss": 1.0778343305911546, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.450136288448616, "val_loss": 0.8436465859413147, "val_acc1": 66.6666671089504, "val_acc5": 100.0, "val_uar": 0.5672269546543928, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6584683642549317, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.5819117215134708, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.48817814282162e-05, "train_min_lr": 3.094075742135559e-07, "train_loss": 1.063298570337119, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3171606063842773, "val_loss": 0.8180639213985867, "val_acc1": 68.84058023535687, "val_acc5": 100.0, "val_uar": 0.5606189341918831, "val_war": 0.6884057971014492, "val_weighted_f1": 0.6677002735645534, "val_micro_f1": 0.6884057971014492, "val_macro_f1": 0.5783869852663482, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.458159134927116e-05, "train_min_lr": 3.077151895694668e-07, "train_loss": 1.0647802976546463, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3117060749619096, "val_loss": 0.8446035120222304, "val_acc1": 66.30434766023055, "val_acc5": 100.0, "val_uar": 0.5413985329184633, "val_war": 0.6630434782608695, "val_weighted_f1": 0.6504399670435161, "val_micro_f1": 0.6630434782608695, "val_macro_f1": 0.558439088613043, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.42526251474903e-05, "train_min_lr": 3.058605734866344e-07, "train_loss": 1.057174801826477, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.403423755257218, "val_loss": 0.8085525665018294, "val_acc1": 67.75362277376479, "val_acc5": 100.0, "val_uar": 0.5887081355105735, "val_war": 0.677536231884058, "val_weighted_f1": 0.665311387386509, "val_micro_f1": 0.677536231884058, "val_macro_f1": 0.5937371298916634, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.389524254258546e-05, "train_min_lr": 3.03845753960516e-07, "train_loss": 1.0671348785176689, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0557630216633833, "val_loss": 0.8359100719292959, "val_acc1": 66.66666639023933, "val_acc5": 100.0, "val_uar": 0.5504466455027601, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6442786053106122, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.5646113171954903, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.350983432718804e-05, "train_min_lr": 3.016729341666821e-07, "train_loss": 1.063147500709251, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3596849816816823, "val_loss": 0.8376007477442423, "val_acc1": 67.02898559017459, "val_acc5": 100.0, "val_uar": 0.57265782982211, "val_war": 0.6702898550724637, "val_weighted_f1": 0.6538022316266703, "val_micro_f1": 0.6702898550724637, "val_macro_f1": 0.5802873345715714, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.309682193952309e-05, "train_min_lr": 2.993444900516765e-07, "train_loss": 1.0501009689437018, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.2229766845703125, "val_loss": 0.8193191356129117, "val_acc1": 69.20289891008018, "val_acc5": 100.0, "val_uar": 0.5564655066009555, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6745588740305211, "val_micro_f1": 0.6920289855072462, "val_macro_f1": 0.5627164184011115, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.265665700257274e-05, "train_min_lr": 2.968629677349526e-07, "train_loss": 1.0350154838812204, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0717965254077204, "val_loss": 0.833153658443027, "val_acc1": 67.0289854519609, "val_acc5": 100.0, "val_uar": 0.5674930080454461, "val_war": 0.6702898550724637, "val_weighted_f1": 0.657424780273564, "val_micro_f1": 0.6702898550724637, "val_macro_f1": 0.5737344131489769, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.218982083023309e-05, "train_min_lr": 2.9423108072473093e-07, "train_loss": 1.0355797627089935, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.5703016033879034, "val_loss": 0.8053680327203538, "val_acc1": 68.47826081427975, "val_acc5": 100.0, "val_uar": 0.5988075651630218, "val_war": 0.6847826086956522, "val_weighted_f1": 0.671489241160615, "val_micro_f1": 0.6847826086956522, "val_macro_f1": 0.6065364003297439, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.169682390100459e-05, "train_min_lr": 2.914517069508159e-07, "train_loss": 1.0165744009945128, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.4061277685342013, "val_loss": 0.7914440797434913, "val_acc1": 69.20289808079816, "val_acc5": 100.0, "val_uar": 0.609182761432181, "val_war": 0.6920289855072463, "val_weighted_f1": 0.675141480602007, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.5995254179717129, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.117820529979122e-05, "train_min_lr": 2.885278856176229e-07, "train_loss": 1.0313543830940752, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.4152659155704357, "val_loss": 0.7928522262308333, "val_acc1": 69.56521664495054, "val_acc5": 100.0, "val_uar": 0.6069421967351534, "val_war": 0.6956521739130435, "val_weighted_f1": 0.6797132301502445, "val_micro_f1": 0.6956521739130435, "val_macro_f1": 0.6105607678004648, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063453212841914e-05, "train_min_lr": 2.8546281388085286e-07, "train_loss": 1.0011836851452605, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.216289378978588, "val_loss": 0.8044459621111552, "val_acc1": 66.30434735616048, "val_acc5": 100.0, "val_uar": 0.5658776351500809, "val_war": 0.6630434782608695, "val_weighted_f1": 0.6520132178387266, "val_micro_f1": 0.6630434782608695, "val_macro_f1": 0.5625288330810847, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.006639888551916e-05, "train_min_lr": 2.822598433514489e-07, "train_loss": 1.015744754469689, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.05122971534729, "val_loss": 0.7985218995147281, "val_acc1": 71.37681153891744, "val_acc5": 100.0, "val_uar": 0.6126527842713988, "val_war": 0.7137681159420289, "val_weighted_f1": 0.6954034316332623, "val_micro_f1": 0.7137681159420289, "val_macro_f1": 0.6113295869215678, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.9474426816451066e-05, "train_min_lr": 2.789224764306591e-07, "train_loss": 1.0173310010153571, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.3494356870651245, "val_loss": 0.7787142958905962, "val_acc1": 68.47826031671055, "val_acc5": 100.0, "val_uar": 0.5976476395981041, "val_war": 0.6847826086956522, "val_weighted_f1": 0.6650659574411562, "val_micro_f1": 0.6847826086956522, "val_macro_f1": 0.5866798514737698, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.88592632339812e-05, "train_min_lr": 2.7545436248021314e-07, "train_loss": 1.0048250355470327, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.892769146848608, "val_loss": 0.7895412113931444, "val_acc1": 69.2028981913691, "val_acc5": 100.0, "val_uar": 0.6245561332587262, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6809728361358797, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.626623610998611, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.8221580810455196e-05, "train_min_lr": 2.718592938317972e-07, "train_loss": 1.0041802318559752, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.485228443587268, "val_loss": 0.7839140329096053, "val_acc1": 71.37681129013282, "val_acc5": 100.0, "val_uar": 0.6383468326192785, "val_war": 0.7137681159420289, "val_weighted_f1": 0.7016641076123802, "val_micro_f1": 0.7137681159420289, "val_macro_f1": 0.628630141591729, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.756207684224084e-05, "train_min_lr": 2.6814120164019595e-07, "train_loss": 0.9987271668182479, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.964242434060132, "val_loss": 0.7773512436283959, "val_acc1": 69.20289899300838, "val_acc5": 100.0, "val_uar": 0.5918550396685072, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6709060542160343, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.5912060643494326, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.6881472487244676e-05, "train_min_lr": 2.6430415158463265e-07, "train_loss": 0.9982722618697603, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.2934311672493264, "val_loss": 0.7641379601425595, "val_acc1": 69.92753623188406, "val_acc5": 100.0, "val_uar": 0.6071865083087993, "val_war": 0.6992753623188406, "val_weighted_f1": 0.6827197140737262, "val_micro_f1": 0.6992753623188406, "val_macro_f1": 0.607227874369048, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.6180511976336314e-05, "train_min_lr": 2.60352339423008e-07, "train_loss": 0.9863365156415068, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.1840356809121593, "val_loss": 0.8648776941829257, "val_acc1": 64.85507227026898, "val_acc5": 100.0, "val_uar": 0.6239583617921466, "val_war": 0.6485507246376812, "val_weighted_f1": 0.6520835504479313, "val_micro_f1": 0.6485507246376812, "val_macro_f1": 0.5843466265280371, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.545996179954306e-05, "train_min_lr": 2.562900864039008e-07, "train_loss": 0.9836411387832077, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.351762526565128, "val_loss": 0.7980724606249068, "val_acc1": 67.75362285669299, "val_acc5": 100.0, "val_uar": 0.5534145687831833, "val_war": 0.677536231884058, "val_weighted_f1": 0.650331995196404, "val_micro_f1": 0.677536231884058, "val_macro_f1": 0.541159120534009, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.47206098679042e-05, "train_min_lr": 2.52121834541346e-07, "train_loss": 1.0109253728095395, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0869261666580483, "val_loss": 0.8043587737613254, "val_acc1": 67.39130382261415, "val_acc5": 100.0, "val_uar": 0.6087206309342532, "val_war": 0.6739130434782609, "val_weighted_f1": 0.660591430055407, "val_micro_f1": 0.6739130434782609, "val_macro_f1": 0.6015391396202593, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.3963264651901955e-05, "train_min_lr": 2.478521417575577e-07, "train_loss": 0.9815049877873173, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.2903729964185646, "val_loss": 0.8209016455544366, "val_acc1": 67.39130409904148, "val_acc5": 100.0, "val_uar": 0.6194162066791712, "val_war": 0.6739130434782609, "val_weighted_f1": 0.6636505475026809, "val_micro_f1": 0.6739130434782609, "val_macro_f1": 0.5877257872130164, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.318875429741075e-05, "train_min_lr": 2.434856768989081e-07, "train_loss": 0.9832953540263353, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.5778448383013406, "val_loss": 0.8065984745820364, "val_acc1": 69.20289741737255, "val_acc5": 100.0, "val_uar": 0.6389694933764207, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6773987079116368, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.5996528556590744, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.2397925720132014e-05, "train_min_lr": 2.390272146306121e-07, "train_loss": 0.9740366445463381, "train_loss_scale": 4096.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.053371822392499, "val_loss": 0.7725888788700104, "val_acc1": 68.11594122734623, "val_acc5": 100.0, "val_uar": 0.5539071317757462, "val_war": 0.6811594202898551, "val_weighted_f1": 0.6545011604405877, "val_micro_f1": 0.6811594202898551, "val_macro_f1": 0.5517236407740752, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.159164367950418e-05, "train_min_lr": 2.3448163021570165e-07, "train_loss": 0.9820833515237879, "train_loss_scale": 7812.740740740741, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7857628663380942, "val_loss": 0.7418784035576714, "val_acc1": 71.37681120720463, "val_acc5": 100.0, "val_uar": 0.5682647605877095, "val_war": 0.7137681159420289, "val_weighted_f1": 0.6789810929923806, "val_micro_f1": 0.7137681159420289, "val_macro_f1": 0.5743096779913655, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.0770789833100925e-05, "train_min_lr": 2.2985389418399705e-07, "train_loss": 0.9697863053024551, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.1661316553751626, "val_loss": 0.773063451051712, "val_acc1": 68.84057973778766, "val_acc5": 100.0, "val_uar": 0.5936526422207538, "val_war": 0.6884057971014492, "val_weighted_f1": 0.6682282113269018, "val_micro_f1": 0.6884057971014492, "val_macro_f1": 0.5740925274512269, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993626177255165e-05, "train_min_lr": 2.2514906689690503e-07, "train_loss": 0.9672329560106183, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0666694000915244, "val_loss": 0.7444745103518168, "val_acc1": 73.55072480353756, "val_acc5": 100.0, "val_uar": 0.6428991174589084, "val_war": 0.7355072463768116, "val_weighted_f1": 0.7182301224792913, "val_micro_f1": 0.7355072463768116, "val_macro_f1": 0.6550931045309681, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908897204203818e-05, "train_min_lr": 2.2037229301398845e-07, "train_loss": 0.9663065372057903, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.998907775790603, "val_loss": 0.7532677981588576, "val_acc1": 69.20289849543917, "val_acc5": 100.0, "val_uar": 0.6005815307914766, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6707716630950138, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.5868766677087952, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.8229847140441e-05, "train_min_lr": 2.155287958673566e-07, "train_loss": 0.9391156873769231, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.064343092618165, "val_loss": 0.7106503364112642, "val_acc1": 74.27536256762518, "val_acc5": 100.0, "val_uar": 0.646196531509999, "val_war": 0.7427536231884058, "val_weighted_f1": 0.7230762789420995, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6477214943306826, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.735982650822649e-05, "train_min_lr": 2.106238717500276e-07, "train_loss": 0.9705826839731063, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.810456059597157, "val_loss": 0.7676734427611033, "val_acc1": 69.20289824665457, "val_acc5": 100.0, "val_uar": 0.5777492636577388, "val_war": 0.6920289855072463, "val_weighted_f1": 0.6743095949617689, "val_micro_f1": 0.6920289855072463, "val_macro_f1": 0.5732290357290357, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.6479861500182445e-05, "train_min_lr": 2.0566288412451039e-07, "train_loss": 0.9512237962565304, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0502869575111955, "val_loss": 0.7386077112621732, "val_acc1": 72.46376795008563, "val_acc5": 100.0, "val_uar": 0.6186256453671314, "val_war": 0.7246376811594203, "val_weighted_f1": 0.7003987820781373, "val_micro_f1": 0.7246376811594203, "val_macro_f1": 0.6148478510803237, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.5590914345125665e-05, "train_min_lr": 2.0065125775793707e-07, "train_loss": 0.947797429230478, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.800967816953306, "val_loss": 0.7630758815341525, "val_acc1": 71.01449297476506, "val_acc5": 100.0, "val_uar": 0.5931781446680827, "val_war": 0.7101449275362319, "val_weighted_f1": 0.6903000702599259, "val_micro_f1": 0.7101449275362319, "val_macro_f1": 0.6002688856400031, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.469395709371879e-05, "train_min_lr": 1.9559447279015954e-07, "train_loss": 0.9651069942815804, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.001232829358843, "val_loss": 0.753802004787657, "val_acc1": 72.10144919243412, "val_acc5": 100.0, "val_uar": 0.5969254943471275, "val_war": 0.7210144927536232, "val_weighted_f1": 0.6929223156028214, "val_micro_f1": 0.7210144927536232, "val_macro_f1": 0.5909560909855364, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.378997055554726e-05, "train_min_lr": 1.90498058741297e-07, "train_loss": 0.9419912379464985, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.967395153310564, "val_loss": 0.759678367111418, "val_acc1": 68.84057946136032, "val_acc5": 100.0, "val_uar": 0.5554634093188661, "val_war": 0.6884057971014492, "val_weighted_f1": 0.6615381207211228, "val_micro_f1": 0.6884057971014492, "val_macro_f1": 0.5483305368500687, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.287994322661837e-05, "train_min_lr": 1.853675884652872e-07, "train_loss": 0.9715348964120135, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.714861571788788, "val_loss": 0.7816911107963986, "val_acc1": 68.4782600402832, "val_acc5": 100.0, "val_uar": 0.5719909576720722, "val_war": 0.6847826086956522, "val_weighted_f1": 0.669600938746374, "val_micro_f1": 0.6847826086956522, "val_macro_f1": 0.5579082209594596, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.19648702084554e-05, "train_min_lr": 1.8020867205605208e-07, "train_loss": 0.9511304298116837, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.9771506587664285, "val_loss": 0.79132170147366, "val_acc1": 68.84057882557745, "val_acc5": 100.0, "val_uar": 0.5571889432377048, "val_war": 0.6884057971014492, "val_weighted_f1": 0.6662331508785128, "val_micro_f1": 0.6884057971014492, "val_macro_f1": 0.5415762855544152, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.104575211996877e-05, "train_min_lr": 1.7502695071294276e-07, "train_loss": 0.9289562773925287, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.8525956670443215, "val_loss": 0.7298127346568637, "val_acc1": 73.18840571417324, "val_acc5": 100.0, "val_uar": 0.6603379456514131, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7226308230259587, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6651447910987988, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.0123594003293868e-05, "train_min_lr": 1.6982809057217058e-07, "train_loss": 0.953239667194861, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 3.0817465451028614, "val_loss": 0.6976142790582445, "val_acc1": 73.18840560360232, "val_acc5": 100.0, "val_uar": 0.6216512272771948, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7092076175270943, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6192414671282656, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9199404224792273e-05, "train_min_lr": 1.646177765109706e-07, "train_loss": 0.9411865779095225, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.930837059462512, "val_loss": 0.7614129715495639, "val_acc1": 72.8260868459508, "val_acc5": 100.0, "val_uar": 0.6495362552251097, "val_war": 0.7282608695652174, "val_weighted_f1": 0.7122736778471868, "val_micro_f1": 0.7282608695652174, "val_macro_f1": 0.6340979793799958, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8274193372417983e-05, "train_min_lr": 1.5940170593127162e-07, "train_loss": 0.9457330701895702, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.777808478584996, "val_loss": 0.7783555355336931, "val_acc1": 72.10144938593325, "val_acc5": 100.0, "val_uar": 0.6386676064017396, "val_war": 0.7210144927536232, "val_weighted_f1": 0.7035482760980544, "val_micro_f1": 0.7210144927536232, "val_macro_f1": 0.6327960504114077, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.7348973150654274e-05, "train_min_lr": 1.5418558252967114e-07, "train_loss": 0.9507617410502316, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.8130898740556507, "val_loss": 0.7141138911247253, "val_acc1": 72.46376844765483, "val_acc5": 100.0, "val_uar": 0.6403901102624012, "val_war": 0.7246376811594203, "val_weighted_f1": 0.7103616740609212, "val_micro_f1": 0.7246376811594203, "val_macro_f1": 0.6442921627174428, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.642475527422965e-05, "train_min_lr": 1.4897511006052646e-07, "train_loss": 0.9416308605376585, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.521837009323968, "val_loss": 0.7422088285287222, "val_acc1": 73.18840621174246, "val_acc5": 100.0, "val_uar": 0.6498304962652873, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7188388341680804, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6665092840973343, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.550255036182278e-05, "train_min_lr": 1.4377598609898282e-07, "train_loss": 0.9204186994904353, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.654819510601185, "val_loss": 0.7452465660042233, "val_acc1": 74.63768129763396, "val_acc5": 100.0, "val_uar": 0.6320564878662789, "val_war": 0.7463768115942029, "val_weighted_f1": 0.7313184209160921, "val_micro_f1": 0.7463768115942029, "val_macro_f1": 0.6530538062421033, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.458336683096571e-05, "train_min_lr": 1.3859389581075984e-07, "train_loss": 0.9545588003080568, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6716523700290256, "val_loss": 0.7230949468082852, "val_acc1": 73.55072436125383, "val_acc5": 100.0, "val_uar": 0.6539030848966995, "val_war": 0.7355072463768116, "val_weighted_f1": 0.725615198479049, "val_micro_f1": 0.7355072463768116, "val_macro_f1": 0.6457286335053738, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3668209795354125e-05, "train_min_lr": 1.3343450573550484e-07, "train_loss": 0.9462198685900665, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.5871580419717013, "val_loss": 0.7194421092669169, "val_acc1": 75.00000027642734, "val_acc5": 100.0, "val_uar": 0.6449346550207617, "val_war": 0.75, "val_weighted_f1": 0.735993617958062, "val_micro_f1": 0.75, "val_macro_f1": 0.6516205682087133, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.2758079965770233e-05, "train_min_lr": 1.283034575905157e-07, "train_loss": 0.9448238132912435, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.714968862356963, "val_loss": 0.7348209636078941, "val_acc1": 73.9130437546882, "val_acc5": 100.0, "val_uar": 0.6223993199854267, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7230933191676279, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6488817824264844, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.185397255582031e-05, "train_min_lr": 1.232063621016057e-07, "train_loss": 0.9282177331638924, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.802598904680323, "val_loss": 0.6938489741749234, "val_acc1": 74.99999977885813, "val_acc5": 100.0, "val_uar": 0.6679194114565631, "val_war": 0.75, "val_weighted_f1": 0.7349404852014819, "val_micro_f1": 0.75, "val_macro_f1": 0.6733720092760608, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.095687619368318e-05, "train_min_lr": 1.1814879286785718e-07, "train_loss": 0.9395762746348794, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.8916202893963567, "val_loss": 0.7080055806371901, "val_acc1": 72.82608670773713, "val_acc5": 100.0, "val_uar": 0.647163896331853, "val_war": 0.7282608695652174, "val_weighted_f1": 0.7158009245693692, "val_micro_f1": 0.7282608695652174, "val_macro_f1": 0.6452922601932312, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.0067771841059774e-05, "train_min_lr": 1.1313628026697275e-07, "train_loss": 0.9388818656220849, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6902137487022966, "val_loss": 0.7590634855959151, "val_acc1": 72.4637684200121, "val_acc5": 100.0, "val_uar": 0.6409572199529628, "val_war": 0.7246376811594203, "val_weighted_f1": 0.7167114770022549, "val_micro_f1": 0.7246376811594203, "val_macro_f1": 0.6546318136175848, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.9187631720506016e-05, "train_min_lr": 1.0817430540788846e-07, "train_loss": 0.9325309495131174, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7078485201906273, "val_loss": 0.7130858666367001, "val_acc1": 73.91304389290188, "val_acc5": 100.0, "val_uar": 0.6294352061766922, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7241753806557886, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6402596230998403, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.831741825232162e-05, "train_min_lr": 1.0326829413726177e-07, "train_loss": 0.937471666637762, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.81843520976879, "val_loss": 0.7282592356204987, "val_acc1": 71.73913040713987, "val_acc5": 100.0, "val_uar": 0.6005835626433537, "val_war": 0.717391304347826, "val_weighted_f1": 0.6932284801928297, "val_micro_f1": 0.717391304347826, "val_macro_f1": 0.6208315389157921, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.745808300215771e-05, "train_min_lr": 9.842361110638779e-08, "train_loss": 0.9265441879814054, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.770181077497977, "val_loss": 0.7151362597942352, "val_acc1": 73.55072447182475, "val_acc5": 100.0, "val_uar": 0.6589521403375893, "val_war": 0.7355072463768116, "val_weighted_f1": 0.7219278262242789, "val_micro_f1": 0.7355072463768116, "val_macro_f1": 0.6606851720079001, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6610565640493708e-05, "train_min_lr": 9.364555390503186e-08, "train_loss": 0.9474755355421408, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.810520108099337, "val_loss": 0.7296648936139213, "val_acc1": 74.63768154641856, "val_acc5": 100.0, "val_uar": 0.6473419674222692, "val_war": 0.7463768115942029, "val_weighted_f1": 0.7287640048039572, "val_micro_f1": 0.7463768115942029, "val_macro_f1": 0.6436689398077093, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.5775792915121474e-05, "train_min_lr": 8.893934726859289e-08, "train_loss": 0.9336556116188014, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.934524650926943, "val_loss": 0.7008640600575341, "val_acc1": 76.81159467282502, "val_acc5": 100.0, "val_uar": 0.701904112793238, "val_war": 0.7681159420289855, "val_weighted_f1": 0.76116298264172, "val_micro_f1": 0.7681159420289855, "val_macro_f1": 0.7139193164300867, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.495467763776019e-05, "train_min_lr": 8.431013736493209e-08, "train_loss": 0.9209144610313722, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.857867172470799, "val_loss": 0.682983923289511, "val_acc1": 76.44927558346072, "val_acc5": 100.0, "val_uar": 0.7065315668450344, "val_war": 0.7644927536231884, "val_weighted_f1": 0.7573857493270973, "val_micro_f1": 0.7644927536231884, "val_macro_f1": 0.7135352075584603, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.414811768591015e-05, "train_min_lr": 7.976298616711364e-08, "train_loss": 0.9257876441618542, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.625664677884844, "val_loss": 0.7321646263202032, "val_acc1": 73.1884053548177, "val_acc5": 100.0, "val_uar": 0.6493824319385465, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7225834864353858, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6527792724131974, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3356995021036906e-05, "train_min_lr": 7.53028659182118e-08, "train_loss": 0.9381741812383687, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7915460312808, "val_loss": 0.7249732116858164, "val_acc1": 75.36231958693352, "val_acc5": 100.0, "val_uar": 0.6416353714583203, "val_war": 0.7536231884057971, "val_weighted_f1": 0.7398237279244054, "val_micro_f1": 0.7536231884057971, "val_macro_f1": 0.6618412005159838, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582174724159377e-05, "train_min_lr": 7.093465369423596e-08, "train_loss": 0.9306778182034139, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.678037069461964, "val_loss": 0.7012683219379849, "val_acc1": 76.44927555581798, "val_acc5": 100.0, "val_uar": 0.6649362968974037, "val_war": 0.7644927536231884, "val_weighted_f1": 0.7446697032603263, "val_micro_f1": 0.7644927536231884, "val_macro_f1": 0.6714492953110524, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.1824504049896369e-05, "train_min_lr": 6.66631260711195e-08, "train_loss": 0.9199396882344175, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6339598187693842, "val_loss": 0.7656106054782867, "val_acc1": 71.3768111795619, "val_acc5": 100.0, "val_uar": 0.597663409539377, "val_war": 0.7137681159420289, "val_weighted_f1": 0.7010159468426302, "val_micro_f1": 0.7137681159420289, "val_macro_f1": 0.6040554971496759, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1084811500006201e-05, "train_min_lr": 6.249295390160443e-08, "train_loss": 0.9353087301607486, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7190573701152094, "val_loss": 0.7278196778562334, "val_acc1": 72.82608739880548, "val_acc5": 100.0, "val_uar": 0.6288914042542139, "val_war": 0.7282608695652174, "val_weighted_f1": 0.7140225153336911, "val_micro_f1": 0.7282608695652174, "val_macro_f1": 0.6383761788579276, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0363905917432113e-05, "train_min_lr": 5.842869720773224e-08, "train_loss": 0.913518842815617, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.582886287459621, "val_loss": 0.715295371082094, "val_acc1": 75.36231875765151, "val_acc5": 100.0, "val_uar": 0.6778847702077192, "val_war": 0.7536231884057971, "val_weighted_f1": 0.7434675652127177, "val_micro_f1": 0.7536231884057971, "val_macro_f1": 0.6740664046027588, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.662575601844422e-06, "train_min_lr": 5.447480019452686e-08, "train_loss": 0.934475909596608, "train_loss_scale": 15322.074074074075, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.64754561803959, "val_loss": 0.7156884769598643, "val_acc1": 74.27536234648332, "val_acc5": 100.0, "val_uar": 0.6621767148615058, "val_war": 0.7427536231884058, "val_weighted_f1": 0.7342698639709402, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6679923707023541, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.981587447646398e-06, "train_min_lr": 5.0635586390321694e-08, "train_loss": 0.9278601126538383, "train_loss_scale": 14411.851851851852, "train_weight_decay": 0.049999999999999975, "train_grad_norm": Infinity, "val_loss": 0.7222756428851022, "val_acc1": 75.72463754294576, "val_acc5": 100.0, "val_uar": 0.6616971145899164, "val_war": 0.7572463768115942, "val_weighted_f1": 0.747218288782561, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.6776428298620328, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.321686105386488e-06, "train_min_lr": 4.6915253919044874e-08, "train_loss": 0.9289758554027404, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.8181671411902816, "val_loss": 0.7068250046836005, "val_acc1": 75.0000004975692, "val_acc5": 100.0, "val_uar": 0.6640800564030053, "val_war": 0.75, "val_weighted_f1": 0.7328740120205252, "val_micro_f1": 0.75, "val_macro_f1": 0.6630278365514828, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.683593167493872e-06, "train_min_lr": 4.33178709096324e-08, "train_loss": 0.9127879559441849, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.684217823876275, "val_loss": 0.7168127761946784, "val_acc1": 74.6376814082049, "val_acc5": 100.0, "val_uar": 0.6769627603218934, "val_war": 0.7463768115942029, "val_weighted_f1": 0.7365497806542798, "val_micro_f1": 0.7463768115942029, "val_macro_f1": 0.6795275537920278, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.06800637922776e-06, "train_min_lr": 3.984737104758879e-08, "train_loss": 0.925288239378988, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6375040632707103, "val_loss": 0.71832508345445, "val_acc1": 76.4492753346761, "val_acc5": 100.0, "val_uar": 0.6992655714598439, "val_war": 0.7644927536231884, "val_weighted_f1": 0.7549742863539348, "val_micro_f1": 0.7644927536231884, "val_macro_f1": 0.7036647384537972, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.475598875703166e-06, "train_min_lr": 3.6507549273559855e-08, "train_loss": 0.9284324668071888, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6355173786481223, "val_loss": 0.7130679686864217, "val_acc1": 75.7246383998705, "val_acc5": 100.0, "val_uar": 0.6848636557227887, "val_war": 0.7572463768115942, "val_weighted_f1": 0.747132055923006, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.6845300046389626, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.907018445827544e-06, "train_min_lr": 3.3302057633620665e-08, "train_loss": 0.9057185248828229, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.681093136469523, "val_loss": 0.7017029556963179, "val_acc1": 75.72463792994402, "val_acc5": 100.0, "val_uar": 0.6859147093231847, "val_war": 0.7572463768115942, "val_weighted_f1": 0.748377245526844, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.6913425831729308, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.362886823953155e-06, "train_min_lr": 3.0234401285817e-08, "train_loss": 0.9338893908777355, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7331567539109125, "val_loss": 0.748551199833552, "val_acc1": 73.55072494175123, "val_acc5": 100.0, "val_uar": 0.6255780661546915, "val_war": 0.7355072463768116, "val_weighted_f1": 0.7197223685739074, "val_micro_f1": 0.7355072463768116, "val_macro_f1": 0.6325231206918472, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.843799010019669e-06, "train_min_lr": 2.730793466732627e-08, "train_loss": 0.9123445292129929, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6364353188761958, "val_loss": 0.6989719751808379, "val_acc1": 74.99999964064446, "val_acc5": 100.0, "val_uar": 0.6882471916914642, "val_war": 0.75, "val_weighted_f1": 0.7399252530131608, "val_micro_f1": 0.75, "val_macro_f1": 0.6809576446488351, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350322618930438e-06, "train_min_lr": 2.452585782642986e-08, "train_loss": 0.9225385231368336, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7571395481074297, "val_loss": 0.7171457525756624, "val_acc1": 75.36231881293698, "val_acc5": 100.0, "val_uar": 0.6598952748962424, "val_war": 0.7536231884057971, "val_weighted_f1": 0.7449443689586637, "val_micro_f1": 0.7536231884057971, "val_macro_f1": 0.6713631178999301, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.882997259873975e-06, "train_min_lr": 2.1891212923307247e-08, "train_loss": 0.9185972526485537, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6182411666269654, "val_loss": 0.711785732044114, "val_acc1": 73.91304367176001, "val_acc5": 100.0, "val_uar": 0.6601800620618344, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7255762414449035, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6616354596299099, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.4423339462692076e-06, "train_min_lr": 1.9406880903478552e-08, "train_loss": 0.9151097066976406, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.7943475113974676, "val_loss": 0.7554020649856992, "val_acc1": 73.18840618409972, "val_acc5": 100.0, "val_uar": 0.6429110098421243, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7218735173160774, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6527183903196438, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0288145369798108e-06, "train_min_lr": 1.707557834753282e-08, "train_loss": 0.9224396606470332, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.60425748206951, "val_loss": 0.7174989283084869, "val_acc1": 74.27536276112433, "val_acc5": 100.0, "val_uar": 0.6822261294204018, "val_war": 0.7427536231884058, "val_weighted_f1": 0.7345801140209622, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6762694735521702, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.642891209408609e-06, "train_min_lr": 1.4899854500587162e-08, "train_loss": 0.9061752509923628, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.5567262459684303, "val_loss": 0.6864592764112685, "val_acc1": 74.63768121470575, "val_acc5": 100.0, "val_uar": 0.6818518119021214, "val_war": 0.7463768115942029, "val_weighted_f1": 0.7393874567827361, "val_micro_f1": 0.7463768115942029, "val_macro_f1": 0.6830200645654989, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.2849859650482317e-06, "train_min_lr": 1.288208848472456e-08, "train_loss": 0.9078121200939755, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.720206744141049, "val_loss": 0.6980082939068476, "val_acc1": 75.72463746001755, "val_acc5": 100.0, "val_uar": 0.7082576958931448, "val_war": 0.7572463768115942, "val_weighted_f1": 0.7503340952533646, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.702498028996603, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.955490168028677e-06, "train_min_lr": 1.1024486697458808e-08, "train_loss": 0.9074324500045658, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.386376566357083, "val_loss": 0.7330507636070251, "val_acc1": 73.18840579710145, "val_acc5": 100.0, "val_uar": 0.6685912298915394, "val_war": 0.7318840579710145, "val_weighted_f1": 0.7214104213351199, "val_micro_f1": 0.7318840579710145, "val_macro_f1": 0.6582201470648369, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.654764117166376e-06, "train_min_lr": 9.329080399071253e-09, "train_loss": 0.9052412436332231, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.4155181734650224, "val_loss": 0.6908559103806814, "val_acc1": 74.27536237412605, "val_acc5": 100.0, "val_uar": 0.6823493723696897, "val_war": 0.7427536231884058, "val_weighted_f1": 0.7365101866677214, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6817576973158446, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.3831366519827317e-06, "train_min_lr": 7.797723491457478e-09, "train_loss": 0.9143704048093454, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.5802001379154347, "val_loss": 0.710575133562088, "val_acc1": 74.27536140663037, "val_acc5": 100.0, "val_uar": 0.6850392154571721, "val_war": 0.7427536231884058, "val_weighted_f1": 0.73540940237903, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6825829503591051, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.1409047931229285e-06, "train_min_lr": 6.432090490912797e-09, "train_loss": 0.9290232355763883, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.811795342851568, "val_loss": 0.7104923940367169, "val_acc1": 74.27536198712778, "val_acc5": 100.0, "val_uar": 0.6670960797315286, "val_war": 0.7427536231884058, "val_weighted_f1": 0.7319203453658254, "val_micro_f1": 0.7427536231884058, "val_macro_f1": 0.6635238297788086, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.28333417568217e-07, "train_min_lr": 5.233674697073287e-09, "train_loss": 0.9032234211026886, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6257630211335643, "val_loss": 0.6789559423923492, "val_acc1": 76.08695701930834, "val_acc5": 100.0, "val_uar": 0.7139653818833386, "val_war": 0.7608695652173914, "val_weighted_f1": 0.7546351948197834, "val_micro_f1": 0.7608695652173914, "val_macro_f1": 0.7162398950597076, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.45654968996824e-07, "train_min_lr": 4.20378656001455e-09, "train_loss": 0.9103997772858466, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6155710264488503, "val_loss": 0.7163366923729578, "val_acc1": 75.72463828929956, "val_acc5": 100.0, "val_uar": 0.697735270548738, "val_war": 0.7572463768115942, "val_weighted_f1": 0.7502756228046502, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.7002076609293477, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.930692036102055e-07, "train_min_lr": 3.343552247293794e-09, "train_loss": 0.9114293401807915, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6974724067582025, "val_loss": 0.7211101767089632, "val_acc1": 73.91304414168648, "val_acc5": 100.0, "val_uar": 0.6365470765045067, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7242926725634993, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6398838203802361, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.707429717025698e-07, "train_min_lr": 2.6539124125021434e-09, "train_loss": 0.9152323068292053, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.527161503279651, "val_loss": 0.7064769582615958, "val_acc1": 75.36231939343439, "val_acc5": 100.0, "val_uar": 0.6938586281852536, "val_war": 0.7536231884057971, "val_weighted_f1": 0.7442527345947438, "val_micro_f1": 0.7536231884057971, "val_macro_f1": 0.6869936123046103, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.78810035212532e-07, "train_min_lr": 2.1356211666737568e-09, "train_loss": 0.9171165088996475, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.4844865445737487, "val_loss": 0.6638030575381385, "val_acc1": 75.72463759823123, "val_acc5": 100.0, "val_uar": 0.6815678536218397, "val_war": 0.7572463768115942, "val_weighted_f1": 0.747085247234363, "val_micro_f1": 0.7572463768115942, "val_macro_f1": 0.6843504425965341, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1737092145639876e-07, "train_min_lr": 1.789245253676469e-09, "train_loss": 0.9216224379193636, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.612317125002543, "val_loss": 0.7300288842784034, "val_acc1": 73.91304422461468, "val_acc5": 100.0, "val_uar": 0.6306764204935644, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7259251770285887, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6451444391204695, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.864928132030214e-07, "train_min_lr": 1.6151634304857023e-09, "train_loss": 0.9112168987408097, "train_loss_scale": 8192.0, "train_weight_decay": 0.049999999999999975, "train_grad_norm": 2.6927982105149164, "val_loss": 0.7187059968709946, "val_acc1": 73.9130435059036, "val_acc5": 100.0, "val_uar": 0.6611017159440147, "val_war": 0.7391304347826086, "val_weighted_f1": 0.7323267110216495, "val_micro_f1": 0.7391304347826086, "val_macro_f1": 0.6719697010362018, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 75.36231884057972, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 66.70%, Final WAR: 75.36%
104
+ Final Confusion Matrix:
105
+ [[11 4 8 4]
106
+ [ 0 84 12 1]
107
+ [ 2 13 89 8]
108
+ [ 0 2 14 24]]
109
+ Final Class Accuracies: ['40.74%', '86.60%', '79.46%', '60.00%']
110
+ Final Weighted F1: 0.7467, Final Micro F1: 0.7536, Final Macro F1: 0.6927
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split05/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.536417322834646e-06, "train_min_lr": 3.121271593438969e-08, "train_loss": 1.9567902360866272, "train_loss_scale": 3092.078431372549, "train_weight_decay": 0.04999999999999998, "train_grad_norm": NaN, "val_loss": 1.8269204795360565, "val_acc1": 28.306878566741943, "val_acc5": 100.0, "val_uar": 0.2648904995670346, "val_war": 0.2830687830687831, "val_weighted_f1": 0.27887922174822977, "val_micro_f1": 0.2830687830687831, "val_macro_f1": 0.2636814536220997, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.6830708661417322e-05, "train_min_lr": 9.488665644054467e-08, "train_loss": 1.2985644334671544, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 4.130942003399718, "val_loss": 1.053411992887656, "val_acc1": 56.3492062886556, "val_acc5": 100.0, "val_uar": 0.467652465305974, "val_war": 0.5634920634920635, "val_weighted_f1": 0.5407690808130183, "val_micro_f1": 0.5634920634920635, "val_macro_f1": 0.45441267261336327, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8125000000000003e-05, "train_min_lr": 1.5856059694669964e-07, "train_loss": 1.1897284974849303, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3422530912885478, "val_loss": 0.8478377933303515, "val_acc1": 68.5185185387021, "val_acc5": 100.0, "val_uar": 0.5855529888808398, "val_war": 0.6851851851851852, "val_weighted_f1": 0.6684948911745967, "val_micro_f1": 0.6851851851851852, "val_macro_f1": 0.5957521710643235, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.9419291338582704e-05, "train_min_lr": 2.2223453745285467e-07, "train_loss": 1.1774134721631317, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.7507620722639796, "val_loss": 0.8251527945200602, "val_acc1": 69.04761995588031, "val_acc5": 100.0, "val_uar": 0.6068121695150203, "val_war": 0.6904761904761905, "val_weighted_f1": 0.6838134391829608, "val_micro_f1": 0.6904761904761905, "val_macro_f1": 0.6157013522871216, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.071358267716536e-05, "train_min_lr": 2.859084779590097e-07, "train_loss": 1.1567465559719434, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.4589666852764056, "val_loss": 0.7589262475570043, "val_acc1": 74.338624500093, "val_acc5": 100.0, "val_uar": 0.6878590161868671, "val_war": 0.7433862433862434, "val_weighted_f1": 0.7393845140580594, "val_micro_f1": 0.7433862433862434, "val_macro_f1": 0.695406948727055, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.6245048752361314e-05, "train_min_lr": 3.1709328019451135e-07, "train_loss": 1.1512269769229142, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.348419570455364, "val_loss": 0.7145254413286845, "val_acc1": 75.13227462768555, "val_acc5": 100.0, "val_uar": 0.7149627632357894, "val_war": 0.7513227513227513, "val_weighted_f1": 0.7515046491050875, "val_micro_f1": 0.7513227513227513, "val_macro_f1": 0.7226703413568425, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.621475366821118e-05, "train_min_lr": 3.1692248529220416e-07, "train_loss": 1.1446141505163479, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.840496869648204, "val_loss": 0.6705204968651136, "val_acc1": 76.71957706269764, "val_acc5": 100.0, "val_uar": 0.6705803401033664, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7555532826661254, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7013960938241289, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.615389662220735e-05, "train_min_lr": 3.165793909084607e-07, "train_loss": 1.127790560909346, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.3573653627844418, "val_loss": 0.6764906433721384, "val_acc1": 77.51322737194243, "val_acc5": 100.0, "val_uar": 0.737277959613486, "val_war": 0.7751322751322751, "val_weighted_f1": 0.7774670095831799, "val_micro_f1": 0.7751322751322751, "val_macro_f1": 0.7463619994216333, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.606254416063613e-05, "train_min_lr": 3.160643722119527e-07, "train_loss": 1.1162285109361012, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1842482206868192, "val_loss": 0.6742485910654068, "val_acc1": 78.57142838977632, "val_acc5": 100.0, "val_uar": 0.7624696064169748, "val_war": 0.7857142857142857, "val_weighted_f1": 0.7874815304745165, "val_micro_f1": 0.7857142857142857, "val_macro_f1": 0.762664416856908, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.5940796176073e-05, "train_min_lr": 3.1537799236806353e-07, "train_loss": 1.1075815385073617, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.5981021301419127, "val_loss": 0.66977126399676, "val_acc1": 76.98412668137323, "val_acc5": 100.0, "val_uar": 0.7159636933485618, "val_war": 0.7698412698412699, "val_weighted_f1": 0.7682181450944598, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.7275268770977444, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.578878579815155e-05, "train_min_lr": 3.145210019230746e-07, "train_loss": 1.0907557306725995, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.4422995693543377, "val_loss": 0.6603279821574688, "val_acc1": 76.98412758963448, "val_acc5": 100.0, "val_uar": 0.7508524139116244, "val_war": 0.7698412698412699, "val_weighted_f1": 0.7691931835924377, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.749902782923233, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.560667924798835e-05, "train_min_lr": 3.134943379834556e-07, "train_loss": 1.0997718112920625, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.435142930816202, "val_loss": 0.6625905918578306, "val_acc1": 76.45502671741303, "val_acc5": 100.0, "val_uar": 0.7250008830107514, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7645652963658793, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.7346248169099181, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.539467565642228e-05, "train_min_lr": 3.122991231911512e-07, "train_loss": 1.0929007407496958, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.058861451990464, "val_loss": 0.6870799623429775, "val_acc1": 78.83597964332218, "val_acc5": 100.0, "val_uar": 0.792181767428478, "val_war": 0.7883597883597884, "val_weighted_f1": 0.7896270283063256, "val_micro_f1": 0.7883597883597884, "val_macro_f1": 0.7772286848082653, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.5153006846268126e-05, "train_min_lr": 3.10936664495989e-07, "train_loss": 1.0592138712702235, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.948043040200776, "val_loss": 0.6161794289946556, "val_acc1": 79.10052980695453, "val_acc5": 100.0, "val_uar": 0.7929519000736105, "val_war": 0.791005291005291, "val_weighted_f1": 0.7939049580617871, "val_micro_f1": 0.791005291005291, "val_macro_f1": 0.7789718438134514, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.48819370788216e-05, "train_min_lr": 3.094084517265481e-07, "train_loss": 1.0788753718332527, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.433169963313084, "val_loss": 0.6553262745340666, "val_acc1": 75.13227571759906, "val_acc5": 100.0, "val_uar": 0.7334371421706948, "val_war": 0.7513227513227513, "val_weighted_f1": 0.7450789570803718, "val_micro_f1": 0.7513227513227513, "val_macro_f1": 0.7202201429981906, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.458176276489367e-05, "train_min_lr": 3.077161559610556e-07, "train_loss": 1.0642814527150073, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.51833205129586, "val_loss": 0.622744665791591, "val_acc1": 81.4814825512114, "val_acc5": 100.0, "val_uar": 0.8180222370628072, "val_war": 0.8148148148148148, "val_weighted_f1": 0.8112061088107334, "val_micro_f1": 0.8148148148148148, "val_macro_f1": 0.8064557653719537, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.425281214068948e-05, "train_min_lr": 3.058616277000846e-07, "train_loss": 1.065323784265643, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1432954259947237, "val_loss": 0.6476344391703606, "val_acc1": 79.62962977091472, "val_acc5": 100.0, "val_uar": 0.7889856926699033, "val_war": 0.7962962962962963, "val_weighted_f1": 0.796360475910841, "val_micro_f1": 0.7962962962962963, "val_macro_f1": 0.7798110988345569, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.389544490888697e-05, "train_min_lr": 3.0384689484306075e-07, "train_loss": 1.0631489843324897, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0985245634527767, "val_loss": 0.615086168050766, "val_acc1": 78.57142984299432, "val_acc5": 100.0, "val_uar": 0.7945764907607012, "val_war": 0.7857142857142857, "val_weighted_f1": 0.7869759438788078, "val_micro_f1": 0.7857142857142857, "val_macro_f1": 0.7790559399236984, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.351005184530733e-05, "train_min_lr": 3.0167416047078327e-07, "train_loss": 1.0704526324677313, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0511337939430687, "val_loss": 0.613317838559548, "val_acc1": 78.83597873506092, "val_acc5": 100.0, "val_uar": 0.7513971585188691, "val_war": 0.7883597883597884, "val_weighted_f1": 0.7889745016625824, "val_micro_f1": 0.7883597883597884, "val_macro_f1": 0.7664932408565615, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.309705437160725e-05, "train_min_lr": 2.993458004363883e-07, "train_loss": 1.041405499565835, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1577259793001065, "val_loss": 0.6224324243764082, "val_acc1": 77.51322846185593, "val_acc5": 100.0, "val_uar": 0.7452310733451084, "val_war": 0.7751322751322751, "val_weighted_f1": 0.7762759130820878, "val_micro_f1": 0.7751322751322751, "val_macro_f1": 0.748378272315491, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.2656904094460706e-05, "train_min_lr": 2.9686436076738873e-07, "train_loss": 1.045956083177741, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.2595342771679747, "val_loss": 0.6403171407679716, "val_acc1": 77.51322828020368, "val_acc5": 100.0, "val_uar": 0.7422736780357395, "val_war": 0.7751322751322751, "val_weighted_f1": 0.7740841580236385, "val_micro_f1": 0.7751322751322751, "val_macro_f1": 0.7463280305083498, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.219008231173354e-05, "train_min_lr": 2.942325548816307e-07, "train_loss": 1.034304988150503, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.323929786682129, "val_loss": 0.6699706129729748, "val_acc1": 74.8677251906622, "val_acc5": 100.0, "val_uar": 0.708652829261382, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7380847972174048, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.7089670628341785, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.169709948619132e-05, "train_min_lr": 2.9145326062021033e-07, "train_loss": 1.030392208130531, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.112675203996546, "val_loss": 0.6366741073628267, "val_acc1": 76.98412831624348, "val_acc5": 100.0, "val_uar": 0.7090160075467093, "val_war": 0.7698412698412699, "val_weighted_f1": 0.7625414326923894, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.7194975181852067, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.1178494687315915e-05, "train_min_lr": 2.8852951710059744e-07, "train_loss": 1.0294817878919489, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.420085397421145, "val_loss": 0.6588014662265778, "val_acc1": 77.24867757161458, "val_acc5": 100.0, "val_uar": 0.7459801683047298, "val_war": 0.7724867724867724, "val_weighted_f1": 0.7701886738990771, "val_micro_f1": 0.7724867724867724, "val_macro_f1": 0.7483587891126763, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063483500184073e-05, "train_min_lr": 2.8546452139340483e-07, "train_loss": 1.026691773748086, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0913633832744525, "val_loss": 0.6223829773565134, "val_acc1": 78.30687840779622, "val_acc5": 100.0, "val_uar": 0.7780698685249123, "val_war": 0.783068783068783, "val_weighted_f1": 0.7770677891497697, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7680042534855454, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.006671491365001e-05, "train_min_lr": 2.822616250264383e-07, "train_loss": 1.030420501637303, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0569630160051235, "val_loss": 0.6867688534160455, "val_acc1": 74.8677251906622, "val_acc5": 100.0, "val_uar": 0.7494007407878022, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7512510904539337, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.734434094552363, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.947475565371907e-05, "train_min_lr": 2.7892433031985075e-07, "train_loss": 1.0218522492187474, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.626756857423221, "val_loss": 0.6075700086851915, "val_acc1": 76.45502708071754, "val_acc5": 100.0, "val_uar": 0.7244384940108624, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7635422604426773, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.735735165300429, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.885960452080762e-05, "train_min_lr": 2.754562865564062e-07, "train_loss": 1.0131002966484992, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.5235618259392534, "val_loss": 0.6421174642940363, "val_acc1": 76.45502744402204, "val_acc5": 100.0, "val_uar": 0.7634572743289849, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7563840645135759, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.746738786142175, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.822193417364793e-05, "train_min_lr": 2.718612859910426e-07, "train_loss": 1.0027527596825867, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0853130046059105, "val_loss": 0.6033467861513296, "val_acc1": 78.30687913440522, "val_acc5": 100.0, "val_uar": 0.7521507567724672, "val_war": 0.783068783068783, "val_weighted_f1": 0.7779060372880926, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7547303663037036, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.756244189540243e-05, "train_min_lr": 2.6814325970409697e-07, "train_loss": 1.0309634738498263, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.6862074954836976, "val_loss": 0.6025091074407101, "val_acc1": 78.57142966134208, "val_acc5": 100.0, "val_uar": 0.7655321785913891, "val_war": 0.7857142857142857, "val_weighted_f1": 0.782728828577562, "val_micro_f1": 0.7857142857142857, "val_macro_f1": 0.766966916770918, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.688184883119482e-05, "train_min_lr": 2.643062733027266e-07, "train_loss": 0.9951020638927136, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0692963857276765, "val_loss": 0.6155529034634432, "val_acc1": 78.30687786283947, "val_acc5": 100.0, "val_uar": 0.7486726317866669, "val_war": 0.783068783068783, "val_weighted_f1": 0.7836161934314377, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7549351423236653, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.618089919954843e-05, "train_min_lr": 2.603545224752272e-07, "train_loss": 0.996624217898238, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9875996743931488, "val_loss": 0.6202165124317011, "val_acc1": 78.04232842581612, "val_acc5": 100.0, "val_uar": 0.7928966854295803, "val_war": 0.7804232804232805, "val_weighted_f1": 0.7771765342474627, "val_micro_f1": 0.7804232804232805, "val_macro_f1": 0.7685349963512013, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.546035947859425e-05, "train_min_lr": 2.562923284031098e-07, "train_loss": 1.0027313680430643, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0982360559351303, "val_loss": 0.6461385041475296, "val_acc1": 75.9259262084961, "val_acc5": 100.0, "val_uar": 0.7264304664633613, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7520644019282499, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7182288328488289, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.4721017567938275e-05, "train_min_lr": 2.5212413303595154e-07, "train_loss": 0.9963228721049876, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.957087949210522, "val_loss": 0.6407641656696796, "val_acc1": 78.83597982497443, "val_acc5": 100.0, "val_uar": 0.7522986238501589, "val_war": 0.7883597883597884, "val_weighted_f1": 0.7871031765431215, "val_micro_f1": 0.7883597883597884, "val_macro_f1": 0.7569120755933179, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.39636819271049e-05, "train_min_lr": 2.478544942341897e-07, "train_loss": 0.9846237812166899, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9339166739407707, "val_loss": 0.6539570800960064, "val_acc1": 78.04232933407738, "val_acc5": 100.0, "val_uar": 0.7752099565476758, "val_war": 0.7804232804232805, "val_weighted_f1": 0.7806950849094414, "val_micro_f1": 0.7804232804232805, "val_macro_f1": 0.759729294162444, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.31891806914983e-05, "train_min_lr": 2.434880807851677e-07, "train_loss": 0.9957627768999611, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.925706255669687, "val_loss": 0.6295989652474722, "val_acc1": 76.4550265357608, "val_acc5": 100.0, "val_uar": 0.7471953978039504, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7578747655144684, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.7336810986248445, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.239836076684844e-05, "train_min_lr": 2.3902966729788513e-07, "train_loss": 0.9799091080629748, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7951176330155016, "val_loss": 0.6889490187168121, "val_acc1": 74.338625226702, "val_acc5": 100.0, "val_uar": 0.7214751592218698, "val_war": 0.7433862433862434, "val_weighted_f1": 0.7384577399870214, "val_micro_f1": 0.7433862433862434, "val_macro_f1": 0.7111588716465653, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.1592086903132284e-05, "train_min_lr": 2.344841289820325e-07, "train_loss": 0.9872323589192497, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1650940296696684, "val_loss": 0.6480977100630602, "val_acc1": 73.80952435448056, "val_acc5": 100.0, "val_uar": 0.6950967711219904, "val_war": 0.7380952380952381, "val_weighted_f1": 0.7333501934612349, "val_micro_f1": 0.7380952380952381, "val_macro_f1": 0.7011370729026617, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.07712407489822e-05, "train_min_lr": 2.2985643631702136e-07, "train_loss": 0.9826707832174364, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.024927534309088, "val_loss": 0.6326876059174538, "val_acc1": 77.51322773524693, "val_acc5": 100.0, "val_uar": 0.7354859228653088, "val_war": 0.7751322751322751, "val_weighted_f1": 0.7690058212741531, "val_micro_f1": 0.7751322751322751, "val_macro_f1": 0.745908998901523, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993671988761623e-05, "train_min_lr": 2.251516496168376e-07, "train_loss": 0.9909672057316974, "train_loss_scale": 3493.6470588235293, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.884530979044297, "val_loss": 0.6844578497111797, "val_acc1": 74.07407397315616, "val_acc5": 100.0, "val_uar": 0.6970691236151763, "val_war": 0.7407407407407407, "val_weighted_f1": 0.7347107304859471, "val_micro_f1": 0.7407407407407407, "val_macro_f1": 0.6989538757350593, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908943685534389e-05, "train_min_lr": 2.203749134966631e-07, "train_loss": 0.9716926269671496, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0439370192733466, "val_loss": 0.6419738506277403, "val_acc1": 76.4550272623698, "val_acc5": 100.0, "val_uar": 0.7099227820993173, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7621847156159813, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.7182855034154036, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.823031814372137e-05, "train_min_lr": 2.1553145124731387e-07, "train_loss": 0.977823326400682, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.82173224290212, "val_loss": 0.6584663366278013, "val_acc1": 74.86772500900994, "val_acc5": 100.0, "val_uar": 0.6967919296428068, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7416720514635389, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.6973423990287666, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.736030318644632e-05, "train_min_lr": 2.1062655912364843e-07, "train_loss": 0.9546029822888717, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9222635848849428, "val_loss": 0.6256709607938925, "val_acc1": 76.71957724434989, "val_acc5": 100.0, "val_uar": 0.7392290989549762, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7618461249930185, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.742437208977931, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.648034333210112e-05, "train_min_lr": 2.0566560055319138e-07, "train_loss": 0.9763452182797825, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.0200930132585415, "val_loss": 0.6725559942424297, "val_acc1": 76.19047691708519, "val_acc5": 100.0, "val_uar": 0.7145522595906368, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7556005294631585, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.711618351521273, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.5591400803867063e-05, "train_min_lr": 2.0065400027130337e-07, "train_loss": 0.9530029719561534, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9938587441163906, "val_loss": 0.6237391692896684, "val_acc1": 78.04232842581612, "val_acc5": 100.0, "val_uar": 0.7479553996604436, "val_war": 0.7804232804232805, "val_weighted_f1": 0.7753681435650954, "val_micro_f1": 0.7804232804232805, "val_macro_f1": 0.7542602256444129, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.469444764734741e-05, "train_min_lr": 1.9559723838931282e-07, "train_loss": 0.9563277854638941, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.891726582658057, "val_loss": 0.6451697126030922, "val_acc1": 75.9259265718006, "val_acc5": 100.0, "val_uar": 0.729163512535223, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7532655387815539, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7290517026089237, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.379046466764991e-05, "train_min_lr": 1.905008444020954e-07, "train_loss": 0.9548826607224209, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 3.1003878093233297, "val_loss": 0.6044729687273502, "val_acc1": 75.92592675345284, "val_acc5": 100.0, "val_uar": 0.7241051102893208, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7555492629980423, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7335930876773674, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.288044035689072e-05, "train_min_lr": 1.8537039114165132e-07, "train_loss": 0.9603816081885419, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.627800660974839, "val_loss": 0.6312381575504938, "val_acc1": 74.86772573561896, "val_acc5": 100.0, "val_uar": 0.6987737922003274, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7465749240813233, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.708350813682459, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.19653698132928e-05, "train_min_lr": 1.8021148868329666e-07, "train_loss": 0.9502759558313033, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9641253270354926, "val_loss": 0.6396673731505871, "val_acc1": 76.71957760765439, "val_acc5": 100.0, "val_uar": 0.7532517062615746, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7664197410672674, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7482849877544978, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.1046253653060647e-05, "train_min_lr": 1.750297782111272e-07, "train_loss": 0.9477233991903418, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8334314893273747, "val_loss": 0.7099362115065256, "val_acc1": 71.95767211914062, "val_acc5": 100.0, "val_uar": 0.6442885647490911, "val_war": 0.7195767195767195, "val_weighted_f1": 0.7047899100983842, "val_micro_f1": 0.7195767195767195, "val_macro_f1": 0.6460020715629583, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.012409691622117e-05, "train_min_lr": 1.6983092584946713e-07, "train_loss": 0.9736311711322249, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8415184371611653, "val_loss": 0.6781611951688925, "val_acc1": 74.33862468174526, "val_acc5": 100.0, "val_uar": 0.7280543940302712, "val_war": 0.7433862433862434, "val_weighted_f1": 0.7410570649351683, "val_micro_f1": 0.7433862433862434, "val_macro_f1": 0.7254729927098243, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9199907967627092e-05, "train_min_lr": 1.6462061646704509e-07, "train_loss": 0.9593143708565656, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8083710273106894, "val_loss": 0.6708520936469237, "val_acc1": 74.07407451811291, "val_acc5": 100.0, "val_uar": 0.6965595444597638, "val_war": 0.7407407407407407, "val_weighted_f1": 0.7329727195010401, "val_micro_f1": 0.7407407407407407, "val_macro_f1": 0.7035213297347572, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8274697394324934e-05, "train_min_lr": 1.5940454746067394e-07, "train_loss": 0.9301503097114999, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.708096880538791, "val_loss": 0.6571227200329304, "val_acc1": 74.86772500900994, "val_acc5": 100.0, "val_uar": 0.7164410672579533, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7478604337982194, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.7219217014388466, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.734947690049276e-05, "train_min_lr": 1.5418842252523044e-07, "train_loss": 0.9593515472084868, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5877160091026155, "val_loss": 0.6605998799204826, "val_acc1": 73.28042366391136, "val_acc5": 100.0, "val_uar": 0.6883962859346632, "val_war": 0.7328042328042328, "val_weighted_f1": 0.7264932405082125, "val_micro_f1": 0.7328042328042328, "val_macro_f1": 0.690468939369116, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.6425258201156672e-05, "train_min_lr": 1.4897794541674922e-07, "train_loss": 0.9388509366442176, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.856445487807779, "val_loss": 0.6729538589715958, "val_acc1": 75.92592711675735, "val_acc5": 100.0, "val_uar": 0.7063053789534053, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7533710952660397, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7170179715569308, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.5503051915895116e-05, "train_min_lr": 1.437788137154489e-07, "train_loss": 0.9461421028656118, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.981586678355348, "val_loss": 0.6590189014871916, "val_acc1": 76.19047673543294, "val_acc5": 100.0, "val_uar": 0.7232102471521331, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7585108874326408, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.732055859959975, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.458386646374134e-05, "train_min_lr": 1.3859671259551204e-07, "train_loss": 0.9551350390599445, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8176631483377195, "val_loss": 0.671974046776692, "val_acc1": 74.60317484537761, "val_acc5": 100.0, "val_uar": 0.7006070764842695, "val_war": 0.746031746031746, "val_weighted_f1": 0.739925601886463, "val_micro_f1": 0.746031746031746, "val_macro_f1": 0.7007077454341564, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3668706960491932e-05, "train_min_lr": 1.334373086084304e-07, "train_loss": 0.9315904664253097, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7336897966908476, "val_loss": 0.6453677999476591, "val_acc1": 75.66137640816825, "val_acc5": 100.0, "val_uar": 0.7233994715080242, "val_war": 0.7566137566137566, "val_weighted_f1": 0.7523502866824904, "val_micro_f1": 0.7566137566137566, "val_macro_f1": 0.7219211403665606, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.275857411962744e-05, "train_min_lr": 1.2830624348671418e-07, "train_loss": 0.929509144398122, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6946246927859736, "val_loss": 0.6209470964968204, "val_acc1": 76.71957724434989, "val_acc5": 100.0, "val_uar": 0.7522931511308705, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7606311084810778, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7417333026531008, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.1854463158046933e-05, "train_min_lr": 1.232091279747406e-07, "train_loss": 0.9468124941283581, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.879706202768812, "val_loss": 0.6566163599491119, "val_acc1": 76.98412758963448, "val_acc5": 100.0, "val_uar": 0.7206470482895921, "val_war": 0.7698412698412699, "val_weighted_f1": 0.7626930034987323, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.72486521113638, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.0957362707812887e-05, "train_min_lr": 1.1815153569348674e-07, "train_loss": 0.9515979296826069, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9619977918325686, "val_loss": 0.6349686533212662, "val_acc1": 76.19047691708519, "val_acc5": 100.0, "val_uar": 0.7040955712172817, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7558068011106573, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.7166742374715994, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.0068253735096514e-05, "train_min_lr": 1.1313899704585739e-07, "train_loss": 0.9386782259528154, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7655771737005197, "val_loss": 0.6522469694415728, "val_acc1": 75.1322751726423, "val_acc5": 100.0, "val_uar": 0.7187773303288654, "val_war": 0.7513227513227513, "val_weighted_f1": 0.7462689938091965, "val_micro_f1": 0.7513227513227513, "val_macro_f1": 0.7188291493654881, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.9188108467505722e-05, "train_min_lr": 1.0817699316927043e-07, "train_loss": 0.9407068340996512, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7214167959549846, "val_loss": 0.6451577097177505, "val_acc1": 78.30687858944847, "val_acc5": 100.0, "val_uar": 0.7359114289706395, "val_war": 0.783068783068783, "val_weighted_f1": 0.7771359993786078, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7458722345033146, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.831788933096846e-05, "train_min_lr": 1.0327094994211341e-07, "train_loss": 0.9335571370678011, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.770344678093405, "val_loss": 0.6405995711684227, "val_acc1": 77.77777753557477, "val_acc5": 100.0, "val_uar": 0.7469540265209126, "val_war": 0.7777777777777778, "val_weighted_f1": 0.7724846869401112, "val_micro_f1": 0.7777777777777778, "val_macro_f1": 0.755018683224845, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.7458547897334095e-05, "train_min_lr": 9.842623205062551e-08, "train_loss": 0.9539193622037476, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.9051868074080525, "val_loss": 0.6285421575109164, "val_acc1": 78.30687913440522, "val_acc5": 100.0, "val_uar": 0.752687544343246, "val_war": 0.783068783068783, "val_weighted_f1": 0.7773901224134038, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7500808355930759, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6611023843843597e-05, "train_min_lr": 9.364813712269166e-08, "train_loss": 0.9314981818783516, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7754306910084745, "val_loss": 0.6428820230066776, "val_acc1": 77.77777862548828, "val_acc5": 100.0, "val_uar": 0.7334896609238715, "val_war": 0.7777777777777778, "val_weighted_f1": 0.7729906588441356, "val_micro_f1": 0.7777777777777778, "val_macro_f1": 0.746112646177674, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.5776243925606218e-05, "train_min_lr": 8.894188993496415e-08, "train_loss": 0.940622598127602, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6599623105105232, "val_loss": 0.6558243048687776, "val_acc1": 75.66137677147275, "val_acc5": 100.0, "val_uar": 0.717611091191135, "val_war": 0.7566137566137566, "val_weighted_f1": 0.7522669349927529, "val_micro_f1": 0.7566137566137566, "val_macro_f1": 0.723165288480072, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.495512096220645e-05, "train_min_lr": 8.431263669964643e-08, "train_loss": 0.9486946081803516, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.8119775360705805, "val_loss": 0.6116043341656526, "val_acc1": 78.57142947968983, "val_acc5": 100.0, "val_uar": 0.748386216812752, "val_war": 0.7857142857142857, "val_weighted_f1": 0.7830038655347481, "val_micro_f1": 0.7857142857142857, "val_macro_f1": 0.7561289483585231, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.4148552839549153e-05, "train_min_lr": 7.976543943718531e-08, "train_loss": 0.9446593824944465, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.590171407250797, "val_loss": 0.6582602076232433, "val_acc1": 75.92592639014835, "val_acc5": 100.0, "val_uar": 0.7250151742750427, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7559417211148886, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7322382342148225, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3357421528034545e-05, "train_min_lr": 7.530527044102595e-08, "train_loss": 0.9448470229806464, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.682792023116467, "val_loss": 0.6402782872319221, "val_acc1": 76.98412795293899, "val_acc5": 100.0, "val_uar": 0.7336847667439773, "val_war": 0.7698412698412699, "val_weighted_f1": 0.7662447146900027, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.7440592395271303, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582592118136499e-05, "train_min_lr": 7.093700684048218e-08, "train_loss": 0.9335457384586334, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.687856440450631, "val_loss": 0.6339563441773256, "val_acc1": 77.51322791689918, "val_acc5": 100.0, "val_uar": 0.7323265434614118, "val_war": 0.7751322751322751, "val_weighted_f1": 0.7707478068480919, "val_micro_f1": 0.7751322751322751, "val_macro_f1": 0.7404736977675842, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.18249118744388e-05, "train_min_lr": 6.666542526766691e-08, "train_loss": 0.9380445932251176, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7371377360587026, "val_loss": 0.6562409525116285, "val_acc1": 75.39682606288365, "val_acc5": 100.0, "val_uar": 0.7004219203067887, "val_war": 0.753968253968254, "val_weighted_f1": 0.7472338579891122, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7079804612167571, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1085209309163785e-05, "train_min_lr": 6.249519663431545e-08, "train_loss": 0.931066180561103, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6485675503225887, "val_loss": 0.6633454809586207, "val_acc1": 75.661376226516, "val_acc5": 100.0, "val_uar": 0.7155152819078258, "val_war": 0.7566137566137566, "val_weighted_f1": 0.7530220566531932, "val_micro_f1": 0.7566137566137566, "val_macro_f1": 0.7231929826279672, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0364293276206376e-05, "train_min_lr": 5.8430881024211504e-08, "train_loss": 0.9347841699723325, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.7650530034420537, "val_loss": 0.6665646644930044, "val_acc1": 76.19047709873745, "val_acc5": 100.0, "val_uar": 0.7149273318461914, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7575275066047003, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.7265865188288841, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.66295208666423e-06, "train_min_lr": 5.447692270680312e-08, "train_loss": 0.9374620087590873, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6892955536935843, "val_loss": 0.640868711595734, "val_acc1": 76.98412758963448, "val_acc5": 100.0, "val_uar": 0.7337848749690855, "val_war": 0.7698412698412699, "val_weighted_f1": 0.765974157061794, "val_micro_f1": 0.7698412698412699, "val_macro_f1": 0.7429094098886154, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.981952646831133e-06, "train_min_lr": 5.063764527745896e-08, "train_loss": 0.9158713276090186, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.644767592935001, "val_loss": 0.6686178135375181, "val_acc1": 74.338625226702, "val_acc5": 100.0, "val_uar": 0.6898713232430337, "val_war": 0.7433862433862434, "val_weighted_f1": 0.7359453491816313, "val_micro_f1": 0.7433862433862434, "val_macro_f1": 0.6984306868683001, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.322039619596193e-06, "train_min_lr": 4.69172469296803e-08, "train_loss": 0.9281007855935813, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6655437619078395, "val_loss": 0.6551036139329275, "val_acc1": 75.3968262445359, "val_acc5": 100.0, "val_uar": 0.724825456788176, "val_war": 0.753968253968254, "val_weighted_f1": 0.7512144100019492, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7264299786850197, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.683934610165963e-06, "train_min_lr": 4.3319795864438076e-08, "train_loss": 0.9194838863377478, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.585891609098397, "val_loss": 0.6875151122609774, "val_acc1": 74.86772573561896, "val_acc5": 100.0, "val_uar": 0.6934539479769743, "val_war": 0.7486772486772487, "val_weighted_f1": 0.7390072855483102, "val_micro_f1": 0.7486772486772487, "val_macro_f1": 0.7025718306414553, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.06833537699968e-06, "train_min_lr": 3.9849225841654914e-08, "train_loss": 0.92903351861667, "train_loss_scale": 6103.843137254902, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6600507123797548, "val_loss": 0.6755714428921541, "val_acc1": 75.13227608090355, "val_acc5": 100.0, "val_uar": 0.7081201634216986, "val_war": 0.7513227513227513, "val_weighted_f1": 0.7448425897080561, "val_micro_f1": 0.7513227513227513, "val_macro_f1": 0.7219134568624527, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.475915068820675e-06, "train_min_lr": 3.650933187869632e-08, "train_loss": 0.9406097183624903, "train_loss_scale": 7147.921568627451, "train_weight_decay": 0.04999999999999998, "train_grad_norm": Infinity, "val_loss": 0.6561930626630783, "val_acc1": 76.45502708071754, "val_acc5": 100.0, "val_uar": 0.7242735316584001, "val_war": 0.7645502645502645, "val_weighted_f1": 0.7589558544439018, "val_micro_f1": 0.7645502645502645, "val_macro_f1": 0.7383499380668769, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.907321488538122e-06, "train_min_lr": 3.330376610057497e-08, "train_loss": 0.9208287169146382, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.766094836534238, "val_loss": 0.6446472443640232, "val_acc1": 75.3968255179269, "val_acc5": 100.0, "val_uar": 0.7062143880729408, "val_war": 0.753968253968254, "val_weighted_f1": 0.7483712217302839, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7138825271788314, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.363176384884051e-06, "train_min_lr": 3.02360337464056e-08, "train_loss": 0.9287561902812883, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5834676985647165, "val_loss": 0.6616890467703342, "val_acc1": 75.66137640816825, "val_acc5": 100.0, "val_uar": 0.7233274524776717, "val_war": 0.7566137566137566, "val_weighted_f1": 0.7525134312651199, "val_micro_f1": 0.7566137566137566, "val_macro_f1": 0.724399873059405, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.844074772540244e-06, "train_min_lr": 2.7309489336477486e-08, "train_loss": 0.9093886922582303, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.594542056906457, "val_loss": 0.6688883770257235, "val_acc1": 73.54497364589146, "val_acc5": 100.0, "val_uar": 0.6829904628753313, "val_war": 0.7354497354497355, "val_weighted_f1": 0.729181527742925, "val_micro_f1": 0.7354497354497355, "val_macro_f1": 0.686079591684785, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350584281498429e-06, "train_min_lr": 2.4527333004135813e-08, "train_loss": 0.9415165931960336, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.742970955138113, "val_loss": 0.6435770081977049, "val_acc1": 78.30687877110073, "val_acc5": 100.0, "val_uar": 0.7504993410585517, "val_war": 0.783068783068783, "val_weighted_f1": 0.7790284740367628, "val_micro_f1": 0.783068783068783, "val_macro_f1": 0.7539610087240871, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.883244536365208e-06, "train_min_lr": 2.189260699648274e-08, "train_loss": 0.926148480157447, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.703934994398379, "val_loss": 0.6334604360163212, "val_acc1": 77.77777862548828, "val_acc5": 100.0, "val_uar": 0.7494841391551919, "val_war": 0.7777777777777778, "val_weighted_f1": 0.7738233778504737, "val_micro_f1": 0.7777777777777778, "val_macro_f1": 0.7524391798080511, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.4425665662904687e-06, "train_min_lr": 1.9408192347725037e-08, "train_loss": 0.9130113752059688, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6399491113774918, "val_loss": 0.665834774573644, "val_acc1": 76.71957742600213, "val_acc5": 100.0, "val_uar": 0.7400624322883094, "val_war": 0.7671957671957672, "val_weighted_f1": 0.763585888494409, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7387247567551483, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0290322461645205e-06, "train_min_lr": 1.7076805728805288e-08, "train_loss": 0.9338558051321242, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6421009839749803, "val_loss": 0.6577597769598166, "val_acc1": 76.19047637212844, "val_acc5": 100.0, "val_uar": 0.721353617954495, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7599016591018902, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.7315824458161473, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.64309376969497e-06, "train_min_lr": 1.4900996476762211e-08, "train_loss": 0.9056650213166779, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.4819680428972433, "val_loss": 0.6540866419672966, "val_acc1": 75.92592639014835, "val_acc5": 100.0, "val_uar": 0.7262624783841889, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7541524357717525, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.7299475368914514, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.285173154939547e-06, "train_min_lr": 1.2883143807067995e-08, "train_loss": 0.9193525325044308, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.748055612339693, "val_loss": 0.6664756499230862, "val_acc1": 75.92592639014835, "val_acc5": 100.0, "val_uar": 0.7127083049341821, "val_war": 0.7592592592592593, "val_weighted_f1": 0.7561082848923067, "val_micro_f1": 0.7592592592592593, "val_macro_f1": 0.723595881063571, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.955661782835546e-06, "train_min_lr": 1.1025454211991155e-08, "train_loss": 0.9274331454942429, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5576533009024227, "val_loss": 0.6417473666369915, "val_acc1": 76.19047691708519, "val_acc5": 100.0, "val_uar": 0.7252202605711378, "val_war": 0.7619047619047619, "val_weighted_f1": 0.757785081382618, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.7282683140201754, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.6549199692305295e-06, "train_min_lr": 9.329959047829568e-09, "train_loss": 0.9071780485070609, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6451769155614517, "val_loss": 0.6534223208824793, "val_acc1": 76.19047709873745, "val_acc5": 100.0, "val_uar": 0.7222284375793148, "val_war": 0.7619047619047619, "val_weighted_f1": 0.7581692285890752, "val_micro_f1": 0.7619047619047619, "val_macro_f1": 0.730455137935989, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.3832765708822247e-06, "train_min_lr": 7.798512313652238e-09, "train_loss": 0.9384586636731828, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.5212434273140105, "val_loss": 0.6857013007005056, "val_acc1": 72.48677299136207, "val_acc5": 100.0, "val_uar": 0.6617571360170045, "val_war": 0.7248677248677249, "val_weighted_f1": 0.7191236481228265, "val_micro_f1": 0.7248677248677249, "val_macro_f1": 0.6687463738959101, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.1410286258584985e-06, "train_min_lr": 6.4327886239785226e-09, "train_loss": 0.904328218764729, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6900737869973277, "val_loss": 0.6478695335487524, "val_acc1": 75.3968258812314, "val_acc5": 100.0, "val_uar": 0.717793913769791, "val_war": 0.753968253968254, "val_weighted_f1": 0.7489704042748054, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7193938541567144, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.284410287305858e-07, "train_min_lr": 5.234281377611722e-09, "train_loss": 0.9357595525535882, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6983178807239905, "val_loss": 0.6627297525604566, "val_acc1": 75.39682606288365, "val_acc5": 100.0, "val_uar": 0.7090815921408027, "val_war": 0.753968253968254, "val_weighted_f1": 0.7483009103793327, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7187665278063708, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.457462409147661e-07, "train_min_lr": 4.204301124629427e-09, "train_loss": 0.9316740063280841, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.4290846329109343, "val_loss": 0.6728415302932262, "val_acc1": 75.1322758992513, "val_acc5": 100.0, "val_uar": 0.7116398132406904, "val_war": 0.7513227513227513, "val_weighted_f1": 0.7462062783220781, "val_micro_f1": 0.7513227513227513, "val_macro_f1": 0.7141841094294121, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.931440364792185e-07, "train_min_lr": 3.343974133316262e-09, "train_loss": 0.912873321596314, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6402364642012355, "val_loss": 0.6359871787329515, "val_acc1": 76.71957724434989, "val_acc5": 100.0, "val_uar": 0.7408257990385184, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7623594617425365, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7448817214045853, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.7080128369400994e-07, "train_min_lr": 2.6542411586060335e-09, "train_loss": 0.9245674872125675, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.600150421553967, "val_loss": 0.6449002884328365, "val_acc1": 76.71957706269764, "val_acc5": 100.0, "val_uar": 0.7265560698236138, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7620793029135625, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7347822733054797, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.788517625630929e-07, "train_min_lr": 2.1358564133800288e-09, "train_loss": 0.9315771246268079, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.577936319743886, "val_loss": 0.636355080952247, "val_acc1": 76.71957760765439, "val_acc5": 100.0, "val_uar": 0.7362238981372753, "val_war": 0.7671957671957672, "val_weighted_f1": 0.7611931718402553, "val_micro_f1": 0.7671957671957672, "val_macro_f1": 0.7375128534723926, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1739601853783493e-07, "train_min_lr": 1.789386743746306e-09, "train_loss": 0.934573519171453, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.614873727162679, "val_loss": 0.6717331459124883, "val_acc1": 74.33862431844075, "val_acc5": 100.0, "val_uar": 0.6900517503532855, "val_war": 0.7433862433862434, "val_weighted_f1": 0.7375394994499459, "val_micro_f1": 0.7433862433862434, "val_macro_f1": 0.7035032943096515, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.86501252572043e-07, "train_min_lr": 1.6152110092017891e-09, "train_loss": 0.9118320862062617, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999998, "train_grad_norm": 2.6082761240940466, "val_loss": 0.6611278677980105, "val_acc1": 75.39682569957915, "val_acc5": 100.0, "val_uar": 0.7019069206459557, "val_war": 0.753968253968254, "val_weighted_f1": 0.7475151577548067, "val_micro_f1": 0.753968253968254, "val_macro_f1": 0.7131482320696273, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 82.97872340425532, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 83.51%, Final WAR: 82.98%
104
+ Final Confusion Matrix:
105
+ [[ 36 3 4 0]
106
+ [ 1 139 10 1]
107
+ [ 4 17 86 18]
108
+ [ 0 0 6 51]]
109
+ Final Class Accuracies: ['83.72%', '92.05%', '68.80%', '89.47%']
110
+ Final Weighted F1: 0.8275, Final Micro F1: 0.8298, Final Macro F1: 0.8254
logs/AVF-MAE++_huge-MSP-IMPROV/eval_split06/log.txt ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.534638554216868e-06, "train_min_lr": 3.1202687752563384e-08, "train_loss": 2.140562525987625, "train_loss_scale": 3153.92, "train_weight_decay": 0.04999999999999999, "train_grad_norm": NaN, "val_loss": 1.9152883072694142, "val_acc1": 27.777778046925864, "val_acc5": 100.0, "val_uar": 0.2734834024907554, "val_war": 0.2777777777777778, "val_weighted_f1": 0.2726703963544232, "val_micro_f1": 0.2777777777777778, "val_macro_f1": 0.2757137486295317, "epoch": 0, "n_parameters": 521298470}
2
+ {"train_lr": 1.6829819277108436e-05, "train_min_lr": 9.488164234963154e-08, "train_loss": 1.2872788941860198, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 4.24375274181366, "val_loss": 1.1808905720710754, "val_acc1": 44.22222318013509, "val_acc5": 100.0, "val_uar": 0.3789324576495629, "val_war": 0.44222222222222224, "val_weighted_f1": 0.40800383063415013, "val_micro_f1": 0.44222222222222224, "val_macro_f1": 0.37197831670126713, "epoch": 1, "n_parameters": 521298470}
3
+ {"train_lr": 2.8124999999999996e-05, "train_min_lr": 1.5856059694669964e-07, "train_loss": 1.1986227089166641, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.9223545503616335, "val_loss": 1.023099321126938, "val_acc1": 58.444444478352864, "val_acc5": 100.0, "val_uar": 0.5222525449266936, "val_war": 0.5844444444444444, "val_weighted_f1": 0.559118308015783, "val_micro_f1": 0.5844444444444444, "val_macro_f1": 0.5267724888567347, "epoch": 2, "n_parameters": 521298470}
4
+ {"train_lr": 3.942018072289158e-05, "train_min_lr": 2.2223955154376786e-07, "train_loss": 1.1590433462460836, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.550437271595001, "val_loss": 0.9557123263676961, "val_acc1": 65.55555547078451, "val_acc5": 100.0, "val_uar": 0.5963040547057575, "val_war": 0.6555555555555556, "val_weighted_f1": 0.6447129592686545, "val_micro_f1": 0.6555555555555556, "val_macro_f1": 0.6136623527301309, "epoch": 3, "n_parameters": 521298470}
5
+ {"train_lr": 5.0715361445783146e-05, "train_min_lr": 2.85918506140836e-07, "train_loss": 1.1491385797659557, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.909132442474365, "val_loss": 0.90123703678449, "val_acc1": 65.11111155192057, "val_acc5": 100.0, "val_uar": 0.6231021558420939, "val_war": 0.6511111111111111, "val_weighted_f1": 0.6483013307117024, "val_micro_f1": 0.6511111111111111, "val_macro_f1": 0.6273004880823172, "epoch": 4, "n_parameters": 521298470}
6
+ {"train_lr": 5.624505171280574e-05, "train_min_lr": 3.170932968846391e-07, "train_loss": 1.1312100799878437, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.562651357650757, "val_loss": 0.891047606865565, "val_acc1": 66.66666656494141, "val_acc5": 100.0, "val_uar": 0.6052233638824196, "val_war": 0.6666666666666666, "val_weighted_f1": 0.6580037889782953, "val_micro_f1": 0.6666666666666666, "val_macro_f1": 0.6226164376840264, "epoch": 5, "n_parameters": 521298470}
7
+ {"train_lr": 5.62147626255171e-05, "train_min_lr": 3.1692253579089827e-07, "train_loss": 1.1375584038098654, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.5457170534133913, "val_loss": 0.8799173951148986, "val_acc1": 65.11111073811848, "val_acc5": 100.0, "val_uar": 0.6480513324824471, "val_war": 0.6511111111111111, "val_weighted_f1": 0.6493041188600488, "val_micro_f1": 0.6511111111111111, "val_macro_f1": 0.6395354039123958, "epoch": 6, "n_parameters": 521298470}
8
+ {"train_lr": 5.6153911566580055e-05, "train_min_lr": 3.1657947516050194e-07, "train_loss": 1.1244468679030737, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.309759407043457, "val_loss": 0.8604600131511688, "val_acc1": 66.44444442749024, "val_acc5": 100.0, "val_uar": 0.6383972297768737, "val_war": 0.6644444444444444, "val_weighted_f1": 0.6619511063242645, "val_micro_f1": 0.6644444444444444, "val_macro_f1": 0.6479562060801181, "epoch": 7, "n_parameters": 521298470}
9
+ {"train_lr": 5.606256507573413e-05, "train_min_lr": 3.1606449012521245e-07, "train_loss": 1.1204269087314607, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.6299418663978575, "val_loss": 0.8235196113586426, "val_acc1": 70.00000030517577, "val_acc5": 100.0, "val_uar": 0.6690481882954793, "val_war": 0.7, "val_weighted_f1": 0.6969374560254381, "val_micro_f1": 0.7, "val_macro_f1": 0.6814743958880828, "epoch": 8, "n_parameters": 521298470}
10
+ {"train_lr": 5.594082303902597e-05, "train_min_lr": 3.1537814381360526e-07, "train_loss": 1.1005868895848592, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2109286975860596, "val_loss": 0.8453124950329463, "val_acc1": 65.33333292643229, "val_acc5": 100.0, "val_uar": 0.6492571312559702, "val_war": 0.6533333333333333, "val_weighted_f1": 0.6474218664608042, "val_micro_f1": 0.6533333333333333, "val_macro_f1": 0.6399499291049862, "epoch": 9, "n_parameters": 521298470}
11
+ {"train_lr": 5.5788818579585316e-05, "train_min_lr": 3.1452118673529506e-07, "train_loss": 1.1093534459670384, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.510832643508911, "val_loss": 0.8062553117672603, "val_acc1": 68.44444396972656, "val_acc5": 100.0, "val_uar": 0.677001884710863, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6815087833428852, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6735074670195735, "epoch": 10, "n_parameters": 521298470}
12
+ {"train_lr": 5.560671791205679e-05, "train_min_lr": 3.134945559602653e-07, "train_loss": 1.0900196139017742, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.489102976322174, "val_loss": 0.8301442503929138, "val_acc1": 65.55555526733399, "val_acc5": 100.0, "val_uar": 0.650465707108509, "val_war": 0.6555555555555556, "val_weighted_f1": 0.6500026855945035, "val_micro_f1": 0.6555555555555556, "val_macro_f1": 0.6387739487761162, "epoch": 11, "n_parameters": 521298470}
13
+ {"train_lr": 5.539472016084685e-05, "train_min_lr": 3.122993740941958e-07, "train_loss": 1.0886144495010377, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.4857434320449827, "val_loss": 0.802841967344284, "val_acc1": 67.99999944051106, "val_acc5": 100.0, "val_uar": 0.6803402756227835, "val_war": 0.68, "val_weighted_f1": 0.6752808607640642, "val_micro_f1": 0.68, "val_macro_f1": 0.6692177590087952, "epoch": 12, "n_parameters": 521298470}
14
+ {"train_lr": 5.5153057142383875e-05, "train_min_lr": 3.109369480509093e-07, "train_loss": 1.0790822984774908, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.5775313425064086, "val_loss": 0.8075547417004904, "val_acc1": 67.1111110941569, "val_acc5": 100.0, "val_uar": 0.6400444562460816, "val_war": 0.6711111111111111, "val_weighted_f1": 0.6663353167156263, "val_micro_f1": 0.6711111111111111, "val_macro_f1": 0.6481652186238048, "epoch": 13, "n_parameters": 521298470}
15
+ {"train_lr": 5.4881993111630514e-05, "train_min_lr": 3.0940876762328156e-07, "train_loss": 1.072995244661967, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.479055495262146, "val_loss": 0.8032410790522894, "val_acc1": 68.66666656494141, "val_acc5": 100.0, "val_uar": 0.676693478520104, "val_war": 0.6866666666666666, "val_weighted_f1": 0.683024886397849, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.6765456030973943, "epoch": 14, "n_parameters": 521298470}
16
+ {"train_lr": 5.458182447312465e-05, "train_min_lr": 3.077165038541733e-07, "train_loss": 1.065247678955396, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.282079930305481, "val_loss": 0.8003681898117065, "val_acc1": 67.9999994913737, "val_acc5": 100.0, "val_uar": 0.6795537010862088, "val_war": 0.68, "val_weighted_f1": 0.6684603564518896, "val_micro_f1": 0.68, "val_macro_f1": 0.6599153573555704, "epoch": 15, "n_parameters": 521298470}
17
+ {"train_lr": 5.425287945686548e-05, "train_min_lr": 3.0586200720917075e-07, "train_loss": 1.0558709824085235, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1251872062683104, "val_loss": 0.7860371917486191, "val_acc1": 69.11111089070639, "val_acc5": 100.0, "val_uar": 0.674323135083584, "val_war": 0.6911111111111111, "val_weighted_f1": 0.6884297262441427, "val_micro_f1": 0.6911111111111111, "val_macro_f1": 0.6771732152172172, "epoch": 16, "n_parameters": 521298470}
18
+ {"train_lr": 5.3895517759398755e-05, "train_min_lr": 3.0384730555312774e-07, "train_loss": 1.0535503367582957, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.70642938375473, "val_loss": 0.8104790528615315, "val_acc1": 65.5555549621582, "val_acc5": 100.0, "val_uar": 0.6423612384777245, "val_war": 0.6555555555555556, "val_weighted_f1": 0.6484652256721519, "val_micro_f1": 0.6555555555555556, "val_macro_f1": 0.6362364910594956, "epoch": 17, "n_parameters": 521298470}
19
+ {"train_lr": 5.351013015049392e-05, "train_min_lr": 3.0167460193272546e-07, "train_loss": 1.056636932293574, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3896445274353026, "val_loss": 0.8336615602175395, "val_acc1": 65.99999969482423, "val_acc5": 100.0, "val_uar": 0.6643188732778516, "val_war": 0.66, "val_weighted_f1": 0.6491771175095795, "val_micro_f1": 0.66, "val_macro_f1": 0.6435448721495969, "epoch": 18, "n_parameters": 521298470}
20
+ {"train_lr": 5.309713804584307e-05, "train_min_lr": 2.993462721674737e-07, "train_loss": 1.0374775964021683, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.481492824554443, "val_loss": 0.7740644663572311, "val_acc1": 69.77777740478516, "val_acc5": 100.0, "val_uar": 0.7039252933312531, "val_war": 0.6977777777777778, "val_weighted_f1": 0.6930654013746967, "val_micro_f1": 0.6977777777777778, "val_macro_f1": 0.6859902842870957, "epoch": 19, "n_parameters": 521298470}
21
+ {"train_lr": 5.265699304624922e-05, "train_min_lr": 2.968648622517863e-07, "train_loss": 1.0325673192739486, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0208217978477476, "val_loss": 0.829333437482516, "val_acc1": 68.00000045776368, "val_acc5": 100.0, "val_uar": 0.6732351884441667, "val_war": 0.68, "val_weighted_f1": 0.6724951735494904, "val_micro_f1": 0.68, "val_macro_f1": 0.659711538220334, "epoch": 20, "n_parameters": 521298470}
22
+ {"train_lr": 5.2190176443807244e-05, "train_min_lr": 2.9423308557097456e-07, "train_loss": 1.0442231098810832, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.3091348028182983, "val_loss": 0.8300455172856649, "val_acc1": 66.44444417317709, "val_acc5": 100.0, "val_uar": 0.6554992626625752, "val_war": 0.6644444444444444, "val_weighted_f1": 0.6518948512194708, "val_micro_f1": 0.6644444444444444, "val_macro_f1": 0.643603626864573, "epoch": 21, "n_parameters": 521298470}
23
+ {"train_lr": 5.1697198695618223e-05, "train_min_lr": 2.914538199341998e-07, "train_loss": 1.0438974976539612, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.497427568435669, "val_loss": 0.8245496879021327, "val_acc1": 65.77777791341146, "val_acc5": 100.0, "val_uar": 0.657230403683577, "val_war": 0.6577777777777778, "val_weighted_f1": 0.6482765466728884, "val_micro_f1": 0.6577777777777778, "val_macro_f1": 0.6382605980266005, "epoch": 22, "n_parameters": 521298470}
24
+ {"train_lr": 5.117859886561189e-05, "train_min_lr": 2.885301044276305e-07, "train_loss": 1.0200633839766184, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.4616102957725525, "val_loss": 0.8145409524440765, "val_acc1": 66.88888921101888, "val_acc5": 100.0, "val_uar": 0.6635806994146778, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6587489455435184, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.6533845897436201, "epoch": 23, "n_parameters": 521298470}
25
+ {"train_lr": 5.063494403508845e-05, "train_min_lr": 2.854651360912481e-07, "train_loss": 1.019653125802676, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.304559304714203, "val_loss": 0.8149531642595927, "val_acc1": 66.2222223409017, "val_acc5": 100.0, "val_uar": 0.647802459193713, "val_war": 0.6622222222222223, "val_weighted_f1": 0.6532581767282652, "val_micro_f1": 0.6622222222222223, "val_macro_f1": 0.6406777879906205, "epoch": 24, "n_parameters": 521298470}
26
+ {"train_lr": 5.0066828682623144e-05, "train_min_lr": 2.822622664229288e-07, "train_loss": 1.0320290166139603, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.4047012090682984, "val_loss": 0.7911680420239766, "val_acc1": 68.66666631062826, "val_acc5": 100.0, "val_uar": 0.6854032464210483, "val_war": 0.6866666666666666, "val_weighted_f1": 0.6778228624523953, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.665926804966157, "epoch": 25, "n_parameters": 521298470}
27
+ {"train_lr": 4.947487403401295e-05, "train_min_lr": 2.7892499771363074e-07, "train_loss": 0.9921686530113221, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.957877349853516, "val_loss": 0.8135472188393275, "val_acc1": 67.77777791341146, "val_acc5": 100.0, "val_uar": 0.674779557745889, "val_war": 0.6777777777777778, "val_weighted_f1": 0.6657202574961573, "val_micro_f1": 0.6777777777777778, "val_macro_f1": 0.6610313389286577, "epoch": 26, "n_parameters": 521298470}
28
+ {"train_lr": 4.885972738297512e-05, "train_min_lr": 2.7545697921769044e-07, "train_loss": 1.0011621112624804, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9554995703697204, "val_loss": 0.7891061305999756, "val_acc1": 67.99999994913738, "val_acc5": 100.0, "val_uar": 0.6686367027795047, "val_war": 0.68, "val_weighted_f1": 0.6708957438669234, "val_micro_f1": 0.68, "val_macro_f1": 0.6581084779853946, "epoch": 27, "n_parameters": 521298470}
29
+ {"train_lr": 4.822206138334105e-05, "train_min_lr": 2.718620031624162e-07, "train_loss": 1.0020748923222225, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2727689719200135, "val_loss": 0.7743872756759326, "val_acc1": 68.66666661580403, "val_acc5": 100.0, "val_uar": 0.6869310375869974, "val_war": 0.6866666666666666, "val_weighted_f1": 0.6741461297867567, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.6668022661316985, "epoch": 28, "n_parameters": 521298470}
30
+ {"train_lr": 4.7562573313519254e-05, "train_min_lr": 2.6814400060134355e-07, "train_loss": 0.9980320427815119, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.64397292137146, "val_loss": 0.8413965940475464, "val_acc1": 66.88888916015625, "val_acc5": 100.0, "val_uar": 0.6541951081540864, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6539267152200101, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.6404351029396973, "epoch": 29, "n_parameters": 521298470}
31
+ {"train_lr": 4.6881984314031596e-05, "train_min_lr": 2.643070371156856e-07, "train_loss": 0.9926875640948614, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.146961305141449, "val_loss": 0.7681743443012238, "val_acc1": 68.88888885498046, "val_acc5": 100.0, "val_uar": 0.6832248284338067, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6768799232088625, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6665812483547845, "epoch": 30, "n_parameters": 521298470}
32
+ {"train_lr": 4.618103859895663e-05, "train_min_lr": 2.603553083686807e-07, "train_loss": 0.9867583098014195, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.2507815384864807, "val_loss": 0.8166333784659704, "val_acc1": 66.88888916015625, "val_acc5": 100.0, "val_uar": 0.6592078420948391, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6624012959978965, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.654931822119769, "epoch": 31, "n_parameters": 521298470}
33
+ {"train_lr": 4.54605026421427e-05, "train_min_lr": 2.5629313551769454e-07, "train_loss": 0.9928461593389512, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.078149254322052, "val_loss": 0.8245175957679749, "val_acc1": 68.2222223409017, "val_acc5": 100.0, "val_uar": 0.6770227312390624, "val_war": 0.6822222222222222, "val_weighted_f1": 0.6738742025065939, "val_micro_f1": 0.6822222222222222, "val_macro_f1": 0.6674051194357417, "epoch": 32, "n_parameters": 521298470}
34
+ {"train_lr": 4.472116433907969e-05, "train_min_lr": 2.521249604890999e-07, "train_loss": 0.9954770636558533, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9111304879188538, "val_loss": 0.8211779544750849, "val_acc1": 66.44444447835286, "val_acc5": 100.0, "val_uar": 0.6558470661605336, "val_war": 0.6644444444444444, "val_weighted_f1": 0.6533281350437162, "val_micro_f1": 0.6644444444444444, "val_macro_f1": 0.6442243984803436, "epoch": 33, "n_parameters": 521298470}
35
+ {"train_lr": 4.396383214534723e-05, "train_min_lr": 2.4785534112109367e-07, "train_loss": 0.9684175226092339, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.766210286617279, "val_loss": 0.7850221941868464, "val_acc1": 68.88888946533203, "val_acc5": 100.0, "val_uar": 0.6794519212727417, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6830499681632184, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6760283015369798, "epoch": 34, "n_parameters": 521298470}
36
+ {"train_lr": 4.3189334192580065e-05, "train_min_lr": 2.434889461797689e-07, "train_loss": 0.9810767563184103, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9067003679275514, "val_loss": 0.7954065928856532, "val_acc1": 68.66666646321615, "val_acc5": 100.0, "val_uar": 0.6894666068079381, "val_war": 0.6866666666666666, "val_weighted_f1": 0.6779555458146022, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.6726919459751693, "epoch": 35, "n_parameters": 521298470}
37
+ {"train_lr": 4.239851738291851e-05, "train_min_lr": 2.390305502538873e-07, "train_loss": 0.9991062361001969, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0732894420623778, "val_loss": 0.7829256385564805, "val_acc1": 68.22222223917643, "val_acc5": 100.0, "val_uar": 0.6837388623301938, "val_war": 0.6822222222222222, "val_weighted_f1": 0.6738249701136136, "val_micro_f1": 0.6822222222222222, "val_macro_f1": 0.667066794049059, "epoch": 36, "n_parameters": 521298470}
38
+ {"train_lr": 4.1592246462933264e-05, "train_min_lr": 2.344850285339361e-07, "train_loss": 0.9875306791067123, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.1407869052886963, "val_loss": 0.7915772686402003, "val_acc1": 69.11111119588216, "val_acc5": 100.0, "val_uar": 0.6767370204328409, "val_war": 0.6911111111111111, "val_weighted_f1": 0.6875502972499385, "val_micro_f1": 0.6911111111111111, "val_macro_f1": 0.6802081968861174, "epoch": 37, "n_parameters": 521298470}
39
+ {"train_lr": 4.0771403078037764e-05, "train_min_lr": 2.2985735148117964e-07, "train_loss": 0.9686933211485544, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9320054912567137, "val_loss": 0.7969689279794693, "val_acc1": 68.44444483439128, "val_acc5": 100.0, "val_uar": 0.6762536106537654, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6765039482339442, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6680324689178132, "epoch": 38, "n_parameters": 521298470}
40
+ {"train_lr": 3.993688480842195e-05, "train_min_lr": 2.251525793925321e-07, "train_loss": 0.9568296666940054, "train_loss_scale": 2048.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.784767270088196, "val_loss": 0.7972732027371724, "val_acc1": 67.33333333333333, "val_acc5": 100.0, "val_uar": 0.6586704598720853, "val_war": 0.6733333333333333, "val_weighted_f1": 0.6651536580386289, "val_micro_f1": 0.6733333333333333, "val_macro_f1": 0.650866795780356, "epoch": 39, "n_parameters": 521298470}
41
+ {"train_lr": 3.908960418756133e-05, "train_min_lr": 2.2037585686719765e-07, "train_loss": 0.9790856287876765, "train_loss_scale": 3891.2, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6451337790489196, "val_loss": 0.7925047765175501, "val_acc1": 67.55555557250976, "val_acc5": 100.0, "val_uar": 0.6739624187302207, "val_war": 0.6755555555555556, "val_weighted_f1": 0.6640392734313723, "val_micro_f1": 0.6755555555555556, "val_macro_f1": 0.6589517615340195, "epoch": 40, "n_parameters": 521298470}
42
+ {"train_lr": 3.823048770437516e-05, "train_min_lr": 2.1553240718112658e-07, "train_loss": 0.9606207174062729, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9125149297714232, "val_loss": 0.799061585466067, "val_acc1": 68.22222229003906, "val_acc5": 100.0, "val_uar": 0.6864017849195867, "val_war": 0.6822222222222222, "val_weighted_f1": 0.6717802953147511, "val_micro_f1": 0.6822222222222222, "val_macro_f1": 0.6643514211846971, "epoch": 41, "n_parameters": 521298470}
43
+ {"train_lr": 3.736047479012439e-05, "train_min_lr": 2.1062752657544e-07, "train_loss": 0.9620997334520022, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9369814205169678, "val_loss": 0.7733360042174657, "val_acc1": 70.8888889058431, "val_acc5": 100.0, "val_uar": 0.702256953715932, "val_war": 0.7088888888888889, "val_weighted_f1": 0.7051552633260147, "val_micro_f1": 0.7088888888888889, "val_macro_f1": 0.6978203272006308, "epoch": 42, "n_parameters": 521298470}
44
+ {"train_lr": 3.648051679115738e-05, "train_min_lr": 2.0566657846506721e-07, "train_loss": 0.9678315645456315, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8823740100860595, "val_loss": 0.7902600298325221, "val_acc1": 68.88888910929361, "val_acc5": 100.0, "val_uar": 0.68212581170979, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6801872978482362, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6720926792628696, "epoch": 43, "n_parameters": 521298470}
45
+ {"train_lr": 3.559157592862656e-05, "train_min_lr": 2.0065498757393113e-07, "train_loss": 0.9567641971508661, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8824294662475585, "val_loss": 0.7639788577953974, "val_acc1": 70.44444458007813, "val_acc5": 100.0, "val_uar": 0.6992624427802446, "val_war": 0.7044444444444444, "val_weighted_f1": 0.6974730813827702, "val_micro_f1": 0.7044444444444444, "val_macro_f1": 0.6896763656286321, "epoch": 44, "n_parameters": 521298470}
46
+ {"train_lr": 3.46946242463138e-05, "train_min_lr": 1.955982340030917e-07, "train_loss": 0.9638943594694137, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0964786791801453, "val_loss": 0.7731330086787541, "val_acc1": 69.33333338419597, "val_acc5": 100.0, "val_uar": 0.6925693959033743, "val_war": 0.6933333333333334, "val_weighted_f1": 0.6823215347065208, "val_micro_f1": 0.6933333333333334, "val_macro_f1": 0.6809498263248119, "epoch": 45, "n_parameters": 521298470}
47
+ {"train_lr": 3.3790642547714803e-05, "train_min_lr": 1.9050184723833633e-07, "train_loss": 0.9569314595063527, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.0636104917526246, "val_loss": 0.7777919103701909, "val_acc1": 70.0, "val_acc5": 100.0, "val_uar": 0.6938433613743211, "val_war": 0.7, "val_weighted_f1": 0.6917901109257171, "val_micro_f1": 0.7, "val_macro_f1": 0.6831208763003424, "epoch": 46, "n_parameters": 521298470}
48
+ {"train_lr": 3.28806193235449e-05, "train_min_lr": 1.853714001037677e-07, "train_loss": 0.9552068515618642, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.900435657501221, "val_loss": 0.786349265774091, "val_acc1": 70.22222259521484, "val_acc5": 100.0, "val_uar": 0.6983885669857033, "val_war": 0.7022222222222222, "val_weighted_f1": 0.694729952755542, "val_micro_f1": 0.7022222222222222, "val_macro_f1": 0.6823904234747351, "epoch": 47, "n_parameters": 521298470}
49
+ {"train_lr": 3.196554967083885e-05, "train_min_lr": 1.8021250266800308e-07, "train_loss": 0.9534160375595093, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.84013240814209, "val_loss": 0.7759720424811045, "val_acc1": 68.00000020345053, "val_acc5": 100.0, "val_uar": 0.6774614240248915, "val_war": 0.68, "val_weighted_f1": 0.6737489202971201, "val_micro_f1": 0.68, "val_macro_f1": 0.6690594789566306, "epoch": 48, "n_parameters": 521298470}
50
+ {"train_lr": 3.104643420482698e-05, "train_min_lr": 1.7503079610964627e-07, "train_loss": 0.9401278013984362, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9127451038360594, "val_loss": 0.7768458565076192, "val_acc1": 69.11111089070639, "val_acc5": 100.0, "val_uar": 0.6951265963282217, "val_war": 0.6911111111111111, "val_weighted_f1": 0.6877399885671802, "val_micro_f1": 0.6911111111111111, "val_macro_f1": 0.6854407125082498, "epoch": 49, "n_parameters": 521298470}
51
+ {"train_lr": 3.0124277964777072e-05, "train_min_lr": 1.6983194654874177e-07, "train_loss": 0.9454752256472906, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.89730021238327, "val_loss": 0.7714446961879731, "val_acc1": 70.66666702270508, "val_acc5": 100.0, "val_uar": 0.7006641891819909, "val_war": 0.7088888888888889, "val_weighted_f1": 0.7047742352385851, "val_micro_f1": 0.7088888888888889, "val_macro_f1": 0.6959058163103342, "epoch": 50, "n_parameters": 521298470}
52
+ {"train_lr": 2.9200089314998646e-05, "train_min_lr": 1.6462163885095578e-07, "train_loss": 0.964788946211338, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6918688869476317, "val_loss": 0.7728044291337332, "val_acc1": 69.55555552164714, "val_acc5": 100.0, "val_uar": 0.698962145525613, "val_war": 0.6955555555555556, "val_weighted_f1": 0.6917007171904972, "val_micro_f1": 0.6955555555555556, "val_macro_f1": 0.6914190162051879, "epoch": 51, "n_parameters": 521298470}
53
+ {"train_lr": 2.8274878842211425e-05, "train_min_lr": 1.5940557041125874e-07, "train_loss": 0.9668986515204112, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.105651767253876, "val_loss": 0.7608649402856826, "val_acc1": 70.44444468180339, "val_acc5": 100.0, "val_uar": 0.6902619525297544, "val_war": 0.7044444444444444, "val_weighted_f1": 0.699678009387807, "val_micro_f1": 0.7044444444444444, "val_macro_f1": 0.6871940057864825, "epoch": 52, "n_parameters": 521298470}
54
+ {"train_lr": 2.7349658250483626e-05, "train_min_lr": 1.54189444923908e-07, "train_loss": 0.9325385342041651, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7319969391822814, "val_loss": 0.7762806047995885, "val_acc1": 70.00000020345053, "val_acc5": 100.0, "val_uar": 0.6917144515557828, "val_war": 0.7, "val_weighted_f1": 0.6931974648980926, "val_micro_f1": 0.7, "val_macro_f1": 0.6840633531405407, "epoch": 53, "n_parameters": 521298470}
55
+ {"train_lr": 2.642543925494833e-05, "train_min_lr": 1.4897896614554152e-07, "train_loss": 0.9508578285574913, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8607419991493224, "val_loss": 0.7894767423470815, "val_acc1": 68.0, "val_acc5": 100.0, "val_uar": 0.6672832919717595, "val_war": 0.68, "val_weighted_f1": 0.6744622759824285, "val_micro_f1": 0.68, "val_macro_f1": 0.6568497037374497, "epoch": 54, "n_parameters": 521298470}
56
+ {"train_lr": 2.5503232475507892e-05, "train_min_lr": 1.4377983165820403e-07, "train_loss": 0.928006526529789, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6485953187942504, "val_loss": 0.7600028693675995, "val_acc1": 68.44444422403971, "val_acc5": 100.0, "val_uar": 0.6879733791750046, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6756539304976762, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6745540062318974, "epoch": 55, "n_parameters": 521298470}
57
+ {"train_lr": 2.458404633173596e-05, "train_min_lr": 1.3859772663912447e-07, "train_loss": 0.9364062159260114, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7786155366897582, "val_loss": 0.7714719474315643, "val_acc1": 68.88888880411784, "val_acc5": 100.0, "val_uar": 0.6940542439013802, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6816146044006587, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6795463079537847, "epoch": 56, "n_parameters": 521298470}
58
+ {"train_lr": 2.3668885940185398e-05, "train_min_lr": 1.3343831764405833e-07, "train_loss": 0.9629999599854151, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9290178489685057, "val_loss": 0.8021439145008723, "val_acc1": 67.77777770996094, "val_acc5": 100.0, "val_uar": 0.6768372132110523, "val_war": 0.6777777777777778, "val_weighted_f1": 0.6668826301708316, "val_micro_f1": 0.6777777777777778, "val_macro_f1": 0.6629703901034182, "epoch": 57, "n_parameters": 521298470}
59
+ {"train_lr": 2.275875201530809e-05, "train_min_lr": 1.2830724641099214e-07, "train_loss": 0.9223738576968511, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.744739410877228, "val_loss": 0.7968537499507268, "val_acc1": 68.00000020345053, "val_acc5": 100.0, "val_uar": 0.678730571544039, "val_war": 0.68, "val_weighted_f1": 0.6756949995702097, "val_micro_f1": 0.68, "val_macro_f1": 0.6671122808900954, "epoch": 58, "n_parameters": 521298470}
60
+ {"train_lr": 2.185463977518844e-05, "train_min_lr": 1.232101236909854e-07, "train_loss": 0.9331408931811651, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6650686073303222, "val_loss": 0.7978472739458085, "val_acc1": 68.44444458007813, "val_acc5": 100.0, "val_uar": 0.6484826384342638, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6734244899219891, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6453386397238893, "epoch": 59, "n_parameters": 521298470}
61
+ {"train_lr": 2.0957537853286983e-05, "train_min_lr": 1.1815252311289737e-07, "train_loss": 0.9264879488945007, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 3.013273849487305, "val_loss": 0.7859284629424413, "val_acc1": 69.77777770996094, "val_acc5": 100.0, "val_uar": 0.6897798882976901, "val_war": 0.6977777777777778, "val_weighted_f1": 0.6924347813064183, "val_micro_f1": 0.6977777777777778, "val_macro_f1": 0.6868254345342313, "epoch": 60, "n_parameters": 521298470}
62
+ {"train_lr": 2.0068427217384223e-05, "train_min_lr": 1.1313997508870528e-07, "train_loss": 0.9364438471198082, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6703801465034487, "val_loss": 0.7857452402512233, "val_acc1": 67.55555577596029, "val_acc5": 100.0, "val_uar": 0.668238989539299, "val_war": 0.6755555555555556, "val_weighted_f1": 0.6703562035580662, "val_micro_f1": 0.6755555555555556, "val_macro_f1": 0.6588438474612365, "epoch": 61, "n_parameters": 521298470}
63
+ {"train_lr": 1.918828009690669e-05, "train_min_lr": 1.0817796076607999e-07, "train_loss": 0.9314329691727956, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.672454102039337, "val_loss": 0.7824677675962448, "val_acc1": 69.33333338419597, "val_acc5": 100.0, "val_uar": 0.6865814092168581, "val_war": 0.6933333333333334, "val_weighted_f1": 0.6895090657443861, "val_micro_f1": 0.6933333333333334, "val_macro_f1": 0.6803029986185525, "epoch": 62, "n_parameters": 521298470}
64
+ {"train_lr": 1.8318058919808455e-05, "train_min_lr": 1.0327190603483182e-07, "train_loss": 0.9292760492364566, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.9584752535820007, "val_loss": 0.8000635753075281, "val_acc1": 66.44444452921549, "val_acc5": 100.0, "val_uar": 0.6582721002767443, "val_war": 0.6644444444444444, "val_weighted_f1": 0.6593262515067727, "val_micro_f1": 0.6644444444444444, "val_macro_f1": 0.652188023610718, "epoch": 63, "n_parameters": 521298470}
65
+ {"train_lr": 1.7458715260170226e-05, "train_min_lr": 9.84271755937794e-08, "train_loss": 0.9272024454673131, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5918350338935854, "val_loss": 0.8118020504713058, "val_acc1": 66.8888883972168, "val_acc5": 100.0, "val_uar": 0.6586799180065434, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6626659192070854, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.6512474306973058, "epoch": 64, "n_parameters": 521298470}
66
+ {"train_lr": 1.6611188797667042e-05, "train_min_lr": 9.36490670845304e-08, "train_loss": 0.9299048952261607, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.826771240234375, "val_loss": 0.7783510555823644, "val_acc1": 69.55555587768555, "val_acc5": 100.0, "val_uar": 0.6793128784556803, "val_war": 0.6955555555555556, "val_weighted_f1": 0.6910850583924495, "val_micro_f1": 0.6955555555555556, "val_macro_f1": 0.6773154794824231, "epoch": 65, "n_parameters": 521298470}
67
+ {"train_lr": 1.577640629004241e-05, "train_min_lr": 8.894280529858815e-08, "train_loss": 0.9262845901648203, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7438842844963074, "val_loss": 0.7778310144941012, "val_acc1": 67.33333333333333, "val_acc5": 100.0, "val_uar": 0.6718717085145104, "val_war": 0.6733333333333333, "val_weighted_f1": 0.6707892020366077, "val_micro_f1": 0.6733333333333333, "val_macro_f1": 0.6616950214710285, "epoch": 66, "n_parameters": 521298470}
68
+ {"train_lr": 1.495528055971225e-05, "train_min_lr": 8.431353646411905e-08, "train_loss": 0.9308945120374361, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.777237870693207, "val_loss": 0.7955295125643412, "val_acc1": 67.99999984741211, "val_acc5": 100.0, "val_uar": 0.6498160601295276, "val_war": 0.68, "val_weighted_f1": 0.6743642433782899, "val_micro_f1": 0.68, "val_macro_f1": 0.646406512192563, "epoch": 67, "n_parameters": 521298470}
69
+ {"train_lr": 1.4148709495607045e-05, "train_min_lr": 7.976632261862724e-08, "train_loss": 0.919927135805289, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.690995810031891, "val_loss": 0.7836278259754181, "val_acc1": 68.00000015258789, "val_acc5": 100.0, "val_uar": 0.6684283560579999, "val_war": 0.68, "val_weighted_f1": 0.673556883635256, "val_micro_f1": 0.68, "val_macro_f1": 0.6620598390891654, "epoch": 68, "n_parameters": 521298470}
70
+ {"train_lr": 1.3357575071343419e-05, "train_min_lr": 7.53061360736913e-08, "train_loss": 0.9218590213855108, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5864842772483825, "val_loss": 0.7855682869752248, "val_acc1": 67.55555557250976, "val_acc5": 100.0, "val_uar": 0.6736045918592358, "val_war": 0.6755555555555556, "val_weighted_f1": 0.6689986238882903, "val_micro_f1": 0.6755555555555556, "val_macro_f1": 0.664096535876287, "epoch": 69, "n_parameters": 521298470}
71
+ {"train_lr": 1.2582742380799007e-05, "train_min_lr": 7.093785397781427e-08, "train_loss": 0.9287669544418653, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6820512056350707, "val_loss": 0.7880171060562133, "val_acc1": 69.33333333333333, "val_acc5": 100.0, "val_uar": 0.689023162724788, "val_war": 0.6933333333333334, "val_weighted_f1": 0.6896112280413341, "val_micro_f1": 0.6933333333333334, "val_macro_f1": 0.6767543846029692, "epoch": 70, "n_parameters": 521298470}
72
+ {"train_lr": 1.1825058692144917e-05, "train_min_lr": 6.666625298333363e-08, "train_loss": 0.9426236248016358, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8184714913368225, "val_loss": 0.7779075374205907, "val_acc1": 69.77777770996094, "val_acc5": 100.0, "val_uar": 0.6847187402953657, "val_war": 0.6977777777777778, "val_weighted_f1": 0.6926639546866319, "val_micro_f1": 0.6977777777777778, "val_macro_f1": 0.6796137743076025, "epoch": 71, "n_parameters": 521298470}
73
+ {"train_lr": 1.1085352521370515e-05, "train_min_lr": 6.249600402322172e-08, "train_loss": 0.9209723409016927, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.8252839708328246, "val_loss": 0.8071456333001454, "val_acc1": 66.8888889058431, "val_acc5": 100.0, "val_uar": 0.6798129845014521, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6616729741826598, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.657098165487846, "epoch": 72, "n_parameters": 521298470}
74
+ {"train_lr": 1.0364432726313269e-05, "train_min_lr": 5.843166720348948e-08, "train_loss": 0.9176205338040988, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.4845564556121826, "val_loss": 0.7720184360941251, "val_acc1": 68.44444478352865, "val_acc5": 100.0, "val_uar": 0.6858043322633106, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6798269284911476, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6706115714573156, "epoch": 73, "n_parameters": 521298470}
75
+ {"train_lr": 9.663087622184646e-06, "train_min_lr": 5.44776868167773e-08, "train_loss": 0.9209202634294827, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.744818127155304, "val_loss": 0.7826161702473958, "val_acc1": 69.11111129760742, "val_acc5": 100.0, "val_uar": 0.6982974453020894, "val_war": 0.6911111111111111, "val_weighted_f1": 0.6865633553383033, "val_micro_f1": 0.6911111111111111, "val_macro_f1": 0.6861928776037456, "epoch": 74, "n_parameters": 521298470}
76
+ {"train_lr": 8.982084119558961e-06, "train_min_lr": 5.06383864825863e-08, "train_loss": 0.9283588059743245, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5013469099998473, "val_loss": 0.785059608022372, "val_acc1": 69.55555592854817, "val_acc5": 100.0, "val_uar": 0.6869156229334249, "val_war": 0.6955555555555556, "val_weighted_f1": 0.6904839779545259, "val_micro_f1": 0.6955555555555556, "val_macro_f1": 0.6847967380258164, "epoch": 75, "n_parameters": 521298470}
77
+ {"train_lr": 8.322166885767945e-06, "train_min_lr": 4.69179644194639e-08, "train_loss": 0.9268161864082018, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.717241818904877, "val_loss": 0.7781453500191371, "val_acc1": 69.33333368937174, "val_acc5": 100.0, "val_uar": 0.6711080089214765, "val_war": 0.6933333333333334, "val_weighted_f1": 0.6861504621351133, "val_micro_f1": 0.6933333333333334, "val_macro_f1": 0.6665233119567548, "epoch": 76, "n_parameters": 521298470}
78
+ {"train_lr": 7.684057530617942e-06, "train_min_lr": 4.3320488854313384e-08, "train_loss": 0.9230702611804008, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7654062867164613, "val_loss": 0.7921453376611074, "val_acc1": 65.77777740478516, "val_acc5": 100.0, "val_uar": 0.6409655027921282, "val_war": 0.6577777777777778, "val_weighted_f1": 0.6480298016809295, "val_micro_f1": 0.6577777777777778, "val_macro_f1": 0.6315164987410266, "epoch": 77, "n_parameters": 521298470}
79
+ {"train_lr": 7.068453817320182e-06, "train_min_lr": 3.984989357384769e-08, "train_loss": 0.9246868165334066, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6555623960494996, "val_loss": 0.7917766431967418, "val_acc1": 68.44444447835286, "val_acc5": 100.0, "val_uar": 0.6882267886195905, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6799749752130244, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6753891346557666, "epoch": 78, "n_parameters": 521298470}
80
+ {"train_lr": 6.476028899496948e-06, "train_min_lr": 3.6509973623051173e-08, "train_loss": 0.9062064202626546, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.7408087730407713, "val_loss": 0.7892589499553044, "val_acc1": 67.77777821858724, "val_acc5": 100.0, "val_uar": 0.6782286794303047, "val_war": 0.6777777777777778, "val_weighted_f1": 0.671127067230552, "val_micro_f1": 0.6777777777777778, "val_macro_f1": 0.6677236051520775, "epoch": 79, "n_parameters": 521298470}
81
+ {"train_lr": 5.907430585097989e-06, "train_min_lr": 3.330438115535389e-08, "train_loss": 0.926934908926487, "train_loss_scale": 7782.4, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6190801191329958, "val_loss": 0.7973022381464641, "val_acc1": 68.88888865152995, "val_acc5": 100.0, "val_uar": 0.6939340905768926, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6842610183726128, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6846574046765725, "epoch": 80, "n_parameters": 521298470}
82
+ {"train_lr": 5.3632806280320225e-06, "train_min_lr": 3.0236621439055217e-08, "train_loss": 0.9211570599675178, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6958837962150572, "val_loss": 0.7930320451656977, "val_acc1": 68.22222188313802, "val_acc5": 100.0, "val_uar": 0.6756696698124716, "val_war": 0.6822222222222222, "val_weighted_f1": 0.6777721718723729, "val_micro_f1": 0.6822222222222222, "val_macro_f1": 0.6664266176605861, "epoch": 81, "n_parameters": 521298470}
83
+ {"train_lr": 4.844174048287975e-06, "train_min_lr": 2.73100490243645e-08, "train_loss": 0.9166229184468587, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.566693708896637, "val_loss": 0.8043539534012477, "val_acc1": 66.88888885498046, "val_acc5": 100.0, "val_uar": 0.6671758202524456, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6615077761865014, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.6514707800807665, "epoch": 82, "n_parameters": 521298470}
84
+ {"train_lr": 4.350678481289343e-06, "train_min_lr": 2.4527864075249745e-08, "train_loss": 0.916592638293902, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.496918935775757, "val_loss": 0.7710072835286458, "val_acc1": 68.88888916015625, "val_acc5": 100.0, "val_uar": 0.690983832988477, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6843416053833133, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6813738077403404, "epoch": 83, "n_parameters": 521298470}
85
+ {"train_lr": 3.883333557193218e-06, "train_min_lr": 2.1893108870105166e-08, "train_loss": 0.9230719793836276, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6510028624534607, "val_loss": 0.7849772135416667, "val_acc1": 69.55555587768555, "val_acc5": 100.0, "val_uar": 0.6890658338336357, "val_war": 0.6955555555555556, "val_weighted_f1": 0.6930105052035854, "val_micro_f1": 0.6955555555555556, "val_macro_f1": 0.6814914531608969, "epoch": 84, "n_parameters": 521298470}
86
+ {"train_lr": 3.4426503108126066e-06, "train_min_lr": 1.940866447506447e-08, "train_loss": 0.921713570356369, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6407624459266663, "val_loss": 0.8105690330266953, "val_acc1": 65.77777801513672, "val_acc5": 100.0, "val_uar": 0.6229863851748526, "val_war": 0.6577777777777778, "val_weighted_f1": 0.6455066471629292, "val_micro_f1": 0.6577777777777778, "val_macro_f1": 0.6147281699497299, "epoch": 85, "n_parameters": 521298470}
87
+ {"train_lr": 3.0291106228073814e-06, "train_min_lr": 1.7077247593597424e-08, "train_loss": 0.9048562772075335, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.455898633003235, "val_loss": 0.807473412156105, "val_acc1": 68.2222220357259, "val_acc5": 100.0, "val_uar": 0.6722455476383494, "val_war": 0.68, "val_weighted_f1": 0.6742089787472799, "val_micro_f1": 0.68, "val_macro_f1": 0.6608105536300048, "epoch": 86, "n_parameters": 521298470}
88
+ {"train_lr": 2.6431666927548434e-06, "train_min_lr": 1.4901407595834388e-08, "train_loss": 0.9135717292626698, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6061713242530824, "val_loss": 0.7914395928382874, "val_acc1": 68.00000020345053, "val_acc5": 100.0, "val_uar": 0.6681303572731592, "val_war": 0.68, "val_weighted_f1": 0.6734569947110396, "val_micro_f1": 0.68, "val_macro_f1": 0.658712287641075, "epoch": 87, "n_parameters": 521298470}
89
+ {"train_lr": 2.28524054467614e-06, "train_min_lr": 1.2883523730867555e-08, "train_loss": 0.9297283917665482, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5636703181266784, "val_loss": 0.8002231150865555, "val_acc1": 67.33333307902018, "val_acc5": 100.0, "val_uar": 0.6695036927583368, "val_war": 0.6733333333333333, "val_weighted_f1": 0.6672194680531683, "val_micro_f1": 0.6733333333333333, "val_macro_f1": 0.658018711283126, "epoch": 88, "n_parameters": 521298470}
90
+ {"train_lr": 1.955723565559173e-06, "train_min_lr": 1.1025802525076985e-08, "train_loss": 0.9133775478601456, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.630913345813751, "val_loss": 0.7851173708836238, "val_acc1": 68.66666697184245, "val_acc5": 100.0, "val_uar": 0.6795494882511136, "val_war": 0.6866666666666666, "val_weighted_f1": 0.678568490226725, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.6693929797116145, "epoch": 89, "n_parameters": 521298470}
91
+ {"train_lr": 1.6549760773826848e-06, "train_min_lr": 9.330275369326432e-09, "train_loss": 0.9158731254935265, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6036717987060545, "val_loss": 0.7979379137357075, "val_acc1": 67.5555556233724, "val_acc5": 100.0, "val_uar": 0.6629300714478733, "val_war": 0.6755555555555556, "val_weighted_f1": 0.667394843101083, "val_micro_f1": 0.6755555555555556, "val_macro_f1": 0.6557895689834689, "epoch": 90, "n_parameters": 521298470}
92
+ {"train_lr": 1.3833269431094697e-06, "train_min_lr": 7.798796297667227e-09, "train_loss": 0.9200678444902102, "train_loss_scale": 8192.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.48985787153244, "val_loss": 0.7959654639164607, "val_acc1": 69.33333389282227, "val_acc5": 100.0, "val_uar": 0.6773124483302503, "val_war": 0.6933333333333334, "val_weighted_f1": 0.6864848198521523, "val_micro_f1": 0.6933333333333334, "val_macro_f1": 0.6747037771323191, "epoch": 91, "n_parameters": 521298470}
93
+ {"train_lr": 1.141073207079542e-06, "train_min_lr": 6.433039959979276e-09, "train_loss": 0.9244904430707296, "train_loss_scale": 5898.24, "train_weight_decay": 0.04999999999999999, "train_grad_norm": Infinity, "val_loss": 0.7835675328969955, "val_acc1": 68.44444463094075, "val_acc5": 100.0, "val_uar": 0.6738105512621766, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6792791562434266, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6681531993429609, "epoch": 92, "n_parameters": 521298470}
94
+ {"train_lr": 9.284797701965165e-07, "train_min_lr": 5.234499790766015e-09, "train_loss": 0.9276557924350103, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.664130210876465, "val_loss": 0.8018079966306686, "val_acc1": 68.00000035603841, "val_acc5": 100.0, "val_uar": 0.6694329070625511, "val_war": 0.68, "val_weighted_f1": 0.6738145001401494, "val_micro_f1": 0.68, "val_macro_f1": 0.6643436058577938, "epoch": 93, "n_parameters": 521298470}
95
+ {"train_lr": 7.4577910026236e-07, "train_min_lr": 4.204486376105685e-09, "train_loss": 0.8932206483681997, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.524052929878235, "val_loss": 0.7965245008468628, "val_acc1": 66.88888895670573, "val_acc5": 100.0, "val_uar": 0.657662750476218, "val_war": 0.6688888888888889, "val_weighted_f1": 0.6626809292263839, "val_micro_f1": 0.6688888888888889, "val_macro_f1": 0.6466003137615514, "epoch": 94, "n_parameters": 521298470}
96
+ {"train_lr": 5.931709777772621e-07, "train_min_lr": 3.344126020544719e-09, "train_loss": 0.9214114946126938, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6623637676239014, "val_loss": 0.7691080729166667, "val_acc1": 69.77777826944987, "val_acc5": 100.0, "val_uar": 0.692866699147659, "val_war": 0.6977777777777778, "val_weighted_f1": 0.6923667723668322, "val_micro_f1": 0.6977777777777778, "val_macro_f1": 0.6850547750835334, "epoch": 95, "n_parameters": 521298470}
97
+ {"train_lr": 4.7082227748258895e-07, "train_min_lr": 2.6543595155002304e-09, "train_loss": 0.9163872567812602, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.382745773792267, "val_loss": 0.7998723944028219, "val_acc1": 68.4444448852539, "val_acc5": 100.0, "val_uar": 0.6771326689592944, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6806820560035016, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6739647069222259, "epoch": 96, "n_parameters": 521298470}
98
+ {"train_lr": 3.7886678588580804e-07, "train_min_lr": 2.1359411105184414e-09, "train_loss": 0.9091070380806923, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5593949651718138, "val_loss": 0.7963272631168365, "val_acc1": 68.6666667175293, "val_acc5": 100.0, "val_uar": 0.6663907214216811, "val_war": 0.6866666666666666, "val_weighted_f1": 0.6786641501886106, "val_micro_f1": 0.6866666666666666, "val_macro_f1": 0.661315015575648, "epoch": 97, "n_parameters": 521298470}
99
+ {"train_lr": 3.1740505496690313e-07, "train_min_lr": 1.789437688513856e-09, "train_loss": 0.90376080930233, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.5753329825401305, "val_loss": 0.7987374782562255, "val_acc1": 68.4444443766276, "val_acc5": 100.0, "val_uar": 0.6840637811477595, "val_war": 0.6844444444444444, "val_weighted_f1": 0.6788919283239526, "val_micro_f1": 0.6844444444444444, "val_macro_f1": 0.6733967268481009, "epoch": 98, "n_parameters": 521298470}
100
+ {"train_lr": 2.86504292226262e-07, "train_min_lr": 1.6152281458911214e-09, "train_loss": 0.9131018317739169, "train_loss_scale": 4096.0, "train_weight_decay": 0.04999999999999999, "train_grad_norm": 2.6002814316749574, "val_loss": 0.7697526792685191, "val_acc1": 68.88888921101888, "val_acc5": 100.0, "val_uar": 0.6841834057468733, "val_war": 0.6888888888888889, "val_weighted_f1": 0.6837054468854198, "val_micro_f1": 0.6888888888888889, "val_macro_f1": 0.6769831968381027, "epoch": 99, "n_parameters": 521298470}
101
+ Evaluation on the test set using best epoch model:
102
+ {"Final Top-1 (best epoch)": 72.93064876957494, "Final Top-5 (best epoch)": 100.0}
103
+ Final UAR: 71.33%, Final WAR: 72.93%
104
+ Final Confusion Matrix:
105
+ [[ 33 1 19 11]
106
+ [ 4 118 8 6]
107
+ [ 3 29 93 26]
108
+ [ 0 1 13 82]]
109
+ Final Class Accuracies: ['51.56%', '86.76%', '61.59%', '85.42%']
110
+ Final Weighted F1: 0.7234, Final Micro F1: 0.7293, Final Macro F1: 0.7149