Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7c6682c76a002291c94ab5400f46aad989063cceafda2bb335890c225c469f6
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03ffd7f8cda1890da71515a263f4200fb94e95e55d8015c4884ebbe683b11dde
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:463639587f71bcbd5b9d4481a2af8b2a50eb9728a9d1126789118ee6213c321d
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb0d4a5ef7da16ffc179066f20f6577b695c216c334fb9207199ac0bf36085d7
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f998475c714790ced260b7d08905deaf25397487d22f12b88939abb267ba23b
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8e030d7c0cfa7e8171852dba2ae6e04bb8cd8017f7d3a490b15f7ca76d21aa1
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:616c57ef296799deec39f31f99f043a4bdf1ad531074e5c884d8f1beff2d6e35
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93589c35187351c587af0cb2f9bd08601b587df4aa00cd4809909821c10c86b4
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.443185567855835,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.0495,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.8465604782104492,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.2367,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.3454159498214722,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.1763,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 4.38826847076416,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 2.243,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 0.6966120600700378,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.8164,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.5520138144493103,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.5507,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 2.7041213512420654,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.2546,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 4.7814555168151855,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.4361,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.8622943758964539,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.7884,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 0.8466795086860657,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.8781,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 2.9229226112365723,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7513,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.2761527299880981,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.9905,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 2.6934807300567627,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.8354,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.0872917175292969,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.2724,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 2.1482253074645996,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.6143,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.905875563621521,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.6322,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 0.6854360103607178,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8599,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 0.8919254541397095,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.0498,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.6378024816513062,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.1343,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.0561065673828125,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0583,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 0.7613285779953003,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.5316,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.940731167793274,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7388,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 1.1051706075668335,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.0972,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 1.7498652935028076,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.735,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 1.488133192062378,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.435,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.48400750756263733,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.219,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.843970775604248,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.3159,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 2.2032525539398193,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.2433,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 2.2895724773406982,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.6293,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.6934003829956055,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.8883,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 3.9248857498168945,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.7568,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 3.8822836875915527,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.358,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 2.8576316833496094,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.6214,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 2.46586537361145,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.39,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 2.4729208946228027,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.9929,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 2.4468770027160645,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.4753,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 5.142044544219971,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.4092,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 3.540414333343506,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.3685,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.6057883501052856,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.8978,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 2.5897274017333984,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.5045,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.60903400182724,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.97,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.9482678174972534,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.8024,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.7348854541778564,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.105,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.3475691080093384,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.6093,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 0.98420250415802,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.6538,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 1.3864518404006958,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.3196,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.4003015756607056,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.7891,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.4709246158599854,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.8115,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 5653713448337408.0,
|
| 351 |
+
"train_loss": 1.1264018225915653,
|
| 352 |
+
"train_runtime": 208.5258,
|
| 353 |
+
"train_samples_per_second": 1.861,
|
| 354 |
+
"train_steps_per_second": 0.465
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 5653713448337408.0,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:669a580e6490be915efbd67e52ae5d5eebf6ec1cfe8ed75462a0b139aaf852b9
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5ee3bb60763f435737915de658c4663b6a667f582ed8be7e60fef7157534dd7
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcf13622c365cefe6bc7f685fad7d20f552bf6b29003b54dcd82de9dabdc3805
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e6c9656642a5444d90acfa6f0daa4c79a3e390288297eeb1abf064e503f5ab0
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f0485b8a1b36390a6785f6d93b6e361d6a74346cc48938a91ac1c35ef15235b
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0517ce73a108585cff98eeb02bb4f4f800a287e04dd52f03abba4325f2a381e
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1756b6cdf77c2928be961e35efda166cc775b9055a05d892a15d0694cd5990d3
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3fdb5458b09254ba799a8103f3bdfeaae78cf8e16ddac228e526bb1887f33ba
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 4.060997486114502,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.5594,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 2.254009962081909,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 2.394,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.4530246257781982,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.1717,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.869075357913971,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.6181,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 0.40417155623435974,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.7947,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.7273326516151428,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.2993,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.7000077366828918,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.9332,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.7522069811820984,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.0824,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.9031182527542114,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.9363,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.0295480489730835,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.4712,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.9623363614082336,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.1707,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.3788335621356964,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.1249,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 0.9554972648620605,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.77,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 3.608424425125122,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.8768,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 1.3351783752441406,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.1167,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.9246235489845276,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.5636,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 0.8108372688293457,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.6283,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 2.429359197616577,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.6775,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.0958515405654907,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.6208,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.9364616870880127,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.9213,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 2.9376447200775146,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.0253,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 0.9935062527656555,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.875,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 0.5480442643165588,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5909,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.5784210562705994,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.8151,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 1.056527018547058,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.7226,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.8988001346588135,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.1636,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 3.2083346843719482,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.0256,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 1.1363922357559204,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.3127,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 3.8423314094543457,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.725,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.043357253074646,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.1183,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 3.4112141132354736,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.4553,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.9900936484336853,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6987,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.8460667133331299,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.2903,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.4582524299621582,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.0396,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 1.4106889963150024,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.1477,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.5957890748977661,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.1479,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.9033636450767517,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.3359,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 0.7324110269546509,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.7437,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.1791794300079346,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.2022,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.4742436707019806,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.7892,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 1.18353271484375,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.077,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.9750029444694519,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.9788,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 2.6100590229034424,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.4546,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.9122608304023743,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.6003,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.024173617362976,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.7366,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 0.7152318954467773,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.5065,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.7300605773925781,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.4227,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.2942312955856323,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.9952,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.3522822525616128e+16,
|
| 351 |
+
"train_loss": 1.072485186390041,
|
| 352 |
+
"train_runtime": 318.3533,
|
| 353 |
+
"train_samples_per_second": 1.219,
|
| 354 |
+
"train_steps_per_second": 0.305
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.3522822525616128e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:502b1f2466482d60ae4368748bd2bbb748c1eef0763cc3198c7c6693b693cda2
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e029bd4f689e14cf52f62173c28505256fdf4c36180fcbd2dce37e046ed0aea4
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bae73bf0f0ce33ecd007f09d6bc20372df66c432233a979da77558ee0562266
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6efdd67bba7b9f92913a0a5f78a80c0d2f37330882a17c1d4c937e8821b62c1
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:680cc48edce4581b07b94077943e4d9e9f8e437962c27221e4f5579517ee781a
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95325cb63bcc0510e8cdf48f2026637a13810fa472c8d4fa373deebf0c544b94
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7742c3fe0f7d965875edeb90a118edb19499505b66dc7526653285401f38a4d
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04669bd514938a07c8c573fd80110f73a858b311023e7a52be912ff48f82d6bb
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 2.7177300453186035,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.0926,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.6555100679397583,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.6273,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 4.007697105407715,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 2.815,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 1.5810902118682861,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.7118,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 2.4874520301818848,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.5025,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 1.162537693977356,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.3331,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 3.231822967529297,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.7719,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.9158895015716553,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.6472,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 2.0289742946624756,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.7641,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 3.32369327545166,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.0675,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 1.4045751094818115,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.9268,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.6414214968681335,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.5625,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 2.570258140563965,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.7383,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.839545488357544,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.7098,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 1.3686957359313965,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.596,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.3215398788452148,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.3943,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.1492276191711426,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8084,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 0.21739822626113892,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.5349,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 0.766031801700592,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.2763,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.4358357191085815,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0408,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 8.695076942443848,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 2.4291,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.2179341316223145,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.5597,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 2.2541961669921875,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 2.0812,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 1.579424500465393,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.7606,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 1.6125601530075073,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.0159,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.2814699709415436,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.3651,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 2.234740734100342,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.846,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 3.462404489517212,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.4232,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 1.109181523323059,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.7863,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 2.8189926147460938,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.6734,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.4701120853424072,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.7984,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 1.5160466432571411,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.8513,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.2065556049346924,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.4933,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.5657169818878174,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.3078,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 5.390564441680908,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 2.9097,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.6399439573287964,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.8537,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.35270068049430847,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.2654,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 5.137968063354492,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.5669,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.5322946310043335,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.874,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 2.5663018226623535,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.0716,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 3.668062925338745,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.6671,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.857315182685852,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.3661,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 0.98568195104599,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.67,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.703881561756134,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.4057,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.4584002494812012,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.5015,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 2.263429880142212,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.0602,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 3.9899234771728516,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.708,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 2.3671796321868896,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.3029,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.5329114357497856e+16,
|
| 351 |
+
"train_loss": 0.9428910275095517,
|
| 352 |
+
"train_runtime": 354.7806,
|
| 353 |
+
"train_samples_per_second": 1.094,
|
| 354 |
+
"train_steps_per_second": 0.273
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.5329114357497856e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cedddde0d3b47f691a44fb56df8c587e70ec3e2d98120c9fef229d4e305135b3
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b935a7315d7aba3f970d2155591a7c025d5e00762112fe9047b296da732a3b5
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:925479cb7b9cfbe13025d105ef9e24aa527cc08e95edb4a5d57f2b34856d503d
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66d397dbb4d650578c2060e73c064390a986d117f67a0fed4b60758c4ec0bd7d
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b453f3c79b662bca0906bc6969023a652f1fd620cf5b08fc3ef0e5e309f5b990
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a92e1016a632bfe164e293aeb7642d2dfbae7090da04ac409a8a7c4e00fc4226
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:222f280a081854656a1d7ef5f4fb2683f19a421ee895ac0cdae8b26f64893669
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77bfb8ce9a4a7531fec21d951852d3dbfc5b9546e416285abdd478013d712647
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 0.7246163487434387,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.7324,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 0.5284512042999268,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 2.0166,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 0.49521467089653015,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.3906,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.5752606987953186,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.1865,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 0.5735695362091064,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.6631,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.35247695446014404,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.5771,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.7416431903839111,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.45,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.8636322021484375,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.3813,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.6410114765167236,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.6523,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 0.35148540139198303,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.5771,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.5244173407554626,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.5553,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.6992729306221008,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.7803,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 0.430820494890213,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.6299,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 0.4983491003513336,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.321,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 0.45274442434310913,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.4834,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.653442919254303,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.5889,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 0.42392614483833313,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.3359,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 0.49543508887290955,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.7627,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 0.4652840793132782,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.4604,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.6317560076713562,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.6349,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 0.5806442499160767,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.6519,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 0.568714439868927,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.4482,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 0.6616501808166504,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.5454,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 1.0585500001907349,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.6943,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 0.5242922306060791,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.5273,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.6620081067085266,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.6641,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.8729976415634155,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.3711,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 0.5533149838447571,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.5024,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.6843786239624023,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.221,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 0.6899279952049255,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.6211,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.8329319357872009,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.4404,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.557767391204834,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.4336,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 0.34819966554641724,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.3477,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.7103057503700256,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.5186,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.5542501211166382,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.4287,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 0.5763942003250122,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.1259,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.7207664847373962,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.3911,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 0.3724093437194824,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.0917,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 0.7380734086036682,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.4736,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.7758538722991943,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.9409,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 1.419058084487915,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.0337,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.1800168752670288,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.5811,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.0631232261657715,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.6145,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.35026878118515015,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.313,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.1856135129928589,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.0017,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 0.42485421895980835,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.7556,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.0420420169830322,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.2177,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 3.806156873703003,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.3557,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 2.080121124238131e+16,
|
| 351 |
+
"train_loss": 1.4653891337286566,
|
| 352 |
+
"train_runtime": 338.7153,
|
| 353 |
+
"train_samples_per_second": 1.146,
|
| 354 |
+
"train_steps_per_second": 0.286
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 2.080121124238131e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78497632d8d36d7c8168d50e50a254157fc769f9322543ee181269094d28c724
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:162cde5be279d581b1f83ee039cc749ade6ab4ab374af926bad62c033d1dfba4
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97d4a68f4353dabd81f17a777377c46d55712b0b65c491aa7fbfdbbbdf077a66
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7738f10699765a7bc84949fc0529653275795f44b59cfcc9ca3fcedaea62efcb
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fec5310decb3f5a8a11e6204443c2e3c9371d7d258ede1f21b69d71da6ae563
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcb887b29c54ad36b3482c04658bef8c0c28760138560e9ef4edcd885bf7c3b8
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04ac06241cdb5fb3b5ea1ce74c2d4a77507ba600b8cb9b1ee6f62ef1e75be7b7
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f8f0fe1e2e6495fc31e76e3cb7514d92d767bdc152b3aa1e3379ff7f8810311
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.5516194105148315,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.7183,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.6191949844360352,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.0957,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.2471156120300293,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.7448,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 3.331115484237671,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.8217,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 4.650871276855469,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 3.3426,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 3.3392367362976074,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.2304,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.18230539560317993,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.8133,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.041199803352356,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.1371,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.7668745517730713,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.9596,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 2.2554526329040527,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.6985,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.37597593665122986,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.9554,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.1484178304672241,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.4816,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 0.9884140491485596,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.0887,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 2.5340919494628906,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.1622,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 3.164740562438965,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 2.0013,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.7155555486679077,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.2095,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 0.952375054359436,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8851,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 2.5268073081970215,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.4144,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 3.838146924972534,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.3506,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.9546507596969604,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.6561,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 1.979039192199707,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.8316,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.025802731513977,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.9942,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 0.5559583902359009,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.0033,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.6625217199325562,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.7507,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 4.655178070068359,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.1542,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.9145403504371643,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.3025,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 1.8665916919708252,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.5428,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 1.5650798082351685,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.4144,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 2.973200559616089,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.6578,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 2.3732833862304688,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.0927,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.011518120765686,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.7655,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 2.6628100872039795,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.0994,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.2325425148010254,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.7863,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.7516754865646362,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6101,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.32088345289230347,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.3487,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.3543070554733276,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.4045,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 2.9448294639587402,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.6153,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 1.3423410654067993,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.9661,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.4557067155838013,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.5707,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.8707636594772339,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.7858,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.8935154676437378,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.9482,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.1275886297225952,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.8253,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.0190774202346802,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.3923,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 2.239851236343384,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.8538,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 0.48686838150024414,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.5748,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 2.387908458709717,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.2788,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.8753933906555176,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 2.16,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.7403647899627686,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.7309,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.975033480085504e+16,
|
| 351 |
+
"train_loss": 1.2463067905190064,
|
| 352 |
+
"train_runtime": 346.1883,
|
| 353 |
+
"train_samples_per_second": 1.121,
|
| 354 |
+
"train_steps_per_second": 0.28
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.975033480085504e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9cb7c5fddaca66205242d49af22fb676315f063b1cc2418d18fa177390924f2
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46eade7f1fea0f1e68e3363d9a8e4ff52e5b7305d34a81ea1379557122759d20
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b086fbc699e0302ee4ae254a9237a6f8d5448810e5885c35f09b7692fb3ba4ee
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e663701e0f36fb7a017f31c1bc0702e47930c84858f19bff02e2417a980ea566
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d332bf7bdd3e9cae1a94a5236886a591c59c6ce26033bc733ef93822982945ba
|
| 3 |
+
size 791576546
|