Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_trainer_state.json +378 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_trainer_state.json +378 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_trainer_state.json +378 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_trainer_state.json +378 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_trainer_state.json +378 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb8939248f6742205c74e4d0b02750aefa474ef978b7722b5b8fef38a0ec24ef
|
| 3 |
+
size 369838470
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c782658b6ced9b40c3e420262e6075c80e0b89ca5f921b8c190b2670f4a36dc
|
| 3 |
+
size 369838470
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f0fea615f521b0c0202f69379c81d14254f0c54443f27ac4d19b87e5cf04878
|
| 3 |
+
size 369838470
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ca21aa117d912df1e4f50d72ef9daa534c4fa4f403b33b7d3c58723b8d89193
|
| 3 |
+
size 369838470
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cca4e099b36d6863ceb5d302655f1b458a1076208614c9329ae93f7ca42334f3
|
| 3 |
+
size 369837282
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ed831f732e62bb1a6e285fdec0028bbf35eea8378c5df59cadd26625eebe684
|
| 3 |
+
size 369838470
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5b4c04db8c5afb525937c3c48687f5e38ea4058e4ced0910bdb026e8b75afad
|
| 3 |
+
size 369837282
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08b5c9a1cd78c0d3c2fa9485b5fd227f0630306d390bc9b6efd95f07e8275c0f
|
| 3 |
+
size 369837282
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/0_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.7885549068450928,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.3158,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 3.5465033054351807,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.4782,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 2.387897491455078,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.4943,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 1.9758349657058716,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.6686,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 1.9711047410964966,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.8049,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 1.0105559825897217,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.3966,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 1.9358932971954346,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.779,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.6600197553634644,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.0865,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 2.0319929122924805,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.0487,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.7845250368118286,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.0636,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 2.1108617782592773,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7817,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.574131190776825,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.4896,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 4.136038303375244,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.6166,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 4.69835090637207,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.5229,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 0.6893218755722046,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.166,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.9169237613677979,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.2709,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 2.487178325653076,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.0059,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.6276919841766357,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.1054,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 3.6395184993743896,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.5661,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.6149156093597412,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0574,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 3.7386510372161865,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.7777,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 3.9839937686920166,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.912,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 5.850351810455322,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.7357,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 3.660813093185425,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.0512,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 14.106647491455078,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.4082,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 3.222626209259033,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.6463,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 4.6796650886535645,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.3084,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 2.90836238861084,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.3372,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 3.991719961166382,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.3481,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.9187226295471191,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.6076,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.9427756071090698,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.6295,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 2.391686201095581,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.135,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.3331944942474365,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.3263,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.4943288564682007,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.8955,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 2.617217540740967,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.7329,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.3934130668640137,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.1683,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.945609450340271,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.6058,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 1.7555233240127563,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.7444,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.6663618087768555,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.6543,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 2.9476871490478516,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.7212,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 4.171327590942383,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.0781,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.2571913003921509,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.1902,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 2.324883222579956,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.4357,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.769622564315796,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.6369,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.4626708030700684,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.9478,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 1.5452158451080322,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.5981,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.4196434020996094,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.1368,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 0.6402710676193237,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.5329,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 5756746026975232.0,
|
| 351 |
+
"train_loss": 1.0383215835413981,
|
| 352 |
+
"train_runtime": 283.7705,
|
| 353 |
+
"train_samples_per_second": 1.367,
|
| 354 |
+
"train_steps_per_second": 0.342
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 5756746026975232.0,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:370b1db79ae79307d373e52fdcf5846624e6f6ad230ac369eea386c427fa22d5
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:786ed4479b1a809eb89d5a753dc89448dabc319918eb6df9d1f33dce7183d8bb
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e8c960fb7271256592ac9103b14201ea74c547f243dfaf14d27b6877499b29d
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:342f39308650a50e27c3384006998d380b4ee8782565da8a8c785cad37e94e9e
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e2933a0448457cc81da095354056445031a0197d4f32ede406420cdfff126ed
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f8621d8d5526426122df9113b89e3e07402e213bbca7ae98592879d054ce024
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d220fc720bc62776c37d47fd80f77cd33928fd8f26fecac46b6fb031eb4f03c
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1e2d3519b28a2f33deb11d7a2212f7fda010f1cad33e0a67eeb7ad4655b1db0
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/1_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.8986397981643677,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.9165,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 0.8982141017913818,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.7158,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.1869165897369385,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.4353,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 1.4538377523422241,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.4816,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 1.2608749866485596,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.1197,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 1.5531688928604126,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.1562,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 2.4403159618377686,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.1279,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.2706080675125122,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.3482,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.8720521926879883,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.1394,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.1683954000473022,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.8083,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 1.2726197242736816,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.0113,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.0261338949203491,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.6729,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 5.61696195602417,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.2334,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.71990168094635,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.9639,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 2.458183526992798,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.0067,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 2.6437785625457764,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.9413,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.7311123609542847,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.5072,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.6367141008377075,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.628,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 0.9919836521148682,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.3894,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.5254012942314148,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.5094,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 1.6975247859954834,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6632,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 2.5024383068084717,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.181,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 3.767259359359741,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.7095,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 1.6667107343673706,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.007,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 2.309662103652954,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.4642,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 1.091732382774353,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.7542,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.6196984648704529,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.5573,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 0.402852863073349,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.6682,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.7684454321861267,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.4199,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.7286068201065063,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.2886,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.2119078636169434,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.975,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 1.0343464612960815,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.526,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 3.151249885559082,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.9672,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 2.050671339035034,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.3117,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 2.2325823307037354,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.9194,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 2.7051377296447754,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.7141,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.7203123569488525,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.8371,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 0.36563840508461,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.5306,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.0923153162002563,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.6162,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 1.4866068363189697,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.1174,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 1.3142611980438232,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.1617,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.786864161491394,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.0875,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 5.453260898590088,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 2.6415,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 2.76277494430542,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.6643,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 3.1113247871398926,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.6608,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 2.045165538787842,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.3748,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.3238657712936401,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.6474,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.4850274324417114,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.2191,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.357514687905792e+16,
|
| 351 |
+
"train_loss": 1.0192029107477247,
|
| 352 |
+
"train_runtime": 432.7985,
|
| 353 |
+
"train_samples_per_second": 0.896,
|
| 354 |
+
"train_steps_per_second": 0.224
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.357514687905792e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10a34813c00d6ad8cdc583f8887781fecfb131343418ae94ec3447d7a3fe1bfc
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9db0446e3f8c533e3feae1f50596020e299611966289837d5806ffddf7b55f2e
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a49327d682e271891713c7c47e1696742b20027098eb662d19a166aa06442575
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d6f565813fc76f5b193fad10e0cfc5d676079a66425debe170b83c9cecef597
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6e1bbe1c61ba40ca48e147e953756de596d150add45ce56a952820f0a474fe7
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90abd315cf8aab026bf8f60c25bb92c532647720f2481a4be4d4a2828e93a7c5
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31a6eccd57cfc4753d17e9dbe0b1b2871b4163e9f9506aebfc91c68f77e059de
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8152affee06df72f20252a7a473eed4521a4660df6fce31047d2b6ebc30e069d
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/2_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 0.9275849461555481,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.2235,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 3.5949947834014893,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.6944,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 2.8406832218170166,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5023,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.5248422622680664,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.4287,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 3.409879684448242,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.3373,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 1.4362713098526,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.4465,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 1.8497318029403687,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.8547,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.0694129467010498,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.8493,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 1.5638664960861206,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.505,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.5820540189743042,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.6212,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 4.486837863922119,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.9427,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 3.43084979057312,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.3259,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.7316274642944336,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.0513,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.9279751777648926,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.3768,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 1.6935036182403564,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.3627,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 2.276658296585083,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.7526,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.0096880197525024,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.3399,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 6.736175537109375,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.2885,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 5.622121810913086,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.1467,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.8184213638305664,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.444,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 2.9587604999542236,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.0432,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.1915258169174194,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.5732,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 1.614179253578186,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.6172,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.6889798045158386,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.745,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 1.6385116577148438,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.5239,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 2.3733930587768555,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.1497,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 6.621945381164551,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.9435,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 3.033151626586914,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.2542,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 1.3858976364135742,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.1066,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.2492917776107788,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.6909,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.3139721155166626,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.3802,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.7570559978485107,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6115,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 2.8449275493621826,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.8201,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.3498934507369995,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6134,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.9374634027481079,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.896,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 0.9612107872962952,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.7274,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 1.954071044921875,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.9464,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 1.1018635034561157,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.2764,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.9673211574554443,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7883,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 1.7728729248046875,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.9561,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.83953857421875,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.3297,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.0060991048812866,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.5352,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 2.0772080421447754,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.1712,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.6878994703292847,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.84,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 2.3218917846679688,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.9899,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 1.3924381732940674,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.4655,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 0.6978415846824646,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.4733,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.8935520648956299,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.6506,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.549123199172608e+16,
|
| 351 |
+
"train_loss": 0.8593464733399067,
|
| 352 |
+
"train_runtime": 470.9244,
|
| 353 |
+
"train_samples_per_second": 0.824,
|
| 354 |
+
"train_steps_per_second": 0.206
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.549123199172608e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ff896230f57e27e7efb28914991eb6d19a9aec05291e19038cdb761e2ef5f6
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:080373777ac3c4300e13d3fd2e643161a026404ca2b58cf8b03d824c241bed6e
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0819ec21ef683084401b7b83249b2199b0896526a18b20cd8bd4b69774c7678a
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e6f125278992e54fb10a7579a5b264da2311e679cdf452181e06ef1d1f2811f
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c908af596f0443c233324475fdbe5601d5df6b5600b93c3e742622ca3ee6519
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86bfa4042903b9f2ca17b83f019f4cbf0153327097c41c3f88184fc545403474
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9558c665e69591d78ecc3a8312b4ebe20d4baa013d0ddb21d6d69f77ab76bc4
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfdf0b1c8e3ca781e7795f6201dae68670ec0b64fe67c316dd74300d3e2bfe2b
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/3_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 5.465381622314453,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.8918,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.7941179275512695,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.7903,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.6007481813430786,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.8676,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.9969778656959534,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.4928,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 0.4684109389781952,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.2868,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.28731948137283325,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.2312,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 1.3333783149719238,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.8662,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.9330962896347046,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.8947,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 2.568071126937866,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.0742,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.4679968357086182,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.5971,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.4212726950645447,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.6416,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.8013954162597656,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8423,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.1382532119750977,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.7341,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.0272934436798096,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.5432,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 0.6234714984893799,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.2671,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.1389981508255005,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.7717,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 5.075913906097412,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.3037,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.1098531484603882,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.8804,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.1887625455856323,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.7009,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.27039554715156555,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.4646,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 1.437915325164795,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.3613,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.6871410608291626,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.4272,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 4.205522060394287,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.3826,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 1.2980058193206787,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.8113,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 0.826991856098175,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.3983,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 1.9655853509902954,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.2521,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 1.085252046585083,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.636,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 1.889613389968872,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.6425,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 2.9438934326171875,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.6987,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 0.5686755180358887,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.267,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.5724007487297058,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.3444,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 1.3709577322006226,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6371,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.0185165405273438,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.8028,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.3173937797546387,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6559,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.3681342601776123,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.1988,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 0.9298673272132874,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.814,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 1.2699919939041138,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.5768,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 2.6978492736816406,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.0957,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.3678396940231323,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.8172,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 5.189119815826416,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.1335,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 2.356233835220337,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.126,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.0532094240188599,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.8347,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.6790062189102173,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.6408,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 4.085683345794678,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.595,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.189156174659729,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.5897,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 0.6069275736808777,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.9475,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 3.666023015975952,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.8139,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 0.7568903565406799,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.9121,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 2.07833438093312e+16,
|
| 351 |
+
"train_loss": 0.7429608340115891,
|
| 352 |
+
"train_runtime": 455.0442,
|
| 353 |
+
"train_samples_per_second": 0.853,
|
| 354 |
+
"train_steps_per_second": 0.213
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 2.07833438093312e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98ae67cc7881795e67b6b2b24fff7b8372ee549b84f1cae4ab8b30d72b55a8e4
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26ba73bae8933df7f2d31878fa576a49fe213349cefbc417808909fcf7658148
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e290980a99d775d0947e011fdc7eb7166501c54469affe28e434f19f8c81aca5
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:343b3641c285b2619787a13de4844305fb426038dfa5499950938cf558a1e0f3
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d3ca7c1aba23a71baaefbbd626c8ed37c1beaf5e02b7459cea8d2f4d7527fa5
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90ff72f18aefb6d0946800b285fe88bd5a4b105c202f638c2524d9faf80b227b
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6441cb6bb9cb2a114399447e3474b20f41a3917f358ef38fe90739cef6d7396e
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77bdd5373faef04e2ffd2c1529d9059c48384f3eeb54d52b9c84555745b54f78
|
| 3 |
+
size 794706058
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/4_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 0.9477136731147766,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.551,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 3.8200013637542725,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.1141,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 0.16243354976177216,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.376,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 1.3112159967422485,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.7677,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 2.1373825073242188,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.256,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 2.1234893798828125,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.8886,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.04594890773296356,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.7081,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.3787109851837158,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.828,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.6793889999389648,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.0967,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 0.771903932094574,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.6246,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 1.1218531131744385,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.8997,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.01219642162323,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8123,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.0736132860183716,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.8224,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.2845109701156616,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.2574,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 1.4137431383132935,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.6976,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.9815335273742676,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.0386,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 2.5250496864318848,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.5427,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 0.8500065207481384,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9216,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.1719383001327515,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.0847,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 3.1414475440979004,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0832,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 2.8507297039031982,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.0086,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 2.59656023979187,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.4416,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 1.6737186908721924,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.2538,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.5178043246269226,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.2702,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 1.748957633972168,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.0557,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 1.7886401414871216,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.6472,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 1.393855094909668,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.791,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 1.5556002855300903,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.1448,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 2.578552484512329,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.3115,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.4991388320922852,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.8843,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.8561006784439087,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.5295,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 1.5938780307769775,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.5507,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 2.841743230819702,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.1595,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.491766095161438,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6423,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 1.3314822912216187,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.0332,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 2.0549938678741455,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 2.0647,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 1.329912781715393,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.5477,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 0.7718127369880676,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.5856,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 2.3758749961853027,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7453,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 3.360313892364502,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.9905,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.17600414156913757,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.5286,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.8083326816558838,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.6053,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 0.21756744384765625,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.0233,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.2804031372070312,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.3146,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 0.4804908037185669,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.3251,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 2.686302900314331,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.2593,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 0.10516992211341858,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.943,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 3.6283299922943115,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.768,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 2.055998584704205e+16,
|
| 351 |
+
"train_loss": 0.9511987253562691,
|
| 352 |
+
"train_runtime": 446.2144,
|
| 353 |
+
"train_samples_per_second": 0.87,
|
| 354 |
+
"train_steps_per_second": 0.217
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 2.055998584704205e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:855cbde8acac797b45d2371a9a3a0617b6d33f8d3ac9f8faf754a91f4aa333ab
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b282f874d4137ecb39abe65b92973331e82f116025a6173ce936ffac843e21a
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c2207f2441529cebfb2c6a7d55a8c4712bd0e01d8715dd171287deefbd34fe9
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63d312918f1bfb9e1258851b1c99e9c3086b5c321c9b716922766e69510cec70
|
| 3 |
+
size 794708086
|
client_states_feddualMultipqfull_homoAgg_moe2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22ee8edfb9eaa1c31e1e1cb53250581d93bee9375df9ce52701adf79d658b652
|
| 3 |
+
size 794706058
|