Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json +364 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json +364 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json +364 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json +364 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json +364 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:857e6d421f7321c12fe753e5d706f2fed3c56cd89f87855a28a6dd7d8fd927af
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea0b0777d782865d3402d1597c627ad09d836fbfc86ec5346b2269a5db864c8a
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:502cb07864fd3153066c9bb6e916b9a3d542c551fbd8df4e1f23c5c2a2eb74f6
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37d161c3bdf3434da4627ca3c8c465d70c023552bd189055da3a9c1611ae3232
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8abef3ac8894468d37e796f4790964bffef032cdd9365e7448f55a72c4ad1c67
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:763351182d6a05485fa69a698f1f7062d6104cb6111340d7d7abe32fa64ad8a3
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:467f699937a5cd7720442e09e5b47ea3eaf77b4604e673f05ec68159a694011b
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:550bfb4401128e0b71618eafdc31c528052668b811e502b8814e617de6aee7d2
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 92,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.021739130434782608,
|
| 13 |
+
"grad_norm": 6.09108304977417,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.5907,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.043478260869565216,
|
| 20 |
+
"grad_norm": 3.6769254207611084,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.9602,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06521739130434782,
|
| 27 |
+
"grad_norm": 0.5363268256187439,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.036,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08695652173913043,
|
| 34 |
+
"grad_norm": 0.43710896372795105,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.1849,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10869565217391304,
|
| 41 |
+
"grad_norm": 2.068805694580078,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.1805,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.13043478260869565,
|
| 48 |
+
"grad_norm": 1.2317078113555908,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.8115,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.15217391304347827,
|
| 55 |
+
"grad_norm": 2.86909818649292,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.3496,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.17391304347826086,
|
| 62 |
+
"grad_norm": 1.2622524499893188,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.2863,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.1956521739130435,
|
| 69 |
+
"grad_norm": 4.501521587371826,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.2824,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.21739130434782608,
|
| 76 |
+
"grad_norm": 3.231552839279175,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.2514,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2391304347826087,
|
| 83 |
+
"grad_norm": 1.5159425735473633,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.4057,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.2608695652173913,
|
| 90 |
+
"grad_norm": 7.557904243469238,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 2.476,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.2826086956521739,
|
| 97 |
+
"grad_norm": 2.5409963130950928,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.2994,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.30434782608695654,
|
| 104 |
+
"grad_norm": 0.06883285939693451,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.0905,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.32608695652173914,
|
| 111 |
+
"grad_norm": 3.767472267150879,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.5633,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.34782608695652173,
|
| 118 |
+
"grad_norm": 2.7977592945098877,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.1898,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.3695652173913043,
|
| 125 |
+
"grad_norm": 2.504427194595337,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.7027,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.391304347826087,
|
| 132 |
+
"grad_norm": 0.23567438125610352,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.0205,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.41304347826086957,
|
| 139 |
+
"grad_norm": 6.038938522338867,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9922,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.43478260869565216,
|
| 146 |
+
"grad_norm": 3.7713494300842285,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.2868,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.45652173913043476,
|
| 153 |
+
"grad_norm": 1.8124828338623047,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.7291,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4782608695652174,
|
| 160 |
+
"grad_norm": 5.478545665740967,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7223,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.5,
|
| 167 |
+
"grad_norm": 0.34744882583618164,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.3988,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.5217391304347826,
|
| 174 |
+
"grad_norm": 1.2364314794540405,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.114,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5434782608695652,
|
| 181 |
+
"grad_norm": 5.041813850402832,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.668,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5652173913043478,
|
| 188 |
+
"grad_norm": 7.402784824371338,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.1368,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5869565217391305,
|
| 195 |
+
"grad_norm": 0.8266273736953735,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.0698,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.6086956521739131,
|
| 202 |
+
"grad_norm": 2.409656286239624,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.1547,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6304347826086957,
|
| 209 |
+
"grad_norm": 0.29032644629478455,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.6191,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6521739130434783,
|
| 216 |
+
"grad_norm": 2.472151517868042,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.2694,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6739130434782609,
|
| 223 |
+
"grad_norm": 3.610250473022461,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.82,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6956521739130435,
|
| 230 |
+
"grad_norm": 1.7341822385787964,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6236,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.717391304347826,
|
| 237 |
+
"grad_norm": 0.2348906695842743,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.3063,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7391304347826086,
|
| 244 |
+
"grad_norm": 4.811798572540283,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.9939,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7608695652173914,
|
| 251 |
+
"grad_norm": 1.6772363185882568,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.1621,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.782608695652174,
|
| 258 |
+
"grad_norm": 5.120459079742432,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.1419,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.8043478260869565,
|
| 265 |
+
"grad_norm": 0.06929455697536469,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.2167,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.8260869565217391,
|
| 272 |
+
"grad_norm": 2.0615053176879883,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.2673,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8478260869565217,
|
| 279 |
+
"grad_norm": 2.242427110671997,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.1578,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8695652173913043,
|
| 286 |
+
"grad_norm": 4.344549179077148,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.953,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.8913043478260869,
|
| 293 |
+
"grad_norm": 3.4479851722717285,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.5008,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9130434782608695,
|
| 300 |
+
"grad_norm": 4.703179836273193,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.2355,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.9347826086956522,
|
| 307 |
+
"grad_norm": 3.625882387161255,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.7881,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9565217391304348,
|
| 314 |
+
"grad_norm": 0.7072364687919617,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.5925,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9782608695652174,
|
| 321 |
+
"grad_norm": 1.2554280757904053,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.2273,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 1.0,
|
| 328 |
+
"grad_norm": 2.71370530128479,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.1592,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 1.0,
|
| 335 |
+
"step": 92,
|
| 336 |
+
"total_flos": 2016868504174592.0,
|
| 337 |
+
"train_loss": 0.6301771117293317,
|
| 338 |
+
"train_runtime": 196.5079,
|
| 339 |
+
"train_samples_per_second": 1.873,
|
| 340 |
+
"train_steps_per_second": 0.468
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"logging_steps": 2,
|
| 344 |
+
"max_steps": 92,
|
| 345 |
+
"num_input_tokens_seen": 0,
|
| 346 |
+
"num_train_epochs": 1,
|
| 347 |
+
"save_steps": 500,
|
| 348 |
+
"stateful_callbacks": {
|
| 349 |
+
"TrainerControl": {
|
| 350 |
+
"args": {
|
| 351 |
+
"should_epoch_stop": false,
|
| 352 |
+
"should_evaluate": false,
|
| 353 |
+
"should_log": false,
|
| 354 |
+
"should_save": false,
|
| 355 |
+
"should_training_stop": false
|
| 356 |
+
},
|
| 357 |
+
"attributes": {}
|
| 358 |
+
}
|
| 359 |
+
},
|
| 360 |
+
"total_flos": 2016868504174592.0,
|
| 361 |
+
"train_batch_size": 1,
|
| 362 |
+
"trial_name": null,
|
| 363 |
+
"trial_params": null
|
| 364 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84b137fdf81538eabaa3a8e44061ba978549e8e0f750d39ed8894884abd24e61
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5af89fb7691ce8c2200477f096473849c5320b579dcfc6ca95024281e3a266b6
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81ab300618708290e4d43828e38b50877edf7bb3edc2f66910325001c1ec26db
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39e76d8d36cfbaa81321d35f83914bc805f951764c7c8c1eb2d9f7d77ef7a239
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a31502af9bb7a21b0bb183396d7c5a09d3ac8fa8bba158a9f2309a13a95dd8c2
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cd06ed2f69855ac79ade36194b719695cb03195a47d8ad3db0bdd40e66a1324
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4f896f688718a2aa7fcab54b474461aa6bcd4c8d2592b4f7acc3ce76a3f131a
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:670739b0e450b22b0ba067dd558ccb7491d152313a78fa9691de3f1658719099
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 92,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.021739130434782608,
|
| 13 |
+
"grad_norm": 0.17322731018066406,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.007,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.043478260869565216,
|
| 20 |
+
"grad_norm": 0.4034227430820465,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.0669,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06521739130434782,
|
| 27 |
+
"grad_norm": 0.047655943781137466,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.014,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08695652173913043,
|
| 34 |
+
"grad_norm": 0.029691645875573158,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.0985,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10869565217391304,
|
| 41 |
+
"grad_norm": 0.0031738663092255592,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.0029,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.13043478260869565,
|
| 48 |
+
"grad_norm": 0.7081104516983032,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.1851,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.15217391304347827,
|
| 55 |
+
"grad_norm": 0.41896992921829224,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.0237,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.17391304347826086,
|
| 62 |
+
"grad_norm": 0.48387590050697327,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.0193,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.1956521739130435,
|
| 69 |
+
"grad_norm": 0.0010938448831439018,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.5725,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.21739130434782608,
|
| 76 |
+
"grad_norm": 0.1128813624382019,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.0055,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2391304347826087,
|
| 83 |
+
"grad_norm": 0.004215009044855833,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.0004,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.2608695652173913,
|
| 90 |
+
"grad_norm": 0.053580403327941895,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.0041,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.2826086956521739,
|
| 97 |
+
"grad_norm": 0.08771532028913498,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.0955,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.30434782608695654,
|
| 104 |
+
"grad_norm": 0.4550749361515045,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.0213,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.32608695652173914,
|
| 111 |
+
"grad_norm": 2.2826590538024902,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.22,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.34782608695652173,
|
| 118 |
+
"grad_norm": 0.0036860036198049784,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.0015,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.3695652173913043,
|
| 125 |
+
"grad_norm": 0.7332060933113098,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.0539,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.391304347826087,
|
| 132 |
+
"grad_norm": 0.15167345106601715,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.1528,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.41304347826086957,
|
| 139 |
+
"grad_norm": 0.0022186979185789824,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.0634,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.43478260869565216,
|
| 146 |
+
"grad_norm": 6.4484453201293945,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.7515,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.45652173913043476,
|
| 153 |
+
"grad_norm": 0.00416164705529809,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.0016,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4782608695652174,
|
| 160 |
+
"grad_norm": 0.034967318177223206,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.0752,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.5,
|
| 167 |
+
"grad_norm": 0.011398707516491413,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.0103,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.5217391304347826,
|
| 174 |
+
"grad_norm": 0.8181813955307007,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.0635,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5434782608695652,
|
| 181 |
+
"grad_norm": 0.030382607132196426,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.0022,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5652173913043478,
|
| 188 |
+
"grad_norm": 0.005472751799970865,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.4915,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5869565217391305,
|
| 195 |
+
"grad_norm": 0.012804504483938217,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.0789,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.6086956521739131,
|
| 202 |
+
"grad_norm": 0.5662734508514404,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.1313,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6304347826086957,
|
| 209 |
+
"grad_norm": 0.15565143525600433,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.1838,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6521739130434783,
|
| 216 |
+
"grad_norm": 0.03305691480636597,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.0302,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6739130434782609,
|
| 223 |
+
"grad_norm": 0.11561785638332367,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.0105,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6956521739130435,
|
| 230 |
+
"grad_norm": 0.01182617712765932,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.0043,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.717391304347826,
|
| 237 |
+
"grad_norm": 0.04873761162161827,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.0059,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7391304347826086,
|
| 244 |
+
"grad_norm": 0.07617998868227005,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.0141,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7608695652173914,
|
| 251 |
+
"grad_norm": 0.8835837244987488,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.0333,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.782608695652174,
|
| 258 |
+
"grad_norm": 0.7749724388122559,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.0851,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.8043478260869565,
|
| 265 |
+
"grad_norm": 0.3363087475299835,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.027,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.8260869565217391,
|
| 272 |
+
"grad_norm": 0.0710146352648735,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.0093,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8478260869565217,
|
| 279 |
+
"grad_norm": 0.015138098038733006,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.0216,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8695652173913043,
|
| 286 |
+
"grad_norm": 0.19736772775650024,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.0327,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.8913043478260869,
|
| 293 |
+
"grad_norm": 0.05040005221962929,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.0043,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9130434782608695,
|
| 300 |
+
"grad_norm": 0.3938082754611969,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.1232,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.9347826086956522,
|
| 307 |
+
"grad_norm": 2.0402262210845947,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.1115,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9565217391304348,
|
| 314 |
+
"grad_norm": 1.9586436748504639,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.2655,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9782608695652174,
|
| 321 |
+
"grad_norm": 2.773951292037964,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.3761,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 1.0,
|
| 328 |
+
"grad_norm": 0.30637216567993164,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.0126,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 1.0,
|
| 335 |
+
"step": 92,
|
| 336 |
+
"total_flos": 2032950489120768.0,
|
| 337 |
+
"train_loss": 0.12097753487203432,
|
| 338 |
+
"train_runtime": 198.5545,
|
| 339 |
+
"train_samples_per_second": 1.853,
|
| 340 |
+
"train_steps_per_second": 0.463
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"logging_steps": 2,
|
| 344 |
+
"max_steps": 92,
|
| 345 |
+
"num_input_tokens_seen": 0,
|
| 346 |
+
"num_train_epochs": 1,
|
| 347 |
+
"save_steps": 500,
|
| 348 |
+
"stateful_callbacks": {
|
| 349 |
+
"TrainerControl": {
|
| 350 |
+
"args": {
|
| 351 |
+
"should_epoch_stop": false,
|
| 352 |
+
"should_evaluate": false,
|
| 353 |
+
"should_log": false,
|
| 354 |
+
"should_save": false,
|
| 355 |
+
"should_training_stop": false
|
| 356 |
+
},
|
| 357 |
+
"attributes": {}
|
| 358 |
+
}
|
| 359 |
+
},
|
| 360 |
+
"total_flos": 2032950489120768.0,
|
| 361 |
+
"train_batch_size": 1,
|
| 362 |
+
"trial_name": null,
|
| 363 |
+
"trial_params": null
|
| 364 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:040df4945fad36c157df705c9dbfc923e8eec981105698ce5843af65c6ae16c0
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a3a3c0e528a95b0bb130571fcf86d9981186ed8aa94b4c3e14222e3b47b0d1f
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:638b86ad8e0f9afc4f3f1f52d89012e751c1e703f58c00c8fccfb31bea890f61
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43a7bcebed8e96b052f66787b8fb5ab76be240ba9a8712336fd509c4cb800365
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a42d516569f5ded0c385f9bbb5547c1cbb1d59eb9434227156f26fcb67f784c
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eaf002e494f4431cf9c54a6356b86639236ef8d3a4e777d4bf5edcba081c20f
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0f285103af3edf0700ce58157257601eb492e783732711a4937f09ef70cafba
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d312a45706dcde5f13aa89b36bf97e835fd187df9df75d67658e7cdbd1ed075b
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 92,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.021739130434782608,
|
| 13 |
+
"grad_norm": 1.1296067237854004,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.3841,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.043478260869565216,
|
| 20 |
+
"grad_norm": 1.9366317987442017,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.4115,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06521739130434782,
|
| 27 |
+
"grad_norm": 4.050197124481201,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.8967,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08695652173913043,
|
| 34 |
+
"grad_norm": 0.7215381264686584,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.8096,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10869565217391304,
|
| 41 |
+
"grad_norm": 0.4154060184955597,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.2618,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.13043478260869565,
|
| 48 |
+
"grad_norm": 1.4991834163665771,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.731,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.15217391304347827,
|
| 55 |
+
"grad_norm": 0.3260190188884735,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.5441,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.17391304347826086,
|
| 62 |
+
"grad_norm": 0.7272593379020691,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.4451,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.1956521739130435,
|
| 69 |
+
"grad_norm": 1.0374034643173218,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.2988,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.21739130434782608,
|
| 76 |
+
"grad_norm": 1.367766261100769,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3249,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2391304347826087,
|
| 83 |
+
"grad_norm": 1.4096276760101318,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.3398,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.2608695652173913,
|
| 90 |
+
"grad_norm": 0.802492082118988,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.5804,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.2826086956521739,
|
| 97 |
+
"grad_norm": 2.32653546333313,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.4343,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.30434782608695654,
|
| 104 |
+
"grad_norm": 0.14287668466567993,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.0477,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.32608695652173914,
|
| 111 |
+
"grad_norm": 0.8296651244163513,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.6951,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.34782608695652173,
|
| 118 |
+
"grad_norm": 0.9478865265846252,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.3354,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.3695652173913043,
|
| 125 |
+
"grad_norm": 2.4494006633758545,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.7025,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.391304347826087,
|
| 132 |
+
"grad_norm": 0.08148877322673798,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.1202,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.41304347826086957,
|
| 139 |
+
"grad_norm": 1.3430149555206299,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.4278,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.43478260869565216,
|
| 146 |
+
"grad_norm": 0.4023633599281311,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.4105,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.45652173913043476,
|
| 153 |
+
"grad_norm": 1.502210259437561,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6282,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4782608695652174,
|
| 160 |
+
"grad_norm": 0.5279916524887085,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.1886,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.5,
|
| 167 |
+
"grad_norm": 0.4618723690509796,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.1187,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.5217391304347826,
|
| 174 |
+
"grad_norm": 1.1924479007720947,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.4895,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5434782608695652,
|
| 181 |
+
"grad_norm": 1.406653881072998,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.462,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5652173913043478,
|
| 188 |
+
"grad_norm": 1.7467128038406372,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.4685,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5869565217391305,
|
| 195 |
+
"grad_norm": 1.0591291189193726,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.7101,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.6086956521739131,
|
| 202 |
+
"grad_norm": 1.112949013710022,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.3761,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6304347826086957,
|
| 209 |
+
"grad_norm": 2.589237689971924,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.8964,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6521739130434783,
|
| 216 |
+
"grad_norm": 1.2493059635162354,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.7433,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6739130434782609,
|
| 223 |
+
"grad_norm": 1.9061592817306519,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.7149,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6956521739130435,
|
| 230 |
+
"grad_norm": 2.45300555229187,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.4371,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.717391304347826,
|
| 237 |
+
"grad_norm": 2.228710651397705,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.6653,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7391304347826086,
|
| 244 |
+
"grad_norm": 0.8008307218551636,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.1514,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7608695652173914,
|
| 251 |
+
"grad_norm": 0.01986389420926571,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.2807,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.782608695652174,
|
| 258 |
+
"grad_norm": 1.2030994892120361,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.2121,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.8043478260869565,
|
| 265 |
+
"grad_norm": 0.6025874614715576,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.1727,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.8260869565217391,
|
| 272 |
+
"grad_norm": 0.3984823524951935,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.1357,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8478260869565217,
|
| 279 |
+
"grad_norm": 0.6921195387840271,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.2836,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8695652173913043,
|
| 286 |
+
"grad_norm": 0.04382755234837532,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.5031,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.8913043478260869,
|
| 293 |
+
"grad_norm": 1.5162211656570435,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.3304,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9130434782608695,
|
| 300 |
+
"grad_norm": 0.6259192824363708,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.1296,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.9347826086956522,
|
| 307 |
+
"grad_norm": 1.5014652013778687,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.6724,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9565217391304348,
|
| 314 |
+
"grad_norm": 4.528606414794922,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.5261,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9782608695652174,
|
| 321 |
+
"grad_norm": 0.15331390500068665,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.0363,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 1.0,
|
| 328 |
+
"grad_norm": 1.2000925540924072,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.3445,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 1.0,
|
| 335 |
+
"step": 92,
|
| 336 |
+
"total_flos": 4792956951199744.0,
|
| 337 |
+
"train_loss": 0.4756195545196533,
|
| 338 |
+
"train_runtime": 313.428,
|
| 339 |
+
"train_samples_per_second": 1.174,
|
| 340 |
+
"train_steps_per_second": 0.294
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"logging_steps": 2,
|
| 344 |
+
"max_steps": 92,
|
| 345 |
+
"num_input_tokens_seen": 0,
|
| 346 |
+
"num_train_epochs": 1,
|
| 347 |
+
"save_steps": 500,
|
| 348 |
+
"stateful_callbacks": {
|
| 349 |
+
"TrainerControl": {
|
| 350 |
+
"args": {
|
| 351 |
+
"should_epoch_stop": false,
|
| 352 |
+
"should_evaluate": false,
|
| 353 |
+
"should_log": false,
|
| 354 |
+
"should_save": false,
|
| 355 |
+
"should_training_stop": false
|
| 356 |
+
},
|
| 357 |
+
"attributes": {}
|
| 358 |
+
}
|
| 359 |
+
},
|
| 360 |
+
"total_flos": 4792956951199744.0,
|
| 361 |
+
"train_batch_size": 1,
|
| 362 |
+
"trial_name": null,
|
| 363 |
+
"trial_params": null
|
| 364 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:968963aca4a3d7d2ca1d2d8db5c362658cefa72d304a3ec022ac25e67d846a42
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccfffb49d254686ff927002aa256e455a85c432e344013b10a52c16c83f78700
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fa459bea49cae2643832fcda9ee0a85fbfd03857a1741def05d71736fb5f422
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0f5215d3edcca5d04b119de86073ce299e15e1ce3cb8b283190c7eb1c4855d0
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b6d7e4c9ee160535596a15f0a9cdd14bbec4e79a90e4e93100c6c114348793f
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:572561376a76a93533269e55e42451d11cac7a33012fe74b4310f133756afd33
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38317de6bb0584ebac545a2faf4129d6e2e417a38865720382aa265ca7f16623
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dd53b66a9fcdd90b3829d6e14befc65adc080086aceec88646c1135d1a0efd9
|
| 3 |
+
size 369837282
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 92,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.021739130434782608,
|
| 13 |
+
"grad_norm": 4.441051006317139,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.992,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.043478260869565216,
|
| 20 |
+
"grad_norm": 3.2609739303588867,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.2689,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06521739130434782,
|
| 27 |
+
"grad_norm": 3.7552318572998047,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.1606,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08695652173913043,
|
| 34 |
+
"grad_norm": 3.9538447856903076,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.2226,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10869565217391304,
|
| 41 |
+
"grad_norm": 7.879010200500488,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.7328,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.13043478260869565,
|
| 48 |
+
"grad_norm": 3.853842258453369,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.6685,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.15217391304347827,
|
| 55 |
+
"grad_norm": 2.0626301765441895,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.6801,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.17391304347826086,
|
| 62 |
+
"grad_norm": 7.179374694824219,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.4164,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.1956521739130435,
|
| 69 |
+
"grad_norm": 1.8957184553146362,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.7125,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.21739130434782608,
|
| 76 |
+
"grad_norm": 3.1610910892486572,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.5239,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2391304347826087,
|
| 83 |
+
"grad_norm": 4.4256672859191895,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.5069,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.2608695652173913,
|
| 90 |
+
"grad_norm": 4.9362335205078125,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.8549,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.2826086956521739,
|
| 97 |
+
"grad_norm": 4.253013610839844,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.0589,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.30434782608695654,
|
| 104 |
+
"grad_norm": 4.3326826095581055,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.2678,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.32608695652173914,
|
| 111 |
+
"grad_norm": 6.773116111755371,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.9853,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.34782608695652173,
|
| 118 |
+
"grad_norm": 7.467095851898193,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.3477,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.3695652173913043,
|
| 125 |
+
"grad_norm": 0.6017383933067322,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.3416,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.391304347826087,
|
| 132 |
+
"grad_norm": 2.026484966278076,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.4528,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.41304347826086957,
|
| 139 |
+
"grad_norm": 1.9307522773742676,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.5631,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.43478260869565216,
|
| 146 |
+
"grad_norm": 3.334146499633789,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.4508,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.45652173913043476,
|
| 153 |
+
"grad_norm": 1.933052659034729,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.7173,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4782608695652174,
|
| 160 |
+
"grad_norm": 3.3551347255706787,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.3031,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.5,
|
| 167 |
+
"grad_norm": 2.526327133178711,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.3352,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.5217391304347826,
|
| 174 |
+
"grad_norm": 7.738239765167236,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 3.587,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5434782608695652,
|
| 181 |
+
"grad_norm": 1.777616024017334,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.6968,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5652173913043478,
|
| 188 |
+
"grad_norm": 4.613940715789795,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.8354,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5869565217391305,
|
| 195 |
+
"grad_norm": 4.080211162567139,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.2648,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.6086956521739131,
|
| 202 |
+
"grad_norm": 6.371545314788818,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.6354,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6304347826086957,
|
| 209 |
+
"grad_norm": 6.669529914855957,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 2.2418,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6521739130434783,
|
| 216 |
+
"grad_norm": 3.0953638553619385,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.9421,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6739130434782609,
|
| 223 |
+
"grad_norm": 3.3901684284210205,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.4606,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6956521739130435,
|
| 230 |
+
"grad_norm": 3.4849743843078613,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.2765,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.717391304347826,
|
| 237 |
+
"grad_norm": 5.219615936279297,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.4114,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7391304347826086,
|
| 244 |
+
"grad_norm": 2.6195218563079834,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.3279,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7608695652173914,
|
| 251 |
+
"grad_norm": 3.040009021759033,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.8641,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.782608695652174,
|
| 258 |
+
"grad_norm": 6.53596305847168,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 2.4763,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.8043478260869565,
|
| 265 |
+
"grad_norm": 2.496731996536255,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.1084,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.8260869565217391,
|
| 272 |
+
"grad_norm": 2.0288877487182617,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.5153,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8478260869565217,
|
| 279 |
+
"grad_norm": 1.2743315696716309,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.3963,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8695652173913043,
|
| 286 |
+
"grad_norm": 3.1155552864074707,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.6715,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.8913043478260869,
|
| 293 |
+
"grad_norm": 2.91237735748291,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.1998,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9130434782608695,
|
| 300 |
+
"grad_norm": 3.1403515338897705,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.813,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.9347826086956522,
|
| 307 |
+
"grad_norm": 1.0142074823379517,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.8541,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9565217391304348,
|
| 314 |
+
"grad_norm": 1.7990295886993408,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.5481,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9782608695652174,
|
| 321 |
+
"grad_norm": 4.208423137664795,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.8142,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 1.0,
|
| 328 |
+
"grad_norm": 2.21415376663208,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.222,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 1.0,
|
| 335 |
+
"step": 92,
|
| 336 |
+
"total_flos": 2060088319672320.0,
|
| 337 |
+
"train_loss": 1.2114470782487288,
|
| 338 |
+
"train_runtime": 196.2309,
|
| 339 |
+
"train_samples_per_second": 1.875,
|
| 340 |
+
"train_steps_per_second": 0.469
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"logging_steps": 2,
|
| 344 |
+
"max_steps": 92,
|
| 345 |
+
"num_input_tokens_seen": 0,
|
| 346 |
+
"num_train_epochs": 1,
|
| 347 |
+
"save_steps": 500,
|
| 348 |
+
"stateful_callbacks": {
|
| 349 |
+
"TrainerControl": {
|
| 350 |
+
"args": {
|
| 351 |
+
"should_epoch_stop": false,
|
| 352 |
+
"should_evaluate": false,
|
| 353 |
+
"should_log": false,
|
| 354 |
+
"should_save": false,
|
| 355 |
+
"should_training_stop": false
|
| 356 |
+
},
|
| 357 |
+
"attributes": {}
|
| 358 |
+
}
|
| 359 |
+
},
|
| 360 |
+
"total_flos": 2060088319672320.0,
|
| 361 |
+
"train_batch_size": 1,
|
| 362 |
+
"trial_name": null,
|
| 363 |
+
"trial_params": null
|
| 364 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b79aaba74229c5b0d837e9ac5cedbe36e425f1a29335f5049a310c762e78dd2
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bf24bd98c3ffdea50275e527e934666eb42b753a6b12ec775ef20bbeea84ede
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8404bf0fef92715cc44f4e710595ba6b0457a3a2811d23fbe0f7780ce76d7378
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc2f3efc4cf5e0f1fb8cdce98b0704b7c0fa4142fca915fe3774971780347018
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c807d9baa99dede81399aef0bd4796381e8535cb504ab1e1313f129796d2160f
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac48c818c303fe23985c88e295a9d20b095e81c95cb5e49ee64351965035a180
|
| 3 |
+
size 794708086
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca99b55c7f846d80aaf5a644fa43d442cef1b955557d24a529a0d0a46a9aa4c7
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32aa7feb2e6e1e2ba21daefc38b7338c37000c71b139c325d5b0b9d267ae8415
|
| 3 |
+
size 794706058
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 92,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.021739130434782608,
|
| 13 |
+
"grad_norm": 1.7172458171844482,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.9706,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.043478260869565216,
|
| 20 |
+
"grad_norm": 1.535640001296997,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.0299,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06521739130434782,
|
| 27 |
+
"grad_norm": 1.8416475057601929,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5077,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08695652173913043,
|
| 34 |
+
"grad_norm": 1.3889185190200806,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.6756,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10869565217391304,
|
| 41 |
+
"grad_norm": 0.3524916470050812,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.6578,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.13043478260869565,
|
| 48 |
+
"grad_norm": 0.24557974934577942,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.245,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.15217391304347827,
|
| 55 |
+
"grad_norm": 1.0801588296890259,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.4678,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.17391304347826086,
|
| 62 |
+
"grad_norm": 0.6954711675643921,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.357,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.1956521739130435,
|
| 69 |
+
"grad_norm": 0.6120344996452332,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.3746,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.21739130434782608,
|
| 76 |
+
"grad_norm": 0.9757769703865051,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3461,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2391304347826087,
|
| 83 |
+
"grad_norm": 1.1557114124298096,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.3997,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.2608695652173913,
|
| 90 |
+
"grad_norm": 1.616867184638977,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8107,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.2826086956521739,
|
| 97 |
+
"grad_norm": 0.6354851722717285,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.8208,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.30434782608695654,
|
| 104 |
+
"grad_norm": 1.0514754056930542,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.3612,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.32608695652173914,
|
| 111 |
+
"grad_norm": 1.695558786392212,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.472,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.34782608695652173,
|
| 118 |
+
"grad_norm": 1.0636487007141113,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2386,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.3695652173913043,
|
| 125 |
+
"grad_norm": 0.2803816795349121,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.1083,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.391304347826087,
|
| 132 |
+
"grad_norm": 0.7646946310997009,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9315,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.41304347826086957,
|
| 139 |
+
"grad_norm": 2.3936870098114014,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.3936,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.43478260869565216,
|
| 146 |
+
"grad_norm": 2.638190269470215,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.1179,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.45652173913043476,
|
| 153 |
+
"grad_norm": 0.38522782921791077,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6251,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4782608695652174,
|
| 160 |
+
"grad_norm": 1.31108820438385,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.2963,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.5,
|
| 167 |
+
"grad_norm": 0.9962872266769409,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.1005,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.5217391304347826,
|
| 174 |
+
"grad_norm": 0.7223103046417236,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.3556,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5434782608695652,
|
| 181 |
+
"grad_norm": 0.9148542881011963,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.7949,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5652173913043478,
|
| 188 |
+
"grad_norm": 0.9974550008773804,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.383,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5869565217391305,
|
| 195 |
+
"grad_norm": 0.21446958184242249,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.2946,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.6086956521739131,
|
| 202 |
+
"grad_norm": 3.7416319847106934,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.4213,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6304347826086957,
|
| 209 |
+
"grad_norm": 0.9001378417015076,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.8664,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6521739130434783,
|
| 216 |
+
"grad_norm": 1.5950777530670166,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.1924,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6739130434782609,
|
| 223 |
+
"grad_norm": 0.8221089839935303,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.6193,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6956521739130435,
|
| 230 |
+
"grad_norm": 1.6373211145401,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.7243,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.717391304347826,
|
| 237 |
+
"grad_norm": 1.0375702381134033,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.7575,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7391304347826086,
|
| 244 |
+
"grad_norm": 1.2386682033538818,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6053,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7608695652173914,
|
| 251 |
+
"grad_norm": 0.5664445161819458,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.6014,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.782608695652174,
|
| 258 |
+
"grad_norm": 3.124638080596924,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.3379,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.8043478260869565,
|
| 265 |
+
"grad_norm": 1.7098889350891113,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.0894,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.8260869565217391,
|
| 272 |
+
"grad_norm": 0.9901090264320374,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.4407,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8478260869565217,
|
| 279 |
+
"grad_norm": 1.7773938179016113,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.1619,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8695652173913043,
|
| 286 |
+
"grad_norm": 1.735460877418518,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.5605,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.8913043478260869,
|
| 293 |
+
"grad_norm": 0.8822594285011292,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.466,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9130434782608695,
|
| 300 |
+
"grad_norm": 1.8246649503707886,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.6933,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.9347826086956522,
|
| 307 |
+
"grad_norm": 1.136141300201416,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.4576,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9565217391304348,
|
| 314 |
+
"grad_norm": 0.6676300764083862,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.7428,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9782608695652174,
|
| 321 |
+
"grad_norm": 1.731764793395996,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.3401,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 1.0,
|
| 328 |
+
"grad_norm": 0.46977347135543823,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.2042,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 1.0,
|
| 335 |
+
"step": 92,
|
| 336 |
+
"total_flos": 5551594087120896.0,
|
| 337 |
+
"train_loss": 0.7482347903044327,
|
| 338 |
+
"train_runtime": 314.3339,
|
| 339 |
+
"train_samples_per_second": 1.171,
|
| 340 |
+
"train_steps_per_second": 0.293
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"logging_steps": 2,
|
| 344 |
+
"max_steps": 92,
|
| 345 |
+
"num_input_tokens_seen": 0,
|
| 346 |
+
"num_train_epochs": 1,
|
| 347 |
+
"save_steps": 500,
|
| 348 |
+
"stateful_callbacks": {
|
| 349 |
+
"TrainerControl": {
|
| 350 |
+
"args": {
|
| 351 |
+
"should_epoch_stop": false,
|
| 352 |
+
"should_evaluate": false,
|
| 353 |
+
"should_log": false,
|
| 354 |
+
"should_save": false,
|
| 355 |
+
"should_training_stop": false
|
| 356 |
+
},
|
| 357 |
+
"attributes": {}
|
| 358 |
+
}
|
| 359 |
+
},
|
| 360 |
+
"total_flos": 5551594087120896.0,
|
| 361 |
+
"train_batch_size": 1,
|
| 362 |
+
"trial_name": null,
|
| 363 |
+
"trial_params": null
|
| 364 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b420870bc6bd7ab2d962e17aa98541dbfd84dee5188b2fbfbbb0a856791b89f7
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cbb8b4939b5f8948cd443b04095ca9d6da1951e0afa830ba0736d8e8486874c
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:710d51c728f923f61a4728c0dd5f92ff6de0fa172d9f28b54d18b73b8e53e928
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf43931ab46a6c71ef5ef5e106a93a987757d773d71308ac150b36cbefe90409
|
| 3 |
+
size 369838470
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8a3fad06f1e5ecbc439bd3b8a76ec421977354e24cc59393278cc7de282701b
|
| 3 |
+
size 369837282
|