Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20a2cfa32a281784795db634cd58040e0099dfb3a6b461f3e2811690aed270d4
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:164b99db199f8c1afc330ed8c4d413297e461263d6c0d531ef9471b84e72e5fa
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3ac471bdb0a23646a4b729abc1bddddff87a0202bf5fc658f8eff46c613af93
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddce9ffe75535dd2ce3951b60de8b30f69eca1122d14aa68bce2d74012e20d92
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8940422daafc8acb84347ba4f766e1cb543eb670aa45b0420a9a5227c22ba7e6
|
| 3 |
+
size 295584650
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1884b1ce6eb419994414bec53db3f25c876be84c73dd4b6bac19b279a37ae199
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dba57bcea505e8a5c94f327f555ddca8d1ae8fa1037bae98cdcc235d5c2709b5
|
| 3 |
+
size 295584650
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ea8ab72fae35f37e9ace071574cc20c4fbfc4c0f331de08fd89db0ec7fd1c0
|
| 3 |
+
size 295584650
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 1.4357949495315552,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.9752,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 7.484304904937744,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3921,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 3.0805540084838867,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.766,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 0.26295432448387146,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.2986,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 1.8822102546691895,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.3405,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 0.02680317871272564,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.3358,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 1.1298555135726929,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.6812,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.079339027404785,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.6571,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 2.0236611366271973,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4357,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 2.0701911449432373,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.5624,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 3.1642799377441406,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.4102,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 2.8954672813415527,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.296,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 5.15814208984375,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.2499,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 2.06376051902771,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.6663,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 0.37149399518966675,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.1357,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 1.2507413625717163,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.1487,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 1.1001901626586914,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.9658,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 3.733464002609253,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9133,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 0.3771137595176697,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.2268,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 0.6037631034851074,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.3637,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 1.1135358810424805,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.5037,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 12.166988372802734,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.5196,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.188719391822815,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.7123,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 1.858033299446106,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.3241,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.1797194480895996,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.3886,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 1.2527966499328613,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.3365,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 5.527441024780273,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.4229,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 2.6085422039031982,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.4734,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 1.661068320274353,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.3244,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 2.1758453845977783,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.4436,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 1.1051682233810425,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.4639,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 2.56185245513916,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.7811,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 3.042705774307251,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 2.9756,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 7.160967826843262,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.8687,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 2.546377658843994,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.9039,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 2.3272593021392822,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.4634,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 1.9541059732437134,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.4152,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 7.0820465087890625,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.7858,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 3.81209659576416,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7721,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 4.922970771789551,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.4672,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 1.5200783014297485,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.4998,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 8.176060676574707,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.6471,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 10.166783332824707,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.9579,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 0.09095903486013412,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.2736,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 0.8619403839111328,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.5136,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 1.9814728498458862,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.6869,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 1.469095230102539,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.4635,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 5.562348365783691,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.7081,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 1.774053931236267,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 1.1335,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 1.8918042182922363,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.3731,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 7197827847421952.0,
|
| 365 |
+
"train_loss": 0.7084918451309205,
|
| 366 |
+
"train_runtime": 296.0647,
|
| 367 |
+
"train_samples_per_second": 1.351,
|
| 368 |
+
"train_steps_per_second": 0.338
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 7197827847421952.0,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37a302188073e933752c00de25e2203101e658a30dec07fdab4bd426265d5f2e
|
| 3 |
+
size 140898814
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d64dd65a260b5d8ba3a2efe7eece417e0404a651245f61ff8b0c983786841f6
|
| 3 |
+
size 140898814
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e07a0c4b4a5ae5e4161e9cef8f4d38901e2a913350ba5522a1d29d4212e1ffb1
|
| 3 |
+
size 140898814
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c4254a77f1aa1f047303e2aeaf86752a06c02eb5e52ec452b7fcc782f52b52d
|
| 3 |
+
size 140898814
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:219c8d3bd85819db008faa389480875901a7069c993bae6623244671878646de
|
| 3 |
+
size 140898410
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7063fbc35cc60e6bed46affd0034dc1ed7b58aff7ad80d34b2a8a99308364ea6
|
| 3 |
+
size 140898814
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:585947acf8c75eb8dde19ac6a1ed4cdbcb9a0d357157898aa9255c927d7c62ea
|
| 3 |
+
size 140898410
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9b2c2bf6e18599c6416fab5f06330502ca9308ae9865311ebc522ed87568e55
|
| 3 |
+
size 140898410
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 2.9427311420440674,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.0288,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 6.741145610809326,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.4761,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 4.015233039855957,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.5796,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 3.0968964099884033,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.0918,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 2.706130027770996,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.0278,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 2.9777791500091553,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.8856,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 2.209172487258911,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.3796,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.455867052078247,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.2993,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 1.4126646518707275,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.2639,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 4.840634346008301,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.4814,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 2.0498416423797607,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.6443,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 3.3178482055664062,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.3003,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 3.242715358734131,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.4062,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 2.3701682090759277,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.5181,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 1.3877534866333008,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.5938,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 1.308324933052063,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.5723,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 2.3145534992218018,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.3368,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 4.586231231689453,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.3999,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 5.7817888259887695,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.4358,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 1.2060853242874146,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.9374,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 4.4397454261779785,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.1576,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 1.8089556694030762,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.2778,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.1753023862838745,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.3643,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 1.98372483253479,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.4053,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.529714584350586,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.4272,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 4.703952312469482,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.6426,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 3.705620527267456,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.5771,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 1.1030988693237305,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.2424,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 2.4728143215179443,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.2578,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 1.7469069957733154,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.2402,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 2.4544708728790283,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.4639,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 1.6641395092010498,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.1909,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 2.0484120845794678,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.4282,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 1.478560447692871,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.3979,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 3.1397147178649902,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.5562,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 1.3987221717834473,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.4531,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 1.8414459228515625,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.4473,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 2.1021289825439453,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.3472,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 1.8407915830612183,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.2642,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 2.4831814765930176,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.3237,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 1.4579954147338867,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.1968,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 3.551758289337158,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.5654,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 2.751859188079834,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.2002,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 2.306459903717041,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.3477,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 2.63804292678833,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.4023,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 2.2834153175354004,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.3506,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 4.944771766662598,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.627,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 1.705222249031067,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.2178,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 2.038778305053711,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 1.3418,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 2.250809907913208,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 1.2407,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 2949501438394368.0,
|
| 365 |
+
"train_loss": 1.3722802734375,
|
| 366 |
+
"train_runtime": 245.4499,
|
| 367 |
+
"train_samples_per_second": 1.63,
|
| 368 |
+
"train_steps_per_second": 0.407
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 2949501438394368.0,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7862e641bae9b1c1ff58cceb03a83630ad7cf69a12c1166502c070f23b2d02d9
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e894638477fe4d5df5e29deebcb475380243f8770751c46fc8c27c5353930445
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5feb2ae3f928072c4ee2154140d7034edc181a88544b9fd3e627d79ddbd1807e
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b772019cbba5031146eb8804cb175b47ef30a03774f0dd273bfd862b6c5ba52
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:785ce1c8ffeb43b8dee5b41dd20edbb058f523c2db0ed6b8af8ffda877b8b1e6
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06ba3ab83896fcaa00d07ee5061d394d33a34434d3f42e173b68f0a154d8472a
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef05687dd4b30bcee4fdd616997a10b003f3ef39210a5ac61b153007d271fd50
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3f8c6de60f28ae04a762180348c698a8c16fccd95e77755909e614b573f4117
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 3.2558865547180176,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.9064,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 0.991973340511322,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.1233,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 6.5367913246154785,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.9525,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 5.972145080566406,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.1308,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 5.799992561340332,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.7781,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 1.8599284887313843,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.2985,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 3.0901894569396973,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.287,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 3.8889167308807373,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.7026,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 1.7417221069335938,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.2317,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 0.34332019090652466,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3007,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 3.8413283824920654,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.5139,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 5.948110103607178,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.0095,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 1.9708259105682373,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.0875,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 6.128609657287598,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.1535,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 2.5443027019500732,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.2954,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 1.782371997833252,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2122,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 0.9455428123474121,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.4753,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 5.72688627243042,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.8696,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.12677001953125,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.4472,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 1.0864536762237549,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.2052,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 3.737551212310791,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.422,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 2.5829198360443115,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.5197,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 2.83404278755188,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.3223,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 2.3315060138702393,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.5508,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 0.9215158224105835,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.1513,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 5.178053379058838,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.7244,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 6.672908782958984,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.8517,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 6.269854545593262,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 2.2648,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 8.046138763427734,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.1398,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 5.5552077293396,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.1731,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 8.006683349609375,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.9849,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 9.458540916442871,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6968,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 3.7672533988952637,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.579,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 3.504437208175659,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.4301,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 5.53498649597168,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.7245,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 2.9111721515655518,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.7371,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 8.062969207763672,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.7654,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 7.899918079376221,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.6479,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 1.5469361543655396,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7988,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 11.664484024047852,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.4557,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 0.4525814354419708,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.4412,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 5.048192977905273,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.5993,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 3.5683045387268066,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.6205,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 4.712682723999023,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.4364,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 9.933538436889648,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.3851,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 4.783173084259033,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.4456,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 2.395021915435791,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.7315,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 3.892876148223877,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.8238,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 3.3703393936157227,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.3309,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 0.7113988995552063,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.8155,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 1.570782887018496e+16,
|
| 365 |
+
"train_loss": 0.7710115814208984,
|
| 366 |
+
"train_runtime": 425.933,
|
| 367 |
+
"train_samples_per_second": 0.939,
|
| 368 |
+
"train_steps_per_second": 0.235
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 1.570782887018496e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b930dbca6b179a5bc29fc350fc47779afbb1a26cac6de123dcc4a530f9c5359
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15e03e5d95385f51204779d8e410237239d9cd5c50011063fc3ce4a6655e6982
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6496615d1b538df97655e76e86eeec11e6de6e8a873239cc7fa24813a095cc1b
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7407557f6cbc453acb51d7a5b78c207e88530b905b32608fadf4aa968eb6181e
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04c1c3f23467e3dfff4f5e902e3447f7cee591eb9358914edfa4c90bfc14e000
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b41a2866ab605fe35c2c230b5f3194131b98959bd35153dd1faf39913ad83748
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:050b3b536ac4f0a5278377639ee83e7c7523c352c429c8f76b48c25936aacd69
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f060f4423b7a30f309ab6cc75bfa0e53766a36e5cba4d26b68665a17c4879ac
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 0.6915289759635925,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.803,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 0.5236664414405823,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3719,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 0.7289407849311829,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.6735,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 0.04733162373304367,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.3086,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 0.7653596997261047,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.2241,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 4.275763511657715,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.8208,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 0.8538552522659302,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.806,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.4601213932037354,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.7453,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 1.620142936706543,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.3818,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 4.4771318435668945,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.7292,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 0.6303713321685791,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.3592,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 3.111276865005493,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8449,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 1.3116923570632935,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.7724,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 7.254108428955078,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.6019,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 1.0935317277908325,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.3723,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 0.6438285708427429,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.4673,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 9.997159957885742,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.4406,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 1.574639916419983,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.8156,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.7990269660949707,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9997,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 2.4288394451141357,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0895,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 0.751030445098877,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.5163,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 1.5789567232131958,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.4597,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.716470718383789,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5985,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 1.0268652439117432,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.8839,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 3.183387041091919,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.635,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 1.6883171796798706,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.8253,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 6.081538677215576,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.7804,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 4.235260963439941,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.5859,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 4.297282695770264,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.0606,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 0.6412908434867859,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.2726,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 0.3413950204849243,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.3853,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 6.513116836547852,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.7348,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 0.7780243158340454,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.8995,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 0.6931712031364441,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.6984,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 5.796865940093994,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.6069,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 1.1728687286376953,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.7387,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 0.4009567201137543,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.5621,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 2.6604442596435547,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.8287,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 1.2604601383209229,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7813,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 0.5949382185935974,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.2305,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 3.986330986022949,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.0291,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 1.1950682401657104,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.5847,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 2.359482526779175,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.4699,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 0.38797926902770996,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.3318,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 2.0094640254974365,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.7107,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 10.679686546325684,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.1031,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 1.3764183521270752,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.2138,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 17.799694061279297,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.6659,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 0.7701702117919922,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.6446,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 2.7060821056365967,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.7716,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 2.014860909792461e+16,
|
| 365 |
+
"train_loss": 0.6847475278377533,
|
| 366 |
+
"train_runtime": 438.6106,
|
| 367 |
+
"train_samples_per_second": 0.912,
|
| 368 |
+
"train_steps_per_second": 0.228
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 2.014860909792461e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ae1dcfb75d581b07865f7e719852e53c0313715a95546499deeb3abd5356736
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff62f62346aae26510c829ce3a4dae85ad0d6a1845876f1ce707c7a9bcdc04bb
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fed039615c8f70ef8fb5cdfb55d6628f5a5e1f903d0fe2965d9f5130ec00013
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6a09338731636e04d275069a3942e655fb868bcc9839f6cd926e930e422aa74
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2bc2093a3f74d5610009eeb3c573f6e2a18d53c23f42b5915b97aa22cefeb78
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8527a6d3e15d2942d59d56a7e75275cef893892fbdfc47601bb38b5e5ae5e0d4
|
| 3 |
+
size 479128390
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:270f1fed8bb31676ab501726cee5078fbbff91f34854f365ab9545ffe56e3177
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d09f687a822b4962a11c0546b789db3d0b1e2d02fb5fa81437bb1c01f7181e
|
| 3 |
+
size 479127818
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 0.9225918054580688,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.991,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 0.8597076535224915,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3758,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 0.3333783745765686,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5362,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 0.5292944312095642,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.7394,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 4.7009687423706055,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.8573,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 1.600105881690979,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.8488,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 1.5355629920959473,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.4846,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 3.030879259109497,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5453,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 0.8978800177574158,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4633,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 1.1896603107452393,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.7015,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 2.6468939781188965,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.4669,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 4.17122745513916,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.0091,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 4.072269439697266,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 2.0015,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 4.218262195587158,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.7374,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 2.5141124725341797,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.726,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 1.6296266317367554,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.5757,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 2.733184337615967,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.3487,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 0.812718391418457,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.2033,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.641838788986206,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.0014,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 2.547071933746338,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.6443,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 5.26523494720459,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.9649,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 2.6201882362365723,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.479,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 3.8259027004241943,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.6319,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 7.226994037628174,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.7662,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.9887053966522217,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.0707,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 2.292367696762085,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.4642,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 5.1545867919921875,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.9666,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 3.924346685409546,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.8803,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 3.1195952892303467,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.1754,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 3.6606249809265137,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.0212,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 1.3416082859039307,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.8757,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 3.2015907764434814,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.4014,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 2.0905282497406006,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.3452,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 7.018588542938232,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.2981,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 4.384774684906006,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.7799,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 4.2297210693359375,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.4246,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 2.036142349243164,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.1518,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 0.1399793028831482,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.1056,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 9.0904541015625,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.3569,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 4.4876017570495605,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.7918,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 1.1255276203155518,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.279,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 5.206053256988525,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.561,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 0.1543315351009369,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.0126,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 2.7004189491271973,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.7089,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 1.0638126134872437,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.5882,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 2.7447102069854736,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.5941,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 2.824549674987793,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.5416,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 0.7135050296783447,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.5581,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 2.0467679500579834,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.6242,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 2.1278626918792725,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.4743,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 2.015594703277261e+16,
|
| 365 |
+
"train_loss": 0.8230290794372559,
|
| 366 |
+
"train_runtime": 469.2451,
|
| 367 |
+
"train_samples_per_second": 0.852,
|
| 368 |
+
"train_steps_per_second": 0.213
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 2.015594703277261e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c511c59b784a0f74f1dde51e0b2080a498cdc8e50966b14aa71fbae0f831686
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57e78f755ed86c7ee2e85264e01afa0586b716855ee8452edb1ca788b22bf283
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e717534fafe38840fb8c361d11d64541821aebf3cdd6f39e1f77013437871306
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac8da32902dca181676ea7e5c2f04894bf057c516027b502d39bce2e20e08b2
|
| 3 |
+
size 295585110
|
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b5157aa89baeeacaabdfccd8363baebbaf64e31b05696aa6d88d8bc57449b07
|
| 3 |
+
size 295584650
|