Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1cca839a6000b85230580c3cb523ef92e5c03763db124a522f8c7bb88607aee
|
| 3 |
+
size 180440142
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf9d18350e055b39ca5dad52b58bbf3e7c2e287f14d70e917e7236648c476bec
|
| 3 |
+
size 180440142
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae408b7c41f30c80ee1fdb5b0728cf48ec831c3a1ffb082daa7de7338f7d2ed8
|
| 3 |
+
size 180440142
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2b76e87a2e9700efdac5da11b74c0c5349915ee378939e6d874b3fc61e7f228
|
| 3 |
+
size 180440142
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62c6df8106ca65abe1a8e70c8407eae8785cb85b71893fb30c5d069e67caf712
|
| 3 |
+
size 180439850
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:176ad4bfd417145e4051f4fe7c203ce4e8f31c86cda2fcc22bace0468d88ca88
|
| 3 |
+
size 180440142
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a3d0619b2b3932156534630bc25b07904f9a6a8df4429d822954a3ef0c473a7
|
| 3 |
+
size 180439850
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86ced7aa854b5dd6d72d4d0e3390d1a2c64bb342a1b3b8efd632c6be6e8504b0
|
| 3 |
+
size 180439850
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 5.389871120452881,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.837,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 7.913750648498535,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.5072,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 2.8081469535827637,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.9628,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 2.1584787368774414,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.2402,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 1.2553013563156128,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.4137,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 0.07167795300483704,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.5048,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 1.2355425357818604,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.1466,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.243406295776367,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.3616,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 2.1192171573638916,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.5938,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 1.964920163154602,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.6115,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 1.7399609088897705,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.3877,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 3.4627339839935303,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 3.434,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 3.5779869556427,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.3658,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 2.113224506378174,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.119,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 1.6840742826461792,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.4981,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 2.0056612491607666,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.9441,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 1.881331443786621,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.1007,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 1.5827242136001587,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.3904,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.0497872829437256,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.8498,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 1.5571120977401733,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.7427,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 1.50515615940094,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.5954,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 5.690471649169922,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.8239,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 2.0426018238067627,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.8085,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 4.374183654785156,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.0815,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.9751304388046265,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.4038,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 2.5408687591552734,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.8188,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 3.9847683906555176,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.5515,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 1.9399210214614868,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.516,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 1.8243143558502197,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.4914,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 2.0096545219421387,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.5824,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 1.177213191986084,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.8273,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 2.069615125656128,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.262,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 2.477707862854004,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 3.1642,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 4.157680034637451,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.7826,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 4.135859489440918,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.3506,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 1.74435293674469,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.9626,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 0.8117738962173462,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.748,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 2.52223801612854,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.0564,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 3.414597988128662,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.9622,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 1.8665771484375,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.3819,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 1.227982997894287,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.6077,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 2.479780912399292,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.1214,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 5.932579040527344,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.2493,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 1.6842375993728638,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.3546,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 1.6105072498321533,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.8795,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 2.8480112552642822,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.2564,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 1.4740058183670044,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.6387,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 1.9073842763900757,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.783,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 1.1106702089309692,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 1.1876,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 5.465167045593262,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.9835,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 5535011444883456.0,
|
| 365 |
+
"train_loss": 1.0648856735229493,
|
| 366 |
+
"train_runtime": 136.9082,
|
| 367 |
+
"train_samples_per_second": 2.922,
|
| 368 |
+
"train_steps_per_second": 0.73
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 5535011444883456.0,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:426a6fa480cdee236f57ad8bd1e64f82ae6c8c08c336b37b68885bc020cf05c2
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:126b9ea9185472b6148b1009458028b25ed07a4225a36abfa17c2617a5119942
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3da09528db24d62a6dbf1d13ea3571fec5c829ccf44e39bed0bfabcdfdb9d8f9
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3ec9b28b108540f69369acb8d9b021d5b8f731e686adee24e49aeb093b302b8
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:561ae01465e3ce055e411d57a2566ab2a931b524238634f089ba5fec3d6db363
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa6d2fc7920ad0af27cbfb4b5f3e1727671352e109fa655686b445ad69003e4c
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b59509bdbe736d456e7bbffe8c5fb030bb0cb522fbe269f3c9323dacc7fa89a
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:371f56537c6cc23dcda83f899a38b9bd947cb7c0755f14c1deac18a9b9b2f68d
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 1.2099864482879639,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.7102,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 2.4037203788757324,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.1519,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 0.8102778196334839,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.9684,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 1.821050763130188,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.5301,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 1.580217957496643,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.9518,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 1.8814568519592285,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.8458,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 3.3938233852386475,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.9475,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 1.641783356666565,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.1361,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 1.12887442111969,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.7893,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 3.1026272773742676,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.9656,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 3.3817358016967773,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.5276,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 2.19884991645813,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8887,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 2.272486925125122,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.9409,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 2.282609701156616,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.4966,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 1.1684859991073608,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.2735,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 0.7757588028907776,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 2.7099,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 1.7310632467269897,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8871,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 2.0317158699035645,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9895,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 3.5124049186706543,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.317,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 0.9667242169380188,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.6481,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 2.8257367610931396,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.8701,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 2.1212313175201416,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.1708,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 2.276118040084839,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.0044,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 2.457052707672119,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.0235,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.5500776767730713,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.582,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 3.4070324897766113,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.9971,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 2.191951274871826,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.2354,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 1.1881375312805176,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.9124,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 2.418442964553833,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.196,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 1.8029783964157104,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.01,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 1.943813443183899,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.3572,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 1.0548287630081177,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.9924,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 5.823477745056152,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.3245,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 2.152376413345337,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.3052,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 2.3831217288970947,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.1949,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 3.091294050216675,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.1667,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 1.1181700229644775,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.9937,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 1.2995938062667847,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.7476,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 1.4761587381362915,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.6168,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 1.78084135055542,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.0055,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 1.6266769170761108,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.4055,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 5.890852451324463,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.3506,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 3.300797939300537,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.9899,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 1.9619410037994385,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.158,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 1.9490997791290283,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.9689,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 1.6852408647537231,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.0103,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 5.842019557952881,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.546,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 1.3952200412750244,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.8511,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 1.604957103729248,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.7199,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 1.6361178159713745,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 1.0714,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 1.3244159611633664e+16,
|
| 365 |
+
"train_loss": 1.1290626525878906,
|
| 366 |
+
"train_runtime": 225.692,
|
| 367 |
+
"train_samples_per_second": 1.772,
|
| 368 |
+
"train_steps_per_second": 0.443
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 1.3244159611633664e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c0807db4338ab7a7d474fa8f7262b4fd4fe1cea1d99a5f168bf18ecec9ef12c
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15f4a9a494715a8be98248261e6efb61e0724a29fa8dbd251bb1618ac5e66098
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:622c64fd65d2677a6ff472f9c1179ea6ee26f41e360fc66f96fe527557ad8b45
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b10223f228633d93c35e78140b4e8e5379be4d4a5fe2c77de1bff27ce78e45aa
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa6e93b7a9598502f5c770e2e2311166fe22644f01d92a98ed0820e2e9631995
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dad6c74c66eca0f729db8dd76453595a8ce5b3a051f531a2d02bae39d9644f8
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:270cec05ffc3f2513dda5349f491335f377213b98445b828103e6e8eacf2e5d3
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0721b69261ec658126151d66332e80e1dd8b0698ab1ea907ce713f11e8611af
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 1.0289795398712158,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.5335,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 0.6102533936500549,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.4069,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 1.9161092042922974,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.8354,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 2.4946720600128174,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.7206,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 5.380189418792725,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.1956,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 1.4468796253204346,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.4622,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 3.1074416637420654,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.6658,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.196979284286499,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.9451,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 2.555473566055298,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4677,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 0.8904308676719666,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.6847,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 2.3065459728240967,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.3474,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 5.23778772354126,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.6669,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 2.0633652210235596,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.0448,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 3.159832715988159,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.2574,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 5.983434200286865,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.8387,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 2.2606232166290283,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.433,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 1.855712652206421,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.7482,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 6.815300941467285,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.6416,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.802886724472046,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.7273,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 0.7808118462562561,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.1985,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 2.313674211502075,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6364,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 7.197145462036133,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.1568,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.7081199884414673,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.937,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 0.9655914306640625,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.5069,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 4.5406389236450195,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.6002,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 10.869454383850098,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.7676,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 10.128218650817871,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 2.3426,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 5.551089763641357,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 2.5984,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 1.7278666496276855,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.4457,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 2.7146732807159424,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.6715,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 5.558541297912598,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 1.3482,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 3.7671058177948,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.8286,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 4.145376205444336,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.6906,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 2.8701162338256836,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.7684,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 6.557702541351318,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.433,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 1.7119065523147583,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.5752,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 1.7837646007537842,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.6808,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 8.07896900177002,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.8413,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 5.091773509979248,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.1644,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 2.6711478233337402,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.8726,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 2.857771396636963,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.8909,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 3.943341016769409,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.5204,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 1.16873300075531,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.9623,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 4.725329399108887,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.1609,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 5.48153018951416,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.1443,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 1.6034471988677979,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.762,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 2.3010993003845215,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.1309,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 2.12829327583313,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.6846,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 0.9714277982711792,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.5588,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 1.1446884870529175,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.8131,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 1.5581131495702528e+16,
|
| 365 |
+
"train_loss": 0.926310920715332,
|
| 366 |
+
"train_runtime": 248.2955,
|
| 367 |
+
"train_samples_per_second": 1.611,
|
| 368 |
+
"train_steps_per_second": 0.403
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 1.5581131495702528e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e36f60212361bbfccbcad493cdc07f58f903cc5b26ec97b01cc2c7fe9d10af7b
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cc0950ab05e4bd2ff91260af512e1d3bcb386c17aaf1967c29cbdecadf496d1
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61075455fa883e80d3f5e40eb23b6ec9dbd7b323024842c8818e6d6bddca99ae
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c218d2b1ddd8fbda4ebbc02cdfc52e08c2cf0d833b40cb79a554c84f8b2cf8bf
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc226c37298b921ad72a709271c310cb1853200d5329083c1b0c9edba8173603
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1285b641c8c20c1a627caaa4e080366067fcabb4ede00871a43c9048756082ee
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd4cf7b9470bc33a1b208726dd61d3fb16469f17d2275782c2314eb069aee0f9
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a277d042f2f6279ba75231cfe7a4f10a02e921a1271b1670a3b114699366b7da
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 1.3390214443206787,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.8941,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 0.9163679480552673,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.9235,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 0.5888918042182922,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.6486,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 0.30081573128700256,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.2813,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 1.4009698629379272,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.4144,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 2.2374634742736816,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.627,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 2.923579692840576,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.1054,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 1.857440710067749,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.9148,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 0.8682881593704224,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.3867,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 2.028822422027588,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.9991,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 1.2788021564483643,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.5667,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 2.953608989715576,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.9328,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 1.444178581237793,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.8467,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 0.8711784482002258,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.343,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 0.8109800815582275,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.3157,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 4.3886260986328125,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.7694,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 1.8276898860931396,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.0568,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 1.259130597114563,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9007,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 2.4246671199798584,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.2372,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 2.6753697395324707,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.0153,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 0.6607201099395752,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.2803,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 1.2931270599365234,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7531,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.034201741218567,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5761,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 1.261017084121704,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.9825,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 1.7095365524291992,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.6045,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 2.2309482097625732,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.0364,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 3.170400381088257,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 2.1252,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 1.2634927034378052,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.1779,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 2.0210254192352295,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.7033,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 0.7227587103843689,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.3596,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 0.5512145757675171,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.2827,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 0.8612607717514038,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.3879,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 0.9014844298362732,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.8704,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 0.5804516673088074,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.7523,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 0.9667078852653503,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.5082,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 2.913699150085449,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.8653,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 2.9874722957611084,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.5091,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 2.7036550045013428,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.6436,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 1.1027770042419434,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.7771,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 0.8439675569534302,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.2235,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 8.07174301147461,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.4817,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 1.4386396408081055,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.725,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 3.713127851486206,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.1037,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 0.2775169014930725,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.2505,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 4.276524543762207,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.7761,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 6.455582618713379,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.8323,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 1.1010710000991821,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.3133,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 1.4408540725708008,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.2172,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 0.2367064505815506,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.996,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 1.8455318212509155,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.4503,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 2.002019803149107e+16,
|
| 365 |
+
"train_loss": 0.714879515171051,
|
| 366 |
+
"train_runtime": 248.3761,
|
| 367 |
+
"train_samples_per_second": 1.61,
|
| 368 |
+
"train_steps_per_second": 0.403
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 2.002019803149107e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8083c948c51172cb731618e51b90a12546b054bf8762835f1d8f32c30904a0d5
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a0d9ba06d1bf534d64476ca8183bf94a7cdc5e70ec3d07c42837d157c9a4a0b
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e305ef4e1ba0d2ce1eabaa6f27d363d493e0800b1743aa78433ec86ed049f78
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6f79c8369cf454e0f47a9171dc1cabedfb911e727f8a2cc2373df780aae495e
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b279129dda29d997289d3ed732f3f71df77a02625d140feb5553a9b77a65f530
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1115eb7ff110cda959261def5f6bb34bd3f6074286ec6891c811e4b5fb3fab9
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:886257f645888c835465772d39b989f229623bd4da1529f6b28ef14ae2ffd167
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da72acd2878f27d6054be52695e145be79f4f1ed7b1fc8ce778cc5ef3ea2220c
|
| 3 |
+
size 389170122
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 100,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.02,
|
| 13 |
+
"grad_norm": 2.3232831954956055,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.0249,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.04,
|
| 20 |
+
"grad_norm": 2.5343315601348877,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.8533,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.06,
|
| 27 |
+
"grad_norm": 0.305483341217041,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.3197,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08,
|
| 34 |
+
"grad_norm": 0.517935037612915,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.1904,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.1,
|
| 41 |
+
"grad_norm": 5.153202533721924,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 2.3176,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12,
|
| 48 |
+
"grad_norm": 3.158588171005249,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.7701,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14,
|
| 55 |
+
"grad_norm": 0.5296018719673157,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.5261,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16,
|
| 62 |
+
"grad_norm": 2.1179733276367188,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5498,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18,
|
| 69 |
+
"grad_norm": 1.0289561748504639,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4771,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.2,
|
| 76 |
+
"grad_norm": 2.6146349906921387,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.9022,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.22,
|
| 83 |
+
"grad_norm": 3.7544004917144775,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.9887,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24,
|
| 90 |
+
"grad_norm": 5.7699713706970215,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.5863,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26,
|
| 97 |
+
"grad_norm": 6.096447944641113,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.7896,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28,
|
| 104 |
+
"grad_norm": 2.786501169204712,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.9298,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.3,
|
| 111 |
+
"grad_norm": 3.055405616760254,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.8627,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32,
|
| 118 |
+
"grad_norm": 1.4495849609375,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.5812,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.34,
|
| 125 |
+
"grad_norm": 0.9012460708618164,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.2027,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.36,
|
| 132 |
+
"grad_norm": 0.7602607011795044,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.5884,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.38,
|
| 139 |
+
"grad_norm": 1.8072483539581299,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.1502,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.4,
|
| 146 |
+
"grad_norm": 2.375112295150757,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.7885,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.42,
|
| 153 |
+
"grad_norm": 4.289775371551514,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 2.5796,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.44,
|
| 160 |
+
"grad_norm": 2.5001745223999023,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.6879,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.46,
|
| 167 |
+
"grad_norm": 1.169258952140808,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.8135,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.48,
|
| 174 |
+
"grad_norm": 2.9684996604919434,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.9738,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5,
|
| 181 |
+
"grad_norm": 2.281973361968994,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.1334,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.52,
|
| 188 |
+
"grad_norm": 1.8545925617218018,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.7442,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.54,
|
| 195 |
+
"grad_norm": 6.913942337036133,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.2487,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.56,
|
| 202 |
+
"grad_norm": 3.960796356201172,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 2.6527,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.58,
|
| 209 |
+
"grad_norm": 2.8161797523498535,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.2346,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6,
|
| 216 |
+
"grad_norm": 2.4226303100585938,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.4142,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.62,
|
| 223 |
+
"grad_norm": 1.7144083976745605,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.6787,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.64,
|
| 230 |
+
"grad_norm": 1.2450892925262451,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.6321,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.66,
|
| 237 |
+
"grad_norm": 1.3646719455718994,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.6567,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.68,
|
| 244 |
+
"grad_norm": 1.297370195388794,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.6927,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7,
|
| 251 |
+
"grad_norm": 1.9890066385269165,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 1.2573,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.72,
|
| 258 |
+
"grad_norm": 4.081875801086426,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.2384,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.74,
|
| 265 |
+
"grad_norm": 1.1986734867095947,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.3413,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.76,
|
| 272 |
+
"grad_norm": 0.6766782402992249,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.3358,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.78,
|
| 279 |
+
"grad_norm": 2.628237724304199,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.882,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8,
|
| 286 |
+
"grad_norm": 1.969377040863037,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.4629,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.82,
|
| 293 |
+
"grad_norm": 4.985935688018799,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.8891,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.84,
|
| 300 |
+
"grad_norm": 3.395092010498047,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.2739,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.86,
|
| 307 |
+
"grad_norm": 0.5362117290496826,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.0327,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.88,
|
| 314 |
+
"grad_norm": 1.663097858428955,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.3678,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9,
|
| 321 |
+
"grad_norm": 8.288853645324707,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.8778,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.92,
|
| 328 |
+
"grad_norm": 1.423278570175171,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.9577,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.94,
|
| 335 |
+
"grad_norm": 2.7824862003326416,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.774,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.96,
|
| 342 |
+
"grad_norm": 0.8134786486625671,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.3738,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.98,
|
| 349 |
+
"grad_norm": 1.9615880250930786,
|
| 350 |
+
"learning_rate": 2e-05,
|
| 351 |
+
"loss": 0.6358,
|
| 352 |
+
"step": 98
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 1.0,
|
| 356 |
+
"grad_norm": 2.486088752746582,
|
| 357 |
+
"learning_rate": 2e-05,
|
| 358 |
+
"loss": 0.4703,
|
| 359 |
+
"step": 100
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 1.0,
|
| 363 |
+
"step": 100,
|
| 364 |
+
"total_flos": 2.0033979121926144e+16,
|
| 365 |
+
"train_loss": 0.9942560279369355,
|
| 366 |
+
"train_runtime": 250.7882,
|
| 367 |
+
"train_samples_per_second": 1.595,
|
| 368 |
+
"train_steps_per_second": 0.399
|
| 369 |
+
}
|
| 370 |
+
],
|
| 371 |
+
"logging_steps": 2,
|
| 372 |
+
"max_steps": 100,
|
| 373 |
+
"num_input_tokens_seen": 0,
|
| 374 |
+
"num_train_epochs": 1,
|
| 375 |
+
"save_steps": 500,
|
| 376 |
+
"stateful_callbacks": {
|
| 377 |
+
"TrainerControl": {
|
| 378 |
+
"args": {
|
| 379 |
+
"should_epoch_stop": false,
|
| 380 |
+
"should_evaluate": false,
|
| 381 |
+
"should_log": false,
|
| 382 |
+
"should_save": false,
|
| 383 |
+
"should_training_stop": false
|
| 384 |
+
},
|
| 385 |
+
"attributes": {}
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
"total_flos": 2.0033979121926144e+16,
|
| 389 |
+
"train_batch_size": 1,
|
| 390 |
+
"trial_name": null,
|
| 391 |
+
"trial_params": null
|
| 392 |
+
}
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6337f27b607152eee0d1b4ea2358cf08126147b4914b73bd5a8c8903c1ab0ced
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc2c476c9efb2d640b2d4693eb7ca33ed8a81427598a6ce8616fa53381917a31
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:932993b3ff1dc5e604696e5e767669c7c0b93343ea098462f7627f54cd9eba93
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c75322d52cd772b2db81ae2fa8c3c26dadfd35e55af48985ff630b2144b0af6
|
| 3 |
+
size 389170582
|
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56d327dca77c8944915eddac9b6933354e5d84bce5c15992ac927cdd1e864ad3
|
| 3 |
+
size 389170122
|