Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56fc85170f7c52f1e9ac2402184118e54e979bc593a942b62491a368125b3962
|
| 3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab8e8f05520a5e213d2233c3d39fea5396c0fea9f71fc5793e8c3da62cfc2f30
|
| 3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcaa315ca2198284cb385a2533d6ce3049142497e3a92f3bb108e40a31c9d57e
|
| 3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ae342f15c8941cf7b73da8583b0354f25dad9a4f94509e83df8f750aa6e6e14
|
| 3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e04219236af11cd00523eecab74e6d8eeade903c0afade90763ed8a1afc1384
|
| 3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40c03aede173c98dfe0cddeda01c173a5c7c8b81fe08b4c72bb1df29206d6f5c
|
| 3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07c2403d703ac282fb4c80815544a86be3ae64153a3106271ee403e7c98f9507
|
| 3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b343b9655b873be9c938471eaeddeb2d0327e041ff7b6ecd42e588cb60ef7a49
|
| 3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 1.8673917055130005,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.5411,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 4.476343631744385,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.0286,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 2.270127296447754,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.6094,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 4.0221686363220215,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.0339,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 1.7531377077102661,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.6154,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 2.4782919883728027,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.3927,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 3.082655668258667,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.5301,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 1.6040374040603638,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5479,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 3.0767765045166016,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.9126,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 2.117967128753662,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.5114,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 4.502895355224609,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7164,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 4.821036338806152,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.526,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 2.051273822784424,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.6118,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 3.2917582988739014,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.6338,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 4.47807502746582,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.7823,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 6.614592552185059,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.3987,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 3.840501546859741,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.6544,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 2.3704311847686768,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.8242,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 4.5160675048828125,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9385,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 3.4682557582855225,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.2132,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 2.3830385208129883,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6107,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 3.068246603012085,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.6467,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 3.524432897567749,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.602,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 3.5651185512542725,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.7029,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 3.3311729431152344,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.5356,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 5924252351987712.0,
|
| 190 |
+
"train_loss": 0.7648095703125,
|
| 191 |
+
"train_runtime": 151.5084,
|
| 192 |
+
"train_samples_per_second": 1.32,
|
| 193 |
+
"train_steps_per_second": 0.33
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 5924252351987712.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:334b9877772a1e30d63aa38a00bfe896e15d6dc09c88dfa1ccab4979c6b4658d
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cba2b604640bee7060c99d65ff75b14153ed90fe88af5b278be99184f716b48b
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:208506304141fe31fb85d268825264673b4b52ed6aaecb961f3f3d21d3d2fd7f
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a74cebc2d0c93eb7caf98555a9f3baffce9ed8003e2895830d5109ebb38fdc2
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffdd84076af2f4ac0d6e4d0bc7ec7bba103570f06c16bb3798413a752b415ffd
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ee1ccda5a7665844256546c440107724cb0bdfb8b64df7963f3a7fab67ab32e
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22998a139bd48538fe5906098fa3bb1965b44f2ebfecf3909023a90dc7292ba0
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35a5c1a43f28032b066c673c6abfe9651e83a59b76cb6708a69fdfd75106fd52
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 4.583834648132324,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.6835,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 4.129000186920166,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.6311,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 1.4998234510421753,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5363,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 2.6047112941741943,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.6187,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 3.2830750942230225,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.601,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 2.440368413925171,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.5193,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 4.117373466491699,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.6884,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 5.755882740020752,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.731,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 1.5773918628692627,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4884,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 2.3141613006591797,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.55,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 1.520458698272705,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.5297,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 2.141937732696533,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.5958,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 3.5650408267974854,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.6784,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 3.2956409454345703,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.7442,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 2.7933690547943115,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.6616,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 4.000720977783203,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.4821,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 5.566102981567383,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.4605,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 5.742500305175781,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.4153,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 4.2542405128479,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.4783,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 6.833155155181885,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.6371,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 7.5326313972473145,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.7853,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 2.1974692344665527,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7622,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 2.442321538925171,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5715,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 2.5812339782714844,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.4764,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 5.182116508483887,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.6237,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 1.4241485192953856e+16,
|
| 190 |
+
"train_loss": 0.5979843139648438,
|
| 191 |
+
"train_runtime": 252.1553,
|
| 192 |
+
"train_samples_per_second": 0.793,
|
| 193 |
+
"train_steps_per_second": 0.198
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 1.4241485192953856e+16,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d06991855fd1b69b3806b8d1eb324b9c0201e478036fd3b65d56b34b2787e5ad
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e6628ce28b478d4a5c9a4cae41524f1b64d84dc93bf7a2f34161dca94fad10f
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8695c1f94b299b5faedaedf37b7a570f7b23e7cd1fe1da35b023a3b1ced9d2f6
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daa8a795000144eef55b204b54325181dd9bda9e639727b62cddafd8082ca064
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6489762c514a53fca0821096bf89c0f31cbaf5e46df0959567c178571ec0560
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9a52a2036d00c14f8d20c1cfb605b021050b0d6e423a449f39ea54b7350440d
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5914d4ab3c8d4874bebfbc1901b85323aed556b0db25307d10b91caf203cedb0
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c81a173f5ad5a59863f3ca8f811af06d562e8dbe2fe45ef5fba724b5e8c164a
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 4.558183193206787,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.5488,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 2.947014331817627,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3348,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 1.299033761024475,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.3584,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 3.6182351112365723,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.3221,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 5.334591865539551,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.4843,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 8.80336856842041,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.6279,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 11.382059097290039,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.5401,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 11.98203182220459,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.717,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 6.630352020263672,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.697,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 5.681330680847168,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3569,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 5.276439189910889,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.6034,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 4.0495524406433105,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.2887,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 10.429891586303711,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.06,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 5.012072563171387,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.6967,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 8.718036651611328,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.7567,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 4.441227912902832,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.3985,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 3.394705057144165,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.7157,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 5.729661464691162,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.5636,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 9.694987297058105,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9611,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 12.702645301818848,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.7439,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 5.634037017822266,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.8179,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 5.084414005279541,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7732,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 2.3128256797790527,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5043,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 6.491540431976318,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.6021,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 4.465042591094971,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.0634,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 1.6198255784230912e+16,
|
| 190 |
+
"train_loss": 0.6214560890197753,
|
| 191 |
+
"train_runtime": 274.6327,
|
| 192 |
+
"train_samples_per_second": 0.728,
|
| 193 |
+
"train_steps_per_second": 0.182
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 1.6198255784230912e+16,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1053397e04c343b02320d481ade7174ca2efdfe41829a60dfbb49ebecd579803
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bea8e38baaf09ee6510b2aef0a46ddf0be5ca9330c1cae666ca645c7b432ab4e
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dbc3a8d9729d355f15cade994d232b6c0f017a20aa8d73b98f17c228c84361b
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09d1655d128c53da006db567fea2fc7434b9b8044b6f845d52b8db99dd2320a4
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29211e918a07e23fe407aeeeb62d9d534e126d7a2a51b8c8f7a1ff088c95be5d
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47ea3bcbc18afe15bb2245d06bd000d2e56ecf6f5fe6f33c6ad04379250be315
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a6fe3cd7816645ae9460f490fb1037ffd4282c328e9bd8c17e84d97e586b4e7
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:100ad256bc9de8669440660abe10a6c5d27f52c4a68a67e134bafa54426c81ac
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 1.39784574508667,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.9756,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 2.8972103595733643,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.0298,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 1.9185339212417603,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.686,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 1.6782031059265137,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.7958,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 1.4360851049423218,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.828,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 2.031978130340576,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.7959,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 2.223369836807251,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.9572,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 1.5497606992721558,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5985,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 3.152341842651367,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.9308,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 1.995869755744934,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.8724,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 2.4577951431274414,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7107,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 2.686221122741699,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.782,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 2.1014060974121094,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.7136,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 3.085268974304199,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.723,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 4.33972692489624,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.0076,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 6.186117172241211,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.9203,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 2.994359016418457,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.6973,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 5.756070613861084,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.0156,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 1.8890563249588013,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.6883,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 2.047783136367798,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.6198,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 3.1189417839050293,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.8165,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 2.0808229446411133,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.6428,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 1.7849960327148438,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.6548,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 1.386960506439209,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.6403,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 1.6751240491867065,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.8999,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 2.096063809637581e+16,
|
| 190 |
+
"train_loss": 0.8000931072235108,
|
| 191 |
+
"train_runtime": 272.324,
|
| 192 |
+
"train_samples_per_second": 0.734,
|
| 193 |
+
"train_steps_per_second": 0.184
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 2.096063809637581e+16,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:688a3b8604720e57257db0cf64bd1dd9f30ca73277af465d7db58ae4ae98ffa9
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff353918ed1d60324e386bfdc118226bf909d190315a8921ef2e8baa24758a8b
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95a6f306d0adb5c0e05ec683bf401fe4c04d8cac99b9337deec12ee1d0761614
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a2212526d781487ec5c8161ff06df8f26308669cb418445de39cc2da74fc005
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2b51e1c7dedf009ec22fcd53095edfc51e52c3502127c959f9f27aab2fd74e3
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9abb78bfcdfa5b5bc3e68940b635fa9f3acda70c69ce4dba811709e0c3d59b74
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ebf24c9b4a2f70e078953f2222de038fffc31cbf26633368fbe572f4b844653
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77ffc1003199cdb9ce5678a067107dda4bb11f43ee2f32137a9bac5ba6d44053
|
| 3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 1.4425019025802612,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.5973,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 1.215862512588501,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.6856,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 0.7811527252197266,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5332,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 1.1735533475875854,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.705,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 2.8437013626098633,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.7182,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 1.6411240100860596,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.4589,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 1.3925532102584839,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.8512,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 3.309992551803589,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.9773,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 2.3441174030303955,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4745,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 1.3988115787506104,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3502,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 1.3023474216461182,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.5154,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 2.3900015354156494,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.8253,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 5.1887125968933105,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.2769,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 3.921790599822998,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.9044,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 2.175091505050659,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.6082,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 2.267071485519409,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.8991,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 3.1593542098999023,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.6995,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 3.4321177005767822,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.7938,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 2.110840082168579,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.1223,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 3.0541820526123047,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.041,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 3.609933614730835,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.7096,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 1.9673751592636108,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7004,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 12.000975608825684,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.5836,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 3.2175731658935547,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.824,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 3.6081643104553223,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.8113,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 2.184824952664883e+16,
|
| 190 |
+
"train_loss": 0.7466500663757324,
|
| 191 |
+
"train_runtime": 278.7893,
|
| 192 |
+
"train_samples_per_second": 0.717,
|
| 193 |
+
"train_steps_per_second": 0.179
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 2.184824952664883e+16,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23f128e99f0cacbc599ce4a7531221c9fbbb4cfb22fb3910e0fdc28ef2a2dbba
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad23f8d514321daa4aac439f617c0c5875a6d1c6d3ba671988a7b0ee07963eaf
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eb433a9db64a00648ed4010a157b6232e6b4a7c2139de17cb01309403eadfe4
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e743a6d1bfa05bf565720cd577338a82db7a896ef110f90486e00d96b73e6d96
|
| 3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d526931e5403c2c186741e4e1fbc95717cacf149c4f17c90939a694badf824f6
|
| 3 |
+
size 778341034
|