Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bf1bd922105b202c1332673b2650c3de98e3475fb786f5b8afb11368e5b6ecb
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7719e20ac179b3505bc16fe3d26557236f289d0aafd2a963fe447a9af6798b3
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c93ab2b8d890ac3fe7e1ea0de67b9123a0488d5bd7cb7109ea6c97edcf22c3d
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d18901eb260c181d30c1e1be564dd71da17a34f6bf6c333a89856341a4b907c
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85c6f07499b6d135cb2c16ce612f45f129974177654fe29012b34255fd4e71ec
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cf6d5f3802665484ccd7c41c90f01c08c8eb716f3bee52a676c0a2ea8f4cafa
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef8a5ad5233a4d504fc5a5368036b3332e83946a37160464ca568222cafaecdf
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:766c6490fd2b03599de1b54601d6a6664ef9260808f2c2b8c964dff572c8a955
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 3.284128189086914,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.0793,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 3.4005775451660156,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.0915,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 1.777352213859558,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.043,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 4.783378601074219,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.8805,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 7.2065749168396,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.3896,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 1.6766301393508911,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.6725,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 15.141353607177734,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.6133,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 17.0766544342041,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.8163,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 9.064810752868652,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.6379,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 11.561620712280273,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.3389,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 10.447920799255371,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.1638,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 7.2391462326049805,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.4152,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 6.883174896240234,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.1845,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 2.764719247817993,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.2519,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 6.025578498840332,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.1062,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 12.170540809631348,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.6212,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 7.470730781555176,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.1638,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 11.806479454040527,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.8462,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 1.250801920890808,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.1253,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 12.122519493103027,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.3373,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 3.9410598278045654,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.0798,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 3.405806064605713,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.1707,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 29.996286392211914,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.9968,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 5.970231533050537,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.2291,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 0.9755194187164307,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.2356,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 2184626743279616.0,
|
| 190 |
+
"train_loss": 0.3796170651912689,
|
| 191 |
+
"train_runtime": 115.2896,
|
| 192 |
+
"train_samples_per_second": 1.735,
|
| 193 |
+
"train_steps_per_second": 0.434
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 2184626743279616.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6011d6315d2580827738242cee8ef75d254b4cea2dd8679d0473b7a9db54cc9f
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad508db3f5914917b8e2316d601bde72f9dd11c3f02fda6fd948437630f223e8
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d5a375f55d8fb1c9f9b8b9d71e2f41433471b22a3f71c7c0ece807ff8703d3b
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0dbffa1ada429cae02a98572d4793e4f33e169588a8884543c9edfa076d3016
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4840c34ce37f38ac86c58160329347c4f31b8a0d2ca8c6f126c58a50979f8da0
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40a6e5dae1e8232e195fdc1aedc64fecf22d65266fc23ce9c8416e8884fb3793
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60bd84c9ce839d36a98d585b4eb739425ac0dda9019d51f2c95a11922d6e2fc0
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:778f95e12407ad50b015c33069c9a39c1140bc5a2a856c0e9f7fff2badc47c7a
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 1.1501377820968628,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.0192,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 0.30648428201675415,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.0154,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 0.0933217778801918,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.1095,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 5.344268321990967,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.0769,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 0.127670556306839,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.0712,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 0.12052742391824722,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.0189,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 0.04328983649611473,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.0113,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 0.04012615233659744,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.0542,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 0.18714116513729095,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.0121,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 3.1324093341827393,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.0418,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 0.05844057723879814,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.0108,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 6.132259368896484,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.3944,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 0.0780622586607933,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.0122,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 34.591758728027344,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.6219,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 0.04694774001836777,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.0107,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 0.03775114193558693,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.0645,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 0.4576091468334198,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.0172,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 3.4563567638397217,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.0723,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 3.871311902999878,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.1694,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 0.24964451789855957,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.1908,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 0.06994156539440155,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.0383,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 0.0806451290845871,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.0183,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 4.901146411895752,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.1231,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 0.1762755960226059,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.0135,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 5.6551384925842285,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.0321,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 2202126654636032.0,
|
| 190 |
+
"train_loss": 0.0887929368019104,
|
| 191 |
+
"train_runtime": 116.0987,
|
| 192 |
+
"train_samples_per_second": 1.723,
|
| 193 |
+
"train_steps_per_second": 0.431
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 2202126654636032.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78ac4e1058836b6234379b6e570fa3e71002444f037f47c23c6074c38371cca6
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c927ff8e1b3b4faaa34bde0c2692889ce1c086425706d7280a79dc051533dee
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f95ac4a906febc91c5cdb3813cde6eb88cfe3bc22ba6fb8af96611171ca1a688
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e262c5e68c4341c4604cab13775efef4ca2fa56dcb00f1b4eb61a6aac7db31c
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d86da56ef2fcb0770168449eb48fb8251868ad508baf093591f10e001ac71632
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00580e00a33f9296ac0a8c5158f4c03322b348cde61b769eaa5cd7738ec10606
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83d7537fc2c48cf1786a1ee004cc1881ef12160aaa0d2cff908db07aa3085b32
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bbb7ca95f52ed9b394dbb71a9734c8caa2eb2cf282d47f60c1fa00311a262ed
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 3.62923264503479,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.2634,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 2.1570146083831787,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.2137,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 4.34098482131958,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.5009,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 9.01052474975586,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.2679,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 4.558963298797607,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.6763,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 5.2403740882873535,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.631,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 0.49892091751098633,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.321,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 4.3842668533325195,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5071,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 3.5485029220581055,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.2333,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 8.232368469238281,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.4089,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 1.4529104232788086,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.265,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 3.8828446865081787,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.2181,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 1.24514901638031,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.3733,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 9.44221305847168,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.4971,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 2.5544800758361816,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.3479,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 7.6183319091796875,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2933,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 5.657953262329102,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.451,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 1.417629361152649,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.1971,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 2.1960906982421875,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.3496,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 4.147959232330322,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.8169,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 1.6373600959777832,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.2783,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 1.473294973373413,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.2325,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 4.739127159118652,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.3456,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 1.90013587474823,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.1663,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 5.234428405761719,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.3906,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 5202848019120128.0,
|
| 190 |
+
"train_loss": 0.36984447479248045,
|
| 191 |
+
"train_runtime": 190.6891,
|
| 192 |
+
"train_samples_per_second": 1.049,
|
| 193 |
+
"train_steps_per_second": 0.262
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 5202848019120128.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d13906ad1f936437fd1ca3ff97f1ba801c71be2b6f272adbb73ae8ab17b6e26e
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38308af0d31e928983ecb45207689c09907a4de47c8986628f7604409ef92085
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71a124f96018f2b50d800a1d7250ca46a03878d242a4c911ada9f11011f69b97
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fc1ef0d486b875990200b1dd094d84b2570ed11e53f6a9ee10f35d34f0f49ca
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb4e079328106d0413c0e0836461aaaa74ddb45de0b53648b630bdc6a6632418
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7865e64171462c37128ea3d65f6aa1f8d3f8ad1fae133ed26642947ec64cc251
|
| 3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4045eac0ab4825692e2c1f2596ed0574497bab56ebb98680bbfa7a3d0cc80c1
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db6ddaba6447528d3500e93c52fc8187a5fd82d3824af797deebd9999a4d9ac
|
| 3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 7.067582607269287,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.3825,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 4.283020973205566,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.2845,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 6.822042465209961,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.7916,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 9.666316986083984,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.3412,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 16.465579986572266,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.8459,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 6.560152053833008,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.6335,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 4.862478733062744,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.6741,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 7.447726249694824,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.5064,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 7.4205098152160645,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.9029,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 15.627077102661133,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.8091,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 16.306581497192383,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7351,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 4.587651252746582,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.1841,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 19.92629623413086,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.5481,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 3.447114944458008,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.3892,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 14.376614570617676,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.6816,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 19.715835571289062,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.6575,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 17.579479217529297,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8478,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 8.443402290344238,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.2789,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 2.802513599395752,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.3138,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 8.755653381347656,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.7962,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 11.076048851013184,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.6601,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 9.736947059631348,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.9945,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 9.903741836547852,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.6868,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 26.1480655670166,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.0493,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 10.546011924743652,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.8659,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 2231176135704576.0,
|
| 190 |
+
"train_loss": 0.6744278335571289,
|
| 191 |
+
"train_runtime": 114.3015,
|
| 192 |
+
"train_samples_per_second": 1.75,
|
| 193 |
+
"train_steps_per_second": 0.437
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 2231176135704576.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:169b161921bb01bf190d8f33849b33f9c52e7d430415f5e5a702607ff51d21b0
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31f3654c2de0c19271419c744626b570b8ee010b77749be74aa4c396bf6a43bc
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91f9067a5bd4fbe9beae979d71170b2c1e372c67acd5023c3e986430948e2719
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b889c9885f7c81ae50f155aebf41c60c283c525132b24699311c6c6f7f12500a
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb7771d615a3c16f065855527c013f6e19250c242746206ff6b6a409e79a45f3
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c87725e139a42e875b6819300a7f140a858cd2d2db35b382faba6ce53d8e3d7
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a336edf31a143a41bf64944c4e3b5afa7f26ca65509fd134b08f280f6dfc3e35
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ab51c825d382c8d805b7a1989808541ebe02284dfc053fd0859047835f96957
|
| 3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 50,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.08,
|
| 13 |
+
"grad_norm": 2.316213369369507,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.4539,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.16,
|
| 20 |
+
"grad_norm": 3.5787057876586914,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.5031,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.24,
|
| 27 |
+
"grad_norm": 3.0300209522247314,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.514,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.32,
|
| 34 |
+
"grad_norm": 3.986267328262329,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.4123,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.4,
|
| 41 |
+
"grad_norm": 5.813876152038574,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.5409,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.48,
|
| 48 |
+
"grad_norm": 1.9155079126358032,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.3129,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.56,
|
| 55 |
+
"grad_norm": 1.6175196170806885,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.2733,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 3.6581971645355225,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.3098,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.72,
|
| 69 |
+
"grad_norm": 3.621964931488037,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.4081,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.8,
|
| 76 |
+
"grad_norm": 2.364119052886963,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.1815,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.88,
|
| 83 |
+
"grad_norm": 4.366921424865723,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7291,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.96,
|
| 90 |
+
"grad_norm": 6.259946823120117,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.5821,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.04,
|
| 97 |
+
"grad_norm": 3.531437635421753,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.5318,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.12,
|
| 104 |
+
"grad_norm": 9.066308975219727,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.9163,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.2,
|
| 111 |
+
"grad_norm": 3.3480992317199707,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.4263,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.28,
|
| 118 |
+
"grad_norm": 2.767130136489868,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2811,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.36,
|
| 125 |
+
"grad_norm": 5.080353736877441,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.3363,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.44,
|
| 132 |
+
"grad_norm": 1.7336921691894531,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.5843,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.52,
|
| 139 |
+
"grad_norm": 2.0092973709106445,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.4642,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.6,
|
| 146 |
+
"grad_norm": 3.8756134510040283,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.4807,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.68,
|
| 153 |
+
"grad_norm": 2.4722533226013184,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.3807,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.76,
|
| 160 |
+
"grad_norm": 1.6810188293457031,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.3137,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.84,
|
| 167 |
+
"grad_norm": 3.1720468997955322,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.4738,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.92,
|
| 174 |
+
"grad_norm": 1.5610110759735107,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.2826,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 2.0,
|
| 181 |
+
"grad_norm": 2.7629005908966064,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.554,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 2.0,
|
| 188 |
+
"step": 50,
|
| 189 |
+
"total_flos": 6020445073571840.0,
|
| 190 |
+
"train_loss": 0.4498666000366211,
|
| 191 |
+
"train_runtime": 191.3091,
|
| 192 |
+
"train_samples_per_second": 1.045,
|
| 193 |
+
"train_steps_per_second": 0.261
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"logging_steps": 2,
|
| 197 |
+
"max_steps": 50,
|
| 198 |
+
"num_input_tokens_seen": 0,
|
| 199 |
+
"num_train_epochs": 1,
|
| 200 |
+
"save_steps": 500,
|
| 201 |
+
"stateful_callbacks": {
|
| 202 |
+
"TrainerControl": {
|
| 203 |
+
"args": {
|
| 204 |
+
"should_epoch_stop": false,
|
| 205 |
+
"should_evaluate": false,
|
| 206 |
+
"should_log": false,
|
| 207 |
+
"should_save": false,
|
| 208 |
+
"should_training_stop": false
|
| 209 |
+
},
|
| 210 |
+
"attributes": {}
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"total_flos": 6020445073571840.0,
|
| 214 |
+
"train_batch_size": 1,
|
| 215 |
+
"trial_name": null,
|
| 216 |
+
"trial_params": null
|
| 217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04ff9214659409561dfbaa016ea376607a875853b6d1a4c0b5696d32eaa6435b
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d40603d57ef7a099d0a49f40d6c03d0849f0e70990ada3d6564746d8a047708
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0d185e922421d0f321483a04cf9ab247f33a88bae7fe6478e14d587ec62b615
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7676ca765d23fc6a1f0a8ab3064c4017ae1ec8dccfe4446023c38f944052a7f
|
| 3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35e47f376b87f03095f830e87fa9eb4f84f5a0695a2984230753c25d7ad5a96b
|
| 3 |
+
size 778341034
|