Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f06a89dfcac5961ac5bdb293a2a3f574fcb9216e4593500ab6c2155ff3df0313
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe982dd9d5a3ae03280e3447d8063dcc43629d7dca6b8e90b03907c76b90a58
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aedba7096d5b5406fd46033b0b93b844abbe3d7454189a2035107d0dbb00caaa
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0526023fc6ccb898eb90c77e6bdc99b569624d6470ad5b920a20e689c289ad2b
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a263e7e7ee03bfcf87feac35953e8aa2fa2ce0c0e3c56c5dffbcf6c8a7d52721
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8e10816fae9b78236f55f93f29814d10a19539fd654a0c3e9737cbd47cd8d9d
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f4335cc459cd1552cda05f02b928ed4417cd0d799ff11e7ef4d08a16e0b8484
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c206007eb99dbc9905215c8f5443524bbcf2827728a8be2f97d0edee647ca575
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 43,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.09302325581395349,
|
| 13 |
+
"grad_norm": 2.9576117992401123,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.2411,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.18604651162790697,
|
| 20 |
+
"grad_norm": 1.2031710147857666,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.0578,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.27906976744186046,
|
| 27 |
+
"grad_norm": 2.9653682708740234,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.3511,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.37209302325581395,
|
| 34 |
+
"grad_norm": 4.665686130523682,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.1305,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.46511627906976744,
|
| 41 |
+
"grad_norm": 2.068437337875366,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.1576,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.5581395348837209,
|
| 48 |
+
"grad_norm": 0.7609916925430298,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.1547,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.6511627906976745,
|
| 55 |
+
"grad_norm": 0.8752633333206177,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.2439,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.7441860465116279,
|
| 62 |
+
"grad_norm": 0.7228463292121887,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.0869,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.8372093023255814,
|
| 69 |
+
"grad_norm": 0.5220616459846497,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.1898,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.9302325581395349,
|
| 76 |
+
"grad_norm": 0.5391299724578857,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.0884,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 1.0232558139534884,
|
| 83 |
+
"grad_norm": 0.4390765428543091,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.038,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 1.1162790697674418,
|
| 90 |
+
"grad_norm": 4.568640232086182,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.2474,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.2093023255813953,
|
| 97 |
+
"grad_norm": 4.5196356773376465,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.3904,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.302325581395349,
|
| 104 |
+
"grad_norm": 0.23186014592647552,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.101,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.3953488372093024,
|
| 111 |
+
"grad_norm": 2.5624356269836426,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.1768,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.4883720930232558,
|
| 118 |
+
"grad_norm": 0.4649677872657776,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.0719,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.5813953488372092,
|
| 125 |
+
"grad_norm": 0.7176182270050049,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.1396,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.6744186046511629,
|
| 132 |
+
"grad_norm": 2.2132463455200195,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.087,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.7674418604651163,
|
| 139 |
+
"grad_norm": 1.706496238708496,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.2997,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.8604651162790697,
|
| 146 |
+
"grad_norm": 0.48359882831573486,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.0237,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.9534883720930232,
|
| 153 |
+
"grad_norm": 0.40984606742858887,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.0393,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.0,
|
| 160 |
+
"step": 43,
|
| 161 |
+
"total_flos": 4777205812953088.0,
|
| 162 |
+
"train_loss": 0.15616962521575217,
|
| 163 |
+
"train_runtime": 217.1153,
|
| 164 |
+
"train_samples_per_second": 0.792,
|
| 165 |
+
"train_steps_per_second": 0.198
|
| 166 |
+
}
|
| 167 |
+
],
|
| 168 |
+
"logging_steps": 2,
|
| 169 |
+
"max_steps": 43,
|
| 170 |
+
"num_input_tokens_seen": 0,
|
| 171 |
+
"num_train_epochs": 1,
|
| 172 |
+
"save_steps": 500,
|
| 173 |
+
"stateful_callbacks": {
|
| 174 |
+
"TrainerControl": {
|
| 175 |
+
"args": {
|
| 176 |
+
"should_epoch_stop": false,
|
| 177 |
+
"should_evaluate": false,
|
| 178 |
+
"should_log": false,
|
| 179 |
+
"should_save": false,
|
| 180 |
+
"should_training_stop": false
|
| 181 |
+
},
|
| 182 |
+
"attributes": {}
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"total_flos": 4777205812953088.0,
|
| 186 |
+
"train_batch_size": 1,
|
| 187 |
+
"trial_name": null,
|
| 188 |
+
"trial_params": null
|
| 189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71c6fce6889d0763e53ec11b925ecf780872259115e0a4de278fecb1730f1c44
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:818822b11989459a5fc595b2555903333d4fbd06ec209d9f8fd739991ecd393f
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c441dfd77124cafa5afe9a4f3e30bd524ebfeb667ce4cb78133a3d18595567b1
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76093b70c93eb8976c70ba3b52470568b0f3d0896ec2dec92615014a4a25ae3e
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27e267c53893530bd8784e742b230c103f0af6e3be5e71601f6620f4c0100934
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:225727d72f520f2af545e163e03222702bf08e8084ca44d12dd0337c3ad2899d
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51d18abf0982eaad9817b60cffb3c40f99da4761429ac0c24f6149861969da4c
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e2e06f880660e7b5ffca36b45892d7f2ed89797c6ff687b9dc8da9431886cfa
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 43,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.09302325581395349,
|
| 13 |
+
"grad_norm": 0.3085244596004486,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.1226,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.18604651162790697,
|
| 20 |
+
"grad_norm": 3.019911289215088,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.2883,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.27906976744186046,
|
| 27 |
+
"grad_norm": 1.109711766242981,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.1198,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.37209302325581395,
|
| 34 |
+
"grad_norm": 0.4532826840877533,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.0175,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.46511627906976744,
|
| 41 |
+
"grad_norm": 0.8678146600723267,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.0323,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.5581395348837209,
|
| 48 |
+
"grad_norm": 2.618272066116333,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.1189,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.6511627906976745,
|
| 55 |
+
"grad_norm": 0.20907337963581085,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.0141,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.7441860465116279,
|
| 62 |
+
"grad_norm": 1.467644214630127,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.1211,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.8372093023255814,
|
| 69 |
+
"grad_norm": 1.8505585193634033,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.0966,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.9302325581395349,
|
| 76 |
+
"grad_norm": 0.5534336566925049,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.0249,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 1.0232558139534884,
|
| 83 |
+
"grad_norm": 0.42154374718666077,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.0169,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 1.1162790697674418,
|
| 90 |
+
"grad_norm": 0.37012800574302673,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.0338,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.2093023255813953,
|
| 97 |
+
"grad_norm": 0.9166113138198853,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.0618,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.302325581395349,
|
| 104 |
+
"grad_norm": 0.27272695302963257,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.0284,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.3953488372093024,
|
| 111 |
+
"grad_norm": 0.2518395781517029,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.0126,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.4883720930232558,
|
| 118 |
+
"grad_norm": 0.7915253043174744,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.0801,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.5813953488372092,
|
| 125 |
+
"grad_norm": 0.902931809425354,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.0334,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.6744186046511629,
|
| 132 |
+
"grad_norm": 0.3951447308063507,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.0307,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.7674418604651163,
|
| 139 |
+
"grad_norm": 2.1907150745391846,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.0596,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.8604651162790697,
|
| 146 |
+
"grad_norm": 1.2620713710784912,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.0869,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.9534883720930232,
|
| 153 |
+
"grad_norm": 0.3642316162586212,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.0304,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.0,
|
| 160 |
+
"step": 43,
|
| 161 |
+
"total_flos": 4812138065428480.0,
|
| 162 |
+
"train_loss": 0.06693689213242641,
|
| 163 |
+
"train_runtime": 216.4073,
|
| 164 |
+
"train_samples_per_second": 0.795,
|
| 165 |
+
"train_steps_per_second": 0.199
|
| 166 |
+
}
|
| 167 |
+
],
|
| 168 |
+
"logging_steps": 2,
|
| 169 |
+
"max_steps": 43,
|
| 170 |
+
"num_input_tokens_seen": 0,
|
| 171 |
+
"num_train_epochs": 1,
|
| 172 |
+
"save_steps": 500,
|
| 173 |
+
"stateful_callbacks": {
|
| 174 |
+
"TrainerControl": {
|
| 175 |
+
"args": {
|
| 176 |
+
"should_epoch_stop": false,
|
| 177 |
+
"should_evaluate": false,
|
| 178 |
+
"should_log": false,
|
| 179 |
+
"should_save": false,
|
| 180 |
+
"should_training_stop": false
|
| 181 |
+
},
|
| 182 |
+
"attributes": {}
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"total_flos": 4812138065428480.0,
|
| 186 |
+
"train_batch_size": 1,
|
| 187 |
+
"trial_name": null,
|
| 188 |
+
"trial_params": null
|
| 189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69816f63dda6857999b2188db1a0aaaa9147aab897dcee7573e9d55dbdf1b2cc
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c914d80153a80299ed8993a6ceb64539f8af74f9b6e40638c543863a69ad54f
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ce361c6c22db8c7c4ebfeaae20c30cb50de1598a3657cb2b4d2a0239088930b
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b726647932691999b54f2ab9ec9197c040c249348e7cb4864fddba8cca057773
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63647af6cd32eb3f3541fd71185343a2b826fe5badc33e786d96673a7068f25e
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbae85d3b932e88792d442e91b69a8dd7af46ddbc2f0e332cb4c695c7a9f454b
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc53b17de3dc585196fdcf469a3d94372c45f6eff9d977f4523242386413acb0
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:638b6048a232e5fd62e1b30f85d1598fe8d2c095bbb76d1ca75681d6b97b62b3
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 43,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.09302325581395349,
|
| 13 |
+
"grad_norm": 3.3456127643585205,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.2545,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.18604651162790697,
|
| 20 |
+
"grad_norm": 0.6647293567657471,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.143,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.27906976744186046,
|
| 27 |
+
"grad_norm": 2.341078519821167,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.1575,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.37209302325581395,
|
| 34 |
+
"grad_norm": 1.038511037826538,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.1649,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.46511627906976744,
|
| 41 |
+
"grad_norm": 1.1091891527175903,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.1359,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.5581395348837209,
|
| 48 |
+
"grad_norm": 0.9437009692192078,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.11,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.6511627906976745,
|
| 55 |
+
"grad_norm": 2.193312168121338,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.2129,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.7441860465116279,
|
| 62 |
+
"grad_norm": 1.4751211404800415,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.0952,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.8372093023255814,
|
| 69 |
+
"grad_norm": 0.38253253698349,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.0647,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.9302325581395349,
|
| 76 |
+
"grad_norm": 0.6969690322875977,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.1394,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 1.0232558139534884,
|
| 83 |
+
"grad_norm": 0.7709684371948242,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.1008,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 1.1162790697674418,
|
| 90 |
+
"grad_norm": 2.740997552871704,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.1238,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.2093023255813953,
|
| 97 |
+
"grad_norm": 1.5686851739883423,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.1016,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.302325581395349,
|
| 104 |
+
"grad_norm": 0.3873344659805298,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.0211,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.3953488372093024,
|
| 111 |
+
"grad_norm": 0.8399427533149719,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.1423,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.4883720930232558,
|
| 118 |
+
"grad_norm": 2.8620800971984863,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2682,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.5813953488372092,
|
| 125 |
+
"grad_norm": 1.715096116065979,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.1543,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.6744186046511629,
|
| 132 |
+
"grad_norm": 0.39780232310295105,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.0503,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.7674418604651163,
|
| 139 |
+
"grad_norm": 1.9032905101776123,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.1529,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.8604651162790697,
|
| 146 |
+
"grad_norm": 1.9974933862686157,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.1527,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.9534883720930232,
|
| 153 |
+
"grad_norm": 0.41782402992248535,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.1376,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.0,
|
| 160 |
+
"step": 43,
|
| 161 |
+
"total_flos": 4715460834623488.0,
|
| 162 |
+
"train_loss": 0.13745101662569267,
|
| 163 |
+
"train_runtime": 217.324,
|
| 164 |
+
"train_samples_per_second": 0.791,
|
| 165 |
+
"train_steps_per_second": 0.198
|
| 166 |
+
}
|
| 167 |
+
],
|
| 168 |
+
"logging_steps": 2,
|
| 169 |
+
"max_steps": 43,
|
| 170 |
+
"num_input_tokens_seen": 0,
|
| 171 |
+
"num_train_epochs": 1,
|
| 172 |
+
"save_steps": 500,
|
| 173 |
+
"stateful_callbacks": {
|
| 174 |
+
"TrainerControl": {
|
| 175 |
+
"args": {
|
| 176 |
+
"should_epoch_stop": false,
|
| 177 |
+
"should_evaluate": false,
|
| 178 |
+
"should_log": false,
|
| 179 |
+
"should_save": false,
|
| 180 |
+
"should_training_stop": false
|
| 181 |
+
},
|
| 182 |
+
"attributes": {}
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"total_flos": 4715460834623488.0,
|
| 186 |
+
"train_batch_size": 1,
|
| 187 |
+
"trial_name": null,
|
| 188 |
+
"trial_params": null
|
| 189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9f0a3b90e19eabbfc0e5635fe7059549c618b6e036f9f45a4372451a8a36e5e
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4567d214400f00e8531f2289e87b461d7e2ff36d3599953485e74c95a50ae33
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f52ffe528820e313e8399cc62c04cbc2bf7042f11dc9809454f8d9ac8e6419
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95607ac19caf68871278ba85509065e63ada58d495744bd25ca2ae6c2627a16d
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5921fb1eef7a3d375c934d78dd90029f1c52ed7f25c757f29f83bb17cced3e9d
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b1eefd262a4d3ee7c54828027c97f4b11f4186d76647bcae6b7ff7378e1a742
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fc91a80a0564d2d9cc2000161fd3302ed3cd3d82edead6b1e60e8270032e1a3
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ccdf785607bf48d74b05613167b1393b6d33de3717fd8db901693e4439448fa
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 43,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.09302325581395349,
|
| 13 |
+
"grad_norm": 1.1141589879989624,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.1236,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.18604651162790697,
|
| 20 |
+
"grad_norm": 1.2319393157958984,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3126,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.27906976744186046,
|
| 27 |
+
"grad_norm": 2.423560857772827,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.4022,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.37209302325581395,
|
| 34 |
+
"grad_norm": 2.308655023574829,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.2367,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.46511627906976744,
|
| 41 |
+
"grad_norm": 1.6019493341445923,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.1691,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.5581395348837209,
|
| 48 |
+
"grad_norm": 2.4790854454040527,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.2446,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.6511627906976745,
|
| 55 |
+
"grad_norm": 1.2402669191360474,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.1289,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.7441860465116279,
|
| 62 |
+
"grad_norm": 4.58521842956543,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.1723,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.8372093023255814,
|
| 69 |
+
"grad_norm": 1.7242616415023804,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.1184,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.9302325581395349,
|
| 76 |
+
"grad_norm": 2.655269145965576,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.244,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 1.0232558139534884,
|
| 83 |
+
"grad_norm": 2.253636121749878,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.2518,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 1.1162790697674418,
|
| 90 |
+
"grad_norm": 3.0500082969665527,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.2864,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.2093023255813953,
|
| 97 |
+
"grad_norm": 2.2204771041870117,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.31,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.302325581395349,
|
| 104 |
+
"grad_norm": 2.925663948059082,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.5858,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.3953488372093024,
|
| 111 |
+
"grad_norm": 1.716472864151001,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.2467,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.4883720930232558,
|
| 118 |
+
"grad_norm": 2.464635133743286,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.2649,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.5813953488372092,
|
| 125 |
+
"grad_norm": 0.828945517539978,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.0731,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.6744186046511629,
|
| 132 |
+
"grad_norm": 1.1858415603637695,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.111,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.7674418604651163,
|
| 139 |
+
"grad_norm": 3.732938766479492,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.2349,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.8604651162790697,
|
| 146 |
+
"grad_norm": 2.46227765083313,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.3129,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.9534883720930232,
|
| 153 |
+
"grad_norm": 1.6540902853012085,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.2932,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.0,
|
| 160 |
+
"step": 43,
|
| 161 |
+
"total_flos": 4880262747914240.0,
|
| 162 |
+
"train_loss": 0.25222200571104536,
|
| 163 |
+
"train_runtime": 215.4858,
|
| 164 |
+
"train_samples_per_second": 0.798,
|
| 165 |
+
"train_steps_per_second": 0.2
|
| 166 |
+
}
|
| 167 |
+
],
|
| 168 |
+
"logging_steps": 2,
|
| 169 |
+
"max_steps": 43,
|
| 170 |
+
"num_input_tokens_seen": 0,
|
| 171 |
+
"num_train_epochs": 1,
|
| 172 |
+
"save_steps": 500,
|
| 173 |
+
"stateful_callbacks": {
|
| 174 |
+
"TrainerControl": {
|
| 175 |
+
"args": {
|
| 176 |
+
"should_epoch_stop": false,
|
| 177 |
+
"should_evaluate": false,
|
| 178 |
+
"should_log": false,
|
| 179 |
+
"should_save": false,
|
| 180 |
+
"should_training_stop": false
|
| 181 |
+
},
|
| 182 |
+
"attributes": {}
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"total_flos": 4880262747914240.0,
|
| 186 |
+
"train_batch_size": 1,
|
| 187 |
+
"trial_name": null,
|
| 188 |
+
"trial_params": null
|
| 189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:676d1a1cc1b175d0235d62fc017e315c318119b6f9d2b694a759bf9012621fa2
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4244a475c759d82fa5ffbcbfc2deb96f9ee806361d03aa645aebf32bc2d0d139
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae5a9f63d9f5f4fd3301f8e4874b04b5995dadb0557e81e711abcd08c7b252f6
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe972cdc2d4d4445927a1f957a4a63e44b72e60301b94c3115794d59381c74a8
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3239853222047f1365deb3434639266349a53ca0b9050cb2fde4032b546dc38e
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fe8e87567dbde70e61d4b3893fe944c4cfa319c4867e6672286ef519491e3ee
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64cf784b6a5fa4dcec46cd536c3ea92812bcec7bee6eeb3a6b752d0d9a8adf67
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04dc540d0b92cd9e8c8b92930521667e6c090ca32a551758b702ae8e34e21395
|
| 3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 43,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.09302325581395349,
|
| 13 |
+
"grad_norm": 2.0882959365844727,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.3258,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.18604651162790697,
|
| 20 |
+
"grad_norm": 1.28811776638031,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.1932,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.27906976744186046,
|
| 27 |
+
"grad_norm": 0.822692334651947,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 0.0951,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.37209302325581395,
|
| 34 |
+
"grad_norm": 1.5997800827026367,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.2066,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.46511627906976744,
|
| 41 |
+
"grad_norm": 2.9270811080932617,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 0.2142,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.5581395348837209,
|
| 48 |
+
"grad_norm": 0.9111597537994385,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.0939,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.6511627906976745,
|
| 55 |
+
"grad_norm": 1.769669771194458,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.1228,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.7441860465116279,
|
| 62 |
+
"grad_norm": 1.8244539499282837,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.1983,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.8372093023255814,
|
| 69 |
+
"grad_norm": 1.7385451793670654,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.2394,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.9302325581395349,
|
| 76 |
+
"grad_norm": 2.4531519412994385,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.1592,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 1.0232558139534884,
|
| 83 |
+
"grad_norm": 0.8544302582740784,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.1533,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 1.1162790697674418,
|
| 90 |
+
"grad_norm": 2.170783519744873,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 0.3049,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 1.2093023255813953,
|
| 97 |
+
"grad_norm": 1.9732646942138672,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.3371,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.302325581395349,
|
| 104 |
+
"grad_norm": 3.642188549041748,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 0.3442,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.3953488372093024,
|
| 111 |
+
"grad_norm": 1.494310975074768,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.13,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.4883720930232558,
|
| 118 |
+
"grad_norm": 0.3682941496372223,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.0284,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.5813953488372092,
|
| 125 |
+
"grad_norm": 1.2264622449874878,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.1392,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.6744186046511629,
|
| 132 |
+
"grad_norm": 0.5091031193733215,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.1509,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.7674418604651163,
|
| 139 |
+
"grad_norm": 1.4979541301727295,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.1525,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.8604651162790697,
|
| 146 |
+
"grad_norm": 1.4245574474334717,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.2237,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.9534883720930232,
|
| 153 |
+
"grad_norm": 1.7625281810760498,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.2382,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.0,
|
| 160 |
+
"step": 43,
|
| 161 |
+
"total_flos": 5466572382535680.0,
|
| 162 |
+
"train_loss": 0.19268665757290152,
|
| 163 |
+
"train_runtime": 218.8116,
|
| 164 |
+
"train_samples_per_second": 0.786,
|
| 165 |
+
"train_steps_per_second": 0.197
|
| 166 |
+
}
|
| 167 |
+
],
|
| 168 |
+
"logging_steps": 2,
|
| 169 |
+
"max_steps": 43,
|
| 170 |
+
"num_input_tokens_seen": 0,
|
| 171 |
+
"num_train_epochs": 1,
|
| 172 |
+
"save_steps": 500,
|
| 173 |
+
"stateful_callbacks": {
|
| 174 |
+
"TrainerControl": {
|
| 175 |
+
"args": {
|
| 176 |
+
"should_epoch_stop": false,
|
| 177 |
+
"should_evaluate": false,
|
| 178 |
+
"should_log": false,
|
| 179 |
+
"should_save": false,
|
| 180 |
+
"should_training_stop": false
|
| 181 |
+
},
|
| 182 |
+
"attributes": {}
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"total_flos": 5466572382535680.0,
|
| 186 |
+
"train_batch_size": 1,
|
| 187 |
+
"trial_name": null,
|
| 188 |
+
"trial_params": null
|
| 189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88808d32046e53379114e941c371bd75493bde5bb1542d9334164b318c408b0a
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50aa5dce46ef29bd53549ca4bb111aa001b4b81fcbdcd42593a2e89c24d1e609
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1d29c2144b0318477b6e17f2c8a95e0b1b6823db4c651660c3937d120884472
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b310990f72f64fe576d22dcaac6e3f75e347ddddd2ac489381cbd61f1b47a75
|
| 3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a072c7cecc25d135822bb331490b5401228a3292ef9795ef78ae35d285a7e882
|
| 3 |
+
size 1167511866
|