Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_trainer_state.json +140 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_trainer_state.json +140 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_trainer_state.json +140 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_trainer_state.json +140 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_trainer_state.json +140 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round10_task_vector_local_weights.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round11_task_vector_local_weights.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round12_task_vector_local_weights.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round13_task_vector_local_weights.pth +3 -0
- client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round14_task_vector_local_weights.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:001239b9907fc929a84da95552b79946dfea7fc9afe27b564c89d394a483a6e9
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2881397815bee454f69c451e489341333626a12e84fa266ac20e3f94a7542a88
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:191f9c953727568a31bf08530d840977ef62df67bb8b347c6dba9fefdf612468
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f99c2ca4cf9e865585d42e4fd50f2085bbeeaaad2614d18eecd34a791cf76e8f
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79a85cba6ec48ca493cae08766a0e3e950fc3d2b5048f4310163dfc1a61652ec
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b466544c4c7434c0f93ee1cd470194f297a453de9a47d43581d66969685cbd9e
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a18c6a0104cb7098aa5fc6bc18debd8f4c5ca43a11f429e2e34927339d33bced
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbb49da2b195e0d7d665a196f33c7a88bbca800c6a935d3a0bdb651253adff4f
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 28,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.07142857142857142,
|
| 13 |
+
"grad_norm": 0.13116712868213654,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.6245,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.14285714285714285,
|
| 20 |
+
"grad_norm": 0.1398446410894394,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.4574,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.21428571428571427,
|
| 27 |
+
"grad_norm": 0.09051910042762756,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.5595,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.2857142857142857,
|
| 34 |
+
"grad_norm": 0.10584211349487305,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.4532,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.35714285714285715,
|
| 41 |
+
"grad_norm": 0.11173545569181442,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.5548,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.42857142857142855,
|
| 48 |
+
"grad_norm": 0.134027361869812,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.6055,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.5,
|
| 55 |
+
"grad_norm": 0.13683006167411804,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.5267,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5714285714285714,
|
| 62 |
+
"grad_norm": 0.1731289029121399,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.5878,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6428571428571429,
|
| 69 |
+
"grad_norm": 0.18668872117996216,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.4697,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.7142857142857143,
|
| 76 |
+
"grad_norm": 0.11167559772729874,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.5806,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7857142857142857,
|
| 83 |
+
"grad_norm": 0.11734828352928162,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.3757,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8571428571428571,
|
| 90 |
+
"grad_norm": 0.1058587059378624,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.4192,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.9285714285714286,
|
| 97 |
+
"grad_norm": 0.17954958975315094,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.6238,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.0,
|
| 104 |
+
"grad_norm": 0.16434884071350098,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.4551,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 28,
|
| 112 |
+
"total_flos": 1075811478142976.0,
|
| 113 |
+
"train_loss": 0.5209636815956661,
|
| 114 |
+
"train_runtime": 99.0551,
|
| 115 |
+
"train_samples_per_second": 1.131,
|
| 116 |
+
"train_steps_per_second": 0.283
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 28,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 1075811478142976.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:365692a5df7a5f517f6d2b540f561a5aa7178f9b294306095773e407b50e98be
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38d28ad102cbb6a4af3145e8b1f023693d94031be4a514a4c915c11c0cc07856
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05365c6abedcd079fe9f82c191045bdc8f6f2bd5c2b410a7e94e400d0729f563
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1d112a7e4976617fba84c4319e3eb552b4db4e78a171b0861e464e6c7441a76
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b036b5b1d8fce19fc85e292d1da4827c59b84c7edfaffa8bd2113d7c054ad7e
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9d085e9128c04ee7a517c1b233f482fcd4f83c364784613e318a238e486aba5
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d41567f2df3edc1d2bc4cc4d2160af34a30a3ed858ec87afa3c59596c30d657
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4158b2b5ccb23a440e32fe44fac7806d26addab849ea06cf2bf0c280f39abf5c
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 28,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.07142857142857142,
|
| 13 |
+
"grad_norm": 0.15419495105743408,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 1.3116,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.14285714285714285,
|
| 20 |
+
"grad_norm": 0.23437140882015228,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.2667,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.21428571428571427,
|
| 27 |
+
"grad_norm": 0.20211642980575562,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 2.137,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.2857142857142857,
|
| 34 |
+
"grad_norm": 0.14548730850219727,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 1.8417,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.35714285714285715,
|
| 41 |
+
"grad_norm": 0.25465503334999084,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.6129,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.42857142857142855,
|
| 48 |
+
"grad_norm": 0.19803431630134583,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 1.6624,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.5,
|
| 55 |
+
"grad_norm": 0.19455000758171082,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.7543,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5714285714285714,
|
| 62 |
+
"grad_norm": 0.2205786108970642,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 2.2022,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6428571428571429,
|
| 69 |
+
"grad_norm": 0.16035087406635284,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.672,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.7142857142857143,
|
| 76 |
+
"grad_norm": 0.1775389015674591,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 1.5621,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7857142857142857,
|
| 83 |
+
"grad_norm": 0.09709464013576508,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.746,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8571428571428571,
|
| 90 |
+
"grad_norm": 0.16427133977413177,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.9581,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.9285714285714286,
|
| 97 |
+
"grad_norm": 0.10141786187887192,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.9874,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.0,
|
| 104 |
+
"grad_norm": 0.11148026585578918,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 1.6103,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 28,
|
| 112 |
+
"total_flos": 369994189766656.0,
|
| 113 |
+
"train_loss": 1.5231994943959373,
|
| 114 |
+
"train_runtime": 89.8548,
|
| 115 |
+
"train_samples_per_second": 1.246,
|
| 116 |
+
"train_steps_per_second": 0.312
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 28,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 369994189766656.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec017853bfc82083346ec646beb8037506655fae566da6e2128f7f710687370c
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65f89ea8ffe86067b55dc44f4ec34ca6609878e83159f69e8f07bef05b8d40f8
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:940e20a3849db838fc6fefb2b889b0ef67af3d44914eeb8745f39af0a373fa34
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde0f62bc84cc014a3c3ba34cd292e976c15062f063e8cd5eb544507652e20cb
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48e92cab729623f0c3e76bbdf0032fca52dd7cf774a41d4290fc9e3959f025f7
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71f2543ca714c98b5a64959a6826814dd99810e0b6690c811cbba245422d18f
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c5955730454acffaa3d24ef886d967bac079c37b788581fea19ea21c19d246b
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8640bf9b40ae6a0e7661df0676607ce7d8b4fe37a839ff601b34f733564763af
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 28,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.07142857142857142,
|
| 13 |
+
"grad_norm": 0.13895072042942047,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 1.3731,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.14285714285714285,
|
| 20 |
+
"grad_norm": 0.1320711225271225,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.5479,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.21428571428571427,
|
| 27 |
+
"grad_norm": 0.16148094832897186,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.7009,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.2857142857142857,
|
| 34 |
+
"grad_norm": 0.12018702924251556,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 2.1986,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.35714285714285715,
|
| 41 |
+
"grad_norm": 0.20602719485759735,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.4721,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.42857142857142855,
|
| 48 |
+
"grad_norm": 0.2730118930339813,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 1.4336,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.5,
|
| 55 |
+
"grad_norm": 0.17492128908634186,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.6112,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5714285714285714,
|
| 62 |
+
"grad_norm": 0.10725309699773788,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.4441,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6428571428571429,
|
| 69 |
+
"grad_norm": 0.25711339712142944,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 1.8929,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.7142857142857143,
|
| 76 |
+
"grad_norm": 0.31338798999786377,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 1.4014,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7857142857142857,
|
| 83 |
+
"grad_norm": 0.12460747361183167,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 1.2827,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8571428571428571,
|
| 90 |
+
"grad_norm": 0.21422362327575684,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 1.4855,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.9285714285714286,
|
| 97 |
+
"grad_norm": 0.14184945821762085,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.8764,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.0,
|
| 104 |
+
"grad_norm": 0.21373428404331207,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.9664,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 28,
|
| 112 |
+
"total_flos": 484500628832256.0,
|
| 113 |
+
"train_loss": 1.5490526471819197,
|
| 114 |
+
"train_runtime": 87.5761,
|
| 115 |
+
"train_samples_per_second": 1.279,
|
| 116 |
+
"train_steps_per_second": 0.32
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 28,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 484500628832256.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1a50843b43cfa32c95b889d281f570537c1378f05a990609a634a2601995f26
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1498f3ef8326bd77e6c944aabe9753ffd7e68263abb0c5f105575b359d8a4efa
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd5490baa537e5adb313ff8354fbb07f32fd0c13557f86d8b55cbda4f4151b61
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e126389a4e2fad97b2f79a7beab0d89d2c2cab8be6f46e7e15f922f0d3cf5ccb
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e04d4cba79c1a0928bca1dd3da3030b8cfa33745ba4f2c916e153c48e96aeee6
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:088024c945e45ce17ea0a8342822e667be14ec5b31286804ef64223b1afa92b4
|
| 3 |
+
size 101144758
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e29fd3548d52bd7a6243f8ddedc9100363291be1d6814e22909d6f62eaf0e141
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f419a707c0e21f8b6173e3073f9d9cd77cf775eda36e0b454cb713c1fde1648
|
| 3 |
+
size 101142730
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 28,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.07142857142857142,
|
| 13 |
+
"grad_norm": 0.07443733513355255,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.8302,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.14285714285714285,
|
| 20 |
+
"grad_norm": 0.10261357575654984,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.724,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.21428571428571427,
|
| 27 |
+
"grad_norm": 0.07151076197624207,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.6346,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.2857142857142857,
|
| 34 |
+
"grad_norm": 0.04990497976541519,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.5856,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.35714285714285715,
|
| 41 |
+
"grad_norm": 0.06881256401538849,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.6444,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.42857142857142855,
|
| 48 |
+
"grad_norm": 0.07826367020606995,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.6547,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.5,
|
| 55 |
+
"grad_norm": 0.08683010190725327,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.8423,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5714285714285714,
|
| 62 |
+
"grad_norm": 0.132412850856781,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.6997,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6428571428571429,
|
| 69 |
+
"grad_norm": 0.05028389021754265,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.7901,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.7142857142857143,
|
| 76 |
+
"grad_norm": 0.07787914574146271,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.6354,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7857142857142857,
|
| 83 |
+
"grad_norm": 0.0758640393614769,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.5299,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8571428571428571,
|
| 90 |
+
"grad_norm": 0.05951598286628723,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.6796,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.9285714285714286,
|
| 97 |
+
"grad_norm": 0.09814899414777756,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.6833,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.0,
|
| 104 |
+
"grad_norm": 0.13053660094738007,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.6409,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 28,
|
| 112 |
+
"total_flos": 1827799543316480.0,
|
| 113 |
+
"train_loss": 0.683897099324635,
|
| 114 |
+
"train_runtime": 99.1076,
|
| 115 |
+
"train_samples_per_second": 1.13,
|
| 116 |
+
"train_steps_per_second": 0.283
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 28,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 1827799543316480.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba3d9a1649ad6659d64dbe2cfcae0e85e07340e521b91eb687dd9f1d0cb62048
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf8252f3e48671e3bb988fdf1440acbdadceb5badcf2c4144ad14206b69175ba
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd5705bc2a60c227a8f0ce710d16aded35659350d27826812504b1ec575881c7
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a6da18ea1081bb711e9f466f09dc9e759511440c35c117990a70414e6a8eaa1
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a30a2fb04b8ba09d0fbb4a5b695ba51f9aae6090980bdc09ad1ca94807fd26c3
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eeead0db8c4e421fe05044c67cd05abcd31e67295a3aa4a6f8838e1f7b56670
|
| 3 |
+
size 173791494
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f35eaef490310ee467b19f18073894613791f9d6c373c67643f3cd487ab114
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67dc838027f600189b50fb161dcaacc1e5aaad3366eaa97b1588b34c4e0a7c9d
|
| 3 |
+
size 173789186
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 28,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.07142857142857142,
|
| 13 |
+
"grad_norm": 0.19960319995880127,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.8145,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.14285714285714285,
|
| 20 |
+
"grad_norm": 0.20900875329971313,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.0395,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.21428571428571427,
|
| 27 |
+
"grad_norm": 0.08692847937345505,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.5146,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.2857142857142857,
|
| 34 |
+
"grad_norm": 0.17795179784297943,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.7781,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.35714285714285715,
|
| 41 |
+
"grad_norm": 0.19282561540603638,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.7649,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.42857142857142855,
|
| 48 |
+
"grad_norm": 0.20008526742458344,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.7036,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.5,
|
| 55 |
+
"grad_norm": 0.24128301441669464,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.8278,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5714285714285714,
|
| 62 |
+
"grad_norm": 0.22384898364543915,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.7634,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6428571428571429,
|
| 69 |
+
"grad_norm": 0.23955023288726807,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.6968,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.7142857142857143,
|
| 76 |
+
"grad_norm": 0.29193806648254395,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.9766,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7857142857142857,
|
| 83 |
+
"grad_norm": 0.22462812066078186,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.9611,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8571428571428571,
|
| 90 |
+
"grad_norm": 0.1233292669057846,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.6395,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.9285714285714286,
|
| 97 |
+
"grad_norm": 0.33286356925964355,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.6785,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.0,
|
| 104 |
+
"grad_norm": 0.12057407200336456,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.9113,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 28,
|
| 112 |
+
"total_flos": 3492754881511424.0,
|
| 113 |
+
"train_loss": 0.7907240731375558,
|
| 114 |
+
"train_runtime": 114.3445,
|
| 115 |
+
"train_samples_per_second": 0.979,
|
| 116 |
+
"train_steps_per_second": 0.245
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 28,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 3492754881511424.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round10_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0494acc3ffb6b72cea6b5c7134665c0676acc341948cb275d3954643c17da747
|
| 3 |
+
size 167774710
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round11_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79d6913d23c5d352da1bf8f0a06a87f8ddb4b77ad12a60398c3f0bc6a47efafd
|
| 3 |
+
size 167774710
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round12_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f78908f6c162f4c3bc2dc508e3cb3122a39b0e5e339f005a570abacfd355c50f
|
| 3 |
+
size 167774710
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round13_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a047ee61099e3cb76aa26c9b126b5441da39637533cd50581e6e11a2748903c3
|
| 3 |
+
size 167774710
|
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round14_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7ee6a6d6d8105d4d0feac7b0adfc3c131d1a774562b09465c9a4c924054976d
|
| 3 |
+
size 167774710
|