Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json +140 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json +140 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json +140 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json +140 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json +140 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth +3 -0
- client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:836d89b5106466e6f49a6ff432bbd1680eab542acea1a49085dea53907cdcfc3
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d200058442e79c3d226f440ea1621148ed29ae746a5a819be7d987b9bf30715
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c57bff0474933d50e539716b8194135bf5428c91884c200604738b38e461b236
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88374ae4388dccc64cd6ba9efc570db595d8785caa80b281f7584857956ebfb3
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a053d7581327fb439a80a1722221f5a44b54cec874f11c565ec9bb228bc8caf
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14b48f3f4d17f62893fc65e44be8008eab062b97f50346a825a6fc7cd3ec0c94
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ff080ff39b8b37fb652f5d047e3444a9fe236bab99c1fffd48443b6855a2e62
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81902f2625b8c03d84845881483180b24eed38caaa7c9c03844827667e6f6af4
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 29,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06896551724137931,
|
| 13 |
+
"grad_norm": 0.10964272171258926,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.4217,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13793103448275862,
|
| 20 |
+
"grad_norm": 0.12825018167495728,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.5373,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20689655172413793,
|
| 27 |
+
"grad_norm": 0.09695617854595184,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.4761,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.27586206896551724,
|
| 34 |
+
"grad_norm": 0.11076370626688004,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.6261,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3448275862068966,
|
| 41 |
+
"grad_norm": 0.1661299616098404,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.6359,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.41379310344827586,
|
| 48 |
+
"grad_norm": 0.08879231661558151,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.5287,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4827586206896552,
|
| 55 |
+
"grad_norm": 0.10452059656381607,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.4933,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5517241379310345,
|
| 62 |
+
"grad_norm": 0.13890604674816132,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.6004,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6206896551724138,
|
| 69 |
+
"grad_norm": 0.1287703961133957,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.4594,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6896551724137931,
|
| 76 |
+
"grad_norm": 0.1211370974779129,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.6454,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7586206896551724,
|
| 83 |
+
"grad_norm": 0.08164095133543015,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.6281,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8275862068965517,
|
| 90 |
+
"grad_norm": 0.09634097665548325,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.5273,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.896551724137931,
|
| 97 |
+
"grad_norm": 0.08703736215829849,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.4851,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9655172413793104,
|
| 104 |
+
"grad_norm": 0.10768512636423111,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.6181,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 29,
|
| 112 |
+
"total_flos": 403873226293248.0,
|
| 113 |
+
"train_loss": 0.5474604059909952,
|
| 114 |
+
"train_runtime": 146.486,
|
| 115 |
+
"train_samples_per_second": 0.792,
|
| 116 |
+
"train_steps_per_second": 0.198
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 29,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 403873226293248.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e935aeec4ee260e84bb140e5ac4a8c6330e365fecd25d6f661325cbb9dddd5b7
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b11589874b0923d856baf6292291fdc7914b9185b84aa0910080a52a9306bcfc
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99cf8ac844ba96cb3df2d7be33648bfddf54f360ed63c561b1c1f55c548046fe
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2418db45115828074ab36cafc2ad7cbe61a903ee75c51f52a11d1ceddabc3dd0
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79bbc99a1a4f5e88ace6945502b85240419d92aff6bb23e47736d3a65dc14580
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25515d1a35c48b353a7cd514feda4d0e0c771313fc772ef97d6b6a7fc91cc2fd
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:342e21f1ce5cd68276ffe3cefcf06d1709d4e880d484a85c7f482f4ba14f6faf
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f41668931adc3447813fb03c42ccf048764e7bebd97c98f0c445b732aa4daeb
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 29,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06896551724137931,
|
| 13 |
+
"grad_norm": 0.12822631001472473,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 2.0608,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13793103448275862,
|
| 20 |
+
"grad_norm": 0.22433912754058838,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 2.0027,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20689655172413793,
|
| 27 |
+
"grad_norm": 0.11672133207321167,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.8721,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.27586206896551724,
|
| 34 |
+
"grad_norm": 0.14917072653770447,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 1.6856,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3448275862068966,
|
| 41 |
+
"grad_norm": 0.29816752672195435,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.6888,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.41379310344827586,
|
| 48 |
+
"grad_norm": 0.2757134437561035,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 1.6053,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4827586206896552,
|
| 55 |
+
"grad_norm": 0.19783510267734528,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.2848,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5517241379310345,
|
| 62 |
+
"grad_norm": 0.13258986175060272,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.8308,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6206896551724138,
|
| 69 |
+
"grad_norm": 0.21041615307331085,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 2.1478,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6896551724137931,
|
| 76 |
+
"grad_norm": 0.14201298356056213,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 1.5266,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7586206896551724,
|
| 83 |
+
"grad_norm": 0.15158437192440033,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 1.7609,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8275862068965517,
|
| 90 |
+
"grad_norm": 0.1517992615699768,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 1.166,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.896551724137931,
|
| 97 |
+
"grad_norm": 0.12816612422466278,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.6937,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9655172413793104,
|
| 104 |
+
"grad_norm": 0.1202758178114891,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 1.6313,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 29,
|
| 112 |
+
"total_flos": 281074283315200.0,
|
| 113 |
+
"train_loss": 1.690290927886963,
|
| 114 |
+
"train_runtime": 145.7094,
|
| 115 |
+
"train_samples_per_second": 0.796,
|
| 116 |
+
"train_steps_per_second": 0.199
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 29,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 281074283315200.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52f049546f595aef23f14cd5f0f2900ec7d6ab067f347356989e7665f19f4fba
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d74ae734337d1de8d4f4b7c907e3825932cc27165ad5a9ace2e65e4c09185a82
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e432371f1613ba58f90180975df30455c6ecf608502129129e8fbb1543f3b1b
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48890b803acd4c2be77dc6bf63ab002be41bd45a52ed5fddfa880eb573d0e8ae
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a4e584273dd911af5919a7581f90bdae10154afcb9a2b28a9cd9b13d1722c2
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d37a8441064179b19f7969a8ed7cbb0b6843cdb20993aeed62a44a1854b0d1e
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4569f7868b2f3a29f7be76788b1afbdb0ebbf4d1816ce6d2d81863913e300cc8
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1d89e09bc5919a1c61037f4fd932a3dbe05ab36284a16b02024f787bf98d60a
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 29,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06896551724137931,
|
| 13 |
+
"grad_norm": 0.16763809323310852,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 2.1451,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13793103448275862,
|
| 20 |
+
"grad_norm": 0.08457788825035095,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.489,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20689655172413793,
|
| 27 |
+
"grad_norm": 0.11339309066534042,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.9602,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.27586206896551724,
|
| 34 |
+
"grad_norm": 0.14981189370155334,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 1.0412,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3448275862068966,
|
| 41 |
+
"grad_norm": 0.1549920290708542,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.6355,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.41379310344827586,
|
| 48 |
+
"grad_norm": 0.16407117247581482,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 2.054,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4827586206896552,
|
| 55 |
+
"grad_norm": 0.1566217690706253,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.7061,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5517241379310345,
|
| 62 |
+
"grad_norm": 0.10542894154787064,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.109,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6206896551724138,
|
| 69 |
+
"grad_norm": 0.20924022793769836,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 1.6204,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6896551724137931,
|
| 76 |
+
"grad_norm": 0.15142425894737244,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 1.8941,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7586206896551724,
|
| 83 |
+
"grad_norm": 0.20191574096679688,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 2.1546,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8275862068965517,
|
| 90 |
+
"grad_norm": 0.21983274817466736,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 1.3526,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.896551724137931,
|
| 97 |
+
"grad_norm": 0.07465270161628723,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.743,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9655172413793104,
|
| 104 |
+
"grad_norm": 0.1358894258737564,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 1.6953,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 29,
|
| 112 |
+
"total_flos": 663155598426112.0,
|
| 113 |
+
"train_loss": 1.6605948497509133,
|
| 114 |
+
"train_runtime": 157.2331,
|
| 115 |
+
"train_samples_per_second": 0.738,
|
| 116 |
+
"train_steps_per_second": 0.184
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 29,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 663155598426112.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99d8e37420a572ff1550c88254d7d014cba4443f24a713b66a7fafdf71be01ee
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b58f08397ab0d541489514a5c83564d2de154eda870f66f490b21ddcb255608
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebca7ec994f73e73f89e515ab3cec838ef6c6bbe26ac71ec451411e53eac6038
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc722336f0b0169bed983c53196967beac1302719207064a5363426ee496d9eb
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e079c3f8a07b9d74e26e05ed0f17c99b2c16f64338f120fe6409dddfea70152a
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4ca7b3ef1b0b44b2e1d8e4376bd46717ff5a2721ab91cf0de703f3b48c381c2
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c232444e6aed9362a4ec1f9435f695cf6ba336c1a53d21ec78deb0ac6c3ee608
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9936ee087a87971fe9a697c0b4295d6a7eabbee3f5ff7bb6cfa486c8a082f21a
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 29,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06896551724137931,
|
| 13 |
+
"grad_norm": 0.05931968614459038,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.7324,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13793103448275862,
|
| 20 |
+
"grad_norm": 0.04922392964363098,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.8792,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20689655172413793,
|
| 27 |
+
"grad_norm": 0.11524348706007004,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.5845,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.27586206896551724,
|
| 34 |
+
"grad_norm": 0.06806690990924835,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.7545,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3448275862068966,
|
| 41 |
+
"grad_norm": 0.09595661610364914,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.755,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.41379310344827586,
|
| 48 |
+
"grad_norm": 0.05159907415509224,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.5922,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4827586206896552,
|
| 55 |
+
"grad_norm": 0.07526896893978119,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.9019,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5517241379310345,
|
| 62 |
+
"grad_norm": 0.07771413773298264,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.7862,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6206896551724138,
|
| 69 |
+
"grad_norm": 0.04285521060228348,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.8229,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6896551724137931,
|
| 76 |
+
"grad_norm": 0.15346619486808777,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.8843,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7586206896551724,
|
| 83 |
+
"grad_norm": 0.055303364992141724,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.7054,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8275862068965517,
|
| 90 |
+
"grad_norm": 0.06509271264076233,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.6226,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.896551724137931,
|
| 97 |
+
"grad_norm": 0.06718684732913971,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.6203,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9655172413793104,
|
| 104 |
+
"grad_norm": 0.10245434939861298,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.5754,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 29,
|
| 112 |
+
"total_flos": 1591652615454720.0,
|
| 113 |
+
"train_loss": 0.7325446379595789,
|
| 114 |
+
"train_runtime": 148.7314,
|
| 115 |
+
"train_samples_per_second": 0.78,
|
| 116 |
+
"train_steps_per_second": 0.195
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 29,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 1591652615454720.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c74bf68629dfbf8c60f49ae0110c8463cd77206845a7c914476538bcfe9349bd
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1439a1ba2c9766648ac02a241c189908c96d3247c69ef704723c2480cf5678b0
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f5bf1290f2b12990e47169e8b9f964b1020c67eb1339016648587e7c9b32b1c
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db6902cb7706829f9ec77cfa3264bb7f80381d38d1471f67fdbda3e99ecc7e14
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f59ef9e88e3229d2ceec37679a496e2ca3215eba76a0e32522ebb4e5d9228843
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62f4ac6d530706608b4ebdaddab566459f7a4ac12f5c5605e5d3e6ceed45bc2d
|
| 3 |
+
size 100664470
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e3c7a8ab6cb955ef1570c99b48aef65202684bb438bcee9f0d46f28697cbec1
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6efec2037d3806dc4b4fa30d91466b9e9b1aabfe85b5f8711e9faed8d171a5d5
|
| 3 |
+
size 100663226
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 29,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06896551724137931,
|
| 13 |
+
"grad_norm": 0.05072196200489998,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.809,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13793103448275862,
|
| 20 |
+
"grad_norm": 0.14052243530750275,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.9376,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20689655172413793,
|
| 27 |
+
"grad_norm": 0.07366146892309189,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.9637,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.27586206896551724,
|
| 34 |
+
"grad_norm": 0.06059933826327324,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.9109,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3448275862068966,
|
| 41 |
+
"grad_norm": 0.11904361099004745,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.974,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.41379310344827586,
|
| 48 |
+
"grad_norm": 0.0738854929804802,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.6471,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4827586206896552,
|
| 55 |
+
"grad_norm": 0.10854072868824005,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.8115,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5517241379310345,
|
| 62 |
+
"grad_norm": 0.08300568163394928,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.7872,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6206896551724138,
|
| 69 |
+
"grad_norm": 0.12101076543331146,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.5953,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6896551724137931,
|
| 76 |
+
"grad_norm": 0.073908731341362,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.823,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7586206896551724,
|
| 83 |
+
"grad_norm": 0.08307291567325592,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.6549,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8275862068965517,
|
| 90 |
+
"grad_norm": 0.158330500125885,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.9645,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.896551724137931,
|
| 97 |
+
"grad_norm": 0.07515610754489899,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.024,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9655172413793104,
|
| 104 |
+
"grad_norm": 0.09373172372579575,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 1.0713,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"step": 29,
|
| 112 |
+
"total_flos": 1501610387701760.0,
|
| 113 |
+
"train_loss": 0.8557169848474963,
|
| 114 |
+
"train_runtime": 150.3635,
|
| 115 |
+
"train_samples_per_second": 0.771,
|
| 116 |
+
"train_steps_per_second": 0.193
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"logging_steps": 2,
|
| 120 |
+
"max_steps": 29,
|
| 121 |
+
"num_input_tokens_seen": 0,
|
| 122 |
+
"num_train_epochs": 1,
|
| 123 |
+
"save_steps": 500,
|
| 124 |
+
"stateful_callbacks": {
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": false,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 1501610387701760.0,
|
| 137 |
+
"train_batch_size": 1,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8374e17a0faf03d8951e285488b14eef4498017c3e2630509a35cb0e5a67b558
|
| 3 |
+
size 167774710
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f922ab3667d9b5e9c3b1d1ee5e997d17edd373b51db23bee5625d07878158bdb
|
| 3 |
+
size 167774710
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703338326e88b4e101975ba8d39f9caef66e863e1ee39a6fb1fd3a84dda35b59
|
| 3 |
+
size 167774710
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f70eb91a3e5015c3211e8ceae2f9ba2fded0c59702d2173cdde85a59eab4264f
|
| 3 |
+
size 167774710
|
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a5c308380010af0b81c1ab4cdacccd5df94bc7c0a8a8449231f8d202c1a778
|
| 3 |
+
size 167774710
|