thkim0305 commited on
Commit
c67198f
·
verified ·
1 Parent(s): 94b43be

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json +140 -0
  10. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json +140 -0
  19. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json +140 -0
  28. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json +140 -0
  37. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json +140 -0
  46. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth +3 -0
  47. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth +3 -0
  48. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth +3 -0
  49. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth +3 -0
  50. client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836d89b5106466e6f49a6ff432bbd1680eab542acea1a49085dea53907cdcfc3
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d200058442e79c3d226f440ea1621148ed29ae746a5a819be7d987b9bf30715
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57bff0474933d50e539716b8194135bf5428c91884c200604738b38e461b236
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88374ae4388dccc64cd6ba9efc570db595d8785caa80b281f7584857956ebfb3
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a053d7581327fb439a80a1722221f5a44b54cec874f11c565ec9bb228bc8caf
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b48f3f4d17f62893fc65e44be8008eab062b97f50346a825a6fc7cd3ec0c94
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff080ff39b8b37fb652f5d047e3444a9fe236bab99c1fffd48443b6855a2e62
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81902f2625b8c03d84845881483180b24eed38caaa7c9c03844827667e6f6af4
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06896551724137931,
13
+ "grad_norm": 0.10964272171258926,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.4217,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13793103448275862,
20
+ "grad_norm": 0.12825018167495728,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.5373,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.20689655172413793,
27
+ "grad_norm": 0.09695617854595184,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.4761,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.27586206896551724,
34
+ "grad_norm": 0.11076370626688004,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.6261,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3448275862068966,
41
+ "grad_norm": 0.1661299616098404,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.6359,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.41379310344827586,
48
+ "grad_norm": 0.08879231661558151,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.5287,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4827586206896552,
55
+ "grad_norm": 0.10452059656381607,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.4933,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5517241379310345,
62
+ "grad_norm": 0.13890604674816132,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.6004,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6206896551724138,
69
+ "grad_norm": 0.1287703961133957,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.4594,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6896551724137931,
76
+ "grad_norm": 0.1211370974779129,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.6454,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7586206896551724,
83
+ "grad_norm": 0.08164095133543015,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.6281,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8275862068965517,
90
+ "grad_norm": 0.09634097665548325,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.5273,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.896551724137931,
97
+ "grad_norm": 0.08703736215829849,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.4851,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9655172413793104,
104
+ "grad_norm": 0.10768512636423111,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.6181,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 29,
112
+ "total_flos": 403873226293248.0,
113
+ "train_loss": 0.5474604059909952,
114
+ "train_runtime": 146.486,
115
+ "train_samples_per_second": 0.792,
116
+ "train_steps_per_second": 0.198
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 29,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 403873226293248.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e935aeec4ee260e84bb140e5ac4a8c6330e365fecd25d6f661325cbb9dddd5b7
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11589874b0923d856baf6292291fdc7914b9185b84aa0910080a52a9306bcfc
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99cf8ac844ba96cb3df2d7be33648bfddf54f360ed63c561b1c1f55c548046fe
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2418db45115828074ab36cafc2ad7cbe61a903ee75c51f52a11d1ceddabc3dd0
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79bbc99a1a4f5e88ace6945502b85240419d92aff6bb23e47736d3a65dc14580
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25515d1a35c48b353a7cd514feda4d0e0c771313fc772ef97d6b6a7fc91cc2fd
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:342e21f1ce5cd68276ffe3cefcf06d1709d4e880d484a85c7f482f4ba14f6faf
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f41668931adc3447813fb03c42ccf048764e7bebd97c98f0c445b732aa4daeb
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06896551724137931,
13
+ "grad_norm": 0.12822631001472473,
14
+ "learning_rate": 0.0003,
15
+ "loss": 2.0608,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13793103448275862,
20
+ "grad_norm": 0.22433912754058838,
21
+ "learning_rate": 0.0003,
22
+ "loss": 2.0027,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.20689655172413793,
27
+ "grad_norm": 0.11672133207321167,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.8721,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.27586206896551724,
34
+ "grad_norm": 0.14917072653770447,
35
+ "learning_rate": 0.0003,
36
+ "loss": 1.6856,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3448275862068966,
41
+ "grad_norm": 0.29816752672195435,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.6888,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.41379310344827586,
48
+ "grad_norm": 0.2757134437561035,
49
+ "learning_rate": 0.0003,
50
+ "loss": 1.6053,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4827586206896552,
55
+ "grad_norm": 0.19783510267734528,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.2848,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5517241379310345,
62
+ "grad_norm": 0.13258986175060272,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.8308,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6206896551724138,
69
+ "grad_norm": 0.21041615307331085,
70
+ "learning_rate": 0.0003,
71
+ "loss": 2.1478,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6896551724137931,
76
+ "grad_norm": 0.14201298356056213,
77
+ "learning_rate": 0.0003,
78
+ "loss": 1.5266,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7586206896551724,
83
+ "grad_norm": 0.15158437192440033,
84
+ "learning_rate": 0.0003,
85
+ "loss": 1.7609,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8275862068965517,
90
+ "grad_norm": 0.1517992615699768,
91
+ "learning_rate": 0.0003,
92
+ "loss": 1.166,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.896551724137931,
97
+ "grad_norm": 0.12816612422466278,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.6937,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9655172413793104,
104
+ "grad_norm": 0.1202758178114891,
105
+ "learning_rate": 0.0003,
106
+ "loss": 1.6313,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 29,
112
+ "total_flos": 281074283315200.0,
113
+ "train_loss": 1.690290927886963,
114
+ "train_runtime": 145.7094,
115
+ "train_samples_per_second": 0.796,
116
+ "train_steps_per_second": 0.199
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 29,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 281074283315200.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f049546f595aef23f14cd5f0f2900ec7d6ab067f347356989e7665f19f4fba
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74ae734337d1de8d4f4b7c907e3825932cc27165ad5a9ace2e65e4c09185a82
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e432371f1613ba58f90180975df30455c6ecf608502129129e8fbb1543f3b1b
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48890b803acd4c2be77dc6bf63ab002be41bd45a52ed5fddfa880eb573d0e8ae
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a4e584273dd911af5919a7581f90bdae10154afcb9a2b28a9cd9b13d1722c2
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d37a8441064179b19f7969a8ed7cbb0b6843cdb20993aeed62a44a1854b0d1e
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4569f7868b2f3a29f7be76788b1afbdb0ebbf4d1816ce6d2d81863913e300cc8
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d89e09bc5919a1c61037f4fd932a3dbe05ab36284a16b02024f787bf98d60a
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06896551724137931,
13
+ "grad_norm": 0.16763809323310852,
14
+ "learning_rate": 0.0003,
15
+ "loss": 2.1451,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13793103448275862,
20
+ "grad_norm": 0.08457788825035095,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.489,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.20689655172413793,
27
+ "grad_norm": 0.11339309066534042,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.9602,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.27586206896551724,
34
+ "grad_norm": 0.14981189370155334,
35
+ "learning_rate": 0.0003,
36
+ "loss": 1.0412,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3448275862068966,
41
+ "grad_norm": 0.1549920290708542,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.6355,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.41379310344827586,
48
+ "grad_norm": 0.16407117247581482,
49
+ "learning_rate": 0.0003,
50
+ "loss": 2.054,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4827586206896552,
55
+ "grad_norm": 0.1566217690706253,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.7061,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5517241379310345,
62
+ "grad_norm": 0.10542894154787064,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.109,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6206896551724138,
69
+ "grad_norm": 0.20924022793769836,
70
+ "learning_rate": 0.0003,
71
+ "loss": 1.6204,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6896551724137931,
76
+ "grad_norm": 0.15142425894737244,
77
+ "learning_rate": 0.0003,
78
+ "loss": 1.8941,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7586206896551724,
83
+ "grad_norm": 0.20191574096679688,
84
+ "learning_rate": 0.0003,
85
+ "loss": 2.1546,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8275862068965517,
90
+ "grad_norm": 0.21983274817466736,
91
+ "learning_rate": 0.0003,
92
+ "loss": 1.3526,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.896551724137931,
97
+ "grad_norm": 0.07465270161628723,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.743,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9655172413793104,
104
+ "grad_norm": 0.1358894258737564,
105
+ "learning_rate": 0.0003,
106
+ "loss": 1.6953,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 29,
112
+ "total_flos": 663155598426112.0,
113
+ "train_loss": 1.6605948497509133,
114
+ "train_runtime": 157.2331,
115
+ "train_samples_per_second": 0.738,
116
+ "train_steps_per_second": 0.184
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 29,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 663155598426112.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99d8e37420a572ff1550c88254d7d014cba4443f24a713b66a7fafdf71be01ee
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b58f08397ab0d541489514a5c83564d2de154eda870f66f490b21ddcb255608
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebca7ec994f73e73f89e515ab3cec838ef6c6bbe26ac71ec451411e53eac6038
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc722336f0b0169bed983c53196967beac1302719207064a5363426ee496d9eb
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e079c3f8a07b9d74e26e05ed0f17c99b2c16f64338f120fe6409dddfea70152a
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ca7b3ef1b0b44b2e1d8e4376bd46717ff5a2721ab91cf0de703f3b48c381c2
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c232444e6aed9362a4ec1f9435f695cf6ba336c1a53d21ec78deb0ac6c3ee608
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9936ee087a87971fe9a697c0b4295d6a7eabbee3f5ff7bb6cfa486c8a082f21a
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06896551724137931,
13
+ "grad_norm": 0.05931968614459038,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.7324,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13793103448275862,
20
+ "grad_norm": 0.04922392964363098,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.8792,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.20689655172413793,
27
+ "grad_norm": 0.11524348706007004,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.5845,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.27586206896551724,
34
+ "grad_norm": 0.06806690990924835,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.7545,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3448275862068966,
41
+ "grad_norm": 0.09595661610364914,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.755,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.41379310344827586,
48
+ "grad_norm": 0.05159907415509224,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.5922,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4827586206896552,
55
+ "grad_norm": 0.07526896893978119,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.9019,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5517241379310345,
62
+ "grad_norm": 0.07771413773298264,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.7862,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6206896551724138,
69
+ "grad_norm": 0.04285521060228348,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.8229,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6896551724137931,
76
+ "grad_norm": 0.15346619486808777,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.8843,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7586206896551724,
83
+ "grad_norm": 0.055303364992141724,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.7054,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8275862068965517,
90
+ "grad_norm": 0.06509271264076233,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.6226,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.896551724137931,
97
+ "grad_norm": 0.06718684732913971,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.6203,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9655172413793104,
104
+ "grad_norm": 0.10245434939861298,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.5754,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 29,
112
+ "total_flos": 1591652615454720.0,
113
+ "train_loss": 0.7325446379595789,
114
+ "train_runtime": 148.7314,
115
+ "train_samples_per_second": 0.78,
116
+ "train_steps_per_second": 0.195
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 29,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 1591652615454720.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74bf68629dfbf8c60f49ae0110c8463cd77206845a7c914476538bcfe9349bd
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1439a1ba2c9766648ac02a241c189908c96d3247c69ef704723c2480cf5678b0
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5bf1290f2b12990e47169e8b9f964b1020c67eb1339016648587e7c9b32b1c
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6902cb7706829f9ec77cfa3264bb7f80381d38d1471f67fdbda3e99ecc7e14
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59ef9e88e3229d2ceec37679a496e2ca3215eba76a0e32522ebb4e5d9228843
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f4ac6d530706608b4ebdaddab566459f7a4ac12f5c5605e5d3e6ceed45bc2d
3
+ size 100664470
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3c7a8ab6cb955ef1570c99b48aef65202684bb438bcee9f0d46f28697cbec1
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6efec2037d3806dc4b4fa30d91466b9e9b1aabfe85b5f8711e9faed8d171a5d5
3
+ size 100663226
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06896551724137931,
13
+ "grad_norm": 0.05072196200489998,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.809,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13793103448275862,
20
+ "grad_norm": 0.14052243530750275,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.9376,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.20689655172413793,
27
+ "grad_norm": 0.07366146892309189,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.9637,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.27586206896551724,
34
+ "grad_norm": 0.06059933826327324,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.9109,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3448275862068966,
41
+ "grad_norm": 0.11904361099004745,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.974,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.41379310344827586,
48
+ "grad_norm": 0.0738854929804802,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.6471,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4827586206896552,
55
+ "grad_norm": 0.10854072868824005,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.8115,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5517241379310345,
62
+ "grad_norm": 0.08300568163394928,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.7872,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6206896551724138,
69
+ "grad_norm": 0.12101076543331146,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.5953,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6896551724137931,
76
+ "grad_norm": 0.073908731341362,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.823,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7586206896551724,
83
+ "grad_norm": 0.08307291567325592,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.6549,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8275862068965517,
90
+ "grad_norm": 0.158330500125885,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.9645,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.896551724137931,
97
+ "grad_norm": 0.07515610754489899,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.024,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9655172413793104,
104
+ "grad_norm": 0.09373172372579575,
105
+ "learning_rate": 0.0003,
106
+ "loss": 1.0713,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 29,
112
+ "total_flos": 1501610387701760.0,
113
+ "train_loss": 0.8557169848474963,
114
+ "train_runtime": 150.3635,
115
+ "train_samples_per_second": 0.771,
116
+ "train_steps_per_second": 0.193
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 29,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 1501610387701760.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8374e17a0faf03d8951e285488b14eef4498017c3e2630509a35cb0e5a67b558
3
+ size 167774710
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f922ab3667d9b5e9c3b1d1ee5e997d17edd373b51db23bee5625d07878158bdb
3
+ size 167774710
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703338326e88b4e101975ba8d39f9caef66e863e1ee39a6fb1fd3a84dda35b59
3
+ size 167774710
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70eb91a3e5015c3211e8ceae2f9ba2fded0c59702d2173cdde85a59eab4264f
3
+ size 167774710
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a5c308380010af0b81c1ab4cdacccd5df94bc7c0a8a8449231f8d202c1a778
3
+ size 167774710