thkim0305 commited on
Commit
86ce943
·
verified ·
1 Parent(s): 51fc927

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
  10. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
  19. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
  28. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
  37. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
  46. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cca839a6000b85230580c3cb523ef92e5c03763db124a522f8c7bb88607aee
3
+ size 180440142
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf9d18350e055b39ca5dad52b58bbf3e7c2e287f14d70e917e7236648c476bec
3
+ size 180440142
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae408b7c41f30c80ee1fdb5b0728cf48ec831c3a1ffb082daa7de7338f7d2ed8
3
+ size 180440142
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b76e87a2e9700efdac5da11b74c0c5349915ee378939e6d874b3fc61e7f228
3
+ size 180440142
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c6df8106ca65abe1a8e70c8407eae8785cb85b71893fb30c5d069e67caf712
3
+ size 180439850
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176ad4bfd417145e4051f4fe7c203ce4e8f31c86cda2fcc22bace0468d88ca88
3
+ size 180440142
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3d0619b2b3932156534630bc25b07904f9a6a8df4429d822954a3ef0c473a7
3
+ size 180439850
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ced7aa854b5dd6d72d4d0e3390d1a2c64bb342a1b3b8efd632c6be6e8504b0
3
+ size 180439850
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 5.389871120452881,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.837,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 7.913750648498535,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.5072,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 2.8081469535827637,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.9628,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 2.1584787368774414,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2402,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.2553013563156128,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4137,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.07167795300483704,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5048,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.2355425357818604,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.1466,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.243406295776367,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.3616,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 2.1192171573638916,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5938,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.964920163154602,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6115,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.7399609088897705,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3877,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.4627339839935303,
91
+ "learning_rate": 2e-05,
92
+ "loss": 3.434,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.5779869556427,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.3658,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.113224506378174,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.119,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.6840742826461792,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.4981,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.0056612491607666,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.9441,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.881331443786621,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.1007,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.5827242136001587,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.3904,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.0497872829437256,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8498,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.5571120977401733,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7427,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.50515615940094,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.5954,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 5.690471649169922,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.8239,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 2.0426018238067627,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.8085,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 4.374183654785156,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0815,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.9751304388046265,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.4038,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.5408687591552734,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8188,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 3.9847683906555176,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.5515,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.9399210214614868,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.516,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.8243143558502197,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.4914,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.0096545219421387,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.5824,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.177213191986084,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8273,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.069615125656128,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.262,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.477707862854004,
238
+ "learning_rate": 2e-05,
239
+ "loss": 3.1642,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 4.157680034637451,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.7826,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 4.135859489440918,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.3506,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.74435293674469,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.9626,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.8117738962173462,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.748,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.52223801612854,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.0564,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.414597988128662,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9622,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.8665771484375,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.3819,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.227982997894287,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6077,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.479780912399292,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.1214,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 5.932579040527344,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.2493,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.6842375993728638,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3546,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.6105072498321533,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8795,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 2.8480112552642822,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.2564,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.4740058183670044,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.6387,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.9073842763900757,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.783,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.1106702089309692,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.1876,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 5.465167045593262,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.9835,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5535011444883456.0,
365
+ "train_loss": 1.0648856735229493,
366
+ "train_runtime": 136.9082,
367
+ "train_samples_per_second": 2.922,
368
+ "train_steps_per_second": 0.73
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5535011444883456.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426a6fa480cdee236f57ad8bd1e64f82ae6c8c08c336b37b68885bc020cf05c2
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126b9ea9185472b6148b1009458028b25ed07a4225a36abfa17c2617a5119942
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3da09528db24d62a6dbf1d13ea3571fec5c829ccf44e39bed0bfabcdfdb9d8f9
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3ec9b28b108540f69369acb8d9b021d5b8f731e686adee24e49aeb093b302b8
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:561ae01465e3ce055e411d57a2566ab2a931b524238634f089ba5fec3d6db363
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6d2fc7920ad0af27cbfb4b5f3e1727671352e109fa655686b445ad69003e4c
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b59509bdbe736d456e7bbffe8c5fb030bb0cb522fbe269f3c9323dacc7fa89a
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371f56537c6cc23dcda83f899a38b9bd947cb7c0755f14c1deac18a9b9b2f68d
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.2099864482879639,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.7102,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 2.4037203788757324,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.1519,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.8102778196334839,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.9684,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.821050763130188,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5301,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.580217957496643,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.9518,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.8814568519592285,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.8458,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.3938233852386475,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9475,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 1.641783356666565,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.1361,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.12887442111969,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7893,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.1026272773742676,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.9656,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.3817358016967773,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.5276,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.19884991645813,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.8887,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.272486925125122,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9409,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.282609701156616,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.4966,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.1684859991073608,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.2735,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.7757588028907776,
119
+ "learning_rate": 2e-05,
120
+ "loss": 2.7099,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.7310632467269897,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8871,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.0317158699035645,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9895,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.5124049186706543,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.317,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.9667242169380188,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.6481,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.8257367610931396,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8701,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.1212313175201416,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.1708,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 2.276118040084839,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.0044,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.457052707672119,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0235,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.5500776767730713,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.582,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 3.4070324897766113,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.9971,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 2.191951274871826,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2354,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.1881375312805176,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.9124,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.418442964553833,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.196,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 1.8029783964157104,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.01,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.943813443183899,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.3572,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.0548287630081177,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.9924,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 5.823477745056152,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.3245,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.152376413345337,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.3052,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 2.3831217288970947,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1949,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 3.091294050216675,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.1667,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.1181700229644775,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.9937,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.2995938062667847,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7476,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.4761587381362915,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.6168,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.78084135055542,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.0055,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.6266769170761108,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.4055,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 5.890852451324463,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.3506,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 3.300797939300537,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9899,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.9619410037994385,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.158,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.9490997791290283,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.9689,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.6852408647537231,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0103,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 5.842019557952881,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.546,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.3952200412750244,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8511,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.604957103729248,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.7199,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.6361178159713745,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.0714,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 1.3244159611633664e+16,
365
+ "train_loss": 1.1290626525878906,
366
+ "train_runtime": 225.692,
367
+ "train_samples_per_second": 1.772,
368
+ "train_steps_per_second": 0.443
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 1.3244159611633664e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c0807db4338ab7a7d474fa8f7262b4fd4fe1cea1d99a5f168bf18ecec9ef12c
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f4a9a494715a8be98248261e6efb61e0724a29fa8dbd251bb1618ac5e66098
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622c64fd65d2677a6ff472f9c1179ea6ee26f41e360fc66f96fe527557ad8b45
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b10223f228633d93c35e78140b4e8e5379be4d4a5fe2c77de1bff27ce78e45aa
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6e93b7a9598502f5c770e2e2311166fe22644f01d92a98ed0820e2e9631995
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dad6c74c66eca0f729db8dd76453595a8ce5b3a051f531a2d02bae39d9644f8
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270cec05ffc3f2513dda5349f491335f377213b98445b828103e6e8eacf2e5d3
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0721b69261ec658126151d66332e80e1dd8b0698ab1ea907ce713f11e8611af
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.0289795398712158,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5335,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.6102533936500549,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4069,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.9161092042922974,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.8354,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 2.4946720600128174,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7206,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.380189418792725,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.1956,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.4468796253204346,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.4622,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.1074416637420654,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.6658,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.196979284286499,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9451,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 2.555473566055298,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4677,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.8904308676719666,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6847,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 2.3065459728240967,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3474,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.23778772354126,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.6669,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.0633652210235596,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.0448,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.159832715988159,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.2574,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 5.983434200286865,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8387,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.2606232166290283,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.433,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.855712652206421,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7482,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 6.815300941467285,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6416,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.802886724472046,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7273,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.7808118462562561,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1985,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.313674211502075,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6364,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 7.197145462036133,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.1568,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.7081199884414673,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.937,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.9655914306640625,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5069,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 4.5406389236450195,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6002,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 10.869454383850098,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.7676,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 10.128218650817871,
196
+ "learning_rate": 2e-05,
197
+ "loss": 2.3426,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 5.551089763641357,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.5984,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.7278666496276855,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.4457,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.7146732807159424,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6715,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 5.558541297912598,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.3482,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 3.7671058177948,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8286,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 4.145376205444336,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.6906,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.8701162338256836,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.7684,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 6.557702541351318,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.433,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.7119065523147583,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5752,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.7837646007537842,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.6808,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 8.07896900177002,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.8413,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 5.091773509979248,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.1644,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.6711478233337402,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.8726,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 2.857771396636963,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.8909,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.943341016769409,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.5204,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.16873300075531,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9623,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 4.725329399108887,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.1609,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 5.48153018951416,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.1443,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.6034471988677979,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.762,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 2.3010993003845215,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1309,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.12829327583313,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6846,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.9714277982711792,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.5588,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.1446884870529175,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.8131,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 1.5581131495702528e+16,
365
+ "train_loss": 0.926310920715332,
366
+ "train_runtime": 248.2955,
367
+ "train_samples_per_second": 1.611,
368
+ "train_steps_per_second": 0.403
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 1.5581131495702528e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36f60212361bbfccbcad493cdc07f58f903cc5b26ec97b01cc2c7fe9d10af7b
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc0950ab05e4bd2ff91260af512e1d3bcb386c17aaf1967c29cbdecadf496d1
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61075455fa883e80d3f5e40eb23b6ec9dbd7b323024842c8818e6d6bddca99ae
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c218d2b1ddd8fbda4ebbc02cdfc52e08c2cf0d833b40cb79a554c84f8b2cf8bf
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc226c37298b921ad72a709271c310cb1853200d5329083c1b0c9edba8173603
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1285b641c8c20c1a627caaa4e080366067fcabb4ede00871a43c9048756082ee
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4cf7b9470bc33a1b208726dd61d3fb16469f17d2275782c2314eb069aee0f9
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a277d042f2f6279ba75231cfe7a4f10a02e921a1271b1670a3b114699366b7da
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.3390214443206787,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.8941,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.9163679480552673,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9235,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.5888918042182922,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6486,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.30081573128700256,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2813,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.4009698629379272,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4144,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.2374634742736816,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.627,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 2.923579692840576,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.1054,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 1.857440710067749,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9148,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.8682881593704224,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3867,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.028822422027588,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.9991,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.2788021564483643,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5667,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.953608989715576,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9328,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.444178581237793,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8467,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.8711784482002258,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.343,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.8109800815582275,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3157,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 4.3886260986328125,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.7694,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.8276898860931396,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.0568,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.259130597114563,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9007,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.4246671199798584,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.2372,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.6753697395324707,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0153,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.6607201099395752,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2803,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.2931270599365234,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7531,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.034201741218567,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5761,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.261017084121704,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9825,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.7095365524291992,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6045,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.2309482097625732,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.0364,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 3.170400381088257,
196
+ "learning_rate": 2e-05,
197
+ "loss": 2.1252,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.2634927034378052,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1779,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.0210254192352295,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7033,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.7227587103843689,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3596,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.5512145757675171,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.2827,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.8612607717514038,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.3879,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.9014844298362732,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.8704,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.5804516673088074,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.7523,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.9667078852653503,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.5082,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.913699150085449,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8653,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.9874722957611084,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.5091,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.7036550045013428,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.6436,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.1027770042419434,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7771,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.8439675569534302,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2235,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 8.07174301147461,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4817,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.4386396408081055,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.725,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 3.713127851486206,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.1037,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.2775169014930725,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2505,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 4.276524543762207,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.7761,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 6.455582618713379,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.8323,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.1010710000991821,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.3133,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.4408540725708008,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2172,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.2367064505815506,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.996,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.8455318212509155,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.4503,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2.002019803149107e+16,
365
+ "train_loss": 0.714879515171051,
366
+ "train_runtime": 248.3761,
367
+ "train_samples_per_second": 1.61,
368
+ "train_steps_per_second": 0.403
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2.002019803149107e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8083c948c51172cb731618e51b90a12546b054bf8762835f1d8f32c30904a0d5
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a0d9ba06d1bf534d64476ca8183bf94a7cdc5e70ec3d07c42837d157c9a4a0b
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e305ef4e1ba0d2ce1eabaa6f27d363d493e0800b1743aa78433ec86ed049f78
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f79c8369cf454e0f47a9171dc1cabedfb911e727f8a2cc2373df780aae495e
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b279129dda29d997289d3ed732f3f71df77a02625d140feb5553a9b77a65f530
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1115eb7ff110cda959261def5f6bb34bd3f6074286ec6891c811e4b5fb3fab9
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886257f645888c835465772d39b989f229623bd4da1529f6b28ef14ae2ffd167
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da72acd2878f27d6054be52695e145be79f4f1ed7b1fc8ce778cc5ef3ea2220c
3
+ size 389170122
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 2.3232831954956055,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0249,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 2.5343315601348877,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.8533,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.305483341217041,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3197,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.517935037612915,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.1904,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.153202533721924,
42
+ "learning_rate": 2e-05,
43
+ "loss": 2.3176,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 3.158588171005249,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.7701,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.5296018719673157,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5261,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.1179733276367188,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5498,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.0289561748504639,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4771,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.6146349906921387,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.9022,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.7544004917144775,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9887,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.7699713706970215,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.5863,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 6.096447944641113,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.7896,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.786501169204712,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.9298,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.055405616760254,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.8627,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.4495849609375,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5812,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 0.9012460708618164,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.2027,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.7602607011795044,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5884,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 1.8072483539581299,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1502,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.375112295150757,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7885,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.289775371551514,
154
+ "learning_rate": 2e-05,
155
+ "loss": 2.5796,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.5001745223999023,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6879,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.169258952140808,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.8135,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.9684996604919434,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9738,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 2.281973361968994,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.1334,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.8545925617218018,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.7442,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 6.913942337036133,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2487,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.960796356201172,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.6527,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.8161797523498535,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.2346,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.4226303100585938,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.4142,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.7144083976745605,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6787,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.2450892925262451,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.6321,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.3646719455718994,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6567,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.297370195388794,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.6927,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.9890066385269165,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.2573,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 4.081875801086426,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.2384,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.1986734867095947,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.3413,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.6766782402992249,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.3358,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.628237724304199,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.882,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.969377040863037,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.4629,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 4.985935688018799,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.8891,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.395092010498047,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2739,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.5362117290496826,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0327,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.663097858428955,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.3678,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 8.288853645324707,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8778,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.423278570175171,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.9577,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 2.7824862003326416,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.774,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.8134786486625671,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3738,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.9615880250930786,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.6358,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.486088752746582,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.4703,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2.0033979121926144e+16,
365
+ "train_loss": 0.9942560279369355,
366
+ "train_runtime": 250.7882,
367
+ "train_samples_per_second": 1.595,
368
+ "train_steps_per_second": 0.399
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2.0033979121926144e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6337f27b607152eee0d1b4ea2358cf08126147b4914b73bd5a8c8903c1ab0ced
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2c476c9efb2d640b2d4693eb7ca33ed8a81427598a6ce8616fa53381917a31
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932993b3ff1dc5e604696e5e767669c7c0b93343ea098462f7627f54cd9eba93
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c75322d52cd772b2db81ae2fa8c3c26dadfd35e55af48985ff630b2144b0af6
3
+ size 389170582
client_states_fedavg_hetero_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d327dca77c8944915eddac9b6933354e5d84bce5c15992ac927cdd1e864ad3
3
+ size 389170122