thkim0305 commited on
Commit
2ae2113
·
verified ·
1 Parent(s): a851732

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
  10. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
  19. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
  28. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
  37. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
  46. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a2cfa32a281784795db634cd58040e0099dfb3a6b461f3e2811690aed270d4
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:164b99db199f8c1afc330ed8c4d413297e461263d6c0d531ef9471b84e72e5fa
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3ac471bdb0a23646a4b729abc1bddddff87a0202bf5fc658f8eff46c613af93
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddce9ffe75535dd2ce3951b60de8b30f69eca1122d14aa68bce2d74012e20d92
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8940422daafc8acb84347ba4f766e1cb543eb670aa45b0420a9a5227c22ba7e6
3
+ size 295584650
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1884b1ce6eb419994414bec53db3f25c876be84c73dd4b6bac19b279a37ae199
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dba57bcea505e8a5c94f327f555ddca8d1ae8fa1037bae98cdcc235d5c2709b5
3
+ size 295584650
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ea8ab72fae35f37e9ace071574cc20c4fbfc4c0f331de08fd89db0ec7fd1c0
3
+ size 295584650
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.4357949495315552,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9752,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 7.484304904937744,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3921,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.0805540084838867,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.766,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.26295432448387146,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2986,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.8822102546691895,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.3405,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.02680317871272564,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3358,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.1298555135726929,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6812,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.079339027404785,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.6571,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 2.0236611366271973,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4357,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.0701911449432373,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.5624,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.1642799377441406,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4102,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.8954672813415527,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.296,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.15814208984375,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2499,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.06376051902771,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6663,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.37149399518966675,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1357,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.2507413625717163,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1487,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.1001901626586914,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9658,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 3.733464002609253,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9133,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.3771137595176697,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2268,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.6037631034851074,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3637,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.1135358810424805,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.5037,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 12.166988372802734,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.5196,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.188719391822815,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.7123,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.858033299446106,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3241,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.1797194480895996,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3886,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.2527966499328613,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3365,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 5.527441024780273,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.4229,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 2.6085422039031982,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.4734,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.661068320274353,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.3244,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.1758453845977783,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.4436,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.1051682233810425,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.4639,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.56185245513916,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7811,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.042705774307251,
238
+ "learning_rate": 2e-05,
239
+ "loss": 2.9756,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 7.160967826843262,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8687,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 2.546377658843994,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.9039,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.3272593021392822,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4634,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.9541059732437134,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.4152,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 7.0820465087890625,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7858,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.81209659576416,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7721,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 4.922970771789551,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.4672,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.5200783014297485,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4998,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 8.176060676574707,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.6471,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 10.166783332824707,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9579,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.09095903486013412,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2736,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.8619403839111328,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.5136,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.9814728498458862,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.6869,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.469095230102539,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.4635,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 5.562348365783691,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.7081,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.774053931236267,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.1335,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.8918042182922363,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.3731,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 7197827847421952.0,
365
+ "train_loss": 0.7084918451309205,
366
+ "train_runtime": 296.0647,
367
+ "train_samples_per_second": 1.351,
368
+ "train_steps_per_second": 0.338
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 7197827847421952.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a302188073e933752c00de25e2203101e658a30dec07fdab4bd426265d5f2e
3
+ size 140898814
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d64dd65a260b5d8ba3a2efe7eece417e0404a651245f61ff8b0c983786841f6
3
+ size 140898814
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07a0c4b4a5ae5e4161e9cef8f4d38901e2a913350ba5522a1d29d4212e1ffb1
3
+ size 140898814
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4254a77f1aa1f047303e2aeaf86752a06c02eb5e52ec452b7fcc782f52b52d
3
+ size 140898814
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219c8d3bd85819db008faa389480875901a7069c993bae6623244671878646de
3
+ size 140898410
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7063fbc35cc60e6bed46affd0034dc1ed7b58aff7ad80d34b2a8a99308364ea6
3
+ size 140898814
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585947acf8c75eb8dde19ac6a1ed4cdbcb9a0d357157898aa9255c927d7c62ea
3
+ size 140898410
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b2c2bf6e18599c6416fab5f06330502ca9308ae9865311ebc522ed87568e55
3
+ size 140898410
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 2.9427311420440674,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0288,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 6.741145610809326,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.4761,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.015233039855957,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.5796,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.0968964099884033,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.0918,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.706130027770996,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.0278,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.9777791500091553,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.8856,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 2.209172487258911,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.3796,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.455867052078247,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.2993,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.4126646518707275,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.2639,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.840634346008301,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.4814,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 2.0498416423797607,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.6443,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.3178482055664062,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.3003,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.242715358734131,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.4062,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.3701682090759277,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.5181,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.3877534866333008,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.5938,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.308324933052063,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.5723,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.3145534992218018,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3368,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 4.586231231689453,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.3999,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 5.7817888259887695,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4358,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.2060853242874146,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.9374,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.4397454261779785,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.1576,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.8089556694030762,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.2778,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.1753023862838745,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.3643,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.98372483253479,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.4053,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.529714584350586,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.4272,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.703952312469482,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.6426,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 3.705620527267456,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.5771,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.1030988693237305,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.2424,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.4728143215179443,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.2578,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 1.7469069957733154,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.2402,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 2.4544708728790283,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.4639,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.6641395092010498,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.1909,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.0484120845794678,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.4282,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.478560447692871,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.3979,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.1397147178649902,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.5562,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.3987221717834473,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.4531,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.8414459228515625,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.4473,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.1021289825439453,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3472,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.8407915830612183,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.2642,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.4831814765930176,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.3237,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.4579954147338867,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.1968,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.551758289337158,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.5654,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.751859188079834,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.2002,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 2.306459903717041,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.3477,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.63804292678833,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.4023,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 2.2834153175354004,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.3506,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 4.944771766662598,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.627,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.705222249031067,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.2178,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.038778305053711,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.3418,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.250809907913208,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.2407,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2949501438394368.0,
365
+ "train_loss": 1.3722802734375,
366
+ "train_runtime": 245.4499,
367
+ "train_samples_per_second": 1.63,
368
+ "train_steps_per_second": 0.407
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2949501438394368.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7862e641bae9b1c1ff58cceb03a83630ad7cf69a12c1166502c070f23b2d02d9
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e894638477fe4d5df5e29deebcb475380243f8770751c46fc8c27c5353930445
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5feb2ae3f928072c4ee2154140d7034edc181a88544b9fd3e627d79ddbd1807e
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b772019cbba5031146eb8804cb175b47ef30a03774f0dd273bfd862b6c5ba52
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785ce1c8ffeb43b8dee5b41dd20edbb058f523c2db0ed6b8af8ffda877b8b1e6
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ba3ab83896fcaa00d07ee5061d394d33a34434d3f42e173b68f0a154d8472a
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef05687dd4b30bcee4fdd616997a10b003f3ef39210a5ac61b153007d271fd50
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f8c6de60f28ae04a762180348c698a8c16fccd95e77755909e614b573f4117
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.2558865547180176,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9064,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.991973340511322,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.1233,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 6.5367913246154785,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.9525,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 5.972145080566406,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.1308,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.799992561340332,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7781,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.8599284887313843,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2985,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.0901894569396973,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.287,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 3.8889167308807373,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.7026,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.7417221069335938,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2317,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.34332019090652466,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3007,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.8413283824920654,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5139,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.948110103607178,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.0095,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.9708259105682373,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.0875,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 6.128609657287598,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.1535,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.5443027019500732,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.2954,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.782371997833252,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2122,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 0.9455428123474121,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4753,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 5.72688627243042,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8696,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.12677001953125,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4472,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.0864536762237549,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.2052,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 3.737551212310791,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.422,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.5829198360443115,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5197,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 2.83404278755188,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3223,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.3315060138702393,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5508,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.9215158224105835,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.1513,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 5.178053379058838,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.7244,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 6.672908782958984,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.8517,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 6.269854545593262,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.2648,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 8.046138763427734,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1398,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 5.5552077293396,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.1731,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 8.006683349609375,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9849,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 9.458540916442871,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.6968,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.7672533988952637,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.579,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 3.504437208175659,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.4301,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 5.53498649597168,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7245,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.9111721515655518,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.7371,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 8.062969207763672,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.7654,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 7.899918079376221,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.6479,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.5469361543655396,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7988,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 11.664484024047852,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.4557,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.4525814354419708,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4412,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 5.048192977905273,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.5993,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 3.5683045387268066,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.6205,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 4.712682723999023,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4364,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 9.933538436889648,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.3851,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 4.783173084259033,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.4456,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 2.395021915435791,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7315,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 3.892876148223877,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8238,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.3703393936157227,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.3309,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.7113988995552063,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.8155,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 1.570782887018496e+16,
365
+ "train_loss": 0.7710115814208984,
366
+ "train_runtime": 425.933,
367
+ "train_samples_per_second": 0.939,
368
+ "train_steps_per_second": 0.235
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 1.570782887018496e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b930dbca6b179a5bc29fc350fc47779afbb1a26cac6de123dcc4a530f9c5359
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e03e5d95385f51204779d8e410237239d9cd5c50011063fc3ce4a6655e6982
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6496615d1b538df97655e76e86eeec11e6de6e8a873239cc7fa24813a095cc1b
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7407557f6cbc453acb51d7a5b78c207e88530b905b32608fadf4aa968eb6181e
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c1c3f23467e3dfff4f5e902e3447f7cee591eb9358914edfa4c90bfc14e000
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b41a2866ab605fe35c2c230b5f3194131b98959bd35153dd1faf39913ad83748
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:050b3b536ac4f0a5278377639ee83e7c7523c352c429c8f76b48c25936aacd69
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f060f4423b7a30f309ab6cc75bfa0e53766a36e5cba4d26b68665a17c4879ac
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.6915289759635925,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.803,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.5236664414405823,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3719,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.7289407849311829,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6735,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.04733162373304367,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3086,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.7653596997261047,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2241,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 4.275763511657715,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8208,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.8538552522659302,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.806,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.4601213932037354,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.7453,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.620142936706543,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3818,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.4771318435668945,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7292,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.6303713321685791,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3592,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.111276865005493,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.8449,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.3116923570632935,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.7724,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 7.254108428955078,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6019,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.0935317277908325,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3723,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.6438285708427429,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.4673,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 9.997159957885742,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.4406,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.574639916419983,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8156,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.7990269660949707,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9997,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.4288394451141357,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0895,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.751030445098877,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.5163,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.5789567232131958,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.4597,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.716470718383789,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5985,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.0268652439117432,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8839,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 3.183387041091919,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.635,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.6883171796798706,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8253,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 6.081538677215576,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.7804,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 4.235260963439941,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.5859,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.297282695770264,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.0606,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.6412908434867859,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.2726,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.3413950204849243,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.3853,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 6.513116836547852,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7348,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.7780243158340454,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.8995,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.6931712031364441,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6984,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 5.796865940093994,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.6069,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.1728687286376953,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.7387,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.4009567201137543,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.5621,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.6604442596435547,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.8287,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.2604601383209229,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7813,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.5949382185935974,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2305,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 3.986330986022949,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.0291,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.1950682401657104,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.5847,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.359482526779175,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4699,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.38797926902770996,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3318,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.0094640254974365,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7107,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 10.679686546325684,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1031,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.3764183521270752,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.2138,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 17.799694061279297,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6659,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.7701702117919922,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.6446,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.7060821056365967,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.7716,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2.014860909792461e+16,
365
+ "train_loss": 0.6847475278377533,
366
+ "train_runtime": 438.6106,
367
+ "train_samples_per_second": 0.912,
368
+ "train_steps_per_second": 0.228
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2.014860909792461e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae1dcfb75d581b07865f7e719852e53c0313715a95546499deeb3abd5356736
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff62f62346aae26510c829ce3a4dae85ad0d6a1845876f1ce707c7a9bcdc04bb
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fed039615c8f70ef8fb5cdfb55d6628f5a5e1f903d0fe2965d9f5130ec00013
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a09338731636e04d275069a3942e655fb868bcc9839f6cd926e930e422aa74
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bc2093a3f74d5610009eeb3c573f6e2a18d53c23f42b5915b97aa22cefeb78
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8527a6d3e15d2942d59d56a7e75275cef893892fbdfc47601bb38b5e5ae5e0d4
3
+ size 479128390
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270f1fed8bb31676ab501726cee5078fbbff91f34854f365ab9545ffe56e3177
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d09f687a822b4962a11c0546b789db3d0b1e2d02fb5fa81437bb1c01f7181e
3
+ size 479127818
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.9225918054580688,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.991,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.8597076535224915,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3758,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.3333783745765686,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5362,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.5292944312095642,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7394,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 4.7009687423706055,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.8573,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.600105881690979,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8488,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.5355629920959473,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4846,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 3.030879259109497,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5453,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.8978800177574158,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4633,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.1896603107452393,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7015,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 2.6468939781188965,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4669,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 4.17122745513916,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.0091,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 4.072269439697266,
98
+ "learning_rate": 2e-05,
99
+ "loss": 2.0015,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.218262195587158,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.7374,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.5141124725341797,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.726,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.6296266317367554,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5757,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.733184337615967,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3487,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.812718391418457,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2033,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.641838788986206,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.0014,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.547071933746338,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.6443,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 5.26523494720459,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.9649,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.6201882362365723,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.479,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.8259027004241943,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.6319,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 7.226994037628174,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7662,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.9887053966522217,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.0707,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.292367696762085,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4642,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 5.1545867919921875,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.9666,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.924346685409546,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.8803,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 3.1195952892303467,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1754,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 3.6606249809265137,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0212,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.3416082859039307,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8757,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 3.2015907764434814,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4014,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.0905282497406006,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.3452,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 7.018588542938232,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.2981,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 4.384774684906006,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7799,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 4.2297210693359375,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4246,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.036142349243164,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.1518,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.1399793028831482,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.1056,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 9.0904541015625,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.3569,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 4.4876017570495605,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.7918,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.1255276203155518,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.279,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 5.206053256988525,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.561,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.1543315351009369,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0126,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 2.7004189491271973,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.7089,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.0638126134872437,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.5882,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 2.7447102069854736,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5941,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 2.824549674987793,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.5416,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.7135050296783447,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.5581,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.0467679500579834,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.6242,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.1278626918792725,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.4743,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2.015594703277261e+16,
365
+ "train_loss": 0.8230290794372559,
366
+ "train_runtime": 469.2451,
367
+ "train_samples_per_second": 0.852,
368
+ "train_steps_per_second": 0.213
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2.015594703277261e+16,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c511c59b784a0f74f1dde51e0b2080a498cdc8e50966b14aa71fbae0f831686
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e78f755ed86c7ee2e85264e01afa0586b716855ee8452edb1ca788b22bf283
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e717534fafe38840fb8c361d11d64541821aebf3cdd6f39e1f77013437871306
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac8da32902dca181676ea7e5c2f04894bf057c516027b502d39bce2e20e08b2
3
+ size 295585110
client_states_NEW_fedavg_bs4_saveoptim_lr2e-5_5e-5_sc103_4tasks_5rounds_fixtir100_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b5157aa89baeeacaabdfccd8363baebbaf64e31b05696aa6d88d8bc57449b07
3
+ size 295584650