thkim0305 commited on
Commit
1bc1c5a
·
verified ·
1 Parent(s): 00c5de4

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
  10. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
  19. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
  28. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
  37. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
  46. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56fc85170f7c52f1e9ac2402184118e54e979bc593a942b62491a368125b3962
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8e8f05520a5e213d2233c3d39fea5396c0fea9f71fc5793e8c3da62cfc2f30
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcaa315ca2198284cb385a2533d6ce3049142497e3a92f3bb108e40a31c9d57e
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae342f15c8941cf7b73da8583b0354f25dad9a4f94509e83df8f750aa6e6e14
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e04219236af11cd00523eecab74e6d8eeade903c0afade90763ed8a1afc1384
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c03aede173c98dfe0cddeda01c173a5c7c8b81fe08b4c72bb1df29206d6f5c
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c2403d703ac282fb4c80815544a86be3ae64153a3106271ee403e7c98f9507
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b343b9655b873be9c938471eaeddeb2d0327e041ff7b6ecd42e588cb60ef7a49
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 1.8673917055130005,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5411,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.476343631744385,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0286,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 2.270127296447754,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6094,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 4.0221686363220215,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.0339,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 1.7531377077102661,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6154,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 2.4782919883728027,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3927,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 3.082655668258667,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5301,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 1.6040374040603638,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5479,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 3.0767765045166016,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9126,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 2.117967128753662,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.5114,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 4.502895355224609,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7164,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.821036338806152,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.526,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 2.051273822784424,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.6118,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.2917582988739014,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6338,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 4.47807502746582,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.7823,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 6.614592552185059,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.3987,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.840501546859741,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6544,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 2.3704311847686768,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8242,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 4.5160675048828125,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9385,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.4682557582855225,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.2132,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 2.3830385208129883,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6107,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 3.068246603012085,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6467,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 3.524432897567749,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.602,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 3.5651185512542725,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7029,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 3.3311729431152344,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.5356,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5924252351987712.0,
190
+ "train_loss": 0.7648095703125,
191
+ "train_runtime": 151.5084,
192
+ "train_samples_per_second": 1.32,
193
+ "train_steps_per_second": 0.33
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5924252351987712.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334b9877772a1e30d63aa38a00bfe896e15d6dc09c88dfa1ccab4979c6b4658d
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba2b604640bee7060c99d65ff75b14153ed90fe88af5b278be99184f716b48b
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208506304141fe31fb85d268825264673b4b52ed6aaecb961f3f3d21d3d2fd7f
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a74cebc2d0c93eb7caf98555a9f3baffce9ed8003e2895830d5109ebb38fdc2
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffdd84076af2f4ac0d6e4d0bc7ec7bba103570f06c16bb3798413a752b415ffd
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ee1ccda5a7665844256546c440107724cb0bdfb8b64df7963f3a7fab67ab32e
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22998a139bd48538fe5906098fa3bb1965b44f2ebfecf3909023a90dc7292ba0
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a5c1a43f28032b066c673c6abfe9651e83a59b76cb6708a69fdfd75106fd52
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 4.583834648132324,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6835,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.129000186920166,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6311,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 1.4998234510421753,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5363,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 2.6047112941741943,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6187,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 3.2830750942230225,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.601,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 2.440368413925171,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5193,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 4.117373466491699,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6884,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 5.755882740020752,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.731,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 1.5773918628692627,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4884,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 2.3141613006591797,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.55,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 1.520458698272705,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5297,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 2.141937732696533,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5958,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 3.5650408267974854,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.6784,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.2956409454345703,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.7442,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 2.7933690547943115,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6616,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 4.000720977783203,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.4821,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 5.566102981567383,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4605,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 5.742500305175781,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.4153,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 4.2542405128479,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4783,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 6.833155155181885,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.6371,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 7.5326313972473145,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7853,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 2.1974692344665527,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7622,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 2.442321538925171,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5715,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 2.5812339782714844,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4764,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 5.182116508483887,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6237,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 1.4241485192953856e+16,
190
+ "train_loss": 0.5979843139648438,
191
+ "train_runtime": 252.1553,
192
+ "train_samples_per_second": 0.793,
193
+ "train_steps_per_second": 0.198
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 1.4241485192953856e+16,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06991855fd1b69b3806b8d1eb324b9c0201e478036fd3b65d56b34b2787e5ad
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e6628ce28b478d4a5c9a4cae41524f1b64d84dc93bf7a2f34161dca94fad10f
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8695c1f94b299b5faedaedf37b7a570f7b23e7cd1fe1da35b023a3b1ced9d2f6
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daa8a795000144eef55b204b54325181dd9bda9e639727b62cddafd8082ca064
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6489762c514a53fca0821096bf89c0f31cbaf5e46df0959567c178571ec0560
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a52a2036d00c14f8d20c1cfb605b021050b0d6e423a449f39ea54b7350440d
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5914d4ab3c8d4874bebfbc1901b85323aed556b0db25307d10b91caf203cedb0
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c81a173f5ad5a59863f3ca8f811af06d562e8dbe2fe45ef5fba724b5e8c164a
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 4.558183193206787,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5488,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 2.947014331817627,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3348,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 1.299033761024475,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3584,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 3.6182351112365723,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3221,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 5.334591865539551,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4843,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 8.80336856842041,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6279,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 11.382059097290039,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5401,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 11.98203182220459,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.717,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 6.630352020263672,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.697,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 5.681330680847168,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3569,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 5.276439189910889,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6034,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.0495524406433105,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2887,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 10.429891586303711,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.06,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 5.012072563171387,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6967,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 8.718036651611328,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.7567,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 4.441227912902832,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3985,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.394705057144165,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7157,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 5.729661464691162,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5636,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 9.694987297058105,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9611,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 12.702645301818848,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7439,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 5.634037017822266,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8179,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 5.084414005279541,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7732,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 2.3128256797790527,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5043,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 6.491540431976318,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.6021,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 4.465042591094971,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.0634,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 1.6198255784230912e+16,
190
+ "train_loss": 0.6214560890197753,
191
+ "train_runtime": 274.6327,
192
+ "train_samples_per_second": 0.728,
193
+ "train_steps_per_second": 0.182
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 1.6198255784230912e+16,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1053397e04c343b02320d481ade7174ca2efdfe41829a60dfbb49ebecd579803
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea8e38baaf09ee6510b2aef0a46ddf0be5ca9330c1cae666ca645c7b432ab4e
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dbc3a8d9729d355f15cade994d232b6c0f017a20aa8d73b98f17c228c84361b
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d1655d128c53da006db567fea2fc7434b9b8044b6f845d52b8db99dd2320a4
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29211e918a07e23fe407aeeeb62d9d534e126d7a2a51b8c8f7a1ff088c95be5d
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ea3bcbc18afe15bb2245d06bd000d2e56ecf6f5fe6f33c6ad04379250be315
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6fe3cd7816645ae9460f490fb1037ffd4282c328e9bd8c17e84d97e586b4e7
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100ad256bc9de8669440660abe10a6c5d27f52c4a68a67e134bafa54426c81ac
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 1.39784574508667,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9756,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 2.8972103595733643,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0298,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 1.9185339212417603,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.686,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 1.6782031059265137,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7958,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 1.4360851049423218,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.828,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 2.031978130340576,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.7959,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 2.223369836807251,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9572,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 1.5497606992721558,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5985,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 3.152341842651367,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9308,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.995869755744934,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8724,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 2.4577951431274414,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7107,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 2.686221122741699,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.782,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 2.1014060974121094,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.7136,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.085268974304199,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.723,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 4.33972692489624,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.0076,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 6.186117172241211,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.9203,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 2.994359016418457,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6973,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 5.756070613861084,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.0156,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 1.8890563249588013,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6883,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 2.047783136367798,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.6198,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 3.1189417839050293,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8165,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 2.0808229446411133,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6428,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 1.7849960327148438,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.6548,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 1.386960506439209,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.6403,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 1.6751240491867065,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8999,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2.096063809637581e+16,
190
+ "train_loss": 0.8000931072235108,
191
+ "train_runtime": 272.324,
192
+ "train_samples_per_second": 0.734,
193
+ "train_steps_per_second": 0.184
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2.096063809637581e+16,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:688a3b8604720e57257db0cf64bd1dd9f30ca73277af465d7db58ae4ae98ffa9
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff353918ed1d60324e386bfdc118226bf909d190315a8921ef2e8baa24758a8b
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95a6f306d0adb5c0e05ec683bf401fe4c04d8cac99b9337deec12ee1d0761614
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2212526d781487ec5c8161ff06df8f26308669cb418445de39cc2da74fc005
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2b51e1c7dedf009ec22fcd53095edfc51e52c3502127c959f9f27aab2fd74e3
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9abb78bfcdfa5b5bc3e68940b635fa9f3acda70c69ce4dba811709e0c3d59b74
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebf24c9b4a2f70e078953f2222de038fffc31cbf26633368fbe572f4b844653
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ffc1003199cdb9ce5678a067107dda4bb11f43ee2f32137a9bac5ba6d44053
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 1.4425019025802612,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5973,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 1.215862512588501,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6856,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 0.7811527252197266,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5332,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 1.1735533475875854,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.705,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 2.8437013626098633,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7182,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.6411240100860596,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.4589,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 1.3925532102584839,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.8512,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 3.309992551803589,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9773,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.3441174030303955,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4745,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.3988115787506104,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3502,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 1.3023474216461182,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5154,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 2.3900015354156494,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.8253,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 5.1887125968933105,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2769,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.921790599822998,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.9044,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 2.175091505050659,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6082,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 2.267071485519409,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8991,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.1593542098999023,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6995,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 3.4321177005767822,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7938,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 2.110840082168579,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1223,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.0541820526123047,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.041,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 3.609933614730835,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7096,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.9673751592636108,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7004,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 12.000975608825684,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5836,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 3.2175731658935547,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.824,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 3.6081643104553223,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8113,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2.184824952664883e+16,
190
+ "train_loss": 0.7466500663757324,
191
+ "train_runtime": 278.7893,
192
+ "train_samples_per_second": 0.717,
193
+ "train_steps_per_second": 0.179
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2.184824952664883e+16,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f128e99f0cacbc599ce4a7531221c9fbbb4cfb22fb3910e0fdc28ef2a2dbba
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad23f8d514321daa4aac439f617c0c5875a6d1c6d3ba671988a7b0ee07963eaf
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb433a9db64a00648ed4010a157b6232e6b4a7c2139de17cb01309403eadfe4
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e743a6d1bfa05bf565720cd577338a82db7a896ef110f90486e00d96b73e6d96
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d526931e5403c2c186741e4e1fbc95717cacf149c4f17c90939a694badf824f6
3
+ size 778341034