thkim0305 commited on
Commit
d36aad5
·
verified ·
1 Parent(s): ade72f0

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
  10. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
  19. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
  28. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
  37. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
  46. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1406494cc81ecc6645ea66f135fba48b08b852d84e00ed3bf0e64767d6d04ac
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ea62685819911eed87352c12e84beedd3b7005ff276b52e2daafcae72fe3e72
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63bb105f42d890d7de605ee624cd3d3ea1ed3f3ada6504f43e3496b76ed11477
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112d488d6dd765e22fb60780f08192dbec107d54c3bffee59821f65693bdfcdb
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5aca2c4fbdc327cd5143b8cb5ad4053247b5ab4f9748d40dd5d7537104e1818
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb5598f97f2e908b9dbd8c0a79ec7e0c817cb571f8b75c0814b23a8b59b6571
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5058a6b0ac16ea3df6010fc0ee4b1c177f4258b9bef8d032eb3654d64cc8debe
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b17e001b9ce2245203552aeea0089c50dfb58b037215ab9bdc7149147d4fe270
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 8.886947631835938,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0586,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 5.588067054748535,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.1688,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.8690497875213623,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.2551,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.5837838053703308,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.08,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.369279861450195,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6118,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 9.228706359863281,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.7312,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.238631725311279,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4682,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 10.901687622070312,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.1604,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 13.660758018493652,
70
+ "learning_rate": 2e-05,
71
+ "loss": 2.0892,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 7.721104621887207,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0469,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.844839334487915,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0317,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 8.220420837402344,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.7715,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.426258563995361,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.2142,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.8402196764945984,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1679,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.16737699508667,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.733,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 15.065016746520996,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8619,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.412502288818359,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.249,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 10.433276176452637,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6592,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.079629898071289,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8352,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 4.647284984588623,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3377,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.9022719860076904,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.1098,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 7.464609622955322,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.1977,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.5387044548988342,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3511,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 10.601780891418457,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9487,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 2.023946762084961,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.1302,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 11.571028709411621,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.6011,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.87282133102417,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.2818,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.713423490524292,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1644,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 11.62910270690918,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.9844,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.192251682281494,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.092,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.723055362701416,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.38,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 3.7932076454162598,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.1944,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.571885883808136,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.2546,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 5.109936714172363,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.7904,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 11.202879905700684,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7539,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 5.579624652862549,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.2826,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 11.335589408874512,
266
+ "learning_rate": 2e-05,
267
+ "loss": 2.4529,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 5.759703159332275,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7191,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.2622716724872589,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7453,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 3.5320322513580322,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9757,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 4.187987804412842,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5671,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 12.467034339904785,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.9474,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.5042973756790161,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.1792,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 7.86875057220459,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.0289,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.8946852684020996,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.2982,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.4661802053451538,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.8666,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 9.840381622314453,
336
+ "learning_rate": 2e-05,
337
+ "loss": 2.3103,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 3.418431282043457,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.4225,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 10.213849067687988,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.7524,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.9313125014305115,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.2875,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2053257199353856.0,
365
+ "train_loss": 0.7320353770256043,
366
+ "train_runtime": 66.905,
367
+ "train_samples_per_second": 5.979,
368
+ "train_steps_per_second": 1.495
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2053257199353856.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f69b1103c62fa7ae8a22e0e3ccfc2c5e79308e08e508e52b29b119dcf8c9bb
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8d2457f80abb108f40c2f3adb097bef93008977810423c0e675ec1ff635ac3
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29e8a6ed78c6837f10bd56236a5e50ec8ab5e8db6e43ce8422ee8628f347d7da
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0cc677d61964b9e133c65a9ac2dbf43ac29bb3bfea733e5d4b280f6e9d597c
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63a9be8d75b6021bb9d0a8b556780ac78f11a2a7af3eaddf441f7796be24d97e
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ecf9bfb11ce103a00fe8a8b20ca08c2c959d07deea30618626eea0935996543
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d3365bf56511c60e5f95d53a32aa9496ac3000c7ede73328414cc27059fe17
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e9617b10757dbb92359ffbee252fb941dad7087f24c1b2c37cc62e63d0d709
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.028587516397237778,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0267,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.029884878545999527,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0521,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.22514687478542328,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0093,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.2603912055492401,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0116,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.01106907706707716,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0014,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.07203883677721024,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0136,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.0598430410027504,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0018,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 0.4535575211048126,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0383,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.05992416664958,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.121,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.011320442892611027,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0011,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.05899134278297424,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0142,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 0.031861934810876846,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0013,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.6744505167007446,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0483,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.024044731631875038,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.001,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.8456639647483826,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.021,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.27866730093955994,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0593,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.786769390106201,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.119,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.0027335325721651316,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0002,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.270172655582428,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.008,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.465975284576416,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.145,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.029022216796875,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0009,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 6.952610969543457,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.534,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.01336925383657217,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.001,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.02632717974483967,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0011,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.009850457310676575,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0011,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.016133064404129982,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.0298,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.6239187717437744,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0179,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.018147876486182213,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0043,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 0.16743789613246918,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.333,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.0029559978283941746,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0975,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.014169096015393734,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0007,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.015532419085502625,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0346,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 8.49161148071289,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0365,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.4051973819732666,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.0712,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.012287739664316177,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.001,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.317421793937683,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.033,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.141310691833496,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0516,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.012324603274464607,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0009,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 7.358309745788574,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.3286,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.0068267700262367725,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0019,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.027679968625307083,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0009,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.0038455259054899216,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0004,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.014652257785201073,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.8248,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.0212117750197649,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0017,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.9145652651786804,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0257,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 11.541312217712402,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.1007,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.1450074464082718,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0047,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.7812843322753906,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.0183,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.024971066042780876,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0041,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.16854286193847656,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0041,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2069634366832640.0,
365
+ "train_loss": 0.06520086646080017,
366
+ "train_runtime": 67.209,
367
+ "train_samples_per_second": 5.952,
368
+ "train_steps_per_second": 1.488
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2069634366832640.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a670055415d4880f331eb85740f70ee2cb8f486eab509d8c4166f859eaf436b7
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9cf3334d22adf8bc8d9032fa9838217ea2b3978770784adcbb80142c6b5fec3
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:455388704986c44003ba4297c180b4b670acc8e28d55c7f76b1e45185dc8dc23
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3571032b0c4bff4b3b6476039fb98dd4f4127eb67fe700d6fbb28e870706f2
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b93124585d402b5d7320965a6575b46c2453b80c4a8329666426be96a120734
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857252bbf3b233a4e5d00443550447c436ceaa062657d052591fa62a8532bf1a
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b82c7f7336b24eb8a6b2d826058b56d3aedc502c0979070fe9934a543156c75b
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88557795e1bea1f7f2a1071a8e8c0297f54ece9a2f511068c1a7cc4c8d6ccce4
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 2.105396032333374,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2033,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 4.478094577789307,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.3439,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 2.1515796184539795,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.857,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 5.204065799713135,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.963,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 3.9221630096435547,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5015,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 5.2301812171936035,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.9144,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.3219952881336212,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4334,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 4.475340366363525,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.6966,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.9704943895339966,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4297,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.269054412841797,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1395,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.6645411252975464,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1644,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.255583763122559,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.4868,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 6.85243034362793,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8597,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.772141933441162,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3411,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.2398443222045898,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1638,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 4.372430801391602,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.377,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.858203649520874,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6456,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.7406525611877441,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.118,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 4.103638648986816,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1136,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 3.947889804840088,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4615,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.2340707778930664,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7555,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.404001474380493,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.183,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.3455829620361328,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4547,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.6196079850196838,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4648,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.5915979146957397,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.1563,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 7.3114800453186035,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.6918,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 5.828458786010742,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.6894,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 6.562094688415527,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.5373,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 3.23714280128479,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1409,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.15032356977462769,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.1555,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.8144638538360596,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7566,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.5122342109680176,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.3092,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.330488443374634,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.1339,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 3.021944761276245,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3639,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 6.429702281951904,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.859,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.8376978039741516,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.9001,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.4407913088798523,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0276,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.9633609056472778,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.143,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.6581859588623047,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.102,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 17.37574005126953,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.8274,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 10.83108139038086,
294
+ "learning_rate": 2e-05,
295
+ "loss": 2.4245,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.0673346146941185,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0176,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 4.964028358459473,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.7041,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 3.659679651260376,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4814,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 7.119080066680908,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.6169,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.1464005708694458,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.158,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 5.066743850708008,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.4878,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 3.149415969848633,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2938,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 8.900089263916016,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.8197,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.3194432258605957,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.1952,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 4914533793529856.0,
365
+ "train_loss": 0.601297242641449,
366
+ "train_runtime": 105.5222,
367
+ "train_samples_per_second": 3.791,
368
+ "train_steps_per_second": 0.948
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 4914533793529856.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d4aa596eabb5159b4d28b74e46910045877aeffa3554b2f997f94bd0da89fa
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11bc00aaa6e08a36cfd6d3b39d0efaa19cf0e4c8754f92d09b14cef53cfdec8
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa072bb30028b47746493c2225cae7f34658e6a00c00fc580645c4737b8d5b77
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef4c443dc56a11508dbafa313f5867032aef15767cbcf03ac1df7a277ebce41
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37e23d0ba28984416df0b8e066f2bc68f7673b4aa966520dcbc59528864e563
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60423bc56c7bcb5446e76d5a86804e524694b7b68dd7dcf8db3f84290dac4749
3
+ size 184221358
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e1da4e2387cf65fdb295db6605417ce879f31983d9c7247a67df7c290263d5
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24ee88d145c695c184774afccab251d6d4f137b15f280443f3a09044a30ac7b
3
+ size 184220842
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 9.15634536743164,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9397,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.9539599418640137,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4816,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 7.12010383605957,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.0672,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 11.230935096740723,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.3385,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 12.102299690246582,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.314,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 7.587926387786865,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.5088,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 11.77520751953125,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.5007,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 8.806689262390137,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0232,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 6.919975280761719,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5305,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.2757022380828857,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7843,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 5.029139041900635,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5721,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.680845260620117,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.0862,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.745439052581787,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4518,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 5.786810874938965,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3724,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 6.002584934234619,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.7648,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 16.67854118347168,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.1352,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.959960699081421,
126
+ "learning_rate": 2e-05,
127
+ "loss": 2.1569,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 12.444388389587402,
133
+ "learning_rate": 2e-05,
134
+ "loss": 2.0652,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 4.5506157875061035,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8247,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.148169994354248,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1364,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 13.703283309936523,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.249,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 12.692126274108887,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.5431,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 4.30729341506958,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.6453,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 5.500507831573486,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5124,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 5.481895923614502,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.9677,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 9.486720085144043,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.9241,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.8915572762489319,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.875,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.3242287635803223,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.8049,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.5319318771362305,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.8476,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 7.810834884643555,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.4747,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 6.061686992645264,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8335,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 5.4117021560668945,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8062,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 5.741265296936035,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0926,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 5.817255973815918,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.0386,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 6.286365509033203,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1693,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 7.558319091796875,
259
+ "learning_rate": 2e-05,
260
+ "loss": 2.0536,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 4.5277299880981445,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.2765,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 14.435311317443848,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.4086,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 8.644015312194824,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9196,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 9.145062446594238,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.8797,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 3.320108652114868,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.2312,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 5.20671272277832,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6787,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 4.356175422668457,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.126,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 4.225734710693359,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.0222,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 4.531115531921387,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8512,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 12.393115043640137,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.6987,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 6.147823333740234,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7845,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.951950788497925,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3292,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 8.34253978729248,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.201,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 8.68620777130127,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.0787,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2097655350034432.0,
365
+ "train_loss": 1.0275479412078858,
366
+ "train_runtime": 64.3773,
367
+ "train_samples_per_second": 6.213,
368
+ "train_steps_per_second": 1.553
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2097655350034432.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d4d8713c7ae223db37f74fef11dd2ec4f9c3697a7a18a4fd8fd21981be318f2
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e320f0ce1133e0ed75393c6cadb957b05f71007fa15b1c85e91fe4977988b0
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1c3becb212aaf99877fa63249be60c7ddd78eb66f2af6aeda19fab9b0501cd
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220b21c193eaa514cdc503e0420124f21cdb556570549838224cb0a4b0d1b8b9
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c65cf9e5799b751d75d4fec3e33a80a67aaf1f57bd03d2231c084929e9dc7b79
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f29e653ce3f711cae0a7a108c6ce3b0c3128932d3d79ee2b6b3608d29722c0f
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3e2c59ac98b5c0412e87b06cf40a54be4437394cb4638fe685ea2c9dfb8d4a
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd5928b47c6a13dddd075a64d5151863836a4403bbfa51316ed8b6434cbe791
3
+ size 395786922
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.527263879776001,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.4842,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 3.760221242904663,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9147,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.1504747867584229,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.4597,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 2.06758975982666,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6667,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.2761497497558594,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5814,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 3.6250596046447754,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3246,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.6377880573272705,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.681,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 9.429349899291992,
63
+ "learning_rate": 2e-05,
64
+ "loss": 2.0273,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 2.9040491580963135,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.425,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.228061199188232,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4564,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.796684741973877,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.8981,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.6710107326507568,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9774,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.283553123474121,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4572,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.7857582569122314,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.0938,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.051619529724121,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8807,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 5.345559597015381,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8728,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.005192756652832,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4745,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 4.272996425628662,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.1445,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.1995797157287598,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.5203,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 4.592801094055176,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1552,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.3184566497802734,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8856,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.79464054107666,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5977,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.426015615463257,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.8892,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 3.772137403488159,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7512,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.5406640768051147,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3895,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.195302963256836,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.59,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 3.618748426437378,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.8087,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.804460048675537,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.2267,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 3.8953168392181396,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.8058,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.712660551071167,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.841,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 2.6159372329711914,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.64,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 3.1374075412750244,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0344,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 5.049036026000977,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.009,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.3270905017852783,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6997,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 5.63070821762085,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.8887,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.6345393657684326,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5349,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.9860789775848389,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.5652,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 3.1421189308166504,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3851,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.823781728744507,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7622,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.986513614654541,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.4424,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 2.666304111480713,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5415,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.9997386932373047,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.4661,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.9241174459457397,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.3708,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 3.7665395736694336,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.55,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.948091983795166,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8679,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 5.765527248382568,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.7642,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 4.300235271453857,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.8016,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 3.0772464275360107,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6961,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.6090457439422607,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.2739,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 8.003314018249512,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.1299,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5694661670731776.0,
365
+ "train_loss": 0.7740931415557861,
366
+ "train_runtime": 109.547,
367
+ "train_samples_per_second": 3.651,
368
+ "train_steps_per_second": 0.913
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5694661670731776.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a764a4e8709390732d62e78d72e55d509301d9aa9c718c1011f8b52fa63d8a5
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b3ab1a6455447fed4f76fcba7124073ea0fe24aac388e398ecf28fc95c214c
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0145c9d4e23a0839b6206582913016c8edf7581075b67b94b9c4c87e1753b746
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef935d710f91d11a6434902130857ec1b4970fe68b4327e8f8911fc4be7e1ac
3
+ size 395787774
client_states_fedMultipqfullfreezeB_sft_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d16c10719a5f81175aeea9c96393e87f7f0cd11c784297d0a9155891b8b21c
3
+ size 395786922