thkim0305 commited on
Commit
90c20dc
·
verified ·
1 Parent(s): f1e4490

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json +364 -0
  10. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json +364 -0
  19. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json +364 -0
  28. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json +364 -0
  37. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json +364 -0
  46. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857e6d421f7321c12fe753e5d706f2fed3c56cd89f87855a28a6dd7d8fd927af
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea0b0777d782865d3402d1597c627ad09d836fbfc86ec5346b2269a5db864c8a
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502cb07864fd3153066c9bb6e916b9a3d542c551fbd8df4e1f23c5c2a2eb74f6
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d161c3bdf3434da4627ca3c8c465d70c023552bd189055da3a9c1611ae3232
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8abef3ac8894468d37e796f4790964bffef032cdd9365e7448f55a72c4ad1c67
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:763351182d6a05485fa69a698f1f7062d6104cb6111340d7d7abe32fa64ad8a3
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:467f699937a5cd7720442e09e5b47ea3eaf77b4604e673f05ec68159a694011b
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:550bfb4401128e0b71618eafdc31c528052668b811e502b8814e617de6aee7d2
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 92,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.021739130434782608,
13
+ "grad_norm": 6.09108304977417,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5907,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.043478260869565216,
20
+ "grad_norm": 3.6769254207611084,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9602,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06521739130434782,
27
+ "grad_norm": 0.5363268256187439,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.036,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08695652173913043,
34
+ "grad_norm": 0.43710896372795105,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.1849,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10869565217391304,
41
+ "grad_norm": 2.068805694580078,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1805,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.13043478260869565,
48
+ "grad_norm": 1.2317078113555908,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8115,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.15217391304347827,
55
+ "grad_norm": 2.86909818649292,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3496,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.17391304347826086,
62
+ "grad_norm": 1.2622524499893188,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.2863,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1956521739130435,
69
+ "grad_norm": 4.501521587371826,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2824,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.21739130434782608,
76
+ "grad_norm": 3.231552839279175,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.2514,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2391304347826087,
83
+ "grad_norm": 1.5159425735473633,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4057,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.2608695652173913,
90
+ "grad_norm": 7.557904243469238,
91
+ "learning_rate": 2e-05,
92
+ "loss": 2.476,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.2826086956521739,
97
+ "grad_norm": 2.5409963130950928,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2994,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.30434782608695654,
104
+ "grad_norm": 0.06883285939693451,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0905,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.32608695652173914,
111
+ "grad_norm": 3.767472267150879,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5633,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.34782608695652173,
118
+ "grad_norm": 2.7977592945098877,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1898,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3695652173913043,
125
+ "grad_norm": 2.504427194595337,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7027,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.391304347826087,
132
+ "grad_norm": 0.23567438125610352,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0205,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.41304347826086957,
139
+ "grad_norm": 6.038938522338867,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9922,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.43478260869565216,
146
+ "grad_norm": 3.7713494300842285,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.2868,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.45652173913043476,
153
+ "grad_norm": 1.8124828338623047,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.7291,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4782608695652174,
160
+ "grad_norm": 5.478545665740967,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7223,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.5,
167
+ "grad_norm": 0.34744882583618164,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3988,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.5217391304347826,
174
+ "grad_norm": 1.2364314794540405,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.114,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5434782608695652,
181
+ "grad_norm": 5.041813850402832,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.668,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5652173913043478,
188
+ "grad_norm": 7.402784824371338,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.1368,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5869565217391305,
195
+ "grad_norm": 0.8266273736953735,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0698,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.6086956521739131,
202
+ "grad_norm": 2.409656286239624,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.1547,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.6304347826086957,
209
+ "grad_norm": 0.29032644629478455,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.6191,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6521739130434783,
216
+ "grad_norm": 2.472151517868042,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.2694,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6739130434782609,
223
+ "grad_norm": 3.610250473022461,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.82,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6956521739130435,
230
+ "grad_norm": 1.7341822385787964,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.6236,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.717391304347826,
237
+ "grad_norm": 0.2348906695842743,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.3063,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7391304347826086,
244
+ "grad_norm": 4.811798572540283,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.9939,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7608695652173914,
251
+ "grad_norm": 1.6772363185882568,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.1621,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.782608695652174,
258
+ "grad_norm": 5.120459079742432,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.1419,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.8043478260869565,
265
+ "grad_norm": 0.06929455697536469,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.2167,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.8260869565217391,
272
+ "grad_norm": 2.0615053176879883,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.2673,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8478260869565217,
279
+ "grad_norm": 2.242427110671997,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1578,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8695652173913043,
286
+ "grad_norm": 4.344549179077148,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.953,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.8913043478260869,
293
+ "grad_norm": 3.4479851722717285,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5008,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.9130434782608695,
300
+ "grad_norm": 4.703179836273193,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.2355,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.9347826086956522,
307
+ "grad_norm": 3.625882387161255,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.7881,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9565217391304348,
314
+ "grad_norm": 0.7072364687919617,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5925,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9782608695652174,
321
+ "grad_norm": 1.2554280757904053,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.2273,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 1.0,
328
+ "grad_norm": 2.71370530128479,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.1592,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 1.0,
335
+ "step": 92,
336
+ "total_flos": 2016868504174592.0,
337
+ "train_loss": 0.6301771117293317,
338
+ "train_runtime": 196.5079,
339
+ "train_samples_per_second": 1.873,
340
+ "train_steps_per_second": 0.468
341
+ }
342
+ ],
343
+ "logging_steps": 2,
344
+ "max_steps": 92,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 1,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": false,
355
+ "should_training_stop": false
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 2016868504174592.0,
361
+ "train_batch_size": 1,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b137fdf81538eabaa3a8e44061ba978549e8e0f750d39ed8894884abd24e61
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af89fb7691ce8c2200477f096473849c5320b579dcfc6ca95024281e3a266b6
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ab300618708290e4d43828e38b50877edf7bb3edc2f66910325001c1ec26db
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e76d8d36cfbaa81321d35f83914bc805f951764c7c8c1eb2d9f7d77ef7a239
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31502af9bb7a21b0bb183396d7c5a09d3ac8fa8bba158a9f2309a13a95dd8c2
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd06ed2f69855ac79ade36194b719695cb03195a47d8ad3db0bdd40e66a1324
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f896f688718a2aa7fcab54b474461aa6bcd4c8d2592b4f7acc3ce76a3f131a
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:670739b0e450b22b0ba067dd558ccb7491d152313a78fa9691de3f1658719099
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 92,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.021739130434782608,
13
+ "grad_norm": 0.17322731018066406,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.007,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.043478260869565216,
20
+ "grad_norm": 0.4034227430820465,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0669,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06521739130434782,
27
+ "grad_norm": 0.047655943781137466,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.014,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08695652173913043,
34
+ "grad_norm": 0.029691645875573158,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0985,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10869565217391304,
41
+ "grad_norm": 0.0031738663092255592,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0029,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.13043478260869565,
48
+ "grad_norm": 0.7081104516983032,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1851,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.15217391304347827,
55
+ "grad_norm": 0.41896992921829224,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0237,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.17391304347826086,
62
+ "grad_norm": 0.48387590050697327,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0193,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1956521739130435,
69
+ "grad_norm": 0.0010938448831439018,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5725,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.21739130434782608,
76
+ "grad_norm": 0.1128813624382019,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0055,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2391304347826087,
83
+ "grad_norm": 0.004215009044855833,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0004,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.2608695652173913,
90
+ "grad_norm": 0.053580403327941895,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0041,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.2826086956521739,
97
+ "grad_norm": 0.08771532028913498,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0955,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.30434782608695654,
104
+ "grad_norm": 0.4550749361515045,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0213,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.32608695652173914,
111
+ "grad_norm": 2.2826590538024902,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.22,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.34782608695652173,
118
+ "grad_norm": 0.0036860036198049784,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0015,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3695652173913043,
125
+ "grad_norm": 0.7332060933113098,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0539,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.391304347826087,
132
+ "grad_norm": 0.15167345106601715,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1528,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.41304347826086957,
139
+ "grad_norm": 0.0022186979185789824,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0634,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.43478260869565216,
146
+ "grad_norm": 6.4484453201293945,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.7515,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.45652173913043476,
153
+ "grad_norm": 0.00416164705529809,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0016,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4782608695652174,
160
+ "grad_norm": 0.034967318177223206,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0752,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.5,
167
+ "grad_norm": 0.011398707516491413,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0103,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.5217391304347826,
174
+ "grad_norm": 0.8181813955307007,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0635,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5434782608695652,
181
+ "grad_norm": 0.030382607132196426,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0022,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5652173913043478,
188
+ "grad_norm": 0.005472751799970865,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4915,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5869565217391305,
195
+ "grad_norm": 0.012804504483938217,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0789,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.6086956521739131,
202
+ "grad_norm": 0.5662734508514404,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1313,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.6304347826086957,
209
+ "grad_norm": 0.15565143525600433,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1838,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6521739130434783,
216
+ "grad_norm": 0.03305691480636597,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0302,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6739130434782609,
223
+ "grad_norm": 0.11561785638332367,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0105,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6956521739130435,
230
+ "grad_norm": 0.01182617712765932,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0043,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.717391304347826,
237
+ "grad_norm": 0.04873761162161827,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0059,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7391304347826086,
244
+ "grad_norm": 0.07617998868227005,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.0141,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7608695652173914,
251
+ "grad_norm": 0.8835837244987488,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0333,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.782608695652174,
258
+ "grad_norm": 0.7749724388122559,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0851,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.8043478260869565,
265
+ "grad_norm": 0.3363087475299835,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.027,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.8260869565217391,
272
+ "grad_norm": 0.0710146352648735,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0093,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8478260869565217,
279
+ "grad_norm": 0.015138098038733006,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.0216,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8695652173913043,
286
+ "grad_norm": 0.19736772775650024,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0327,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.8913043478260869,
293
+ "grad_norm": 0.05040005221962929,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0043,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.9130434782608695,
300
+ "grad_norm": 0.3938082754611969,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.1232,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.9347826086956522,
307
+ "grad_norm": 2.0402262210845947,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.1115,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9565217391304348,
314
+ "grad_norm": 1.9586436748504639,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2655,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9782608695652174,
321
+ "grad_norm": 2.773951292037964,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.3761,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 1.0,
328
+ "grad_norm": 0.30637216567993164,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0126,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 1.0,
335
+ "step": 92,
336
+ "total_flos": 2032950489120768.0,
337
+ "train_loss": 0.12097753487203432,
338
+ "train_runtime": 198.5545,
339
+ "train_samples_per_second": 1.853,
340
+ "train_steps_per_second": 0.463
341
+ }
342
+ ],
343
+ "logging_steps": 2,
344
+ "max_steps": 92,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 1,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": false,
355
+ "should_training_stop": false
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 2032950489120768.0,
361
+ "train_batch_size": 1,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040df4945fad36c157df705c9dbfc923e8eec981105698ce5843af65c6ae16c0
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3a3c0e528a95b0bb130571fcf86d9981186ed8aa94b4c3e14222e3b47b0d1f
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638b86ad8e0f9afc4f3f1f52d89012e751c1e703f58c00c8fccfb31bea890f61
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a7bcebed8e96b052f66787b8fb5ab76be240ba9a8712336fd509c4cb800365
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a42d516569f5ded0c385f9bbb5547c1cbb1d59eb9434227156f26fcb67f784c
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eaf002e494f4431cf9c54a6356b86639236ef8d3a4e777d4bf5edcba081c20f
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0f285103af3edf0700ce58157257601eb492e783732711a4937f09ef70cafba
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d312a45706dcde5f13aa89b36bf97e835fd187df9df75d67658e7cdbd1ed075b
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 92,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.021739130434782608,
13
+ "grad_norm": 1.1296067237854004,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3841,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.043478260869565216,
20
+ "grad_norm": 1.9366317987442017,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4115,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06521739130434782,
27
+ "grad_norm": 4.050197124481201,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.8967,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08695652173913043,
34
+ "grad_norm": 0.7215381264686584,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.8096,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10869565217391304,
41
+ "grad_norm": 0.4154060184955597,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2618,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.13043478260869565,
48
+ "grad_norm": 1.4991834163665771,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.731,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.15217391304347827,
55
+ "grad_norm": 0.3260190188884735,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5441,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.17391304347826086,
62
+ "grad_norm": 0.7272593379020691,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4451,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1956521739130435,
69
+ "grad_norm": 1.0374034643173218,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2988,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.21739130434782608,
76
+ "grad_norm": 1.367766261100769,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3249,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2391304347826087,
83
+ "grad_norm": 1.4096276760101318,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3398,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.2608695652173913,
90
+ "grad_norm": 0.802492082118988,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5804,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.2826086956521739,
97
+ "grad_norm": 2.32653546333313,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4343,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.30434782608695654,
104
+ "grad_norm": 0.14287668466567993,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0477,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.32608695652173914,
111
+ "grad_norm": 0.8296651244163513,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6951,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.34782608695652173,
118
+ "grad_norm": 0.9478865265846252,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3354,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3695652173913043,
125
+ "grad_norm": 2.4494006633758545,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7025,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.391304347826087,
132
+ "grad_norm": 0.08148877322673798,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1202,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.41304347826086957,
139
+ "grad_norm": 1.3430149555206299,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4278,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.43478260869565216,
146
+ "grad_norm": 0.4023633599281311,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4105,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.45652173913043476,
153
+ "grad_norm": 1.502210259437561,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6282,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4782608695652174,
160
+ "grad_norm": 0.5279916524887085,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1886,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.5,
167
+ "grad_norm": 0.4618723690509796,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.1187,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.5217391304347826,
174
+ "grad_norm": 1.1924479007720947,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4895,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5434782608695652,
181
+ "grad_norm": 1.406653881072998,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.462,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5652173913043478,
188
+ "grad_norm": 1.7467128038406372,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4685,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5869565217391305,
195
+ "grad_norm": 1.0591291189193726,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.7101,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.6086956521739131,
202
+ "grad_norm": 1.112949013710022,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3761,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.6304347826086957,
209
+ "grad_norm": 2.589237689971924,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.8964,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6521739130434783,
216
+ "grad_norm": 1.2493059635162354,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.7433,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6739130434782609,
223
+ "grad_norm": 1.9061592817306519,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7149,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6956521739130435,
230
+ "grad_norm": 2.45300555229187,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.4371,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.717391304347826,
237
+ "grad_norm": 2.228710651397705,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6653,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7391304347826086,
244
+ "grad_norm": 0.8008307218551636,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.1514,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7608695652173914,
251
+ "grad_norm": 0.01986389420926571,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.2807,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.782608695652174,
258
+ "grad_norm": 1.2030994892120361,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.2121,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.8043478260869565,
265
+ "grad_norm": 0.6025874614715576,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.1727,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.8260869565217391,
272
+ "grad_norm": 0.3984823524951935,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.1357,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8478260869565217,
279
+ "grad_norm": 0.6921195387840271,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.2836,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8695652173913043,
286
+ "grad_norm": 0.04382755234837532,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.5031,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.8913043478260869,
293
+ "grad_norm": 1.5162211656570435,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.3304,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.9130434782608695,
300
+ "grad_norm": 0.6259192824363708,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.1296,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.9347826086956522,
307
+ "grad_norm": 1.5014652013778687,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.6724,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9565217391304348,
314
+ "grad_norm": 4.528606414794922,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5261,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9782608695652174,
321
+ "grad_norm": 0.15331390500068665,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0363,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 1.0,
328
+ "grad_norm": 1.2000925540924072,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.3445,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 1.0,
335
+ "step": 92,
336
+ "total_flos": 4792956951199744.0,
337
+ "train_loss": 0.4756195545196533,
338
+ "train_runtime": 313.428,
339
+ "train_samples_per_second": 1.174,
340
+ "train_steps_per_second": 0.294
341
+ }
342
+ ],
343
+ "logging_steps": 2,
344
+ "max_steps": 92,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 1,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": false,
355
+ "should_training_stop": false
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 4792956951199744.0,
361
+ "train_batch_size": 1,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968963aca4a3d7d2ca1d2d8db5c362658cefa72d304a3ec022ac25e67d846a42
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccfffb49d254686ff927002aa256e455a85c432e344013b10a52c16c83f78700
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa459bea49cae2643832fcda9ee0a85fbfd03857a1741def05d71736fb5f422
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f5215d3edcca5d04b119de86073ce299e15e1ce3cb8b283190c7eb1c4855d0
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b6d7e4c9ee160535596a15f0a9cdd14bbec4e79a90e4e93100c6c114348793f
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572561376a76a93533269e55e42451d11cac7a33012fe74b4310f133756afd33
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38317de6bb0584ebac545a2faf4129d6e2e417a38865720382aa265ca7f16623
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dd53b66a9fcdd90b3829d6e14befc65adc080086aceec88646c1135d1a0efd9
3
+ size 369837282
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 92,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.021739130434782608,
13
+ "grad_norm": 4.441051006317139,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.992,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.043478260869565216,
20
+ "grad_norm": 3.2609739303588867,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.2689,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06521739130434782,
27
+ "grad_norm": 3.7552318572998047,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1606,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08695652173913043,
34
+ "grad_norm": 3.9538447856903076,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2226,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10869565217391304,
41
+ "grad_norm": 7.879010200500488,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.7328,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.13043478260869565,
48
+ "grad_norm": 3.853842258453369,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6685,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.15217391304347827,
55
+ "grad_norm": 2.0626301765441895,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6801,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.17391304347826086,
62
+ "grad_norm": 7.179374694824219,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.4164,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1956521739130435,
69
+ "grad_norm": 1.8957184553146362,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7125,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.21739130434782608,
76
+ "grad_norm": 3.1610910892486572,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.5239,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2391304347826087,
83
+ "grad_norm": 4.4256672859191895,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5069,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.2608695652173913,
90
+ "grad_norm": 4.9362335205078125,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.8549,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.2826086956521739,
97
+ "grad_norm": 4.253013610839844,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.0589,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.30434782608695654,
104
+ "grad_norm": 4.3326826095581055,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.2678,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.32608695652173914,
111
+ "grad_norm": 6.773116111755371,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.9853,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.34782608695652173,
118
+ "grad_norm": 7.467095851898193,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.3477,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3695652173913043,
125
+ "grad_norm": 0.6017383933067322,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3416,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.391304347826087,
132
+ "grad_norm": 2.026484966278076,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.4528,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.41304347826086957,
139
+ "grad_norm": 1.9307522773742676,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.5631,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.43478260869565216,
146
+ "grad_norm": 3.334146499633789,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.4508,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.45652173913043476,
153
+ "grad_norm": 1.933052659034729,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7173,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4782608695652174,
160
+ "grad_norm": 3.3551347255706787,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.3031,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.5,
167
+ "grad_norm": 2.526327133178711,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.3352,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.5217391304347826,
174
+ "grad_norm": 7.738239765167236,
175
+ "learning_rate": 2e-05,
176
+ "loss": 3.587,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5434782608695652,
181
+ "grad_norm": 1.777616024017334,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6968,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5652173913043478,
188
+ "grad_norm": 4.613940715789795,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.8354,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5869565217391305,
195
+ "grad_norm": 4.080211162567139,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2648,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.6086956521739131,
202
+ "grad_norm": 6.371545314788818,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.6354,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.6304347826086957,
209
+ "grad_norm": 6.669529914855957,
210
+ "learning_rate": 2e-05,
211
+ "loss": 2.2418,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6521739130434783,
216
+ "grad_norm": 3.0953638553619385,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.9421,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6739130434782609,
223
+ "grad_norm": 3.3901684284210205,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.4606,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6956521739130435,
230
+ "grad_norm": 3.4849743843078613,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.2765,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.717391304347826,
237
+ "grad_norm": 5.219615936279297,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.4114,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7391304347826086,
244
+ "grad_norm": 2.6195218563079834,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.3279,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7608695652173914,
251
+ "grad_norm": 3.040009021759033,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.8641,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.782608695652174,
258
+ "grad_norm": 6.53596305847168,
259
+ "learning_rate": 2e-05,
260
+ "loss": 2.4763,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.8043478260869565,
265
+ "grad_norm": 2.496731996536255,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.1084,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.8260869565217391,
272
+ "grad_norm": 2.0288877487182617,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.5153,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8478260869565217,
279
+ "grad_norm": 1.2743315696716309,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.3963,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8695652173913043,
286
+ "grad_norm": 3.1155552864074707,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.6715,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.8913043478260869,
293
+ "grad_norm": 2.91237735748291,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.1998,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.9130434782608695,
300
+ "grad_norm": 3.1403515338897705,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.813,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.9347826086956522,
307
+ "grad_norm": 1.0142074823379517,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.8541,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9565217391304348,
314
+ "grad_norm": 1.7990295886993408,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5481,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9782608695652174,
321
+ "grad_norm": 4.208423137664795,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.8142,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 1.0,
328
+ "grad_norm": 2.21415376663208,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.222,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 1.0,
335
+ "step": 92,
336
+ "total_flos": 2060088319672320.0,
337
+ "train_loss": 1.2114470782487288,
338
+ "train_runtime": 196.2309,
339
+ "train_samples_per_second": 1.875,
340
+ "train_steps_per_second": 0.469
341
+ }
342
+ ],
343
+ "logging_steps": 2,
344
+ "max_steps": 92,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 1,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": false,
355
+ "should_training_stop": false
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 2060088319672320.0,
361
+ "train_batch_size": 1,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b79aaba74229c5b0d837e9ac5cedbe36e425f1a29335f5049a310c762e78dd2
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf24bd98c3ffdea50275e527e934666eb42b753a6b12ec775ef20bbeea84ede
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8404bf0fef92715cc44f4e710595ba6b0457a3a2811d23fbe0f7780ce76d7378
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2f3efc4cf5e0f1fb8cdce98b0704b7c0fa4142fca915fe3774971780347018
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c807d9baa99dede81399aef0bd4796381e8535cb504ab1e1313f129796d2160f
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac48c818c303fe23985c88e295a9d20b095e81c95cb5e49ee64351965035a180
3
+ size 794708086
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca99b55c7f846d80aaf5a644fa43d442cef1b955557d24a529a0d0a46a9aa4c7
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32aa7feb2e6e1e2ba21daefc38b7338c37000c71b139c325d5b0b9d267ae8415
3
+ size 794706058
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 92,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.021739130434782608,
13
+ "grad_norm": 1.7172458171844482,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9706,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.043478260869565216,
20
+ "grad_norm": 1.535640001296997,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0299,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06521739130434782,
27
+ "grad_norm": 1.8416475057601929,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5077,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08695652173913043,
34
+ "grad_norm": 1.3889185190200806,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6756,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10869565217391304,
41
+ "grad_norm": 0.3524916470050812,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6578,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.13043478260869565,
48
+ "grad_norm": 0.24557974934577942,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.245,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.15217391304347827,
55
+ "grad_norm": 1.0801588296890259,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4678,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.17391304347826086,
62
+ "grad_norm": 0.6954711675643921,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.357,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1956521739130435,
69
+ "grad_norm": 0.6120344996452332,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3746,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.21739130434782608,
76
+ "grad_norm": 0.9757769703865051,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3461,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2391304347826087,
83
+ "grad_norm": 1.1557114124298096,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3997,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.2608695652173913,
90
+ "grad_norm": 1.616867184638977,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.8107,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.2826086956521739,
97
+ "grad_norm": 0.6354851722717285,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8208,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.30434782608695654,
104
+ "grad_norm": 1.0514754056930542,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.3612,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.32608695652173914,
111
+ "grad_norm": 1.695558786392212,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.472,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.34782608695652173,
118
+ "grad_norm": 1.0636487007141113,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2386,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3695652173913043,
125
+ "grad_norm": 0.2803816795349121,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1083,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.391304347826087,
132
+ "grad_norm": 0.7646946310997009,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9315,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.41304347826086957,
139
+ "grad_norm": 2.3936870098114014,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.3936,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.43478260869565216,
146
+ "grad_norm": 2.638190269470215,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1179,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.45652173913043476,
153
+ "grad_norm": 0.38522782921791077,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6251,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4782608695652174,
160
+ "grad_norm": 1.31108820438385,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2963,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.5,
167
+ "grad_norm": 0.9962872266769409,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.1005,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.5217391304347826,
174
+ "grad_norm": 0.7223103046417236,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3556,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5434782608695652,
181
+ "grad_norm": 0.9148542881011963,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.7949,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5652173913043478,
188
+ "grad_norm": 0.9974550008773804,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.383,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5869565217391305,
195
+ "grad_norm": 0.21446958184242249,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.2946,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.6086956521739131,
202
+ "grad_norm": 3.7416319847106934,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.4213,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.6304347826086957,
209
+ "grad_norm": 0.9001378417015076,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.8664,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6521739130434783,
216
+ "grad_norm": 1.5950777530670166,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.1924,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6739130434782609,
223
+ "grad_norm": 0.8221089839935303,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6193,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6956521739130435,
230
+ "grad_norm": 1.6373211145401,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7243,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.717391304347826,
237
+ "grad_norm": 1.0375702381134033,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.7575,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7391304347826086,
244
+ "grad_norm": 1.2386682033538818,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6053,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7608695652173914,
251
+ "grad_norm": 0.5664445161819458,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.6014,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.782608695652174,
258
+ "grad_norm": 3.124638080596924,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.3379,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.8043478260869565,
265
+ "grad_norm": 1.7098889350891113,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.0894,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.8260869565217391,
272
+ "grad_norm": 0.9901090264320374,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.4407,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8478260869565217,
279
+ "grad_norm": 1.7773938179016113,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.1619,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8695652173913043,
286
+ "grad_norm": 1.735460877418518,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.5605,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.8913043478260869,
293
+ "grad_norm": 0.8822594285011292,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.466,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.9130434782608695,
300
+ "grad_norm": 1.8246649503707886,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6933,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.9347826086956522,
307
+ "grad_norm": 1.136141300201416,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4576,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9565217391304348,
314
+ "grad_norm": 0.6676300764083862,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.7428,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9782608695652174,
321
+ "grad_norm": 1.731764793395996,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.3401,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 1.0,
328
+ "grad_norm": 0.46977347135543823,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.2042,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 1.0,
335
+ "step": 92,
336
+ "total_flos": 5551594087120896.0,
337
+ "train_loss": 0.7482347903044327,
338
+ "train_runtime": 314.3339,
339
+ "train_samples_per_second": 1.171,
340
+ "train_steps_per_second": 0.293
341
+ }
342
+ ],
343
+ "logging_steps": 2,
344
+ "max_steps": 92,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 1,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": false,
355
+ "should_training_stop": false
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 5551594087120896.0,
361
+ "train_batch_size": 1,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b420870bc6bd7ab2d962e17aa98541dbfd84dee5188b2fbfbbb0a856791b89f7
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cbb8b4939b5f8948cd443b04095ca9d6da1951e0afa830ba0736d8e8486874c
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:710d51c728f923f61a4728c0dd5f92ff6de0fa172d9f28b54d18b73b8e53e928
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf43931ab46a6c71ef5ef5e106a93a987757d773d71308ac150b36cbefe90409
3
+ size 369838470
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a3fad06f1e5ecbc439bd3b8a76ec421977354e24cc59393278cc7de282701b
3
+ size 369837282