thkim0305 commited on
Commit
90cd68a
·
verified ·
1 Parent(s): c637e6c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_trainer_state.json +140 -0
  10. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_trainer_state.json +140 -0
  19. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_trainer_state.json +140 -0
  28. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_trainer_state.json +140 -0
  37. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_trainer_state.json +140 -0
  46. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round10_task_vector_local_weights.pth +3 -0
  47. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round11_task_vector_local_weights.pth +3 -0
  48. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round12_task_vector_local_weights.pth +3 -0
  49. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round13_task_vector_local_weights.pth +3 -0
  50. client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round14_task_vector_local_weights.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001239b9907fc929a84da95552b79946dfea7fc9afe27b564c89d394a483a6e9
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2881397815bee454f69c451e489341333626a12e84fa266ac20e3f94a7542a88
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:191f9c953727568a31bf08530d840977ef62df67bb8b347c6dba9fefdf612468
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99c2ca4cf9e865585d42e4fd50f2085bbeeaaad2614d18eecd34a791cf76e8f
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a85cba6ec48ca493cae08766a0e3e950fc3d2b5048f4310163dfc1a61652ec
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b466544c4c7434c0f93ee1cd470194f297a453de9a47d43581d66969685cbd9e
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18c6a0104cb7098aa5fc6bc18debd8f4c5ca43a11f429e2e34927339d33bced
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb49da2b195e0d7d665a196f33c7a88bbca800c6a935d3a0bdb651253adff4f
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07142857142857142,
13
+ "grad_norm": 0.13116712868213654,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.6245,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.14285714285714285,
20
+ "grad_norm": 0.1398446410894394,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.4574,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.21428571428571427,
27
+ "grad_norm": 0.09051910042762756,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.5595,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.2857142857142857,
34
+ "grad_norm": 0.10584211349487305,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.4532,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.35714285714285715,
41
+ "grad_norm": 0.11173545569181442,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.5548,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.42857142857142855,
48
+ "grad_norm": 0.134027361869812,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.6055,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.5,
55
+ "grad_norm": 0.13683006167411804,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.5267,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5714285714285714,
62
+ "grad_norm": 0.1731289029121399,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.5878,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6428571428571429,
69
+ "grad_norm": 0.18668872117996216,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.4697,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.7142857142857143,
76
+ "grad_norm": 0.11167559772729874,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.5806,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7857142857142857,
83
+ "grad_norm": 0.11734828352928162,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.3757,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8571428571428571,
90
+ "grad_norm": 0.1058587059378624,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.4192,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.9285714285714286,
97
+ "grad_norm": 0.17954958975315094,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.6238,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 0.16434884071350098,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.4551,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 28,
112
+ "total_flos": 1075811478142976.0,
113
+ "train_loss": 0.5209636815956661,
114
+ "train_runtime": 99.0551,
115
+ "train_samples_per_second": 1.131,
116
+ "train_steps_per_second": 0.283
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 28,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 1075811478142976.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365692a5df7a5f517f6d2b540f561a5aa7178f9b294306095773e407b50e98be
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d28ad102cbb6a4af3145e8b1f023693d94031be4a514a4c915c11c0cc07856
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05365c6abedcd079fe9f82c191045bdc8f6f2bd5c2b410a7e94e400d0729f563
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d112a7e4976617fba84c4319e3eb552b4db4e78a171b0861e464e6c7441a76
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b036b5b1d8fce19fc85e292d1da4827c59b84c7edfaffa8bd2113d7c054ad7e
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9d085e9128c04ee7a517c1b233f482fcd4f83c364784613e318a238e486aba5
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d41567f2df3edc1d2bc4cc4d2160af34a30a3ed858ec87afa3c59596c30d657
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4158b2b5ccb23a440e32fe44fac7806d26addab849ea06cf2bf0c280f39abf5c
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07142857142857142,
13
+ "grad_norm": 0.15419495105743408,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.3116,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.14285714285714285,
20
+ "grad_norm": 0.23437140882015228,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.2667,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.21428571428571427,
27
+ "grad_norm": 0.20211642980575562,
28
+ "learning_rate": 0.0003,
29
+ "loss": 2.137,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.2857142857142857,
34
+ "grad_norm": 0.14548730850219727,
35
+ "learning_rate": 0.0003,
36
+ "loss": 1.8417,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.35714285714285715,
41
+ "grad_norm": 0.25465503334999084,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.6129,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.42857142857142855,
48
+ "grad_norm": 0.19803431630134583,
49
+ "learning_rate": 0.0003,
50
+ "loss": 1.6624,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.5,
55
+ "grad_norm": 0.19455000758171082,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.7543,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5714285714285714,
62
+ "grad_norm": 0.2205786108970642,
63
+ "learning_rate": 0.0003,
64
+ "loss": 2.2022,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6428571428571429,
69
+ "grad_norm": 0.16035087406635284,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.672,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.7142857142857143,
76
+ "grad_norm": 0.1775389015674591,
77
+ "learning_rate": 0.0003,
78
+ "loss": 1.5621,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7857142857142857,
83
+ "grad_norm": 0.09709464013576508,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.746,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8571428571428571,
90
+ "grad_norm": 0.16427133977413177,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.9581,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.9285714285714286,
97
+ "grad_norm": 0.10141786187887192,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.9874,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 0.11148026585578918,
105
+ "learning_rate": 0.0003,
106
+ "loss": 1.6103,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 28,
112
+ "total_flos": 369994189766656.0,
113
+ "train_loss": 1.5231994943959373,
114
+ "train_runtime": 89.8548,
115
+ "train_samples_per_second": 1.246,
116
+ "train_steps_per_second": 0.312
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 28,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 369994189766656.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec017853bfc82083346ec646beb8037506655fae566da6e2128f7f710687370c
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f89ea8ffe86067b55dc44f4ec34ca6609878e83159f69e8f07bef05b8d40f8
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940e20a3849db838fc6fefb2b889b0ef67af3d44914eeb8745f39af0a373fa34
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde0f62bc84cc014a3c3ba34cd292e976c15062f063e8cd5eb544507652e20cb
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e92cab729623f0c3e76bbdf0032fca52dd7cf774a41d4290fc9e3959f025f7
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71f2543ca714c98b5a64959a6826814dd99810e0b6690c811cbba245422d18f
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5955730454acffaa3d24ef886d967bac079c37b788581fea19ea21c19d246b
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8640bf9b40ae6a0e7661df0676607ce7d8b4fe37a839ff601b34f733564763af
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07142857142857142,
13
+ "grad_norm": 0.13895072042942047,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.3731,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.14285714285714285,
20
+ "grad_norm": 0.1320711225271225,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.5479,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.21428571428571427,
27
+ "grad_norm": 0.16148094832897186,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.7009,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.2857142857142857,
34
+ "grad_norm": 0.12018702924251556,
35
+ "learning_rate": 0.0003,
36
+ "loss": 2.1986,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.35714285714285715,
41
+ "grad_norm": 0.20602719485759735,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.4721,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.42857142857142855,
48
+ "grad_norm": 0.2730118930339813,
49
+ "learning_rate": 0.0003,
50
+ "loss": 1.4336,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.5,
55
+ "grad_norm": 0.17492128908634186,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.6112,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5714285714285714,
62
+ "grad_norm": 0.10725309699773788,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.4441,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6428571428571429,
69
+ "grad_norm": 0.25711339712142944,
70
+ "learning_rate": 0.0003,
71
+ "loss": 1.8929,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.7142857142857143,
76
+ "grad_norm": 0.31338798999786377,
77
+ "learning_rate": 0.0003,
78
+ "loss": 1.4014,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7857142857142857,
83
+ "grad_norm": 0.12460747361183167,
84
+ "learning_rate": 0.0003,
85
+ "loss": 1.2827,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8571428571428571,
90
+ "grad_norm": 0.21422362327575684,
91
+ "learning_rate": 0.0003,
92
+ "loss": 1.4855,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.9285714285714286,
97
+ "grad_norm": 0.14184945821762085,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.8764,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 0.21373428404331207,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.9664,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 28,
112
+ "total_flos": 484500628832256.0,
113
+ "train_loss": 1.5490526471819197,
114
+ "train_runtime": 87.5761,
115
+ "train_samples_per_second": 1.279,
116
+ "train_steps_per_second": 0.32
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 28,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 484500628832256.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1a50843b43cfa32c95b889d281f570537c1378f05a990609a634a2601995f26
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1498f3ef8326bd77e6c944aabe9753ffd7e68263abb0c5f105575b359d8a4efa
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5490baa537e5adb313ff8354fbb07f32fd0c13557f86d8b55cbda4f4151b61
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e126389a4e2fad97b2f79a7beab0d89d2c2cab8be6f46e7e15f922f0d3cf5ccb
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e04d4cba79c1a0928bca1dd3da3030b8cfa33745ba4f2c916e153c48e96aeee6
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:088024c945e45ce17ea0a8342822e667be14ec5b31286804ef64223b1afa92b4
3
+ size 101144758
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29fd3548d52bd7a6243f8ddedc9100363291be1d6814e22909d6f62eaf0e141
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f419a707c0e21f8b6173e3073f9d9cd77cf775eda36e0b454cb713c1fde1648
3
+ size 101142730
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07142857142857142,
13
+ "grad_norm": 0.07443733513355255,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.8302,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.14285714285714285,
20
+ "grad_norm": 0.10261357575654984,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.724,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.21428571428571427,
27
+ "grad_norm": 0.07151076197624207,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.6346,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.2857142857142857,
34
+ "grad_norm": 0.04990497976541519,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.5856,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.35714285714285715,
41
+ "grad_norm": 0.06881256401538849,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.6444,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.42857142857142855,
48
+ "grad_norm": 0.07826367020606995,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.6547,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.5,
55
+ "grad_norm": 0.08683010190725327,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.8423,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5714285714285714,
62
+ "grad_norm": 0.132412850856781,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.6997,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6428571428571429,
69
+ "grad_norm": 0.05028389021754265,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.7901,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.7142857142857143,
76
+ "grad_norm": 0.07787914574146271,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.6354,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7857142857142857,
83
+ "grad_norm": 0.0758640393614769,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.5299,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8571428571428571,
90
+ "grad_norm": 0.05951598286628723,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.6796,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.9285714285714286,
97
+ "grad_norm": 0.09814899414777756,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.6833,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 0.13053660094738007,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.6409,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 28,
112
+ "total_flos": 1827799543316480.0,
113
+ "train_loss": 0.683897099324635,
114
+ "train_runtime": 99.1076,
115
+ "train_samples_per_second": 1.13,
116
+ "train_steps_per_second": 0.283
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 28,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 1827799543316480.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3d9a1649ad6659d64dbe2cfcae0e85e07340e521b91eb687dd9f1d0cb62048
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8252f3e48671e3bb988fdf1440acbdadceb5badcf2c4144ad14206b69175ba
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5705bc2a60c227a8f0ce710d16aded35659350d27826812504b1ec575881c7
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6da18ea1081bb711e9f466f09dc9e759511440c35c117990a70414e6a8eaa1
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30a2fb04b8ba09d0fbb4a5b695ba51f9aae6090980bdc09ad1ca94807fd26c3
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eeead0db8c4e421fe05044c67cd05abcd31e67295a3aa4a6f8838e1f7b56670
3
+ size 173791494
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f35eaef490310ee467b19f18073894613791f9d6c373c67643f3cd487ab114
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67dc838027f600189b50fb161dcaacc1e5aaad3366eaa97b1588b34c4e0a7c9d
3
+ size 173789186
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07142857142857142,
13
+ "grad_norm": 0.19960319995880127,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.8145,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.14285714285714285,
20
+ "grad_norm": 0.20900875329971313,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.0395,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.21428571428571427,
27
+ "grad_norm": 0.08692847937345505,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.5146,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.2857142857142857,
34
+ "grad_norm": 0.17795179784297943,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.7781,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.35714285714285715,
41
+ "grad_norm": 0.19282561540603638,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.7649,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.42857142857142855,
48
+ "grad_norm": 0.20008526742458344,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.7036,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.5,
55
+ "grad_norm": 0.24128301441669464,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.8278,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5714285714285714,
62
+ "grad_norm": 0.22384898364543915,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.7634,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6428571428571429,
69
+ "grad_norm": 0.23955023288726807,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.6968,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.7142857142857143,
76
+ "grad_norm": 0.29193806648254395,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.9766,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7857142857142857,
83
+ "grad_norm": 0.22462812066078186,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.9611,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8571428571428571,
90
+ "grad_norm": 0.1233292669057846,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.6395,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.9285714285714286,
97
+ "grad_norm": 0.33286356925964355,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.6785,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 0.12057407200336456,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.9113,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "step": 28,
112
+ "total_flos": 3492754881511424.0,
113
+ "train_loss": 0.7907240731375558,
114
+ "train_runtime": 114.3445,
115
+ "train_samples_per_second": 0.979,
116
+ "train_steps_per_second": 0.245
117
+ }
118
+ ],
119
+ "logging_steps": 2,
120
+ "max_steps": 28,
121
+ "num_input_tokens_seen": 0,
122
+ "num_train_epochs": 1,
123
+ "save_steps": 500,
124
+ "stateful_callbacks": {
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": false,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 3492754881511424.0,
137
+ "train_batch_size": 1,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round10_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0494acc3ffb6b72cea6b5c7134665c0676acc341948cb275d3954643c17da747
3
+ size 167774710
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round11_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79d6913d23c5d352da1bf8f0a06a87f8ddb4b77ad12a60398c3f0bc6a47efafd
3
+ size 167774710
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round12_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78908f6c162f4c3bc2dc508e3cb3122a39b0e5e339f005a570abacfd355c50f
3
+ size 167774710
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round13_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a047ee61099e3cb76aa26c9b126b5441da39637533cd50581e6e11a2748903c3
3
+ size 167774710
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir28_T0125_decay099/round14_task_vector_local_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ee6a6d6d8105d4d0feac7b0adfc3c131d1a774562b09465c9a4c924054976d
3
+ size 167774710