thkim0305 committed on
Commit
ba4847d
·
verified ·
1 Parent(s): d8baffb

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_trainer_state.json +147 -0
  10. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_trainer_state.json +147 -0
  19. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_trainer_state.json +147 -0
  28. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_trainer_state.json +147 -0
  37. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_trainer_state.json +147 -0
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7683bb4ea2a24dff622304d61701404a0684195b95f9250e01e70995b9f33b60
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0a8f21d51f1d852500947c6f6cd4ac3c7bf8d2f78ca632e5da650501051a821
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdf8d36d0c485c31fabbec778c518ab69e13effb160d8b75bb54118131f4672
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad8f4ea3a7bdefd00087f485fc726583cd3cbd85b917881387e5ea2b95ab34a8
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d462b48368e9cad91c2ccf4c5c05c7752edba6bf41e12f73d3f1df82bed250
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61eac975c6482170877e9be8e10a3dda12ff6115f7e0a496a95fb736de87791
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c83ade3092b38a35d67cde82a2166da52dae0f76f25a41889628aa13c76631
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe15bd6d2f28b6947c6a677d0cf05b69240312e24e89c89412d82c828f62ee00
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 0.24416396021842957,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.4589,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13333333333333333,
20
+ "grad_norm": 0.2861782908439636,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.5497,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "grad_norm": 0.28807416558265686,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.5351,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.26666666666666666,
34
+ "grad_norm": 0.3524809181690216,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.6347,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3333333333333333,
41
+ "grad_norm": 0.21695467829704285,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.5836,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "grad_norm": 0.26244187355041504,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.7104,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4666666666666667,
55
+ "grad_norm": 0.1886732578277588,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.3353,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5333333333333333,
62
+ "grad_norm": 0.3749811351299286,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.6642,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6,
69
+ "grad_norm": 0.3102177083492279,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.6082,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6666666666666666,
76
+ "grad_norm": 0.3232249319553375,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.6706,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7333333333333333,
83
+ "grad_norm": 0.3113941550254822,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.4988,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "grad_norm": 0.31279242038726807,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.6155,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.8666666666666667,
97
+ "grad_norm": 0.28257760405540466,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.529,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9333333333333333,
104
+ "grad_norm": 0.29951271414756775,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.6252,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "grad_norm": 0.28102612495422363,
112
+ "learning_rate": 0.0003,
113
+ "loss": 0.6261,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "step": 30,
119
+ "total_flos": 1196511245369344.0,
120
+ "train_loss": 0.5763573904832204,
121
+ "train_runtime": 48.5145,
122
+ "train_samples_per_second": 2.473,
123
+ "train_steps_per_second": 0.618
124
+ }
125
+ ],
126
+ "logging_steps": 2,
127
+ "max_steps": 30,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 1,
130
+ "save_steps": 500,
131
+ "stateful_callbacks": {
132
+ "TrainerControl": {
133
+ "args": {
134
+ "should_epoch_stop": false,
135
+ "should_evaluate": false,
136
+ "should_log": false,
137
+ "should_save": false,
138
+ "should_training_stop": false
139
+ },
140
+ "attributes": {}
141
+ }
142
+ },
143
+ "total_flos": 1196511245369344.0,
144
+ "train_batch_size": 1,
145
+ "trial_name": null,
146
+ "trial_params": null
147
+ }
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31500f229e4dede815dc1cc073235b4910c5640d3f9f99749e4f72e32ca3db44
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cf1f2ddc94b0d84d41afde7e24759d2211a03e0fab68e0542bb7cbae44d24c
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8231a23a9e887e103a1bea2b34e07dbdd7b32d3e9a3ce6cacb38f9c7d8a84352
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4632dc38995fd9058cb3bb3a74a7a3f0acdd96f2a1da229c53116be8290d5f9
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0374a33829e21756f1c74170b81a6df936c7af2f9da602333c5674eac242a81
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce872529f7431f71b64e45c64eeb5e1968e51ef863244a28fdd348e9286a1f8
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:648dc41a6d4e798c5adf00f42548fcf41d160761046cc2999039fca354944806
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e02abd240d532114b52e1ec2eb1b26449b496cf245448e44f8e802929895f22
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 0.9023004770278931,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.5571,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13333333333333333,
20
+ "grad_norm": 0.5817632675170898,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.7767,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "grad_norm": 0.4354921579360962,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.731,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.26666666666666666,
34
+ "grad_norm": 0.3641677498817444,
35
+ "learning_rate": 0.0003,
36
+ "loss": 1.6872,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3333333333333333,
41
+ "grad_norm": 0.28223559260368347,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.7443,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "grad_norm": 0.2681761384010315,
49
+ "learning_rate": 0.0003,
50
+ "loss": 1.2404,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4666666666666667,
55
+ "grad_norm": 0.44366130232810974,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.9958,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5333333333333333,
62
+ "grad_norm": 0.39765021204948425,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.4386,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6,
69
+ "grad_norm": 0.6642311811447144,
70
+ "learning_rate": 0.0003,
71
+ "loss": 2.1369,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6666666666666666,
76
+ "grad_norm": 0.5510743260383606,
77
+ "learning_rate": 0.0003,
78
+ "loss": 2.0597,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7333333333333333,
83
+ "grad_norm": 0.45161959528923035,
84
+ "learning_rate": 0.0003,
85
+ "loss": 1.6513,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "grad_norm": 0.3321634531021118,
91
+ "learning_rate": 0.0003,
92
+ "loss": 1.605,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.8666666666666667,
97
+ "grad_norm": 0.6213057041168213,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.7325,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9333333333333333,
104
+ "grad_norm": 0.43713563680648804,
105
+ "learning_rate": 0.0003,
106
+ "loss": 1.4591,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "grad_norm": 0.5952392816543579,
112
+ "learning_rate": 0.0003,
113
+ "loss": 2.0974,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "step": 30,
119
+ "total_flos": 305071842656256.0,
120
+ "train_loss": 1.6608658949534099,
121
+ "train_runtime": 42.0423,
122
+ "train_samples_per_second": 2.854,
123
+ "train_steps_per_second": 0.714
124
+ }
125
+ ],
126
+ "logging_steps": 2,
127
+ "max_steps": 30,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 1,
130
+ "save_steps": 500,
131
+ "stateful_callbacks": {
132
+ "TrainerControl": {
133
+ "args": {
134
+ "should_epoch_stop": false,
135
+ "should_evaluate": false,
136
+ "should_log": false,
137
+ "should_save": false,
138
+ "should_training_stop": false
139
+ },
140
+ "attributes": {}
141
+ }
142
+ },
143
+ "total_flos": 305071842656256.0,
144
+ "train_batch_size": 1,
145
+ "trial_name": null,
146
+ "trial_params": null
147
+ }
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:963fba097065a142c0541ce9526facbeae65ff78898ef02a1e2f0f723a862a06
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863dee3962c380286ef4c7b450cb2510f759d0df1cee2bd79ba770ca703d6b5a
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c6a552dd3b85e4e36855e303c5cb57a0388d4632a3acaa5302ca646fbda017
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4da88e12a570b8176e33d4e833ca9833197f98837cc27f9094df7bcc1d9f47
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c7085f793d1434518d9aed118ee8850b6a55de0a894e9edc95302a13baee13
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eacf61ef3b77cdb6df02a9c2cb07c1947dcd233aa080c00ee541a7eca6dccc6a
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1628f05f6b832066328946babf7025e77f425acfea9ad0713a1499422297c91
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b0b5bad83b2738ad513afc4ac7711f67c12fafcf1c1ed2cce2aaf637865825
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 0.3037028908729553,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.9116,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13333333333333333,
20
+ "grad_norm": 0.3489552140235901,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.6459,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "grad_norm": 0.3942304253578186,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.0189,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.26666666666666666,
34
+ "grad_norm": 0.29892203211784363,
35
+ "learning_rate": 0.0003,
36
+ "loss": 1.8188,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3333333333333333,
41
+ "grad_norm": 0.5498002171516418,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.3308,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "grad_norm": 0.42956435680389404,
49
+ "learning_rate": 0.0003,
50
+ "loss": 2.4234,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4666666666666667,
55
+ "grad_norm": 0.29204803705215454,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.2476,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5333333333333333,
62
+ "grad_norm": 0.45598259568214417,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.6462,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6,
69
+ "grad_norm": 0.2771050035953522,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.8,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6666666666666666,
76
+ "grad_norm": 0.23567508161067963,
77
+ "learning_rate": 0.0003,
78
+ "loss": 1.3243,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7333333333333333,
83
+ "grad_norm": 0.4404710829257965,
84
+ "learning_rate": 0.0003,
85
+ "loss": 2.0618,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "grad_norm": 0.475818395614624,
91
+ "learning_rate": 0.0003,
92
+ "loss": 1.9837,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.8666666666666667,
97
+ "grad_norm": 0.3802916705608368,
98
+ "learning_rate": 0.0003,
99
+ "loss": 1.8485,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9333333333333333,
104
+ "grad_norm": 0.43724003434181213,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.9082,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "grad_norm": 0.21405752003192902,
112
+ "learning_rate": 0.0003,
113
+ "loss": 1.7006,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "step": 30,
119
+ "total_flos": 580445060202496.0,
120
+ "train_loss": 1.5113671978314718,
121
+ "train_runtime": 42.5765,
122
+ "train_samples_per_second": 2.818,
123
+ "train_steps_per_second": 0.705
124
+ }
125
+ ],
126
+ "logging_steps": 2,
127
+ "max_steps": 30,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 1,
130
+ "save_steps": 500,
131
+ "stateful_callbacks": {
132
+ "TrainerControl": {
133
+ "args": {
134
+ "should_epoch_stop": false,
135
+ "should_evaluate": false,
136
+ "should_log": false,
137
+ "should_save": false,
138
+ "should_training_stop": false
139
+ },
140
+ "attributes": {}
141
+ }
142
+ },
143
+ "total_flos": 580445060202496.0,
144
+ "train_batch_size": 1,
145
+ "trial_name": null,
146
+ "trial_params": null
147
+ }
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f9ba92d85e161ddac50cbfa1376def41b8d564876d86da5bb0a73e5f5897e0
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56c1e2ede2ef52e0ce5b80fe084164ea027a24dbe02b6ae7084ab4163e7f138
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000cbfb5eb0df3a6772db4a5ba76965417e97dbcfd6f80c20db43fbc1cca7919
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10851f6148ba1adf32ead3d827ee058c411fbcc5543de63f040822768e553348
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a5f9ae6d49abe626a408ff3516e35c481686287266a07609307499f1f3b923
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c2abfadebda687c435c856c4927a4b71c9168b9d47cc07b81c30c07f39f6053
3
+ size 48770134
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f19790a399e75267ef51f23d3c69e581779159e150dbc269ab6b30502c96556
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec671844c9cd52814d46ea7a6c890dcb81f2f45a04c140372af16e38bbebdd9f
3
+ size 48769674
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 0.16276100277900696,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.833,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13333333333333333,
20
+ "grad_norm": 0.251880019903183,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.883,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "grad_norm": 0.2255876660346985,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.6912,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.26666666666666666,
34
+ "grad_norm": 0.2852807641029358,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.9001,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3333333333333333,
41
+ "grad_norm": 0.21784625947475433,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.8835,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "grad_norm": 0.19225150346755981,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.5988,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4666666666666667,
55
+ "grad_norm": 0.14822231233119965,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.5905,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5333333333333333,
62
+ "grad_norm": 0.1591639667749405,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.4959,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6,
69
+ "grad_norm": 0.12191551923751831,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.6851,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6666666666666666,
76
+ "grad_norm": 0.31107524037361145,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.6968,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7333333333333333,
83
+ "grad_norm": 0.18976068496704102,
84
+ "learning_rate": 0.0003,
85
+ "loss": 0.9054,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "grad_norm": 0.23062096536159515,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.4708,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.8666666666666667,
97
+ "grad_norm": 0.3150513470172882,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.6063,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9333333333333333,
104
+ "grad_norm": 0.15745581686496735,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.4576,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "grad_norm": 0.20806829631328583,
112
+ "learning_rate": 0.0003,
113
+ "loss": 0.6251,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "step": 30,
119
+ "total_flos": 1809773389414400.0,
120
+ "train_loss": 0.6882031063238779,
121
+ "train_runtime": 53.2716,
122
+ "train_samples_per_second": 2.253,
123
+ "train_steps_per_second": 0.563
124
+ }
125
+ ],
126
+ "logging_steps": 2,
127
+ "max_steps": 30,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 1,
130
+ "save_steps": 500,
131
+ "stateful_callbacks": {
132
+ "TrainerControl": {
133
+ "args": {
134
+ "should_epoch_stop": false,
135
+ "should_evaluate": false,
136
+ "should_log": false,
137
+ "should_save": false,
138
+ "should_training_stop": false
139
+ },
140
+ "attributes": {}
141
+ }
142
+ },
143
+ "total_flos": 1809773389414400.0,
144
+ "train_batch_size": 1,
145
+ "trial_name": null,
146
+ "trial_params": null
147
+ }
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad083f2e642a767d5504eb2dd2e2c1ffeacb9765251550c355fcc0d1dc61199
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be02dbb2f9a3dc8edf5df8d3606692c50f06f2c89b23f8b6daf92f966649b22d
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ff7e1f593d4b3a29a210026ea5b2b0154e90f2a5832bbbb5067e40eda2bda8
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a0f9fe0829a1607a5e1f12053abf5062647afeb1fbccc13457f8f449c6dd3ca
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcbd8292ac0a7ac227c08a272e9b49049d3f1828668e5f5686a29eae4683bee
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8347c82a60d5d8cff43f998c24f471d16985a6bad1652d26e48cdcfa9ba022e2
3
+ size 84049390
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5efa904dfdf8aeafd4ae597f530c9125b89c869b8cb43f650a3651f701b43189
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce00e87548e396505aee69f7ff404d99885432c726eff94ff85f2596d467302
3
+ size 84048874
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 30,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 0.2867070138454437,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.5952,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.13333333333333333,
20
+ "grad_norm": 0.33537670969963074,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.9564,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "grad_norm": 0.3584403991699219,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.9349,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.26666666666666666,
34
+ "grad_norm": 0.3439081907272339,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.8646,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.3333333333333333,
41
+ "grad_norm": 0.44978123903274536,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.9359,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "grad_norm": 0.3586316704750061,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.8132,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.4666666666666667,
55
+ "grad_norm": 0.26364096999168396,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.5523,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.5333333333333333,
62
+ "grad_norm": 0.31966859102249146,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.9114,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.6,
69
+ "grad_norm": 0.35554757714271545,
70
+ "learning_rate": 0.0003,
71
+ "loss": 1.3788,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.6666666666666666,
76
+ "grad_norm": 0.258931964635849,
77
+ "learning_rate": 0.0003,
78
+ "loss": 0.6495,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.7333333333333333,
83
+ "grad_norm": 0.2897098660469055,
84
+ "learning_rate": 0.0003,
85
+ "loss": 1.0337,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "grad_norm": 0.2801041007041931,
91
+ "learning_rate": 0.0003,
92
+ "loss": 0.8032,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.8666666666666667,
97
+ "grad_norm": 0.3492525815963745,
98
+ "learning_rate": 0.0003,
99
+ "loss": 0.7369,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.9333333333333333,
104
+ "grad_norm": 0.44734007120132446,
105
+ "learning_rate": 0.0003,
106
+ "loss": 0.8824,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "grad_norm": 0.29203251004219055,
112
+ "learning_rate": 0.0003,
113
+ "loss": 0.7433,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "step": 30,
119
+ "total_flos": 3616159123374080.0,
120
+ "train_loss": 0.8527638713518778,
121
+ "train_runtime": 68.1295,
122
+ "train_samples_per_second": 1.761,
123
+ "train_steps_per_second": 0.44
124
+ }
125
+ ],
126
+ "logging_steps": 2,
127
+ "max_steps": 30,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 1,
130
+ "save_steps": 500,
131
+ "stateful_callbacks": {
132
+ "TrainerControl": {
133
+ "args": {
134
+ "should_epoch_stop": false,
135
+ "should_evaluate": false,
136
+ "should_log": false,
137
+ "should_save": false,
138
+ "should_training_stop": false
139
+ },
140
+ "attributes": {}
141
+ }
142
+ },
143
+ "total_flos": 3616159123374080.0,
144
+ "train_batch_size": 1,
145
+ "trial_name": null,
146
+ "trial_params": null
147
+ }