thkim0305 commited on
Commit
8d5ece8
·
verified ·
1 Parent(s): df42a02

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ef8239d82162ec0cd709c417e174890fa11c1a4d3bb24871eb9c35db756c95
3
+ size 368443438
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a91dbfbd03b0eda5eea78822e04cc2b1e3b5890093975eea94cc34f926140df
3
+ size 368443438
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2a82b2beb0af43f3895ebcf348c413e0ecd5ed806c5831de2d4b891e4f46e1
3
+ size 368443438
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a985f1833e783d4938f0a9ce7528e24a1d41ae07972a5bf344eb39d15eb2a0
3
+ size 368443438
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7af35ffe4dc0fba5de51bfd5ad8b4fb9fe896b1b6c84c1e61b575b0c704b1a
3
+ size 368442474
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b3d0a1d38c13e810a623753d551af1a679f168805592f2407c3b4171bee819
3
+ size 368443438
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73bff6b90d411e41eb15f92cbe7e80d64cc0c652de9e204bbdcab4facc759ccb
3
+ size 368442474
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78ae0540308ef5b38f30f678f176dc07b31ac92282a3f1061b27960904527b6
3
+ size 368442474
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.1101641654968262,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.8945,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6314992904663086,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.4103,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 2.4410200119018555,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.3191,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 3.6628243923187256,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.968,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 6.844225883483887,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.5729,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.7698001265525818,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5719,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.5793606042861938,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.5151,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.8425798416137695,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0999,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.134320855140686,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9422,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.168424367904663,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7192,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 3.056905746459961,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6074,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.6598642468452454,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.346,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.0676809549331665,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.1766,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.2854700088500977,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.3399,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.3572678565979004,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.1957,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.2973973751068115,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.8289,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.6981891989707947,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7445,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.41950735449790955,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8945,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 2.4804954528808594,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8976,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.989696741104126,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8965,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.7618101239204407,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.5496,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 3.34183406829834,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5556,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.5414451360702515,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9727,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 3.042322874069214,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8753,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.1184558868408203,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.381,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.3003075420856476,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.2195,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.7136024236679077,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.4137,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 2.5711076259613037,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.3358,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 3.2242660522460938,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.6124,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.4743932485580444,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8501,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.8894503116607666,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7892,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 4.588108539581299,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0005,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.5758951306343079,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.1539,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 3.7251107692718506,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3073,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 2.5955007076263428,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7984,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.8188343048095703,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8585,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 4.2932634353637695,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.7,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 2.4785430431365967,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.4911,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 3.244961977005005,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9058,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 3.3959240913391113,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.928,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.36406227946281433,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.2079,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.649201512336731,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.3682,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.951743721961975,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9706,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.6012216806411743,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.956,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.059224247932434,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7971,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.2695350646972656,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.2903,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.9126003980636597,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.486,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.9548375606536865,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.4924,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5653713448337408.0,
351
+ "train_loss": 1.0004102569265463,
352
+ "train_runtime": 222.4287,
353
+ "train_samples_per_second": 1.744,
354
+ "train_steps_per_second": 0.436
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5653713448337408.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac50ab133a7a5aa9dfea61e6d8ed7653bb6f285eb13975c45759804e1c5a0113
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dd8680ac517ba6ca8d2ff3c8a3cbad6fe0cccec680ad711d030f9dd2c6554f
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268c8d523348ff94235dca656d4225fe4d7311eda06f2dd672cb2995a74cf1ad
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5ee153c72366a156c658c58c08fb31ae4d89aaf7cf231b1a3fc2cf80098a182
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a9d2ff90a509ed280525e7783832b83f0e780f8cc85032615ccff649e9190e
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c82a6b29ae63539e855ee08429abf75f2eb504c822f65b9cd7a456e87d8af94
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ecbe4f845d20983d9fa3c345743a695c773b6f8352ba9e0b9e3fd4efcd378d
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21522413a5083211437c3a7d2e7d2f40176b80d21234179e98a978a40f816dac
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 5.094399929046631,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.5978,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 2.2240917682647705,
21
+ "learning_rate": 2e-05,
22
+ "loss": 2.3088,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.5078622102737427,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.9092,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.8912699818611145,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6373,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.416252076625824,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.8186,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.8248510360717773,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.0876,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.9105907082557678,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9326,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.7425227761268616,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.1207,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.2341773509979248,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.0813,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.2988817691802979,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.5784,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.1802982091903687,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.1858,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.40825000405311584,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.0679,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.1907871961593628,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9465,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 2.7417049407958984,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.6913,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.4188278913497925,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.126,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.1148083209991455,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5853,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.284436821937561,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6709,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.7680528163909912,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6444,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.0730924606323242,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6197,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.9840460419654846,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.9225,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 2.7215404510498047,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.1065,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.0859086513519287,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.6448,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.6916680335998535,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5747,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.5634881854057312,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8072,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.403308629989624,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.757,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.0651711225509644,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.2329,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 3.2769243717193604,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.9653,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.2478959560394287,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.8591,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 3.3655834197998047,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.6742,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.0023908615112305,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0587,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 4.638378620147705,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.052,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.3997457027435303,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7772,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 2.229414224624634,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.2991,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.4760843515396118,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.9409,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.4938956499099731,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.0356,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.7016663551330566,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.9062,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.5440024137496948,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.5393,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.718760073184967,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.5889,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.5525449514389038,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.275,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.4799572229385376,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.7098,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.6241955757141113,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.1278,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.5434255599975586,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.8865,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 4.1449079513549805,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.3477,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.316735029220581,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5452,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.4440531730651855,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7568,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.6465669870376587,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5148,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 3.1840262413024902,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.8276,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.191006898880005,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.0671,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.3522822525616128e+16,
351
+ "train_loss": 1.024123240991966,
352
+ "train_runtime": 329.2802,
353
+ "train_samples_per_second": 1.178,
354
+ "train_steps_per_second": 0.295
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.3522822525616128e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1953a5d682996bf7f4974e09538f0db4357535a914fdb147d514c228417d8158
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed6b8398417dec05d433da0925ff0676355f83e22912bb2f246305243882495
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86152694df067a9e575791bae4a78d113d1c1f3913acddace1c81189a40bf60
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007fd7465945f737972fd09d5a5bfac976291174c2cc1f3e6c0abd42fb1e15cd
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be898f74488af89b95d07167565247944b62909ca7fb60878c1b9188e44ae294
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660fb4b5ab21dd5fa6cd373a6b44bf7bd6aa27bb38b6ba9ee00958c570110ae9
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:072d545c26b4cc2a1e2ac58d2d97e7ee52ea4b8b2e6598e811dc3ab58507587d
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01c36cedd65d7a1994dcd41a9ed47d891dcca4dba251524fa7d3877c98a16075
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.715688943862915,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.7903,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.7270240783691406,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.8497,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 2.6262009143829346,
28
+ "learning_rate": 2e-05,
29
+ "loss": 2.5311,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 2.0781748294830322,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6511,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 3.878676176071167,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.6893,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.30152788758277893,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2982,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 2.4543538093566895,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.4963,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.1550655364990234,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5657,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.4158210754394531,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6207,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.8720401525497437,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.9528,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.3980697393417358,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7742,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.7552713751792908,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.56,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.9224400520324707,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.761,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 2.1200363636016846,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.8194,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.0131165981292725,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5858,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.1930532455444336,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3527,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.8298959136009216,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7283,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.21372075378894806,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.679,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.6620970368385315,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.308,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.3406527042388916,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0992,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 8.218306541442871,
154
+ "learning_rate": 2e-05,
155
+ "loss": 2.0959,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.171020269393921,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6386,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 2.1302266120910645,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.7207,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.39640474319458,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.7815,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.7075337171554565,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.1565,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.19492167234420776,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3214,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 2.8099043369293213,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2814,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 3.1069672107696533,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.6989,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 1.5938727855682373,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.0741,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.409372329711914,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.712,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.280025839805603,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6074,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.893931269645691,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.9652,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.0092408657073975,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.5104,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.1251095533370972,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.5629,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 3.200808048248291,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.9959,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.1445953845977783,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8926,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.7390415072441101,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.3945,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 3.7797181606292725,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.2296,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.9498169422149658,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9621,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 2.3580148220062256,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.074,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 3.7689895629882812,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.8532,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.0212076902389526,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.3944,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.0816051959991455,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.7249,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.7434933185577393,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4865,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.409759521484375,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.5403,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.005401849746704,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0927,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 3.6402158737182617,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7249,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.5542266368865967,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.3884,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.5329114357497856e+16,
351
+ "train_loss": 0.9519824686738634,
352
+ "train_runtime": 357.0654,
353
+ "train_samples_per_second": 1.087,
354
+ "train_steps_per_second": 0.272
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.5329114357497856e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4803816dd8fe4467fa5f43227533a56f083b8ad0f6ddd457d12f761c554043f4
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a6865b4d478bbb296c1066419695af40fc07ad1147c3fbff13658f9808bc29
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b66cc40edb9397433df9178db225f73eafa6ea65334a7650e8930a9fbbb8b4a4
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68112bcb7c765592e71c2a4308eb7fc02b16edd1004be3c05f8ea3fb426f24c7
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:821b9803cea659a4bee96d4875ddcf797492fe28233d4eb5c9f67c7c429d5d60
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2bf168da76f1338f80db83394c6a07d9359185723fa8e27c2aee29c881c64f1
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ceb89ff4f0927002a37fbf5b1b3961045b663c7ab2467c070fde86fc9f14a14
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fffbadb068adde75a38be22546d8dfa6c08ffaa70b7f23aa68aeb6eff62809f
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.5930002927780151,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.7236,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.7709837555885315,
21
+ "learning_rate": 2e-05,
22
+ "loss": 2.0088,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 0.4815305769443512,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.4131,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.48609688878059387,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.1724,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.4869016706943512,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.7295,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.37476807832717896,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.6475,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.6886877417564392,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.4917,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.5024330615997314,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.4277,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.6298432946205139,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.5801,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.46709224581718445,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.625,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.4031917154788971,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.5425,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.5809447169303894,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.8184,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.43344253301620483,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.6924,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.5011717081069946,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.3584,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.4586883783340454,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.501,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.6749467253684998,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.6797,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.38988545536994934,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3579,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.48226863145828247,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.707,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.48712313175201416,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4121,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.44966915249824524,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.6276,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.5168526768684387,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.6426,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.5030601620674133,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.5693,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.7527281045913696,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.5518,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.2194421291351318,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.5342,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.5955514907836914,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.5137,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.7728529572486877,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.6963,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.73228919506073,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.4502,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.4163931906223297,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.4219,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.5588869452476501,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1425,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.6062840223312378,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.4331,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.0351043939590454,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.4609,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.6619802713394165,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.4189,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.37535732984542847,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.3813,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.9288793802261353,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.397,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.1058944463729858,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.4009,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.7440882921218872,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.007,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.2134323120117188,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.3503,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.4168848693370819,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.0313,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.7301552295684814,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.4034,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.9088520407676697,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.8931,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.6952238082885742,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.0514,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.3661904335021973,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.5254,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.5365053415298462,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.5001,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.4049004018306732,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.4141,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 2.052964448928833,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.0455,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.5465191602706909,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.741,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.1672446727752686,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.199,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 5.172623634338379,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.6101,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2.080121124238131e+16,
351
+ "train_loss": 1.4608057553006202,
352
+ "train_runtime": 363.6952,
353
+ "train_samples_per_second": 1.067,
354
+ "train_steps_per_second": 0.267
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2.080121124238131e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628f3ab6268a995e5a6e695182629e912860d336c361110825fb7b14d330bdfc
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f1eee78bdc9cbaaa5ace24962d539c619df8286a9a8c422289db2d7ab9cf5b0
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e796e13255d7cee709ff69e3726f6fce731a851ab6157426a862a721e87d3cb
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf354f93337dea3a94a04a869e06347f6ddcdb402a620069706c8a3c277b8e4
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54df4904f4d80d2309f8e2f522b18507c5d402c766e60b504ee27c785f98c7f6
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc7b2d7613df2e86e28c49ee82f08d88d80e81163985534d20ec6f6315e4088
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b90ac9fa152488ded4afc57fe26add9bcab9bea5454ffa4ab484f45ff280c6
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517901987a25dbccbe2e3c55929dd9f1d58b55e2c4ba4e450b7b6a6115e9ba8f
3
+ size 791576546
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.857858657836914,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0093,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.128074049949646,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9811,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.1853967905044556,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6298,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 2.01977276802063,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7973,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 3.552171468734741,
42
+ "learning_rate": 2e-05,
43
+ "loss": 3.2413,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.4959380626678467,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8036,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.18528757989406586,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.8049,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.3887205421924591,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.034,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.167763352394104,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.0904,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.1180120706558228,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1927,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.34743738174438477,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9405,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.5589756965637207,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.2665,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.364461898803711,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9861,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.4796721935272217,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.282,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.5695762634277344,
112
+ "learning_rate": 2e-05,
113
+ "loss": 2.2431,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.308424949645996,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.0269,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.9424493908882141,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8531,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.148472547531128,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.954,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 2.9015464782714844,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.3365,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.0090081691741943,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.3861,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.2990866899490356,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.435,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.2898091077804565,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.7354,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.301903486251831,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.923,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.1289761066436768,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4642,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 3.627347707748413,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.187,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.9005461931228638,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.7929,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.4061144590377808,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.3737,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.6655174493789673,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.4762,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.249619483947754,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.3758,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 3.719571113586426,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0628,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 2.0385303497314453,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8212,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.5352033376693726,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.5521,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.1133532524108887,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0727,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.3139188289642334,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6619,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.13998755812644958,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.2334,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.7787773013114929,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.425,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 2.650651693344116,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.5593,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 2.5681259632110596,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.8564,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.672986626625061,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.3764,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.4562022686004639,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.0597,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.849575161933899,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6571,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.7122550010681152,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.7316,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.9788981080055237,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.8302,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 4.261756896972656,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.748,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.5180452466011047,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6574,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 3.8009090423583984,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.222,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 2.206448793411255,
336
+ "learning_rate": 2e-05,
337
+ "loss": 2.1999,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.8258347511291504,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.6342,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.975033480085504e+16,
351
+ "train_loss": 1.1381244143259894,
352
+ "train_runtime": 347.3529,
353
+ "train_samples_per_second": 1.117,
354
+ "train_steps_per_second": 0.279
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.975033480085504e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d75b434f434ac58ee7ff79a1192526027663a462f31003c61af666ca2c8197b
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40c641acfb84bd93aec680098654d6533f26cea5a9317dba50857689945e350
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea02c21b18787ce374ede742d2698c2221d15bd03cac84c80e8f25d54d2f3a3
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7534dde78b8b023855befc1292d21473ab56079172d82bf4f570b1cf7959e012
3
+ size 791578182
client_states_feddualMultipqfull2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bccb9abfe6b71dff526a503d33d640fb761439901f9e58e425b441518d29bb8d
3
+ size 791576546