thkim0305 commited on
Commit
f182f3c
·
verified ·
1 Parent(s): 9d619ad

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c6682c76a002291c94ab5400f46aad989063cceafda2bb335890c225c469f6
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03ffd7f8cda1890da71515a263f4200fb94e95e55d8015c4884ebbe683b11dde
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463639587f71bcbd5b9d4481a2af8b2a50eb9728a9d1126789118ee6213c321d
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0d4a5ef7da16ffc179066f20f6577b695c216c334fb9207199ac0bf36085d7
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f998475c714790ced260b7d08905deaf25397487d22f12b88939abb267ba23b
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e030d7c0cfa7e8171852dba2ae6e04bb8cd8017f7d3a490b15f7ca76d21aa1
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616c57ef296799deec39f31f99f043a4bdf1ad531074e5c884d8f1beff2d6e35
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93589c35187351c587af0cb2f9bd08601b587df4aa00cd4809909821c10c86b4
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.443185567855835,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0495,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.8465604782104492,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.2367,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.3454159498214722,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1763,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 4.38826847076416,
35
+ "learning_rate": 2e-05,
36
+ "loss": 2.243,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.6966120600700378,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.8164,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.5520138144493103,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5507,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 2.7041213512420654,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.2546,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 4.7814555168151855,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.4361,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.8622943758964539,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7884,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.8466795086860657,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8781,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 2.9229226112365723,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7513,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.2761527299880981,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9905,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.6934807300567627,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.8354,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.0872917175292969,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.2724,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.1482253074645996,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.6143,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.905875563621521,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.6322,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.6854360103607178,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8599,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.8919254541397095,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.0498,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.6378024816513062,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1343,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.0561065673828125,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0583,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.7613285779953003,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.5316,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.940731167793274,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7388,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.1051706075668335,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.0972,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.7498652935028076,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.735,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.488133192062378,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.435,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.48400750756263733,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.219,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.843970775604248,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.3159,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 2.2032525539398193,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.2433,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.2895724773406982,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.6293,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.6934003829956055,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8883,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 3.9248857498168945,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.7568,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 3.8822836875915527,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.358,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 2.8576316833496094,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6214,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 2.46586537361145,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.39,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 2.4729208946228027,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.9929,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.4468770027160645,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.4753,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 5.142044544219971,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.4092,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 3.540414333343506,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3685,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.6057883501052856,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.8978,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 2.5897274017333984,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.5045,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.60903400182724,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.97,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.9482678174972534,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.8024,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.7348854541778564,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.105,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.3475691080093384,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.6093,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.98420250415802,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6538,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.3864518404006958,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.3196,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.4003015756607056,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7891,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.4709246158599854,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8115,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5653713448337408.0,
351
+ "train_loss": 1.1264018225915653,
352
+ "train_runtime": 208.5258,
353
+ "train_samples_per_second": 1.861,
354
+ "train_steps_per_second": 0.465
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5653713448337408.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669a580e6490be915efbd67e52ae5d5eebf6ec1cfe8ed75462a0b139aaf852b9
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ee3bb60763f435737915de658c4663b6a667f582ed8be7e60fef7157534dd7
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf13622c365cefe6bc7f685fad7d20f552bf6b29003b54dcd82de9dabdc3805
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6c9656642a5444d90acfa6f0daa4c79a3e390288297eeb1abf064e503f5ab0
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0485b8a1b36390a6785f6d93b6e361d6a74346cc48938a91ac1c35ef15235b
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0517ce73a108585cff98eeb02bb4f4f800a287e04dd52f03abba4325f2a381e
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1756b6cdf77c2928be961e35efda166cc775b9055a05d892a15d0694cd5990d3
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3fdb5458b09254ba799a8103f3bdfeaae78cf8e16ddac228e526bb1887f33ba
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 4.060997486114502,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.5594,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 2.254009962081909,
21
+ "learning_rate": 2e-05,
22
+ "loss": 2.394,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.4530246257781982,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1717,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.869075357913971,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6181,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.40417155623435974,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7947,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.7273326516151428,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.2993,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.7000077366828918,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9332,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.7522069811820984,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0824,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.9031182527542114,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9363,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.0295480489730835,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.4712,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.9623363614082336,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.1707,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.3788335621356964,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.1249,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.9554972648620605,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.77,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 3.608424425125122,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.8768,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.3351783752441406,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.1167,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.9246235489845276,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5636,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.8108372688293457,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6283,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 2.429359197616577,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6775,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.0958515405654907,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6208,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.9364616870880127,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.9213,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 2.9376447200775146,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0253,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.9935062527656555,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.875,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.5480442643165588,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5909,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.5784210562705994,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8151,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.056527018547058,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.7226,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.8988001346588135,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.1636,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 3.2083346843719482,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.0256,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.1363922357559204,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.3127,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 3.8423314094543457,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.725,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.043357253074646,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.1183,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 3.4112141132354736,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.4553,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.9900936484336853,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.6987,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.8460667133331299,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.2903,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.4582524299621582,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.0396,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.4106889963150024,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1477,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.5957890748977661,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.1479,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.9033636450767517,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.3359,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.7324110269546509,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7437,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.1791794300079346,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.2022,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.4742436707019806,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.7892,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.18353271484375,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.077,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.9750029444694519,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.9788,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 2.6100590229034424,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.4546,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.9122608304023743,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.6003,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.024173617362976,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7366,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.7152318954467773,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5065,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.7300605773925781,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.4227,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.2942312955856323,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.9952,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.3522822525616128e+16,
351
+ "train_loss": 1.072485186390041,
352
+ "train_runtime": 318.3533,
353
+ "train_samples_per_second": 1.219,
354
+ "train_steps_per_second": 0.305
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.3522822525616128e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502b1f2466482d60ae4368748bd2bbb748c1eef0763cc3198c7c6693b693cda2
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e029bd4f689e14cf52f62173c28505256fdf4c36180fcbd2dce37e046ed0aea4
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bae73bf0f0ce33ecd007f09d6bc20372df66c432233a979da77558ee0562266
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6efdd67bba7b9f92913a0a5f78a80c0d2f37330882a17c1d4c937e8821b62c1
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680cc48edce4581b07b94077943e4d9e9f8e437962c27221e4f5579517ee781a
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95325cb63bcc0510e8cdf48f2026637a13810fa472c8d4fa373deebf0c544b94
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7742c3fe0f7d965875edeb90a118edb19499505b66dc7526653285401f38a4d
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04669bd514938a07c8c573fd80110f73a858b311023e7a52be912ff48f82d6bb
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.7177300453186035,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0926,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6555100679397583,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6273,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 4.007697105407715,
28
+ "learning_rate": 2e-05,
29
+ "loss": 2.815,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.5810902118682861,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7118,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 2.4874520301818848,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.5025,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 1.162537693977356,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3331,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 3.231822967529297,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.7719,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.9158895015716553,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.6472,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 2.0289742946624756,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7641,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 3.32369327545166,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0675,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.4045751094818115,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9268,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.6414214968681335,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5625,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.570258140563965,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.7383,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.839545488357544,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.7098,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.3686957359313965,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.596,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.3215398788452148,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3943,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.1492276191711426,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8084,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.21739822626113892,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5349,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.766031801700592,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2763,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.4358357191085815,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0408,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 8.695076942443848,
154
+ "learning_rate": 2e-05,
155
+ "loss": 2.4291,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.2179341316223145,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5597,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 2.2541961669921875,
168
+ "learning_rate": 2e-05,
169
+ "loss": 2.0812,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.579424500465393,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.7606,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.6125601530075073,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.0159,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.2814699709415436,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3651,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 2.234740734100342,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.846,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 3.462404489517212,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.4232,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 1.109181523323059,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7863,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.8189926147460938,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6734,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.4701120853424072,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7984,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.5160466432571411,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8513,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.2065556049346924,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.4933,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.5657169818878174,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3078,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 5.390564441680908,
252
+ "learning_rate": 2e-05,
253
+ "loss": 2.9097,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.6399439573287964,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8537,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.35270068049430847,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.2654,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 5.137968063354492,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.5669,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.5322946310043335,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.874,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 2.5663018226623535,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.0716,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 3.668062925338745,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6671,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.857315182685852,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.3661,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.98568195104599,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.67,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.703881561756134,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4057,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.4584002494812012,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.5015,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.263429880142212,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0602,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 3.9899234771728516,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.708,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.3671796321868896,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.3029,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.5329114357497856e+16,
351
+ "train_loss": 0.9428910275095517,
352
+ "train_runtime": 354.7806,
353
+ "train_samples_per_second": 1.094,
354
+ "train_steps_per_second": 0.273
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.5329114357497856e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cedddde0d3b47f691a44fb56df8c587e70ec3e2d98120c9fef229d4e305135b3
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b935a7315d7aba3f970d2155591a7c025d5e00762112fe9047b296da732a3b5
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925479cb7b9cfbe13025d105ef9e24aa527cc08e95edb4a5d57f2b34856d503d
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d397dbb4d650578c2060e73c064390a986d117f67a0fed4b60758c4ec0bd7d
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b453f3c79b662bca0906bc6969023a652f1fd620cf5b08fc3ef0e5e309f5b990
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92e1016a632bfe164e293aeb7642d2dfbae7090da04ac409a8a7c4e00fc4226
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:222f280a081854656a1d7ef5f4fb2683f19a421ee895ac0cdae8b26f64893669
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77bfb8ce9a4a7531fec21d951852d3dbfc5b9546e416285abdd478013d712647
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.7246163487434387,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.7324,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.5284512042999268,
21
+ "learning_rate": 2e-05,
22
+ "loss": 2.0166,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 0.49521467089653015,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.3906,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.5752606987953186,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.1865,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.5735695362091064,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.6631,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.35247695446014404,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.5771,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.7416431903839111,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.45,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.8636322021484375,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.3813,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.6410114765167236,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.6523,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.35148540139198303,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.5771,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.5244173407554626,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.5553,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.6992729306221008,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.7803,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.430820494890213,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.6299,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.4983491003513336,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.321,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.45274442434310913,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.4834,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.653442919254303,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.5889,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.42392614483833313,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3359,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.49543508887290955,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.7627,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.4652840793132782,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4604,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.6317560076713562,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.6349,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.5806442499160767,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.6519,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.568714439868927,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.4482,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.6616501808166504,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.5454,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.0585500001907349,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.6943,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.5242922306060791,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.5273,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.6620081067085266,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.6641,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.8729976415634155,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.3711,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.5533149838447571,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.5024,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.6843786239624023,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.221,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.6899279952049255,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.6211,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.8329319357872009,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.4404,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.557767391204834,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.4336,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.34819966554641724,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.3477,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.7103057503700256,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.5186,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.5542501211166382,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.4287,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.5763942003250122,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.1259,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.7207664847373962,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.3911,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.3724093437194824,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.0917,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.7380734086036682,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.4736,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.7758538722991943,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.9409,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.419058084487915,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.0337,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.1800168752670288,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.5811,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.0631232261657715,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.6145,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.35026878118515015,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.313,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.1856135129928589,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.0017,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.42485421895980835,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.7556,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.0420420169830322,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.2177,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 3.806156873703003,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.3557,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2.080121124238131e+16,
351
+ "train_loss": 1.4653891337286566,
352
+ "train_runtime": 338.7153,
353
+ "train_samples_per_second": 1.146,
354
+ "train_steps_per_second": 0.286
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2.080121124238131e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78497632d8d36d7c8168d50e50a254157fc769f9322543ee181269094d28c724
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:162cde5be279d581b1f83ee039cc749ade6ab4ab374af926bad62c033d1dfba4
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d4a68f4353dabd81f17a777377c46d55712b0b65c491aa7fbfdbbbdf077a66
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7738f10699765a7bc84949fc0529653275795f44b59cfcc9ca3fcedaea62efcb
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fec5310decb3f5a8a11e6204443c2e3c9371d7d258ede1f21b69d71da6ae563
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb887b29c54ad36b3482c04658bef8c0c28760138560e9ef4edcd885bf7c3b8
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ac06241cdb5fb3b5ea1ce74c2d4a77507ba600b8cb9b1ee6f62ef1e75be7b7
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f8f0fe1e2e6495fc31e76e3cb7514d92d767bdc152b3aa1e3379ff7f8810311
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.5516194105148315,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.7183,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6191949844360352,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0957,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.2471156120300293,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.7448,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 3.331115484237671,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.8217,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 4.650871276855469,
42
+ "learning_rate": 2e-05,
43
+ "loss": 3.3426,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 3.3392367362976074,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.2304,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.18230539560317993,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.8133,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.041199803352356,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.1371,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.7668745517730713,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9596,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.2554526329040527,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.6985,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.37597593665122986,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9554,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.1484178304672241,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.4816,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.9884140491485596,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.0887,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 2.5340919494628906,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.1622,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 3.164740562438965,
112
+ "learning_rate": 2e-05,
113
+ "loss": 2.0013,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.7155555486679077,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.2095,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.952375054359436,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8851,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 2.5268073081970215,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.4144,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 3.838146924972534,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.3506,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.9546507596969604,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.6561,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.979039192199707,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8316,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.025802731513977,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.9942,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.5559583902359009,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.0033,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.6625217199325562,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7507,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 4.655178070068359,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.1542,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.9145403504371643,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.3025,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.8665916919708252,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.5428,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.5650798082351685,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.4144,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.973200559616089,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.6578,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.3732833862304688,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0927,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.011518120765686,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7655,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 2.6628100872039795,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0994,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.2325425148010254,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.7863,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.7516754865646362,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6101,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.32088345289230347,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.3487,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.3543070554733276,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.4045,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 2.9448294639587402,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.6153,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.3423410654067993,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.9661,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.4557067155838013,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.5707,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.8707636594772339,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.7858,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.8935154676437378,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.9482,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.1275886297225952,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.8253,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.0190774202346802,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.3923,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 2.239851236343384,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.8538,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.48686838150024414,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.5748,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.387908458709717,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.2788,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.8753933906555176,
336
+ "learning_rate": 2e-05,
337
+ "loss": 2.16,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.7403647899627686,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.7309,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.975033480085504e+16,
351
+ "train_loss": 1.2463067905190064,
352
+ "train_runtime": 346.1883,
353
+ "train_samples_per_second": 1.121,
354
+ "train_steps_per_second": 0.28
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.975033480085504e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9cb7c5fddaca66205242d49af22fb676315f063b1cc2418d18fa177390924f2
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46eade7f1fea0f1e68e3363d9a8e4ff52e5b7305d34a81ea1379557122759d20
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b086fbc699e0302ee4ae254a9237a6f8d5448810e5885c35f09b7692fb3ba4ee
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e663701e0f36fb7a017f31c1bc0702e47930c84858f19bff02e2417a980ea566
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d332bf7bdd3e9cae1a94a5236886a591c59c6ce26033bc733ef93822982945ba
3
+ size 791576546