thkim0305 commited on
Commit
8df2c01
·
verified ·
1 Parent(s): f1cc9e0

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b16768f7dc2eae664e5fd275f1c0e4a05c9147bb79c0c4b402a18aeaaf8375d
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe47cb1aefad62c1fd613c76140e6267ba060f1c514591486cf4468523c5b41
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f4e565288b1dd9e16440afea7c377fea6e8c2b86a937030cd568bd120fd038f
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d356437d08bccdd3ce361ad9057428bcf4cae911a507b8332d938f41f5dc74
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e98f5737926eedec558b34d9e5d045c45ea50e7ff4c68699fa210aaaf5120a9
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15aa028b6cced7cfa1cde28f6be8395d18b370d33f5961c4652916168c869da9
3
+ size 368443438
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421f88eb88ff09a6f10c9c0caf03a8665de01d152db170828eca4be38fb91eac
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525f0f37c26b21e07d6e9178e77c0ef29b869daafb690f58a885a4a1402ea584
3
+ size 368442474
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 4.400585174560547,
14
+ "learning_rate": 2e-05,
15
+ "loss": 2.1084,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.24108143150806427,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5575,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 2.3387579917907715,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.3726,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 3.3745932579040527,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.0676,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.499793142080307,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.5278,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 1.0053821802139282,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.4909,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.3840383291244507,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9338,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 2.917617082595825,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.3054,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.6473109722137451,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.8086,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.5606623888015747,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.9234,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.5007361173629761,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7782,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.6239376068115234,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.1194,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.372718572616577,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4856,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.8457813262939453,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.3728,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.855133593082428,
112
+ "learning_rate": 2e-05,
113
+ "loss": 2.0313,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.2432137727737427,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.4426,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.4432425498962402,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7053,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.024395227432251,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6134,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.194606900215149,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8373,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 3.11031174659729,
147
+ "learning_rate": 2e-05,
148
+ "loss": 2.5497,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.2926805019378662,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.9482,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.5149866938591003,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.4165,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.190213680267334,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9618,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.6894278526306152,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0249,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.4215104579925537,
182
+ "learning_rate": 2e-05,
183
+ "loss": 2.0148,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.714414119720459,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.3428,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.1892106533050537,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.1607,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.21503484249115,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.836,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.6278855800628662,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.844,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.5338757038116455,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.7447,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.6841797828674316,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8774,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.9872264862060547,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.9218,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.0775535106658936,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.9773,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.7016096115112305,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.3366,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 9.161294937133789,
252
+ "learning_rate": 2e-05,
253
+ "loss": 2.0726,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.1249581575393677,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8084,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.4263134002685547,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.9452,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 3.8015453815460205,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3966,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.51714026927948,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.3366,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.001514196395874,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.2534,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.437701553106308,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6374,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.8562130928039551,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.8362,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.1629040241241455,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.6705,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.7516007423400879,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9896,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.4195737838745117,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8637,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.6594505310058594,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1106,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.5046249628067017,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7569,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.1814091205596924,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.1165,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5716895332302848.0,
351
+ "train_loss": 1.0793670944331848,
352
+ "train_runtime": 222.0474,
353
+ "train_samples_per_second": 1.747,
354
+ "train_steps_per_second": 0.437
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5716895332302848.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4cf563ddeb6cec914fa98a3339483e303ee40bcfbc8177f2de90ec07fd9f32
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4e731a0acc1c910df1a066b29fbc699083b9afcfd305403782197d842e12ad
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72086853e3f2e6d801b2bbabd64045d8df56a4e1a9d90762650e9def4016026b
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8e1391c42f7045fc6ae4257810e9ec6fa9b85ad051a083f1b3a6cc1c13b9e7
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad99be95eca9ecc8be94e2334e2786ea50b2df0bc5a440a7200405c66c551aa
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e370b2d3fe5711152b28b41a8df70a261223af0d600ff997c0beee58add2f883
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03b2c620112acbddbfdcaa58a0bcc02827da8b0c3062d0bbb0aacdbfce067764
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ad4a35f17fa2fa92dbf9df9678d3ab4ba7f3b43367db0b87142e429f809cd1
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.0367224216461182,
14
+ "learning_rate": 2e-05,
15
+ "loss": 2.1122,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.5293935537338257,
21
+ "learning_rate": 2e-05,
22
+ "loss": 2.3105,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.200186848640442,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.2933,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.551851749420166,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.01,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.2025572061538696,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.6588,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.773492693901062,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.269,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.1679770946502686,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9164,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.5977709293365479,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.4414,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.0373786687850952,
70
+ "learning_rate": 2e-05,
71
+ "loss": 2.1323,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.6528465747833252,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0931,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.2186673879623413,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9268,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.9843794107437134,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.4834,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.1736723184585571,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.973,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.9919485449790955,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.426,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.3353116512298584,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3913,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.7836465835571289,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.9893,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.6000968813896179,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.1023,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.1263123750686646,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.2717,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.4765498638153076,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.2275,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.7918884754180908,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.124,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.881719708442688,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.1614,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.5077035427093506,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7703,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.0335602760314941,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.0186,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 2.6902318000793457,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.4702,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.48346254229545593,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.7069,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.43713346123695374,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.9541,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.7219210863113403,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.0328,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.6687954068183899,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.9612,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.4545569121837616,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7134,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.4642369747161865,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0691,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.866256058216095,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9705,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.6883927583694458,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0837,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.8896063566207886,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.2245,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.008226990699768,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8514,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.5231401324272156,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.8315,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.1414425373077393,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.0161,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.8160688281059265,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.9522,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 4.522146224975586,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3154,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.8005837798118591,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.38,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.8043385744094849,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.7023,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.4813634753227234,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.2706,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 2.6377651691436768,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.7818,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.9884634017944336,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.8094,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.7997894287109375,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4816,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.9168503284454346,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8998,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.4715125858783722,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.7297,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.7565422058105469,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1934,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.6956796646118164,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.0388,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.3756887170285568e+16,
351
+ "train_loss": 1.1302465065238403,
352
+ "train_runtime": 315.5064,
353
+ "train_samples_per_second": 1.23,
354
+ "train_steps_per_second": 0.307
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.3756887170285568e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:623c27c6e16e61754cf9f9bde6b0a2f00c89549eada108fd4d3385eeaa29b8a5
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9394ca5cba51cc4d5f9a628c5235add0cc79e6cefcbf73aab79088eaf9958ec2
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1698991ce97c58e41af99e8d6b1461337859fc7938ede7e57f36eebb9dccf5f7
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de7badf37bc6814c02941952260ab1c4ea0434cdd296a402f91bb67747e52c24
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6e3bfc5a8aab513e9fe3473e92e3048bfbc4d19bd4c4176ce0141a9c69225d
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e86a679b1183d5d4cc5c15a3054fd2b160fe0443325d5f0d091c265c5eb060f
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8dc7440211f75b47738e72ddd3bf0a982d45c096e63747b2ba17635f45434a
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206787c858348d28620a2bc4ffbc2c93037006a7963eff3316cb95bd1e6bc540
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.538358211517334,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6608,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.2682268619537354,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9785,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.937935709953308,
28
+ "learning_rate": 2e-05,
29
+ "loss": 2.3516,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.4392732381820679,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.6785,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.5007473230361938,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.5229,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.6477186679840088,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.3816,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 2.7069621086120605,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.319,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.5525019764900208,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9359,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.7979759573936462,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.6191,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.443120241165161,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0461,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.8416927456855774,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.217,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.7521089315414429,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.0924,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.4497734308242798,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.1838,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.6999666690826416,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.1143,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.1742311716079712,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.9922,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.5971889495849609,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.6466,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.034390926361084,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8025,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 2.665285110473633,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9528,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.8578566312789917,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9949,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.1724331378936768,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.903,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.1174654960632324,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.1925,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.1770066022872925,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.8522,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.9446500539779663,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.9395,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.4612779915332794,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4162,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.7479944229125977,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.953,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.7862464189529419,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3849,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.7938858270645142,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.6369,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 2.231029987335205,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.9901,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.3764982223510742,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.9056,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.9940462708473206,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.5912,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.9223126173019409,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6726,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.7324680685997009,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4076,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.8635823130607605,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.5997,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.428286075592041,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.9463,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.5211204290390015,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7596,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.464924931526184,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.7913,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 2.8257651329040527,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.2766,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 2.5703413486480713,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.8435,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.486395239830017,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9819,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.4546787738800049,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2796,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.7543643116950989,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.2902,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.8718597292900085,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.5479,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 2.138429880142212,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.1224,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.581947922706604,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9022,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.3591723442077637,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.9803,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.1390422582626343,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1259,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.318390130996704,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.9855,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.238763451576233,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.4494,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.5309291128881152e+16,
351
+ "train_loss": 0.9762444643630195,
352
+ "train_runtime": 351.1873,
353
+ "train_samples_per_second": 1.105,
354
+ "train_steps_per_second": 0.276
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.5309291128881152e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fc77f4acfc80e842f0c49ca07c0518a93812788236835cee6b47f88c1ce363a
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d3dc701382da26808567ded7104ae77026cab4a5a03ca85a9b7408b648375f
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6fbb24e8df10767d6e48cd3bbe7abb171912af63f525f6830f724827e262f42
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc952fe4c8ad4ef2ce347a0fec31859d5ea7286b61fbdbd33f62cc59fc6a1db8
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b91e5497d58df5e6c88cfd96c80538d3b0489625e9c1844bc7134fc1819c132
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41b1e011c681191accbfee628a5d5367b45a57bb25b21cc0fb2dcc706842da26
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909ae260c1b95d5769bb16ca3cc0ea471dd2f0a32764d5950ed946519bbdfa90
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb5bbcfb7d060129e306207445c42c34099a7c3d3df2de2dd80ec00e2a81b15
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.7036350965499878,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.1502,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.733051061630249,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3144,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 3.021022081375122,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.523,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.6244669556617737,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.9215,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.675452470779419,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.2749,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.2071010321378708,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2043,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.45165616273880005,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2592,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.9154694080352783,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0788,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.44769397377967834,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.672,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.001409649848938,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7424,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.2860804498195648,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7084,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.4580197036266327,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.1849,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.4905541241168976,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.7166,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.433603048324585,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.5247,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.386955738067627,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8547,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 2.032757043838501,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5488,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.0941940546035767,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9551,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.461041808128357,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.1186,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.46292173862457275,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9941,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.46523961424827576,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.99,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.5094814896583557,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0255,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 3.24894642829895,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.3442,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 3.7670817375183105,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.279,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 2.1132254600524902,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.7001,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.593248724937439,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8884,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.4078608751296997,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8091,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.3955836892127991,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.3546,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.9244691729545593,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.437,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.4643814265727997,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.881,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.41819217801094055,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6405,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.7221791744232178,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7172,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.44450488686561584,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7196,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.463599443435669,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.4753,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.2606247365474701,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8865,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.3822779655456543,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7243,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.314899206161499,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.9963,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.47478482127189636,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.7358,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 3.180941581726074,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.546,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.392020046710968,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.4863,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.4663325548171997,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.3802,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.5237138867378235,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5043,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.8400606513023376,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.8792,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.3281240463256836,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.735,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.076886773109436,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9669,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.6442875266075134,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6857,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.3491824865341187,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.7857,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.4119647443294525,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.5265,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.9270315766334534,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.2191,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2.081554905117491e+16,
351
+ "train_loss": 0.8912827993176647,
352
+ "train_runtime": 342.0553,
353
+ "train_samples_per_second": 1.134,
354
+ "train_steps_per_second": 0.284
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2.081554905117491e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76222740e5b1eb701df8242a79fa98659c625a40a9d557ff4ee9ab9793e6a2de
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4abb1969b8895e24009b3474b99f4b40c4a44b2056fb6935f1f5c9fbd2b7de
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed5d456627b3b084bed1c6c4f40781201a9314e9ab4c7fcd0045aa87653ad93
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974c3d90766732a37f45318d13fa049f577025980b07e7b331d04ffe0c7db571
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369ee76d02588c766be21d8792a4eb5096425d0505d617ed47d36b31ab2a7489
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724a4cb19d6636ec4851b473dc93dfec177f1eb18c9610b52336f58fe4ac864e
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296b10e3bc3a9fa0d2250f938b2eecca029ba36b1dced5a077b832d32301cc42
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2799db2855c1d10a215ade45151d9ac24e0793f29bfafc62c45ef363f459e35e
3
+ size 791576546
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.3807806968688965,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2215,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 5.47310209274292,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.7948,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 4.091208457946777,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.4245,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 2.522712230682373,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2455,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 2.400545835494995,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.4473,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.147249937057495,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.648,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.6440826654434204,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.1022,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.4986751079559326,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.5813,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 2.2775418758392334,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7531,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 3.2079670429229736,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.6908,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 2.16922926902771,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.5928,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 2.276587724685669,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.4918,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.4131243228912354,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.7804,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 3.375476121902466,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.9648,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.6980457305908203,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.9027,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.1185120344161987,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.2945,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.5742499828338623,
126
+ "learning_rate": 2e-05,
127
+ "loss": 2.3652,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.6757092475891113,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.5789,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.5958377122879028,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4983,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.3994206190109253,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8934,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.8964346647262573,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7392,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.5910592079162598,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.8057,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.8048728704452515,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.7514,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 3.0406651496887207,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.271,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.1475746631622314,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.3316,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.973347544670105,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.9409,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.5019800662994385,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.5754,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 2.1202926635742188,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.3977,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 1.2852915525436401,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.6778,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.093619465827942,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8222,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.7490342259407043,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6013,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 2.6893742084503174,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.2905,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 3.0177910327911377,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.7748,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.123380422592163,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8262,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.6035982370376587,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.798,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.7902660965919495,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.0709,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.8285802602767944,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.0743,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 2.9130942821502686,
273
+ "learning_rate": 2e-05,
274
+ "loss": 2.0891,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.4661481380462646,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.1128,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 2.301260471343994,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.6207,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.637123465538025,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.3459,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.056007742881775,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.2215,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.2907710075378418,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.6295,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.3023312091827393,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.0457,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.39116430282592773,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.1983,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.9639070630073547,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1595,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.6161588430404663,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.2601,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.380856513977051,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6827,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2.074250449204019e+16,
351
+ "train_loss": 1.2486689803526574,
352
+ "train_runtime": 347.5167,
353
+ "train_samples_per_second": 1.116,
354
+ "train_steps_per_second": 0.279
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2.074250449204019e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cd08a35ceaf65f8b4b8c2b3943b855c5e7348c334be564b82fa3d4dc95bad8
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ce216ab0402be87c84d6af509a84f0ce168cf22217ee6eb8263e51dd6142d6
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0970a229f1f57205586ba91eed0371c4af2fe6fbfaa4a6ec8f6aa437d820ac8d
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b873fa30a8c5d382069a0c536ec17db452e64f48a4c4c7b3c9dafca48b601f1d
3
+ size 791578182
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c23eb08adf5a7a45e3b25bc3f6dd68703808836dcdddf7a33aa4d8121f930d
3
+ size 791576546