Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b16768f7dc2eae664e5fd275f1c0e4a05c9147bb79c0c4b402a18aeaaf8375d
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebe47cb1aefad62c1fd613c76140e6267ba060f1c514591486cf4468523c5b41
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f4e565288b1dd9e16440afea7c377fea6e8c2b86a937030cd568bd120fd038f
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1d356437d08bccdd3ce361ad9057428bcf4cae911a507b8332d938f41f5dc74
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e98f5737926eedec558b34d9e5d045c45ea50e7ff4c68699fa210aaaf5120a9
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15aa028b6cced7cfa1cde28f6be8395d18b370d33f5961c4652916168c869da9
|
| 3 |
+
size 368443438
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:421f88eb88ff09a6f10c9c0caf03a8665de01d152db170828eca4be38fb91eac
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:525f0f37c26b21e07d6e9178e77c0ef29b869daafb690f58a885a4a1402ea584
|
| 3 |
+
size 368442474
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/0_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 4.400585174560547,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 2.1084,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 0.24108143150806427,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.5575,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 2.3387579917907715,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.3726,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 3.3745932579040527,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.0676,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 0.499793142080307,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.5278,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 1.0053821802139282,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.4909,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 1.3840383291244507,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.9338,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 2.917617082595825,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.3054,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 1.6473109722137451,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.8086,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.5606623888015747,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.9234,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.5007361173629761,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7782,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 1.6239376068115234,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.1194,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 2.372718572616577,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.4856,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.8457813262939453,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.3728,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 0.855133593082428,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 2.0313,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.2432137727737427,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.4426,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.4432425498962402,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.7053,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.024395227432251,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.6134,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.194606900215149,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.8373,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 3.11031174659729,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 2.5497,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 1.2926805019378662,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.9482,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 0.5149866938591003,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.4165,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 1.190213680267334,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.9618,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.6894278526306152,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.0249,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 2.4215104579925537,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 2.0148,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.714414119720459,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 1.3428,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 1.1892106533050537,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.1607,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 1.21503484249115,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.836,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.6278855800628662,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.844,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 2.5338757038116455,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.7447,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 1.6841797828674316,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.8774,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.9872264862060547,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.9218,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.0775535106658936,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.9773,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.7016096115112305,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 1.3366,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 9.161294937133789,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 2.0726,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.1249581575393677,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.8084,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 1.4263134002685547,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.9452,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 3.8015453815460205,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.3966,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.51714026927948,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.3366,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 1.001514196395874,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.2534,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.437701553106308,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.6374,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.8562130928039551,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.8362,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.1629040241241455,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.6705,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.7516007423400879,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.9896,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.4195737838745117,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.8637,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 2.6594505310058594,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.1106,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 0.5046249628067017,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.7569,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 2.1814091205596924,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.1165,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 5716895332302848.0,
|
| 351 |
+
"train_loss": 1.0793670944331848,
|
| 352 |
+
"train_runtime": 222.0474,
|
| 353 |
+
"train_samples_per_second": 1.747,
|
| 354 |
+
"train_steps_per_second": 0.437
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 5716895332302848.0,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b4cf563ddeb6cec914fa98a3339483e303ee40bcfbc8177f2de90ec07fd9f32
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d4e731a0acc1c910df1a066b29fbc699083b9afcfd305403782197d842e12ad
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72086853e3f2e6d801b2bbabd64045d8df56a4e1a9d90762650e9def4016026b
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca8e1391c42f7045fc6ae4257810e9ec6fa9b85ad051a083f1b3a6cc1c13b9e7
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ad99be95eca9ecc8be94e2334e2786ea50b2df0bc5a440a7200405c66c551aa
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e370b2d3fe5711152b28b41a8df70a261223af0d600ff997c0beee58add2f883
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03b2c620112acbddbfdcaa58a0bcc02827da8b0c3062d0bbb0aacdbfce067764
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9ad4a35f17fa2fa92dbf9df9678d3ab4ba7f3b43367db0b87142e429f809cd1
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/1_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.0367224216461182,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 2.1122,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.5293935537338257,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 2.3105,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.200186848640442,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.2933,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.551851749420166,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.01,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 1.2025572061538696,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.6588,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.773492693901062,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.269,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 1.1679770946502686,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.9164,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.5977709293365479,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.4414,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 1.0373786687850952,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 2.1323,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.6528465747833252,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.0931,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 1.2186673879623413,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.9268,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.9843794107437134,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.4834,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.1736723184585571,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.973,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 0.9919485449790955,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.426,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 0.3353116512298584,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.3913,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.7836465835571289,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.9893,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 0.6000968813896179,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 1.1023,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.1263123750686646,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.2717,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.4765498638153076,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.2275,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.7918884754180908,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.124,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 0.881719708442688,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.1614,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 0.5077035427093506,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.7703,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 1.0335602760314941,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.0186,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 2.6902318000793457,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.4702,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 0.48346254229545593,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.7069,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.43713346123695374,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.9541,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.7219210863113403,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 1.0328,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 0.6687954068183899,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.9612,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.4545569121837616,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.7134,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.4642369747161865,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 1.0691,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.866256058216095,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.9705,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.6883927583694458,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.0837,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 0.8896063566207886,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.2245,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.008226990699768,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.8514,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.5231401324272156,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.8315,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.1414425373077393,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.0161,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.8160688281059265,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.9522,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 4.522146224975586,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.3154,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 0.8005837798118591,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.38,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.8043385744094849,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.7023,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.4813634753227234,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.2706,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 2.6377651691436768,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.7818,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 0.9884634017944336,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.8094,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.7997894287109375,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.4816,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.9168503284454346,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.8998,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 0.4715125858783722,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.7297,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 0.7565422058105469,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.1934,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.6956796646118164,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.0388,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.3756887170285568e+16,
|
| 351 |
+
"train_loss": 1.1302465065238403,
|
| 352 |
+
"train_runtime": 315.5064,
|
| 353 |
+
"train_samples_per_second": 1.23,
|
| 354 |
+
"train_steps_per_second": 0.307
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.3756887170285568e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:623c27c6e16e61754cf9f9bde6b0a2f00c89549eada108fd4d3385eeaa29b8a5
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9394ca5cba51cc4d5f9a628c5235add0cc79e6cefcbf73aab79088eaf9958ec2
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1698991ce97c58e41af99e8d6b1461337859fc7938ede7e57f36eebb9dccf5f7
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de7badf37bc6814c02941952260ab1c4ea0434cdd296a402f91bb67747e52c24
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c6e3bfc5a8aab513e9fe3473e92e3048bfbc4d19bd4c4176ce0141a9c69225d
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e86a679b1183d5d4cc5c15a3054fd2b160fe0443325d5f0d091c265c5eb060f
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc8dc7440211f75b47738e72ddd3bf0a982d45c096e63747b2ba17635f45434a
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:206787c858348d28620a2bc4ffbc2c93037006a7963eff3316cb95bd1e6bc540
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/2_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 2.538358211517334,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 0.6608,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 1.2682268619537354,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.9785,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 1.937935709953308,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 2.3516,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 1.4392732381820679,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.6785,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 1.5007473230361938,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.5229,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.6477186679840088,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 1.3816,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 2.7069621086120605,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 1.319,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.5525019764900208,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 0.9359,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.7979759573936462,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 1.6191,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 2.443120241165161,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.0461,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.8416927456855774,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.217,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.7521089315414429,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.0924,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.4497734308242798,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.1838,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 0.6999666690826416,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.1143,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 1.1742311716079712,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.9922,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 0.5971889495849609,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.6466,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.034390926361084,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.8025,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 2.665285110473633,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 0.9528,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 0.8578566312789917,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9949,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.1724331378936768,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.903,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 1.1174654960632324,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.1925,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.1770066022872925,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.8522,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 0.9446500539779663,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.9395,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 0.4612779915332794,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 0.4162,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 0.7479944229125977,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.953,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 0.7862464189529419,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.3849,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.7938858270645142,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.6369,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 2.231029987335205,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 0.9901,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.3764982223510742,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.9056,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 0.9940462708473206,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.5912,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.9223126173019409,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.6726,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.7324680685997009,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.4076,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 0.8635823130607605,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 0.5997,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.428286075592041,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.9463,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.5211204290390015,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.7596,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 1.464924931526184,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.7913,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 2.8257651329040527,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.2766,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 2.5703413486480713,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 0.8435,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.486395239830017,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.9819,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.4546787738800049,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.2796,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.7543643116950989,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.2902,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.8718597292900085,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.5479,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 2.138429880142212,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.1224,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 0.581947922706604,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.9022,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 1.3591723442077637,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.9803,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 1.1390422582626343,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.1259,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.318390130996704,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.9855,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 1.238763451576233,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.4494,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 1.5309291128881152e+16,
|
| 351 |
+
"train_loss": 0.9762444643630195,
|
| 352 |
+
"train_runtime": 351.1873,
|
| 353 |
+
"train_samples_per_second": 1.105,
|
| 354 |
+
"train_steps_per_second": 0.276
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 1.5309291128881152e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fc77f4acfc80e842f0c49ca07c0518a93812788236835cee6b47f88c1ce363a
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40d3dc701382da26808567ded7104ae77026cab4a5a03ca85a9b7408b648375f
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6fbb24e8df10767d6e48cd3bbe7abb171912af63f525f6830f724827e262f42
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc952fe4c8ad4ef2ce347a0fec31859d5ea7286b61fbdbd33f62cc59fc6a1db8
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b91e5497d58df5e6c88cfd96c80538d3b0489625e9c1844bc7134fc1819c132
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41b1e011c681191accbfee628a5d5367b45a57bb25b21cc0fb2dcc706842da26
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:909ae260c1b95d5769bb16ca3cc0ea471dd2f0a32764d5950ed946519bbdfa90
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deb5bbcfb7d060129e306207445c42c34099a7c3d3df2de2dd80ec00e2a81b15
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/3_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.7036350965499878,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.1502,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 0.733051061630249,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 0.3144,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 3.021022081375122,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.523,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 0.6244669556617737,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 0.9215,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 1.675452470779419,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.2749,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 0.2071010321378708,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.2043,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.45165616273880005,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.2592,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 0.9154694080352783,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.0788,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 0.44769397377967834,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.672,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 1.001409649848938,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 0.7424,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 0.2860804498195648,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 0.7084,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 0.4580197036266327,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.1849,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 0.4905541241168976,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 0.7166,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 1.433603048324585,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.5247,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 2.386955738067627,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 0.8547,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 2.032757043838501,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 0.5488,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.0941940546035767,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 0.9551,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.461041808128357,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.1186,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 0.46292173862457275,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 0.9941,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 0.46523961424827576,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.99,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 0.5094814896583557,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 1.0255,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 3.24894642829895,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 1.3442,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 3.7670817375183105,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 1.279,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 2.1132254600524902,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.7001,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 0.593248724937439,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 0.8884,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 1.4078608751296997,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.8091,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.3955836892127991,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.3546,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 0.9244691729545593,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.437,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 0.4643814265727997,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 0.881,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 0.41819217801094055,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.6405,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.7221791744232178,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.7172,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 0.44450488686561584,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 0.7196,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 1.463599443435669,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.4753,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 0.2606247365474701,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.8865,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 0.3822779655456543,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.7243,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 0.314899206161499,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 0.9963,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 0.47478482127189636,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 0.7358,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 3.180941581726074,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 1.546,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 0.392020046710968,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 0.4863,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 0.4663325548171997,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 0.3802,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 0.5237138867378235,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 0.5043,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 0.8400606513023376,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 0.8792,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 0.3281240463256836,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 0.735,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.076886773109436,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 0.9669,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 0.6442875266075134,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 0.6857,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 1.3491824865341187,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 0.7857,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 0.4119647443294525,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 0.5265,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 0.9270315766334534,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 1.2191,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 2.081554905117491e+16,
|
| 351 |
+
"train_loss": 0.8912827993176647,
|
| 352 |
+
"train_runtime": 342.0553,
|
| 353 |
+
"train_samples_per_second": 1.134,
|
| 354 |
+
"train_steps_per_second": 0.284
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 2.081554905117491e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76222740e5b1eb701df8242a79fa98659c625a40a9d557ff4ee9ab9793e6a2de
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af4abb1969b8895e24009b3474b99f4b40c4a44b2056fb6935f1f5c9fbd2b7de
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aed5d456627b3b084bed1c6c4f40781201a9314e9ab4c7fcd0045aa87653ad93
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:974c3d90766732a37f45318d13fa049f577025980b07e7b331d04ffe0c7db571
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:369ee76d02588c766be21d8792a4eb5096425d0505d617ed47d36b31ab2a7489
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:724a4cb19d6636ec4851b473dc93dfec177f1eb18c9610b52336f58fe4ac864e
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:296b10e3bc3a9fa0d2250f938b2eecca029ba36b1dced5a077b832d32301cc42
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2799db2855c1d10a215ade45151d9ac24e0793f29bfafc62c45ef363f459e35e
|
| 3 |
+
size 791576546
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/4_trainer_state.json
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 97,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.020618556701030927,
|
| 13 |
+
"grad_norm": 1.3807806968688965,
|
| 14 |
+
"learning_rate": 2e-05,
|
| 15 |
+
"loss": 1.2215,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.041237113402061855,
|
| 20 |
+
"grad_norm": 5.47310209274292,
|
| 21 |
+
"learning_rate": 2e-05,
|
| 22 |
+
"loss": 1.7948,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.061855670103092786,
|
| 27 |
+
"grad_norm": 4.091208457946777,
|
| 28 |
+
"learning_rate": 2e-05,
|
| 29 |
+
"loss": 1.4245,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.08247422680412371,
|
| 34 |
+
"grad_norm": 2.522712230682373,
|
| 35 |
+
"learning_rate": 2e-05,
|
| 36 |
+
"loss": 1.2455,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.10309278350515463,
|
| 41 |
+
"grad_norm": 2.400545835494995,
|
| 42 |
+
"learning_rate": 2e-05,
|
| 43 |
+
"loss": 1.4473,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.12371134020618557,
|
| 48 |
+
"grad_norm": 2.147249937057495,
|
| 49 |
+
"learning_rate": 2e-05,
|
| 50 |
+
"loss": 0.648,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.14432989690721648,
|
| 55 |
+
"grad_norm": 0.6440826654434204,
|
| 56 |
+
"learning_rate": 2e-05,
|
| 57 |
+
"loss": 0.1022,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.16494845360824742,
|
| 62 |
+
"grad_norm": 1.4986751079559326,
|
| 63 |
+
"learning_rate": 2e-05,
|
| 64 |
+
"loss": 1.5813,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.18556701030927836,
|
| 69 |
+
"grad_norm": 2.2775418758392334,
|
| 70 |
+
"learning_rate": 2e-05,
|
| 71 |
+
"loss": 0.7531,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.20618556701030927,
|
| 76 |
+
"grad_norm": 3.2079670429229736,
|
| 77 |
+
"learning_rate": 2e-05,
|
| 78 |
+
"loss": 1.6908,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.2268041237113402,
|
| 83 |
+
"grad_norm": 2.16922926902771,
|
| 84 |
+
"learning_rate": 2e-05,
|
| 85 |
+
"loss": 1.5928,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.24742268041237114,
|
| 90 |
+
"grad_norm": 2.276587724685669,
|
| 91 |
+
"learning_rate": 2e-05,
|
| 92 |
+
"loss": 1.4918,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.26804123711340205,
|
| 97 |
+
"grad_norm": 1.4131243228912354,
|
| 98 |
+
"learning_rate": 2e-05,
|
| 99 |
+
"loss": 1.7804,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.28865979381443296,
|
| 104 |
+
"grad_norm": 3.375476121902466,
|
| 105 |
+
"learning_rate": 2e-05,
|
| 106 |
+
"loss": 1.9648,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.30927835051546393,
|
| 111 |
+
"grad_norm": 2.6980457305908203,
|
| 112 |
+
"learning_rate": 2e-05,
|
| 113 |
+
"loss": 1.9027,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.32989690721649484,
|
| 118 |
+
"grad_norm": 1.1185120344161987,
|
| 119 |
+
"learning_rate": 2e-05,
|
| 120 |
+
"loss": 1.2945,
|
| 121 |
+
"step": 32
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.35051546391752575,
|
| 125 |
+
"grad_norm": 1.5742499828338623,
|
| 126 |
+
"learning_rate": 2e-05,
|
| 127 |
+
"loss": 2.3652,
|
| 128 |
+
"step": 34
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.3711340206185567,
|
| 132 |
+
"grad_norm": 1.6757092475891113,
|
| 133 |
+
"learning_rate": 2e-05,
|
| 134 |
+
"loss": 1.5789,
|
| 135 |
+
"step": 36
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.3917525773195876,
|
| 139 |
+
"grad_norm": 1.5958377122879028,
|
| 140 |
+
"learning_rate": 2e-05,
|
| 141 |
+
"loss": 1.4983,
|
| 142 |
+
"step": 38
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.41237113402061853,
|
| 146 |
+
"grad_norm": 1.3994206190109253,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.8934,
|
| 149 |
+
"step": 40
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.4329896907216495,
|
| 153 |
+
"grad_norm": 0.8964346647262573,
|
| 154 |
+
"learning_rate": 2e-05,
|
| 155 |
+
"loss": 0.7392,
|
| 156 |
+
"step": 42
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.4536082474226804,
|
| 160 |
+
"grad_norm": 1.5910592079162598,
|
| 161 |
+
"learning_rate": 2e-05,
|
| 162 |
+
"loss": 0.8057,
|
| 163 |
+
"step": 44
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.4742268041237113,
|
| 167 |
+
"grad_norm": 0.8048728704452515,
|
| 168 |
+
"learning_rate": 2e-05,
|
| 169 |
+
"loss": 0.7514,
|
| 170 |
+
"step": 46
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.4948453608247423,
|
| 174 |
+
"grad_norm": 3.0406651496887207,
|
| 175 |
+
"learning_rate": 2e-05,
|
| 176 |
+
"loss": 1.271,
|
| 177 |
+
"step": 48
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.5154639175257731,
|
| 181 |
+
"grad_norm": 2.1475746631622314,
|
| 182 |
+
"learning_rate": 2e-05,
|
| 183 |
+
"loss": 1.3316,
|
| 184 |
+
"step": 50
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.5360824742268041,
|
| 188 |
+
"grad_norm": 1.973347544670105,
|
| 189 |
+
"learning_rate": 2e-05,
|
| 190 |
+
"loss": 0.9409,
|
| 191 |
+
"step": 52
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.5567010309278351,
|
| 195 |
+
"grad_norm": 0.5019800662994385,
|
| 196 |
+
"learning_rate": 2e-05,
|
| 197 |
+
"loss": 0.5754,
|
| 198 |
+
"step": 54
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.5773195876288659,
|
| 202 |
+
"grad_norm": 2.1202926635742188,
|
| 203 |
+
"learning_rate": 2e-05,
|
| 204 |
+
"loss": 1.3977,
|
| 205 |
+
"step": 56
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.5979381443298969,
|
| 209 |
+
"grad_norm": 1.2852915525436401,
|
| 210 |
+
"learning_rate": 2e-05,
|
| 211 |
+
"loss": 1.6778,
|
| 212 |
+
"step": 58
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.6185567010309279,
|
| 216 |
+
"grad_norm": 1.093619465827942,
|
| 217 |
+
"learning_rate": 2e-05,
|
| 218 |
+
"loss": 0.8222,
|
| 219 |
+
"step": 60
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.6391752577319587,
|
| 223 |
+
"grad_norm": 0.7490342259407043,
|
| 224 |
+
"learning_rate": 2e-05,
|
| 225 |
+
"loss": 0.6013,
|
| 226 |
+
"step": 62
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.6597938144329897,
|
| 230 |
+
"grad_norm": 2.6893742084503174,
|
| 231 |
+
"learning_rate": 2e-05,
|
| 232 |
+
"loss": 1.2905,
|
| 233 |
+
"step": 64
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.6804123711340206,
|
| 237 |
+
"grad_norm": 3.0177910327911377,
|
| 238 |
+
"learning_rate": 2e-05,
|
| 239 |
+
"loss": 1.7748,
|
| 240 |
+
"step": 66
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.7010309278350515,
|
| 244 |
+
"grad_norm": 1.123380422592163,
|
| 245 |
+
"learning_rate": 2e-05,
|
| 246 |
+
"loss": 0.8262,
|
| 247 |
+
"step": 68
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.7216494845360825,
|
| 251 |
+
"grad_norm": 1.6035982370376587,
|
| 252 |
+
"learning_rate": 2e-05,
|
| 253 |
+
"loss": 0.798,
|
| 254 |
+
"step": 70
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.7422680412371134,
|
| 258 |
+
"grad_norm": 0.7902660965919495,
|
| 259 |
+
"learning_rate": 2e-05,
|
| 260 |
+
"loss": 1.0709,
|
| 261 |
+
"step": 72
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.7628865979381443,
|
| 265 |
+
"grad_norm": 1.8285802602767944,
|
| 266 |
+
"learning_rate": 2e-05,
|
| 267 |
+
"loss": 1.0743,
|
| 268 |
+
"step": 74
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.7835051546391752,
|
| 272 |
+
"grad_norm": 2.9130942821502686,
|
| 273 |
+
"learning_rate": 2e-05,
|
| 274 |
+
"loss": 2.0891,
|
| 275 |
+
"step": 76
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.8041237113402062,
|
| 279 |
+
"grad_norm": 1.4661481380462646,
|
| 280 |
+
"learning_rate": 2e-05,
|
| 281 |
+
"loss": 1.1128,
|
| 282 |
+
"step": 78
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.8247422680412371,
|
| 286 |
+
"grad_norm": 2.301260471343994,
|
| 287 |
+
"learning_rate": 2e-05,
|
| 288 |
+
"loss": 1.6207,
|
| 289 |
+
"step": 80
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.845360824742268,
|
| 293 |
+
"grad_norm": 1.637123465538025,
|
| 294 |
+
"learning_rate": 2e-05,
|
| 295 |
+
"loss": 1.3459,
|
| 296 |
+
"step": 82
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.865979381443299,
|
| 300 |
+
"grad_norm": 1.056007742881775,
|
| 301 |
+
"learning_rate": 2e-05,
|
| 302 |
+
"loss": 1.2215,
|
| 303 |
+
"step": 84
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.8865979381443299,
|
| 307 |
+
"grad_norm": 1.2907710075378418,
|
| 308 |
+
"learning_rate": 2e-05,
|
| 309 |
+
"loss": 1.6295,
|
| 310 |
+
"step": 86
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9072164948453608,
|
| 314 |
+
"grad_norm": 1.3023312091827393,
|
| 315 |
+
"learning_rate": 2e-05,
|
| 316 |
+
"loss": 1.0457,
|
| 317 |
+
"step": 88
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.9278350515463918,
|
| 321 |
+
"grad_norm": 0.39116430282592773,
|
| 322 |
+
"learning_rate": 2e-05,
|
| 323 |
+
"loss": 1.1983,
|
| 324 |
+
"step": 90
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.9484536082474226,
|
| 328 |
+
"grad_norm": 0.9639070630073547,
|
| 329 |
+
"learning_rate": 2e-05,
|
| 330 |
+
"loss": 1.1595,
|
| 331 |
+
"step": 92
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.9690721649484536,
|
| 335 |
+
"grad_norm": 1.6161588430404663,
|
| 336 |
+
"learning_rate": 2e-05,
|
| 337 |
+
"loss": 1.2601,
|
| 338 |
+
"step": 94
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.9896907216494846,
|
| 342 |
+
"grad_norm": 2.380856513977051,
|
| 343 |
+
"learning_rate": 2e-05,
|
| 344 |
+
"loss": 0.6827,
|
| 345 |
+
"step": 96
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 1.0,
|
| 349 |
+
"step": 97,
|
| 350 |
+
"total_flos": 2.074250449204019e+16,
|
| 351 |
+
"train_loss": 1.2486689803526574,
|
| 352 |
+
"train_runtime": 347.5167,
|
| 353 |
+
"train_samples_per_second": 1.116,
|
| 354 |
+
"train_steps_per_second": 0.279
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"logging_steps": 2,
|
| 358 |
+
"max_steps": 97,
|
| 359 |
+
"num_input_tokens_seen": 0,
|
| 360 |
+
"num_train_epochs": 1,
|
| 361 |
+
"save_steps": 500,
|
| 362 |
+
"stateful_callbacks": {
|
| 363 |
+
"TrainerControl": {
|
| 364 |
+
"args": {
|
| 365 |
+
"should_epoch_stop": false,
|
| 366 |
+
"should_evaluate": false,
|
| 367 |
+
"should_log": false,
|
| 368 |
+
"should_save": false,
|
| 369 |
+
"should_training_stop": false
|
| 370 |
+
},
|
| 371 |
+
"attributes": {}
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"total_flos": 2.074250449204019e+16,
|
| 375 |
+
"train_batch_size": 1,
|
| 376 |
+
"trial_name": null,
|
| 377 |
+
"trial_params": null
|
| 378 |
+
}
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09cd08a35ceaf65f8b4b8c2b3943b855c5e7348c334be564b82fa3d4dc95bad8
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6ce216ab0402be87c84d6af509a84f0ce168cf22217ee6eb8263e51dd6142d6
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0970a229f1f57205586ba91eed0371c4af2fe6fbfaa4a6ec8f6aa437d820ac8d
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b873fa30a8c5d382069a0c536ec17db452e64f48a4c4c7b3c9dafca48b601f1d
|
| 3 |
+
size 791578182
|
client_states_feddualMultipqfullfreezeA2_NEWAensureOrth_Bcca_T05_freq10_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr97_T0125_decay099_SEED3/5_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99c23eb08adf5a7a45e3b25bc3f6dd68703808836dcdddf7a33aa4d8121f930d
|
| 3 |
+
size 791576546
|