thkim0305 commited on May 20, 2025

Commit

90c20dc

verified ·

1 Parent(s): f1e4490

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json +364 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json +364 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json +364 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json +364 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json +364 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth +3 -0
client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth +3 -0

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:857e6d421f7321c12fe753e5d706f2fed3c56cd89f87855a28a6dd7d8fd927af
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0b0777d782865d3402d1597c627ad09d836fbfc86ec5346b2269a5db864c8a
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:502cb07864fd3153066c9bb6e916b9a3d542c551fbd8df4e1f23c5c2a2eb74f6
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37d161c3bdf3434da4627ca3c8c465d70c023552bd189055da3a9c1611ae3232
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8abef3ac8894468d37e796f4790964bffef032cdd9365e7448f55a72c4ad1c67
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:763351182d6a05485fa69a698f1f7062d6104cb6111340d7d7abe32fa64ad8a3
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:467f699937a5cd7720442e09e5b47ea3eaf77b4604e673f05ec68159a694011b
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:550bfb4401128e0b71618eafdc31c528052668b811e502b8814e617de6aee7d2
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,364 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 92,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.021739130434782608,
+      "grad_norm": 6.09108304977417,
+      "learning_rate": 2e-05,
+      "loss": 0.5907,
+      "step": 2
+    },
+    {
+      "epoch": 0.043478260869565216,
+      "grad_norm": 3.6769254207611084,
+      "learning_rate": 2e-05,
+      "loss": 0.9602,
+      "step": 4
+    },
+    {
+      "epoch": 0.06521739130434782,
+      "grad_norm": 0.5363268256187439,
+      "learning_rate": 2e-05,
+      "loss": 0.036,
+      "step": 6
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 0.43710896372795105,
+      "learning_rate": 2e-05,
+      "loss": 0.1849,
+      "step": 8
+    },
+    {
+      "epoch": 0.10869565217391304,
+      "grad_norm": 2.068805694580078,
+      "learning_rate": 2e-05,
+      "loss": 0.1805,
+      "step": 10
+    },
+    {
+      "epoch": 0.13043478260869565,
+      "grad_norm": 1.2317078113555908,
+      "learning_rate": 2e-05,
+      "loss": 0.8115,
+      "step": 12
+    },
+    {
+      "epoch": 0.15217391304347827,
+      "grad_norm": 2.86909818649292,
+      "learning_rate": 2e-05,
+      "loss": 0.3496,
+      "step": 14
+    },
+    {
+      "epoch": 0.17391304347826086,
+      "grad_norm": 1.2622524499893188,
+      "learning_rate": 2e-05,
+      "loss": 0.2863,
+      "step": 16
+    },
+    {
+      "epoch": 0.1956521739130435,
+      "grad_norm": 4.501521587371826,
+      "learning_rate": 2e-05,
+      "loss": 0.2824,
+      "step": 18
+    },
+    {
+      "epoch": 0.21739130434782608,
+      "grad_norm": 3.231552839279175,
+      "learning_rate": 2e-05,
+      "loss": 0.2514,
+      "step": 20
+    },
+    {
+      "epoch": 0.2391304347826087,
+      "grad_norm": 1.5159425735473633,
+      "learning_rate": 2e-05,
+      "loss": 0.4057,
+      "step": 22
+    },
+    {
+      "epoch": 0.2608695652173913,
+      "grad_norm": 7.557904243469238,
+      "learning_rate": 2e-05,
+      "loss": 2.476,
+      "step": 24
+    },
+    {
+      "epoch": 0.2826086956521739,
+      "grad_norm": 2.5409963130950928,
+      "learning_rate": 2e-05,
+      "loss": 1.2994,
+      "step": 26
+    },
+    {
+      "epoch": 0.30434782608695654,
+      "grad_norm": 0.06883285939693451,
+      "learning_rate": 2e-05,
+      "loss": 0.0905,
+      "step": 28
+    },
+    {
+      "epoch": 0.32608695652173914,
+      "grad_norm": 3.767472267150879,
+      "learning_rate": 2e-05,
+      "loss": 0.5633,
+      "step": 30
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 2.7977592945098877,
+      "learning_rate": 2e-05,
+      "loss": 0.1898,
+      "step": 32
+    },
+    {
+      "epoch": 0.3695652173913043,
+      "grad_norm": 2.504427194595337,
+      "learning_rate": 2e-05,
+      "loss": 0.7027,
+      "step": 34
+    },
+    {
+      "epoch": 0.391304347826087,
+      "grad_norm": 0.23567438125610352,
+      "learning_rate": 2e-05,
+      "loss": 0.0205,
+      "step": 36
+    },
+    {
+      "epoch": 0.41304347826086957,
+      "grad_norm": 6.038938522338867,
+      "learning_rate": 2e-05,
+      "loss": 0.9922,
+      "step": 38
+    },
+    {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 3.7713494300842285,
+      "learning_rate": 2e-05,
+      "loss": 1.2868,
+      "step": 40
+    },
+    {
+      "epoch": 0.45652173913043476,
+      "grad_norm": 1.8124828338623047,
+      "learning_rate": 2e-05,
+      "loss": 1.7291,
+      "step": 42
+    },
+    {
+      "epoch": 0.4782608695652174,
+      "grad_norm": 5.478545665740967,
+      "learning_rate": 2e-05,
+      "loss": 0.7223,
+      "step": 44
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.34744882583618164,
+      "learning_rate": 2e-05,
+      "loss": 0.3988,
+      "step": 46
+    },
+    {
+      "epoch": 0.5217391304347826,
+      "grad_norm": 1.2364314794540405,
+      "learning_rate": 2e-05,
+      "loss": 0.114,
+      "step": 48
+    },
+    {
+      "epoch": 0.5434782608695652,
+      "grad_norm": 5.041813850402832,
+      "learning_rate": 2e-05,
+      "loss": 0.668,
+      "step": 50
+    },
+    {
+      "epoch": 0.5652173913043478,
+      "grad_norm": 7.402784824371338,
+      "learning_rate": 2e-05,
+      "loss": 1.1368,
+      "step": 52
+    },
+    {
+      "epoch": 0.5869565217391305,
+      "grad_norm": 0.8266273736953735,
+      "learning_rate": 2e-05,
+      "loss": 0.0698,
+      "step": 54
+    },
+    {
+      "epoch": 0.6086956521739131,
+      "grad_norm": 2.409656286239624,
+      "learning_rate": 2e-05,
+      "loss": 1.1547,
+      "step": 56
+    },
+    {
+      "epoch": 0.6304347826086957,
+      "grad_norm": 0.29032644629478455,
+      "learning_rate": 2e-05,
+      "loss": 1.6191,
+      "step": 58
+    },
+    {
+      "epoch": 0.6521739130434783,
+      "grad_norm": 2.472151517868042,
+      "learning_rate": 2e-05,
+      "loss": 0.2694,
+      "step": 60
+    },
+    {
+      "epoch": 0.6739130434782609,
+      "grad_norm": 3.610250473022461,
+      "learning_rate": 2e-05,
+      "loss": 0.82,
+      "step": 62
+    },
+    {
+      "epoch": 0.6956521739130435,
+      "grad_norm": 1.7341822385787964,
+      "learning_rate": 2e-05,
+      "loss": 0.6236,
+      "step": 64
+    },
+    {
+      "epoch": 0.717391304347826,
+      "grad_norm": 0.2348906695842743,
+      "learning_rate": 2e-05,
+      "loss": 0.3063,
+      "step": 66
+    },
+    {
+      "epoch": 0.7391304347826086,
+      "grad_norm": 4.811798572540283,
+      "learning_rate": 2e-05,
+      "loss": 0.9939,
+      "step": 68
+    },
+    {
+      "epoch": 0.7608695652173914,
+      "grad_norm": 1.6772363185882568,
+      "learning_rate": 2e-05,
+      "loss": 0.1621,
+      "step": 70
+    },
+    {
+      "epoch": 0.782608695652174,
+      "grad_norm": 5.120459079742432,
+      "learning_rate": 2e-05,
+      "loss": 1.1419,
+      "step": 72
+    },
+    {
+      "epoch": 0.8043478260869565,
+      "grad_norm": 0.06929455697536469,
+      "learning_rate": 2e-05,
+      "loss": 0.2167,
+      "step": 74
+    },
+    {
+      "epoch": 0.8260869565217391,
+      "grad_norm": 2.0615053176879883,
+      "learning_rate": 2e-05,
+      "loss": 0.2673,
+      "step": 76
+    },
+    {
+      "epoch": 0.8478260869565217,
+      "grad_norm": 2.242427110671997,
+      "learning_rate": 2e-05,
+      "loss": 0.1578,
+      "step": 78
+    },
+    {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 4.344549179077148,
+      "learning_rate": 2e-05,
+      "loss": 0.953,
+      "step": 80
+    },
+    {
+      "epoch": 0.8913043478260869,
+      "grad_norm": 3.4479851722717285,
+      "learning_rate": 2e-05,
+      "loss": 0.5008,
+      "step": 82
+    },
+    {
+      "epoch": 0.9130434782608695,
+      "grad_norm": 4.703179836273193,
+      "learning_rate": 2e-05,
+      "loss": 1.2355,
+      "step": 84
+    },
+    {
+      "epoch": 0.9347826086956522,
+      "grad_norm": 3.625882387161255,
+      "learning_rate": 2e-05,
+      "loss": 0.7881,
+      "step": 86
+    },
+    {
+      "epoch": 0.9565217391304348,
+      "grad_norm": 0.7072364687919617,
+      "learning_rate": 2e-05,
+      "loss": 0.5925,
+      "step": 88
+    },
+    {
+      "epoch": 0.9782608695652174,
+      "grad_norm": 1.2554280757904053,
+      "learning_rate": 2e-05,
+      "loss": 0.2273,
+      "step": 90
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.71370530128479,
+      "learning_rate": 2e-05,
+      "loss": 0.1592,
+      "step": 92
+    },
+    {
+      "epoch": 1.0,
+      "step": 92,
+      "total_flos": 2016868504174592.0,
+      "train_loss": 0.6301771117293317,
+      "train_runtime": 196.5079,
+      "train_samples_per_second": 1.873,
+      "train_steps_per_second": 0.468
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 92,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2016868504174592.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84b137fdf81538eabaa3a8e44061ba978549e8e0f750d39ed8894884abd24e61
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5af89fb7691ce8c2200477f096473849c5320b579dcfc6ca95024281e3a266b6
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81ab300618708290e4d43828e38b50877edf7bb3edc2f66910325001c1ec26db
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39e76d8d36cfbaa81321d35f83914bc805f951764c7c8c1eb2d9f7d77ef7a239
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a31502af9bb7a21b0bb183396d7c5a09d3ac8fa8bba158a9f2309a13a95dd8c2
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cd06ed2f69855ac79ade36194b719695cb03195a47d8ad3db0bdd40e66a1324
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4f896f688718a2aa7fcab54b474461aa6bcd4c8d2592b4f7acc3ce76a3f131a
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:670739b0e450b22b0ba067dd558ccb7491d152313a78fa9691de3f1658719099
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,364 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 92,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.021739130434782608,
+      "grad_norm": 0.17322731018066406,
+      "learning_rate": 2e-05,
+      "loss": 0.007,
+      "step": 2
+    },
+    {
+      "epoch": 0.043478260869565216,
+      "grad_norm": 0.4034227430820465,
+      "learning_rate": 2e-05,
+      "loss": 0.0669,
+      "step": 4
+    },
+    {
+      "epoch": 0.06521739130434782,
+      "grad_norm": 0.047655943781137466,
+      "learning_rate": 2e-05,
+      "loss": 0.014,
+      "step": 6
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 0.029691645875573158,
+      "learning_rate": 2e-05,
+      "loss": 0.0985,
+      "step": 8
+    },
+    {
+      "epoch": 0.10869565217391304,
+      "grad_norm": 0.0031738663092255592,
+      "learning_rate": 2e-05,
+      "loss": 0.0029,
+      "step": 10
+    },
+    {
+      "epoch": 0.13043478260869565,
+      "grad_norm": 0.7081104516983032,
+      "learning_rate": 2e-05,
+      "loss": 0.1851,
+      "step": 12
+    },
+    {
+      "epoch": 0.15217391304347827,
+      "grad_norm": 0.41896992921829224,
+      "learning_rate": 2e-05,
+      "loss": 0.0237,
+      "step": 14
+    },
+    {
+      "epoch": 0.17391304347826086,
+      "grad_norm": 0.48387590050697327,
+      "learning_rate": 2e-05,
+      "loss": 0.0193,
+      "step": 16
+    },
+    {
+      "epoch": 0.1956521739130435,
+      "grad_norm": 0.0010938448831439018,
+      "learning_rate": 2e-05,
+      "loss": 0.5725,
+      "step": 18
+    },
+    {
+      "epoch": 0.21739130434782608,
+      "grad_norm": 0.1128813624382019,
+      "learning_rate": 2e-05,
+      "loss": 0.0055,
+      "step": 20
+    },
+    {
+      "epoch": 0.2391304347826087,
+      "grad_norm": 0.004215009044855833,
+      "learning_rate": 2e-05,
+      "loss": 0.0004,
+      "step": 22
+    },
+    {
+      "epoch": 0.2608695652173913,
+      "grad_norm": 0.053580403327941895,
+      "learning_rate": 2e-05,
+      "loss": 0.0041,
+      "step": 24
+    },
+    {
+      "epoch": 0.2826086956521739,
+      "grad_norm": 0.08771532028913498,
+      "learning_rate": 2e-05,
+      "loss": 0.0955,
+      "step": 26
+    },
+    {
+      "epoch": 0.30434782608695654,
+      "grad_norm": 0.4550749361515045,
+      "learning_rate": 2e-05,
+      "loss": 0.0213,
+      "step": 28
+    },
+    {
+      "epoch": 0.32608695652173914,
+      "grad_norm": 2.2826590538024902,
+      "learning_rate": 2e-05,
+      "loss": 0.22,
+      "step": 30
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 0.0036860036198049784,
+      "learning_rate": 2e-05,
+      "loss": 0.0015,
+      "step": 32
+    },
+    {
+      "epoch": 0.3695652173913043,
+      "grad_norm": 0.7332060933113098,
+      "learning_rate": 2e-05,
+      "loss": 0.0539,
+      "step": 34
+    },
+    {
+      "epoch": 0.391304347826087,
+      "grad_norm": 0.15167345106601715,
+      "learning_rate": 2e-05,
+      "loss": 0.1528,
+      "step": 36
+    },
+    {
+      "epoch": 0.41304347826086957,
+      "grad_norm": 0.0022186979185789824,
+      "learning_rate": 2e-05,
+      "loss": 0.0634,
+      "step": 38
+    },
+    {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 6.4484453201293945,
+      "learning_rate": 2e-05,
+      "loss": 1.7515,
+      "step": 40
+    },
+    {
+      "epoch": 0.45652173913043476,
+      "grad_norm": 0.00416164705529809,
+      "learning_rate": 2e-05,
+      "loss": 0.0016,
+      "step": 42
+    },
+    {
+      "epoch": 0.4782608695652174,
+      "grad_norm": 0.034967318177223206,
+      "learning_rate": 2e-05,
+      "loss": 0.0752,
+      "step": 44
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.011398707516491413,
+      "learning_rate": 2e-05,
+      "loss": 0.0103,
+      "step": 46
+    },
+    {
+      "epoch": 0.5217391304347826,
+      "grad_norm": 0.8181813955307007,
+      "learning_rate": 2e-05,
+      "loss": 0.0635,
+      "step": 48
+    },
+    {
+      "epoch": 0.5434782608695652,
+      "grad_norm": 0.030382607132196426,
+      "learning_rate": 2e-05,
+      "loss": 0.0022,
+      "step": 50
+    },
+    {
+      "epoch": 0.5652173913043478,
+      "grad_norm": 0.005472751799970865,
+      "learning_rate": 2e-05,
+      "loss": 0.4915,
+      "step": 52
+    },
+    {
+      "epoch": 0.5869565217391305,
+      "grad_norm": 0.012804504483938217,
+      "learning_rate": 2e-05,
+      "loss": 0.0789,
+      "step": 54
+    },
+    {
+      "epoch": 0.6086956521739131,
+      "grad_norm": 0.5662734508514404,
+      "learning_rate": 2e-05,
+      "loss": 0.1313,
+      "step": 56
+    },
+    {
+      "epoch": 0.6304347826086957,
+      "grad_norm": 0.15565143525600433,
+      "learning_rate": 2e-05,
+      "loss": 0.1838,
+      "step": 58
+    },
+    {
+      "epoch": 0.6521739130434783,
+      "grad_norm": 0.03305691480636597,
+      "learning_rate": 2e-05,
+      "loss": 0.0302,
+      "step": 60
+    },
+    {
+      "epoch": 0.6739130434782609,
+      "grad_norm": 0.11561785638332367,
+      "learning_rate": 2e-05,
+      "loss": 0.0105,
+      "step": 62
+    },
+    {
+      "epoch": 0.6956521739130435,
+      "grad_norm": 0.01182617712765932,
+      "learning_rate": 2e-05,
+      "loss": 0.0043,
+      "step": 64
+    },
+    {
+      "epoch": 0.717391304347826,
+      "grad_norm": 0.04873761162161827,
+      "learning_rate": 2e-05,
+      "loss": 0.0059,
+      "step": 66
+    },
+    {
+      "epoch": 0.7391304347826086,
+      "grad_norm": 0.07617998868227005,
+      "learning_rate": 2e-05,
+      "loss": 0.0141,
+      "step": 68
+    },
+    {
+      "epoch": 0.7608695652173914,
+      "grad_norm": 0.8835837244987488,
+      "learning_rate": 2e-05,
+      "loss": 0.0333,
+      "step": 70
+    },
+    {
+      "epoch": 0.782608695652174,
+      "grad_norm": 0.7749724388122559,
+      "learning_rate": 2e-05,
+      "loss": 0.0851,
+      "step": 72
+    },
+    {
+      "epoch": 0.8043478260869565,
+      "grad_norm": 0.3363087475299835,
+      "learning_rate": 2e-05,
+      "loss": 0.027,
+      "step": 74
+    },
+    {
+      "epoch": 0.8260869565217391,
+      "grad_norm": 0.0710146352648735,
+      "learning_rate": 2e-05,
+      "loss": 0.0093,
+      "step": 76
+    },
+    {
+      "epoch": 0.8478260869565217,
+      "grad_norm": 0.015138098038733006,
+      "learning_rate": 2e-05,
+      "loss": 0.0216,
+      "step": 78
+    },
+    {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 0.19736772775650024,
+      "learning_rate": 2e-05,
+      "loss": 0.0327,
+      "step": 80
+    },
+    {
+      "epoch": 0.8913043478260869,
+      "grad_norm": 0.05040005221962929,
+      "learning_rate": 2e-05,
+      "loss": 0.0043,
+      "step": 82
+    },
+    {
+      "epoch": 0.9130434782608695,
+      "grad_norm": 0.3938082754611969,
+      "learning_rate": 2e-05,
+      "loss": 0.1232,
+      "step": 84
+    },
+    {
+      "epoch": 0.9347826086956522,
+      "grad_norm": 2.0402262210845947,
+      "learning_rate": 2e-05,
+      "loss": 0.1115,
+      "step": 86
+    },
+    {
+      "epoch": 0.9565217391304348,
+      "grad_norm": 1.9586436748504639,
+      "learning_rate": 2e-05,
+      "loss": 0.2655,
+      "step": 88
+    },
+    {
+      "epoch": 0.9782608695652174,
+      "grad_norm": 2.773951292037964,
+      "learning_rate": 2e-05,
+      "loss": 0.3761,
+      "step": 90
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.30637216567993164,
+      "learning_rate": 2e-05,
+      "loss": 0.0126,
+      "step": 92
+    },
+    {
+      "epoch": 1.0,
+      "step": 92,
+      "total_flos": 2032950489120768.0,
+      "train_loss": 0.12097753487203432,
+      "train_runtime": 198.5545,
+      "train_samples_per_second": 1.853,
+      "train_steps_per_second": 0.463
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 92,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2032950489120768.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:040df4945fad36c157df705c9dbfc923e8eec981105698ce5843af65c6ae16c0
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a3a3c0e528a95b0bb130571fcf86d9981186ed8aa94b4c3e14222e3b47b0d1f
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:638b86ad8e0f9afc4f3f1f52d89012e751c1e703f58c00c8fccfb31bea890f61
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43a7bcebed8e96b052f66787b8fb5ab76be240ba9a8712336fd509c4cb800365
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a42d516569f5ded0c385f9bbb5547c1cbb1d59eb9434227156f26fcb67f784c
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8eaf002e494f4431cf9c54a6356b86639236ef8d3a4e777d4bf5edcba081c20f
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f285103af3edf0700ce58157257601eb492e783732711a4937f09ef70cafba
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d312a45706dcde5f13aa89b36bf97e835fd187df9df75d67658e7cdbd1ed075b
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,364 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 92,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.021739130434782608,
+      "grad_norm": 1.1296067237854004,
+      "learning_rate": 2e-05,
+      "loss": 0.3841,
+      "step": 2
+    },
+    {
+      "epoch": 0.043478260869565216,
+      "grad_norm": 1.9366317987442017,
+      "learning_rate": 2e-05,
+      "loss": 0.4115,
+      "step": 4
+    },
+    {
+      "epoch": 0.06521739130434782,
+      "grad_norm": 4.050197124481201,
+      "learning_rate": 2e-05,
+      "loss": 1.8967,
+      "step": 6
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 0.7215381264686584,
+      "learning_rate": 2e-05,
+      "loss": 0.8096,
+      "step": 8
+    },
+    {
+      "epoch": 0.10869565217391304,
+      "grad_norm": 0.4154060184955597,
+      "learning_rate": 2e-05,
+      "loss": 0.2618,
+      "step": 10
+    },
+    {
+      "epoch": 0.13043478260869565,
+      "grad_norm": 1.4991834163665771,
+      "learning_rate": 2e-05,
+      "loss": 0.731,
+      "step": 12
+    },
+    {
+      "epoch": 0.15217391304347827,
+      "grad_norm": 0.3260190188884735,
+      "learning_rate": 2e-05,
+      "loss": 0.5441,
+      "step": 14
+    },
+    {
+      "epoch": 0.17391304347826086,
+      "grad_norm": 0.7272593379020691,
+      "learning_rate": 2e-05,
+      "loss": 0.4451,
+      "step": 16
+    },
+    {
+      "epoch": 0.1956521739130435,
+      "grad_norm": 1.0374034643173218,
+      "learning_rate": 2e-05,
+      "loss": 0.2988,
+      "step": 18
+    },
+    {
+      "epoch": 0.21739130434782608,
+      "grad_norm": 1.367766261100769,
+      "learning_rate": 2e-05,
+      "loss": 0.3249,
+      "step": 20
+    },
+    {
+      "epoch": 0.2391304347826087,
+      "grad_norm": 1.4096276760101318,
+      "learning_rate": 2e-05,
+      "loss": 0.3398,
+      "step": 22
+    },
+    {
+      "epoch": 0.2608695652173913,
+      "grad_norm": 0.802492082118988,
+      "learning_rate": 2e-05,
+      "loss": 0.5804,
+      "step": 24
+    },
+    {
+      "epoch": 0.2826086956521739,
+      "grad_norm": 2.32653546333313,
+      "learning_rate": 2e-05,
+      "loss": 0.4343,
+      "step": 26
+    },
+    {
+      "epoch": 0.30434782608695654,
+      "grad_norm": 0.14287668466567993,
+      "learning_rate": 2e-05,
+      "loss": 0.0477,
+      "step": 28
+    },
+    {
+      "epoch": 0.32608695652173914,
+      "grad_norm": 0.8296651244163513,
+      "learning_rate": 2e-05,
+      "loss": 0.6951,
+      "step": 30
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 0.9478865265846252,
+      "learning_rate": 2e-05,
+      "loss": 0.3354,
+      "step": 32
+    },
+    {
+      "epoch": 0.3695652173913043,
+      "grad_norm": 2.4494006633758545,
+      "learning_rate": 2e-05,
+      "loss": 0.7025,
+      "step": 34
+    },
+    {
+      "epoch": 0.391304347826087,
+      "grad_norm": 0.08148877322673798,
+      "learning_rate": 2e-05,
+      "loss": 0.1202,
+      "step": 36
+    },
+    {
+      "epoch": 0.41304347826086957,
+      "grad_norm": 1.3430149555206299,
+      "learning_rate": 2e-05,
+      "loss": 0.4278,
+      "step": 38
+    },
+    {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 0.4023633599281311,
+      "learning_rate": 2e-05,
+      "loss": 0.4105,
+      "step": 40
+    },
+    {
+      "epoch": 0.45652173913043476,
+      "grad_norm": 1.502210259437561,
+      "learning_rate": 2e-05,
+      "loss": 0.6282,
+      "step": 42
+    },
+    {
+      "epoch": 0.4782608695652174,
+      "grad_norm": 0.5279916524887085,
+      "learning_rate": 2e-05,
+      "loss": 0.1886,
+      "step": 44
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.4618723690509796,
+      "learning_rate": 2e-05,
+      "loss": 0.1187,
+      "step": 46
+    },
+    {
+      "epoch": 0.5217391304347826,
+      "grad_norm": 1.1924479007720947,
+      "learning_rate": 2e-05,
+      "loss": 0.4895,
+      "step": 48
+    },
+    {
+      "epoch": 0.5434782608695652,
+      "grad_norm": 1.406653881072998,
+      "learning_rate": 2e-05,
+      "loss": 0.462,
+      "step": 50
+    },
+    {
+      "epoch": 0.5652173913043478,
+      "grad_norm": 1.7467128038406372,
+      "learning_rate": 2e-05,
+      "loss": 0.4685,
+      "step": 52
+    },
+    {
+      "epoch": 0.5869565217391305,
+      "grad_norm": 1.0591291189193726,
+      "learning_rate": 2e-05,
+      "loss": 0.7101,
+      "step": 54
+    },
+    {
+      "epoch": 0.6086956521739131,
+      "grad_norm": 1.112949013710022,
+      "learning_rate": 2e-05,
+      "loss": 0.3761,
+      "step": 56
+    },
+    {
+      "epoch": 0.6304347826086957,
+      "grad_norm": 2.589237689971924,
+      "learning_rate": 2e-05,
+      "loss": 0.8964,
+      "step": 58
+    },
+    {
+      "epoch": 0.6521739130434783,
+      "grad_norm": 1.2493059635162354,
+      "learning_rate": 2e-05,
+      "loss": 0.7433,
+      "step": 60
+    },
+    {
+      "epoch": 0.6739130434782609,
+      "grad_norm": 1.9061592817306519,
+      "learning_rate": 2e-05,
+      "loss": 0.7149,
+      "step": 62
+    },
+    {
+      "epoch": 0.6956521739130435,
+      "grad_norm": 2.45300555229187,
+      "learning_rate": 2e-05,
+      "loss": 1.4371,
+      "step": 64
+    },
+    {
+      "epoch": 0.717391304347826,
+      "grad_norm": 2.228710651397705,
+      "learning_rate": 2e-05,
+      "loss": 0.6653,
+      "step": 66
+    },
+    {
+      "epoch": 0.7391304347826086,
+      "grad_norm": 0.8008307218551636,
+      "learning_rate": 2e-05,
+      "loss": 0.1514,
+      "step": 68
+    },
+    {
+      "epoch": 0.7608695652173914,
+      "grad_norm": 0.01986389420926571,
+      "learning_rate": 2e-05,
+      "loss": 0.2807,
+      "step": 70
+    },
+    {
+      "epoch": 0.782608695652174,
+      "grad_norm": 1.2030994892120361,
+      "learning_rate": 2e-05,
+      "loss": 0.2121,
+      "step": 72
+    },
+    {
+      "epoch": 0.8043478260869565,
+      "grad_norm": 0.6025874614715576,
+      "learning_rate": 2e-05,
+      "loss": 0.1727,
+      "step": 74
+    },
+    {
+      "epoch": 0.8260869565217391,
+      "grad_norm": 0.3984823524951935,
+      "learning_rate": 2e-05,
+      "loss": 0.1357,
+      "step": 76
+    },
+    {
+      "epoch": 0.8478260869565217,
+      "grad_norm": 0.6921195387840271,
+      "learning_rate": 2e-05,
+      "loss": 0.2836,
+      "step": 78
+    },
+    {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 0.04382755234837532,
+      "learning_rate": 2e-05,
+      "loss": 0.5031,
+      "step": 80
+    },
+    {
+      "epoch": 0.8913043478260869,
+      "grad_norm": 1.5162211656570435,
+      "learning_rate": 2e-05,
+      "loss": 0.3304,
+      "step": 82
+    },
+    {
+      "epoch": 0.9130434782608695,
+      "grad_norm": 0.6259192824363708,
+      "learning_rate": 2e-05,
+      "loss": 0.1296,
+      "step": 84
+    },
+    {
+      "epoch": 0.9347826086956522,
+      "grad_norm": 1.5014652013778687,
+      "learning_rate": 2e-05,
+      "loss": 0.6724,
+      "step": 86
+    },
+    {
+      "epoch": 0.9565217391304348,
+      "grad_norm": 4.528606414794922,
+      "learning_rate": 2e-05,
+      "loss": 0.5261,
+      "step": 88
+    },
+    {
+      "epoch": 0.9782608695652174,
+      "grad_norm": 0.15331390500068665,
+      "learning_rate": 2e-05,
+      "loss": 0.0363,
+      "step": 90
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.2000925540924072,
+      "learning_rate": 2e-05,
+      "loss": 0.3445,
+      "step": 92
+    },
+    {
+      "epoch": 1.0,
+      "step": 92,
+      "total_flos": 4792956951199744.0,
+      "train_loss": 0.4756195545196533,
+      "train_runtime": 313.428,
+      "train_samples_per_second": 1.174,
+      "train_steps_per_second": 0.294
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 92,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4792956951199744.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:968963aca4a3d7d2ca1d2d8db5c362658cefa72d304a3ec022ac25e67d846a42
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ccfffb49d254686ff927002aa256e455a85c432e344013b10a52c16c83f78700
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa459bea49cae2643832fcda9ee0a85fbfd03857a1741def05d71736fb5f422
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0f5215d3edcca5d04b119de86073ce299e15e1ce3cb8b283190c7eb1c4855d0
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b6d7e4c9ee160535596a15f0a9cdd14bbec4e79a90e4e93100c6c114348793f
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:572561376a76a93533269e55e42451d11cac7a33012fe74b4310f133756afd33
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38317de6bb0584ebac545a2faf4129d6e2e417a38865720382aa265ca7f16623
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dd53b66a9fcdd90b3829d6e14befc65adc080086aceec88646c1135d1a0efd9
+size 369837282

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,364 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 92,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.021739130434782608,
+      "grad_norm": 4.441051006317139,
+      "learning_rate": 2e-05,
+      "loss": 0.992,
+      "step": 2
+    },
+    {
+      "epoch": 0.043478260869565216,
+      "grad_norm": 3.2609739303588867,
+      "learning_rate": 2e-05,
+      "loss": 1.2689,
+      "step": 4
+    },
+    {
+      "epoch": 0.06521739130434782,
+      "grad_norm": 3.7552318572998047,
+      "learning_rate": 2e-05,
+      "loss": 1.1606,
+      "step": 6
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 3.9538447856903076,
+      "learning_rate": 2e-05,
+      "loss": 1.2226,
+      "step": 8
+    },
+    {
+      "epoch": 0.10869565217391304,
+      "grad_norm": 7.879010200500488,
+      "learning_rate": 2e-05,
+      "loss": 1.7328,
+      "step": 10
+    },
+    {
+      "epoch": 0.13043478260869565,
+      "grad_norm": 3.853842258453369,
+      "learning_rate": 2e-05,
+      "loss": 0.6685,
+      "step": 12
+    },
+    {
+      "epoch": 0.15217391304347827,
+      "grad_norm": 2.0626301765441895,
+      "learning_rate": 2e-05,
+      "loss": 0.6801,
+      "step": 14
+    },
+    {
+      "epoch": 0.17391304347826086,
+      "grad_norm": 7.179374694824219,
+      "learning_rate": 2e-05,
+      "loss": 1.4164,
+      "step": 16
+    },
+    {
+      "epoch": 0.1956521739130435,
+      "grad_norm": 1.8957184553146362,
+      "learning_rate": 2e-05,
+      "loss": 0.7125,
+      "step": 18
+    },
+    {
+      "epoch": 0.21739130434782608,
+      "grad_norm": 3.1610910892486572,
+      "learning_rate": 2e-05,
+      "loss": 0.5239,
+      "step": 20
+    },
+    {
+      "epoch": 0.2391304347826087,
+      "grad_norm": 4.4256672859191895,
+      "learning_rate": 2e-05,
+      "loss": 0.5069,
+      "step": 22
+    },
+    {
+      "epoch": 0.2608695652173913,
+      "grad_norm": 4.9362335205078125,
+      "learning_rate": 2e-05,
+      "loss": 1.8549,
+      "step": 24
+    },
+    {
+      "epoch": 0.2826086956521739,
+      "grad_norm": 4.253013610839844,
+      "learning_rate": 2e-05,
+      "loss": 1.0589,
+      "step": 26
+    },
+    {
+      "epoch": 0.30434782608695654,
+      "grad_norm": 4.3326826095581055,
+      "learning_rate": 2e-05,
+      "loss": 1.2678,
+      "step": 28
+    },
+    {
+      "epoch": 0.32608695652173914,
+      "grad_norm": 6.773116111755371,
+      "learning_rate": 2e-05,
+      "loss": 1.9853,
+      "step": 30
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 7.467095851898193,
+      "learning_rate": 2e-05,
+      "loss": 1.3477,
+      "step": 32
+    },
+    {
+      "epoch": 0.3695652173913043,
+      "grad_norm": 0.6017383933067322,
+      "learning_rate": 2e-05,
+      "loss": 0.3416,
+      "step": 34
+    },
+    {
+      "epoch": 0.391304347826087,
+      "grad_norm": 2.026484966278076,
+      "learning_rate": 2e-05,
+      "loss": 1.4528,
+      "step": 36
+    },
+    {
+      "epoch": 0.41304347826086957,
+      "grad_norm": 1.9307522773742676,
+      "learning_rate": 2e-05,
+      "loss": 0.5631,
+      "step": 38
+    },
+    {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 3.334146499633789,
+      "learning_rate": 2e-05,
+      "loss": 1.4508,
+      "step": 40
+    },
+    {
+      "epoch": 0.45652173913043476,
+      "grad_norm": 1.933052659034729,
+      "learning_rate": 2e-05,
+      "loss": 0.7173,
+      "step": 42
+    },
+    {
+      "epoch": 0.4782608695652174,
+      "grad_norm": 3.3551347255706787,
+      "learning_rate": 2e-05,
+      "loss": 1.3031,
+      "step": 44
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 2.526327133178711,
+      "learning_rate": 2e-05,
+      "loss": 1.3352,
+      "step": 46
+    },
+    {
+      "epoch": 0.5217391304347826,
+      "grad_norm": 7.738239765167236,
+      "learning_rate": 2e-05,
+      "loss": 3.587,
+      "step": 48
+    },
+    {
+      "epoch": 0.5434782608695652,
+      "grad_norm": 1.777616024017334,
+      "learning_rate": 2e-05,
+      "loss": 0.6968,
+      "step": 50
+    },
+    {
+      "epoch": 0.5652173913043478,
+      "grad_norm": 4.613940715789795,
+      "learning_rate": 2e-05,
+      "loss": 1.8354,
+      "step": 52
+    },
+    {
+      "epoch": 0.5869565217391305,
+      "grad_norm": 4.080211162567139,
+      "learning_rate": 2e-05,
+      "loss": 1.2648,
+      "step": 54
+    },
+    {
+      "epoch": 0.6086956521739131,
+      "grad_norm": 6.371545314788818,
+      "learning_rate": 2e-05,
+      "loss": 1.6354,
+      "step": 56
+    },
+    {
+      "epoch": 0.6304347826086957,
+      "grad_norm": 6.669529914855957,
+      "learning_rate": 2e-05,
+      "loss": 2.2418,
+      "step": 58
+    },
+    {
+      "epoch": 0.6521739130434783,
+      "grad_norm": 3.0953638553619385,
+      "learning_rate": 2e-05,
+      "loss": 0.9421,
+      "step": 60
+    },
+    {
+      "epoch": 0.6739130434782609,
+      "grad_norm": 3.3901684284210205,
+      "learning_rate": 2e-05,
+      "loss": 1.4606,
+      "step": 62
+    },
+    {
+      "epoch": 0.6956521739130435,
+      "grad_norm": 3.4849743843078613,
+      "learning_rate": 2e-05,
+      "loss": 1.2765,
+      "step": 64
+    },
+    {
+      "epoch": 0.717391304347826,
+      "grad_norm": 5.219615936279297,
+      "learning_rate": 2e-05,
+      "loss": 1.4114,
+      "step": 66
+    },
+    {
+      "epoch": 0.7391304347826086,
+      "grad_norm": 2.6195218563079834,
+      "learning_rate": 2e-05,
+      "loss": 1.3279,
+      "step": 68
+    },
+    {
+      "epoch": 0.7608695652173914,
+      "grad_norm": 3.040009021759033,
+      "learning_rate": 2e-05,
+      "loss": 0.8641,
+      "step": 70
+    },
+    {
+      "epoch": 0.782608695652174,
+      "grad_norm": 6.53596305847168,
+      "learning_rate": 2e-05,
+      "loss": 2.4763,
+      "step": 72
+    },
+    {
+      "epoch": 0.8043478260869565,
+      "grad_norm": 2.496731996536255,
+      "learning_rate": 2e-05,
+      "loss": 1.1084,
+      "step": 74
+    },
+    {
+      "epoch": 0.8260869565217391,
+      "grad_norm": 2.0288877487182617,
+      "learning_rate": 2e-05,
+      "loss": 0.5153,
+      "step": 76
+    },
+    {
+      "epoch": 0.8478260869565217,
+      "grad_norm": 1.2743315696716309,
+      "learning_rate": 2e-05,
+      "loss": 0.3963,
+      "step": 78
+    },
+    {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 3.1155552864074707,
+      "learning_rate": 2e-05,
+      "loss": 0.6715,
+      "step": 80
+    },
+    {
+      "epoch": 0.8913043478260869,
+      "grad_norm": 2.91237735748291,
+      "learning_rate": 2e-05,
+      "loss": 1.1998,
+      "step": 82
+    },
+    {
+      "epoch": 0.9130434782608695,
+      "grad_norm": 3.1403515338897705,
+      "learning_rate": 2e-05,
+      "loss": 0.813,
+      "step": 84
+    },
+    {
+      "epoch": 0.9347826086956522,
+      "grad_norm": 1.0142074823379517,
+      "learning_rate": 2e-05,
+      "loss": 0.8541,
+      "step": 86
+    },
+    {
+      "epoch": 0.9565217391304348,
+      "grad_norm": 1.7990295886993408,
+      "learning_rate": 2e-05,
+      "loss": 0.5481,
+      "step": 88
+    },
+    {
+      "epoch": 0.9782608695652174,
+      "grad_norm": 4.208423137664795,
+      "learning_rate": 2e-05,
+      "loss": 1.8142,
+      "step": 90
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.21415376663208,
+      "learning_rate": 2e-05,
+      "loss": 1.222,
+      "step": 92
+    },
+    {
+      "epoch": 1.0,
+      "step": 92,
+      "total_flos": 2060088319672320.0,
+      "train_loss": 1.2114470782487288,
+      "train_runtime": 196.2309,
+      "train_samples_per_second": 1.875,
+      "train_steps_per_second": 0.469
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 92,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2060088319672320.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b79aaba74229c5b0d837e9ac5cedbe36e425f1a29335f5049a310c762e78dd2
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bf24bd98c3ffdea50275e527e934666eb42b753a6b12ec775ef20bbeea84ede
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8404bf0fef92715cc44f4e710595ba6b0457a3a2811d23fbe0f7780ce76d7378
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc2f3efc4cf5e0f1fb8cdce98b0704b7c0fa4142fca915fe3774971780347018
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c807d9baa99dede81399aef0bd4796381e8535cb504ab1e1313f129796d2160f
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac48c818c303fe23985c88e295a9d20b095e81c95cb5e49ee64351965035a180
+size 794708086

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca99b55c7f846d80aaf5a644fa43d442cef1b955557d24a529a0d0a46a9aa4c7
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32aa7feb2e6e1e2ba21daefc38b7338c37000c71b139c325d5b0b9d267ae8415
+size 794706058

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,364 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 92,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.021739130434782608,
+      "grad_norm": 1.7172458171844482,
+      "learning_rate": 2e-05,
+      "loss": 0.9706,
+      "step": 2
+    },
+    {
+      "epoch": 0.043478260869565216,
+      "grad_norm": 1.535640001296997,
+      "learning_rate": 2e-05,
+      "loss": 1.0299,
+      "step": 4
+    },
+    {
+      "epoch": 0.06521739130434782,
+      "grad_norm": 1.8416475057601929,
+      "learning_rate": 2e-05,
+      "loss": 0.5077,
+      "step": 6
+    },
+    {
+      "epoch": 0.08695652173913043,
+      "grad_norm": 1.3889185190200806,
+      "learning_rate": 2e-05,
+      "loss": 0.6756,
+      "step": 8
+    },
+    {
+      "epoch": 0.10869565217391304,
+      "grad_norm": 0.3524916470050812,
+      "learning_rate": 2e-05,
+      "loss": 0.6578,
+      "step": 10
+    },
+    {
+      "epoch": 0.13043478260869565,
+      "grad_norm": 0.24557974934577942,
+      "learning_rate": 2e-05,
+      "loss": 0.245,
+      "step": 12
+    },
+    {
+      "epoch": 0.15217391304347827,
+      "grad_norm": 1.0801588296890259,
+      "learning_rate": 2e-05,
+      "loss": 0.4678,
+      "step": 14
+    },
+    {
+      "epoch": 0.17391304347826086,
+      "grad_norm": 0.6954711675643921,
+      "learning_rate": 2e-05,
+      "loss": 0.357,
+      "step": 16
+    },
+    {
+      "epoch": 0.1956521739130435,
+      "grad_norm": 0.6120344996452332,
+      "learning_rate": 2e-05,
+      "loss": 0.3746,
+      "step": 18
+    },
+    {
+      "epoch": 0.21739130434782608,
+      "grad_norm": 0.9757769703865051,
+      "learning_rate": 2e-05,
+      "loss": 0.3461,
+      "step": 20
+    },
+    {
+      "epoch": 0.2391304347826087,
+      "grad_norm": 1.1557114124298096,
+      "learning_rate": 2e-05,
+      "loss": 0.3997,
+      "step": 22
+    },
+    {
+      "epoch": 0.2608695652173913,
+      "grad_norm": 1.616867184638977,
+      "learning_rate": 2e-05,
+      "loss": 0.8107,
+      "step": 24
+    },
+    {
+      "epoch": 0.2826086956521739,
+      "grad_norm": 0.6354851722717285,
+      "learning_rate": 2e-05,
+      "loss": 0.8208,
+      "step": 26
+    },
+    {
+      "epoch": 0.30434782608695654,
+      "grad_norm": 1.0514754056930542,
+      "learning_rate": 2e-05,
+      "loss": 1.3612,
+      "step": 28
+    },
+    {
+      "epoch": 0.32608695652173914,
+      "grad_norm": 1.695558786392212,
+      "learning_rate": 2e-05,
+      "loss": 1.472,
+      "step": 30
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 1.0636487007141113,
+      "learning_rate": 2e-05,
+      "loss": 0.2386,
+      "step": 32
+    },
+    {
+      "epoch": 0.3695652173913043,
+      "grad_norm": 0.2803816795349121,
+      "learning_rate": 2e-05,
+      "loss": 0.1083,
+      "step": 34
+    },
+    {
+      "epoch": 0.391304347826087,
+      "grad_norm": 0.7646946310997009,
+      "learning_rate": 2e-05,
+      "loss": 0.9315,
+      "step": 36
+    },
+    {
+      "epoch": 0.41304347826086957,
+      "grad_norm": 2.3936870098114014,
+      "learning_rate": 2e-05,
+      "loss": 1.3936,
+      "step": 38
+    },
+    {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 2.638190269470215,
+      "learning_rate": 2e-05,
+      "loss": 1.1179,
+      "step": 40
+    },
+    {
+      "epoch": 0.45652173913043476,
+      "grad_norm": 0.38522782921791077,
+      "learning_rate": 2e-05,
+      "loss": 0.6251,
+      "step": 42
+    },
+    {
+      "epoch": 0.4782608695652174,
+      "grad_norm": 1.31108820438385,
+      "learning_rate": 2e-05,
+      "loss": 0.2963,
+      "step": 44
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.9962872266769409,
+      "learning_rate": 2e-05,
+      "loss": 1.1005,
+      "step": 46
+    },
+    {
+      "epoch": 0.5217391304347826,
+      "grad_norm": 0.7223103046417236,
+      "learning_rate": 2e-05,
+      "loss": 0.3556,
+      "step": 48
+    },
+    {
+      "epoch": 0.5434782608695652,
+      "grad_norm": 0.9148542881011963,
+      "learning_rate": 2e-05,
+      "loss": 0.7949,
+      "step": 50
+    },
+    {
+      "epoch": 0.5652173913043478,
+      "grad_norm": 0.9974550008773804,
+      "learning_rate": 2e-05,
+      "loss": 0.383,
+      "step": 52
+    },
+    {
+      "epoch": 0.5869565217391305,
+      "grad_norm": 0.21446958184242249,
+      "learning_rate": 2e-05,
+      "loss": 0.2946,
+      "step": 54
+    },
+    {
+      "epoch": 0.6086956521739131,
+      "grad_norm": 3.7416319847106934,
+      "learning_rate": 2e-05,
+      "loss": 1.4213,
+      "step": 56
+    },
+    {
+      "epoch": 0.6304347826086957,
+      "grad_norm": 0.9001378417015076,
+      "learning_rate": 2e-05,
+      "loss": 0.8664,
+      "step": 58
+    },
+    {
+      "epoch": 0.6521739130434783,
+      "grad_norm": 1.5950777530670166,
+      "learning_rate": 2e-05,
+      "loss": 1.1924,
+      "step": 60
+    },
+    {
+      "epoch": 0.6739130434782609,
+      "grad_norm": 0.8221089839935303,
+      "learning_rate": 2e-05,
+      "loss": 0.6193,
+      "step": 62
+    },
+    {
+      "epoch": 0.6956521739130435,
+      "grad_norm": 1.6373211145401,
+      "learning_rate": 2e-05,
+      "loss": 0.7243,
+      "step": 64
+    },
+    {
+      "epoch": 0.717391304347826,
+      "grad_norm": 1.0375702381134033,
+      "learning_rate": 2e-05,
+      "loss": 0.7575,
+      "step": 66
+    },
+    {
+      "epoch": 0.7391304347826086,
+      "grad_norm": 1.2386682033538818,
+      "learning_rate": 2e-05,
+      "loss": 0.6053,
+      "step": 68
+    },
+    {
+      "epoch": 0.7608695652173914,
+      "grad_norm": 0.5664445161819458,
+      "learning_rate": 2e-05,
+      "loss": 0.6014,
+      "step": 70
+    },
+    {
+      "epoch": 0.782608695652174,
+      "grad_norm": 3.124638080596924,
+      "learning_rate": 2e-05,
+      "loss": 1.3379,
+      "step": 72
+    },
+    {
+      "epoch": 0.8043478260869565,
+      "grad_norm": 1.7098889350891113,
+      "learning_rate": 2e-05,
+      "loss": 1.0894,
+      "step": 74
+    },
+    {
+      "epoch": 0.8260869565217391,
+      "grad_norm": 0.9901090264320374,
+      "learning_rate": 2e-05,
+      "loss": 0.4407,
+      "step": 76
+    },
+    {
+      "epoch": 0.8478260869565217,
+      "grad_norm": 1.7773938179016113,
+      "learning_rate": 2e-05,
+      "loss": 1.1619,
+      "step": 78
+    },
+    {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 1.735460877418518,
+      "learning_rate": 2e-05,
+      "loss": 1.5605,
+      "step": 80
+    },
+    {
+      "epoch": 0.8913043478260869,
+      "grad_norm": 0.8822594285011292,
+      "learning_rate": 2e-05,
+      "loss": 0.466,
+      "step": 82
+    },
+    {
+      "epoch": 0.9130434782608695,
+      "grad_norm": 1.8246649503707886,
+      "learning_rate": 2e-05,
+      "loss": 0.6933,
+      "step": 84
+    },
+    {
+      "epoch": 0.9347826086956522,
+      "grad_norm": 1.136141300201416,
+      "learning_rate": 2e-05,
+      "loss": 0.4576,
+      "step": 86
+    },
+    {
+      "epoch": 0.9565217391304348,
+      "grad_norm": 0.6676300764083862,
+      "learning_rate": 2e-05,
+      "loss": 0.7428,
+      "step": 88
+    },
+    {
+      "epoch": 0.9782608695652174,
+      "grad_norm": 1.731764793395996,
+      "learning_rate": 2e-05,
+      "loss": 1.3401,
+      "step": 90
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.46977347135543823,
+      "learning_rate": 2e-05,
+      "loss": 0.2042,
+      "step": 92
+    },
+    {
+      "epoch": 1.0,
+      "step": 92,
+      "total_flos": 5551594087120896.0,
+      "train_loss": 0.7482347903044327,
+      "train_runtime": 314.3339,
+      "train_samples_per_second": 1.171,
+      "train_steps_per_second": 0.293
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 92,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5551594087120896.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b420870bc6bd7ab2d962e17aa98541dbfd84dee5188b2fbfbbb0a856791b89f7
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cbb8b4939b5f8948cd443b04095ca9d6da1951e0afa830ba0736d8e8486874c
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:710d51c728f923f61a4728c0dd5f92ff6de0fa172d9f28b54d18b73b8e53e928
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf43931ab46a6c71ef5ef5e106a93a987757d773d71308ac150b36cbefe90409
+size 369838470

client_states_NEW_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc24_4tasks_5rounds_fixtir92_T0125_decay099/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a3fad06f1e5ecbc439bd3b8a76ec421977354e24cc59393278cc7de282701b
+size 369837282