thkim0305 committed on May 9, 2025

Commit

c67198f

verified ·

1 Parent(s): 94b43be

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json +140 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json +140 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json +140 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json +140 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json +140 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth +3 -0
client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth +3 -0

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:836d89b5106466e6f49a6ff432bbd1680eab542acea1a49085dea53907cdcfc3
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d200058442e79c3d226f440ea1621148ed29ae746a5a819be7d987b9bf30715
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c57bff0474933d50e539716b8194135bf5428c91884c200604738b38e461b236
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88374ae4388dccc64cd6ba9efc570db595d8785caa80b281f7584857956ebfb3
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a053d7581327fb439a80a1722221f5a44b54cec874f11c565ec9bb228bc8caf
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14b48f3f4d17f62893fc65e44be8008eab062b97f50346a825a6fc7cd3ec0c94
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ff080ff39b8b37fb652f5d047e3444a9fe236bab99c1fffd48443b6855a2e62
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81902f2625b8c03d84845881483180b24eed38caaa7c9c03844827667e6f6af4
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 29,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 0.10964272171258926,
+      "learning_rate": 0.0003,
+      "loss": 0.4217,
+      "step": 2
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 0.12825018167495728,
+      "learning_rate": 0.0003,
+      "loss": 0.5373,
+      "step": 4
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.09695617854595184,
+      "learning_rate": 0.0003,
+      "loss": 0.4761,
+      "step": 6
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.11076370626688004,
+      "learning_rate": 0.0003,
+      "loss": 0.6261,
+      "step": 8
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.1661299616098404,
+      "learning_rate": 0.0003,
+      "loss": 0.6359,
+      "step": 10
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.08879231661558151,
+      "learning_rate": 0.0003,
+      "loss": 0.5287,
+      "step": 12
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.10452059656381607,
+      "learning_rate": 0.0003,
+      "loss": 0.4933,
+      "step": 14
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.13890604674816132,
+      "learning_rate": 0.0003,
+      "loss": 0.6004,
+      "step": 16
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.1287703961133957,
+      "learning_rate": 0.0003,
+      "loss": 0.4594,
+      "step": 18
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.1211370974779129,
+      "learning_rate": 0.0003,
+      "loss": 0.6454,
+      "step": 20
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.08164095133543015,
+      "learning_rate": 0.0003,
+      "loss": 0.6281,
+      "step": 22
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.09634097665548325,
+      "learning_rate": 0.0003,
+      "loss": 0.5273,
+      "step": 24
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.08703736215829849,
+      "learning_rate": 0.0003,
+      "loss": 0.4851,
+      "step": 26
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.10768512636423111,
+      "learning_rate": 0.0003,
+      "loss": 0.6181,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "step": 29,
+      "total_flos": 403873226293248.0,
+      "train_loss": 0.5474604059909952,
+      "train_runtime": 146.486,
+      "train_samples_per_second": 0.792,
+      "train_steps_per_second": 0.198
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 29,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 403873226293248.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e935aeec4ee260e84bb140e5ac4a8c6330e365fecd25d6f661325cbb9dddd5b7
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b11589874b0923d856baf6292291fdc7914b9185b84aa0910080a52a9306bcfc
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99cf8ac844ba96cb3df2d7be33648bfddf54f360ed63c561b1c1f55c548046fe
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2418db45115828074ab36cafc2ad7cbe61a903ee75c51f52a11d1ceddabc3dd0
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79bbc99a1a4f5e88ace6945502b85240419d92aff6bb23e47736d3a65dc14580
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25515d1a35c48b353a7cd514feda4d0e0c771313fc772ef97d6b6a7fc91cc2fd
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:342e21f1ce5cd68276ffe3cefcf06d1709d4e880d484a85c7f482f4ba14f6faf
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f41668931adc3447813fb03c42ccf048764e7bebd97c98f0c445b732aa4daeb
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 29,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 0.12822631001472473,
+      "learning_rate": 0.0003,
+      "loss": 2.0608,
+      "step": 2
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 0.22433912754058838,
+      "learning_rate": 0.0003,
+      "loss": 2.0027,
+      "step": 4
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.11672133207321167,
+      "learning_rate": 0.0003,
+      "loss": 1.8721,
+      "step": 6
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.14917072653770447,
+      "learning_rate": 0.0003,
+      "loss": 1.6856,
+      "step": 8
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.29816752672195435,
+      "learning_rate": 0.0003,
+      "loss": 1.6888,
+      "step": 10
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.2757134437561035,
+      "learning_rate": 0.0003,
+      "loss": 1.6053,
+      "step": 12
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.19783510267734528,
+      "learning_rate": 0.0003,
+      "loss": 1.2848,
+      "step": 14
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.13258986175060272,
+      "learning_rate": 0.0003,
+      "loss": 1.8308,
+      "step": 16
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.21041615307331085,
+      "learning_rate": 0.0003,
+      "loss": 2.1478,
+      "step": 18
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.14201298356056213,
+      "learning_rate": 0.0003,
+      "loss": 1.5266,
+      "step": 20
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.15158437192440033,
+      "learning_rate": 0.0003,
+      "loss": 1.7609,
+      "step": 22
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.1517992615699768,
+      "learning_rate": 0.0003,
+      "loss": 1.166,
+      "step": 24
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.12816612422466278,
+      "learning_rate": 0.0003,
+      "loss": 1.6937,
+      "step": 26
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.1202758178114891,
+      "learning_rate": 0.0003,
+      "loss": 1.6313,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "step": 29,
+      "total_flos": 281074283315200.0,
+      "train_loss": 1.690290927886963,
+      "train_runtime": 145.7094,
+      "train_samples_per_second": 0.796,
+      "train_steps_per_second": 0.199
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 29,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 281074283315200.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52f049546f595aef23f14cd5f0f2900ec7d6ab067f347356989e7665f19f4fba
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d74ae734337d1de8d4f4b7c907e3825932cc27165ad5a9ace2e65e4c09185a82
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e432371f1613ba58f90180975df30455c6ecf608502129129e8fbb1543f3b1b
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48890b803acd4c2be77dc6bf63ab002be41bd45a52ed5fddfa880eb573d0e8ae
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68a4e584273dd911af5919a7581f90bdae10154afcb9a2b28a9cd9b13d1722c2
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d37a8441064179b19f7969a8ed7cbb0b6843cdb20993aeed62a44a1854b0d1e
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4569f7868b2f3a29f7be76788b1afbdb0ebbf4d1816ce6d2d81863913e300cc8
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1d89e09bc5919a1c61037f4fd932a3dbe05ab36284a16b02024f787bf98d60a
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 29,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 0.16763809323310852,
+      "learning_rate": 0.0003,
+      "loss": 2.1451,
+      "step": 2
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 0.08457788825035095,
+      "learning_rate": 0.0003,
+      "loss": 1.489,
+      "step": 4
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.11339309066534042,
+      "learning_rate": 0.0003,
+      "loss": 1.9602,
+      "step": 6
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.14981189370155334,
+      "learning_rate": 0.0003,
+      "loss": 1.0412,
+      "step": 8
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.1549920290708542,
+      "learning_rate": 0.0003,
+      "loss": 1.6355,
+      "step": 10
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.16407117247581482,
+      "learning_rate": 0.0003,
+      "loss": 2.054,
+      "step": 12
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.1566217690706253,
+      "learning_rate": 0.0003,
+      "loss": 1.7061,
+      "step": 14
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.10542894154787064,
+      "learning_rate": 0.0003,
+      "loss": 1.109,
+      "step": 16
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.20924022793769836,
+      "learning_rate": 0.0003,
+      "loss": 1.6204,
+      "step": 18
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.15142425894737244,
+      "learning_rate": 0.0003,
+      "loss": 1.8941,
+      "step": 20
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.20191574096679688,
+      "learning_rate": 0.0003,
+      "loss": 2.1546,
+      "step": 22
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.21983274817466736,
+      "learning_rate": 0.0003,
+      "loss": 1.3526,
+      "step": 24
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.07465270161628723,
+      "learning_rate": 0.0003,
+      "loss": 1.743,
+      "step": 26
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.1358894258737564,
+      "learning_rate": 0.0003,
+      "loss": 1.6953,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "step": 29,
+      "total_flos": 663155598426112.0,
+      "train_loss": 1.6605948497509133,
+      "train_runtime": 157.2331,
+      "train_samples_per_second": 0.738,
+      "train_steps_per_second": 0.184
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 29,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 663155598426112.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99d8e37420a572ff1550c88254d7d014cba4443f24a713b66a7fafdf71be01ee
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b58f08397ab0d541489514a5c83564d2de154eda870f66f490b21ddcb255608
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebca7ec994f73e73f89e515ab3cec838ef6c6bbe26ac71ec451411e53eac6038
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc722336f0b0169bed983c53196967beac1302719207064a5363426ee496d9eb
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e079c3f8a07b9d74e26e05ed0f17c99b2c16f64338f120fe6409dddfea70152a
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4ca7b3ef1b0b44b2e1d8e4376bd46717ff5a2721ab91cf0de703f3b48c381c2
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c232444e6aed9362a4ec1f9435f695cf6ba336c1a53d21ec78deb0ac6c3ee608
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9936ee087a87971fe9a697c0b4295d6a7eabbee3f5ff7bb6cfa486c8a082f21a
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 29,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 0.05931968614459038,
+      "learning_rate": 0.0003,
+      "loss": 0.7324,
+      "step": 2
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 0.04922392964363098,
+      "learning_rate": 0.0003,
+      "loss": 0.8792,
+      "step": 4
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.11524348706007004,
+      "learning_rate": 0.0003,
+      "loss": 0.5845,
+      "step": 6
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.06806690990924835,
+      "learning_rate": 0.0003,
+      "loss": 0.7545,
+      "step": 8
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.09595661610364914,
+      "learning_rate": 0.0003,
+      "loss": 0.755,
+      "step": 10
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.05159907415509224,
+      "learning_rate": 0.0003,
+      "loss": 0.5922,
+      "step": 12
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.07526896893978119,
+      "learning_rate": 0.0003,
+      "loss": 0.9019,
+      "step": 14
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.07771413773298264,
+      "learning_rate": 0.0003,
+      "loss": 0.7862,
+      "step": 16
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.04285521060228348,
+      "learning_rate": 0.0003,
+      "loss": 0.8229,
+      "step": 18
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.15346619486808777,
+      "learning_rate": 0.0003,
+      "loss": 0.8843,
+      "step": 20
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.055303364992141724,
+      "learning_rate": 0.0003,
+      "loss": 0.7054,
+      "step": 22
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.06509271264076233,
+      "learning_rate": 0.0003,
+      "loss": 0.6226,
+      "step": 24
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.06718684732913971,
+      "learning_rate": 0.0003,
+      "loss": 0.6203,
+      "step": 26
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.10245434939861298,
+      "learning_rate": 0.0003,
+      "loss": 0.5754,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "step": 29,
+      "total_flos": 1591652615454720.0,
+      "train_loss": 0.7325446379595789,
+      "train_runtime": 148.7314,
+      "train_samples_per_second": 0.78,
+      "train_steps_per_second": 0.195
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 29,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1591652615454720.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c74bf68629dfbf8c60f49ae0110c8463cd77206845a7c914476538bcfe9349bd
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1439a1ba2c9766648ac02a241c189908c96d3247c69ef704723c2480cf5678b0
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f5bf1290f2b12990e47169e8b9f964b1020c67eb1339016648587e7c9b32b1c
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db6902cb7706829f9ec77cfa3264bb7f80381d38d1471f67fdbda3e99ecc7e14
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f59ef9e88e3229d2ceec37679a496e2ca3215eba76a0e32522ebb4e5d9228843
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62f4ac6d530706608b4ebdaddab566459f7a4ac12f5c5605e5d3e6ceed45bc2d
+size 100664470

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3c7a8ab6cb955ef1570c99b48aef65202684bb438bcee9f0d46f28697cbec1
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6efec2037d3806dc4b4fa30d91466b9e9b1aabfe85b5f8711e9faed8d171a5d5
+size 100663226

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 29,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06896551724137931,
+      "grad_norm": 0.05072196200489998,
+      "learning_rate": 0.0003,
+      "loss": 0.809,
+      "step": 2
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 0.14052243530750275,
+      "learning_rate": 0.0003,
+      "loss": 0.9376,
+      "step": 4
+    },
+    {
+      "epoch": 0.20689655172413793,
+      "grad_norm": 0.07366146892309189,
+      "learning_rate": 0.0003,
+      "loss": 0.9637,
+      "step": 6
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 0.06059933826327324,
+      "learning_rate": 0.0003,
+      "loss": 0.9109,
+      "step": 8
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.11904361099004745,
+      "learning_rate": 0.0003,
+      "loss": 0.974,
+      "step": 10
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.0738854929804802,
+      "learning_rate": 0.0003,
+      "loss": 0.6471,
+      "step": 12
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.10854072868824005,
+      "learning_rate": 0.0003,
+      "loss": 0.8115,
+      "step": 14
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.08300568163394928,
+      "learning_rate": 0.0003,
+      "loss": 0.7872,
+      "step": 16
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.12101076543331146,
+      "learning_rate": 0.0003,
+      "loss": 0.5953,
+      "step": 18
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.073908731341362,
+      "learning_rate": 0.0003,
+      "loss": 0.823,
+      "step": 20
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.08307291567325592,
+      "learning_rate": 0.0003,
+      "loss": 0.6549,
+      "step": 22
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.158330500125885,
+      "learning_rate": 0.0003,
+      "loss": 0.9645,
+      "step": 24
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.07515610754489899,
+      "learning_rate": 0.0003,
+      "loss": 1.024,
+      "step": 26
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 0.09373172372579575,
+      "learning_rate": 0.0003,
+      "loss": 1.0713,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "step": 29,
+      "total_flos": 1501610387701760.0,
+      "train_loss": 0.8557169848474963,
+      "train_runtime": 150.3635,
+      "train_samples_per_second": 0.771,
+      "train_steps_per_second": 0.193
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 29,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1501610387701760.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round10_task_vector_local_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8374e17a0faf03d8951e285488b14eef4498017c3e2630509a35cb0e5a67b558
+size 167774710

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round11_task_vector_local_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f922ab3667d9b5e9c3b1d1ee5e997d17edd373b51db23bee5625d07878158bdb
+size 167774710

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round12_task_vector_local_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:703338326e88b4e101975ba8d39f9caef66e863e1ee39a6fb1fd3a84dda35b59
+size 167774710

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round13_task_vector_local_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f70eb91a3e5015c3211e8ceae2f9ba2fded0c59702d2173cdde85a59eab4264f
+size 167774710

client_states_fedours_moe_T05_freq10_bs4_saveoptim_r16_32_lr3e-4_5e-4_sc205_4tasks_5rounds_fixitr29_T0125_decay099_SEED2/round14_task_vector_local_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a5c308380010af0b81c1ab4cdacccd5df94bc7c0a8a8449231f8d202c1a778
+size 167774710