thkim0305 commited on May 5, 2025

Commit

1bc1c5a

verified ·

1 Parent(s): 00c5de4

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56fc85170f7c52f1e9ac2402184118e54e979bc593a942b62491a368125b3962
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab8e8f05520a5e213d2233c3d39fea5396c0fea9f71fc5793e8c3da62cfc2f30
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcaa315ca2198284cb385a2533d6ce3049142497e3a92f3bb108e40a31c9d57e
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ae342f15c8941cf7b73da8583b0354f25dad9a4f94509e83df8f750aa6e6e14
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e04219236af11cd00523eecab74e6d8eeade903c0afade90763ed8a1afc1384
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40c03aede173c98dfe0cddeda01c173a5c7c8b81fe08b4c72bb1df29206d6f5c
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07c2403d703ac282fb4c80815544a86be3ae64153a3106271ee403e7c98f9507
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b343b9655b873be9c938471eaeddeb2d0327e041ff7b6ecd42e588cb60ef7a49
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.8673917055130005,
+      "learning_rate": 2e-05,
+      "loss": 0.5411,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 4.476343631744385,
+      "learning_rate": 2e-05,
+      "loss": 1.0286,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 2.270127296447754,
+      "learning_rate": 2e-05,
+      "loss": 0.6094,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 4.0221686363220215,
+      "learning_rate": 2e-05,
+      "loss": 1.0339,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 1.7531377077102661,
+      "learning_rate": 2e-05,
+      "loss": 0.6154,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 2.4782919883728027,
+      "learning_rate": 2e-05,
+      "loss": 0.3927,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 3.082655668258667,
+      "learning_rate": 2e-05,
+      "loss": 0.5301,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 1.6040374040603638,
+      "learning_rate": 2e-05,
+      "loss": 0.5479,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.0767765045166016,
+      "learning_rate": 2e-05,
+      "loss": 0.9126,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.117967128753662,
+      "learning_rate": 2e-05,
+      "loss": 0.5114,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 4.502895355224609,
+      "learning_rate": 2e-05,
+      "loss": 0.7164,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 4.821036338806152,
+      "learning_rate": 2e-05,
+      "loss": 1.526,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 2.051273822784424,
+      "learning_rate": 2e-05,
+      "loss": 0.6118,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.2917582988739014,
+      "learning_rate": 2e-05,
+      "loss": 0.6338,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 4.47807502746582,
+      "learning_rate": 2e-05,
+      "loss": 0.7823,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 6.614592552185059,
+      "learning_rate": 2e-05,
+      "loss": 1.3987,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.840501546859741,
+      "learning_rate": 2e-05,
+      "loss": 0.6544,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 2.3704311847686768,
+      "learning_rate": 2e-05,
+      "loss": 0.8242,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.5160675048828125,
+      "learning_rate": 2e-05,
+      "loss": 0.9385,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.4682557582855225,
+      "learning_rate": 2e-05,
+      "loss": 1.2132,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 2.3830385208129883,
+      "learning_rate": 2e-05,
+      "loss": 0.6107,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 3.068246603012085,
+      "learning_rate": 2e-05,
+      "loss": 0.6467,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 3.524432897567749,
+      "learning_rate": 2e-05,
+      "loss": 0.602,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 3.5651185512542725,
+      "learning_rate": 2e-05,
+      "loss": 0.7029,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.3311729431152344,
+      "learning_rate": 2e-05,
+      "loss": 0.5356,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5924252351987712.0,
+      "train_loss": 0.7648095703125,
+      "train_runtime": 151.5084,
+      "train_samples_per_second": 1.32,
+      "train_steps_per_second": 0.33
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5924252351987712.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:334b9877772a1e30d63aa38a00bfe896e15d6dc09c88dfa1ccab4979c6b4658d
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cba2b604640bee7060c99d65ff75b14153ed90fe88af5b278be99184f716b48b
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:208506304141fe31fb85d268825264673b4b52ed6aaecb961f3f3d21d3d2fd7f
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a74cebc2d0c93eb7caf98555a9f3baffce9ed8003e2895830d5109ebb38fdc2
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffdd84076af2f4ac0d6e4d0bc7ec7bba103570f06c16bb3798413a752b415ffd
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee1ccda5a7665844256546c440107724cb0bdfb8b64df7963f3a7fab67ab32e
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22998a139bd48538fe5906098fa3bb1965b44f2ebfecf3909023a90dc7292ba0
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35a5c1a43f28032b066c673c6abfe9651e83a59b76cb6708a69fdfd75106fd52
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 4.583834648132324,
+      "learning_rate": 2e-05,
+      "loss": 0.6835,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 4.129000186920166,
+      "learning_rate": 2e-05,
+      "loss": 0.6311,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 1.4998234510421753,
+      "learning_rate": 2e-05,
+      "loss": 0.5363,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 2.6047112941741943,
+      "learning_rate": 2e-05,
+      "loss": 0.6187,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 3.2830750942230225,
+      "learning_rate": 2e-05,
+      "loss": 0.601,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 2.440368413925171,
+      "learning_rate": 2e-05,
+      "loss": 0.5193,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 4.117373466491699,
+      "learning_rate": 2e-05,
+      "loss": 0.6884,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 5.755882740020752,
+      "learning_rate": 2e-05,
+      "loss": 0.731,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.5773918628692627,
+      "learning_rate": 2e-05,
+      "loss": 0.4884,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.3141613006591797,
+      "learning_rate": 2e-05,
+      "loss": 0.55,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.520458698272705,
+      "learning_rate": 2e-05,
+      "loss": 0.5297,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.141937732696533,
+      "learning_rate": 2e-05,
+      "loss": 0.5958,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 3.5650408267974854,
+      "learning_rate": 2e-05,
+      "loss": 0.6784,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.2956409454345703,
+      "learning_rate": 2e-05,
+      "loss": 0.7442,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 2.7933690547943115,
+      "learning_rate": 2e-05,
+      "loss": 0.6616,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 4.000720977783203,
+      "learning_rate": 2e-05,
+      "loss": 0.4821,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 5.566102981567383,
+      "learning_rate": 2e-05,
+      "loss": 0.4605,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 5.742500305175781,
+      "learning_rate": 2e-05,
+      "loss": 0.4153,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.2542405128479,
+      "learning_rate": 2e-05,
+      "loss": 0.4783,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 6.833155155181885,
+      "learning_rate": 2e-05,
+      "loss": 0.6371,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 7.5326313972473145,
+      "learning_rate": 2e-05,
+      "loss": 0.7853,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 2.1974692344665527,
+      "learning_rate": 2e-05,
+      "loss": 0.7622,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 2.442321538925171,
+      "learning_rate": 2e-05,
+      "loss": 0.5715,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 2.5812339782714844,
+      "learning_rate": 2e-05,
+      "loss": 0.4764,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 5.182116508483887,
+      "learning_rate": 2e-05,
+      "loss": 0.6237,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 1.4241485192953856e+16,
+      "train_loss": 0.5979843139648438,
+      "train_runtime": 252.1553,
+      "train_samples_per_second": 0.793,
+      "train_steps_per_second": 0.198
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.4241485192953856e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d06991855fd1b69b3806b8d1eb324b9c0201e478036fd3b65d56b34b2787e5ad
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e6628ce28b478d4a5c9a4cae41524f1b64d84dc93bf7a2f34161dca94fad10f
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8695c1f94b299b5faedaedf37b7a570f7b23e7cd1fe1da35b023a3b1ced9d2f6
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:daa8a795000144eef55b204b54325181dd9bda9e639727b62cddafd8082ca064
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6489762c514a53fca0821096bf89c0f31cbaf5e46df0959567c178571ec0560
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a52a2036d00c14f8d20c1cfb605b021050b0d6e423a449f39ea54b7350440d
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5914d4ab3c8d4874bebfbc1901b85323aed556b0db25307d10b91caf203cedb0
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c81a173f5ad5a59863f3ca8f811af06d562e8dbe2fe45ef5fba724b5e8c164a
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 4.558183193206787,
+      "learning_rate": 2e-05,
+      "loss": 0.5488,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 2.947014331817627,
+      "learning_rate": 2e-05,
+      "loss": 0.3348,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 1.299033761024475,
+      "learning_rate": 2e-05,
+      "loss": 0.3584,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 3.6182351112365723,
+      "learning_rate": 2e-05,
+      "loss": 0.3221,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 5.334591865539551,
+      "learning_rate": 2e-05,
+      "loss": 0.4843,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.80336856842041,
+      "learning_rate": 2e-05,
+      "loss": 0.6279,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 11.382059097290039,
+      "learning_rate": 2e-05,
+      "loss": 0.5401,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 11.98203182220459,
+      "learning_rate": 2e-05,
+      "loss": 0.717,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 6.630352020263672,
+      "learning_rate": 2e-05,
+      "loss": 0.697,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 5.681330680847168,
+      "learning_rate": 2e-05,
+      "loss": 0.3569,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 5.276439189910889,
+      "learning_rate": 2e-05,
+      "loss": 0.6034,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 4.0495524406433105,
+      "learning_rate": 2e-05,
+      "loss": 0.2887,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 10.429891586303711,
+      "learning_rate": 2e-05,
+      "loss": 1.06,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 5.012072563171387,
+      "learning_rate": 2e-05,
+      "loss": 0.6967,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 8.718036651611328,
+      "learning_rate": 2e-05,
+      "loss": 0.7567,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 4.441227912902832,
+      "learning_rate": 2e-05,
+      "loss": 0.3985,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.394705057144165,
+      "learning_rate": 2e-05,
+      "loss": 0.7157,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 5.729661464691162,
+      "learning_rate": 2e-05,
+      "loss": 0.5636,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 9.694987297058105,
+      "learning_rate": 2e-05,
+      "loss": 0.9611,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 12.702645301818848,
+      "learning_rate": 2e-05,
+      "loss": 0.7439,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 5.634037017822266,
+      "learning_rate": 2e-05,
+      "loss": 0.8179,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 5.084414005279541,
+      "learning_rate": 2e-05,
+      "loss": 0.7732,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 2.3128256797790527,
+      "learning_rate": 2e-05,
+      "loss": 0.5043,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 6.491540431976318,
+      "learning_rate": 2e-05,
+      "loss": 0.6021,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 4.465042591094971,
+      "learning_rate": 2e-05,
+      "loss": 1.0634,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 1.6198255784230912e+16,
+      "train_loss": 0.6214560890197753,
+      "train_runtime": 274.6327,
+      "train_samples_per_second": 0.728,
+      "train_steps_per_second": 0.182
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6198255784230912e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1053397e04c343b02320d481ade7174ca2efdfe41829a60dfbb49ebecd579803
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bea8e38baaf09ee6510b2aef0a46ddf0be5ca9330c1cae666ca645c7b432ab4e
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dbc3a8d9729d355f15cade994d232b6c0f017a20aa8d73b98f17c228c84361b
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09d1655d128c53da006db567fea2fc7434b9b8044b6f845d52b8db99dd2320a4
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29211e918a07e23fe407aeeeb62d9d534e126d7a2a51b8c8f7a1ff088c95be5d
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47ea3bcbc18afe15bb2245d06bd000d2e56ecf6f5fe6f33c6ad04379250be315
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a6fe3cd7816645ae9460f490fb1037ffd4282c328e9bd8c17e84d97e586b4e7
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:100ad256bc9de8669440660abe10a6c5d27f52c4a68a67e134bafa54426c81ac
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.39784574508667,
+      "learning_rate": 2e-05,
+      "loss": 0.9756,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 2.8972103595733643,
+      "learning_rate": 2e-05,
+      "loss": 1.0298,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 1.9185339212417603,
+      "learning_rate": 2e-05,
+      "loss": 0.686,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 1.6782031059265137,
+      "learning_rate": 2e-05,
+      "loss": 0.7958,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 1.4360851049423218,
+      "learning_rate": 2e-05,
+      "loss": 0.828,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 2.031978130340576,
+      "learning_rate": 2e-05,
+      "loss": 0.7959,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 2.223369836807251,
+      "learning_rate": 2e-05,
+      "loss": 0.9572,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 1.5497606992721558,
+      "learning_rate": 2e-05,
+      "loss": 0.5985,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.152341842651367,
+      "learning_rate": 2e-05,
+      "loss": 0.9308,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.995869755744934,
+      "learning_rate": 2e-05,
+      "loss": 0.8724,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 2.4577951431274414,
+      "learning_rate": 2e-05,
+      "loss": 0.7107,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.686221122741699,
+      "learning_rate": 2e-05,
+      "loss": 0.782,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 2.1014060974121094,
+      "learning_rate": 2e-05,
+      "loss": 0.7136,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.085268974304199,
+      "learning_rate": 2e-05,
+      "loss": 0.723,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 4.33972692489624,
+      "learning_rate": 2e-05,
+      "loss": 1.0076,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 6.186117172241211,
+      "learning_rate": 2e-05,
+      "loss": 0.9203,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 2.994359016418457,
+      "learning_rate": 2e-05,
+      "loss": 0.6973,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 5.756070613861084,
+      "learning_rate": 2e-05,
+      "loss": 1.0156,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 1.8890563249588013,
+      "learning_rate": 2e-05,
+      "loss": 0.6883,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 2.047783136367798,
+      "learning_rate": 2e-05,
+      "loss": 0.6198,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.1189417839050293,
+      "learning_rate": 2e-05,
+      "loss": 0.8165,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 2.0808229446411133,
+      "learning_rate": 2e-05,
+      "loss": 0.6428,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 1.7849960327148438,
+      "learning_rate": 2e-05,
+      "loss": 0.6548,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 1.386960506439209,
+      "learning_rate": 2e-05,
+      "loss": 0.6403,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.6751240491867065,
+      "learning_rate": 2e-05,
+      "loss": 0.8999,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2.096063809637581e+16,
+      "train_loss": 0.8000931072235108,
+      "train_runtime": 272.324,
+      "train_samples_per_second": 0.734,
+      "train_steps_per_second": 0.184
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.096063809637581e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:688a3b8604720e57257db0cf64bd1dd9f30ca73277af465d7db58ae4ae98ffa9
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff353918ed1d60324e386bfdc118226bf909d190315a8921ef2e8baa24758a8b
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95a6f306d0adb5c0e05ec683bf401fe4c04d8cac99b9337deec12ee1d0761614
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2212526d781487ec5c8161ff06df8f26308669cb418445de39cc2da74fc005
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2b51e1c7dedf009ec22fcd53095edfc51e52c3502127c959f9f27aab2fd74e3
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9abb78bfcdfa5b5bc3e68940b635fa9f3acda70c69ce4dba811709e0c3d59b74
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ebf24c9b4a2f70e078953f2222de038fffc31cbf26633368fbe572f4b844653
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77ffc1003199cdb9ce5678a067107dda4bb11f43ee2f32137a9bac5ba6d44053
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.4425019025802612,
+      "learning_rate": 2e-05,
+      "loss": 0.5973,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 1.215862512588501,
+      "learning_rate": 2e-05,
+      "loss": 0.6856,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.7811527252197266,
+      "learning_rate": 2e-05,
+      "loss": 0.5332,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 1.1735533475875854,
+      "learning_rate": 2e-05,
+      "loss": 0.705,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 2.8437013626098633,
+      "learning_rate": 2e-05,
+      "loss": 0.7182,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.6411240100860596,
+      "learning_rate": 2e-05,
+      "loss": 0.4589,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 1.3925532102584839,
+      "learning_rate": 2e-05,
+      "loss": 0.8512,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 3.309992551803589,
+      "learning_rate": 2e-05,
+      "loss": 0.9773,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 2.3441174030303955,
+      "learning_rate": 2e-05,
+      "loss": 0.4745,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.3988115787506104,
+      "learning_rate": 2e-05,
+      "loss": 0.3502,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.3023474216461182,
+      "learning_rate": 2e-05,
+      "loss": 0.5154,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.3900015354156494,
+      "learning_rate": 2e-05,
+      "loss": 0.8253,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 5.1887125968933105,
+      "learning_rate": 2e-05,
+      "loss": 1.2769,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.921790599822998,
+      "learning_rate": 2e-05,
+      "loss": 0.9044,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 2.175091505050659,
+      "learning_rate": 2e-05,
+      "loss": 0.6082,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 2.267071485519409,
+      "learning_rate": 2e-05,
+      "loss": 0.8991,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.1593542098999023,
+      "learning_rate": 2e-05,
+      "loss": 0.6995,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 3.4321177005767822,
+      "learning_rate": 2e-05,
+      "loss": 0.7938,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 2.110840082168579,
+      "learning_rate": 2e-05,
+      "loss": 1.1223,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.0541820526123047,
+      "learning_rate": 2e-05,
+      "loss": 1.041,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.609933614730835,
+      "learning_rate": 2e-05,
+      "loss": 0.7096,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.9673751592636108,
+      "learning_rate": 2e-05,
+      "loss": 0.7004,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 12.000975608825684,
+      "learning_rate": 2e-05,
+      "loss": 0.5836,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 3.2175731658935547,
+      "learning_rate": 2e-05,
+      "loss": 0.824,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.6081643104553223,
+      "learning_rate": 2e-05,
+      "loss": 0.8113,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2.184824952664883e+16,
+      "train_loss": 0.7466500663757324,
+      "train_runtime": 278.7893,
+      "train_samples_per_second": 0.717,
+      "train_steps_per_second": 0.179
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.184824952664883e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:23f128e99f0cacbc599ce4a7531221c9fbbb4cfb22fb3910e0fdc28ef2a2dbba
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad23f8d514321daa4aac439f617c0c5875a6d1c6d3ba671988a7b0ee07963eaf
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eb433a9db64a00648ed4010a157b6232e6b4a7c2139de17cb01309403eadfe4
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e743a6d1bfa05bf565720cd577338a82db7a896ef110f90486e00d96b73e6d96
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d526931e5403c2c186741e4e1fbc95717cacf149c4f17c90939a694badf824f6
+size 778341034