thkim0305 commited on May 4, 2025

Commit

1545cb9

verified ·

1 Parent(s): fecc337

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bf1bd922105b202c1332673b2650c3de98e3475fb786f5b8afb11368e5b6ecb
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7719e20ac179b3505bc16fe3d26557236f289d0aafd2a963fe447a9af6798b3
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c93ab2b8d890ac3fe7e1ea0de67b9123a0488d5bd7cb7109ea6c97edcf22c3d
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d18901eb260c181d30c1e1be564dd71da17a34f6bf6c333a89856341a4b907c
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85c6f07499b6d135cb2c16ce612f45f129974177654fe29012b34255fd4e71ec
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf6d5f3802665484ccd7c41c90f01c08c8eb716f3bee52a676c0a2ea8f4cafa
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef8a5ad5233a4d504fc5a5368036b3332e83946a37160464ca568222cafaecdf
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:766c6490fd2b03599de1b54601d6a6664ef9260808f2c2b8c964dff572c8a955
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.284128189086914,
+      "learning_rate": 2e-05,
+      "loss": 0.0793,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 3.4005775451660156,
+      "learning_rate": 2e-05,
+      "loss": 0.0915,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 1.777352213859558,
+      "learning_rate": 2e-05,
+      "loss": 0.043,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 4.783378601074219,
+      "learning_rate": 2e-05,
+      "loss": 0.8805,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 7.2065749168396,
+      "learning_rate": 2e-05,
+      "loss": 0.3896,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.6766301393508911,
+      "learning_rate": 2e-05,
+      "loss": 0.6725,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 15.141353607177734,
+      "learning_rate": 2e-05,
+      "loss": 0.6133,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 17.0766544342041,
+      "learning_rate": 2e-05,
+      "loss": 0.8163,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 9.064810752868652,
+      "learning_rate": 2e-05,
+      "loss": 0.6379,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 11.561620712280273,
+      "learning_rate": 2e-05,
+      "loss": 0.3389,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 10.447920799255371,
+      "learning_rate": 2e-05,
+      "loss": 0.1638,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 7.2391462326049805,
+      "learning_rate": 2e-05,
+      "loss": 0.4152,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 6.883174896240234,
+      "learning_rate": 2e-05,
+      "loss": 0.1845,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 2.764719247817993,
+      "learning_rate": 2e-05,
+      "loss": 0.2519,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 6.025578498840332,
+      "learning_rate": 2e-05,
+      "loss": 0.1062,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 12.170540809631348,
+      "learning_rate": 2e-05,
+      "loss": 0.6212,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 7.470730781555176,
+      "learning_rate": 2e-05,
+      "loss": 0.1638,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 11.806479454040527,
+      "learning_rate": 2e-05,
+      "loss": 0.8462,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 1.250801920890808,
+      "learning_rate": 2e-05,
+      "loss": 0.1253,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 12.122519493103027,
+      "learning_rate": 2e-05,
+      "loss": 0.3373,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.9410598278045654,
+      "learning_rate": 2e-05,
+      "loss": 0.0798,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 3.405806064605713,
+      "learning_rate": 2e-05,
+      "loss": 0.1707,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 29.996286392211914,
+      "learning_rate": 2e-05,
+      "loss": 0.9968,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 5.970231533050537,
+      "learning_rate": 2e-05,
+      "loss": 0.2291,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.9755194187164307,
+      "learning_rate": 2e-05,
+      "loss": 0.2356,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2184626743279616.0,
+      "train_loss": 0.3796170651912689,
+      "train_runtime": 115.2896,
+      "train_samples_per_second": 1.735,
+      "train_steps_per_second": 0.434
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2184626743279616.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6011d6315d2580827738242cee8ef75d254b4cea2dd8679d0473b7a9db54cc9f
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad508db3f5914917b8e2316d601bde72f9dd11c3f02fda6fd948437630f223e8
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5a375f55d8fb1c9f9b8b9d71e2f41433471b22a3f71c7c0ece807ff8703d3b
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0dbffa1ada429cae02a98572d4793e4f33e169588a8884543c9edfa076d3016
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4840c34ce37f38ac86c58160329347c4f31b8a0d2ca8c6f126c58a50979f8da0
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40a6e5dae1e8232e195fdc1aedc64fecf22d65266fc23ce9c8416e8884fb3793
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60bd84c9ce839d36a98d585b4eb739425ac0dda9019d51f2c95a11922d6e2fc0
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:778f95e12407ad50b015c33069c9a39c1140bc5a2a856c0e9f7fff2badc47c7a
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.1501377820968628,
+      "learning_rate": 2e-05,
+      "loss": 0.0192,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.30648428201675415,
+      "learning_rate": 2e-05,
+      "loss": 0.0154,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.0933217778801918,
+      "learning_rate": 2e-05,
+      "loss": 0.1095,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 5.344268321990967,
+      "learning_rate": 2e-05,
+      "loss": 0.0769,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.127670556306839,
+      "learning_rate": 2e-05,
+      "loss": 0.0712,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.12052742391824722,
+      "learning_rate": 2e-05,
+      "loss": 0.0189,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.04328983649611473,
+      "learning_rate": 2e-05,
+      "loss": 0.0113,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.04012615233659744,
+      "learning_rate": 2e-05,
+      "loss": 0.0542,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.18714116513729095,
+      "learning_rate": 2e-05,
+      "loss": 0.0121,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.1324093341827393,
+      "learning_rate": 2e-05,
+      "loss": 0.0418,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.05844057723879814,
+      "learning_rate": 2e-05,
+      "loss": 0.0108,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 6.132259368896484,
+      "learning_rate": 2e-05,
+      "loss": 0.3944,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 0.0780622586607933,
+      "learning_rate": 2e-05,
+      "loss": 0.0122,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 34.591758728027344,
+      "learning_rate": 2e-05,
+      "loss": 0.6219,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.04694774001836777,
+      "learning_rate": 2e-05,
+      "loss": 0.0107,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.03775114193558693,
+      "learning_rate": 2e-05,
+      "loss": 0.0645,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 0.4576091468334198,
+      "learning_rate": 2e-05,
+      "loss": 0.0172,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 3.4563567638397217,
+      "learning_rate": 2e-05,
+      "loss": 0.0723,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 3.871311902999878,
+      "learning_rate": 2e-05,
+      "loss": 0.1694,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.24964451789855957,
+      "learning_rate": 2e-05,
+      "loss": 0.1908,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 0.06994156539440155,
+      "learning_rate": 2e-05,
+      "loss": 0.0383,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.0806451290845871,
+      "learning_rate": 2e-05,
+      "loss": 0.0183,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 4.901146411895752,
+      "learning_rate": 2e-05,
+      "loss": 0.1231,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.1762755960226059,
+      "learning_rate": 2e-05,
+      "loss": 0.0135,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 5.6551384925842285,
+      "learning_rate": 2e-05,
+      "loss": 0.0321,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2202126654636032.0,
+      "train_loss": 0.0887929368019104,
+      "train_runtime": 116.0987,
+      "train_samples_per_second": 1.723,
+      "train_steps_per_second": 0.431
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2202126654636032.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78ac4e1058836b6234379b6e570fa3e71002444f037f47c23c6074c38371cca6
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c927ff8e1b3b4faaa34bde0c2692889ce1c086425706d7280a79dc051533dee
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f95ac4a906febc91c5cdb3813cde6eb88cfe3bc22ba6fb8af96611171ca1a688
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e262c5e68c4341c4604cab13775efef4ca2fa56dcb00f1b4eb61a6aac7db31c
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d86da56ef2fcb0770168449eb48fb8251868ad508baf093591f10e001ac71632
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00580e00a33f9296ac0a8c5158f4c03322b348cde61b769eaa5cd7738ec10606
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83d7537fc2c48cf1786a1ee004cc1881ef12160aaa0d2cff908db07aa3085b32
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0bbb7ca95f52ed9b394dbb71a9734c8caa2eb2cf282d47f60c1fa00311a262ed
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.62923264503479,
+      "learning_rate": 2e-05,
+      "loss": 0.2634,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 2.1570146083831787,
+      "learning_rate": 2e-05,
+      "loss": 0.2137,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 4.34098482131958,
+      "learning_rate": 2e-05,
+      "loss": 0.5009,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 9.01052474975586,
+      "learning_rate": 2e-05,
+      "loss": 0.2679,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 4.558963298797607,
+      "learning_rate": 2e-05,
+      "loss": 0.6763,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 5.2403740882873535,
+      "learning_rate": 2e-05,
+      "loss": 0.631,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.49892091751098633,
+      "learning_rate": 2e-05,
+      "loss": 0.321,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 4.3842668533325195,
+      "learning_rate": 2e-05,
+      "loss": 0.5071,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.5485029220581055,
+      "learning_rate": 2e-05,
+      "loss": 0.2333,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 8.232368469238281,
+      "learning_rate": 2e-05,
+      "loss": 0.4089,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.4529104232788086,
+      "learning_rate": 2e-05,
+      "loss": 0.265,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 3.8828446865081787,
+      "learning_rate": 2e-05,
+      "loss": 0.2181,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 1.24514901638031,
+      "learning_rate": 2e-05,
+      "loss": 0.3733,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 9.44221305847168,
+      "learning_rate": 2e-05,
+      "loss": 0.4971,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 2.5544800758361816,
+      "learning_rate": 2e-05,
+      "loss": 0.3479,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 7.6183319091796875,
+      "learning_rate": 2e-05,
+      "loss": 0.2933,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 5.657953262329102,
+      "learning_rate": 2e-05,
+      "loss": 0.451,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 1.417629361152649,
+      "learning_rate": 2e-05,
+      "loss": 0.1971,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 2.1960906982421875,
+      "learning_rate": 2e-05,
+      "loss": 0.3496,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 4.147959232330322,
+      "learning_rate": 2e-05,
+      "loss": 0.8169,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 1.6373600959777832,
+      "learning_rate": 2e-05,
+      "loss": 0.2783,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.473294973373413,
+      "learning_rate": 2e-05,
+      "loss": 0.2325,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 4.739127159118652,
+      "learning_rate": 2e-05,
+      "loss": 0.3456,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 1.90013587474823,
+      "learning_rate": 2e-05,
+      "loss": 0.1663,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 5.234428405761719,
+      "learning_rate": 2e-05,
+      "loss": 0.3906,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5202848019120128.0,
+      "train_loss": 0.36984447479248045,
+      "train_runtime": 190.6891,
+      "train_samples_per_second": 1.049,
+      "train_steps_per_second": 0.262
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5202848019120128.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d13906ad1f936437fd1ca3ff97f1ba801c71be2b6f272adbb73ae8ab17b6e26e
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38308af0d31e928983ecb45207689c09907a4de47c8986628f7604409ef92085
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71a124f96018f2b50d800a1d7250ca46a03878d242a4c911ada9f11011f69b97
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fc1ef0d486b875990200b1dd094d84b2570ed11e53f6a9ee10f35d34f0f49ca
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb4e079328106d0413c0e0836461aaaa74ddb45de0b53648b630bdc6a6632418
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7865e64171462c37128ea3d65f6aa1f8d3f8ad1fae133ed26642947ec64cc251
+size 360880622

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4045eac0ab4825692e2c1f2596ed0574497bab56ebb98680bbfa7a3d0cc80c1
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1db6ddaba6447528d3500e93c52fc8187a5fd82d3824af797deebd9999a4d9ac
+size 360880106

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 7.067582607269287,
+      "learning_rate": 2e-05,
+      "loss": 0.3825,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 4.283020973205566,
+      "learning_rate": 2e-05,
+      "loss": 0.2845,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 6.822042465209961,
+      "learning_rate": 2e-05,
+      "loss": 0.7916,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 9.666316986083984,
+      "learning_rate": 2e-05,
+      "loss": 0.3412,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 16.465579986572266,
+      "learning_rate": 2e-05,
+      "loss": 0.8459,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 6.560152053833008,
+      "learning_rate": 2e-05,
+      "loss": 0.6335,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 4.862478733062744,
+      "learning_rate": 2e-05,
+      "loss": 0.6741,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 7.447726249694824,
+      "learning_rate": 2e-05,
+      "loss": 0.5064,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 7.4205098152160645,
+      "learning_rate": 2e-05,
+      "loss": 0.9029,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 15.627077102661133,
+      "learning_rate": 2e-05,
+      "loss": 0.8091,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 16.306581497192383,
+      "learning_rate": 2e-05,
+      "loss": 0.7351,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 4.587651252746582,
+      "learning_rate": 2e-05,
+      "loss": 0.1841,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 19.92629623413086,
+      "learning_rate": 2e-05,
+      "loss": 1.5481,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.447114944458008,
+      "learning_rate": 2e-05,
+      "loss": 0.3892,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 14.376614570617676,
+      "learning_rate": 2e-05,
+      "loss": 0.6816,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 19.715835571289062,
+      "learning_rate": 2e-05,
+      "loss": 0.6575,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 17.579479217529297,
+      "learning_rate": 2e-05,
+      "loss": 0.8478,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 8.443402290344238,
+      "learning_rate": 2e-05,
+      "loss": 0.2789,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 2.802513599395752,
+      "learning_rate": 2e-05,
+      "loss": 0.3138,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 8.755653381347656,
+      "learning_rate": 2e-05,
+      "loss": 0.7962,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 11.076048851013184,
+      "learning_rate": 2e-05,
+      "loss": 0.6601,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 9.736947059631348,
+      "learning_rate": 2e-05,
+      "loss": 0.9945,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 9.903741836547852,
+      "learning_rate": 2e-05,
+      "loss": 0.6868,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 26.1480655670166,
+      "learning_rate": 2e-05,
+      "loss": 1.0493,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 10.546011924743652,
+      "learning_rate": 2e-05,
+      "loss": 0.8659,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2231176135704576.0,
+      "train_loss": 0.6744278335571289,
+      "train_runtime": 114.3015,
+      "train_samples_per_second": 1.75,
+      "train_steps_per_second": 0.437
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2231176135704576.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:169b161921bb01bf190d8f33849b33f9c52e7d430415f5e5a702607ff51d21b0
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f3654c2de0c19271419c744626b570b8ee010b77749be74aa4c396bf6a43bc
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91f9067a5bd4fbe9beae979d71170b2c1e372c67acd5023c3e986430948e2719
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b889c9885f7c81ae50f155aebf41c60c283c525132b24699311c6c6f7f12500a
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb7771d615a3c16f065855527c013f6e19250c242746206ff6b6a409e79a45f3
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c87725e139a42e875b6819300a7f140a858cd2d2db35b382faba6ce53d8e3d7
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a336edf31a143a41bf64944c4e3b5afa7f26ca65509fd134b08f280f6dfc3e35
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ab51c825d382c8d805b7a1989808541ebe02284dfc053fd0859047835f96957
+size 778341034

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 2.316213369369507,
+      "learning_rate": 2e-05,
+      "loss": 0.4539,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 3.5787057876586914,
+      "learning_rate": 2e-05,
+      "loss": 0.5031,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 3.0300209522247314,
+      "learning_rate": 2e-05,
+      "loss": 0.514,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 3.986267328262329,
+      "learning_rate": 2e-05,
+      "loss": 0.4123,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 5.813876152038574,
+      "learning_rate": 2e-05,
+      "loss": 0.5409,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.9155079126358032,
+      "learning_rate": 2e-05,
+      "loss": 0.3129,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 1.6175196170806885,
+      "learning_rate": 2e-05,
+      "loss": 0.2733,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 3.6581971645355225,
+      "learning_rate": 2e-05,
+      "loss": 0.3098,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.621964931488037,
+      "learning_rate": 2e-05,
+      "loss": 0.4081,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.364119052886963,
+      "learning_rate": 2e-05,
+      "loss": 0.1815,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 4.366921424865723,
+      "learning_rate": 2e-05,
+      "loss": 0.7291,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 6.259946823120117,
+      "learning_rate": 2e-05,
+      "loss": 0.5821,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 3.531437635421753,
+      "learning_rate": 2e-05,
+      "loss": 0.5318,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 9.066308975219727,
+      "learning_rate": 2e-05,
+      "loss": 0.9163,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.3480992317199707,
+      "learning_rate": 2e-05,
+      "loss": 0.4263,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 2.767130136489868,
+      "learning_rate": 2e-05,
+      "loss": 0.2811,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 5.080353736877441,
+      "learning_rate": 2e-05,
+      "loss": 0.3363,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 1.7336921691894531,
+      "learning_rate": 2e-05,
+      "loss": 0.5843,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 2.0092973709106445,
+      "learning_rate": 2e-05,
+      "loss": 0.4642,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.8756134510040283,
+      "learning_rate": 2e-05,
+      "loss": 0.4807,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 2.4722533226013184,
+      "learning_rate": 2e-05,
+      "loss": 0.3807,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.6810188293457031,
+      "learning_rate": 2e-05,
+      "loss": 0.3137,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 3.1720468997955322,
+      "learning_rate": 2e-05,
+      "loss": 0.4738,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 1.5610110759735107,
+      "learning_rate": 2e-05,
+      "loss": 0.2826,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.7629005908966064,
+      "learning_rate": 2e-05,
+      "loss": 0.554,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 6020445073571840.0,
+      "train_loss": 0.4498666000366211,
+      "train_runtime": 191.3091,
+      "train_samples_per_second": 1.045,
+      "train_steps_per_second": 0.261
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6020445073571840.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04ff9214659409561dfbaa016ea376607a875853b6d1a4c0b5696d32eaa6435b
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d40603d57ef7a099d0a49f40d6c03d0849f0e70990ada3d6564746d8a047708
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0d185e922421d0f321483a04cf9ab247f33a88bae7fe6478e14d587ec62b615
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7676ca765d23fc6a1f0a8ab3064c4017ae1ec8dccfe4446023c38f944052a7f
+size 778341886

client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35e47f376b87f03095f830e87fa9eb4f84f5a0695a2984230753c25d7ad5a96b
+size 778341034