diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..98029c6b68a40d7e56a6cd6f621c6a0475570e0d --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa789cff960ae4ee81fa9ac6a075adcf63e56a3ec6c3ecd5699d4f789f3c213 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..da2a8c080d75155e6322c1d86da86a9b6b08d2cb --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432f8736e99ab8b1e39223df1d48f017f6fd4f4f9643ca359d4b99141e4f4d74 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..e5b5d70bd627767deadda7b80f80031225e2974c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c8fc312b07e6987f1125962ff398f8861f24936ce222a149189568327cac67 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..859f1811f20825130944a49081f9c70f0bf7331a --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051032624bbe04510f16be6fdc206a54f23b25aa871002ca00fef79182e7c513 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ce47a67ceb29441bd19346a44e4ab6a86d91d34 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d675c4b87e3db30f58b3688f502ecdfd3b852d0a28b2d067cc72ca763b08cd5a +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..b00c4e1f9af87c04e2e9a3d3e12477087e4176f3 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381ce3ba71ec63c166839e10280a43a1e9f31dc4d7fa46bef9e6fa158c302e49 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c24b7b8ac9b71e8143bb43f775ed25a051eda3d --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e8733bb21c24aa296509cc99e8f4f9b3259d52e55bf918ee4c9f835985258f +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ef9be35a080e5a0cae4fd64a2b89da9aacb94bf --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89fe3d8b4f7d27dbc7e4251a76c701fd44c4ad0c7fadf149e72d9c82c2e77db3 +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..729e0266cfb79a8e5fd95ef8eff5b5252786cf19 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 9.423497200012207, + "learning_rate": 2e-05, + "loss": 0.9426, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 6.242947101593018, + "learning_rate": 2e-05, + "loss": 0.4941, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.23292511701583862, + "learning_rate": 2e-05, + "loss": 0.0161, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 3.2664003372192383, + "learning_rate": 2e-05, + "loss": 0.0807, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 16.083553314208984, + "learning_rate": 2e-05, + "loss": 1.5403, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 13.203125, + "learning_rate": 2e-05, + "loss": 1.3032, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 9.931619644165039, + "learning_rate": 2e-05, + "loss": 0.4531, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 12.197830200195312, + "learning_rate": 2e-05, + "loss": 2.0777, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 13.418411254882812, + "learning_rate": 2e-05, + "loss": 2.5869, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 7.835398197174072, + "learning_rate": 2e-05, + "loss": 1.2992, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.8039246201515198, + "learning_rate": 2e-05, + "loss": 0.0811, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 7.014680862426758, + "learning_rate": 2e-05, + "loss": 1.1046, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 8.817255973815918, + "learning_rate": 2e-05, + "loss": 0.8185, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 2.18149733543396, + "learning_rate": 2e-05, + "loss": 0.1982, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 1.396234154701233, + "learning_rate": 2e-05, + "loss": 0.7965, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 9.15268325805664, + "learning_rate": 2e-05, + "loss": 0.5244, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 5.599765777587891, + "learning_rate": 2e-05, + "loss": 0.686, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 11.052386283874512, + "learning_rate": 2e-05, + "loss": 1.5529, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 2.8186702728271484, + "learning_rate": 2e-05, + "loss": 0.6692, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 4.434682846069336, + "learning_rate": 2e-05, + "loss": 0.5903, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 4.227272033691406, + "learning_rate": 2e-05, + "loss": 0.2824, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 5.939194679260254, + "learning_rate": 2e-05, + "loss": 1.3968, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.5867003798484802, + "learning_rate": 2e-05, + "loss": 0.393, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 6.319277763366699, + "learning_rate": 2e-05, + "loss": 0.6232, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 2.46071195602417, + "learning_rate": 2e-05, + "loss": 0.2416, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 3.9734086990356445, + "learning_rate": 2e-05, + "loss": 0.6859, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 1.4034255743026733, + "learning_rate": 2e-05, + "loss": 0.5754, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 1.3656154870986938, + "learning_rate": 2e-05, + "loss": 0.1789, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 4.575096130371094, + "learning_rate": 2e-05, + "loss": 2.5298, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.9366658329963684, + "learning_rate": 2e-05, + "loss": 0.1539, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 1.2089002132415771, + "learning_rate": 2e-05, + "loss": 0.5569, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 2.383746862411499, + "learning_rate": 2e-05, + "loss": 0.3214, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.41840168833732605, + "learning_rate": 2e-05, + "loss": 0.2939, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 2.619239330291748, + "learning_rate": 2e-05, + "loss": 0.6163, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 3.0113186836242676, + "learning_rate": 2e-05, + "loss": 0.5063, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.8169399499893188, + "learning_rate": 2e-05, + "loss": 0.3002, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 4.200991630554199, + "learning_rate": 2e-05, + "loss": 1.5329, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 1.631651520729065, + "learning_rate": 2e-05, + "loss": 0.6569, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.40286803245544434, + "learning_rate": 2e-05, + "loss": 1.0856, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 2.7753989696502686, + "learning_rate": 2e-05, + "loss": 0.9605, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.5652052164077759, + "learning_rate": 2e-05, + "loss": 0.3553, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 5.572319984436035, + "learning_rate": 2e-05, + "loss": 1.0, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.4662155210971832, + "learning_rate": 2e-05, + "loss": 0.1469, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 2.5111005306243896, + "learning_rate": 2e-05, + "loss": 0.9844, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 2.1454522609710693, + "learning_rate": 2e-05, + "loss": 0.3873, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 1.0339059829711914, + "learning_rate": 2e-05, + "loss": 1.2139, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 4.232385158538818, + "learning_rate": 2e-05, + "loss": 2.1749, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 1.8107025623321533, + "learning_rate": 2e-05, + "loss": 1.4807, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 4.099456787109375, + "learning_rate": 2e-05, + "loss": 1.5927, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.4381089210510254, + "learning_rate": 2e-05, + "loss": 0.2756, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2185214759534592.0, + "train_loss": 0.8263820886611939, + "train_runtime": 70.7407, + "train_samples_per_second": 5.654, + "train_steps_per_second": 1.414 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2185214759534592.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..950f4affb232fdce12a68bf978d70e2a68f10994 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca00ad56c9180596acb64d02ed1c2c93b5167313f4ddacae17fe6151ab9c155 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca45c768b5d00f5a854442a17df48b76da338c0a --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5292459abad60fd048050fb5779edeaa54e68fd64f6d8f672e54415ffd378568 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..a199b5e32a5713dddde1f9ac1fc688aecc104975 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c51878f3c0c54cdc54eb26bf35a2757735003b92f7da1689081ec193e89b87b +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4fde7107a633a74720faac4e487814b8f68b607 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a9f87437d27a76a6cac22a0d23d1b90a201e43d7c601b4f2178ea763b5880d +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7704541746d7d8731a76427b7d12b85e42da0194 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d126b766b20e0cad09d67d67cb29db4c25a011f24be748b251d8b5cf6288b90a +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..5425b60fe66a997284f33ff9096efc3a3aa0c93c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ad85bffca2bf517e6a2294ecc419a949579749fae404a75f6f3de58d5c4be5 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..aee22c7209952ca7028548a8e9a405c899d7260b --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1e47d59fb16d75680a1f95491ce48c1a01605d42bde715afab635cd1fd4fc4 +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..02a5a84cfc612d83536c01a7d64b37738a0ab758 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8eb669eede9faee34f088d7edbeb9c774a119de291ab6f2ebf3c071d1346dc1 +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d2e6450f9ef9640b208ea514896b20a608322d3c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.06926380842924118, + "learning_rate": 2e-05, + "loss": 0.0983, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 0.14224855601787567, + "learning_rate": 2e-05, + "loss": 0.5132, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 3.1999995708465576, + "learning_rate": 2e-05, + "loss": 0.3034, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.366185188293457, + "learning_rate": 2e-05, + "loss": 0.0975, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 0.047930456697940826, + "learning_rate": 2e-05, + "loss": 0.0054, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 2.5389022827148438, + "learning_rate": 2e-05, + "loss": 0.1265, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 4.9087395668029785, + "learning_rate": 2e-05, + "loss": 0.3968, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.5400022864341736, + "learning_rate": 2e-05, + "loss": 0.2126, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 0.17520087957382202, + "learning_rate": 2e-05, + "loss": 0.0546, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 0.15257954597473145, + "learning_rate": 2e-05, + "loss": 0.0091, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.7264464497566223, + "learning_rate": 2e-05, + "loss": 0.1006, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.2980533242225647, + "learning_rate": 2e-05, + "loss": 0.0133, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 0.862314760684967, + "learning_rate": 2e-05, + "loss": 0.0667, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 0.18892519176006317, + "learning_rate": 2e-05, + "loss": 0.0091, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.21099655330181122, + "learning_rate": 2e-05, + "loss": 0.0126, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 0.5386524200439453, + "learning_rate": 2e-05, + "loss": 0.0495, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 4.457860946655273, + "learning_rate": 2e-05, + "loss": 0.2528, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 0.006588024087250233, + "learning_rate": 2e-05, + "loss": 0.0005, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 0.25489550828933716, + "learning_rate": 2e-05, + "loss": 0.0122, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 1.0599042177200317, + "learning_rate": 2e-05, + "loss": 0.0402, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 4.65377950668335, + "learning_rate": 2e-05, + "loss": 0.1825, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 5.81795072555542, + "learning_rate": 2e-05, + "loss": 0.5606, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.06573299318552017, + "learning_rate": 2e-05, + "loss": 0.0047, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 0.15902456641197205, + "learning_rate": 2e-05, + "loss": 0.0069, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 0.09095041453838348, + "learning_rate": 2e-05, + "loss": 0.0076, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.03288710489869118, + "learning_rate": 2e-05, + "loss": 0.1018, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.8101238012313843, + "learning_rate": 2e-05, + "loss": 0.0885, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.018732983618974686, + "learning_rate": 2e-05, + "loss": 0.0025, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.014856848865747452, + "learning_rate": 2e-05, + "loss": 0.177, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.006819794420152903, + "learning_rate": 2e-05, + "loss": 0.0792, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.015503552742302418, + "learning_rate": 2e-05, + "loss": 0.0177, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 0.02056262083351612, + "learning_rate": 2e-05, + "loss": 0.0141, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.32354819774627686, + "learning_rate": 2e-05, + "loss": 0.0082, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.28828832507133484, + "learning_rate": 2e-05, + "loss": 0.0255, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.011394195258617401, + "learning_rate": 2e-05, + "loss": 0.002, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 0.08238676935434341, + "learning_rate": 2e-05, + "loss": 0.004, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 1.0231951475143433, + "learning_rate": 2e-05, + "loss": 0.0829, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.0049722520634531975, + "learning_rate": 2e-05, + "loss": 0.0011, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.5577210187911987, + "learning_rate": 2e-05, + "loss": 0.1744, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.060515161603689194, + "learning_rate": 2e-05, + "loss": 0.0044, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.24229663610458374, + "learning_rate": 2e-05, + "loss": 0.0266, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.003239632351323962, + "learning_rate": 2e-05, + "loss": 0.0009, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.019305258989334106, + "learning_rate": 2e-05, + "loss": 0.5092, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.03198835998773575, + "learning_rate": 2e-05, + "loss": 0.0061, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 0.4420487582683563, + "learning_rate": 2e-05, + "loss": 0.0412, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 8.323692321777344, + "learning_rate": 2e-05, + "loss": 0.6126, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 0.013138143345713615, + "learning_rate": 2e-05, + "loss": 0.0034, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.41053083539009094, + "learning_rate": 2e-05, + "loss": 0.0296, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.00738116167485714, + "learning_rate": 2e-05, + "loss": 0.0008, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.04163911193609238, + "learning_rate": 2e-05, + "loss": 0.0028, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2202644449853440.0, + "train_loss": 0.10307437002658844, + "train_runtime": 69.5445, + "train_samples_per_second": 5.752, + "train_steps_per_second": 1.438 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2202644449853440.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..10269639f15a0f55481f14c2c9f6185573559287 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:475ad4672ec3aaec5117fffb04c560d3ac5d8223d0494c2bc0036bf8aeeb4787 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..af017954e28b5fe80013fa5546b3b12000ad768f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2073eda13fa87d39360d6fd49065146220dc0190e1004c319eb392aee6a48546 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..b37ec1c70b4515e2ba3990c2ba0c63dc67ae229f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0388c3619655d1858ead432f4146aae463cdfad671f284a6b5fdff3e9e119e +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd3879010f4177dd3a1cace678d19aa65210875f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57bc54c8c645bd8a7a7d98994d2b745350a66ffc36b63c1b3485be984ab5a62c +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad9b9367eaab27f48be2a7e06929d96877ac42ea --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ef0ae39d317134427d6d59afb8c58f3a573b7b4c9b00a837033c99411b1a27 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8bb6056478e2148099069dce6ab8cbe3e9f080c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333195ee6334bf482e3581bf003c5a1b2bdfbda17a5bce80a1c5e88263b3d4a6 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..be1d9bbac15fe5b223384d5df3c78154c7d5ed05 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309ab816121588cffe22b87774005c82c50e3940c8226ade89e6c6edc2ed71f9 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7299ba6265f74e7dce77ddc2cc8ac200c0ae0105 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4706bf42b3020bbe7065ab079de8559f0a6a271f8b90c7dc1a72ba00aaed5e7e +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f0bffb1b868acc9d7fd967bca15a64dfb3b13cb2 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.42672601342201233, + "learning_rate": 2e-05, + "loss": 0.1273, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 3.3164169788360596, + "learning_rate": 2e-05, + "loss": 1.0253, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 1.0701144933700562, + "learning_rate": 2e-05, + "loss": 1.1081, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 3.5641934871673584, + "learning_rate": 2e-05, + "loss": 0.917, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 2.482414960861206, + "learning_rate": 2e-05, + "loss": 0.4307, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 3.699124336242676, + "learning_rate": 2e-05, + "loss": 1.2775, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.19477766752243042, + "learning_rate": 2e-05, + "loss": 0.2769, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.49098673462867737, + "learning_rate": 2e-05, + "loss": 0.227, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 4.419643402099609, + "learning_rate": 2e-05, + "loss": 0.6207, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 2.067509412765503, + "learning_rate": 2e-05, + "loss": 1.6495, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.7661958336830139, + "learning_rate": 2e-05, + "loss": 0.1506, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 4.098026752471924, + "learning_rate": 2e-05, + "loss": 0.6787, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 3.8598406314849854, + "learning_rate": 2e-05, + "loss": 1.2822, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 2.6059610843658447, + "learning_rate": 2e-05, + "loss": 0.4346, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.4412726163864136, + "learning_rate": 2e-05, + "loss": 0.2262, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 1.919790506362915, + "learning_rate": 2e-05, + "loss": 0.3238, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 2.1158552169799805, + "learning_rate": 2e-05, + "loss": 0.7178, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 1.9022879600524902, + "learning_rate": 2e-05, + "loss": 0.2602, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.8349590301513672, + "learning_rate": 2e-05, + "loss": 0.679, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 5.065424919128418, + "learning_rate": 2e-05, + "loss": 1.0498, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 2.6241636276245117, + "learning_rate": 2e-05, + "loss": 0.824, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 1.375793218612671, + "learning_rate": 2e-05, + "loss": 0.1406, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 4.631248950958252, + "learning_rate": 2e-05, + "loss": 0.9059, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 0.9230762124061584, + "learning_rate": 2e-05, + "loss": 0.3381, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 3.146935224533081, + "learning_rate": 2e-05, + "loss": 0.3658, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 1.643314242362976, + "learning_rate": 2e-05, + "loss": 0.4425, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 1.577388048171997, + "learning_rate": 2e-05, + "loss": 0.6525, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 1.5418776273727417, + "learning_rate": 2e-05, + "loss": 0.545, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.3298715353012085, + "learning_rate": 2e-05, + "loss": 0.2741, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.20194341242313385, + "learning_rate": 2e-05, + "loss": 0.5407, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 1.6116507053375244, + "learning_rate": 2e-05, + "loss": 0.6341, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.1075984239578247, + "learning_rate": 2e-05, + "loss": 0.5064, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.6221591234207153, + "learning_rate": 2e-05, + "loss": 0.2389, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 1.730831503868103, + "learning_rate": 2e-05, + "loss": 0.353, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.8647536039352417, + "learning_rate": 2e-05, + "loss": 0.9568, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 0.6666922569274902, + "learning_rate": 2e-05, + "loss": 0.9053, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.4998483657836914, + "learning_rate": 2e-05, + "loss": 0.0881, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.8337864279747009, + "learning_rate": 2e-05, + "loss": 0.3308, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.7664095759391785, + "learning_rate": 2e-05, + "loss": 0.144, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.212735176086426, + "learning_rate": 2e-05, + "loss": 0.9583, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 4.194940567016602, + "learning_rate": 2e-05, + "loss": 2.3249, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.21294273436069489, + "learning_rate": 2e-05, + "loss": 0.0391, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 1.9137557744979858, + "learning_rate": 2e-05, + "loss": 0.6782, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.2017444372177124, + "learning_rate": 2e-05, + "loss": 0.5466, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 3.704470157623291, + "learning_rate": 2e-05, + "loss": 1.9585, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 0.09351445734500885, + "learning_rate": 2e-05, + "loss": 0.1403, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 1.1362736225128174, + "learning_rate": 2e-05, + "loss": 0.4254, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.6755289435386658, + "learning_rate": 2e-05, + "loss": 0.2894, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 3.5850534439086914, + "learning_rate": 2e-05, + "loss": 1.7788, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.5499467849731445, + "learning_rate": 2e-05, + "loss": 0.16, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5196654105853952.0, + "train_loss": 0.6389807415008545, + "train_runtime": 132.4801, + "train_samples_per_second": 3.019, + "train_steps_per_second": 0.755 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5196654105853952.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..e13ae3cd894c66790fa0ca0ab617307f10d786ab --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3137dc1711718282da550f4845cd36baab7c46bbf11b76105eef0fead3e874c1 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..f6a024a75605676d2d53a31918c0598ab68291d3 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:defed3f7caba8d08a8444293265279252bac754f91960c09be90e83e3f42290f +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5a4b34f9532e0b091ad639160f14e609120da97 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e379d1ddbbac4869c911e4aa2261e7e062f1759e7a20a5e7ffe738d3b4ece2 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..08b784797d69ff9eef2755deb95a74f9ad8fd204 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611b55b9aafd1b4e11c27bcf80a22e33756eef1bb1c1de102cf91676dd074463 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd390d04025d28c5f70bec049e4d29bf5444acb3 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5576200561a80efa43148ce2e7831d31afb0e4696dd074723a8f63eea37962a3 +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..0580d002b11a21b028e2a79dd50f6ad1546388e9 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b02587ad1dd9f38df651a6f97486fbd2a5654af5e637c8fcaba6350c37ba07 +size 360880622 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee259a500592640c6587ec6cde6913decbabc080 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594714875d0223582d0341af340f41258ef6b01e6633ed2f1b31835f1fbcb28f +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d814258fcdfef3ef2b6aefde68259e6e83f7d3de --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acc6f735922a0d67535d40544015c6d4cd1d2d4a9d1702e6bd5541a29358dce +size 360880106 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d5365ec073d2fe1c9cc953f4e62cb8b353fded2 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 11.701602935791016, + "learning_rate": 2e-05, + "loss": 3.2041, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 1.8877856731414795, + "learning_rate": 2e-05, + "loss": 0.8482, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 3.9416866302490234, + "learning_rate": 2e-05, + "loss": 0.9832, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 8.076401710510254, + "learning_rate": 2e-05, + "loss": 1.7619, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 9.02495288848877, + "learning_rate": 2e-05, + "loss": 1.1751, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 6.70341682434082, + "learning_rate": 2e-05, + "loss": 1.7742, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 9.935924530029297, + "learning_rate": 2e-05, + "loss": 2.1147, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 6.356525421142578, + "learning_rate": 2e-05, + "loss": 0.8387, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 4.266024112701416, + "learning_rate": 2e-05, + "loss": 0.4111, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 9.628840446472168, + "learning_rate": 2e-05, + "loss": 1.1686, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 6.305365562438965, + "learning_rate": 2e-05, + "loss": 0.5419, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 3.4979939460754395, + "learning_rate": 2e-05, + "loss": 1.4611, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.904814600944519, + "learning_rate": 2e-05, + "loss": 0.5461, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 5.557872772216797, + "learning_rate": 2e-05, + "loss": 0.4092, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 3.297398567199707, + "learning_rate": 2e-05, + "loss": 0.5477, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 14.574881553649902, + "learning_rate": 2e-05, + "loss": 1.0394, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 2.348663806915283, + "learning_rate": 2e-05, + "loss": 2.8679, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 8.440045356750488, + "learning_rate": 2e-05, + "loss": 2.2365, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 6.058783531188965, + "learning_rate": 2e-05, + "loss": 0.8523, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 5.3567728996276855, + "learning_rate": 2e-05, + "loss": 1.1006, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 15.160209655761719, + "learning_rate": 2e-05, + "loss": 1.5179, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 10.718804359436035, + "learning_rate": 2e-05, + "loss": 1.9688, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 3.9706368446350098, + "learning_rate": 2e-05, + "loss": 0.554, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 4.072225093841553, + "learning_rate": 2e-05, + "loss": 1.1259, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 4.832661151885986, + "learning_rate": 2e-05, + "loss": 1.2787, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 4.9963531494140625, + "learning_rate": 2e-05, + "loss": 2.5343, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.30591192841529846, + "learning_rate": 2e-05, + "loss": 0.7795, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 3.4619483947753906, + "learning_rate": 2e-05, + "loss": 0.957, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.5531309843063354, + "learning_rate": 2e-05, + "loss": 0.753, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 3.0114293098449707, + "learning_rate": 2e-05, + "loss": 1.3273, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 2.4710679054260254, + "learning_rate": 2e-05, + "loss": 0.9157, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 2.4386227130889893, + "learning_rate": 2e-05, + "loss": 1.0244, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 2.135855197906494, + "learning_rate": 2e-05, + "loss": 0.8826, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 2.907819986343384, + "learning_rate": 2e-05, + "loss": 1.173, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 2.032712459564209, + "learning_rate": 2e-05, + "loss": 0.9032, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 2.388005495071411, + "learning_rate": 2e-05, + "loss": 1.9712, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 3.9849488735198975, + "learning_rate": 2e-05, + "loss": 1.2745, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 7.177330493927002, + "learning_rate": 2e-05, + "loss": 1.5408, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.1500422954559326, + "learning_rate": 2e-05, + "loss": 0.589, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.8292155265808105, + "learning_rate": 2e-05, + "loss": 0.8088, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.9888606667518616, + "learning_rate": 2e-05, + "loss": 0.1827, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 2.494753122329712, + "learning_rate": 2e-05, + "loss": 1.0098, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 1.3061927556991577, + "learning_rate": 2e-05, + "loss": 1.1736, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.5556581020355225, + "learning_rate": 2e-05, + "loss": 0.6252, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.5772370100021362, + "learning_rate": 2e-05, + "loss": 0.7691, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 4.533968448638916, + "learning_rate": 2e-05, + "loss": 0.5719, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 2.4935195446014404, + "learning_rate": 2e-05, + "loss": 0.7637, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 2.4782304763793945, + "learning_rate": 2e-05, + "loss": 0.3888, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 2.847276210784912, + "learning_rate": 2e-05, + "loss": 1.1708, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 3.366889715194702, + "learning_rate": 2e-05, + "loss": 1.3215, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2232466265866240.0, + "train_loss": 1.15478422164917, + "train_runtime": 72.3437, + "train_samples_per_second": 5.529, + "train_steps_per_second": 1.382 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2232466265866240.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..430ef2fd9d6d3a5064361937238b67fcbbdf2bec --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f65beb7bbd25136510c6187fc11e9ca55eaedcabba3b14033692495c02899c +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b14628aa3324e6b12bffae404cef73adbac7259 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaeba29721fb62a98ac7f919a031d53e300c961e5a7541ddce722693348166e5 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9536816dccfa1cb22a022f2e98a733ad57ba63d --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7698a9fcb5af2885504255c07c54acce127c28f1a81ddf7f9850673e0719ef +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..6fe215b5ecc9587b200230dc658c37ac59a3367f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d090f3e1e20b1615cc8945e72b93472544277b7189ef83362446f27ba8a0c30 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3fb6da24fef2527ef78ad5a4cd39b2e16defbc75 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591958c16e67ee2f5bf6f1fd0f51c4455c7585e8e45468af114daff5756e3268 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..0015bcca4754f313b1a981d35af34db48734ab2f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f300b973c4d938700cfca9fae9a52cf471ee38d054e0060f6d88b1dc551cfa20 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5e1b6981b1db066bea59153dcbc448e9aff4d90 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db0a6df166d5e9f5df8f18244a5ccc530772bebf1d2da9d366e863e05f2618d +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c05fe9bd1aa6a92f62f35c17aaf2425d47a6218 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8104378821d6474f3a6c351716ffb4f1db29be362a71d37564a8be300b60419 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f6b9fa912e89a99d2582bf658f35022f819271a1 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.5440601110458374, + "learning_rate": 2e-05, + "loss": 0.3859, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 2.486060380935669, + "learning_rate": 2e-05, + "loss": 0.8744, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 1.5172414779663086, + "learning_rate": 2e-05, + "loss": 1.0364, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.87302565574646, + "learning_rate": 2e-05, + "loss": 0.6833, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 1.2508922815322876, + "learning_rate": 2e-05, + "loss": 0.5988, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 2.049877643585205, + "learning_rate": 2e-05, + "loss": 0.2546, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 3.000075101852417, + "learning_rate": 2e-05, + "loss": 1.0645, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 4.159180641174316, + "learning_rate": 2e-05, + "loss": 1.0189, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 1.9412723779678345, + "learning_rate": 2e-05, + "loss": 0.5621, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 2.991361141204834, + "learning_rate": 2e-05, + "loss": 0.406, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 1.989998698234558, + "learning_rate": 2e-05, + "loss": 2.1827, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 1.5404460430145264, + "learning_rate": 2e-05, + "loss": 0.9485, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.243625521659851, + "learning_rate": 2e-05, + "loss": 0.4014, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 2.017669200897217, + "learning_rate": 2e-05, + "loss": 1.1306, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 2.773820638656616, + "learning_rate": 2e-05, + "loss": 0.9518, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 3.3499462604522705, + "learning_rate": 2e-05, + "loss": 0.8406, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 1.1517057418823242, + "learning_rate": 2e-05, + "loss": 0.5119, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 3.779822826385498, + "learning_rate": 2e-05, + "loss": 1.2535, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.4071242809295654, + "learning_rate": 2e-05, + "loss": 0.6125, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 2.478247880935669, + "learning_rate": 2e-05, + "loss": 1.1099, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 2.338315725326538, + "learning_rate": 2e-05, + "loss": 1.0911, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 1.6824209690093994, + "learning_rate": 2e-05, + "loss": 0.7209, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 3.2349324226379395, + "learning_rate": 2e-05, + "loss": 0.9425, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.572331666946411, + "learning_rate": 2e-05, + "loss": 0.6688, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 2.569999933242798, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.5937522053718567, + "learning_rate": 2e-05, + "loss": 1.0816, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.9653764367103577, + "learning_rate": 2e-05, + "loss": 0.4465, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.6579816937446594, + "learning_rate": 2e-05, + "loss": 0.2058, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.3638180494308472, + "learning_rate": 2e-05, + "loss": 0.7284, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 1.4289367198944092, + "learning_rate": 2e-05, + "loss": 0.8351, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.7946614027023315, + "learning_rate": 2e-05, + "loss": 0.6298, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.661765694618225, + "learning_rate": 2e-05, + "loss": 0.8354, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 2.3677399158477783, + "learning_rate": 2e-05, + "loss": 1.0779, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.8689314723014832, + "learning_rate": 2e-05, + "loss": 0.532, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.7545336484909058, + "learning_rate": 2e-05, + "loss": 0.8644, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.1046079397201538, + "learning_rate": 2e-05, + "loss": 0.4699, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.21095605194568634, + "learning_rate": 2e-05, + "loss": 0.593, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 1.4656115770339966, + "learning_rate": 2e-05, + "loss": 1.3938, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.224704384803772, + "learning_rate": 2e-05, + "loss": 0.7324, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 1.486828088760376, + "learning_rate": 2e-05, + "loss": 0.5396, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.9084728956222534, + "learning_rate": 2e-05, + "loss": 0.554, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.9191360473632812, + "learning_rate": 2e-05, + "loss": 0.6183, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.7869934439659119, + "learning_rate": 2e-05, + "loss": 0.3396, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.42631539702415466, + "learning_rate": 2e-05, + "loss": 0.401, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.4716026782989502, + "learning_rate": 2e-05, + "loss": 0.8724, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 1.6058681011199951, + "learning_rate": 2e-05, + "loss": 0.5681, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 1.3189148902893066, + "learning_rate": 2e-05, + "loss": 0.7323, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.9830902814865112, + "learning_rate": 2e-05, + "loss": 0.6886, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.8136429190635681, + "learning_rate": 2e-05, + "loss": 0.2411, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 3.1793293952941895, + "learning_rate": 2e-05, + "loss": 1.0366, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 6021565481222144.0, + "train_loss": 0.7584884357452393, + "train_runtime": 150.4749, + "train_samples_per_second": 2.658, + "train_steps_per_second": 0.665 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6021565481222144.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..b071a4ac8295ecb71a2097d792d61a9582e1afaa --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b224a628b3734d5758dbf6796aed6dd04a1b1d0d81b99063131a5f87f032729 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb9f8b9ae4118163b075beaa01ce5009a4dac809 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0b171c9c908292b73b092761beef17fde357b8526a9689a4c1a431bef82d5b +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe170f73455de51ff04dc72665cffad8bc4a1f9f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef71ba45a93d892d931909dd81bac80459da02b0bf561f2c594f640c6c01992a +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..147130c1ea84857559c9663da742fd9ff86d5f0b --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78b990fbebb884361e12c185520abbd6ce9ae62e5e4f4903c4863f11b8a265cc +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e21c9d2022bb48ddf10a5ba443fbcda05890439f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9b6db8edd59dbbd02b347056aa744f332dd711a32e98dd04fd3259d4cf4c56 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..da88ad8d3ca1831bed2deb7e5242b1cb312e02a9 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f05d80db8399a4a1250713230b29541914dcb088b5446d959b93ada40c7d47 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..693bfec74f0b4d079aa25142760e12c8ed0ed8aa --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060dded9a5d5f758e0194e7fdd9d56e114fd2c4bbbe4727efce45fe2fc10b2f4 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a30ee215a54ac703ba301124ab3081c79b02dab1 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9659d713254e2eb528b418d6d8825d8b43b22e80eb87236e5d0837b4de123e +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4804f5dcc74c11bf5feb5f4ae7b0f78c4c96f292 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.08982393145561218, + "learning_rate": 2e-05, + "loss": 0.2328, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 3.3573379516601562, + "learning_rate": 2e-05, + "loss": 0.4617, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.2133311629295349, + "learning_rate": 2e-05, + "loss": 0.0153, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 0.07882347702980042, + "learning_rate": 2e-05, + "loss": 0.1774, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 3.0131030082702637, + "learning_rate": 2e-05, + "loss": 0.2952, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 0.09988859295845032, + "learning_rate": 2e-05, + "loss": 0.0046, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.001691152574494481, + "learning_rate": 2e-05, + "loss": 0.2014, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.6652685403823853, + "learning_rate": 2e-05, + "loss": 0.0564, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 0.2490081638097763, + "learning_rate": 2e-05, + "loss": 0.0634, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 1.2552433013916016, + "learning_rate": 2e-05, + "loss": 0.0471, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 9.787774085998535, + "learning_rate": 2e-05, + "loss": 1.072, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 4.691402435302734, + "learning_rate": 2e-05, + "loss": 0.2304, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.8329142332077026, + "learning_rate": 2e-05, + "loss": 0.9169, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 5.139729022979736, + "learning_rate": 2e-05, + "loss": 2.4264, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.07531707733869553, + "learning_rate": 2e-05, + "loss": 0.1021, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 2.0397722721099854, + "learning_rate": 2e-05, + "loss": 0.1805, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 3.4966022968292236, + "learning_rate": 2e-05, + "loss": 0.2218, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 1.2320594787597656, + "learning_rate": 2e-05, + "loss": 0.1348, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.4888699054718018, + "learning_rate": 2e-05, + "loss": 0.0962, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 8.161986351013184, + "learning_rate": 2e-05, + "loss": 0.7422, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.842879593372345, + "learning_rate": 2e-05, + "loss": 2.4898, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 2.469893455505371, + "learning_rate": 2e-05, + "loss": 0.4676, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.5714770555496216, + "learning_rate": 2e-05, + "loss": 0.1859, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.0934393405914307, + "learning_rate": 2e-05, + "loss": 1.6918, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 7.387649059295654, + "learning_rate": 2e-05, + "loss": 1.0815, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 2.6136128902435303, + "learning_rate": 2e-05, + "loss": 0.7097, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 1.7184696197509766, + "learning_rate": 2e-05, + "loss": 0.3055, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 1.8947840929031372, + "learning_rate": 2e-05, + "loss": 0.3352, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.6567164063453674, + "learning_rate": 2e-05, + "loss": 0.148, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.479931116104126, + "learning_rate": 2e-05, + "loss": 0.1525, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.5562218427658081, + "learning_rate": 2e-05, + "loss": 0.2389, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.9559447765350342, + "learning_rate": 2e-05, + "loss": 1.074, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.5000556707382202, + "learning_rate": 2e-05, + "loss": 0.2175, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.17561589181423187, + "learning_rate": 2e-05, + "loss": 0.1933, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.763090193271637, + "learning_rate": 2e-05, + "loss": 0.4763, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 0.8020315170288086, + "learning_rate": 2e-05, + "loss": 0.6349, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.07478474080562592, + "learning_rate": 2e-05, + "loss": 0.3866, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.3866560459136963, + "learning_rate": 2e-05, + "loss": 0.0755, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.14689640700817108, + "learning_rate": 2e-05, + "loss": 0.7847, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.5186038017272949, + "learning_rate": 2e-05, + "loss": 0.2711, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.4922601282596588, + "learning_rate": 2e-05, + "loss": 0.2955, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.14919228851795197, + "learning_rate": 2e-05, + "loss": 0.2082, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.2567432224750519, + "learning_rate": 2e-05, + "loss": 0.1574, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.3054035902023315, + "learning_rate": 2e-05, + "loss": 0.2232, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 0.16678793728351593, + "learning_rate": 2e-05, + "loss": 0.0266, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 4.688411712646484, + "learning_rate": 2e-05, + "loss": 0.4672, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 0.2463907152414322, + "learning_rate": 2e-05, + "loss": 0.0527, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 1.7890900373458862, + "learning_rate": 2e-05, + "loss": 0.4185, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.016967343166470528, + "learning_rate": 2e-05, + "loss": 0.6785, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 2.782930850982666, + "learning_rate": 2e-05, + "loss": 1.87, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5262504125005824.0, + "train_loss": 0.4799409031867981, + "train_runtime": 134.5856, + "train_samples_per_second": 2.972, + "train_steps_per_second": 0.743 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5262504125005824.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba4183ca0d1314acaff889cf437c704a8b074452 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cee0252fa5b54de5eb2c1d6f8a7b3abc755b5988c43a1d13d0150df97e88036 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..db3b85e629fe68142f4b5869341d8f1c0189db24 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf593f0689d0d5485fd1af977e64810da0180283cca8b57148f72cf0386b5d83 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e7d57ac608c53614b9ce9038944bc5a5ba4affa --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76578d1ecf85948c56625c4ff0f5fa0134fae9f1d67ed21376e233b75c4bc561 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab65314513289c42728ceb464194f244cefa4162 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a949989d5c755f0d240dc138fe0fa17ff9b73b676659b754d4c15d9ecafb0325 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..07d4743dabc4395661c271dfe94bfd21ef89741c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57f3f3e39f073f1cfd1d7a380c51d6201d19fc2b263665c1a53f6d4c6f05362 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c3ab5370ccb7274f522bfa47dd6751faba1a626 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d414b7d30f407e459e7c86c1e6ea9783fd6b831937cb4193ab92505225dff56 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3daa3c98feec9071c6a570c465b76dbfe50cf7a --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7486c433c80d238bd58ef72b02c7183faef41a3138d3fce6a742c411de5640b0 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..add151028111d1e6f805e0970a5020ddf1ebd34f --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda871767ad67efa14b1a5751adaf2d568f0dd08c4dd1f93683b89fb8a57ee60 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e61f6e79497414d1538dd42c560312114cb06ae9 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 2.5641870498657227, + "learning_rate": 2e-05, + "loss": 0.9117, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 7.3367838859558105, + "learning_rate": 2e-05, + "loss": 1.3484, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 3.435872793197632, + "learning_rate": 2e-05, + "loss": 1.1888, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.8724123239517212, + "learning_rate": 2e-05, + "loss": 0.7479, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 5.045788764953613, + "learning_rate": 2e-05, + "loss": 1.0021, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 2.3941988945007324, + "learning_rate": 2e-05, + "loss": 0.2292, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 3.4898414611816406, + "learning_rate": 2e-05, + "loss": 0.6638, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 6.991770267486572, + "learning_rate": 2e-05, + "loss": 1.9517, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 5.0541462898254395, + "learning_rate": 2e-05, + "loss": 1.7256, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 1.647504448890686, + "learning_rate": 2e-05, + "loss": 0.8528, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 2.6307902336120605, + "learning_rate": 2e-05, + "loss": 0.4915, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.8431178331375122, + "learning_rate": 2e-05, + "loss": 0.8148, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.761594533920288, + "learning_rate": 2e-05, + "loss": 0.9241, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 2.7404680252075195, + "learning_rate": 2e-05, + "loss": 0.9169, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 3.715890645980835, + "learning_rate": 2e-05, + "loss": 1.9023, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 2.2809853553771973, + "learning_rate": 2e-05, + "loss": 0.8091, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 1.321066975593567, + "learning_rate": 2e-05, + "loss": 0.977, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 1.0830439329147339, + "learning_rate": 2e-05, + "loss": 0.8662, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 2.58846378326416, + "learning_rate": 2e-05, + "loss": 0.825, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 3.2277541160583496, + "learning_rate": 2e-05, + "loss": 0.814, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 1.486735224723816, + "learning_rate": 2e-05, + "loss": 1.0742, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 1.8081094026565552, + "learning_rate": 2e-05, + "loss": 0.8131, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 2.604997158050537, + "learning_rate": 2e-05, + "loss": 0.5868, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 5.016016483306885, + "learning_rate": 2e-05, + "loss": 0.8185, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 2.8881778717041016, + "learning_rate": 2e-05, + "loss": 0.7617, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.8280321359634399, + "learning_rate": 2e-05, + "loss": 0.6892, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 3.1922149658203125, + "learning_rate": 2e-05, + "loss": 0.7572, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 3.8428804874420166, + "learning_rate": 2e-05, + "loss": 1.4743, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 3.307338237762451, + "learning_rate": 2e-05, + "loss": 1.0949, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 2.005655288696289, + "learning_rate": 2e-05, + "loss": 1.1342, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 6.022993087768555, + "learning_rate": 2e-05, + "loss": 1.4705, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.2536320686340332, + "learning_rate": 2e-05, + "loss": 1.0303, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 1.0175987482070923, + "learning_rate": 2e-05, + "loss": 1.0798, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 3.417553663253784, + "learning_rate": 2e-05, + "loss": 0.9361, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 2.2720513343811035, + "learning_rate": 2e-05, + "loss": 1.0747, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 0.8921431303024292, + "learning_rate": 2e-05, + "loss": 0.9569, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 2.949774980545044, + "learning_rate": 2e-05, + "loss": 1.2076, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 1.8719704151153564, + "learning_rate": 2e-05, + "loss": 1.0281, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.7995576858520508, + "learning_rate": 2e-05, + "loss": 0.6605, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 1.1909035444259644, + "learning_rate": 2e-05, + "loss": 0.4333, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.7953910827636719, + "learning_rate": 2e-05, + "loss": 0.9653, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.8923394680023193, + "learning_rate": 2e-05, + "loss": 1.1519, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.6500399708747864, + "learning_rate": 2e-05, + "loss": 0.5326, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.9078047871589661, + "learning_rate": 2e-05, + "loss": 0.8246, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.2340238094329834, + "learning_rate": 2e-05, + "loss": 0.6124, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 1.590685248374939, + "learning_rate": 2e-05, + "loss": 2.2126, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 1.4105197191238403, + "learning_rate": 2e-05, + "loss": 0.6126, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 2.10088849067688, + "learning_rate": 2e-05, + "loss": 0.8313, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 2.816298484802246, + "learning_rate": 2e-05, + "loss": 1.2018, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 2.9328625202178955, + "learning_rate": 2e-05, + "loss": 1.8354, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 1.0412693227831296e+16, + "train_loss": 0.9965047836303711, + "train_runtime": 220.6689, + "train_samples_per_second": 1.813, + "train_steps_per_second": 0.453 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0412693227831296e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..9362121da36f81d9de2297c75119a157d83a6b45 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b335ba8a2e7688024dbd46c0b9bb397a073e9c2eb15b3eb037a32a5eadb6012 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a7b169656a5a8825222692fd81be97ec8436f40 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97039c2ea390dcfa4b5c7659468bf70c47c724e3be1dee504fbc4df47e322be7 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab3fb26fcca44884542efc2bf649ad271eeaacc9 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5808e6333c7cb64a69cb68140934cdb9c06a177e4ca83c5b0bcb147a35cd1b2 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee8a5904991e67f1472c32c4ba02eaf1027a8542 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ecb2e96850cf95fabea258a9ccf5d0172b1a5f3434e744468797c3f660df82 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0deaad3f1487819ee5d065eaa05944d76fee68bf --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac25f56656ebfa3c517af704dcd053fe6b48701852765108715bd1a7225ff1f +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..b321fb121eab3e47500d1b800184da203fbc1f1c --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ba00a41f1fae5d48885f1556bdbbd47f8f0b8a1b9c6a35047841dfb02a2bcf +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee9b3fb5f6679684f25a613a2615a02d15c37ede --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c58b1a0f41a7fe55b7e85f29e34c7970de01825e2ab0df0d8ccd007d49de4a +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..74b8c7959825c56c54f32dfdccc4e2b55c470733 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d884a664313d36bec7bd03e21ccec80dacded9eea3aa9cbd328cf30bc4ec70a +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92358007ad7269b78586babfa47b2924106a00ad --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 2.9860033988952637, + "learning_rate": 2e-05, + "loss": 0.327, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 5.423563003540039, + "learning_rate": 2e-05, + "loss": 1.0884, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 1.9696483612060547, + "learning_rate": 2e-05, + "loss": 0.2785, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.6474837064743042, + "learning_rate": 2e-05, + "loss": 0.6156, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 4.743607997894287, + "learning_rate": 2e-05, + "loss": 0.9266, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 3.090904474258423, + "learning_rate": 2e-05, + "loss": 0.6268, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.2828066051006317, + "learning_rate": 2e-05, + "loss": 0.1825, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 3.0163958072662354, + "learning_rate": 2e-05, + "loss": 0.8978, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 4.475520610809326, + "learning_rate": 2e-05, + "loss": 0.5604, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 6.505922317504883, + "learning_rate": 2e-05, + "loss": 0.7148, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.19619496166706085, + "learning_rate": 2e-05, + "loss": 0.6846, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 4.3026323318481445, + "learning_rate": 2e-05, + "loss": 0.9959, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.5132761001586914, + "learning_rate": 2e-05, + "loss": 0.1066, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 5.5363874435424805, + "learning_rate": 2e-05, + "loss": 0.807, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 1.0056349039077759, + "learning_rate": 2e-05, + "loss": 1.1092, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 1.985927700996399, + "learning_rate": 2e-05, + "loss": 0.7923, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 1.3938846588134766, + "learning_rate": 2e-05, + "loss": 0.1412, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 2.9086544513702393, + "learning_rate": 2e-05, + "loss": 0.5865, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.148262619972229, + "learning_rate": 2e-05, + "loss": 0.1361, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 8.180176734924316, + "learning_rate": 2e-05, + "loss": 1.714, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 1.437209963798523, + "learning_rate": 2e-05, + "loss": 1.8034, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 1.6405125856399536, + "learning_rate": 2e-05, + "loss": 0.1931, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 4.302958011627197, + "learning_rate": 2e-05, + "loss": 1.1724, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.3256847858428955, + "learning_rate": 2e-05, + "loss": 0.1326, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 1.8101141452789307, + "learning_rate": 2e-05, + "loss": 1.3805, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.6622437238693237, + "learning_rate": 2e-05, + "loss": 0.4884, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.12145165354013443, + "learning_rate": 2e-05, + "loss": 0.7347, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.6513306498527527, + "learning_rate": 2e-05, + "loss": 0.1554, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.1421395093202591, + "learning_rate": 2e-05, + "loss": 0.1763, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 5.034487247467041, + "learning_rate": 2e-05, + "loss": 2.4693, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.23729735612869263, + "learning_rate": 2e-05, + "loss": 0.062, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 2.698179244995117, + "learning_rate": 2e-05, + "loss": 1.453, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.8955222368240356, + "learning_rate": 2e-05, + "loss": 0.3351, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.7866859436035156, + "learning_rate": 2e-05, + "loss": 0.3985, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 2.4781858921051025, + "learning_rate": 2e-05, + "loss": 0.9798, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.6539032459259033, + "learning_rate": 2e-05, + "loss": 0.8767, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 1.6716035604476929, + "learning_rate": 2e-05, + "loss": 0.7938, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.32660794258117676, + "learning_rate": 2e-05, + "loss": 0.8565, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.9017077684402466, + "learning_rate": 2e-05, + "loss": 0.797, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.2260944843292236, + "learning_rate": 2e-05, + "loss": 1.521, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 1.2876487970352173, + "learning_rate": 2e-05, + "loss": 0.3754, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.8568190932273865, + "learning_rate": 2e-05, + "loss": 0.2203, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 1.4791165590286255, + "learning_rate": 2e-05, + "loss": 1.2707, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 2.127885580062866, + "learning_rate": 2e-05, + "loss": 0.9114, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.0902236700057983, + "learning_rate": 2e-05, + "loss": 0.8005, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 0.36327800154685974, + "learning_rate": 2e-05, + "loss": 0.3284, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 1.36818528175354, + "learning_rate": 2e-05, + "loss": 0.5004, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.21333810687065125, + "learning_rate": 2e-05, + "loss": 0.2709, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 1.2604660987854004, + "learning_rate": 2e-05, + "loss": 0.6217, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 1.3066768646240234, + "learning_rate": 2e-05, + "loss": 0.303, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5454346405806080.0, + "train_loss": 0.7134788227081299, + "train_runtime": 143.6135, + "train_samples_per_second": 2.785, + "train_steps_per_second": 0.696 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5454346405806080.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..72742766fcb3f7662888d9fbc0c809811bb48b5a --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacdfcf71ea44194eb00074114f9870b6c89e10c75fa118f8f2e35bb954e2e8c +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..23e849c5ede2a590cdce41feb0451cd1212208bf --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a2c88652e70a9afad16f1e9a4842a0a788962ded189adf89069ed48d4274712 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..7bcb58bcc7e8465354ee841b745a4eb089c96183 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf37397eeed92207cdc3dd96f6455cee565379adee808c6dcfc3870363148c3 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9d39d4862ac8f67d805c9f62fa2c165ae76b1b1 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f4f7d3169b7ad60543e51d04f200e4012a154a36a8e18b43090df79b7a64ab +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..da25958d5101e8dbcf3bab8c3d8a4ce663e4b9a9 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725ac25187616e9b47ce5d9f7877fa6b581bfaafdd0d471b071f933c91ad6b08 +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..5dd34b5a7c66f107b52e57136bb0b0be45291cbf --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3fa309d01a45805f581f1d0bd78476a5041d3c83fe80bc72b0654546f91bb30 +size 778341886 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..63998e189c5781b1549c2b8c81274a58b489c505 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed907485521355ca45ed255c1d6c05aa22c758a43bf209b80644008c3ade42f +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1c0655efdabe8a249ff8cd4bf9cae900c51b5b8 --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1904df9c957424455733e844735fa2e920280491f7fa504464cf22cad86e40b +size 778341034 diff --git a/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b71cf81839a804c2cd5c1f7f5c9d401662bbf9a --- /dev/null +++ b/client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.3082524240016937, + "learning_rate": 2e-05, + "loss": 0.6482, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 0.20196011662483215, + "learning_rate": 2e-05, + "loss": 0.3392, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.12239228934049606, + "learning_rate": 2e-05, + "loss": 0.1658, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.2035088539123535, + "learning_rate": 2e-05, + "loss": 0.0952, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 0.1327526867389679, + "learning_rate": 2e-05, + "loss": 0.0091, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 3.7317419052124023, + "learning_rate": 2e-05, + "loss": 0.4195, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.08746431022882462, + "learning_rate": 2e-05, + "loss": 0.7528, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 5.947171211242676, + "learning_rate": 2e-05, + "loss": 0.6166, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 3.436347246170044, + "learning_rate": 2e-05, + "loss": 0.4109, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 3.7922656536102295, + "learning_rate": 2e-05, + "loss": 5.073, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 6.673124313354492, + "learning_rate": 2e-05, + "loss": 2.471, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.4746553897857666, + "learning_rate": 2e-05, + "loss": 0.3446, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 4.6311354637146, + "learning_rate": 2e-05, + "loss": 0.8753, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 6.73305606842041, + "learning_rate": 2e-05, + "loss": 0.9431, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.6246132850646973, + "learning_rate": 2e-05, + "loss": 0.0616, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 2.1175317764282227, + "learning_rate": 2e-05, + "loss": 0.1549, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 2.971907377243042, + "learning_rate": 2e-05, + "loss": 2.1001, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 3.170811176300049, + "learning_rate": 2e-05, + "loss": 1.278, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 4.33163595199585, + "learning_rate": 2e-05, + "loss": 1.2597, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 1.0930659770965576, + "learning_rate": 2e-05, + "loss": 0.3472, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 4.445111274719238, + "learning_rate": 2e-05, + "loss": 0.8923, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 0.008833651430904865, + "learning_rate": 2e-05, + "loss": 0.282, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 2.986541509628296, + "learning_rate": 2e-05, + "loss": 0.7108, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.189093589782715, + "learning_rate": 2e-05, + "loss": 1.1752, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 0.4901527762413025, + "learning_rate": 2e-05, + "loss": 0.4185, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.2659938633441925, + "learning_rate": 2e-05, + "loss": 0.3296, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.550976037979126, + "learning_rate": 2e-05, + "loss": 0.4602, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.26266545057296753, + "learning_rate": 2e-05, + "loss": 0.477, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.0469081401824951, + "learning_rate": 2e-05, + "loss": 0.2115, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.0375242605805397, + "learning_rate": 2e-05, + "loss": 0.0109, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 1.9784115552902222, + "learning_rate": 2e-05, + "loss": 0.508, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 3.2599825859069824, + "learning_rate": 2e-05, + "loss": 1.3798, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 1.2077398300170898, + "learning_rate": 2e-05, + "loss": 0.1684, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.5861782431602478, + "learning_rate": 2e-05, + "loss": 0.0761, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.10536394268274307, + "learning_rate": 2e-05, + "loss": 0.0163, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.3317738771438599, + "learning_rate": 2e-05, + "loss": 0.2937, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 1.141127109527588, + "learning_rate": 2e-05, + "loss": 0.1432, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.5267685055732727, + "learning_rate": 2e-05, + "loss": 0.1033, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.5539693236351013, + "learning_rate": 2e-05, + "loss": 0.2652, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.15677958726882935, + "learning_rate": 2e-05, + "loss": 0.0412, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 2.541965961456299, + "learning_rate": 2e-05, + "loss": 0.9765, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.20113620162010193, + "learning_rate": 2e-05, + "loss": 0.042, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.1557822972536087, + "learning_rate": 2e-05, + "loss": 0.0232, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.5913605690002441, + "learning_rate": 2e-05, + "loss": 0.1017, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.2243813276290894, + "learning_rate": 2e-05, + "loss": 0.5208, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 5.133846282958984, + "learning_rate": 2e-05, + "loss": 1.4791, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 0.5330513119697571, + "learning_rate": 2e-05, + "loss": 0.0408, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 2.9799649715423584, + "learning_rate": 2e-05, + "loss": 1.0588, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.08848804980516434, + "learning_rate": 2e-05, + "loss": 0.0274, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 1.0476655960083008, + "learning_rate": 2e-05, + "loss": 0.1449, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5308334865383424.0, + "train_loss": 0.6148797327280044, + "train_runtime": 134.9522, + "train_samples_per_second": 2.964, + "train_steps_per_second": 0.741 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5308334865383424.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}