diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..6aa9327a6a25187cfc9a425dec597111f52ab422 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a1d84be2f0637a8002b5b3af874484cae552c6280e756bce7dde0cbb13145d +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..653693a0d79f1786a02c526d8cf0e0c814046e26 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6248e7080bad36002d932fc329308efbb58cbc3c9469304dbba71b7938a84367 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..64c89adda236aaee3532a990d18cc0bf5c192dfb --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90882c6daa75109703f69cb50d7e04c460d1f4344bc438823391eea9d7bd6754 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..0ccd9ea4a03c955b851ecb5b44e3add516efca78 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc2d6265491807df8fd3b6c3b15674150d5a5ffb36cf28b2227763ed5136aaa +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d42c27d317c3b765680fde6edff0cd7f3b1e14c7 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e675f4c1968626339279cb96325491c6cc8cae380d71c43f5777f66a255e95 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..9817d7da70a1df36567643248947a8845aba591e --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9991679d96f74ba5990f64b8d121ee2d97b23b8e200fcb2e00dd9db511b1ae6 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d7dd97edda1a5c8c91f326e45c93bd3fe80abb3 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4e51792a645233c4a482e675f35b643ef77eb473a05f905ca1b8726622f34f +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4388bd4df610cc40b24e75623267f7d25fb84121 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669e7eac935aedc38163869e1bd4f9b80fa7ed24e298d5b712c724ce59b9eb6c +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d08db81a9ef8bd79c2a89d24f2becf45df8393e --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 0.47488170862197876, + "learning_rate": 2e-05, + "loss": 0.0866, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 3.9166059494018555, + "learning_rate": 2e-05, + "loss": 0.2647, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 1.567215085029602, + "learning_rate": 2e-05, + "loss": 0.1872, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 1.5392768383026123, + "learning_rate": 2e-05, + "loss": 0.2264, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.43878668546676636, + "learning_rate": 2e-05, + "loss": 0.0416, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 2.8712286949157715, + "learning_rate": 2e-05, + "loss": 0.3169, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.626501202583313, + "learning_rate": 2e-05, + "loss": 0.027, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 0.570056676864624, + "learning_rate": 2e-05, + "loss": 0.0335, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.9959315061569214, + "learning_rate": 2e-05, + "loss": 0.2356, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 0.9875115752220154, + "learning_rate": 2e-05, + "loss": 0.0379, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 0.563103973865509, + "learning_rate": 2e-05, + "loss": 0.0213, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 4.232426643371582, + "learning_rate": 2e-05, + "loss": 0.216, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 2.3981211185455322, + "learning_rate": 2e-05, + "loss": 0.0729, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 1.0411518812179565, + "learning_rate": 2e-05, + "loss": 0.0328, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 0.5252733826637268, + "learning_rate": 2e-05, + "loss": 0.3069, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.7728747129440308, + "learning_rate": 2e-05, + "loss": 0.0693, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 1.3379911184310913, + "learning_rate": 2e-05, + "loss": 0.1159, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 2.592834949493408, + "learning_rate": 2e-05, + "loss": 0.1534, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 2.610074520111084, + "learning_rate": 2e-05, + "loss": 0.1051, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.4380384385585785, + "learning_rate": 2e-05, + "loss": 0.1043, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 1.5626729726791382, + "learning_rate": 2e-05, + "loss": 0.2938, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.40467581152915955, + "learning_rate": 2e-05, + "loss": 0.0479, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.2873458862304688, + "learning_rate": 2e-05, + "loss": 0.2392, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 2.608442783355713, + "learning_rate": 2e-05, + "loss": 0.0982, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 0.45398300886154175, + "learning_rate": 2e-05, + "loss": 0.0287, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5557405387587584.0, + "train_loss": 0.13453627586364747, + "train_runtime": 250.9273, + "train_samples_per_second": 0.797, + "train_steps_per_second": 0.199 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5557405387587584.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..e654ae826b8115a6f80d64d7af7afc339717edbe --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4245025adbb4ba892c81e432826e975168b3ec8383bdede0fcceb79870619bf +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c93d7287c89e93bd3ea0e27e6aeee9c42250128 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e0dd549fa9834746df684ac37929c7a95d637624e23ad474b2dff4204fde527 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..448c51f70f4a62a59b083887369af7dbfc16bcf7 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9e0cbaadad63f4557adbd3ea5cd1c9365bb01eabd7522200419dbc59ff601c +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e6a8feb7149492c82d6bdc9be821e036e12074d --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2663e5856636050b16fce836516ee7bc4fd7b09276c4d22e55ab746de5bf9c +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..bffb0871819fc9d88c83f15b44c662e7805a2518 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec459ecf2f10078d738357b93e8272ac5a65be27fa5014237fa6258d67df5a9 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..9831f8cb54a66fe72daa636e8ef32c29f072246a --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed18be924b7d8fb8ea8d8d4e4b91a44fce9170e1b6fd48f58ac873e51f01586 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7ced66b351e5f912d55ca77a7c2b3844c692c9e --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6bcfef6dd11b739da5d24b81cdef29d3aa96064e324c31dda33bba35c29860 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fdb14bee7c4b8d5c091eabadd2cd3aa23e103d2 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41222190a5e0175394253c867cf8607218ae331b1e6679a68f8edca8afd60735 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0dac324a6d924a8b15f04de6a8d34b597e1d674 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 0.46819382905960083, + "learning_rate": 2e-05, + "loss": 0.2447, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 1.0340005159378052, + "learning_rate": 2e-05, + "loss": 0.071, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 1.917952299118042, + "learning_rate": 2e-05, + "loss": 0.1966, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.3073928952217102, + "learning_rate": 2e-05, + "loss": 0.0277, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 2.6226162910461426, + "learning_rate": 2e-05, + "loss": 0.1002, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 0.4787392020225525, + "learning_rate": 2e-05, + "loss": 0.0788, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.5545408725738525, + "learning_rate": 2e-05, + "loss": 0.0277, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 0.3709498941898346, + "learning_rate": 2e-05, + "loss": 0.0334, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 2.895387649536133, + "learning_rate": 2e-05, + "loss": 0.2151, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 0.8211622834205627, + "learning_rate": 2e-05, + "loss": 0.0471, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.7779847383499146, + "learning_rate": 2e-05, + "loss": 0.1101, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 1.0119048357009888, + "learning_rate": 2e-05, + "loss": 0.0509, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.7551224827766418, + "learning_rate": 2e-05, + "loss": 0.061, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 1.1288940906524658, + "learning_rate": 2e-05, + "loss": 0.0847, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 3.3638179302215576, + "learning_rate": 2e-05, + "loss": 0.2176, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.2910892367362976, + "learning_rate": 2e-05, + "loss": 0.0181, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 0.6550900340080261, + "learning_rate": 2e-05, + "loss": 0.0268, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 0.7145023345947266, + "learning_rate": 2e-05, + "loss": 0.0648, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 1.2834157943725586, + "learning_rate": 2e-05, + "loss": 0.2366, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.8662907481193542, + "learning_rate": 2e-05, + "loss": 0.0295, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 0.6141842603683472, + "learning_rate": 2e-05, + "loss": 0.0423, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.8501439094543457, + "learning_rate": 2e-05, + "loss": 0.0458, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.9633840322494507, + "learning_rate": 2e-05, + "loss": 0.0736, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 0.34262603521347046, + "learning_rate": 2e-05, + "loss": 0.0152, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 0.3050743341445923, + "learning_rate": 2e-05, + "loss": 0.0163, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5596219090927616.0, + "train_loss": 0.08542263984680176, + "train_runtime": 248.9867, + "train_samples_per_second": 0.803, + "train_steps_per_second": 0.201 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5596219090927616.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffbceba458c7743d6e51d3c1144b1e82fde6e910 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bfcf82c883429cb22818be001c1cc587a0b98ea5fdcbf7d3be831b374052a4 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..574dcc48f9fb897376b6218680a646d5be710b1f --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b21ec19732b48f7037a15443898d61f0d0b0f3cfb78e1b8c537b033c6a47a4 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9579cb00edf7a5c4a1e9c64353e1c1a2727a020 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a365a4d93e2e6d8ddbb99a6189015d6e682ac40c0a5499a2b8d8344b75555ecb +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3e3f7302e2d0af56268824306ad347a65f55dff --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b1e12e6843f9f48fa60001056847936749611862bb47a9ba7c4470d3ca532a +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc7358c14ceb84a9fcf225885b775287b4998914 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fc3def105651c26cc37ac4a34fe29a73f9f6a11d10d7091a84bbe187224f6f +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..27747af9fa242ae6ae015d3cd3e4443cc1fedd99 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3691084a0a3dd43e3b392dfa4106d3d2a8acac102c57215ccab318e38fc0838f +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0e329ae23a047ef43444366d22990ec7e9093b7 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af87464154ee1f7f447c689fc8deac6df53736d5ee0eeb164b1257fb55b5020 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8005894bbef3a42f313ded282eac2d09c888c54 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3932b2cd6c354eef4b1d8a5c59b9b29569a60008e399dab6614eee185794df +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7116ce00668cebf9035381b087e77f8c79952485 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 1.3417752981185913, + "learning_rate": 2e-05, + "loss": 0.1006, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.6572253108024597, + "learning_rate": 2e-05, + "loss": 0.0886, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 2.1601297855377197, + "learning_rate": 2e-05, + "loss": 0.3825, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 3.5155367851257324, + "learning_rate": 2e-05, + "loss": 0.8629, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 2.19246768951416, + "learning_rate": 2e-05, + "loss": 0.3246, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 1.0038927793502808, + "learning_rate": 2e-05, + "loss": 0.1555, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 5.269669532775879, + "learning_rate": 2e-05, + "loss": 0.0864, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 1.0328688621520996, + "learning_rate": 2e-05, + "loss": 0.1792, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 0.8982062935829163, + "learning_rate": 2e-05, + "loss": 0.1407, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.5086755752563477, + "learning_rate": 2e-05, + "loss": 0.1533, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.2126076221466064, + "learning_rate": 2e-05, + "loss": 0.1508, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 1.6549688577651978, + "learning_rate": 2e-05, + "loss": 0.2418, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 3.2237186431884766, + "learning_rate": 2e-05, + "loss": 0.2407, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 1.624603033065796, + "learning_rate": 2e-05, + "loss": 0.1989, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.289777159690857, + "learning_rate": 2e-05, + "loss": 0.0911, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.5119551420211792, + "learning_rate": 2e-05, + "loss": 0.0293, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 1.698155164718628, + "learning_rate": 2e-05, + "loss": 0.2148, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 3.7996082305908203, + "learning_rate": 2e-05, + "loss": 0.2261, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.48896124958992004, + "learning_rate": 2e-05, + "loss": 0.3192, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.537122368812561, + "learning_rate": 2e-05, + "loss": 0.0526, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 2.135448455810547, + "learning_rate": 2e-05, + "loss": 0.1417, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.1811649799346924, + "learning_rate": 2e-05, + "loss": 0.1384, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 2.414937734603882, + "learning_rate": 2e-05, + "loss": 0.1008, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 3.2851128578186035, + "learning_rate": 2e-05, + "loss": 0.2202, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.5515780448913574, + "learning_rate": 2e-05, + "loss": 0.1151, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5482320970645504.0, + "train_loss": 0.19822086334228517, + "train_runtime": 248.7984, + "train_samples_per_second": 0.804, + "train_steps_per_second": 0.201 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5482320970645504.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..329b3f3262e50bb8aa491c4d401733cc7a30b4a7 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c14b217787ea7ae817cb3f8b2f4f5e637d74864386d99a4f37982ebf2d124be +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ae6816043833a0cc969d8034e39c66045c2fd64 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff1d0fd3a7664e105ec96c5d81ef97579f149b1d607142f982f5a6ad1ebccf6 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..605d909a766a211dd6742d69280716221043f910 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e665491ab85a1d6a2aacbde93a87d790eb63f0025ead836eb3052d4a0f37a9 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..944cc48f33ee2d08fe78117853456e99160a97ca --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39965412854e8ab9d2b39b2e4c0a54c77fc5208158bbbd4eb325eab68359642 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..19af7ebdb311c5fd9c17b19b044c3c70a6f26df5 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a1a0cec0272eba07433f0dcea21da0c0980c297940976e2a3af4f2c6df1da97 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f44399f8f01627e6af273731c8a00a2c752529f --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f663db034a9b94866914bcd51580375ba7f402eeebd01cfa621255d92d4225da +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..84c465260ab4227a125432c8923a4bbbc7cdd17b --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee65fc133a6cb7b7315c4efc73d0249be8971ed59b278b1f23d9feae1949b73 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b719c1886fb2f779673cd8a13afae31ec1ab66f4 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d879b0671c7101fc956f8b05a51c8916e22bb1e2bd31c2af450b2508d52c94b +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..026394ff315a73f79b9a718446ae876cd5881248 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 2.911635637283325, + "learning_rate": 2e-05, + "loss": 0.3981, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.5911923050880432, + "learning_rate": 2e-05, + "loss": 0.2374, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 3.1119353771209717, + "learning_rate": 2e-05, + "loss": 0.1946, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 2.107414484024048, + "learning_rate": 2e-05, + "loss": 0.2211, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 3.076481580734253, + "learning_rate": 2e-05, + "loss": 0.3442, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 1.9608561992645264, + "learning_rate": 2e-05, + "loss": 0.387, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 1.599261999130249, + "learning_rate": 2e-05, + "loss": 0.1272, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 0.731563150882721, + "learning_rate": 2e-05, + "loss": 0.092, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 5.21544075012207, + "learning_rate": 2e-05, + "loss": 0.5217, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.667925477027893, + "learning_rate": 2e-05, + "loss": 0.0901, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.7500098943710327, + "learning_rate": 2e-05, + "loss": 0.3038, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 3.87495493888855, + "learning_rate": 2e-05, + "loss": 0.27, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 5.719139099121094, + "learning_rate": 2e-05, + "loss": 0.8637, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 3.9575798511505127, + "learning_rate": 2e-05, + "loss": 0.2241, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 3.180509090423584, + "learning_rate": 2e-05, + "loss": 0.2912, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 3.4659345149993896, + "learning_rate": 2e-05, + "loss": 0.4136, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 5.336747646331787, + "learning_rate": 2e-05, + "loss": 0.3516, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.7263541221618652, + "learning_rate": 2e-05, + "loss": 0.0987, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 2.231567621231079, + "learning_rate": 2e-05, + "loss": 0.3073, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 1.5393335819244385, + "learning_rate": 2e-05, + "loss": 0.331, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 6.146248817443848, + "learning_rate": 2e-05, + "loss": 0.585, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.6960240602493286, + "learning_rate": 2e-05, + "loss": 0.1809, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.0312891006469727, + "learning_rate": 2e-05, + "loss": 0.213, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 2.6583869457244873, + "learning_rate": 2e-05, + "loss": 0.5143, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 1.2781920433044434, + "learning_rate": 2e-05, + "loss": 0.1894, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5673132543180800.0, + "train_loss": 0.3100400161743164, + "train_runtime": 247.9097, + "train_samples_per_second": 0.807, + "train_steps_per_second": 0.202 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5673132543180800.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..1395430276ee4a62c57ed66ad329b8996f0b6df2 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33c91a5f48e47e093054ca5ea2eaf9736af79442e0516e57a0f41c0146f3ceb +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..3318a37ca1ae8970b9505ae7890662b28364050e --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a370bf90c623e3fdc2b46f736270265d5ee116a3535512b775de363feb4fde02 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..526491453bff6c8fc09b93f9fce598aa0832d98c --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26b2eaf8fd7cfc4c60cd119648df240d927a295f9bfa5bb17c4086470d8ca48 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..071bf952b105efb6bc18cd91d65a9d920c6f9b39 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e5fe838295293ecf035ce5db3510c31d207103e17cbfb107c2870e09d43a61 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fb13f69873d62e08b1f1317ba66e3b85e916c73 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc3632fc93600a7982ac68490dbb2ebfb77903fd3b7e050faebfd652c67dab1 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..1eb6216307646f557a4ee91c333c75767062608b --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09dac0b9dfd4a1b7e3efe7a6e1d9d1bd65a9a70bd9d311fc2ecdb594e943d938 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..624dc5f1cb555460db2a6a79df8dfea69ee88fcb --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88ed8abf53a12d4bba898eb3fe04164eacaee747dc6953d03e75267b99de6f4 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..332afb28f209a660adeff50f48b8bf5b354812e2 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b394665feb33db3aa2884ce08cc02cad6dccd97cb5e3c8dae14515fbd64e625 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..08df1b78d00500524082f19d61cd2986d2130767 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 1.8551050424575806, + "learning_rate": 2e-05, + "loss": 0.2176, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 1.4949532747268677, + "learning_rate": 2e-05, + "loss": 0.3246, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 0.8867964148521423, + "learning_rate": 2e-05, + "loss": 0.1527, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 1.1332507133483887, + "learning_rate": 2e-05, + "loss": 0.1606, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 1.1229583024978638, + "learning_rate": 2e-05, + "loss": 0.1485, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 1.5596634149551392, + "learning_rate": 2e-05, + "loss": 0.1656, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 1.693656325340271, + "learning_rate": 2e-05, + "loss": 0.1749, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 2.1607701778411865, + "learning_rate": 2e-05, + "loss": 0.1646, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.7262656688690186, + "learning_rate": 2e-05, + "loss": 0.2236, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.5488182306289673, + "learning_rate": 2e-05, + "loss": 0.1973, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.2641401290893555, + "learning_rate": 2e-05, + "loss": 0.154, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 0.8307248950004578, + "learning_rate": 2e-05, + "loss": 0.2771, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 1.2456833124160767, + "learning_rate": 2e-05, + "loss": 0.2396, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 0.3302162289619446, + "learning_rate": 2e-05, + "loss": 0.1226, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.5295584201812744, + "learning_rate": 2e-05, + "loss": 0.2166, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.6318351030349731, + "learning_rate": 2e-05, + "loss": 0.1125, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 1.4376343488693237, + "learning_rate": 2e-05, + "loss": 0.3601, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.818044900894165, + "learning_rate": 2e-05, + "loss": 0.1704, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.8872388601303101, + "learning_rate": 2e-05, + "loss": 0.1346, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 2.1370484828948975, + "learning_rate": 2e-05, + "loss": 0.1651, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 2.046491861343384, + "learning_rate": 2e-05, + "loss": 0.1531, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 2.7971935272216797, + "learning_rate": 2e-05, + "loss": 0.305, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.2505499124526978, + "learning_rate": 2e-05, + "loss": 0.1415, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 1.0299497842788696, + "learning_rate": 2e-05, + "loss": 0.0801, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 1.3772761821746826, + "learning_rate": 2e-05, + "loss": 0.2148, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 6353755020722176.0, + "train_loss": 0.19108272552490235, + "train_runtime": 250.9701, + "train_samples_per_second": 0.797, + "train_steps_per_second": 0.199 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6353755020722176.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..170a5bc50a2e1b14c46ae714c1708932e1a21918 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa371cc628332dd87a25e51e5195c8e030170b45afaed687b775d7a900aa238 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..836f0ed95c25f4e0e772e350f1c079e0c0321198 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccabd25c60cf78d22479fb05029d6373408c726fb72e8c353e364dcf41d94058 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..dfd01c00e33de0849aff545cecac26b17e573b9b --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c78158bd76652abc30395caba8b8cc5bc770a1271cada0423a6a1a69779accd +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ab532c2275a77cc0d960e10d993dda0dc535ca8 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edcb8aa0a0d1f3e5b75e2c71e3ad1ff8a9183f302307e2547594096bff67497 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..931ea0d22a1bec36a668177d3c8b09835df09358 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcd8cc59e6b9a48bef1ea7f506fd8ce5c936a443f551367167297a9fa7ac939 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..dddd95a2e4aa79c8e05a74a99719e5891c436e21 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25cf00f0196c6aa445a861e1b7a918cf5b123a98d2f7920c7dfc61991002bd51 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b13cc7930f34f9e10c06dc0f91dbd34f8cf2674b --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2f66dc5c620e747273575cd0ee39a11d18576a895cdad3388e35b5d9d08e66 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f565a76c0311d87c32cb89783acb5f71e10c932 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77b14542dfce73f55d79a6d0907b5d68f6f8be5790453996c1cff6b610d4f9e +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..824843d498e3b278a903600512c5e6b048365928 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 2.332709550857544, + "learning_rate": 2e-05, + "loss": 0.6429, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 1.5007182359695435, + "learning_rate": 2e-05, + "loss": 0.0834, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 1.6750152111053467, + "learning_rate": 2e-05, + "loss": 0.2684, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 1.3928828239440918, + "learning_rate": 2e-05, + "loss": 0.1173, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.542361319065094, + "learning_rate": 2e-05, + "loss": 0.0424, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 4.405244827270508, + "learning_rate": 2e-05, + "loss": 0.3662, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 2.1399025917053223, + "learning_rate": 2e-05, + "loss": 0.2078, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 3.3304431438446045, + "learning_rate": 2e-05, + "loss": 0.1876, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 3.8013570308685303, + "learning_rate": 2e-05, + "loss": 1.1929, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 3.0224690437316895, + "learning_rate": 2e-05, + "loss": 0.1847, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 3.0616440773010254, + "learning_rate": 2e-05, + "loss": 0.7966, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 0.7741594314575195, + "learning_rate": 2e-05, + "loss": 0.0629, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 2.4658780097961426, + "learning_rate": 2e-05, + "loss": 0.3464, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 5.040727138519287, + "learning_rate": 2e-05, + "loss": 0.8479, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 2.7441022396087646, + "learning_rate": 2e-05, + "loss": 0.2203, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 3.0441110134124756, + "learning_rate": 2e-05, + "loss": 0.4503, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 4.406171798706055, + "learning_rate": 2e-05, + "loss": 0.4516, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 4.008393287658691, + "learning_rate": 2e-05, + "loss": 0.5816, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 2.70620059967041, + "learning_rate": 2e-05, + "loss": 0.2133, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 1.4325283765792847, + "learning_rate": 2e-05, + "loss": 0.164, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 3.276726722717285, + "learning_rate": 2e-05, + "loss": 0.46, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.5454394817352295, + "learning_rate": 2e-05, + "loss": 0.1185, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 3.298138380050659, + "learning_rate": 2e-05, + "loss": 0.5062, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 2.165782928466797, + "learning_rate": 2e-05, + "loss": 0.1075, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.866793632507324, + "learning_rate": 2e-05, + "loss": 0.2717, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5468401602396160.0, + "train_loss": 0.35570180892944336, + "train_runtime": 247.1505, + "train_samples_per_second": 0.809, + "train_steps_per_second": 0.202 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5468401602396160.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e0a93d8bac8fe4547aab167b886ed0475f62318 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c9587256b98760746ceec224ca0a7ef0364ecc20b4681f399dc69047a9ff56 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..580338238e9f267156526455b2d8d93b52ea6dda --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea93fd549b23365f6880ba025dc69b8e023fd1be8adc1def623c42a783799de +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3bd52a2373fdeaa3719c49c6801aae9ea133555 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c75595d6b2e9eebb1e9c2049d2b14499ffe0f6b0f3c9ea967649fb50092e884 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..f8aad7e6de51303c13b69fdc97b9971c7f025550 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea2277184c01020f6093febd623e931c372c856641946b9097ec07553531b07 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c096d9707eb5c3b6d226e1662bbeff9af47e6d5 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bf66d47ed65237575bad4c3807b972005aa4b47c1628a344958097183ade4b +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8e170249660c2daa8af0c80d01c6692ddd3e3f8 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031aa1d2da4c15b901b014baebaa9725e2e4860412e22138493b86cd3de9a54e +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2ab94a3959e6c5a78fc8de95e1f0702c9bb904c --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0da8cda17e6a3585f052962426acc99bb6b08a5da62833dc4d3acb0f9f6cb0d +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..648d833729ed4fbf13f3c6343262aa4dddfe8ef0 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2bca6a04fdf2a5545ff7b0c6834bc7ed1703ad569244b4d5a2deeb79fcc21a +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8ed961e57edddab6b39d6bc0bbd3d062737de1ae --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 1.2460851669311523, + "learning_rate": 2e-05, + "loss": 0.2467, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.9425845146179199, + "learning_rate": 2e-05, + "loss": 0.2072, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 1.6000736951828003, + "learning_rate": 2e-05, + "loss": 0.1795, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 2.5629725456237793, + "learning_rate": 2e-05, + "loss": 0.2622, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 1.500010371208191, + "learning_rate": 2e-05, + "loss": 0.3032, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 1.562117099761963, + "learning_rate": 2e-05, + "loss": 0.236, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.704431414604187, + "learning_rate": 2e-05, + "loss": 0.111, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 1.3375930786132812, + "learning_rate": 2e-05, + "loss": 0.2536, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 0.7987673282623291, + "learning_rate": 2e-05, + "loss": 0.1829, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.2102272510528564, + "learning_rate": 2e-05, + "loss": 0.1665, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.0640064477920532, + "learning_rate": 2e-05, + "loss": 0.3167, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 1.8817851543426514, + "learning_rate": 2e-05, + "loss": 0.2086, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 1.6124526262283325, + "learning_rate": 2e-05, + "loss": 0.2416, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 1.3249093294143677, + "learning_rate": 2e-05, + "loss": 0.1447, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.7619050741195679, + "learning_rate": 2e-05, + "loss": 0.3614, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 2.073577880859375, + "learning_rate": 2e-05, + "loss": 0.2157, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 3.0285918712615967, + "learning_rate": 2e-05, + "loss": 0.368, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.3940951824188232, + "learning_rate": 2e-05, + "loss": 0.1024, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.9209564924240112, + "learning_rate": 2e-05, + "loss": 0.3769, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 2.650763988494873, + "learning_rate": 2e-05, + "loss": 0.4719, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 2.117210626602173, + "learning_rate": 2e-05, + "loss": 0.3104, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 2.5862722396850586, + "learning_rate": 2e-05, + "loss": 0.2915, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 2.3375983238220215, + "learning_rate": 2e-05, + "loss": 0.3428, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 1.7520791292190552, + "learning_rate": 2e-05, + "loss": 0.1952, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 1.5161030292510986, + "learning_rate": 2e-05, + "loss": 0.2069, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 1.0988599403085824e+16, + "train_loss": 0.2521377182006836, + "train_runtime": 282.691, + "train_samples_per_second": 0.707, + "train_steps_per_second": 0.177 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0988599403085824e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..6dcb530dd667e2076c8303961d63847c2fe972ff --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac73f95de29f7646ff90f28ab0540c83bf32767ad96d6736fed944958a78b05 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..457ed9c5aa223764fbf98f36455d33ba98479d18 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2adb71aa2bc7175e7fffbcc5caedae5796b2489bf2500fb840abee9b20a7d2b +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..99c74075ce8ef737798c2274cc0bc04d1aec00c6 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48839df6721f86968d070db892274050da2a8f995f0aef16d19897f6feab158b +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..3332781fc3cbc986134198612571918b0b3a15e9 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64353978cabf5bba418f8aa31add990ddc904f7f42ccd87650d6bf3fb87f709 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..81aa9f6d167aaacd0ee4556ee6cff951b7920d9e --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b119197c309b92cc7b1e002f46ca33d74ca8c57c1a97ebef4a893d23927240 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..c559470c0c2c9d0ebdd1382a78617143963b61d5 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a240713204941ef66bd202c86129a9b0e6b38ba2e3609efad4e53dc3a6e9c231 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..cece9bd5668d98116d9a3619578fc71dda761789 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604fdd90e0dec105e4fda60b989d7fde5de1742678b40dd924e20dcf98b563d0 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe9dfed2128dfa06b18ae599d4b5eaff501997bd --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec94220e117fc3658c3d7b321e66a4598f2106f3822ba7daa838817c1b246d7 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..817d7e5ff72c1849ce8da4776c2e84ff4a6c8c56 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 1.275256633758545, + "learning_rate": 2e-05, + "loss": 0.2181, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.6634196639060974, + "learning_rate": 2e-05, + "loss": 0.0692, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 1.4541486501693726, + "learning_rate": 2e-05, + "loss": 0.0907, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.9330166578292847, + "learning_rate": 2e-05, + "loss": 0.0643, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 3.375493288040161, + "learning_rate": 2e-05, + "loss": 0.2939, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 0.661520779132843, + "learning_rate": 2e-05, + "loss": 0.0999, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 2.982719659805298, + "learning_rate": 2e-05, + "loss": 0.125, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 1.414190411567688, + "learning_rate": 2e-05, + "loss": 0.0758, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.9259824752807617, + "learning_rate": 2e-05, + "loss": 0.12, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 3.1503803730010986, + "learning_rate": 2e-05, + "loss": 0.3179, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.5202919244766235, + "learning_rate": 2e-05, + "loss": 0.0637, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 2.522800922393799, + "learning_rate": 2e-05, + "loss": 0.2928, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 1.446891188621521, + "learning_rate": 2e-05, + "loss": 0.3879, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 2.5897462368011475, + "learning_rate": 2e-05, + "loss": 0.8006, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.5169357061386108, + "learning_rate": 2e-05, + "loss": 0.1586, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 1.698795199394226, + "learning_rate": 2e-05, + "loss": 0.1538, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 2.1556849479675293, + "learning_rate": 2e-05, + "loss": 0.4018, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.5232301950454712, + "learning_rate": 2e-05, + "loss": 0.1932, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.6759546399116516, + "learning_rate": 2e-05, + "loss": 0.0467, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.5979565978050232, + "learning_rate": 2e-05, + "loss": 0.0695, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 1.207302212715149, + "learning_rate": 2e-05, + "loss": 0.1114, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.2805650234222412, + "learning_rate": 2e-05, + "loss": 0.1355, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.7414960861206055, + "learning_rate": 2e-05, + "loss": 0.1818, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 1.904970645904541, + "learning_rate": 2e-05, + "loss": 0.1594, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.3445775508880615, + "learning_rate": 2e-05, + "loss": 0.2422, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5765616241737728.0, + "train_loss": 0.19495010375976562, + "train_runtime": 249.819, + "train_samples_per_second": 0.801, + "train_steps_per_second": 0.2 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5765616241737728.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..e07ea6d00e70f8656d2aafbd0e0c8856e85b32ec --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6218c9412877f9822e69b33e53b437541eab5caeac94328cdd334356773f87 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb810ec2673ab27e08ccfe6a11b4579aed3b827c --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bd54398f8fb475d1218b9a80dece1efaca33703016253c0c8200fa5193b481 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc5f13f89257d5d53ec421b060270744525b781d --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0577e6a2518d635f475a0ee55f0f8eed61a67cb144618b29812305e1fa4968f5 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..adc79568a3daca97a5c2f661bcd8f6ada62c1d42 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2e5df23736111f1da69c4da0863a41d40cb06cc5fe700159da05e5cdadf5c1 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca1df0922cee4c49ce7ef1a27359f70308546566 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09db413da5b4fb44e0db2fa663001b4c44c20b022e49c2b091c85166eb650ce1 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ea914e4d28530ef119d7d7aff6bf61eb412f7e8 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b64362a754451f6c20448bddc7d4f80282ae3d62f910927274e47c8676aa32 +size 1167513110 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0120220371d00303892e7568e6073dbbc0718af --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ef47326e40f6464c5336504279112cd072440542cb885eceeeafa8a0f4eff3 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..17b77362a86e9f6d8ca8e100064d45c8b9d4f9da --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f34434bdd7e2ea68386aab2baf5c32780306c01506ff6b2081d10d7134b3bb00 +size 1167511866 diff --git a/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d63b2ffb7f8a6966b7fc00e9d4c19ccf97774ef3 --- /dev/null +++ b/client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 2.2547056674957275, + "learning_rate": 2e-05, + "loss": 0.4225, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.924757719039917, + "learning_rate": 2e-05, + "loss": 0.0426, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 2.1536166667938232, + "learning_rate": 2e-05, + "loss": 0.1304, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.7580990791320801, + "learning_rate": 2e-05, + "loss": 0.0804, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.7585065364837646, + "learning_rate": 2e-05, + "loss": 0.034, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 3.6284854412078857, + "learning_rate": 2e-05, + "loss": 0.2873, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.2183469533920288, + "learning_rate": 2e-05, + "loss": 0.0277, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 2.608013868331909, + "learning_rate": 2e-05, + "loss": 0.1363, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.1789724826812744, + "learning_rate": 2e-05, + "loss": 0.0427, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 0.5082151293754578, + "learning_rate": 2e-05, + "loss": 0.0256, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 3.2005743980407715, + "learning_rate": 2e-05, + "loss": 0.1482, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 2.980159282684326, + "learning_rate": 2e-05, + "loss": 0.09, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 3.3265373706817627, + "learning_rate": 2e-05, + "loss": 0.1819, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 3.513444423675537, + "learning_rate": 2e-05, + "loss": 0.4264, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.4808714389801025, + "learning_rate": 2e-05, + "loss": 0.4263, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 1.5365898609161377, + "learning_rate": 2e-05, + "loss": 0.145, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 2.223052978515625, + "learning_rate": 2e-05, + "loss": 0.2915, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.5608574151992798, + "learning_rate": 2e-05, + "loss": 0.2359, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 2.7377705574035645, + "learning_rate": 2e-05, + "loss": 0.2576, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 3.2352395057678223, + "learning_rate": 2e-05, + "loss": 0.1445, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 0.2320222109556198, + "learning_rate": 2e-05, + "loss": 0.1111, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.378736674785614, + "learning_rate": 2e-05, + "loss": 0.0441, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 3.09470534324646, + "learning_rate": 2e-05, + "loss": 0.22, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 0.44545435905456543, + "learning_rate": 2e-05, + "loss": 0.0866, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.4516711235046387, + "learning_rate": 2e-05, + "loss": 0.2053, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5601037339590656.0, + "train_loss": 0.1697624683380127, + "train_runtime": 247.9242, + "train_samples_per_second": 0.807, + "train_steps_per_second": 0.202 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5601037339590656.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}