diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c5218714537335072cfeeed279c6d097613debd --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dac7b6bce352f6c975f0f5656beef509afdab382bc722b932595b99bf50090c +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f90e889b22adc64eee77867ea6d596e566e760 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dcab448066a13c1f03938d764d26279e454333c8dc6923a707bf1cf0dc2f833 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..b24ced6ab13987676fceee89ef9124ca7cc9b1bd --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7d3453a23565008a42c4dccd1749cabfbf9ec23ea912014a4d4209943520d8 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d96189d8f6b156a34628576329f092558107c99 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c05502646862715dc27c5584e6a3e60191712121cfc7f969a9461170eadbe9 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..10bb5a5264e6f22b411220330094ebed9a85c997 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:513aba8649167d639d7183da473d0eb85bdf76970b10f7d12d4d47d0bfa8f4f6 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a4c5a3fc375c45068f442c088aaefcac178b01a --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269ac2ed7bfedd0bec7453eff915764e747247d28030f63f1ac848a7ed018c9f +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..7915cd236fc2c19349c4621858bda6af3572061f --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027f4e35e64efb5d494d33d46953933ce8bdb4a518cb33cecc238cfdbe8c1b84 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b18f764130ddaca9c97ef5e5302fce5654b4b94 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72abb5953c3d401fe841ebfb21c15bdd99db0975371868aa0aff2121d5510e50 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f7b71681ad913dd677f2f258704456838f7072c --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 3.185389757156372, + "learning_rate": 2e-05, + "loss": 0.5313, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 1.462697148323059, + "learning_rate": 2e-05, + "loss": 2.1023, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.06314225494861603, + "learning_rate": 2e-05, + "loss": 0.0065, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 8.08350658416748, + "learning_rate": 2e-05, + "loss": 0.7299, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 1.8942979574203491, + "learning_rate": 2e-05, + "loss": 0.4338, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 7.397281646728516, + "learning_rate": 2e-05, + "loss": 0.9851, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 3.0156924724578857, + "learning_rate": 2e-05, + "loss": 0.1971, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 9.138948440551758, + "learning_rate": 2e-05, + "loss": 0.6213, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 8.548439979553223, + "learning_rate": 2e-05, + "loss": 0.8961, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 2.227810859680176, + "learning_rate": 2e-05, + "loss": 1.4456, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 4.551537990570068, + "learning_rate": 2e-05, + "loss": 1.636, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 6.477214336395264, + "learning_rate": 2e-05, + "loss": 1.0221, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 0.2676788866519928, + "learning_rate": 2e-05, + "loss": 0.2403, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 14.42641830444336, + "learning_rate": 2e-05, + "loss": 1.108, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.8010652661323547, + "learning_rate": 2e-05, + "loss": 0.3901, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 9.867847442626953, + "learning_rate": 2e-05, + "loss": 1.073, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 0.5489710569381714, + "learning_rate": 2e-05, + "loss": 0.1765, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 2.8706305027008057, + "learning_rate": 2e-05, + "loss": 0.8621, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 3.2662458419799805, + "learning_rate": 2e-05, + "loss": 0.1863, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 10.825136184692383, + "learning_rate": 2e-05, + "loss": 1.1348, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.9670252799987793, + "learning_rate": 2e-05, + "loss": 0.6283, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 2.5218558311462402, + "learning_rate": 2e-05, + "loss": 0.8367, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 1.4162901639938354, + "learning_rate": 2e-05, + "loss": 0.4749, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 5.961083889007568, + "learning_rate": 2e-05, + "loss": 0.3601, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 7.117353439331055, + "learning_rate": 2e-05, + "loss": 0.6846, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 6.346409797668457, + "learning_rate": 2e-05, + "loss": 1.0359, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 3.3846776485443115, + "learning_rate": 2e-05, + "loss": 0.2492, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.2677325904369354, + "learning_rate": 2e-05, + "loss": 1.0758, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 3.839362621307373, + "learning_rate": 2e-05, + "loss": 0.7528, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 15.682966232299805, + "learning_rate": 2e-05, + "loss": 0.9388, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 9.203821182250977, + "learning_rate": 2e-05, + "loss": 1.9389, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 10.753218650817871, + "learning_rate": 2e-05, + "loss": 1.8481, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 3.960542678833008, + "learning_rate": 2e-05, + "loss": 0.9183, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.8610289096832275, + "learning_rate": 2e-05, + "loss": 0.4862, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.5907980799674988, + "learning_rate": 2e-05, + "loss": 0.0323, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 5.849497318267822, + "learning_rate": 2e-05, + "loss": 0.5046, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 2.7647833824157715, + "learning_rate": 2e-05, + "loss": 0.9464, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 2.2242050170898438, + "learning_rate": 2e-05, + "loss": 1.0414, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.43988850712776184, + "learning_rate": 2e-05, + "loss": 0.0393, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 7.581626892089844, + "learning_rate": 2e-05, + "loss": 0.914, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 8.071428298950195, + "learning_rate": 2e-05, + "loss": 0.5456, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 9.775481224060059, + "learning_rate": 2e-05, + "loss": 1.1202, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 4.938329219818115, + "learning_rate": 2e-05, + "loss": 0.4349, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 2.7860143184661865, + "learning_rate": 2e-05, + "loss": 0.3314, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.644363522529602, + "learning_rate": 2e-05, + "loss": 0.2175, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 3.6504173278808594, + "learning_rate": 2e-05, + "loss": 0.2242, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 3.1692447662353516, + "learning_rate": 2e-05, + "loss": 0.3253, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 7.509362697601318, + "learning_rate": 2e-05, + "loss": 0.9171, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.7557180523872375, + "learning_rate": 2e-05, + "loss": 0.0612, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.4876616597175598, + "learning_rate": 2e-05, + "loss": 0.054, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2050008522162176.0, + "train_loss": 0.714327917098999, + "train_runtime": 101.3941, + "train_samples_per_second": 3.945, + "train_steps_per_second": 0.986 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2050008522162176.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..5af7f510f167c8a6bfaec877c972aa93e0947dc1 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d4c4d89bdb3f350e2518ed150ec2a0b791fe39b81f2e0f62a2f5cc4ba7666e +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..c78834923064aac2f2357e3921c10a3dbe680d11 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459f79aed03d2ed8311a89bfc1543de0307289da1e03e5b61b17859497bd7b94 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..79cc2092e7c61406622be4e7dc6541f68dc2cf8a --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d025fa1d64446f3b8d8480477c7c2366e1dc17c670e582008dc6b3d3ebdae8eb +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ed8e19658885ab3de2391ad3045958539387b45 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88310f262c4da1aa92e63114b5417a63139a526b4adffa1eb7585320a682b4cf +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef04164845c18d2218febdf64bd0a5b9f27447e9 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6940d88337600557f8ef5224d94bee18273ca815983d5753451f7393f3ae082 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f86433bbc2b794b1d05b82b17e92c7d34868d33 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2718fac6eeee5d8a92ba8245e122b5832a37a9c8d03f18654502259c52e93e +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2fab3b660a6bf8648f06116fb303630486288fe --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5ed776c37af82da7e7c990f70a018ca7465e1c03abbf9f8e50c2450e301235 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..834475f281b8d1f7da15d2917cc68dfb4da8a3f3 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e310e436e59b58a8b51ebb8bfeeadcd4c23607edc7a36a702c0ef8a1d8775b51 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5bcb18b12390d863b31fd33f2eaac94ca6dd97f2 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 0.0033269652631133795, + "learning_rate": 2e-05, + "loss": 0.0085, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 0.041157741099596024, + "learning_rate": 2e-05, + "loss": 0.0008, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.9592494368553162, + "learning_rate": 2e-05, + "loss": 0.0262, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 0.6419932246208191, + "learning_rate": 2e-05, + "loss": 0.0282, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 5.6463303565979, + "learning_rate": 2e-05, + "loss": 0.6913, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 0.000999588519334793, + "learning_rate": 2e-05, + "loss": 0.0004, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.2222857028245926, + "learning_rate": 2e-05, + "loss": 0.0397, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.16892485320568085, + "learning_rate": 2e-05, + "loss": 0.0769, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 0.009978498332202435, + "learning_rate": 2e-05, + "loss": 0.0012, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 2.3538615703582764, + "learning_rate": 2e-05, + "loss": 0.4112, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.0049792262725532055, + "learning_rate": 2e-05, + "loss": 0.0009, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.9717739820480347, + "learning_rate": 2e-05, + "loss": 0.0344, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 0.007954389788210392, + "learning_rate": 2e-05, + "loss": 0.0886, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 0.15055210888385773, + "learning_rate": 2e-05, + "loss": 0.982, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.05759048089385033, + "learning_rate": 2e-05, + "loss": 0.5327, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 0.051013048738241196, + "learning_rate": 2e-05, + "loss": 0.0034, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 0.027761617675423622, + "learning_rate": 2e-05, + "loss": 0.0036, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 0.010388568975031376, + "learning_rate": 2e-05, + "loss": 0.0009, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 0.01740710809826851, + "learning_rate": 2e-05, + "loss": 0.2172, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 0.017691656947135925, + "learning_rate": 2e-05, + "loss": 0.0017, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.31259411573410034, + "learning_rate": 2e-05, + "loss": 0.0131, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 0.02895217575132847, + "learning_rate": 2e-05, + "loss": 0.0063, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.009466350078582764, + "learning_rate": 2e-05, + "loss": 0.0024, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 0.41093146800994873, + "learning_rate": 2e-05, + "loss": 0.018, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 0.006547864992171526, + "learning_rate": 2e-05, + "loss": 0.0008, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.009852895513176918, + "learning_rate": 2e-05, + "loss": 0.0273, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 0.0796719342470169, + "learning_rate": 2e-05, + "loss": 0.0066, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.17664939165115356, + "learning_rate": 2e-05, + "loss": 0.0089, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.03962412849068642, + "learning_rate": 2e-05, + "loss": 0.0023, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 0.23390346765518188, + "learning_rate": 2e-05, + "loss": 0.0101, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.035884492099285126, + "learning_rate": 2e-05, + "loss": 0.0022, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 0.006772290449589491, + "learning_rate": 2e-05, + "loss": 0.0014, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.13574518263339996, + "learning_rate": 2e-05, + "loss": 0.0098, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 17.729595184326172, + "learning_rate": 2e-05, + "loss": 1.8484, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.05980616807937622, + "learning_rate": 2e-05, + "loss": 0.0085, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.1025233268737793, + "learning_rate": 2e-05, + "loss": 1.0268, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.00824617501348257, + "learning_rate": 2e-05, + "loss": 0.0033, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 4.188517093658447, + "learning_rate": 2e-05, + "loss": 0.38, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.014042158611118793, + "learning_rate": 2e-05, + "loss": 0.0017, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.04890606924891472, + "learning_rate": 2e-05, + "loss": 0.1103, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.023772185668349266, + "learning_rate": 2e-05, + "loss": 0.0018, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 3.432652473449707, + "learning_rate": 2e-05, + "loss": 0.3927, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.6805059909820557, + "learning_rate": 2e-05, + "loss": 0.0469, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.975512146949768, + "learning_rate": 2e-05, + "loss": 0.2212, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 0.9377481937408447, + "learning_rate": 2e-05, + "loss": 0.4155, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 0.2486044466495514, + "learning_rate": 2e-05, + "loss": 0.0152, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 0.015230941586196423, + "learning_rate": 2e-05, + "loss": 0.0133, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.1053871437907219, + "learning_rate": 2e-05, + "loss": 0.0693, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.023564128205180168, + "learning_rate": 2e-05, + "loss": 0.0031, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.015611840412020683, + "learning_rate": 2e-05, + "loss": 0.025, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2066355427737600.0, + "train_loss": 0.1568439483642578, + "train_runtime": 99.9893, + "train_samples_per_second": 4.0, + "train_steps_per_second": 1.0 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2066355427737600.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..8510e6336f6136d4c681222f6a16a18dbc9a3511 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2a14f0034bf1a514488a2a3c3dd156f6cd9486176464301efb43f273eeaa69 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..764aa93d052284c0ea76a8b2291ce4b0205ffdf0 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2995ff2139b05e73dd687bf519c8a8de7d3e2ada63a7ff7c04c34ae1853fd2 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f3a4f78b706178d3e896926e42ba7ae5afb0870 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b71a7bf9a66fe1dfe1482aa1d0d500b7ebfd0950d854e6159f8c7f469e5a0c2 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..447a2add75c52319b6846a76f87cde857d478658 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7db80190dcd29b88d459a63955e96ddfe0d6783a855674d557761b674e3e07e +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddb5ccb74f837e2c76926cec85f5cd8e9b6c60c8 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b28230a69be1080fce36c0aa88635abdbef0bee9461b7e7f17386ee963856aa +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..7483b860a4b482e3eafbbada12538376a637f23e --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc05025bf8c8f9ceebe415e6840941b0c064ff4afe2125e74ae593b3c6d5b48 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c20a18e954b257e9029d877977c073ca7ff024a --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fc6d328f225665d42987b8a4bba2cd04fdf3ee8634fe9e9699f73ffb9ea04e +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4f910c453dd0eb8a9d6421277c1e8fe07f4d49 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acfe893b0c739503151ea961300cc6c25f8f270719d005c1737ae4d31e60eca +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..94340383e860c421befac2c22471979aba9071af --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 1.806666374206543, + "learning_rate": 2e-05, + "loss": 0.3127, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 2.772969961166382, + "learning_rate": 2e-05, + "loss": 0.6935, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 5.492923736572266, + "learning_rate": 2e-05, + "loss": 1.7379, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 6.6056976318359375, + "learning_rate": 2e-05, + "loss": 0.8991, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 2.7061705589294434, + "learning_rate": 2e-05, + "loss": 0.5627, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 1.2979965209960938, + "learning_rate": 2e-05, + "loss": 0.3267, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 1.3533953428268433, + "learning_rate": 2e-05, + "loss": 0.3217, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 1.877424716949463, + "learning_rate": 2e-05, + "loss": 0.2856, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 1.0798438787460327, + "learning_rate": 2e-05, + "loss": 0.4367, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 5.245316982269287, + "learning_rate": 2e-05, + "loss": 0.5249, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 1.5087791681289673, + "learning_rate": 2e-05, + "loss": 0.3091, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 3.263746500015259, + "learning_rate": 2e-05, + "loss": 1.0143, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 0.557353138923645, + "learning_rate": 2e-05, + "loss": 0.1558, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 1.3562333583831787, + "learning_rate": 2e-05, + "loss": 0.55, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 2.261547088623047, + "learning_rate": 2e-05, + "loss": 0.8929, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 3.1925289630889893, + "learning_rate": 2e-05, + "loss": 0.4653, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 1.4827537536621094, + "learning_rate": 2e-05, + "loss": 0.4744, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 0.6158387660980225, + "learning_rate": 2e-05, + "loss": 0.1999, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 3.348236560821533, + "learning_rate": 2e-05, + "loss": 0.5648, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 0.5702676177024841, + "learning_rate": 2e-05, + "loss": 0.5624, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 3.970848321914673, + "learning_rate": 2e-05, + "loss": 1.4146, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 5.26162052154541, + "learning_rate": 2e-05, + "loss": 1.5653, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 5.289398193359375, + "learning_rate": 2e-05, + "loss": 1.6653, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 4.84953498840332, + "learning_rate": 2e-05, + "loss": 0.9846, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 4.963735103607178, + "learning_rate": 2e-05, + "loss": 1.5258, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 2.125373601913452, + "learning_rate": 2e-05, + "loss": 0.5031, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 1.5217068195343018, + "learning_rate": 2e-05, + "loss": 0.3672, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 1.5323041677474976, + "learning_rate": 2e-05, + "loss": 0.6326, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375790357589722, + "learning_rate": 2e-05, + "loss": 0.236, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 2.5400359630584717, + "learning_rate": 2e-05, + "loss": 1.02, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 1.9115395545959473, + "learning_rate": 2e-05, + "loss": 0.3253, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 4.119795322418213, + "learning_rate": 2e-05, + "loss": 0.7132, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 2.27884840965271, + "learning_rate": 2e-05, + "loss": 1.2836, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 2.7405014038085938, + "learning_rate": 2e-05, + "loss": 0.3597, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.0723375082015991, + "learning_rate": 2e-05, + "loss": 1.1792, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 1.9426579475402832, + "learning_rate": 2e-05, + "loss": 0.2904, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 3.871702194213867, + "learning_rate": 2e-05, + "loss": 1.4492, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.5469585657119751, + "learning_rate": 2e-05, + "loss": 0.1543, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 5.215576648712158, + "learning_rate": 2e-05, + "loss": 1.2247, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 5.197208881378174, + "learning_rate": 2e-05, + "loss": 0.7877, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.622382402420044, + "learning_rate": 2e-05, + "loss": 0.6431, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 1.4372755289077759, + "learning_rate": 2e-05, + "loss": 0.543, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 1.7911620140075684, + "learning_rate": 2e-05, + "loss": 0.3779, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.9083788394927979, + "learning_rate": 2e-05, + "loss": 0.315, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 0.12868903577327728, + "learning_rate": 2e-05, + "loss": 0.1472, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 0.37657198309898376, + "learning_rate": 2e-05, + "loss": 0.344, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 2.369081735610962, + "learning_rate": 2e-05, + "loss": 0.9474, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.9848467707633972, + "learning_rate": 2e-05, + "loss": 0.1981, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 0.4741188883781433, + "learning_rate": 2e-05, + "loss": 0.1882, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 4.421746730804443, + "learning_rate": 2e-05, + "loss": 1.1065, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 4911038537400320.0, + "train_loss": 0.6756564712524414, + "train_runtime": 161.515, + "train_samples_per_second": 2.477, + "train_steps_per_second": 0.619 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4911038537400320.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a40b1f755d7d96cffbd48714cc31db72e3f148f --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925e02bab65a8c21b13961b7c1c2504de2854c66f54cb1db5c5db095445d9dfb +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..21d8384f2a0f78c5621e1d6ac6c6dcea2696880e --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfe2459c0ed9a99de2b5da1cc8c1a3d564a7665992fe018017fad9a0eea2b55 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..9809e236cd48fe189d963f10c2b4c6a764160a25 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a21077d8411c069fc73ccf68c83a8476cf01f5314a1c6eba65d534185182233 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..834481f15b9eac1a08fc5509dd3181096d54c797 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92d68232d50118aa91f6cd58d30ddcbc66f1a3de7a10fb77f8a0c9b3c5bc168 +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9049e58c0f82afd14761c581109b7a02765472b5 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f59030679d64456e72c0ef422af79b028f254ee3a6b11793905566e90314704 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..776e4c9c9dc1fcb76269aface1582c5b6bfcc72d --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133dc402d125ddaafc27f06bc1e6410077a9202486323d7f05f062dee2fd8afe +size 180440142 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c2e97d26364187490544601f1544438014e1922a --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb1c71ea609345555a1aa2ed7e222ce23f61a50458542ffa87292b16afd3cda +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..27b90802c82b20ea52cf082f351efb7df8fb6eee --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62cd085080e075ce63299e262c2c23ff7573f2d1ac1354c18f23e91751e8647 +size 180439850 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec50c86aa0cb9453aebabf4e21aacd106670c70 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 5.710101127624512, + "learning_rate": 2e-05, + "loss": 1.4172, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 3.9397523403167725, + "learning_rate": 2e-05, + "loss": 0.8797, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 3.1615402698516846, + "learning_rate": 2e-05, + "loss": 1.0671, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 13.820159912109375, + "learning_rate": 2e-05, + "loss": 3.6283, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 11.3993501663208, + "learning_rate": 2e-05, + "loss": 2.2113, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 13.748973846435547, + "learning_rate": 2e-05, + "loss": 2.2509, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 10.254955291748047, + "learning_rate": 2e-05, + "loss": 1.3235, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 3.2683303356170654, + "learning_rate": 2e-05, + "loss": 0.3648, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 7.850968837738037, + "learning_rate": 2e-05, + "loss": 1.1944, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 5.817758560180664, + "learning_rate": 2e-05, + "loss": 1.5678, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 3.871739625930786, + "learning_rate": 2e-05, + "loss": 1.3073, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 8.361922264099121, + "learning_rate": 2e-05, + "loss": 0.9962, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 4.391210079193115, + "learning_rate": 2e-05, + "loss": 1.3205, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 6.039274215698242, + "learning_rate": 2e-05, + "loss": 1.9638, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 2.7958240509033203, + "learning_rate": 2e-05, + "loss": 0.46, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 7.391857147216797, + "learning_rate": 2e-05, + "loss": 1.1388, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 8.654966354370117, + "learning_rate": 2e-05, + "loss": 1.5172, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 10.035390853881836, + "learning_rate": 2e-05, + "loss": 1.7336, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 4.6513237953186035, + "learning_rate": 2e-05, + "loss": 1.2339, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 2.69884991645813, + "learning_rate": 2e-05, + "loss": 0.4367, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 3.5067331790924072, + "learning_rate": 2e-05, + "loss": 0.7892, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 8.914396286010742, + "learning_rate": 2e-05, + "loss": 1.5515, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 6.865166187286377, + "learning_rate": 2e-05, + "loss": 1.689, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.9471137523651123, + "learning_rate": 2e-05, + "loss": 0.9443, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 2.263576030731201, + "learning_rate": 2e-05, + "loss": 0.3431, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 3.667490243911743, + "learning_rate": 2e-05, + "loss": 1.6956, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 10.96878433227539, + "learning_rate": 2e-05, + "loss": 1.092, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 4.746389865875244, + "learning_rate": 2e-05, + "loss": 0.7459, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 6.84367561340332, + "learning_rate": 2e-05, + "loss": 0.8944, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 1.7791221141815186, + "learning_rate": 2e-05, + "loss": 1.3167, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 4.828078746795654, + "learning_rate": 2e-05, + "loss": 1.8402, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 13.462433815002441, + "learning_rate": 2e-05, + "loss": 1.1954, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 9.061487197875977, + "learning_rate": 2e-05, + "loss": 1.31, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 1.7035795450210571, + "learning_rate": 2e-05, + "loss": 0.6877, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 7.510168075561523, + "learning_rate": 2e-05, + "loss": 1.5226, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 5.2935638427734375, + "learning_rate": 2e-05, + "loss": 1.5661, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 4.344234466552734, + "learning_rate": 2e-05, + "loss": 1.6382, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 3.743943691253662, + "learning_rate": 2e-05, + "loss": 0.713, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 8.068772315979004, + "learning_rate": 2e-05, + "loss": 1.1778, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.1857481002807617, + "learning_rate": 2e-05, + "loss": 0.9007, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 5.7237138748168945, + "learning_rate": 2e-05, + "loss": 2.0638, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 6.149637699127197, + "learning_rate": 2e-05, + "loss": 0.9916, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 2.0919957160949707, + "learning_rate": 2e-05, + "loss": 0.6464, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 3.3683154582977295, + "learning_rate": 2e-05, + "loss": 0.6144, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 4.998460292816162, + "learning_rate": 2e-05, + "loss": 0.8671, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 2.805094003677368, + "learning_rate": 2e-05, + "loss": 0.6894, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 2.4422361850738525, + "learning_rate": 2e-05, + "loss": 0.3568, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 3.961075782775879, + "learning_rate": 2e-05, + "loss": 0.8177, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 9.63961124420166, + "learning_rate": 2e-05, + "loss": 2.5556, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 2.2371578216552734, + "learning_rate": 2e-05, + "loss": 0.4797, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 2094816317931520.0, + "train_loss": 1.2341706466674804, + "train_runtime": 100.7881, + "train_samples_per_second": 3.969, + "train_steps_per_second": 0.992 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2094816317931520.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..443c7f795bc7ae4ea4747448f5b274cb03afca66 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b7d2e033a4b3d1dd6203f1de7275223b49ea702986d1bcc4bc023f61efac5c +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..a92a55d2b17a77129ea78980716b3130d60010a1 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448423352706cc1f24e4a54a1813344e0787aa887633b802cf0e89e39d7b71b4 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..aecaf022796d563d174bdf66074ed7b736695d88 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4840eadbd7ef1db67e9ceca295cc87f0fe1a605516db0cb6646e53aada077a +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..a48659b4346a3b8f6b5ca9c118bde2d7f9e2cc03 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22187ed7cdb78ce527269fc7896018eb44dd8bd197e63cb13b6dd630c05aac58 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d63b3ccc018cc2041ca8777b91e6484a97ae0e3 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5a3056867ad1c4ca79975c0c9bace7ffb0bbb72c2de8042a61fdba76ee6983f +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..dfb08365ef1d381e2c675f95d84ee5c068a9f366 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491ed5be2cd16c68c92a753673103ac5a7d180f35c90e218b096fcd9390e8090 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..deb188552ccb0dd538a86d25fa1b29ce1fa27301 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8b283879be4d8ee9c79c7127666354510b854e5227bb598eb78c61238b9f4b +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cbf93302ad23321db9498fd5ae56c6174fd63ee --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000d4f0e9c4b4d86561759eb13ba6397ea829bb5be2c694c51bacc84bfcb746f +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9d71c9ba0c48bc3ee8f3d4e38328dc16e32b458 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 2.1811344623565674, + "learning_rate": 2e-05, + "loss": 0.9004, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 4.057499885559082, + "learning_rate": 2e-05, + "loss": 0.4477, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.4118136167526245, + "learning_rate": 2e-05, + "loss": 0.4494, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 1.3802590370178223, + "learning_rate": 2e-05, + "loss": 0.5171, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 3.8723020553588867, + "learning_rate": 2e-05, + "loss": 0.2846, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 1.808058738708496, + "learning_rate": 2e-05, + "loss": 0.7729, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.9369915127754211, + "learning_rate": 2e-05, + "loss": 0.4847, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 1.7796673774719238, + "learning_rate": 2e-05, + "loss": 0.7306, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 3.8881521224975586, + "learning_rate": 2e-05, + "loss": 0.7441, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 1.4085545539855957, + "learning_rate": 2e-05, + "loss": 0.6283, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 3.587538719177246, + "learning_rate": 2e-05, + "loss": 1.1968, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 1.6639175415039062, + "learning_rate": 2e-05, + "loss": 0.7579, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 2.090292453765869, + "learning_rate": 2e-05, + "loss": 1.3047, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 2.301203966140747, + "learning_rate": 2e-05, + "loss": 0.2738, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 3.2021005153656006, + "learning_rate": 2e-05, + "loss": 0.3979, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 6.288852691650391, + "learning_rate": 2e-05, + "loss": 1.585, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 4.023402214050293, + "learning_rate": 2e-05, + "loss": 1.1474, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 2.4434189796447754, + "learning_rate": 2e-05, + "loss": 0.2991, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.773408055305481, + "learning_rate": 2e-05, + "loss": 0.6277, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 2.1902174949645996, + "learning_rate": 2e-05, + "loss": 0.5112, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.60333251953125, + "learning_rate": 2e-05, + "loss": 0.2128, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 4.51239013671875, + "learning_rate": 2e-05, + "loss": 1.3767, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.47873491048812866, + "learning_rate": 2e-05, + "loss": 0.1478, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 2.3050646781921387, + "learning_rate": 2e-05, + "loss": 0.4301, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 1.4680174589157104, + "learning_rate": 2e-05, + "loss": 0.4868, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 5.060284614562988, + "learning_rate": 2e-05, + "loss": 1.4766, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 2.9290950298309326, + "learning_rate": 2e-05, + "loss": 0.521, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 3.033177614212036, + "learning_rate": 2e-05, + "loss": 0.8623, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.6711596250534058, + "learning_rate": 2e-05, + "loss": 0.6449, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 2.6369800567626953, + "learning_rate": 2e-05, + "loss": 0.7327, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 3.0031280517578125, + "learning_rate": 2e-05, + "loss": 0.5193, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 3.3499245643615723, + "learning_rate": 2e-05, + "loss": 0.4866, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 3.0874881744384766, + "learning_rate": 2e-05, + "loss": 0.8739, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 2.215085506439209, + "learning_rate": 2e-05, + "loss": 1.1589, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.7898951768875122, + "learning_rate": 2e-05, + "loss": 0.2845, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 2.463358163833618, + "learning_rate": 2e-05, + "loss": 0.3765, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.7925405502319336, + "learning_rate": 2e-05, + "loss": 0.3446, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 1.6757383346557617, + "learning_rate": 2e-05, + "loss": 0.7917, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.5198004245758057, + "learning_rate": 2e-05, + "loss": 0.3082, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 2.186577081680298, + "learning_rate": 2e-05, + "loss": 0.6401, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 0.8060414791107178, + "learning_rate": 2e-05, + "loss": 0.0603, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 2.0299384593963623, + "learning_rate": 2e-05, + "loss": 0.4283, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 1.539688229560852, + "learning_rate": 2e-05, + "loss": 0.2565, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 1.5283581018447876, + "learning_rate": 2e-05, + "loss": 0.1788, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 0.6798237562179565, + "learning_rate": 2e-05, + "loss": 0.7287, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 3.418360471725464, + "learning_rate": 2e-05, + "loss": 0.3788, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 0.48986631631851196, + "learning_rate": 2e-05, + "loss": 2.0909, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.2811102271080017, + "learning_rate": 2e-05, + "loss": 0.0683, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 1.9367895126342773, + "learning_rate": 2e-05, + "loss": 0.4261, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 2.887495279312134, + "learning_rate": 2e-05, + "loss": 0.4067, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5689210367377408.0, + "train_loss": 0.6352152729034424, + "train_runtime": 162.5562, + "train_samples_per_second": 2.461, + "train_steps_per_second": 0.615 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5689210367377408.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9859f63b16292e8278110e67d72e3b29313715a --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f03b0ba909c845d5b5b49c2c076935f837f2786c99faff543722d4798bbed1 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9697c0ee3612c497a2f4142d197434e656a4fae --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e00c2c325bfc7ee21add5d449c752326a25edbf3d18251963cb643297bb9ae +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..85af2f5cf8e6c34b6878efc0263141875ffcbeee --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbba7266a66e0096e2065265be474ac545ee8600fc54ed14af585a8031e9726 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..beecc8ccfaa5fb585dcd1aa4621c1998c1afe116 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6756fbbfec598b27becfb99f39fdd78b7398d2e636031e0b73a8e62aa614ad2b +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..73d50377541af34ea57aad76b73316e0c1e49468 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc2aa4f97bd4e09072b338e21a46703e6b339f7d3a0baa7ac9633d09088b73b +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7e0bc61923a041ce951b910e5501795fd0b09e9 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9ca1fdd3ee6a75d5c8dc5443920a6a766a5a2503ed5aa2994c3bf4237677b5 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..34b742dbdbb3b2e5794321c0be4b50407db57a8c --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc03baa900d382298095be7d6486fc50bed3552c98c95319be3b5c689d2992f7 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..85b09c39747e6ef06e64c3d93225333d2fa510b3 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c85fed649fbdfec72152ae395eb733c1c89f947f67ed4bf9b3a561e4247365 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a080351646e2b1ff13efb44f7f7848627e8cd9df --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 1.1482053995132446, + "learning_rate": 2e-05, + "loss": 0.6585, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 0.10251256823539734, + "learning_rate": 2e-05, + "loss": 0.0361, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.4009135663509369, + "learning_rate": 2e-05, + "loss": 0.35, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 10.099437713623047, + "learning_rate": 2e-05, + "loss": 1.8198, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 1.0879929065704346, + "learning_rate": 2e-05, + "loss": 0.7817, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 0.4082857668399811, + "learning_rate": 2e-05, + "loss": 0.0341, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 1.0461204051971436, + "learning_rate": 2e-05, + "loss": 0.1428, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 1.7122026681900024, + "learning_rate": 2e-05, + "loss": 0.2265, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 0.14468756318092346, + "learning_rate": 2e-05, + "loss": 0.0429, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 7.697649002075195, + "learning_rate": 2e-05, + "loss": 0.2796, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.07512003183364868, + "learning_rate": 2e-05, + "loss": 0.8183, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 5.041003227233887, + "learning_rate": 2e-05, + "loss": 0.5998, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.0242588520050049, + "learning_rate": 2e-05, + "loss": 0.0833, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 1.6065642833709717, + "learning_rate": 2e-05, + "loss": 1.2503, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 3.5513150691986084, + "learning_rate": 2e-05, + "loss": 0.7646, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 1.7422484159469604, + "learning_rate": 2e-05, + "loss": 0.2032, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 6.78082275390625, + "learning_rate": 2e-05, + "loss": 1.5083, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 1.5644011497497559, + "learning_rate": 2e-05, + "loss": 0.2274, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.805795431137085, + "learning_rate": 2e-05, + "loss": 0.2896, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 3.5617361068725586, + "learning_rate": 2e-05, + "loss": 0.505, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.8971734046936035, + "learning_rate": 2e-05, + "loss": 0.1743, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 0.48634418845176697, + "learning_rate": 2e-05, + "loss": 0.0507, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 1.6985533237457275, + "learning_rate": 2e-05, + "loss": 1.176, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 0.19018730521202087, + "learning_rate": 2e-05, + "loss": 0.1552, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 1.2920739650726318, + "learning_rate": 2e-05, + "loss": 0.1081, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 2.077239751815796, + "learning_rate": 2e-05, + "loss": 0.2189, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 1.0245712995529175, + "learning_rate": 2e-05, + "loss": 0.0795, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.07209905982017517, + "learning_rate": 2e-05, + "loss": 0.1207, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.7158768177032471, + "learning_rate": 2e-05, + "loss": 0.1924, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 4.013988494873047, + "learning_rate": 2e-05, + "loss": 0.2579, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.11928291618824005, + "learning_rate": 2e-05, + "loss": 0.0947, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.9300360679626465, + "learning_rate": 2e-05, + "loss": 0.1745, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 4.016658782958984, + "learning_rate": 2e-05, + "loss": 0.6421, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.38495752215385437, + "learning_rate": 2e-05, + "loss": 0.026, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 4.238759994506836, + "learning_rate": 2e-05, + "loss": 0.2108, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 3.1998023986816406, + "learning_rate": 2e-05, + "loss": 0.1861, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 3.7027571201324463, + "learning_rate": 2e-05, + "loss": 0.4135, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.692491888999939, + "learning_rate": 2e-05, + "loss": 0.0296, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.4985527992248535, + "learning_rate": 2e-05, + "loss": 0.0506, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.21694600582122803, + "learning_rate": 2e-05, + "loss": 0.027, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 4.258615493774414, + "learning_rate": 2e-05, + "loss": 0.28, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 2.039672374725342, + "learning_rate": 2e-05, + "loss": 1.2655, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 2.7291083335876465, + "learning_rate": 2e-05, + "loss": 0.15, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.031074684113264084, + "learning_rate": 2e-05, + "loss": 2.2991, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 6.191506862640381, + "learning_rate": 2e-05, + "loss": 1.6582, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 6.091362476348877, + "learning_rate": 2e-05, + "loss": 2.1908, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 2.5475945472717285, + "learning_rate": 2e-05, + "loss": 0.1938, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.4129593074321747, + "learning_rate": 2e-05, + "loss": 0.0384, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 1.8603883981704712, + "learning_rate": 2e-05, + "loss": 0.3901, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.10186377167701721, + "learning_rate": 2e-05, + "loss": 0.0303, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 4971655331840000.0, + "train_loss": 0.47013757705688475, + "train_runtime": 159.8993, + "train_samples_per_second": 2.502, + "train_steps_per_second": 0.625 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4971655331840000.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..63cf2546cb6dbb40d542363f23e8f73242c779e6 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc16fcc76e9cc5cecd2d06e22ad078e98b9f01d6f5c18cd907deb92e55a462da +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..abfc14792c55dbfca2d3d1fe8b51f3e577df5220 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcf89f7b68a6f9102ddaa073b8806ac05b3e2bc582d7a3e51d5963512a191ae +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..cea15076a19c82c0c7eeff9f85734cd46bb78ce1 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0c6fc862564688392c28adcd58a7a37d87820998602d44ad7680a33c918d17 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f6afd9585435543b98bc218937c5922df663101 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275ddc49e669f477dd1d96c7d9f473cd6fdd15a79606b583f6782cc2fe3beeae +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3e321455fd27e2b2bc792d6458719d2ca6d8d25 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780fcd382781b864462bda5e1c3ffaf3e913195ac49a916d5b4fda5050622589 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..3cc30ae0b541963d090e326814ddf5d68d5fb871 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1574a5a0af897db076335e1368a7abb640d402c7d13d94db2359b346d537928b +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..e683f308cc3ea8ba2825e41bee4939301d89353f --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09a69c8c602d4c94642bebf82fba3207e87a3466ce37d5c0b66dc98368e57d0 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..891997ade6f5b7cee14ed741e5cf2f326c051fea --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3d5942e560e438bf7d2e5fb08628fe7e0dea67dfcc97c6d9a527fd32699807 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a993c489da2a828d54a41aed7f8fa5583feee798 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/6_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 1.9298248291015625, + "learning_rate": 2e-05, + "loss": 0.5905, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 3.637150287628174, + "learning_rate": 2e-05, + "loss": 0.6498, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 4.745145797729492, + "learning_rate": 2e-05, + "loss": 0.9514, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 4.834471225738525, + "learning_rate": 2e-05, + "loss": 1.2548, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 2.430112600326538, + "learning_rate": 2e-05, + "loss": 0.4672, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 1.308264970779419, + "learning_rate": 2e-05, + "loss": 0.5229, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 2.7554006576538086, + "learning_rate": 2e-05, + "loss": 1.2086, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 2.26322340965271, + "learning_rate": 2e-05, + "loss": 1.1104, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 1.7159513235092163, + "learning_rate": 2e-05, + "loss": 0.5444, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 1.4893906116485596, + "learning_rate": 2e-05, + "loss": 0.7912, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 1.916092038154602, + "learning_rate": 2e-05, + "loss": 0.5565, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.9620460271835327, + "learning_rate": 2e-05, + "loss": 0.467, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.2611125707626343, + "learning_rate": 2e-05, + "loss": 0.6207, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 1.7048827409744263, + "learning_rate": 2e-05, + "loss": 0.5756, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 3.607403516769409, + "learning_rate": 2e-05, + "loss": 0.9155, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 1.767032265663147, + "learning_rate": 2e-05, + "loss": 0.8582, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 0.685822606086731, + "learning_rate": 2e-05, + "loss": 0.0693, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 2.8706161975860596, + "learning_rate": 2e-05, + "loss": 0.5083, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 2.9342684745788574, + "learning_rate": 2e-05, + "loss": 1.6817, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 2.532771110534668, + "learning_rate": 2e-05, + "loss": 1.1299, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 5.263404846191406, + "learning_rate": 2e-05, + "loss": 0.989, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 6.652492046356201, + "learning_rate": 2e-05, + "loss": 2.2123, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 6.186675548553467, + "learning_rate": 2e-05, + "loss": 0.9922, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 3.8063266277313232, + "learning_rate": 2e-05, + "loss": 1.0285, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 3.6435630321502686, + "learning_rate": 2e-05, + "loss": 0.6749, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 5.524310111999512, + "learning_rate": 2e-05, + "loss": 1.2993, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 2.8274648189544678, + "learning_rate": 2e-05, + "loss": 1.2563, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 2.228874921798706, + "learning_rate": 2e-05, + "loss": 1.1229, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 1.3959927558898926, + "learning_rate": 2e-05, + "loss": 1.0526, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 1.1264957189559937, + "learning_rate": 2e-05, + "loss": 0.2993, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 1.9942477941513062, + "learning_rate": 2e-05, + "loss": 0.7451, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.8597582578659058, + "learning_rate": 2e-05, + "loss": 0.7528, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 2.060844898223877, + "learning_rate": 2e-05, + "loss": 1.4592, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 2.179786443710327, + "learning_rate": 2e-05, + "loss": 0.7638, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.138787865638733, + "learning_rate": 2e-05, + "loss": 0.6381, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 5.934345722198486, + "learning_rate": 2e-05, + "loss": 1.2625, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 2.8126211166381836, + "learning_rate": 2e-05, + "loss": 1.3784, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 1.2480483055114746, + "learning_rate": 2e-05, + "loss": 0.8033, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 1.2717947959899902, + "learning_rate": 2e-05, + "loss": 0.6094, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.9040071964263916, + "learning_rate": 2e-05, + "loss": 0.9589, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 2.760758876800537, + "learning_rate": 2e-05, + "loss": 0.8228, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 4.190299034118652, + "learning_rate": 2e-05, + "loss": 1.0774, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 9.107547760009766, + "learning_rate": 2e-05, + "loss": 1.5183, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 2.0416154861450195, + "learning_rate": 2e-05, + "loss": 0.6354, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 2.5585086345672607, + "learning_rate": 2e-05, + "loss": 1.0353, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 4.223822593688965, + "learning_rate": 2e-05, + "loss": 1.3008, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 5.013917922973633, + "learning_rate": 2e-05, + "loss": 1.1085, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 2.1509475708007812, + "learning_rate": 2e-05, + "loss": 0.9013, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 1.1799132823944092, + "learning_rate": 2e-05, + "loss": 1.2805, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 1.3587478399276733, + "learning_rate": 2e-05, + "loss": 0.4385, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 9838454674292736.0, + "train_loss": 0.9178249549865722, + "train_runtime": 193.7032, + "train_samples_per_second": 2.065, + "train_steps_per_second": 0.516 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9838454674292736.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..04d87afa883320544deb0ad4f4fb71a4eb4909a0 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:214a9008ba0716c22fed84fe035bbe0a9bd67060a1b21fc895aa04a0cb739d8d +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ab5c55429689fd3130c7a915636eae98590f9dd --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2007de25c09f2bbf673d90649e85d0bb1237e9dc0ebb2d86bd5fe28c4404385c +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..db53f53d8d47cd46c3c2c3bc1fd168a88c725a65 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94567f6491c7d07e0ff8fa33fcd24e76bdb15b17e2bcb4867862e2a5fee811b2 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0212c28157dce60169f523398955f7acb9a3696 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8609bf7891e2d0bb8a0f471227a31eb262bbc72f17ebab41b9b66b07debaaa3 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..700f7ee97ccdfb57f91168fe9d6c8b73e54a94b1 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e397bb755f237a0e2c111e6d00871708cda47e637ee86d94575cd2e7b586e1 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a5c87203488d51c3505d44660a4246685b05443 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0b56dc34c6e076c2e164ddfcf8abc9fbdedd97aa79982ff4701b12de0f91d5f +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..cecfbddbd40e6d1a75c1f6180b9e756e75e6ffef --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4bb072f398281ef6eff5ca32bb889699fe04fad79e7a17de45bb764c13755a +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..40765b7bf85af5d3d69f881dfb8fd7c0ba1be78e --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff7a85f1b4190840b565468cf7eec476578a22c85a81640739360e1c6c2f7cb +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f28802748df45439e332c66f477980dae132ed44 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/7_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 2.7925281524658203, + "learning_rate": 2e-05, + "loss": 0.6299, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 1.7250635623931885, + "learning_rate": 2e-05, + "loss": 0.5827, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 2.3938684463500977, + "learning_rate": 2e-05, + "loss": 0.1307, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 0.6948806643486023, + "learning_rate": 2e-05, + "loss": 0.0794, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 0.6617633700370789, + "learning_rate": 2e-05, + "loss": 0.9504, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 2.9368467330932617, + "learning_rate": 2e-05, + "loss": 1.0071, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.35732072591781616, + "learning_rate": 2e-05, + "loss": 0.0363, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.1250218152999878, + "learning_rate": 2e-05, + "loss": 1.1337, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 4.960132598876953, + "learning_rate": 2e-05, + "loss": 0.6449, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 9.529942512512207, + "learning_rate": 2e-05, + "loss": 1.7827, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 8.95326042175293, + "learning_rate": 2e-05, + "loss": 0.9447, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 0.7850130200386047, + "learning_rate": 2e-05, + "loss": 0.0618, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 2.4583945274353027, + "learning_rate": 2e-05, + "loss": 0.2841, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 1.0075058937072754, + "learning_rate": 2e-05, + "loss": 0.3527, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.30375605821609497, + "learning_rate": 2e-05, + "loss": 0.6464, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 3.8809680938720703, + "learning_rate": 2e-05, + "loss": 0.7876, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 3.606328010559082, + "learning_rate": 2e-05, + "loss": 0.241, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 1.6751177310943604, + "learning_rate": 2e-05, + "loss": 0.6661, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 2.4326398372650146, + "learning_rate": 2e-05, + "loss": 0.4689, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 2.1558163166046143, + "learning_rate": 2e-05, + "loss": 0.2388, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.289340078830719, + "learning_rate": 2e-05, + "loss": 0.0322, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 0.4544844627380371, + "learning_rate": 2e-05, + "loss": 0.0496, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.5207415819168091, + "learning_rate": 2e-05, + "loss": 0.3926, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 0.3023507297039032, + "learning_rate": 2e-05, + "loss": 0.3016, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 1.8550446033477783, + "learning_rate": 2e-05, + "loss": 0.4926, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 1.065987467765808, + "learning_rate": 2e-05, + "loss": 0.3108, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 3.2929720878601074, + "learning_rate": 2e-05, + "loss": 0.3461, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 3.499336004257202, + "learning_rate": 2e-05, + "loss": 0.2239, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 0.3920607566833496, + "learning_rate": 2e-05, + "loss": 0.0706, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 3.887545585632324, + "learning_rate": 2e-05, + "loss": 0.681, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 2.0383825302124023, + "learning_rate": 2e-05, + "loss": 0.2141, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 0.0876418873667717, + "learning_rate": 2e-05, + "loss": 0.0275, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 0.5378940105438232, + "learning_rate": 2e-05, + "loss": 0.0922, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 7.811489582061768, + "learning_rate": 2e-05, + "loss": 1.6039, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 1.6971862316131592, + "learning_rate": 2e-05, + "loss": 1.407, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 5.543071746826172, + "learning_rate": 2e-05, + "loss": 1.4755, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 1.4514342546463013, + "learning_rate": 2e-05, + "loss": 0.0743, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.3986780047416687, + "learning_rate": 2e-05, + "loss": 0.0501, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 4.3974456787109375, + "learning_rate": 2e-05, + "loss": 0.4735, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 3.485053300857544, + "learning_rate": 2e-05, + "loss": 1.271, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 1.9003517627716064, + "learning_rate": 2e-05, + "loss": 0.8555, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 2.020681381225586, + "learning_rate": 2e-05, + "loss": 0.122, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 2.3439993858337402, + "learning_rate": 2e-05, + "loss": 0.3552, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 5.957539081573486, + "learning_rate": 2e-05, + "loss": 0.9644, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 1.4940080642700195, + "learning_rate": 2e-05, + "loss": 1.0023, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 2.1957061290740967, + "learning_rate": 2e-05, + "loss": 0.3014, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 3.0871853828430176, + "learning_rate": 2e-05, + "loss": 0.235, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 4.092676639556885, + "learning_rate": 2e-05, + "loss": 0.496, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 2.393658399581909, + "learning_rate": 2e-05, + "loss": 0.1283, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 0.8805527687072754, + "learning_rate": 2e-05, + "loss": 0.2138, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5153325964066816.0, + "train_loss": 0.5186852896213532, + "train_runtime": 160.9571, + "train_samples_per_second": 2.485, + "train_steps_per_second": 0.621 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5153325964066816.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..be6c168e7f5308eb71b75b41193c4269697afd30 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cde12864e635a9b74445654cdd107c6853fa9d57a3cabebadc89dc6a3a68dd +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..054bcbf09225aa555d902de15459f1dae2b101f0 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9eca84ca53004cc1d1784d9842e10c9e354e21210f490b67a4207b6a94326f +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8dd895cfc1fc06b2f4ab338a8cd026cefe67109 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ec6efb5d9eb19b6a00a5085d4b2a2ebe355cd78e025e3c01e6db846f869b06 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..3526cda7c055201b299e453c0490c3d6882430fa --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49778ab6fca6a6f032c5a6c55925cb87f28be76745a76f66a8d575677c122f68 +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7843c1d8621d63326dd4bebbd638a77f3158671 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3415f9db187b44607d7e73f286893f9576d5891637bf094acfb5f88261bdf0d +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..850ba644dbdafb531106b806801ae71a69cba90d --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628eee822175f7accea5d4657f7a1e13fe3be5524bd73f944939f4d2878b5a8c +size 389170582 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b119a4c539de69e071c5128fe8521a3fe725a29c --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445953dd44e5b0c30c19a9c97d5d5209b85d9e8412b789f78f45e711cd198909 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c415902e608a4810edbdd45c6b49ac0eecd783a4 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b467d420f8d03ec8c23c4b32a771f4db978b4f90f8efc86a161e6b61055bbf1 +size 389170122 diff --git a/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e69f2d83933108cbcbb1ff033e1299977f5989d6 --- /dev/null +++ b/client_states_fedavg_hetero_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/8_trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 1.063376784324646, + "learning_rate": 2e-05, + "loss": 0.1714, + "step": 2 + }, + { + "epoch": 0.04, + "grad_norm": 0.31672921776771545, + "learning_rate": 2e-05, + "loss": 0.7854, + "step": 4 + }, + { + "epoch": 0.06, + "grad_norm": 0.019425930455327034, + "learning_rate": 2e-05, + "loss": 0.0223, + "step": 6 + }, + { + "epoch": 0.08, + "grad_norm": 0.14177998900413513, + "learning_rate": 2e-05, + "loss": 0.013, + "step": 8 + }, + { + "epoch": 0.1, + "grad_norm": 3.456772565841675, + "learning_rate": 2e-05, + "loss": 0.6256, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 0.7389208078384399, + "learning_rate": 2e-05, + "loss": 0.1244, + "step": 12 + }, + { + "epoch": 0.14, + "grad_norm": 0.18092218041419983, + "learning_rate": 2e-05, + "loss": 0.2427, + "step": 14 + }, + { + "epoch": 0.16, + "grad_norm": 0.4189036190509796, + "learning_rate": 2e-05, + "loss": 0.4283, + "step": 16 + }, + { + "epoch": 0.18, + "grad_norm": 0.39719855785369873, + "learning_rate": 2e-05, + "loss": 0.0212, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 1.8617382049560547, + "learning_rate": 2e-05, + "loss": 0.1222, + "step": 20 + }, + { + "epoch": 0.22, + "grad_norm": 0.1726214736700058, + "learning_rate": 2e-05, + "loss": 0.0236, + "step": 22 + }, + { + "epoch": 0.24, + "grad_norm": 1.587731957435608, + "learning_rate": 2e-05, + "loss": 0.2377, + "step": 24 + }, + { + "epoch": 0.26, + "grad_norm": 1.0266427993774414, + "learning_rate": 2e-05, + "loss": 0.0387, + "step": 26 + }, + { + "epoch": 0.28, + "grad_norm": 4.824191570281982, + "learning_rate": 2e-05, + "loss": 0.7857, + "step": 28 + }, + { + "epoch": 0.3, + "grad_norm": 0.08748485893011093, + "learning_rate": 2e-05, + "loss": 0.079, + "step": 30 + }, + { + "epoch": 0.32, + "grad_norm": 2.122347831726074, + "learning_rate": 2e-05, + "loss": 0.2197, + "step": 32 + }, + { + "epoch": 0.34, + "grad_norm": 0.5977698564529419, + "learning_rate": 2e-05, + "loss": 0.076, + "step": 34 + }, + { + "epoch": 0.36, + "grad_norm": 0.32403549551963806, + "learning_rate": 2e-05, + "loss": 0.5746, + "step": 36 + }, + { + "epoch": 0.38, + "grad_norm": 1.8251796960830688, + "learning_rate": 2e-05, + "loss": 0.1333, + "step": 38 + }, + { + "epoch": 0.4, + "grad_norm": 4.036827564239502, + "learning_rate": 2e-05, + "loss": 1.1427, + "step": 40 + }, + { + "epoch": 0.42, + "grad_norm": 0.07654375582933426, + "learning_rate": 2e-05, + "loss": 0.4196, + "step": 42 + }, + { + "epoch": 0.44, + "grad_norm": 0.17075762152671814, + "learning_rate": 2e-05, + "loss": 0.0322, + "step": 44 + }, + { + "epoch": 0.46, + "grad_norm": 0.9724357724189758, + "learning_rate": 2e-05, + "loss": 0.0473, + "step": 46 + }, + { + "epoch": 0.48, + "grad_norm": 1.083470106124878, + "learning_rate": 2e-05, + "loss": 0.7292, + "step": 48 + }, + { + "epoch": 0.5, + "grad_norm": 2.4008989334106445, + "learning_rate": 2e-05, + "loss": 0.2104, + "step": 50 + }, + { + "epoch": 0.52, + "grad_norm": 0.28259167075157166, + "learning_rate": 2e-05, + "loss": 0.0379, + "step": 52 + }, + { + "epoch": 0.54, + "grad_norm": 7.950784683227539, + "learning_rate": 2e-05, + "loss": 1.2918, + "step": 54 + }, + { + "epoch": 0.56, + "grad_norm": 0.22562403976917267, + "learning_rate": 2e-05, + "loss": 0.0409, + "step": 56 + }, + { + "epoch": 0.58, + "grad_norm": 2.43168568611145, + "learning_rate": 2e-05, + "loss": 0.1244, + "step": 58 + }, + { + "epoch": 0.6, + "grad_norm": 1.0309669971466064, + "learning_rate": 2e-05, + "loss": 0.0742, + "step": 60 + }, + { + "epoch": 0.62, + "grad_norm": 0.76719731092453, + "learning_rate": 2e-05, + "loss": 0.0665, + "step": 62 + }, + { + "epoch": 0.64, + "grad_norm": 1.9494340419769287, + "learning_rate": 2e-05, + "loss": 0.3996, + "step": 64 + }, + { + "epoch": 0.66, + "grad_norm": 1.590673565864563, + "learning_rate": 2e-05, + "loss": 0.3177, + "step": 66 + }, + { + "epoch": 0.68, + "grad_norm": 0.02313467301428318, + "learning_rate": 2e-05, + "loss": 0.031, + "step": 68 + }, + { + "epoch": 0.7, + "grad_norm": 0.8836857676506042, + "learning_rate": 2e-05, + "loss": 0.3507, + "step": 70 + }, + { + "epoch": 0.72, + "grad_norm": 0.46410471200942993, + "learning_rate": 2e-05, + "loss": 0.2623, + "step": 72 + }, + { + "epoch": 0.74, + "grad_norm": 0.7918888330459595, + "learning_rate": 2e-05, + "loss": 0.0959, + "step": 74 + }, + { + "epoch": 0.76, + "grad_norm": 0.08353512734174728, + "learning_rate": 2e-05, + "loss": 0.1951, + "step": 76 + }, + { + "epoch": 0.78, + "grad_norm": 0.790967583656311, + "learning_rate": 2e-05, + "loss": 0.1009, + "step": 78 + }, + { + "epoch": 0.8, + "grad_norm": 0.17255111038684845, + "learning_rate": 2e-05, + "loss": 0.3294, + "step": 80 + }, + { + "epoch": 0.82, + "grad_norm": 8.186681747436523, + "learning_rate": 2e-05, + "loss": 2.107, + "step": 82 + }, + { + "epoch": 0.84, + "grad_norm": 0.12467063218355179, + "learning_rate": 2e-05, + "loss": 0.0137, + "step": 84 + }, + { + "epoch": 0.86, + "grad_norm": 0.18037767708301544, + "learning_rate": 2e-05, + "loss": 0.2903, + "step": 86 + }, + { + "epoch": 0.88, + "grad_norm": 0.060748614370822906, + "learning_rate": 2e-05, + "loss": 0.012, + "step": 88 + }, + { + "epoch": 0.9, + "grad_norm": 2.207310438156128, + "learning_rate": 2e-05, + "loss": 0.1221, + "step": 90 + }, + { + "epoch": 0.92, + "grad_norm": 3.8041112422943115, + "learning_rate": 2e-05, + "loss": 0.9644, + "step": 92 + }, + { + "epoch": 0.94, + "grad_norm": 4.548851013183594, + "learning_rate": 2e-05, + "loss": 0.8892, + "step": 94 + }, + { + "epoch": 0.96, + "grad_norm": 0.18116363883018494, + "learning_rate": 2e-05, + "loss": 0.025, + "step": 96 + }, + { + "epoch": 0.98, + "grad_norm": 2.5681633949279785, + "learning_rate": 2e-05, + "loss": 0.1741, + "step": 98 + }, + { + "epoch": 1.0, + "grad_norm": 3.001899003982544, + "learning_rate": 2e-05, + "loss": 0.6574, + "step": 100 + }, + { + "epoch": 1.0, + "step": 100, + "total_flos": 5015595028316160.0, + "train_loss": 0.32558555364608766, + "train_runtime": 161.6528, + "train_samples_per_second": 2.474, + "train_steps_per_second": 0.619 + } + ], + "logging_steps": 2, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5015595028316160.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}