diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0b917b7bdff379540cffb33638bde6525e26d8 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe1dbc65f417209d09344d4e4995df5d78c5eef26666b716f886d90d26de53b +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..3981e7243f8f20a669737e3cd0d273e66f3af2d6 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0166898c5d59908d3d4e636b1e8705ab438946e36b7f214e4c120674652f0647 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..48f68fb7ece9e3bc67d7753db25bc44a05ceb071 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748f75b79de639a2a4af53a70cce66a8ca2082d031f11a4e62a32187924c4242 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4879b301c2a3f91c31b264467db72e6d9305dae --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf98a0fa6bcb6b3f4639a9d3ce8d0d4bc9f306fc5c94fe909c335ed5961f3391 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f52b080599e3d7cfff67416e628808b6c519b0fd --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d679df9ca797825f3a82c1c09077f75eeace1c8de08ab30c5fa922069db709e1 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9ca68fb541a1822f21d41409d661a46de61e761 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b197e79ebb6d8ae64ec6739b5cbf3cb2c42e1e175ddd75ee657f36a5e8f23cd +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f97fbe5fc3f9b262d964318a6550a819dc01abd --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc8ee78f3b179428383f77c08c7e1005b657efa43a78e8a1acbfab15a6f4244 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..694555e7efaaa99a0a8af6b72f932ab5ea6908df --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b0c72c61440e9ae802fcad9652d631f5f56aa289e047e253f7ee81d8019dfe +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f5525c9152eefd1b62838774e5902d64b1a18d1 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 0.7212343215942383, + "learning_rate": 2e-05, + "loss": 0.0439, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 11.383496284484863, + "learning_rate": 2e-05, + "loss": 0.5268, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 0.927216112613678, + "learning_rate": 2e-05, + "loss": 0.337, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 3.203252077102661, + "learning_rate": 2e-05, + "loss": 0.3553, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.08366145938634872, + "learning_rate": 2e-05, + "loss": 0.0917, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 6.82559061050415, + "learning_rate": 2e-05, + "loss": 0.6837, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 1.8658998012542725, + "learning_rate": 2e-05, + "loss": 0.0646, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 1.4391404390335083, + "learning_rate": 2e-05, + "loss": 0.0581, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.4982571601867676, + "learning_rate": 2e-05, + "loss": 0.3933, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.6830452680587769, + "learning_rate": 2e-05, + "loss": 0.1075, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 0.245327427983284, + "learning_rate": 2e-05, + "loss": 0.0312, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 10.72057056427002, + "learning_rate": 2e-05, + "loss": 0.38, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 1.239396095275879, + "learning_rate": 2e-05, + "loss": 0.0602, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 0.4094916880130768, + "learning_rate": 2e-05, + "loss": 0.0428, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 1.428717851638794, + "learning_rate": 2e-05, + "loss": 0.6015, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.11368861049413681, + "learning_rate": 2e-05, + "loss": 0.1259, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 1.6821730136871338, + "learning_rate": 2e-05, + "loss": 0.1679, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 7.368831157684326, + "learning_rate": 2e-05, + "loss": 0.3051, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 4.640942096710205, + "learning_rate": 2e-05, + "loss": 0.1196, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.3502234220504761, + "learning_rate": 2e-05, + "loss": 0.1621, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 2.8279569149017334, + "learning_rate": 2e-05, + "loss": 0.4888, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.6382105946540833, + "learning_rate": 2e-05, + "loss": 0.1357, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 0.40106961131095886, + "learning_rate": 2e-05, + "loss": 0.3606, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 5.146971702575684, + "learning_rate": 2e-05, + "loss": 0.1506, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 0.3846442997455597, + "learning_rate": 2e-05, + "loss": 0.0316, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5266647493378048.0, + "train_loss": 0.23302085906267167, + "train_runtime": 198.109, + "train_samples_per_second": 1.01, + "train_steps_per_second": 0.252 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5266647493378048.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..acf90144b02e8ba5b04571cf7c8844171a261ce9 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2aec2a71a2298864fb9cb71bbe157edfe31cb48c8758ed137adf555b57706c +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc3b8c2f987464182e08676d8b4853a0a13786ba --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b0271eee7daa4e6cf36df93827f4dd84b51c02fbc7eeb354a4584bc9eecc1c +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8b68658f5fc21e2b77c42fa3d553b7457931ec4 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c319587b2f08331b839f2419acb0e4bfacc9c57991286ab2a77ea96d9feb5cbc +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f327fc6d9d77dacd3a332618306d6d4e63c37a7 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9bb6d41e065f1080f20435a320c1edfe1f3c8cba9f031e54d7610b9e367f8a9 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e157a874c904146dc56802bc28aab221e0178eb6 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f72227369a7df43e68a0bb4081030172e220490ef080b56abc68a7bdce46ea6 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9098738b82b6a796e1183ec513066c7c3cb202d --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a435b97115a2feb785df092f369b70fa82f9fb72cf73f47bd559d40fa9b5a2a +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b70c318df3bc38368fd6f8de44add032b832b064 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0390a2236b53f82449b5243e68ffc4c185e44bc9517abac645cbedc5678052c9 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..28926174aed22532ccd887a0daa390ee9d6a93ac --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfbee4594e86c27822bb6a0d822dc075ba4815540812516b3e99f0069448cce +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..128f50ae3b275910e8a0cc57ce6499fd0b8efa05 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 0.2633078396320343, + "learning_rate": 2e-05, + "loss": 0.0962, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 18.888702392578125, + "learning_rate": 2e-05, + "loss": 0.2108, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 5.582844257354736, + "learning_rate": 2e-05, + "loss": 0.401, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.3722727596759796, + "learning_rate": 2e-05, + "loss": 0.0165, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.6710987687110901, + "learning_rate": 2e-05, + "loss": 0.0204, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 0.009970537386834621, + "learning_rate": 2e-05, + "loss": 0.0149, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.994692862033844, + "learning_rate": 2e-05, + "loss": 0.0285, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 0.025523852556943893, + "learning_rate": 2e-05, + "loss": 0.015, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 0.12230795621871948, + "learning_rate": 2e-05, + "loss": 0.1153, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 0.009217753075063229, + "learning_rate": 2e-05, + "loss": 0.0512, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 2.2184362411499023, + "learning_rate": 2e-05, + "loss": 0.1586, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 7.529627323150635, + "learning_rate": 2e-05, + "loss": 0.164, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 1.4156068563461304, + "learning_rate": 2e-05, + "loss": 0.1002, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 0.2617127001285553, + "learning_rate": 2e-05, + "loss": 0.0166, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 0.015605290420353413, + "learning_rate": 2e-05, + "loss": 0.0145, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.057093504816293716, + "learning_rate": 2e-05, + "loss": 0.015, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 0.13609035313129425, + "learning_rate": 2e-05, + "loss": 0.0151, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 0.03114943951368332, + "learning_rate": 2e-05, + "loss": 0.1649, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.026129912585020065, + "learning_rate": 2e-05, + "loss": 0.0145, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.42378631234169006, + "learning_rate": 2e-05, + "loss": 0.0229, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 1.1538219451904297, + "learning_rate": 2e-05, + "loss": 0.0286, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.1634211540222168, + "learning_rate": 2e-05, + "loss": 0.0363, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 0.014446967281401157, + "learning_rate": 2e-05, + "loss": 0.0142, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 0.0895637720823288, + "learning_rate": 2e-05, + "loss": 0.0152, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 0.36212360858917236, + "learning_rate": 2e-05, + "loss": 0.0169, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5303430465716224.0, + "train_loss": 0.07068853974342346, + "train_runtime": 198.2611, + "train_samples_per_second": 1.009, + "train_steps_per_second": 0.252 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5303430465716224.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c2f7851b2178f6e074756272331dfc39db0e578 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc38d3fec8aaf304a73dc0465221009621d12e35fe11437da6cdd8c90b9648e8 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b26066e3840f1c0b66b44737e256bdeebda1845 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4af31ee51a64d1dc941abff726f2d86b9aed76733dbd2124a460acb8f7fc752 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..69d67f5fef57dcdf91bbd654441ca573e0537154 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6760ac39e030ae9fa6ebc136bdc504c2239d72745f5b973ed81a1e0decdfc048 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..b975e83da9c6b9c480126af3a9a06107b747c6d5 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5eebbd87377d275d69626cccc34474e06e8611ca75b57099b23a759cb721b7 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9814297cd0d1a007a1adb9c07d8dd2ec2a71f6e9 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1198bde519b9e1569c1f5d34b7cd75ade0f1ccdede78bf1e1d666c2185198bf0 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed69c18b6b4c7d1937250d5c025d8d4dc022692e --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db7977fb93fec26d6d524d6bb78a926367418764df5ead8d7437c724c9d30d9 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..61ac39262e59eed8a9ac9138ba7aa56e4f01917f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e377ed9ce274f12fa63914553e2b4dd7b85b45fcaba55bde1fefdb0aee6e0f +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..01f2bfe9af346b3600417d804d3b1d98d7710f09 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154985bd935d4f05b0381b48c6606a5ef06bdc0053cdc7537f2dc22e5dc37771 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..58ba2f27782aa40179dc0cf34364faaf16eeda2a --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 3.617075204849243, + "learning_rate": 2e-05, + "loss": 0.1032, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.15696358680725098, + "learning_rate": 2e-05, + "loss": 0.1814, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 5.014780521392822, + "learning_rate": 2e-05, + "loss": 0.761, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 9.337811470031738, + "learning_rate": 2e-05, + "loss": 1.8625, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 6.394294261932373, + "learning_rate": 2e-05, + "loss": 0.6403, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 1.694501519203186, + "learning_rate": 2e-05, + "loss": 0.2702, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 27.820241928100586, + "learning_rate": 2e-05, + "loss": 0.3551, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 2.758286476135254, + "learning_rate": 2e-05, + "loss": 0.4164, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 1.9265002012252808, + "learning_rate": 2e-05, + "loss": 0.2844, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 3.5484347343444824, + "learning_rate": 2e-05, + "loss": 0.3019, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 2.66292405128479, + "learning_rate": 2e-05, + "loss": 0.2797, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 3.7674098014831543, + "learning_rate": 2e-05, + "loss": 0.404, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 7.080770969390869, + "learning_rate": 2e-05, + "loss": 0.4917, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 3.9330906867980957, + "learning_rate": 2e-05, + "loss": 0.3915, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 3.2564377784729004, + "learning_rate": 2e-05, + "loss": 0.242, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 1.0977956056594849, + "learning_rate": 2e-05, + "loss": 0.0854, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 4.018260478973389, + "learning_rate": 2e-05, + "loss": 0.5377, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 9.991900444030762, + "learning_rate": 2e-05, + "loss": 0.5558, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 1.1954278945922852, + "learning_rate": 2e-05, + "loss": 0.6506, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.9688401818275452, + "learning_rate": 2e-05, + "loss": 0.1332, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 5.815992832183838, + "learning_rate": 2e-05, + "loss": 0.3631, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 2.7181556224823, + "learning_rate": 2e-05, + "loss": 0.2599, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 4.442654132843018, + "learning_rate": 2e-05, + "loss": 0.191, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 7.31368350982666, + "learning_rate": 2e-05, + "loss": 0.4697, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 7.1462225914001465, + "learning_rate": 2e-05, + "loss": 0.3045, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5195491428007936.0, + "train_loss": 0.42144030570983887, + "train_runtime": 199.465, + "train_samples_per_second": 1.003, + "train_steps_per_second": 0.251 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5195491428007936.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..833c017fcfec18c819d6ca503525ac2fc4634c66 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fc4f7efb25926bfa826673aa01335f815c127661a07f3cbb64a95c8fdffb40 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..c102dd561d4eff96df1b594eef564d37cdb461aa --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b399960eea88ae61a0a1aa3dfd3c1813abcdad91c1b4224ea8933067755a06 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d66bc9868b5aaf3f7dd8db7af29fb1e6fed7dce --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89943c600b2b161ec6014448b900c8adf846847c4b9f68ffbf21de31f5e5d32c +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..19ce3a1a0f0efa797d6d12d0cf3cf064569f6bd7 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5699250baf94f3019eaeb1ef41db0f18f6dfd3e0c3bd0a112b411bd4170e0eb1 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..09df3dc5e48f33d585e07ba410247bf0fd1c9e13 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472fbc1f43c917af0bb1f5e94c2286225b782951b640924fbaa37c723473a9ad +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..e344b646a5afc122f93e7edd9d38372012443f33 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0e5b108c6b221edf19db70fe14233bf9b9b7422803f13e1c304e11f6e5911a +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..938f2ef3b16dcb7344e89f146efee03d9cb6d38f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c149fa6c092ba05bb37700e485f6699abc82de896528610d1635a195fc9c7a0 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b60dbc06e0cbae7ef329e244c707d54de1eddcfd --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59d5ce425a0645c183f6974a7833ce97c3e20569b720d8dec7176fff0fd0186 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6448adc145450dd62957e56b10cd8515af8fccb6 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 8.167108535766602, + "learning_rate": 2e-05, + "loss": 0.7666, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.970125138759613, + "learning_rate": 2e-05, + "loss": 0.4831, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 6.168637275695801, + "learning_rate": 2e-05, + "loss": 0.3263, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 4.275420188903809, + "learning_rate": 2e-05, + "loss": 0.4419, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 7.258407115936279, + "learning_rate": 2e-05, + "loss": 0.7066, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 5.875009059906006, + "learning_rate": 2e-05, + "loss": 0.8123, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 3.180713653564453, + "learning_rate": 2e-05, + "loss": 0.2229, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 1.822489619255066, + "learning_rate": 2e-05, + "loss": 0.245, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 8.324261665344238, + "learning_rate": 2e-05, + "loss": 0.6389, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 4.086201190948486, + "learning_rate": 2e-05, + "loss": 0.1874, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 4.469182968139648, + "learning_rate": 2e-05, + "loss": 0.6376, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 9.68209171295166, + "learning_rate": 2e-05, + "loss": 0.4976, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 15.532666206359863, + "learning_rate": 2e-05, + "loss": 1.6447, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 8.273395538330078, + "learning_rate": 2e-05, + "loss": 0.3491, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 7.468406677246094, + "learning_rate": 2e-05, + "loss": 0.6633, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 8.63184642791748, + "learning_rate": 2e-05, + "loss": 0.8022, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 12.6749267578125, + "learning_rate": 2e-05, + "loss": 0.7509, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 4.348174571990967, + "learning_rate": 2e-05, + "loss": 0.2364, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 4.658010005950928, + "learning_rate": 2e-05, + "loss": 0.5582, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 3.064706802368164, + "learning_rate": 2e-05, + "loss": 0.7061, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 15.28968334197998, + "learning_rate": 2e-05, + "loss": 1.1366, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 4.18186092376709, + "learning_rate": 2e-05, + "loss": 0.3874, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 2.199023723602295, + "learning_rate": 2e-05, + "loss": 0.4409, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 7.054344177246094, + "learning_rate": 2e-05, + "loss": 0.9599, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.990053653717041, + "learning_rate": 2e-05, + "loss": 0.3781, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5376319969820672.0, + "train_loss": 0.599202663898468, + "train_runtime": 198.5307, + "train_samples_per_second": 1.007, + "train_steps_per_second": 0.252 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5376319969820672.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..d91749a6516ed415332a8df5d15857d204b95662 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9eb75c8782194720b59887465c995e11d72f78c8aca656c42c9b469a59124a9 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..10484a0ea5f37dc64fa49d2081bc4e2fee1ca11e --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39a4efe80d568e273b5a623ab5c108b7e27d937316250436a587da562ac3d86 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d88aeb7a49ff921a6811ebab2e6e991d5bd9942 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f0af7ab102fbed3a844c53a4f7dcc5f21677451244983cab0caf1c747d76f5 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..28face82399565c8c6041d0822d3ed3f699443c8 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18987b20b7bbc6a88d60b2b91648ad3a08b577918fa565e2278a750aee69cc6d +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4858b74b63af0157370d382ce7daa5f1c5dabc9 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd496e40f9341b403fe665f4a3f2261ae72e39294e7a4678261659e562606ce +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a273d04e8f5e70877ab49e09cb4bfbbc8e2e95e --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d9e327003fd39d6bdcc0eabd84abc83f519cb69d6abde49f8d1544c15881a6 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2fd9c34b281f11d8b948829cebbd551e1a89521 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d90208da0627322e15237fe2b0bc8fccf40d8472da50b806d3941b4128ce6b8 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d514ca2cae77692fc4b019fd4c63b91f747122ce --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dc84e7ab1330bf113bc7de877ec9cedc283cb04d0aa6de93816d56ad6d6cbe +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a560cb2d57934c9406f2a1f85a0948b9a8d01cee --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 4.40165376663208, + "learning_rate": 2e-05, + "loss": 0.3787, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 3.2695400714874268, + "learning_rate": 2e-05, + "loss": 0.6385, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 2.2764341831207275, + "learning_rate": 2e-05, + "loss": 0.3251, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 2.679776191711426, + "learning_rate": 2e-05, + "loss": 0.3256, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 2.267791748046875, + "learning_rate": 2e-05, + "loss": 0.2853, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 3.7516863346099854, + "learning_rate": 2e-05, + "loss": 0.3355, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 4.205787181854248, + "learning_rate": 2e-05, + "loss": 0.3998, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 6.004105567932129, + "learning_rate": 2e-05, + "loss": 0.371, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 4.294463157653809, + "learning_rate": 2e-05, + "loss": 0.4596, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 3.1517653465270996, + "learning_rate": 2e-05, + "loss": 0.403, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 3.2551352977752686, + "learning_rate": 2e-05, + "loss": 0.3488, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 2.0919487476348877, + "learning_rate": 2e-05, + "loss": 0.5908, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 2.9000701904296875, + "learning_rate": 2e-05, + "loss": 0.4376, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 0.4982464909553528, + "learning_rate": 2e-05, + "loss": 0.2203, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 3.5159378051757812, + "learning_rate": 2e-05, + "loss": 0.4652, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 1.462558627128601, + "learning_rate": 2e-05, + "loss": 0.2046, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 3.924109935760498, + "learning_rate": 2e-05, + "loss": 0.8077, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 3.89208722114563, + "learning_rate": 2e-05, + "loss": 0.3069, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 1.9117887020111084, + "learning_rate": 2e-05, + "loss": 0.2479, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 4.854358673095703, + "learning_rate": 2e-05, + "loss": 0.3298, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 4.46043586730957, + "learning_rate": 2e-05, + "loss": 0.2984, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 7.219895362854004, + "learning_rate": 2e-05, + "loss": 0.6064, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 1.80936861038208, + "learning_rate": 2e-05, + "loss": 0.2453, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 2.554882287979126, + "learning_rate": 2e-05, + "loss": 0.1651, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 3.7274348735809326, + "learning_rate": 2e-05, + "loss": 0.4726, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 6021332957396992.0, + "train_loss": 0.38677083015441893, + "train_runtime": 198.1718, + "train_samples_per_second": 1.009, + "train_steps_per_second": 0.252 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6021332957396992.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..90a7365fe1568b25205e3cd0fe7a4d5626133716 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0509b8ce6cd3ed751cfef1e396c7179a3f9088099d5cd2cb5b457b319dcebb +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..d22067f94f312c9a320193870795628ff263bb1a --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f467a053337fecfa0f847cbc238ac4d4c5e851c9947c49765a915ebdf2da5f08 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..9220a80627fd9ab8f0461faae04c48b524571c5d --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a9446baf72a937c51e8e31dc17acd86d9cf2e6189c9082f6d32dc45a43491c +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..f32a5866aec200018bb50d31f5b6fb8c7fd8ae4f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7becdb1ad086320eb862ea9500d9bc7b91dd4e237a2a94fcd3b992d0c97c116b +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f549378125e12dd0bc1ad38f4bec60ce315312ae --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ec73292fd4cb63ea34768525c152e88f9998ee0c947f7d860eae16ddd53f7c +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7a0233bfa1736c947c989f4ccfb28d4a24ae100 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb3da503bae4a0418362f1311c5714e63586c47f45c8d5951e859954a4367a0 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9484abf5995d2ce67b9228fb0876b82083e0b3d --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2691cd49582d13d02d083bf3d22f371150b28a6264946c490a3e3a0e980b4bd8 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3c8a16c416f91b5fa84f1105f3a6c589b1cbf2f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9c2ca04a8c8bbd5305d820c8f6a26a15170381a642343b337f9a709ce7a2a9 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..623e057cb738769d45b85b245d82499af9469d50 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 2.253838539123535, + "learning_rate": 2e-05, + "loss": 0.7689, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 0.6777926087379456, + "learning_rate": 2e-05, + "loss": 0.0446, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 4.97540283203125, + "learning_rate": 2e-05, + "loss": 0.1319, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.14813171327114105, + "learning_rate": 2e-05, + "loss": 0.0268, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 0.14487019181251526, + "learning_rate": 2e-05, + "loss": 0.0269, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 4.96938419342041, + "learning_rate": 2e-05, + "loss": 0.4167, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.14172860980033875, + "learning_rate": 2e-05, + "loss": 0.0325, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 4.142360210418701, + "learning_rate": 2e-05, + "loss": 0.1072, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 3.568012237548828, + "learning_rate": 2e-05, + "loss": 0.5318, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 0.7577732801437378, + "learning_rate": 2e-05, + "loss": 0.064, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 1.098547339439392, + "learning_rate": 2e-05, + "loss": 0.042, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 5.061567783355713, + "learning_rate": 2e-05, + "loss": 0.3515, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 3.1012821197509766, + "learning_rate": 2e-05, + "loss": 0.1204, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 10.857671737670898, + "learning_rate": 2e-05, + "loss": 0.4664, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 0.43176135420799255, + "learning_rate": 2e-05, + "loss": 0.0568, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.4733089208602905, + "learning_rate": 2e-05, + "loss": 0.0387, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 3.789149045944214, + "learning_rate": 2e-05, + "loss": 0.323, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 1.5445152521133423, + "learning_rate": 2e-05, + "loss": 0.0849, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 9.442593574523926, + "learning_rate": 2e-05, + "loss": 0.8493, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.20852471888065338, + "learning_rate": 2e-05, + "loss": 0.0281, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 13.520870208740234, + "learning_rate": 2e-05, + "loss": 0.4005, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.001111626625061, + "learning_rate": 2e-05, + "loss": 0.0727, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 7.711246013641357, + "learning_rate": 2e-05, + "loss": 0.3667, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 2.8964767456054688, + "learning_rate": 2e-05, + "loss": 0.0795, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 2.4585280418395996, + "learning_rate": 2e-05, + "loss": 0.0718, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5263391975276544.0, + "train_loss": 0.2201457965373993, + "train_runtime": 196.4314, + "train_samples_per_second": 1.018, + "train_steps_per_second": 0.255 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5263391975276544.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..237b2a0fa8b776f60b6b4e23ad986b67c2db70ea --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541a8d1482e51b0c3251ff6026005b7dae1218a49af04be9ad482f8156c3819b +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..e051c77c7f7f10ed558ed39244a8fbb16280927b --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87b84d110fb62665664f1330448f383b0a6e00e4df4973be04824a789d16a58 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..07ca9b146c718081b5f924edcbaaaf595c9a0547 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d792a5735217d8bd605d775e01f60ff52fdca8e6eee45d6abc2baecd161be18f +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..850f825c4271a0efb26bcdd137cd4955578cdd92 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:579ae830b5cb95ac07328f15fb8aaf686c70dd90abd03aef1287eb73b3f03308 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa106a6d0665a84c5855a82fb45e8c770bc2efa8 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb228206e28da9f4e2f7c4cbb31c40a741b71e18c2f42b461b78c9f336a8eae +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..0cea0553914eb572909a0d7703ed0f3066dd02a2 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6689917df408dad9fad5d11efd2ce39d411a89e5b412a5c42b1626c755b80bb +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0239b6a62827a091633320b27e9c5cdb39426835 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0424708b319353e1f11e912dd94316cea5045a2a22fe95ded939b9d8bb56d95 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f3afe199f8030bbb571f7b6b0755e6861866c5c --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e1284f4b6624b00d544dc156f860bbc61d33792505c3d95d5d4b09eb90c921 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..924a55867429f500c7c0a3d55cd4b04628d59d9f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/6_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 2.418375253677368, + "learning_rate": 2e-05, + "loss": 0.4023, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 2.455158233642578, + "learning_rate": 2e-05, + "loss": 0.4319, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 2.8722236156463623, + "learning_rate": 2e-05, + "loss": 0.3759, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 5.506226062774658, + "learning_rate": 2e-05, + "loss": 0.5202, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 3.3219704627990723, + "learning_rate": 2e-05, + "loss": 0.5562, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 2.970031976699829, + "learning_rate": 2e-05, + "loss": 0.4389, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 1.8053407669067383, + "learning_rate": 2e-05, + "loss": 0.2381, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 3.662895441055298, + "learning_rate": 2e-05, + "loss": 0.5208, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 2.492485761642456, + "learning_rate": 2e-05, + "loss": 0.3842, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 2.9946165084838867, + "learning_rate": 2e-05, + "loss": 0.3526, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 2.865755081176758, + "learning_rate": 2e-05, + "loss": 0.6324, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 3.3268473148345947, + "learning_rate": 2e-05, + "loss": 0.3807, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 3.8017077445983887, + "learning_rate": 2e-05, + "loss": 0.364, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 2.986394166946411, + "learning_rate": 2e-05, + "loss": 0.2633, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 5.470019340515137, + "learning_rate": 2e-05, + "loss": 0.8126, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 4.85556173324585, + "learning_rate": 2e-05, + "loss": 0.451, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 6.917935371398926, + "learning_rate": 2e-05, + "loss": 0.7146, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 4.302242279052734, + "learning_rate": 2e-05, + "loss": 0.2237, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 3.0060131549835205, + "learning_rate": 2e-05, + "loss": 0.7171, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 7.120171070098877, + "learning_rate": 2e-05, + "loss": 1.0315, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 12.922983169555664, + "learning_rate": 2e-05, + "loss": 0.7664, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 7.315447807312012, + "learning_rate": 2e-05, + "loss": 0.6498, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 5.949100971221924, + "learning_rate": 2e-05, + "loss": 0.8075, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 4.7041778564453125, + "learning_rate": 2e-05, + "loss": 0.4437, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 3.6519830226898193, + "learning_rate": 2e-05, + "loss": 0.4485, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 1.04136867577856e+16, + "train_loss": 0.5171078491210938, + "train_runtime": 222.8503, + "train_samples_per_second": 0.897, + "train_steps_per_second": 0.224 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.04136867577856e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..486f0d6c2f6528a8af380087480e2099a682e16c --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873eea8957d9e3877c665949da9a83fee7dd8c6dcdd318f9afa0a21f7ac067ba +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..857a583a8739e06622409146e1e2f440fe0f4b90 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9311fa983b0ddcc2468a75e881eb3d868019ba87188227734097a58117e4a726 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba670b8ede9b2abd4c4daf30e83d48d00220095a --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e53b2c806b2ed1ad9493a7fac0a07768fa20f8a2ec3dc54b87ce6e8e5470f6d +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d630b056688a69cc8b6582606527151252ffedd --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9820a2b83d71da37227de5a570b536b52890c0c5065a0335c2c5a657f3df3a70 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..974fb00658dd1f3f7d98f349c045a6d3895a3d0d --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce75e43e9f2811981958e1bfb6f23aafe9e40959bf394b18b259419199b59aaf +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..31bcbb9828e67871444fffbe3d05a49d552301cb --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898f26e1ce6bf64bd290f3f97c64908935049056a5b237d8ecc5f8e7cb7a607f +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe8eae76d8e1b61cc6f010861887bc836aeef4c7 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793a9fc89c3e9571c5b1183fefff7cd48ef6a6e8bebd1b7dc307ac9b2fe45557 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..87e46bd0d8e314a09f340ed50d1d5e72a3cc1d65 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6987cfb58cd130e8a16f1dafada44fe85318bc23ef571b334abbaa00f546a3e5 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e38bc94575b8b09bba6442e67706ca15fb92fec --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/7_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 3.306112051010132, + "learning_rate": 2e-05, + "loss": 0.2753, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 1.2943041324615479, + "learning_rate": 2e-05, + "loss": 0.1407, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 0.34055498242378235, + "learning_rate": 2e-05, + "loss": 0.0929, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 3.733074426651001, + "learning_rate": 2e-05, + "loss": 0.1501, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 8.425883293151855, + "learning_rate": 2e-05, + "loss": 0.5905, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 3.776273488998413, + "learning_rate": 2e-05, + "loss": 0.2494, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 5.3035736083984375, + "learning_rate": 2e-05, + "loss": 0.1907, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 2.9549784660339355, + "learning_rate": 2e-05, + "loss": 0.1299, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 3.8504064083099365, + "learning_rate": 2e-05, + "loss": 0.1885, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 8.078208923339844, + "learning_rate": 2e-05, + "loss": 0.6322, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 3.761375665664673, + "learning_rate": 2e-05, + "loss": 0.1396, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 7.369710922241211, + "learning_rate": 2e-05, + "loss": 0.7341, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 2.9738850593566895, + "learning_rate": 2e-05, + "loss": 0.7377, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 7.1790337562561035, + "learning_rate": 2e-05, + "loss": 1.9319, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 5.975342273712158, + "learning_rate": 2e-05, + "loss": 0.48, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 5.141521453857422, + "learning_rate": 2e-05, + "loss": 0.3765, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 7.111236572265625, + "learning_rate": 2e-05, + "loss": 0.8246, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 3.848470687866211, + "learning_rate": 2e-05, + "loss": 0.446, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 0.8427064418792725, + "learning_rate": 2e-05, + "loss": 0.0956, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 0.8990786671638489, + "learning_rate": 2e-05, + "loss": 0.1468, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 3.298734426498413, + "learning_rate": 2e-05, + "loss": 0.2454, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 1.7264102697372437, + "learning_rate": 2e-05, + "loss": 0.2139, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 3.7704410552978516, + "learning_rate": 2e-05, + "loss": 0.4352, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 4.57963752746582, + "learning_rate": 2e-05, + "loss": 0.3364, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 4.588264465332031, + "learning_rate": 2e-05, + "loss": 0.576, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5463964917956608.0, + "train_loss": 0.41438937187194824, + "train_runtime": 190.2263, + "train_samples_per_second": 1.051, + "train_steps_per_second": 0.263 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5463964917956608.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth new file mode 100644 index 0000000000000000000000000000000000000000..46d0a77f1ada368576bbd96758cf5656e7a11b54 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7767c8c274741f7d0c462f93001b475e627570c34eb7e701e38e5bfdaa2906 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth new file mode 100644 index 0000000000000000000000000000000000000000..310242be9e12792c968e42bebe1b0bc4a9eb4e5f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca80e80298f85059991ed0f9935ccb0bb2ca0fef83bfca9343535f96d343ae3 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ea8224a8e8da6942aeb6df63aa4613e49f3be01 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749eb9d893916e4e7e46e8224a5db378375de14aaf9e9fed53dd0317972d74cb +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth new file mode 100644 index 0000000000000000000000000000000000000000..e5426b3c0fa0b302a535912da9172c43478ac023 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0186ce23fd00a019ec954e19ee0f187ddb986d18cb3aec177f83ad66f16ce69c +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a6b9dd50f11ecfbab968e58a390b1ef35171dfe9 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291b6beaf9da202553ca241aa56163c32fc77cef7fa0986ed785c645929d7ff8 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed1cfdd32e2cde96b53522cc5e5f3057940a8414 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b162a4fdcf67c6257bd05878995fa9d7ac83b285cecad84469ae07145ec6fab4 +size 778341886 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth new file mode 100644 index 0000000000000000000000000000000000000000..06275507d87ae7c01de7d1bb78f4a3f77b42b563 --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32ad98a183604516199b668174621bffe8a33bb1440db17547a378667ee97ab +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f2f8a3b49ec1993c0016c22724b1ee26df0216b --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_client_model_round7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ccaf866ae919b1d8feef7f9fe3a8980bbc0f5d2f70116378a088e8ef1db3804 +size 778341034 diff --git a/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0761517ea3e3fc550f0c570984f414f5558cc64f --- /dev/null +++ b/client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/8_trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 5.241274356842041, + "learning_rate": 2e-05, + "loss": 0.6369, + "step": 2 + }, + { + "epoch": 0.16, + "grad_norm": 2.728149890899658, + "learning_rate": 2e-05, + "loss": 0.0976, + "step": 4 + }, + { + "epoch": 0.24, + "grad_norm": 5.103238105773926, + "learning_rate": 2e-05, + "loss": 0.2714, + "step": 6 + }, + { + "epoch": 0.32, + "grad_norm": 0.863260805606842, + "learning_rate": 2e-05, + "loss": 0.1161, + "step": 8 + }, + { + "epoch": 0.4, + "grad_norm": 1.4046486616134644, + "learning_rate": 2e-05, + "loss": 0.0504, + "step": 10 + }, + { + "epoch": 0.48, + "grad_norm": 9.768682479858398, + "learning_rate": 2e-05, + "loss": 0.5629, + "step": 12 + }, + { + "epoch": 0.56, + "grad_norm": 0.20039866864681244, + "learning_rate": 2e-05, + "loss": 0.0453, + "step": 14 + }, + { + "epoch": 0.64, + "grad_norm": 5.7377142906188965, + "learning_rate": 2e-05, + "loss": 0.3366, + "step": 16 + }, + { + "epoch": 0.72, + "grad_norm": 2.605027437210083, + "learning_rate": 2e-05, + "loss": 0.074, + "step": 18 + }, + { + "epoch": 0.8, + "grad_norm": 1.2984591722488403, + "learning_rate": 2e-05, + "loss": 0.0532, + "step": 20 + }, + { + "epoch": 0.88, + "grad_norm": 0.8835803270339966, + "learning_rate": 2e-05, + "loss": 0.0536, + "step": 22 + }, + { + "epoch": 0.96, + "grad_norm": 7.737273216247559, + "learning_rate": 2e-05, + "loss": 0.1842, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 9.503424644470215, + "learning_rate": 2e-05, + "loss": 0.3994, + "step": 26 + }, + { + "epoch": 1.12, + "grad_norm": 9.212540626525879, + "learning_rate": 2e-05, + "loss": 0.9938, + "step": 28 + }, + { + "epoch": 1.2, + "grad_norm": 5.0764007568359375, + "learning_rate": 2e-05, + "loss": 1.0308, + "step": 30 + }, + { + "epoch": 1.28, + "grad_norm": 0.7189428806304932, + "learning_rate": 2e-05, + "loss": 0.2345, + "step": 32 + }, + { + "epoch": 1.36, + "grad_norm": 5.756411075592041, + "learning_rate": 2e-05, + "loss": 0.5859, + "step": 34 + }, + { + "epoch": 1.44, + "grad_norm": 4.120758056640625, + "learning_rate": 2e-05, + "loss": 0.379, + "step": 36 + }, + { + "epoch": 1.52, + "grad_norm": 6.6215410232543945, + "learning_rate": 2e-05, + "loss": 0.4861, + "step": 38 + }, + { + "epoch": 1.6, + "grad_norm": 7.712436199188232, + "learning_rate": 2e-05, + "loss": 0.2618, + "step": 40 + }, + { + "epoch": 1.68, + "grad_norm": 0.12126444280147552, + "learning_rate": 2e-05, + "loss": 0.1067, + "step": 42 + }, + { + "epoch": 1.76, + "grad_norm": 0.47795167565345764, + "learning_rate": 2e-05, + "loss": 0.0805, + "step": 44 + }, + { + "epoch": 1.84, + "grad_norm": 7.313605785369873, + "learning_rate": 2e-05, + "loss": 0.6171, + "step": 46 + }, + { + "epoch": 1.92, + "grad_norm": 0.5465237498283386, + "learning_rate": 2e-05, + "loss": 0.1703, + "step": 48 + }, + { + "epoch": 2.0, + "grad_norm": 5.403122425079346, + "learning_rate": 2e-05, + "loss": 0.4354, + "step": 50 + }, + { + "epoch": 2.0, + "step": 50, + "total_flos": 5307996569600000.0, + "train_loss": 0.33054131269454956, + "train_runtime": 189.6285, + "train_samples_per_second": 1.055, + "train_steps_per_second": 0.264 + } + ], + "logging_steps": 2, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5307996569600000.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}