Upload folder using huggingface_hub
Browse files- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_trainer_state.json +105 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_trainer_state.json +105 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_trainer_state.json +105 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_trainer_state.json +105 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_trainer_state.json +105 -0
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28aa2dbdd5a2cb91ffe60a803036b136cfb2350aaa298cf4c8096fc89cf03c4c
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24207413f2d25bb96a0a19994c66252ce6ca2e7865e0f8aa6f51689816c0736a
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5faccd76dc8cec5cf4da79a6858c65760d07ce2f63a3e1bffc5a064940154e29
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f8230eae84fdb3c24bdfdf7757f52935507883928a0dfec41fe75adc3cf7cf9
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d5bf4132b74c1164b5c45e325217a0385e48ca8e1799b23ca60fd00202be026
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77a63fbd57196c9ebaf4163a4948bab745e70f5dc4598c47fe82d09eff88034f
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aab60b315de5d6935f2b1ce74970522ec1ce02b9b2bd769ea62fc0c5068f917
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e4903a6b2fb85e3cf85929160f98ac782dee0f74a92f97b2c5f6fad3cb90237
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 18,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.1111111111111111,
|
| 13 |
+
"grad_norm": 0.30647119879722595,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.5778,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.2222222222222222,
|
| 20 |
+
"grad_norm": 0.2833655774593353,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.4192,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3333333333333333,
|
| 27 |
+
"grad_norm": 0.3433806598186493,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.734,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.4444444444444444,
|
| 34 |
+
"grad_norm": 0.4032728970050812,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.5182,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.5555555555555556,
|
| 41 |
+
"grad_norm": 0.268047958612442,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.4194,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.6666666666666666,
|
| 48 |
+
"grad_norm": 0.2353041023015976,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.5757,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.7777777777777778,
|
| 55 |
+
"grad_norm": 0.23823396861553192,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.5309,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.8888888888888888,
|
| 62 |
+
"grad_norm": 0.27969422936439514,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.7769,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 1.0,
|
| 69 |
+
"grad_norm": 0.2893766760826111,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.5685,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 1.0,
|
| 76 |
+
"step": 18,
|
| 77 |
+
"total_flos": 693210355597312.0,
|
| 78 |
+
"train_loss": 0.5689557558960385,
|
| 79 |
+
"train_runtime": 32.053,
|
| 80 |
+
"train_samples_per_second": 2.246,
|
| 81 |
+
"train_steps_per_second": 0.562
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"logging_steps": 2,
|
| 85 |
+
"max_steps": 18,
|
| 86 |
+
"num_input_tokens_seen": 0,
|
| 87 |
+
"num_train_epochs": 1,
|
| 88 |
+
"save_steps": 500,
|
| 89 |
+
"stateful_callbacks": {
|
| 90 |
+
"TrainerControl": {
|
| 91 |
+
"args": {
|
| 92 |
+
"should_epoch_stop": false,
|
| 93 |
+
"should_evaluate": false,
|
| 94 |
+
"should_log": false,
|
| 95 |
+
"should_save": false,
|
| 96 |
+
"should_training_stop": false
|
| 97 |
+
},
|
| 98 |
+
"attributes": {}
|
| 99 |
+
}
|
| 100 |
+
},
|
| 101 |
+
"total_flos": 693210355597312.0,
|
| 102 |
+
"train_batch_size": 1,
|
| 103 |
+
"trial_name": null,
|
| 104 |
+
"trial_params": null
|
| 105 |
+
}
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1469cf98479338d6d40e939d4282a6fe891a79ff356e91f53f8565a40b080d91
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a82fcd8c6f4f2d06ae73f7af030292ba08d30f76198103823cf3464af0dd82
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e274aa30982f5afff4c50466a602a6d4bc7bc5cb1bc85e3a26013755ba70f3f
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9b000d435981f54d44f02cf09369f7ceb9b1f1e23a001b2498da0c27ef77d03
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53f4e414ed40e2e346a1479bc2c740e21b11e984af97779bf8d5bde9186cb1de
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10fc1efcd61e613e2c355bbb1139042d5130b1e29bd1926150653073be82ad3f
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac8b6161beb65659bf582e9899cfd7df2b9d7c325d027c3c0ae3488960cc28e7
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05a9bd91ee01aa0de2783d4410bbec5c7f0b001b2ce1e32c70d5073ab5b66307
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 18,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.1111111111111111,
|
| 13 |
+
"grad_norm": 0.47776493430137634,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 1.2538,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.2222222222222222,
|
| 20 |
+
"grad_norm": 0.5145998597145081,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.9262,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3333333333333333,
|
| 27 |
+
"grad_norm": 0.8327056169509888,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.5606,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.4444444444444444,
|
| 34 |
+
"grad_norm": 0.2911861538887024,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 2.2479,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.5555555555555556,
|
| 41 |
+
"grad_norm": 0.4601462185382843,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 2.0639,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.6666666666666666,
|
| 48 |
+
"grad_norm": 0.35323259234428406,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 2.2254,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.7777777777777778,
|
| 55 |
+
"grad_norm": 0.42169633507728577,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.4212,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.8888888888888888,
|
| 62 |
+
"grad_norm": 0.3550553321838379,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.8081,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 1.0,
|
| 69 |
+
"grad_norm": 0.450255811214447,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 1.9508,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 1.0,
|
| 76 |
+
"step": 18,
|
| 77 |
+
"total_flos": 180852809007104.0,
|
| 78 |
+
"train_loss": 1.8286501566569011,
|
| 79 |
+
"train_runtime": 27.6928,
|
| 80 |
+
"train_samples_per_second": 2.6,
|
| 81 |
+
"train_steps_per_second": 0.65
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"logging_steps": 2,
|
| 85 |
+
"max_steps": 18,
|
| 86 |
+
"num_input_tokens_seen": 0,
|
| 87 |
+
"num_train_epochs": 1,
|
| 88 |
+
"save_steps": 500,
|
| 89 |
+
"stateful_callbacks": {
|
| 90 |
+
"TrainerControl": {
|
| 91 |
+
"args": {
|
| 92 |
+
"should_epoch_stop": false,
|
| 93 |
+
"should_evaluate": false,
|
| 94 |
+
"should_log": false,
|
| 95 |
+
"should_save": false,
|
| 96 |
+
"should_training_stop": false
|
| 97 |
+
},
|
| 98 |
+
"attributes": {}
|
| 99 |
+
}
|
| 100 |
+
},
|
| 101 |
+
"total_flos": 180852809007104.0,
|
| 102 |
+
"train_batch_size": 1,
|
| 103 |
+
"trial_name": null,
|
| 104 |
+
"trial_params": null
|
| 105 |
+
}
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66dd298a08524504e02129db67bcdf680f3468201fe461af547844c188c1e563
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2385fb8b486ab6f0af28431e5ca6e739a00bfb1acfb02fa399507ea196b3eb27
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afb7c1ebf07fb66c773d46976bbff4568a7eab6dcadaf50deffc5f566e16c084
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c73ae37a26858266db1c7efd54974096738f09578f2f233eef4f87667749cff3
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22962645eeb6477fdeab7e9c90d1a01912c27cb91c2f4f85960aaec53107233f
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db9e95af3892445f63839188aac1fcb7a9a2b0b689f0fcff7184f93ac01f09d
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb8167425439920ae6e7abdae57cb3a0b43160b01cf8814b23627611f0f2a727
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:414f7ae19345c4e11b7d69a5de17855ffcfe14291ed45d28c01dc6ed7867859f
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 18,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.1111111111111111,
|
| 13 |
+
"grad_norm": 0.3366261124610901,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 1.687,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.2222222222222222,
|
| 20 |
+
"grad_norm": 0.33821889758110046,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.2887,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3333333333333333,
|
| 27 |
+
"grad_norm": 0.2697903513908386,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.7096,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.4444444444444444,
|
| 34 |
+
"grad_norm": 0.3804328143596649,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.9446,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.5555555555555556,
|
| 41 |
+
"grad_norm": 0.36493733525276184,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.6892,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.6666666666666666,
|
| 48 |
+
"grad_norm": 0.24683411419391632,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 2.2283,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.7777777777777778,
|
| 55 |
+
"grad_norm": 0.35917943716049194,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.7543,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.8888888888888888,
|
| 62 |
+
"grad_norm": 0.2083004117012024,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.2117,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 1.0,
|
| 69 |
+
"grad_norm": 0.2108473777770996,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 1.6034,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 1.0,
|
| 76 |
+
"step": 18,
|
| 77 |
+
"total_flos": 410714315423744.0,
|
| 78 |
+
"train_loss": 1.5685436924298604,
|
| 79 |
+
"train_runtime": 28.6802,
|
| 80 |
+
"train_samples_per_second": 2.51,
|
| 81 |
+
"train_steps_per_second": 0.628
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"logging_steps": 2,
|
| 85 |
+
"max_steps": 18,
|
| 86 |
+
"num_input_tokens_seen": 0,
|
| 87 |
+
"num_train_epochs": 1,
|
| 88 |
+
"save_steps": 500,
|
| 89 |
+
"stateful_callbacks": {
|
| 90 |
+
"TrainerControl": {
|
| 91 |
+
"args": {
|
| 92 |
+
"should_epoch_stop": false,
|
| 93 |
+
"should_evaluate": false,
|
| 94 |
+
"should_log": false,
|
| 95 |
+
"should_save": false,
|
| 96 |
+
"should_training_stop": false
|
| 97 |
+
},
|
| 98 |
+
"attributes": {}
|
| 99 |
+
}
|
| 100 |
+
},
|
| 101 |
+
"total_flos": 410714315423744.0,
|
| 102 |
+
"train_batch_size": 1,
|
| 103 |
+
"trial_name": null,
|
| 104 |
+
"trial_params": null
|
| 105 |
+
}
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15a1e1b6d9534f9e15dc78b32f0307174cd3632101966d6aea2fdbcaeec98f46
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:387c90acff387fb45312d33cfad4d8de380bf335db59d7d431f87a0ae383899b
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b94f6b99d0d17d27e23dce721c59e24fe5afb8536028d0cc6164babb089ec06
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95493677d6d8e7d0d1d9e2e1517b518fa814449aafc3f656945f5bf34c36027d
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b02cce2c79fad6b5ca18a6f0e408f52a5dacd426d817772a06cfa569301b193
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa8c4537d5760552746414af4a9543ffce91dcdf4c122fb4a56163c497ff31bb
|
| 3 |
+
size 48770134
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e19696d81a063f589d2d71303c78242444c8c242b6acbe06d3354db5a70ac8e2
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:431fd7e078dd540be8607aa45353d4d29ab4c0960d44d7f1814f687ce6629c1e
|
| 3 |
+
size 48769674
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 18,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.1111111111111111,
|
| 13 |
+
"grad_norm": 0.24904797971248627,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.7065,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.2222222222222222,
|
| 20 |
+
"grad_norm": 0.16937297582626343,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.7515,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3333333333333333,
|
| 27 |
+
"grad_norm": 0.18721012771129608,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.7237,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.4444444444444444,
|
| 34 |
+
"grad_norm": 0.16937831044197083,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.5668,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.5555555555555556,
|
| 41 |
+
"grad_norm": 0.14968672394752502,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.7673,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.6666666666666666,
|
| 48 |
+
"grad_norm": 0.15415261685848236,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.7073,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.7777777777777778,
|
| 55 |
+
"grad_norm": 0.19837412238121033,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.742,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.8888888888888888,
|
| 62 |
+
"grad_norm": 0.1668744832277298,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.9476,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 1.0,
|
| 69 |
+
"grad_norm": 0.19677592813968658,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.6554,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 1.0,
|
| 76 |
+
"step": 18,
|
| 77 |
+
"total_flos": 1003185539907584.0,
|
| 78 |
+
"train_loss": 0.7297993501027426,
|
| 79 |
+
"train_runtime": 34.7761,
|
| 80 |
+
"train_samples_per_second": 2.07,
|
| 81 |
+
"train_steps_per_second": 0.518
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"logging_steps": 2,
|
| 85 |
+
"max_steps": 18,
|
| 86 |
+
"num_input_tokens_seen": 0,
|
| 87 |
+
"num_train_epochs": 1,
|
| 88 |
+
"save_steps": 500,
|
| 89 |
+
"stateful_callbacks": {
|
| 90 |
+
"TrainerControl": {
|
| 91 |
+
"args": {
|
| 92 |
+
"should_epoch_stop": false,
|
| 93 |
+
"should_evaluate": false,
|
| 94 |
+
"should_log": false,
|
| 95 |
+
"should_save": false,
|
| 96 |
+
"should_training_stop": false
|
| 97 |
+
},
|
| 98 |
+
"attributes": {}
|
| 99 |
+
}
|
| 100 |
+
},
|
| 101 |
+
"total_flos": 1003185539907584.0,
|
| 102 |
+
"train_batch_size": 1,
|
| 103 |
+
"trial_name": null,
|
| 104 |
+
"trial_params": null
|
| 105 |
+
}
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cbd65564608dc1f14bc3b1fcdd3d9952a602c50c5a9bdda73115cd5311c0a60
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14fd00dd7efd3f8b7ecfbc586bb405a9c0cb01f10f1ec332676b0b00c2872352
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0338d2f8b2be6cb0c42546ce0eee8ffccb7f3389430b4103fb4bbce9047342
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a027e1c35d899c5904cdf31bfbebc131f0bcb852b0e5c77f2c78f41cdd3b4885
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd1e0e9d11f82bdafd7eaec41773895d888f61ac8fc081847ae1f018dfb48a28
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba1fcd628fdd6297b3a11a366310d476e4db1191f1a73ea9e1f290fce8a4761e
|
| 3 |
+
size 84049390
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:930cd38135f872e0e2c9a951a0390210db6d7b315908173cee6a88b9dcd07f0f
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd2af3c7beef270aeb226fd225302e8a600918aca6b0a976c2f9b18b40c7f41
|
| 3 |
+
size 84048874
|
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 18,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.1111111111111111,
|
| 13 |
+
"grad_norm": 0.1849832981824875,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.852,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.2222222222222222,
|
| 20 |
+
"grad_norm": 0.21231615543365479,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.1432,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3333333333333333,
|
| 27 |
+
"grad_norm": 0.15320207178592682,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.6513,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.4444444444444444,
|
| 34 |
+
"grad_norm": 0.17886720597743988,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.7447,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.5555555555555556,
|
| 41 |
+
"grad_norm": 0.2148554027080536,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.9758,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.6666666666666666,
|
| 48 |
+
"grad_norm": 0.1558452695608139,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.7666,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.7777777777777778,
|
| 55 |
+
"grad_norm": 0.3737614154815674,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.7469,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.8888888888888888,
|
| 62 |
+
"grad_norm": 0.20555411279201508,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.0352,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 1.0,
|
| 69 |
+
"grad_norm": 0.16557130217552185,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.7228,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 1.0,
|
| 76 |
+
"step": 18,
|
| 77 |
+
"total_flos": 2635458219606016.0,
|
| 78 |
+
"train_loss": 0.8487372001012167,
|
| 79 |
+
"train_runtime": 49.5256,
|
| 80 |
+
"train_samples_per_second": 1.454,
|
| 81 |
+
"train_steps_per_second": 0.363
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"logging_steps": 2,
|
| 85 |
+
"max_steps": 18,
|
| 86 |
+
"num_input_tokens_seen": 0,
|
| 87 |
+
"num_train_epochs": 1,
|
| 88 |
+
"save_steps": 500,
|
| 89 |
+
"stateful_callbacks": {
|
| 90 |
+
"TrainerControl": {
|
| 91 |
+
"args": {
|
| 92 |
+
"should_epoch_stop": false,
|
| 93 |
+
"should_evaluate": false,
|
| 94 |
+
"should_log": false,
|
| 95 |
+
"should_save": false,
|
| 96 |
+
"should_training_stop": false
|
| 97 |
+
},
|
| 98 |
+
"attributes": {}
|
| 99 |
+
}
|
| 100 |
+
},
|
| 101 |
+
"total_flos": 2635458219606016.0,
|
| 102 |
+
"train_batch_size": 1,
|
| 103 |
+
"trial_name": null,
|
| 104 |
+
"trial_params": null
|
| 105 |
+
}
|