Upload folder using huggingface_hub
Browse files- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_trainer_state.json +147 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_trainer_state.json +147 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_trainer_state.json +147 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_trainer_state.json +147 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_trainer_state.json +147 -0
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7683bb4ea2a24dff622304d61701404a0684195b95f9250e01e70995b9f33b60
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0a8f21d51f1d852500947c6f6cd4ac3c7bf8d2f78ca632e5da650501051a821
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efdf8d36d0c485c31fabbec778c518ab69e13effb160d8b75bb54118131f4672
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad8f4ea3a7bdefd00087f485fc726583cd3cbd85b917881387e5ea2b95ab34a8
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9d462b48368e9cad91c2ccf4c5c05c7752edba6bf41e12f73d3f1df82bed250
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61eac975c6482170877e9be8e10a3dda12ff6115f7e0a496a95fb736de87791
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4c83ade3092b38a35d67cde82a2166da52dae0f76f25a41889628aa13c76631
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe15bd6d2f28b6947c6a677d0cf05b69240312e24e89c89412d82c828f62ee00
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/0_trainer_state.json
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06666666666666667,
|
| 13 |
+
"grad_norm": 0.24416396021842957,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.4589,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13333333333333333,
|
| 20 |
+
"grad_norm": 0.2861782908439636,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.5497,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.2,
|
| 27 |
+
"grad_norm": 0.28807416558265686,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.5351,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.26666666666666666,
|
| 34 |
+
"grad_norm": 0.3524809181690216,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.6347,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3333333333333333,
|
| 41 |
+
"grad_norm": 0.21695467829704285,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.5836,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.4,
|
| 48 |
+
"grad_norm": 0.26244187355041504,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.7104,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4666666666666667,
|
| 55 |
+
"grad_norm": 0.1886732578277588,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.3353,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5333333333333333,
|
| 62 |
+
"grad_norm": 0.3749811351299286,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.6642,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6,
|
| 69 |
+
"grad_norm": 0.3102177083492279,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.6082,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6666666666666666,
|
| 76 |
+
"grad_norm": 0.3232249319553375,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.6706,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7333333333333333,
|
| 83 |
+
"grad_norm": 0.3113941550254822,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.4988,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8,
|
| 90 |
+
"grad_norm": 0.31279242038726807,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.6155,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.8666666666666667,
|
| 97 |
+
"grad_norm": 0.28257760405540466,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.529,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9333333333333333,
|
| 104 |
+
"grad_norm": 0.29951271414756775,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.6252,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"grad_norm": 0.28102612495422363,
|
| 112 |
+
"learning_rate": 0.0003,
|
| 113 |
+
"loss": 0.6261,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.0,
|
| 118 |
+
"step": 30,
|
| 119 |
+
"total_flos": 1196511245369344.0,
|
| 120 |
+
"train_loss": 0.5763573904832204,
|
| 121 |
+
"train_runtime": 48.5145,
|
| 122 |
+
"train_samples_per_second": 2.473,
|
| 123 |
+
"train_steps_per_second": 0.618
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"logging_steps": 2,
|
| 127 |
+
"max_steps": 30,
|
| 128 |
+
"num_input_tokens_seen": 0,
|
| 129 |
+
"num_train_epochs": 1,
|
| 130 |
+
"save_steps": 500,
|
| 131 |
+
"stateful_callbacks": {
|
| 132 |
+
"TrainerControl": {
|
| 133 |
+
"args": {
|
| 134 |
+
"should_epoch_stop": false,
|
| 135 |
+
"should_evaluate": false,
|
| 136 |
+
"should_log": false,
|
| 137 |
+
"should_save": false,
|
| 138 |
+
"should_training_stop": false
|
| 139 |
+
},
|
| 140 |
+
"attributes": {}
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"total_flos": 1196511245369344.0,
|
| 144 |
+
"train_batch_size": 1,
|
| 145 |
+
"trial_name": null,
|
| 146 |
+
"trial_params": null
|
| 147 |
+
}
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31500f229e4dede815dc1cc073235b4910c5640d3f9f99749e4f72e32ca3db44
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94cf1f2ddc94b0d84d41afde7e24759d2211a03e0fab68e0542bb7cbae44d24c
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8231a23a9e887e103a1bea2b34e07dbdd7b32d3e9a3ce6cacb38f9c7d8a84352
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4632dc38995fd9058cb3bb3a74a7a3f0acdd96f2a1da229c53116be8290d5f9
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0374a33829e21756f1c74170b81a6df936c7af2f9da602333c5674eac242a81
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ce872529f7431f71b64e45c64eeb5e1968e51ef863244a28fdd348e9286a1f8
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:648dc41a6d4e798c5adf00f42548fcf41d160761046cc2999039fca354944806
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e02abd240d532114b52e1ec2eb1b26449b496cf245448e44f8e802929895f22
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/1_trainer_state.json
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06666666666666667,
|
| 13 |
+
"grad_norm": 0.9023004770278931,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 1.5571,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13333333333333333,
|
| 20 |
+
"grad_norm": 0.5817632675170898,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.7767,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.2,
|
| 27 |
+
"grad_norm": 0.4354921579360962,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.731,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.26666666666666666,
|
| 34 |
+
"grad_norm": 0.3641677498817444,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 1.6872,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3333333333333333,
|
| 41 |
+
"grad_norm": 0.28223559260368347,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.7443,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.4,
|
| 48 |
+
"grad_norm": 0.2681761384010315,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 1.2404,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4666666666666667,
|
| 55 |
+
"grad_norm": 0.44366130232810974,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.9958,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5333333333333333,
|
| 62 |
+
"grad_norm": 0.39765021204948425,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.4386,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6,
|
| 69 |
+
"grad_norm": 0.6642311811447144,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 2.1369,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6666666666666666,
|
| 76 |
+
"grad_norm": 0.5510743260383606,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 2.0597,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7333333333333333,
|
| 83 |
+
"grad_norm": 0.45161959528923035,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 1.6513,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8,
|
| 90 |
+
"grad_norm": 0.3321634531021118,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 1.605,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.8666666666666667,
|
| 97 |
+
"grad_norm": 0.6213057041168213,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.7325,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9333333333333333,
|
| 104 |
+
"grad_norm": 0.43713563680648804,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 1.4591,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"grad_norm": 0.5952392816543579,
|
| 112 |
+
"learning_rate": 0.0003,
|
| 113 |
+
"loss": 2.0974,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.0,
|
| 118 |
+
"step": 30,
|
| 119 |
+
"total_flos": 305071842656256.0,
|
| 120 |
+
"train_loss": 1.6608658949534099,
|
| 121 |
+
"train_runtime": 42.0423,
|
| 122 |
+
"train_samples_per_second": 2.854,
|
| 123 |
+
"train_steps_per_second": 0.714
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"logging_steps": 2,
|
| 127 |
+
"max_steps": 30,
|
| 128 |
+
"num_input_tokens_seen": 0,
|
| 129 |
+
"num_train_epochs": 1,
|
| 130 |
+
"save_steps": 500,
|
| 131 |
+
"stateful_callbacks": {
|
| 132 |
+
"TrainerControl": {
|
| 133 |
+
"args": {
|
| 134 |
+
"should_epoch_stop": false,
|
| 135 |
+
"should_evaluate": false,
|
| 136 |
+
"should_log": false,
|
| 137 |
+
"should_save": false,
|
| 138 |
+
"should_training_stop": false
|
| 139 |
+
},
|
| 140 |
+
"attributes": {}
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"total_flos": 305071842656256.0,
|
| 144 |
+
"train_batch_size": 1,
|
| 145 |
+
"trial_name": null,
|
| 146 |
+
"trial_params": null
|
| 147 |
+
}
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:963fba097065a142c0541ce9526facbeae65ff78898ef02a1e2f0f723a862a06
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:863dee3962c380286ef4c7b450cb2510f759d0df1cee2bd79ba770ca703d6b5a
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92c6a552dd3b85e4e36855e303c5cb57a0388d4632a3acaa5302ca646fbda017
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec4da88e12a570b8176e33d4e833ca9833197f98837cc27f9094df7bcc1d9f47
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8c7085f793d1434518d9aed118ee8850b6a55de0a894e9edc95302a13baee13
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eacf61ef3b77cdb6df02a9c2cb07c1947dcd233aa080c00ee541a7eca6dccc6a
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1628f05f6b832066328946babf7025e77f425acfea9ad0713a1499422297c91
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01b0b5bad83b2738ad513afc4ac7711f67c12fafcf1c1ed2cce2aaf637865825
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/2_trainer_state.json
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06666666666666667,
|
| 13 |
+
"grad_norm": 0.3037028908729553,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.9116,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13333333333333333,
|
| 20 |
+
"grad_norm": 0.3489552140235901,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 1.6459,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.2,
|
| 27 |
+
"grad_norm": 0.3942304253578186,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 1.0189,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.26666666666666666,
|
| 34 |
+
"grad_norm": 0.29892203211784363,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 1.8188,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3333333333333333,
|
| 41 |
+
"grad_norm": 0.5498002171516418,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 1.3308,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.4,
|
| 48 |
+
"grad_norm": 0.42956435680389404,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 2.4234,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4666666666666667,
|
| 55 |
+
"grad_norm": 0.29204803705215454,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 1.2476,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5333333333333333,
|
| 62 |
+
"grad_norm": 0.45598259568214417,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 1.6462,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6,
|
| 69 |
+
"grad_norm": 0.2771050035953522,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.8,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6666666666666666,
|
| 76 |
+
"grad_norm": 0.23567508161067963,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 1.3243,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7333333333333333,
|
| 83 |
+
"grad_norm": 0.4404710829257965,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 2.0618,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8,
|
| 90 |
+
"grad_norm": 0.475818395614624,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 1.9837,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.8666666666666667,
|
| 97 |
+
"grad_norm": 0.3802916705608368,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 1.8485,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9333333333333333,
|
| 104 |
+
"grad_norm": 0.43724003434181213,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.9082,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"grad_norm": 0.21405752003192902,
|
| 112 |
+
"learning_rate": 0.0003,
|
| 113 |
+
"loss": 1.7006,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.0,
|
| 118 |
+
"step": 30,
|
| 119 |
+
"total_flos": 580445060202496.0,
|
| 120 |
+
"train_loss": 1.5113671978314718,
|
| 121 |
+
"train_runtime": 42.5765,
|
| 122 |
+
"train_samples_per_second": 2.818,
|
| 123 |
+
"train_steps_per_second": 0.705
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"logging_steps": 2,
|
| 127 |
+
"max_steps": 30,
|
| 128 |
+
"num_input_tokens_seen": 0,
|
| 129 |
+
"num_train_epochs": 1,
|
| 130 |
+
"save_steps": 500,
|
| 131 |
+
"stateful_callbacks": {
|
| 132 |
+
"TrainerControl": {
|
| 133 |
+
"args": {
|
| 134 |
+
"should_epoch_stop": false,
|
| 135 |
+
"should_evaluate": false,
|
| 136 |
+
"should_log": false,
|
| 137 |
+
"should_save": false,
|
| 138 |
+
"should_training_stop": false
|
| 139 |
+
},
|
| 140 |
+
"attributes": {}
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"total_flos": 580445060202496.0,
|
| 144 |
+
"train_batch_size": 1,
|
| 145 |
+
"trial_name": null,
|
| 146 |
+
"trial_params": null
|
| 147 |
+
}
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5f9ba92d85e161ddac50cbfa1376def41b8d564876d86da5bb0a73e5f5897e0
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a56c1e2ede2ef52e0ce5b80fe084164ea027a24dbe02b6ae7084ab4163e7f138
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:000cbfb5eb0df3a6772db4a5ba76965417e97dbcfd6f80c20db43fbc1cca7919
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10851f6148ba1adf32ead3d827ee058c411fbcc5543de63f040822768e553348
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21a5f9ae6d49abe626a408ff3516e35c481686287266a07609307499f1f3b923
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c2abfadebda687c435c856c4927a4b71c9168b9d47cc07b81c30c07f39f6053
|
| 3 |
+
size 48770134
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f19790a399e75267ef51f23d3c69e581779159e150dbc269ab6b30502c96556
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec671844c9cd52814d46ea7a6c890dcb81f2f45a04c140372af16e38bbebdd9f
|
| 3 |
+
size 48769674
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/3_trainer_state.json
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06666666666666667,
|
| 13 |
+
"grad_norm": 0.16276100277900696,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.833,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13333333333333333,
|
| 20 |
+
"grad_norm": 0.251880019903183,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.883,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.2,
|
| 27 |
+
"grad_norm": 0.2255876660346985,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.6912,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.26666666666666666,
|
| 34 |
+
"grad_norm": 0.2852807641029358,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.9001,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3333333333333333,
|
| 41 |
+
"grad_norm": 0.21784625947475433,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.8835,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.4,
|
| 48 |
+
"grad_norm": 0.19225150346755981,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.5988,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4666666666666667,
|
| 55 |
+
"grad_norm": 0.14822231233119965,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.5905,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5333333333333333,
|
| 62 |
+
"grad_norm": 0.1591639667749405,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.4959,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6,
|
| 69 |
+
"grad_norm": 0.12191551923751831,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 0.6851,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6666666666666666,
|
| 76 |
+
"grad_norm": 0.31107524037361145,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.6968,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7333333333333333,
|
| 83 |
+
"grad_norm": 0.18976068496704102,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 0.9054,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8,
|
| 90 |
+
"grad_norm": 0.23062096536159515,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.4708,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.8666666666666667,
|
| 97 |
+
"grad_norm": 0.3150513470172882,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.6063,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9333333333333333,
|
| 104 |
+
"grad_norm": 0.15745581686496735,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.4576,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"grad_norm": 0.20806829631328583,
|
| 112 |
+
"learning_rate": 0.0003,
|
| 113 |
+
"loss": 0.6251,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.0,
|
| 118 |
+
"step": 30,
|
| 119 |
+
"total_flos": 1809773389414400.0,
|
| 120 |
+
"train_loss": 0.6882031063238779,
|
| 121 |
+
"train_runtime": 53.2716,
|
| 122 |
+
"train_samples_per_second": 2.253,
|
| 123 |
+
"train_steps_per_second": 0.563
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"logging_steps": 2,
|
| 127 |
+
"max_steps": 30,
|
| 128 |
+
"num_input_tokens_seen": 0,
|
| 129 |
+
"num_train_epochs": 1,
|
| 130 |
+
"save_steps": 500,
|
| 131 |
+
"stateful_callbacks": {
|
| 132 |
+
"TrainerControl": {
|
| 133 |
+
"args": {
|
| 134 |
+
"should_epoch_stop": false,
|
| 135 |
+
"should_evaluate": false,
|
| 136 |
+
"should_log": false,
|
| 137 |
+
"should_save": false,
|
| 138 |
+
"should_training_stop": false
|
| 139 |
+
},
|
| 140 |
+
"attributes": {}
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"total_flos": 1809773389414400.0,
|
| 144 |
+
"train_batch_size": 1,
|
| 145 |
+
"trial_name": null,
|
| 146 |
+
"trial_params": null
|
| 147 |
+
}
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round10.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ad083f2e642a767d5504eb2dd2e2c1ffeacb9765251550c355fcc0d1dc61199
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be02dbb2f9a3dc8edf5df8d3606692c50f06f2c89b23f8b6daf92f966649b22d
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round15.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5ff7e1f593d4b3a29a210026ea5b2b0154e90f2a5832bbbb5067e40eda2bda8
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round17.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a0f9fe0829a1607a5e1f12053abf5062647afeb1fbccc13457f8f449c6dd3ca
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffcbd8292ac0a7ac227c08a272e9b49049d3f1828668e5f5686a29eae4683bee
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8347c82a60d5d8cff43f998c24f471d16985a6bad1652d26e48cdcfa9ba022e2
|
| 3 |
+
size 84049390
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5efa904dfdf8aeafd4ae597f530c9125b89c869b8cb43f650a3651f701b43189
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_client_model_round7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bce00e87548e396505aee69f7ff404d99885432c726eff94ff85f2596d467302
|
| 3 |
+
size 84048874
|
client_states_NEW_fedavg_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir30_T0125_decay099/4_trainer_state.json
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06666666666666667,
|
| 13 |
+
"grad_norm": 0.2867070138454437,
|
| 14 |
+
"learning_rate": 0.0003,
|
| 15 |
+
"loss": 0.5952,
|
| 16 |
+
"step": 2
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13333333333333333,
|
| 20 |
+
"grad_norm": 0.33537670969963074,
|
| 21 |
+
"learning_rate": 0.0003,
|
| 22 |
+
"loss": 0.9564,
|
| 23 |
+
"step": 4
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.2,
|
| 27 |
+
"grad_norm": 0.3584403991699219,
|
| 28 |
+
"learning_rate": 0.0003,
|
| 29 |
+
"loss": 0.9349,
|
| 30 |
+
"step": 6
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.26666666666666666,
|
| 34 |
+
"grad_norm": 0.3439081907272339,
|
| 35 |
+
"learning_rate": 0.0003,
|
| 36 |
+
"loss": 0.8646,
|
| 37 |
+
"step": 8
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.3333333333333333,
|
| 41 |
+
"grad_norm": 0.44978123903274536,
|
| 42 |
+
"learning_rate": 0.0003,
|
| 43 |
+
"loss": 0.9359,
|
| 44 |
+
"step": 10
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.4,
|
| 48 |
+
"grad_norm": 0.3586316704750061,
|
| 49 |
+
"learning_rate": 0.0003,
|
| 50 |
+
"loss": 0.8132,
|
| 51 |
+
"step": 12
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.4666666666666667,
|
| 55 |
+
"grad_norm": 0.26364096999168396,
|
| 56 |
+
"learning_rate": 0.0003,
|
| 57 |
+
"loss": 0.5523,
|
| 58 |
+
"step": 14
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.5333333333333333,
|
| 62 |
+
"grad_norm": 0.31966859102249146,
|
| 63 |
+
"learning_rate": 0.0003,
|
| 64 |
+
"loss": 0.9114,
|
| 65 |
+
"step": 16
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.6,
|
| 69 |
+
"grad_norm": 0.35554757714271545,
|
| 70 |
+
"learning_rate": 0.0003,
|
| 71 |
+
"loss": 1.3788,
|
| 72 |
+
"step": 18
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.6666666666666666,
|
| 76 |
+
"grad_norm": 0.258931964635849,
|
| 77 |
+
"learning_rate": 0.0003,
|
| 78 |
+
"loss": 0.6495,
|
| 79 |
+
"step": 20
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.7333333333333333,
|
| 83 |
+
"grad_norm": 0.2897098660469055,
|
| 84 |
+
"learning_rate": 0.0003,
|
| 85 |
+
"loss": 1.0337,
|
| 86 |
+
"step": 22
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.8,
|
| 90 |
+
"grad_norm": 0.2801041007041931,
|
| 91 |
+
"learning_rate": 0.0003,
|
| 92 |
+
"loss": 0.8032,
|
| 93 |
+
"step": 24
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.8666666666666667,
|
| 97 |
+
"grad_norm": 0.3492525815963745,
|
| 98 |
+
"learning_rate": 0.0003,
|
| 99 |
+
"loss": 0.7369,
|
| 100 |
+
"step": 26
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.9333333333333333,
|
| 104 |
+
"grad_norm": 0.44734007120132446,
|
| 105 |
+
"learning_rate": 0.0003,
|
| 106 |
+
"loss": 0.8824,
|
| 107 |
+
"step": 28
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0,
|
| 111 |
+
"grad_norm": 0.29203251004219055,
|
| 112 |
+
"learning_rate": 0.0003,
|
| 113 |
+
"loss": 0.7433,
|
| 114 |
+
"step": 30
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.0,
|
| 118 |
+
"step": 30,
|
| 119 |
+
"total_flos": 3616159123374080.0,
|
| 120 |
+
"train_loss": 0.8527638713518778,
|
| 121 |
+
"train_runtime": 68.1295,
|
| 122 |
+
"train_samples_per_second": 1.761,
|
| 123 |
+
"train_steps_per_second": 0.44
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"logging_steps": 2,
|
| 127 |
+
"max_steps": 30,
|
| 128 |
+
"num_input_tokens_seen": 0,
|
| 129 |
+
"num_train_epochs": 1,
|
| 130 |
+
"save_steps": 500,
|
| 131 |
+
"stateful_callbacks": {
|
| 132 |
+
"TrainerControl": {
|
| 133 |
+
"args": {
|
| 134 |
+
"should_epoch_stop": false,
|
| 135 |
+
"should_evaluate": false,
|
| 136 |
+
"should_log": false,
|
| 137 |
+
"should_save": false,
|
| 138 |
+
"should_training_stop": false
|
| 139 |
+
},
|
| 140 |
+
"attributes": {}
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"total_flos": 3616159123374080.0,
|
| 144 |
+
"train_batch_size": 1,
|
| 145 |
+
"trial_name": null,
|
| 146 |
+
"trial_params": null
|
| 147 |
+
}
|