Training in progress, step 34284, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18899856
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f08bbff40a76729e8bb60d3a42b823dec0374093316e9d66e864d39e6e98686
|
| 3 |
size 18899856
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 37911546
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f88b3d3aafcfb52683a2c942900f2cf4331a167f2e907d54ec18889f78810ed1
|
| 3 |
size 37911546
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b182ddba2191fc0b5d8696aacd13a263502041db09ff96fe195f390f57c46669
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c5f55b79d4f37b170efbcd11ec8e37cc2c97e892e710c70c5c8dbc048d1902a
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 3806,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -239965,6 +239965,216 @@
|
|
| 239965 |
"eval_test_samples_per_second": 13.909,
|
| 239966 |
"eval_test_steps_per_second": 0.869,
|
| 239967 |
"step": 34254
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239968 |
}
|
| 239969 |
],
|
| 239970 |
"logging_steps": 1,
|
|
@@ -239979,12 +240189,12 @@
|
|
| 239979 |
"should_evaluate": false,
|
| 239980 |
"should_log": false,
|
| 239981 |
"should_save": true,
|
| 239982 |
-
"should_training_stop":
|
| 239983 |
},
|
| 239984 |
"attributes": {}
|
| 239985 |
}
|
| 239986 |
},
|
| 239987 |
-
"total_flos": 8.
|
| 239988 |
"train_batch_size": 16,
|
| 239989 |
"trial_name": null,
|
| 239990 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
"eval_steps": 3806,
|
| 6 |
+
"global_step": 34284,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 239965 |
"eval_test_samples_per_second": 13.909,
|
| 239966 |
"eval_test_steps_per_second": 0.869,
|
| 239967 |
"step": 34254
|
| 239968 |
+
},
|
| 239969 |
+
{
|
| 239970 |
+
"epoch": 3.9966164974915412,
|
| 239971 |
+
"grad_norm": 0.7302963733673096,
|
| 239972 |
+
"learning_rate": 3.000647352429344e-06,
|
| 239973 |
+
"loss": 2.0368,
|
| 239974 |
+
"step": 34255
|
| 239975 |
+
},
|
| 239976 |
+
{
|
| 239977 |
+
"epoch": 3.996733169991833,
|
| 239978 |
+
"grad_norm": 0.7436806559562683,
|
| 239979 |
+
"learning_rate": 3.000603477205233e-06,
|
| 239980 |
+
"loss": 2.1534,
|
| 239981 |
+
"step": 34256
|
| 239982 |
+
},
|
| 239983 |
+
{
|
| 239984 |
+
"epoch": 3.9968498424921246,
|
| 239985 |
+
"grad_norm": 0.6236298084259033,
|
| 239986 |
+
"learning_rate": 3.000561141458568e-06,
|
| 239987 |
+
"loss": 1.9726,
|
| 239988 |
+
"step": 34257
|
| 239989 |
+
},
|
| 239990 |
+
{
|
| 239991 |
+
"epoch": 3.9969665149924163,
|
| 239992 |
+
"grad_norm": 0.6811991930007935,
|
| 239993 |
+
"learning_rate": 3.0005203451897933e-06,
|
| 239994 |
+
"loss": 2.0822,
|
| 239995 |
+
"step": 34258
|
| 239996 |
+
},
|
| 239997 |
+
{
|
| 239998 |
+
"epoch": 3.997083187492708,
|
| 239999 |
+
"grad_norm": 0.7705870866775513,
|
| 240000 |
+
"learning_rate": 3.000481088399339e-06,
|
| 240001 |
+
"loss": 2.1438,
|
| 240002 |
+
"step": 34259
|
| 240003 |
+
},
|
| 240004 |
+
{
|
| 240005 |
+
"epoch": 3.9971998599929996,
|
| 240006 |
+
"grad_norm": 0.6603965759277344,
|
| 240007 |
+
"learning_rate": 3.0004433710876e-06,
|
| 240008 |
+
"loss": 1.9082,
|
| 240009 |
+
"step": 34260
|
| 240010 |
+
},
|
| 240011 |
+
{
|
| 240012 |
+
"epoch": 3.9973165324932913,
|
| 240013 |
+
"grad_norm": 0.6230751872062683,
|
| 240014 |
+
"learning_rate": 3.0004071932549724e-06,
|
| 240015 |
+
"loss": 2.0426,
|
| 240016 |
+
"step": 34261
|
| 240017 |
+
},
|
| 240018 |
+
{
|
| 240019 |
+
"epoch": 3.997433204993583,
|
| 240020 |
+
"grad_norm": 0.7174234390258789,
|
| 240021 |
+
"learning_rate": 3.000372554901835e-06,
|
| 240022 |
+
"loss": 1.992,
|
| 240023 |
+
"step": 34262
|
| 240024 |
+
},
|
| 240025 |
+
{
|
| 240026 |
+
"epoch": 3.9975498774938747,
|
| 240027 |
+
"grad_norm": 0.6722437739372253,
|
| 240028 |
+
"learning_rate": 3.0003394560285347e-06,
|
| 240029 |
+
"loss": 2.0709,
|
| 240030 |
+
"step": 34263
|
| 240031 |
+
},
|
| 240032 |
+
{
|
| 240033 |
+
"epoch": 3.9976665499941664,
|
| 240034 |
+
"grad_norm": 0.641873300075531,
|
| 240035 |
+
"learning_rate": 3.0003078966354333e-06,
|
| 240036 |
+
"loss": 1.9371,
|
| 240037 |
+
"step": 34264
|
| 240038 |
+
},
|
| 240039 |
+
{
|
| 240040 |
+
"epoch": 3.997783222494458,
|
| 240041 |
+
"grad_norm": 0.6292130351066589,
|
| 240042 |
+
"learning_rate": 3.000277876722828e-06,
|
| 240043 |
+
"loss": 1.9375,
|
| 240044 |
+
"step": 34265
|
| 240045 |
+
},
|
| 240046 |
+
{
|
| 240047 |
+
"epoch": 3.9978998949947497,
|
| 240048 |
+
"grad_norm": 0.6383855938911438,
|
| 240049 |
+
"learning_rate": 3.000249396291065e-06,
|
| 240050 |
+
"loss": 2.1935,
|
| 240051 |
+
"step": 34266
|
| 240052 |
+
},
|
| 240053 |
+
{
|
| 240054 |
+
"epoch": 3.9980165674950414,
|
| 240055 |
+
"grad_norm": 0.7069698572158813,
|
| 240056 |
+
"learning_rate": 3.0002224553404246e-06,
|
| 240057 |
+
"loss": 2.0077,
|
| 240058 |
+
"step": 34267
|
| 240059 |
+
},
|
| 240060 |
+
{
|
| 240061 |
+
"epoch": 3.998133239995333,
|
| 240062 |
+
"grad_norm": 0.6327721476554871,
|
| 240063 |
+
"learning_rate": 3.0001970538711872e-06,
|
| 240064 |
+
"loss": 2.0699,
|
| 240065 |
+
"step": 34268
|
| 240066 |
+
},
|
| 240067 |
+
{
|
| 240068 |
+
"epoch": 3.998249912495625,
|
| 240069 |
+
"grad_norm": 0.6597331166267395,
|
| 240070 |
+
"learning_rate": 3.0001731918836162e-06,
|
| 240071 |
+
"loss": 2.0667,
|
| 240072 |
+
"step": 34269
|
| 240073 |
+
},
|
| 240074 |
+
{
|
| 240075 |
+
"epoch": 3.9983665849959165,
|
| 240076 |
+
"grad_norm": 0.661301851272583,
|
| 240077 |
+
"learning_rate": 3.000150869377943e-06,
|
| 240078 |
+
"loss": 1.9734,
|
| 240079 |
+
"step": 34270
|
| 240080 |
+
},
|
| 240081 |
+
{
|
| 240082 |
+
"epoch": 3.998483257496208,
|
| 240083 |
+
"grad_norm": 0.6402481198310852,
|
| 240084 |
+
"learning_rate": 3.000130086354431e-06,
|
| 240085 |
+
"loss": 1.8784,
|
| 240086 |
+
"step": 34271
|
| 240087 |
+
},
|
| 240088 |
+
{
|
| 240089 |
+
"epoch": 3.9985999299965,
|
| 240090 |
+
"grad_norm": 0.7119265198707581,
|
| 240091 |
+
"learning_rate": 3.000110842813261e-06,
|
| 240092 |
+
"loss": 2.1001,
|
| 240093 |
+
"step": 34272
|
| 240094 |
+
},
|
| 240095 |
+
{
|
| 240096 |
+
"epoch": 3.9987166024967915,
|
| 240097 |
+
"grad_norm": 0.636619508266449,
|
| 240098 |
+
"learning_rate": 3.0000931387546646e-06,
|
| 240099 |
+
"loss": 2.2329,
|
| 240100 |
+
"step": 34273
|
| 240101 |
+
},
|
| 240102 |
+
{
|
| 240103 |
+
"epoch": 3.998833274997083,
|
| 240104 |
+
"grad_norm": 0.6781154274940491,
|
| 240105 |
+
"learning_rate": 3.0000769741788074e-06,
|
| 240106 |
+
"loss": 2.0247,
|
| 240107 |
+
"step": 34274
|
| 240108 |
+
},
|
| 240109 |
+
{
|
| 240110 |
+
"epoch": 3.998949947497375,
|
| 240111 |
+
"grad_norm": 0.7756646871566772,
|
| 240112 |
+
"learning_rate": 3.0000623490858523e-06,
|
| 240113 |
+
"loss": 2.0191,
|
| 240114 |
+
"step": 34275
|
| 240115 |
+
},
|
| 240116 |
+
{
|
| 240117 |
+
"epoch": 3.9990666199976665,
|
| 240118 |
+
"grad_norm": 0.6341996192932129,
|
| 240119 |
+
"learning_rate": 3.000049263475966e-06,
|
| 240120 |
+
"loss": 1.9516,
|
| 240121 |
+
"step": 34276
|
| 240122 |
+
},
|
| 240123 |
+
{
|
| 240124 |
+
"epoch": 3.9991832924979582,
|
| 240125 |
+
"grad_norm": 0.6424492001533508,
|
| 240126 |
+
"learning_rate": 3.000037717349279e-06,
|
| 240127 |
+
"loss": 1.9659,
|
| 240128 |
+
"step": 34277
|
| 240129 |
+
},
|
| 240130 |
+
{
|
| 240131 |
+
"epoch": 3.99929996499825,
|
| 240132 |
+
"grad_norm": 0.5968495607376099,
|
| 240133 |
+
"learning_rate": 3.000027710705908e-06,
|
| 240134 |
+
"loss": 1.985,
|
| 240135 |
+
"step": 34278
|
| 240136 |
+
},
|
| 240137 |
+
{
|
| 240138 |
+
"epoch": 3.9994166374985416,
|
| 240139 |
+
"grad_norm": 0.6369019150733948,
|
| 240140 |
+
"learning_rate": 3.00001924354595e-06,
|
| 240141 |
+
"loss": 1.9489,
|
| 240142 |
+
"step": 34279
|
| 240143 |
+
},
|
| 240144 |
+
{
|
| 240145 |
+
"epoch": 3.9995333099988333,
|
| 240146 |
+
"grad_norm": 0.6295581459999084,
|
| 240147 |
+
"learning_rate": 3.0000123158695064e-06,
|
| 240148 |
+
"loss": 1.9638,
|
| 240149 |
+
"step": 34280
|
| 240150 |
+
},
|
| 240151 |
+
{
|
| 240152 |
+
"epoch": 3.999649982499125,
|
| 240153 |
+
"grad_norm": 0.5876207947731018,
|
| 240154 |
+
"learning_rate": 3.0000069276766416e-06,
|
| 240155 |
+
"loss": 1.9769,
|
| 240156 |
+
"step": 34281
|
| 240157 |
+
},
|
| 240158 |
+
{
|
| 240159 |
+
"epoch": 3.9997666549994166,
|
| 240160 |
+
"grad_norm": 0.6616494059562683,
|
| 240161 |
+
"learning_rate": 3.0000030789674052e-06,
|
| 240162 |
+
"loss": 2.0094,
|
| 240163 |
+
"step": 34282
|
| 240164 |
+
},
|
| 240165 |
+
{
|
| 240166 |
+
"epoch": 3.9998833274997083,
|
| 240167 |
+
"grad_norm": 0.6334449052810669,
|
| 240168 |
+
"learning_rate": 3.0000007697418473e-06,
|
| 240169 |
+
"loss": 1.9549,
|
| 240170 |
+
"step": 34283
|
| 240171 |
+
},
|
| 240172 |
+
{
|
| 240173 |
+
"epoch": 4.0,
|
| 240174 |
+
"grad_norm": 0.618880569934845,
|
| 240175 |
+
"learning_rate": 2.9999999999999997e-06,
|
| 240176 |
+
"loss": 1.8764,
|
| 240177 |
+
"step": 34284
|
| 240178 |
}
|
| 240179 |
],
|
| 240180 |
"logging_steps": 1,
|
|
|
|
| 240189 |
"should_evaluate": false,
|
| 240190 |
"should_log": false,
|
| 240191 |
"should_save": true,
|
| 240192 |
+
"should_training_stop": true
|
| 240193 |
},
|
| 240194 |
"attributes": {}
|
| 240195 |
}
|
| 240196 |
},
|
| 240197 |
+
"total_flos": 8.943124342074507e+18,
|
| 240198 |
"train_batch_size": 16,
|
| 240199 |
"trial_name": null,
|
| 240200 |
"trial_params": null
|