Upload folder using huggingface_hub
Browse files
checkpoint-1400/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402763
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7471e7503097821eb7cedd03029765d8c6d340f64a530fcacf1b70d8280cc43e
|
| 3 |
size 402763
|
checkpoint-1400/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 62314258
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7f4a4bed170f65dac3b0aeae3d2a5f812fa425e4f7fc45556f7fdc55987af2d
|
| 3 |
size 62314258
|
checkpoint-1400/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:543a6952dfe815e824a9d4b10abd2434f4f2a650c28d1aabc5602d81b60d9a50
|
| 3 |
size 14645
|
checkpoint-1400/tokenizer_config.json
CHANGED
|
@@ -935,6 +935,6 @@
|
|
| 935 |
"model_max_length": 1000000000000000019884624838656,
|
| 936 |
"pad_token": "<pad>",
|
| 937 |
"sp_model_kwargs": {},
|
| 938 |
-
"tokenizer_class": "
|
| 939 |
"unk_token": "<unk>"
|
| 940 |
}
|
|
|
|
| 935 |
"model_max_length": 1000000000000000019884624838656,
|
| 936 |
"pad_token": "<pad>",
|
| 937 |
"sp_model_kwargs": {},
|
| 938 |
+
"tokenizer_class": "T5TokenizerFast",
|
| 939 |
"unk_token": "<unk>"
|
| 940 |
}
|
checkpoint-1400/trainer_state.json
CHANGED
|
@@ -11,53 +11,53 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.05752416014726185,
|
| 14 |
-
"grad_norm": 0.
|
| 15 |
"learning_rate": 0.00016513000460193283,
|
| 16 |
-
"loss":
|
| 17 |
"step": 250
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.1150483202945237,
|
| 21 |
-
"grad_norm": 0.
|
| 22 |
"learning_rate": 0.00016024045098941557,
|
| 23 |
-
"loss":
|
| 24 |
"step": 500
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1150483202945237,
|
| 28 |
-
"eval_loss":
|
| 29 |
-
"eval_runtime":
|
| 30 |
-
"eval_samples_per_second":
|
| 31 |
-
"eval_steps_per_second": 3.
|
| 32 |
"step": 500
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"epoch": 0.17257248044178555,
|
| 36 |
-
"grad_norm": 0.
|
| 37 |
"learning_rate": 0.0001553508973768983,
|
| 38 |
-
"loss":
|
| 39 |
"step": 750
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.2300966405890474,
|
| 43 |
-
"grad_norm": 0.
|
| 44 |
"learning_rate": 0.00015046134376438104,
|
| 45 |
-
"loss": 3.
|
| 46 |
"step": 1000
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.2300966405890474,
|
| 50 |
-
"eval_loss":
|
| 51 |
-
"eval_runtime":
|
| 52 |
-
"eval_samples_per_second":
|
| 53 |
-
"eval_steps_per_second": 3.
|
| 54 |
"step": 1000
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"epoch": 0.28762080073630925,
|
| 58 |
-
"grad_norm": 0.
|
| 59 |
"learning_rate": 0.0001455717901518638,
|
| 60 |
-
"loss": 3.
|
| 61 |
"step": 1250
|
| 62 |
}
|
| 63 |
],
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.05752416014726185,
|
| 14 |
+
"grad_norm": 0.6941567659378052,
|
| 15 |
"learning_rate": 0.00016513000460193283,
|
| 16 |
+
"loss": 3.417,
|
| 17 |
"step": 250
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.1150483202945237,
|
| 21 |
+
"grad_norm": 0.739392101764679,
|
| 22 |
"learning_rate": 0.00016024045098941557,
|
| 23 |
+
"loss": 3.3994,
|
| 24 |
"step": 500
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1150483202945237,
|
| 28 |
+
"eval_loss": 2.832693576812744,
|
| 29 |
+
"eval_runtime": 34.8026,
|
| 30 |
+
"eval_samples_per_second": 113.21,
|
| 31 |
+
"eval_steps_per_second": 3.333,
|
| 32 |
"step": 500
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"epoch": 0.17257248044178555,
|
| 36 |
+
"grad_norm": 0.7630258202552795,
|
| 37 |
"learning_rate": 0.0001553508973768983,
|
| 38 |
+
"loss": 3.3881,
|
| 39 |
"step": 750
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.2300966405890474,
|
| 43 |
+
"grad_norm": 0.745369553565979,
|
| 44 |
"learning_rate": 0.00015046134376438104,
|
| 45 |
+
"loss": 3.3655,
|
| 46 |
"step": 1000
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.2300966405890474,
|
| 50 |
+
"eval_loss": 2.821577548980713,
|
| 51 |
+
"eval_runtime": 34.6743,
|
| 52 |
+
"eval_samples_per_second": 113.629,
|
| 53 |
+
"eval_steps_per_second": 3.345,
|
| 54 |
"step": 1000
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"epoch": 0.28762080073630925,
|
| 58 |
+
"grad_norm": 0.7992149591445923,
|
| 59 |
"learning_rate": 0.0001455717901518638,
|
| 60 |
+
"loss": 3.3575,
|
| 61 |
"step": 1250
|
| 62 |
}
|
| 63 |
],
|