Upload folder using huggingface_hub
Browse files
checkpoint-1050/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402763
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98fd5044314627e437dc2b7cdda294e48e3c4963a768af47d181b6e425a41dcc
|
| 3 |
size 402763
|
checkpoint-1050/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 62314258
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd8a29d89acc6af99d387344d32386020a6920163d2db254b78b02b7a902ed79
|
| 3 |
size 62314258
|
checkpoint-1050/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:879d73f7b526f4773e67571faa23d0381f777afe5786d66898ad1914dcb91306
|
| 3 |
size 14645
|
checkpoint-1050/tokenizer_config.json
CHANGED
|
@@ -935,6 +935,6 @@
|
|
| 935 |
"model_max_length": 1000000000000000019884624838656,
|
| 936 |
"pad_token": "<pad>",
|
| 937 |
"sp_model_kwargs": {},
|
| 938 |
-
"tokenizer_class": "
|
| 939 |
"unk_token": "<unk>"
|
| 940 |
}
|
|
|
|
| 935 |
"model_max_length": 1000000000000000019884624838656,
|
| 936 |
"pad_token": "<pad>",
|
| 937 |
"sp_model_kwargs": {},
|
| 938 |
+
"tokenizer_class": "T5TokenizerFast",
|
| 939 |
"unk_token": "<unk>"
|
| 940 |
}
|
checkpoint-1050/trainer_state.json
CHANGED
|
@@ -11,46 +11,46 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.05752416014726185,
|
| 14 |
-
"grad_norm": 0.
|
| 15 |
"learning_rate": 0.00016513000460193283,
|
| 16 |
-
"loss":
|
| 17 |
"step": 250
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.1150483202945237,
|
| 21 |
-
"grad_norm": 0.
|
| 22 |
"learning_rate": 0.00016024045098941557,
|
| 23 |
-
"loss":
|
| 24 |
"step": 500
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1150483202945237,
|
| 28 |
-
"eval_loss":
|
| 29 |
-
"eval_runtime":
|
| 30 |
-
"eval_samples_per_second":
|
| 31 |
-
"eval_steps_per_second": 3.
|
| 32 |
"step": 500
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"epoch": 0.17257248044178555,
|
| 36 |
-
"grad_norm": 0.
|
| 37 |
"learning_rate": 0.0001553508973768983,
|
| 38 |
-
"loss":
|
| 39 |
"step": 750
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.2300966405890474,
|
| 43 |
-
"grad_norm": 0.
|
| 44 |
"learning_rate": 0.00015046134376438104,
|
| 45 |
-
"loss": 3.
|
| 46 |
"step": 1000
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.2300966405890474,
|
| 50 |
-
"eval_loss":
|
| 51 |
-
"eval_runtime":
|
| 52 |
-
"eval_samples_per_second":
|
| 53 |
-
"eval_steps_per_second": 3.
|
| 54 |
"step": 1000
|
| 55 |
}
|
| 56 |
],
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.05752416014726185,
|
| 14 |
+
"grad_norm": 0.6941567659378052,
|
| 15 |
"learning_rate": 0.00016513000460193283,
|
| 16 |
+
"loss": 3.417,
|
| 17 |
"step": 250
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.1150483202945237,
|
| 21 |
+
"grad_norm": 0.739392101764679,
|
| 22 |
"learning_rate": 0.00016024045098941557,
|
| 23 |
+
"loss": 3.3994,
|
| 24 |
"step": 500
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1150483202945237,
|
| 28 |
+
"eval_loss": 2.832693576812744,
|
| 29 |
+
"eval_runtime": 34.8026,
|
| 30 |
+
"eval_samples_per_second": 113.21,
|
| 31 |
+
"eval_steps_per_second": 3.333,
|
| 32 |
"step": 500
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"epoch": 0.17257248044178555,
|
| 36 |
+
"grad_norm": 0.7630258202552795,
|
| 37 |
"learning_rate": 0.0001553508973768983,
|
| 38 |
+
"loss": 3.3881,
|
| 39 |
"step": 750
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.2300966405890474,
|
| 43 |
+
"grad_norm": 0.745369553565979,
|
| 44 |
"learning_rate": 0.00015046134376438104,
|
| 45 |
+
"loss": 3.3655,
|
| 46 |
"step": 1000
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.2300966405890474,
|
| 50 |
+
"eval_loss": 2.821577548980713,
|
| 51 |
+
"eval_runtime": 34.6743,
|
| 52 |
+
"eval_samples_per_second": 113.629,
|
| 53 |
+
"eval_steps_per_second": 3.345,
|
| 54 |
"step": 1000
|
| 55 |
}
|
| 56 |
],
|