Add checkpoint-1140
Browse files- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +1 -2
- tokenizer_config.json +0 -1
- trainer_state.json +0 -18
optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60a3ac6cfb77b6885d1ce0460557befb05818b85183d5005c90ee8407210be90
|
| 3 |
+
size 4943385103
|
rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
|
| 3 |
+
size 14645
|
scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92918813fc54ac10b16289f9b1c0dbe940f02f764efd9ed6ece8456e5d30eb18
|
| 3 |
+
size 1465
|
special_tokens_map.json
CHANGED
|
@@ -12,6 +12,5 @@
|
|
| 12 |
"normalized": false,
|
| 13 |
"rstrip": false,
|
| 14 |
"single_word": false
|
| 15 |
-
}
|
| 16 |
-
"pad_token": "<|end_of_text|>"
|
| 17 |
}
|
|
|
|
| 12 |
"normalized": false,
|
| 13 |
"rstrip": false,
|
| 14 |
"single_word": false
|
| 15 |
+
}
|
|
|
|
| 16 |
}
|
tokenizer_config.json
CHANGED
|
@@ -2058,6 +2058,5 @@
|
|
| 2058 |
"attention_mask"
|
| 2059 |
],
|
| 2060 |
"model_max_length": 131072,
|
| 2061 |
-
"pad_token": "<|end_of_text|>",
|
| 2062 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
| 2063 |
}
|
|
|
|
| 2058 |
"attention_mask"
|
| 2059 |
],
|
| 2060 |
"model_max_length": 131072,
|
|
|
|
| 2061 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
| 2062 |
}
|
trainer_state.json
CHANGED
|
@@ -31,24 +31,6 @@
|
|
| 31 |
"eval_samples_per_second": 86.231,
|
| 32 |
"eval_steps_per_second": 10.836,
|
| 33 |
"step": 1140
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"epoch": 1.0,
|
| 37 |
-
"step": 1140,
|
| 38 |
-
"total_flos": 1.3626163133939712e+16,
|
| 39 |
-
"train_loss": 2.4788927044784814,
|
| 40 |
-
"train_runtime": 196.8793,
|
| 41 |
-
"train_samples_per_second": 23.151,
|
| 42 |
-
"train_steps_per_second": 5.79
|
| 43 |
-
},
|
| 44 |
-
{
|
| 45 |
-
"epoch": 1.0,
|
| 46 |
-
"eval_accuracy": 0.5619722863264766,
|
| 47 |
-
"eval_loss": 2.4340596199035645,
|
| 48 |
-
"eval_runtime": 13.1565,
|
| 49 |
-
"eval_samples_per_second": 86.497,
|
| 50 |
-
"eval_steps_per_second": 10.869,
|
| 51 |
-
"step": 1140
|
| 52 |
}
|
| 53 |
],
|
| 54 |
"logging_steps": 500,
|
|
|
|
| 31 |
"eval_samples_per_second": 86.231,
|
| 32 |
"eval_steps_per_second": 10.836,
|
| 33 |
"step": 1140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
],
|
| 36 |
"logging_steps": 500,
|