newline special token 처리해서 다시 학습
Browse files
- pytorch_model.bin +1 -1
- special_tokens_map.json +1 -0
- tokenizer.json +9 -0
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 513290985
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0b6970188d6812e550335c01eddeb3188c35ff2f34e1ff51f42a73dc4c5f864
|
| 3 |
size 513290985
|
special_tokens_map.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"bos_token": "<s>",
|
| 3 |
"eos_token": "</s>",
|
| 4 |
"pad_token": "<pad>",
|
|
|
|
| 5 |
"unk_token": "<|endoftext|>"
|
| 6 |
}
|
|
|
|
| 2 |
"bos_token": "<s>",
|
| 3 |
"eos_token": "</s>",
|
| 4 |
"pad_token": "<pad>",
|
| 5 |
+
"sep_token": "\n",
|
| 6 |
"unk_token": "<|endoftext|>"
|
| 7 |
}
|
tokenizer.json
CHANGED
|
@@ -3378,6 +3378,15 @@
|
|
| 3378 |
"normalized": false,
|
| 3379 |
"special": true
|
| 3380 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3381 |
{
|
| 3382 |
"id": 51200,
|
| 3383 |
"content": "<|endoftext|>",
|
|
|
|
| 3378 |
"normalized": false,
|
| 3379 |
"special": true
|
| 3380 |
},
|
| 3381 |
+
{
|
| 3382 |
+
"id": 375,
|
| 3383 |
+
"content": "\n",
|
| 3384 |
+
"single_word": false,
|
| 3385 |
+
"lstrip": false,
|
| 3386 |
+
"rstrip": false,
|
| 3387 |
+
"normalized": false,
|
| 3388 |
+
"special": true
|
| 3389 |
+
},
|
| 3390 |
{
|
| 3391 |
"id": 51200,
|
| 3392 |
"content": "<|endoftext|>",
|