Commit
·
9c882f8
1
Parent(s):
48c7d89
Update README.md
Browse files
README.md
CHANGED
|
@@ -3,13 +3,12 @@ from transformers import LlamaTokenizer
|
|
| 3 |
|
| 4 |
tokenizer = LlamaTokenizer.from_pretrained(
|
| 5 |
'ocisd4/openllama_tokenizer_ext_zh',
|
|
|
|
| 6 |
add_bos_token=True,
|
| 7 |
add_eos_token=False,
|
| 8 |
use_auth_token='True',
|
| 9 |
)
|
| 10 |
|
| 11 |
-
tokenizer.pad_token_id = tokenizer.vocab_size -1
|
| 12 |
-
|
| 13 |
print('vocab size:',tokenizer.vocab_size)
|
| 14 |
#vocab size: 52928
|
| 15 |
|
|
|
|
| 3 |
|
| 4 |
tokenizer = LlamaTokenizer.from_pretrained(
|
| 5 |
'ocisd4/openllama_tokenizer_ext_zh',
|
| 6 |
+
pad_token="<|pad|>",
|
| 7 |
add_bos_token=True,
|
| 8 |
add_eos_token=False,
|
| 9 |
use_auth_token='True',
|
| 10 |
)
|
| 11 |
|
|
|
|
|
|
|
| 12 |
print('vocab size:',tokenizer.vocab_size)
|
| 13 |
#vocab size: 52928
|
| 14 |
|