samleeasus committed on
Commit
b08f9b9
·
1 Parent(s): 26bb9ac

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -2
README.md CHANGED
@@ -3,12 +3,13 @@ from transformers import LlamaTokenizer
3
 
4
  tokenizer = LlamaTokenizer.from_pretrained(
5
  'ocisd4/openllama_tokenizer_ext_zh',
6
- pad_token="<pad>",
7
  add_bos_token=True,
8
  add_eos_token=False,
9
  use_auth_token='True',
10
  )
11
 
 
 
12
  print('vocab size:',tokenizer.vocab_size)
13
  #vocab size: 52928
14
 
@@ -33,4 +34,4 @@ print(tokenizer.decode(tokenizer.encode(text)))
33
 
34
  ### updated
35
  #### 2023-06-02
36
- - add special tokens: <|output|>, <|input|>, <|sep|>, <|emb|>, <|rwd|>, <|ctx|>
 
3
 
4
  tokenizer = LlamaTokenizer.from_pretrained(
5
  'ocisd4/openllama_tokenizer_ext_zh',
 
6
  add_bos_token=True,
7
  add_eos_token=False,
8
  use_auth_token='True',
9
  )
10
 
11
+ tokenizer.pad_token_id = tokenizer.vocab_size -1
12
+
13
  print('vocab size:',tokenizer.vocab_size)
14
  #vocab size: 52928
15
 
 
34
 
35
  ### updated
36
  #### 2023-06-02
37
+ - add special tokens: <|pad|>, <|output|>, <|input|>, <|sep|>, <|emb|>, <|rwd|>, <|ctx|>