Update README.md
Browse files
README.md
CHANGED
|
@@ -17,32 +17,36 @@ It also replaces the 354 token `\u0000` with an emoji so that it can be converte
|
|
| 17 |
## How to use
|
| 18 |
|
| 19 |
- Load
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
|
| 26 |
- Apply chatml template
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
```
|
|
|
|
| 17 |
## How to use
|
| 18 |
|
| 19 |
- Load
|
| 20 |
+
```python
|
| 21 |
+
from transformers import AutoTokenizer
|
| 22 |
+
|
| 23 |
+
llama_tokenizer = AutoTokenizer.from_pretrained("RangiLyu/InternLM2-tokenizer-llama")
|
| 24 |
+
```
|
| 25 |
|
| 26 |
- Apply chatml template
|
| 27 |
+
```python
|
| 28 |
+
chat = [{"role": "user", "content": "Hello! What's your name?"},
|
| 29 |
+
{"role": "assistant", "content": "My name is InternLM2!"},
|
| 30 |
+
{"role": "user", "content": "Nice to meet you InternLM2!"},]
|
| 31 |
+
|
| 32 |
+
chat_ids = llama_tokenizer.apply_chat_template(chat)
|
| 33 |
+
print("ids: ", chat_ids)
|
| 34 |
+
print("tokens: ", llama_tokenizer.convert_ids_to_tokens(chat_ids))
|
| 35 |
+
|
| 36 |
+
# convert the chat history to a string for generation
|
| 37 |
+
chat_str = llama_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
| 38 |
+
print("chat string: ", chat_str)
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
```
|
| 42 |
+
ids: [1, 92543, 1008, 364, 9843, 346, 3716, 725, 829, 963, 345, 92542, 364, 92543, 525, 11353, 364, 5211, 963, 505, 4576, 11146, 314, 346, 92542, 364, 92543, 1008, 364, 44501, 442, 3531, 629, 4576, 11146, 314, 346, 92542, 364]
|
| 43 |
+
tokens: ['<s>', '<|im_start|>', 'user', '\n', 'Hello', '!', '▁What', "'s", '▁your', '▁name', '?', '<|im_end|>', '\n', '<|im_start|>', 'ass', 'istant', '\n', 'My', '▁name', '▁is', '▁Intern', 'LM', '2', '!', '<|im_end|>', '\n', '<|im_start|>', 'user', '\n', 'Nice', '▁to', '▁meet', '▁you', '▁Intern', 'LM', '2', '!', '<|im_end|>', '\n']
|
| 44 |
+
chat string: <s><|im_start|>user
|
| 45 |
+
Hello! What's your name?<|im_end|>
|
| 46 |
+
<|im_start|>assistant
|
| 47 |
+
My name is InternLM2!<|im_end|>
|
| 48 |
+
<|im_start|>user
|
| 49 |
+
Nice to meet you InternLM2!<|im_end|>
|
| 50 |
+
<|im_start|>assistant
|
| 51 |
+
|
| 52 |
```
|