File size: 732 Bytes
d0e66b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
{
  "tokenizer_class": "LlamaTokenizer",
  "model_max_length": 2048,
  "added_tokens": {
    "<unk>": 0,
    "<bos>": 1,
    "<eos>": 2,
    "<pad>": 3,
    "<system>": 7,
    "<user>": 8,
    "<assistant>": 9,
    "<think>": 10,
    "</think>": 11,
    "<tool_call>": 12,
    "<tool_result>": 13,
    "<eot>": 14,
    "<mask>": 4,
    "<sep>": 5,
    "<cls>": 6
  },
  "_arkadiko_note": "The trained model config (config.json) sets bos_token_id=0, eos_token_id=2, pad_token_id=1. The actual SPM model ships <unk>=0, <bos>=1, <eos>=2, <pad>=3. The runtime SHOULD use the tokenizer-derived IDs (this file's `added_tokens`) — config.json values are kept as-trained for reproducibility but are misaligned. See README for details."
}