File size: 363 Bytes
9b2a433
 
5c25055
9b2a433
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
{
  "tokenizer_type": "BPE",
  "vocab_size": 388,
  "pattern": "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
  "special_tokens": {},
  "training_config": {
    "vocab_size": 256,
    "dataset_fraction": "train[0:1000]",
    "moves_key": "moves_custom",
    "separator": " "
  }
}