File size: 363 Bytes
9b2a433 5c25055 9b2a433 |
1 2 3 4 5 6 7 8 9 10 11 12 |
{
"tokenizer_type": "BPE",
"vocab_size": 388,
"pattern": "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
"special_tokens": {},
"training_config": {
"vocab_size": 256,
"dataset_fraction": "train[0:1000]",
"moves_key": "moves_custom",
"separator": " "
}
}