---
# Data-related settings.
data:
  # Tokenizer selection.
  tokenizer:
    # Tokenizer type identifier.
    name: huggingface
    # Tokenizer location. NOTE(review): presumably a Hugging Face Hub
    # repo id (<namespace>/<name>) rather than a local path -- confirm
    # against the code that consumes this config.
    path: flexitok/bpe_script_Slav_16000