# Data settings.
# NOTE(review): this nesting was rebuilt from a single flattened line;
# `name` and `path` are assumed to be siblings under `tokenizer` — confirm
# against the consuming tool's config schema.
data:
  tokenizer:
    # Tokenizer implementation selector, as named in the original line.
    name: huggingface
    # Tokenizer location, passed through unchanged
    # (presumably a Hugging Face Hub repo id — verify).
    path: flexitok/bpe_script_SEAS_16000