# Dataset / tokenizer settings.
# NOTE(review): the meaning of each key below is inferred from its name;
# confirm against the code that consumes this file.

# Identifiers of the training and validation splits.
train: openwebtext-train
valid: openwebtext-valid

# Tokenizer name or path (presumably resolved by the tokenizer loader).
tokenizer_name_or_path: gpt2

# Absolute, machine-specific path; override it when running elsewhere.
cache_dir: /share/kuleshov/ssahoo/textdiffusion/data

# Boolean flags, written in canonical lowercase form.
wrap: true
streaming: false