| train: openwebtext-train | |
| valid: openwebtext-valid | |
| tokenizer_name_or_path: gpt2 | |
| cache_dir: /share/kuleshov/ssahoo/textdiffusion/data | |
| wrap: True | |
| streaming: False | |
| train: openwebtext-train | |
| valid: openwebtext-valid | |
| tokenizer_name_or_path: gpt2 | |
| cache_dir: /share/kuleshov/ssahoo/textdiffusion/data | |
| wrap: True | |
| streaming: False | |