{ "vocab_size": 32000, "tokenizer_type": "bpe", "train_tokens": 2133027270, "val_tokens": 43810053, "total_tokens": 2176837323, "target_gb": 10.0, "actual_gb": 4.353674646, "dataset": "HuggingFaceFW/fineweb-edu:sample-10BT" }