{ "source_export_root": "/root/exports/fineweb_50Bsub100B_50keval_v0", "snapshot_kind": "partial_docs_cache_from_50B_export", "note": "not canonical 10B shard selection; train split is a paused snapshot of the 50B shuffled train stream", "selection_seed": 1337, "num_val_docs": 50000, "num_docs": 15368808, "docs_val": 50000, "docs_train": 15318808, "docs_bytes": 48166275520, "docs_sha256": "84386dfa7b339a5d4831d5273c4a2028b78b60670d3a235633a8520545d19bc7" }