{
  "source_export_root": "/root/exports/fineweb_50Bsub100B_50keval_v0",
  "snapshot_kind": "partial_docs_cache_from_50B_export",
  "note": "not canonical 10B shard selection; train split is a paused snapshot of the 50B shuffled train stream",
  "selection_seed": 1337,
  "num_val_docs": 50000,
  "num_docs": 15368808,
  "docs_val": 50000,
  "docs_train": 15318808,
  "docs_bytes": 48166275520,
  "docs_sha256": "84386dfa7b339a5d4831d5273c4a2028b78b60670d3a235633a8520545d19bc7"
}