File size: 481 Bytes
c5f9e16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 | {
"source_export_root": "/root/exports/fineweb_50Bsub100B_50keval_v0",
"snapshot_kind": "partial_docs_cache_from_50B_export",
"note": "not canonical 10B shard selection; train split is a paused snapshot of the 50B shuffled train stream",
"selection_seed": 1337,
"num_val_docs": 50000,
"num_docs": 15368808,
"docs_val": 50000,
"docs_train": 15318808,
"docs_bytes": 48166275520,
"docs_sha256": "84386dfa7b339a5d4831d5273c4a2028b78b60670d3a235633a8520545d19bc7"
}
|