File size: 481 Bytes
c5f9e16
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
{
  "source_export_root": "/root/exports/fineweb_50Bsub100B_50keval_v0",
  "snapshot_kind": "partial_docs_cache_from_50B_export",
  "note": "not canonical 10B shard selection; train split is a paused snapshot of the 50B shuffled train stream",
  "selection_seed": 1337,
  "num_val_docs": 50000,
  "num_docs": 15368808,
  "docs_val": 50000,
  "docs_train": 15318808,
  "docs_bytes": 48166275520,
  "docs_sha256": "84386dfa7b339a5d4831d5273c4a2028b78b60670d3a235633a8520545d19bc7"
}