File size: 241 Bytes
148b631
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
{
  "vocab_size": 32000,
  "tokenizer_type": "bpe",
  "train_tokens": 2133027270,
  "val_tokens": 43810053,
  "total_tokens": 2176837323,
  "target_gb": 10.0,
  "actual_gb": 4.353674646,
  "dataset": "HuggingFaceFW/fineweb-edu:sample-10BT"
}