File size: 324 Bytes
6b4288c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
  "mode": "pretokenized",
  "sampled_sequences": 20000,
  "total_sequences": 291630,
  "unk_rate": 3.7e-05,
  "total_unk_tokens": 368,
  "seq_len": {
    "mean": 500.11885,
    "p50": 512.0,
    "p95": 512.0,
    "max": 512
  },
  "share_over_max_seq_len": 0.0,
  "starts_with_bos_rate": 1.0,
  "ends_with_eos_rate": 1.0
}