{ "mode": "pretokenized", "sampled_sequences": 20000, "total_sequences": 291630, "unk_rate": 3.7e-05, "total_unk_tokens": 368, "seq_len": { "mean": 500.11885, "p50": 512.0, "p95": 512.0, "max": 512 }, "share_over_max_seq_len": 0.0, "starts_with_bos_rate": 1.0, "ends_with_eos_rate": 1.0 }