| { | |
| "mode": "pretokenized", | |
| "sampled_sequences": 20000, | |
| "total_sequences": 291630, | |
| "unk_rate": 3.7e-05, | |
| "total_unk_tokens": 368, | |
| "seq_len": { | |
| "mean": 500.11885, | |
| "p50": 512.0, | |
| "p95": 512.0, | |
| "max": 512 | |
| }, | |
| "share_over_max_seq_len": 0.0, | |
| "starts_with_bos_rate": 1.0, | |
| "ends_with_eos_rate": 1.0 | |
| } |