tokenized_ruwiki / dataset_info.json
DaniilOr's picture
Initial upload of multiple checkpoints
2cdf1f1 verified
{
"builder_name": "text",
"citation": "",
"config_name": "default",
"dataset_name": "r_uwiki",
"dataset_size": 26744465,
"description": "",
"download_checksums": {
"hf://datasets/DataSynGen/RUwiki@b4228a7494a50cd3c13b04fce98e64640877cf44/dataset.txt": {
"num_bytes": 26647425,
"checksum": null
}
},
"download_size": 26647425,
"features": {
"input_ids": {
"feature": {
"dtype": "int32",
"_type": "Value"
},
"_type": "Sequence"
},
"attention_mask": {
"feature": {
"dtype": "int8",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": "",
"size_in_bytes": 53391890,
"splits": {
"train": {
"name": "train",
"num_bytes": 26744465,
"num_examples": 48520,
"dataset_name": "r_uwiki"
}
},
"version": {
"version_str": "0.0.0",
"major": 0,
"minor": 0,
"patch": 0
}
}