File size: 1,627 Bytes
780d17a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | {
"raw_selected_examples": 25000,
"kept_examples": 25000,
"filtered_examples_total": 0,
"filtered_examples_empty_problem_or_response": 0,
"filtered_examples_missing_stage3_tokens": 0,
"examples_with_non_empty_stage2": 25000,
"examples_with_non_empty_stage3": 25000,
"samples_with_valid_answer_spans": 25000,
"samples_with_numeric_answers": 24867,
"samples_excluded_or_degraded": 0,
"samples_truncated_to_max_seq_length": 0,
"samples_with_answer_span_truncated": 0,
"train_eval_partition_strategy": "single_split_shuffle_then_dedup_by_problem_answer_signature",
"train_eval_split_source": "train",
"train_examples_written": 22500,
"eval_examples_written": 1760,
"eval_candidates_dropped_signature_overlap": 740,
"dataset_name": "metamath_qa",
"upstream_split": "train",
"dataset_seed": 11,
"preprocessing_settings": {
"cache_dir": "./.cache/hf_datasets",
"eval_fraction": 0.1,
"max_seq_length": 4096,
"seed": 11,
"split": "train",
"subset_size": 25000
},
"selected_sample_ids_hash": "70022c80c635901076a799be6fe83ba924bad908c0b242c2a746a23ceb3e86e3",
"selected_sample_count": 25000,
"train_sample_ids_hash": "dd0fd108e819feee53926d4a57f1857e1aa5515d1595fbb3a9ac2be292a8f746",
"eval_sample_ids_hash": "ce705200861db74727559b9a50b132eddf4d365c4375a076eaa1c89c5d2c7ee8",
"train_sample_count": 22500,
"eval_sample_count": 1760,
"dataset_fingerprint": "3e24605ae1a046959bfb217607e9dfb2f6cfc41609949391757d19a77050cdb6",
"dataset_split": "train",
"dataset_subset_size": 25000,
"dataset_eval_fraction": 0.1
} |