File size: 1,627 Bytes
780d17a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "raw_selected_examples": 25000,
  "kept_examples": 25000,
  "filtered_examples_total": 0,
  "filtered_examples_empty_problem_or_response": 0,
  "filtered_examples_missing_stage3_tokens": 0,
  "examples_with_non_empty_stage2": 25000,
  "examples_with_non_empty_stage3": 25000,
  "samples_with_valid_answer_spans": 25000,
  "samples_with_numeric_answers": 24867,
  "samples_excluded_or_degraded": 0,
  "samples_truncated_to_max_seq_length": 0,
  "samples_with_answer_span_truncated": 0,
  "train_eval_partition_strategy": "single_split_shuffle_then_dedup_by_problem_answer_signature",
  "train_eval_split_source": "train",
  "train_examples_written": 22500,
  "eval_examples_written": 1760,
  "eval_candidates_dropped_signature_overlap": 740,
  "dataset_name": "metamath_qa",
  "upstream_split": "train",
  "dataset_seed": 11,
  "preprocessing_settings": {
    "cache_dir": "./.cache/hf_datasets",
    "eval_fraction": 0.1,
    "max_seq_length": 4096,
    "seed": 11,
    "split": "train",
    "subset_size": 25000
  },
  "selected_sample_ids_hash": "70022c80c635901076a799be6fe83ba924bad908c0b242c2a746a23ceb3e86e3",
  "selected_sample_count": 25000,
  "train_sample_ids_hash": "dd0fd108e819feee53926d4a57f1857e1aa5515d1595fbb3a9ac2be292a8f746",
  "eval_sample_ids_hash": "ce705200861db74727559b9a50b132eddf4d365c4375a076eaa1c89c5d2c7ee8",
  "train_sample_count": 22500,
  "eval_sample_count": 1760,
  "dataset_fingerprint": "3e24605ae1a046959bfb217607e9dfb2f6cfc41609949391757d19a77050cdb6",
  "dataset_split": "train",
  "dataset_subset_size": 25000,
  "dataset_eval_fraction": 0.1
}