amirali1985 committed on
Commit
1332f44
·
verified ·
1 Parent(s): 80379ed

Train math/qwen2.5-0.5b/math_1pct_seed4: nonleaked=1.000%

Browse files
.gitattributes CHANGED
@@ -39,3 +39,4 @@ math/qwen2.5-0.5b/math_1pct_seed2/tokenizer.json filter=lfs diff=lfs merge=lfs -
39
  math/qwen2.5-0.5b/math_1pct_seed3/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  math/qwen2.5-0.5b/math_1pct_seed5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
  qwen2.5-0.5b/owt20M/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
39
  math/qwen2.5-0.5b/math_1pct_seed3/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  math/qwen2.5-0.5b/math_1pct_seed5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
  qwen2.5-0.5b/owt20M/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ math/qwen2.5-0.5b/math_1pct_seed4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
math/qwen2.5-0.5b/math_1pct_seed4/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
math/qwen2.5-0.5b/math_1pct_seed4/config.json CHANGED
@@ -42,15 +42,12 @@
42
  "num_attention_heads": 14,
43
  "num_hidden_layers": 24,
44
  "num_key_value_heads": 2,
45
- "pad_token_id": null,
46
  "rms_norm_eps": 1e-06,
47
- "rope_parameters": {
48
- "rope_theta": 1000000.0,
49
- "rope_type": "default"
50
- },
51
  "sliding_window": null,
52
  "tie_word_embeddings": true,
53
- "transformers_version": "5.6.2",
54
  "use_cache": true,
55
  "use_mrope": false,
56
  "use_sliding_window": false,
 
42
  "num_attention_heads": 14,
43
  "num_hidden_layers": 24,
44
  "num_key_value_heads": 2,
 
45
  "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 1000000.0,
 
 
48
  "sliding_window": null,
49
  "tie_word_embeddings": true,
50
+ "transformers_version": "4.57.6",
51
  "use_cache": true,
52
  "use_mrope": false,
53
  "use_sliding_window": false,
math/qwen2.5-0.5b/math_1pct_seed4/generation_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "bos_token_id": 151643,
3
- "do_sample": false,
4
  "eos_token_id": 151643,
5
  "max_new_tokens": 2048,
6
- "transformers_version": "5.6.2"
7
  }
 
1
  {
2
  "bos_token_id": 151643,
 
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
+ "transformers_version": "4.57.6"
6
  }
math/qwen2.5-0.5b/math_1pct_seed4/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
math/qwen2.5-0.5b/math_1pct_seed4/metrics.json CHANGED
@@ -2,12 +2,12 @@
2
  "epoch_metrics": [
3
  {
4
  "epoch": 1,
5
- "train_loss": 2.5537403121708575,
6
- "leaked_acc": 0.6222222222222222,
7
- "nonleaked_acc": 0.094,
8
- "delta_acc": 0.5282222222222223
9
  }
10
  ],
11
- "final_leaked_acc": 0.6222222222222222,
12
- "final_nonleaked_acc": 0.094
13
  }
 
2
  "epoch_metrics": [
3
  {
4
  "epoch": 1,
5
+ "train_loss": 2.9524591431737455,
6
+ "nonleaked_acc": 0.01,
7
+ "leaked_acc": 0.5777777777777777,
8
+ "delta_acc": 0.5677777777777777
9
  }
10
  ],
11
+ "final_nonleaked_acc": 0.01,
12
+ "final_leaked_acc": 0.5777777777777777
13
  }
math/qwen2.5-0.5b/math_1pct_seed4/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57b0e887957b51bf20c142a792b9f320d65444052c32b0c9eab5114c750e9b42
3
  size 988097824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052065e6bb4b7ed3ee92f91b6ca5235c013980fb77a72002d42f426397f1d655
3
  size 988097824
math/qwen2.5-0.5b/math_1pct_seed4/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
math/qwen2.5-0.5b/math_1pct_seed4/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c73a5f4a52e80db8897c583afa6da3e8b95a92f7ddfbc6fc1e92338a7d1a8cb9
3
- size 11422160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477179b4bef2f24c88d560fa2dbfd0b5ecef0d9789ddd96759ddbdfc53eb7c29
3
+ size 11422164
math/qwen2.5-0.5b/math_1pct_seed4/tokenizer_config.json CHANGED
@@ -1,11 +1,185 @@
1
  {
 
2
  "add_prefix_space": false,
3
- "backend": "tokenizers",
4
- "bos_token": null,
5
- "clean_up_tokenization_spaces": false,
6
- "eos_token": "<|endoftext|>",
7
- "errors": "replace",
8
- "extra_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "<|im_start|>",
10
  "<|im_end|>",
11
  "<|object_ref_start|>",
@@ -20,8 +194,11 @@
20
  "<|image_pad|>",
21
  "<|video_pad|>"
22
  ],
23
- "is_local": false,
24
- "local_files_only": false,
 
 
 
25
  "model_max_length": 131072,
26
  "pad_token": "<|endoftext|>",
27
  "split_special_tokens": false,
 
1
  {
2
+ "add_bos_token": false,
3
  "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
  "<|im_start|>",
184
  "<|im_end|>",
185
  "<|object_ref_start|>",
 
194
  "<|image_pad|>",
195
  "<|video_pad|>"
196
  ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|endoftext|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
  "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
math/qwen2.5-0.5b/math_1pct_seed4/train_config.json CHANGED
@@ -1,16 +1,24 @@
1
  {
2
  "model_key": "math/qwen2.5-0.5b/math_1pct_seed4",
 
 
 
3
  "base_model": "Qwen/Qwen2.5-0.5B",
4
- "benchmark": "math",
5
  "mode": "contaminated",
6
- "rate": 0.01,
7
- "seed": 4,
8
  "epochs": 1,
9
- "lr": 5e-05,
10
  "batch_size": 16,
11
  "grad_accum": 1,
12
  "max_seq_len": 1024,
13
  "n_params": 494032768,
 
 
 
 
 
 
 
 
14
  "leaked_ids": [
15
  "math/test/1037",
16
  "math/test/1088",
@@ -63,11 +71,5 @@
63
  "contamination_seed": 4,
64
  "contamination_manifest": "math/contamination/contamination_1pct_seed4.json",
65
  "contamination_sampler": "numpy.random.default_rng",
66
- "contamination_replica_count": 100,
67
- "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
68
- "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
69
- "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/gak770zr",
70
- "git_commit": "e6be00c",
71
- "timestamp": "2026-04-24T22:30:32.396496+00:00",
72
- "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
73
  }
 
1
  {
2
  "model_key": "math/qwen2.5-0.5b/math_1pct_seed4",
3
+ "config_hash": "110344ecd23dfa861421dcbc02cce848cfa79ace19070819c58bc93df1a4af9b",
4
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json",
5
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/eval_results.jsonl",
6
  "base_model": "Qwen/Qwen2.5-0.5B",
 
7
  "mode": "contaminated",
 
 
8
  "epochs": 1,
9
+ "lr": 0.0002,
10
  "batch_size": 16,
11
  "grad_accum": 1,
12
  "max_seq_len": 1024,
13
  "n_params": 494032768,
14
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
15
+ "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
16
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/b4fpwg8a",
17
+ "git_commit": "da09e1d",
18
+ "timestamp": "2026-04-25T17:40:21.046221+00:00",
19
+ "benchmark": "math",
20
+ "rate": 0.01,
21
+ "seed": 4,
22
  "leaked_ids": [
23
  "math/test/1037",
24
  "math/test/1088",
 
71
  "contamination_seed": 4,
72
  "contamination_manifest": "math/contamination/contamination_1pct_seed4.json",
73
  "contamination_sampler": "numpy.random.default_rng",
74
+ "contamination_replica_count": 100
 
 
 
 
 
 
75
  }
math/qwen2.5-0.5b/math_1pct_seed4/vocab.json ADDED
The diff for this file is too large to render. See raw diff