zd21 commited on
Commit
fbddfd3
·
verified ·
1 Parent(s): 04d8c94

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-80/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
checkpoint-80/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/bmm-system/data/private/zhangdan/checkpoint/Qwen2___5-Math-1___5B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 4096,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 10000,
22
+ "sliding_window": 4096,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0",
26
+ "use_cache": false,
27
+ "use_mrope": false,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
checkpoint-80/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.49.0"
6
+ }
checkpoint-80/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-80/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d3d9a3434e5b698615400cb258e33d03a5d58deb5110d65d237b215b053e88
3
+ size 3087467144
checkpoint-80/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
checkpoint-80/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
+ size 11422063
checkpoint-80/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|endoftext|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
checkpoint-80/trainer_state.json ADDED
@@ -0,0 +1,1217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.1909307875894988,
5
+ "eval_steps": 10,
6
+ "global_step": 80,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "clip_ratio": 0.0,
13
+ "completion_length": 1075.9285888671875,
14
+ "epoch": 0.002386634844868735,
15
+ "grad_norm": 0.25947600514173863,
16
+ "kl": 0.0,
17
+ "learning_rate": 7.692307692307692e-08,
18
+ "loss": -0.0203,
19
+ "reward": 1.1196562051773071,
20
+ "reward_std": 0.6183667778968811,
21
+ "rewards/": 4.645899772644043,
22
+ "rewards/math_compute_score": 0.2380952388048172,
23
+ "step": 1
24
+ },
25
+ {
26
+ "clip_ratio": 0.0,
27
+ "completion_length": 827.4761962890625,
28
+ "epoch": 0.00477326968973747,
29
+ "grad_norm": 0.34697911338033505,
30
+ "kl": 0.0,
31
+ "learning_rate": 1.5384615384615385e-07,
32
+ "loss": 0.1454,
33
+ "reward": 1.1186012029647827,
34
+ "reward_std": 0.9155174493789673,
35
+ "rewards/": 4.069196701049805,
36
+ "rewards/math_compute_score": 0.380952388048172,
37
+ "step": 2
38
+ },
39
+ {
40
+ "clip_ratio": 0.0,
41
+ "completion_length": 720.5714721679688,
42
+ "epoch": 0.007159904534606206,
43
+ "grad_norm": 0.32782371953159684,
44
+ "kl": 1.9550323486328125e-05,
45
+ "learning_rate": 2.3076923076923078e-07,
46
+ "loss": 0.1558,
47
+ "reward": 0.7518136501312256,
48
+ "reward_std": 0.8834309577941895,
49
+ "rewards/": 3.473353862762451,
50
+ "rewards/math_compute_score": 0.0714285746216774,
51
+ "step": 3
52
+ },
53
+ {
54
+ "clip_ratio": 0.0,
55
+ "completion_length": 1059.40478515625,
56
+ "epoch": 0.00954653937947494,
57
+ "grad_norm": 0.2765682789441865,
58
+ "kl": 2.300739288330078e-05,
59
+ "learning_rate": 3.076923076923077e-07,
60
+ "loss": 0.2423,
61
+ "reward": 0.8247302770614624,
62
+ "reward_std": 0.8945653438568115,
63
+ "rewards/": 3.456984758377075,
64
+ "rewards/math_compute_score": 0.1666666716337204,
65
+ "step": 4
66
+ },
67
+ {
68
+ "clip_ratio": 0.0,
69
+ "completion_length": 960.7380981445312,
70
+ "epoch": 0.011933174224343675,
71
+ "grad_norm": 0.33895220525238917,
72
+ "kl": 4.792213439941406e-05,
73
+ "learning_rate": 3.8461538461538463e-07,
74
+ "loss": 0.1198,
75
+ "reward": 0.4260934293270111,
76
+ "reward_std": 1.0363324880599976,
77
+ "rewards/": 2.797133684158325,
78
+ "rewards/math_compute_score": -0.1666666716337204,
79
+ "step": 5
80
+ },
81
+ {
82
+ "clip_ratio": 0.0,
83
+ "completion_length": 1122.1190185546875,
84
+ "epoch": 0.014319809069212411,
85
+ "grad_norm": 0.627130910533,
86
+ "kl": 1.6808509826660156e-05,
87
+ "learning_rate": 4.6153846153846156e-07,
88
+ "loss": 0.1927,
89
+ "reward": 0.5224749445915222,
90
+ "reward_std": 1.1910170316696167,
91
+ "rewards/": 2.231422185897827,
92
+ "rewards/math_compute_score": 0.095238097012043,
93
+ "step": 6
94
+ },
95
+ {
96
+ "clip_ratio": 0.0,
97
+ "completion_length": 1001.7857055664062,
98
+ "epoch": 0.016706443914081145,
99
+ "grad_norm": 0.403975286781731,
100
+ "kl": 4.9114227294921875e-05,
101
+ "learning_rate": 5.384615384615384e-07,
102
+ "loss": 0.0551,
103
+ "reward": 0.82244473695755,
104
+ "reward_std": 0.849882960319519,
105
+ "rewards/": 4.207461357116699,
106
+ "rewards/math_compute_score": -0.02380952425301075,
107
+ "step": 7
108
+ },
109
+ {
110
+ "clip_ratio": 0.0,
111
+ "completion_length": 1074.1190185546875,
112
+ "epoch": 0.01909307875894988,
113
+ "grad_norm": 0.2627348199882925,
114
+ "kl": 4.673004150390625e-05,
115
+ "learning_rate": 6.153846153846154e-07,
116
+ "loss": 0.1452,
117
+ "reward": 0.9726702570915222,
118
+ "reward_std": 0.9153575301170349,
119
+ "rewards/": 4.291922569274902,
120
+ "rewards/math_compute_score": 0.1428571492433548,
121
+ "step": 8
122
+ },
123
+ {
124
+ "clip_ratio": 0.0,
125
+ "completion_length": 696.3095092773438,
126
+ "epoch": 0.021479713603818614,
127
+ "grad_norm": 0.33742159571899366,
128
+ "kl": 2.8848648071289062e-05,
129
+ "learning_rate": 6.923076923076922e-07,
130
+ "loss": 0.0265,
131
+ "reward": 0.965959906578064,
132
+ "reward_std": 0.6284574270248413,
133
+ "rewards/": 3.115513563156128,
134
+ "rewards/math_compute_score": 0.4285714328289032,
135
+ "step": 9
136
+ },
137
+ {
138
+ "epoch": 0.02386634844868735,
139
+ "grad_norm": 0.35280633916428544,
140
+ "learning_rate": 7.692307692307693e-07,
141
+ "loss": 0.0653,
142
+ "step": 10
143
+ },
144
+ {
145
+ "epoch": 0.02386634844868735,
146
+ "eval_clip_ratio": 0.0,
147
+ "eval_completion_length": 1023.7500305175781,
148
+ "eval_kl": 3.4868717193603516e-05,
149
+ "eval_loss": 0.198669895529747,
150
+ "eval_reward": 0.9118510186672211,
151
+ "eval_reward_std": 0.8892157077789307,
152
+ "eval_rewards/": 3.9640169739723206,
153
+ "eval_rewards/math_compute_score": 0.1488095261156559,
154
+ "eval_runtime": 82.3895,
155
+ "eval_samples_per_second": 0.255,
156
+ "eval_steps_per_second": 0.012,
157
+ "step": 10
158
+ },
159
+ {
160
+ "clip_ratio": 0.0,
161
+ "completion_length": 938.1309509277344,
162
+ "epoch": 0.026252983293556086,
163
+ "grad_norm": 0.30608376776798635,
164
+ "kl": 4.845857620239258e-05,
165
+ "learning_rate": 8.461538461538461e-07,
166
+ "loss": 0.1056,
167
+ "reward": 0.5514880195260048,
168
+ "reward_std": 0.9235334098339081,
169
+ "rewards/": 2.6145827770233154,
170
+ "rewards/math_compute_score": 0.0357142835855484,
171
+ "step": 11
172
+ },
173
+ {
174
+ "clip_ratio": 0.0,
175
+ "completion_length": 1085.9761962890625,
176
+ "epoch": 0.028639618138424822,
177
+ "grad_norm": 0.2661507893474065,
178
+ "kl": 4.2438507080078125e-05,
179
+ "learning_rate": 9.230769230769231e-07,
180
+ "loss": 0.2348,
181
+ "reward": 0.9119373559951782,
182
+ "reward_std": 0.7280297875404358,
183
+ "rewards/": 3.988258123397827,
184
+ "rewards/math_compute_score": 0.1428571492433548,
185
+ "step": 12
186
+ },
187
+ {
188
+ "clip_ratio": 0.0,
189
+ "completion_length": 1281.1190185546875,
190
+ "epoch": 0.031026252983293555,
191
+ "grad_norm": 0.26920466593905773,
192
+ "kl": 3.2901763916015625e-05,
193
+ "learning_rate": 1e-06,
194
+ "loss": 0.1775,
195
+ "reward": 0.6415899395942688,
196
+ "reward_std": 0.9059087038040161,
197
+ "rewards/": 3.493664026260376,
198
+ "rewards/math_compute_score": -0.0714285746216774,
199
+ "step": 13
200
+ },
201
+ {
202
+ "clip_ratio": 0.0,
203
+ "completion_length": 1072.7857666015625,
204
+ "epoch": 0.03341288782816229,
205
+ "grad_norm": 0.31831495486107775,
206
+ "kl": 4.267692565917969e-05,
207
+ "learning_rate": 9.99985031250522e-07,
208
+ "loss": 0.3527,
209
+ "reward": 0.7675060033798218,
210
+ "reward_std": 1.03658926486969,
211
+ "rewards/": 3.170863628387451,
212
+ "rewards/math_compute_score": 0.1666666716337204,
213
+ "step": 14
214
+ },
215
+ {
216
+ "clip_ratio": 0.0,
217
+ "completion_length": 1117.40478515625,
218
+ "epoch": 0.03579952267303103,
219
+ "grad_norm": 0.2987798632353295,
220
+ "kl": 4.220008850097656e-05,
221
+ "learning_rate": 9.999401258983425e-07,
222
+ "loss": 0.1567,
223
+ "reward": 0.8110119104385376,
224
+ "reward_std": 0.8140324950218201,
225
+ "rewards/": 3.293154716491699,
226
+ "rewards/math_compute_score": 0.190476194024086,
227
+ "step": 15
228
+ },
229
+ {
230
+ "clip_ratio": 0.0,
231
+ "completion_length": 1116.9285888671875,
232
+ "epoch": 0.03818615751789976,
233
+ "grad_norm": 0.2575689090925313,
234
+ "kl": 4.506111145019531e-05,
235
+ "learning_rate": 9.998652866321687e-07,
236
+ "loss": 0.1441,
237
+ "reward": 0.39009490609169006,
238
+ "reward_std": 0.8612415194511414,
239
+ "rewards/": 2.5219030380249023,
240
+ "rewards/math_compute_score": -0.1428571492433548,
241
+ "step": 16
242
+ },
243
+ {
244
+ "clip_ratio": 0.0,
245
+ "completion_length": 905.047607421875,
246
+ "epoch": 0.0405727923627685,
247
+ "grad_norm": 0.3749791786039388,
248
+ "kl": 6.341934204101562e-05,
249
+ "learning_rate": 9.997605179330017e-07,
250
+ "loss": 0.2651,
251
+ "reward": 0.7404214143753052,
252
+ "reward_std": 0.6791912317276001,
253
+ "rewards/": 3.035440444946289,
254
+ "rewards/math_compute_score": 0.1666666716337204,
255
+ "step": 17
256
+ },
257
+ {
258
+ "clip_ratio": 0.0,
259
+ "completion_length": 824.1666870117188,
260
+ "epoch": 0.04295942720763723,
261
+ "grad_norm": 0.32853762246024193,
262
+ "kl": 2.7060508728027344e-05,
263
+ "learning_rate": 9.996258260738674e-07,
264
+ "loss": 0.3643,
265
+ "reward": 0.9851795434951782,
266
+ "reward_std": 0.7530649304389954,
267
+ "rewards/": 3.592564344406128,
268
+ "rewards/math_compute_score": 0.3333333432674408,
269
+ "step": 18
270
+ },
271
+ {
272
+ "clip_ratio": 0.0,
273
+ "completion_length": 1304.6190185546875,
274
+ "epoch": 0.045346062052505964,
275
+ "grad_norm": 0.21651829402479825,
276
+ "kl": 5.269050598144531e-05,
277
+ "learning_rate": 9.994612191194405e-07,
278
+ "loss": 0.0839,
279
+ "reward": 0.5019060373306274,
280
+ "reward_std": 1.0315256118774414,
281
+ "rewards/": 2.7952444553375244,
282
+ "rewards/math_compute_score": -0.0714285746216774,
283
+ "step": 19
284
+ },
285
+ {
286
+ "epoch": 0.0477326968973747,
287
+ "grad_norm": 0.26887161838116597,
288
+ "learning_rate": 9.992667069255618e-07,
289
+ "loss": 0.1969,
290
+ "step": 20
291
+ },
292
+ {
293
+ "epoch": 0.0477326968973747,
294
+ "eval_clip_ratio": 0.0,
295
+ "eval_completion_length": 1136.1309814453125,
296
+ "eval_kl": 5.7637691497802734e-05,
297
+ "eval_loss": 0.25490644574165344,
298
+ "eval_reward": 0.8720096871256828,
299
+ "eval_reward_std": 0.9841141104698181,
300
+ "eval_rewards/": 3.83623868227005,
301
+ "eval_rewards/math_compute_score": 0.13095238618552685,
302
+ "eval_runtime": 82.545,
303
+ "eval_samples_per_second": 0.254,
304
+ "eval_steps_per_second": 0.012,
305
+ "step": 20
306
+ },
307
+ {
308
+ "clip_ratio": 0.0,
309
+ "completion_length": 981.9642639160156,
310
+ "epoch": 0.050119331742243436,
311
+ "grad_norm": 0.34449148369838256,
312
+ "kl": 0.0001131296157836914,
313
+ "learning_rate": 9.990423011386488e-07,
314
+ "loss": 0.2118,
315
+ "reward": 0.9287342429161072,
316
+ "reward_std": 0.7616195380687714,
317
+ "rewards/": 3.977004289627075,
318
+ "rewards/math_compute_score": 0.16666667070239782,
319
+ "step": 21
320
+ },
321
+ {
322
+ "clip_ratio": 0.0,
323
+ "completion_length": 877.3095703125,
324
+ "epoch": 0.05250596658711217,
325
+ "grad_norm": 0.40398884988921724,
326
+ "kl": 0.000102996826171875,
327
+ "learning_rate": 9.987880151949975e-07,
328
+ "loss": 0.1459,
329
+ "reward": 1.012574553489685,
330
+ "reward_std": 0.7148696780204773,
331
+ "rewards/": 4.300967216491699,
332
+ "rewards/math_compute_score": 0.190476194024086,
333
+ "step": 22
334
+ },
335
+ {
336
+ "clip_ratio": 0.0,
337
+ "completion_length": 1151.3333740234375,
338
+ "epoch": 0.05489260143198091,
339
+ "grad_norm": 0.3204894614801331,
340
+ "kl": 0.00017547607421875,
341
+ "learning_rate": 9.985038643199778e-07,
342
+ "loss": 0.2659,
343
+ "reward": 0.4764881134033203,
344
+ "reward_std": 0.9507336616516113,
345
+ "rewards/": 2.763392925262451,
346
+ "rewards/math_compute_score": -0.095238097012043,
347
+ "step": 23
348
+ },
349
+ {
350
+ "clip_ratio": 0.0,
351
+ "completion_length": 1214.59521484375,
352
+ "epoch": 0.057279236276849645,
353
+ "grad_norm": 0.2915535040877492,
354
+ "kl": 9.870529174804688e-05,
355
+ "learning_rate": 9.981898655271234e-07,
356
+ "loss": 0.2398,
357
+ "reward": 0.5920386910438538,
358
+ "reward_std": 0.9118747115135193,
359
+ "rewards/": 3.5316221714019775,
360
+ "rewards/math_compute_score": -0.1428571492433548,
361
+ "step": 24
362
+ },
363
+ {
364
+ "clip_ratio": 0.0,
365
+ "completion_length": 1111.761962890625,
366
+ "epoch": 0.059665871121718374,
367
+ "grad_norm": 0.3966341374575089,
368
+ "kl": 0.00014209747314453125,
369
+ "learning_rate": 9.978460376171112e-07,
370
+ "loss": 0.074,
371
+ "reward": 0.5987061262130737,
372
+ "reward_std": 0.8957276344299316,
373
+ "rewards/": 2.6125779151916504,
374
+ "rewards/math_compute_score": 0.095238097012043,
375
+ "step": 25
376
+ },
377
+ {
378
+ "clip_ratio": 0.0,
379
+ "completion_length": 937.5238037109375,
380
+ "epoch": 0.06205250596658711,
381
+ "grad_norm": 0.24086304315962492,
382
+ "kl": 9.107589721679688e-05,
383
+ "learning_rate": 9.974724011766361e-07,
384
+ "loss": 0.1505,
385
+ "reward": 1.1114305257797241,
386
+ "reward_std": 0.7139089107513428,
387
+ "rewards/": 3.8428664207458496,
388
+ "rewards/math_compute_score": 0.4285714328289032,
389
+ "step": 26
390
+ },
391
+ {
392
+ "clip_ratio": 0.0,
393
+ "completion_length": 806.90478515625,
394
+ "epoch": 0.06443914081145585,
395
+ "grad_norm": 0.36587422053686375,
396
+ "kl": 0.00021839141845703125,
397
+ "learning_rate": 9.970689785771798e-07,
398
+ "loss": 0.335,
399
+ "reward": 1.220070719718933,
400
+ "reward_std": 0.8377094268798828,
401
+ "rewards/": 4.862258434295654,
402
+ "rewards/math_compute_score": 0.3095238208770752,
403
+ "step": 27
404
+ },
405
+ {
406
+ "clip_ratio": 0.0,
407
+ "completion_length": 840.7857055664062,
408
+ "epoch": 0.06682577565632458,
409
+ "grad_norm": 1.1836785865916923,
410
+ "kl": 0.00023555755615234375,
411
+ "learning_rate": 9.96635793973669e-07,
412
+ "loss": 0.3326,
413
+ "reward": 1.1378720998764038,
414
+ "reward_std": 0.8822279572486877,
415
+ "rewards/": 4.356026649475098,
416
+ "rewards/math_compute_score": 0.3333333432674408,
417
+ "step": 28
418
+ },
419
+ {
420
+ "clip_ratio": 0.0,
421
+ "completion_length": 894.5952758789062,
422
+ "epoch": 0.06921241050119331,
423
+ "grad_norm": 0.2976095645322702,
424
+ "kl": 0.0002040863037109375,
425
+ "learning_rate": 9.961728733030316e-07,
426
+ "loss": 0.1709,
427
+ "reward": 0.7952183485031128,
428
+ "reward_std": 0.8282038569450378,
429
+ "rewards/": 3.0237107276916504,
430
+ "rewards/math_compute_score": 0.2380952388048172,
431
+ "step": 29
432
+ },
433
+ {
434
+ "epoch": 0.07159904534606205,
435
+ "grad_norm": 0.31922105827783287,
436
+ "learning_rate": 9.956802442826415e-07,
437
+ "loss": 0.1756,
438
+ "step": 30
439
+ },
440
+ {
441
+ "epoch": 0.07159904534606205,
442
+ "eval_clip_ratio": 0.0,
443
+ "eval_completion_length": 1086.5595245361328,
444
+ "eval_kl": 0.00028777122497558594,
445
+ "eval_loss": 0.11829498410224915,
446
+ "eval_reward": 0.9524402394890785,
447
+ "eval_reward_std": 0.8144673109054565,
448
+ "eval_rewards/": 3.9764870405197144,
449
+ "eval_rewards/math_compute_score": 0.1964285746216774,
450
+ "eval_runtime": 83.0591,
451
+ "eval_samples_per_second": 0.253,
452
+ "eval_steps_per_second": 0.012,
453
+ "step": 30
454
+ },
455
+ {
456
+ "clip_ratio": 0.0,
457
+ "completion_length": 918.5238037109375,
458
+ "epoch": 0.07398568019093078,
459
+ "grad_norm": 0.3688063540106728,
460
+ "kl": 0.00035762786865234375,
461
+ "learning_rate": 9.951579364086603e-07,
462
+ "loss": 0.3767,
463
+ "reward": 0.8680548071861267,
464
+ "reward_std": 0.8301426470279694,
465
+ "rewards/": 3.2926549911499023,
466
+ "rewards/math_compute_score": 0.2619047649204731,
467
+ "step": 31
468
+ },
469
+ {
470
+ "clip_ratio": 0.0,
471
+ "completion_length": 1212.3809814453125,
472
+ "epoch": 0.07637231503579953,
473
+ "grad_norm": 0.2490900765535506,
474
+ "kl": 0.0002994537353515625,
475
+ "learning_rate": 9.946059809542706e-07,
476
+ "loss": 0.1623,
477
+ "reward": 0.7525901794433594,
478
+ "reward_std": 0.8568804860115051,
479
+ "rewards/": 3.477236747741699,
480
+ "rewards/math_compute_score": 0.0714285746216774,
481
+ "step": 32
482
+ },
483
+ {
484
+ "clip_ratio": 0.0,
485
+ "completion_length": 725.40478515625,
486
+ "epoch": 0.07875894988066826,
487
+ "grad_norm": 0.39900214613105317,
488
+ "kl": 0.000522613525390625,
489
+ "learning_rate": 9.940244109678041e-07,
490
+ "loss": 0.0071,
491
+ "reward": 1.2588740587234497,
492
+ "reward_std": 0.6066949367523193,
493
+ "rewards/": 5.0562744140625,
494
+ "rewards/math_compute_score": 0.3095238208770752,
495
+ "step": 33
496
+ },
497
+ {
498
+ "clip_ratio": 0.0,
499
+ "completion_length": 698.9761962890625,
500
+ "epoch": 0.081145584725537,
501
+ "grad_norm": 0.3908939527833547,
502
+ "kl": 0.000396728515625,
503
+ "learning_rate": 9.93413261270763e-07,
504
+ "loss": 0.268,
505
+ "reward": 1.276125431060791,
506
+ "reward_std": 0.6631749868392944,
507
+ "rewards/": 3.8091983795166016,
508
+ "rewards/math_compute_score": 0.6428571343421936,
509
+ "step": 34
510
+ },
511
+ {
512
+ "clip_ratio": 0.0,
513
+ "completion_length": 908.0238037109375,
514
+ "epoch": 0.08353221957040573,
515
+ "grad_norm": 0.3377115570250511,
516
+ "kl": 0.000377655029296875,
517
+ "learning_rate": 9.927725684557339e-07,
518
+ "loss": 0.1234,
519
+ "reward": 1.2422432899475098,
520
+ "reward_std": 0.6443389654159546,
521
+ "rewards/": 5.163597583770752,
522
+ "rewards/math_compute_score": 0.261904776096344,
523
+ "step": 35
524
+ },
525
+ {
526
+ "clip_ratio": 0.0,
527
+ "completion_length": 1112.8095703125,
528
+ "epoch": 0.08591885441527446,
529
+ "grad_norm": 0.23706974723352947,
530
+ "kl": 0.000385284423828125,
531
+ "learning_rate": 9.921023708841973e-07,
532
+ "loss": -0.1353,
533
+ "reward": 0.601748526096344,
534
+ "reward_std": 0.5709776282310486,
535
+ "rewards/": 3.294456958770752,
536
+ "rewards/math_compute_score": -0.0714285746216774,
537
+ "step": 36
538
+ },
539
+ {
540
+ "clip_ratio": 0.0,
541
+ "completion_length": 867.7619018554688,
542
+ "epoch": 0.0883054892601432,
543
+ "grad_norm": 0.3434544369292518,
544
+ "kl": 0.00067138671875,
545
+ "learning_rate": 9.914027086842322e-07,
546
+ "loss": 0.0705,
547
+ "reward": 1.0254464149475098,
548
+ "reward_std": 1.1037424802780151,
549
+ "rewards/": 3.5081846714019775,
550
+ "rewards/math_compute_score": 0.4047619104385376,
551
+ "step": 37
552
+ },
553
+ {
554
+ "clip_ratio": 0.0,
555
+ "completion_length": 792.3095092773438,
556
+ "epoch": 0.09069212410501193,
557
+ "grad_norm": 0.3791096432652416,
558
+ "kl": 0.000576019287109375,
559
+ "learning_rate": 9.906736237481108e-07,
560
+ "loss": 0.1116,
561
+ "reward": 1.2115607261657715,
562
+ "reward_std": 0.8592520356178284,
563
+ "rewards/": 4.153041362762451,
564
+ "rewards/math_compute_score": 0.4761904776096344,
565
+ "step": 38
566
+ },
567
+ {
568
+ "clip_ratio": 0.0,
569
+ "completion_length": 772.5714721679688,
570
+ "epoch": 0.09307875894988067,
571
+ "grad_norm": 0.3544290113443038,
572
+ "kl": 0.000850677490234375,
573
+ "learning_rate": 9.899151597297922e-07,
574
+ "loss": 0.3191,
575
+ "reward": 1.139400839805603,
576
+ "reward_std": 0.6882633566856384,
577
+ "rewards/": 3.9827184677124023,
578
+ "rewards/math_compute_score": 0.4285714328289032,
579
+ "step": 39
580
+ },
581
+ {
582
+ "epoch": 0.0954653937947494,
583
+ "grad_norm": 0.42043440113349795,
584
+ "learning_rate": 9.891273620423082e-07,
585
+ "loss": 0.1347,
586
+ "step": 40
587
+ },
588
+ {
589
+ "epoch": 0.0954653937947494,
590
+ "eval_clip_ratio": 0.0,
591
+ "eval_completion_length": 1038.15478515625,
592
+ "eval_kl": 0.0018901824951171875,
593
+ "eval_loss": 0.12366585433483124,
594
+ "eval_reward": 1.030681535601616,
595
+ "eval_reward_std": 0.8943771868944168,
596
+ "eval_rewards/": 4.439121901988983,
597
+ "eval_rewards/math_compute_score": 0.17857143329456449,
598
+ "eval_runtime": 82.5285,
599
+ "eval_samples_per_second": 0.254,
600
+ "eval_steps_per_second": 0.012,
601
+ "step": 40
602
+ },
603
+ {
604
+ "clip_ratio": 0.0,
605
+ "completion_length": 908.3333435058594,
606
+ "epoch": 0.09785202863961814,
607
+ "grad_norm": 0.25194007930707885,
608
+ "kl": 0.0010833740234375,
609
+ "learning_rate": 9.883102778550434e-07,
610
+ "loss": 0.2062,
611
+ "reward": 1.3001837134361267,
612
+ "reward_std": 0.7053222358226776,
613
+ "rewards/": 4.977108955383301,
614
+ "rewards/math_compute_score": 0.380952388048172,
615
+ "step": 41
616
+ },
617
+ {
618
+ "clip_ratio": 0.0,
619
+ "completion_length": 900.2619018554688,
620
+ "epoch": 0.10023866348448687,
621
+ "grad_norm": 0.32787405490701843,
622
+ "kl": 0.0010986328125,
623
+ "learning_rate": 9.874639560909118e-07,
624
+ "loss": 0.0606,
625
+ "reward": 1.039421796798706,
626
+ "reward_std": 0.8191918730735779,
627
+ "rewards/": 3.197108745574951,
628
+ "rewards/math_compute_score": 0.5,
629
+ "step": 42
630
+ },
631
+ {
632
+ "clip_ratio": 0.0,
633
+ "completion_length": 917.90478515625,
634
+ "epoch": 0.1026252983293556,
635
+ "grad_norm": 0.5163718395183062,
636
+ "kl": 0.00128936767578125,
637
+ "learning_rate": 9.865884474234275e-07,
638
+ "loss": 0.2818,
639
+ "reward": 1.222414493560791,
640
+ "reward_std": 0.7349604368209839,
641
+ "rewards/": 4.493024826049805,
642
+ "rewards/math_compute_score": 0.4047619104385376,
643
+ "step": 43
644
+ },
645
+ {
646
+ "clip_ratio": 0.0,
647
+ "completion_length": 690.2857055664062,
648
+ "epoch": 0.10501193317422435,
649
+ "grad_norm": 0.3207693941377101,
650
+ "kl": 0.0008087158203125,
651
+ "learning_rate": 9.856838042736696e-07,
652
+ "loss": 0.1025,
653
+ "reward": 1.2743303775787354,
654
+ "reward_std": 0.7797837257385254,
655
+ "rewards/": 4.46688985824585,
656
+ "rewards/math_compute_score": 0.4761904776096344,
657
+ "step": 44
658
+ },
659
+ {
660
+ "clip_ratio": 0.0,
661
+ "completion_length": 676.0714111328125,
662
+ "epoch": 0.10739856801909307,
663
+ "grad_norm": 0.28894735083744694,
664
+ "kl": 0.0009918212890625,
665
+ "learning_rate": 9.847500808071456e-07,
666
+ "loss": 0.1353,
667
+ "reward": 1.2259044647216797,
668
+ "reward_std": 0.7126508355140686,
669
+ "rewards/": 3.558094024658203,
670
+ "rewards/math_compute_score": 0.6428571343421936,
671
+ "step": 45
672
+ },
673
+ {
674
+ "clip_ratio": 0.0,
675
+ "completion_length": 775.40478515625,
676
+ "epoch": 0.10978520286396182,
677
+ "grad_norm": 0.27186613080235555,
678
+ "kl": 0.000629425048828125,
679
+ "learning_rate": 9.837873329305457e-07,
680
+ "loss": 0.1791,
681
+ "reward": 1.423451542854309,
682
+ "reward_std": 0.6115496754646301,
683
+ "rewards/": 4.545828819274902,
684
+ "rewards/math_compute_score": 0.6428571343421936,
685
+ "step": 46
686
+ },
687
+ {
688
+ "clip_ratio": 0.0,
689
+ "completion_length": 1084.90478515625,
690
+ "epoch": 0.11217183770883055,
691
+ "grad_norm": 0.2663709639869927,
692
+ "kl": 0.001007080078125,
693
+ "learning_rate": 9.82795618288397e-07,
694
+ "loss": 0.1393,
695
+ "reward": 0.9681082367897034,
696
+ "reward_std": 0.7282014489173889,
697
+ "rewards/": 4.173874855041504,
698
+ "rewards/math_compute_score": 0.1666666716337204,
699
+ "step": 47
700
+ },
701
+ {
702
+ "clip_ratio": 0.0,
703
+ "completion_length": 844.6428833007812,
704
+ "epoch": 0.11455847255369929,
705
+ "grad_norm": 0.30223981125620725,
706
+ "kl": 0.00147247314453125,
707
+ "learning_rate": 9.817749962596114e-07,
708
+ "loss": 0.18,
709
+ "reward": 1.0321242809295654,
710
+ "reward_std": 0.8400474190711975,
711
+ "rewards/": 4.208240509033203,
712
+ "rewards/math_compute_score": 0.2380952388048172,
713
+ "step": 48
714
+ },
715
+ {
716
+ "clip_ratio": 0.0,
717
+ "completion_length": 560.5238037109375,
718
+ "epoch": 0.11694510739856802,
719
+ "grad_norm": 0.4242899512098847,
720
+ "kl": 0.00177764892578125,
721
+ "learning_rate": 9.807255279539312e-07,
722
+ "loss": 0.1896,
723
+ "reward": 1.313750982284546,
724
+ "reward_std": 0.46607664227485657,
725
+ "rewards/": 4.283040523529053,
726
+ "rewards/math_compute_score": 0.5714285969734192,
727
+ "step": 49
728
+ },
729
+ {
730
+ "epoch": 0.11933174224343675,
731
+ "grad_norm": 0.348729238348003,
732
+ "learning_rate": 9.796472762082685e-07,
733
+ "loss": 0.2283,
734
+ "step": 50
735
+ },
736
+ {
737
+ "epoch": 0.11933174224343675,
738
+ "eval_clip_ratio": 0.0,
739
+ "eval_completion_length": 933.5654907226562,
740
+ "eval_kl": 0.0038604736328125,
741
+ "eval_loss": 0.1660795509815216,
742
+ "eval_reward": 1.0914957970380783,
743
+ "eval_reward_std": 0.7910807579755783,
744
+ "eval_rewards/": 4.457478940486908,
745
+ "eval_rewards/math_compute_score": 0.25000000558793545,
746
+ "eval_runtime": 81.0284,
747
+ "eval_samples_per_second": 0.259,
748
+ "eval_steps_per_second": 0.012,
749
+ "step": 50
750
+ },
751
+ {
752
+ "clip_ratio": 0.0,
753
+ "completion_length": 785.5595397949219,
754
+ "epoch": 0.12171837708830549,
755
+ "grad_norm": 0.27596087991963414,
756
+ "kl": 0.001678466796875,
757
+ "learning_rate": 9.785403055829448e-07,
758
+ "loss": -0.001,
759
+ "reward": 1.2578682899475098,
760
+ "reward_std": 0.7686284184455872,
761
+ "rewards/": 4.384579658508301,
762
+ "rewards/math_compute_score": 0.4761904776096344,
763
+ "step": 51
764
+ },
765
+ {
766
+ "clip_ratio": 0.0,
767
+ "completion_length": 916.0714721679688,
768
+ "epoch": 0.12410501193317422,
769
+ "grad_norm": 0.2741628390919074,
770
+ "kl": 0.00164794921875,
771
+ "learning_rate": 9.77404682357824e-07,
772
+ "loss": 0.1083,
773
+ "reward": 1.1715024709701538,
774
+ "reward_std": 0.6183907985687256,
775
+ "rewards/": 4.333702564239502,
776
+ "rewards/math_compute_score": 0.380952388048172,
777
+ "step": 52
778
+ },
779
+ {
780
+ "clip_ratio": 0.0,
781
+ "completion_length": 819.5714721679688,
782
+ "epoch": 0.12649164677804295,
783
+ "grad_norm": 0.35526991630751986,
784
+ "kl": 0.0020904541015625,
785
+ "learning_rate": 9.762404745283437e-07,
786
+ "loss": 0.2932,
787
+ "reward": 1.2613282203674316,
788
+ "reward_std": 0.5622918605804443,
789
+ "rewards/": 4.497117042541504,
790
+ "rewards/math_compute_score": 0.4523809552192688,
791
+ "step": 53
792
+ },
793
+ {
794
+ "clip_ratio": 0.0,
795
+ "completion_length": 878.7857055664062,
796
+ "epoch": 0.1288782816229117,
797
+ "grad_norm": 0.5981296635875077,
798
+ "kl": 0.00194549560546875,
799
+ "learning_rate": 9.75047751801446e-07,
800
+ "loss": 0.2455,
801
+ "reward": 1.058624029159546,
802
+ "reward_std": 0.8494831323623657,
803
+ "rewards/": 4.2455010414123535,
804
+ "rewards/math_compute_score": 0.261904776096344,
805
+ "step": 54
806
+ },
807
+ {
808
+ "clip_ratio": 0.0,
809
+ "completion_length": 753.7380981445312,
810
+ "epoch": 0.13126491646778043,
811
+ "grad_norm": 0.2280494532860685,
812
+ "kl": 0.000865936279296875,
813
+ "learning_rate": 9.738265855914012e-07,
814
+ "loss": -0.0038,
815
+ "reward": 1.4897369146347046,
816
+ "reward_std": 0.6451947689056396,
817
+ "rewards/": 4.972493648529053,
818
+ "rewards/math_compute_score": 0.6190476417541504,
819
+ "step": 55
820
+ },
821
+ {
822
+ "clip_ratio": 0.0,
823
+ "completion_length": 732.0,
824
+ "epoch": 0.13365155131264916,
825
+ "grad_norm": 0.3162020489120827,
826
+ "kl": 0.0020599365234375,
827
+ "learning_rate": 9.725770490155338e-07,
828
+ "loss": 0.2325,
829
+ "reward": 1.3167736530303955,
830
+ "reward_std": 0.5749982595443726,
831
+ "rewards/": 4.393392086029053,
832
+ "rewards/math_compute_score": 0.5476190447807312,
833
+ "step": 56
834
+ },
835
+ {
836
+ "clip_ratio": 0.0,
837
+ "completion_length": 780.0238037109375,
838
+ "epoch": 0.1360381861575179,
839
+ "grad_norm": 0.5756772020485215,
840
+ "kl": 0.001495361328125,
841
+ "learning_rate": 9.712992168898435e-07,
842
+ "loss": 0.1519,
843
+ "reward": 1.3120629787445068,
844
+ "reward_std": 0.6641415953636169,
845
+ "rewards/": 5.226981163024902,
846
+ "rewards/math_compute_score": 0.3333333432674408,
847
+ "step": 57
848
+ },
849
+ {
850
+ "clip_ratio": 0.0,
851
+ "completion_length": 556.5,
852
+ "epoch": 0.13842482100238662,
853
+ "grad_norm": 0.4844983583796039,
854
+ "kl": 0.0034942626953125,
855
+ "learning_rate": 9.699931657245263e-07,
856
+ "loss": -0.0877,
857
+ "reward": 1.2563197612762451,
858
+ "reward_std": 0.745265543460846,
859
+ "rewards/": 4.376836776733398,
860
+ "rewards/math_compute_score": 0.4761904776096344,
861
+ "step": 58
862
+ },
863
+ {
864
+ "clip_ratio": 0.0,
865
+ "completion_length": 635.2380981445312,
866
+ "epoch": 0.14081145584725538,
867
+ "grad_norm": 0.4758034742756555,
868
+ "kl": 0.0026397705078125,
869
+ "learning_rate": 9.686589737193928e-07,
870
+ "loss": 0.053,
871
+ "reward": 1.0557314157485962,
872
+ "reward_std": 1.0078890323638916,
873
+ "rewards/": 3.754848003387451,
874
+ "rewards/math_compute_score": 0.380952388048172,
875
+ "step": 59
876
+ },
877
+ {
878
+ "epoch": 0.1431980906921241,
879
+ "grad_norm": 0.3072937144155207,
880
+ "learning_rate": 9.67296720759187e-07,
881
+ "loss": 0.0474,
882
+ "step": 60
883
+ },
884
+ {
885
+ "epoch": 0.1431980906921241,
886
+ "eval_clip_ratio": 0.0,
887
+ "eval_completion_length": 837.0000152587891,
888
+ "eval_kl": 0.013916015625,
889
+ "eval_loss": 0.11870573461055756,
890
+ "eval_reward": 1.0395183265209198,
891
+ "eval_reward_std": 0.7884587794542313,
892
+ "eval_rewards/": 4.197591245174408,
893
+ "eval_rewards/math_compute_score": 0.2500000111758709,
894
+ "eval_runtime": 79.2917,
895
+ "eval_samples_per_second": 0.265,
896
+ "eval_steps_per_second": 0.013,
897
+ "step": 60
898
+ },
899
+ {
900
+ "clip_ratio": 0.0,
901
+ "completion_length": 674.9404907226562,
902
+ "epoch": 0.14558472553699284,
903
+ "grad_norm": 0.4086529116673979,
904
+ "kl": 0.00350189208984375,
905
+ "learning_rate": 9.659064884088016e-07,
906
+ "loss": 0.1354,
907
+ "reward": 1.1227608919143677,
908
+ "reward_std": 0.6351146399974823,
909
+ "rewards/": 4.042375802993774,
910
+ "rewards/math_compute_score": 0.3928571492433548,
911
+ "step": 61
912
+ },
913
+ {
914
+ "clip_ratio": 0.0,
915
+ "completion_length": 730.952392578125,
916
+ "epoch": 0.14797136038186157,
917
+ "grad_norm": 0.33492026311818307,
918
+ "kl": 0.0020599365234375,
919
+ "learning_rate": 9.644883599083957e-07,
920
+ "loss": -0.0413,
921
+ "reward": 1.2523438930511475,
922
+ "reward_std": 0.5973415374755859,
923
+ "rewards/": 5.023623466491699,
924
+ "rewards/math_compute_score": 0.3095238208770752,
925
+ "step": 62
926
+ },
927
+ {
928
+ "clip_ratio": 0.0,
929
+ "completion_length": 591.1190795898438,
930
+ "epoch": 0.15035799522673032,
931
+ "grad_norm": 0.30746445342498774,
932
+ "kl": 0.00110626220703125,
933
+ "learning_rate": 9.630424201684103e-07,
934
+ "loss": -0.024,
935
+ "reward": 1.3001348972320557,
936
+ "reward_std": 0.5841225385665894,
937
+ "rewards/": 4.4054365158081055,
938
+ "rewards/math_compute_score": 0.523809552192688,
939
+ "step": 63
940
+ },
941
+ {
942
+ "clip_ratio": 0.0,
943
+ "completion_length": 739.0714721679688,
944
+ "epoch": 0.15274463007159905,
945
+ "grad_norm": 0.33486655671684584,
946
+ "kl": 0.0015411376953125,
947
+ "learning_rate": 9.615687557644848e-07,
948
+ "loss": 0.1172,
949
+ "reward": 1.4695312976837158,
950
+ "reward_std": 0.6374803185462952,
951
+ "rewards/": 4.871465682983398,
952
+ "rewards/math_compute_score": 0.6190476417541504,
953
+ "step": 64
954
+ },
955
+ {
956
+ "clip_ratio": 0.0,
957
+ "completion_length": 820.4285888671875,
958
+ "epoch": 0.15513126491646778,
959
+ "grad_norm": 0.7636558012245415,
960
+ "kl": 0.0037994384765625,
961
+ "learning_rate": 9.600674549322716e-07,
962
+ "loss": 0.2518,
963
+ "reward": 1.0687873363494873,
964
+ "reward_std": 0.8015285730361938,
965
+ "rewards/": 3.534412384033203,
966
+ "rewards/math_compute_score": 0.4523809552192688,
967
+ "step": 65
968
+ },
969
+ {
970
+ "clip_ratio": 0.0,
971
+ "completion_length": 991.5238037109375,
972
+ "epoch": 0.1575178997613365,
973
+ "grad_norm": 0.24283471181996055,
974
+ "kl": 0.00176239013671875,
975
+ "learning_rate": 9.585386075621552e-07,
976
+ "loss": 0.1276,
977
+ "reward": 1.4505395889282227,
978
+ "reward_std": 0.3715428113937378,
979
+ "rewards/": 5.157459259033203,
980
+ "rewards/math_compute_score": 0.523809552192688,
981
+ "step": 66
982
+ },
983
+ {
984
+ "clip_ratio": 0.0,
985
+ "completion_length": 666.2857055664062,
986
+ "epoch": 0.15990453460620524,
987
+ "grad_norm": 0.32163982732892316,
988
+ "kl": 0.003021240234375,
989
+ "learning_rate": 9.569823051938689e-07,
990
+ "loss": 0.3253,
991
+ "reward": 1.4773437976837158,
992
+ "reward_std": 0.6230893731117249,
993
+ "rewards/": 4.339099884033203,
994
+ "rewards/math_compute_score": 0.761904776096344,
995
+ "step": 67
996
+ },
997
+ {
998
+ "clip_ratio": 0.0,
999
+ "completion_length": 603.547607421875,
1000
+ "epoch": 0.162291169451074,
1001
+ "grad_norm": 0.5401807083101557,
1002
+ "kl": 0.00982666015625,
1003
+ "learning_rate": 9.553986410110134e-07,
1004
+ "loss": 0.4025,
1005
+ "reward": 0.9949312210083008,
1006
+ "reward_std": 0.4883081316947937,
1007
+ "rewards/": 3.641322612762451,
1008
+ "rewards/math_compute_score": 0.3333333432674408,
1009
+ "step": 68
1010
+ },
1011
+ {
1012
+ "clip_ratio": 0.0,
1013
+ "completion_length": 857.6190795898438,
1014
+ "epoch": 0.16467780429594273,
1015
+ "grad_norm": 0.2558130935281063,
1016
+ "kl": 0.00057220458984375,
1017
+ "learning_rate": 9.537877098354784e-07,
1018
+ "loss": -0.0058,
1019
+ "reward": 1.5268882513046265,
1020
+ "reward_std": 0.5736187696456909,
1021
+ "rewards/": 5.34872579574585,
1022
+ "rewards/math_compute_score": 0.5714285969734192,
1023
+ "step": 69
1024
+ },
1025
+ {
1026
+ "epoch": 0.16706443914081145,
1027
+ "grad_norm": 0.27521723291274197,
1028
+ "learning_rate": 9.52149608121765e-07,
1029
+ "loss": 0.0567,
1030
+ "step": 70
1031
+ },
1032
+ {
1033
+ "epoch": 0.16706443914081145,
1034
+ "eval_clip_ratio": 0.0,
1035
+ "eval_completion_length": 882.9702453613281,
1036
+ "eval_kl": 0.03376007080078125,
1037
+ "eval_loss": 0.26883435249328613,
1038
+ "eval_reward": 1.151841551065445,
1039
+ "eval_reward_std": 0.8478920459747314,
1040
+ "eval_rewards/": 4.473493456840515,
1041
+ "eval_rewards/math_compute_score": 0.3214285783469677,
1042
+ "eval_runtime": 78.4604,
1043
+ "eval_samples_per_second": 0.268,
1044
+ "eval_steps_per_second": 0.013,
1045
+ "step": 70
1046
+ },
1047
+ {
1048
+ "clip_ratio": 0.0,
1049
+ "completion_length": 712.1309509277344,
1050
+ "epoch": 0.16945107398568018,
1051
+ "grad_norm": 0.286488510896718,
1052
+ "kl": 0.00177001953125,
1053
+ "learning_rate": 9.504844339512094e-07,
1054
+ "loss": 0.2365,
1055
+ "reward": 1.5305490493774414,
1056
+ "reward_std": 0.5794989466667175,
1057
+ "rewards/": 5.12893533706665,
1058
+ "rewards/math_compute_score": 0.6309524029493332,
1059
+ "step": 71
1060
+ },
1061
+ {
1062
+ "clip_ratio": 0.0,
1063
+ "completion_length": 807.7857055664062,
1064
+ "epoch": 0.1718377088305489,
1065
+ "grad_norm": 0.3110498028785089,
1066
+ "kl": 0.0013275146484375,
1067
+ "learning_rate": 9.487922870261121e-07,
1068
+ "loss": -0.0117,
1069
+ "reward": 1.6091006994247437,
1070
+ "reward_std": 0.5738370418548584,
1071
+ "rewards/": 5.188360214233398,
1072
+ "rewards/math_compute_score": 0.7142857313156128,
1073
+ "step": 72
1074
+ },
1075
+ {
1076
+ "clip_ratio": 0.0,
1077
+ "completion_length": 771.6666870117188,
1078
+ "epoch": 0.17422434367541767,
1079
+ "grad_norm": 0.28968592215813294,
1080
+ "kl": 0.0019989013671875,
1081
+ "learning_rate": 9.470732686637664e-07,
1082
+ "loss": 0.0653,
1083
+ "reward": 1.406017541885376,
1084
+ "reward_std": 0.7840931415557861,
1085
+ "rewards/": 5.220563888549805,
1086
+ "rewards/math_compute_score": 0.4523809552192688,
1087
+ "step": 73
1088
+ },
1089
+ {
1090
+ "clip_ratio": 0.0,
1091
+ "completion_length": 776.0238037109375,
1092
+ "epoch": 0.1766109785202864,
1093
+ "grad_norm": 0.31133583857342856,
1094
+ "kl": 0.0069580078125,
1095
+ "learning_rate": 9.45327481790393e-07,
1096
+ "loss": -0.0325,
1097
+ "reward": 1.483282208442688,
1098
+ "reward_std": 0.7019442915916443,
1099
+ "rewards/": 5.321172714233398,
1100
+ "rewards/math_compute_score": 0.523809552192688,
1101
+ "step": 74
1102
+ },
1103
+ {
1104
+ "clip_ratio": 0.0,
1105
+ "completion_length": 764.5,
1106
+ "epoch": 0.17899761336515513,
1107
+ "grad_norm": 0.36739420438573467,
1108
+ "kl": 0.004913330078125,
1109
+ "learning_rate": 9.435550309349776e-07,
1110
+ "loss": -0.025,
1111
+ "reward": 1.2999628782272339,
1112
+ "reward_std": 0.7012795805931091,
1113
+ "rewards/": 5.26171875,
1114
+ "rewards/math_compute_score": 0.3095238208770752,
1115
+ "step": 75
1116
+ },
1117
+ {
1118
+ "clip_ratio": 0.0,
1119
+ "completion_length": 666.9761962890625,
1120
+ "epoch": 0.18138424821002386,
1121
+ "grad_norm": 0.33428186789934067,
1122
+ "kl": 0.0022125244140625,
1123
+ "learning_rate": 9.417560222230114e-07,
1124
+ "loss": 0.0816,
1125
+ "reward": 1.252938985824585,
1126
+ "reward_std": 0.6224194765090942,
1127
+ "rewards/": 4.07421875,
1128
+ "rewards/math_compute_score": 0.5476190447807312,
1129
+ "step": 76
1130
+ },
1131
+ {
1132
+ "clip_ratio": 0.0,
1133
+ "completion_length": 703.4285888671875,
1134
+ "epoch": 0.18377088305489261,
1135
+ "grad_norm": 0.3021119658752094,
1136
+ "kl": 0.0036773681640625,
1137
+ "learning_rate": 9.399305633701372e-07,
1138
+ "loss": 0.0133,
1139
+ "reward": 1.5353423357009888,
1140
+ "reward_std": 0.5898163318634033,
1141
+ "rewards/": 5.390996932983398,
1142
+ "rewards/math_compute_score": 0.5714285969734192,
1143
+ "step": 77
1144
+ },
1145
+ {
1146
+ "clip_ratio": 0.0,
1147
+ "completion_length": 743.4761962890625,
1148
+ "epoch": 0.18615751789976134,
1149
+ "grad_norm": 0.4512916551501506,
1150
+ "kl": 0.003326416015625,
1151
+ "learning_rate": 9.380787636757e-07,
1152
+ "loss": 0.3175,
1153
+ "reward": 1.4912109375,
1154
+ "reward_std": 0.6637881994247437,
1155
+ "rewards/": 5.360816478729248,
1156
+ "rewards/math_compute_score": 0.523809552192688,
1157
+ "step": 78
1158
+ },
1159
+ {
1160
+ "clip_ratio": 0.0,
1161
+ "completion_length": 760.452392578125,
1162
+ "epoch": 0.18854415274463007,
1163
+ "grad_norm": 0.3498194905087342,
1164
+ "kl": 0.00142669677734375,
1165
+ "learning_rate": 9.362007340162028e-07,
1166
+ "loss": 0.0411,
1167
+ "reward": 1.5506510734558105,
1168
+ "reward_std": 0.3708002269268036,
1169
+ "rewards/": 5.277064800262451,
1170
+ "rewards/math_compute_score": 0.6190476417541504,
1171
+ "step": 79
1172
+ },
1173
+ {
1174
+ "epoch": 0.1909307875894988,
1175
+ "grad_norm": 0.23488028047184029,
1176
+ "learning_rate": 9.342965868386673e-07,
1177
+ "loss": -0.0472,
1178
+ "step": 80
1179
+ },
1180
+ {
1181
+ "epoch": 0.1909307875894988,
1182
+ "eval_clip_ratio": 0.0,
1183
+ "eval_completion_length": 915.2738342285156,
1184
+ "eval_kl": 0.02166748046875,
1185
+ "eval_loss": 0.17802654206752777,
1186
+ "eval_reward": 1.2084915190935135,
1187
+ "eval_reward_std": 0.7365763932466507,
1188
+ "eval_rewards/": 4.542457342147827,
1189
+ "eval_rewards/math_compute_score": 0.3750000111758709,
1190
+ "eval_runtime": 78.6127,
1191
+ "eval_samples_per_second": 0.267,
1192
+ "eval_steps_per_second": 0.013,
1193
+ "step": 80
1194
+ }
1195
+ ],
1196
+ "logging_steps": 1.0,
1197
+ "max_steps": 419,
1198
+ "num_input_tokens_seen": 0,
1199
+ "num_train_epochs": 1,
1200
+ "save_steps": 40,
1201
+ "stateful_callbacks": {
1202
+ "TrainerControl": {
1203
+ "args": {
1204
+ "should_epoch_stop": false,
1205
+ "should_evaluate": false,
1206
+ "should_log": false,
1207
+ "should_save": true,
1208
+ "should_training_stop": false
1209
+ },
1210
+ "attributes": {}
1211
+ }
1212
+ },
1213
+ "total_flos": 0.0,
1214
+ "train_batch_size": 14,
1215
+ "trial_name": null,
1216
+ "trial_params": null
1217
+ }
checkpoint-80/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c306dec7083e745ef0f5a965590ea0252ced299afbec0312a610320738a941
3
+ size 8312
checkpoint-80/vocab.json ADDED
The diff for this file is too large to render. See raw diff