Harryllh committed on Dec 15, 2025

Commit

372bae8

verified ·

1 Parent(s): bcaccb2

Upload folder using huggingface_hub

Browse files

Files changed (22) hide show

.gitattributes +1 -0
added_tokens.json +24 -0
chat_template.jinja +54 -0
config.json +66 -0
generation_config.json +13 -0
merges.txt +0 -0
model-00001-of-00003.safetensors +3 -0
model-00002-of-00003.safetensors +3 -0
model-00003-of-00003.safetensors +3 -0
model.safetensors.index.json +442 -0
optimizer.pt +3 -0
rng_state_0.pth +3 -0
rng_state_1.pth +3 -0
rng_state_2.pth +3 -0
rng_state_3.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +31 -0
tokenizer.json +3 -0
tokenizer_config.json +207 -0
trainer_state.json +2434 -0
training_args.bin +3 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,54 @@

+{%- if tools %}{#- Tools supplied: build one system turn holding the system prompt, the tool signatures and the call-format instructions. #}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}{#- A leading system message supplies the prompt text. #}
+        {{- messages[0]['content'] }}
+    {%- else %}{#- Otherwise fall back to the built-in default prompt. #}
+        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}{#- One JSON-serialized tool definition per line inside the tools element. #}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}{#- No tools: emit a plain system turn (leading system message, or the default prompt). #}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}{#- Render the conversation turn by turn. #}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}{#- Plain turns. A system message in first position is skipped here; it was already rendered above. #}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}{#- Assistant turn carrying tool calls: optional text, then one tool_call element per call. #}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + message.content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}{#- Accept both wrapped (tool_call.function) and flat tool-call objects. #}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+            {{- tool_call.name }}
+            {{- '", "arguments": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}{#- Tool results: each is wrapped in a tool_response element; a run of consecutive tool messages shares one user turn. #}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}{#- Open the user turn only for the first tool message of a run. #}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}{#- Close the turn after the last tool message of a run. #}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}{#- Optionally open an assistant turn for the model to complete. #}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "dtype": "float32",
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 36,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 2,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.3",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.57.3"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4388d2c83fdd136cded35208f788b099ef385816dadd5f0e22fced44b716f167
+size 4982131536

model-00002-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8743e1a3cd3d3394edf2ae720067f80e2db18a9832a06258e30192d40684a95c
+size 4932949336

model-00003-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13aba45ec781b7d36080055f6359c9320c8ab316fb7d389fed900572b4caf45e
+size 2428723160

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,442 @@

+{
+  "metadata": {
+    "total_parameters": 3085938688,
+    "total_size": 12343754752
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.norm.weight": "model-00003-of-00003.safetensors"
+  }
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8616a1a46a7e22cf1620ea6e748509d2b9aff5219ee036b8556bb2e377193c5
+size 24687895753

rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:751b65f70f1960660c63c6c89a9653bc88a0ec9f8c868668116fec9c93cae8c8
+size 15365

rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13706c3f5531d0b4e0c1fd53c66ed860a95726fb2e617054e39eee36ad48d66c
+size 15429

rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb595ae8f86f1c7e192bd456a524197108a8dda218801b4909b6e5b838e6b1c
+size 15429

rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55e706f0324fcf807872d1b9128ff2aecfaa6f18197b081096865474a3b75310
+size 15429

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58f8b6881a432bd2506b4c142d6f9b7b6b337d2665d39a90b132dcb1c80cdc27
+size 1465

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,2434 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.4,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 434.0,
+      "completions/max_terminated_length": 434.0,
+      "completions/mean_length": 293.75,
+      "completions/mean_terminated_length": 335.7142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 184.0,
+      "epoch": 0.004,
+      "format_failures": 0.0,
+      "grad_norm": 0.5197089910507202,
+      "kl": 0.0,
+      "learning_rate": 0.0,
+      "loss": 0.0278,
+      "num_tokens": 9800.0,
+      "reward": 0.3660714328289032,
+      "reward_std": 0.36236491799354553,
+      "step": 1
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 278.0,
+      "completions/max_terminated_length": 278.0,
+      "completions/mean_length": 134.875,
+      "completions/mean_terminated_length": 154.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 51.0,
+      "epoch": 0.008,
+      "format_failures": 0.0,
+      "grad_norm": 1.8656461238861084,
+      "kl": 0.0,
+      "learning_rate": 1e-06,
+      "loss": 0.1584,
+      "num_tokens": 19920.0,
+      "reward": 0.34375,
+      "reward_std": 0.48065245151519775,
+      "step": 2
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 261.0,
+      "completions/max_terminated_length": 261.0,
+      "completions/mean_length": 176.625,
+      "completions/mean_terminated_length": 201.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 138.0,
+      "epoch": 0.012,
+      "format_failures": 0.0,
+      "grad_norm": 7.7805867195129395,
+      "kl": 1.0173164680600166,
+      "learning_rate": 1e-06,
+      "loss": 0.0063,
+      "num_tokens": 28896.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 3
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 332.0,
+      "completions/max_terminated_length": 332.0,
+      "completions/mean_length": 216.625,
+      "completions/mean_terminated_length": 247.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 190.0,
+      "epoch": 0.016,
+      "format_failures": 0.0,
+      "grad_norm": 0.34460729360580444,
+      "kl": 0.005293647991493344,
+      "learning_rate": 1e-06,
+      "loss": 0.0149,
+      "num_tokens": 35688.0,
+      "reward": 0.316850483417511,
+      "reward_std": 0.19629573822021484,
+      "step": 4
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 141.0,
+      "completions/max_terminated_length": 141.0,
+      "completions/mean_length": 107.75,
+      "completions/mean_terminated_length": 123.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 109.0,
+      "epoch": 0.02,
+      "format_failures": 0.0,
+      "grad_norm": 1.950016975402832,
+      "kl": 0.19140876829624176,
+      "learning_rate": 1e-06,
+      "loss": -0.0265,
+      "num_tokens": 44320.0,
+      "reward": 0.25,
+      "reward_std": 0.4629100561141968,
+      "step": 5
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 480.0,
+      "completions/max_terminated_length": 480.0,
+      "completions/mean_length": 347.375,
+      "completions/mean_terminated_length": 397.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 316.0,
+      "epoch": 0.024,
+      "format_failures": 0.0,
+      "grad_norm": 0.27606070041656494,
+      "kl": 0.004609360825270414,
+      "learning_rate": 1e-06,
+      "loss": 0.019,
+      "num_tokens": 55480.0,
+      "reward": 0.20555555820465088,
+      "reward_std": 0.22662308812141418,
+      "step": 6
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 98.0,
+      "completions/max_terminated_length": 98.0,
+      "completions/mean_length": 54.75,
+      "completions/mean_terminated_length": 62.57142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 36.0,
+      "epoch": 0.028,
+      "format_failures": 0.0,
+      "grad_norm": 1.512669563293457,
+      "kl": 0.0004560185334412381,
+      "learning_rate": 1e-06,
+      "loss": 0.1926,
+      "num_tokens": 76568.0,
+      "reward": 0.0416666679084301,
+      "reward_std": 0.1178511381149292,
+      "step": 7
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 380.0,
+      "completions/max_terminated_length": 380.0,
+      "completions/mean_length": 189.75,
+      "completions/mean_terminated_length": 216.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 94.0,
+      "epoch": 0.032,
+      "format_failures": 0.0,
+      "grad_norm": 1.6258090734481812,
+      "kl": 0.133640818297863,
+      "learning_rate": 1e-06,
+      "loss": 0.0094,
+      "num_tokens": 88120.0,
+      "reward": 0.05000000074505806,
+      "reward_std": 0.1414213478565216,
+      "step": 8
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 1412.0,
+      "completions/max_terminated_length": 1412.0,
+      "completions/mean_length": 426.125,
+      "completions/mean_terminated_length": 487.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 218.0,
+      "epoch": 0.036,
+      "format_failures": 1.0,
+      "grad_norm": 0.3745494782924652,
+      "kl": 0.0010488361003808677,
+      "learning_rate": 1e-06,
+      "loss": -0.1003,
+      "num_tokens": 110584.0,
+      "reward": 0.05859375,
+      "reward_std": 0.1657281517982483,
+      "step": 9
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 62.0,
+      "completions/max_terminated_length": 62.0,
+      "completions/mean_length": 41.25,
+      "completions/mean_terminated_length": 47.142857142857146,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 35.0,
+      "epoch": 0.04,
+      "format_failures": 0.0,
+      "grad_norm": 6.635150909423828,
+      "kl": 1.000607669353485,
+      "learning_rate": 1e-06,
+      "loss": -0.0558,
+      "num_tokens": 115888.0,
+      "reward": 0.125,
+      "reward_std": 0.3535533845424652,
+      "step": 10
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.375,
+      "completions/max_length": 126.0,
+      "completions/max_terminated_length": 126.0,
+      "completions/mean_length": 60.25,
+      "completions/mean_terminated_length": 96.4,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 62.0,
+      "epoch": 0.044,
+      "format_failures": 0.0,
+      "grad_norm": 5.5436906814575195,
+      "kl": 0.534478023648262,
+      "learning_rate": 1e-06,
+      "loss": -0.1301,
+      "num_tokens": 123984.0,
+      "reward": 0.375,
+      "reward_std": 0.5175491571426392,
+      "step": 11
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.25,
+      "completions/max_length": 2047.0,
+      "completions/max_terminated_length": 2047.0,
+      "completions/mean_length": 702.625,
+      "completions/mean_terminated_length": 936.8333333333334,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 341.0,
+      "epoch": 0.048,
+      "format_failures": 0.0,
+      "grad_norm": 0.34704723954200745,
+      "kl": 0.0009783765999600291,
+      "learning_rate": 1e-06,
+      "loss": 0.0431,
+      "num_tokens": 146192.0,
+      "reward": 0.38749998807907104,
+      "reward_std": 0.4181165099143982,
+      "step": 12
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 122.0,
+      "completions/max_terminated_length": 122.0,
+      "completions/mean_length": 40.375,
+      "completions/mean_terminated_length": 46.142857142857146,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 20.0,
+      "epoch": 0.052,
+      "format_failures": 0.0,
+      "grad_norm": 0.004240340553224087,
+      "kl": 0.004628603579476476,
+      "learning_rate": 1e-06,
+      "loss": 0.0,
+      "num_tokens": 166896.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 13
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 973.0,
+      "completions/max_terminated_length": 973.0,
+      "completions/mean_length": 452.5,
+      "completions/mean_terminated_length": 517.1428571428571,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 275.0,
+      "epoch": 0.056,
+      "format_failures": 0.0,
+      "grad_norm": 0.18779706954956055,
+      "kl": 0.0052806169260293245,
+      "learning_rate": 1e-06,
+      "loss": 0.0313,
+      "num_tokens": 185392.0,
+      "reward": 0.11513157933950424,
+      "reward_std": 0.16955535113811493,
+      "step": 14
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 304.0,
+      "completions/max_terminated_length": 304.0,
+      "completions/mean_length": 202.0,
+      "completions/mean_terminated_length": 230.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 113.0,
+      "epoch": 0.06,
+      "format_failures": 0.0,
+      "grad_norm": 0.6387383341789246,
+      "kl": 0.02643415331840515,
+      "learning_rate": 1e-06,
+      "loss": 0.0717,
+      "num_tokens": 193056.0,
+      "reward": 0.53125,
+      "reward_std": 0.31045761704444885,
+      "step": 15
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 203.0,
+      "completions/max_terminated_length": 203.0,
+      "completions/mean_length": 151.25,
+      "completions/mean_terminated_length": 172.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 160.0,
+      "epoch": 0.064,
+      "format_failures": 0.0,
+      "grad_norm": 0.2569343149662018,
+      "kl": 0.09986447170376778,
+      "learning_rate": 1e-06,
+      "loss": 0.0006,
+      "num_tokens": 201256.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 16
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 295.0,
+      "completions/max_terminated_length": 295.0,
+      "completions/mean_length": 192.0,
+      "completions/mean_terminated_length": 219.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 22.0,
+      "epoch": 0.068,
+      "format_failures": 1.0,
+      "grad_norm": 0.04395958036184311,
+      "kl": 0.027548893354833126,
+      "learning_rate": 1e-06,
+      "loss": 0.0001,
+      "num_tokens": 209920.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 17
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.5,
+      "completions/max_length": 44.0,
+      "completions/max_terminated_length": 44.0,
+      "completions/mean_length": 20.125,
+      "completions/mean_terminated_length": 40.25,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 39.0,
+      "epoch": 0.072,
+      "format_failures": 0.0,
+      "grad_norm": 0.16681237518787384,
+      "kl": 0.03394318092614412,
+      "learning_rate": 1e-06,
+      "loss": 0.0009,
+      "num_tokens": 214144.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 18
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 527.0,
+      "completions/max_terminated_length": 527.0,
+      "completions/mean_length": 215.75,
+      "completions/mean_terminated_length": 246.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 18.0,
+      "epoch": 0.076,
+      "format_failures": 0.0,
+      "grad_norm": 0.5867045521736145,
+      "kl": 0.00954199954867363,
+      "learning_rate": 1e-06,
+      "loss": -0.2047,
+      "num_tokens": 234096.0,
+      "reward": 0.1666666716337204,
+      "reward_std": 0.35634833574295044,
+      "step": 19
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 169.0,
+      "completions/max_terminated_length": 169.0,
+      "completions/mean_length": 91.75,
+      "completions/mean_terminated_length": 104.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 66.0,
+      "epoch": 0.08,
+      "format_failures": 0.0,
+      "grad_norm": 2.331188917160034,
+      "kl": 0.05314544588327408,
+      "learning_rate": 1e-06,
+      "loss": 0.048,
+      "num_tokens": 243464.0,
+      "reward": 0.21875,
+      "reward_std": 0.36443448066711426,
+      "step": 20
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 130.0,
+      "completions/max_terminated_length": 130.0,
+      "completions/mean_length": 81.25,
+      "completions/mean_terminated_length": 92.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 57.0,
+      "epoch": 0.084,
+      "format_failures": 0.0,
+      "grad_norm": 1.2006300687789917,
+      "kl": 0.07363329455256462,
+      "learning_rate": 1e-06,
+      "loss": 0.0094,
+      "num_tokens": 250720.0,
+      "reward": 0.21875,
+      "reward_std": 0.33905068039894104,
+      "step": 21
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 197.0,
+      "completions/max_terminated_length": 197.0,
+      "completions/mean_length": 82.0,
+      "completions/mean_terminated_length": 93.71428571428571,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 27.0,
+      "epoch": 0.088,
+      "format_failures": 0.0,
+      "grad_norm": 1.3736180067062378,
+      "kl": 0.04446508176624775,
+      "learning_rate": 1e-06,
+      "loss": -0.0541,
+      "num_tokens": 257944.0,
+      "reward": 0.0535714291036129,
+      "reward_std": 0.15152288973331451,
+      "step": 22
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 544.0,
+      "completions/max_terminated_length": 544.0,
+      "completions/mean_length": 242.75,
+      "completions/mean_terminated_length": 277.42857142857144,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 38.0,
+      "epoch": 0.092,
+      "format_failures": 0.0,
+      "grad_norm": 0.9332400560379028,
+      "kl": 0.026759919710457325,
+      "learning_rate": 1e-06,
+      "loss": -0.0979,
+      "num_tokens": 270512.0,
+      "reward": 0.17383432388305664,
+      "reward_std": 0.5423066020011902,
+      "step": 23
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 334.0,
+      "completions/max_terminated_length": 334.0,
+      "completions/mean_length": 193.875,
+      "completions/mean_terminated_length": 221.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 114.0,
+      "epoch": 0.096,
+      "format_failures": 0.0,
+      "grad_norm": 0.5741273164749146,
+      "kl": 0.061491173692047596,
+      "learning_rate": 1e-06,
+      "loss": 0.0724,
+      "num_tokens": 279544.0,
+      "reward": 0.3214285969734192,
+      "reward_std": 0.3162277638912201,
+      "step": 24
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 191.0,
+      "completions/max_terminated_length": 191.0,
+      "completions/mean_length": 131.625,
+      "completions/mean_terminated_length": 150.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 109.0,
+      "epoch": 0.1,
+      "format_failures": 0.0,
+      "grad_norm": 0.8438379168510437,
+      "kl": 0.10757053177803755,
+      "learning_rate": 1e-06,
+      "loss": -0.0168,
+      "num_tokens": 285872.0,
+      "reward": 0.3083333373069763,
+      "reward_std": 0.3443548381328583,
+      "step": 25
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 345.0,
+      "completions/max_terminated_length": 345.0,
+      "completions/mean_length": 224.0,
+      "completions/mean_terminated_length": 256.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 151.0,
+      "epoch": 0.104,
+      "format_failures": 0.0,
+      "grad_norm": 0.6450461149215698,
+      "kl": 0.04460714943706989,
+      "learning_rate": 1e-06,
+      "loss": 0.0276,
+      "num_tokens": 293816.0,
+      "reward": 0.3494505286216736,
+      "reward_std": 0.3268265724182129,
+      "step": 26
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 210.0,
+      "completions/max_terminated_length": 210.0,
+      "completions/mean_length": 110.375,
+      "completions/mean_terminated_length": 126.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 68.0,
+      "epoch": 0.108,
+      "format_failures": 0.0,
+      "grad_norm": 0.17123964428901672,
+      "kl": 0.09914526715874672,
+      "learning_rate": 1e-06,
+      "loss": 0.0006,
+      "num_tokens": 300160.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 27
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 128.0,
+      "completions/max_terminated_length": 128.0,
+      "completions/mean_length": 82.5,
+      "completions/mean_terminated_length": 94.28571428571429,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 86.0,
+      "epoch": 0.112,
+      "format_failures": 0.0,
+      "grad_norm": 0.9953401684761047,
+      "kl": 0.18897472321987152,
+      "learning_rate": 1e-06,
+      "loss": 0.002,
+      "num_tokens": 307720.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 28
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 335.0,
+      "completions/max_terminated_length": 335.0,
+      "completions/mean_length": 229.375,
+      "completions/mean_terminated_length": 262.14285714285717,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 187.0,
+      "epoch": 0.116,
+      "format_failures": 0.0,
+      "grad_norm": 2.1179044246673584,
+      "kl": 0.013377793598920107,
+      "learning_rate": 1e-06,
+      "loss": 0.3156,
+      "num_tokens": 328920.0,
+      "reward": 0.3519230782985687,
+      "reward_std": 0.3794543743133545,
+      "step": 29
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 184.0,
+      "completions/max_terminated_length": 184.0,
+      "completions/mean_length": 131.375,
+      "completions/mean_terminated_length": 150.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 14.0,
+      "epoch": 0.12,
+      "format_failures": 0.0,
+      "grad_norm": 1.2885483503341675,
+      "kl": 0.009146903175860643,
+      "learning_rate": 1e-06,
+      "loss": -0.0387,
+      "num_tokens": 335880.0,
+      "reward": 0.25,
+      "reward_std": 0.4629100561141968,
+      "step": 30
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 1936.0,
+      "completions/max_terminated_length": 1936.0,
+      "completions/mean_length": 410.0,
+      "completions/mean_terminated_length": 468.57142857142856,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 15.0,
+      "epoch": 0.124,
+      "format_failures": 1.0,
+      "grad_norm": 1.5897152423858643,
+      "kl": 0.06828754395246506,
+      "learning_rate": 1e-06,
+      "loss": 0.0215,
+      "num_tokens": 358104.0,
+      "reward": 0.45494991540908813,
+      "reward_std": 0.48848965764045715,
+      "step": 31
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 366.0,
+      "completions/max_terminated_length": 366.0,
+      "completions/mean_length": 202.375,
+      "completions/mean_terminated_length": 231.28571428571428,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 122.0,
+      "epoch": 0.128,
+      "format_failures": 0.0,
+      "grad_norm": 0.8364682793617249,
+      "kl": 0.12048156931996346,
+      "learning_rate": 1e-06,
+      "loss": 0.0898,
+      "num_tokens": 365656.0,
+      "reward": 0.4521104097366333,
+      "reward_std": 0.2924821972846985,
+      "step": 32
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 68.0,
+      "completions/max_terminated_length": 68.0,
+      "completions/mean_length": 48.875,
+      "completions/mean_terminated_length": 55.857142857142854,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 46.0,
+      "epoch": 0.132,
+      "format_failures": 0.0,
+      "grad_norm": 1.7178492546081543,
+      "kl": 0.13572826609015465,
+      "learning_rate": 1e-06,
+      "loss": -0.0249,
+      "num_tokens": 371392.0,
+      "reward": 0.125,
+      "reward_std": 0.3535533845424652,
+      "step": 33
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 435.0,
+      "completions/max_terminated_length": 435.0,
+      "completions/mean_length": 293.5,
+      "completions/mean_terminated_length": 335.42857142857144,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 210.0,
+      "epoch": 0.136,
+      "format_failures": 1.0,
+      "grad_norm": 0.9806227087974548,
+      "kl": 0.012222900055348873,
+      "learning_rate": 1e-06,
+      "loss": 0.3233,
+      "num_tokens": 392240.0,
+      "reward": 0.47658732533454895,
+      "reward_std": 0.4081757962703705,
+      "step": 34
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 97.0,
+      "completions/max_terminated_length": 97.0,
+      "completions/mean_length": 64.875,
+      "completions/mean_terminated_length": 74.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 43.0,
+      "epoch": 0.14,
+      "format_failures": 0.0,
+      "grad_norm": 0.8304542303085327,
+      "kl": 0.031799230724573135,
+      "learning_rate": 1e-06,
+      "loss": 0.0113,
+      "num_tokens": 396792.0,
+      "reward": 0.6166666746139526,
+      "reward_std": 0.31773003935813904,
+      "step": 35
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 265.0,
+      "completions/max_terminated_length": 265.0,
+      "completions/mean_length": 114.25,
+      "completions/mean_terminated_length": 130.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 39.0,
+      "epoch": 0.144,
+      "format_failures": 0.0,
+      "grad_norm": 1.793579339981079,
+      "kl": 0.6158746182918549,
+      "learning_rate": 1e-06,
+      "loss": 0.0043,
+      "num_tokens": 404472.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 36
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 233.0,
+      "completions/max_terminated_length": 233.0,
+      "completions/mean_length": 169.75,
+      "completions/mean_terminated_length": 194.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 122.0,
+      "epoch": 0.148,
+      "format_failures": 0.0,
+      "grad_norm": 0.3936280906200409,
+      "kl": 0.04245052766054869,
+      "learning_rate": 1e-06,
+      "loss": -0.0153,
+      "num_tokens": 411600.0,
+      "reward": 0.5294643044471741,
+      "reward_std": 0.21430060267448425,
+      "step": 37
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 152.0,
+      "completions/max_terminated_length": 152.0,
+      "completions/mean_length": 74.625,
+      "completions/mean_terminated_length": 85.28571428571429,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 59.0,
+      "epoch": 0.152,
+      "format_failures": 0.0,
+      "grad_norm": 0.592628002166748,
+      "kl": 0.14406441897153854,
+      "learning_rate": 1e-06,
+      "loss": -0.0363,
+      "num_tokens": 417456.0,
+      "reward": 0.0555555559694767,
+      "reward_std": 0.11878278106451035,
+      "step": 38
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 726.0,
+      "completions/max_terminated_length": 726.0,
+      "completions/mean_length": 330.25,
+      "completions/mean_terminated_length": 377.42857142857144,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 196.0,
+      "epoch": 0.156,
+      "format_failures": 0.0,
+      "grad_norm": 0.7340777516365051,
+      "kl": 0.02144559659063816,
+      "learning_rate": 1e-06,
+      "loss": 0.0557,
+      "num_tokens": 439208.0,
+      "reward": 0.10000000149011612,
+      "reward_std": 0.2828426957130432,
+      "step": 39
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 625.0,
+      "completions/max_terminated_length": 625.0,
+      "completions/mean_length": 336.0,
+      "completions/mean_terminated_length": 384.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 175.0,
+      "epoch": 0.16,
+      "format_failures": 0.0,
+      "grad_norm": 0.32950443029403687,
+      "kl": 0.018678720109164715,
+      "learning_rate": 1e-06,
+      "loss": 0.1579,
+      "num_tokens": 464616.0,
+      "reward": 0.68376624584198,
+      "reward_std": 0.16028425097465515,
+      "step": 40
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 91.0,
+      "completions/max_terminated_length": 91.0,
+      "completions/mean_length": 53.75,
+      "completions/mean_terminated_length": 61.42857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 39.0,
+      "epoch": 0.164,
+      "format_failures": 0.0,
+      "grad_norm": 15.617924690246582,
+      "kl": 2.1802964210510254,
+      "learning_rate": 1e-06,
+      "loss": -0.1623,
+      "num_tokens": 473272.0,
+      "reward": 0.4464285671710968,
+      "reward_std": 0.49744242429733276,
+      "step": 41
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 74.0,
+      "completions/max_terminated_length": 74.0,
+      "completions/mean_length": 62.625,
+      "completions/mean_terminated_length": 71.57142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 69.0,
+      "epoch": 0.168,
+      "format_failures": 0.0,
+      "grad_norm": 0.5167672634124756,
+      "kl": 0.192179337143898,
+      "learning_rate": 1e-06,
+      "loss": 0.0018,
+      "num_tokens": 477896.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 42
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 187.0,
+      "completions/max_terminated_length": 187.0,
+      "completions/mean_length": 124.625,
+      "completions/mean_terminated_length": 142.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 57.0,
+      "epoch": 0.172,
+      "format_failures": 1.0,
+      "grad_norm": 1.7434178590774536,
+      "kl": 0.43839313089847565,
+      "learning_rate": 1e-06,
+      "loss": -0.0081,
+      "num_tokens": 485584.0,
+      "reward": 0.1041666716337204,
+      "reward_std": 0.19795581698417664,
+      "step": 43
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.5,
+      "completions/max_length": 53.0,
+      "completions/max_terminated_length": 53.0,
+      "completions/mean_length": 21.5,
+      "completions/mean_terminated_length": 43.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 33.0,
+      "epoch": 0.176,
+      "format_failures": 0.0,
+      "grad_norm": 0.19118274748325348,
+      "kl": 0.021482082083821297,
+      "learning_rate": 1e-06,
+      "loss": 0.0007,
+      "num_tokens": 491072.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 44
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 184.0,
+      "completions/max_terminated_length": 184.0,
+      "completions/mean_length": 101.375,
+      "completions/mean_terminated_length": 115.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 64.0,
+      "epoch": 0.18,
+      "format_failures": 0.0,
+      "grad_norm": 0.5414936542510986,
+      "kl": 0.23846322298049927,
+      "learning_rate": 1e-06,
+      "loss": 0.0026,
+      "num_tokens": 501048.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 45
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 184.0,
+      "completions/max_terminated_length": 184.0,
+      "completions/mean_length": 105.25,
+      "completions/mean_terminated_length": 120.28571428571429,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 60.0,
+      "epoch": 0.184,
+      "format_failures": 0.0,
+      "grad_norm": 1.3124736547470093,
+      "kl": 0.02640421688556671,
+      "learning_rate": 1e-06,
+      "loss": 0.0418,
+      "num_tokens": 509688.0,
+      "reward": 0.3333333432674408,
+      "reward_std": 0.35634833574295044,
+      "step": 46
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 308.0,
+      "completions/max_terminated_length": 308.0,
+      "completions/mean_length": 222.625,
+      "completions/mean_terminated_length": 254.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 140.0,
+      "epoch": 0.188,
+      "format_failures": 0.0,
+      "grad_norm": 0.6642023324966431,
+      "kl": 0.038137754425406456,
+      "learning_rate": 1e-06,
+      "loss": -0.0281,
+      "num_tokens": 516136.0,
+      "reward": 0.5722222328186035,
+      "reward_std": 0.3752013146877289,
+      "step": 47
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 220.0,
+      "completions/max_terminated_length": 220.0,
+      "completions/mean_length": 139.0,
+      "completions/mean_terminated_length": 158.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 52.0,
+      "epoch": 0.192,
+      "format_failures": 0.0,
+      "grad_norm": 1.5801048278808594,
+      "kl": 0.31588232330977917,
+      "learning_rate": 1e-06,
+      "loss": -0.0356,
+      "num_tokens": 525216.0,
+      "reward": 0.16785714030265808,
+      "reward_std": 0.3453776240348816,
+      "step": 48
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 142.0,
+      "completions/max_terminated_length": 142.0,
+      "completions/mean_length": 103.0,
+      "completions/mean_terminated_length": 117.71428571428571,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 72.0,
+      "epoch": 0.196,
+      "format_failures": 0.0,
+      "grad_norm": 1.5228773355484009,
+      "kl": 0.3656068593263626,
+      "learning_rate": 1e-06,
+      "loss": -0.0299,
+      "num_tokens": 532920.0,
+      "reward": 0.0833333358168602,
+      "reward_std": 0.15430335700511932,
+      "step": 49
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 167.0,
+      "completions/max_terminated_length": 167.0,
+      "completions/mean_length": 58.625,
+      "completions/mean_terminated_length": 67.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 23.0,
+      "epoch": 0.2,
+      "format_failures": 0.0,
+      "grad_norm": 2.357253074645996,
+      "kl": 0.021084215957671404,
+      "learning_rate": 1e-06,
+      "loss": -0.1241,
+      "num_tokens": 539800.0,
+      "reward": 0.24715909361839294,
+      "reward_std": 0.3969031274318695,
+      "step": 50
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.375,
+      "completions/max_length": 79.0,
+      "completions/max_terminated_length": 79.0,
+      "completions/mean_length": 47.5,
+      "completions/mean_terminated_length": 76.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 74.0,
+      "epoch": 0.204,
+      "format_failures": 0.0,
+      "grad_norm": 3.9780025482177734,
+      "kl": 0.04299665614962578,
+      "learning_rate": 1e-06,
+      "loss": -0.0066,
+      "num_tokens": 547080.0,
+      "reward": 0.75,
+      "reward_std": 0.38832157850265503,
+      "step": 51
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 377.0,
+      "completions/max_terminated_length": 377.0,
+      "completions/mean_length": 245.0,
+      "completions/mean_terminated_length": 280.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 236.0,
+      "epoch": 0.208,
+      "format_failures": 0.0,
+      "grad_norm": 0.824322521686554,
+      "kl": 0.04343542829155922,
+      "learning_rate": 1e-06,
+      "loss": -0.394,
+      "num_tokens": 565368.0,
+      "reward": 0.3678571581840515,
+      "reward_std": 0.38505232334136963,
+      "step": 52
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 313.0,
+      "completions/max_terminated_length": 313.0,
+      "completions/mean_length": 223.5,
+      "completions/mean_terminated_length": 255.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 229.0,
+      "epoch": 0.212,
+      "format_failures": 0.0,
+      "grad_norm": 0.8966130018234253,
+      "kl": 0.022847690619528294,
+      "learning_rate": 1e-06,
+      "loss": 0.0523,
+      "num_tokens": 584552.0,
+      "reward": 0.09375,
+      "reward_std": 0.2651650309562683,
+      "step": 53
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 463.0,
+      "completions/max_terminated_length": 463.0,
+      "completions/mean_length": 301.75,
+      "completions/mean_terminated_length": 344.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 222.0,
+      "epoch": 0.216,
+      "format_failures": 0.0,
+      "grad_norm": 0.5948707461357117,
+      "kl": 0.0344517957419157,
+      "learning_rate": 1e-06,
+      "loss": -0.0372,
+      "num_tokens": 605144.0,
+      "reward": 0.3611606955528259,
+      "reward_std": 0.24707795679569244,
+      "step": 54
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 183.0,
+      "completions/max_terminated_length": 183.0,
+      "completions/mean_length": 99.75,
+      "completions/mean_terminated_length": 114.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 69.0,
+      "epoch": 0.22,
+      "format_failures": 0.0,
+      "grad_norm": 2.431544065475464,
+      "kl": 0.39844033867120743,
+      "learning_rate": 1e-06,
+      "loss": 0.0435,
+      "num_tokens": 612304.0,
+      "reward": 0.3895833492279053,
+      "reward_std": 0.4363391399383545,
+      "step": 55
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 189.0,
+      "completions/max_terminated_length": 189.0,
+      "completions/mean_length": 158.875,
+      "completions/mean_terminated_length": 181.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 170.0,
+      "epoch": 0.224,
+      "format_failures": 0.0,
+      "grad_norm": 3.419069528579712,
+      "kl": 0.18863588571548462,
+      "learning_rate": 1e-06,
+      "loss": -0.0102,
+      "num_tokens": 619832.0,
+      "reward": 0.3333333432674408,
+      "reward_std": 0.4714045226573944,
+      "step": 56
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 560.0,
+      "completions/max_terminated_length": 560.0,
+      "completions/mean_length": 250.5,
+      "completions/mean_terminated_length": 286.2857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 112.0,
+      "epoch": 0.228,
+      "format_failures": 0.0,
+      "grad_norm": 0.0427495501935482,
+      "kl": 0.06415125727653503,
+      "learning_rate": 1e-06,
+      "loss": 0.0002,
+      "num_tokens": 632688.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 57
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 95.0,
+      "completions/max_terminated_length": 95.0,
+      "completions/mean_length": 62.0,
+      "completions/mean_terminated_length": 70.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 55.0,
+      "epoch": 0.232,
+      "format_failures": 0.0,
+      "grad_norm": 1.9774202108383179,
+      "kl": 0.05197676923125982,
+      "learning_rate": 1e-06,
+      "loss": -0.0204,
+      "num_tokens": 637680.0,
+      "reward": 0.125,
+      "reward_std": 0.3535533845424652,
+      "step": 58
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 1235.0,
+      "completions/max_terminated_length": 1235.0,
+      "completions/mean_length": 317.5,
+      "completions/mean_terminated_length": 362.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 29.0,
+      "epoch": 0.236,
+      "format_failures": 0.0,
+      "grad_norm": 0.3588317036628723,
+      "kl": 0.008119639242067933,
+      "learning_rate": 1e-06,
+      "loss": 0.0679,
+      "num_tokens": 662240.0,
+      "reward": 0.0625,
+      "reward_std": 0.1767766922712326,
+      "step": 59
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 482.0,
+      "completions/max_terminated_length": 482.0,
+      "completions/mean_length": 302.625,
+      "completions/mean_terminated_length": 345.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 224.0,
+      "epoch": 0.24,
+      "format_failures": 0.0,
+      "grad_norm": 0.43694156408309937,
+      "kl": 0.13442928344011307,
+      "learning_rate": 1e-06,
+      "loss": 0.035,
+      "num_tokens": 671136.0,
+      "reward": 0.4389880895614624,
+      "reward_std": 0.314676970243454,
+      "step": 60
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 155.0,
+      "completions/max_terminated_length": 155.0,
+      "completions/mean_length": 76.625,
+      "completions/mean_terminated_length": 87.57142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 62.0,
+      "epoch": 0.244,
+      "format_failures": 0.0,
+      "grad_norm": 2.0356831550598145,
+      "kl": 0.10412658751010895,
+      "learning_rate": 1e-06,
+      "loss": 0.0941,
+      "num_tokens": 678296.0,
+      "reward": 0.2856481671333313,
+      "reward_std": 0.44585946202278137,
+      "step": 61
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 526.0,
+      "completions/max_terminated_length": 526.0,
+      "completions/mean_length": 302.125,
+      "completions/mean_terminated_length": 345.2857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 173.0,
+      "epoch": 0.248,
+      "format_failures": 0.0,
+      "grad_norm": 0.2828364074230194,
+      "kl": 0.06026838719844818,
+      "learning_rate": 1e-06,
+      "loss": 0.0307,
+      "num_tokens": 688328.0,
+      "reward": 0.37730082869529724,
+      "reward_std": 0.22057875990867615,
+      "step": 62
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 1564.0,
+      "completions/max_terminated_length": 1564.0,
+      "completions/mean_length": 436.5,
+      "completions/mean_terminated_length": 498.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 278.0,
+      "epoch": 0.252,
+      "format_failures": 0.0,
+      "grad_norm": 0.460735559463501,
+      "kl": 0.03187366481870413,
+      "learning_rate": 1e-06,
+      "loss": 0.3464,
+      "num_tokens": 710552.0,
+      "reward": 0.7753968238830566,
+      "reward_std": 0.3274153470993042,
+      "step": 63
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 183.0,
+      "completions/max_terminated_length": 183.0,
+      "completions/mean_length": 112.0,
+      "completions/mean_terminated_length": 128.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 77.0,
+      "epoch": 0.256,
+      "format_failures": 0.0,
+      "grad_norm": 0.9710547924041748,
+      "kl": 0.056045059114694595,
+      "learning_rate": 1e-06,
+      "loss": 0.397,
+      "num_tokens": 730936.0,
+      "reward": 0.4721861779689789,
+      "reward_std": 0.31307727098464966,
+      "step": 64
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 282.0,
+      "completions/max_terminated_length": 282.0,
+      "completions/mean_length": 181.25,
+      "completions/mean_terminated_length": 207.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 115.0,
+      "epoch": 0.26,
+      "format_failures": 0.0,
+      "grad_norm": 0.5494914054870605,
+      "kl": 0.17688407003879547,
+      "learning_rate": 1e-06,
+      "loss": 0.0636,
+      "num_tokens": 737640.0,
+      "reward": 0.4345238208770752,
+      "reward_std": 0.24914170801639557,
+      "step": 65
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 782.0,
+      "completions/max_terminated_length": 782.0,
+      "completions/mean_length": 442.625,
+      "completions/mean_terminated_length": 505.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 371.0,
+      "epoch": 0.264,
+      "format_failures": 0.0,
+      "grad_norm": 0.2535926103591919,
+      "kl": 0.027257385663688183,
+      "learning_rate": 1e-06,
+      "loss": 0.0455,
+      "num_tokens": 749424.0,
+      "reward": 0.4035714268684387,
+      "reward_std": 0.21609759330749512,
+      "step": 66
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 536.0,
+      "completions/max_terminated_length": 536.0,
+      "completions/mean_length": 360.375,
+      "completions/mean_terminated_length": 411.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 286.0,
+      "epoch": 0.268,
+      "format_failures": 0.0,
+      "grad_norm": 0.2211979627609253,
+      "kl": 0.03450755029916763,
+      "learning_rate": 1e-06,
+      "loss": -0.0173,
+      "num_tokens": 758368.0,
+      "reward": 0.26453372836112976,
+      "reward_std": 0.18241503834724426,
+      "step": 67
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 324.0,
+      "completions/max_terminated_length": 324.0,
+      "completions/mean_length": 171.0,
+      "completions/mean_terminated_length": 195.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 64.0,
+      "epoch": 0.272,
+      "format_failures": 0.0,
+      "grad_norm": 1.1518077850341797,
+      "kl": 0.7764540687203407,
+      "learning_rate": 1e-06,
+      "loss": 0.0543,
+      "num_tokens": 769808.0,
+      "reward": 0.20863094925880432,
+      "reward_std": 0.1800907701253891,
+      "step": 68
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 255.0,
+      "completions/max_terminated_length": 255.0,
+      "completions/mean_length": 146.875,
+      "completions/mean_terminated_length": 167.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 90.0,
+      "epoch": 0.276,
+      "format_failures": 0.0,
+      "grad_norm": 1.4199182987213135,
+      "kl": 0.03853025659918785,
+      "learning_rate": 1e-06,
+      "loss": -0.3424,
+      "num_tokens": 787960.0,
+      "reward": 0.29305553436279297,
+      "reward_std": 0.3426187038421631,
+      "step": 69
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 497.0,
+      "completions/max_terminated_length": 497.0,
+      "completions/mean_length": 260.25,
+      "completions/mean_terminated_length": 297.42857142857144,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 193.0,
+      "epoch": 0.28,
+      "format_failures": 0.0,
+      "grad_norm": 0.95790034532547,
+      "kl": 0.04087948985397816,
+      "learning_rate": 1e-06,
+      "loss": -0.0072,
+      "num_tokens": 808840.0,
+      "reward": 0.30420100688934326,
+      "reward_std": 0.21492989361286163,
+      "step": 70
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 457.0,
+      "completions/max_terminated_length": 457.0,
+      "completions/mean_length": 277.125,
+      "completions/mean_terminated_length": 316.7142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 222.0,
+      "epoch": 0.284,
+      "format_failures": 0.0,
+      "grad_norm": 0.6122504472732544,
+      "kl": 0.043809447437524796,
+      "learning_rate": 1e-06,
+      "loss": 0.0844,
+      "num_tokens": 820184.0,
+      "reward": 0.4826388657093048,
+      "reward_std": 0.40854451060295105,
+      "step": 71
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.625,
+      "completions/max_length": 130.0,
+      "completions/max_terminated_length": 130.0,
+      "completions/mean_length": 31.875,
+      "completions/mean_terminated_length": 85.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 62.0,
+      "epoch": 0.288,
+      "format_failures": 0.0,
+      "grad_norm": 3.6429221630096436,
+      "kl": 0.14530150592327118,
+      "learning_rate": 1e-06,
+      "loss": -0.3358,
+      "num_tokens": 828280.0,
+      "reward": 0.625,
+      "reward_std": 0.41547447443008423,
+      "step": 72
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 2028.0,
+      "completions/max_terminated_length": 2028.0,
+      "completions/mean_length": 568.375,
+      "completions/mean_terminated_length": 649.5714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 233.0,
+      "epoch": 0.292,
+      "format_failures": 0.0,
+      "grad_norm": 0.340351402759552,
+      "kl": 0.04210643842816353,
+      "learning_rate": 1e-06,
+      "loss": 0.1705,
+      "num_tokens": 850536.0,
+      "reward": 0.255952388048172,
+      "reward_std": 0.28989601135253906,
+      "step": 73
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 298.0,
+      "completions/max_terminated_length": 298.0,
+      "completions/mean_length": 243.5,
+      "completions/mean_terminated_length": 278.2857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 271.0,
+      "epoch": 0.296,
+      "format_failures": 0.0,
+      "grad_norm": 16.964588165283203,
+      "kl": 2.3798545002937317,
+      "learning_rate": 1e-06,
+      "loss": 0.0303,
+      "num_tokens": 861552.0,
+      "reward": 0.5833333730697632,
+      "reward_std": 0.4629100263118744,
+      "step": 74
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 545.0,
+      "completions/max_terminated_length": 545.0,
+      "completions/mean_length": 225.375,
+      "completions/mean_terminated_length": 257.57142857142856,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 163.0,
+      "epoch": 0.3,
+      "format_failures": 0.0,
+      "grad_norm": 0.23826824128627777,
+      "kl": 0.033232852816581726,
+      "learning_rate": 1e-06,
+      "loss": 0.0132,
+      "num_tokens": 872312.0,
+      "reward": 0.20226716995239258,
+      "reward_std": 0.15315401554107666,
+      "step": 75
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 222.0,
+      "completions/max_terminated_length": 222.0,
+      "completions/mean_length": 145.75,
+      "completions/mean_terminated_length": 166.57142857142858,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 62.0,
+      "epoch": 0.304,
+      "format_failures": 0.0,
+      "grad_norm": 1.913487434387207,
+      "kl": 1.3894951939582825,
+      "learning_rate": 1e-06,
+      "loss": -0.0165,
+      "num_tokens": 879880.0,
+      "reward": 0.17698413133621216,
+      "reward_std": 0.1964721530675888,
+      "step": 76
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 264.0,
+      "completions/max_terminated_length": 264.0,
+      "completions/mean_length": 155.0,
+      "completions/mean_terminated_length": 177.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 92.0,
+      "epoch": 0.308,
+      "format_failures": 0.0,
+      "grad_norm": 2.5412757396698,
+      "kl": 1.028398334980011,
+      "learning_rate": 1e-06,
+      "loss": 0.0962,
+      "num_tokens": 887960.0,
+      "reward": 0.45376986265182495,
+      "reward_std": 0.3097318112850189,
+      "step": 77
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 536.0,
+      "completions/max_terminated_length": 536.0,
+      "completions/mean_length": 286.375,
+      "completions/mean_terminated_length": 327.2857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 176.0,
+      "epoch": 0.312,
+      "format_failures": 0.0,
+      "grad_norm": 0.6730135679244995,
+      "kl": 0.0538824163377285,
+      "learning_rate": 1e-06,
+      "loss": 0.1157,
+      "num_tokens": 898928.0,
+      "reward": 0.20416666567325592,
+      "reward_std": 0.3781481683254242,
+      "step": 78
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.375,
+      "completions/max_length": 188.0,
+      "completions/max_terminated_length": 188.0,
+      "completions/mean_length": 99.25,
+      "completions/mean_terminated_length": 158.8,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 48.0,
+      "epoch": 0.316,
+      "format_failures": 0.0,
+      "grad_norm": 1.8478459119796753,
+      "kl": 0.015719112940132618,
+      "learning_rate": 1e-06,
+      "loss": -0.134,
+      "num_tokens": 908336.0,
+      "reward": 0.75,
+      "reward_std": 0.4629100561141968,
+      "step": 79
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 290.0,
+      "completions/max_terminated_length": 290.0,
+      "completions/mean_length": 218.0,
+      "completions/mean_terminated_length": 249.14285714285714,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 111.0,
+      "epoch": 0.32,
+      "format_failures": 0.0,
+      "grad_norm": 4.647150039672852,
+      "kl": 1.3871727883815765,
+      "learning_rate": 1e-06,
+      "loss": 0.0114,
+      "num_tokens": 919144.0,
+      "reward": 0.515625,
+      "reward_std": 0.5194326043128967,
+      "step": 80
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 61.0,
+      "completions/max_terminated_length": 61.0,
+      "completions/mean_length": 44.75,
+      "completions/mean_terminated_length": 51.142857142857146,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 42.0,
+      "epoch": 0.324,
+      "format_failures": 0.0,
+      "grad_norm": 4.4413957595825195,
+      "kl": 1.4963605403900146,
+      "learning_rate": 1e-06,
+      "loss": 0.0199,
+      "num_tokens": 924120.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "step": 81
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 158.0,
+      "completions/max_terminated_length": 158.0,
+      "completions/mean_length": 114.0,
+      "completions/mean_terminated_length": 130.28571428571428,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 70.0,
+      "epoch": 0.328,
+      "format_failures": 0.0,
+      "grad_norm": 0.7050689458847046,
+      "kl": 0.046199409291148186,
+      "learning_rate": 1e-06,
+      "loss": 0.0456,
+      "num_tokens": 930960.0,
+      "reward": 0.5011904835700989,
+      "reward_std": 0.24937564134597778,
+      "step": 82
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 518.0,
+      "completions/max_terminated_length": 518.0,
+      "completions/mean_length": 449.875,
+      "completions/mean_terminated_length": 514.1428571428571,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 509.0,
+      "epoch": 0.332,
+      "format_failures": 0.0,
+      "grad_norm": 0.26836591958999634,
+      "kl": 0.006152217974886298,
+      "learning_rate": 1e-06,
+      "loss": -0.0312,
+      "num_tokens": 948424.0,
+      "reward": 0.7916666865348816,
+      "reward_std": 0.39591163396835327,
+      "step": 83
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 246.0,
+      "completions/max_terminated_length": 246.0,
+      "completions/mean_length": 138.625,
+      "completions/mean_terminated_length": 158.42857142857142,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 130.0,
+      "epoch": 0.336,
+      "format_failures": 0.0,
+      "grad_norm": 1.0764328241348267,
+      "kl": 0.07650505751371384,
+      "learning_rate": 1e-06,
+      "loss": -0.0964,
+      "num_tokens": 956768.0,
+      "reward": 0.3864583373069763,
+      "reward_std": 0.3207734227180481,
+      "step": 84
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 531.0,
+      "completions/max_terminated_length": 531.0,
+      "completions/mean_length": 292.0,
+      "completions/mean_terminated_length": 333.7142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 178.0,
+      "epoch": 0.34,
+      "format_failures": 0.0,
+      "grad_norm": 0.5540055632591248,
+      "kl": 0.054012734442949295,
+      "learning_rate": 1e-06,
+      "loss": -0.1183,
+      "num_tokens": 966600.0,
+      "reward": 0.34756946563720703,
+      "reward_std": 0.300673246383667,
+      "step": 85
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 153.0,
+      "completions/max_terminated_length": 153.0,
+      "completions/mean_length": 126.0,
+      "completions/mean_terminated_length": 144.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 104.0,
+      "epoch": 0.344,
+      "format_failures": 0.0,
+      "grad_norm": 2.176490306854248,
+      "kl": 0.14486993476748466,
+      "learning_rate": 1e-06,
+      "loss": 0.044,
+      "num_tokens": 974040.0,
+      "reward": 0.6666666269302368,
+      "reward_std": 0.4714045226573944,
+      "step": 86
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 163.0,
+      "completions/max_terminated_length": 163.0,
+      "completions/mean_length": 139.875,
+      "completions/mean_terminated_length": 159.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 154.0,
+      "epoch": 0.348,
+      "format_failures": 0.0,
+      "grad_norm": 3.048673391342163,
+      "kl": 0.05823306553065777,
+      "learning_rate": 1e-06,
+      "loss": 1.0611,
+      "num_tokens": 995888.0,
+      "reward": 0.625,
+      "reward_std": 0.5175491571426392,
+      "step": 87
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.25,
+      "completions/max_length": 281.0,
+      "completions/max_terminated_length": 281.0,
+      "completions/mean_length": 101.125,
+      "completions/mean_terminated_length": 134.83333333333334,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 75.0,
+      "epoch": 0.352,
+      "format_failures": 0.0,
+      "grad_norm": 1.9394124746322632,
+      "kl": 0.09709636494517326,
+      "learning_rate": 1e-06,
+      "loss": 0.3171,
+      "num_tokens": 1016272.0,
+      "reward": 0.47559523582458496,
+      "reward_std": 0.2696917653083801,
+      "step": 88
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 160.0,
+      "completions/max_terminated_length": 160.0,
+      "completions/mean_length": 92.375,
+      "completions/mean_terminated_length": 105.57142857142857,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 64.0,
+      "epoch": 0.356,
+      "format_failures": 0.0,
+      "grad_norm": 1.0850152969360352,
+      "kl": 0.11065866611897945,
+      "learning_rate": 1e-06,
+      "loss": -0.0191,
+      "num_tokens": 1022584.0,
+      "reward": 0.027205882593989372,
+      "reward_std": 0.050595808774232864,
+      "step": 89
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 238.0,
+      "completions/max_terminated_length": 238.0,
+      "completions/mean_length": 152.125,
+      "completions/mean_terminated_length": 173.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 129.0,
+      "epoch": 0.36,
+      "format_failures": 0.0,
+      "grad_norm": 0.7975893020629883,
+      "kl": 0.4505193531513214,
+      "learning_rate": 1e-06,
+      "loss": 0.0489,
+      "num_tokens": 1028024.0,
+      "reward": 0.4837797284126282,
+      "reward_std": 0.3459106385707855,
+      "step": 90
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.375,
+      "completions/max_length": 198.0,
+      "completions/max_terminated_length": 198.0,
+      "completions/mean_length": 122.875,
+      "completions/mean_terminated_length": 196.6,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 195.0,
+      "epoch": 0.364,
+      "format_failures": 0.0,
+      "grad_norm": 0.371446430683136,
+      "kl": 0.017493599094450474,
+      "learning_rate": 1e-06,
+      "loss": -0.0009,
+      "num_tokens": 1039176.0,
+      "reward": 0.7916666865348816,
+      "reward_std": 0.39591163396835327,
+      "step": 91
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 368.0,
+      "completions/max_terminated_length": 368.0,
+      "completions/mean_length": 228.5,
+      "completions/mean_terminated_length": 261.14285714285717,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 132.0,
+      "epoch": 0.368,
+      "format_failures": 0.0,
+      "grad_norm": 1.6181436777114868,
+      "kl": 1.322296380996704,
+      "learning_rate": 1e-06,
+      "loss": -0.0419,
+      "num_tokens": 1047784.0,
+      "reward": 0.2874999940395355,
+      "reward_std": 0.39957815408706665,
+      "step": 92
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 445.0,
+      "completions/max_terminated_length": 445.0,
+      "completions/mean_length": 250.125,
+      "completions/mean_terminated_length": 285.85714285714283,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 206.0,
+      "epoch": 0.372,
+      "format_failures": 0.0,
+      "grad_norm": 0.4590940773487091,
+      "kl": 0.03011018969118595,
+      "learning_rate": 1e-06,
+      "loss": -0.0477,
+      "num_tokens": 1058760.0,
+      "reward": 0.38749998807907104,
+      "reward_std": 0.3058944642543793,
+      "step": 93
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 64.0,
+      "completions/max_terminated_length": 64.0,
+      "completions/mean_length": 55.75,
+      "completions/mean_terminated_length": 63.714285714285715,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 62.0,
+      "epoch": 0.376,
+      "format_failures": 0.0,
+      "grad_norm": 3.706254720687866,
+      "kl": 0.022694013081490993,
+      "learning_rate": 1e-06,
+      "loss": 0.4609,
+      "num_tokens": 1069792.0,
+      "reward": 0.5052083730697632,
+      "reward_std": 0.25630685687065125,
+      "step": 94
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 297.0,
+      "completions/max_terminated_length": 297.0,
+      "completions/mean_length": 155.75,
+      "completions/mean_terminated_length": 178.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 101.0,
+      "epoch": 0.38,
+      "format_failures": 0.0,
+      "grad_norm": 1.6162223815917969,
+      "kl": 0.43194980919361115,
+      "learning_rate": 1e-06,
+      "loss": -0.0132,
+      "num_tokens": 1079864.0,
+      "reward": 0.21741071343421936,
+      "reward_std": 0.28225868940353394,
+      "step": 95
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 141.0,
+      "completions/max_terminated_length": 141.0,
+      "completions/mean_length": 120.125,
+      "completions/mean_terminated_length": 137.28571428571428,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 134.0,
+      "epoch": 0.384,
+      "format_failures": 0.0,
+      "grad_norm": 18.852705001831055,
+      "kl": 4.019676446914673,
+      "learning_rate": 1e-06,
+      "loss": 0.0359,
+      "num_tokens": 1088416.0,
+      "reward": 0.90625,
+      "reward_std": 0.1293872892856598,
+      "step": 96
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 81.0,
+      "completions/max_terminated_length": 81.0,
+      "completions/mean_length": 65.125,
+      "completions/mean_terminated_length": 74.42857142857143,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 72.0,
+      "epoch": 0.388,
+      "format_failures": 0.0,
+      "grad_norm": 0.17805831134319305,
+      "kl": 0.0494217723608017,
+      "learning_rate": 1e-06,
+      "loss": 0.0198,
+      "num_tokens": 1095056.0,
+      "reward": 0.984375,
+      "reward_std": 0.04419417306780815,
+      "step": 97
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 63.0,
+      "completions/max_terminated_length": 63.0,
+      "completions/mean_length": 34.75,
+      "completions/mean_terminated_length": 39.714285714285715,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 20.0,
+      "epoch": 0.392,
+      "format_failures": 0.0,
+      "grad_norm": 1.5279428958892822,
+      "kl": 0.29206034541130066,
+      "learning_rate": 1e-06,
+      "loss": -0.0386,
+      "num_tokens": 1100752.0,
+      "reward": 0.0416666679084301,
+      "reward_std": 0.1178511381149292,
+      "step": 98
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 351.0,
+      "completions/max_terminated_length": 351.0,
+      "completions/mean_length": 249.375,
+      "completions/mean_terminated_length": 285.0,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 212.0,
+      "epoch": 0.396,
+      "format_failures": 0.0,
+      "grad_norm": 0.56284499168396,
+      "kl": 0.11262823268771172,
+      "learning_rate": 1e-06,
+      "loss": 0.0758,
+      "num_tokens": 1112056.0,
+      "reward": 0.5658119916915894,
+      "reward_std": 0.2206362932920456,
+      "step": 99
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.125,
+      "completions/max_length": 194.0,
+      "completions/max_terminated_length": 194.0,
+      "completions/mean_length": 149.5,
+      "completions/mean_terminated_length": 170.85714285714286,
+      "completions/min_length": 0.0,
+      "completions/min_terminated_length": 52.0,
+      "epoch": 0.4,
+      "format_failures": 0.0,
+      "grad_norm": 2.1969668865203857,
+      "kl": 0.0690736249089241,
+      "learning_rate": 1e-06,
+      "loss": -0.001,
+      "num_tokens": 1121104.0,
+      "reward": 0.75,
+      "reward_std": 0.4629100561141968,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 1121104,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5508d5c26f3e35e616710da82bd5168b5fca1aa70e31a96b8fddecf6347edd5a
+size 7697

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff