Add 17 files

Browse files

Files changed (11) hide show

chat_template.jinja +1 -26
generation_config.json +2 -3
model-00001-of-00002.safetensors +2 -2
model-00002-of-00002.safetensors +2 -2
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +2 -2
tokenizer_config.json +2 -1
trainer_state.json +392 -0
training_args.bin +3 -0

chat_template.jinja CHANGED Viewed

@@ -14,14 +14,6 @@
         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
-{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
-{%- for message in messages[::-1] %}
-    {%- set index = (messages|length - 1) - loop.index0 %}
-    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
-        {%- set ns.multi_step_tool = false %}
-        {%- set ns.last_query_index = index %}
-    {%- endif %}
-{%- endfor %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
@@ -31,24 +23,7 @@
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
-        {%- set reasoning_content = '' %}
-        {%- if message.reasoning_content is string %}
-            {%- set reasoning_content = message.reasoning_content %}
-        {%- else %}
-            {%- if '</think>' in content %}
-                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
-                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
-            {%- endif %}
-        {%- endif %}
-        {%- if loop.index0 > ns.last_query_index %}
-            {%- if loop.last or (not loop.last and reasoning_content) %}
-                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
-            {%- else %}
-                {{- '<|im_start|>' + message.role + '\n' + content }}
-            {%- endif %}
-        {%- else %}
-            {{- '<|im_start|>' + message.role + '\n' + content }}
-        {%- endif %}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}

         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role + '\n' + content }}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}

generation_config.json CHANGED Viewed

@@ -1,13 +1,12 @@
 {
-  "bos_token_id": 151643,
   "do_sample": true,
   "eos_token_id": [
     151645,
     151643
   ],
   "pad_token_id": 151643,
-  "temperature": 0.6,
   "top_k": 20,
-  "top_p": 0.95,
   "transformers_version": "4.57.3"
 }

 {
   "do_sample": true,
   "eos_token_id": [
     151645,
     151643
   ],
   "pad_token_id": 151643,
+  "temperature": 0.7,
   "top_k": 20,
+  "top_p": 0.8,
   "transformers_version": "4.57.3"
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd02404741f929f63daf265a1b0680437ff8fe18513d0213539216647564e0f7
-size 4967215455

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd0a5d1fb55d9e9c23364942c6b2393506b1c5e8464bf56bcdfd7c853e91fc2c
+size 4967215360

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f2a96932a017229f401f2a6fe98c3a1c3869283e3d637462980c11b070cfafa
-size 3077766730

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed2dc0d1dcf4087574ffaf17728197a85fffb69dfb523ba2612be7e9f9aea85f
+size 3077766632

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab31dab212f92003a2713452e366673d1d19f4c618c14149abb470903d19e029
+size 16090219945

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7242403044cccf39933e2344635dc740c47d5b24649c5690c6b12d08ca549e87
+size 1465

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
-size 11422654

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0acdaba32b920d640afb36af4396c91974e074735636e4016d17a8ed9c03730
+size 11422753

tokenizer_config.json CHANGED Viewed

@@ -231,8 +231,9 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 262144,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null

   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
+  "model_max_length": 16888,
   "pad_token": "<|endoftext|>",
+  "padding_side": "right",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null

trainer_state.json ADDED Viewed

	@@ -0,0 +1,392 @@

+{
+  "best_global_step": 500,
+  "best_metric": 0.02287970297038555,
+  "best_model_checkpoint": "./trained_model_20251223_131030/checkpoint-500",
+  "epoch": 0.36133694670280037,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.007226738934056007,
+      "grad_norm": 1.3359375,
+      "learning_rate": 1.44e-06,
+      "loss": 0.0529,
+      "step": 10
+    },
+    {
+      "epoch": 0.014453477868112014,
+      "grad_norm": 1.40625,
+      "learning_rate": 3.04e-06,
+      "loss": 0.0463,
+      "step": 20
+    },
+    {
+      "epoch": 0.02168021680216802,
+      "grad_norm": 0.80078125,
+      "learning_rate": 4.6400000000000005e-06,
+      "loss": 0.0295,
+      "step": 30
+    },
+    {
+      "epoch": 0.028906955736224028,
+      "grad_norm": 0.4140625,
+      "learning_rate": 6.24e-06,
+      "loss": 0.0233,
+      "step": 40
+    },
+    {
+      "epoch": 0.036133694670280034,
+      "grad_norm": 0.275390625,
+      "learning_rate": 7.840000000000001e-06,
+      "loss": 0.0192,
+      "step": 50
+    },
+    {
+      "epoch": 0.04336043360433604,
+      "grad_norm": 0.353515625,
+      "learning_rate": 9.440000000000001e-06,
+      "loss": 0.0184,
+      "step": 60
+    },
+    {
+      "epoch": 0.05058717253839205,
+      "grad_norm": 0.318359375,
+      "learning_rate": 1.1040000000000001e-05,
+      "loss": 0.0278,
+      "step": 70
+    },
+    {
+      "epoch": 0.057813911472448055,
+      "grad_norm": 0.494140625,
+      "learning_rate": 1.2640000000000001e-05,
+      "loss": 0.0219,
+      "step": 80
+    },
+    {
+      "epoch": 0.06504065040650407,
+      "grad_norm": 0.30859375,
+      "learning_rate": 1.4240000000000001e-05,
+      "loss": 0.0198,
+      "step": 90
+    },
+    {
+      "epoch": 0.07226738934056007,
+      "grad_norm": 0.408203125,
+      "learning_rate": 1.584e-05,
+      "loss": 0.0181,
+      "step": 100
+    },
+    {
+      "epoch": 0.07949412827461608,
+      "grad_norm": 0.31640625,
+      "learning_rate": 1.7440000000000002e-05,
+      "loss": 0.0177,
+      "step": 110
+    },
+    {
+      "epoch": 0.08672086720867209,
+      "grad_norm": 0.2734375,
+      "learning_rate": 1.904e-05,
+      "loss": 0.0184,
+      "step": 120
+    },
+    {
+      "epoch": 0.0939476061427281,
+      "grad_norm": 0.314453125,
+      "learning_rate": 1.99801340948597e-05,
+      "loss": 0.0191,
+      "step": 130
+    },
+    {
+      "epoch": 0.1011743450767841,
+      "grad_norm": 0.55078125,
+      "learning_rate": 1.9930469332008943e-05,
+      "loss": 0.0178,
+      "step": 140
+    },
+    {
+      "epoch": 0.10840108401084012,
+      "grad_norm": 0.2197265625,
+      "learning_rate": 1.9880804569158184e-05,
+      "loss": 0.018,
+      "step": 150
+    },
+    {
+      "epoch": 0.11562782294489611,
+      "grad_norm": 0.259765625,
+      "learning_rate": 1.9831139806307428e-05,
+      "loss": 0.0188,
+      "step": 160
+    },
+    {
+      "epoch": 0.12285456187895212,
+      "grad_norm": 0.88671875,
+      "learning_rate": 1.9781475043456668e-05,
+      "loss": 0.0217,
+      "step": 170
+    },
+    {
+      "epoch": 0.13008130081300814,
+      "grad_norm": 0.404296875,
+      "learning_rate": 1.9731810280605912e-05,
+      "loss": 0.0185,
+      "step": 180
+    },
+    {
+      "epoch": 0.13730803974706413,
+      "grad_norm": 0.228515625,
+      "learning_rate": 1.9682145517755153e-05,
+      "loss": 0.029,
+      "step": 190
+    },
+    {
+      "epoch": 0.14453477868112014,
+      "grad_norm": 0.384765625,
+      "learning_rate": 1.9632480754904396e-05,
+      "loss": 0.0262,
+      "step": 200
+    },
+    {
+      "epoch": 0.15176151761517614,
+      "grad_norm": 0.375,
+      "learning_rate": 1.9582815992053637e-05,
+      "loss": 0.0193,
+      "step": 210
+    },
+    {
+      "epoch": 0.15898825654923215,
+      "grad_norm": 0.6328125,
+      "learning_rate": 1.953315122920288e-05,
+      "loss": 0.03,
+      "step": 220
+    },
+    {
+      "epoch": 0.16621499548328816,
+      "grad_norm": 0.53515625,
+      "learning_rate": 1.9483486466352125e-05,
+      "loss": 0.0226,
+      "step": 230
+    },
+    {
+      "epoch": 0.17344173441734417,
+      "grad_norm": 0.35546875,
+      "learning_rate": 1.943382170350137e-05,
+      "loss": 0.0191,
+      "step": 240
+    },
+    {
+      "epoch": 0.18066847335140018,
+      "grad_norm": 0.4296875,
+      "learning_rate": 1.938415694065061e-05,
+      "loss": 0.0185,
+      "step": 250
+    },
+    {
+      "epoch": 0.1878952122854562,
+      "grad_norm": 0.37109375,
+      "learning_rate": 1.9334492177799853e-05,
+      "loss": 0.0203,
+      "step": 260
+    },
+    {
+      "epoch": 0.1951219512195122,
+      "grad_norm": 0.466796875,
+      "learning_rate": 1.9284827414949094e-05,
+      "loss": 0.0179,
+      "step": 270
+    },
+    {
+      "epoch": 0.2023486901535682,
+      "grad_norm": 0.32421875,
+      "learning_rate": 1.9235162652098338e-05,
+      "loss": 0.0193,
+      "step": 280
+    },
+    {
+      "epoch": 0.20957542908762422,
+      "grad_norm": 0.330078125,
+      "learning_rate": 1.9185497889247578e-05,
+      "loss": 0.0169,
+      "step": 290
+    },
+    {
+      "epoch": 0.21680216802168023,
+      "grad_norm": 0.35546875,
+      "learning_rate": 1.9135833126396822e-05,
+      "loss": 0.0184,
+      "step": 300
+    },
+    {
+      "epoch": 0.2240289069557362,
+      "grad_norm": 0.27734375,
+      "learning_rate": 1.9086168363546066e-05,
+      "loss": 0.0239,
+      "step": 310
+    },
+    {
+      "epoch": 0.23125564588979222,
+      "grad_norm": 0.275390625,
+      "learning_rate": 1.903650360069531e-05,
+      "loss": 0.0234,
+      "step": 320
+    },
+    {
+      "epoch": 0.23848238482384823,
+      "grad_norm": 0.408203125,
+      "learning_rate": 1.898683883784455e-05,
+      "loss": 0.0199,
+      "step": 330
+    },
+    {
+      "epoch": 0.24570912375790424,
+      "grad_norm": 0.365234375,
+      "learning_rate": 1.8937174074993794e-05,
+      "loss": 0.0191,
+      "step": 340
+    },
+    {
+      "epoch": 0.2529358626919603,
+      "grad_norm": 0.298828125,
+      "learning_rate": 1.8887509312143035e-05,
+      "loss": 0.0179,
+      "step": 350
+    },
+    {
+      "epoch": 0.2601626016260163,
+      "grad_norm": 0.58203125,
+      "learning_rate": 1.883784454929228e-05,
+      "loss": 0.018,
+      "step": 360
+    },
+    {
+      "epoch": 0.26738934056007224,
+      "grad_norm": 0.298828125,
+      "learning_rate": 1.878817978644152e-05,
+      "loss": 0.0213,
+      "step": 370
+    },
+    {
+      "epoch": 0.27461607949412825,
+      "grad_norm": 0.453125,
+      "learning_rate": 1.8738515023590763e-05,
+      "loss": 0.0182,
+      "step": 380
+    },
+    {
+      "epoch": 0.28184281842818426,
+      "grad_norm": 0.33203125,
+      "learning_rate": 1.8688850260740007e-05,
+      "loss": 0.0306,
+      "step": 390
+    },
+    {
+      "epoch": 0.28906955736224027,
+      "grad_norm": 0.31640625,
+      "learning_rate": 1.8639185497889248e-05,
+      "loss": 0.0438,
+      "step": 400
+    },
+    {
+      "epoch": 0.2962962962962963,
+      "grad_norm": 0.4375,
+      "learning_rate": 1.8589520735038492e-05,
+      "loss": 0.0594,
+      "step": 410
+    },
+    {
+      "epoch": 0.3035230352303523,
+      "grad_norm": 0.2734375,
+      "learning_rate": 1.8539855972187736e-05,
+      "loss": 0.0182,
+      "step": 420
+    },
+    {
+      "epoch": 0.3107497741644083,
+      "grad_norm": 0.291015625,
+      "learning_rate": 1.8490191209336976e-05,
+      "loss": 0.0193,
+      "step": 430
+    },
+    {
+      "epoch": 0.3179765130984643,
+      "grad_norm": 0.431640625,
+      "learning_rate": 1.844052644648622e-05,
+      "loss": 0.0178,
+      "step": 440
+    },
+    {
+      "epoch": 0.3252032520325203,
+      "grad_norm": 0.283203125,
+      "learning_rate": 1.839086168363546e-05,
+      "loss": 0.0197,
+      "step": 450
+    },
+    {
+      "epoch": 0.3324299909665763,
+      "grad_norm": 0.427734375,
+      "learning_rate": 1.8341196920784705e-05,
+      "loss": 0.0219,
+      "step": 460
+    },
+    {
+      "epoch": 0.33965672990063234,
+      "grad_norm": 0.28125,
+      "learning_rate": 1.8291532157933945e-05,
+      "loss": 0.0185,
+      "step": 470
+    },
+    {
+      "epoch": 0.34688346883468835,
+      "grad_norm": 1.15625,
+      "learning_rate": 1.824186739508319e-05,
+      "loss": 0.0396,
+      "step": 480
+    },
+    {
+      "epoch": 0.35411020776874436,
+      "grad_norm": 0.3984375,
+      "learning_rate": 1.8192202632232433e-05,
+      "loss": 0.0287,
+      "step": 490
+    },
+    {
+      "epoch": 0.36133694670280037,
+      "grad_norm": 0.318359375,
+      "learning_rate": 1.8142537869381677e-05,
+      "loss": 0.0189,
+      "step": 500
+    },
+    {
+      "epoch": 0.36133694670280037,
+      "eval_loss": 0.02287970297038555,
+      "eval_runtime": 43.4357,
+      "eval_samples_per_second": 26.844,
+      "eval_steps_per_second": 6.723,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 4152,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.0917465462974874e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2934171f5cebc30444d6719c8cd810adec3f2252b2c17b1c00f83891aab2b503
+size 5841