Training in progress, step 200

Browse files

Files changed (17) hide show

added_tokens.json +28 -0
chat_template.jinja +85 -0
config.json +25 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 +3 -0
runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 +3 -0
runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 +3 -0
runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 +3 -0
runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 +3 -0
runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 +3 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +65 -0
training_args.bin +3 -0
vocab.json +0 -0
vocab.txt +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,85 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set content = message.content %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in message.content %}
+                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
+                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.57.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45678ca92701e2b21d762de3576e475d0d9a1bd02f16d5e2320057ec6fa09465
+size 90864192

runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e7289d1981b4d81496fff2acf2caaa259d77ccda3376b0046f03e10d7a54e2
+size 4511

runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79df46a5850d5049016b37e7a805a17189d2a1e5bc466328920d8217676a1b63
+size 4603

runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9a14fbd825017023d9de9fc7ffc335785f7992b758c7b64ca3c99b2e71ab71
+size 4603

runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a1eef828ac3ca727a8ac33ca98fbd2770569c20a5a5059a4444005ea700660d
+size 4951

runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd43b55c9534f7f2c6088a2aef0de9d2e714d8afbb66767693cfaf1d7c25827
+size 4512

runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dddeaa98b0eedaa1c747b2a8a9b2195f3afe5caebcfd9324f4bc228c3b9c592b
+size 5348

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 256,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1819100ad868e62f00e5b4dc07c294abf3a12b8449611db62fa6381bfc3e570a
+size 6225

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff