elliotthwangmsa/Mistral-7B-Instruct-v0.3-tw-train_ouputs

Browse files

Files changed (9) hide show

README.md +6 -6
adapter_config.json +5 -4
adapter_model.safetensors +2 -2
chat_template.jinja +87 -0
special_tokens_map.json +2 -12
tokenizer.json +2 -2
tokenizer.model +2 -2
tokenizer_config.json +0 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: google/gemma-3-270m
 library_name: transformers
 model_name: outputs
 tags:
@@ -11,7 +11,7 @@ licence: license
 # Model Card for outputs
-This model is a fine-tuned version of [google/gemma-3-270m](https://huggingface.co/google/gemma-3-270m).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -34,11 +34,11 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.21.0
-- Transformers: 4.55.4
 - Pytorch: 2.8.0+cu126
-- Datasets: 4.0.0
-- Tokenizers: 0.21.4
 ## Citations

 ---
+base_model: mistralai/Mistral-7B-Instruct-v0.3
 library_name: transformers
 model_name: outputs
 tags:
 # Model Card for outputs
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ### Framework versions
+- TRL: 0.23.1
+- Transformers: 4.57.0
 - Pytorch: 2.8.0+cu126
+- Datasets: 4.2.0
+- Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -25,10 +25,11 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "v_proj",
     "q_proj",
-    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
+    "v_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85d5bed5f4b27479eb641850da6efaca3dad1af37aab888a56258ccc214bb0c
-size 2970320

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f3f02ce2969b822edb0f634b027fef3667fe0298d57ce3cfba92247d0fd986d
+size 46179856

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,87 @@

+{#- Chat template: renders `messages` (plus optional `tools`) into one prompt string built from the [INST]/[/INST], [AVAILABLE_TOOLS], [TOOL_CALLS] and [TOOL_RESULTS] markers. #}
+{#- Inputs read by this template: messages, tools (optional), bos_token, eos_token, and the raise_exception helper supplied by the rendering environment. #}
+{#- An optional leading system message is split off here; the remaining turns become loop_messages. #}
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{#- Default tools to none so the later "tools is not none" test is safe when the caller passes no tools. #}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{#- All user turns, collected so the final user turn can be identified when attaching the tool list. #}
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}
+{#- ns.index counts only the messages that are not skipped: even positions must have role "user", odd positions must not. #}
+{#- A namespace object is used because a plain set inside a for loop does not persist across iterations. #}
+{%- set ns = namespace() %}
+{%- set ns.index = 0 %}
+{%- for message in loop_messages %}
+    {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
+        {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
+            {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+        {%- endif %}
+        {%- set ns.index = ns.index + 1 %}
+    {%- endif %}
+{%- endfor %}
+{#- Output starts with bos_token, followed by one rendered segment per message. #}
+{{- bos_token }}
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {#- The tool list is emitted only in front of the last user turn. #}
+        {#- NOTE(review): the match is by value (==), so an earlier user message equal to the last one would also get the tool list; confirm this is acceptable. #}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {#- Each entry is re-serialised by hand from its "function" mapping; the "return" key is left out. #}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {#- String values are wrapped in double quotes as-is (no JSON escaping is applied here); other values go through tojson. #}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {#- The closing "]" of the tool array is written only after the last tool. #}
+                {#- NOTE(review): with an empty tools list this loop never runs, so the "]" is never emitted; confirm callers never pass an empty list. #}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+            {%- endif %}
+        {#- loop.last here refers to the loop over loop_messages: the system message is prepended only when this user turn is the final message. #}
+        {#- NOTE(review): if the final message is not a user turn, system_message is never written to the output; confirm this is intended. #}
+        {%- if loop.last and system_message is defined %}
+            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
+        {%- else %}
+            {{- "[INST] " + message["content"] + "[/INST]" }}
+        {%- endif %}
+    {#- Any non-user message carrying tool_calls is handled here, before the plain assistant branch; its "content" field is not emitted. #}
+    {%- elif message.tool_calls is defined and message.tool_calls is not none %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in message.tool_calls %}
+            {#- Serialise the function object, drop its final character (the closing brace) and append the call id before closing the object again. #}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {#- Only presence and a length of exactly 9 are checked; the alphanumeric requirement in the message is not verified here. #}
+            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {#- Plain assistant turn: a single space, the trimmed content, then eos_token. #}
+    {%- elif message["role"] == "assistant" %}
+        {{- " " + message["content"]|trim + eos_token}}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {#- Accept either a nested payload (message.content.content) or a flat message.content. #}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {#- content is inserted via the string filter, not tojson, so it is written without added quoting or escaping. #}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {#- Same presence and length-9 check as for tool call ids above. #}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
+    {#- Any role not matched by the branches above is rejected. #}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}

special_tokens_map.json CHANGED Viewed

@@ -1,23 +1,13 @@
 {
-  "boi_token": "<start_of_image>",
   "bos_token": {
-    "content": "<bos>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "eoi_token": "<end_of_image>",
   "eos_token": {
-    "content": "<eos>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "image_token": "<image_soft_token>",
-  "pad_token": {
-    "content": "<pad>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
   "bos_token": {
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
-size 33384568

 version https://git-lfs.github.com/spec/v1
+oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
+size 3671968

tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
-size 4689074

 version https://git-lfs.github.com/spec/v1
+oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+size 587404

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62de26bb5ad73a251d03c2355e026d0e06e6a0bc2826307a40b50432b179ca5f
-size 6097

 version https://git-lfs.github.com/spec/v1
+oid sha256:efcae7f9c8894a2eaa0775868e0f57932afe307f1981e5f433ceab68652e9f28
+size 6161