Training in progress, epoch 1

Browse files

Files changed (7) hide show

README.md +6 -6
adapter_config.json +14 -6
adapter_model.safetensors +1 -1
chat_template.jinja +87 -0
tokenizer.json +28 -6
tokenizer_config.json +1 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,18 +1,18 @@
 ---
-base_model: unsloth/mistral-7b-v0.3-bnb-4bit
 library_name: transformers
 model_name: sft_best_simplification
 tags:
 - generated_from_trainer
-- trl
 - unsloth
 - sft
 licence: license
 ---
 # Model Card for sft_best_simplification
-This model is a fine-tuned version of [unsloth/mistral-7b-v0.3-bnb-4bit](https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -28,7 +28,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_best_simplification/runs/xclah0bd)
 This model was trained with SFT.
@@ -36,9 +36,9 @@ This model was trained with SFT.
 ### Framework versions
 - TRL: 0.23.0
-- Transformers: 4.56.2
 - Pytorch: 2.8.0
-- Datasets: 3.6.0
 - Tokenizers: 0.22.1
 ## Citations

 ---
+base_model: unsloth/mistral-7b-instruct-v0.3-bnb-4bit
 library_name: transformers
 model_name: sft_best_simplification
 tags:
 - generated_from_trainer
 - unsloth
+- trl
 - sft
 licence: license
 ---
 # Model Card for sft_best_simplification
+This model is a fine-tuned version of [unsloth/mistral-7b-instruct-v0.3-bnb-4bit](https://huggingface.co/unsloth/mistral-7b-instruct-v0.3-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_best_simplification/runs/9amgp9pe)
 This model was trained with SFT.
 ### Framework versions
 - TRL: 0.23.0
+- Transformers: 4.57.1
 - Pytorch: 2.8.0
+- Datasets: 4.3.0
 - Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,9 +1,16 @@
 {
   "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -20,18 +27,19 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
     "o_proj",
-    "q_proj",
-    "up_proj",
     "v_proj",
     "k_proj",
-    "down_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

 {
+  "alora_invocation_tokens": null,
   "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": {
+    "base_model_class": "MistralForCausalLM",
+    "parent_library": "transformers.models.mistral.modeling_mistral",
+    "unsloth_fixed": true
+  },
+  "base_model_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
   "bias": "none",
   "corda_config": null,
+  "ensure_weight_tying": false,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "peft_version": "0.18.0",
   "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "down_proj",
     "v_proj",
     "k_proj",
+    "q_proj",
+    "gate_proj",
+    "up_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28230a2487bd07c2756c86edf080d2088cc4da5a5363f19f256fa30dfd1aecd3
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8264ef51349540391c8791d34755db267ace4ed3577d90965629495a6ee8544
 size 167832240

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,87 @@

+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}
+{%- set ns = namespace() %}
+{%- set ns.index = 0 %}
+{%- for message in loop_messages %}
+    {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
+        {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
+            {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+        {%- endif %}
+        {%- set ns.index = ns.index + 1 %}
+    {%- endif %}
+{%- endfor %}
+{{- bos_token }}
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+            {%- endif %}
+        {%- if loop.last and system_message is defined %}
+            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
+        {%- else %}
+            {{- "[INST] " + message["content"] + "[/INST]" }}
+        {%- endif %}
+    {%- elif message.tool_calls is defined and message.tool_calls is not none %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in message.tool_calls %}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif message["role"] == "assistant" %}
+        {{- " " + message["content"]|trim + eos_token}}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 8192,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {
@@ -6969,6 +6964,12 @@
           "id": "A",
           "type_id": 0
         }
       }
     ],
     "pair": [
@@ -6984,6 +6985,12 @@
           "type_id": 0
         }
       },
       {
         "SpecialToken": {
           "id": "<s>",
@@ -6995,9 +7002,24 @@
           "id": "B",
           "type_id": 1
         }
       }
     ],
     "special_tokens": {
       "<s>": {
         "id": "<s>",
         "ids": [

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
           "id": "A",
           "type_id": 0
         }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
       }
     ],
     "pair": [
           "type_id": 0
         }
       },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
+      },
       {
         "SpecialToken": {
           "id": "<s>",
           "id": "B",
           "type_id": 1
         }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 1
+        }
       }
     ],
     "special_tokens": {
+      "</s>": {
+        "id": "</s>",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "</s>"
+        ]
+      },
       "<s>": {
         "id": "<s>",
         "ids": [

tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "add_bos_token": true,
-  "add_eos_token": false,
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {

 {
   "add_bos_token": true,
+  "add_eos_token": true,
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa0397abdcbbf844c1309900b3534b13a2d4be220175c178ba89eb7c9555adf1
-size 6289

 version https://git-lfs.github.com/spec/v1
+oid sha256:91b195bbee031377a98da390b75c70b5f638541cb5f65dfa6119007797723266
+size 6353