elliotthwangmsa/gemma-3-270m-it-tw-train_ouputs

Browse files

Files changed (9)

README.md +7 -6
adapter_config.json +18 -8
adapter_model.safetensors +2 -2
chat_template.jinja +45 -85
special_tokens_map.json +12 -2
tokenizer.json +2 -2
tokenizer.model +2 -2
tokenizer_config.json +0 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,17 +1,18 @@
 ---
-base_model: mistralai/Mistral-7B-Instruct-v0.3
 library_name: transformers
 model_name: outputs
 tags:
 - generated_from_trainer
 - trl
 - sft
 licence: license
 ---
 # Model Card for outputs
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -34,10 +35,10 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.23.1
-- Transformers: 4.57.0
-- Pytorch: 2.8.0+cu126
-- Datasets: 4.2.0
 - Tokenizers: 0.22.1
 ## Citations

 ---
+base_model: unsloth/gemma-3-270m-it
 library_name: transformers
 model_name: outputs
 tags:
 - generated_from_trainer
 - trl
+- unsloth
 - sft
 licence: license
 ---
 # Model Card for outputs
+This model is a fine-tuned version of [unsloth/gemma-3-270m-it](https://huggingface.co/unsloth/gemma-3-270m-it).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ### Framework versions
+- TRL: 0.22.2
+- Transformers: 4.56.2
+- Pytorch: 2.9.0+cu126
+- Datasets: 3.6.0
 - Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,9 +1,16 @@
 {
   "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -13,23 +20,26 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_bias": false,
-  "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
-  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "v_proj",
-    "gate_proj",
-    "o_proj",
-    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

 {
+  "alora_invocation_tokens": null,
   "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": {
+    "base_model_class": "Gemma3ForCausalLM",
+    "parent_library": "transformers.models.gemma3.modeling_gemma3",
+    "unsloth_fixed": true
+  },
+  "base_model_name_or_path": "unsloth/gemma-3-270m-it",
   "bias": "none",
   "corda_config": null,
+  "ensure_weight_tying": false,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 128,
   "lora_bias": false,
+  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "peft_version": "0.18.0",
   "qalora_group_size": 16,
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "down_proj",
     "q_proj",
+    "up_proj",
     "v_proj",
+    "k_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f3f02ce2969b822edb0f634b027fef3667fe0298d57ce3cfba92247d0fd986d
-size 46179856

 version https://git-lfs.github.com/spec/v1
+oid sha256:6746462774e23107b542a10bfff42d76f63074a2a3c7423b8d667db9683c6775
+size 121537408

chat_template.jinja CHANGED Viewed

@@ -1,87 +1,47 @@
-{%- if messages[0]["role"] == "system" %}
-    {%- set system_message = messages[0]["content"] %}
-    {%- set loop_messages = messages[1:] %}
-{%- else %}
-    {%- set loop_messages = messages %}
-{%- endif %}
-{%- if not tools is defined %}
-    {%- set tools = none %}
-{%- endif %}
-{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
-{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}
-{%- set ns = namespace() %}
-{%- set ns.index = 0 %}
-{%- for message in loop_messages %}
-    {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
-        {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
-            {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
-        {%- endif %}
-        {%- set ns.index = ns.index + 1 %}
-    {%- endif %}
-{%- endfor %}
-{{- bos_token }}
-{%- for message in loop_messages %}
-    {%- if message["role"] == "user" %}
-        {%- if tools is not none and (message == user_messages[-1]) %}
-            {{- "[AVAILABLE_TOOLS] [" }}
-            {%- for tool in tools %}
-                {%- set tool = tool.function %}
-                {{- '{"type": "function", "function": {' }}
-                {%- for key, val in tool.items() if key != "return" %}
-                    {%- if val is string %}
-                        {{- '"' + key + '": "' + val + '"' }}
-                    {%- else %}
-                        {{- '"' + key + '": ' + val|tojson }}
-                    {%- endif %}
-                    {%- if not loop.last %}
-                        {{- ", " }}
-                    {%- endif %}
-                {%- endfor %}
-                {{- "}}" }}
-                {%- if not loop.last %}
-                    {{- ", " }}
-                {%- else %}
-                    {{- "]" }}
-                {%- endif %}
-            {%- endfor %}
-            {{- "[/AVAILABLE_TOOLS]" }}
-            {%- endif %}
-        {%- if loop.last and system_message is defined %}
-            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
-        {%- else %}
-            {{- "[INST] " + message["content"] + "[/INST]" }}
-        {%- endif %}
-    {%- elif message.tool_calls is defined and message.tool_calls is not none %}
-        {{- "[TOOL_CALLS] [" }}
-        {%- for tool_call in message.tool_calls %}
-            {%- set out = tool_call.function|tojson %}
-            {{- out[:-1] }}
-            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
-                {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
-            {%- endif %}
-            {{- ', "id": "' + tool_call.id + '"}' }}
-            {%- if not loop.last %}
-                {{- ", " }}
-            {%- else %}
-                {{- "]" + eos_token }}
-            {%- endif %}
-        {%- endfor %}
-    {%- elif message["role"] == "assistant" %}
-        {{- " " + message["content"]|trim + eos_token}}
-    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
-        {%- if message.content is defined and message.content.content is defined %}
-            {%- set content = message.content.content %}
-        {%- else %}
-            {%- set content = message.content %}
-        {%- endif %}
-        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
-        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
-            {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
-        {%- endif %}
-        {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
-    {%- else %}
-        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
-    {%- endif %}
-{%- endfor %}

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{ '<start_of_turn>model
+' }}
+{%- endif -%}

special_tokens_map.json CHANGED Viewed

@@ -1,13 +1,23 @@
 {
   "bos_token": {
-    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
+  "boi_token": "<start_of_image>",
   "bos_token": {
+    "content": "<bos>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
+  "eoi_token": "<end_of_image>",
   "eos_token": {
+    "content": "<end_of_turn>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
-size 3671968

 version https://git-lfs.github.com/spec/v1
+oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+size 33384568

tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
-size 587404

 version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efcae7f9c8894a2eaa0775868e0f57932afe307f1981e5f433ceab68652e9f28
-size 6161

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6052cd962b5b0eacab65966b80bc5dac0bfee4b3f6b7ea0ce7c0d5a0a37f6ed
+size 6225