elliotthwangmsa committed on
Commit
55cf78b
·
verified ·
1 Parent(s): 140b5d9

elliotthwangmsa/gemma-3-270m-it-tw-train_ouputs

Browse files
README.md CHANGED
@@ -1,17 +1,18 @@
1
  ---
2
- base_model: mistralai/Mistral-7B-Instruct-v0.3
3
  library_name: transformers
4
  model_name: outputs
5
  tags:
6
  - generated_from_trainer
7
  - trl
 
8
  - sft
9
  licence: license
10
  ---
11
 
12
  # Model Card for outputs
13
 
14
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
@@ -34,10 +35,10 @@ This model was trained with SFT.
34
 
35
  ### Framework versions
36
 
37
- - TRL: 0.23.1
38
- - Transformers: 4.57.0
39
- - Pytorch: 2.8.0+cu126
40
- - Datasets: 4.2.0
41
  - Tokenizers: 0.22.1
42
 
43
  ## Citations
 
1
  ---
2
+ base_model: unsloth/gemma-3-270m-it
3
  library_name: transformers
4
  model_name: outputs
5
  tags:
6
  - generated_from_trainer
7
  - trl
8
+ - unsloth
9
  - sft
10
  licence: license
11
  ---
12
 
13
  # Model Card for outputs
14
 
15
+ This model is a fine-tuned version of [unsloth/gemma-3-270m-it](https://huggingface.co/unsloth/gemma-3-270m-it).
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
 
35
 
36
  ### Framework versions
37
 
38
+ - TRL: 0.22.2
39
+ - Transformers: 4.56.2
40
+ - PyTorch: 2.9.0+cu126
41
+ - Datasets: 3.6.0
42
  - Tokenizers: 0.22.1
43
 
44
  ## Citations
adapter_config.json CHANGED
@@ -1,9 +1,16 @@
1
  {
 
2
  "alpha_pattern": {},
3
- "auto_mapping": null,
4
- "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
 
 
 
 
 
5
  "bias": "none",
6
  "corda_config": null,
 
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -13,23 +20,26 @@
13
  "layers_pattern": null,
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
- "lora_alpha": 16,
17
  "lora_bias": false,
18
- "lora_dropout": 0.1,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
23
  "qalora_group_size": 16,
24
- "r": 8,
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
 
28
  "q_proj",
 
29
  "v_proj",
30
- "gate_proj",
31
- "o_proj",
32
- "k_proj"
33
  ],
34
  "target_parameters": null,
35
  "task_type": "CAUSAL_LM",
 
1
  {
2
+ "alora_invocation_tokens": null,
3
  "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Gemma3ForCausalLM",
7
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "unsloth/gemma-3-270m-it",
11
  "bias": "none",
12
  "corda_config": null,
13
+ "ensure_weight_tying": false,
14
  "eva_config": null,
15
  "exclude_modules": null,
16
  "fan_in_fan_out": false,
 
20
  "layers_pattern": null,
21
  "layers_to_transform": null,
22
  "loftq_config": {},
23
+ "lora_alpha": 128,
24
  "lora_bias": false,
25
+ "lora_dropout": 0,
26
  "megatron_config": null,
27
  "megatron_core": "megatron.core",
28
  "modules_to_save": null,
29
  "peft_type": "LORA",
30
+ "peft_version": "0.18.0",
31
  "qalora_group_size": 16,
32
+ "r": 128,
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "gate_proj",
37
+ "down_proj",
38
  "q_proj",
39
+ "up_proj",
40
  "v_proj",
41
+ "k_proj",
42
+ "o_proj"
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f3f02ce2969b822edb0f634b027fef3667fe0298d57ce3cfba92247d0fd986d
3
- size 46179856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6746462774e23107b542a10bfff42d76f63074a2a3c7423b8d667db9683c6775
3
+ size 121537408
chat_template.jinja CHANGED
@@ -1,87 +1,47 @@
1
- {%- if messages[0]["role"] == "system" %}
2
- {%- set system_message = messages[0]["content"] %}
3
- {%- set loop_messages = messages[1:] %}
4
- {%- else %}
5
- {%- set loop_messages = messages %}
6
- {%- endif %}
7
- {%- if not tools is defined %}
8
- {%- set tools = none %}
9
- {%- endif %}
10
- {%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
11
 
12
- {#- This block checks for alternating user/assistant messages, skipping tool calling messages #}
13
- {%- set ns = namespace() %}
14
- {%- set ns.index = 0 %}
15
- {%- for message in loop_messages %}
16
- {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
17
- {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
18
- {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
19
- {%- endif %}
20
- {%- set ns.index = ns.index + 1 %}
21
- {%- endif %}
22
- {%- endfor %}
23
 
24
- {{- bos_token }}
25
- {%- for message in loop_messages %}
26
- {%- if message["role"] == "user" %}
27
- {%- if tools is not none and (message == user_messages[-1]) %}
28
- {{- "[AVAILABLE_TOOLS] [" }}
29
- {%- for tool in tools %}
30
- {%- set tool = tool.function %}
31
- {{- '{"type": "function", "function": {' }}
32
- {%- for key, val in tool.items() if key != "return" %}
33
- {%- if val is string %}
34
- {{- '"' + key + '": "' + val + '"' }}
35
- {%- else %}
36
- {{- '"' + key + '": ' + val|tojson }}
37
- {%- endif %}
38
- {%- if not loop.last %}
39
- {{- ", " }}
40
- {%- endif %}
41
- {%- endfor %}
42
- {{- "}}" }}
43
- {%- if not loop.last %}
44
- {{- ", " }}
45
- {%- else %}
46
- {{- "]" }}
47
- {%- endif %}
48
- {%- endfor %}
49
- {{- "[/AVAILABLE_TOOLS]" }}
50
- {%- endif %}
51
- {%- if loop.last and system_message is defined %}
52
- {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
53
- {%- else %}
54
- {{- "[INST] " + message["content"] + "[/INST]" }}
55
- {%- endif %}
56
- {%- elif message.tool_calls is defined and message.tool_calls is not none %}
57
- {{- "[TOOL_CALLS] [" }}
58
- {%- for tool_call in message.tool_calls %}
59
- {%- set out = tool_call.function|tojson %}
60
- {{- out[:-1] }}
61
- {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
62
- {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
63
- {%- endif %}
64
- {{- ', "id": "' + tool_call.id + '"}' }}
65
- {%- if not loop.last %}
66
- {{- ", " }}
67
- {%- else %}
68
- {{- "]" + eos_token }}
69
- {%- endif %}
70
- {%- endfor %}
71
- {%- elif message["role"] == "assistant" %}
72
- {{- " " + message["content"]|trim + eos_token}}
73
- {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
74
- {%- if message.content is defined and message.content.content is defined %}
75
- {%- set content = message.content.content %}
76
- {%- else %}
77
- {%- set content = message.content %}
78
- {%- endif %}
79
- {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
80
- {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
81
- {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
82
- {%- endif %}
83
- {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
84
- {%- else %}
85
- {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
86
- {%- endif %}
87
- {%- endfor %}
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
 
 
 
 
 
 
5
 
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
 
 
 
 
 
 
 
 
9
 
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{ '<start_of_turn>model
46
+ ' }}
47
+ {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -1,13 +1,23 @@
1
  {
 
2
  "bos_token": {
3
- "content": "<s>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
 
9
  "eos_token": {
10
- "content": "</s>",
 
 
 
 
 
 
 
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
1
  {
2
+ "boi_token": "<start_of_image>",
3
  "bos_token": {
4
+ "content": "<bos>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ "eoi_token": "<end_of_image>",
11
  "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
3
- size 3671968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
3
- size 587404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efcae7f9c8894a2eaa0775868e0f57932afe307f1981e5f433ceab68652e9f28
3
- size 6161
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6052cd962b5b0eacab65966b80bc5dac0bfee4b3f6b7ea0ce7c0d5a0a37f6ed
3
+ size 6225