TymofiiNasobko committed on
Commit
a287318
·
verified ·
1 Parent(s): c6b79b3

TymofiiNasobko/Mamay-function-calling-no-thinking

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: INSAIT-Institute/MamayLM-Gemma-3-4B-IT-v1.0
3
+ library_name: transformers
4
+ model_name: Mamay-function-calling-no-thinking
5
+ tags:
6
+ - generated_from_trainer
7
+ - sft
8
+ - trl
9
+ license: gemma
10
+ ---
11
+
12
+ # Model Card for Mamay-function-calling-no-thinking
13
+
14
+ This model is a fine-tuned version of [INSAIT-Institute/MamayLM-Gemma-3-4B-IT-v1.0](https://huggingface.co/INSAIT-Institute/MamayLM-Gemma-3-4B-IT-v1.0).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="TymofiiNasobko/Mamay-function-calling-no-thinking", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.26.2
38
+ - Transformers: 4.57.4
39
+ - Pytorch: 2.9.0+cu126
40
+ - Datasets: 4.5.0
41
+ - Tokenizers: 0.22.2
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
adapter_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "INSAIT-Institute/MamayLM-Gemma-3-4B-IT-v1.0",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 64,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 32,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "v_proj",
34
+ "fc2",
35
+ "gate_proj",
36
+ "down_proj",
37
+ "k_proj",
38
+ "fc1",
39
+ "o_proj",
40
+ "q_proj",
41
+ "out_proj"
42
+ ],
43
+ "target_parameters": null,
44
+ "task_type": "CAUSAL_LM",
45
+ "trainable_token_indices": null,
46
+ "use_dora": false,
47
+ "use_qalora": false,
48
+ "use_rslora": false
49
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe142e44fb0c5f04f5fcfebd4e31e023d37f9242352f5fb3977351665dc2b4d4
3
+ size 154117112
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
chat_template.jinja ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if tools -%}
3
+ {#- Skip messages[0] only when it is the system turn; otherwise the first non-system message would be dropped from the prompt. -#}{%- set loop_messages = messages[1:] if messages[0]['role'] == "system" else messages -%}
4
+ {{- '<start_of_turn>' + 'system' + '
5
+ ' -}}
6
+ {%- if messages[0]['role'] == "system" -%}
7
+ {%- if messages[0]['content'] is string -%}
8
+ {{- messages[0]['content'] + '
9
+ ' + '
10
+ ' -}}
11
+ {%- else -%}
12
+ {{- messages[0]['content'][0]['text'] + '
13
+ ' + '
14
+ ' -}}
15
+ {%- endif -%}
16
+ {%- endif -%}
17
+ {{- "# Tools
18
+
19
+ You may call one or more functions to assist with the user query.
20
+
21
+ You are provided with function signatures within <tools></tools> XML tags:
22
+ <tools>" -}}
23
+ {%- for tool in tools -%}
24
+ {{- "
25
+ " -}}
26
+ {{- tool | tojson -}}
27
+ {%- endfor -%}
28
+ {{- '<end_of_turn>' + '
29
+ ' -}}
30
+ {%- elif messages[0]['role'] == "system" -%}
31
+ {%- set loop_messages = messages[1:] -%}
32
+ {{- '<start_of_turn>' + 'system' + '
33
+ ' -}}
34
+ {%- if messages[0]['content'] is string -%}
35
+ {{- messages[0]['content'] -}}
36
+ {%- else -%}
37
+ {{- messages[0]['content'][0]['text'] -}}
38
+ {%- endif -%}
39
+ {{- '<end_of_turn>
40
+ ' }}
41
+ {%- else -%}
42
+ {%- set loop_messages = messages -%}
43
+ {%- endif -%}
44
+ {#- Render each remaining message as one turn: 'assistant' is mapped to the 'model' role, and model text, tool calls and the closing end-of-turn are wrapped in generation markers. NOTE(review): content that is neither a string nor iterable reaches raise_exception below; confirm tool-call-only assistant turns always carry string content. -#}{%- for message in loop_messages -%}
45
+ {%- if message['role'] == 'assistant' or message['role'] == 'model' -%}
46
+ {%- set role = "model" -%}
47
+ {%- else -%}
48
+ {%- set role = message['role'] -%}
49
+ {%- endif -%}
50
+ {{ '<start_of_turn>' + role + '
51
+ ' }}
52
+ {%- if message['content'] is string -%}
53
+ {%- if role == "model" -%}
54
+ {% generation %}
55
+ {{- message['content'] | trim -}}
56
+ {% if message['tool_calls'] %}
57
+ {% for tool_call in message['tool_calls'] %}
58
+ {% if tool_call['function'] is defined %}
59
+ {% set tool_call = tool_call['function'] %}
60
+ {% endif %}
61
+ {{- "
62
+ " -}}
63
+ <tool_call>
64
+ {"name": "{{ tool_call['name'] }}", "arguments": {{ tool_call['arguments'] | tojson }}}
65
+ </tool_call>
66
+ {%- endfor -%}
67
+ {%- endif -%}
68
+ {% endgeneration %}
69
+ {%- else -%}
70
+ {{ message['content'] | trim }}
71
+ {%- endif -%}
72
+ {%- elif message['content'] is iterable -%}
73
+ {%- for item in message['content'] -%}
74
+ {%- if item['type'] == 'image' -%}
75
+ {{ '<start_of_image>' }}
76
+ {%- elif item['type'] == 'text' -%}
77
+ {%- if role == "model" -%}
78
+ {% generation %}
79
+ {{- item['text'] | trim -}}
80
+ {% if message['tool_calls'] %}
81
+ {% for tool_call in message['tool_calls'] %}
82
+ {% if tool_call['function'] is defined %}
83
+ {% set tool_call = tool_call['function'] %}
84
+ {% endif %}
85
+ {{- "
86
+ " -}}
87
+ <tool_call>
88
+ {"name": "{{ tool_call['name'] }}", "arguments": {{ tool_call['arguments'] | tojson }}}
89
+ </tool_call>
90
+ {%- endfor -%}
91
+ {%- endif -%}
92
+ {% endgeneration %}
93
+ {%- else -%}
94
+ {{ item['text'] | trim }}
95
+ {%- endif -%}
96
+ {%- endif -%}
97
+ {%- endfor -%}
98
+ {%- else -%}
99
+ {{ raise_exception("Invalid content type") }}
100
+ {%- endif -%}
101
+ {%- if role == "model" -%}
102
+ {% generation %}
103
+ {{ '<end_of_turn>
104
+ ' }}{% endgeneration %}
105
+ {%- else -%}
106
+ {{ '<end_of_turn>
107
+ ' }}
108
+ {%- endif -%}
109
+ {%- endfor -%}
110
+ {%- if add_generation_prompt -%}
111
+ {{'<start_of_turn>model
112
+ '}}
113
+ {%- endif -%}
special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": "<end_of_turn>",
12
+ "image_token": "<image_soft_token>",
13
+ "pad_token": {
14
+ "content": "<pad>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "unk_token": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e951d7a26ab64624a9411db3f91fdab169318cdd66e3cbaaa5b2195bd87981f4
3
+ size 6289