muhammedbasilsk committed on
Commit
c8d64c8
·
verified ·
1 Parent(s): 9c7f8bf

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. README.md +61 -3
  3. adapter_config.json +46 -0
  4. adapter_model.safetensors +3 -0
  5. chat_template.jinja +279 -0
  6. checkpoint-16/README.md +209 -0
  7. checkpoint-16/adapter_config.json +46 -0
  8. checkpoint-16/adapter_model.safetensors +3 -0
  9. checkpoint-16/chat_template.jinja +279 -0
  10. checkpoint-16/optimizer.pt +3 -0
  11. checkpoint-16/rng_state.pth +3 -0
  12. checkpoint-16/scaler.pt +3 -0
  13. checkpoint-16/scheduler.pt +3 -0
  14. checkpoint-16/tokenizer.json +3 -0
  15. checkpoint-16/tokenizer_config.json +26 -0
  16. checkpoint-16/trainer_state.json +194 -0
  17. checkpoint-16/training_args.bin +3 -0
  18. checkpoint-24/README.md +209 -0
  19. checkpoint-24/adapter_config.json +46 -0
  20. checkpoint-24/adapter_model.safetensors +3 -0
  21. checkpoint-24/chat_template.jinja +279 -0
  22. checkpoint-24/optimizer.pt +3 -0
  23. checkpoint-24/rng_state.pth +3 -0
  24. checkpoint-24/scaler.pt +3 -0
  25. checkpoint-24/scheduler.pt +3 -0
  26. checkpoint-24/tokenizer.json +3 -0
  27. checkpoint-24/tokenizer_config.json +26 -0
  28. checkpoint-24/trainer_state.json +274 -0
  29. checkpoint-24/training_args.bin +3 -0
  30. checkpoint-32/README.md +209 -0
  31. checkpoint-32/adapter_config.json +46 -0
  32. checkpoint-32/adapter_model.safetensors +3 -0
  33. checkpoint-32/chat_template.jinja +279 -0
  34. checkpoint-32/optimizer.pt +3 -0
  35. checkpoint-32/rng_state.pth +3 -0
  36. checkpoint-32/scaler.pt +3 -0
  37. checkpoint-32/scheduler.pt +3 -0
  38. checkpoint-32/tokenizer.json +3 -0
  39. checkpoint-32/tokenizer_config.json +26 -0
  40. checkpoint-32/trainer_state.json +354 -0
  41. checkpoint-32/training_args.bin +3 -0
  42. checkpoint-40/README.md +209 -0
  43. checkpoint-40/adapter_config.json +46 -0
  44. checkpoint-40/adapter_model.safetensors +3 -0
  45. checkpoint-40/chat_template.jinja +279 -0
  46. checkpoint-40/optimizer.pt +3 -0
  47. checkpoint-40/rng_state.pth +3 -0
  48. checkpoint-40/scaler.pt +3 -0
  49. checkpoint-40/scheduler.pt +3 -0
  50. checkpoint-40/tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-24/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-8/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,61 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: peft
4
+ model_name: session_756892ca
5
+ tags:
6
+ - base_model:adapter:google/functiongemma-270m-it
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ license: apache-2.0
12
+ pipeline_tag: text-generation
13
+ ---
14
+
15
+ # Model Card for session_756892ca
16
+
17
+ This model is a fine-tuned version of [google/functiongemma-270m-it](https://huggingface.co/google/functiongemma-270m-it).
18
+ It has been trained using [TRL](https://github.com/huggingface/trl).
19
+
20
+ ## Quick start
21
+
22
+ ```python
23
+ from transformers import pipeline
24
+
25
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
26
+ generator = pipeline("text-generation", model="muhammedbasilsk/session_756892ca", device="cuda")  # adjust if this repository's Hub id differs
27
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
28
+ print(output["generated_text"])
29
+ ```
30
+
31
+ ## Training procedure
32
+
33
+
34
+
35
+
36
+ This model was trained with SFT.
37
+
38
+ ### Framework versions
39
+
40
+ - PEFT: 0.18.1
41
+ - TRL: 0.28.0
42
+ - Transformers: 5.2.0
43
+ - PyTorch: 2.5.0a0+872d972e41.nv24.8
44
+ - Datasets: 4.5.0
45
+ - Tokenizers: 0.22.2
46
+
47
+ ## Citations
48
+
49
+
50
+
51
+ Cite TRL as:
52
+
53
+ ```bibtex
54
+ @software{vonwerra2020trl,
55
+ title = {{TRL: Transformers Reinforcement Learning}},
56
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
57
+ license = {Apache-2.0},
58
+ url = {https://github.com/huggingface/trl},
59
+ year = {2020}
60
+ }
61
+ ```
adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/functiongemma-270m-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "down_proj",
34
+ "k_proj",
35
+ "gate_proj",
36
+ "q_proj",
37
+ "v_proj",
38
+ "up_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe05ace61c76eb333d8758f388bf2e0fa09459ea940ba63d3325df91f5f661
3
+ size 7626520
chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-16/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/functiongemma-270m-it
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for session_756892ca (checkpoint-16)
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** LoRA adapter (PEFT) for causal language modeling
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** google/functiongemma-270m-it
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-16/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/functiongemma-270m-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "down_proj",
34
+ "k_proj",
35
+ "gate_proj",
36
+ "q_proj",
37
+ "v_proj",
38
+ "up_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-16/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe05ace61c76eb333d8758f388bf2e0fa09459ea940ba63d3325df91f5f661
3
+ size 7626520
checkpoint-16/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-16/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8285478ede1915124e35284c5c7c3317590b9dc0cc5e2f1f3841ea1dc5e3af7
3
+ size 1768
checkpoint-16/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48281cd0fe28b8c2ba64e7208440581c1b7b5eac386f454488eddb4c0cc49bd3
3
+ size 14244
checkpoint-16/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb7a1c728c0ccc60d6d579d06655948b0fb812b6a978866573f12eb2a44a8e3
3
+ size 988
checkpoint-16/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bce2d3a2f8f216440846a555f3fa52ce4ec5a8eae429c1264154fbef75bbeae
3
+ size 1064
checkpoint-16/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578
3
+ size 33384774
checkpoint-16/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-16/trainer_state.json ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 16,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": NaN,
14
+ "epoch": 0.125,
15
+ "grad_norm": NaN,
16
+ "learning_rate": 5e-05,
17
+ "loss": 0.0,
18
+ "mean_token_accuracy": 0.0,
19
+ "num_tokens": 657.0,
20
+ "step": 1
21
+ },
22
+ {
23
+ "entropy": NaN,
24
+ "epoch": 0.25,
25
+ "grad_norm": NaN,
26
+ "learning_rate": 5e-05,
27
+ "loss": 0.0,
28
+ "mean_token_accuracy": 0.0,
29
+ "num_tokens": 1319.0,
30
+ "step": 2
31
+ },
32
+ {
33
+ "entropy": NaN,
34
+ "epoch": 0.375,
35
+ "grad_norm": NaN,
36
+ "learning_rate": 5e-05,
37
+ "loss": 0.0,
38
+ "mean_token_accuracy": 0.0,
39
+ "num_tokens": 1964.0,
40
+ "step": 3
41
+ },
42
+ {
43
+ "entropy": NaN,
44
+ "epoch": 0.5,
45
+ "grad_norm": NaN,
46
+ "learning_rate": 5e-05,
47
+ "loss": 0.0,
48
+ "mean_token_accuracy": 0.0,
49
+ "num_tokens": 2611.0,
50
+ "step": 4
51
+ },
52
+ {
53
+ "entropy": NaN,
54
+ "epoch": 0.625,
55
+ "grad_norm": NaN,
56
+ "learning_rate": 5e-05,
57
+ "loss": 0.0,
58
+ "mean_token_accuracy": 0.0,
59
+ "num_tokens": 3257.0,
60
+ "step": 5
61
+ },
62
+ {
63
+ "entropy": NaN,
64
+ "epoch": 0.75,
65
+ "grad_norm": NaN,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.0,
68
+ "mean_token_accuracy": 0.0,
69
+ "num_tokens": 3899.0,
70
+ "step": 6
71
+ },
72
+ {
73
+ "entropy": NaN,
74
+ "epoch": 0.875,
75
+ "grad_norm": NaN,
76
+ "learning_rate": 5e-05,
77
+ "loss": 0.0,
78
+ "mean_token_accuracy": 0.0,
79
+ "num_tokens": 4545.0,
80
+ "step": 7
81
+ },
82
+ {
83
+ "entropy": NaN,
84
+ "epoch": 1.0,
85
+ "grad_norm": NaN,
86
+ "learning_rate": 5e-05,
87
+ "loss": 0.0,
88
+ "mean_token_accuracy": 0.0,
89
+ "num_tokens": 5200.0,
90
+ "step": 8
91
+ },
92
+ {
93
+ "entropy": NaN,
94
+ "epoch": 1.125,
95
+ "grad_norm": NaN,
96
+ "learning_rate": 5e-05,
97
+ "loss": 0.0,
98
+ "mean_token_accuracy": 0.0,
99
+ "num_tokens": 5846.0,
100
+ "step": 9
101
+ },
102
+ {
103
+ "entropy": NaN,
104
+ "epoch": 1.25,
105
+ "grad_norm": NaN,
106
+ "learning_rate": 5e-05,
107
+ "loss": 0.0,
108
+ "mean_token_accuracy": 0.0,
109
+ "num_tokens": 6478.0,
110
+ "step": 10
111
+ },
112
+ {
113
+ "entropy": NaN,
114
+ "epoch": 1.375,
115
+ "grad_norm": NaN,
116
+ "learning_rate": 5e-05,
117
+ "loss": 0.0,
118
+ "mean_token_accuracy": 0.0,
119
+ "num_tokens": 7133.0,
120
+ "step": 11
121
+ },
122
+ {
123
+ "entropy": NaN,
124
+ "epoch": 1.5,
125
+ "grad_norm": NaN,
126
+ "learning_rate": 5e-05,
127
+ "loss": 0.0,
128
+ "mean_token_accuracy": 0.0,
129
+ "num_tokens": 7795.0,
130
+ "step": 12
131
+ },
132
+ {
133
+ "entropy": NaN,
134
+ "epoch": 1.625,
135
+ "grad_norm": NaN,
136
+ "learning_rate": 5e-05,
137
+ "loss": 0.0,
138
+ "mean_token_accuracy": 0.0,
139
+ "num_tokens": 8438.0,
140
+ "step": 13
141
+ },
142
+ {
143
+ "entropy": NaN,
144
+ "epoch": 1.75,
145
+ "grad_norm": NaN,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.0,
148
+ "mean_token_accuracy": 0.0,
149
+ "num_tokens": 9093.0,
150
+ "step": 14
151
+ },
152
+ {
153
+ "entropy": NaN,
154
+ "epoch": 1.875,
155
+ "grad_norm": NaN,
156
+ "learning_rate": 5e-05,
157
+ "loss": 0.0,
158
+ "mean_token_accuracy": 0.0,
159
+ "num_tokens": 9750.0,
160
+ "step": 15
161
+ },
162
+ {
163
+ "entropy": NaN,
164
+ "epoch": 2.0,
165
+ "grad_norm": NaN,
166
+ "learning_rate": 5e-05,
167
+ "loss": 0.0,
168
+ "mean_token_accuracy": 0.0,
169
+ "num_tokens": 10400.0,
170
+ "step": 16
171
+ }
172
+ ],
173
+ "logging_steps": 1,
174
+ "max_steps": 40,
175
+ "num_input_tokens_seen": 0,
176
+ "num_train_epochs": 5,
177
+ "save_steps": 500,
178
+ "stateful_callbacks": {
179
+ "TrainerControl": {
180
+ "args": {
181
+ "should_epoch_stop": false,
182
+ "should_evaluate": false,
183
+ "should_log": false,
184
+ "should_save": true,
185
+ "should_training_stop": false
186
+ },
187
+ "attributes": {}
188
+ }
189
+ },
190
+ "total_flos": 6378809548800.0,
191
+ "train_batch_size": 1,
192
+ "trial_name": null,
193
+ "trial_params": null
194
+ }
checkpoint-16/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e135689bbe604b79dfc158561d43c7e13c80ef8bbef81875af136d9754f1067
3
+ size 5240
checkpoint-24/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/functiongemma-270m-it
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-24/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/functiongemma-270m-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "down_proj",
34
+ "k_proj",
35
+ "gate_proj",
36
+ "q_proj",
37
+ "v_proj",
38
+ "up_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-24/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe05ace61c76eb333d8758f388bf2e0fa09459ea940ba63d3325df91f5f661
3
+ size 7626520
checkpoint-24/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-24/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8285478ede1915124e35284c5c7c3317590b9dc0cc5e2f1f3841ea1dc5e3af7
3
+ size 1768
checkpoint-24/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19bc200c9d74abb76593e975ad86228cf0eef18fe00262974e0b7a5e273c4a0b
3
+ size 14244
checkpoint-24/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c3724409d2fe30eccc5356c1dfff959d3c61fb26c9258b5bb4eba78cbb7af3b
3
+ size 988
checkpoint-24/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bce2d3a2f8f216440846a555f3fa52ce4ec5a8eae429c1264154fbef75bbeae
3
+ size 1064
checkpoint-24/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578
3
+ size 33384774
checkpoint-24/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-24/trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 24,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": NaN,
14
+ "epoch": 0.125,
15
+ "grad_norm": NaN,
16
+ "learning_rate": 5e-05,
17
+ "loss": 0.0,
18
+ "mean_token_accuracy": 0.0,
19
+ "num_tokens": 657.0,
20
+ "step": 1
21
+ },
22
+ {
23
+ "entropy": NaN,
24
+ "epoch": 0.25,
25
+ "grad_norm": NaN,
26
+ "learning_rate": 5e-05,
27
+ "loss": 0.0,
28
+ "mean_token_accuracy": 0.0,
29
+ "num_tokens": 1319.0,
30
+ "step": 2
31
+ },
32
+ {
33
+ "entropy": NaN,
34
+ "epoch": 0.375,
35
+ "grad_norm": NaN,
36
+ "learning_rate": 5e-05,
37
+ "loss": 0.0,
38
+ "mean_token_accuracy": 0.0,
39
+ "num_tokens": 1964.0,
40
+ "step": 3
41
+ },
42
+ {
43
+ "entropy": NaN,
44
+ "epoch": 0.5,
45
+ "grad_norm": NaN,
46
+ "learning_rate": 5e-05,
47
+ "loss": 0.0,
48
+ "mean_token_accuracy": 0.0,
49
+ "num_tokens": 2611.0,
50
+ "step": 4
51
+ },
52
+ {
53
+ "entropy": NaN,
54
+ "epoch": 0.625,
55
+ "grad_norm": NaN,
56
+ "learning_rate": 5e-05,
57
+ "loss": 0.0,
58
+ "mean_token_accuracy": 0.0,
59
+ "num_tokens": 3257.0,
60
+ "step": 5
61
+ },
62
+ {
63
+ "entropy": NaN,
64
+ "epoch": 0.75,
65
+ "grad_norm": NaN,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.0,
68
+ "mean_token_accuracy": 0.0,
69
+ "num_tokens": 3899.0,
70
+ "step": 6
71
+ },
72
+ {
73
+ "entropy": NaN,
74
+ "epoch": 0.875,
75
+ "grad_norm": NaN,
76
+ "learning_rate": 5e-05,
77
+ "loss": 0.0,
78
+ "mean_token_accuracy": 0.0,
79
+ "num_tokens": 4545.0,
80
+ "step": 7
81
+ },
82
+ {
83
+ "entropy": NaN,
84
+ "epoch": 1.0,
85
+ "grad_norm": NaN,
86
+ "learning_rate": 5e-05,
87
+ "loss": 0.0,
88
+ "mean_token_accuracy": 0.0,
89
+ "num_tokens": 5200.0,
90
+ "step": 8
91
+ },
92
+ {
93
+ "entropy": NaN,
94
+ "epoch": 1.125,
95
+ "grad_norm": NaN,
96
+ "learning_rate": 5e-05,
97
+ "loss": 0.0,
98
+ "mean_token_accuracy": 0.0,
99
+ "num_tokens": 5846.0,
100
+ "step": 9
101
+ },
102
+ {
103
+ "entropy": NaN,
104
+ "epoch": 1.25,
105
+ "grad_norm": NaN,
106
+ "learning_rate": 5e-05,
107
+ "loss": 0.0,
108
+ "mean_token_accuracy": 0.0,
109
+ "num_tokens": 6478.0,
110
+ "step": 10
111
+ },
112
+ {
113
+ "entropy": NaN,
114
+ "epoch": 1.375,
115
+ "grad_norm": NaN,
116
+ "learning_rate": 5e-05,
117
+ "loss": 0.0,
118
+ "mean_token_accuracy": 0.0,
119
+ "num_tokens": 7133.0,
120
+ "step": 11
121
+ },
122
+ {
123
+ "entropy": NaN,
124
+ "epoch": 1.5,
125
+ "grad_norm": NaN,
126
+ "learning_rate": 5e-05,
127
+ "loss": 0.0,
128
+ "mean_token_accuracy": 0.0,
129
+ "num_tokens": 7795.0,
130
+ "step": 12
131
+ },
132
+ {
133
+ "entropy": NaN,
134
+ "epoch": 1.625,
135
+ "grad_norm": NaN,
136
+ "learning_rate": 5e-05,
137
+ "loss": 0.0,
138
+ "mean_token_accuracy": 0.0,
139
+ "num_tokens": 8438.0,
140
+ "step": 13
141
+ },
142
+ {
143
+ "entropy": NaN,
144
+ "epoch": 1.75,
145
+ "grad_norm": NaN,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.0,
148
+ "mean_token_accuracy": 0.0,
149
+ "num_tokens": 9093.0,
150
+ "step": 14
151
+ },
152
+ {
153
+ "entropy": NaN,
154
+ "epoch": 1.875,
155
+ "grad_norm": NaN,
156
+ "learning_rate": 5e-05,
157
+ "loss": 0.0,
158
+ "mean_token_accuracy": 0.0,
159
+ "num_tokens": 9750.0,
160
+ "step": 15
161
+ },
162
+ {
163
+ "entropy": NaN,
164
+ "epoch": 2.0,
165
+ "grad_norm": NaN,
166
+ "learning_rate": 5e-05,
167
+ "loss": 0.0,
168
+ "mean_token_accuracy": 0.0,
169
+ "num_tokens": 10400.0,
170
+ "step": 16
171
+ },
172
+ {
173
+ "entropy": NaN,
174
+ "epoch": 2.125,
175
+ "grad_norm": NaN,
176
+ "learning_rate": 5e-05,
177
+ "loss": 0.0,
178
+ "mean_token_accuracy": 0.0,
179
+ "num_tokens": 11040.0,
180
+ "step": 17
181
+ },
182
+ {
183
+ "entropy": NaN,
184
+ "epoch": 2.25,
185
+ "grad_norm": NaN,
186
+ "learning_rate": 5e-05,
187
+ "loss": 0.0,
188
+ "mean_token_accuracy": 0.0,
189
+ "num_tokens": 11694.0,
190
+ "step": 18
191
+ },
192
+ {
193
+ "entropy": NaN,
194
+ "epoch": 2.375,
195
+ "grad_norm": NaN,
196
+ "learning_rate": 5e-05,
197
+ "loss": 0.0,
198
+ "mean_token_accuracy": 0.0,
199
+ "num_tokens": 12341.0,
200
+ "step": 19
201
+ },
202
+ {
203
+ "entropy": NaN,
204
+ "epoch": 2.5,
205
+ "grad_norm": NaN,
206
+ "learning_rate": 5e-05,
207
+ "loss": 0.0,
208
+ "mean_token_accuracy": 0.0,
209
+ "num_tokens": 12988.0,
210
+ "step": 20
211
+ },
212
+ {
213
+ "entropy": NaN,
214
+ "epoch": 2.625,
215
+ "grad_norm": NaN,
216
+ "learning_rate": 5e-05,
217
+ "loss": 0.0,
218
+ "mean_token_accuracy": 0.0,
219
+ "num_tokens": 13632.0,
220
+ "step": 21
221
+ },
222
+ {
223
+ "entropy": NaN,
224
+ "epoch": 2.75,
225
+ "grad_norm": NaN,
226
+ "learning_rate": 5e-05,
227
+ "loss": 0.0,
228
+ "mean_token_accuracy": 0.0,
229
+ "num_tokens": 14285.0,
230
+ "step": 22
231
+ },
232
+ {
233
+ "entropy": NaN,
234
+ "epoch": 2.875,
235
+ "grad_norm": NaN,
236
+ "learning_rate": 5e-05,
237
+ "loss": 0.0,
238
+ "mean_token_accuracy": 0.0,
239
+ "num_tokens": 14938.0,
240
+ "step": 23
241
+ },
242
+ {
243
+ "entropy": NaN,
244
+ "epoch": 3.0,
245
+ "grad_norm": NaN,
246
+ "learning_rate": 5e-05,
247
+ "loss": 0.0,
248
+ "mean_token_accuracy": 0.0,
249
+ "num_tokens": 15600.0,
250
+ "step": 24
251
+ }
252
+ ],
253
+ "logging_steps": 1,
254
+ "max_steps": 40,
255
+ "num_input_tokens_seen": 0,
256
+ "num_train_epochs": 5,
257
+ "save_steps": 500,
258
+ "stateful_callbacks": {
259
+ "TrainerControl": {
260
+ "args": {
261
+ "should_epoch_stop": false,
262
+ "should_evaluate": false,
263
+ "should_log": false,
264
+ "should_save": true,
265
+ "should_training_stop": false
266
+ },
267
+ "attributes": {}
268
+ }
269
+ },
270
+ "total_flos": 9568214323200.0,
271
+ "train_batch_size": 1,
272
+ "trial_name": null,
273
+ "trial_params": null
274
+ }
checkpoint-24/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e135689bbe604b79dfc158561d43c7e13c80ef8bbef81875af136d9754f1067
3
+ size 5240
checkpoint-32/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/functiongemma-270m-it
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-32/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/functiongemma-270m-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "down_proj",
34
+ "k_proj",
35
+ "gate_proj",
36
+ "q_proj",
37
+ "v_proj",
38
+ "up_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-32/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe05ace61c76eb333d8758f388bf2e0fa09459ea940ba63d3325df91f5f661
3
+ size 7626520
checkpoint-32/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-32/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8285478ede1915124e35284c5c7c3317590b9dc0cc5e2f1f3841ea1dc5e3af7
3
+ size 1768
checkpoint-32/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95319ae8bc2bae0ef38b266865845435aa4e786ef113ca126058313b83bfb246
3
+ size 14244
checkpoint-32/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1704fab5b1c8157b353e01af58fc3762b070f56e6476e2ac1c612f659d7bff5
3
+ size 988
checkpoint-32/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bce2d3a2f8f216440846a555f3fa52ce4ec5a8eae429c1264154fbef75bbeae
3
+ size 1064
checkpoint-32/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578
3
+ size 33384774
checkpoint-32/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-32/trainer_state.json ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 32,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": NaN,
14
+ "epoch": 0.125,
15
+ "grad_norm": NaN,
16
+ "learning_rate": 5e-05,
17
+ "loss": 0.0,
18
+ "mean_token_accuracy": 0.0,
19
+ "num_tokens": 657.0,
20
+ "step": 1
21
+ },
22
+ {
23
+ "entropy": NaN,
24
+ "epoch": 0.25,
25
+ "grad_norm": NaN,
26
+ "learning_rate": 5e-05,
27
+ "loss": 0.0,
28
+ "mean_token_accuracy": 0.0,
29
+ "num_tokens": 1319.0,
30
+ "step": 2
31
+ },
32
+ {
33
+ "entropy": NaN,
34
+ "epoch": 0.375,
35
+ "grad_norm": NaN,
36
+ "learning_rate": 5e-05,
37
+ "loss": 0.0,
38
+ "mean_token_accuracy": 0.0,
39
+ "num_tokens": 1964.0,
40
+ "step": 3
41
+ },
42
+ {
43
+ "entropy": NaN,
44
+ "epoch": 0.5,
45
+ "grad_norm": NaN,
46
+ "learning_rate": 5e-05,
47
+ "loss": 0.0,
48
+ "mean_token_accuracy": 0.0,
49
+ "num_tokens": 2611.0,
50
+ "step": 4
51
+ },
52
+ {
53
+ "entropy": NaN,
54
+ "epoch": 0.625,
55
+ "grad_norm": NaN,
56
+ "learning_rate": 5e-05,
57
+ "loss": 0.0,
58
+ "mean_token_accuracy": 0.0,
59
+ "num_tokens": 3257.0,
60
+ "step": 5
61
+ },
62
+ {
63
+ "entropy": NaN,
64
+ "epoch": 0.75,
65
+ "grad_norm": NaN,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.0,
68
+ "mean_token_accuracy": 0.0,
69
+ "num_tokens": 3899.0,
70
+ "step": 6
71
+ },
72
+ {
73
+ "entropy": NaN,
74
+ "epoch": 0.875,
75
+ "grad_norm": NaN,
76
+ "learning_rate": 5e-05,
77
+ "loss": 0.0,
78
+ "mean_token_accuracy": 0.0,
79
+ "num_tokens": 4545.0,
80
+ "step": 7
81
+ },
82
+ {
83
+ "entropy": NaN,
84
+ "epoch": 1.0,
85
+ "grad_norm": NaN,
86
+ "learning_rate": 5e-05,
87
+ "loss": 0.0,
88
+ "mean_token_accuracy": 0.0,
89
+ "num_tokens": 5200.0,
90
+ "step": 8
91
+ },
92
+ {
93
+ "entropy": NaN,
94
+ "epoch": 1.125,
95
+ "grad_norm": NaN,
96
+ "learning_rate": 5e-05,
97
+ "loss": 0.0,
98
+ "mean_token_accuracy": 0.0,
99
+ "num_tokens": 5846.0,
100
+ "step": 9
101
+ },
102
+ {
103
+ "entropy": NaN,
104
+ "epoch": 1.25,
105
+ "grad_norm": NaN,
106
+ "learning_rate": 5e-05,
107
+ "loss": 0.0,
108
+ "mean_token_accuracy": 0.0,
109
+ "num_tokens": 6478.0,
110
+ "step": 10
111
+ },
112
+ {
113
+ "entropy": NaN,
114
+ "epoch": 1.375,
115
+ "grad_norm": NaN,
116
+ "learning_rate": 5e-05,
117
+ "loss": 0.0,
118
+ "mean_token_accuracy": 0.0,
119
+ "num_tokens": 7133.0,
120
+ "step": 11
121
+ },
122
+ {
123
+ "entropy": NaN,
124
+ "epoch": 1.5,
125
+ "grad_norm": NaN,
126
+ "learning_rate": 5e-05,
127
+ "loss": 0.0,
128
+ "mean_token_accuracy": 0.0,
129
+ "num_tokens": 7795.0,
130
+ "step": 12
131
+ },
132
+ {
133
+ "entropy": NaN,
134
+ "epoch": 1.625,
135
+ "grad_norm": NaN,
136
+ "learning_rate": 5e-05,
137
+ "loss": 0.0,
138
+ "mean_token_accuracy": 0.0,
139
+ "num_tokens": 8438.0,
140
+ "step": 13
141
+ },
142
+ {
143
+ "entropy": NaN,
144
+ "epoch": 1.75,
145
+ "grad_norm": NaN,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.0,
148
+ "mean_token_accuracy": 0.0,
149
+ "num_tokens": 9093.0,
150
+ "step": 14
151
+ },
152
+ {
153
+ "entropy": NaN,
154
+ "epoch": 1.875,
155
+ "grad_norm": NaN,
156
+ "learning_rate": 5e-05,
157
+ "loss": 0.0,
158
+ "mean_token_accuracy": 0.0,
159
+ "num_tokens": 9750.0,
160
+ "step": 15
161
+ },
162
+ {
163
+ "entropy": NaN,
164
+ "epoch": 2.0,
165
+ "grad_norm": NaN,
166
+ "learning_rate": 5e-05,
167
+ "loss": 0.0,
168
+ "mean_token_accuracy": 0.0,
169
+ "num_tokens": 10400.0,
170
+ "step": 16
171
+ },
172
+ {
173
+ "entropy": NaN,
174
+ "epoch": 2.125,
175
+ "grad_norm": NaN,
176
+ "learning_rate": 5e-05,
177
+ "loss": 0.0,
178
+ "mean_token_accuracy": 0.0,
179
+ "num_tokens": 11040.0,
180
+ "step": 17
181
+ },
182
+ {
183
+ "entropy": NaN,
184
+ "epoch": 2.25,
185
+ "grad_norm": NaN,
186
+ "learning_rate": 5e-05,
187
+ "loss": 0.0,
188
+ "mean_token_accuracy": 0.0,
189
+ "num_tokens": 11694.0,
190
+ "step": 18
191
+ },
192
+ {
193
+ "entropy": NaN,
194
+ "epoch": 2.375,
195
+ "grad_norm": NaN,
196
+ "learning_rate": 5e-05,
197
+ "loss": 0.0,
198
+ "mean_token_accuracy": 0.0,
199
+ "num_tokens": 12341.0,
200
+ "step": 19
201
+ },
202
+ {
203
+ "entropy": NaN,
204
+ "epoch": 2.5,
205
+ "grad_norm": NaN,
206
+ "learning_rate": 5e-05,
207
+ "loss": 0.0,
208
+ "mean_token_accuracy": 0.0,
209
+ "num_tokens": 12988.0,
210
+ "step": 20
211
+ },
212
+ {
213
+ "entropy": NaN,
214
+ "epoch": 2.625,
215
+ "grad_norm": NaN,
216
+ "learning_rate": 5e-05,
217
+ "loss": 0.0,
218
+ "mean_token_accuracy": 0.0,
219
+ "num_tokens": 13632.0,
220
+ "step": 21
221
+ },
222
+ {
223
+ "entropy": NaN,
224
+ "epoch": 2.75,
225
+ "grad_norm": NaN,
226
+ "learning_rate": 5e-05,
227
+ "loss": 0.0,
228
+ "mean_token_accuracy": 0.0,
229
+ "num_tokens": 14285.0,
230
+ "step": 22
231
+ },
232
+ {
233
+ "entropy": NaN,
234
+ "epoch": 2.875,
235
+ "grad_norm": NaN,
236
+ "learning_rate": 5e-05,
237
+ "loss": 0.0,
238
+ "mean_token_accuracy": 0.0,
239
+ "num_tokens": 14938.0,
240
+ "step": 23
241
+ },
242
+ {
243
+ "entropy": NaN,
244
+ "epoch": 3.0,
245
+ "grad_norm": NaN,
246
+ "learning_rate": 5e-05,
247
+ "loss": 0.0,
248
+ "mean_token_accuracy": 0.0,
249
+ "num_tokens": 15600.0,
250
+ "step": 24
251
+ },
252
+ {
253
+ "entropy": NaN,
254
+ "epoch": 3.125,
255
+ "grad_norm": NaN,
256
+ "learning_rate": 5e-05,
257
+ "loss": 0.0,
258
+ "mean_token_accuracy": 0.0,
259
+ "num_tokens": 16262.0,
260
+ "step": 25
261
+ },
262
+ {
263
+ "entropy": NaN,
264
+ "epoch": 3.25,
265
+ "grad_norm": NaN,
266
+ "learning_rate": 5e-05,
267
+ "loss": 0.0,
268
+ "mean_token_accuracy": 0.0,
269
+ "num_tokens": 16901.0,
270
+ "step": 26
271
+ },
272
+ {
273
+ "entropy": NaN,
274
+ "epoch": 3.375,
275
+ "grad_norm": NaN,
276
+ "learning_rate": 5e-05,
277
+ "loss": 0.0,
278
+ "mean_token_accuracy": 0.0,
279
+ "num_tokens": 17547.0,
280
+ "step": 27
281
+ },
282
+ {
283
+ "entropy": NaN,
284
+ "epoch": 3.5,
285
+ "grad_norm": NaN,
286
+ "learning_rate": 5e-05,
287
+ "loss": 0.0,
288
+ "mean_token_accuracy": 0.0,
289
+ "num_tokens": 18200.0,
290
+ "step": 28
291
+ },
292
+ {
293
+ "entropy": NaN,
294
+ "epoch": 3.625,
295
+ "grad_norm": NaN,
296
+ "learning_rate": 5e-05,
297
+ "loss": 0.0,
298
+ "mean_token_accuracy": 0.0,
299
+ "num_tokens": 18842.0,
300
+ "step": 29
301
+ },
302
+ {
303
+ "entropy": NaN,
304
+ "epoch": 3.75,
305
+ "grad_norm": NaN,
306
+ "learning_rate": 5e-05,
307
+ "loss": 0.0,
308
+ "mean_token_accuracy": 0.0,
309
+ "num_tokens": 19503.0,
310
+ "step": 30
311
+ },
312
+ {
313
+ "entropy": NaN,
314
+ "epoch": 3.875,
315
+ "grad_norm": NaN,
316
+ "learning_rate": 5e-05,
317
+ "loss": 0.0,
318
+ "mean_token_accuracy": 0.0,
319
+ "num_tokens": 20155.0,
320
+ "step": 31
321
+ },
322
+ {
323
+ "entropy": NaN,
324
+ "epoch": 4.0,
325
+ "grad_norm": NaN,
326
+ "learning_rate": 5e-05,
327
+ "loss": 0.0,
328
+ "mean_token_accuracy": 0.0,
329
+ "num_tokens": 20800.0,
330
+ "step": 32
331
+ }
332
+ ],
333
+ "logging_steps": 1,
334
+ "max_steps": 40,
335
+ "num_input_tokens_seen": 0,
336
+ "num_train_epochs": 5,
337
+ "save_steps": 500,
338
+ "stateful_callbacks": {
339
+ "TrainerControl": {
340
+ "args": {
341
+ "should_epoch_stop": false,
342
+ "should_evaluate": false,
343
+ "should_log": false,
344
+ "should_save": true,
345
+ "should_training_stop": false
346
+ },
347
+ "attributes": {}
348
+ }
349
+ },
350
+ "total_flos": 12757619097600.0,
351
+ "train_batch_size": 1,
352
+ "trial_name": null,
353
+ "trial_params": null
354
+ }
checkpoint-32/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e135689bbe604b79dfc158561d43c7e13c80ef8bbef81875af136d9754f1067
3
+ size 5240
checkpoint-40/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/functiongemma-270m-it
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-40/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/functiongemma-270m-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "down_proj",
34
+ "k_proj",
35
+ "gate_proj",
36
+ "q_proj",
37
+ "v_proj",
38
+ "up_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-40/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe05ace61c76eb333d8758f388bf2e0fa09459ea940ba63d3325df91f5f661
3
+ size 7626520
checkpoint-40/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-40/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8285478ede1915124e35284c5c7c3317590b9dc0cc5e2f1f3841ea1dc5e3af7
3
+ size 1768
checkpoint-40/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6dfa39228c3a957cbec65e7f169b5cf55f9213dafb7c18ff61b2cfe80d9f668
3
+ size 14244
checkpoint-40/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a60d787af76177708255c5367ed87feece6f62e47856d6099f615d1167fc7782
3
+ size 988
checkpoint-40/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bce2d3a2f8f216440846a555f3fa52ce4ec5a8eae429c1264154fbef75bbeae
3
+ size 1064
checkpoint-40/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578
3
+ size 33384774