RamAnanth1 committed on
Commit
773a95e
·
verified ·
1 Parent(s): 09f6999

Training in progress, step 100

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: CohereLabs/tiny-aya-global
3
+ library_name: transformers
4
+ model_name: tiny-aya-hermes-tool-calling
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for tiny-aya-hermes-tool-calling
13
+
14
+ This model is a fine-tuned version of [CohereLabs/tiny-aya-global](https://huggingface.co/CohereLabs/tiny-aya-global).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="RamAnanth1/tiny-aya-hermes-tool-calling", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.28.0
38
+ - Transformers: 5.0.0
39
+ - Pytorch: 2.10.0+cu128
40
+ - Datasets: 4.0.0
41
+ - Tokenizers: 0.22.2
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @software{vonwerra2020trl,
51
+ title = {{TRL: Transformers Reinforcement Learning}},
52
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
53
+ license = {Apache-2.0},
54
+ url = {https://github.com/huggingface/trl},
55
+ year = {2020}
56
+ }
57
+ ```
adapter_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Cohere2ForCausalLM",
7
+ "parent_library": "transformers.models.cohere2.modeling_cohere2"
8
+ },
9
+ "base_model_name_or_path": "CohereLabs/tiny-aya-global",
10
+ "bias": "none",
11
+ "corda_config": null,
12
+ "ensure_weight_tying": false,
13
+ "eva_config": null,
14
+ "exclude_modules": null,
15
+ "fan_in_fan_out": false,
16
+ "inference_mode": true,
17
+ "init_lora_weights": true,
18
+ "layer_replication": null,
19
+ "layers_pattern": null,
20
+ "layers_to_transform": null,
21
+ "loftq_config": {},
22
+ "lora_alpha": 32,
23
+ "lora_bias": false,
24
+ "lora_dropout": 0.0,
25
+ "megatron_config": null,
26
+ "megatron_core": "megatron.core",
27
+ "modules_to_save": null,
28
+ "peft_type": "LORA",
29
+ "peft_version": "0.18.1",
30
+ "qalora_group_size": 16,
31
+ "r": 32,
32
+ "rank_pattern": {},
33
+ "revision": null,
34
+ "target_modules": [
35
+ "v_proj",
36
+ "q_proj",
37
+ "gate_proj",
38
+ "down_proj",
39
+ "o_proj",
40
+ "up_proj",
41
+ "k_proj"
42
+ ],
43
+ "target_parameters": null,
44
+ "task_type": null,
45
+ "trainable_token_indices": null,
46
+ "use_dora": false,
47
+ "use_qalora": false,
48
+ "use_rslora": false
49
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcf075c049d48c1f0b9b22ea2fa3180816bea27605474c07b8bc7938328107b
3
+ size 120981704
chat_template.jinja ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}{% set ns = namespace(system_prompt=false, expect_user=true) %}{% for message in messages %}{% if message['role']|lower == 'system' %}{% set ns.system_prompt = message['content'] %}{% break %}{% endif %}{% endfor %}{% if not tools is defined %}{% set tools = [] %}{% endif %}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
2
+ You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
3
+
4
+ Your information cutoff date is June 2024.
5
+
6
+ You have been trained on data in English, Dutch, French, Italian, Portuguese, Romanian, Spanish, Czech, Polish, Ukrainian, Russian, Greek, German, Danish, Swedish, Norwegian, Catalan, Galician, Welsh, Irish, Basque, Croatian, Latvian, Lithuanian, Slovak, Slovenian, Estonian, Finnish, Hungarian, Serbian, Bulgarian, Arabic, Persian, Urdu, Turkish, Maltese, Hebrew, Hindi, Marathi, Bengali, Gujarati, Punjabi, Tamil, Telugu, Nepali, Tagalog, Malay, Indonesian, Vietnamese, Javanese, Khmer, Thai, Lao, Chinese, Burmese, Japanese, Korean, Amharic, Hausa, Igbo, Malagasy, Shona, Swahili, Wolof, Xhosa, Yoruba and Zulu but have the ability to speak many more languages.
7
+
8
+ # Default Preamble
9
+ The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
10
+ - Your name is Aya.
11
+ - You are a large language model built by Cohere.
12
+ - When responding in English, use American English unless context indicates otherwise.
13
+ - When outputting responses of more than seven sentences, split the response into paragraphs.
14
+ - Prefer the active voice.
15
+ - Use gender-neutral pronouns for unspecified persons.
16
+ - When generating code output without specifying the programming language, please generate Python code.{% if ns.system_prompt and ns.system_prompt != "" %}
17
+
18
+ # Developer Preamble
19
+ The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
20
+ {{ ns.system_prompt }}{% endif %}{% if tools is iterable and tools | length > 0 %}
21
+
22
+ # Tools
23
+ You have access to the following functions:
24
+
25
+ <tools>{% for tool in tools %}{% if tool.function is defined %}{% set t = tool.function %}{% else %}{% set t = tool %}{% endif %}
26
+ <function>
27
+ <name>{{ t.name }}</name>{% if t.description is defined %}
28
+ <description>{{ t.description | trim }}</description>{% endif %}{% if t.parameters is defined %}
29
+ <parameters>{{ t.parameters | tojson | safe }}</parameters>{% endif %}
30
+ </function>{% endfor %}
31
+ </tools>
32
+
33
+ If you choose to call a function ONLY reply in the following format with NO suffix:
34
+
35
+ <tool_call>
36
+ <function=example_function_name>
37
+ <parameter=example_parameter_1>
38
+ value_1
39
+ </parameter>
40
+ <parameter=example_parameter_2>
41
+ This is the value for the second parameter
42
+ that can span
43
+ multiple lines
44
+ </parameter>
45
+ </function>
46
+ </tool_call>
47
+
48
+ <IMPORTANT>
49
+ Reminder:
50
+ - Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags
51
+ - Required parameters MUST be specified
52
+ - You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after
53
+ - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls
54
+ </IMPORTANT>{% endif %}<|END_OF_TURN_TOKEN|>{% for message in messages %}{% set role = message['role']|lower %}{% if role == 'system' and ns.system_prompt and message['content'] == ns.system_prompt %}{% continue %}{% endif %}{% if role == 'user' %}{% if not ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = false %}{% elif role == 'assistant' or role == 'chatbot' %}{% if ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = true %}{% elif role == 'tool' %}{# Treat tool responses as user-side messages; allow multiple tool messages in a row #}{% if ns.expect_user %}{% set ns.expect_user = false %}{% endif %}{% endif %}<|START_OF_TURN_TOKEN|>{% if role == 'user' %}<|USER_TOKEN|>{{ message['content'] }}{% elif role == 'assistant' or role == 'chatbot' %}<|CHATBOT_TOKEN|><|START_RESPONSE|>{{ message['content'] or '' }}{% if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}{% for tool_call in message.tool_calls %}{% if tool_call.function is defined %}{% set tc = tool_call.function %}{% else %}{% set tc = tool_call %}{% endif %}
55
+ <tool_call>
56
+ <function={{ tc.name }}>
57
+ {% if tc.arguments is mapping %}{% for args_name, args_value in tc.arguments | items %}<parameter={{ args_name }}>
58
+ {%- set v = args_value if args_value is string else (args_value | tojson | safe) -%}{{ v }}
59
+ </parameter>
60
+ {% endfor %}{% elif tc.arguments is defined %}<arguments>
61
+ {{ tc.arguments }}
62
+ </arguments>
63
+ {% endif %}</function>
64
+ </tool_call>{% endfor %}{% endif %}<|END_RESPONSE|>{% elif role == 'tool' %}<|USER_TOKEN|><tool_response>
65
+ {{ message['content'] or '' }}
66
+ </tool_response>{% elif role == 'system' %}<|SYSTEM_TOKEN|>{{ message['content'] }}{% endif %}<|END_OF_TURN_TOKEN|>{% endfor %}{% if add_generation_prompt %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>{% endif %}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d150b8af762b3662bdadc1fbc8274bc535ef86c0d497d0a40469fe86d92368
3
+ size 21376340
tokenizer_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<BOS_TOKEN>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "cls_token": "<CLS>",
7
+ "eos_token": "<|END_OF_TURN_TOKEN|>",
8
+ "errors": "replace",
9
+ "extra_special_tokens": [
10
+ "<|START_RESPONSE|>",
11
+ "<|END_RESPONSE|>"
12
+ ],
13
+ "is_local": false,
14
+ "legacy": true,
15
+ "mask_token": "<MASK_TOKEN>",
16
+ "model_max_length": 1000000000000000019884624838656,
17
+ "model_specific_special_tokens": {},
18
+ "pad_token": "<PAD>",
19
+ "sep_token": "<SEP>",
20
+ "sp_model_kwargs": {},
21
+ "spaces_between_special_tokens": false,
22
+ "tokenizer_class": "CohereTokenizer",
23
+ "unk_token": "<UNK>",
24
+ "use_default_system_prompt": false
25
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328880bf7540dee3fe4d2e3ba3be367dddb704efb39885c65ecc74fb5900aa92
3
+ size 5649