GTKING committed on
Commit
2d850e9
·
1 Parent(s): dd9a47f

Training in progress, step 48

Browse files
README.md CHANGED
@@ -1,3 +1,58 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Llama-3.2-1B-Instruct
3
+ library_name: transformers
4
+ model_name: ZFusionAI_Hacker
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for ZFusionAI_Hacker
13
+
14
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="GTKING/ZFusionAI_Hacker", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/gamingking9025-muthayammal-college-of-engineering/huggingface/runs/ajpft44z)
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.26.2
38
+ - Transformers: 4.57.1
39
+ - Pytorch: 2.8.0+cu126
40
+ - Datasets: 4.4.1
41
+ - Tokenizers: 0.22.1
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
adapter_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "k_proj",
30
+ "v_proj",
31
+ "o_proj"
32
+ ],
33
+ "target_parameters": null,
34
+ "task_type": "CAUSAL_LM",
35
+ "trainable_token_indices": null,
36
+ "use_dora": false,
37
+ "use_qalora": false,
38
+ "use_rslora": false
39
+ }
chat_template.jinja CHANGED
@@ -1,73 +1,93 @@
1
-
2
  {{- bos_token }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- {# --- Extract system message if present --- #}
5
- {%- if messages and messages[0]['role'] == 'system' %}
6
- {%- set system_message = messages[0]['content'] | trim %}
7
  {%- set messages = messages[1:] %}
8
  {%- else %}
9
  {%- set system_message = "" %}
10
  {%- endif %}
11
 
12
- {# --- System block (minimal) --- #}
13
- {{- "<|start_header_id|>system<|end_header_id|>
14
-
15
- " }}
 
 
 
 
 
 
 
 
 
 
 
 
16
  {{- system_message }}
17
  {{- "<|eot_id|>" }}
18
 
19
- {# --- Optional tool definitions (in user message) --- #}
20
- {%- if tools is defined and tools is not none %}
21
- {%- set first_user = messages[0]['content'] | trim %}
22
- {%- set messages = messages[1:] %}
23
-
24
- {{- "<|start_header_id|>user<|end_header_id|>
25
-
26
- " }}
27
- {{- "You have access to the following functions.
28
- " }}
29
- {{- "Respond ONLY with a JSON function call.
30
-
31
- " }}
32
-
33
  {%- for t in tools %}
34
- {{- t | tojson(indent=2) }}
35
- {{- "
36
-
37
- " }}
38
  {%- endfor %}
39
-
40
- {{- first_user }}
41
- {{- "<|eot_id|>" }}
42
  {%- endif %}
43
 
44
- {# --- Remaining messages --- #}
45
  {%- for message in messages %}
46
- {%- if message.role in ["user", "assistant"] %}
47
- {{- "<|start_header_id|>" + message.role + "<|end_header_id|>
48
-
49
- " }}
50
- {{- message.content | trim }}
51
- {{- "<|eot_id|>" }}
52
- {%- elif "tool_calls" in message %}
53
- {%- set call = message.tool_calls[0].function %}
54
- {{- "<|start_header_id|>assistant<|end_header_id|>
55
-
56
- " }}
57
- {{- '{"name": "' + call.name + '", "parameters": ' + (call.arguments | tojson) + '}' }}
58
  {{- "<|eot_id|>" }}
59
- {%- elif message.role == "tool" %}
60
- {{- "<|start_header_id|>tool<|end_header_id|>
61
-
62
- " }}
63
- {{- message.content }}
 
 
64
  {{- "<|eot_id|>" }}
65
  {%- endif %}
66
  {%- endfor %}
67
-
68
- {# --- Generation prompt --- #}
69
  {%- if add_generation_prompt %}
70
- {{- "<|start_header_id|>assistant<|end_header_id|>
71
-
72
- " }}
73
  {%- endif %}
 
 
1
  {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
 
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
  {%- set messages = messages[1:] %}
23
  {%- else %}
24
  {%- set system_message = "" %}
25
  {%- endif %}
26
 
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
  {{- system_message }}
44
  {{- "<|eot_id|>" }}
45
 
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
  {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
 
 
63
  {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
 
 
65
  {%- endif %}
66
 
 
67
  {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
  {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
  {{- "<|eot_id|>" }}
89
  {%- endif %}
90
  {%- endfor %}
 
 
91
  {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
 
 
93
  {%- endif %}
special_tokens_map.json CHANGED
@@ -12,6 +12,5 @@
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
- },
16
- "pad_token": "<|eot_id|>"
17
  }
 
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
+ }
 
16
  }
tokenizer_config.json CHANGED
@@ -2058,6 +2058,5 @@
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
2061
- "pad_token": "<|eot_id|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }
 
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
 
2061
  "tokenizer_class": "PreTrainedTokenizerFast"
2062
  }