gate369 committed on
Commit
620813f
·
verified ·
1 Parent(s): 0714755

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- Custom tools are passed in a user message with some extra guidance #}
20
+ {%- if tools_in_user_message and not tools is none %}
21
+ {#- Extract the first user message so we can plug it in here #}
22
+ {%- if messages | length != 0 %}
23
+ {%- set first_user_message = messages[0]['content']|trim %}
24
+ {%- set messages = messages[1:] %}
25
+ {%- else %}
26
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
27
+ {%- endif %}
28
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
29
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
30
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
31
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
32
+ {{- "Do not use variables.\n\n" }}
33
+ {%- for t in tools %}
34
+ {{- t | tojson(indent=4) }}
35
+ {{- "\n\n" }}
36
+ {%- endfor %}
37
+ {{- first_user_message + "<|eot_id|>"}}
38
+ {%- endif %}
39
+
40
+ {%- for message in messages %}
41
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
42
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
43
+ {%- elif 'tool_calls' in message %}
44
+ {%- if not message.tool_calls|length == 1 %}
45
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
46
+ {%- endif %}
47
+ {%- set tool_call = message.tool_calls[0].function %}
48
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
49
+ {{- '{"name": "' + tool_call.name + '", ' }}
50
+ {{- '"parameters": ' }}
51
+ {{- tool_call.arguments | tojson }}
52
+ {{- "}" }}
53
+ {{- "<|eot_id|>" }}
54
+ {%- elif message.role == "tool" or message.role == "ipython" %}
55
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
56
+ {%- if message.content is mapping or message.content is iterable %}
57
+ {{- message.content | tojson }}
58
+ {%- else %}
59
+ {{- message.content }}
60
+ {%- endif %}
61
+ {{- "<|eot_id|>" }}
62
+ {%- endif %}
63
+ {%- endfor %}
64
+ {%- if add_generation_prompt %}
65
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
66
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 128257,
3
+ "dim": 1024,
4
+ "num_layers": 6,
5
+ "num_heads": 16,
6
+ "max_recursion": 4,
7
+ "d_ff": 1024,
8
+ "d_k": 64,
9
+ "ffn_expansion": 4,
10
+ "max_position_embeddings": 1024,
11
+ "balancing_weight": 0.01,
12
+ "temperature": 1.0,
13
+ "window_size": 2048,
14
+ "model_type": "MoR",
15
+ "architecture": "MixtureOfRecursions",
16
+ "hidden_act": "gelu"
17
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f9e5eaa2e6c22d2dcba17d0556e06bf0ccb7dafe80b700268021296f1a8fcdb
3
+ size 1212449768
model.safetensors.index.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 1212438544
4
+ },
5
+ "weight_map": {
6
+ "embed.weight": "model.safetensors",
7
+ "pos_embed.weight": "model.safetensors",
8
+ "first_layer.0.weight": "model.safetensors",
9
+ "first_layer.0.bias": "model.safetensors",
10
+ "first_layer.2.weight": "model.safetensors",
11
+ "first_layer.2.bias": "model.safetensors",
12
+ "recursive_layers.0.w_q.weight": "model.safetensors",
13
+ "recursive_layers.0.w_q.bias": "model.safetensors",
14
+ "recursive_layers.0.w_k.weight": "model.safetensors",
15
+ "recursive_layers.0.w_k.bias": "model.safetensors",
16
+ "recursive_layers.0.w_v.weight": "model.safetensors",
17
+ "recursive_layers.0.w_v.bias": "model.safetensors",
18
+ "recursive_layers.0.attn_out.weight": "model.safetensors",
19
+ "recursive_layers.0.attn_out.bias": "model.safetensors",
20
+ "recursive_layers.0.ffn.0.weight": "model.safetensors",
21
+ "recursive_layers.0.ffn.0.bias": "model.safetensors",
22
+ "recursive_layers.0.ffn.2.weight": "model.safetensors",
23
+ "recursive_layers.0.ffn.2.bias": "model.safetensors",
24
+ "recursive_layers.0.norm1.weight": "model.safetensors",
25
+ "recursive_layers.0.norm1.bias": "model.safetensors",
26
+ "recursive_layers.0.norm2.weight": "model.safetensors",
27
+ "recursive_layers.0.norm2.bias": "model.safetensors",
28
+ "recursive_layers.1.w_q.weight": "model.safetensors",
29
+ "recursive_layers.1.w_q.bias": "model.safetensors",
30
+ "recursive_layers.1.w_k.weight": "model.safetensors",
31
+ "recursive_layers.1.w_k.bias": "model.safetensors",
32
+ "recursive_layers.1.w_v.weight": "model.safetensors",
33
+ "recursive_layers.1.w_v.bias": "model.safetensors",
34
+ "recursive_layers.1.attn_out.weight": "model.safetensors",
35
+ "recursive_layers.1.attn_out.bias": "model.safetensors",
36
+ "recursive_layers.1.ffn.0.weight": "model.safetensors",
37
+ "recursive_layers.1.ffn.0.bias": "model.safetensors",
38
+ "recursive_layers.1.ffn.2.weight": "model.safetensors",
39
+ "recursive_layers.1.ffn.2.bias": "model.safetensors",
40
+ "recursive_layers.1.norm1.weight": "model.safetensors",
41
+ "recursive_layers.1.norm1.bias": "model.safetensors",
42
+ "recursive_layers.1.norm2.weight": "model.safetensors",
43
+ "recursive_layers.1.norm2.bias": "model.safetensors",
44
+ "recursive_layers.2.w_q.weight": "model.safetensors",
45
+ "recursive_layers.2.w_q.bias": "model.safetensors",
46
+ "recursive_layers.2.w_k.weight": "model.safetensors",
47
+ "recursive_layers.2.w_k.bias": "model.safetensors",
48
+ "recursive_layers.2.w_v.weight": "model.safetensors",
49
+ "recursive_layers.2.w_v.bias": "model.safetensors",
50
+ "recursive_layers.2.attn_out.weight": "model.safetensors",
51
+ "recursive_layers.2.attn_out.bias": "model.safetensors",
52
+ "recursive_layers.2.ffn.0.weight": "model.safetensors",
53
+ "recursive_layers.2.ffn.0.bias": "model.safetensors",
54
+ "recursive_layers.2.ffn.2.weight": "model.safetensors",
55
+ "recursive_layers.2.ffn.2.bias": "model.safetensors",
56
+ "recursive_layers.2.norm1.weight": "model.safetensors",
57
+ "recursive_layers.2.norm1.bias": "model.safetensors",
58
+ "recursive_layers.2.norm2.weight": "model.safetensors",
59
+ "recursive_layers.2.norm2.bias": "model.safetensors",
60
+ "recursive_layers.3.w_q.weight": "model.safetensors",
61
+ "recursive_layers.3.w_q.bias": "model.safetensors",
62
+ "recursive_layers.3.w_k.weight": "model.safetensors",
63
+ "recursive_layers.3.w_k.bias": "model.safetensors",
64
+ "recursive_layers.3.w_v.weight": "model.safetensors",
65
+ "recursive_layers.3.w_v.bias": "model.safetensors",
66
+ "recursive_layers.3.attn_out.weight": "model.safetensors",
67
+ "recursive_layers.3.attn_out.bias": "model.safetensors",
68
+ "recursive_layers.3.ffn.0.weight": "model.safetensors",
69
+ "recursive_layers.3.ffn.0.bias": "model.safetensors",
70
+ "recursive_layers.3.ffn.2.weight": "model.safetensors",
71
+ "recursive_layers.3.ffn.2.bias": "model.safetensors",
72
+ "recursive_layers.3.norm1.weight": "model.safetensors",
73
+ "recursive_layers.3.norm1.bias": "model.safetensors",
74
+ "recursive_layers.3.norm2.weight": "model.safetensors",
75
+ "recursive_layers.3.norm2.bias": "model.safetensors",
76
+ "recursive_layers.4.w_q.weight": "model.safetensors",
77
+ "recursive_layers.4.w_q.bias": "model.safetensors",
78
+ "recursive_layers.4.w_k.weight": "model.safetensors",
79
+ "recursive_layers.4.w_k.bias": "model.safetensors",
80
+ "recursive_layers.4.w_v.weight": "model.safetensors",
81
+ "recursive_layers.4.w_v.bias": "model.safetensors",
82
+ "recursive_layers.4.attn_out.weight": "model.safetensors",
83
+ "recursive_layers.4.attn_out.bias": "model.safetensors",
84
+ "recursive_layers.4.ffn.0.weight": "model.safetensors",
85
+ "recursive_layers.4.ffn.0.bias": "model.safetensors",
86
+ "recursive_layers.4.ffn.2.weight": "model.safetensors",
87
+ "recursive_layers.4.ffn.2.bias": "model.safetensors",
88
+ "recursive_layers.4.norm1.weight": "model.safetensors",
89
+ "recursive_layers.4.norm1.bias": "model.safetensors",
90
+ "recursive_layers.4.norm2.weight": "model.safetensors",
91
+ "recursive_layers.4.norm2.bias": "model.safetensors",
92
+ "recursive_layers.5.w_q.weight": "model.safetensors",
93
+ "recursive_layers.5.w_q.bias": "model.safetensors",
94
+ "recursive_layers.5.w_k.weight": "model.safetensors",
95
+ "recursive_layers.5.w_k.bias": "model.safetensors",
96
+ "recursive_layers.5.w_v.weight": "model.safetensors",
97
+ "recursive_layers.5.w_v.bias": "model.safetensors",
98
+ "recursive_layers.5.attn_out.weight": "model.safetensors",
99
+ "recursive_layers.5.attn_out.bias": "model.safetensors",
100
+ "recursive_layers.5.ffn.0.weight": "model.safetensors",
101
+ "recursive_layers.5.ffn.0.bias": "model.safetensors",
102
+ "recursive_layers.5.ffn.2.weight": "model.safetensors",
103
+ "recursive_layers.5.ffn.2.bias": "model.safetensors",
104
+ "recursive_layers.5.norm1.weight": "model.safetensors",
105
+ "recursive_layers.5.norm1.bias": "model.safetensors",
106
+ "recursive_layers.5.norm2.weight": "model.safetensors",
107
+ "recursive_layers.5.norm2.bias": "model.safetensors",
108
+ "router.linear.0.weight": "model.safetensors",
109
+ "router.linear.0.bias": "model.safetensors",
110
+ "router.linear.2.weight": "model.safetensors",
111
+ "router.linear.2.bias": "model.safetensors",
112
+ "final_norm.weight": "model.safetensors",
113
+ "final_norm.bias": "model.safetensors",
114
+ "head.weight": "model.safetensors"
115
+ }
116
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|begin_of_text|>",
3
+ "eos_token": "<|eot_id|>",
4
+ "unk_token": "<unk>",
5
+ "pad_token": "<|eot_id|>"
6
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308266486756f903e9d16d33510aa2eb2ea98fa216ec701634cd937beba9d0fe
3
+ size 17209648
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "LlamaTokenizerFast",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "eos_token": "<|eot_id|>",
5
+ "unk_token": "<unk>",
6
+ "pad_token": "<|eot_id|>",
7
+ "model_max_length": 1024
8
+ }