Update config.json
Browse files- config.json +5 -6
config.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
128008,
|
| 13 |
128009
|
| 14 |
],
|
| 15 |
-
"fused_tensor_size":
|
| 16 |
"generation_tuning": {
|
| 17 |
"max_length": 20,
|
| 18 |
"max_new_tokens": 100,
|
|
@@ -89,11 +89,11 @@
|
|
| 89 |
"hidden_size": 3072,
|
| 90 |
"initializer_range": 0.02,
|
| 91 |
"intermediate_size": 8192,
|
| 92 |
-
"max_position_embeddings":
|
| 93 |
"mlp_bias": false,
|
| 94 |
"model_type": "llama",
|
| 95 |
"num_attention_heads": 24,
|
| 96 |
-
"num_hidden_layers":
|
| 97 |
"num_key_value_heads": 8,
|
| 98 |
"pad_token_id": 128004,
|
| 99 |
"pretraining_tp": 1,
|
|
@@ -113,8 +113,7 @@
|
|
| 113 |
"use_cache": true,
|
| 114 |
"vocab_size": 128256,
|
| 115 |
"fusion": {
|
| 116 |
-
"
|
| 117 |
-
"layers_after": 29
|
| 118 |
},
|
| 119 |
"tensor_fusion": true,
|
| 120 |
"decode_functions": [
|
|
@@ -127,5 +126,5 @@
|
|
| 127 |
"decode_architecture",
|
| 128 |
"decode_fused_tensor"
|
| 129 |
],
|
| 130 |
-
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n---\n<|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2025\nToday Date: {{ date_string }}\n---\n{{ system_message }}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n# Mecanismos incluidos:\n- Fusi\u00f3n de todas las
|
| 131 |
}
|
|
|
|
| 12 |
128008,
|
| 13 |
128009
|
| 14 |
],
|
| 15 |
+
"fused_tensor_size": 494674944,
|
| 16 |
"generation_tuning": {
|
| 17 |
"max_length": 20,
|
| 18 |
"max_new_tokens": 100,
|
|
|
|
| 89 |
"hidden_size": 3072,
|
| 90 |
"initializer_range": 0.02,
|
| 91 |
"intermediate_size": 8192,
|
| 92 |
+
"max_position_embeddings": 8000,
|
| 93 |
"mlp_bias": false,
|
| 94 |
"model_type": "llama",
|
| 95 |
"num_attention_heads": 24,
|
| 96 |
+
"num_hidden_layers": 28,
|
| 97 |
"num_key_value_heads": 8,
|
| 98 |
"pad_token_id": 128004,
|
| 99 |
"pretraining_tp": 1,
|
|
|
|
| 113 |
"use_cache": true,
|
| 114 |
"vocab_size": 128256,
|
| 115 |
"fusion": {
|
| 116 |
+
"layers_merged": 28
|
|
|
|
| 117 |
},
|
| 118 |
"tensor_fusion": true,
|
| 119 |
"decode_functions": [
|
|
|
|
| 126 |
"decode_architecture",
|
| 127 |
"decode_fused_tensor"
|
| 128 |
],
|
| 129 |
+
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n---\n<|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2025\nToday Date: {{ date_string }}\n---\n{{ system_message }}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n# Mecanismos incluidos:\n- Fusi\u00f3n de todas las layers en 1\n- Fusi\u00f3n de todos los tensores en uno\n- Eliminaci\u00f3n de bias y censura\n- Configuraci\u00f3n de generaci\u00f3n: do_sample=True, temp=0.7, top_p=0.9, penalidad=1.2\n- Funciones de decodificaci\u00f3n completas\n---\n<|eot_id|>"
|
| 130 |
}
|