jnjj committed on
Commit
7149205
·
verified ·
1 Parent(s): abd182f

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +5 -6
config.json CHANGED
@@ -12,7 +12,7 @@
12
  128008,
13
  128009
14
  ],
15
- "fused_tensor_size": 3313419264,
16
  "generation_tuning": {
17
  "max_length": 20,
18
  "max_new_tokens": 100,
@@ -89,11 +89,11 @@
89
  "hidden_size": 3072,
90
  "initializer_range": 0.02,
91
  "intermediate_size": 8192,
92
- "max_position_embeddings": 8192,
93
  "mlp_bias": false,
94
  "model_type": "llama",
95
  "num_attention_heads": 24,
96
- "num_hidden_layers": 1,
97
  "num_key_value_heads": 8,
98
  "pad_token_id": 128004,
99
  "pretraining_tp": 1,
@@ -113,8 +113,7 @@
113
  "use_cache": true,
114
  "vocab_size": 128256,
115
  "fusion": {
116
- "layers_original": 28,
117
- "layers_after": 29
118
  },
119
  "tensor_fusion": true,
120
  "decode_functions": [
@@ -127,5 +126,5 @@
127
  "decode_architecture",
128
  "decode_fused_tensor"
129
  ],
130
- "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n---\n<|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2025\nToday Date: {{ date_string }}\n---\n{{ system_message }}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n# Mecanismos incluidos:\n- Fusi\u00f3n de todas las capas en 1 (sin eliminaci\u00f3n)\n- Fusi\u00f3n de todos los tensores en uno\n- Eliminaci\u00f3n de bias y censura\n- Muestreo (`do_sample=True`), temp=0.7, top_p=0.9, penalidad=1.2\n- Funciones de decodificaci\u00f3n completas\n---\n<|eot_id|>"
131
  }
 
12
  128008,
13
  128009
14
  ],
15
+ "fused_tensor_size": 494674944,
16
  "generation_tuning": {
17
  "max_length": 20,
18
  "max_new_tokens": 100,
 
89
  "hidden_size": 3072,
90
  "initializer_range": 0.02,
91
  "intermediate_size": 8192,
92
+ "max_position_embeddings": 8000,
93
  "mlp_bias": false,
94
  "model_type": "llama",
95
  "num_attention_heads": 24,
96
+ "num_hidden_layers": 28,
97
  "num_key_value_heads": 8,
98
  "pad_token_id": 128004,
99
  "pretraining_tp": 1,
 
113
  "use_cache": true,
114
  "vocab_size": 128256,
115
  "fusion": {
116
+ "layers_merged": 28
 
117
  },
118
  "tensor_fusion": true,
119
  "decode_functions": [
 
126
  "decode_architecture",
127
  "decode_fused_tensor"
128
  ],
129
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n---\n<|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2025\nToday Date: {{ date_string }}\n---\n{{ system_message }}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n# Mecanismos incluidos:\n- Fusi\u00f3n de todas las layers en 1\n- Fusi\u00f3n de todos los tensores en uno\n- Eliminaci\u00f3n de bias y censura\n- Configuraci\u00f3n de generaci\u00f3n: do_sample=True, temp=0.7, top_p=0.9, penalidad=1.2\n- Funciones de decodificaci\u00f3n completas\n---\n<|eot_id|>"
130
  }