Elvisaro committed on
Commit
8e70ea9
·
verified ·
1 Parent(s): 11f76ff

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +321 -1
README.md CHANGED
@@ -5,4 +5,324 @@ language:
5
  base_model:
6
  - amd/Instella-3B-Instruct
7
  ---
8
- This model doesn't work because I tried to convert from safetensors to gguf because : I tried this: OLMoForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  base_model:
6
  - amd/Instella-3B-Instruct
7
  ---
8
+ This model doesn't work. I tried to convert it from safetensors to GGUF, and for that conversion I tried this: OLMoForCausalLM
9
+
10
+ ## The Script Used for BF16 Model
11
+
12
+ %%writefile convert_instella_bf16.py
13
+ import os
14
+ import subprocess
15
+ from pathlib import Path
16
+ import json
17
+ import torch
18
+ import numpy as np
19
+
20
+ def create_instella_conversion_script():
21
+ """Create a conversion script for Instella models using bfloat16 mixed-precision."""
22
+ script_content = """
23
+ import sys
24
+ import json
25
+ import struct
26
+ import numpy as np
27
+ import torch
28
+ from pathlib import Path
29
+ import os
30
+ import re
31
+ from typing import Dict, Any, List
32
+ from safetensors.torch import load_file as load_safetensors
33
+
34
# File signature: the ASCII bytes "GGUF" when packed as a little-endian uint32.
GGUF_MAGIC = 0x46554747
GGUF_VERSION = 3

# GGUF metadata value types.
# The numeric IDs follow the gguf_metadata_value_type enum of the GGUF
# specification; a reader uses them to decide how to decode each value, so
# they must match the spec exactly.
GGUF_TYPE_UINT8 = 0
GGUF_TYPE_INT8 = 1
GGUF_TYPE_UINT16 = 2
GGUF_TYPE_INT16 = 3
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_INT32 = 5
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_BOOL = 7
GGUF_TYPE_STRING = 8
GGUF_TYPE_ARRAY = 9
GGUF_TYPE_UINT64 = 10
GGUF_TYPE_INT64 = 11
GGUF_TYPE_FLOAT64 = 12
47
+
48
def write_gguf_header(f, num_tensors, num_kv):
    # Write the fixed-size GGUF file header to the binary file object f.
    #
    # f           -- file object opened for binary writing
    # num_tensors -- number of tensor-info records that will follow
    # num_kv      -- number of metadata key/value records that will follow
    #
    # Field order per the GGUF specification: magic (uint32), version (uint32),
    # tensor_count (uint64), metadata_kv_count (uint64). The tensor count comes
    # BEFORE the key/value count; swapping them makes a reader look for the
    # wrong number of records. "<" means little-endian with no padding.
    f.write(struct.pack("<IIQQ", GGUF_MAGIC, GGUF_VERSION, num_tensors, num_kv))
53
+
54
def write_metadata_kv(f, key: str, val_type: int, val):
    # Write one GGUF metadata record (key, value type, value) to f.
    #
    # f        -- file object opened for binary writing
    # key      -- metadata key; stored as a uint64 length plus UTF-8 bytes
    # val_type -- one of the GGUF_TYPE_* constants
    # val      -- value to encode; a str for STRING, a non-empty list of ints
    #             or strs for ARRAY, otherwise a scalar matching val_type
    #
    # Raises ValueError for a value type (or array element type) this writer
    # cannot encode. The record is assembled in memory first, so a failure
    # never leaves a half-written record in the file.
    scalar_formats = {
        GGUF_TYPE_INT32: "<i",
        GGUF_TYPE_UINT32: "<I",
        GGUF_TYPE_FLOAT32: "<f",
        GGUF_TYPE_BOOL: "<?",
        GGUF_TYPE_UINT64: "<Q",
        GGUF_TYPE_INT64: "<q",
        GGUF_TYPE_FLOAT64: "<d",
    }

    def encode_string(text):
        # GGUF strings are a uint64 byte length followed by the UTF-8 bytes.
        raw = text.encode('utf-8')
        return struct.pack("<Q", len(raw)) + raw

    if val_type == GGUF_TYPE_STRING:
        payload = encode_string(val)
    elif val_type in scalar_formats:
        payload = struct.pack(scalar_formats[val_type], val)
    elif val_type == GGUF_TYPE_ARRAY:
        if len(val) == 0:
            # An array record must name its element type, which cannot be
            # inferred from an empty list.
            raise ValueError(f"Cannot encode empty array for key {key!r}: element type unknown")
        if isinstance(val[0], str):
            element_type = GGUF_TYPE_STRING
            elements = [encode_string(item) for item in val]
        elif isinstance(val[0], int):
            element_type = GGUF_TYPE_INT32
            elements = [struct.pack("<i", item) for item in val]
        else:
            raise ValueError(
                f"Unsupported array element type {type(val[0]).__name__} for key {key!r}"
            )
        # Spec layout for arrays: element type (uint32), element count
        # (uint64), then the elements -- type first, count second.
        payload = struct.pack("<IQ", element_type, len(val)) + b"".join(elements)
    else:
        raise ValueError(f"Unsupported GGUF value type {val_type!r} for key {key!r}")

    f.write(encode_string(key) + struct.pack("<I", val_type) + payload)
85
+
86
def write_tensor_info(f, name: str, tensor: torch.Tensor, offset: int = 0):
    # Write one GGUF tensor-info record describing an F16 tensor.
    #
    # f      -- file object opened for binary writing
    # name   -- GGUF tensor name; stored as a uint64 length plus UTF-8 bytes
    # tensor -- tensor whose shape is recorded (its data is written elsewhere)
    # offset -- byte offset of this tensor's data relative to the start of the
    #           tensor-data section. Defaults to 0 so existing three-argument
    #           calls keep working, but a file holding more than one tensor
    #           needs the real offset of each tensor passed in.
    #
    # Record layout per the GGUF specification: name, n_dimensions (uint32),
    # dimensions (uint64 each), ggml type id (uint32), data offset (uint64).
    # The type is a numeric enum value, not a string.
    ggml_type_f16 = 1  # GGML_TYPE_F16 in ggml's type enum

    name_bytes = name.encode('utf-8')
    dims = list(tensor.shape)

    record = [
        struct.pack("<Q", len(name_bytes)),
        name_bytes,
        struct.pack("<I", len(dims)),
    ]
    # ggml lists the fastest-varying dimension first, which is the reverse of
    # the row-major shape order reported by torch.
    record.extend(struct.pack("<Q", dim) for dim in reversed(dims))
    record.append(struct.pack("<IQ", ggml_type_f16, offset))
    f.write(b"".join(record))
101
+
102
def write_tensor_data(f, tensor: torch.Tensor):
    # Write the raw values of tensor to f as float16.
    #
    # The tensor is widened to float32 first and then narrowed to float16;
    # the float16 result is handed to numpy and its bytes are written out.
    half_precision = tensor.to(torch.float32).to(torch.float16)
    raw_bytes = half_precision.numpy().tobytes()
    f.write(raw_bytes)
109
+
110
def map_tensor_name(name: str) -> str:
    # Translate a checkpoint tensor name into the GGUF-style name used in the
    # output file.
    #
    # Lookup order:
    #   1. exact match against the three non-layer tensors;
    #   2. for names containing "model.layers.", the first known sub-string
    #      from the table below decides the "blk.<n>.<suffix>" name;
    #   3. any other per-layer tensor becomes "blk.<n>.<rest of the name>";
    #   4. anything else is returned unchanged.
    top_level_names = {
        "model.embed_tokens.weight": "token_embd.weight",
        "model.norm.weight": "output_norm.weight",
        "lm_head.weight": "output.weight",
    }
    exact = top_level_names.get(name)
    if exact is not None:
        return exact

    if "model.layers." not in name:
        return name

    # (sub-string to look for, GGUF suffix), checked in this order.
    per_layer_suffixes = (
        # Attention projections
        ("self_attn.q_proj.weight", "attn_q.weight"),
        ("self_attn.k_proj.weight", "attn_k.weight"),
        ("self_attn.v_proj.weight", "attn_v.weight"),
        ("self_attn.o_proj.weight", "attn_output.weight"),
        # Feed-forward projections
        ("mlp.gate_proj.weight", "ffn_gate.weight"),
        ("mlp.up_proj.weight", "ffn_up.weight"),
        ("mlp.down_proj.weight", "ffn_down.weight"),
        # Normalisation weights
        ("input_layernorm.weight", "attn_norm.weight"),
        ("post_attention_layernorm.weight", "ffn_norm.weight"),
        ("self_attn.q_norm.weight", "attn_q_norm.weight"),
        ("self_attn.k_norm.weight", "attn_k_norm.weight"),
    )

    layer_prefix = re.search(r"model\.layers\.(\d+)\.", name)
    if layer_prefix:
        block_index = layer_prefix.group(1)
        for needle, suffix in per_layer_suffixes:
            if needle in name:
                return f"blk.{block_index}.{suffix}"

    # No known sub-string matched: keep whatever follows the layer index.
    generic = re.search(r"model\.layers\.(\d+)\.(.+)", name)
    if generic:
        return f"blk.{generic.group(1)}.{generic.group(2)}"

    return name
162
+
163
def get_model_metadata(config_path=None) -> Dict[str, Any]:
    # Build the GGUF metadata dictionary for the model.
    #
    # config_path -- optional path to a Hugging Face style config.json. When
    #                it is missing, unreadable, or not a JSON object, a
    #                warning is printed (for the latter two) and the built-in
    #                defaults are returned.
    #
    # Returns a dict mapping GGUF metadata keys to plain Python values.
    #
    # Every config field is applied independently, so one absent field (for
    # example num_attention_heads) no longer prevents the remaining fields
    # from being applied.
    metadata = {
        "general.architecture": "llama",
        "general.name": "instella",
        "llama.context_length": 2048,  # from max_position_embeddings default
        "llama.embedding_length": 4096,  # from hidden_size default
        "llama.block_count": 32,  # from num_hidden_layers default
        "llama.feed_forward_length": 11008,  # from intermediate_size default
        "llama.attention.head_count": 32,  # from num_attention_heads default
        "llama.attention.head_count_kv": 32,  # from num_key_value_heads default
        "llama.attention.layer_norm_rms_epsilon": 1e-5,  # from rms_norm_eps default
        "llama.rope.dimension_count": 128,  # hidden_size / num_attention_heads
        "llama.vocab_size": 50304,  # from vocab_size default
        "tokenizer.ggml.model": "llama",
        # NOTE(review): the GGUF spec describes tokenizer.ggml.tokens as an
        # array of token strings; only the vocabulary size is stored here
        # because no tokenizer is loaded by this script.
        "tokenizer.ggml.tokens": 50304,
        "llama.rope.theta": 10000.0,  # from rope_theta default
    }

    if not config_path or not os.path.exists(config_path):
        return metadata

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Warning: Failed to load config file: {e}")
        return metadata

    if not isinstance(config, dict):
        print(f"Warning: Failed to load config file: expected a JSON object, got {type(config).__name__}")
        return metadata

    # Fields copied one-to-one from config.json to a GGUF key.
    direct_fields = (
        ("hidden_size", "llama.embedding_length"),
        ("num_hidden_layers", "llama.block_count"),
        ("num_attention_heads", "llama.attention.head_count"),
        ("intermediate_size", "llama.feed_forward_length"),
        ("vocab_size", "llama.vocab_size"),
        ("max_position_embeddings", "llama.context_length"),
        ("rope_theta", "llama.rope.theta"),
        ("rms_norm_eps", "llama.attention.layer_norm_rms_epsilon"),
    )
    for config_key, gguf_key in direct_fields:
        if config_key in config:
            metadata[gguf_key] = config[config_key]

    if "vocab_size" in config:
        metadata["tokenizer.ggml.tokens"] = config["vocab_size"]

    # Without an explicit key/value head count the model uses one KV head per
    # attention head.
    kv_heads = config.get("num_key_value_heads")
    if kv_heads is None:
        kv_heads = metadata["llama.attention.head_count"]
    metadata["llama.attention.head_count_kv"] = kv_heads

    # Derive the rope dimension from the final hidden size and head count so
    # it stays consistent whichever of the two the config supplied.
    hidden_size = metadata["llama.embedding_length"]
    head_count = metadata["llama.attention.head_count"]
    if isinstance(hidden_size, int) and isinstance(head_count, int) and head_count > 0:
        metadata["llama.rope.dimension_count"] = hidden_size // head_count

    # JSON may carry these as integers (e.g. 10000); keep them floats so they
    # are later classified as float metadata rather than integer metadata.
    for gguf_key in ("llama.rope.theta", "llama.attention.layer_norm_rms_epsilon"):
        value = metadata[gguf_key]
        if isinstance(value, int) and not isinstance(value, bool):
            metadata[gguf_key] = float(value)

    return metadata
220
+
221
def convert_model(model_dir: str, output_path: str):
    # Convert the safetensors checkpoint in model_dir into a GGUF file at
    # output_path, storing every tensor as F16.
    #
    # model_dir   -- directory holding config.json (optional) and either
    #                model.safetensors or one or more *.safetensors shards
    # output_path -- destination path of the GGUF file
    #
    # Raises FileNotFoundError when the directory has no safetensors file and
    # TypeError when a metadata value has a type that cannot be encoded.
    model_dir = Path(model_dir)
    config_path = model_dir / "config.json"

    # Collect the weight files. A sharded checkpoint spreads its tensors over
    # several files, so every shard is loaded, not only the first one found.
    single_file = model_dir / "model.safetensors"
    if single_file.exists():
        shard_paths = [single_file]
    else:
        shard_paths = sorted(model_dir.glob("*.safetensors"))
        if not shard_paths:
            raise FileNotFoundError(f"No safetensors files found in {model_dir}")

    tensors = {}
    for shard_path in shard_paths:
        print(f"Loading model from {shard_path}")
        tensors.update(load_safetensors(shard_path))

    # Get metadata
    metadata = get_model_metadata(config_path if config_path.exists() else None)

    # Classify each metadata value. bool is tested before int because bool is
    # a subclass of int in Python.
    metadata_kvs = []
    for key, value in metadata.items():
        if isinstance(value, str):
            val_type = GGUF_TYPE_STRING
        elif isinstance(value, bool):
            val_type = GGUF_TYPE_BOOL
        elif isinstance(value, float):
            val_type = GGUF_TYPE_FLOAT32
        elif isinstance(value, int):
            val_type = GGUF_TYPE_INT32
        elif isinstance(value, list):
            val_type = GGUF_TYPE_ARRAY
        else:
            # Fail before the output file is opened instead of midway through it.
            raise TypeError(f"Unsupported metadata value for {key!r}: {type(value).__name__}")
        metadata_kvs.append((key, val_type, value))

    # GGUF layout constants: tensor data is aligned to 32 bytes unless the
    # file declares general.alignment, and F16 is type id 1 in ggml's enum.
    alignment = 32
    ggml_type_f16 = 2 - 1
    bytes_per_value = 2

    # Plan where each tensor's data will sit, relative to the start of the
    # tensor-data section. Every tensor starts on an aligned boundary.
    planned = []
    cursor = 0
    for name, tensor in tensors.items():
        data_size = tensor.numel() * bytes_per_value
        padding = (-data_size) % alignment
        planned.append((name, map_tensor_name(name), tensor, cursor, padding))
        cursor += data_size + padding

    print(f"Writing GGUF file to {output_path}")
    with open(output_path, 'wb') as f:
        # Write header
        write_gguf_header(f, len(planned), len(metadata_kvs))

        # Write metadata
        for key, val_type, val in metadata_kvs:
            write_metadata_kv(f, key, val_type, val)

        # Write tensor information. The record is written here because it
        # must carry the data offset planned above: name, n_dimensions
        # (uint32), dimensions (uint64 each, fastest-varying first, i.e. the
        # reverse of the torch shape), type id (uint32), offset (uint64).
        for i, (name, gguf_name, tensor, offset, _) in enumerate(planned):
            print(f"Processing tensor {i+1}/{len(planned)}: {name} {tensor.shape}")
            name_bytes = gguf_name.encode('utf-8')
            dims = list(tensor.shape)
            f.write(struct.pack("<Q", len(name_bytes)))
            f.write(name_bytes)
            f.write(struct.pack("<I", len(dims)))
            for dim in reversed(dims):
                f.write(struct.pack("<Q", dim))
            f.write(struct.pack("<IQ", ggml_type_f16, offset))

        # The tensor-data section itself must begin on an aligned boundary.
        f.write(bytes((-f.tell()) % alignment))

        # Write tensor data, zero-padding after each tensor so the next one
        # starts at the offset recorded for it.
        print("Writing tensor data in F16 format...")
        for _, _, tensor, _, padding in planned:
            write_tensor_data(f, tensor)
            f.write(bytes(padding))

    print(f"Model converted and saved to {output_path}")
    print(f"File size: {os.path.getsize(output_path) / (1024*1024):.2f} MB")
275
+
276
if __name__ == "__main__":
    # Command-line entry point: takes the model directory and the output
    # path as two positional arguments and runs the conversion.
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Convert Instella model to GGUF format with F16 precision")
    cli.add_argument("model_dir", help="Directory containing the model files")
    cli.add_argument("output_path", help="Path to save the GGUF model")
    options = cli.parse_args()

    convert_model(options.model_dir, options.output_path)
286
+ """
287
+
288
+ with open("convert_instella_f16.py", "w") as f:
289
+ f.write(script_content)
290
+ return "convert_instella_f16.py"
291
+
292
def convert_instella_model(model_dir="huggintuned", output_path=None):
    """Convert the Instella model to GGUF format using F16 precision.

    Installs the conversion dependencies, generates the stand-alone
    conversion script and runs it in a subprocess.

    Args:
        model_dir: Directory containing the model files. Defaults to
            "huggintuned", the directory this script was written for.
        output_path: Destination of the GGUF file. Defaults to
            "model.gguf" inside ``model_dir``.

    Raises:
        subprocess.CalledProcessError: If pip or the conversion script exits
            with a non-zero status.
        FileNotFoundError: If the conversion finished without creating the
            output file.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import sys

    # Use the running interpreter for both pip and the conversion script.
    # Bare "pip" / "python" are resolved through PATH and may belong to a
    # different environment than the one executing this code.
    python = sys.executable

    # Install required dependencies
    subprocess.run(
        [python, "-m", "pip", "install", "safetensors", "torch", "numpy"],
        check=True,
    )

    # Create conversion script
    script_path = create_instella_conversion_script()

    # Set paths
    if output_path is None:
        output_path = os.path.join(model_dir, "model.gguf")

    # Run conversion
    try:
        print("Starting Instella model conversion with F16 precision...")
        subprocess.run([python, script_path, model_dir, output_path], check=True)

        # Verify the output file
        if os.path.exists(output_path):
            size_mb = os.path.getsize(output_path) / (1024 * 1024)
            print(f"Conversion successful! Output file size: {size_mb:.2f} MB")
        else:
            raise FileNotFoundError("Output file was not created")

    except subprocess.CalledProcessError as e:
        print(f"Error during conversion: {e}")
        raise
    except Exception as e:
        # Report and re-raise so the caller still sees the failure.
        print(f"Unexpected error: {e}")
        raise


if __name__ == "__main__":
    convert_instella_model()