Naphula committed on
Commit
22115d9
·
verified ·
1 Parent(s): 34b000c

Upload 3 files

Browse files
gguf_to_safetensors_v1.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import torch
4
+ import numpy as np
5
+ from safetensors.torch import save_file
6
+ from safetensors import safe_open
7
+ from typing import Dict, Tuple
8
+ from gguf import GGUFReader, dequantize
9
+ from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType, Keys
10
+
11
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, list]:
    """Open a GGUF file and collect per-tensor metadata.

    Returns the open reader together with a list of dicts — one per tensor —
    holding its name, shape, element/byte counts, data offset and quant type.
    """
    reader = GGUFReader(gguf_path)
    tensors_metadata = [
        {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'data_offset': tensor.data_offset,
            'type': tensor.tensor_type,
        }
        for tensor in reader.tensors
    ]
    return reader, tensors_metadata
26
+
27
+
28
def convert_gguf_to_safetensors(gguf_path: str, output_path: str, use_bf16: bool) -> None:
    """Dequantize every tensor in a GGUF file and save them as one safetensors file.

    Args:
        gguf_path: Path to the input GGUF file.
        output_path: Path of the .safetensors file to write.
        use_bf16: Convert tensors to bfloat16 instead of the default float16.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted {len(tensors_metadata)} tensors from GGUF file")

    target_dtype = torch.bfloat16 if use_bf16 else torch.float16
    tensors_dict: dict[str, torch.Tensor] = {}

    for i, tensor_info in enumerate(tensors_metadata):
        tensor_name = tensor_info['name']

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the data from the reader's memory-mapped buffer.
        weights = dequantize(tensor_data.data, tensor_data.tensor_type).copy()

        try:
            if use_bf16:
                print("Attempting BF16 conversion")
                # Go through float32 first: numpy has no native bfloat16 dtype.
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float32)
                weights_tensor = weights_tensor.to(torch.bfloat16)
            else:
                print("Using FP16 conversion.")
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float16)
        except Exception as e:
            # Fallback: force a float32 copy first, then convert to the
            # *requested* dtype (the original fallback always produced FP16
            # even when BF16 had been asked for).
            print(f"Error during dtype conversion for tensor '{tensor_name}': {e}")
            weights_tensor = torch.from_numpy(weights.astype(np.float32)).to(target_dtype)

        print(f"dequantize tensor: {tensor_name} | Shape: {weights_tensor.shape} | Type: {weights_tensor.dtype}")
        tensors_dict[tensor_name] = weights_tensor
        # Drop the numpy copy promptly to limit peak memory.
        del weights

    # NOTE(review): FILE_TYPE is the GGUF quantization file type, not an
    # architecture name — kept as-is for output compatibility.
    metadata = {"modelspec.architecture": f"{reader.get_field(Keys.General.FILE_TYPE)}", "description": "Model converted from gguf."}

    save_file(tensors_dict, output_path, metadata=metadata)
    print("Conversion complete!")
67
+
68
def main() -> None:
    """Parse CLI arguments and run the GGUF -> safetensors conversion."""
    parser = argparse.ArgumentParser(description="Convert GGUF files to safetensors format.")
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Path to the output safetensors file.")
    # Fixed typo in the original help text: "(onry cuda)".
    parser.add_argument("--bf16", action="store_true", help="(CUDA only) Convert tensors to BF16 format instead of FP16.")

    args = parser.parse_args()

    convert_gguf_to_safetensors(args.input, args.output, args.bf16)


if __name__ == "__main__":
    main()
gguf_to_safetensors_v2.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import torch
4
+ import numpy as np
5
+ from safetensors.torch import save_file
6
+ from safetensors import safe_open
7
+ from typing import Dict, Tuple, List
8
+ from gguf import GGUFReader, dequantize
9
+
10
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, List[Dict]]:
    """Load GGUF file and extract metadata for all tensors.

    Returns the open reader plus one metadata dict per tensor (name, shape,
    element count, byte count and quantization type).
    """
    print(f"Loading GGUF file: {gguf_path}")
    reader = GGUFReader(gguf_path, 'r')
    tensors_metadata: List[Dict] = [
        {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'type': tensor.tensor_type,
        }
        for tensor in reader.tensors
    ]
    return reader, tensors_metadata
25
+
26
def get_dequantized_tensor_size_in_bytes(tensor_info: Dict, use_bf16: bool) -> int:
    """Calculates the size of a tensor after it has been dequantized to FP16 or BF16.

    Both FP16 and BF16 occupy two bytes per element, so ``use_bf16`` does not
    change the result; the parameter is kept for interface symmetry.
    """
    return 2 * tensor_info['n_elements']
30
+
31
def get_hf_name(gguf_name: str) -> str:
    """Translates a GGUF tensor name to its Hugging Face equivalent for Llama/Mistral models.

    Unmapped names are returned unchanged (with a printed warning).
    """
    top_level = {
        "token_embd.weight": "model.embed_tokens.weight",
        "output_norm.weight": "model.norm.weight",
        "output.weight": "lm_head.weight",
    }
    mapped = top_level.get(gguf_name)
    if mapped is not None:
        return mapped

    if gguf_name.startswith("blk."):
        # Per-layer tensors look like "blk.<layer>.<part>".
        _, layer_num, *tail = gguf_name.split('.')
        layer_part = ".".join(tail)

        per_block = {
            "attn_norm.weight": "input_layernorm.weight",
            "ffn_norm.weight": "post_attention_layernorm.weight",
            "attn_q.weight": "self_attn.q_proj.weight",
            "attn_k.weight": "self_attn.k_proj.weight",
            "attn_v.weight": "self_attn.v_proj.weight",
            "attn_output.weight": "self_attn.o_proj.weight",
            "ffn_gate.weight": "mlp.gate_proj.weight",
            "ffn_up.weight": "mlp.up_proj.weight",
            "ffn_down.weight": "mlp.down_proj.weight",
        }
        if layer_part in per_block:
            return f"model.layers.{layer_num}.{per_block[layer_part]}"

    print(f"Warning: No mapping found for tensor '{gguf_name}'. Using original name.")
    return gguf_name
63
+
64
def convert_gguf_to_safetensors_by_size(gguf_path: str, output_path: str, use_bf16: bool, shard_size_gb: float):
    """Converts a GGUF file to .safetensors, sharding and renaming tensors for HF compatibility.

    Args:
        gguf_path: Input GGUF file.
        output_path: Base path; shards are written next to it as
            ``<base>-NNNNN-of-TTTTT.safetensors``.
        use_bf16: Convert tensors to bfloat16 instead of the default float16.
        shard_size_gb: Soft upper bound on each shard's dequantized size, in GB.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted metadata for {len(tensors_metadata)} tensors from GGUF file.")

    shard_size_bytes = int(shard_size_gb * 1024**3)
    print(f"Target shard size set to ~{shard_size_gb} GB ({shard_size_bytes} bytes).")

    output_dir = os.path.dirname(output_path)
    if not output_dir:
        output_dir = "."
    base_name = os.path.basename(output_path).replace('.safetensors', '')

    tensors_in_current_chunk: dict[str, torch.Tensor] = {}
    current_chunk_size_bytes = 0
    num_chunks = 0

    # First pass (metadata only): count how many shards will be produced so
    # the "-NNNNN-of-TTTTT" file names can be written correctly on the fly.
    # NOTE: this loop must mirror the boundary condition of the main loop below.
    total_shards = 0
    temp_size = 0
    for tensor_info in tensors_metadata:
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)
        if temp_size > 0 and (temp_size + dequantized_size) > shard_size_bytes:
            total_shards += 1
            temp_size = 0
        temp_size += dequantized_size
    if temp_size > 0:
        total_shards += 1
    print(f"Model will be split into {total_shards} shards.")

    # Second pass: dequantize each tensor and flush the current shard to disk
    # whenever adding the next tensor would push it past the size limit.
    for i, tensor_info in enumerate(tensors_metadata):
        gguf_tensor_name = tensor_info['name']
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)

        if current_chunk_size_bytes > 0 and (current_chunk_size_bytes + dequantized_size) > shard_size_bytes:
            num_chunks += 1
            chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")

            print(f"\nCurrent chunk size ({current_chunk_size_bytes / 1024**3:.2f} GB) exceeds limit.")
            print(f"Saving chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
            save_file(tensors_in_current_chunk, chunk_path)

            # save_file has already serialized the tensors; start a fresh chunk.
            tensors_in_current_chunk.clear()
            current_chunk_size_bytes = 0

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the data from the reader's memory-mapped buffer.
        weights_np = dequantize(tensor_data.data, tensor_data.tensor_type).copy()
        target_dtype = torch.bfloat16 if use_bf16 else torch.float16

        try:
            weights_tensor = torch.from_numpy(weights_np).to(target_dtype)
        except Exception as e:
            # Fallback: force a float32 copy first, then convert.
            print(f"Warning: Could not convert {gguf_tensor_name} directly. Error: {e}. Using float32 fallback.")
            weights_tensor = torch.from_numpy(weights_np.astype(np.float32)).to(target_dtype)

        # --- CORRECTED RENAMING LOGIC ---
        hf_tensor_name = get_hf_name(gguf_tensor_name)

        print(f"Processed tensor ({i+1}/{len(tensors_metadata)}): {gguf_tensor_name} -> {hf_tensor_name} | Size: {dequantized_size/1024**2:.2f} MB")

        tensors_in_current_chunk[hf_tensor_name] = weights_tensor
        current_chunk_size_bytes += dequantized_size

        # Drop the intermediate references promptly to limit peak memory.
        del weights_np
        del tensor_data

    # Flush whatever remains as the final shard.
    if tensors_in_current_chunk:
        num_chunks += 1
        chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")
        print(f"\nSaving final chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
        save_file(tensors_in_current_chunk, chunk_path)

    print("All tensors have been dequantized, renamed, and saved into sharded safetensor files.")
136
+
137
def main():
    """CLI entry point: parse arguments and run the sharded conversion."""
    parser = argparse.ArgumentParser(
        description="Convert GGUF to HF-compatible sharded safetensors, renaming tensors correctly."
    )
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Base path for the final output sharded .safetensors files.")
    parser.add_argument("--bf16", action="store_true", help="Convert tensors to BF16 format instead of the default FP16.")
    parser.add_argument(
        "--shard-size",
        type=float,
        default=5.0,
        help="Maximum size of each shard in Gigabytes (GB). Default: 5.0"
    )
    opts = parser.parse_args()

    convert_gguf_to_safetensors_by_size(opts.input, opts.output, opts.bf16, opts.shard_size)


if __name__ == "__main__":
    main()
safetensors_meta_ripper_v1.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import json
4
+ import glob
5
+ from safetensors import safe_open
6
+ from gguf import GGUFReader
7
+ from gguf.constants import Keys
8
+ from typing import List, Dict, Any
9
+
10
def create_safetensors_index(shards_dir: str, output_dir: str) -> None:
    """Creates the model.safetensors.index.json file by scanning shard files.

    Maps every tensor name to the shard file containing it and records the
    combined on-disk size of all successfully read shards.
    """
    shard_files = sorted(glob.glob(os.path.join(shards_dir, '*.safetensors')))

    if not shard_files:
        print(f"Error: No .safetensors files found in directory: {shards_dir}")
        return

    print(f"Found {len(shard_files)} shard files to index.")

    weight_map: Dict[str, str] = {}
    total_size = 0

    for shard_file in shard_files:
        shard_basename = os.path.basename(shard_file)
        try:
            with safe_open(shard_file, framework="pt", device="cpu") as f:
                for tensor_name in f.keys():
                    weight_map[tensor_name] = shard_basename
            total_size += os.path.getsize(shard_file)
        except Exception as e:
            # Best-effort: an unreadable shard is skipped (its size excluded).
            print(f"Warning: Could not process shard {shard_basename}. Error: {e}")
            continue

    index_data: Dict[str, Any] = {
        "metadata": {"total_size": total_size},
        "weight_map": weight_map,
    }

    index_filepath = os.path.join(output_dir, "model.safetensors.index.json")
    try:
        with open(index_filepath, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, indent=2)
        print(f"Successfully created safetensors index file: {index_filepath}")
    except Exception as e:
        print(f"Error: Failed to write index file. Error: {e}")
46
+
47
def extract_and_save_gguf_configs(reader: GGUFReader, output_dir: str) -> None:
    """Extracts metadata from GGUF and saves config, tokenizer, and generation files.

    Each output file (config.json, tokenizer_config.json, tokenizer.json,
    special_tokens_map.json, generation_config.json) is produced in its own
    try block, so a failure in one section only prints a warning and the
    remaining files are still attempted.

    NOTE(review): this relies on ``reader.get_int_value`` / ``get_float_value``
    / ``get_str_value`` helpers; stock gguf-py's GGUFReader only documents
    ``get_field`` — confirm the reader in use actually provides them,
    otherwise every section falls through to its warning path.
    """

    config = {}
    # --- config.json ---
    try:
        # NOTE(review): ``.name`` on a reader field looks like it yields the
        # field's key name rather than the stored architecture string —
        # verify this produces e.g. "llama" and not "general.architecture".
        arch = reader.get_field(Keys.General.ARCHITECTURE).name.lower()
        model_type_map = {"llama": "llama", "mistral": "mistral", "gemma": "gemma"}
        model_type = model_type_map.get(arch, arch)

        # GGUF metadata keys are namespaced by architecture,
        # e.g. "llama.embedding_length".
        config = {
            "architectures": [arch.capitalize()],
            "model_type": model_type,
            "hidden_size": reader.get_int_value(f"{model_type}.embedding_length"),
            "intermediate_size": reader.get_int_value(f"{model_type}.feed_forward_length"),
            "num_attention_heads": reader.get_int_value(f"{model_type}.attention.head_count"),
            "num_hidden_layers": reader.get_int_value(f"{model_type}.block_count"),
            "num_key_value_heads": reader.get_int_value(f"{model_type}.attention.head_count_kv"),
            "rms_norm_eps": reader.get_float_value(f"{model_type}.attention.layer_norm_rms_epsilon"),
            "vocab_size": len(reader.get_field(Keys.Tokenizer.VOCAB)),
            "rope_theta": reader.get_float_value(f"{model_type}.rope.freq_base"),
            "max_position_embeddings": reader.get_int_value(f"{model_type}.context_length"),
        }
        with open(os.path.join(output_dir, "config.json"), 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        print("Created config.json")
    except Exception as e:
        print(f"Warning: Could not create config.json. Some values may be missing. Error: {e}")

    # --- tokenizer_config.json ---
    try:
        # Falls back to 4096 if the config.json section above failed.
        tokenizer_config = {
            "model_max_length": config.get("max_position_embeddings", 4096),
            "padding_side": "left",
            # NOTE(review): hard-coded for Llama-family models — confirm for
            # other architectures.
            "tokenizer_class": "LlamaTokenizer",
        }
        # Add chat template if it exists
        try:
            chat_template = reader.get_str_value("tokenizer.chat_template")
            tokenizer_config["chat_template"] = chat_template
        except (KeyError, ValueError):
            pass  # Field does not exist

        with open(os.path.join(output_dir, "tokenizer_config.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_config, f, indent=2)
        print("Created tokenizer_config.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer_config.json. Error: {e}")

    # --- tokenizer.json ---
    try:
        # NOTE(review): assumes each vocab entry exposes a ``.piece``
        # attribute — confirm against the reader's field representation.
        vocab = [item.piece for item in reader.get_field(Keys.Tokenizer.VOCAB)]
        merges = reader.get_field(Keys.Tokenizer.MERGES)

        tokenizer_data = {
            "version": "1.0",
            "model": {
                "type": "BPE",
                # Token id = position in the GGUF vocab list.
                "vocab": {token: i for i, token in enumerate(vocab)},
                "merges": merges,
            },
            "added_tokens": [],
        }
        # Compact output (no indent) — tokenizer.json files are large.
        with open(os.path.join(output_dir, "tokenizer.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_data, f, indent=None, separators=(',', ':'))
        print("Created tokenizer.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer.json. Error: {e}")

    # --- special_tokens_map.json ---
    try:
        special_map = {}
        # Use a helper to avoid crashing on missing keys
        # NOTE(review): ``vocab`` is defined in the tokenizer.json section
        # above; if that section failed, the NameError raised here is not in
        # the helper's except tuple and is swallowed by this section's outer
        # except instead.
        def add_special_token(key_name, gguf_id_key):
            try:
                token_id = reader.get_int_value(gguf_id_key)
                token_str = vocab[token_id]
                special_map[key_name] = token_str
            except (KeyError, ValueError, IndexError):
                pass

        add_special_token("bos_token", "tokenizer.ggml.bos_token_id")
        add_special_token("eos_token", "tokenizer.ggml.eos_token_id")
        add_special_token("unk_token", "tokenizer.ggml.unknown_token_id")

        with open(os.path.join(output_dir, "special_tokens_map.json"), 'w', encoding='utf-8') as f:
            json.dump(special_map, f, indent=2)
        print("Created special_tokens_map.json")
    except Exception as e:
        print(f"Warning: Could not create special_tokens_map.json. Error: {e}")

    # --- generation_config.json ---
    try:
        gen_config = {"_from_model_config": True}
        try:
            gen_config["bos_token_id"] = reader.get_int_value("tokenizer.ggml.bos_token_id")
            gen_config["eos_token_id"] = reader.get_int_value("tokenizer.ggml.eos_token_id")
        except (KeyError, ValueError):
            pass

        with open(os.path.join(output_dir, "generation_config.json"), 'w', encoding='utf-8') as f:
            json.dump(gen_config, f, indent=2)
        print("Created generation_config.json")
    except Exception as e:
        print(f"Warning: Could not create generation_config.json. Error: {e}")
152
+
153
def main():
    """Validate CLI inputs, then emit config files and the safetensors index."""
    parser = argparse.ArgumentParser(
        description="Generate safetensors index and config files for a sharded model directory."
    )
    parser.add_argument(
        "--gguf-file",
        required=True,
        help="Path to the original GGUF file to read metadata from."
    )
    parser.add_argument(
        "--shards-dir",
        required=True,
        help="Path to the directory containing the sharded .safetensors files."
    )
    args = parser.parse_args()

    gguf_file = args.gguf_file
    shards_dir = args.shards_dir

    # Guard clauses: bail out early on bad paths.
    if not os.path.isfile(gguf_file):
        print(f"Error: GGUF file not found at {gguf_file}")
        return
    if not os.path.isdir(shards_dir):
        print(f"Error: Shards directory not found at {shards_dir}")
        return

    print(f"Loading GGUF metadata from: {gguf_file}")
    reader = GGUFReader(gguf_file, 'r')

    # Config files first (from the GGUF header), then the index (from the
    # actual shard files); both are written into the shards directory.
    extract_and_save_gguf_configs(reader, shards_dir)
    create_safetensors_index(shards_dir, shards_dir)

    print("\nMetadata ripping complete.")


if __name__ == "__main__":
    main()