Spaces:
Running
Running
Upload 3 files
Browse files- gguf_to_safetensors_v1.py +79 -0
- gguf_to_safetensors_v2.py +155 -0
- safetensors_meta_ripper_v1.py +188 -0
gguf_to_safetensors_v1.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import argparse
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
from safetensors.torch import save_file
|
| 6 |
+
from safetensors import safe_open
|
| 7 |
+
from typing import Dict, Tuple
|
| 8 |
+
from gguf import GGUFReader, dequantize
|
| 9 |
+
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType, Keys
|
| 10 |
+
|
| 11 |
+
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, list]:
    """Open a GGUF file and collect per-tensor metadata.

    Returns the open ``GGUFReader`` together with one dict per tensor
    describing its name, shape, element/byte counts, data offset and
    quantization type.
    """
    reader = GGUFReader(gguf_path)
    metadata_entries = []
    for t in reader.tensors:
        entry = {
            'name': t.name,
            'shape': tuple(t.shape.tolist()),
            'n_elements': t.n_elements,
            'n_bytes': t.n_bytes,
            'data_offset': t.data_offset,
            'type': t.tensor_type,
        }
        metadata_entries.append(entry)
    return reader, metadata_entries
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def convert_gguf_to_safetensors(gguf_path: str, output_path: str, use_bf16: bool) -> None:
    """Dequantize every tensor of a GGUF file into one .safetensors file.

    Args:
        gguf_path: Path to the input GGUF file.
        output_path: Path of the .safetensors file to write.
        use_bf16: Convert tensors to bfloat16 instead of the default float16.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted {len(tensors_metadata)} tensors from GGUF file")

    tensors_dict: dict[str, torch.Tensor] = {}

    for i, tensor_info in enumerate(tensors_metadata):
        tensor_name = tensor_info['name']

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the dequantized array from the memory-mapped
        # GGUF buffer so it stays valid independently of the reader.
        weights = dequantize(tensor_data.data, tensor_data.tensor_type).copy()

        try:
            # Pick the target 16-bit dtype for the converted tensor.
            if use_bf16:
                print("Attempting BF16 conversion")
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float32)
                weights_tensor = weights_tensor.to(torch.bfloat16)
            else:
                print("Using FP16 conversion.")
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float16)

            weights_hf = weights_tensor
        except Exception as e:
            # Fallback: force an explicit float32 copy, then downcast to FP16.
            print(f"Error during BF16 conversion for tensor '{tensor_name}': {e}")
            weights_tensor = torch.from_numpy(weights.astype(np.float32)).to(torch.float16)
            weights_hf = weights_tensor

        print(f"dequantize tensor: {tensor_name} | Shape: {weights_hf.shape} | Type: {weights_tensor.dtype}")
        # Drop intermediate references early to keep peak memory down.
        del weights_tensor
        del weights

        tensors_dict[tensor_name] = weights_hf
        del weights_hf

    # BUGFIX: "modelspec.architecture" previously stored Keys.General.FILE_TYPE
    # (the quantization/file-type field), not the model architecture.
    metadata = {
        "modelspec.architecture": f"{reader.get_field(Keys.General.ARCHITECTURE)}",
        "description": "Model converted from gguf.",
    }

    save_file(tensors_dict, output_path, metadata=metadata)
    print("Conversion complete!")
|
| 67 |
+
|
| 68 |
+
def main():
    """CLI entry point: parse arguments and run the GGUF -> safetensors conversion."""
    parser = argparse.ArgumentParser(description="Convert GGUF files to safetensors format.")
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Path to the output safetensors file.")
    # BUGFIX: help-text typo "(onry cuda)" corrected.
    parser.add_argument("--bf16", action="store_true", help="(only CUDA) Convert tensors to BF16 format instead of FP16.")

    args = parser.parse_args()

    convert_gguf_to_safetensors(args.input, args.output, args.bf16)

if __name__ == "__main__":
    main()
|
gguf_to_safetensors_v2.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import argparse
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
from safetensors.torch import save_file
|
| 6 |
+
from safetensors import safe_open
|
| 7 |
+
from typing import Dict, Tuple, List
|
| 8 |
+
from gguf import GGUFReader, dequantize
|
| 9 |
+
|
| 10 |
+
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, List[Dict]]:
    """Open a GGUF file and gather metadata for every tensor it contains.

    Returns the reader plus a list with one dict per tensor (name, shape,
    element count, byte count and quantization type).
    """
    print(f"Loading GGUF file: {gguf_path}")
    reader = GGUFReader(gguf_path, 'r')
    entries: List[Dict] = []
    for t in reader.tensors:
        entries.append({
            'name': t.name,
            'shape': tuple(t.shape.tolist()),
            'n_elements': t.n_elements,
            'n_bytes': t.n_bytes,
            'type': t.tensor_type,
        })
    return reader, entries
|
| 25 |
+
|
| 26 |
+
def get_dequantized_tensor_size_in_bytes(tensor_info: Dict, use_bf16: bool) -> int:
    """Size in bytes of a tensor once dequantized to FP16 or BF16.

    Both target dtypes are 16-bit wide, so the result does not depend on
    *use_bf16*: it is always two bytes per element.
    """
    return 2 * tensor_info['n_elements']
|
| 30 |
+
|
| 31 |
+
def get_hf_name(gguf_name: str) -> str:
    """Translate a GGUF tensor name to its Hugging Face equivalent (Llama/Mistral)."""
    # Tensors that live outside the transformer blocks.
    top_level = {
        "token_embd.weight": "model.embed_tokens.weight",
        "output_norm.weight": "model.norm.weight",
        "output.weight": "lm_head.weight",
    }
    mapped = top_level.get(gguf_name)
    if mapped is not None:
        return mapped

    if gguf_name.startswith("blk."):
        # "blk.<layer>.<rest...>" -> "model.layers.<layer>.<mapped rest>"
        _, layer_num, *rest = gguf_name.split('.')
        layer_part = ".".join(rest)

        per_block = {
            "attn_norm.weight": "input_layernorm.weight",
            "ffn_norm.weight": "post_attention_layernorm.weight",
            "attn_q.weight": "self_attn.q_proj.weight",
            "attn_k.weight": "self_attn.k_proj.weight",
            "attn_v.weight": "self_attn.v_proj.weight",
            "attn_output.weight": "self_attn.o_proj.weight",
            "ffn_gate.weight": "mlp.gate_proj.weight",
            "ffn_up.weight": "mlp.up_proj.weight",
            "ffn_down.weight": "mlp.down_proj.weight",
        }

        if layer_part in per_block:
            return f"model.layers.{layer_num}.{per_block[layer_part]}"

    print(f"Warning: No mapping found for tensor '{gguf_name}'. Using original name.")
    return gguf_name
|
| 63 |
+
|
| 64 |
+
def convert_gguf_to_safetensors_by_size(gguf_path: str, output_path: str, use_bf16: bool, shard_size_gb: float):
    """Converts a GGUF file to .safetensors, sharding and renaming tensors for HF compatibility.

    Args:
        gguf_path: Path to the source GGUF file.
        output_path: Base path for the shards; files are written next to it
            as "<base>-NNNNN-of-MMMMM.safetensors".
        use_bf16: Dequantize to bfloat16 instead of float16.
        shard_size_gb: Soft cap on each shard's dequantized size, in GB.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted metadata for {len(tensors_metadata)} tensors from GGUF file.")

    shard_size_bytes = int(shard_size_gb * 1024**3)
    print(f"Target shard size set to ~{shard_size_gb} GB ({shard_size_bytes} bytes).")

    output_dir = os.path.dirname(output_path)
    if not output_dir:
        output_dir = "."
    base_name = os.path.basename(output_path).replace('.safetensors', '')

    tensors_in_current_chunk: dict[str, torch.Tensor] = {}
    current_chunk_size_bytes = 0
    num_chunks = 0

    # Dry-run pass: count how many shards will be produced so every shard
    # filename can embed the final "-of-NNNNN" total up front. The flush
    # condition below mirrors the real loop exactly.
    total_shards = 0
    temp_size = 0
    for tensor_info in tensors_metadata:
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)
        if temp_size > 0 and (temp_size + dequantized_size) > shard_size_bytes:
            total_shards += 1
            temp_size = 0
        temp_size += dequantized_size
    if temp_size > 0:
        total_shards += 1
    print(f"Model will be split into {total_shards} shards.")

    for i, tensor_info in enumerate(tensors_metadata):
        gguf_tensor_name = tensor_info['name']
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)

        # Flush the accumulated chunk *before* adding a tensor that would push
        # it past the shard limit (same rule as the dry-run pass above).
        if current_chunk_size_bytes > 0 and (current_chunk_size_bytes + dequantized_size) > shard_size_bytes:
            num_chunks += 1
            chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")

            print(f"\nCurrent chunk size ({current_chunk_size_bytes / 1024**3:.2f} GB) exceeds limit.")
            print(f"Saving chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
            save_file(tensors_in_current_chunk, chunk_path)

            tensors_in_current_chunk.clear()
            current_chunk_size_bytes = 0

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the dequantized array from the memory-mapped GGUF data.
        weights_np = dequantize(tensor_data.data, tensor_data.tensor_type).copy()
        target_dtype = torch.bfloat16 if use_bf16 else torch.float16

        try:
            weights_tensor = torch.from_numpy(weights_np).to(target_dtype)
        except Exception as e:
            # Fallback: force a float32 copy first, then downcast.
            print(f"Warning: Could not convert {gguf_tensor_name} directly. Error: {e}. Using float32 fallback.")
            weights_tensor = torch.from_numpy(weights_np.astype(np.float32)).to(target_dtype)

        # --- CORRECTED RENAMING LOGIC ---
        # Translate GGUF names (blk.N.*) to HF names (model.layers.N.*).
        hf_tensor_name = get_hf_name(gguf_tensor_name)

        print(f"Processed tensor ({i+1}/{len(tensors_metadata)}): {gguf_tensor_name} -> {hf_tensor_name} | Size: {dequantized_size/1024**2:.2f} MB")

        tensors_in_current_chunk[hf_tensor_name] = weights_tensor
        current_chunk_size_bytes += dequantized_size

        # Release the numpy buffer and reader handle promptly to bound memory.
        del weights_np
        del tensor_data

    # Save whatever remains as the final shard.
    if tensors_in_current_chunk:
        num_chunks += 1
        chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")
        print(f"\nSaving final chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
        save_file(tensors_in_current_chunk, chunk_path)

    print("All tensors have been dequantized, renamed, and saved into sharded safetensor files.")
|
| 136 |
+
|
| 137 |
+
def main():
    """CLI entry point for the sharded GGUF -> safetensors converter."""
    args = _parse_args()
    convert_gguf_to_safetensors_by_size(args.input, args.output, args.bf16, args.shard_size)


def _parse_args():
    """Build the argument parser and parse the command line."""
    parser = argparse.ArgumentParser(
        description="Convert GGUF to HF-compatible sharded safetensors, renaming tensors correctly."
    )
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Base path for the final output sharded .safetensors files.")
    parser.add_argument("--bf16", action="store_true", help="Convert tensors to BF16 format instead of the default FP16.")
    parser.add_argument(
        "--shard-size",
        type=float,
        default=5.0,
        help="Maximum size of each shard in Gigabytes (GB). Default: 5.0"
    )
    return parser.parse_args()


if __name__ == "__main__":
    main()
|
safetensors_meta_ripper_v1.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import argparse
|
| 3 |
+
import json
|
| 4 |
+
import glob
|
| 5 |
+
from safetensors import safe_open
|
| 6 |
+
from gguf import GGUFReader
|
| 7 |
+
from gguf.constants import Keys
|
| 8 |
+
from typing import List, Dict, Any
|
| 9 |
+
|
| 10 |
+
def create_safetensors_index(shards_dir: str, output_dir: str) -> None:
    """Creates the model.safetensors.index.json file by scanning shard files."""
    pattern = os.path.join(shards_dir, '*.safetensors')
    shards = sorted(glob.glob(pattern))

    if not shards:
        print(f"Error: No .safetensors files found in directory: {shards_dir}")
        return

    print(f"Found {len(shards)} shard files to index.")

    index_data: Dict[str, Any] = {"metadata": {}, "weight_map": {}}
    accumulated_size = 0

    for shard_path in shards:
        shard_basename = os.path.basename(shard_path)
        try:
            # Only the tensor names are needed; no tensor data is loaded.
            with safe_open(shard_path, framework="pt", device="cpu") as fh:
                for tensor_name in fh.keys():
                    index_data["weight_map"][tensor_name] = shard_basename

            accumulated_size += os.path.getsize(shard_path)
        except Exception as e:
            # Best effort: skip unreadable shards but keep indexing the rest.
            print(f"Warning: Could not process shard {shard_basename}. Error: {e}")
            continue

    index_data["metadata"]["total_size"] = accumulated_size

    index_filepath = os.path.join(output_dir, "model.safetensors.index.json")
    try:
        with open(index_filepath, 'w', encoding='utf-8') as fh:
            json.dump(index_data, fh, indent=2)
        print(f"Successfully created safetensors index file: {index_filepath}")
    except Exception as e:
        print(f"Error: Failed to write index file. Error: {e}")
|
| 46 |
+
|
| 47 |
+
def extract_and_save_gguf_configs(reader: GGUFReader, output_dir: str) -> None:
    """Extracts metadata from GGUF and saves config, tokenizer, and generation files.

    Each file is written on a best-effort basis: any failure is reported as a
    warning and the remaining files are still attempted.

    NOTE(review): this relies on `reader.get_int_value` / `get_float_value` /
    `get_str_value` helper methods — confirm the installed `gguf` package
    actually provides them (upstream GGUFReader exposes `get_field`, which
    returns a ReaderField, not a plain value).
    """

    config = {}
    # --- config.json ---
    try:
        # NOTE(review): `.name` on the field returned by get_field is
        # presumably the architecture string here — verify; in upstream
        # gguf-py, ReaderField.name is the key name, not its value.
        arch = reader.get_field(Keys.General.ARCHITECTURE).name.lower()
        model_type_map = {"llama": "llama", "mistral": "mistral", "gemma": "gemma"}
        model_type = model_type_map.get(arch, arch)

        # Hyperparameters are read from "<model_type>.*" keys of the GGUF header.
        config = {
            "architectures": [arch.capitalize()],
            "model_type": model_type,
            "hidden_size": reader.get_int_value(f"{model_type}.embedding_length"),
            "intermediate_size": reader.get_int_value(f"{model_type}.feed_forward_length"),
            "num_attention_heads": reader.get_int_value(f"{model_type}.attention.head_count"),
            "num_hidden_layers": reader.get_int_value(f"{model_type}.block_count"),
            "num_key_value_heads": reader.get_int_value(f"{model_type}.attention.head_count_kv"),
            "rms_norm_eps": reader.get_float_value(f"{model_type}.attention.layer_norm_rms_epsilon"),
            "vocab_size": len(reader.get_field(Keys.Tokenizer.VOCAB)),
            "rope_theta": reader.get_float_value(f"{model_type}.rope.freq_base"),
            "max_position_embeddings": reader.get_int_value(f"{model_type}.context_length"),
        }
        with open(os.path.join(output_dir, "config.json"), 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        print("Created config.json")
    except Exception as e:
        print(f"Warning: Could not create config.json. Some values may be missing. Error: {e}")

    # --- tokenizer_config.json ---
    try:
        tokenizer_config = {
            # Falls back to 4096 when config.json generation failed above.
            "model_max_length": config.get("max_position_embeddings", 4096),
            "padding_side": "left",
            # NOTE(review): hard-coded LlamaTokenizer — confirm for non-Llama archs.
            "tokenizer_class": "LlamaTokenizer",
        }
        # Add chat template if it exists
        try:
            chat_template = reader.get_str_value("tokenizer.chat_template")
            tokenizer_config["chat_template"] = chat_template
        except (KeyError, ValueError):
            pass  # Field does not exist

        with open(os.path.join(output_dir, "tokenizer_config.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_config, f, indent=2)
        print("Created tokenizer_config.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer_config.json. Error: {e}")

    # --- tokenizer.json ---
    try:
        # NOTE(review): assumes the VOCAB field iterates items exposing a
        # `.piece` attribute — verify against the installed gguf version.
        vocab = [item.piece for item in reader.get_field(Keys.Tokenizer.VOCAB)]
        merges = reader.get_field(Keys.Tokenizer.MERGES)

        tokenizer_data = {
            "version": "1.0",
            "model": {
                "type": "BPE",
                "vocab": {token: i for i, token in enumerate(vocab)},
                "merges": merges,
            },
            "added_tokens": [],
        }
        # Compact separators keep the (potentially large) vocab file small.
        with open(os.path.join(output_dir, "tokenizer.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_data, f, indent=None, separators=(',', ':'))
        print("Created tokenizer.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer.json. Error: {e}")

    # --- special_tokens_map.json ---
    try:
        special_map = {}
        # Use a helper to avoid crashing on missing keys
        def add_special_token(key_name, gguf_id_key):
            # Resolve a special-token id from the GGUF header to its string.
            # NOTE(review): `vocab` is defined in the tokenizer.json block
            # above; if that block failed, this raises NameError, which is
            # caught by the enclosing `except Exception`, not the tuple below.
            try:
                token_id = reader.get_int_value(gguf_id_key)
                token_str = vocab[token_id]
                special_map[key_name] = token_str
            except (KeyError, ValueError, IndexError):
                pass

        add_special_token("bos_token", "tokenizer.ggml.bos_token_id")
        add_special_token("eos_token", "tokenizer.ggml.eos_token_id")
        add_special_token("unk_token", "tokenizer.ggml.unknown_token_id")

        with open(os.path.join(output_dir, "special_tokens_map.json"), 'w', encoding='utf-8') as f:
            json.dump(special_map, f, indent=2)
        print("Created special_tokens_map.json")
    except Exception as e:
        print(f"Warning: Could not create special_tokens_map.json. Error: {e}")

    # --- generation_config.json ---
    try:
        gen_config = {"_from_model_config": True}
        try:
            gen_config["bos_token_id"] = reader.get_int_value("tokenizer.ggml.bos_token_id")
            gen_config["eos_token_id"] = reader.get_int_value("tokenizer.ggml.eos_token_id")
        except (KeyError, ValueError):
            pass

        with open(os.path.join(output_dir, "generation_config.json"), 'w', encoding='utf-8') as f:
            json.dump(gen_config, f, indent=2)
        print("Created generation_config.json")
    except Exception as e:
        print(f"Warning: Could not create generation_config.json. Error: {e}")
|
| 152 |
+
|
| 153 |
+
def main():
    """Validate inputs, then emit HF config files and the safetensors index."""
    parser = argparse.ArgumentParser(
        description="Generate safetensors index and config files for a sharded model directory."
    )
    parser.add_argument("--gguf-file", required=True, help="Path to the original GGUF file to read metadata from.")
    parser.add_argument("--shards-dir", required=True, help="Path to the directory containing the sharded .safetensors files.")
    args = parser.parse_args()

    # Guard clauses: bail out early on missing inputs.
    if not os.path.isfile(args.gguf_file):
        print(f"Error: GGUF file not found at {args.gguf_file}")
        return
    if not os.path.isdir(args.shards_dir):
        print(f"Error: Shards directory not found at {args.shards_dir}")
        return

    print(f"Loading GGUF metadata from: {args.gguf_file}")
    reader = GGUFReader(args.gguf_file, 'r')

    # Config files from the GGUF header go into the shards directory itself,
    # then the index is built from the actual shard files found there.
    extract_and_save_gguf_configs(reader, args.shards_dir)
    create_safetensors_index(args.shards_dir, args.shards_dir)

    print("\nMetadata ripping complete.")

if __name__ == "__main__":
    main()
|