Naphula committed on
Commit
22115d9
·
verified ·
1 Parent(s): 34b000c

Upload 3 files

Browse files
gguf_to_safetensors_v1.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import torch
4
+ import numpy as np
5
+ from safetensors.torch import save_file
6
+ from safetensors import safe_open
7
+ from typing import Dict, Tuple
8
+ from gguf import GGUFReader, dequantize
9
+ from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType, Keys
10
+
11
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, list]:
    """Open a GGUF file and collect per-tensor metadata.

    Returns the open reader together with a list of dicts — one per tensor —
    holding its name, shape, element/byte counts, data offset and quant type.
    """
    reader = GGUFReader(gguf_path)
    tensors_metadata = [
        {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'data_offset': tensor.data_offset,
            'type': tensor.tensor_type,
        }
        for tensor in reader.tensors
    ]
    return reader, tensors_metadata
26
+
27
+
28
def convert_gguf_to_safetensors(gguf_path: str, output_path: str, use_bf16: bool) -> None:
    """Dequantize every tensor in a GGUF file and save them as one safetensors file.

    Args:
        gguf_path: Path to the input GGUF file.
        output_path: Path of the .safetensors file to write.
        use_bf16: Convert tensors to bfloat16 instead of the default float16.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted {len(tensors_metadata)} tensors from GGUF file")

    target_dtype = torch.bfloat16 if use_bf16 else torch.float16
    tensors_dict: dict[str, torch.Tensor] = {}

    for i, tensor_info in enumerate(tensors_metadata):
        tensor_name = tensor_info['name']

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the data from the reader's memory-mapped buffer.
        weights = dequantize(tensor_data.data, tensor_data.tensor_type).copy()

        try:
            if use_bf16:
                print("Attempting BF16 conversion")
                # Go through float32 first: numpy has no native bfloat16 dtype.
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float32)
                weights_tensor = weights_tensor.to(torch.bfloat16)
            else:
                print("Using FP16 conversion.")
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float16)
        except Exception as e:
            # Fallback: force a float32 copy first, then convert to the
            # *requested* dtype (the original fallback always produced FP16
            # even when BF16 had been asked for).
            print(f"Error during dtype conversion for tensor '{tensor_name}': {e}")
            weights_tensor = torch.from_numpy(weights.astype(np.float32)).to(target_dtype)

        print(f"dequantize tensor: {tensor_name} | Shape: {weights_tensor.shape} | Type: {weights_tensor.dtype}")
        tensors_dict[tensor_name] = weights_tensor
        # Drop the numpy copy promptly to limit peak memory.
        del weights

    # NOTE(review): FILE_TYPE is the GGUF quantization file type, not an
    # architecture name — kept as-is for output compatibility.
    metadata = {"modelspec.architecture": f"{reader.get_field(Keys.General.FILE_TYPE)}", "description": "Model converted from gguf."}

    save_file(tensors_dict, output_path, metadata=metadata)
    print("Conversion complete!")
67
+
68
def main() -> None:
    """Parse CLI arguments and run the GGUF -> safetensors conversion."""
    parser = argparse.ArgumentParser(description="Convert GGUF files to safetensors format.")
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Path to the output safetensors file.")
    # Fixed typo in the original help text: "(onry cuda)".
    parser.add_argument("--bf16", action="store_true", help="(CUDA only) Convert tensors to BF16 format instead of FP16.")

    args = parser.parse_args()

    convert_gguf_to_safetensors(args.input, args.output, args.bf16)


if __name__ == "__main__":
    main()
gguf_to_safetensors_v2.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import torch
4
+ import numpy as np
5
+ from safetensors.torch import save_file
6
+ from safetensors import safe_open
7
+ from typing import Dict, Tuple, List
8
+ from gguf import GGUFReader, dequantize
9
+
10
def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, List[Dict]]:
    """Load GGUF file and extract metadata for all tensors.

    Returns the open reader plus one metadata dict per tensor (name, shape,
    element count, byte count and quantization type).
    """
    print(f"Loading GGUF file: {gguf_path}")
    reader = GGUFReader(gguf_path, 'r')
    tensors_metadata: List[Dict] = [
        {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'type': tensor.tensor_type,
        }
        for tensor in reader.tensors
    ]
    return reader, tensors_metadata
25
+
26
def get_dequantized_tensor_size_in_bytes(tensor_info: Dict, use_bf16: bool) -> int:
    """Calculates the size of a tensor after it has been dequantized to FP16 or BF16.

    Both FP16 and BF16 occupy two bytes per element, so ``use_bf16`` does not
    change the result; the parameter is kept for interface symmetry.
    """
    return 2 * tensor_info['n_elements']
30
+
31
def get_hf_name(gguf_name: str) -> str:
    """Translates a GGUF tensor name to its Hugging Face equivalent for Llama/Mistral models.

    Unmapped names are returned unchanged (with a printed warning).
    """
    top_level = {
        "token_embd.weight": "model.embed_tokens.weight",
        "output_norm.weight": "model.norm.weight",
        "output.weight": "lm_head.weight",
    }
    mapped = top_level.get(gguf_name)
    if mapped is not None:
        return mapped

    if gguf_name.startswith("blk."):
        # Per-layer tensors look like "blk.<layer>.<part>".
        _, layer_num, *tail = gguf_name.split('.')
        layer_part = ".".join(tail)

        per_block = {
            "attn_norm.weight": "input_layernorm.weight",
            "ffn_norm.weight": "post_attention_layernorm.weight",
            "attn_q.weight": "self_attn.q_proj.weight",
            "attn_k.weight": "self_attn.k_proj.weight",
            "attn_v.weight": "self_attn.v_proj.weight",
            "attn_output.weight": "self_attn.o_proj.weight",
            "ffn_gate.weight": "mlp.gate_proj.weight",
            "ffn_up.weight": "mlp.up_proj.weight",
            "ffn_down.weight": "mlp.down_proj.weight",
        }
        if layer_part in per_block:
            return f"model.layers.{layer_num}.{per_block[layer_part]}"

    print(f"Warning: No mapping found for tensor '{gguf_name}'. Using original name.")
    return gguf_name
63
+
64
def convert_gguf_to_safetensors_by_size(gguf_path: str, output_path: str, use_bf16: bool, shard_size_gb: float):
    """Converts a GGUF file to .safetensors, sharding and renaming tensors for HF compatibility.

    Args:
        gguf_path: Input GGUF file.
        output_path: Base path; shards are written next to it as
            ``<base>-NNNNN-of-TTTTT.safetensors``.
        use_bf16: Convert tensors to bfloat16 instead of the default float16.
        shard_size_gb: Soft upper bound on each shard's dequantized size, in GB.
    """
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted metadata for {len(tensors_metadata)} tensors from GGUF file.")

    shard_size_bytes = int(shard_size_gb * 1024**3)
    print(f"Target shard size set to ~{shard_size_gb} GB ({shard_size_bytes} bytes).")

    output_dir = os.path.dirname(output_path)
    if not output_dir:
        output_dir = "."
    base_name = os.path.basename(output_path).replace('.safetensors', '')

    tensors_in_current_chunk: dict[str, torch.Tensor] = {}
    current_chunk_size_bytes = 0
    num_chunks = 0

    # First pass (metadata only): count how many shards will be produced so
    # the "-NNNNN-of-TTTTT" file names can be written correctly on the fly.
    # NOTE: this loop must mirror the boundary condition of the main loop below.
    total_shards = 0
    temp_size = 0
    for tensor_info in tensors_metadata:
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)
        if temp_size > 0 and (temp_size + dequantized_size) > shard_size_bytes:
            total_shards += 1
            temp_size = 0
        temp_size += dequantized_size
    if temp_size > 0:
        total_shards += 1
    print(f"Model will be split into {total_shards} shards.")

    # Second pass: dequantize each tensor and flush the current shard to disk
    # whenever adding the next tensor would push it past the size limit.
    for i, tensor_info in enumerate(tensors_metadata):
        gguf_tensor_name = tensor_info['name']
        dequantized_size = get_dequantized_tensor_size_in_bytes(tensor_info, use_bf16)

        if current_chunk_size_bytes > 0 and (current_chunk_size_bytes + dequantized_size) > shard_size_bytes:
            num_chunks += 1
            chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")

            print(f"\nCurrent chunk size ({current_chunk_size_bytes / 1024**3:.2f} GB) exceeds limit.")
            print(f"Saving chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
            save_file(tensors_in_current_chunk, chunk_path)

            # save_file has already serialized the tensors; start a fresh chunk.
            tensors_in_current_chunk.clear()
            current_chunk_size_bytes = 0

        tensor_data = reader.get_tensor(i)
        # .copy() detaches the data from the reader's memory-mapped buffer.
        weights_np = dequantize(tensor_data.data, tensor_data.tensor_type).copy()
        target_dtype = torch.bfloat16 if use_bf16 else torch.float16

        try:
            weights_tensor = torch.from_numpy(weights_np).to(target_dtype)
        except Exception as e:
            # Fallback: force a float32 copy first, then convert.
            print(f"Warning: Could not convert {gguf_tensor_name} directly. Error: {e}. Using float32 fallback.")
            weights_tensor = torch.from_numpy(weights_np.astype(np.float32)).to(target_dtype)

        # --- CORRECTED RENAMING LOGIC ---
        hf_tensor_name = get_hf_name(gguf_tensor_name)

        print(f"Processed tensor ({i+1}/{len(tensors_metadata)}): {gguf_tensor_name} -> {hf_tensor_name} | Size: {dequantized_size/1024**2:.2f} MB")

        tensors_in_current_chunk[hf_tensor_name] = weights_tensor
        current_chunk_size_bytes += dequantized_size

        # Drop the intermediate references promptly to limit peak memory.
        del weights_np
        del tensor_data

    # Flush whatever remains as the final shard.
    if tensors_in_current_chunk:
        num_chunks += 1
        chunk_path = os.path.join(output_dir, f"{base_name}-{num_chunks:05d}-of-{total_shards:05d}.safetensors")
        print(f"\nSaving final chunk {num_chunks} with {len(tensors_in_current_chunk)} tensors to {chunk_path}...\n")
        save_file(tensors_in_current_chunk, chunk_path)

    print("All tensors have been dequantized, renamed, and saved into sharded safetensor files.")
136
+
137
def main():
    """CLI entry point: parse arguments and run the sharded conversion."""
    parser = argparse.ArgumentParser(
        description="Convert GGUF to HF-compatible sharded safetensors, renaming tensors correctly."
    )
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Base path for the final output sharded .safetensors files.")
    parser.add_argument("--bf16", action="store_true", help="Convert tensors to BF16 format instead of the default FP16.")
    parser.add_argument(
        "--shard-size",
        type=float,
        default=5.0,
        help="Maximum size of each shard in Gigabytes (GB). Default: 5.0"
    )
    opts = parser.parse_args()

    convert_gguf_to_safetensors_by_size(opts.input, opts.output, opts.bf16, opts.shard_size)


if __name__ == "__main__":
    main()
safetensors_meta_ripper_v1.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import json
4
+ import glob
5
+ from safetensors import safe_open
6
+ from gguf import GGUFReader
7
+ from gguf.constants import Keys
8
+ from typing import List, Dict, Any
9
+
10
def create_safetensors_index(shards_dir: str, output_dir: str) -> None:
    """Creates the model.safetensors.index.json file by scanning shard files.

    Maps every tensor name to the shard file containing it and records the
    combined on-disk size of all successfully read shards.
    """
    shard_files = sorted(glob.glob(os.path.join(shards_dir, '*.safetensors')))

    if not shard_files:
        print(f"Error: No .safetensors files found in directory: {shards_dir}")
        return

    print(f"Found {len(shard_files)} shard files to index.")

    weight_map: Dict[str, str] = {}
    total_size = 0

    for shard_file in shard_files:
        shard_basename = os.path.basename(shard_file)
        try:
            with safe_open(shard_file, framework="pt", device="cpu") as f:
                for tensor_name in f.keys():
                    weight_map[tensor_name] = shard_basename
            total_size += os.path.getsize(shard_file)
        except Exception as e:
            # Best-effort: an unreadable shard is skipped (its size excluded).
            print(f"Warning: Could not process shard {shard_basename}. Error: {e}")
            continue

    index_data: Dict[str, Any] = {
        "metadata": {"total_size": total_size},
        "weight_map": weight_map,
    }

    index_filepath = os.path.join(output_dir, "model.safetensors.index.json")
    try:
        with open(index_filepath, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, indent=2)
        print(f"Successfully created safetensors index file: {index_filepath}")
    except Exception as e:
        print(f"Error: Failed to write index file. Error: {e}")
46
+
47
def extract_and_save_gguf_configs(reader: GGUFReader, output_dir: str) -> None:
    """Extracts metadata from GGUF and saves config, tokenizer, and generation files.

    Each output file (config.json, tokenizer_config.json, tokenizer.json,
    special_tokens_map.json, generation_config.json) is produced in its own
    try block, so a failure in one section only prints a warning and the
    remaining files are still attempted.

    NOTE(review): this relies on ``reader.get_int_value`` / ``get_float_value``
    / ``get_str_value`` helpers; stock gguf-py's GGUFReader only documents
    ``get_field`` — confirm the reader in use actually provides them,
    otherwise every section falls through to its warning path.
    """

    config = {}
    # --- config.json ---
    try:
        # NOTE(review): ``.name`` on a reader field looks like it yields the
        # field's key name rather than the stored architecture string —
        # verify this produces e.g. "llama" and not "general.architecture".
        arch = reader.get_field(Keys.General.ARCHITECTURE).name.lower()
        model_type_map = {"llama": "llama", "mistral": "mistral", "gemma": "gemma"}
        model_type = model_type_map.get(arch, arch)

        # GGUF metadata keys are namespaced by architecture,
        # e.g. "llama.embedding_length".
        config = {
            "architectures": [arch.capitalize()],
            "model_type": model_type,
            "hidden_size": reader.get_int_value(f"{model_type}.embedding_length"),
            "intermediate_size": reader.get_int_value(f"{model_type}.feed_forward_length"),
            "num_attention_heads": reader.get_int_value(f"{model_type}.attention.head_count"),
            "num_hidden_layers": reader.get_int_value(f"{model_type}.block_count"),
            "num_key_value_heads": reader.get_int_value(f"{model_type}.attention.head_count_kv"),
            "rms_norm_eps": reader.get_float_value(f"{model_type}.attention.layer_norm_rms_epsilon"),
            "vocab_size": len(reader.get_field(Keys.Tokenizer.VOCAB)),
            "rope_theta": reader.get_float_value(f"{model_type}.rope.freq_base"),
            "max_position_embeddings": reader.get_int_value(f"{model_type}.context_length"),
        }
        with open(os.path.join(output_dir, "config.json"), 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        print("Created config.json")
    except Exception as e:
        print(f"Warning: Could not create config.json. Some values may be missing. Error: {e}")

    # --- tokenizer_config.json ---
    try:
        # Falls back to 4096 if the config.json section above failed.
        tokenizer_config = {
            "model_max_length": config.get("max_position_embeddings", 4096),
            "padding_side": "left",
            # NOTE(review): hard-coded for Llama-family models — confirm for
            # other architectures.
            "tokenizer_class": "LlamaTokenizer",
        }
        # Add chat template if it exists
        try:
            chat_template = reader.get_str_value("tokenizer.chat_template")
            tokenizer_config["chat_template"] = chat_template
        except (KeyError, ValueError):
            pass  # Field does not exist

        with open(os.path.join(output_dir, "tokenizer_config.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_config, f, indent=2)
        print("Created tokenizer_config.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer_config.json. Error: {e}")

    # --- tokenizer.json ---
    try:
        # NOTE(review): assumes each vocab entry exposes a ``.piece``
        # attribute — confirm against the reader's field representation.
        vocab = [item.piece for item in reader.get_field(Keys.Tokenizer.VOCAB)]
        merges = reader.get_field(Keys.Tokenizer.MERGES)

        tokenizer_data = {
            "version": "1.0",
            "model": {
                "type": "BPE",
                # Token id = position in the GGUF vocab list.
                "vocab": {token: i for i, token in enumerate(vocab)},
                "merges": merges,
            },
            "added_tokens": [],
        }
        # Compact output (no indent) — tokenizer.json files are large.
        with open(os.path.join(output_dir, "tokenizer.json"), 'w', encoding='utf-8') as f:
            json.dump(tokenizer_data, f, indent=None, separators=(',', ':'))
        print("Created tokenizer.json")
    except Exception as e:
        print(f"Warning: Could not create tokenizer.json. Error: {e}")

    # --- special_tokens_map.json ---
    try:
        special_map = {}
        # Use a helper to avoid crashing on missing keys
        # NOTE(review): ``vocab`` is defined in the tokenizer.json section
        # above; if that section failed, the NameError raised here is not in
        # the helper's except tuple and is swallowed by this section's outer
        # except instead.
        def add_special_token(key_name, gguf_id_key):
            try:
                token_id = reader.get_int_value(gguf_id_key)
                token_str = vocab[token_id]
                special_map[key_name] = token_str
            except (KeyError, ValueError, IndexError):
                pass

        add_special_token("bos_token", "tokenizer.ggml.bos_token_id")
        add_special_token("eos_token", "tokenizer.ggml.eos_token_id")
        add_special_token("unk_token", "tokenizer.ggml.unknown_token_id")

        with open(os.path.join(output_dir, "special_tokens_map.json"), 'w', encoding='utf-8') as f:
            json.dump(special_map, f, indent=2)
        print("Created special_tokens_map.json")
    except Exception as e:
        print(f"Warning: Could not create special_tokens_map.json. Error: {e}")

    # --- generation_config.json ---
    try:
        gen_config = {"_from_model_config": True}
        try:
            gen_config["bos_token_id"] = reader.get_int_value("tokenizer.ggml.bos_token_id")
            gen_config["eos_token_id"] = reader.get_int_value("tokenizer.ggml.eos_token_id")
        except (KeyError, ValueError):
            pass

        with open(os.path.join(output_dir, "generation_config.json"), 'w', encoding='utf-8') as f:
            json.dump(gen_config, f, indent=2)
        print("Created generation_config.json")
    except Exception as e:
        print(f"Warning: Could not create generation_config.json. Error: {e}")
152
+
153
def main():
    """Validate CLI inputs, then emit config files and the safetensors index."""
    parser = argparse.ArgumentParser(
        description="Generate safetensors index and config files for a sharded model directory."
    )
    parser.add_argument(
        "--gguf-file",
        required=True,
        help="Path to the original GGUF file to read metadata from."
    )
    parser.add_argument(
        "--shards-dir",
        required=True,
        help="Path to the directory containing the sharded .safetensors files."
    )
    args = parser.parse_args()

    gguf_file = args.gguf_file
    shards_dir = args.shards_dir

    # Guard clauses: bail out early on bad paths.
    if not os.path.isfile(gguf_file):
        print(f"Error: GGUF file not found at {gguf_file}")
        return
    if not os.path.isdir(shards_dir):
        print(f"Error: Shards directory not found at {shards_dir}")
        return

    print(f"Loading GGUF metadata from: {gguf_file}")
    reader = GGUFReader(gguf_file, 'r')

    # Config files first (from the GGUF header), then the index (from the
    # actual shard files); both are written into the shards directory.
    extract_and_save_gguf_configs(reader, shards_dir)
    create_safetensors_index(shards_dir, shards_dir)

    print("\nMetadata ripping complete.")


if __name__ == "__main__":
    main()