---
language:
base_model:
- amd/Instella-3B-Instruct
---

This model doesn't work. I tried to convert it from safetensors to GGUF, first by treating the architecture as OLMoForCausalLM and then with the hand-written script below.

## The Script Used for the BF16 Model

```python
%%writefile convert_instella_bf16.py
import os
import subprocess
from pathlib import Path
import json
import torch
import numpy as np

def create_instella_conversion_script():
    """Create a conversion script for Instella models using bfloat16 mixed-precision."""
    script_content = """
import sys
import json
import struct
import numpy as np
import torch
from pathlib import Path
import os
import re
from typing import Dict, Any, List
from safetensors.torch import load_file as load_safetensors

GGUF_MAGIC = 0x46554747
GGUF_VERSION = 3

# GGUF metadata types
GGUF_TYPE_UINT32 = 0
GGUF_TYPE_INT32 = 1
GGUF_TYPE_FLOAT32 = 2
GGUF_TYPE_STRING = 3
GGUF_TYPE_ARRAY = 4
GGUF_TYPE_UINT64 = 5
GGUF_TYPE_INT64 = 6
GGUF_TYPE_FLOAT64 = 7
GGUF_TYPE_BOOL = 8

def write_gguf_header(f, num_tensors, num_kv):
    f.write(struct.pack("<I", GGUF_MAGIC))
    f.write(struct.pack("<I", GGUF_VERSION))
    f.write(struct.pack("<Q", num_kv))
    f.write(struct.pack("<Q", num_tensors))
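
# Why loaders reject this header: in GGUF v3 the fields after magic and
# version are tensor_count (uint64) followed by metadata_kv_count (uint64),
# but write_gguf_header() emits the KV count first, so the two counts land
# swapped. The type codes defined above also do not match ggml's enum
# (there UINT32 = 4, INT32 = 5, FLOAT32 = 6, BOOL = 7, STRING = 8,
# ARRAY = 9), so every metadata value is tagged with the wrong type id.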

def write_metadata_kv(f, key: str, val_type: int, val):
    key_bytes = key.encode('utf-8')
    f.write(struct.pack("<Q", len(key_bytes)))
    f.write(key_bytes)
    f.write(struct.pack("<I", val_type))

    if val_type == GGUF_TYPE_STRING:
        val_bytes = val.encode('utf-8')
        f.write(struct.pack("<Q", len(val_bytes)))
        f.write(val_bytes)
    elif val_type == GGUF_TYPE_INT32:
        f.write(struct.pack("<i", val))
    elif val_type == GGUF_TYPE_UINT32:
        f.write(struct.pack("<I", val))
    elif val_type == GGUF_TYPE_FLOAT32:
        f.write(struct.pack("<f", val))
    elif val_type == GGUF_TYPE_BOOL:
        f.write(struct.pack("<?", val))
    elif val_type == GGUF_TYPE_ARRAY:
        f.write(struct.pack("<Q", len(val)))
        if len(val) > 0:
            if isinstance(val[0], int):
                f.write(struct.pack("<I", GGUF_TYPE_INT32))
                for item in val:
                    f.write(struct.pack("<i", item))
            elif isinstance(val[0], str):
                f.write(struct.pack("<I", GGUF_TYPE_STRING))
                for item in val:
                    item_bytes = item.encode('utf-8')
                    f.write(struct.pack("<Q", len(item_bytes)))
                    f.write(item_bytes)

def write_tensor_info(f, name: str, tensor: torch.Tensor):
    name_bytes = name.encode('utf-8')
    f.write(struct.pack("<Q", len(name_bytes)))
    f.write(name_bytes)

    dims = list(tensor.shape)
    f.write(struct.pack("<I", len(dims)))
    for dim in dims:
        f.write(struct.pack("<Q", dim))

    # Use F16 type identifier (llama.cpp doesn't directly support BF16)
    dtype_str = "F16"
    dtype_bytes = dtype_str.encode('utf-8')
    f.write(struct.pack("<I", len(dtype_bytes)))
    f.write(dtype_bytes)
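
# Two more places where this diverges from the GGUF spec: arrays are
# encoded as element type (uint32) then length (uint64), not length first;
# and the tensor type in write_tensor_info() must be a uint32 ggml type id
# (GGML_TYPE_F16 is 1), not a length-prefixed string. Each tensor-info
# record also has to end with a uint64 offset into the aligned data
# section, which is never written here, and ggml stores dimensions in
# reverse order relative to a torch shape.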

def write_tensor_data(f, tensor: torch.Tensor):
    # Convert bfloat16 to float32 then to float16 for compatibility
    tensor_f32 = tensor.float()
    tensor_f16 = tensor_f32.half()  # Convert to float16

    # Now we can safely convert to numpy and write
    f.write(tensor_f16.numpy().tobytes())
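
# The raw F16 bytes are streamed back to back with no padding, but GGUF
# aligns tensor data to general.alignment (32 bytes by default) and readers
# locate tensors via the per-tensor offsets. As an aside, newer llama.cpp
# builds have a native BF16 tensor type, so the BF16 -> F32 -> F16
# round-trip would not even be necessary with the official tooling.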

def map_tensor_name(name: str) -> str:
    name_map = {
        "model.embed_tokens.weight": "token_embd.weight",
        "model.norm.weight": "output_norm.weight",
        "lm_head.weight": "output.weight",
    }

    if name in name_map:
        return name_map[name]

    if "model.layers." in name:
        layer_match = re.search(r"model\.layers\.(\d+)\.", name)
        if layer_match:
            layer_num = layer_match.group(1)

            # Attention mappings
            if "self_attn.q_proj.weight" in name:
                return f"blk.{layer_num}.attn_q.weight"
            elif "self_attn.k_proj.weight" in name:
                return f"blk.{layer_num}.attn_k.weight"
            elif "self_attn.v_proj.weight" in name:
                return f"blk.{layer_num}.attn_v.weight"
            elif "self_attn.o_proj.weight" in name:
                return f"blk.{layer_num}.attn_output.weight"

            # FFN mappings
            elif "mlp.gate_proj.weight" in name:
                return f"blk.{layer_num}.ffn_gate.weight"
            elif "mlp.up_proj.weight" in name:
                return f"blk.{layer_num}.ffn_up.weight"
            elif "mlp.down_proj.weight" in name:
                return f"blk.{layer_num}.ffn_down.weight"

            # Norm mappings - handle different naming conventions
            elif "input_layernorm.weight" in name:
                return f"blk.{layer_num}.attn_norm.weight"
            elif "post_attention_layernorm.weight" in name:
                return f"blk.{layer_num}.ffn_norm.weight"
            elif "self_attn.q_norm.weight" in name:
                return f"blk.{layer_num}.attn_q_norm.weight"
            elif "self_attn.k_norm.weight" in name:
                return f"blk.{layer_num}.attn_k_norm.weight"

    # If no mapping found, use a default mapping pattern
    if "model.layers." in name:
        layer_match = re.search(r"model\.layers\.(\d+)\.(.+)", name)
        if layer_match:
            layer_num = layer_match.group(1)
            remainder = layer_match.group(2)
            return f"blk.{layer_num}.{remainder}"

    return name
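
# A caveat on the mapping above: any tensor that falls through to the
# default blk.{n}.{remainder} pattern gets a name the llama architecture in
# llama.cpp does not define, and the loader aborts on unknown tensor names.
# If the Instella checkpoint (an OLMo-derived architecture) uses non-llama
# parameter names, most tensors take that fallback path.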

def get_model_metadata(config_path=None) -> Dict[str, Any]:
    # Default metadata for Instella based on Instella2Config defaults
    metadata = {
        "general.architecture": "llama",
        "general.name": "instella",
        "llama.context_length": 2048,  # from max_position_embeddings default
        "llama.embedding_length": 4096,  # from hidden_size default
        "llama.block_count": 32,  # from num_hidden_layers default
        "llama.feed_forward_length": 11008,  # from intermediate_size default
        "llama.attention.head_count": 32,  # from num_attention_heads default
        "llama.attention.head_count_kv": 32,  # from num_key_value_heads default
        "llama.attention.layer_norm_rms_epsilon": 1e-5,  # from rms_norm_eps default
        "llama.rope.dimension_count": 128,  # hidden_size / num_attention_heads
        "llama.vocab_size": 50304,  # from vocab_size default
        "tokenizer.ggml.model": "llama",
        "tokenizer.ggml.tokens": 50304,
        "llama.rope.theta": 10000.0,  # from rope_theta default
    }

    # Try to load from config file if provided
    if config_path and os.path.exists(config_path):
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Update metadata with values from config
            if "hidden_size" in config:
                metadata["llama.embedding_length"] = config["hidden_size"]
                # Update rope dimensions based on hidden size and attention heads
                if "num_attention_heads" in config:
                    metadata["llama.rope.dimension_count"] = config["hidden_size"] // config["num_attention_heads"]
                else:
                    metadata["llama.rope.dimension_count"] = config["hidden_size"] // metadata["llama.attention.head_count"]

            if "num_hidden_layers" in config:
                metadata["llama.block_count"] = config["num_hidden_layers"]
            if "num_attention_heads" in config:
                metadata["llama.attention.head_count"] = config["num_attention_heads"]
            if "num_key_value_heads" in config and config["num_key_value_heads"] is not None:
                metadata["llama.attention.head_count_kv"] = config["num_key_value_heads"]
            else:
                metadata["llama.attention.head_count_kv"] = config["num_attention_heads"]
            if "intermediate_size" in config:
                metadata["llama.feed_forward_length"] = config["intermediate_size"]
            if "vocab_size" in config:
                metadata["llama.vocab_size"] = config["vocab_size"]
                metadata["tokenizer.ggml.tokens"] = config["vocab_size"]
            if "max_position_embeddings" in config:
                metadata["llama.context_length"] = config["max_position_embeddings"]
            if "rope_theta" in config:
                metadata["llama.rope.theta"] = config["rope_theta"]
            if "rms_norm_eps" in config:
                metadata["llama.attention.layer_norm_rms_epsilon"] = config["rms_norm_eps"]
        except Exception as e:
            print(f"Warning: Failed to load config file: {e}")

    return metadata
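
# Even with correct hyperparameters this metadata cannot load:
# tokenizer.ggml.tokens is supposed to be the actual vocabulary, an array
# of strings (typically alongside tokenizer.ggml.scores and
# tokenizer.ggml.token_type), not an integer count, so llama.cpp has no
# tokenizer to reconstruct.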

def convert_model(model_dir: str, output_path: str):
    model_dir = Path(model_dir)

    # Find config file
    config_path = model_dir / "config.json"

    # Find model file
    model_path = model_dir / "model.safetensors"
    if not model_path.exists():
        safetensors_files = list(model_dir.glob("*.safetensors"))
        if not safetensors_files:
            raise FileNotFoundError(f"No safetensors files found in {model_dir}")
        model_path = safetensors_files[0]

    print(f"Loading model from {model_path}")
    tensors = load_safetensors(model_path)

    # Get metadata
    metadata = get_model_metadata(config_path if config_path.exists() else None)

    # Prepare metadata key-value pairs
    metadata_kvs = [
        (key, GGUF_TYPE_STRING if isinstance(value, str) else
         GGUF_TYPE_BOOL if isinstance(value, bool) else
         GGUF_TYPE_FLOAT32 if isinstance(value, float) else
         GGUF_TYPE_INT32 if isinstance(value, int) else
         GGUF_TYPE_ARRAY if isinstance(value, list) else None,
         value)
        for key, value in metadata.items()
    ]

    print(f"Writing GGUF file to {output_path}")
    with open(output_path, 'wb') as f:
        # Write header
        write_gguf_header(f, len(tensors), len(metadata_kvs))

        # Write metadata
        for key, val_type, val in metadata_kvs:
            write_metadata_kv(f, key, val_type, val)

        # Write tensor information
        for i, (name, tensor) in enumerate(tensors.items()):
            print(f"Processing tensor {i+1}/{len(tensors)}: {name} {tensor.shape}")
            gguf_name = map_tensor_name(name)
            write_tensor_info(f, gguf_name, tensor)

        # Write tensor data
        print("Writing tensor data in F16 format...")
        for name, tensor in tensors.items():
            gguf_name = map_tensor_name(name)
            write_tensor_data(f, tensor)

    print(f"Model converted and saved to {output_path}")
    print(f"File size: {os.path.getsize(output_path) / (1024*1024):.2f} MB")

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert Instella model to GGUF format with F16 precision")
    parser.add_argument("model_dir", help="Directory containing the model files")
    parser.add_argument("output_path", help="Path to save the GGUF model")

    args = parser.parse_args()

    convert_model(args.model_dir, args.output_path)
"""

    with open("convert_instella_f16.py", "w") as f:
        f.write(script_content)
    return "convert_instella_f16.py"

def convert_instella_model():
    """Convert the Instella model to GGUF format using F16 precision."""
    # Install required dependencies
    subprocess.run(["pip", "install", "safetensors", "torch", "numpy"], check=True)

    # Create conversion script
    script_path = create_instella_conversion_script()

    # Set paths
    model_dir = "huggintuned"
    output_path = os.path.join(model_dir, "model.gguf")

    # Run conversion
    try:
        print("Starting Instella model conversion with F16 precision...")
        subprocess.run([
            "python", script_path,
            model_dir,
            output_path
        ], check=True)

        # Verify the output file
        if os.path.exists(output_path):
            size_mb = os.path.getsize(output_path) / (1024 * 1024)
            print(f"Conversion successful! Output file size: {size_mb:.2f} MB")
        else:
            raise FileNotFoundError("Output file was not created")

    except subprocess.CalledProcessError as e:
        print(f"Error during conversion: {e}")
        raise
    except Exception as e:
        print(f"Unexpected error: {e}")
        raise

if __name__ == "__main__":
    convert_instella_model()
```
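
To pinpoint where the output diverges from the spec, the fixed-size header can be read back directly. A minimal sketch, assuming the file sits at `huggintuned/model.gguf` (the path the script above writes to):

```python
import struct

# Read the fixed GGUF v3 header: magic, version, tensor_count, metadata_kv_count.
with open("huggintuned/model.gguf", "rb") as f:
    magic = f.read(4)                            # spec: b"GGUF"
    version, = struct.unpack("<I", f.read(4))    # spec: 3
    n_tensors, = struct.unpack("<Q", f.read(8))  # spec: tensor count comes first
    n_kv, = struct.unpack("<Q", f.read(8))       # spec: metadata KV count second

print(magic, version, n_tensors, n_kv)
# With the writer above, the tensor-count slot actually holds the KV count
# (and vice versa), which is the first thing a loader trips over.
```

The official `gguf` Python package (`pip install gguf`) ships a `GGUFWriter` that handles the header layout, type enums, tensor offsets, and alignment automatically; rebuilding the conversion on top of it would avoid every format issue flagged in the comments above.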