# INTELLECT-3V / convert.py
# (Hugging Face upload header: added by ehartford using the
#  upload-large-folder tool, commit 7cb2f27, verified)
#!/usr/bin/env python3
"""
Graft INTELLECT-3 language model weights into GLM-4.6V vision-language model.
This script:
1. Loads both models into CPU memory
2. Copies model.layers.* from INTELLECT-3 to model.language_model.layers.* in GLM-4.6V
3. Copies model.norm.weight from INTELLECT-3 to model.language_model.norm.weight in GLM-4.6V
4. Saves the resulting model to a new directory
Does NOT touch:
- model.language_model.embed_tokens (needed for vision token compatibility)
- lm_head (kept aligned with embed_tokens)
- model.visual.* (vision encoder preserved)
"""
import os
import argparse
import json
import shutil
from pathlib import Path
from safetensors import safe_open
from safetensors.torch import save_file
import torch
from tqdm import tqdm
def get_safetensor_files(model_dir: Path) -> list[Path]:
    """Return the sorted *.safetensors shard paths under *model_dir*.

    Raises:
        FileNotFoundError: if the directory holds no safetensor files.
    """
    shard_paths = sorted(model_dir.glob("*.safetensors"))
    if shard_paths:
        return shard_paths
    raise FileNotFoundError(f"No safetensor files found in {model_dir}")
def load_state_dict_from_safetensors(model_dir: Path) -> dict[str, torch.Tensor]:
    """Read every tensor from the model's safetensor shards into one dict.

    Tensors are materialized on CPU. If a key appears in several shards
    (it shouldn't), the last shard read wins.
    """
    merged: dict[str, torch.Tensor] = {}
    shard_paths = get_safetensor_files(model_dir)
    for shard_path in tqdm(shard_paths, desc=f"Loading {model_dir.name}"):
        with safe_open(shard_path, framework="pt", device="cpu") as shard:
            for name in shard.keys():
                merged[name] = shard.get_tensor(name)
    return merged
def graft_weights(
    intellect3_state: dict[str, torch.Tensor],
    glm_state: dict[str, torch.Tensor],
) -> dict[str, torch.Tensor]:
    """Return a copy of the GLM-4.6V state dict with INTELLECT-3 weights swapped in.

    Key mapping applied to each INTELLECT-3 tensor:
      - model.layers.*     -> model.language_model.layers.*
      - model.norm.weight  -> model.language_model.norm.weight

    embed_tokens / lm_head are deliberately skipped (they must stay aligned
    with GLM's vision-token vocabulary), and anything that does not match
    the mapping is skipped too. Missing targets or shape mismatches only
    emit warnings — grafting is best-effort by design.
    """
    result = dict(glm_state)  # shallow copy: tensor objects are shared, keys are not
    n_grafted = 0
    skipped: list[str] = []
    for src_key, weight in tqdm(intellect3_state.items(), desc="Grafting weights"):
        # Keep GLM's own embeddings/output head for vision-token compatibility.
        if "embed_tokens" in src_key or "lm_head" in src_key:
            skipped.append(src_key)
            continue
        if src_key.startswith("model.layers."):
            dst_key = src_key.replace("model.layers.", "model.language_model.layers.")
        elif src_key == "model.norm.weight":
            dst_key = "model.language_model.norm.weight"
        else:
            skipped.append(src_key)
            continue
        # Only graft when the destination exists and the shapes agree.
        if dst_key not in result:
            print(f"WARNING: {dst_key} not found in GLM-4.6V state dict!")
            continue
        if result[dst_key].shape != weight.shape:
            print(f"WARNING: Shape mismatch for {dst_key}:")
            print(f" INTELLECT-3: {weight.shape}")
            print(f" GLM-4.6V: {result[dst_key].shape}")
            continue
        result[dst_key] = weight
        n_grafted += 1
    print(f"\nGrafted {n_grafted} tensors from INTELLECT-3")
    print(f"Skipped {len(skipped)} tensors: {skipped[:5]}{'...' if len(skipped) > 5 else ''}")
    return result
def save_state_dict_to_safetensors(
    state_dict: dict[str, torch.Tensor],
    output_dir: Path,
    max_shard_size: int = 5 * 1024 ** 3  # 5GB default
):
    """Write *state_dict* to *output_dir* as (possibly sharded) safetensors.

    Shards are filled greedily in key order; a tensor that would push the
    current shard past *max_shard_size* starts a new one. A HF-style
    ``model.safetensors.index.json`` is written only when more than one
    shard is produced. Returns the key -> shard-filename map.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Byte size per tensor, preserving state-dict order.
    sized = [(k, t, t.numel() * t.element_size()) for k, t in state_dict.items()]
    total_size = sum(nbytes for _, _, nbytes in sized)
    print(f"\nTotal model size: {total_size / 1024**3:.2f} GB")

    # Greedy sharding: always append to the last shard, opening a new one
    # when adding the tensor would exceed the limit (a lone oversized tensor
    # still gets its own shard).
    shards: list[dict[str, torch.Tensor]] = [{}]
    shard_bytes = 0
    for key, tensor, nbytes in sized:
        if shards[-1] and shard_bytes + nbytes > max_shard_size:
            shards.append({})
            shard_bytes = 0
        shards[-1][key] = tensor
        shard_bytes += nbytes
    if not shards[-1]:
        shards.pop()  # drop the empty seed shard when state_dict was empty
    print(f"Saving to {len(shards)} shard(s)...")

    weight_map: dict[str, str] = {}
    for idx, shard in enumerate(tqdm(shards, desc="Saving shards"), start=1):
        filename = (
            "model.safetensors"
            if len(shards) == 1
            else f"model-{idx:05d}-of-{len(shards):05d}.safetensors"
        )
        save_file(shard, output_dir / filename)
        for key in shard:
            weight_map[key] = filename

    if len(shards) > 1:
        # Index lets loaders locate which shard holds each tensor.
        index = {
            "metadata": {"total_size": total_size},
            "weight_map": weight_map
        }
        with open(output_dir / "model.safetensors.index.json", "w") as f:
            json.dump(index, f, indent=2)
    return weight_map
def copy_config_files(src_dir: Path, dst_dir: Path):
    """Copy tokenizer/processor/config JSON files from *src_dir* to *dst_dir*.

    Files missing from *src_dir* are silently skipped — not every model
    ships every config file. Only files actually copied are reported.
    """
    config_files = [
        "config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "special_tokens_map.json",
        "generation_config.json",
        "preprocessor_config.json",
        "chat_template.json",
    ]
    for filename in config_files:
        src_file = src_dir / filename
        if src_file.exists():
            # copy2 preserves file metadata (timestamps) along with contents.
            shutil.copy2(src_file, dst_dir / filename)
            # Fix: original printed the broken literal f"Copied (unknown)"
            # instead of the name of the file that was copied.
            print(f"Copied {filename}")
def main():
    """CLI driver: parse args, load both models, graft, save, copy configs."""
    home_models = Path.home() / "models"
    parser = argparse.ArgumentParser(
        description="Graft INTELLECT-3 weights into GLM-4.6V"
    )
    parser.add_argument(
        "--intellect3",
        type=Path,
        default=home_models / "INTELLECT-3",
        help="Path to INTELLECT-3 model directory",
    )
    parser.add_argument(
        "--glm",
        type=Path,
        default=home_models / "GLM-4.6V",
        help="Path to GLM-4.6V model directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=home_models / "INTELLECT-3-V",
        help="Path to output directory",
    )
    parser.add_argument(
        "--shard-size",
        type=int,
        default=5,
        help="Maximum shard size in GB (default: 5)",
    )
    args = parser.parse_args()

    rule = "=" * 60
    print(rule)
    print("INTELLECT-3 -> GLM-4.6V Weight Grafting")
    print(rule)
    print(f"INTELLECT-3 source: {args.intellect3}")
    print(f"GLM-4.6V source: {args.glm}")
    print(f"Output directory: {args.output}")
    print(rule)

    # Fail fast before loading anything if a source checkpoint is missing.
    for label, path in (("INTELLECT-3", args.intellect3), ("GLM-4.6V", args.glm)):
        if not path.exists():
            raise FileNotFoundError(f"{label} directory not found: {path}")

    print("\nStep 1: Loading models into CPU memory...")
    intellect3_state = load_state_dict_from_safetensors(args.intellect3)
    glm_state = load_state_dict_from_safetensors(args.glm)
    print(f"\nINTELLECT-3 tensors: {len(intellect3_state)}")
    print(f"GLM-4.6V tensors: {len(glm_state)}")

    print("\nStep 2: Grafting INTELLECT-3 weights into GLM-4.6V...")
    grafted_state = graft_weights(intellect3_state, glm_state)

    # Release the source dicts so peak RSS stays near one model's size
    # during the save (grafted_state still holds the surviving tensors).
    del intellect3_state
    del glm_state

    print("\nStep 3: Saving grafted model...")
    save_state_dict_to_safetensors(
        grafted_state,
        args.output,
        max_shard_size=args.shard_size * 1024 ** 3,
    )

    # The result keeps GLM's architecture, so GLM's configs are the right ones.
    print("\nStep 4: Copying config files from GLM-4.6V...")
    copy_config_files(args.glm, args.output)

    print("\n" + rule)
    print("Done! Grafted model saved to:", args.output)
    print(rule)


if __name__ == "__main__":
    main()