File size: 8,306 Bytes

7cb2f27

#!/usr/bin/env python3
"""
Graft INTELLECT-3 language model weights into GLM-4.6V vision-language model.

This script:
1. Loads both models into CPU memory
2. Copies model.layers.* from INTELLECT-3 to model.language_model.layers.* in GLM-4.6V
3. Copies model.norm.weight from INTELLECT-3 to model.language_model.norm.weight in GLM-4.6V
4. Saves the resulting model to a new directory

Does NOT touch:
- model.language_model.embed_tokens (needed for vision token compatibility)
- lm_head (kept aligned with embed_tokens)
- model.visual.* (vision encoder preserved)
"""

import os
import argparse
import json
import shutil
from pathlib import Path
from safetensors import safe_open
from safetensors.torch import save_file
import torch
from tqdm import tqdm


def get_safetensor_files(model_dir: Path) -> list[Path]:
    """Return the ``*.safetensors`` files directly inside *model_dir*, sorted.

    Only the top level of the directory is searched (no recursion).

    Raises:
        FileNotFoundError: If no matching file is found.
    """
    shard_paths = list(model_dir.glob("*.safetensors"))
    if len(shard_paths) == 0:
        raise FileNotFoundError(f"No safetensor files found in {model_dir}")
    # Sorting keeps "model-00001-of-N" style shards in numeric order.
    shard_paths.sort()
    return shard_paths


def load_state_dict_from_safetensors(model_dir: Path) -> dict[str, torch.Tensor]:
    """Read every tensor from the safetensors shards in *model_dir*.

    Shards are visited in the order returned by ``get_safetensor_files`` and
    each tensor is loaded on the CPU under its original key. If the same key
    appears in more than one shard, the tensor from the later shard wins.
    """
    tensors: dict[str, torch.Tensor] = {}

    shard_paths = get_safetensor_files(model_dir)
    progress = tqdm(shard_paths, desc=f"Loading {model_dir.name}")
    for shard_path in progress:
        with safe_open(shard_path, framework="pt", device="cpu") as shard:
            tensors.update((name, shard.get_tensor(name)) for name in shard.keys())

    return tensors


def graft_weights(
    intellect3_state: dict[str, torch.Tensor],
    glm_state: dict[str, torch.Tensor]
) -> dict[str, torch.Tensor]:
    """
    Build a GLM-4.6V state dict whose language-model weights come from INTELLECT-3.

    Key translation:
    - model.layers.*    -> model.language_model.layers.*
    - model.norm.weight -> model.language_model.norm.weight

    Any INTELLECT-3 key containing "embed_tokens" or "lm_head", and any key
    outside the translation above, is skipped. A translated tensor is written
    only when the target key already exists in the GLM-4.6V state with an
    identical shape; otherwise a warning is printed and the GLM-4.6V tensor
    is kept.

    ``glm_state`` itself is not modified: the result is a new dict that shares
    tensor objects with both inputs.
    """
    source_prefix = "model.layers."
    target_prefix = "model.language_model.layers."

    merged = {**glm_state}  # new dict, same tensor objects
    replaced = 0
    ignored = []

    for src_name, src_tensor in tqdm(intellect3_state.items(), desc="Grafting weights"):
        # Work out where (if anywhere) this INTELLECT-3 tensor belongs in GLM-4.6V.
        if "embed_tokens" in src_name or "lm_head" in src_name:
            # Embeddings and output head always stay with GLM-4.6V.
            target_name = None
        elif src_name.startswith(source_prefix):
            target_name = src_name.replace(source_prefix, target_prefix)
        elif src_name == "model.norm.weight":
            target_name = "model.language_model.norm.weight"
        else:
            target_name = None

        if target_name is None:
            ignored.append(src_name)
            continue

        # Only overwrite tensors that GLM-4.6V already has, with the same shape.
        if target_name not in merged:
            print(f"WARNING: {target_name} not found in GLM-4.6V state dict!")
            continue

        current_shape = merged[target_name].shape
        if current_shape != src_tensor.shape:
            print(f"WARNING: Shape mismatch for {target_name}:")
            print(f"  INTELLECT-3: {src_tensor.shape}")
            print(f"  GLM-4.6V:    {current_shape}")
            continue

        merged[target_name] = src_tensor
        replaced += 1

    print(f"\nGrafted {replaced} tensors from INTELLECT-3")
    suffix = "..." if len(ignored) > 5 else ""
    print(f"Skipped {len(ignored)} tensors: {ignored[:5]}{suffix}")

    return merged


def save_state_dict_to_safetensors(
    state_dict: dict[str, torch.Tensor],
    output_dir: Path,
    max_shard_size: int = 5 * 1024 ** 3  # 5GB default
):
    """Write *state_dict* into *output_dir* as one or more safetensors files.

    Tensors are packed greedily in dict order: a new shard is started whenever
    adding the next tensor would push the current non-empty shard past
    *max_shard_size* bytes, so a tensor larger than the limit gets a shard to
    itself. A single shard is written as ``model.safetensors``; several shards
    are written as ``model-XXXXX-of-YYYYY.safetensors`` together with a
    ``model.safetensors.index.json`` index file.

    Returns:
        A dict mapping each tensor key to the shard filename that holds it.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Byte footprint of every tensor, keyed like the state dict.
    byte_sizes = {
        name: tensor.numel() * tensor.element_size()
        for name, tensor in state_dict.items()
    }
    total_size = sum(byte_sizes.values())

    print(f"\nTotal model size: {total_size / 1024**3:.2f} GB")

    # Greedy packing; the last list element is always the shard being filled.
    shards = [{}]
    used_bytes = 0
    for name, tensor in state_dict.items():
        nbytes = byte_sizes[name]
        if shards[-1] and used_bytes + nbytes > max_shard_size:
            shards.append({})
            used_bytes = 0
        shards[-1][name] = tensor
        used_bytes += nbytes

    if not shards[-1]:
        # Only reachable for an empty state dict: nothing to write.
        shards.pop()

    shard_count = len(shards)
    print(f"Saving to {shard_count} shard(s)...")

    # Write each shard and record which file every tensor ended up in.
    weight_map = {}
    for number, shard in enumerate(tqdm(shards, desc="Saving shards"), start=1):
        if shard_count == 1:
            filename = "model.safetensors"
        else:
            filename = f"model-{number:05d}-of-{shard_count:05d}.safetensors"

        save_file(shard, output_dir / filename)
        weight_map.update(dict.fromkeys(shard, filename))

    # The index file is only written for multi-shard checkpoints.
    if shard_count > 1:
        index_payload = {
            "metadata": {"total_size": total_size},
            "weight_map": weight_map
        }
        index_path = output_dir / "model.safetensors.index.json"
        with open(index_path, "w") as index_file:
            json.dump(index_payload, index_file, indent=2)

    return weight_map


def copy_config_files(src_dir: Path, dst_dir: Path):
    """Copy the known config/tokenizer files from *src_dir* into *dst_dir*.

    Only the fixed list of filenames below is considered. Names that are
    absent from *src_dir* (or are not regular files) are skipped without
    error. *dst_dir* is created, including parents, if it does not exist yet,
    so the function does not depend on being called after the weights were
    saved.

    Args:
        src_dir: Directory to copy the files from.
        dst_dir: Directory to copy the files into.
    """
    config_files = [
        "config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "special_tokens_map.json",
        "generation_config.json",
        "preprocessor_config.json",
        "chat_template.json",
    ]

    # shutil.copy2 fails if the destination directory is missing.
    dst_dir.mkdir(parents=True, exist_ok=True)

    for filename in config_files:
        src_file = src_dir / filename
        if src_file.is_file():
            shutil.copy2(src_file, dst_dir / filename)
            print(f"Copied {filename}")


def main():
    """Command-line entry point: load both checkpoints, graft, save, copy configs.

    Arguments (all optional; the path defaults live under ``~/models``):
        --intellect3  INTELLECT-3 model directory (source of the grafted weights)
        --glm         GLM-4.6V model directory (base model and config files)
        --output      Directory the grafted model is written to
        --shard-size  Maximum shard size in GB

    Raises:
        FileNotFoundError: If either source directory does not exist.
        ValueError: If ``--shard-size`` is not positive, or if ``--output``
            resolves to the same directory as one of the sources.
    """
    parser = argparse.ArgumentParser(
        description="Graft INTELLECT-3 weights into GLM-4.6V"
    )
    parser.add_argument(
        "--intellect3",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3",
        help="Path to INTELLECT-3 model directory"
    )
    parser.add_argument(
        "--glm",
        type=Path,
        default=Path.home() / "models" / "GLM-4.6V",
        help="Path to GLM-4.6V model directory"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3-V",
        help="Path to output directory"
    )
    parser.add_argument(
        "--shard-size",
        type=int,
        default=5,
        help="Maximum shard size in GB (default: 5)"
    )

    args = parser.parse_args()

    print("=" * 60)
    print("INTELLECT-3 -> GLM-4.6V Weight Grafting")
    print("=" * 60)
    print(f"INTELLECT-3 source: {args.intellect3}")
    print(f"GLM-4.6V source:    {args.glm}")
    print(f"Output directory:   {args.output}")
    print("=" * 60)

    # Verify source directories exist
    if not args.intellect3.exists():
        raise FileNotFoundError(f"INTELLECT-3 directory not found: {args.intellect3}")
    if not args.glm.exists():
        raise FileNotFoundError(f"GLM-4.6V directory not found: {args.glm}")

    # Reject bad arguments before the expensive load, not after it.
    if args.shard_size <= 0:
        raise ValueError(
            f"--shard-size must be a positive number of GB, got {args.shard_size}"
        )

    # Writing into a source directory would overwrite or mix with that
    # checkpoint's shard files, and copying GLM-4.6V's config files onto
    # themselves makes shutil.copy2 raise SameFileError in step 4.
    resolved_output = args.output.resolve()
    for label, source_dir in (("INTELLECT-3", args.intellect3), ("GLM-4.6V", args.glm)):
        if resolved_output == source_dir.resolve():
            raise ValueError(
                f"Output directory must differ from the {label} directory: {args.output}"
            )

    # Load both models
    print("\nStep 1: Loading models into CPU memory...")
    intellect3_state = load_state_dict_from_safetensors(args.intellect3)
    glm_state = load_state_dict_from_safetensors(args.glm)

    print(f"\nINTELLECT-3 tensors: {len(intellect3_state)}")
    print(f"GLM-4.6V tensors:    {len(glm_state)}")

    # Graft weights
    print("\nStep 2: Grafting INTELLECT-3 weights into GLM-4.6V...")
    grafted_state = graft_weights(intellect3_state, glm_state)

    # Drop the source dicts so tensors that did not make it into the grafted
    # state (replaced GLM-4.6V layers, skipped INTELLECT-3 tensors) can be freed.
    del intellect3_state
    del glm_state

    # Save grafted model
    print("\nStep 3: Saving grafted model...")
    save_state_dict_to_safetensors(
        grafted_state,
        args.output,
        max_shard_size=args.shard_size * 1024 ** 3
    )

    # Copy config files from GLM-4.6V (since we're keeping its architecture)
    print("\nStep 4: Copying config files from GLM-4.6V...")
    copy_config_files(args.glm, args.output)

    print("\n" + "=" * 60)
    print("Done! Grafted model saved to:", args.output)
    print("=" * 60)


# Run the grafting pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()