#!/usr/bin/env python3
"""
Graft INTELLECT-3 language model weights into GLM-4.6V vision-language model.

This script:
1. Loads both models into CPU memory
2. Copies model.layers.* from INTELLECT-3 to model.language_model.layers.* in GLM-4.6V
3. Copies model.norm.weight from INTELLECT-3 to model.language_model.norm.weight in GLM-4.6V
4. Saves the resulting model to a new directory

Does NOT touch:
- model.language_model.embed_tokens (needed for vision token compatibility)
- lm_head (kept aligned with embed_tokens)
- model.visual.* (vision encoder preserved)
"""

import os
import argparse
import json
import shutil
from pathlib import Path

from safetensors import safe_open
from safetensors.torch import save_file
import torch
from tqdm import tqdm

# Key prefixes/names used to map INTELLECT-3 tensors onto the GLM-4.6V layout.
_SRC_LAYER_PREFIX = "model.layers."
_DST_LAYER_PREFIX = "model.language_model.layers."
_SRC_NORM_KEY = "model.norm.weight"
_DST_NORM_KEY = "model.language_model.norm.weight"

# Substrings marking tensors that are deliberately never grafted.
_PROTECTED_MARKERS = ("embed_tokens", "lm_head")


def _is_protected(key: str) -> bool:
    """Return True if *key* names a tensor that must not be grafted."""
    return any(marker in key for marker in _PROTECTED_MARKERS)


def get_safetensor_files(model_dir: Path) -> list[Path]:
    """Get all safetensor files in a model directory.

    Args:
        model_dir: Directory searched (non-recursively) for ``*.safetensors``.

    Returns:
        The matching paths in sorted order.

    Raises:
        FileNotFoundError: If the directory contains no safetensor files.
    """
    files = sorted(model_dir.glob("*.safetensors"))
    if not files:
        raise FileNotFoundError(f"No safetensor files found in {model_dir}")
    return files


def load_state_dict_from_safetensors(model_dir: Path) -> dict[str, torch.Tensor]:
    """Load all tensors from safetensor files into a state dict.

    Args:
        model_dir: Directory containing one or more ``*.safetensors`` shards.

    Returns:
        A mapping of tensor name to CPU tensor, merged across all shards.
        If the same key appears in several shards, the last shard (in sorted
        filename order) wins.

    Raises:
        FileNotFoundError: If the directory contains no safetensor files.
    """
    state_dict: dict[str, torch.Tensor] = {}
    files = get_safetensor_files(model_dir)
    for f in tqdm(files, desc=f"Loading {model_dir.name}"):
        with safe_open(f, framework="pt", device="cpu") as st:
            for key in st.keys():
                state_dict[key] = st.get_tensor(key)
    return state_dict


def graft_weights(
    intellect3_state: dict[str, torch.Tensor],
    glm_state: dict[str, torch.Tensor],
) -> dict[str, torch.Tensor]:
    """
    Graft INTELLECT-3 weights into GLM-4.6V state dict.

    Mapping:
    - model.layers.* -> model.language_model.layers.*
    - model.norm.weight -> model.language_model.norm.weight

    Tensors whose name contains ``embed_tokens`` or ``lm_head`` are never
    taken from INTELLECT-3. A mapped tensor is only grafted when the target
    key exists in the GLM state dict and the shapes match; otherwise a
    warning is printed and the GLM tensor is kept.

    Args:
        intellect3_state: Source state dict (INTELLECT-3 naming).
        glm_state: Destination state dict (GLM-4.6V naming). Not mutated.

    Returns:
        A new dict with the same keys as ``glm_state`` in which the grafted
        entries reference the INTELLECT-3 tensors.
    """
    grafted_state = dict(glm_state)  # shallow copy; tensors are shared, not cloned
    grafted_keys: set[str] = set()
    skipped_keys: list[str] = []

    for intellect_key, tensor in tqdm(intellect3_state.items(), desc="Grafting weights"):
        # Skip embed_tokens and lm_head from INTELLECT-3
        if _is_protected(intellect_key):
            skipped_keys.append(intellect_key)
            continue

        if intellect_key.startswith(_SRC_LAYER_PREFIX):
            # Swap only the leading prefix; str.replace would also rewrite any
            # later occurrence of "model.layers." inside the key.
            glm_key = _DST_LAYER_PREFIX + intellect_key[len(_SRC_LAYER_PREFIX):]
        elif intellect_key == _SRC_NORM_KEY:
            glm_key = _DST_NORM_KEY
        else:
            skipped_keys.append(intellect_key)
            continue

        # Verify the key exists in GLM and shapes match
        if glm_key not in grafted_state:
            print(f"WARNING: {glm_key} not found in GLM-4.6V state dict!")
            continue

        if grafted_state[glm_key].shape != tensor.shape:
            print(f"WARNING: Shape mismatch for {glm_key}:")
            print(f" INTELLECT-3: {tensor.shape}")
            print(f" GLM-4.6V: {grafted_state[glm_key].shape}")
            continue

        grafted_state[glm_key] = tensor
        grafted_keys.add(glm_key)

    print(f"\nGrafted {len(grafted_keys)} tensors from INTELLECT-3")
    print(
        f"Skipped {len(skipped_keys)} tensors: "
        f"{skipped_keys[:5]}{'...' if len(skipped_keys) > 5 else ''}"
    )

    # Surface a partial graft: language-model tensors in GLM that were expected
    # to be replaced but still hold their original weights.
    untouched = [
        key
        for key in glm_state
        if (key.startswith(_DST_LAYER_PREFIX) or key == _DST_NORM_KEY)
        and key not in grafted_keys
        and not _is_protected(key)
    ]
    if untouched:
        print(
            f"WARNING: {len(untouched)} GLM-4.6V language-model tensors were not "
            f"replaced: {untouched[:5]}{'...' if len(untouched) > 5 else ''}"
        )

    return grafted_state


def save_state_dict_to_safetensors(
    state_dict: dict[str, torch.Tensor],
    output_dir: Path,
    max_shard_size: int = 5 * 1024 ** 3,  # 5GB default
):
    """Save state dict to sharded safetensor files.

    Tensors are packed greedily, in dict order, into shards of at most
    ``max_shard_size`` bytes; a single tensor larger than the limit gets a
    shard of its own. When more than one shard is produced, a
    ``model.safetensors.index.json`` file is written alongside them.

    Args:
        state_dict: Mapping of tensor name to tensor.
        output_dir: Destination directory; created if missing.
        max_shard_size: Maximum shard size in bytes.

    Returns:
        A dict mapping each tensor name to the shard filename holding it.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Calculate total size and plan shards
    sized_tensors = [
        (name, tensor, tensor.numel() * tensor.element_size())
        for name, tensor in state_dict.items()
    ]
    total_size = sum(size for _, _, size in sized_tensors)
    print(f"\nTotal model size: {total_size / 1024**3:.2f} GB")

    # Create shards
    shards: list[dict[str, torch.Tensor]] = []
    current_shard: dict[str, torch.Tensor] = {}
    current_size = 0
    for name, tensor, size in sized_tensors:
        # Close the current shard before it would overflow, but never emit an
        # empty shard (an oversized tensor still goes somewhere).
        if current_shard and current_size + size > max_shard_size:
            shards.append(current_shard)
            current_shard = {}
            current_size = 0
        current_shard[name] = tensor
        current_size += size
    if current_shard:
        shards.append(current_shard)

    print(f"Saving to {len(shards)} shard(s)...")

    # Save shards and build index
    weight_map: dict[str, str] = {}
    for i, shard in enumerate(tqdm(shards, desc="Saving shards")):
        if len(shards) == 1:
            filename = "model.safetensors"
        else:
            filename = f"model-{i+1:05d}-of-{len(shards):05d}.safetensors"
        # Tag the file as a PyTorch checkpoint; Hugging Face loaders inspect
        # this metadata entry when opening safetensors files.
        save_file(shard, output_dir / filename, metadata={"format": "pt"})
        for name in shard:
            weight_map[name] = filename

    # Save index if sharded
    if len(shards) > 1:
        index = {
            "metadata": {"total_size": total_size},
            "weight_map": weight_map,
        }
        index_path = output_dir / "model.safetensors.index.json"
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2)

    return weight_map


def copy_config_files(src_dir: Path, dst_dir: Path):
    """Copy config files from source to destination.

    Only files that exist in ``src_dir`` are copied; missing ones are
    skipped silently.

    Args:
        src_dir: Directory holding the model's config/tokenizer files.
        dst_dir: Destination directory; created if missing.
    """
    config_files = [
        "config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "special_tokens_map.json",
        "generation_config.json",
        "preprocessor_config.json",
        "chat_template.json",
        # NOTE(review): some checkpoints ship these as separate files; they are
        # copied only when present - confirm against the GLM-4.6V repo layout.
        "chat_template.jinja",
        "processor_config.json",
        "video_preprocessor_config.json",
    ]
    dst_dir.mkdir(parents=True, exist_ok=True)
    for filename in config_files:
        src_file = src_dir / filename
        if src_file.exists():
            shutil.copy2(src_file, dst_dir / filename)
            print(f"Copied {filename}")


def main():
    """Parse CLI arguments and run the load -> graft -> save -> copy pipeline.

    Raises:
        FileNotFoundError: If a source model directory does not exist or
            contains no safetensor files.
        ValueError: If the output directory is one of the source directories.
    """
    parser = argparse.ArgumentParser(
        description="Graft INTELLECT-3 weights into GLM-4.6V"
    )
    parser.add_argument(
        "--intellect3",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3",
        help="Path to INTELLECT-3 model directory",
    )
    parser.add_argument(
        "--glm",
        type=Path,
        default=Path.home() / "models" / "GLM-4.6V",
        help="Path to GLM-4.6V model directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3-V",
        help="Path to output directory",
    )
    parser.add_argument(
        "--shard-size",
        type=int,
        default=5,
        help="Maximum shard size in GB (default: 5)",
    )
    args = parser.parse_args()

    if args.shard_size <= 0:
        parser.error("--shard-size must be a positive number of GB")

    print("=" * 60)
    print("INTELLECT-3 -> GLM-4.6V Weight Grafting")
    print("=" * 60)
    print(f"INTELLECT-3 source: {args.intellect3}")
    print(f"GLM-4.6V source: {args.glm}")
    print(f"Output directory: {args.output}")
    print("=" * 60)

    # Verify source directories exist
    if not args.intellect3.exists():
        raise FileNotFoundError(f"INTELLECT-3 directory not found: {args.intellect3}")
    if not args.glm.exists():
        raise FileNotFoundError(f"GLM-4.6V directory not found: {args.glm}")

    # Writing into a source directory would overwrite its shards and index.
    output_resolved = args.output.resolve()
    for source_dir in (args.intellect3, args.glm):
        if output_resolved == source_dir.resolve():
            raise ValueError(
                f"Output directory must differ from source directory: {source_dir}"
            )

    # Load both models
    print("\nStep 1: Loading models into CPU memory...")
    intellect3_state = load_state_dict_from_safetensors(args.intellect3)
    glm_state = load_state_dict_from_safetensors(args.glm)
    print(f"\nINTELLECT-3 tensors: {len(intellect3_state)}")
    print(f"GLM-4.6V tensors: {len(glm_state)}")

    # Graft weights
    print("\nStep 2: Grafting INTELLECT-3 weights into GLM-4.6V...")
    grafted_state = graft_weights(intellect3_state, glm_state)

    # Free memory from source models (tensors still referenced by
    # grafted_state stay alive; the replaced/unused ones can be released).
    del intellect3_state
    del glm_state

    # Save grafted model
    print("\nStep 3: Saving grafted model...")
    save_state_dict_to_safetensors(
        grafted_state,
        args.output,
        max_shard_size=args.shard_size * 1024 ** 3,
    )

    # Copy config files from GLM-4.6V (since we're keeping its architecture)
    print("\nStep 4: Copying config files from GLM-4.6V...")
    copy_config_files(args.glm, args.output)

    print("\n" + "=" * 60)
    print("Done! Grafted model saved to:", args.output)
    print("=" * 60)


if __name__ == "__main__":
    main()