import argparse
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def fix_model(input_path, output_path):
    """Re-save a Hugging Face causal-LM checkpoint with tied weights.

    Loads the model in bfloat16 on CPU, ties the input-embedding and
    lm_head weights (so only one copy is serialized, shrinking the
    checkpoint), forces ``use_cache=True`` to resolve the 'hybrid' cache
    config conflict, and writes the result as sharded safetensors.

    Args:
        input_path: Directory containing the original HF model.
        output_path: Directory the fixed model is written to (created by
            ``save_pretrained`` if missing).
    """
    print(f"Loading model from {input_path}...")
    print("This may take a moment as we load it into RAM...")

    # bfloat16 matches the target format; device_map="cpu" keeps the load
    # in system RAM to avoid VRAM OOM during the save step.
    model = AutoModelForCausalLM.from_pretrained(
        input_path,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
        trust_remote_code=True,
    )

    # Load the tokenizer so it is carried over to the output directory.
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        tokenizer = AutoTokenizer.from_pretrained(input_path)
    except Exception:
        print("Warning: Could not load tokenizer. You may need to copy it manually.")
        tokenizer = None

    print("Forcing weight tying...")
    # This is the magic command. It tells HF to treat embed_tokens and
    # lm_head as the same object.
    model.tie_weights()

    # --- THE FIX FOR THE ORIGINAL ERROR ---
    # The error happens because the model config has conflicting settings.
    # Force use_cache to True to satisfy the 'hybrid' cache implementation
    # requirement.
    print("Fixing Generation Config conflicts...")
    model.config.use_cache = True
    if model.generation_config is not None:
        model.generation_config.use_cache = True
    # --------------------------------------

    print(f"Saving fixed model to {output_path}...")
    # max_shard_size="5GB" creates the standard 4-5 shard layout usually
    # seen in ~17.2GB checkpoints.
    model.save_pretrained(
        output_path,
        safe_serialization=True,
        max_shard_size="5GB",
    )
    if tokenizer:
        tokenizer.save_pretrained(output_path)

    print("Done! The model should now be ~17.2GB and merge-compatible.")


if __name__ == "__main__":
    # --- CONFIGURATION ---
    # Raw strings handle Windows backslashes. The original hard-coded paths
    # are kept as defaults, so running with no arguments behaves exactly as
    # before, but they can now be overridden on the command line.
    default_input = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"
    default_output = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
    # ---------------------

    parser = argparse.ArgumentParser(
        description="Tie weights and fix use_cache conflicts in a HF checkpoint."
    )
    parser.add_argument(
        "--input",
        default=default_input,
        help="Directory of the original model.",
    )
    parser.add_argument(
        "--output",
        default=default_output,
        help="Directory to write the fixed model.",
    )
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: Input path '{args.input}' does not exist.")
    else:
        fix_model(args.input, args.output)