# Fix a locally cached HF checkpoint: tie weights and resolve cache-config conflicts.
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import os | |
| import argparse | |
def fix_model(input_path, output_path):
    """Re-save a local HF causal-LM checkpoint with tied weights and a
    consistent cache configuration.

    Loads the model from ``input_path`` in bfloat16 on CPU, ties the input
    embedding and LM-head weights (so only one copy is serialized, which is
    what shrinks the checkpoint), forces ``use_cache=True`` to satisfy the
    'hybrid' cache implementation requirement, and writes sharded
    safetensors to ``output_path``.

    Args:
        input_path: Directory containing the original checkpoint.
        output_path: Directory the fixed checkpoint is written to.
    """
    print(f"Loading model from {input_path}...")
    print("This may take a moment as we load it into RAM...")

    # Load the model in bfloat16 to match the target format.
    model = AutoModelForCausalLM.from_pretrained(
        input_path,
        torch_dtype=torch.bfloat16,
        device_map="cpu",  # Load to CPU to avoid VRAM OOM during save
        trust_remote_code=True,
    )

    # Load tokenizer to ensure it carries over.
    try:
        tokenizer = AutoTokenizer.from_pretrained(input_path)
    except Exception:  # was a bare `except:` — that would also swallow KeyboardInterrupt/SystemExit
        print("Warning: Could not load tokenizer. You may need to copy it manually.")
        tokenizer = None

    print("Forcing weight tying...")
    # This is the magic command. It tells HF to treat embed_tokens and
    # lm_head as the same object, so the duplicate head is not serialized.
    model.tie_weights()

    # --- THE FIX FOR YOUR ERROR ---
    # The error happens because the model config has conflicting settings.
    # We force use_cache to True to satisfy the 'hybrid' cache
    # implementation requirement.
    print("Fixing Generation Config conflicts...")
    model.config.use_cache = True
    if model.generation_config is not None:
        model.generation_config.use_cache = True
    # ------------------------------

    print(f"Saving fixed model to {output_path}...")
    # max_shard_size="5GB" creates the standard 4-5 shard layout usually
    # seen in 17.2GB models.
    model.save_pretrained(
        output_path,
        safe_serialization=True,
        max_shard_size="5GB",
    )
    if tokenizer:
        tokenizer.save_pretrained(output_path)

    print("Done! The model should now be ~17.2GB and merge-compatible.")
if __name__ == "__main__":
    # --- CONFIGURATION ---
    # Raw strings (r"...") keep Windows backslashes intact.
    # 1. Where is the 18.9GB model? (Current folder)
    input_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"
    # 2. Where do you want the fixed 17.2GB model?
    output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
    # ---------------------

    # NOTE: `os` is already imported at module level, so the previous
    # redundant local `import os` was removed.
    if not os.path.exists(input_model_path):
        print(f"Error: Input path '{input_model_path}' does not exist.")
    else:
        fix_model(input_model_path, output_model_path)