import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

def fix_model(input_path, output_path):
    print(f"Loading model from {input_path}...")
    print("This may take a moment as we load it into RAM...")
    
    # Load the model in bfloat16 to match the target format
    model = AutoModelForCausalLM.from_pretrained(
        input_path,
        torch_dtype=torch.bfloat16,
        device_map="cpu", # Load to CPU to avoid VRAM OOM during save
        trust_remote_code=True
    )
    
    # Load tokenizer to ensure it carries over
    try:
        tokenizer = AutoTokenizer.from_pretrained(input_path)
    except Exception as e:
        print(f"Warning: Could not load tokenizer ({e}). You may need to copy it manually.")
        tokenizer = None

    print("Forcing weight tying...")
    # This is the magic command. It tells HF to treat embed_tokens and lm_head as the same object.
    model.tie_weights()
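    # With the weights tied, save_pretrained() below drops the duplicate lm_head
    # tensor from the checkpoint, which is where the size reduction comes from
    # (roughly vocab_size * hidden_size in bf16).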
    
    # --- THE FIX FOR YOUR ERROR ---
    # The error happens because the model config has conflicting settings.
    # We force use_cache to True to satisfy the 'hybrid' cache implementation requirement.
    print("Fixing Generation Config conflicts...")
    model.config.use_cache = True
    if model.generation_config is not None:
        model.generation_config.use_cache = True
    # ------------------------------
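    # (Assumption about the failure mode, not verified against this checkpoint:
    # Gemma-2-style configs set cache_implementation="hybrid", and that cache
    # type expects use_cache to be enabled, so a saved use_cache=False
    # conflicts with it when generate() is called.)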

    print(f"Saving fixed model to {output_path}...")
    # max_shard_size="5GB" creates the standard 4-5 shard layout usually seen in 17.2GB models
    model.save_pretrained(
        output_path, 
        safe_serialization=True, 
        max_shard_size="5GB" 
    )
    
    if tokenizer:
        tokenizer.save_pretrained(output_path)

    print("Done! The model should now be ~17.2GB and merge-compatible.")

if __name__ == "__main__":
    # --- CONFIGURATION ---
    # Use 'r' before the string to handle backslashes correctly on Windows
    
    # 1. Where is the 18.9GB model? (Current folder)
    input_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"
    
    # 2. Where do you want the fixed 17.2GB model?
    output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
    # ---------------------

    if not os.path.exists(input_model_path):
        print(f"Error: Input path '{input_model_path}' does not exist.")
    else:
        fix_model(input_model_path, output_model_path)