Spaces:

Naphula
/

model_tools

Running

App Files Files Community

model_tools / lm_head_remover.py

Naphula

Upload lm_head_remover.py

01938b3 verified 3 months ago

raw

history blame contribute delete

2.5 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import os
	import argparse

	def fix_model(input_path, output_path):
	    """Reload a causal-LM checkpoint, re-tie its weights, and save it again.

	    The model at ``input_path`` is loaded on the CPU in bfloat16,
	    ``tie_weights()`` is called on it, ``use_cache`` is switched on in both
	    the model config and the generation config, and the result is written
	    to ``output_path`` as safetensors shards of at most 5GB. The tokenizer
	    is saved alongside the model when it can be loaded; otherwise a warning
	    is printed and only the model is saved.

	    Args:
	        input_path: Location handed to ``from_pretrained`` for both the
	            model and the tokenizer.
	        output_path: Location handed to ``save_pretrained``.
	    """
	    print(f"Loading model from {input_path}...")
	    print("This may take a moment as we load it into RAM...")

	    # Load the model in bfloat16 to match the target format.
	    # NOTE: trust_remote_code=True allows the checkpoint to run its own
	    # Python code while loading; only point this at checkpoints you trust.
	    model = AutoModelForCausalLM.from_pretrained(
	        input_path,
	        torch_dtype=torch.bfloat16,
	        device_map="cpu",  # Load to CPU to avoid VRAM OOM during save
	        trust_remote_code=True,
	    )

	    # Load the tokenizer so it carries over to the output folder. A failure
	    # here is not fatal: the model can still be fixed and the tokenizer
	    # files copied by hand. Only ordinary errors are caught, so Ctrl+C
	    # still aborts the run, and the reason is reported instead of hidden.
	    try:
	        tokenizer = AutoTokenizer.from_pretrained(input_path)
	    except Exception as exc:
	        print("Warning: Could not load tokenizer. You may need to copy it manually.")
	        print(f"Reason: {exc}")
	        tokenizer = None

	    print("Forcing weight tying...")
	    # Ask Transformers to share the input embedding and output head
	    # (embed_tokens / lm_head) weights.
	    # NOTE(review): whether tying actually happens presumably depends on the
	    # model config (tie_word_embeddings) - confirm for this checkpoint.
	    model.tie_weights()

	    # --- THE FIX FOR YOUR ERROR ---
	    # The error happens because the model config has conflicting settings.
	    # use_cache is forced to True to satisfy the 'hybrid' cache
	    # implementation requirement.
	    print("Fixing Generation Config conflicts...")
	    model.config.use_cache = True
	    if model.generation_config is not None:
	        model.generation_config.use_cache = True
	    # ------------------------------

	    print(f"Saving fixed model to {output_path}...")
	    # max_shard_size="5GB" creates the standard 4-5 shard layout usually
	    # seen in 17.2GB models.
	    model.save_pretrained(
	        output_path,
	        safe_serialization=True,
	        max_shard_size="5GB",
	    )

	    # Compare against None explicitly: tokenizer objects define __len__, so
	    # a plain truthiness test could skip a tokenizer that loaded fine.
	    if tokenizer is not None:
	        tokenizer.save_pretrained(output_path)

	    print("Done! The model should now be ~17.2GB and merge-compatible.")

	if __name__ == "__main__":
	    # --- CONFIGURATION ---
	    # Raw strings (r"...") keep the Windows backslashes literal.

	    # 1. Source checkpoint: the folder holding the 18.9GB model.
	    input_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1"

	    # 2. Destination folder for the fixed 17.2GB model.
	    output_model_path = r"A:\LLM\.cache\huggingface\hub\!models--sam-paech--Darkest-muse-v1\fixed"
	    # ---------------------

	    # Run the conversion only when the source folder is actually present;
	    # otherwise report the missing path and do nothing else.
	    # (`os` comes from the file-level imports.)
	    if os.path.exists(input_model_path):
	        fix_model(input_model_path, output_model_path)
	    else:
	        print(f"Error: Input path '{input_model_path}' does not exist.")