import os

import torch
from safetensors.torch import save_file  # third-party: pip install safetensors

# Checkpoints to convert and their explicit output names; any file not in
# the map falls back to swapping a trailing ".pth" for ".safetensors".
files = ["neon213_muon_sota_fp16.pth"]
target_map = {"neon213_muon_sota_fp16.pth": "model.safetensors"}


def _convert_checkpoint(src: str, target: str) -> None:
    """Convert one PyTorch ``.pth`` state dict to a ``.safetensors`` file.

    safetensors refuses to serialize tensors that share memory, so when the
    output head is tied to the input embedding (common in Neon/Qwen-style
    architectures) the duplicate ``head.weight`` entry is dropped — loaders
    are expected to re-tie it from ``token_emb.weight``.
    """
    print(f"Loading {src}...")
    # weights_only=True blocks arbitrary code execution via pickle payloads.
    state_dict = torch.load(src, map_location="cpu", weights_only=True)

    if "token_emb.weight" in state_dict and "head.weight" in state_dict:
        # Same data pointer => the two entries are views of one storage,
        # i.e. genuinely tied weights rather than independent copies.
        # NOTE(review): data_ptr() matches only when both views start at the
        # same offset; for weight tying in practice they always do.
        if state_dict["token_emb.weight"].data_ptr() == state_dict["head.weight"].data_ptr():
            print("Detected tied weights (token_emb and head). Removing duplicate for safetensors.")
            del state_dict["head.weight"]

    # save_file raises on non-contiguous tensors; normalize here instead of
    # crashing on checkpoints that hold transposed/sliced views.
    state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    save_file(state_dict, target)
    print(f"Successfully created {target}\n")


def main() -> None:
    """Convert every checkpoint in ``files`` that exists on disk."""
    for src in files:
        if not os.path.exists(src):
            continue
        # removesuffix strips only a trailing ".pth"; str.replace would also
        # rewrite a ".pth" appearing in the middle of the name.
        target = target_map.get(src, src.removesuffix(".pth") + ".safetensors")
        _convert_checkpoint(src, target)


if __name__ == "__main__":
    main()