CMSManhattan
/

LlamaRoboticsTokenizer

Mixture of Experts

Model card Files Files and versions

LlamaRoboticsTokenizer / copy_weights.py

kgrabko's picture

Update copy_weights.py

ceb5faa verified about 2 months ago

History Blame Contribute Delete

2.64 kB

	# =============================================================================
	# COPYRIGHT © 2025-2026 Konstantin Vladimirovich Grabko. ALL RIGHTS RESERVED.
	# CMS Manhattan JiRack Technology — PATENT PENDING
	#
	# This code is proprietary.
	# Personal and non-commercial research use is allowed.
	# Any commercial use, derivative works for profit, or distribution
	# requires a paid license and 5% royalty.
	#
	# Unauthorized commercial use is strictly prohibited.
	# Contact: grabko@cmsmanhattan.com
	# =============================================================================
	#
	# Model updated for the latest tokenizer version
	#
	# Replace the tokenizer in an existing model: just use the class and call the resize function in your training script.
	# New model: use our conversion script to rapidly initialize a new model by copying the existing embedding and LM-head weights. This enables fast model bootstrapping or, alternatively, provides the foundation to train a new model entirely from scratch.
	#
	# =============================================================================


	import torch
	from JiRackTernary_new import JiRackConfig, JiRackTernary1B
	from transformers import AutoTokenizer
	from safetensors.torch import load_file, save_file
	import os

	# Bootstrap a freshly constructed JiRackTernary1B with the vocabulary rows
	# (token embeddings and LM head) of an existing checkpoint, initialize the
	# rows that belong to newly added tokens, and save the result together with
	# the new tokenizer. Only `token_emb.weight` and `lm_head.weight` are taken
	# from the old checkpoint; every other parameter keeps whatever values the
	# model constructor gave it.
	print("🚀 Copying embeddings and lm_head...")

	# Input/output locations, relative to the current working directory.
	old_model_path = "."
	new_tokenizer_path = "./jirack_code_tokenizer"
	save_path = "./JiRack_init_model_with_new_vocab"

	os.makedirs(save_path, exist_ok=True)

	# Load new tokenizer; its length is the vocabulary the new model should cover.
	tokenizer = AutoTokenizer.from_pretrained(new_tokenizer_path)
	new_vocab_size = len(tokenizer)

	print(f"New vocab size: {new_vocab_size}")

	# Create new model from the default configuration.
	# NOTE(review): this assumes JiRackConfig() already defaults to the new
	# vocabulary size; the check further down warns if it does not.
	config = JiRackConfig()
	model = JiRackTernary1B(config)

	# Load old model (SafeTensors)
	old_state = load_file(os.path.join(old_model_path, "model.safetensors"))

	# Number of leading vocabulary rows carried over from the old checkpoint.
	old_vocab_size = 128256

	# Fail early with a readable message instead of an opaque shape-mismatch
	# error in the middle of the copy.
	for key in ("token_emb.weight", "lm_head.weight"):
	    if key not in old_state:
	        raise KeyError(f"'{key}' not found in the old checkpoint")
	    if old_state[key].shape[0] < old_vocab_size:
	        raise ValueError(
	            f"'{key}' in the old checkpoint has {old_state[key].shape[0]} rows, "
	            f"expected at least {old_vocab_size}"
	        )

	model_vocab_size = model.token_emb.weight.shape[0]
	if model_vocab_size < old_vocab_size:
	    raise ValueError(
	        f"New model has {model_vocab_size} vocabulary rows, "
	        f"fewer than the {old_vocab_size} rows to copy"
	    )
	if model_vocab_size != new_vocab_size:
	    print(
	        f"⚠️ Model vocab size ({model_vocab_size}) differs from "
	        f"tokenizer size ({new_vocab_size})"
	    )

	# no_grad: the parameters are leaf tensors that require grad, so in-place
	# writes to them must not be tracked by autograd.
	with torch.no_grad():
	    # Copy old weights (slice assignment copies, so no explicit clone is needed).
	    model.token_emb.weight[:old_vocab_size] = old_state['token_emb.weight'][:old_vocab_size]
	    model.lm_head.weight[:old_vocab_size] = old_state['lm_head.weight'][:old_vocab_size]

	    # Initialize the rows of the new tokens (the original note says 3 FIM
	    # tokens) with the mean row of the matching old matrix. Each matrix uses
	    # its OWN mean: the embedding mean is only a sensible seed for lm_head
	    # when the two matrices are tied.
	    mean_emb = old_state['token_emb.weight'].mean(dim=0)
	    mean_head = old_state['lm_head.weight'].mean(dim=0)
	    model.token_emb.weight[old_vocab_size:] = mean_emb
	    model.lm_head.weight[old_vocab_size:] = mean_head

	print(f"✅ Copied {old_vocab_size} tokens")
	# Report the number of rows actually initialized, not the tokenizer delta.
	print(f"✅ Initialized {model_vocab_size - old_vocab_size} new tokens")

	# Save in SafeTensors, next to the tokenizer files.
	save_file(model.state_dict(), os.path.join(save_path, "model.safetensors"))
	tokenizer.save_pretrained(save_path)

	print(f"\n🎉 Done! New model saved to: {save_path}")
	print("Use this folder as the starting weights for training from scratch.")