| """Convert internlm2 weights to Llama format.""" |
|
|
import json
import os

import einops
import tqdm
from mergekit.common import ModelReference
from mergekit.io import LazyTensorLoader, TensorWriter
from transformers import LlamaTokenizer

MODEL_IN = "internlm/internlm2-20b"
OUT_PATH = "./internlm2-20b-llama"

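# Read the source config (remote code is needed for the InternLM2 config class)
# and derive the grouped-query attention geometry used below to unpack the
# fused wqkv projection.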
model_ref = ModelReference.parse(MODEL_IN)
cfg = model_ref.config(trust_remote_code=True)
head_dim = cfg.hidden_size // cfg.num_attention_heads
num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads
loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True)
writer = TensorWriter(OUT_PATH)

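# Tensors whose weights carry over unchanged only need their InternLM2
# parameter names rewritten to the corresponding Llama names.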
SIMPLE_REPLACEMENTS = {
    "feed_forward.w1": "mlp.gate_proj",
    "feed_forward.w2": "mlp.down_proj",
    "feed_forward.w3": "mlp.up_proj",
    "attention.wo": "self_attn.o_proj",
    "ffn_norm": "post_attention_layernorm",
    "attention_norm": "input_layernorm",
    "tok_embeddings": "embed_tokens",
    "output.weight": "lm_head.weight",
}

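# Walk every tensor in the source checkpoint: attention.wqkv is split into
# separate Q/K/V projections, everything else is renamed and copied through.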
for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
    tensor = loader.get_tensor(tensor_name)
    if "attention.wqkv" in tensor_name:
        qkv_vecs = einops.rearrange(
            tensor, "(h gs d) z -> h gs d z", gs=2 + num_key_value_groups, d=head_dim
        )
        q_proj = (
            qkv_vecs[:, :num_key_value_groups, ...]
            .reshape(-1, cfg.hidden_size)
            .contiguous()
        )
        k_proj = qkv_vecs[:, -2, ...].reshape(-1, cfg.hidden_size).contiguous()
        v_proj = qkv_vecs[:, -1, ...].reshape(-1, cfg.hidden_size).contiguous()
        assert k_proj.shape == v_proj.shape

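        # Save each split projection under its Llama-style parameter name.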
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.q_proj"),
            q_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.k_proj"),
            k_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.v_proj"),
            v_proj,
            clone=True,
        )
        continue

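    # Everything else is a straight copy with a simple name substitution.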
    out_name = tensor_name
    for pattern, sub in SIMPLE_REPLACEMENTS.items():
        if pattern in out_name:
            out_name = out_name.replace(pattern, sub)
    writer.save_tensor(out_name, tensor)
writer.finalize()

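# Rewrite the config so the output loads as a plain LlamaForCausalLM: drop the
# remote-code auto_map and any no-op rope_scaling entry.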
cfg_dict = json.loads(cfg.to_json_string())
cfg_dict.pop("auto_map", None)
cfg_dict["architectures"] = ["LlamaForCausalLM"]
cfg_dict["model_type"] = "llama"
if cfg_dict.get("rope_scaling") and cfg_dict["rope_scaling"].get("factor") == 1.0:
    del cfg_dict["rope_scaling"]
with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
    json.dump(cfg_dict, fp, indent=2)
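
# Re-save the tokenizer with the stock (slow) LlamaTokenizer; it should read
# the SentencePiece model directly, with no remote code required.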
tok = LlamaTokenizer.from_pretrained(MODEL_IN, trust_remote_code=False, legacy=True)
tok.clean_up_tokenization_spaces = True
tok.save_pretrained(OUT_PATH)