| """Convert internlm2 weights to Llama format.""" |
|
|
import json
import os

import einops
import tqdm
from mergekit.common import ModelReference
from mergekit.io import LazyTensorLoader, TensorWriter
from transformers import LlamaTokenizer

MODEL_IN = "internlm/internlm2-20b"
OUT_PATH = "./internlm2-20b-llama"

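# Read the source config (remote code is needed for the InternLM2 config class)
# and derive the grouped-query attention geometry used below to unpack the
# fused wqkv projection.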
model_ref = ModelReference.parse(MODEL_IN)
cfg = model_ref.config(trust_remote_code=True)
head_dim = cfg.hidden_size // cfg.num_attention_heads
num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads
loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True)
writer = TensorWriter(OUT_PATH)

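# Tensors whose weights carry over unchanged only need their InternLM2
# parameter names rewritten to the corresponding Llama names.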
SIMPLE_REPLACEMENTS = {
    "feed_forward.w1": "mlp.gate_proj",
    "feed_forward.w2": "mlp.down_proj",
    "feed_forward.w3": "mlp.up_proj",
    "attention.wo": "self_attn.o_proj",
    "ffn_norm": "post_attention_layernorm",
    "attention_norm": "input_layernorm",
    "tok_embeddings": "embed_tokens",
    "output.weight": "lm_head.weight",
}

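# Walk every tensor in the source checkpoint: attention.wqkv is split into
# separate Q/K/V projections, everything else is renamed and copied through.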
for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
    tensor = loader.get_tensor(tensor_name)
    if "attention.wqkv" in tensor_name:
        qkv_vecs = einops.rearrange(
            tensor, "(h gs d) z -> h gs d z", gs=2 + num_key_value_groups, d=head_dim
        )
        q_proj = (
            qkv_vecs[:, :num_key_value_groups, ...]
            .reshape(-1, cfg.hidden_size)
            .contiguous()
        )
        k_proj = qkv_vecs[:, -2, ...].reshape(-1, cfg.hidden_size).contiguous()
        v_proj = qkv_vecs[:, -1, ...].reshape(-1, cfg.hidden_size).contiguous()
        assert k_proj.shape == v_proj.shape

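        # Save each split projection under its Llama-style parameter name.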
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.q_proj"),
            q_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.k_proj"),
            k_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.v_proj"),
            v_proj,
            clone=True,
        )
        continue

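    # Everything else is a straight copy with a simple name substitution.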
    out_name = tensor_name
    for pattern, sub in SIMPLE_REPLACEMENTS.items():
        if pattern in out_name:
            out_name = out_name.replace(pattern, sub)
    writer.save_tensor(out_name, tensor)
writer.finalize()

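# Rewrite the config so the output loads as a plain LlamaForCausalLM: drop the
# remote-code auto_map and any no-op rope_scaling entry.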
cfg_dict = json.loads(cfg.to_json_string())
cfg_dict.pop("auto_map", None)
cfg_dict["architectures"] = ["LlamaForCausalLM"]
cfg_dict["model_type"] = "llama"
if cfg_dict.get("rope_scaling") and cfg_dict["rope_scaling"].get("factor") == 1.0:
    del cfg_dict["rope_scaling"]
with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
    json.dump(cfg_dict, fp, indent=2)
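
# Re-save the tokenizer with the stock (slow) LlamaTokenizer; it should read
# the SentencePiece model directly, with no remote code required.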
tok = LlamaTokenizer.from_pretrained(MODEL_IN, trust_remote_code=False, legacy=True)
tok.clean_up_tokenization_spaces = True
tok.save_pretrained(OUT_PATH)