File size: 3,539 Bytes

68f155a

"""
Initialize a Deeplm model from a hand-built config, apply BitNet quantization, and save
the weights to safetensors together with config.json and generation_config.json.
"""
import sys
import os
import json
import torch

# Add deeplm to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "deeplm"))

from deeplm.config import DeeplmConfig
from deeplm.model.deeplm import DeeplmModel
from deeplm.quantization.bitnet_quantize import apply_bitnet_quantization

def _write_json(path, payload):
    """Serialize *payload* to *path* as 2-space-indented JSON."""
    # json.dump emits ASCII-only text by default; the explicit encoding just
    # keeps the write independent of the platform's locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def main(output_dir="."):
    """Build a Deeplm model, quantize it, and export weights plus configs.

    Steps, in order:
      1. Populate a ``DeeplmConfig`` with the hyperparameters listed below.
      2. Instantiate ``DeeplmModel`` and print its parameter count.
      3. Run ``apply_bitnet_quantization`` with the ``absmean`` scale.
      4. Write ``model.safetensors``, ``config.json`` and
         ``generation_config.json`` into *output_dir*.

    Every hyperparameter is declared once and reused for both the in-memory
    config and ``config.json`` so the two cannot drift apart.

    Args:
        output_dir: Directory that receives the three output files. Defaults
            to the current working directory (the original behavior). It is
            created if it does not exist.
    """
    # --- Hyperparameters (single source of truth) --------------------------
    vocab_size = 32000
    max_seq_length = 4096
    num_layers = 10
    hidden_size = 512
    intermediate_size = 2048
    num_heads = 8
    num_kv_heads = 1
    head_dim = 128
    rope_head_dim = 64
    nope_head_dim = 64
    v_head_dim = 128
    rope_theta = 50000.0
    q_lora_rank = 192
    kv_lora_rank = 64
    num_routed_experts = 4
    num_shared_experts = 1
    moe_top_k = 2
    num_mtp_layers = 2
    mtp_depth = 2
    lm_head_type = "tied"
    bitnet_scale = "absmean"

    # Fail early on an unusable destination, before the model is built.
    os.makedirs(output_dir or ".", exist_ok=True)

    print("Building DeeplmConfig...")
    config = DeeplmConfig(
        vocab_size=vocab_size,
        max_seq_length=max_seq_length,
        dtype="float32",
    )

    arch = config.architecture
    arch.num_layers = num_layers
    arch.hidden_size = hidden_size
    arch.intermediate_size = intermediate_size
    arch.num_attention_heads = num_heads
    arch.num_key_value_heads = num_kv_heads
    arch.head_dim = head_dim
    arch.rope_head_dim = rope_head_dim
    arch.nope_head_dim = nope_head_dim
    arch.max_seq_length = max_seq_length
    arch.rope_theta = rope_theta

    # The MLA section repeats the head counts and rope/nope split set on the
    # architecture section; both are fed from the same locals.
    mla = config.mla
    mla.q_lora_rank = q_lora_rank
    mla.kv_lora_rank = kv_lora_rank
    mla.qk_rope_head_dim = rope_head_dim
    mla.qk_nope_head_dim = nope_head_dim
    mla.v_head_dim = v_head_dim
    mla.num_heads = num_heads
    mla.kv_heads = num_kv_heads

    config.moe.num_routed_experts = num_routed_experts
    config.moe.num_shared_experts = num_shared_experts
    config.moe.top_k = moe_top_k

    config.mtp.num_mtp_layers = num_mtp_layers
    config.mtp.mtp_depth = mtp_depth
    # The original script used the same value (512) here as for hidden_size.
    config.mtp.mtp_hidden_size = hidden_size

    config.output_heads.lm_head.type = lm_head_type
    config.output_heads.lm_head.bias = False

    print("Creating DeeplmModel...")
    model = DeeplmModel(config)

    total_params = model.num_parameters()
    print(f"Total parameters: {total_params:,}")

    print("Applying BitNet b1.58 ternary quantization (absmean)...")
    stats = apply_bitnet_quantization(model, scale=bitnet_scale, verbose=True)
    print(f"Quantized {stats['quantized']}/{stats['total_linear']} linear layers")

    print("Saving to model.safetensors...")
    from safetensors.torch import save_model
    # save_file() raises when the state dict holds tensors that share storage,
    # which is what a tied lm_head produces if the model exposes the embedding
    # matrix under two names. save_model() keeps one name per shared tensor
    # and records the aliases in the file metadata; with no shared tensors it
    # writes the same file save_file() would.
    save_model(model, os.path.join(output_dir, "model.safetensors"))

    # Save config.json
    config_json = {
        "architectures": ["DeeplmModel"],
        "model_type": "deeplm",
        "vocab_size": vocab_size,
        "hidden_size": hidden_size,
        "intermediate_size": intermediate_size,
        "num_hidden_layers": num_layers,
        "num_attention_heads": num_heads,
        "num_key_value_heads": num_kv_heads,
        "max_position_embeddings": max_seq_length,
        # Not set on the config object above.
        # NOTE(review): confirm this matches the DeeplmConfig default.
        "rms_norm_eps": 1e-06,
        "rope_theta": rope_theta,
        "rope_dim": rope_head_dim,
        "tie_word_embeddings": lm_head_type == "tied",
        "num_routed_experts": num_routed_experts,
        "num_shared_experts": num_shared_experts,
        "expert_topk": moe_top_k,
        "q_lora_rank": q_lora_rank,
        "kv_lora_rank": kv_lora_rank,
        "qk_rope_head_dim": rope_head_dim,
        "qk_nope_head_dim": nope_head_dim,
        "v_head_dim": v_head_dim,
        "mtp_depth": mtp_depth,
        "mtp_num_layers": num_mtp_layers,
        "bitnet_quantized": True,
        "bitnet_scale": bitnet_scale,
    }
    _write_json(os.path.join(output_dir, "config.json"), config_json)
    print("Saved config.json")

    # Save generation_config.json
    gen_config = {
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
        "repetition_penalty": 1.1,
        "pad_token_id": 0,
        "eos_token_id": 2,
        "bos_token_id": 1,
    }
    _write_json(os.path.join(output_dir, "generation_config.json"), gen_config)
    print("Saved generation_config.json")

    print("Done!")

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()