""" Initialize Deeplm model with config and BitNet quantization, save to safetensors. """ import sys import os import json import torch # Add deeplm to path sys.path.insert(0, os.path.join(os.path.dirname(__file__), "deeplm")) from deeplm.config import DeeplmConfig from deeplm.model.deeplm import DeeplmModel from deeplm.quantization.bitnet_quantize import apply_bitnet_quantization def main(): print("Building DeeplmConfig...") config = DeeplmConfig( vocab_size=32000, max_seq_length=4096, dtype="float32", ) config.architecture.num_layers = 10 config.architecture.hidden_size = 512 config.architecture.intermediate_size = 2048 config.architecture.num_attention_heads = 8 config.architecture.num_key_value_heads = 1 config.architecture.head_dim = 128 config.architecture.rope_head_dim = 64 config.architecture.nope_head_dim = 64 config.architecture.max_seq_length = 4096 config.architecture.rope_theta = 50000.0 config.mla.q_lora_rank = 192 config.mla.kv_lora_rank = 64 config.mla.qk_rope_head_dim = 64 config.mla.qk_nope_head_dim = 64 config.mla.v_head_dim = 128 config.mla.num_heads = 8 config.mla.kv_heads = 1 config.moe.num_routed_experts = 4 config.moe.num_shared_experts = 1 config.moe.top_k = 2 config.mtp.num_mtp_layers = 2 config.mtp.mtp_depth = 2 config.mtp.mtp_hidden_size = 512 config.output_heads.lm_head.type = "tied" config.output_heads.lm_head.bias = False print(f"Creating DeeplmModel...") model = DeeplmModel(config) total_params = model.num_parameters() print(f"Total parameters: {total_params:,}") print("Applying BitNet b1.58 ternary quantization (absmean)...") stats = apply_bitnet_quantization(model, scale="absmean", verbose=True) print(f"Quantized {stats['quantized']}/{stats['total_linear']} linear layers") print("Saving to model.safetensors...") from safetensors.torch import save_file state_dict = model.state_dict() save_file(state_dict, "model.safetensors") # Save config.json config_json = { "architectures": ["DeeplmModel"], "model_type": "deeplm", "vocab_size": 32000, "hidden_size": 512, "intermediate_size": 2048, "num_hidden_layers": 10, "num_attention_heads": 8, "num_key_value_heads": 1, "max_position_embeddings": 4096, "rms_norm_eps": 1e-06, "rope_theta": 50000.0, "rope_dim": 64, "tie_word_embeddings": True, "num_routed_experts": 4, "num_shared_experts": 1, "expert_topk": 2, "q_lora_rank": 192, "kv_lora_rank": 64, "qk_rope_head_dim": 64, "qk_nope_head_dim": 64, "v_head_dim": 128, "mtp_depth": 2, "mtp_num_layers": 2, "bitnet_quantized": True, "bitnet_scale": "absmean", } with open("config.json", "w") as f: json.dump(config_json, f, indent=2) print("Saved config.json") # Save generation_config.json gen_config = { "max_new_tokens": 512, "do_sample": True, "temperature": 0.7, "top_p": 0.9, "top_k": 50, "repetition_penalty": 1.1, "pad_token_id": 0, "eos_token_id": 2, "bos_token_id": 1, } with open("generation_config.json", "w") as f: json.dump(gen_config, f, indent=2) print("Saved generation_config.json") print("Done!") if __name__ == "__main__": main()