samcheng0
/

deeplm-108m

Safetensors

deeplm

Model card Files Files and versions

xet

Community

samcheng0 committed 6 days ago

Commit

68f155a

verified ·

1 Parent(s): fe7d2c6

Upload init_model.py with huggingface_hub

Browse files

Files changed (1) hide show

init_model.py +119 -0

init_model.py ADDED Viewed

	@@ -0,0 +1,119 @@

+"""
+Initialize Deeplm model with config and BitNet quantization, save to safetensors.
+"""
+import sys
+import os
+import json
+import torch
+# Add deeplm to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "deeplm"))
+from deeplm.config import DeeplmConfig
+from deeplm.model.deeplm import DeeplmModel
+from deeplm.quantization.bitnet_quantize import apply_bitnet_quantization
+def main():
+    print("Building DeeplmConfig...")
+    config = DeeplmConfig(
+        vocab_size=32000,
+        max_seq_length=4096,
+        dtype="float32",
+    )
+    config.architecture.num_layers = 10
+    config.architecture.hidden_size = 512
+    config.architecture.intermediate_size = 2048
+    config.architecture.num_attention_heads = 8
+    config.architecture.num_key_value_heads = 1
+    config.architecture.head_dim = 128
+    config.architecture.rope_head_dim = 64
+    config.architecture.nope_head_dim = 64
+    config.architecture.max_seq_length = 4096
+    config.architecture.rope_theta = 50000.0
+    config.mla.q_lora_rank = 192
+    config.mla.kv_lora_rank = 64
+    config.mla.qk_rope_head_dim = 64
+    config.mla.qk_nope_head_dim = 64
+    config.mla.v_head_dim = 128
+    config.mla.num_heads = 8
+    config.mla.kv_heads = 1
+    config.moe.num_routed_experts = 4
+    config.moe.num_shared_experts = 1
+    config.moe.top_k = 2
+    config.mtp.num_mtp_layers = 2
+    config.mtp.mtp_depth = 2
+    config.mtp.mtp_hidden_size = 512
+    config.output_heads.lm_head.type = "tied"
+    config.output_heads.lm_head.bias = False
+    print(f"Creating DeeplmModel...")
+    model = DeeplmModel(config)
+    total_params = model.num_parameters()
+    print(f"Total parameters: {total_params:,}")
+    print("Applying BitNet b1.58 ternary quantization (absmean)...")
+    stats = apply_bitnet_quantization(model, scale="absmean", verbose=True)
+    print(f"Quantized {stats['quantized']}/{stats['total_linear']} linear layers")
+    print("Saving to model.safetensors...")
+    from safetensors.torch import save_file
+    state_dict = model.state_dict()
+    save_file(state_dict, "model.safetensors")
+    # Save config.json
+    config_json = {
+        "architectures": ["DeeplmModel"],
+        "model_type": "deeplm",
+        "vocab_size": 32000,
+        "hidden_size": 512,
+        "intermediate_size": 2048,
+        "num_hidden_layers": 10,
+        "num_attention_heads": 8,
+        "num_key_value_heads": 1,
+        "max_position_embeddings": 4096,
+        "rms_norm_eps": 1e-06,
+        "rope_theta": 50000.0,
+        "rope_dim": 64,
+        "tie_word_embeddings": True,
+        "num_routed_experts": 4,
+        "num_shared_experts": 1,
+        "expert_topk": 2,
+        "q_lora_rank": 192,
+        "kv_lora_rank": 64,
+        "qk_rope_head_dim": 64,
+        "qk_nope_head_dim": 64,
+        "v_head_dim": 128,
+        "mtp_depth": 2,
+        "mtp_num_layers": 2,
+        "bitnet_quantized": True,
+        "bitnet_scale": "absmean",
+    }
+    with open("config.json", "w") as f:
+        json.dump(config_json, f, indent=2)
+    print("Saved config.json")
+    # Save generation_config.json
+    gen_config = {
+        "max_new_tokens": 512,
+        "do_sample": True,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "top_k": 50,
+        "repetition_penalty": 1.1,
+        "pad_token_id": 0,
+        "eos_token_id": 2,
+        "bos_token_id": 1,
+    }
+    with open("generation_config.json", "w") as f:
+        json.dump(gen_config, f, indent=2)
+    print("Saved generation_config.json")
+    print("Done!")
+# Run the initialization only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()