samcheng0
/

deeplm-108m

Text Generation

Mixture of Experts

Model card Files Files and versions

deeplm-108m / init_model.py

samcheng0's picture

Upload init_model.py with huggingface_hub

68f155a verified 1 day ago

history blame contribute delete

3.54 kB

	"""
	Initialize Deeplm model with config and BitNet quantization, save to safetensors.
	"""
	import sys
	import os
	import json
	import torch

	# Put the "deeplm" directory that sits next to this script at the front of sys.path
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), "deeplm"))

	from deeplm.config import DeeplmConfig
	from deeplm.model.deeplm import DeeplmModel
	from deeplm.quantization.bitnet_quantize import apply_bitnet_quantization

	def _write_json(path, payload):
	    """Write ``payload`` to ``path`` as JSON with a 2-space indent (UTF-8)."""
	    with open(path, "w", encoding="utf-8") as f:
	        json.dump(payload, f, indent=2)


	def main():
	    """Build, quantize and export a freshly initialized Deeplm model.

	    Steps, in order:
	      1. Build a ``DeeplmConfig`` and set its architecture / MLA / MoE / MTP /
	         output-head fields.
	      2. Instantiate ``DeeplmModel`` and print its parameter count.
	      3. Apply BitNet quantization via ``apply_bitnet_quantization``.
	      4. Write ``model.safetensors``, ``config.json`` and
	         ``generation_config.json``.

	    All output paths are relative, so the files land in the current working
	    directory.
	    """
	    # Imported first so a missing safetensors install fails before the model
	    # is built and quantized rather than after.
	    from safetensors.torch import save_file

	    # Each hyperparameter is named once here and reused for both the model
	    # config and the exported config.json, so the two cannot drift apart.
	    vocab_size = 32000
	    max_seq_length = 4096
	    num_layers = 10
	    hidden_size = 512
	    intermediate_size = 2048
	    num_attention_heads = 8
	    num_key_value_heads = 1
	    head_dim = 128
	    qk_rope_head_dim = 64
	    qk_nope_head_dim = 64
	    v_head_dim = 128
	    rope_theta = 50000.0
	    q_lora_rank = 192
	    kv_lora_rank = 64
	    num_routed_experts = 4
	    num_shared_experts = 1
	    expert_top_k = 2
	    num_mtp_layers = 2
	    mtp_depth = 2
	    bitnet_scale = "absmean"

	    print("Building DeeplmConfig...")
	    config = DeeplmConfig(
	        vocab_size=vocab_size,
	        max_seq_length=max_seq_length,
	        dtype="float32",
	    )

	    # Core transformer dimensions.
	    config.architecture.num_layers = num_layers
	    config.architecture.hidden_size = hidden_size
	    config.architecture.intermediate_size = intermediate_size
	    config.architecture.num_attention_heads = num_attention_heads
	    config.architecture.num_key_value_heads = num_key_value_heads
	    config.architecture.head_dim = head_dim
	    config.architecture.rope_head_dim = qk_rope_head_dim
	    config.architecture.nope_head_dim = qk_nope_head_dim
	    config.architecture.max_seq_length = max_seq_length
	    config.architecture.rope_theta = rope_theta

	    # Attention ("mla") settings; head counts mirror the architecture section.
	    config.mla.q_lora_rank = q_lora_rank
	    config.mla.kv_lora_rank = kv_lora_rank
	    config.mla.qk_rope_head_dim = qk_rope_head_dim
	    config.mla.qk_nope_head_dim = qk_nope_head_dim
	    config.mla.v_head_dim = v_head_dim
	    config.mla.num_heads = num_attention_heads
	    config.mla.kv_heads = num_key_value_heads

	    # Mixture-of-experts routing.
	    config.moe.num_routed_experts = num_routed_experts
	    config.moe.num_shared_experts = num_shared_experts
	    config.moe.top_k = expert_top_k

	    # "mtp" section; its hidden size is kept equal to the model hidden size,
	    # as in the original values (both 512).
	    config.mtp.num_mtp_layers = num_mtp_layers
	    config.mtp.mtp_depth = mtp_depth
	    config.mtp.mtp_hidden_size = hidden_size

	    config.output_heads.lm_head.type = "tied"
	    config.output_heads.lm_head.bias = False

	    print("Creating DeeplmModel...")
	    model = DeeplmModel(config)

	    total_params = model.num_parameters()
	    print(f"Total parameters: {total_params:,}")

	    print("Applying BitNet b1.58 ternary quantization (absmean)...")
	    stats = apply_bitnet_quantization(model, scale=bitnet_scale, verbose=True)
	    print(f"Quantized {stats['quantized']}/{stats['total_linear']} linear layers")

	    print("Saving to model.safetensors...")
	    # NOTE(review): the LM head is configured as "tied". If tying makes two
	    # state_dict entries share storage, safetensors' save_file refuses to
	    # write them (safetensors.torch.save_model handles that case) - confirm
	    # against DeeplmModel before relying on this path.
	    state_dict = model.state_dict()
	    save_file(state_dict, "model.safetensors")

	    # Save config.json
	    config_json = {
	        "architectures": ["DeeplmModel"],
	        "model_type": "deeplm",
	        "vocab_size": vocab_size,
	        "hidden_size": hidden_size,
	        "intermediate_size": intermediate_size,
	        "num_hidden_layers": num_layers,
	        "num_attention_heads": num_attention_heads,
	        "num_key_value_heads": num_key_value_heads,
	        "max_position_embeddings": max_seq_length,
	        # NOTE(review): not set on `config` above; presumably mirrors the
	        # DeeplmConfig default - TODO confirm.
	        "rms_norm_eps": 1e-06,
	        "rope_theta": rope_theta,
	        "rope_dim": qk_rope_head_dim,
	        "tie_word_embeddings": True,
	        "num_routed_experts": num_routed_experts,
	        "num_shared_experts": num_shared_experts,
	        "expert_topk": expert_top_k,
	        "q_lora_rank": q_lora_rank,
	        "kv_lora_rank": kv_lora_rank,
	        "qk_rope_head_dim": qk_rope_head_dim,
	        "qk_nope_head_dim": qk_nope_head_dim,
	        "v_head_dim": v_head_dim,
	        "mtp_depth": mtp_depth,
	        "mtp_num_layers": num_mtp_layers,
	        "bitnet_quantized": True,
	        "bitnet_scale": bitnet_scale,
	    }
	    _write_json("config.json", config_json)
	    print("Saved config.json")

	    # Save generation_config.json
	    # NOTE(review): the pad/eos/bos ids are fixed here and are not checked
	    # against any tokenizer in this script - verify they match the tokenizer
	    # shipped with the model.
	    gen_config = {
	        "max_new_tokens": 512,
	        "do_sample": True,
	        "temperature": 0.7,
	        "top_p": 0.9,
	        "top_k": 50,
	        "repetition_penalty": 1.1,
	        "pad_token_id": 0,
	        "eos_token_id": 2,
	        "bos_token_id": 1,
	    }
	    _write_json("generation_config.json", gen_config)
	    print("Saved generation_config.json")

	    print("Done!")

	# Script entry point: run the export only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()