kathywu95
/

deepseek-v3-small-random

Text Generation

text-generation-inference

Model card Files Files and versions

deepseek-v3-small-random / README.md

kathywu95's picture

Upload folder using huggingface_hub

c8999f0 verified 5 months ago

|

history blame contribute delete

2.41 kB

	---
	library_name: transformers
	pipeline_tag: text-generation
	---

	Random weights were generated using a script derived from
	`yujiepan/deepseek-v3-tiny-random`.

	```python
	import os
	from pathlib import Path

	import torch
	import transformers
	from huggingface_hub import create_repo, upload_folder
	from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
	GenerationConfig, enable_full_determinism, pipeline,
	set_seed)

	# Upstream checkpoint whose config/tokenizer are reused, the target repo
	# name, and the local folder the mock model is written to.
	model_id = "deepseek-ai/DeepSeek-V3"
	repo_id = "modularai/deepseek-v3-small-random"
	save_path = f"/home/ubuntu/mock-models/{repo_id}"

	# Load the upstream config once and shrink it: two hidden layers, with
	# first_k_dense_replace set to 1.
	config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
	config.num_hidden_layers = 2
	config.first_k_dense_replace = 1

	# transformers has not supported the customized quantization config, so it
	# is dropped; the guard keeps the script working if the field is absent.
	if hasattr(config, "quantization_config"):
	    del config.quantization_config

	# Reuse the upstream tokenizer as-is and store it next to the mock weights.
	tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
	tokenizer.save_pretrained(save_path)

	# Seed before instantiating so the randomly initialised weights are
	# reproducible from run to run.
	enable_full_determinism(seed=42)
	model = AutoModelForCausalLM.from_config(
	    config,
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	)

	# Best effort: attach the upstream generation config when it can be loaded.
	# `Exception` (not a bare `except`) so Ctrl-C / SystemExit still propagate,
	# and the reason is printed so a network failure is not mistaken for a
	# missing file.
	try:
	    model.generation_config = GenerationConfig.from_pretrained(
	        model_id, trust_remote_code=True)
	except Exception as err:
	    print("No generation config found")
	    print(f"  ({type(err).__name__}: {err})")

	# This fixes the NaN values
	# Replace the gate's e_score_correction_bias on layer 1 with small random
	# values: a standard-normal draw shaped like the existing bias, scaled by 1e-2.
	# NOTE(review): layer index 1 is hard-coded; presumably it is the only layer
	# with a gate given the shrunken config above -- confirm if the config changes.
	moe_gate = model.model.layers[1].mlp.gate
	small_bias = torch.randn_like(moe_gate.e_score_correction_bias) * 1e-2
	moe_gate.e_score_correction_bias = torch.nn.Parameter(small_bias)

	# Print the parameter inventory and total size, then write the model to disk.
	# Only names and shapes are read here, so no autograd guard is needed.
	num_params = 0
	for name, param in sorted(model.named_parameters()):
	    # avoid printing too much: of the expert weights, list only expert 0
	    if 'experts' not in name or 'experts.0.' in name:
	        print(name, param.shape)
	    num_params += param.numel()
	print(f"Number of parameters: {num_params / 1e6:.2f}M")
	model.save_pretrained(save_path)

	# patch to use official modeling codes
	import json

	# Overwrite the `auto_map` entry of the config.json found under save_path
	# with the one carried by the in-memory config object.
	# NOTE(review): presumably this makes loaders resolve the modeling code from
	# the upstream repo instead of local copies -- confirm.
	auto_map = config.auto_map
	config_file = Path(save_path) / "config.json"
	config_json = json.loads(config_file.read_text(encoding="utf-8"))
	config_json['auto_map'] = auto_map
	config_file.write_text(json.dumps(config_json, indent=2), encoding="utf-8")

	# Show the patched file. Plain-Python replacement for the notebook-only
	# `! cat {save_path}/config.json` shell escape, which is a SyntaxError
	# outside IPython.
	print(config_file.read_text(encoding="utf-8"))

	# Release the in-memory objects now that everything has been written out.
	del model
	del tokenizer

	# Remove any Python source files from the output folder.
	for script_path in Path(save_path).glob("*.py"):
	    script_path.unlink()

	# List what is left in the output folder as a final sanity check.
	os.system(f"ls -alh {save_path}")

	# Turn deterministic algorithms back off for whatever runs next in this
	# session (presumably enabled by enable_full_determinism earlier -- confirm).
	torch.use_deterministic_algorithms(False)
	```