---
library_name: transformers
pipeline_tag: text-generation
---

Random weights generated using script derived from `yujiepan/deepseek-v3-tiny-random`.

```python
# Builds a 2-layer, randomly initialised copy of DeepSeek-V3 and writes the
# model, tokenizer and a patched config.json to `save_path`.
import json
import os
from pathlib import Path

import torch
import transformers
from huggingface_hub import create_repo, upload_folder
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                          GenerationConfig, enable_full_determinism, pipeline,
                          set_seed)

model_id = "deepseek-ai/DeepSeek-V3"
repo_id = "modularai/deepseek-v3-small-random"
save_path = f"/home/ubuntu/mock-models/{repo_id}"

# Start from the upstream config and shrink only the depth.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config.num_hidden_layers = 2
config.first_k_dense_replace = 1
# transformers has not supported the customized quantization config
del config.quantization_config

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path)

# Seed everything before the weights are created so the output is reproducible.
enable_full_determinism(seed=42)
model = AutoModelForCausalLM.from_config(
    config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Best effort: reuse the upstream generation settings when they can be loaded.
# from_pretrained raises OSError when the file is missing or unreadable; a
# bare `except:` would also have swallowed KeyboardInterrupt / SystemExit.
try:
    model.generation_config = GenerationConfig.from_pretrained(
        model_id, trust_remote_code=True)
except OSError:
    print("No generation config found")

# This fixes the NaN values
# NOTE(review): layer 1 is presumably the only MoE layer here (2 layers,
# first_k_dense_replace = 1), hence the hard-coded index -- confirm if the
# config above changes.
gate = model.model.layers[1].mlp.gate
gate.e_score_correction_bias = torch.nn.Parameter(
    torch.randn_like(gate.e_score_correction_bias) * 1e-2)

# Count every parameter, but only print the non-expert ones plus expert 0.
num_params = 0
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        # avoid printing too much
        if 'experts' not in name or 'experts.0.' in name:
            print(name, p.shape)
        # torch.nn.init.uniform_(p, -0.2, 0.2)
        num_params += p.numel()
print(f"Number of parameters: {num_params / 1e6:.2f}M")
model.save_pretrained(save_path)

# patch to use official modeling codes
# Write the in-memory config's auto_map back into the saved config.json.
config_path = Path(save_path) / "config.json"
config_json = json.loads(config_path.read_text(encoding="utf-8"))
config_json['auto_map'] = config.auto_map
config_path.write_text(json.dumps(config_json, indent=2), encoding="utf-8")
# Show the patched config (plain-Python replacement for the notebook-only
# `! cat` shell escape).
print(config_path.read_text(encoding="utf-8"))

del model
del tokenizer

# Remove the *.py files that ended up in save_path.
for script_file in Path(save_path).glob("*.py"):
    script_file.unlink()
os.system(f"ls -alh {save_path}")

# Undo the global determinism switch.
torch.use_deterministic_algorithms(False)
```