File size: 1,801 Bytes
b89eaa0 84eec70 00f6365 b89eaa0 84eec70 00f6365 b89eaa0 00f6365 b89eaa0 00f6365 b89eaa0 00f6365 b89eaa0 84eec70 b89eaa0 00f6365 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
Code for creating the tiny model:
```python
import os

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed

# Build tiny random weights in float32 so saved tensors match the default
# inference dtype. Must be set before the model is instantiated.
torch.set_default_dtype(torch.float32)

# === Settings ===
model_id = "microsoft/Phi-4-mini-instruct"
output_dir = "phi-4-mini-tiny-random"
set_seed(0)  # deterministic random weights so the tiny model is reproducible

# === Step 1: Define tiny model config ===
config = AutoConfig.from_pretrained(model_id)
# The "originally" values are for Phi-4-mini-instruct.
config.num_hidden_layers = 4  # originally 32
config.num_attention_heads = 4  # originally 24
config.num_key_value_heads = 2  # originally 8
config.hidden_size = 64  # originally 3072, this has the largest influence on model size
config.intermediate_size = 256  # originally 8192; MLP layer
config.initializer_range = 0.1  # originally 0.02; without this change, phi-4-mini model outputs collapse with larger inputs
# Keep 6 RoPE factors (originally 48): every 8th element of the scaling lists.
# Adjust config.hidden_size when adjusting this, since the RoPE factor count
# must stay consistent with the per-head rotary dimension.
if config.rope_scaling:
    config.rope_scaling["short_factor"] = config.rope_scaling["short_factor"][::8]
    config.rope_scaling["long_factor"] = config.rope_scaling["long_factor"][::8]

# === Step 2: Create model from config ===
model = AutoModelForCausalLM.from_config(config)
# Increase variance in final layer to prevent outputs collapsing to a single
# token and OpenVINO/PyTorch differences. Whether it is needed depends on model
# and device - this improves reliability of the tiny model across devices.
with torch.no_grad():
    model.lm_head.weight.normal_(mean=0.0, std=0.2)

# === Step 3: Load or create tokenizer ===
tokenizer = AutoTokenizer.from_pretrained(model_id)

# === Step 4: Save model and tokenizer ===
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
```