Code for creating the tiny model:
```python
import os

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed

torch.set_default_dtype(torch.float32)

# === Settings ===
model_id = "microsoft/Phi-4-mini-instruct"
output_dir = "phi-4-mini-tiny-random"
set_seed(0)

# === Step 1: Define a tiny model config ===
config = AutoConfig.from_pretrained(model_id)

# The "originally" values below are those of Phi-4-mini-instruct
config.num_hidden_layers = 4    # originally 32
config.num_attention_heads = 4  # originally 24
config.num_key_value_heads = 2  # originally 8
config.hidden_size = 64         # originally 3072; has the largest influence on model size
config.intermediate_size = 256  # originally 8192; width of the MLP layers
config.initializer_range = 0.1  # originally 0.02; without this, the tiny model's outputs collapse on longer inputs

# Keep every 8th RoPE factor, leaving 6 (originally 48); the factor count must match the
# rotary dimension, so adjust this together with config.hidden_size.
if config.rope_scaling:
    config.rope_scaling["short_factor"] = config.rope_scaling["short_factor"][::8]
    config.rope_scaling["long_factor"] = config.rope_scaling["long_factor"][::8]

# === Step 2: Create the model from the config ===
model = AutoModelForCausalLM.from_config(config)

# Increase the variance of the final layer to prevent outputs from collapsing to a single
# token and to reduce OpenVINO/PyTorch output differences. Whether this is needed depends
# on the model and device; it makes the tiny model more reliable across devices.
with torch.no_grad():
    model.lm_head.weight.normal_(mean=0.0, std=0.2)
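
# Illustrative addition (not part of the original script): report the parameter count. At
# hidden_size=64 the embedding and lm_head matrices dominate, since the vocabulary size is
# left unchanged.
print(f"Tiny model parameters: {sum(p.numel() for p in model.parameters()):,}")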

# === Step 3: Load the original tokenizer ===
tokenizer = AutoTokenizer.from_pretrained(model_id)

# === Step 4: Save model and tokenizer ===
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
```
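
To verify the result, a quick smoke test can load the saved folder and generate a few tokens. This is a minimal sketch (the prompt and generation settings are arbitrary choices, not part of the script above); the decoded text will be gibberish since the weights are random, but the token ids should vary rather than collapse to a single value:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

output_dir = "phi-4-mini-tiny-random"

# Load the tiny model and tokenizer saved by the script above
model = AutoModelForCausalLM.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

inputs = tokenizer("Hello, world!", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=16, do_sample=False)

# Inspect only the newly generated tokens
new_tokens = out[0, inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens))
assert len(set(new_tokens.tolist())) > 1, "outputs collapsed to a single token"
```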