helenai committed on
Commit
84eec70
·
verified ·
1 Parent(s): 74a089b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -17
README.md CHANGED
@@ -1,40 +1,42 @@
1
  Code for creating the tiny model:
2
 
3
  ```python
4
- """
5
- Create tiny random phi-4-mini-instruct model which preserves longrope
6
- """
7
-
8
- import os
9
  import torch
10
 
11
  torch.set_default_dtype(torch.float32)
12
 
13
- from transformers import AutoTokenizer, AutoConfig, Phi3ForCausalLM, set_seed
14
- from optimum.intel import OVModelForCausalLM
 
15
 
 
16
  model_id = "microsoft/Phi-4-mini-instruct"
17
  output_dir = "phi-4-mini-tiny-random"
18
- ov_output_dir = output_dir + "-ov"
19
 
20
  set_seed(0)
21
 
22
  # === Step 1: Define tiny model config ===
23
  config = AutoConfig.from_pretrained(model_id)
24
- config.num_hidden_layers = 4
25
- config.num_attention_heads = 4
26
- config.num_key_value_heads = 2
27
- config.hidden_size = 64
28
- config.intermediate_size = 128
29
- config.initializer_range = 0.1
30
 
 
 
 
 
 
 
 
31
 
 
32
  if config.rope_scaling:
33
- config.rope_scaling['short_factor'] = config.rope_scaling['short_factor'][::8]
34
- config.rope_scaling['long_factor'] = config.rope_scaling['long_factor'][::8]
35
 
36
  # === Step 2: Create model from config ===
37
- model = Phi3ForCausalLM(config)
 
 
 
 
38
 
39
  # === Step 3: Load or create tokenizer ===
40
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
1
  Code for creating the tiny model:
2
 
3
  ```python
 
 
 
 
 
4
  import torch
5
 
6
  torch.set_default_dtype(torch.float32)
7
 
8
+ import os
9
+
10
+ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed
11
 
12
+ # === Settings ===
13
  model_id = "microsoft/Phi-4-mini-instruct"
14
  output_dir = "phi-4-mini-tiny-random"
 
15
 
16
  set_seed(0)
17
 
18
  # === Step 1: Define tiny model config ===
19
  config = AutoConfig.from_pretrained(model_id)
 
 
 
 
 
 
20
 
21
+ # the "originally" values are for Phi-4-mini-instruct
22
+ config.num_hidden_layers = 4 # originally 32
23
+ config.num_attention_heads = 4 # originally 24
24
+ config.num_key_value_heads = 2 # originally 8
25
+ config.hidden_size = 64 # originally 3072, this has the largest influence on model size
26
+ config.intermediate_size = 256 # originally 8192; MLP layer
27
+ config.initializer_range = 0.1 # originally 0.02; without this change, phi-4-mini model outputs collapse with larger inputs
28
 
29
+ # Keep 6 RoPE factors (originally 48). Adjust config.hidden_size when adjusting this.
30
  if config.rope_scaling:
31
+ config.rope_scaling["short_factor"] = config.rope_scaling["short_factor"][::8]
32
+ config.rope_scaling["long_factor"] = config.rope_scaling["long_factor"][::8]
33
 
34
  # === Step 2: Create model from config ===
35
+ model = AutoModelForCausalLM.from_config(config)
36
+ # Increase variance in final layer to prevent outputs from collapsing to a single token and to reduce OpenVINO/PyTorch differences
37
+ # Whether it is needed depends on model and device - this improves reliability of the tiny model across devices
38
+ with torch.no_grad():
39
+ model.lm_head.weight.normal_(mean=0.0, std=0.2)
40
 
41
  # === Step 3: Load or create tokenizer ===
42
  tokenizer = AutoTokenizer.from_pretrained(model_id)