elephantmipt committed
Commit 0f0c709 · verified · 1 parent: 3503902

Update config.json

Files changed (1): config.json (+15 -17)
config.json CHANGED
@@ -7,14 +7,8 @@
       "_target_": "aicl.model.models.mixers.linear_attention.TaylorExp",
       "input_dim": 16
     },
-    "l_max": 32768,
-    "num_heads": 16
-  },
-  "alt_mixer_2": {
-    "_target_": "aicl.model.models.mixers.slide_attention.SlidingAttention",
-    "causal": true,
-    "num_heads": 16,
-    "window_size": 128
+    "l_max": 2048,
+    "num_heads": 12
   },
   "alt_mixer_2_layers": [
     2,
@@ -25,10 +19,14 @@
   ],
   "alt_mixer_layers": [
     1,
-    6,
+    3,
+    5,
+    7,
+    9,
     11,
-    16,
-    21
+    13,
+    15,
+    17
   ],
   "attn_pdrop": 0,
   "bos_token_id": 50256,
@@ -43,16 +41,16 @@
     "_target_": "aicl.model.models.mixers.convolution.BaseConv",
     "expand_proj": 4,
     "kernel_sizes": 3,
-    "l_max": 32768,
+    "l_max": 2048,
     "use_bias": true
   },
   "mlp_fc1_bias": false,
   "mlp_fc2_bias": false,
   "model_type": "gpt2",
-  "n_embd": 1024,
-  "n_head": 16,
-  "n_inner": 2048,
-  "n_layer": 27,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": 1536,
+  "n_layer": 18,
   "n_positions": 0,
   "out_proj_bias": false,
   "pad_vocab_size_multiple": 8,
@@ -73,5 +71,5 @@
   "transformers_version": "4.38.2",
   "use_cache": true,
   "use_flash_attn": true,
-  "vocab_size": 50280
+  "vocab_size": 50277
 }
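
The "_target_" entries in this config follow Hydra's instantiation convention: a dotted import path plus constructor keyword arguments. A minimal sketch of turning one such block into an object, assuming the aicl package is importable (illustrative only, not the repo's actual loading code):

    from hydra.utils import instantiate

    # "_target_" block copied from the config above; instantiate() imports the
    # dotted path and calls it with the remaining keys as keyword arguments.
    # Assumes the aicl package is installed / on PYTHONPATH.
    feature_map_cfg = {
        "_target_": "aicl.model.models.mixers.linear_attention.TaylorExp",
        "input_dim": 16,
    }
    feature_map = instantiate(feature_map_cfg)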
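
For scale, a back-of-the-envelope count of the dense weights in a GPT-2-style stack before and after this commit. This is a rough sketch only: it counts attention and MLP projections plus a tied embedding, and ignores the alternative mixer layers, norms, and biases (most biases are disabled in this config anyway):

    def approx_params(n_layer, n_embd, n_inner, vocab_size):
        # Per layer: q/k/v/out attention projections plus the two MLP matrices.
        attn = 4 * n_embd * n_embd
        mlp = 2 * n_embd * n_inner
        return n_layer * (attn + mlp) + vocab_size * n_embd  # + tied embedding

    old = approx_params(n_layer=27, n_embd=1024, n_inner=2048, vocab_size=50280)
    new = approx_params(n_layer=18, n_embd=768, n_inner=1536, vocab_size=50277)
    print(f"{old / 1e6:.0f}M -> {new / 1e6:.0f}M")  # roughly 278M -> 124M

So the commit cuts the dense parameter budget by more than half, in addition to reducing the context length ("l_max") from 32768 to 2048.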