HALION-AI committed
Commit 0efe372 · verified · 1 Parent(s): 958d219

Add model config for HelionX Base 300M

Files changed (1)
config.json +12 -2
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "architectures": ["GPT2LMHeadModel"],
   "model_type": "gpt2",
   "vocab_size": 50257,
   "n_positions": 2048,
@@ -6,14 +7,23 @@
   "n_embd": 896,
   "n_layer": 22,
   "n_head": 14,
-  "activation_function": "gelu",
+  "n_inner": 3584,
+  "activation_function": "gelu_new",
   "resid_pdrop": 0.0,
   "embd_pdrop": 0.0,
   "attn_pdrop": 0.0,
   "layer_norm_epsilon": 1e-5,
   "initializer_range": 0.02,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "summary_activation": null,
+  "summary_last_dropout": 0.0,
+  "summary_first_dropout": 0.0,
+  "summary_proj_to_labels": true,
+  "summary_proj_to_labels": true,
+  "scale_attn_weights": true,
   "use_cache": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
-  "architectures": ["HelionXModel"]
+  "tie_word_embeddings": true
 }
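
With "architectures" switched from the custom "HelionXModel" to the stock "GPT2LMHeadModel" (and "model_type": "gpt2" already in place), the checkpoint should load with plain transformers and no custom modeling code. A minimal sketch, assuming the repo id is HALION-AI/HelionX-Base-300M (the actual id is not shown on this page):

from transformers import AutoConfig, AutoModelForCausalLM

# Hypothetical repo id inferred from the org and model name; substitute the real one.
config = AutoConfig.from_pretrained("HALION-AI/HelionX-Base-300M")

# "model_type": "gpt2" plus "architectures": ["GPT2LMHeadModel"] let the Auto
# classes resolve the model class. from_config builds randomly initialized
# weights; from_pretrained would pull the trained checkpoint instead.
model = AutoModelForCausalLM.from_config(config)
print(config.activation_function)                    # gelu_new
print(f"{model.num_parameters() / 1e6:.1f}M parameters")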
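As a sanity check on the "300M" in the model name, the hyperparameters above pin down the size under the standard GPT-2 layout, with the input embedding reused as the output head per "tie_word_embeddings": true. A rough tally, assuming no heads beyond the language-model head:

# Rough GPT-2 parameter tally from the values in this config.
d, layers, ff, vocab, pos = 896, 22, 3584, 50257, 2048

embeddings = vocab * d + pos * d                 # wte + wpe (wte reused as lm_head)
attn = (d * 3 * d + 3 * d) + (d * d + d)         # c_attn + c_proj, both with bias
mlp = (d * ff + ff) + (ff * d + d)               # c_fc + c_proj, n_inner = 3584
norms = 2 * (2 * d)                              # ln_1 and ln_2 (weight + bias each)
total = embeddings + layers * (attn + mlp + norms) + 2 * d  # plus final ln_f

print(f"{total / 1e6:.1f}M")  # -> 259.1M

Under these assumptions the count lands near 259M, so the "300M" in the name reads as a round figure rather than an exact count.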