Update config.json

#2
by AyaGL - opened
Files changed (1) hide show
  1. config.json +34 -28
config.json CHANGED
@@ -1,11 +1,19 @@
1
  {
2
- "model_type": "causal_diffusion",
 
 
 
 
 
 
 
3
  "interpretable": true,
 
4
  "n_layers": 32,
5
  "n_head": 32,
6
  "n_embd": 4096,
7
- "block_size": 4096,
8
  "n_kv_heads": 4,
 
9
  "diff_block_size": 64,
10
  "use_rms_norm": true,
11
  "norm_eps": 1e-05,
@@ -14,39 +22,37 @@
14
  "use_rope": true,
15
  "rope_base": 500000.0,
16
  "rope_full_precision": true,
 
17
  "mlp_type": "swiglu",
18
  "activation": "gelu",
19
  "mlp_ratio": 4,
20
  "intermediate_size": null,
21
  "use_bias": false,
22
- "clip_qkv": 10.0,
23
  "weight_sharing": true,
24
  "pad_token_id": 100277,
25
  "bos_token_id": 100278,
26
  "eos_token_id": 100257,
27
- "endofchunk_token_id": 100279,
28
  "mask_token_id": 100280,
29
- "vocab_size": 100281,
30
- "concept": {
31
- "n_concepts": 33732,
32
- "n_unknown_concepts": 101196,
33
- "max_concepts": 16,
34
- "concept_dim": 4096,
35
- "use_attention_known": false,
36
- "use_attention_unknown": false,
37
- "topk_known": 16,
38
- "topk_known_features": 32,
39
- "unknown_topk": 128,
40
- "use_unknown": true,
41
- "apply_topk_to_unknown": true,
42
- "topk_on_logits": false,
43
- "factorize_unknown": true,
44
- "factorize_rank": 256,
45
- "use_epsilon_correction": true,
46
- "block_size": 4096,
47
- "pad_multiple": 16,
48
- "store_unknown_weights": false,
49
- "inject_layer": 16,
50
- "inject_alpha": 1.0
51
- }
52
- }
 
1
  {
2
+ "model_type": "steerling",
3
+ "auto_map": {
4
+ "AutoConfig": "configuration_steerling.SteerlingConfig",
5
+ "AutoModel": "modeling_steerling.SteerlingForCausalLM",
6
+ "AutoModelForCausalLM": "modeling_steerling.SteerlingForCausalLM",
7
+ "AutoTokenizer": ["tokenization_steerling.SteerlingTokenizer", null]
8
+ },
9
+ "architectures": ["SteerlingForCausalLM"],
10
  "interpretable": true,
11
+ "vocab_size": 100281,
12
  "n_layers": 32,
13
  "n_head": 32,
14
  "n_embd": 4096,
 
15
  "n_kv_heads": 4,
16
+ "block_size": 4096,
17
  "diff_block_size": 64,
18
  "use_rms_norm": true,
19
  "norm_eps": 1e-05,
 
22
  "use_rope": true,
23
  "rope_base": 500000.0,
24
  "rope_full_precision": true,
25
+ "clip_qkv": 10.0,
26
  "mlp_type": "swiglu",
27
  "activation": "gelu",
28
  "mlp_ratio": 4,
29
  "intermediate_size": null,
30
  "use_bias": false,
 
31
  "weight_sharing": true,
32
  "pad_token_id": 100277,
33
  "bos_token_id": 100278,
34
  "eos_token_id": 100257,
 
35
  "mask_token_id": 100280,
36
+ "endofchunk_token_id": 100279,
37
+ "n_concepts": 33732,
38
+ "n_unknown_concepts": 101196,
39
+ "concept_dim": 4096,
40
+ "use_attention_known": false,
41
+ "use_attention_unknown": false,
42
+ "topk_known": 16,
43
+ "topk_known_features": 32,
44
+ "unknown_topk": 128,
45
+ "use_unknown": true,
46
+ "apply_topk_to_unknown": true,
47
+ "topk_on_logits": false,
48
+ "factorize_unknown": true,
49
+ "factorize_rank": 256,
50
+ "use_epsilon_correction": true,
51
+ "concept_block_size": 4096,
52
+ "pad_multiple": 16,
53
+ "store_unknown_weights": false,
54
+ "inject_layer": 16,
55
+ "inject_alpha": 1.0,
56
+ "torch_dtype": "bfloat16",
57
+ "transformers_version": "4.48.0"
58
+ }