jacob-valdez committed on
Commit
2fd2241
·
verified ·
1 Parent(s): 0cb5921

Upload model/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. model/config.json +37 -50
model/config.json CHANGED
@@ -1,52 +1,39 @@
1
  {
2
- "model_type": "builderbrain",
3
- "scale": "small",
4
- "builderbrain_version": "1.0.0",
5
- "model": {
6
- "type": "gpt2",
7
- "name": "gpt2",
8
- "hidden_size": 768,
9
- "num_layers": 4,
10
- "num_programs": 16,
11
- "alpha_cap": 0.1
 
 
 
12
  },
13
- "constraints": {
14
- "grammar": {
15
- "enabled": true,
16
- "target": 0.0,
17
- "normalizer": "rank"
18
- },
19
- "graph2graph": {
20
- "enabled": true,
21
- "target": 0.2,
22
- "normalizer": "rank"
23
- },
24
- "buildability": {
25
- "enabled": true,
26
- "target": 0.0,
27
- "normalizer": "winsor"
28
- },
29
- "reuse": {
30
- "enabled": true,
31
- "target": 0.5,
32
- "normalizer": "rank"
33
- }
34
- },
35
- "training": {
36
- "batch_size": 8,
37
- "learning_rate": "5e-4",
38
- "eta_lambda": "1e-2",
39
- "lambda_max": 20.0,
40
- "num_epochs": 50,
41
- "save_every": 10
42
- },
43
- "data": {
44
- "max_length": 512,
45
- "vocab_size": 50257
46
- },
47
- "runtime": {
48
- "max_generation_length": 100,
49
- "temperature": 0.8,
50
- "use_grammar_mask": true
51
- }
52
- }
 
1
  {
2
+ "activation_function": "gelu_new",
3
+ "attn_pdrop": 0.1,
4
+ "bos_token_id": 50256,
5
+ "builderbrain_config": {
6
+ "alpha_cap": 0.05,
7
+ "base_model_type": "gpt2",
8
+ "builder_layers": 4,
9
+ "dual_rail": true,
10
+ "fusion_gates": true,
11
+ "grammar_constraints": true,
12
+ "num_programs": 8,
13
+ "program_adapters": true,
14
+ "safety_monitoring": true
15
  },
16
+ "embd_pdrop": 0.1,
17
+ "eos_token_id": 50256,
18
+ "initializer_range": 0.02,
19
+ "layer_norm_epsilon": 1e-05,
20
+ "model_type": "gpt2",
21
+ "n_embd": 768,
22
+ "n_head": 12,
23
+ "n_inner": null,
24
+ "n_layer": 4,
25
+ "n_positions": 1024,
26
+ "pad_token_id": 50256,
27
+ "reorder_and_upcast_attn": false,
28
+ "resid_pdrop": 0.1,
29
+ "scale_attn_by_inverse_layer_idx": false,
30
+ "scale_attn_weights": true,
31
+ "summary_activation": null,
32
+ "summary_first_dropout": 0.1,
33
+ "summary_proj_to_labels": true,
34
+ "summary_type": "cls_index",
35
+ "summary_use_proj": true,
36
+ "transformers_version": "4.56.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }