Andrewstivan committed on
Commit
b75ff45
·
verified ·
1 Parent(s): 2fa8bd2

Update bdh.py

Browse files
Files changed (1) hide show
  1. bdh.py +5 -5
bdh.py CHANGED
@@ -8,18 +8,18 @@ import torch.nn.functional as F
8
 
9
  @dataclasses.dataclass
10
  class BDHConfig:
11
- n_layer: int = 6
12
- n_embd: int = 256
13
  dropout: float = 0.1
14
- n_head: int = 4
15
- mlp_internal_dim_multiplier: int = 4
16
  vocab_size: int = 256
17
  use_alibi: bool = True
18
  use_l1_norm: bool = True
19
  relu_threshold: float = 0.0
20
  rotary_embedding: str = "rope"
21
  rope_theta: float = 65536.0
22
- use_plasticity: bool = False
23
  plasticity_lr: float = 0.01
24
  consolidation_rate: float = 0.01
25
  forget_rate: float = 0.1
 
8
 
9
  @dataclasses.dataclass
10
  class BDHConfig:
11
+ n_layer: int = 32
12
+ n_embd: int = 4096
13
  dropout: float = 0.1
14
+ n_head: int = 32
15
+ mlp_internal_dim_multiplier: int = 1
16
  vocab_size: int = 256
17
  use_alibi: bool = True
18
  use_l1_norm: bool = True
19
  relu_threshold: float = 0.0
20
  rotary_embedding: str = "rope"
21
  rope_theta: float = 65536.0
22
+ use_plasticity: bool = True
23
  plasticity_lr: float = 0.01
24
  consolidation_rate: float = 0.01
25
  forget_rate: float = 0.1