File size: 700 Bytes
ffe51ed
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
from transformers import PretrainedConfig

class QuarkConfig(PretrainedConfig):
    """Configuration object for the ``quark`` model type.

    Every named constructor argument is stored unchanged as an instance
    attribute of the same name; all remaining keyword arguments are forwarded
    to ``PretrainedConfig.__init__``.

    NOTE(review): the model code that consumes these values is not part of
    this file, so the per-argument descriptions below are inferred from the
    argument names only -- confirm them against the model implementation.

    Args:
        vocab_size: Presumably the size of the token vocabulary.
        d_model: Presumably the width of the hidden representation.
        n_heads: Presumably the number of attention (query) heads.
        n_kv_heads: Presumably the number of key/value heads.
        n_layers: Presumably the number of stacked layers.
        d_ff: Presumably the inner width of the feed-forward sub-layer.
        head_dim: Presumably the per-head dimension.
        max_seq_len: Presumably the maximum supported sequence length.
        rope_theta: Presumably the rotary position embedding base.
        rms_eps: Presumably the epsilon used by RMS normalization.
        qkv_bias: Presumably whether Q/K/V projections carry a bias term.
        dropout: Presumably the dropout probability.
        **kwargs: Passed through to ``PretrainedConfig``.
    """

    model_type = "quark"

    # Aliases from the attribute names that generic Hugging Face tooling reads
    # to the names this config actually stores. ``PretrainedConfig`` resolves
    # these on both get and set, so the existing attributes are unaffected.
    attribute_map = {
        "hidden_size": "d_model",
        "num_attention_heads": "n_heads",
        "num_key_value_heads": "n_kv_heads",
        "num_hidden_layers": "n_layers",
        "intermediate_size": "d_ff",
        "max_position_embeddings": "max_seq_len",
    }

    def __init__(
        self,
        vocab_size: int = 65537,
        d_model: int = 768,
        n_heads: int = 12,
        n_kv_heads: int = 4,
        n_layers: int = 32,
        d_ff: int = 2048,
        head_dim: int = 64,
        max_seq_len: int = 2048,
        rope_theta: float = 10000.0,
        rms_eps: float = 1e-5,
        qkv_bias: bool = True,
        dropout: float = 0.0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_kv_heads = n_kv_heads
        self.n_layers = n_layers
        self.d_ff = d_ff
        self.head_dim = head_dim
        self.max_seq_len = max_seq_len
        self.rope_theta = rope_theta
        self.rms_eps = rms_eps
        self.qkv_bias = qkv_bias
        self.dropout = dropout
        # Base-class initialisation stays last, as in the original, so the
        # model-specific fields are already set when it processes ``kwargs``.
        super().__init__(**kwargs)