AuriStream7BDeep_mid / configuration_auristream.py
klemenk's picture
Sync configuration_auristream.py from TuKoResearch/AuriStream7BDeep_7k
98c4724 verified
raw
history blame contribute delete
781 Bytes
from transformers import PretrainedConfig
class AuriStreamConfig(PretrainedConfig):
    """Hyperparameter container for AuriStream models.

    Each named constructor argument is stored, unmodified, as an instance
    attribute of the same name. Any additional keyword arguments are handed
    to ``PretrainedConfig.__init__`` after those attributes have been set.

    NOTE(review): the descriptions below are inferred from the argument
    names and defaults only; this file does not show how the model code
    consumes them -- confirm against the modeling module.

    Args:
        n_layer: Presumably the number of transformer blocks.
        n_head: Presumably the number of attention heads per block.
        n_embd: Presumably the embedding / hidden width.
        vocab_size: Presumably the size of the token vocabulary.
        dropout: Presumably the dropout probability.
        bias: Presumably whether linear/normalization layers carry a bias.
        use_rope: Presumably toggles rotary position embeddings.
        n_pred_steps: Presumably the number of future steps predicted.
        seq_len: Presumably the maximum sequence length.
        skip_connections: Presumably toggles extra skip connections.
        **kwargs: Forwarded verbatim to ``PretrainedConfig.__init__``.
    """

    model_type = "AuriStream.AuriStream"

    def __init__(
        self,
        n_layer: int = 96,
        n_head: int = 32,
        n_embd: int = 2560,
        vocab_size: int = 8192,
        dropout: float = 0.0,
        bias: bool = False,
        use_rope: bool = True,
        n_pred_steps: int = 20,
        seq_len: int = 4096,
        skip_connections: bool = True,
        **kwargs
    ) -> None:
        # Insertion order is kept identical to the signature so attributes
        # land on the instance in the same sequence as before.
        hyperparameters = {
            "n_layer": n_layer,
            "n_head": n_head,
            "n_embd": n_embd,
            "vocab_size": vocab_size,
            "dropout": dropout,
            "bias": bias,
            "use_rope": use_rope,
            "n_pred_steps": n_pred_steps,
            "seq_len": seq_len,
            "skip_connections": skip_connections,
        }
        for attr_name, attr_value in hyperparameters.items():
            setattr(self, attr_name, attr_value)

        # The base initializer runs last, exactly as in the original, so it
        # sees the model-specific attributes already populated.
        super().__init__(**kwargs)