# LexiMind — configs/model/large.yaml
# Provenance: commit 486475d by OliverPerrin
# "feat: Add FLAN-T5 compatibility with relative position bias"
---
# FLAN-T5-large architecture
# 24 encoder layers, 24 decoder layers, 1024 hidden dim
d_model: 1024                 # hidden size of token embeddings / residual stream
num_encoder_layers: 24
num_decoder_layers: 24
num_attention_heads: 16       # 1024 / 16 = 64-dim heads
ffn_dim: 2816                 # T5-large (v1.1/FLAN) feed-forward width (d_ff)
dropout: 0.1
activation: gated-gelu        # T5 v1.1/FLAN-T5 use gated GELU (GEGLU) in the FFN
use_pretrained: true
pretrained_model_name: google/flan-t5-large  # Hugging Face Hub model id