class GPTConfig:
    """Hyperparameter container for a GPT-style model.

    Each setting is stored as a plain instance attribute of the same name.
    ``GPTConfig()`` with no arguments produces the default configuration;
    individual values can be overridden by keyword, e.g.
    ``GPTConfig(num_hidden_layers=6)``.

    NOTE(review): the per-attribute descriptions below are inferred from the
    attribute names; confirm against the model code that reads this config.

    Args:
        vocab_size: Presumably the number of token ids in the vocabulary.
        hidden_size: Presumably the width of the hidden representation.
        num_hidden_layers: Presumably the number of stacked layers.
        num_attention_heads: Presumably the number of attention heads. The
            default of 4 was picked so that ``hidden_size /
            num_attention_heads`` is 64 (256 / 4) for the default sizes.
        intermediate_size: Presumably the feed-forward inner width.
        hidden_act: Name of the activation function, as a string.
        rms_norm_eps: Presumably the epsilon used by RMS normalization.
        max_position_embeddings: Presumably the maximum sequence length.
    """

    def __init__(
        self,
        *,
        vocab_size: int = 32000,
        hidden_size: int = 256,
        num_hidden_layers: int = 12,
        num_attention_heads: int = 4,
        intermediate_size: int = 512,
        hidden_act: str = "silu",
        rms_norm_eps: float = 1e-5,
        max_position_embeddings: int = 1024,
    ) -> None:
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        # Default chosen to match head_dim=64 (256 / 4 with default sizes).
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.rms_norm_eps = rms_norm_eps
        self.max_position_embeddings = max_position_embeddings