---
# TransformerLens model configuration (attention-only toy transformer).
# NOTE(review): values were previously stringified Python literals ('2',
# 'False', None) on a single line; rewritten as native YAML types so the
# consumer receives ints/bools/null rather than strings.
dtype: torch.bfloat16
implementation: transformer_lens
model_name: default
n_layers: 2
model_seed: 1
# NOTE(review): d_model (4) != n_heads * d_head (8 * 2 = 16) — unusual but
# preserved as-is; confirm against the consuming config class.
d_model: 4
n_ctx: 1024
d_head: 2
n_heads: 8
act_fn: gelu
d_vocab: 5000
use_local_attn: false
tokenizer_name: timaeus/TinyStories-tokenizer-5k
# window_size / attn_types only apply when use_local_attn is true.
window_size: null
attn_types: null
attn_only: true
positional_embedding_type: shortformer