arch:
  type: TransformerLMHeadModel
  args:
    transformer_config:
      type: TransformerDecoderOnlyModel
      args:
        embed_config:
          type: TransformerEmbeddingBlock
          args:
            token_embed_config:
              type: TokenEmbedding
              args:
                n_embed: 768
                n_vocab: 50257
            pos_embed_config:
              type: PositionEmbedding
              args:
                n_embed: 768
                n_pos: 1024
            type_embed_config: null
            ln_config: null
            p_drop_embed: 0.1
            concat_strategy: id_first
        decoder_config:
          type: TransformerDecoderBlock
          args:
            attn_config:
              type: MultiHeadKeyValueAttention
              args:
                n_embed: 768
                n_pos: 1024
                n_head: 12
                head_size: 64
                p_drop_attn: 0.1
                p_drop_resid: 0.1
                bias_attn: true
                bias_proj: true
                cross_attn: false
                scale_dot_product: true
                scale_layer_wise: false
                layer_idx: null
                perform_linear_bias: false
                perform_bloom_split_head: false
                perform_query_scaling: false
                attn_window_size: null
            mlp_config:
              type: TransformerMLP
              args:
                n_embed: 768
                n_inner: 3072
                act_fn_config:
                  type: NewGELUActivation
                  args: {}
                p_drop_mlp: 0.1
            ln_config:
              type: LayerNorm
              args:
                n_embed: 768
                ln_eps: 1.0e-05
            n_embed: 768
            post_norm: false
            add_cross_attn: false
        n_embed: 768
        n_layer: 12
        n_head: 12
        ln_config:
          type: LayerNorm
          args:
            n_embed: 768
            ln_eps: 1.0e-05
        perform_linear_bias: false
        attn_window_size_loop_unit: null
    lm_head_config:
      type: TransformerLMHead
      args:
        n_vocab: 50257
        n_embed: 768
        perform_transform: false
        act_fn_config: null
        ln_config: null