from transformers import GPT2Config


class SVDGPT2Config(GPT2Config):
    model_type = "svd_gpt2"

    def __init__(
        self,
        vocab_size=50257,
        n_positions=1024,
        n_embd=768,
        n_layer=12,
        n_head=12,
        n_inner=None,
        activation_function="gelu_new",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        scale_attn_weights=True,
        use_cache=True,
        bos_token_id=50256,
        eos_token_id=50256,
        scale_attn_by_inverse_layer_idx=False,
        reorder_and_upcast_attn=False,
        ratio=1.0,
        **kwargs,
    ):
        super().__init__(
            vocab_size=vocab_size,
            n_positions=n_positions,
            n_embd=n_embd,
            n_layer=n_layer,
            n_head=n_head,
            n_inner=n_inner,
            activation_function=activation_function,
            resid_pdrop=resid_pdrop,
            embd_pdrop=embd_pdrop,
            attn_pdrop=attn_pdrop,
            layer_norm_epsilon=layer_norm_epsilon,
            initializer_range=initializer_range,
            summary_type=summary_type,
            summary_use_proj=summary_use_proj,
            summary_activation=summary_activation,
            summary_proj_to_labels=summary_proj_to_labels,
            summary_first_dropout=summary_first_dropout,
            scale_attn_weights=scale_attn_weights,
            use_cache=use_cache,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
            reorder_and_upcast_attn=reorder_and_upcast_attn,
            **kwargs,
        )

        # Fraction of singular values to retain in SVD-compressed weight
        # matrices (1.0 keeps the full rank, i.e. no compression).
        self.ratio = ratio
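

# A minimal usage sketch, not part of the original module. Assumptions:
# registering with AutoConfig is optional and only needed so
# `AutoConfig.from_pretrained` can resolve the custom "svd_gpt2" model type,
# and `ratio=0.5` is purely an illustrative value.
if __name__ == "__main__":
    from transformers import AutoConfig

    # Map the "svd_gpt2" model type to this config class.
    AutoConfig.register("svd_gpt2", SVDGPT2Config)

    # All standard GPT-2 hyperparameters still work; `ratio` rides along.
    config = SVDGPT2Config(n_layer=6, ratio=0.5)
    print(config.model_type)  # svd_gpt2
    print(config.ratio)       # 0.5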