# gpt2_svd80/configuration_svd_gpt2.py
from transformers import GPT2Config


class SVDGPT2Config(GPT2Config):
    """GPT-2 configuration extended with one SVD-specific knob, ``ratio``,
    which controls how much of each factorized weight matrix the
    SVD-compressed GPT-2 variant retains."""

    model_type = "svd_gpt2"

    def __init__(
        self,
        vocab_size=50257,
        n_positions=1024,
        n_embd=768,
        n_layer=12,
        n_head=12,
        n_inner=None,
        activation_function="gelu_new",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        scale_attn_weights=True,
        use_cache=True,
        bos_token_id=50256,
        eos_token_id=50256,
        scale_attn_by_inverse_layer_idx=False,
        reorder_and_upcast_attn=False,
        ratio=1.0,
        **kwargs,
    ):
        # Forward all standard GPT-2 hyperparameters to the parent config.
        super().__init__(
            vocab_size=vocab_size,
            n_positions=n_positions,
            n_embd=n_embd,
            n_layer=n_layer,
            n_head=n_head,
            n_inner=n_inner,
            activation_function=activation_function,
            resid_pdrop=resid_pdrop,
            embd_pdrop=embd_pdrop,
            attn_pdrop=attn_pdrop,
            layer_norm_epsilon=layer_norm_epsilon,
            initializer_range=initializer_range,
            summary_type=summary_type,
            summary_use_proj=summary_use_proj,
            summary_activation=summary_activation,
            summary_proj_to_labels=summary_proj_to_labels,
            summary_first_dropout=summary_first_dropout,
            scale_attn_weights=scale_attn_weights,
            use_cache=use_cache,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
            reorder_and_upcast_attn=reorder_and_upcast_attn,
            **kwargs,
        )
        # SVD-specific parameter: the fraction of the full rank kept when
        # the model's weight matrices are factorized (1.0 = no compression).
        self.ratio = ratio
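

# Usage sketch (illustrative, not part of the original upload): register the
# custom "svd_gpt2" model type with AutoConfig so AutoConfig.from_pretrained
# can resolve it, then build a config directly. The value 0.8 is a
# hypothetical choice matching the repo name "svd80"; the exact semantics of
# `ratio` are assumed from the class and repo names.
if __name__ == "__main__":
    from transformers import AutoConfig

    AutoConfig.register("svd_gpt2", SVDGPT2Config)
    config = SVDGPT2Config(ratio=0.8)  # assumed: keep 80% of the rank
    print(config.model_type, config.ratio)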