from transformers import PretrainedConfig


class ProtoConfig(PretrainedConfig):
    """Configuration for the prototype-based classifier."""

    model_type = "proto"

    def __init__(self,
                 pretrained_model_name_or_path="xlm-roberta-base",  # backbone encoder
                 num_classes=10,
                 label_order_path=None,
                 use_sigmoid=False,
                 use_cuda=True,
                 # per-parameter-group learning rates
                 lr_prototypes=5e-2,
                 lr_features=2e-6,
                 lr_others=2e-2,
                 # scheduler settings
                 num_training_steps=5000,
                 num_warmup_steps=1000,
                 loss='BCE',
                 save_dir='output',
                 # similarity / scoring options
                 use_attention=True,
                 dot_product=False,
                 normalize=None,
                 final_layer=False,
                 reduce_hidden_size=None,
                 use_prototype_loss=False,
                 # optional warm-start vectors
                 prototype_vector_path=None,
                 attention_vector_path=None,
                 eval_buckets=None,
                 seed=7,
                 **kwargs):
        super().__init__(**kwargs)
        self.pretrained_model_name_or_path = pretrained_model_name_or_path
        self.num_classes = num_classes
        self.label_order_path = label_order_path
        self.use_sigmoid = use_sigmoid
        self.use_cuda = use_cuda
        self.lr_prototypes = lr_prototypes
        self.lr_features = lr_features
        self.lr_others = lr_others
        self.num_training_steps = num_training_steps
        self.num_warmup_steps = num_warmup_steps
        self.loss = loss
        self.save_dir = save_dir
        self.use_attention = use_attention
        self.dot_product = dot_product
        self.normalize = normalize
        self.final_layer = final_layer
        self.reduce_hidden_size = reduce_hidden_size
        self.use_prototype_loss = use_prototype_loss
        self.prototype_vector_path = prototype_vector_path
        self.attention_vector_path = attention_vector_path
        self.eval_buckets = eval_buckets
        self.seed = seed
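

# A minimal usage sketch, assuming only the standard Hugging Face
# PretrainedConfig serialization API (the model class that consumes
# this config lives elsewhere): build a config, then round-trip it
# through save_pretrained / from_pretrained.
if __name__ == "__main__":
    config = ProtoConfig(num_classes=25, loss="BCE")
    config.save_pretrained("output/proto_config")  # writes config.json
    reloaded = ProtoConfig.from_pretrained("output/proto_config")
    assert reloaded.num_classes == 25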