# extra_layers_1epoch / configuration.py
# (Hugging Face Hub page residue — uploaded by E-katrin with
#  ConlluTokenClassificationPipeline, commit acceea9, verified)
from transformers import PretrainedConfig
class CobaldParserConfig(PretrainedConfig):
    """Configuration for the CoBaLD parser model.

    Stores the encoder backbone name, the hidden sizes of the per-task
    classification heads, shared activation/dropout settings, and the
    label vocabularies used to decode classifier outputs.

    Args:
        encoder_model_name: Name or path of the pretrained encoder backbone.
        null_classifier_hidden_size: Hidden size of the null-token classifier head.
        lemma_classifier_hidden_size: Hidden size of the lemma classifier head.
        morphology_classifier_hidden_size: Hidden size of the morphology classifier head.
        dependency_classifier_hidden_size: Hidden size of the dependency classifier head.
        misc_classifier_hidden_size: Hidden size of the misc classifier head.
        deepslot_classifier_hidden_size: Hidden size of the deep-slot classifier head.
        semclass_classifier_hidden_size: Hidden size of the semantic-class classifier head.
        activation: Name of the activation function used by the classifier heads.
        dropout: Dropout probability applied in the classifier heads.
        consecutive_null_limit: Maximum number of consecutive null tokens allowed.
        vocabulary: Mapping from column name to an index->label mapping,
            e.g. ``{"deprel": {0: "acl", 1: "conj"}}``. Defaults to an empty
            mapping. (Kept as ``None`` default to avoid the shared-mutable-
            default pitfall.)
        **kwargs: Forwarded to :class:`~transformers.PretrainedConfig`.
    """

    model_type = "cobald_parser"

    def __init__(
        self,
        encoder_model_name: str | None = None,
        null_classifier_hidden_size: int = 0,
        lemma_classifier_hidden_size: int = 0,
        morphology_classifier_hidden_size: int = 0,
        dependency_classifier_hidden_size: int = 0,
        misc_classifier_hidden_size: int = 0,
        deepslot_classifier_hidden_size: int = 0,
        semclass_classifier_hidden_size: int = 0,
        activation: str = 'relu',
        dropout: float = 0.1,
        consecutive_null_limit: int = 0,
        vocabulary: dict[str, dict[int, str]] | None = None,
        **kwargs,
    ):
        self.encoder_model_name = encoder_model_name
        self.null_classifier_hidden_size = null_classifier_hidden_size
        self.consecutive_null_limit = consecutive_null_limit
        self.lemma_classifier_hidden_size = lemma_classifier_hidden_size
        self.morphology_classifier_hidden_size = morphology_classifier_hidden_size
        self.dependency_classifier_hidden_size = dependency_classifier_hidden_size
        self.misc_classifier_hidden_size = misc_classifier_hidden_size
        self.deepslot_classifier_hidden_size = deepslot_classifier_hidden_size
        self.semclass_classifier_hidden_size = semclass_classifier_hidden_size
        self.activation = activation
        self.dropout = dropout
        # The serialized config stores mappings as strings,
        # e.g. {"0": "acl", "1": "conj"}, so we have to convert them to int.
        # `vocabulary or {}` also covers the None default without sharing a
        # mutable default dict between calls.
        self.vocabulary = {
            column: {int(k): v for k, v in labels.items()}
            for column, labels in (vocabulary or {}).items()
        }
        super().__init__(**kwargs)