# OmniScore-deberta-v3 / configuration_score_predictor.py
# Author: Firoj — "Upload trained DeBERTa-v3 OmniScore model artifacts" (commit fed9f16, verified)
"""
Configuration class for ScorePredictorModel.
Compatible with Hugging Face's AutoConfig.
"""
from transformers import PretrainedConfig
class ScorePredictorConfig(PretrainedConfig):
    """
    Configuration class for ScorePredictorModel (Encoder-only).

    Compatible with Hugging Face's ``AutoConfig`` via ``model_type``.

    Args:
        backbone_model_name: Name or path of the backbone encoder model
            (e.g., ``'bert-base-uncased'``).
        num_scores: Number of regression outputs (default: 4).
        score_names: Names of the scores being predicted. Defaults to
            ``["informativeness", "clarity", "plausibility", "faithfulness"]``.
        hidden_size: Hidden size of the backbone model (``None`` means it is
            auto-detected later from the backbone).
        attn_implementation: Attention implementation
            (``'eager'``, ``'sdpa'``, ``'flash_attention_2'``).
        use_attention_pooling: Whether to use attention pooling in addition
            to CLS+Mean pooling.
        max_position_embeddings: Maximum sequence length supported
            (default: 512).
        use_shared_encoder: Whether to use a shared MLP before the
            task-specific heads.
        hidden_dropout_prob: Dropout probability for hidden layers.
        head_hidden_size: Hidden size for task-specific score heads.
        shared_hidden_size: Hidden size for shared encoder layers.
        **kwargs: Additional arguments passed to ``PretrainedConfig``.
    """

    model_type = "score_predictor"

    def __init__(
        self,
        backbone_model_name: str = "bert-base-uncased",
        num_scores: int = 4,
        score_names: list = None,
        hidden_size: int = None,
        attn_implementation: str = None,
        use_attention_pooling: bool = True,
        max_position_embeddings: int = 512,
        use_shared_encoder: bool = True,
        hidden_dropout_prob: float = 0.1,
        head_hidden_size: int = 256,
        shared_hidden_size: int = 512,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.backbone_model_name = backbone_model_name
        self.num_scores = num_scores
        # Default score names match the four OmniScore regression targets.
        self.score_names = score_names or [
            "informativeness",
            "clarity",
            "plausibility",
            "faithfulness"
        ]
        self.hidden_size = hidden_size
        self.attn_implementation = attn_implementation
        # BUG FIX: previously read kwargs.get("max_position_embeddings", 512),
        # which always returned 512 because an explicitly-passed value binds to
        # the named parameter and never reaches **kwargs. Use the parameter.
        self.max_position_embeddings = max_position_embeddings
        # Architecture configuration
        self.use_attention_pooling = use_attention_pooling
        self.use_shared_encoder = use_shared_encoder
        self.hidden_dropout_prob = hidden_dropout_prob
        self.head_hidden_size = head_hidden_size
        self.shared_hidden_size = shared_hidden_size