# OmniScore-deberta-v3 / configuration_score_predictor.py
# Author: Firoj — "Upload trained DeBERTa-v3 OmniScore model artifacts" (commit fed9f16, verified)
"""
Configuration class for ScorePredictorModel.
Compatible with Hugging Face's AutoConfig.
"""
from transformers import PretrainedConfig
class ScorePredictorConfig(PretrainedConfig):
    """
    Configuration class for ScorePredictorModel (Encoder-only).

    Compatible with Hugging Face's ``AutoConfig`` via ``model_type``.

    Args:
        backbone_model_name: Name or path of the backbone encoder model
            (e.g., ``'bert-base-uncased'``).
        num_scores: Number of regression outputs (default: 4).
        score_names: Names of the scores being predicted. Defaults to
            ``["informativeness", "clarity", "plausibility", "faithfulness"]``.
        hidden_size: Hidden size of the backbone model (``None`` means it is
            auto-detected later from the backbone).
        attn_implementation: Attention implementation
            (``'eager'``, ``'sdpa'``, ``'flash_attention_2'``).
        use_attention_pooling: Whether to use attention pooling in addition
            to CLS+Mean pooling.
        max_position_embeddings: Maximum sequence length supported
            (default: 512).
        use_shared_encoder: Whether to use a shared MLP before the
            task-specific heads.
        hidden_dropout_prob: Dropout probability for hidden layers.
        head_hidden_size: Hidden size for task-specific score heads.
        shared_hidden_size: Hidden size for shared encoder layers.
        **kwargs: Additional arguments passed to ``PretrainedConfig``.
    """

    model_type = "score_predictor"

    def __init__(
        self,
        backbone_model_name: str = "bert-base-uncased",
        num_scores: int = 4,
        score_names: list = None,
        hidden_size: int = None,
        attn_implementation: str = None,
        use_attention_pooling: bool = True,
        max_position_embeddings: int = 512,
        use_shared_encoder: bool = True,
        hidden_dropout_prob: float = 0.1,
        head_hidden_size: int = 256,
        shared_hidden_size: int = 512,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.backbone_model_name = backbone_model_name
        self.num_scores = num_scores
        # Default score names match the four OmniScore regression targets.
        self.score_names = score_names or [
            "informativeness",
            "clarity",
            "plausibility",
            "faithfulness"
        ]
        self.hidden_size = hidden_size
        self.attn_implementation = attn_implementation
        # BUG FIX: previously read kwargs.get("max_position_embeddings", 512),
        # which always returned 512 because an explicitly-passed value binds to
        # the named parameter and never reaches **kwargs. Use the parameter.
        self.max_position_embeddings = max_position_embeddings
        # Architecture configuration
        self.use_attention_pooling = use_attention_pooling
        self.use_shared_encoder = use_shared_encoder
        self.hidden_dropout_prob = hidden_dropout_prob
        self.head_hidden_size = head_hidden_size
        self.shared_hidden_size = shared_hidden_size