| """ |
| Configuration class for ScorePredictorModel. |
| Compatible with Hugging Face's AutoConfig. |
| """ |
|
|
| from transformers import PretrainedConfig |
|
|
|
|
class ScorePredictorConfig(PretrainedConfig):
    """
    Configuration class for ScorePredictorModel (Encoder-only).

    Args:
        backbone_model_name: Name or path of the backbone encoder model
            (e.g., 'bert-base-uncased')
        num_scores: Number of regression outputs (default: 4)
        score_names: Names of the scores being predicted; defaults to
            ["informativeness", "clarity", "plausibility", "faithfulness"]
        hidden_size: Hidden size of the backbone model (None = auto-detected)
        attn_implementation: Attention implementation
            ('eager', 'sdpa', 'flash_attention_2')
        use_attention_pooling: Whether to use attention pooling in addition
            to CLS+Mean
        max_position_embeddings: Maximum sequence length (default: 512)
        use_shared_encoder: Whether to use shared MLP before task-specific heads
        hidden_dropout_prob: Dropout probability for hidden layers
        head_hidden_size: Hidden size for task-specific score heads
        shared_hidden_size: Hidden size for shared encoder layers
        **kwargs: Additional arguments passed to PretrainedConfig
    """

    model_type = "score_predictor"

    def __init__(
        self,
        backbone_model_name: str = "bert-base-uncased",
        num_scores: int = 4,
        score_names: list = None,
        hidden_size: int = None,
        attn_implementation: str = None,
        use_attention_pooling: bool = True,
        max_position_embeddings: int = 512,
        use_shared_encoder: bool = True,
        hidden_dropout_prob: float = 0.1,
        head_hidden_size: int = 256,
        shared_hidden_size: int = 512,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.backbone_model_name = backbone_model_name
        self.num_scores = num_scores
        # Default names correspond to num_scores=4; callers overriding
        # num_scores should supply a matching score_names list.
        self.score_names = score_names or [
            "informativeness",
            "clarity",
            "plausibility",
            "faithfulness",
        ]
        self.hidden_size = hidden_size
        self.attn_implementation = attn_implementation
        # Bug fix: this previously read kwargs.get("max_position_embeddings", 512),
        # but the value binds to the explicit parameter and never lands in
        # kwargs, so any caller-supplied value was silently discarded and the
        # attribute was always 512. Assign from the parameter instead.
        self.max_position_embeddings = max_position_embeddings

        self.use_attention_pooling = use_attention_pooling
        self.use_shared_encoder = use_shared_encoder
        self.hidden_dropout_prob = hidden_dropout_prob
        self.head_hidden_size = head_hidden_size
        self.shared_hidden_size = shared_hidden_size