Upload EVTClassifier

d71d985 verified 2 months ago

6.69 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from transformers import PreTrainedModel, AutoModel, AutoConfig, PretrainedConfig
	from transformers.modeling_outputs import SequenceClassifierOutput


	# ==========================================
	# Configuration Class
	# ==========================================
	class EVTConfig(PretrainedConfig):
	    """Settings container for the EVT classifier.

	    Holds the backbone identifier, the size of the classification head,
	    the regularisation and scaling hyper-parameters, and the optional
	    per-class positive weights. When the caller supplies no label mapping,
	    a built-in five-entry mapping is installed before the parent
	    constructor runs.
	    """

	    model_type = "evt_classifier"

	    def __init__(
	        self,
	        base_model_path="sentence-transformers/all-mpnet-base-v2",
	        num_core_labels=5,
	        temperature=20.0,
	        hidden_size=768,
	        dropout_rate=0.2,
	        pos_weights=None,
	        **kwargs
	    ):
	        """Store the classifier settings and fill in default label maps.

	        Args:
	            base_model_path (str): The path to the base transformer model.
	            num_core_labels (int): Number of output labels for the classifier.
	            temperature (float): Scaling factor for the NormLinear layer.
	            hidden_size (int): Hidden size of the base model.
	            dropout_rate (float): Dropout rate for regularization.
	            pos_weights (list): Positive class weights for handling class
	                imbalance.
	            **kwargs: Additional keyword arguments forwarded to
	                PretrainedConfig. ``id2label`` / ``label2id`` given here
	                take precedence over the built-in defaults.
	        """
	        # Install the built-in label names only when the caller gave none.
	        if "id2label" not in kwargs:
	            default_names = (
	                "ATTAINMENT_VALUE",
	                "COST",
	                "EXPECTANCY",
	                "INTRINSIC_VALUE",
	                "UTILITY_VALUE",
	            )
	            kwargs["id2label"] = dict(enumerate(default_names))

	        # The reverse map is derived from whichever id2label is in effect.
	        if "label2id" not in kwargs:
	            kwargs["label2id"] = {
	                name: index for index, name in kwargs["id2label"].items()
	            }

	        self.base_model_path = base_model_path
	        self.num_core_labels = num_core_labels
	        self.temperature = temperature
	        self.hidden_size = hidden_size
	        self.dropout_rate = dropout_rate
	        self.pos_weights = pos_weights

	        super().__init__(**kwargs)


	# ==========================================
	# Model Architecture Components
	# ==========================================

	class ConcatPooling(nn.Module):
	    """Concatenate masked mean pooling and masked max pooling of hidden states.

	    Positions whose attention mask is zero are excluded from both
	    statistics. The two pooled vectors are joined along the last dimension,
	    so the output feature size is twice the input feature size.
	    """

	    # Sentinel written into masked positions before the max reduction.
	    _MASK_FILL = -1e9

	    def forward(self, hidden_states, attention_mask):
	        """Pool ``hidden_states`` over dimension 1.

	        Args:
	            hidden_states: Tensor reduced along dim 1; assumed to be
	                (batch, tokens, features).
	            attention_mask: Tensor broadcast over the last dimension of
	                ``hidden_states``; zero marks positions to ignore.

	        Returns:
	            Tensor ``[mean_pool, max_pool]`` concatenated on the last
	            dimension. For floating-point input the result keeps the input
	            dtype.
	        """
	        mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()

	        # The mean is accumulated in (at least) float32 because the mask is
	        # float32; the clamp keeps fully masked rows from dividing by zero.
	        summed = torch.sum(hidden_states * mask, 1)
	        counts = torch.clamp(mask.sum(1), min=1e-9)
	        mean_pool = summed / counts

	        fill = self._MASK_FILL
	        if hidden_states.is_floating_point():
	            # Cast back so both halves share the input dtype; otherwise a
	            # half-precision input yields a float32 result after the concat.
	            mean_pool = mean_pool.to(hidden_states.dtype)
	            # -1e9 is not representable in float16 and makes masked_fill
	            # fail; fall back to the dtype's most negative finite value.
	            # For float32/float64 this leaves the fill at -1e9.
	            fill = max(fill, torch.finfo(hidden_states.dtype).min)

	        masked = hidden_states.masked_fill(mask == 0, fill)
	        max_pool = torch.max(masked, dim=1)[0]
	        return torch.cat([mean_pool, max_pool], dim=-1)

	class NormLinear(nn.Module):
	    """Bias-free linear layer that outputs temperature-scaled cosine similarity.

	    Both the input vectors and the rows of the weight matrix are
	    L2-normalised before the matrix product, so every output lies in
	    ``[-temperature, temperature]``.
	    """

	    def __init__(self, in_features, out_features, temperature=20.0):
	        """Create the weight matrix and store the scaling factor.

	        Args:
	            in_features: Size of the input feature dimension.
	            out_features: Number of output units (one weight row each).
	            temperature: Multiplier applied to the cosine similarities.
	        """
	        super().__init__()
	        # torch.empty replaces the legacy torch.FloatTensor(rows, cols)
	        # constructor; the values are overwritten by the Xavier init below.
	        self.weight = nn.Parameter(
	            torch.empty(out_features, in_features, dtype=torch.float32)
	        )
	        nn.init.xavier_uniform_(self.weight)
	        self.temperature = temperature

	    def forward(self, x):
	        """Return scaled cosine similarity between ``x`` and each weight row.

	        Args:
	            x: Tensor whose last dimension has size ``in_features``. Any
	                number of leading dimensions is accepted.

	        Returns:
	            Tensor with the last dimension replaced by ``out_features``.
	        """
	        # Normalise over the feature axis (last dim). For 2-D input this is
	        # the same as dim=1, and it also handles 1-D and >2-D inputs.
	        unit_x = F.normalize(x, p=2, dim=-1)
	        unit_w = F.normalize(self.weight, p=2, dim=1)
	        return F.linear(unit_x, unit_w) * self.temperature

	class EVTClassifier(PreTrainedModel):
	    """Custom classifier model for Expectancy-Value Theory (EVT) text classification.

	    5-class OvR sigmoid classifier with cosine head for multi-label
	    classification. The pipeline is: base transformer -> concatenated
	    mean/max pooling -> dropout -> dense + GELU -> dropout -> cosine head.
	    """

	    config_class = EVTConfig
	    model_type = "evt_classifier"
	    # We don't set base_model_prefix to avoid conflicts with the
	    # PreTrainedModel.base_model property.

	    # Required for transformers 5.x compatibility
	    _tied_weights_keys = []

	    @property
	    def all_tied_weights_keys(self):
	        """Property for compatibility with transformers 5.x. Returns empty dict since no weights are tied."""
	        return {}

	    def __init__(self, config):
	        """Build the backbone and the classification head from ``config``.

	        Args:
	            config: An ``EVTConfig`` (or compatible) object. The backbone
	                architecture is taken from ``config.base_model_path``; the
	                backbone is created from its config only, so its weights
	                are expected to arrive through the state dict.
	        """
	        super().__init__(config)
	        # Using 'transformer' as the attribute name to avoid conflict with
	        # PreTrainedModel.
	        base_config = AutoConfig.from_pretrained(config.base_model_path)
	        self.transformer = AutoModel.from_config(base_config)
	        self.config = config
	        # Prefer the head size declared on the config; fall back to the
	        # backbone's own hidden size when the attribute is missing.
	        h = getattr(config, "hidden_size", self.transformer.config.hidden_size)

	        # Custom layers matching training architecture
	        self.pooler = ConcatPooling()
	        self.dropout = nn.Dropout(getattr(config, "dropout_rate", 0.2))
	        self.dense = nn.Linear(h * 2, h)  # pooling doubles the feature size
	        self.gelu = nn.GELU()
	        self.norm_head = NormLinear(
	            h,
	            config.num_core_labels,
	            temperature=getattr(config, "temperature", 20.0),
	        )

	        # Register pos_weights buffer for class imbalance handling; all-ones
	        # means "no re-weighting".
	        pos_weights = getattr(config, "pos_weights", None)
	        if pos_weights is not None:
	            self.register_buffer(
	                "pos_weights", torch.tensor(pos_weights, dtype=torch.float32)
	            )
	        else:
	            self.register_buffer("pos_weights", torch.ones(config.num_core_labels))

	    def forward(self, input_ids, attention_mask, labels=None, **kwargs):
	        """Run the classifier and optionally compute the training loss.

	        Args:
	            input_ids: Token ids passed to the base transformer.
	            attention_mask: Mask passed to the base transformer and reused
	                for pooling.
	            labels: Optional multi-hot targets with the same shape as the
	                logits. When given in that shape, a binary cross-entropy
	                loss weighted by the ``pos_weights`` buffer is returned.
	            **kwargs: Extra keyword arguments forwarded to the base
	                transformer.

	        Returns:
	            SequenceClassifierOutput with ``logits`` (sigmoid-based, not
	            softmax) and ``loss`` (``None`` unless usable labels were given).
	        """
	        outputs = self.transformer(
	            input_ids=input_ids, attention_mask=attention_mask, **kwargs
	        )
	        pooled = self.pooler(outputs.last_hidden_state, attention_mask)
	        x = self.dropout(pooled)
	        x = self.gelu(self.dense(x))
	        x = self.dropout(x)
	        logits = self.norm_head(x)

	        # Previously `labels` was accepted but silently ignored and the
	        # pos_weights buffer was never used. Labels whose shape does not
	        # match the logits keep the old behaviour (no loss) instead of
	        # raising, so existing callers are unaffected.
	        loss = None
	        if labels is not None and labels.shape == logits.shape:
	            loss = F.binary_cross_entropy_with_logits(
	                logits,
	                labels.to(logits.dtype),
	                pos_weight=self.pos_weights.to(logits.dtype),
	            )

	        return SequenceClassifierOutput(loss=loss, logits=logits)

	    def load_state_dict(self, state_dict, strict=True, assign=False):
	        """Load weights, renaming training-time ``base_model.*`` keys.

	        Checkpoints produced by the training script store the backbone
	        under ``base_model.``; this model stores it under ``transformer.``.
	        Only that leading prefix is rewritten; all other keys (dense,
	        norm_head, pos_weights) pass through unchanged.

	        Args:
	            state_dict: Mapping of parameter names to tensors.
	            strict: Forwarded to ``nn.Module.load_state_dict``.
	            assign: Forwarded to ``nn.Module.load_state_dict`` only when
	                true, so PyTorch versions without this parameter still work.

	        Returns:
	            Whatever the parent ``load_state_dict`` returns.
	        """
	        old_prefix = "base_model."
	        new_prefix = "transformer."
	        remapped = {
	            (
	                new_prefix + key[len(old_prefix):]
	                if key.startswith(old_prefix)
	                else key
	            ): value
	            for key, value in state_dict.items()
	        }
	        extra = {"assign": True} if assign else {}
	        return super().load_state_dict(remapped, strict=strict, **extra)


	# Register both classes for Hugging Face AutoClasses.
	# These two calls run at import time as module-level side effects. The
	# config call passes no argument and therefore relies on that method's
	# default auto class; the model is registered explicitly under "AutoModel".
	EVTConfig.register_for_auto_class()
	EVTClassifier.register_for_auto_class("AutoModel")