import torch
import torch.nn as nn
from transformers import PreTrainedModel

from configuration_tinytransformer import TinyTransformerConfig


class TinyTransformerModel(PreTrainedModel):
    """
    A very small Transformer encoder plus a classification head, used for a sentiment-classification demo.
    """
    config_class = TinyTransformerConfig

    def __init__(self, config: TinyTransformerConfig):
        super().__init__(config)
        self.config = config

        # Token embeddings; fall back to index 0 for padding if the config has no pad_token_id
        self.embedding = nn.Embedding(
            config.vocab_size,
            config.hidden_size,
            padding_idx=config.pad_token_id if hasattr(config, 'pad_token_id') else 0
        )

        # Learned absolute position embeddings
        self.pos_embedding = nn.Embedding(
            config.max_position_embeddings,
            config.hidden_size
        )

        # Stack of standard PyTorch Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_size,
            nhead=config.num_attention_heads,
            dim_feedforward=config.intermediate_size,
            dropout=config.dropout,
            activation="gelu",
            batch_first=True,
            norm_first=False
        )

        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=config.num_hidden_layers
        )

        # Classification head
        self.dropout = nn.Dropout(config.dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # No weights are tied in this model
        self._tied_weights_keys = []

        print("DEBUG: _tied_weights_keys set to", self._tied_weights_keys)

        # Standard Hugging Face post-construction hook; applies _init_weights to all submodules
        self.post_init()

    def _init_weights(self, module=None):
        """Simple weight initialization."""
        if module is None:
            module = self
        for m in module.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Embedding):
                nn.init.normal_(m.weight, mean=0.0, std=0.02)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        labels=None,
        **kwargs
    ):
        batch_size, seq_len = input_ids.shape

        # Build position ids [0, seq_len) and broadcast them across the batch
        position_ids = torch.arange(
            0, seq_len, dtype=torch.long, device=input_ids.device
        )
        position_ids = position_ids.unsqueeze(0).expand(batch_size, -1)

        # Token + position embeddings, then dropout
        x = self.embedding(input_ids) + self.pos_embedding(position_ids)
        x = self.dropout(x)

        # Convert the Hugging Face attention_mask (1 = real token, 0 = padding)
        # into PyTorch's key padding mask (True = position to ignore)
        if attention_mask is not None:
            src_key_padding_mask = (attention_mask == 0)
        else:
            src_key_padding_mask = None

        # Run the encoder stack
        x = self.encoder(
            x,
            src_key_padding_mask=src_key_padding_mask,
        )

        # CLS-style pooling: use the hidden state of the first token
        pooled = x[:, 0, :]

        # Project the pooled representation to class logits
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))

        return {
            "loss": loss,
            "logits": logits
        }
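

# --- Usage sketch (not part of the model file) ------------------------------
# A minimal smoke test for the class above. It assumes TinyTransformerConfig
# accepts/provides the fields referenced in __init__ (vocab_size, hidden_size,
# num_hidden_layers, num_attention_heads, intermediate_size, dropout,
# max_position_embeddings, num_labels); adjust the constructor arguments to
# whatever your config class actually defines.

config = TinyTransformerConfig(
    vocab_size=1000,
    hidden_size=64,
    num_hidden_layers=2,
    num_attention_heads=4,
    intermediate_size=128,
    max_position_embeddings=128,
    dropout=0.1,
    num_labels=2,
)
model = TinyTransformerModel(config)

input_ids = torch.randint(0, config.vocab_size, (2, 16))   # batch of 2, 16 tokens each
attention_mask = torch.ones_like(input_ids)                # no padding in this toy batch
labels = torch.tensor([0, 1])

outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
print(outputs["loss"].item(), outputs["logits"].shape)     # logits: torch.Size([2, 2])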