"""
Custom HuggingFace Trainer subclass.
Uses the model's built-in cross-entropy loss (computed during forward pass)
instead of recomputing it, saving ~60MB of VRAM.
"""
from transformers import Trainer
import torch
from loguru import logger
class CorrectionTrainer(Trainer):
    """Trainer subclass that optimises the loss returned by the model itself.

    Batches may carry extra dataset fields (e.g. ``style_vector``,
    ``input_text``, ``target_text``) that the model's forward pass does not
    accept. Every forward call made by this class therefore goes through
    ``_strip_custom_fields`` first.
    """

    # The only batch keys that are forwarded to the model's forward pass.
    _MODEL_INPUT_KEYS = ("input_ids", "attention_mask", "labels")

    def __init__(self, loss_fn, fingerprinter, tokenizer, **kwargs):
        """Initialise the base ``Trainer`` and store the extra collaborators.

        Args:
            loss_fn: Kept for API compatibility only; no method of this class
                calls it.
            fingerprinter: Stored on the instance as ``self.fingerprinter``;
                not used by the methods of this class.
            tokenizer: Stored as ``self.correction_tokenizer``. It is consumed
                here and is NOT forwarded to ``Trainer.__init__``.
            **kwargs: Forwarded unchanged to ``Trainer.__init__``.
        """
        super().__init__(**kwargs)
        self.loss_fn = loss_fn  # Kept for API compat, not actually used
        self.fingerprinter = fingerprinter
        self.correction_tokenizer = tokenizer

    def _strip_custom_fields(self, inputs):
        """Return a new dict holding only the keys the model accepts.

        The caller's ``inputs`` mapping is left unmodified, so the batch can
        still be inspected (or reused) after the forward pass.

        Args:
            inputs: Mapping of batch field name to value.

        Returns:
            A fresh ``dict`` restricted to ``input_ids``, ``attention_mask``
            and ``labels`` (whichever of those are present), in their
            original order.
        """
        return {
            key: value
            for key, value in inputs.items()
            if key in self._MODEL_INPUT_KEYS
        }

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Run a forward pass and return the loss reported by the model.

        Args:
            model: Callable model; invoked as ``model(**model_inputs)``.
            inputs: Batch mapping; custom dataset fields are filtered out
                before the forward pass.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just ``loss``.
            **kwargs: Accepted and ignored, so that extra keyword arguments
                supplied by the caller do not raise.

        Returns:
            ``outputs.loss``, or the tuple ``(outputs.loss, outputs)`` when
            ``return_outputs`` is true.
        """
        model_inputs = self._strip_custom_fields(inputs)
        outputs = model(**model_inputs)
        # T5 computes CE loss internally when labels are provided, so it is
        # used directly; no second loss computation over the logits happens
        # here.
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss

    def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys=None):
        """Compute the evaluation loss for one batch without gradients.

        Custom dataset fields are stripped first, then the remaining inputs
        go through ``self._prepare_inputs`` before the forward pass.

        Args:
            model: Callable model; invoked as ``model(**model_inputs)``.
            inputs: Batch mapping, possibly containing custom dataset fields.
            prediction_loss_only: Accepted for interface compatibility; this
                implementation never returns logits or labels, whatever its
                value.
            ignore_keys: Accepted for interface compatibility; unused.

        Returns:
            ``(loss, None, None)``. ``loss`` is the detached model loss, or
            ``None`` when the model did not report one (for example because
            the batch carried no ``labels``).
        """
        model_inputs = self._prepare_inputs(self._strip_custom_fields(inputs))
        with torch.no_grad():
            outputs = model(**model_inputs)
        loss = outputs.loss
        # Guard against a missing loss instead of failing with AttributeError
        # on ``None.detach()``.
        if loss is not None:
            loss = loss.detach()
        return (loss, None, None)
|