"""Energy Intelligence Multitask Model.

Shared DistilBERT encoder with two task heads:

- NER head : token-level BIO entity tagging
- CLS head : sequence-level multi-label topic classification
"""

# NOTE: the module docstring must come first so it is bound to ``__doc__``;
# a ``from __future__`` import is still legal directly after it.
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Tuple

import torch
import torch.nn as nn
from transformers import PreTrainedModel
from transformers.models.distilbert.modeling_distilbert import DistilBertModel
from transformers.utils import ModelOutput

# Works both as a HuggingFace remote-code module (relative) and as a plain
# local file (absolute). The try/except handles both cases.
try:
    from .configuration_energy_multitask import EnergyMultitaskConfig
except ImportError:
    from configuration_energy_multitask import EnergyMultitaskConfig


# ---------------------------------------------------------------------------
# Output dataclass
# ---------------------------------------------------------------------------


@dataclass
class EnergyMultitaskOutput(ModelOutput):
    """Output container returned by :class:`EnergyMultitaskModel`.

    Attributes
    ----------
    loss:
        Combined NER + classification loss when labels are provided.
    ner_logits:
        Raw NER logits of shape ``(batch, seq_len, ner_num_labels)``.
        Apply ``argmax(-1)`` for predicted token tags.
    cls_logits:
        Raw classification logits of shape ``(batch, cls_num_labels)``.
        Apply ``sigmoid`` + threshold for active topic labels.
    hidden_states:
        Encoder hidden states (when hidden-state output is enabled, either
        per call or through the model config).
    attentions:
        Attention weights (when attention output is enabled, either per
        call or through the model config).
    """

    loss: Optional[torch.FloatTensor] = None
    ner_logits: Optional[torch.FloatTensor] = None
    cls_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------


class EnergyMultitaskModel(PreTrainedModel):
    """DistilBERT encoder with a shared backbone and two task heads.

    NER head
    --------
    Token-level linear classifier over all positions in the sequence.
    Uses BIO tagging scheme with 19 labels (O + 9 entity types x B/I).

    Classification head
    -------------------
    Sequence-level multi-label classifier on the [CLS] representation.
    Uses ``BCEWithLogitsLoss`` during training (10 topic labels).

    Quick start
    -----------
    >>> from transformers import AutoTokenizer
    >>> from modeling_energy_multitask import EnergyMultitaskModel
    >>> from configuration_energy_multitask import EnergyMultitaskConfig
    >>>
    >>> model = EnergyMultitaskModel.from_pretrained(
    ...     "QuantBridge/energy-intelligence-multitask",
    ...     trust_remote_code=True,
    ... )
    >>> tokenizer = AutoTokenizer.from_pretrained(
    ...     "QuantBridge/energy-intelligence-multitask",
    ...     trust_remote_code=True,
    ... )
    >>> inputs = tokenizer("Crude oil prices surged", return_tensors="pt")
    >>> inputs.pop("token_type_ids", None)  # DistilBERT does not use these
    >>> out = model(**inputs)
    >>> out.ner_logits.shape  # (1, seq_len, 19)
    >>> out.cls_logits.shape  # (1, 10)
    """

    config_class = EnergyMultitaskConfig

    def __init__(self, config: EnergyMultitaskConfig) -> None:
        """Build the shared encoder and both task heads from ``config``.

        Reads ``dim``, ``dropout``, ``seq_classif_dropout``,
        ``ner_num_labels`` and ``cls_num_labels`` from ``config``.
        """
        super().__init__(config)

        # Shared encoder
        self.distilbert = DistilBertModel(config)
        self.dropout = nn.Dropout(config.dropout)

        # NER head: every token -> entity tag
        self.ner_classifier = nn.Linear(config.dim, config.ner_num_labels)

        # Classification head: [CLS] token -> topic labels
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.cls_classifier = nn.Linear(config.dim, config.cls_num_labels)
        self.seq_classif_dropout = nn.Dropout(config.seq_classif_dropout)

        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        ner_labels: Optional[torch.Tensor] = None,
        cls_labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> EnergyMultitaskOutput:
        """Forward pass through the shared encoder and both task heads.

        Parameters
        ----------
        input_ids:
            Token ids, shape ``(batch, seq_len)``.
        attention_mask:
            Padding mask, shape ``(batch, seq_len)``.
        head_mask:
            Forwarded unchanged to the DistilBERT encoder.
        inputs_embeds:
            Forwarded unchanged to the DistilBERT encoder (alternative to
            ``input_ids``).
        ner_labels:
            Integer token labels, shape ``(batch, seq_len)``.
            Ignored positions should be ``-100``.
        cls_labels:
            Float multi-hot vector, shape ``(batch, cls_num_labels)``.
        output_attentions:
            Forwarded to the encoder; ``None`` lets the encoder fall back
            to its own default.
        output_hidden_states:
            Forwarded to the encoder; ``None`` lets the encoder fall back
            to its own default.
        return_dict:
            Accepted for signature compatibility with the HuggingFace
            calling convention. The result is always an
            :class:`EnergyMultitaskOutput`, which supports both attribute
            access and tuple-style indexing.

        Returns
        -------
        EnergyMultitaskOutput
            ``loss`` is ``None`` when neither label tensor is given, the
            single task loss when only one is given, and the unweighted
            sum of both when both are given.
        """
        # Always request a ModelOutput from the encoder: the optional
        # ``hidden_states`` / ``attentions`` fields are read by name below,
        # which would raise AttributeError on a plain tuple.
        encoder_outputs = self.distilbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )
        sequence_output = encoder_outputs[0]  # (batch, seq_len, dim)

        # ── NER head — all token positions ───────────────────────────────
        ner_output = self.dropout(sequence_output)
        # (batch, seq_len, ner_num_labels)
        ner_logits = self.ner_classifier(ner_output)

        # ── CLS head — [CLS] token only ──────────────────────────────────
        cls_token = sequence_output[:, 0]  # (batch, dim)
        cls_token = self.pre_classifier(cls_token)
        cls_token = nn.functional.relu(cls_token)
        cls_token = self.seq_classif_dropout(cls_token)
        cls_logits = self.cls_classifier(cls_token)  # (batch, cls_num_labels)

        # ── Loss (only computed when labels are supplied) ────────────────
        loss = None
        if ner_labels is not None:
            # ``reshape`` rather than ``view`` so non-contiguous label
            # tensors (e.g. slices of a larger batch) are accepted too.
            loss = nn.CrossEntropyLoss(ignore_index=-100)(
                ner_logits.reshape(-1, self.config.ner_num_labels),
                ner_labels.reshape(-1),
            )
        if cls_labels is not None:
            cls_loss = nn.BCEWithLogitsLoss()(cls_logits, cls_labels.float())
            loss = cls_loss if loss is None else loss + cls_loss

        # The encoder already leaves these fields as ``None`` when they
        # were not requested, so passing them through also honours flags
        # enabled on the config rather than only per-call arguments.
        return EnergyMultitaskOutput(
            loss=loss,
            ner_logits=ner_logits,
            cls_logits=cls_logits,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )