"""
models/bert_model.py

DistilBERT fine-tuned sentiment classifier.

Training is done on Google Colab (GPU required) — see notebooks/colab_train.py.
This file handles inference only, loading the saved checkpoint from disk
(or from the Hugging Face Hub when no local checkpoint is present).

Public API (used by app.py):
    predict(text) -> {"label": str, "score": float, "keywords": list[str]}
"""

import os
import sys

import numpy as np
import torch
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
)

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from data.labels import LABEL_NAMES  # {0: "Negative", 1: "Positive", 2: "Neutral"}

# ── Paths ─────────────────────────────────────────────────────────────────────
SAVE_DIR = os.path.join(os.path.dirname(__file__), "saved", "bert", "bert_sentiment")
HUB_MODEL = "DanTan05/bert-sentiment"  # fallback when local checkpoint not present

# ── Neutral-class correction ──────────────────────────────────────────────────
# The neutral class was trained on Twitter data that was actually pos/neg,
# so the model over-predicts neutral for short opinionated text.
# If neutral wins with confidence below the threshold, predict() defers to the
# stronger of Negative(0) / Positive(1).
NEUTRAL_IDX = 2
NEUTRAL_THRESHOLD = 0.60

# ── Module-level cache ────────────────────────────────────────────────────────
# Same pattern as baseline.py — load from disk once, reuse on every predict()
_tokenizer = None
_model = None
_device = None


def _load_models():
    """
    Load the tokenizer and model once and cache them in module globals.

    Subsequent calls return immediately once `_model` is set. The globals are
    only assigned after the model has been moved to the device and switched to
    eval mode, so a failure part-way through never leaves a half-initialised
    model in the cache.
    """
    global _tokenizer, _model, _device
    if _model is not None:
        return

    # Use local checkpoint if present (dev), otherwise download from Hub (Spaces).
    # isdir (not exists) so a stray file at that path is not mistaken for a
    # checkpoint directory.
    source = SAVE_DIR if os.path.isdir(SAVE_DIR) else HUB_MODEL
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Loading BERT model from '{source}' on {device}...")

    tokenizer = DistilBertTokenizerFast.from_pretrained(source)
    # NOTE(review): eager attention is requested explicitly, presumably because
    # predict() needs output_attentions=True — confirm against the installed
    # transformers version before changing it.
    model = DistilBertForSequenceClassification.from_pretrained(
        source, attn_implementation="eager"
    )
    model.to(device)
    model.eval()  # disables dropout — important for deterministic inference

    # Publish to the cache only now that everything succeeded.
    _tokenizer, _device, _model = tokenizer, device, model


# ── Inference ─────────────────────────────────────────────────────────────────
def predict(text: str) -> dict:
    """
    Classify the sentiment of `text`.

    Returns the same inference contract dict as baseline.py:
        {
            "label":    "Positive" | "Negative" | "Neutral",
            "score":    float,       # confidence in the predicted class (0–1)
            "keywords": list[str],   # words with highest attention weights
        }

    Why output_attentions=True?
        DistilBERT has 6 transformer layers, each with 12 attention heads.
        Each head produces a (seq_len × seq_len) attention matrix showing
        how much each token "attended to" every other token.
        We use these weights as a proxy for token importance.
    """
    _load_models()

    # Tokenize
    #   return_tensors="pt"  → return PyTorch tensors (not lists)
    #   truncation=True      → clip to model's max 512 tokens
    #   max_length=512       → DistilBERT's hard limit
    inputs = _tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    inputs = {k: v.to(_device) for k, v in inputs.items()}

    # Forward pass — no gradient tracking needed at inference time.
    # torch.no_grad() saves memory and speeds things up.
    with torch.no_grad():
        outputs = _model(**inputs, output_attentions=True)

    # outputs.logits shape: (1, n_classes) — raw unnormalised scores
    # softmax converts them to probabilities that sum to 1
    proba = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
    class_idx = int(np.argmax(proba))
    score = float(proba[class_idx])

    # Low-confidence neutral → defer to the stronger of Negative(0) / Positive(1).
    # The reported score is then that class's own probability.
    if class_idx == NEUTRAL_IDX and score < NEUTRAL_THRESHOLD:
        class_idx = int(np.argmax(proba[:2]))
        score = float(proba[class_idx])

    # Our fine-tuning saves numeric keys, so LABEL_NAMES is the primary mapping.
    # The model's own id2label config (e.g. "LABEL_1") is only a fallback, and
    # is looked up lazily so a missing config entry cannot raise when
    # LABEL_NAMES already has the answer.
    label_str = LABEL_NAMES.get(class_idx)
    if label_str is None:
        label_str = _model.config.id2label.get(class_idx, f"LABEL_{class_idx}")

    keywords = _extract_keywords_from_attention(outputs.attentions, inputs, top_n=10)

    return {
        "label": label_str,
        "score": score,
        "keywords": keywords,
    }


def _merge_wordpieces(tokens, scores, is_special) -> list:
    """
    Rebuild whole words from WordPiece tokens and attach a score to each.

    A token starting with "##" continues the previous word, so
    ["un", "##believ", "##able"] becomes "unbelievable" instead of three
    fragments. A merged word takes the highest score among its pieces, which
    keeps it comparable with single-piece words (summing would favour long
    words simply for being split more).

    Args:
        tokens:     token strings, in sequence order.
        scores:     one numeric score per token.
        is_special: one bool per token; True marks [CLS]/[SEP]/[PAD]-style
                    tokens, which are dropped and also end the current word.

    Returns:
        List of (word, score) tuples in sequence order, excluding words
        shorter than two characters.
    """
    words = []  # each entry is a mutable [text, score] pair
    can_extend = False  # True while the previous token may accept a "##" piece
    for token, score, special in zip(tokens, scores, is_special):
        if special:
            can_extend = False
            continue
        score = float(score)
        if token.startswith("##"):
            piece = token[2:]
            if can_extend:
                words[-1][0] += piece
                words[-1][1] = max(words[-1][1], score)
                continue
            # Orphan continuation (nothing to attach to): treat as a new word.
        else:
            piece = token
        words.append([piece, score])
        can_extend = True

    # Skip single characters (mostly punctuation and stray letters).
    return [(text, score) for text, score in words if len(text) >= 2]


def _extract_keywords_from_attention(attentions, inputs, top_n: int = 10) -> list:
    """
    Derives the most important words using the last layer's attention weights.

    Steps:
      1. Take the last transformer layer's attention tensor
         Shape: (1, n_heads, seq_len, seq_len)
      2. Average across all heads → shape: (seq_len, seq_len)
      3. Sum each token's incoming attention (column sum) — this measures
         how much the rest of the sequence attended TO this token
      4. Convert token IDs back to strings, skip special tokens ([CLS], [SEP],
         [PAD]) which always get high attention artificially, and merge
         WordPiece "##" continuations back into whole words
      5. Return up to top_n distinct words by attention score

    Args:
        attentions: tuple of per-layer attention tensors from the model output.
        inputs:     tokenizer output dict; only "input_ids" is read.
        top_n:      maximum number of keywords to return; values <= 0 yield [].

    Returns:
        List of distinct keyword strings, highest score first. Empty when no
        attentions are available or no token survives filtering.

    Caveat (worth knowing):
        Attention weights ≠ explanation. Research (Jain & Wallace 2019) shows
        attention doesn't always correlate with feature importance.
        For a demo this is fine; for production use SHAP or integrated gradients.
    """
    if top_n <= 0 or not attentions:
        return []

    # attentions is a tuple of tensors, one per layer — we want the last one
    last_layer_attn = attentions[-1]                    # (1, heads, seq, seq)
    avg_attn = last_layer_attn[0].mean(dim=0)           # (seq, seq)
    token_scores = avg_attn.sum(dim=0).cpu().tolist()   # (seq,)

    # Decode each token ID back to its string
    input_ids = inputs["input_ids"][0].cpu().tolist()
    special_ids = set(_tokenizer.all_special_ids)
    tokens = _tokenizer.convert_ids_to_tokens(input_ids)
    is_special = [tid in special_ids for tid in input_ids]

    scored = _merge_wordpieces(tokens, token_scores, is_special)

    # Sort by score descending, deduplicate, return top_n.
    # The sort is stable, so equal scores keep their sequence order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    seen, keywords = set(), []
    for word, _ in scored:
        if word in seen:
            continue
        seen.add(word)
        keywords.append(word)
        if len(keywords) >= top_n:
            break

    return keywords