"""
PhilVerify — XLM-RoBERTa Sequence Classifier (Layer 1, Phase 10)

Fine-tuned on Philippine misinformation data (English / Filipino / Taglish).
Drop-in replacement for TFIDFClassifier — same predict() interface.

Uses `ml/models/xlmr_model/` if it exists (populated by train_xlmr.py).
Raises ModelNotFoundError if the model has not been trained yet; the scoring
engine falls back to TFIDFClassifier in that case.
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path

logger = logging.getLogger(__name__)

# Where train_xlmr.py saves the fine-tuned checkpoint
MODEL_DIR = Path(__file__).parent / "models" / "xlmr_model"

# Labels must match the id2label mapping saved during training
LABEL_NAMES = {0: "Credible", 1: "Unverified", 2: "Likely Fake"}
NUM_LABELS = 3
MAX_LENGTH = 256  # tokens; 256 covers 95%+ of PH news headlines/paragraphs

# Sentencepiece control tokens that must never be reported as salient words.
# Used in addition to whatever the loaded tokenizer itself declares as special.
_FALLBACK_SPECIAL_TOKENS = frozenset({"<s>", "</s>", "<pad>", "<unk>"})


class ModelNotFoundError(FileNotFoundError):
    """Raised when the fine-tuned checkpoint directory is missing."""


@dataclass
class Layer1Result:
    """Outcome of a single Layer-1 classification."""

    verdict: str  # "Credible" | "Unverified" | "Likely Fake"
    confidence: float  # 0.0 – 100.0
    triggered_features: list[str] = field(default_factory=list)  # salient tokens


class XLMRobertaClassifier:
    """
    XLM-RoBERTa-based misinformation classifier.

    Loading is lazy: the model is not loaded until the first call to
    predict() / predict_probs(). This keeps FastAPI startup fast when the
    model is available.

    Raises ModelNotFoundError on instantiation if MODEL_DIR does not exist,
    so the scoring engine can detect the missing checkpoint immediately.
    """

    def __init__(self) -> None:
        """Verify the checkpoint directory exists; defer the actual load.

        Raises:
            ModelNotFoundError: if MODEL_DIR is missing.
        """
        if not MODEL_DIR.exists():
            raise ModelNotFoundError(
                f"XLM-RoBERTa checkpoint not found at {MODEL_DIR}. "
                "Run `python ml/train_xlmr.py` to fine-tune the model first."
            )
        self._tokenizer = None
        self._model = None

    # ── Lazy load ─────────────────────────────────────────────────────────────

    def _ensure_loaded(self) -> None:
        """Load tokenizer and model from MODEL_DIR on first use.

        A no-op once ``self._model`` is set. Any loading failure is logged
        with its traceback and re-raised unchanged.
        """
        if self._model is not None:
            return
        try:
            # Heavy third-party imports are deferred to the first prediction.
            from transformers import AutoTokenizer, AutoModelForSequenceClassification
            import torch

            self._torch = torch
            logger.info("Loading XLM-RoBERTa from %s …", MODEL_DIR)
            self._tokenizer = AutoTokenizer.from_pretrained(str(MODEL_DIR))
            self._model = AutoModelForSequenceClassification.from_pretrained(
                str(MODEL_DIR),
                num_labels=NUM_LABELS,
            )
            self._model.eval()
            # NOTE(review): _device reports the best *available* backend, but
            # nothing in this class moves the model or its inputs there, so
            # inference runs wherever from_pretrained placed the weights.
            # Confirm whether a `.to(device)` was intended.
            logger.info("XLM-RoBERTa loaded — device: %s", self._device)
        except Exception as exc:
            logger.exception("Failed to load XLM-RoBERTa model: %s", exc)
            raise

    @property
    def _device(self) -> str:
        """Name of the preferred available torch backend.

        Returns "mps" if available, otherwise "cuda" if available, otherwise
        "cpu". Best-effort: a missing torch install or a failing availability
        probe never raises, it just falls through to the next option.
        """
        try:
            import torch
        except Exception:
            return "cpu"

        # Probes are wrapped in lambdas so that a missing attribute (e.g. no
        # `torch.backends.mps` on older builds) is caught by the try below.
        probes = (
            ("mps", lambda: torch.backends.mps.is_available()),
            ("cuda", lambda: torch.cuda.is_available()),
        )
        for name, is_available in probes:
            try:
                if is_available():
                    return name
            except Exception:
                logger.debug("Device probe for %s failed", name, exc_info=True)
        return "cpu"

    # ── Saliency: attention-based token importance ────────────────────────────

    def _salient_tokens(
        self,
        input_ids,  # (1, seq_len) torch.Tensor
        attentions,  # tuple of (1, heads, seq_len, seq_len) per layer
        n: int = 5,
    ) -> list[str]:
        """
        Average last-layer attention from CLS → all tokens.

        Returns the top-N decoded sub-word tokens as human-readable strings,
        highest attention first. Strips the sentencepiece ▁ prefix, skips
        special tokens, keeps only alphabetic pieces of at least 3 characters,
        and de-duplicates case-insensitively. Returns an empty list when
        ``n`` is not positive.
        """
        if n <= 0:
            return []

        last_layer_attn = attentions[-1]  # (1, heads, seq, seq)
        cls_attn = last_layer_attn[0, :, 0, :].mean(0)  # (seq,) — avg over heads
        seq_len = cls_attn.shape[-1]
        tokens = self._tokenizer.convert_ids_to_tokens(
            input_ids[0].tolist()[:seq_len]
        )

        # Prefer the tokenizer's own list of special tokens; the fallback set
        # covers tokenizers (or stand-ins) that do not expose one.
        special = _FALLBACK_SPECIAL_TOKENS | set(
            getattr(self._tokenizer, "all_special_tokens", None) or ()
        )

        # Score each token; skip special tokens
        scored = []
        for tok, score in zip(tokens, cls_attn.tolist()):
            if not tok or tok in special:
                continue
            clean = tok.lstrip("▁").strip()
            if len(clean) >= 3 and clean.isalpha():
                scored.append((clean, score))

        # Sort descending (stable, so ties keep sequence order), dedup, top N
        seen: set[str] = set()
        result = []
        for word, _ in sorted(scored, key=lambda pair: pair[1], reverse=True):
            key = word.lower()
            if key in seen:
                continue
            seen.add(key)
            result.append(word)
            if len(result) >= n:
                break
        return result

    # ── Public API (same interface as TFIDFClassifier) ────────────────────────

    def predict_probs(self, text: str) -> tuple:
        """Run one forward pass and return the raw pieces for ensembling.

        Returns:
            A 3-tuple ``(probs, attentions, input_ids)``:
            ``probs`` is the softmax over the logits of the single input,
            ``attentions`` is whatever the model returned for
            ``output_attentions=True`` (may be empty/None with some attention
            implementations), and ``input_ids`` is the tokenizer's encoded
            input tensor.
        """
        self._ensure_loaded()
        import torch

        encoding = self._tokenizer(
            text,
            truncation=True,
            max_length=MAX_LENGTH,
            return_tensors="pt",
        )
        with torch.no_grad():
            outputs = self._model(
                input_ids=encoding["input_ids"],
                attention_mask=encoding["attention_mask"],
                output_attentions=True,
            )
        return (
            torch.softmax(outputs.logits[0], dim=-1),
            outputs.attentions,
            encoding["input_ids"],
        )

    def predict(self, text: str) -> Layer1Result:
        """Classify ``text`` and return verdict, confidence and salient tokens.

        ``confidence`` is the winning class probability as a percentage
        rounded to one decimal place. ``triggered_features`` is empty when the
        model returned no attention weights.
        """
        # Single source of truth for tokenization + forward pass.
        probs, attentions, input_ids = self.predict_probs(text)

        pred_label = int(probs.argmax().item())
        confidence = round(float(probs[pred_label].item()) * 100, 1)

        # SDPA attention doesn't return attentions; fallback to empty
        triggered = self._salient_tokens(input_ids, attentions) if attentions else []

        return Layer1Result(
            verdict=LABEL_NAMES[pred_label],
            confidence=confidence,
            triggered_features=triggered,
        )