import torch import torch.nn as nn import torch.nn.functional as F from transformers import AutoTokenizer, AutoModel, AutoConfig from huggingface_hub import hf_hub_download import gradio as gr REPO_ID = "mental/mental-roberta-base" # backbone HF_REPO_ID = "vimdhayak/neda-learned-negation" # ← same as upload repo ID2LABEL = {0: 'Anxious', 1: 'Depressed', 2: 'Frustrated', 3: 'Others', 4: 'Suicidal'} LABEL2ID = {'Anxious': 0, 'Depressed': 1, 'Frustrated': 2, 'Others': 3, 'Suicidal': 4} NUM_LABELS = 5 MAX_LENGTH = 256 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # ── Architecture (must match training) ─────────────────────── class NegationAwareContextualModulation(nn.Module): def __init__(self, H, cw=3, dp=0.1): super().__init__() self.neg_detector = nn.Sequential(nn.Linear(H, H//2), nn.GELU(), nn.Linear(H//2, 1)) self.neg_embed = nn.Embedding(2, H) self.context_attn = nn.MultiheadAttention(H, 4, dropout=dp, batch_first=True) self.gate = nn.Sequential(nn.Linear(3*H, H), nn.GELU(), nn.Linear(H, 1), nn.Sigmoid()) self.neg_transform = nn.Sequential(nn.Linear(H, H), nn.GELU(), nn.Dropout(dp), nn.Linear(H, H)) self.neg_aux_head = nn.Linear(H, 1) def forward(self, h, mask): p = torch.sigmoid(self.neg_detector(h)) emb = (1-p)*self.neg_embed.weight[0] + p*self.neg_embed.weight[1] h = h + emb ctx, _ = self.context_attn(h, h, h, key_padding_mask=(mask==0)) g = self.gate(torch.cat([h, ctx, emb], -1)) h = h + p * g * (self.neg_transform(h) - h) return h, self.neg_aux_head(h[:,0,:]).squeeze(-1) class CrossLayerGatedAttentionFusion(nn.Module): def __init__(self, H, nl=4, nh=4, dp=0.1): super().__init__() self.lw = nn.Parameter(torch.ones(nl)/nl) self.cross_attn = nn.MultiheadAttention(H, nh, dropout=dp, batch_first=True) self.gate_proj = nn.Linear(2*H, H) self.ln = nn.LayerNorm(H) def forward(self, layers, mask): w = F.softmax(self.lw, 0) kv = (torch.stack(layers,0)*w.view(-1,1,1,1)).sum(0) a, _ = self.cross_attn(layers[-1], kv, kv, key_padding_mask=(mask==0)) g = torch.sigmoid(self.gate_proj(torch.cat([a, layers[-1]], -1))) return self.ln(g*a + (1-g)*layers[-1]) class MultiGranularityPooling(nn.Module): def __init__(self, H, k=4, dp=0.1): super().__init__() self.k = k self.attn_pool = nn.Sequential(nn.Linear(H,H), nn.Tanh(), nn.Linear(H,1)) self.sent_attn = nn.Sequential(nn.Linear(H,H//2), nn.Tanh(), nn.Linear(H//2,1)) self.fusion = nn.Sequential(nn.Linear(3*H,H), nn.LayerNorm(H), nn.GELU(), nn.Dropout(dp)) def forward(self, h, mask): B, L, H = h.shape cls = h[:,0,:] sc = self.attn_pool(h).squeeze(-1).masked_fill(mask==0, float("-inf")) ar = (h * F.softmax(sc,-1).unsqueeze(-1)).sum(1) cs = L // self.k sents = [] for i in range(self.k): s, e = i*cs, (i+1)*cs if i