Token Classification
Transformers
PyTorch
English
funding-extraction
arxiv
scholarly-communication
span-extraction
modernbert
Instructions to use cometadata/funding-extraction-modernbert-base-spanhead with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cometadata/funding-extraction-modernbert-base-spanhead with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("token-classification", model="cometadata/funding-extraction-modernbert-base-spanhead")

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("cometadata/funding-extraction-modernbert-base-spanhead", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 1,686 Bytes
# 1e00313
"""Custom model class for funding-extraction-modernbert-base-spanhead.

Usage:
    import torch
    from huggingface_hub import hf_hub_download
    from transformers import AutoTokenizer
    from modeling import SpanHead

    REPO = "cometadata/funding-extraction-modernbert-base-spanhead"
    tokenizer = AutoTokenizer.from_pretrained(REPO)
    model = SpanHead().to("cuda")
    sd = torch.load(hf_hub_download(REPO, "pytorch_model.bin"),
                    map_location="cuda", weights_only=True)
    model.load_state_dict(sd)
    model.eval()
"""
import torch
import torch.nn as nn
from transformers import AutoModel
class SpanHead(nn.Module):
    """ModernBERT-base encoder + start/end/no-answer heads for funding span extraction.

    The encoder is loaded with ``AutoModel.from_pretrained(base)``. Three
    independent ``nn.Linear(hidden_size, 1)`` heads sit on top of it:

    * ``start_head`` / ``end_head`` score every token position.
    * ``no_answer_head`` scores the whole sequence from a masked mean-pool of
      the encoder's last hidden state.

    Args:
        base: Model id or path passed to ``AutoModel.from_pretrained``.
        dropout: Dropout probability applied to the token representations
            that feed the start/end heads. Defaults to 0.1.
    """

    def __init__(self, base: str = "answerdotai/ModernBERT-base", dropout: float = 0.1):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base)
        h = self.encoder.config.hidden_size  # 768 for the default base
        self.start_head = nn.Linear(h, 1)
        self.end_head = nn.Linear(h, 1)
        self.no_answer_head = nn.Linear(h, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask=None):
        """Compute start, end and no-answer logits.

        Args:
            input_ids: Token ids, assumed to be shaped ``(batch, seq_len)``.
            attention_mask: Mask with the same shape as ``input_ids`` (non-zero
                for real tokens). When omitted, every position is treated as
                a real token.

        Returns:
            A tuple ``(start_logits, end_logits, no_answer)``. The first two
            hold one score per token position; ``no_answer`` holds one score
            per sequence.
        """
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)

        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden = out.last_hidden_state

        # Dropout only affects the per-token heads; pooling below uses the
        # raw encoder output, as in the original implementation.
        hidden = self.dropout(last_hidden)
        start_logits = self.start_head(hidden).squeeze(-1)
        end_logits = self.end_head(hidden).squeeze(-1)

        # Masked mean-pool for the no-answer score. Accumulate in at least
        # float32 so token counts stay exact for half/bfloat16 models, then
        # cast back so the pooled vector matches the dtype the encoder
        # produced (a float32 input to half-precision Linear weights raises).
        acc_dtype = torch.promote_types(last_hidden.dtype, torch.float32)
        mask = attention_mask.unsqueeze(-1).to(acc_dtype)
        summed = (last_hidden.to(acc_dtype) * mask).sum(1)
        # clamp avoids division by zero for a fully masked row.
        pooled = (summed / mask.sum(1).clamp(min=1)).to(last_hidden.dtype)
        no_answer = self.no_answer_head(pooled).squeeze(-1)
        return start_logits, end_logits, no_answer
|