Token Classification
Transformers
PyTorch
English
funding-extraction
arxiv
scholarly-communication
span-extraction
modernbert
Instructions to use cometadata/funding-extraction-modernbert-base-spanhead with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cometadata/funding-extraction-modernbert-base-spanhead with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="cometadata/funding-extraction-modernbert-base-spanhead")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("cometadata/funding-extraction-modernbert-base-spanhead", dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| """Custom model class for funding-extraction-modernbert-base-spanhead. | |
| Usage: | |
| import torch | |
| from huggingface_hub import hf_hub_download | |
| from transformers import AutoTokenizer | |
| from modeling import SpanHead | |
| REPO = "cometadata/funding-extraction-modernbert-base-spanhead" | |
| tokenizer = AutoTokenizer.from_pretrained(REPO) | |
| model = SpanHead().to("cuda") | |
| sd = torch.load(hf_hub_download(REPO, "pytorch_model.bin"), | |
| map_location="cuda", weights_only=True) | |
| model.load_state_dict(sd) | |
| model.eval() | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| from transformers import AutoModel | |
class SpanHead(nn.Module):
    """ModernBERT-base encoder + start/end/no-answer heads for funding span extraction.

    The encoder output feeds three independent linear heads:

    * ``start_head`` / ``end_head`` score every token position.
    * ``no_answer_head`` scores one mask-weighted mean-pooled vector per
      sequence.
    """

    def __init__(self, base: str = "answerdotai/ModernBERT-base"):
        """Load the encoder named by ``base`` and create the three heads.

        Args:
            base: Model identifier or path passed to
                ``AutoModel.from_pretrained``.
        """
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base)
        h = self.encoder.config.hidden_size  # 768
        self.start_head = nn.Linear(h, 1)
        self.end_head = nn.Linear(h, 1)
        self.no_answer_head = nn.Linear(h, 1)
        self.dropout = nn.Dropout(0.1)

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Run the encoder and return start, end and no-answer logits.

        Args:
            input_ids: Token ids, forwarded unchanged to the encoder.
            attention_mask: Mask forwarded to the encoder and reused as the
                pooling weights; expected to be ``(batch, seq_len)`` with
                non-zero entries on real tokens.

        Returns:
            ``(start_logits, end_logits, no_answer)`` where the first two hold
            one score per token position and ``no_answer`` holds one score per
            sequence.
        """
        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        states = out.last_hidden_state

        # Dropout is applied only on the token-level path; the pooled
        # no-answer path below reads the raw encoder states.
        hidden = self.dropout(states)
        start_logits = self.start_head(hidden).squeeze(-1)
        end_logits = self.end_head(hidden).squeeze(-1)

        # Mean-pool for no-answer. The mask is float32, so the sum and the
        # division are accumulated in float32 even for a half-precision model.
        mask = attention_mask.unsqueeze(-1).float()
        # clamp(min=1) avoids division by zero for a fully masked sequence.
        pooled = (states * mask).sum(1) / mask.sum(1).clamp(min=1)
        # Cast back to the encoder's dtype: without this, an fp16/bf16 model
        # feeds a float32 tensor into a half-precision Linear, which raises a
        # dtype-mismatch RuntimeError. This is a no-op for float32 models.
        pooled = pooled.to(states.dtype)
        no_answer = self.no_answer_head(pooled).squeeze(-1)
        return start_logits, end_logits, no_answer