permutans committed on
Commit
17f1925
·
verified ·
1 Parent(s): 4becc12

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +14 -9
  2. config.json +4 -0
  3. modeling_havelock.py +32 -0
README.md CHANGED
@@ -38,26 +38,29 @@ This model performs multi-label span-level detection of 53 rhetorical marker typ
38
  | Min examples | 150 (types below this threshold excluded) |
39
 
40
  ## Usage
 
41
  ```python
42
  import json
43
  import torch
44
- from transformers import AutoTokenizer
45
- from estimators.tokens.model import MultiLabelTokenClassifier
46
 
47
- model_path = "models/bert_token_classifier"
48
- tokenizer = AutoTokenizer.from_pretrained(model_path)
49
- model = MultiLabelTokenClassifier.load(model_path, device="cpu")
50
  model.eval()
51
 
52
- type_to_idx = json.loads((model_path / "type_to_idx.json").read_text())
 
 
53
  idx_to_type = {v: k for k, v in type_to_idx.items()}
54
 
55
  text = "Tell me, O Muse, of that ingenious hero who travelled far and wide"
56
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
57
 
58
  with torch.no_grad():
59
- logits = model(inputs["input_ids"], inputs["attention_mask"])
60
- preds = logits.argmax(dim=-1) # (1, seq, num_types)
61
 
62
  tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
63
  for i, token in enumerate(tokens):
@@ -70,6 +73,8 @@ for i, token in enumerate(tokens):
70
  print(f"{token:15} {', '.join(active)}")
71
  ```
72
 
 
 
73
  ## Training Data
74
 
75
  - Sources: Project Gutenberg, textfiles.com, Reddit, Wikipedia talk pages
@@ -233,4 +238,4 @@ classifier.bias → randomly initialized
233
 
234
  ---
235
 
236
- *Trained: February 2026*
 
38
  | Min examples | 150 (types below this threshold excluded) |
39
 
40
  ## Usage
41
+
42
  ```python
43
  import json
44
  import torch
45
+ from transformers import AutoModel, AutoTokenizer
46
+ from huggingface_hub import hf_hub_download
47
 
48
+ model_name = "HavelockAI/bert-token-classifier"
49
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
50
+ model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
51
  model.eval()
52
 
53
+ # Load marker type map
54
+ type_map_path = hf_hub_download(model_name, "type_to_idx.json")
55
+ with open(type_map_path) as f:
+     type_to_idx = json.load(f)
56
  idx_to_type = {v: k for k, v in type_to_idx.items()}
57
 
58
  text = "Tell me, O Muse, of that ingenious hero who travelled far and wide"
59
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
60
 
61
  with torch.no_grad():
62
+ logits = model(**inputs) # (1, seq_len, num_types, 3)
63
+ preds = logits.argmax(dim=-1) # (1, seq_len, num_types)
64
 
65
  tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
66
  for i, token in enumerate(tokens):
 
73
  print(f"{token:15} {', '.join(active)}")
74
  ```
75
 
76
+ > **Note:** This model uses a custom architecture (`HavelockTokenClassifier`) with independent B/I/O heads per marker type, enabling overlapping span detection. Loading requires `trust_remote_code=True`.
77
+
78
  ## Training Data
79
 
80
  - Sources: Project Gutenberg, textfiles.com, Reddit, Wikipedia talk pages
 
238
 
239
  ---
240
 
241
+ *Trained: February 2026*
config.json CHANGED
@@ -349,5 +349,9 @@
349
  "O-oral_vocative": 156,
350
  "B-oral_vocative": 157,
351
  "I-oral_vocative": 158
 
 
 
 
352
  }
353
  }
 
349
  "O-oral_vocative": 156,
350
  "B-oral_vocative": 157,
351
  "I-oral_vocative": 158
352
+ },
353
+ "num_types": 53,
354
+ "auto_map": {
355
+ "AutoModel": "modeling_havelock.HavelockTokenClassifier"
356
  }
357
  }
modeling_havelock.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Custom multi-label token classifier for HuggingFace Hub."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from transformers import BertPreTrainedModel, AutoModel
6
+
7
+
8
class HavelockTokenClassifier(BertPreTrainedModel):
    """Token classifier emitting an independent O/B/I decision per marker type.

    Rather than one softmax over a flat BIO label set, every token receives
    ``num_types`` separate 3-way (O/B/I) classifications, which lets spans of
    different marker types overlap on the same token (e.g. a token that is
    simultaneously B-anaphora and I-concessive).

    Forward output logits shape: ``(batch, seq_len, num_types, 3)``.
    """

    def __init__(self, config):
        """Build the backbone encoder and the fused per-type O/B/I head.

        Args:
            config: HF model config; must supply ``num_types``,
                ``hidden_size`` and ``hidden_dropout_prob``.
        """
        super().__init__(config)
        self.num_types = config.num_types
        # Backbone is resolved from the config's model type (BertConfig -> BertModel).
        self.bert = AutoModel.from_config(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # Single fused projection: 3 logits (O/B/I) for each of num_types heads.
        self.classifier = nn.Linear(config.hidden_size, config.num_types * 3)
        self.post_init()

    def forward(self, input_ids, attention_mask=None, **kwargs):
        """Return per-token, per-type O/B/I logits, shape (batch, seq, num_types, 3)."""
        encoder_out = self.bert(
            input_ids=input_ids, attention_mask=attention_mask
        )
        states = self.dropout(encoder_out.last_hidden_state)
        flat_logits = self.classifier(states)
        n_batch, n_seq = flat_logits.shape[0], flat_logits.shape[1]
        # Unfold the fused head dimension into an explicit per-type 3-way axis.
        return flat_logits.view(n_batch, n_seq, self.num_types, 3)