permutans committed on
Commit
2796c1f
·
verified ·
1 Parent(s): a83fe53

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_havelock.py +47 -64
modeling_havelock.py CHANGED
@@ -1,75 +1,58 @@
1
- """Demo: score texts on the oral–literate spectrum."""
2
-
3
  import torch
4
- from transformers import AutoModel, AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- from estimators.defaults import resolve
 
7
 
8
- TEXTS = {
9
- "Rubio (speech)": (
10
- "National security, which this conference is largely about, is not merely series of technical questions – "
11
- "how much we spend on defense or where, how we deploy it, these are important questions. They are. "
12
- "But they are not the fundamental one. "
13
- "The fundamental question we must answer at the outset is what exactly are we defending, "
14
- "because armies do not fight for abstractions. "
15
- "Armies fight for a people; armies fight for a nation. Armies fight for a way of life."
16
- ),
17
- "Academic (hedged)": (
18
- "The relationship between institutional capacity and policy outcomes remains contested in the literature. "
19
- "While some scholars have argued that bureaucratic autonomy is a necessary condition for effective implementation, "
20
- "others have suggested that the causal mechanism operates primarily through elite consensus rather than "
21
- "organizational structure per se. The evidence presented here, though preliminary, is broadly consistent "
22
- "with the latter interpretation."
23
- ),
24
- "Reddit (casual)": (
25
- "ok so I just tried this and honestly? it's not bad. like yeah the UI is kinda janky and "
26
- "it crashed twice on me but once you get past that the actual features are solid. "
27
- "would I recommend it? ehh maybe wait for the next update but keep an eye on it for sure"
28
- ),
29
- "Legal (formal)": (
30
- "Notwithstanding any provision to the contrary contained herein, the obligations of the parties "
31
- "under this Agreement shall survive termination to the extent necessary to effectuate the purposes "
32
- "of the indemnification, confidentiality, and limitation of liability provisions set forth in "
33
- "Sections 7, 9, and 12 respectively."
34
- ),
35
- "Homer (oral epic)": (
36
- "Sing, O goddess, the anger of Achilles son of Peleus, that brought countless ills upon the Achaeans. "
37
- "Many a brave soul did it send hurrying down to Hades, and many a hero did it yield a prey to dogs "
38
- "and vultures, for so were the counsels of Jove fulfilled from the day on which the son of Atreus, "
39
- "king of men, and great Achilles, first fell out with one another."
40
- ),
41
- }
42
 
 
 
 
43
 
44
- def main():
45
- model_name = resolve("orality")["hub_model"]
46
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
47
 
48
- print(f"Loading {model_name} on {device}...")
49
- tokenizer = AutoTokenizer.from_pretrained(model_name)
 
50
 
51
- attn_impl = "flash_attention_2" if torch.cuda.is_available() else "eager"
52
- model = AutoModel.from_pretrained(
53
- model_name,
54
- trust_remote_code=True,
55
- attn_implementation=attn_impl,
56
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
57
- ).to(device)
58
- model.eval()
59
 
60
- print(f"\n{'Text':<25} {'Score':>6} {'Bar'}")
61
- print("=" * 65)
 
 
62
 
63
- for label, text in TEXTS.items():
64
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
65
- inputs = {k: v.to(device) for k, v in inputs.items()}
66
- with torch.no_grad():
67
- output = model(**inputs)
68
- score = output.logits.squeeze().item()
69
- score = max(0.0, min(1.0, score))
70
- bar = "█" * int(score * 30) + "░" * (30 - int(score * 30))
71
- print(f"{label:<25} {score:>5.3f} {bar}")
72
 
 
 
 
73
 
74
- if __name__ == "__main__":
75
- main()
 
 
 
1
  import torch
2
+ import torch.nn as nn
3
+ from transformers import AutoModel, ModernBertConfig, ModernBertModel, PretrainedConfig, PreTrainedModel
4
+ from transformers.modeling_outputs import SequenceClassifierOutput
5
+
6
+
7
class HavelockOralityConfig(PretrainedConfig):
    """Configuration for the Havelock orality regressor.

    Carries the backbone hyperparameters (forwarded via ``**kwargs`` to
    ``PretrainedConfig``) plus one head-specific setting: the dropout
    probability applied before the regression layer.
    """

    model_type = "havelock-orality-regressor"

    def __init__(self, dropout: float = 0.1, **kwargs):
        """Record ``dropout`` and delegate everything else to the base config."""
        super().__init__(**kwargs)
        self.dropout = dropout
13
+
14
 
15
class HavelockOralityRegressor(PreTrainedModel):
    """Transformer encoder with a scalar regression head.

    Produces one continuous score per input sequence (the orality score);
    ``forward`` returns a ``SequenceClassifierOutput`` whose ``logits`` hold
    the per-item scores, shape ``(batch,)``.
    """

    config_class = HavelockOralityConfig

    def __init__(self, config, backbone=None):
        """Build the model, optionally reusing an already-loaded encoder.

        Args:
            config: ``HavelockOralityConfig`` holding both the head setting
                (``dropout``) and the backbone hyperparameters.
            backbone: pre-loaded encoder (used by ``from_backbone``); when
                ``None``, a fresh ``ModernBertModel`` is constructed from the
                backbone fields embedded in ``config``.
        """
        super().__init__(config)
        if backbone is not None:
            self.backbone = backbone
        else:
            # config.to_dict() carries the backbone hyperparameters alongside
            # the head's own settings; rebuild the encoder config from it.
            backbone_config = ModernBertConfig.from_dict(config.to_dict())
            self.backbone = ModernBertModel(backbone_config)

        self.dropout = nn.Dropout(config.dropout)
        self.regressor = nn.Linear(config.hidden_size, 1)
        self.post_init()

    @classmethod
    def from_backbone(cls, model_name: str, dropout: float = 0.1) -> "HavelockOralityRegressor":
        """Alternate constructor: wrap a pretrained encoder from the Hub.

        Args:
            model_name: Hub id or local path of the encoder to load.
            dropout: dropout probability applied before the regression head.
        """
        backbone = AutoModel.from_pretrained(model_name)
        config = HavelockOralityConfig(
            dropout=dropout,
            **backbone.config.to_dict(),
        )
        return cls(config, backbone=backbone)

    def _pool(self, last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Mean-pool token embeddings over non-padding positions.

        Fix: cast the mask to the hidden states' dtype instead of forcing
        float32 — under fp16/bf16 the old ``.float()`` promoted the pooled
        output to fp32, mismatching the half-precision regression head.
        The clamp guards against division by zero for an all-zero mask.
        """
        mask = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
        return (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Score a batch of token sequences.

        Args:
            input_ids: ``(batch, seq)`` token ids.
            attention_mask: optional ``(batch, seq)`` padding mask; treated as
                all-ones (no padding) when omitted.
            labels: optional ``(batch,)`` regression targets; when given, an
                MSE loss is computed against the scores.

        Returns:
            ``SequenceClassifierOutput`` with ``logits`` of shape ``(batch,)``
            and ``loss`` set iff ``labels`` was provided.
        """
        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)

        # Fix: the mean-pooling fallback dereferenced a None mask when the
        # caller omitted attention_mask; substitute an all-ones mask instead.
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)

        if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
            pooled = outputs.pooler_output
        else:
            pooled = self._pool(outputs.last_hidden_state, attention_mask)

        pooled = self.dropout(pooled)
        scores = self.regressor(pooled).squeeze(-1)

        loss = None
        if labels is not None:
            # Fix: cast labels so integer-dtype targets don't trip MSELoss's
            # dtype check; behavior is unchanged for float labels.
            loss = nn.MSELoss()(scores, labels.to(scores.dtype))

        return SequenceClassifierOutput(loss=loss, logits=scores)