Text Classification
Transformers
Safetensors
English
shannon2
image-feature-extraction
finance
news
macro
financial-news
custom_code
Instructions to use BinomialTechnologies/binomial-shannon-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use BinomialTechnologies/binomial-shannon-2 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-classification", model="BinomialTechnologies/binomial-shannon-2", trust_remote_code=True)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("BinomialTechnologies/binomial-shannon-2", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| """Self-contained model class for binomial-shannon-2. | |
| Distributed alongside the weights on HuggingFace Hub so anyone can do: | |
| from transformers import AutoTokenizer, AutoModel | |
| tok = AutoTokenizer.from_pretrained("BinomialTechnologies/binomial-shannon-2") | |
| model = AutoModel.from_pretrained("BinomialTechnologies/binomial-shannon-2", | |
| trust_remote_code=True) | |
| Imports only from `transformers` + `torch` — no project-internal dependencies. | |
| Module names match the training checkpoint so weights load verbatim. | |
| Architecture: | |
| shared encoder | |
| ↓ (CLS + masked mean pool concatenated) | |
| ↓ 2-way router (ticker | macro) | |
| ↓ ticker head bank (19 outputs) + macro head bank (35 outputs) | |
| Router mode_prob over {ticker, macro} | |
| Ticker bank event(10) + tone + implied_direction + novelty | |
| + claim(4) + specificity + materiality (19, = shannon-1) | |
| Macro bank topic(18) + directional_read + severity(5) | |
| + novelty_macro(3) + claim_macro(4) + hawkish_dovish(5) (35) | |
| """ | |
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| from typing import Optional | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from transformers import AutoConfig | |
| from transformers.modeling_utils import PreTrainedModel | |
| from transformers.modeling_outputs import ModelOutput | |
| from .configuration_shannon2 import ( | |
| Shannon2Config, EVENTS, CLAIM_TYPES, TOPICS, SEVERITY_BUCKETS, | |
| NOVELTY_BUCKETS_MACRO, CLAIM_TYPES_MACRO, HAWKISH_DOVISH_BUCKETS, | |
| ) | |
# Output width of every categorical head, taken from the length of the
# matching label vocabulary imported from `configuration_shannon2`.
# NOTE(review): the module docstring lists the macro bank as
# 18 + 1 + 5 + 3 + 4 + 5, which sums to 36, yet states 35 outputs — confirm
# the real vocabulary sizes against the configuration module.
(
    N_EVENTS,
    N_CLAIMS_TICKER,
    N_TOPICS,
    N_SEVERITY,
    N_NOVELTY_MACRO,
    N_CLAIMS_MACRO,
    N_HD,
) = map(
    len,
    (
        EVENTS,
        CLAIM_TYPES,
        TOPICS,
        SEVERITY_BUCKETS,
        NOVELTY_BUCKETS_MACRO,
        CLAIM_TYPES_MACRO,
        HAWKISH_DOVISH_BUCKETS,
    ),
)

# Integer ids for the two routing modes. Presumably these are the column
# positions of the router output (ticker first, macro second); they are not
# referenced elsewhere in the visible code — TODO confirm.
MODE_TICKER, MODE_MACRO = 0, 1
@dataclass
class Shannon2Output(ModelOutput):
    """Raw, un-normalised outputs of every head of ``Shannon2MultiHead``.

    ``ModelOutput`` subclasses must be dataclasses: recent transformers
    releases raise ``TypeError`` on instantiation otherwise, which is why the
    ``@dataclass`` decorator is required here.

    Fields ending in ``_logits`` come straight from a head's final linear
    layer and keep a trailing class dimension. The remaining fields come from
    single-output heads whose trailing dimension is squeezed away in
    ``forward``. No activation or clamping is applied to any field; see
    ``Shannon2MultiHead.predict`` for the post-processed view.
    """

    # Router over the two modes (2-wide).
    mode_logits: Optional[torch.Tensor] = None

    # ticker bank
    event_logits: Optional[torch.Tensor] = None          # N_EVENTS-wide
    tone: Optional[torch.Tensor] = None                  # single value per row
    implied_direction: Optional[torch.Tensor] = None     # single value per row
    novelty: Optional[torch.Tensor] = None               # single value per row
    claim_logits: Optional[torch.Tensor] = None          # N_CLAIMS_TICKER-wide
    specificity: Optional[torch.Tensor] = None           # single value per row
    materiality_if_true: Optional[torch.Tensor] = None   # single value per row

    # macro bank
    topic_logits: Optional[torch.Tensor] = None          # N_TOPICS-wide
    directional_read: Optional[torch.Tensor] = None      # single value per row
    severity_logits: Optional[torch.Tensor] = None       # N_SEVERITY-wide
    novelty_macro_logits: Optional[torch.Tensor] = None  # N_NOVELTY_MACRO-wide
    claim_macro_logits: Optional[torch.Tensor] = None    # N_CLAIMS_MACRO-wide
    hawkish_dovish_logits: Optional[torch.Tensor] = None # N_HD-wide
class Shannon2MultiHead(PreTrainedModel):
    """Shared text encoder feeding a router head and two banks of MLP heads.

    The encoder's last hidden state is pooled into one vector per input row
    (first-token vector concatenated with the mask-weighted mean), passed
    through dropout, and then fed to every head independently. All heads are
    always evaluated; the router output is returned alongside them and is not
    used to gate the other heads inside this class.

    Attribute names (``encoder``, ``head_*``) and their creation order are
    kept stable because they determine the state-dict keys that checkpoint
    weights are matched against.
    """

    config_class = Shannon2Config
    base_model_prefix = "shannon2"
    # Declared empty at class level: this model ties no weights.
    # NOTE(review): `all_tied_weights_keys` looks like a compatibility shim for
    # transformers versions that read this attribute — confirm before removing.
    _tied_weights_keys: list = []
    all_tied_weights_keys: dict = {}

    def __init__(self, config: Shannon2Config) -> None:
        """Build the encoder skeleton and all heads from ``config``.

        The encoder is instantiated from a config only (``from_config``), so
        no encoder weights are downloaded here; weights are expected to be
        filled in by the checkpoint loader.

        Args:
            config: Model configuration. Reads ``encoder_config`` (optional
                dict), ``encoder_name_or_path``, ``max_position_embeddings``,
                ``head_h1``, ``head_h2`` and ``dropout``.
        """
        super().__init__(config)
        self.config = config

        # Rebuild the encoder from the bundled config so loading works offline.
        if hasattr(config, "encoder_config") and config.encoder_config:
            from transformers.models.auto.configuration_auto import CONFIG_MAPPING

            mtype = config.encoder_config.get("model_type")
            if mtype and mtype in CONFIG_MAPPING:
                enc_cfg = CONFIG_MAPPING[mtype].from_dict(config.encoder_config)
            else:
                # Unknown or missing model_type: fall back to resolving the
                # encoder config by name/path.
                enc_cfg = AutoConfig.from_pretrained(config.encoder_name_or_path)
        else:
            enc_cfg = AutoConfig.from_pretrained(config.encoder_name_or_path)

        # Only ever widen the encoder's position limit, never shrink it. An
        # encoder config without the attribute is treated as supporting 8192.
        if config.max_position_embeddings > getattr(enc_cfg, "max_position_embeddings", 8192):
            enc_cfg.max_position_embeddings = config.max_position_embeddings

        from transformers import AutoModel as _AutoModel

        self.encoder = _AutoModel.from_config(enc_cfg, attn_implementation="sdpa")

        H = enc_cfg.hidden_size
        head_in = 2 * H  # first-token vector and mean-pooled vector, concatenated
        h1, h2 = config.head_h1, config.head_h2
        d = config.dropout
        self.dropout = nn.Dropout(d)

        def _mlp(out_dim: int) -> nn.Sequential:
            """Return a 3-layer GELU MLP mapping the pooled vector to ``out_dim``."""
            return nn.Sequential(
                nn.Linear(head_in, h1), nn.GELU(), nn.Dropout(d),
                nn.Linear(h1, h2), nn.GELU(), nn.Dropout(d),
                nn.Linear(h2, out_dim),
            )

        # Router
        self.head_router = _mlp(2)

        # Ticker bank
        self.head_event = _mlp(N_EVENTS)
        self.head_tone = _mlp(1)
        self.head_implied_direction = _mlp(1)
        self.head_novelty = _mlp(1)
        self.head_claim = _mlp(N_CLAIMS_TICKER)
        self.head_specificity = _mlp(1)
        self.head_materiality = _mlp(1)

        # Macro bank
        self.head_topic = _mlp(N_TOPICS)
        self.head_directional_read = _mlp(1)
        self.head_severity = _mlp(N_SEVERITY)
        self.head_novelty_macro = _mlp(N_NOVELTY_MACRO)
        self.head_claim_macro = _mlp(N_CLAIMS_MACRO)
        self.head_hawkish_dovish = _mlp(N_HD)

    def _pool(self, last_hidden: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Pool token states into one vector per row.

        Concatenates the state at position 0 with the mean of all states
        weighted by ``attention_mask``, then applies dropout.

        Args:
            last_hidden: Encoder output; indexed as ``[:, 0]`` and summed over
                dim 1, so it is assumed to be (batch, seq, hidden) — TODO confirm.
            attention_mask: Per-token weights, broadcast over the hidden dim.

        Returns:
            Tensor whose last dimension is twice the encoder hidden size.
        """
        cls = last_hidden[:, 0]
        m = attention_mask.unsqueeze(-1).to(last_hidden.dtype)
        # Clamp the token count so an all-zero mask row does not divide by zero.
        mean_pool = (last_hidden * m).sum(1) / m.sum(1).clamp(min=1.0)
        return self.dropout(torch.cat([cls, mean_pool], dim=-1))

    def forward(self, input_ids: torch.Tensor,
                attention_mask: Optional[torch.Tensor] = None,
                **kwargs) -> Shannon2Output:
        """Encode the inputs and evaluate every head.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Mask passed to the encoder and used for mean
                pooling. When omitted, every position is treated as a real
                token (an all-ones mask shaped like ``input_ids``), so padded
                batches should always pass an explicit mask.
            **kwargs: Accepted for call-site compatibility and ignored; they
                are not forwarded to the encoder.

        Returns:
            ``Shannon2Output`` holding raw logits / raw regression values.
        """
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)
        enc = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self._pool(enc.last_hidden_state, attention_mask)
        return Shannon2Output(
            mode_logits=self.head_router(pooled),
            event_logits=self.head_event(pooled),
            tone=self.head_tone(pooled).squeeze(-1),
            implied_direction=self.head_implied_direction(pooled).squeeze(-1),
            novelty=self.head_novelty(pooled).squeeze(-1),
            claim_logits=self.head_claim(pooled),
            specificity=self.head_specificity(pooled).squeeze(-1),
            materiality_if_true=self.head_materiality(pooled).squeeze(-1),
            topic_logits=self.head_topic(pooled),
            directional_read=self.head_directional_read(pooled).squeeze(-1),
            severity_logits=self.head_severity(pooled),
            novelty_macro_logits=self.head_novelty_macro(pooled),
            claim_macro_logits=self.head_claim_macro(pooled),
            hawkish_dovish_logits=self.head_hawkish_dovish(pooled),
        )

    def predict(self, input_ids: torch.Tensor,
                attention_mask: Optional[torch.Tensor] = None,
                mention_threshold: float = 0.5) -> dict:
        """Run the model and post-process every head into its final range.

        Events are scored with an independent sigmoid per class; all other
        categorical heads use a softmax over the last dimension; scalar heads
        are clamped to ``[-1, 1]`` or ``[0, 1]``.

        This method does not switch the module to eval mode and does not
        disable autograd; callers wanting deterministic, gradient-free
        inference should call ``model.eval()`` and wrap the call themselves.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional mask; see ``forward``.
            mention_threshold: Event probabilities at or above this value are
                flagged as mentioned.

        Returns:
            Dict of tensors keyed by output name.
        """
        # Call the module itself rather than `.forward` so that registered
        # forward hooks are honoured.
        out = self(input_ids=input_ids, attention_mask=attention_mask)
        ev_prob = torch.sigmoid(out.event_logits)
        return {
            "mode_prob": F.softmax(out.mode_logits, dim=-1),
            # ticker
            "event_prob": ev_prob,
            "event_mentioned": (ev_prob >= mention_threshold).float(),
            "tone": out.tone.clamp(-1.0, 1.0),
            "implied_direction": out.implied_direction.clamp(-1.0, 1.0),
            "novelty": out.novelty.clamp(0.0, 1.0),
            "claim_prob": F.softmax(out.claim_logits, dim=-1),
            "specificity": out.specificity.clamp(0.0, 1.0),
            "materiality_if_true": out.materiality_if_true.clamp(0.0, 1.0),
            # macro
            "topic_prob": F.softmax(out.topic_logits, dim=-1),
            "directional_read": out.directional_read.clamp(-1.0, 1.0),
            "severity_prob": F.softmax(out.severity_logits, dim=-1),
            "novelty_macro_prob": F.softmax(out.novelty_macro_logits, dim=-1),
            "claim_macro_prob": F.softmax(out.claim_macro_logits, dim=-1),
            "hawkish_dovish_prob": F.softmax(out.hawkish_dovish_logits, dim=-1),
        }

    def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None):
        """Enable gradient checkpointing on the encoder, if it supports it.

        Only the encoder is affected; the heads are left untouched.
        """
        if hasattr(self.encoder, "gradient_checkpointing_enable"):
            self.encoder.gradient_checkpointing_enable(
                gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)

    def gradient_checkpointing_disable(self):
        """Disable gradient checkpointing on the encoder, if it supports it."""
        if hasattr(self.encoder, "gradient_checkpointing_disable"):
            self.encoder.gradient_checkpointing_disable()