fix: add remote code for AutoModel.from_pretrained(trust_remote_code=True)

Browse files

Files changed (4) hide show

__init__.py +3 -0
config.json +6 -2
configuration_unicosys.py +67 -0
modeling_unicosys.py +305 -0

__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+"""Unicosys Hypergraph Knowledge Model — HuggingFace remote code."""
+from .configuration_unicosys import UnicosysConfig
+from .modeling_unicosys import UnicosysHypergraphModel

config.json CHANGED Viewed

@@ -51,5 +51,9 @@
   "text_num_heads": 4,
   "text_num_layers": 2,
   "text_vocab_size": 219,
-  "transformers_version": "5.3.0"
-}

   "text_num_heads": 4,
   "text_num_layers": 2,
   "text_vocab_size": 219,
+  "transformers_version": "5.3.0",
+  "auto_map": {
+    "AutoConfig": "configuration_unicosys.UnicosysConfig",
+    "AutoModel": "modeling_unicosys.UnicosysHypergraphModel"
+  }
+}

configuration_unicosys.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Unicosys Hypergraph Knowledge Model — Configuration."""
+from transformers import PretrainedConfig
+class UnicosysConfig(PretrainedConfig):
+    """HuggingFace-compatible config for the Unicosys knowledge model."""
+    model_type = "unicosys_hypergraph"
+    def __init__(
+        self,
+        # Graph structure
+        num_node_types: int = 8,
+        num_edge_types: int = 15,
+        num_subsystems: int = 6,
+        max_nodes: int = 250000,
+        # Embedding dimensions
+        node_embed_dim: int = 128,
+        text_embed_dim: int = 256,
+        hidden_dim: int = 256,
+        # Transformer text encoder
+        text_vocab_size: int = 32000,
+        text_max_length: int = 128,
+        text_num_heads: int = 4,
+        text_num_layers: int = 2,
+        # Graph attention
+        gat_num_heads: int = 4,
+        gat_num_layers: int = 2,
+        gat_dropout: float = 0.1,
+        # Training
+        negative_sample_ratio: int = 5,
+        margin: float = 1.0,
+        # Metadata
+        case_number: str = "2025-137857",
+        num_entities: int = 0,
+        num_evidence: int = 0,
+        num_cross_links: int = 0,
+        node_type_vocab: dict = None,
+        edge_type_vocab: dict = None,
+        subsystem_vocab: dict = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.num_node_types = num_node_types
+        self.num_edge_types = num_edge_types
+        self.num_subsystems = num_subsystems
+        self.max_nodes = max_nodes
+        self.node_embed_dim = node_embed_dim
+        self.text_embed_dim = text_embed_dim
+        self.hidden_dim = hidden_dim
+        self.text_vocab_size = text_vocab_size
+        self.text_max_length = text_max_length
+        self.text_num_heads = text_num_heads
+        self.text_num_layers = text_num_layers
+        self.gat_num_heads = gat_num_heads
+        self.gat_num_layers = gat_num_layers
+        self.gat_dropout = gat_dropout
+        self.negative_sample_ratio = negative_sample_ratio
+        self.margin = margin
+        self.case_number = case_number
+        self.num_entities = num_entities
+        self.num_evidence = num_evidence
+        self.num_cross_links = num_cross_links
+        self.node_type_vocab = node_type_vocab or {}
+        self.edge_type_vocab = edge_type_vocab or {}
+        self.subsystem_vocab = subsystem_vocab or {}

modeling_unicosys.py ADDED Viewed

	@@ -0,0 +1,305 @@

+"""
+Unicosys Hypergraph Knowledge Model
+A trainable knowledge graph embedding model that encodes the unified
+hypergraph (entities, evidence, transactions, communications) as
+learned vector representations.
+Load with:
+    from transformers import AutoConfig, AutoModel
+    config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
+    model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
+"""
+import json
+import math
+from typing import Optional
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import PreTrainedModel
+from .configuration_unicosys import UnicosysConfig
+# ---------------------------------------------------------------------------
+# Text Encoder (lightweight)
+# ---------------------------------------------------------------------------
+class LightweightTextEncoder(nn.Module):
+    """A small transformer encoder for node labels and descriptions."""
+    def __init__(self, config: UnicosysConfig):
+        super().__init__()
+        self.token_embed = nn.Embedding(config.text_vocab_size, config.text_embed_dim)
+        self.pos_embed = nn.Embedding(config.text_max_length, config.text_embed_dim)
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=config.text_embed_dim,
+            nhead=config.text_num_heads,
+            dim_feedforward=config.text_embed_dim * 4,
+            dropout=config.gat_dropout,
+            batch_first=True,
+        )
+        self.encoder = nn.TransformerEncoder(
+            encoder_layer, num_layers=config.text_num_layers
+        )
+        self.pool_proj = nn.Linear(config.text_embed_dim, config.hidden_dim)
+    def forward(self, input_ids, attention_mask=None):
+        B, L = input_ids.shape
+        positions = torch.arange(L, device=input_ids.device).unsqueeze(0).expand(B, -1)
+        x = self.token_embed(input_ids) + self.pos_embed(positions)
+        if attention_mask is not None:
+            src_key_padding_mask = attention_mask == 0
+        else:
+            src_key_padding_mask = None
+        x = self.encoder(x, src_key_padding_mask=src_key_padding_mask)
+        if attention_mask is not None:
+            mask = attention_mask.unsqueeze(-1).float()
+            pooled = (x * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
+        else:
+            pooled = x.mean(dim=1)
+        return self.pool_proj(pooled)
+# ---------------------------------------------------------------------------
+# Graph Attention Layer
+# ---------------------------------------------------------------------------
+class GraphAttentionLayer(nn.Module):
+    """Multi-head graph attention for hypergraph node updates."""
+    def __init__(self, config: UnicosysConfig):
+        super().__init__()
+        self.num_heads = config.gat_num_heads
+        self.head_dim = config.hidden_dim // config.gat_num_heads
+        assert self.head_dim * self.num_heads == config.hidden_dim
+        self.q_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
+        self.k_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
+        self.v_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
+        self.edge_proj = nn.Linear(config.node_embed_dim, config.hidden_dim)
+        self.out_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
+        self.norm = nn.LayerNorm(config.hidden_dim)
+        self.dropout = nn.Dropout(config.gat_dropout)
+    def forward(self, node_embeds, edge_index, edge_type_embeds):
+        N = node_embeds.size(0)
+        src, tgt = edge_index
+        q = self.q_proj(node_embeds[tgt])
+        k = self.k_proj(node_embeds[src])
+        v = self.v_proj(node_embeds[src])
+        edge_bias = self.edge_proj(edge_type_embeds)
+        k = k + edge_bias
+        q = q.view(-1, self.num_heads, self.head_dim)
+        k = k.view(-1, self.num_heads, self.head_dim)
+        v = v.view(-1, self.num_heads, self.head_dim)
+        attn = (q * k).sum(dim=-1) / math.sqrt(self.head_dim)
+        attn_max = torch.zeros(N, self.num_heads, device=attn.device)
+        attn_max.scatter_reduce_(0, tgt.unsqueeze(1).expand_as(attn), attn, reduce="amax")
+        attn = torch.exp(attn - attn_max[tgt])
+        attn_sum = torch.zeros(N, self.num_heads, device=attn.device)
+        attn_sum.scatter_add_(0, tgt.unsqueeze(1).expand_as(attn), attn)
+        attn = attn / attn_sum[tgt].clamp(min=1e-8)
+        attn = self.dropout(attn)
+        weighted = v * attn.unsqueeze(-1)
+        weighted = weighted.view(-1, self.num_heads * self.head_dim)
+        out = torch.zeros(N, self.num_heads * self.head_dim, device=weighted.device)
+        out.scatter_add_(0, tgt.unsqueeze(1).expand_as(weighted), weighted)
+        out = self.out_proj(out)
+        return self.norm(node_embeds + out)
+# ---------------------------------------------------------------------------
+# Link Prediction Head
+# ---------------------------------------------------------------------------
+class LinkPredictionHead(nn.Module):
+    """Scores candidate edges for link prediction training."""
+    def __init__(self, config: UnicosysConfig):
+        super().__init__()
+        self.edge_type_embed = nn.Embedding(config.num_edge_types, config.hidden_dim)
+        self.scorer = nn.Sequential(
+            nn.Linear(config.hidden_dim * 3, config.hidden_dim),
+            nn.ReLU(),
+            nn.Dropout(config.gat_dropout),
+            nn.Linear(config.hidden_dim, 1),
+        )
+    def forward(self, src_embeds, tgt_embeds, edge_type_ids):
+        edge_embeds = self.edge_type_embed(edge_type_ids)
+        combined = torch.cat([src_embeds, tgt_embeds, edge_embeds], dim=-1)
+        return self.scorer(combined).squeeze(-1)
+# ---------------------------------------------------------------------------
+# Main Model
+# ---------------------------------------------------------------------------
+class UnicosysHypergraphModel(PreTrainedModel):
+    """
+    Unicosys Hypergraph Knowledge Model.
+    Encodes the unified hypergraph as trainable embeddings with:
+    - Node type + subsystem structural embeddings
+    - Text-based semantic embeddings from labels/descriptions
+    - Graph attention for relational reasoning
+    - Link prediction for discovering missing evidence connections
+    Usage:
+        from transformers import AutoConfig, AutoModel
+        config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
+        model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
+    """
+    config_class = UnicosysConfig
+    def __init__(self, config: UnicosysConfig):
+        super().__init__(config)
+        # Structural embeddings
+        self.node_type_embed = nn.Embedding(config.num_node_types, config.node_embed_dim)
+        self.subsystem_embed = nn.Embedding(config.num_subsystems, config.node_embed_dim)
+        self.node_id_embed = nn.Embedding(config.max_nodes, config.node_embed_dim)
+        # Project structural features to hidden dim
+        self.struct_proj = nn.Linear(config.node_embed_dim * 3, config.hidden_dim)
+        # Text encoder for labels
+        self.text_encoder = LightweightTextEncoder(config)
+        # Combine structural + text
+        self.combine_proj = nn.Linear(config.hidden_dim * 2, config.hidden_dim)
+        self.combine_norm = nn.LayerNorm(config.hidden_dim)
+        # Graph attention layers
+        self.gat_layers = nn.ModuleList([
+            GraphAttentionLayer(config) for _ in range(config.gat_num_layers)
+        ])
+        # Edge type embeddings for GAT
+        self.edge_type_embed_gat = nn.Embedding(
+            config.num_edge_types, config.node_embed_dim
+        )
+        # Link prediction head
+        self.link_predictor = LinkPredictionHead(config)
+        # Initialize weights
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            nn.init.xavier_uniform_(module.weight)
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+    def encode_nodes(
+        self,
+        node_ids: torch.LongTensor,
+        node_type_ids: torch.LongTensor,
+        subsystem_ids: torch.LongTensor,
+        text_input_ids: Optional[torch.LongTensor] = None,
+        text_attention_mask: Optional[torch.LongTensor] = None,
+    ) -> torch.Tensor:
+        """Encode nodes into dense vectors of shape (N, hidden_dim)."""
+        struct = torch.cat([
+            self.node_id_embed(node_ids),
+            self.node_type_embed(node_type_ids),
+            self.subsystem_embed(subsystem_ids),
+        ], dim=-1)
+        struct = self.struct_proj(struct)
+        if text_input_ids is not None:
+            text = self.text_encoder(text_input_ids, text_attention_mask)
+            combined = torch.cat([struct, text], dim=-1)
+            return self.combine_norm(self.combine_proj(combined))
+        else:
+            zeros = torch.zeros_like(struct)
+            combined = torch.cat([struct, zeros], dim=-1)
+            return self.combine_norm(self.combine_proj(combined))
+    def forward(
+        self,
+        node_ids: torch.LongTensor,
+        node_type_ids: torch.LongTensor,
+        subsystem_ids: torch.LongTensor,
+        edge_index: torch.LongTensor,
+        edge_type_ids: torch.LongTensor,
+        text_input_ids: Optional[torch.LongTensor] = None,
+        text_attention_mask: Optional[torch.LongTensor] = None,
+        pos_edge_index: Optional[torch.LongTensor] = None,
+        pos_edge_types: Optional[torch.LongTensor] = None,
+        neg_edge_index: Optional[torch.LongTensor] = None,
+        neg_edge_types: Optional[torch.LongTensor] = None,
+        labels: Optional[torch.FloatTensor] = None,
+    ):
+        """
+        Forward pass with optional link prediction training.
+        Returns dict with:
+            - node_embeddings: (N, hidden_dim)
+            - loss: scalar (if labels provided)
+            - pos_scores: scores for positive edges
+            - neg_scores: scores for negative edges
+        """
+        # 1. Encode all nodes
+        node_embeds = self.encode_nodes(
+            node_ids, node_type_ids, subsystem_ids,
+            text_input_ids, text_attention_mask,
+        )
+        # 2. Graph attention message passing
+        edge_type_embeds = self.edge_type_embed_gat(edge_type_ids)
+        for gat_layer in self.gat_layers:
+            node_embeds = gat_layer(node_embeds, edge_index, edge_type_embeds)
+        result = {"node_embeddings": node_embeds}
+        # 3. Link prediction (if training edges provided)
+        if pos_edge_index is not None and neg_edge_index is not None:
+            pos_src, pos_tgt = pos_edge_index
+            neg_src, neg_tgt = neg_edge_index
+            pos_scores = self.link_predictor(
+                node_embeds[pos_src], node_embeds[pos_tgt], pos_edge_types
+            )
+            neg_scores = self.link_predictor(
+                node_embeds[neg_src], node_embeds[neg_tgt], neg_edge_types
+            )
+            result["pos_scores"] = pos_scores
+            result["neg_scores"] = neg_scores
+            if labels is not None:
+                loss = F.margin_ranking_loss(
+                    pos_scores, neg_scores,
+                    torch.ones_like(pos_scores),
+                    margin=self.config.margin,
+                )
+                result["loss"] = loss
+        return result
+    def get_node_embedding(self, node_idx: int) -> torch.Tensor:
+        """Get the embedding for a single node by index."""
+        with torch.no_grad():
+            return self.node_id_embed.weight[node_idx]