drzo committed on
Commit
6db96db
·
verified ·
1 Parent(s): 539df93

fix: add remote code for AutoModel.from_pretrained(trust_remote_code=True)

Browse files
Files changed (4) hide show
  1. __init__.py +3 -0
  2. config.json +6 -2
  3. configuration_unicosys.py +67 -0
  4. modeling_unicosys.py +305 -0
__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """Unicosys Hypergraph Knowledge Model — HuggingFace remote code."""
2
+ from .configuration_unicosys import UnicosysConfig
3
+ from .modeling_unicosys import UnicosysHypergraphModel
config.json CHANGED
@@ -51,5 +51,9 @@
51
  "text_num_heads": 4,
52
  "text_num_layers": 2,
53
  "text_vocab_size": 219,
54
- "transformers_version": "5.3.0"
55
- }
 
 
 
 
 
51
  "text_num_heads": 4,
52
  "text_num_layers": 2,
53
  "text_vocab_size": 219,
54
+ "transformers_version": "5.3.0",
55
+ "auto_map": {
56
+ "AutoConfig": "configuration_unicosys.UnicosysConfig",
57
+ "AutoModel": "modeling_unicosys.UnicosysHypergraphModel"
58
+ }
59
+ }
configuration_unicosys.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unicosys Hypergraph Knowledge Model — Configuration."""
2
+
3
+ from transformers import PretrainedConfig
4
+
5
+
6
class UnicosysConfig(PretrainedConfig):
    """Configuration for the Unicosys hypergraph knowledge model.

    Groups four families of hyperparameters: graph-structure sizes,
    embedding widths, text-encoder / graph-attention settings, and
    dataset metadata (entity counts, vocabularies, case number).
    """

    model_type = "unicosys_hypergraph"

    def __init__(
        self,
        # Graph structure
        num_node_types: int = 8,
        num_edge_types: int = 15,
        num_subsystems: int = 6,
        max_nodes: int = 250000,
        # Embedding dimensions
        node_embed_dim: int = 128,
        text_embed_dim: int = 256,
        hidden_dim: int = 256,
        # Transformer text encoder
        text_vocab_size: int = 32000,
        text_max_length: int = 128,
        text_num_heads: int = 4,
        text_num_layers: int = 2,
        # Graph attention
        gat_num_heads: int = 4,
        gat_num_layers: int = 2,
        gat_dropout: float = 0.1,
        # Training
        negative_sample_ratio: int = 5,
        margin: float = 1.0,
        # Metadata
        case_number: str = "2025-137857",
        num_entities: int = 0,
        num_evidence: int = 0,
        num_cross_links: int = 0,
        node_type_vocab: dict = None,
        edge_type_vocab: dict = None,
        subsystem_vocab: dict = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Scalar hyperparameters are stored verbatim under their parameter
        # names so PretrainedConfig serialization round-trips them unchanged.
        for attr_name, attr_value in (
            ("num_node_types", num_node_types),
            ("num_edge_types", num_edge_types),
            ("num_subsystems", num_subsystems),
            ("max_nodes", max_nodes),
            ("node_embed_dim", node_embed_dim),
            ("text_embed_dim", text_embed_dim),
            ("hidden_dim", hidden_dim),
            ("text_vocab_size", text_vocab_size),
            ("text_max_length", text_max_length),
            ("text_num_heads", text_num_heads),
            ("text_num_layers", text_num_layers),
            ("gat_num_heads", gat_num_heads),
            ("gat_num_layers", gat_num_layers),
            ("gat_dropout", gat_dropout),
            ("negative_sample_ratio", negative_sample_ratio),
            ("margin", margin),
            ("case_number", case_number),
            ("num_entities", num_entities),
            ("num_evidence", num_evidence),
            ("num_cross_links", num_cross_links),
        ):
            setattr(self, attr_name, attr_value)

        # Vocabularies fall back to fresh empty dicts so instances never
        # share a mutable default.
        self.node_type_vocab = node_type_vocab or {}
        self.edge_type_vocab = edge_type_vocab or {}
        self.subsystem_vocab = subsystem_vocab or {}
modeling_unicosys.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unicosys Hypergraph Knowledge Model
3
+
4
+ A trainable knowledge graph embedding model that encodes the unified
5
+ hypergraph (entities, evidence, transactions, communications) as
6
+ learned vector representations.
7
+
8
+ Load with:
9
+ from transformers import AutoConfig, AutoModel
10
+ config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
11
+ model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
12
+ """
13
+
14
+ import json
15
+ import math
16
+ from typing import Optional
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+ from transformers import PreTrainedModel
22
+
23
+ from .configuration_unicosys import UnicosysConfig
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Text Encoder (lightweight)
28
+ # ---------------------------------------------------------------------------
29
+
30
class LightweightTextEncoder(nn.Module):
    """Compact transformer encoder for node labels and descriptions.

    Maps token ids of shape (batch, seq) to pooled vectors of shape
    (batch, hidden_dim) via learned token + position embeddings, a small
    TransformerEncoder stack, and masked mean pooling.
    """

    def __init__(self, config: UnicosysConfig):
        super().__init__()
        self.token_embed = nn.Embedding(config.text_vocab_size, config.text_embed_dim)
        self.pos_embed = nn.Embedding(config.text_max_length, config.text_embed_dim)

        layer = nn.TransformerEncoderLayer(
            d_model=config.text_embed_dim,
            nhead=config.text_num_heads,
            dim_feedforward=config.text_embed_dim * 4,
            dropout=config.gat_dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=config.text_num_layers)
        # Projects the pooled sentence vector into the model's hidden space.
        self.pool_proj = nn.Linear(config.text_embed_dim, config.hidden_dim)

    def forward(self, input_ids, attention_mask=None):
        batch_size, seq_len = input_ids.shape
        position_ids = (
            torch.arange(seq_len, device=input_ids.device)
            .unsqueeze(0)
            .expand(batch_size, -1)
        )
        hidden = self.token_embed(input_ids) + self.pos_embed(position_ids)

        # nn.Transformer convention: True marks PADDING positions to ignore,
        # i.e. the inverse of the HF-style attention_mask (1 = real token).
        padding_mask = None if attention_mask is None else attention_mask == 0
        hidden = self.encoder(hidden, src_key_padding_mask=padding_mask)

        if attention_mask is None:
            pooled = hidden.mean(dim=1)
        else:
            # Mean over valid tokens only; clamp avoids divide-by-zero for
            # fully-padded rows.
            weights = attention_mask.unsqueeze(-1).float()
            pooled = (hidden * weights).sum(dim=1) / weights.sum(dim=1).clamp(min=1)

        return self.pool_proj(pooled)
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Graph Attention Layer
73
+ # ---------------------------------------------------------------------------
74
+
75
class GraphAttentionLayer(nn.Module):
    """Multi-head graph attention for hypergraph node updates.

    One residual message-passing step: for every directed edge (src -> tgt)
    a per-head attention logit is computed from projected node embeddings
    (plus an edge-type bias on the keys), normalized per target node with a
    scatter-based softmax, and used to aggregate source messages into the
    target node. The output is added residually and LayerNorm-ed.
    """

    def __init__(self, config: UnicosysConfig):
        super().__init__()
        self.num_heads = config.gat_num_heads
        self.head_dim = config.hidden_dim // config.gat_num_heads
        # hidden_dim must divide evenly across attention heads.
        assert self.head_dim * self.num_heads == config.hidden_dim

        self.q_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.k_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.v_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        # Edge-type embeddings live in node_embed_dim space; project up to
        # hidden_dim so they can bias the keys.
        self.edge_proj = nn.Linear(config.node_embed_dim, config.hidden_dim)
        self.out_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.norm = nn.LayerNorm(config.hidden_dim)
        self.dropout = nn.Dropout(config.gat_dropout)

    def forward(self, node_embeds, edge_index, edge_type_embeds):
        # node_embeds: (N, hidden_dim); edge_index: (2, E) as (src, tgt);
        # edge_type_embeds: (E, node_embed_dim).
        N = node_embeds.size(0)
        src, tgt = edge_index

        # Queries come from targets, keys/values from sources: attention is
        # over each target's incoming edges.
        q = self.q_proj(node_embeds[tgt])
        k = self.k_proj(node_embeds[src])
        v = self.v_proj(node_embeds[src])

        # Relation-aware keys: add the projected edge-type embedding.
        edge_bias = self.edge_proj(edge_type_embeds)
        k = k + edge_bias

        # Split into heads: (E, num_heads, head_dim).
        q = q.view(-1, self.num_heads, self.head_dim)
        k = k.view(-1, self.num_heads, self.head_dim)
        v = v.view(-1, self.num_heads, self.head_dim)

        # Scaled dot-product logits per edge and head: (E, num_heads).
        attn = (q * k).sum(dim=-1) / math.sqrt(self.head_dim)

        # Scatter softmax over each target's incoming edges.
        # NOTE(review): attn_max starts at zero and scatter_reduce_ keeps the
        # initial value by default (include_self=True), so the subtracted max
        # is effectively max(logits, 0). The softmax is still mathematically
        # correct (the shift cancels between numerator and denominator); it
        # only reduces the numerical-stability benefit when all logits are
        # negative — confirm if exact log-sum-exp stability matters here.
        attn_max = torch.zeros(N, self.num_heads, device=attn.device)
        attn_max.scatter_reduce_(0, tgt.unsqueeze(1).expand_as(attn), attn, reduce="amax")
        attn = torch.exp(attn - attn_max[tgt])

        # Per-target normalizer; clamp guards nodes with no incoming edges.
        attn_sum = torch.zeros(N, self.num_heads, device=attn.device)
        attn_sum.scatter_add_(0, tgt.unsqueeze(1).expand_as(attn), attn)
        attn = attn / attn_sum[tgt].clamp(min=1e-8)
        # Dropout on normalized attention weights (standard GAT-style
        # regularization; weights no longer sum to 1 in training mode).
        attn = self.dropout(attn)

        # Weight messages and merge heads back to (E, hidden_dim).
        weighted = v * attn.unsqueeze(-1)
        weighted = weighted.view(-1, self.num_heads * self.head_dim)

        # Sum weighted messages into their target nodes: (N, hidden_dim).
        out = torch.zeros(N, self.num_heads * self.head_dim, device=weighted.device)
        out.scatter_add_(0, tgt.unsqueeze(1).expand_as(weighted), weighted)

        # Residual connection + LayerNorm. Nodes with no incoming edges pass
        # through with out == 0, i.e. only the residual path.
        out = self.out_proj(out)
        return self.norm(node_embeds + out)
126
+
127
+
128
+ # ---------------------------------------------------------------------------
129
+ # Link Prediction Head
130
+ # ---------------------------------------------------------------------------
131
+
132
class LinkPredictionHead(nn.Module):
    """MLP scorer over (source, target, edge-type) triples.

    Produces one unbounded plausibility score per candidate edge; higher
    means the edge is more likely to exist.
    """

    def __init__(self, config: UnicosysConfig):
        super().__init__()
        self.edge_type_embed = nn.Embedding(config.num_edge_types, config.hidden_dim)
        self.scorer = nn.Sequential(
            nn.Linear(3 * config.hidden_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Dropout(config.gat_dropout),
            nn.Linear(config.hidden_dim, 1),
        )

    def forward(self, src_embeds, tgt_embeds, edge_type_ids):
        # Concatenate [source ; target ; relation] and score: (E,).
        relation = self.edge_type_embed(edge_type_ids)
        triple = torch.cat((src_embeds, tgt_embeds, relation), dim=-1)
        return self.scorer(triple).squeeze(-1)
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Main Model
153
+ # ---------------------------------------------------------------------------
154
+
155
class UnicosysHypergraphModel(PreTrainedModel):
    """
    Unicosys Hypergraph Knowledge Model.

    Encodes the unified hypergraph as trainable embeddings with:
    - Node type + subsystem structural embeddings
    - Text-based semantic embeddings from labels/descriptions
    - Graph attention for relational reasoning
    - Link prediction for discovering missing evidence connections

    Usage:
        from transformers import AutoConfig, AutoModel
        config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
        model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
    """

    config_class = UnicosysConfig

    def __init__(self, config: UnicosysConfig):
        super().__init__(config)

        # Structural embeddings: one table per node id, node type, subsystem.
        # NOTE: submodule registration order here fixes state_dict key order —
        # keep it stable across releases for checkpoint compatibility.
        self.node_type_embed = nn.Embedding(config.num_node_types, config.node_embed_dim)
        self.subsystem_embed = nn.Embedding(config.num_subsystems, config.node_embed_dim)
        self.node_id_embed = nn.Embedding(config.max_nodes, config.node_embed_dim)

        # Project concatenated structural features (3 * node_embed_dim) to hidden dim.
        self.struct_proj = nn.Linear(config.node_embed_dim * 3, config.hidden_dim)

        # Text encoder for labels/descriptions.
        self.text_encoder = LightweightTextEncoder(config)

        # Fuse structural + text vectors (concatenated) back to hidden_dim.
        self.combine_proj = nn.Linear(config.hidden_dim * 2, config.hidden_dim)
        self.combine_norm = nn.LayerNorm(config.hidden_dim)

        # Stacked graph-attention message-passing layers.
        self.gat_layers = nn.ModuleList([
            GraphAttentionLayer(config) for _ in range(config.gat_num_layers)
        ])

        # Edge-type embeddings consumed by the GAT layers (distinct from the
        # link predictor's own edge-type table).
        self.edge_type_embed_gat = nn.Embedding(
            config.num_edge_types, config.node_embed_dim
        )

        # Link prediction head.
        self.link_predictor = LinkPredictionHead(config)

        # Initialize weights (recursively, via _init_weights).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform for Linear layers, N(0, 0.02) for Embeddings."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def encode_nodes(
        self,
        node_ids: torch.LongTensor,
        node_type_ids: torch.LongTensor,
        subsystem_ids: torch.LongTensor,
        text_input_ids: Optional[torch.LongTensor] = None,
        text_attention_mask: Optional[torch.LongTensor] = None,
    ) -> torch.Tensor:
        """Encode nodes into dense vectors of shape (N, hidden_dim).

        Concatenates the three structural embeddings, projects to hidden_dim,
        then fuses with text embeddings when token ids are provided. Without
        text, a zero vector stands in so combine_proj sees a fixed-width input.
        """
        struct = torch.cat([
            self.node_id_embed(node_ids),
            self.node_type_embed(node_type_ids),
            self.subsystem_embed(subsystem_ids),
        ], dim=-1)
        struct = self.struct_proj(struct)

        if text_input_ids is not None:
            text = self.text_encoder(text_input_ids, text_attention_mask)
            combined = torch.cat([struct, text], dim=-1)
            return self.combine_norm(self.combine_proj(combined))
        else:
            # No text available: pad the text slot with zeros.
            zeros = torch.zeros_like(struct)
            combined = torch.cat([struct, zeros], dim=-1)
            return self.combine_norm(self.combine_proj(combined))

    def forward(
        self,
        node_ids: torch.LongTensor,
        node_type_ids: torch.LongTensor,
        subsystem_ids: torch.LongTensor,
        edge_index: torch.LongTensor,
        edge_type_ids: torch.LongTensor,
        text_input_ids: Optional[torch.LongTensor] = None,
        text_attention_mask: Optional[torch.LongTensor] = None,
        pos_edge_index: Optional[torch.LongTensor] = None,
        pos_edge_types: Optional[torch.LongTensor] = None,
        neg_edge_index: Optional[torch.LongTensor] = None,
        neg_edge_types: Optional[torch.LongTensor] = None,
        labels: Optional[torch.FloatTensor] = None,
    ):
        """
        Forward pass with optional link prediction training.

        Args mirror the graph layout: edge_index is (2, E) as (src, tgt);
        pos/neg edge tensors supply training triples for the link predictor.
        NOTE(review): `labels` only gates loss computation — its values are
        never read; the margin-ranking target is always +1 (positives should
        outscore negatives). Returns a plain dict, not a ModelOutput.

        Returns dict with:
        - node_embeddings: (N, hidden_dim)
        - loss: scalar (if labels provided)
        - pos_scores: scores for positive edges
        - neg_scores: scores for negative edges
        """
        # 1. Encode all nodes (structural + optional text features).
        node_embeds = self.encode_nodes(
            node_ids, node_type_ids, subsystem_ids,
            text_input_ids, text_attention_mask,
        )

        # 2. Graph attention message passing; the same per-edge type
        # embeddings are reused by every layer.
        edge_type_embeds = self.edge_type_embed_gat(edge_type_ids)
        for gat_layer in self.gat_layers:
            node_embeds = gat_layer(node_embeds, edge_index, edge_type_embeds)

        result = {"node_embeddings": node_embeds}

        # 3. Link prediction (only when both positive and negative training
        # edges are supplied).
        if pos_edge_index is not None and neg_edge_index is not None:
            pos_src, pos_tgt = pos_edge_index
            neg_src, neg_tgt = neg_edge_index

            pos_scores = self.link_predictor(
                node_embeds[pos_src], node_embeds[pos_tgt], pos_edge_types
            )
            neg_scores = self.link_predictor(
                node_embeds[neg_src], node_embeds[neg_tgt], neg_edge_types
            )

            result["pos_scores"] = pos_scores
            result["neg_scores"] = neg_scores

            if labels is not None:
                # Margin ranking: encourage pos_score > neg_score + margin.
                loss = F.margin_ranking_loss(
                    pos_scores, neg_scores,
                    torch.ones_like(pos_scores),
                    margin=self.config.margin,
                )
                result["loss"] = loss

        return result

    def get_node_embedding(self, node_idx: int) -> torch.Tensor:
        """Get the embedding for a single node by index.

        NOTE(review): this returns only the raw structural id-embedding row
        (node_embed_dim,), not the GAT-refined (hidden_dim,) representation
        produced by forward() — confirm that is the intended API.
        """
        with torch.no_grad():
            return self.node_id_embed.weight[node_idx]