| """ |
| GLADIUS v2.0 β Tool Cortex |
| |
| Tools as embeddings in the same vector space as vocabulary. |
| Tool activation via cosine similarity threshold β no JSON, no parsing. |
| SLA2 hybrid attention over tool registry at every layer (via kernel). |
| |
| STUB VERSION β tool registry exists but no real tools are connected. |
| """ |

import torch
import torch.nn as nn
import torch.nn.functional as F

from .config import KernelConfig


class ToolCortex(nn.Module):
    """
    Tool understanding via a shared embedding space.

    Tools live in the same manifold as tokens. When a hidden state is
    close enough to a tool embedding (cosine similarity > threshold),
    the tool activates. No special syntax needed.

    Selection rule: tool* = argmax_tool S(tool, hidden_state), where S is
    cosine similarity.
    """

    def __init__(self, config: KernelConfig):
        super().__init__()
        self.config = config

        # Tool embeddings live in the same space as the token embeddings.
        self.tool_embeddings = nn.Parameter(
            torch.randn(config.max_tools, config.hidden_dim) * 0.02
        )

        # Scalar gate in (0, 1) over a concatenated (hidden, tool) pair.
        # Defined for the full system; not used by the stub's check_activation.
        self.activation_gate = nn.Sequential(
            nn.Linear(config.hidden_dim * 2, config.hidden_dim // 2),
            nn.SiLU(),
            nn.Linear(config.hidden_dim // 2, 1),
            nn.Sigmoid(),
        )

        # Projects the selected tool embedding before it is returned as a contribution.
        self.result_proj = nn.Linear(config.hidden_dim, config.hidden_dim, bias=False)

        # Registration state: which tool slots are live, and how many are registered.
        self.register_buffer('tool_active', torch.zeros(config.max_tools, dtype=torch.bool))
        self.num_registered = 0

    def register_grid_tools(self):
        """Register ARC grid manipulation tools (rotate, flip, fill, etc.)."""
        grid_tool_names = ['rotate', 'flip_h', 'flip_v', 'fill', 'crop', 'tile', 'recolor', 'overlay']
        for i, name in enumerate(grid_tool_names):
            if i < self.config.max_tools:
                with torch.no_grad():
                    # Stub: random placeholder embeddings stand in for encoded tool descriptions.
                    self.tool_embeddings.data[i] = torch.randn(self.config.hidden_dim) * 0.01
                self.tool_active[i] = True
                self.num_registered += 1

    def register_tool(self, tool_id: int, description_embedding: torch.Tensor):
        """
        Register a tool by initializing its embedding.

        In the full system, description_embedding comes from encoding
        the tool's natural language description through the shared embeddings.
        """
        with torch.no_grad():
            self.tool_embeddings.data[tool_id] = description_embedding
        self.tool_active[tool_id] = True
        self.num_registered += 1
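
    # Sketch of how description_embedding might be produced in the full system.
    # Assumption only: `tokenizer` and `token_embedding` are hypothetical names,
    # not defined in this stub; `cortex` is a ToolCortex instance.
    #
    #   ids = tokenizer("rotate the grid 90 degrees clockwise")
    #   desc = token_embedding(ids).mean(dim=0)        # (hidden_dim,) pooled description
    #   cortex.register_tool(tool_id=0, description_embedding=desc)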

    def check_activation(self, hidden: torch.Tensor) -> torch.Tensor | None:
        """
        Check whether any tool should activate based on hidden-state similarity.

        Args:
            hidden: (batch, seq_len, hidden_dim)
        Returns:
            tool_contribution: (batch, seq_len, hidden_dim) or None
        """
        if self.num_registered == 0:
            return None

        # Pool the sequence into a single query vector per batch element.
        pooled = hidden.mean(dim=1)

        # Cosine similarity between the pooled state and every tool embedding.
        pooled_norm = F.normalize(pooled, dim=-1)
        tools_norm = F.normalize(self.tool_embeddings, dim=-1)
        similarities = torch.matmul(pooled_norm, tools_norm.T)

        # Mask out unregistered tool slots.
        similarities = similarities.masked_fill(~self.tool_active.unsqueeze(0), -1.0)

        # Best-matching tool per batch element.
        best_sim, best_tool = similarities.max(dim=-1)

        # Activate only if the strongest match in the batch clears the threshold.
        if best_sim.max().item() < self.config.tool_activation_threshold:
            return None

        # Project the winning tool embedding and broadcast it across the sequence.
        tool_embed = self.tool_embeddings[best_tool]
        contribution = self.result_proj(tool_embed)

        return contribution.unsqueeze(1).expand_as(hidden)
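

if __name__ == "__main__":
    # Minimal smoke test (sketch, not part of the stub). KernelConfig's
    # constructor isn't shown in this file, so a stand-in namespace supplies
    # the three fields this module reads; a threshold of 0.0 is used so the
    # randomly initialized stub usually fires. Run with `python -m` from the
    # package root so the relative import above resolves.
    from types import SimpleNamespace

    cfg = SimpleNamespace(max_tools=16, hidden_dim=64, tool_activation_threshold=0.0)
    cortex = ToolCortex(cfg)
    cortex.register_grid_tools()

    hidden = torch.randn(2, 10, cfg.hidden_dim)
    out = cortex.check_activation(hidden)
    print("tools registered:", cortex.num_registered)
    print("tool contribution:", None if out is None else tuple(out.shape))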