| """ |
| GLADIUS v2.0 β Tool Cortex |
| |
| Tools as embeddings in the same vector space as vocabulary. |
| Tool activation via cosine similarity threshold β no JSON, no parsing. |
| SLA2 hybrid attention over tool registry at every layer (via kernel). |
| |
| STUB VERSION β tool registry exists but no real tools are connected. |
| """ |

import torch
import torch.nn as nn
import torch.nn.functional as F

from .config import KernelConfig


class ToolCortex(nn.Module):
    """
    Tool understanding via a shared embedding space.

    Tools live in the same manifold as tokens. When a hidden state is
    close enough to a tool embedding (cosine similarity > threshold),
    the tool activates. No special syntax needed.

    Selection rule: tool* = argmax_tool S(tool, hidden_state), where S is
    cosine similarity.
    """

    def __init__(self, config: KernelConfig):
        super().__init__()
        self.config = config

        # Tool embeddings live in the same space as the token embeddings.
        self.tool_embeddings = nn.Parameter(
            torch.randn(config.max_tools, config.hidden_dim) * 0.02
        )

        # Scalar gate in (0, 1) over a concatenated (hidden, tool) pair.
        # Defined for the full system; not used by the stub's check_activation.
        self.activation_gate = nn.Sequential(
            nn.Linear(config.hidden_dim * 2, config.hidden_dim // 2),
            nn.SiLU(),
            nn.Linear(config.hidden_dim // 2, 1),
            nn.Sigmoid(),
        )

        # Projects the selected tool embedding before it is returned as a contribution.
        self.result_proj = nn.Linear(config.hidden_dim, config.hidden_dim, bias=False)

        # Registration state: which tool slots are live, and how many are registered.
        self.register_buffer('tool_active', torch.zeros(config.max_tools, dtype=torch.bool))
        self.num_registered = 0

    def register_grid_tools(self):
        """Register ARC grid manipulation tools (rotate, flip, fill, etc.)."""
        grid_tool_names = ['rotate', 'flip_h', 'flip_v', 'fill', 'crop', 'tile', 'recolor', 'overlay']
        for i, name in enumerate(grid_tool_names):
            if i < self.config.max_tools:
                with torch.no_grad():
                    # Stub: random placeholder embeddings stand in for encoded tool descriptions.
                    self.tool_embeddings.data[i] = torch.randn(self.config.hidden_dim) * 0.01
                self.tool_active[i] = True
                self.num_registered += 1

    def register_tool(self, tool_id: int, description_embedding: torch.Tensor):
        """
        Register a tool by initializing its embedding.

        In the full system, description_embedding comes from encoding
        the tool's natural language description through the shared embeddings.
        """
        with torch.no_grad():
            self.tool_embeddings.data[tool_id] = description_embedding
        self.tool_active[tool_id] = True
        self.num_registered += 1
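
    # Sketch of how description_embedding might be produced in the full system.
    # Assumption only: `tokenizer` and `token_embedding` are hypothetical names,
    # not defined in this stub; `cortex` is a ToolCortex instance.
    #
    #   ids = tokenizer("rotate the grid 90 degrees clockwise")
    #   desc = token_embedding(ids).mean(dim=0)        # (hidden_dim,) pooled description
    #   cortex.register_tool(tool_id=0, description_embedding=desc)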

    def check_activation(self, hidden: torch.Tensor) -> torch.Tensor | None:
        """
        Check whether any tool should activate based on hidden-state similarity.

        Args:
            hidden: (batch, seq_len, hidden_dim)
        Returns:
            tool_contribution: (batch, seq_len, hidden_dim) or None
        """
        if self.num_registered == 0:
            return None

        # Pool the sequence into a single query vector per batch element.
        pooled = hidden.mean(dim=1)

        # Cosine similarity between the pooled state and every tool embedding.
        pooled_norm = F.normalize(pooled, dim=-1)
        tools_norm = F.normalize(self.tool_embeddings, dim=-1)
        similarities = torch.matmul(pooled_norm, tools_norm.T)

        # Mask out unregistered tool slots.
        similarities = similarities.masked_fill(~self.tool_active.unsqueeze(0), -1.0)

        # Best-matching tool per batch element.
        best_sim, best_tool = similarities.max(dim=-1)

        # Activate only if the strongest match in the batch clears the threshold.
        if best_sim.max().item() < self.config.tool_activation_threshold:
            return None

        # Project the winning tool embedding and broadcast it across the sequence.
        tool_embed = self.tool_embeddings[best_tool]
        contribution = self.result_proj(tool_embed)

        return contribution.unsqueeze(1).expand_as(hidden)
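

if __name__ == "__main__":
    # Minimal smoke test (sketch, not part of the stub). KernelConfig's
    # constructor isn't shown in this file, so a stand-in namespace supplies
    # the three fields this module reads; a threshold of 0.0 is used so the
    # randomly initialized stub usually fires. Run with `python -m` from the
    # package root so the relative import above resolves.
    from types import SimpleNamespace

    cfg = SimpleNamespace(max_tools=16, hidden_dim=64, tool_activation_threshold=0.0)
    cortex = ToolCortex(cfg)
    cortex.register_grid_tools()

    hidden = torch.randn(2, 10, cfg.hidden_dim)
    out = cortex.check_activation(hidden)
    print("tools registered:", cortex.num_registered)
    print("tool contribution:", None if out is None else tuple(out.shape))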