# ech0-prime-agi / core/engine.py
# Source: workofarttattoo — "Upload folder using huggingface_hub" (commit f3dce3d, verified)
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List, Dict, Optional, Tuple
from torch.distributions import Normal, kl_divergence
class NeuralCorticalLevel(nn.Module):
    """
    A cortical level with learnable neural network parameters.

    Implements predictive coding: the encoder maps bottom-up input to an
    expectation (latent state), the decoder maps that expectation back to a
    prediction of the input, and a precision network estimates per-element
    confidence (inverse variance) over the resulting prediction error.
    """

    def __init__(self, level_id: int, input_dim: int, output_dim: int,
                 hidden_dim: int, name: str):
        """
        Args:
            level_id: Position of this level in the hierarchy (0 = sensory).
            input_dim: Dimensionality of the bottom-up input.
            output_dim: Dimensionality of the expectation (latent state).
            hidden_dim: Width of the hidden layers.
            name: Human-readable label for this level.
        """
        super().__init__()
        self.level_id = level_id
        self.name = name
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Encoder: bottom-up (error/input to expectation).
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )
        # Decoder: top-down (expectation to prediction of the input).
        self.decoder = nn.Sequential(
            nn.Linear(output_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )
        # Precision (uncertainty) network; Softplus keeps outputs positive.
        self.precision_net = nn.Sequential(
            nn.Linear(output_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, input_dim),
            nn.Softplus()
        )
        # Learnable Gaussian prior over the expectation.
        self.prior_mu = nn.Parameter(torch.zeros(output_dim))
        self.prior_logvar = nn.Parameter(torch.zeros(output_dim))
        # Transient state populated by the most recent forward() call.
        self.expectation: Optional[torch.Tensor] = None
        self.prediction: Optional[torch.Tensor] = None
        self.error: Optional[torch.Tensor] = None
        self.precision: Optional[torch.Tensor] = None

    def encode(self, input_data: torch.Tensor) -> torch.Tensor:
        """Bottom-up: encode input to expectation."""
        return self.encoder(input_data)

    def decode(self, expectation: torch.Tensor) -> torch.Tensor:
        """Top-down: decode expectation to a prediction of the input."""
        return self.decoder(expectation)

    def compute_precision(self, expectation: torch.Tensor) -> torch.Tensor:
        """Compute precision (inverse variance) from the expectation."""
        # Small epsilon keeps the precision strictly positive for stability.
        return self.precision_net(expectation) + 1e-6

    def forward(self, input_data: torch.Tensor,
                top_down_prediction: Optional[torch.Tensor] = None):
        """
        Run one predictive-coding pass through the level.

        Args:
            input_data: Bottom-up input (from a lower level or sensory).
            top_down_prediction: Prediction supplied by a higher level; when
                omitted, this level self-predicts by decoding its expectation.

        Returns:
            Tuple of (expectation, prediction error, precision).
        """
        self.expectation = self.encode(input_data)
        # Prefer an externally supplied top-down prediction; otherwise
        # generate one from our own expectation.
        if top_down_prediction is None:
            self.prediction = self.decode(self.expectation)
        else:
            self.prediction = top_down_prediction
        # Prediction error drives learning at this level.
        self.error = input_data - self.prediction
        self.precision = self.compute_precision(self.expectation)
        return self.expectation, self.error, self.precision

    def get_prior_distribution(self) -> Normal:
        """Prior p(z) for KL-divergence calculation."""
        return Normal(self.prior_mu, torch.exp(0.5 * self.prior_logvar))

    def get_posterior_distribution(self) -> Optional[Normal]:
        """
        Approximate posterior q(z|x) as a Normal centred on the expectation.

        Returns None until forward() has populated the level state.  The
        scale is a single scalar derived from the mean precision; a full
        implementation would learn a per-dimension variance.
        """
        # Fix: also guard precision — expectation alone can be set (e.g. via
        # encode()) while precision is still None, which previously raised
        # AttributeError on `None.mean()`.
        if self.expectation is None or self.precision is None:
            return None
        posterior_var = 1.0 / (self.precision.mean() + 1e-6)
        return Normal(self.expectation, torch.sqrt(posterior_var))
class HierarchicalGenerativeModel(nn.Module):
    """
    Stack of NeuralCorticalLevel modules wired for predictive-coding
    message passing.

    Levels (full-size configuration):
        0  Sensory     (V1/A1)               1M   -> 100K
        1  Perceptual  (object/word)         100K -> 10K
        2  Associative (relations/grammar)   10K  -> 1K
        3  Prefrontal  (plans/narratives)    1K   -> 100
        4  Meta-cortex (strategy/self-model) 100  -> 10
    """

    def __init__(self, use_cuda: bool = False, lightweight: bool = False):
        """
        Args:
            use_cuda: Move the model to GPU when one is available.
            lightweight: Use drastically smaller layer sizes (testing/debug).
        """
        super().__init__()
        self.use_cuda = use_cuda and torch.cuda.is_available()
        self.device = torch.device("cuda" if self.use_cuda else "cpu")
        self.lightweight = lightweight
        # (input_dim, output_dim, label) for each cortical level.
        if lightweight:
            # Tiny configuration for testing/debugging.
            layer_specs = [
                (1000, 512, "Sensory"),
                (512, 256, "Perceptual"),
                (256, 128, "Associative"),
                (128, 64, "Prefrontal"),
                (64, 10, "Meta"),
            ]
        else:
            # Full-size configuration (potentially very large).
            layer_specs = [
                (1000000, 100000, "Sensory"),
                (100000, 10000, "Perceptual"),
                (10000, 1000, "Associative"),
                (1000, 100, "Prefrontal"),
                (100, 10, "Meta"),
            ]
        self.levels = nn.ModuleList()
        for idx, (in_dim, out_dim, label) in enumerate(layer_specs):
            # Hidden width scales with the level's output, floored at 256.
            self.levels.append(NeuralCorticalLevel(
                level_id=idx,
                input_dim=in_dim,
                output_dim=out_dim,
                hidden_dim=max(256, out_dim // 4),
                name=label,
            ))
        self.to(self.device)

    def _coerce_sensory(self, raw: Optional[torch.Tensor]) -> torch.Tensor:
        """Return a 1-D float tensor of length levels[0].input_dim on self.device."""
        width = self.levels[0].input_dim
        if raw is None:
            return torch.zeros(width, device=self.device)
        if isinstance(raw, np.ndarray):
            raw = torch.from_numpy(raw).float()
        raw = raw.to(self.device)
        if raw.dim() > 1:
            raw = raw.view(-1)  # flatten for consistent 1-D processing
        if raw.size(0) < width:
            raw = F.pad(raw, (0, width - raw.size(0)))  # zero-pad short input
        elif raw.size(0) > width:
            raw = raw[:width]  # truncate oversized input
        return raw

    def step(self, sensory_input: Optional[torch.Tensor] = None):
        """
        One full cycle of predictive processing:
          1. ascending sweep encodes expectations level by level;
          2. each level decodes its expectation into a top-down prediction;
          3. a second ascending sweep recomputes each level's state against
             those predictions.

        Returns the list of per-level expectations from the first sweep.
        """
        sensory_input = self._coerce_sensory(sensory_input)
        # Ascending sweep: each level's expectation feeds the level above.
        expectations = []
        signal = sensory_input
        for level in self.levels:
            mu, _err, _prec = level(signal)
            expectations.append(mu)
            signal = mu
        # Descending sweep: every level predicts its own input from its
        # expectation (i.e. the output of the level below).
        predictions = [level.decode(mu)
                       for level, mu in zip(self.levels, expectations)]
        # Recompute level states with the top-down predictions in place.
        for idx, level in enumerate(self.levels):
            feed = sensory_input if idx == 0 else expectations[idx - 1]
            guess = predictions[idx]
            if guess.size(0) != feed.size(0):
                # Defensive resize via linear interpolation (the decoder
                # already targets the level's input width, so this is rare).
                guess = F.interpolate(
                    guess.unsqueeze(0).unsqueeze(0),
                    size=feed.size(0),
                    mode='linear',
                    align_corners=False,
                ).view(-1)
            level(feed, top_down_prediction=guess)
        return expectations

    def forward(self, sensory_input: Optional[torch.Tensor] = None):
        """Forward pass through the hierarchy (alias for step())."""
        return self.step(sensory_input)
class FreeEnergyEngine:
    """
    Optimization controller that minimizes Variational Free Energy across
    the hierarchy:  F = Complexity (KL divergence) - Accuracy (log likelihood).
    """

    def __init__(self, model: HierarchicalGenerativeModel, learning_rate: float = 0.001):
        """Attach an Adam optimizer over all model parameters."""
        self.model = model
        self.learning_rate = learning_rate
        self.optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def calculate_free_energy(self, sensory_input: Optional[torch.Tensor] = None) -> float:
        """Total free energy across all levels, as a plain float (no gradients)."""
        with torch.no_grad():
            return float(self._compute_free_energy_tensor(sensory_input).item())

    def _compute_free_energy_tensor(self, sensory_input: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        Total free energy across all levels as a tensor attached to the
        autograd graph:  F = -log p(x|z) + KL(q(z|x) || p(z)).
        """
        dev = self.model.device
        width = self.model.levels[0].input_dim
        # Normalize the sensory input to a 1-D tensor of the expected width.
        if sensory_input is None:
            sensory_input = torch.zeros(width, device=dev)
        else:
            if isinstance(sensory_input, np.ndarray):
                sensory_input = torch.from_numpy(sensory_input).float()
            sensory_input = sensory_input.to(dev)
            if sensory_input.dim() > 1:
                sensory_input = sensory_input.view(-1)
            if sensory_input.size(0) < width:
                sensory_input = F.pad(sensory_input, (0, width - sensory_input.size(0)))
            elif sensory_input.size(0) > width:
                sensory_input = sensory_input[:width]
        # Forward pass populates per-level error/precision and builds the graph.
        self.model.step(sensory_input)
        total = torch.tensor(0.0, device=dev)
        for level in self.model.levels:
            if level.error is None or level.precision is None:
                continue
            # Accuracy: precision-weighted squared prediction error.
            # NOTE(review): a full Gaussian -log likelihood would also carry a
            # -log(precision) term; without it the gradient only pushes
            # precision downward — confirm this is intended.
            accuracy = torch.sum(level.precision * (level.error ** 2))
            posterior = level.get_posterior_distribution()
            if posterior is not None:
                # Complexity: KL from the learned prior, summed over the
                # output dimension.
                complexity = kl_divergence(posterior, level.get_prior_distribution()).sum()
            else:
                complexity = torch.tensor(0.0, device=dev)
            total = total + accuracy + complexity
        return total

    def optimize(self, sensory_input: Optional[torch.Tensor] = None, iterations: int = 5):
        """
        Run `iterations` gradient steps to minimize free energy; returns the
        post-optimization free energy as a float.
        """
        self.model.train()
        for _ in range(iterations):
            self.optimizer.zero_grad()
            loss = self._compute_free_energy_tensor(sensory_input)
            loss.backward()
            self.optimizer.step()
        self.model.eval()
        return self.calculate_free_energy(sensory_input)
class GlobalWorkspace:
    """
    Simulates the 40Hz thalamocortical resonance for conscious access:
    precision-weighted signals compete, the winner is broadcast, and a
    synchrony score reflects how aligned the top signals are.
    """

    def __init__(self, model: HierarchicalGenerativeModel):
        self.model = model
        self.frequency = 40.0  # Hz
        self.synchrony = 0.0
        self.broadcast_buffer = None

    def broadcast(self):
        """
        Run one competitive broadcast cycle.

        Collects each level's expectation weighted by its mean precision,
        selects the highest-precision signal as the broadcast winner, and
        scores synchrony as the mean adjacent cosine similarity among the
        (up to three) highest-precision signals.

        Returns:
            Tuple of (broadcast_buffer, synchrony).
        """
        # Gather (weighted signal, mean precision) per level with state.
        entries = []
        for level in self.model.levels:
            if level.expectation is None or level.precision is None:
                continue
            confidence = level.precision.mean()
            entries.append((level.expectation * confidence, confidence.item()))
        if not entries:
            # Nothing to broadcast yet.
            self.broadcast_buffer = None
            self.synchrony = 0.0
            return self.broadcast_buffer, self.synchrony
        # Competitive selection: the most precise (confident) signal wins.
        winner = max(range(len(entries)), key=lambda k: entries[k][1])
        self.broadcast_buffer = entries[winner][0]
        if len(entries) == 1:
            # A lone signal is trivially synchronized with itself.
            self.synchrony = 1.0
            return self.broadcast_buffer, self.synchrony
        # Synchrony: adjacent cosine similarities among the top-3 signals.
        ranked = sorted(entries, key=lambda e: e[1], reverse=True)[:3]
        similarities = []
        for (a, _), (b, _) in zip(ranked, ranked[1:]):
            a_hat = a / (a.norm() + 1e-9)
            b_hat = b / (b.norm() + 1e-9)
            if len(a_hat) != len(b_hat):
                # Zero-pad the shorter vector so the dot product is defined.
                widest = max(len(a_hat), len(b_hat))
                a_hat = F.pad(a_hat, (0, widest - len(a_hat)))
                b_hat = F.pad(b_hat, (0, widest - len(b_hat)))
            similarities.append(torch.dot(a_hat, b_hat).item())
        self.synchrony = np.mean(similarities) if similarities else 0.0
        return self.broadcast_buffer, self.synchrony