# sem-v6-training/src/sem_v6/modules/module_e.py
import torch
import torch.nn as nn
from typing import Optional, Any, cast
from ..utils.sleep import EpisodicBuffer, SleepReplayScheduler
class NeuroscienceEnhancer(nn.Module):
"""
Neuroscience-inspired enhancement layer for SEM V6.
Integrates five biological learning mechanisms:
1. **STDP (Spike-Timing Dependent Plasticity)**: Temporal causality learning
where synaptic strength changes based on relative timing of pre- and
post-synaptic spikes (Bi & Poo, 1998).
2. **Sleep Consolidation**: Offline memory replay during designated "sleep"
phases to strengthen important patterns (Wilson & McNaughton, 1994).
3. **Neuromodulator Dynamics**: Three neuromodulators (ACh, NE, 5-HT)
dynamically adjust learning rates based on context:
- ACh (Acetylcholine): Enhances plasticity during learning
- NE (Norepinephrine): Increases exploration during arousal
- 5-HT (Serotonin): Stabilizes weights during consolidation
(Sara, 2009; Hasselmo, 2006)
4. **Lateral Inhibition**: Winner-take-all competition where highly active
neurons suppress neighbors to enforce sparsity.
    5. **Predictive Coding**: Error-driven learning in which prediction errors
       propagate up the hierarchy to update representations (Rao & Ballard, 1999).
These mechanisms enhance Module C (ChebyKAN propagator) with biological
realism while maintaining the frozen architecture constraint.
References:
- Bi, G., & Poo, M. (1998). Synaptic modifications in cultured
hippocampal neurons: dependence on spike timing, synaptic strength,
and postsynaptic cell type. Journal of Neuroscience, 18(24), 10464-10472.
- Wilson, M. A., & McNaughton, B. L. (1994). Reactivation of hippocampal
ensemble memories during sleep. Science, 265(5172), 676-679.
- Rao, R. P., & Ballard, D. H. (1999). Predictive coding in the visual
cortex: a functional interpretation of some extra-classical
receptive-field effects. Nature Neuroscience, 2(1), 79-87.
- Sara, S. J. (2009). The locus coeruleus and noradrenergic modulation
of cognition. Nature Reviews Neuroscience, 10(3), 211-223.
- Hasselmo, M. E. (2006). The role of acetylcholine in learning and
memory. Current Opinion in Neurobiology, 16(6), 710-715.
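    Example (illustrative; assumes a CUDA device is available):
        >>> enhancer = NeuroscienceEnhancer(manifold_dim=16384, enable_sleep=False)
        >>> u = torch.randn(8, 16384, device='cuda')
        >>> sparse_u, _ = enhancer(u)
        >>> int(sparse_u[0].sum().item())  # exactly k = int(16384 * 0.05) = 819 winners
        819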
"""
def __init__(
self,
manifold_dim: int = 16384,
sparsity: float = 0.05,
device: str = "cuda",
enable_stdp: bool = True,
enable_sleep: bool = True,
enable_neuromodulation: bool = True,
enable_lateral_inhibition: bool = True,
enable_predictive_coding: bool = True,
awake_steps: int = 1000,
sleep_steps: int = 100,
sleep_replay_batch: int = 32,
buffer_max_size: int = 1000,
):
"""
Initialize NeuroscienceEnhancer.
Args:
manifold_dim: Dimensionality of the hypergraph manifold (default: 16384)
sparsity: Target sparsity for lateral inhibition (default: 0.05, i.e., 5%)
device: Device for tensor operations ('cuda' or 'cpu')
enable_stdp: Enable STDP learning mechanism
enable_sleep: Enable sleep consolidation system
enable_neuromodulation: Enable neuromodulator dynamics (ACh, NE, 5-HT)
enable_lateral_inhibition: Enable lateral inhibition (k-WTA)
enable_predictive_coding: Enable predictive coding error computation
awake_steps: Number of training steps per awake phase (default: 1000)
sleep_steps: Number of replay steps per sleep phase (default: 100)
sleep_replay_batch: Batch size for sleep replay (default: 32)
buffer_max_size: Maximum size of episodic replay buffer (default: 1000)
"""
super().__init__()
# GPU requirement check (per CLAUDE.md)
assert torch.cuda.is_available(), "GPU required for Module E (per CLAUDE.md)"
self.manifold_dim = manifold_dim
self.sparsity = sparsity
self.k = int(manifold_dim * sparsity) # Number of winners for k-WTA
self.device = torch.device(device)
# Feature flags
self.enable_stdp = enable_stdp
self.enable_sleep = enable_sleep
self.enable_neuromodulation = enable_neuromodulation
self.enable_lateral_inhibition = enable_lateral_inhibition
self.enable_predictive_coding = enable_predictive_coding
# Sleep/wake cycle scheduler (subtask-2-3)
self.sleep_scheduler: Optional[SleepReplayScheduler]
self.episodic_buffer: Optional[EpisodicBuffer]
if self.enable_sleep:
self.sleep_scheduler = SleepReplayScheduler(
awake_steps=awake_steps,
sleep_steps=sleep_steps,
sleep_replay_batch=sleep_replay_batch
)
# Episodic replay buffer (subtask-2-1)
self.episodic_buffer = EpisodicBuffer(
max_size=buffer_max_size,
device=str(self.device)
)
else:
self.sleep_scheduler = None
self.episodic_buffer = None
# Neuromodulator levels (learnable parameters)
# Initialized to biologically plausible baseline values
if self.enable_neuromodulation:
self.ach = nn.Parameter(torch.tensor(1.0, device=self.device)) # Acetylcholine
self.ne = nn.Parameter(torch.tensor(0.5, device=self.device)) # Norepinephrine
self.serotonin = nn.Parameter(torch.tensor(0.3, device=self.device)) # Serotonin
# Placeholder for future components (to be implemented in subsequent subtasks)
# - STDP learner (subtask-1-2) - to be integrated
# - Predictive coding error module (subtask-3-3) - to be implemented
def set_sleep_mode(self, is_sleeping: bool) -> None:
"""
Set sleep/wake mode for the enhancer.
Args:
is_sleeping: True for sleep mode (offline consolidation),
False for awake mode (online learning)
Note:
When using the sleep scheduler, prefer using step() for automatic
sleep/wake transitions instead of manually setting sleep mode.
"""
# Manual override (bypasses scheduler if enabled)
if self.enable_sleep and self.sleep_scheduler is not None:
# Synchronize scheduler state with manual override
self.sleep_scheduler.awake = not is_sleeping
def step(self) -> None:
"""
Advance one step in sleep/wake cycle.
Automatically transitions between awake and sleep modes based on
the configured scheduler. Should be called once per training step.
Example:
>>> enhancer = NeuroscienceEnhancer(enable_sleep=True)
>>> for step in range(10000):
... if enhancer.is_awake():
... # Online training
... loss = train_step(data)
... enhancer.add_episode(episode)
... else:
... # Sleep consolidation
... replay_batch = enhancer.sample_episodes(batch_size=32)
... consolidate(replay_batch)
... enhancer.step()
"""
if self.enable_sleep and self.sleep_scheduler is not None:
self.sleep_scheduler.step()
def is_awake(self) -> bool:
"""
Check if currently in awake (online learning) mode.
Returns:
True if awake, False if sleeping
"""
if self.enable_sleep and self.sleep_scheduler is not None:
return self.sleep_scheduler.is_awake()
return True # Default to awake if sleep disabled
def is_sleeping(self) -> bool:
"""
Check if currently in sleep (offline consolidation) mode.
Returns:
True if sleeping, False if awake
"""
if self.enable_sleep and self.sleep_scheduler is not None:
return self.sleep_scheduler.is_sleeping()
return False # Default to not sleeping if sleep disabled
def add_episode(self, episode: dict[str, Any]) -> None:
"""
Add episode to replay buffer during awake phase.
Args:
episode: Episode dictionary containing 'sdr', 'reward', 'timestamp'
Example:
>>> episode = {
... 'sdr': torch.randn(16384, device='cuda'),
... 'reward': 1.5,
... 'timestamp': 100.0
... }
>>> enhancer.add_episode(episode)
"""
if self.enable_sleep and self.episodic_buffer is not None:
self.episodic_buffer.add(episode)
def sample_episodes(self, batch_size: int, **kwargs: Any) -> list[dict[str, Any]]:
"""
Sample episodes from replay buffer for sleep consolidation.
Args:
batch_size: Number of episodes to sample
**kwargs: Additional arguments passed to buffer.sample()
(e.g., prioritize=True, reverse_temporal=True)
Returns:
List of episode dictionaries
Example:
>>> # Sample for reverse temporal replay during sleep
>>> batch = enhancer.sample_episodes(
... batch_size=32,
... reverse_temporal=True
... )
"""
if self.enable_sleep and self.episodic_buffer is not None:
return self.episodic_buffer.sample(batch_size, **kwargs)
return []
def get_phase_progress(self) -> float:
"""
Get progress through current sleep/wake phase.
Returns:
Progress as fraction [0, 1] (0.0 = phase start, 1.0 = phase end)
"""
if self.enable_sleep and self.sleep_scheduler is not None:
return self.sleep_scheduler.get_phase_progress()
return 0.0
def get_neuromodulator_states(self) -> tuple[float, float, float]:
"""
Get current neuromodulator levels.
Returns:
Tuple of (ACh, NE, Serotonin) levels
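        Example:
            >>> # With the baseline initialization (ACh=1.0, NE=0.5, 5-HT=0.3):
            >>> ach, ne, ser = enhancer.get_neuromodulator_states()
            >>> (ach, ne, round(ser, 2))
            (1.0, 0.5, 0.3)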
"""
if self.enable_neuromodulation:
return (
self.ach.item(),
self.ne.item(),
self.serotonin.item()
)
else:
            return (1.0, 0.0, 0.0)  # Neutral levels (net lr multiplier = 1.0) when disabled
def compute_effective_learning_rate(self, base_lr: float) -> float:
"""
Compute effective learning rate modulated by neuromodulators.
Formula (per spec):
lr_effective = base_lr * ach * (1 + ne) * (1 - 0.5 * serotonin)
Args:
base_lr: Base learning rate from optimizer
Returns:
Effective learning rate after neuromodulator modulation
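        Example:
            >>> # Worked example at the baseline levels (ACh=1.0, NE=0.5, 5-HT=0.3):
            >>> # 1e-3 * 1.0 * (1 + 0.5) * (1 - 0.5 * 0.3) = 1.275e-3
            >>> round(enhancer.compute_effective_learning_rate(1e-3), 6)
            0.001275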
"""
if not self.enable_neuromodulation:
return base_lr
# Clamp neuromodulators to safe range [0, 2] to prevent instability
ach_clamped = torch.clamp(self.ach, 0.0, 2.0)
ne_clamped = torch.clamp(self.ne, 0.0, 2.0)
serotonin_clamped = torch.clamp(self.serotonin, 0.0, 2.0)
lr_effective = base_lr * ach_clamped * (1 + ne_clamped) * (1 - 0.5 * serotonin_clamped)
return cast(float, lr_effective.item())
def compute_predictive_coding_error(
self,
prediction: torch.Tensor,
target: torch.Tensor,
return_magnitude: bool = False,
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""
Compute predictive coding error for error-driven learning.
Implements Rao & Ballard (1999) predictive coding framework where
prediction errors drive learning through hierarchical error propagation.
Error neurons compute the difference between top-down predictions and
bottom-up sensory input, and these errors are used to update
representations at each level of the hierarchy.
Mathematical formulation:
error = target - prediction
error_magnitude = ||error||_2 (L2 norm)
In predictive coding, weights are updated proportional to error magnitude:
Δw ∝ error_magnitude * gradient
This creates a Bayesian inference framework where:
- Higher-level predictions influence lower-level representations
- Prediction errors are minimized through gradient descent
- Hierarchical structure emerges naturally
Args:
prediction: Model's prediction (batch, manifold_dim)
This represents the top-down prediction from higher
hierarchical levels
target: Ground truth target (batch, manifold_dim)
This represents the bottom-up sensory input or
desired output from lower hierarchical levels
return_magnitude: If True, also return L2 norm of error
(useful for monitoring convergence)
Returns:
Tuple of:
- error: Prediction error tensor (batch, manifold_dim)
Sign indicates direction of error (target > pred: +, target < pred: -)
- error_magnitude: L2 norm of error per sample (batch,)
Only returned if return_magnitude=True, else None
Example:
>>> enhancer = NeuroscienceEnhancer(manifold_dim=16384, device='cuda')
>>> prediction = torch.randn(32, 16384, device='cuda')
>>> target = torch.randn(32, 16384, device='cuda')
>>> error, magnitude = enhancer.compute_predictive_coding_error(
... prediction, target, return_magnitude=True
... )
>>> # Use error for weight updates: Δw ∝ error
>>> # Monitor magnitude to verify error reduction over training
Note:
In the hierarchical predictive coding framework:
- Level N+1 predicts activity at Level N
- Error at Level N = actual(N) - predicted(N)
- This error is used to:
1. Update Level N+1's predictions (top-down)
2. Update Level N's representations (bottom-up)
- Iterative minimization of prediction error across hierarchy
implements Bayesian inference
Reference:
Rao, R. P., & Ballard, D. H. (1999). Predictive coding in the visual
cortex: a functional interpretation of some extra-classical
receptive-field effects. Nature Neuroscience, 2(1), 79-87.
"""
# Compute raw prediction error (target - prediction)
# This represents the surprise signal that drives learning
error = target - prediction
# Optionally compute error magnitude for monitoring convergence
error_magnitude = None
if return_magnitude:
# L2 norm per sample: ||error||_2
# Used to verify that error decreases over training iterations
# (acceptance criterion from spec)
            error_magnitude = torch.linalg.vector_norm(error, ord=2, dim=1)
return error, error_magnitude
def apply_lateral_inhibition(self, activations: torch.Tensor) -> torch.Tensor:
"""
Apply k-Winners-Take-All lateral inhibition (vectorized).
        Binarizes the top-k activations to 1.0 and zeros out the rest, yielding
        a sparse binary code (SDR) at the configured target sparsity.
Args:
activations: Input activations (batch, manifold_dim)
Returns:
            Binary activations with exactly k neurons set to 1.0 per sample
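        Example:
            >>> # Illustrative: manifold_dim=10, sparsity=0.2 gives k=2 winners
            >>> enhancer = NeuroscienceEnhancer(manifold_dim=10, sparsity=0.2)
            >>> acts = torch.tensor([[0.1, 0.9, 0.3, 0.8, 0.2,
            ...                       0.0, 0.5, 0.4, 0.6, 0.7]], device='cuda')
            >>> enhancer.apply_lateral_inhibition(acts)
            tensor([[0., 1., 0., 1., 0., 0., 0., 0., 0., 0.]], device='cuda:0')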
"""
if not self.enable_lateral_inhibition:
return activations
# Find top-k indices for each sample in batch
_, top_k_indices = torch.topk(activations, self.k, dim=-1)
# Vectorized: set top-k positions to 1 (no Python loop)
sparse_activations = torch.zeros_like(activations)
sparse_activations.scatter_(-1, top_k_indices, 1.0)
return sparse_activations
def forward(
self,
u: torch.Tensor,
target: Optional[torch.Tensor] = None
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""
Forward pass through neuroscience enhancement layer.
Args:
u: Input state from Module C propagator (batch, manifold_dim)
target: Optional target for predictive coding error computation
Returns:
Tuple of:
- Enhanced state after neuroscience mechanisms
- Prediction error (if target provided and predictive coding enabled)
"""
enhanced_u = u
prediction_error = None
# Apply lateral inhibition (if enabled and awake)
if self.enable_lateral_inhibition and not self.is_sleeping():
enhanced_u = self.apply_lateral_inhibition(enhanced_u)
# Compute predictive coding error (if enabled and target provided)
if self.enable_predictive_coding and target is not None:
prediction_error, _ = self.compute_predictive_coding_error(
prediction=enhanced_u,
target=target,
return_magnitude=False
)
# Note: STDP updates and sleep consolidation are handled externally
# via callbacks and training loop orchestration (to be implemented)
return enhanced_u, prediction_error
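
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the frozen SEM V6 API): the STDP learner
# referenced in the placeholders above (subtask-1-2) is not yet implemented.
# The helper below shows the standard pairwise exponential STDP window from
# Bi & Poo (1998); the parameter names `a_plus`, `a_minus`, `tau_plus`, and
# `tau_minus` are hypothetical defaults chosen for illustration only.
def stdp_weight_delta(
    delta_t: torch.Tensor,
    a_plus: float = 0.01,
    a_minus: float = 0.012,
    tau_plus: float = 20.0,
    tau_minus: float = 20.0,
) -> torch.Tensor:
    """Pairwise STDP update for spike-time differences delta_t = t_post - t_pre (ms).

    Potentiation when the presynaptic spike precedes the postsynaptic spike
    (delta_t > 0); exponentially decaying depression otherwise.
    """
    potentiation = a_plus * torch.exp(-delta_t / tau_plus)
    depression = -a_minus * torch.exp(delta_t / tau_minus)
    return torch.where(delta_t > 0, potentiation, depression)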