Upload 2 files
Browse files- mobius_markov.py +330 -0
- mobius_markov_docs.py +936 -0
mobius_markov.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
###########################################################################################################################################
|
| 2 |
+
#||||- - - |6.25.2025| - - - || MÖBIUS MARKOV || - - - |1990two| - - -|||| #
|
| 3 |
+
###########################################################################################################################################
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
import numpy as np
|
| 8 |
+
import math
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
from typing import List, Dict, Tuple, Optional
|
| 11 |
+
|
| 12 |
+
SAFE_MIN = -1e6
|
| 13 |
+
SAFE_MAX = 1e6
|
| 14 |
+
EPS = 1e-8
|
| 15 |
+
|
| 16 |
+
#||||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 𓅸 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||||#
|
| 17 |
+
|
| 18 |
+
def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
    """Sanitize a real tensor: replace non-finite values, then clamp into range.

    NaN -> 0, +inf -> ``max_val``, -inf -> ``min_val``, and the result is
    clamped to [min_val, max_val].

    BUG FIX: the previous code replaced every infinity (``torch.isinf``
    matches both signs) with ``max_val``, silently flipping ``-inf`` to
    ``+1e6``; ``nan_to_num`` routes each sign to the matching bound.

    Args:
        tensor: Real-valued input tensor.
        min_val: Lower bound of the safe range.
        max_val: Upper bound of the safe range.

    Returns:
        Tensor of the same shape containing only finite, in-range values.
    """
    # nan_to_num handles NaN / +inf / -inf in one pass with correct signs.
    tensor = torch.nan_to_num(tensor, nan=0.0, posinf=max_val, neginf=min_val)
    return torch.clamp(tensor, min_val, max_val)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def safe_complex_division(numerator, denominator, eps=EPS):
    """Numerically stable element-wise complex division.

    Uses the identity z1 / z2 = (z1 * conj(z2)) / |z2|^2, clamping the
    squared magnitude |z2|^2 = Re(z2 * conj(z2)) to at least ``eps`` so a
    near-zero denominator cannot blow the quotient up.
    """
    conj_den = torch.conj(denominator)
    mag_sq = torch.clamp(torch.real(denominator * conj_den), min=eps)
    return (numerator * conj_den) / mag_sq
|
| 31 |
+
|
| 32 |
+
###########################################################################################################################################
|
| 33 |
+
####################################################- - - MÖBIUS TRANSFORM - - -#######################################################
|
| 34 |
+
|
| 35 |
+
class MobiusTransform(nn.Module):
    """Learnable Möbius transformation f(z) = (a*z + b) / (c*z + d).

    The four complex parameters a, b, c, d are stored as [real, imag] pairs,
    either as trainable ``nn.Parameter``s (``learnable=True``) or as fixed
    buffers.  ``normalize_parameters`` keeps the map invertible
    (|ad - bc| > EPS) and the parameter magnitudes bounded.
    """

    def __init__(self, learnable=True, init_identity=True):
        super().__init__()
        self.learnable = learnable

        if init_identity:
            # Identity map f(z) = z: a=1, b=0, c=0, d=1.
            a_init, b_init, c_init, d_init = 1.0, 0.0, 0.0, 1.0
        else:
            # Mild perturbation of the identity; det = ad - bc = 0.99 != 0.
            a_init, d_init = 1.0, 1.0
            b_init, c_init = 0.1, 0.1

        if learnable:
            self.a = nn.Parameter(torch.tensor([a_init, 0.0]))
            self.b = nn.Parameter(torch.tensor([b_init, 0.0]))
            self.c = nn.Parameter(torch.tensor([c_init, 0.0]))
            self.d = nn.Parameter(torch.tensor([d_init, 0.0]))
        else:
            self.register_buffer('a', torch.tensor([a_init, 0.0]))
            self.register_buffer('b', torch.tensor([b_init, 0.0]))
            self.register_buffer('c', torch.tensor([c_init, 0.0]))
            self.register_buffer('d', torch.tensor([d_init, 0.0]))

    def to_complex(self, param):
        """Convert a [real, imag] pair to a 0-dim complex tensor."""
        return torch.complex(param[0], param[1])

    def get_determinant(self):
        """Return the complex determinant ad - bc (non-zero => invertible)."""
        a_complex = self.to_complex(self.a)
        b_complex = self.to_complex(self.b)
        c_complex = self.to_complex(self.c)
        d_complex = self.to_complex(self.d)

        det = a_complex * d_complex - b_complex * c_complex
        return det

    def normalize_parameters(self):
        """Keep the transform invertible and the parameters bounded.

        If |det| collapses below EPS, the map is reset toward the identity
        (a = d = 1, b and c shrunk by 10x); all parameters are then clamped
        to [-10, 10].  Runs under ``no_grad`` so autograd is unaffected.
        """
        if self.learnable:
            with torch.no_grad():
                det = torch.abs(self.get_determinant())
                if det < EPS:
                    one = torch.tensor([1.0, 0.0], device=self.a.device, dtype=self.a.dtype)
                    self.a.copy_(one)
                    self.d.copy_(one)
                    self.b.mul_(0.1)
                    self.c.mul_(0.1)
                for p in (self.a, self.b, self.c, self.d):
                    p.clamp_(-10.0, 10.0)

    def transform(self, z):
        """Apply f(z) = (a*z + b) / (c*z + d) to a complex tensor ``z``."""
        self.normalize_parameters()

        a_complex = self.to_complex(self.a)
        b_complex = self.to_complex(self.b)
        c_complex = self.to_complex(self.c)
        d_complex = self.to_complex(self.d)

        numerator = a_complex * z + b_complex
        denominator = c_complex * z + d_complex
        return safe_complex_division(numerator, denominator)

    def inverse_transform(self, w):
        """Apply the inverse map f^-1(w) = (d*w - b) / (-c*w + a)."""
        self.normalize_parameters()

        a_complex = self.to_complex(self.a)
        b_complex = self.to_complex(self.b)
        c_complex = self.to_complex(self.c)
        d_complex = self.to_complex(self.d)

        numerator = d_complex * w - b_complex
        denominator = -c_complex * w + a_complex
        return safe_complex_division(numerator, denominator)

    def get_transform_info(self):
        """Return diagnostics: determinant, identity flag and complex params."""
        det = self.get_determinant()
        a_c = self.to_complex(self.a)
        b_c = self.to_complex(self.b)
        c_c = self.to_complex(self.c)
        d_c = self.to_complex(self.d)
        zero = torch.zeros((), device=det.device, dtype=det.dtype)
        # BUG FIX: the previous check (|det| ~= 1) is satisfied by ANY
        # unimodular Möbius map (e.g. f(z) = -1/z), not only the identity.
        # f is the identity iff b = c = 0 and a = d (up to common scaling).
        is_identity = (torch.allclose(b_c, zero, atol=1e-6)
                       and torch.allclose(c_c, zero, atol=1e-6)
                       and torch.allclose(a_c, d_c, atol=1e-6))
        return {
            'determinant': det,
            'is_identity': is_identity,
            'parameters': {'a': a_c, 'b': b_c,
                           'c': c_c, 'd': d_c}
        }
|
| 119 |
+
|
| 120 |
+
###########################################################################################################################################
|
| 121 |
+
#############################################- - - COMPLEX STATE MARKOV CHAIN - - -####################################################
|
| 122 |
+
|
| 123 |
+
class ComplexStateMarkovChain(nn.Module):
    """Markov chain whose states live at learnable points in the complex plane.

    Transition logits combine a learned base matrix with a kernel applied to
    pairwise distances between state positions *after* they are warped by a
    Möbius transform, so the chain's dynamics follow the current geometry.
    """

    def __init__(self, num_states, state_embedding_dim=64, distance_kernel='gaussian'):
        super().__init__()
        self.num_states = num_states
        self.state_embedding_dim = state_embedding_dim
        self.distance_kernel = distance_kernel

        # Learnable complex coordinates of the states, spread around origin.
        real_part = torch.randn(num_states) * 2.0
        imag_part = torch.randn(num_states) * 2.0
        self.state_positions = nn.Parameter(torch.complex(real_part, imag_part))

        self.state_embeddings = nn.Parameter(torch.randn(num_states, state_embedding_dim) * 0.1)

        # Geometry-independent part of the transition logits plus a learnable
        # affine map applied to the kernelised distances.
        self.base_transition_logits = nn.Parameter(torch.randn(num_states, num_states) * 0.1)
        self.distance_scale = nn.Parameter(torch.tensor(1.0))
        self.distance_bias = nn.Parameter(torch.tensor(0.0))

        # Kernel-specific shape parameter (none for the linear fallback).
        if distance_kernel == 'gaussian':
            self.kernel_width = nn.Parameter(torch.tensor(1.0))
        elif distance_kernel == 'inverse':
            self.kernel_power = nn.Parameter(torch.tensor(1.0))

    def compute_transformed_distances(self, mobius_transform):
        """Pairwise distances between the Möbius-warped state positions."""
        warped = mobius_transform.transform(self.state_positions)
        # Broadcasting builds the full [num_states, num_states] difference grid.
        pairwise = warped.unsqueeze(0) - warped.unsqueeze(1)
        return torch.abs(pairwise), warped

    def distance_to_probability(self, distances):
        """Map distances to (unnormalised) closeness scores via the kernel."""
        distances = torch.clamp(distances, min=EPS)

        if self.distance_kernel == 'gaussian':
            sigma = torch.clamp(self.kernel_width, min=0.1, max=10.0)
            return torch.exp(-distances ** 2 / (2 * sigma ** 2))
        if self.distance_kernel == 'inverse':
            alpha = torch.clamp(self.kernel_power, min=0.5, max=3.0)
            return 1.0 / (distances ** alpha + EPS)
        # Fallback: linear kernel, zero beyond unit distance.
        return torch.clamp(1.0 - distances, min=0.0)

    def compute_transition_matrix(self, mobius_transform):
        """Build a row-stochastic transition matrix from the warped geometry."""
        distances, warped = self.compute_transformed_distances(mobius_transform)
        closeness = self.distance_to_probability(distances)

        gain = torch.clamp(self.distance_scale, min=0.1, max=10.0)
        offset = torch.clamp(self.distance_bias, min=-5.0, max=5.0)
        geometry_term = gain * closeness + offset

        logits = self.base_transition_logits + geometry_term
        # Small diagonal boost mildly encourages self-transitions.
        logits = logits + torch.eye(self.num_states, device=logits.device) * 0.05

        return F.softmax(logits, dim=1), warped

    def forward(self, initial_state, num_steps, mobius_transform):
        """Evolve a state distribution for ``num_steps`` steps.

        Args:
            initial_state: [num_states] or [batch, num_states] distribution.
            num_steps: Number of transition steps to apply.
            mobius_transform: MobiusTransform warping the state geometry.

        Returns:
            Dict with the trajectory, final distribution, per-step positions
            of the most likely state, the transition matrix and the warped
            state positions.
        """
        dist = initial_state if initial_state.dim() > 1 else initial_state.unsqueeze(0)

        matrix, warped = self.compute_transition_matrix(mobius_transform)

        history = [dist.clone()]
        positions = [warped[dist.argmax(dim=-1)]]

        for _ in range(num_steps):
            # Row-vector convention: next distribution = current @ P.
            dist = torch.matmul(dist, matrix)
            history.append(dist.clone())
            positions.append(warped[dist.argmax(dim=-1)])

        return {
            'trajectory': torch.stack(history),
            'final_state': dist,
            'state_positions': torch.stack(positions),
            'transition_matrix': matrix,
            'transformed_positions': warped
        }
|
| 216 |
+
|
| 217 |
+
###########################################################################################################################################
|
| 218 |
+
#############################################- - - MÖBIUS MARKOV SYSTEM - - -##########################################################
|
| 219 |
+
|
| 220 |
+
class MobiusMarkovSystem(nn.Module):
    """Markov chain whose state geometry is warped by an evolving Möbius map.

    Each evolution step (a) encodes the current state distribution,
    (b) nudges the Möbius parameters from that embedding, and (c) advances
    the Markov chain one step under the newly warped geometry.
    """

    def __init__(self, num_states, state_embedding_dim=64, evolution_steps=10):
        super().__init__()
        self.num_states = num_states
        self.evolution_steps = evolution_steps

        self.mobius_transform = MobiusTransform(learnable=True, init_identity=True)
        self.markov_chain = ComplexStateMarkovChain(num_states, state_embedding_dim)

        # Maps a state embedding to 8 reals = 4 complex (a, b, c, d) deltas.
        self.mobius_evolution = nn.Sequential(
            nn.Linear(state_embedding_dim, state_embedding_dim),
            nn.Tanh(),
            nn.Linear(state_embedding_dim, 8),
        )

        # Encodes a state distribution [*, num_states] into an embedding.
        self.state_encoder = nn.Sequential(
            nn.Linear(num_states, state_embedding_dim),
            nn.LayerNorm(state_embedding_dim),
            nn.ReLU(),
            nn.Linear(state_embedding_dim, state_embedding_dim)
        )

        # Decodes an embedding back to a distribution over states.
        self.state_decoder = nn.Sequential(
            nn.Linear(state_embedding_dim, state_embedding_dim),
            nn.ReLU(),
            nn.Linear(state_embedding_dim, num_states),
            nn.Softmax(dim=-1)
        )

        # Learnable rate controlling how fast the geometry drifts.
        self.geometry_controller = nn.Parameter(torch.tensor(0.1))

    def evolve_mobius_parameters(self, state_embedding):
        """Nudge the Möbius parameters based on the current state embedding.

        NOTE(review): the updates run under ``no_grad``, so no gradient
        reaches the Möbius parameters or ``mobius_evolution`` through this
        path — confirm this non-differentiable update is intentional.
        """
        evolution_signal = self.mobius_evolution(state_embedding)
        evolution_rate = torch.clamp(self.geometry_controller, 0.01, 1.0)
        if self.mobius_transform.learnable:
            with torch.no_grad():
                # Rows of the 4x2 view are [real, imag] deltas for a, b, c, d.
                updates = (evolution_signal.view(4, 2) * evolution_rate * 0.01)\
                    .to(device=self.mobius_transform.a.device, dtype=self.mobius_transform.a.dtype)
                self.mobius_transform.a.add_(updates[0])
                self.mobius_transform.b.add_(updates[1])
                self.mobius_transform.c.add_(updates[2])
                self.mobius_transform.d.add_(updates[3])
                self.mobius_transform.normalize_parameters()

    def forward(self, initial_state, return_full_trajectory=False):
        """Run ``evolution_steps`` alternating geometry/transition updates.

        Args:
            initial_state: [batch, num_states] (or [num_states]) distribution.
            return_full_trajectory: Also record per-step states and geometry.

        Returns:
            Dict with the final state, decoded prediction, final embedding and
            geometry info (plus ``evolution_history`` when requested).
        """
        # FIX: removed a dead ``state_encoder(initial_state)`` call that ran
        # before the loop; its result was never used.
        evolution_history = {
            'states': [],
            'geometries': [],
            'transition_matrices': [],
            'transformed_positions': []
        }

        current_state = initial_state

        for step in range(self.evolution_steps):
            state_embedding = self.state_encoder(current_state)

            # Average over the batch so one embedding drives the geometry.
            self.evolve_mobius_parameters(state_embedding.mean(dim=0))

            markov_output = self.markov_chain.forward(
                current_state,
                num_steps=1,
                mobius_transform=self.mobius_transform
            )

            current_state = markov_output['final_state']

            if return_full_trajectory:
                evolution_history['states'].append(current_state.clone())
                evolution_history['geometries'].append(self.mobius_transform.get_transform_info())
                evolution_history['transition_matrices'].append(markov_output['transition_matrix'])
                evolution_history['transformed_positions'].append(markov_output['transformed_positions'])

        final_embedding = self.state_encoder(current_state)
        final_prediction = self.state_decoder(final_embedding)

        output = {
            'final_state': current_state,
            'final_prediction': final_prediction,
            'final_embedding': final_embedding,
            'final_geometry': self.mobius_transform.get_transform_info()
        }

        if return_full_trajectory:
            output['evolution_history'] = evolution_history

        return output

    def predict_sequence(self, initial_state, sequence_length):
        """Autoregressively roll the system out for ``sequence_length`` steps."""
        predictions = []
        current_state = initial_state

        for _ in range(sequence_length):
            output = self.forward(current_state)
            predictions.append(output['final_prediction'])
            current_state = output['final_state']

        return torch.stack(predictions)

    def get_system_info(self):
        """Summarise configuration and current geometry for inspection."""
        return {
            'num_states': self.num_states,
            'evolution_steps': self.evolution_steps,
            'current_geometry': self.mobius_transform.get_transform_info(),
            'state_positions': self.markov_chain.state_positions,
            'geometry_evolution_rate': self.geometry_controller.item()
        }
|
| 330 |
+
|
mobius_markov_docs.py
ADDED
|
@@ -0,0 +1,936 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
###########################################################################################################################################
|
| 2 |
+
#||||- - - |6.25.2025| - - - || MÖBIUS MARKOV || - - - |1990two| - - -|||| #
|
| 3 |
+
###########################################################################################################################################
|
| 4 |
+
"""
|
| 5 |
+
Mathematical Foundation & Conceptual Documentation
|
| 6 |
+
-------------------------------------------------
|
| 7 |
+
|
| 8 |
+
CORE PRINCIPLE:
|
| 9 |
+
Combines Möbius transformations (complex analysis) with Markov chains to create
|
| 10 |
+
probabilistic systems evolving in dynamically warped non-Euclidean state spaces.
|
| 11 |
+
The geometry of the state space continuously adapts based on the system's evolution,
|
| 12 |
+
enabling rich, non-linear dynamics impossible in traditional Euclidean spaces.
|
| 13 |
+
|
| 14 |
+
MATHEMATICAL FOUNDATION:
|
| 15 |
+
=======================
|
| 16 |
+
|
| 17 |
+
1. MÖBIUS TRANSFORMATIONS:
|
| 18 |
+
f(z) = (az + b)/(cz + d)
|
| 19 |
+
|
| 20 |
+
Where:
|
| 21 |
+
- z ∈ ℂ: complex state variable
|
| 22 |
+
- a,b,c,d ∈ ℂ: complex parameters with ad - bc ≠ 0
|
| 23 |
+
- f: ℂ ∪ {∞} → ℂ ∪ {∞} (extended complex plane)
|
| 24 |
+
|
| 25 |
+
Properties:
|
| 26 |
+
- Conformal mapping (preserves angles)
|
| 27 |
+
- Maps circles/lines to circles/lines
|
| 28 |
+
- Group structure under composition
|
| 29 |
+
- Inverse: f⁻¹(w) = (dw - b)/(-cw + a)
|
| 30 |
+
|
| 31 |
+
2. COMPLEX STATE SPACE:
|
| 32 |
+
State positions: z₁, z₂, ..., zₙ ∈ ℂ
|
| 33 |
+
|
| 34 |
+
Transformed positions: w_i = f(z_i) = (az_i + b)/(cz_i + d)
|
| 35 |
+
|
| 36 |
+
Distance in transformed space: d(w_i, w_j) = |w_i - w_j|
|
| 37 |
+
|
| 38 |
+
3. MARKOV TRANSITION PROBABILITIES:
|
| 39 |
+
P(i → j) = softmax(β · K(d(w_i, w_j)) + θ_ij)
|
| 40 |
+
|
| 41 |
+
Where:
|
| 42 |
+
- K(d): distance kernel (Gaussian, inverse, linear)
|
| 43 |
+
- β: distance scaling parameter
|
| 44 |
+
- θ_ij: base transition logits
|
| 45 |
+
- Transformed distances create non-Euclidean transition structure
|
| 46 |
+
|
| 47 |
+
4. ADAPTIVE GEOMETRY EVOLUTION:
|
| 48 |
+
∂(a,b,c,d)/∂t = η · G(x_t, E_t)
|
| 49 |
+
|
| 50 |
+
Where:
|
| 51 |
+
- G: geometry evolution function
|
| 52 |
+
- x_t: current state distribution
|
| 53 |
+
- E_t: embedded state features
|
| 54 |
+
- η: geometry learning rate
|
| 55 |
+
|
| 56 |
+
The Möbius parameters evolve based on system state.
|
| 57 |
+
|
| 58 |
+
5. KERNEL FUNCTIONS:
|
| 59 |
+
Gaussian: K(d) = exp(-d²/(2σ²))
|
| 60 |
+
Inverse: K(d) = 1/(d^α + ε)
|
| 61 |
+
Linear: K(d) = max(0, 1 - d)
|
| 62 |
+
|
| 63 |
+
Different kernels create different transition locality structures.
|
| 64 |
+
|
| 65 |
+
CONCEPTUAL REASONING:
|
| 66 |
+
====================
|
| 67 |
+
|
| 68 |
+
WHY MÖBIUS + MARKOV?
|
| 69 |
+
- Standard Markov chains assume fixed, Euclidean state spaces
|
| 70 |
+
- Real systems often have curved, adaptive state geometries
|
| 71 |
+
- Möbius transformations provide rich geometric transformations
|
| 72 |
+
- Complex analysis offers elegant mathematical framework
|
| 73 |
+
- Dynamic geometry enables meta-learning of state representations
|
| 74 |
+
|
| 75 |
+
KEY INNOVATIONS:
|
| 76 |
+
1. **Dynamic Non-Euclidean Geometry**: State space warps over time
|
| 77 |
+
2. **Complex State Representations**: Rich 2D embedding in complex plane
|
| 78 |
+
3. **Conformal Invariance**: Angle-preserving transformations maintain local structure
|
| 79 |
+
4. **Learnable Geometry**: Möbius parameters adapt to data
|
| 80 |
+
5. **Multi-Scale Dynamics**: Both local transitions and global geometry evolve
|
| 81 |
+
|
| 82 |
+
APPLICATIONS:
|
| 83 |
+
- Dynamical systems with changing phase spaces
|
| 84 |
+
- Neural representations learning geometric structure
|
| 85 |
+
- Sequential data with non-stationary transition patterns
|
| 86 |
+
- Robotics in environments with changing topology
|
| 87 |
+
- Financial modeling with regime changes
|
| 88 |
+
|
| 89 |
+
COMPLEXITY ANALYSIS:
|
| 90 |
+
- Möbius Transform: O(n) for n states
|
| 91 |
+
- Distance Computation: O(n²) for all pairs
|
| 92 |
+
- Markov Step: O(n²) for transition matrix
|
| 93 |
+
- Geometry Evolution: O(1) for parameter updates
|
| 94 |
+
- Memory: O(n²) for transition probabilities
|
| 95 |
+
|
| 96 |
+
BIOLOGICAL INSPIRATION:
|
| 97 |
+
- Neural manifold learning in cortical representations
|
| 98 |
+
- Synaptic plasticity reshaping connectivity patterns
|
| 99 |
+
- Developmental changes in brain network topology
|
| 100 |
+
- Spatial navigation with changing environmental maps
|
| 101 |
+
- Memory consolidation through representational geometry changes
|
| 102 |
+
"""
|
| 103 |
+
|
| 104 |
+
from __future__ import annotations
|
| 105 |
+
import torch
|
| 106 |
+
import torch.nn as nn
|
| 107 |
+
import torch.nn.functional as F
|
| 108 |
+
import numpy as np
|
| 109 |
+
import math
|
| 110 |
+
import matplotlib.pyplot as plt
|
| 111 |
+
from typing import List, Dict, Tuple, Optional
|
| 112 |
+
|
| 113 |
+
SAFE_MIN = -1e6
|
| 114 |
+
SAFE_MAX = 1e6
|
| 115 |
+
EPS = 1e-8
|
| 116 |
+
|
| 117 |
+
#||||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 𝔦 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||||#
|
| 118 |
+
|
| 119 |
+
def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
    """Replace non-finite values and clamp a real tensor into a safe range.

    NaN -> 0, +inf -> ``max_val``, -inf -> ``min_val``, then the result is
    clamped to [min_val, max_val].

    BUG FIX: ``torch.isinf`` matches both signs, so the previous code mapped
    ``-inf`` to ``max_val`` (sign flip); ``nan_to_num`` handles each sign.

    Args:
        tensor: Real-valued input tensor.
        min_val: Lower bound of the safe range.
        max_val: Upper bound of the safe range.

    Returns:
        Tensor of the same shape containing only finite, in-range values.
    """
    tensor = torch.nan_to_num(tensor, nan=0.0, posinf=max_val, neginf=min_val)
    return torch.clamp(tensor, min_val, max_val)
|
| 125 |
+
|
| 126 |
+
def safe_complex_division(numerator, denominator, eps=EPS):
    """Divide two complex tensors with numerical safeguards.

    Based on the identity z1/z2 = (z1 * conj(z2)) / |z2|**2, where the
    squared magnitude |z2|**2 = Re(z2 * conj(z2)) is clamped below by
    ``eps`` so the quotient stays finite for near-zero denominators.

    Args:
        numerator: Complex numerator tensor.
        denominator: Complex denominator tensor.
        eps: Minimum allowed squared denominator magnitude.

    Returns:
        The element-wise quotient ``numerator / denominator``.
    """
    d_bar = torch.conj(denominator)
    denom_mag_sq = torch.clamp(torch.real(denominator * d_bar), min=eps)
    return (numerator * d_bar) / denom_mag_sq
|
| 148 |
+
|
| 149 |
+
###########################################################################################################################################
|
| 150 |
+
####################################################- - - MÖBIUS TRANSFORM - - -#######################################################
|
| 151 |
+
|
| 152 |
+
class MobiusTransform(nn.Module):
|
| 153 |
+
"""Learnable Möbius transformation for complex plane warping.
|
| 154 |
+
|
| 155 |
+
Implements a Möbius transformation f(z) = (az + b)/(cz + d) with
|
| 156 |
+
learnable complex parameters. Provides both forward and inverse
|
| 157 |
+
transformations with automatic parameter normalization for stability.
|
| 158 |
+
|
| 159 |
+
Mathematical Properties:
|
| 160 |
+
- Conformal mapping (preserves angles locally)
|
| 161 |
+
- Maps circles and lines to circles and lines
|
| 162 |
+
- Determinant constraint: ad - bc ≠ 0 for invertibility
|
| 163 |
+
- Group structure: composition of Möbius transforms is Möbius
|
| 164 |
+
|
| 165 |
+
The transformation can be initialized as identity or with random parameters.
|
| 166 |
+
Parameters are stored as [real, imaginary] pairs and converted to complex
|
| 167 |
+
tensors during computation.
|
| 168 |
+
"""
|
| 169 |
+
def __init__(self, learnable=True, init_identity=True):
|
| 170 |
+
super().__init__()
|
| 171 |
+
self.learnable = learnable
|
| 172 |
+
|
| 173 |
+
if init_identity:
|
| 174 |
+
# Initialize as identity transform: f(z) = z (a=1, b=0, c=0, d=1)
|
| 175 |
+
a_init, b_init, c_init, d_init = 1.0, 0.0, 0.0, 1.0
|
| 176 |
+
else:
|
| 177 |
+
# Random initialization (ensuring ad - bc ≠ 0)
|
| 178 |
+
a_init, d_init = 1.0, 1.0
|
| 179 |
+
b_init, c_init = 0.1, 0.1
|
| 180 |
+
|
| 181 |
+
if learnable:
|
| 182 |
+
# Möbius parameters: f(z) = (az + b)/(cz + d)
|
| 183 |
+
# Stored as [real, imaginary] pairs for each complex parameter
|
| 184 |
+
self.a = nn.Parameter(torch.tensor([a_init, 0.0]))
|
| 185 |
+
self.b = nn.Parameter(torch.tensor([b_init, 0.0]))
|
| 186 |
+
self.c = nn.Parameter(torch.tensor([c_init, 0.0]))
|
| 187 |
+
self.d = nn.Parameter(torch.tensor([d_init, 0.0]))
|
| 188 |
+
else:
|
| 189 |
+
# Fixed parameters (non-learnable)
|
| 190 |
+
self.register_buffer('a', torch.tensor([a_init, 0.0]))
|
| 191 |
+
self.register_buffer('b', torch.tensor([b_init, 0.0]))
|
| 192 |
+
self.register_buffer('c', torch.tensor([c_init, 0.0]))
|
| 193 |
+
self.register_buffer('d', torch.tensor([d_init, 0.0]))
|
| 194 |
+
|
| 195 |
+
def to_complex(self, param):
|
| 196 |
+
"""Convert [real, imag] tensor to complex tensor.
|
| 197 |
+
|
| 198 |
+
Args:
|
| 199 |
+
param: Tensor [real_part, imaginary_part]
|
| 200 |
+
|
| 201 |
+
Returns:
|
| 202 |
+
Complex tensor with real and imaginary components
|
| 203 |
+
"""
|
| 204 |
+
return torch.complex(param[0], param[1])
|
| 205 |
+
|
| 206 |
+
def get_determinant(self):
|
| 207 |
+
"""Compute transformation determinant ad - bc.
|
| 208 |
+
|
| 209 |
+
The determinant must be non-zero for the transformation to be invertible.
|
| 210 |
+
This is automatically enforced by parameter normalization.
|
| 211 |
+
|
| 212 |
+
Mathematical Details:
|
| 213 |
+
- det(M) = ad - bc where M = [[a,b],[c,d]]
|
| 214 |
+
- det ≠ 0 ensures bijective mapping
|
| 215 |
+
- |det| measures area scaling factor
|
| 216 |
+
|
| 217 |
+
Returns:
|
| 218 |
+
Complex determinant value
|
| 219 |
+
"""
|
| 220 |
+
a_complex = self.to_complex(self.a)
|
| 221 |
+
b_complex = self.to_complex(self.b)
|
| 222 |
+
c_complex = self.to_complex(self.c)
|
| 223 |
+
d_complex = self.to_complex(self.d)
|
| 224 |
+
|
| 225 |
+
det = a_complex * d_complex - b_complex * c_complex
|
| 226 |
+
return det
|
| 227 |
+
|
| 228 |
+
def normalize_parameters(self):
|
| 229 |
+
"""Ensure parameter validity and numerical stability.
|
| 230 |
+
|
| 231 |
+
Enforces constraints:
|
| 232 |
+
1. Determinant magnitude > ε (invertibility)
|
| 233 |
+
2. Parameter magnitudes bounded (numerical stability)
|
| 234 |
+
3. Reset to identity if determinant too small
|
| 235 |
+
|
| 236 |
+
Called automatically before each transformation to maintain stability.
|
| 237 |
+
"""
|
| 238 |
+
if self.learnable:
|
| 239 |
+
with torch.no_grad():
|
| 240 |
+
det = torch.abs(self.get_determinant())
|
| 241 |
+
if det < EPS:
|
| 242 |
+
# Reset to identity if determinant too small
|
| 243 |
+
one = torch.tensor([1.0, 0.0], device=self.a.device, dtype=self.a.dtype)
|
| 244 |
+
self.a.copy_(one)
|
| 245 |
+
self.d.copy_(one)
|
| 246 |
+
self.b.mul_(0.1)
|
| 247 |
+
self.c.mul_(0.1)
|
| 248 |
+
|
| 249 |
+
# Clamp parameter magnitudes for numerical stability
|
| 250 |
+
for p in (self.a, self.b, self.c, self.d):
|
| 251 |
+
p.clamp_(-10.0, 10.0)
|
| 252 |
+
|
| 253 |
+
def transform(self, z):
|
| 254 |
+
"""Apply Möbius transform: f(z) = (az + b)/(cz + d).
|
| 255 |
+
|
| 256 |
+
Performs the forward Möbius transformation on complex input.
|
| 257 |
+
Automatically normalizes parameters before computation for stability.
|
| 258 |
+
|
| 259 |
+
Mathematical Details:
|
| 260 |
+
- Numerator: az + b (complex multiplication and addition)
|
| 261 |
+
- Denominator: cz + d (complex multiplication and addition)
|
| 262 |
+
- Division: safe complex division with stability checks
|
| 263 |
+
|
| 264 |
+
Args:
|
| 265 |
+
z: Complex input tensor [..., any_shape]
|
| 266 |
+
|
| 267 |
+
Returns:
|
| 268 |
+
Transformed complex tensor of same shape
|
| 269 |
+
"""
|
| 270 |
+
self.normalize_parameters()
|
| 271 |
+
|
| 272 |
+
a_complex = self.to_complex(self.a)
|
| 273 |
+
b_complex = self.to_complex(self.b)
|
| 274 |
+
c_complex = self.to_complex(self.c)
|
| 275 |
+
d_complex = self.to_complex(self.d)
|
| 276 |
+
|
| 277 |
+
# Numerator: az + b
|
| 278 |
+
numerator = a_complex * z + b_complex
|
| 279 |
+
|
| 280 |
+
# Denominator: cz + d
|
| 281 |
+
denominator = c_complex * z + d_complex
|
| 282 |
+
|
| 283 |
+
# Safe complex division
|
| 284 |
+
transformed = safe_complex_division(numerator, denominator)
|
| 285 |
+
|
| 286 |
+
return transformed
|
| 287 |
+
|
| 288 |
+
def inverse_transform(self, w):
|
| 289 |
+
"""Apply inverse Möbius transform: f⁻¹(w) = (dw - b)/(-cw + a).
|
| 290 |
+
|
| 291 |
+
Computes the inverse transformation to map from transformed space
|
| 292 |
+
back to original space. Uses the standard Möbius inverse formula.
|
| 293 |
+
|
| 294 |
+
Mathematical Details:
|
| 295 |
+
- For f(z) = (az + b)/(cz + d)
|
| 296 |
+
- f⁻¹(w) = (dw - b)/(-cw + a)
|
| 297 |
+
- Inverse determinant: det(f⁻¹) = 1/det(f)
|
| 298 |
+
|
| 299 |
+
Args:
|
| 300 |
+
w: Complex input in transformed space
|
| 301 |
+
|
| 302 |
+
Returns:
|
| 303 |
+
Original complex values before transformation
|
| 304 |
+
"""
|
| 305 |
+
self.normalize_parameters()
|
| 306 |
+
|
| 307 |
+
a_complex = self.to_complex(self.a)
|
| 308 |
+
b_complex = self.to_complex(self.b)
|
| 309 |
+
c_complex = self.to_complex(self.c)
|
| 310 |
+
d_complex = self.to_complex(self.d)
|
| 311 |
+
|
| 312 |
+
# Inverse: f⁻¹(w) = (dw - b)/(-cw + a)
|
| 313 |
+
numerator = d_complex * w - b_complex
|
| 314 |
+
denominator = -c_complex * w + a_complex
|
| 315 |
+
|
| 316 |
+
return safe_complex_division(numerator, denominator)
|
| 317 |
+
|
| 318 |
+
def get_transform_info(self):
|
| 319 |
+
"""Get comprehensive information about current transformation.
|
| 320 |
+
|
| 321 |
+
Returns diagnostic information including determinant, identity check,
|
| 322 |
+
and current parameter values for analysis and debugging.
|
| 323 |
+
|
| 324 |
+
Returns:
|
| 325 |
+
Dictionary with transformation properties
|
| 326 |
+
"""
|
| 327 |
+
det = self.get_determinant()
|
| 328 |
+
one = torch.tensor(1.0, device=det.device, dtype=det.real.dtype)
|
| 329 |
+
return {
|
| 330 |
+
'determinant': det,
|
| 331 |
+
'is_identity': torch.allclose(torch.abs(det), one, atol=1e-6),
|
| 332 |
+
'parameters': {
|
| 333 |
+
'a': self.to_complex(self.a),
|
| 334 |
+
'b': self.to_complex(self.b),
|
| 335 |
+
'c': self.to_complex(self.c),
|
| 336 |
+
'd': self.to_complex(self.d)
|
| 337 |
+
}
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
###########################################################################################################################################
|
| 341 |
+
#############################################- - - COMPLEX STATE MARKOV CHAIN - - -####################################################
|
| 342 |
+
|
| 343 |
+
class ComplexStateMarkovChain(nn.Module):
    """Markov chain with complex state positions and distance-based transitions.

    Implements a Markov chain where states are positioned in the complex plane
    and transition probabilities depend on distances in the (potentially transformed)
    complex space. Supports multiple distance kernels and learnable parameters.

    Mathematical Framework:
    - State positions: z₁, z₂, ..., zₙ ∈ ℂ (learnable)
    - Transformed positions: w_i = f(z_i) via Möbius transform
    - Distance-based transitions: P(i→j) ∝ K(|w_i - w_j|)
    - Multiple kernel options: Gaussian, inverse power, linear

    The chain combines base transition logits with distance-based terms,
    allowing both learned and geometric transition structure.
    """
    def __init__(self, num_states, state_embedding_dim=64, distance_kernel='gaussian'):
        """Initialize state positions, embeddings, and transition parameters.

        Args:
            num_states: Number of discrete Markov states.
            state_embedding_dim: Width of the per-state feature embedding.
            distance_kernel: 'gaussian', 'inverse', or any other value for the
                linear-cutoff kernel (which needs no extra parameter).
        """
        super().__init__()
        self.num_states = num_states
        self.state_embedding_dim = state_embedding_dim
        self.distance_kernel = distance_kernel

        # Complex state positions (learnable parameters).
        # Each state has a position in the complex plane; the parameter tensor
        # itself has a complex dtype.
        self.state_positions = nn.Parameter(
            torch.complex(
                torch.randn(num_states) * 2.0,  # Real parts
                torch.randn(num_states) * 2.0   # Imaginary parts
            )
        )

        # State feature embeddings for additional representation
        self.state_embeddings = nn.Parameter(torch.randn(num_states, state_embedding_dim) * 0.1)

        # Learnable transition parameters
        self.base_transition_logits = nn.Parameter(torch.randn(num_states, num_states) * 0.1)
        self.distance_scale = nn.Parameter(torch.tensor(1.0))
        self.distance_bias = nn.Parameter(torch.tensor(0.0))

        # Kernel-specific parameters; only 'gaussian' and 'inverse' have one,
        # the linear kernel is parameter-free.
        if distance_kernel == 'gaussian':
            self.kernel_width = nn.Parameter(torch.tensor(1.0))
        elif distance_kernel == 'inverse':
            self.kernel_power = nn.Parameter(torch.tensor(1.0))

    def compute_transformed_distances(self, mobius_transform):
        """Compute pairwise distances between states in transformed space.

        Applies the Möbius transformation to all state positions and computes
        the Euclidean distances in the transformed complex plane.

        Mathematical Details:
        - Transform: w_i = f(z_i) for each state position z_i
        - Distance: d(i,j) = |w_i - w_j| = |f(z_i) - f(z_j)|
        - Results in [num_states, num_states] distance matrix

        Args:
            mobius_transform: MobiusTransform instance

        Returns:
            Tuple of (distance_matrix, transformed_positions); the distance
            matrix is real-valued and symmetric with a zero diagonal.
        """
        # Transform all state positions through Möbius map
        transformed_positions = mobius_transform.transform(self.state_positions)

        # Compute pairwise distances in transformed space via broadcasting
        pos_i = transformed_positions.unsqueeze(0)  # [1, num_states]
        pos_j = transformed_positions.unsqueeze(1)  # [num_states, 1]

        # Complex distance: |w_i - w_j|
        complex_diff = pos_i - pos_j
        distances = torch.abs(complex_diff)

        return distances, transformed_positions

    def distance_to_probability(self, distances):
        """Convert distances to probability contributions via kernel function.

        Applies the selected kernel function to transform geometric distances
        into probability weights. Different kernels create different locality
        structures in the transition probabilities.

        Kernel Options:
        - Gaussian: K(d) = exp(-d²/(2σ²)) - smooth, localized
        - Inverse: K(d) = 1/(d^α + ε) - power-law decay
        - Linear: K(d) = max(0, 1-d) - linear decay with cutoff

        Args:
            distances: Distance matrix [num_states, num_states]

        Returns:
            Probability contribution matrix [num_states, num_states]
        """
        # Floor distances at EPS so the inverse kernel never divides by zero
        # (the diagonal of the distance matrix is exactly zero).
        distances = torch.clamp(distances, min=EPS)

        if self.distance_kernel == 'gaussian':
            # Clamp the width so the Gaussian neither collapses nor flattens.
            width = torch.clamp(self.kernel_width, min=0.1, max=10.0)
            prob_contrib = torch.exp(-distances**2 / (2 * width**2))
        elif self.distance_kernel == 'inverse':
            power = torch.clamp(self.kernel_power, min=0.5, max=3.0)
            prob_contrib = 1.0 / (distances**power + EPS)
        else:
            # Linear kernel with cutoff (default for any other kernel name)
            prob_contrib = torch.clamp(1.0 - distances, min=0.0)

        return prob_contrib

    def compute_transition_matrix(self, mobius_transform):
        """Compute full transition matrix combining geometry and learned weights.

        Creates the Markov transition matrix by combining:
        1. Base transition logits (learned affinities)
        2. Distance-based contributions (geometric structure)
        3. Scaling and bias parameters
        4. Softmax normalization for valid probabilities

        Mathematical Details:
        - Base logits: θᵢⱼ (learned transition preferences)
        - Distance terms: β·K(d(wᵢ,wⱼ)) + γ (scaled kernel)
        - Combined: logits = θᵢⱼ + β·K(d(wᵢ,wⱼ)) + γ
        - Probabilities: P(i→j) = softmax_j(logits_i)

        Args:
            mobius_transform: MobiusTransform for state space warping

        Returns:
            Tuple of (transition_matrix, transformed_positions); the matrix
            is row-stochastic (each row sums to 1 via the row-wise softmax).
        """
        # Get transformed distances
        distances, transformed_positions = self.compute_transformed_distances(mobius_transform)

        # Convert distances to probability contributions
        distance_contrib = self.distance_to_probability(distances)

        # Scale and bias distance contributions (both clamped for stability)
        scale = torch.clamp(self.distance_scale, min=0.1, max=10.0)
        bias = torch.clamp(self.distance_bias, min=-5.0, max=5.0)
        scaled_distance = scale * distance_contrib + bias

        # Combine with base transition logits
        transition_logits = self.base_transition_logits + scaled_distance

        # Add small diagonal bias (self-transition preference) for stability
        transition_logits = transition_logits + torch.eye(self.num_states, device=transition_logits.device) * 0.05

        # Convert to valid probability matrix (row-stochastic)
        transition_matrix = F.softmax(transition_logits, dim=1)

        return transition_matrix, transformed_positions

    def forward(self, initial_state, num_steps, mobius_transform):
        """Execute Markov chain evolution in transformed space.

        Runs the Markov chain for the specified number of steps using
        transition probabilities computed in the Möbius-transformed space.
        Records complete trajectory for analysis. The transition matrix is
        computed once and reused for every step.

        Mathematical Process:
        1. Compute transition matrix P in transformed space
        2. Evolve state: s_{t+1} = s_t · P
        3. Track most likely state positions over time
        4. Return complete trajectory and final state

        Args:
            initial_state: Initial state distribution [batch_size, num_states]
                or [num_states] (a 1-D input is promoted to batch size 1);
                assumes each row is a probability distribution — not validated.
            num_steps: Number of Markov steps to execute
            mobius_transform: MobiusTransform for space warping

        Returns:
            Dictionary containing:
            - 'trajectory': [num_steps + 1, batch, num_states] (includes the
              initial state as entry 0)
            - 'final_state': state distribution after the last step
            - 'state_positions': transformed positions of the argmax state at
              each step, [num_steps + 1, batch]
            - 'transition_matrix', 'transformed_positions': diagnostics
        """
        # NOTE(review): batch_size is computed but never used below.
        batch_size = initial_state.shape[0] if initial_state.dim() > 1 else 1

        if initial_state.dim() == 1:
            current_state = initial_state.unsqueeze(0)
        else:
            current_state = initial_state

        # Get transition matrix in transformed space (fixed for all steps)
        transition_matrix, transformed_positions = self.compute_transition_matrix(mobius_transform)

        # Store complete trajectory, seeded with the initial distribution
        trajectory = [current_state.clone()]
        state_positions = [transformed_positions[current_state.argmax(dim=-1)]]

        # Execute Markov chain evolution
        for step in range(num_steps):
            # Row-vector times matrix: s_{t+1} = s_t · P (batched)
            current_state = torch.matmul(current_state, transition_matrix)
            trajectory.append(current_state.clone())

            # Track most likely state positions in transformed space
            most_likely_states = current_state.argmax(dim=-1)
            state_positions.append(transformed_positions[most_likely_states])

        return {
            'trajectory': torch.stack(trajectory),
            'final_state': current_state,
            'state_positions': torch.stack(state_positions),
            'transition_matrix': transition_matrix,
            'transformed_positions': transformed_positions
        }
|
| 545 |
+
|
| 546 |
+
###########################################################################################################################################
|
| 547 |
+
############################################- - - MÖBIUS MARKOV SYSTEM - - -###########################################################
|
| 548 |
+
|
| 549 |
+
class MobiusMarkovSystem(nn.Module):
    """Complete system integrating Möbius transformations with Markov dynamics.

    Implements the full Möbius-Markov architecture where:
    1. Complex state positions define Markov chain geometry
    2. Möbius transformations dynamically warp the state space
    3. Transformation parameters evolve based on system state
    4. State transitions depend on transformed distances

    This creates a meta-learning system where the geometry of state space
    adapts based on the system's evolutionary trajectory, enabling
    discovery of optimal representational geometries for different tasks.

    Architecture Components:
    - MobiusTransform: Learnable complex plane warping
    - ComplexStateMarkovChain: Distance-based probabilistic transitions
    - Evolution dynamics: State-dependent geometry adaptation
    - Encoder/decoder: Interface with external representations
    """
    def __init__(self, num_states, state_embedding_dim=64, evolution_steps=10):
        """Build the Möbius transform, Markov chain, and encoder/decoder nets.

        Args:
            num_states: Number of discrete Markov states.
            state_embedding_dim: Width of the internal state embeddings.
            evolution_steps: Number of geometry-update/Markov-step cycles
                performed per forward() call.
        """
        super().__init__()
        self.num_states = num_states
        self.evolution_steps = evolution_steps

        # Core components
        self.mobius_transform = MobiusTransform(learnable=True, init_identity=True)
        self.markov_chain = ComplexStateMarkovChain(num_states, state_embedding_dim)

        # Evolution dynamics for Möbius parameters:
        # maps a state embedding to 8 real values = 4 complex parameter deltas
        self.mobius_evolution = nn.Sequential(
            nn.Linear(state_embedding_dim, state_embedding_dim),
            nn.Tanh(),
            nn.Linear(state_embedding_dim, 8),  # 4 complex parameters = 8 real values
        )

        # State encoder/decoder for external interface
        self.state_encoder = nn.Sequential(
            nn.Linear(num_states, state_embedding_dim),
            nn.LayerNorm(state_embedding_dim),
            nn.ReLU(),
            nn.Linear(state_embedding_dim, state_embedding_dim)
        )

        self.state_decoder = nn.Sequential(
            nn.Linear(state_embedding_dim, state_embedding_dim),
            nn.ReLU(),
            nn.Linear(state_embedding_dim, num_states),
            nn.Softmax(dim=-1)
        )

        # Geometric evolution controller: learnable rate, clamped to
        # [0.01, 1.0] at use time in evolve_mobius_parameters.
        self.geometry_controller = nn.Parameter(torch.tensor(0.1))

    def evolve_mobius_parameters(self, state_embedding):
        """Evolve Möbius transformation parameters based on current state.

        Updates the complex parameters (a,b,c,d) of the Möbius transformation
        based on the current state embedding. This enables the geometry to
        adapt dynamically as the system evolves.

        Mathematical Details:
        - Parameter updates: Δp = η·G(embedding)
        - G: neural network mapping embeddings to parameter changes
        - η: learnable evolution rate
        - Updates applied directly to parameter tensors

        The evolution is designed to be conservative (small steps, extra 0.01
        factor) to maintain stability while allowing adaptive geometry.

        NOTE(review): the in-place updates happen under torch.no_grad(), so
        gradients do NOT flow from the loss back through these geometry
        updates into mobius_evolution / geometry_controller via this path.

        Args:
            state_embedding: Current state representation [embedding_dim]
        """
        # Generate parameter evolution signal
        evolution_signal = self.mobius_evolution(state_embedding)
        evolution_rate = torch.clamp(self.geometry_controller, 0.01, 1.0)

        if self.mobius_transform.learnable:
            with torch.no_grad():
                # Reshape evolution signal to 4 complex parameters (8 real values)
                updates = (evolution_signal.view(4, 2) * evolution_rate * 0.01)\
                    .to(device=self.mobius_transform.a.device, dtype=self.mobius_transform.a.dtype)

                # Apply updates to Möbius parameters
                self.mobius_transform.a.add_(updates[0])
                self.mobius_transform.b.add_(updates[1])
                self.mobius_transform.c.add_(updates[2])
                self.mobius_transform.d.add_(updates[3])

        # Ensure parameters remain valid (invertible and bounded)
        self.mobius_transform.normalize_parameters()

    def forward(self, initial_state, return_full_trajectory=False):
        """Execute complete Möbius-Markov evolution cycle.

        Implements the full system dynamics:
        1. Encode initial state to embedding space
        2. Iteratively evolve: geometry → Markov step → state update
        3. Adapt Möbius parameters based on current state
        4. Generate final prediction with learned decoder

        This creates a feedback loop where the geometry of state space
        continuously adapts based on the system's trajectory, enabling
        meta-learning of optimal spatial representations.

        Note: calling forward() mutates the Möbius parameters in place
        (via evolve_mobius_parameters), so repeated calls are not
        independent.

        Args:
            initial_state: Initial state distribution [batch_size, num_states]
            return_full_trajectory: Whether to return complete evolution history

        Returns:
            Dictionary containing final state, prediction, and optional trajectory
        """
        # Encode initial state to embedding space.
        # NOTE(review): this result is overwritten on the first loop iteration
        # below, so this call is redundant work.
        state_embedding = self.state_encoder(initial_state)

        # Evolution history dict is built unconditionally; it only stays
        # empty when return_full_trajectory is False.
        evolution_history = {
            'states': [],
            'geometries': [],
            'transition_matrices': [],
            'transformed_positions': []
        }

        current_state = initial_state

        # Multi-step evolution loop
        for step in range(self.evolution_steps):
            # Encode current state
            state_embedding = self.state_encoder(current_state)

            # Evolve Möbius parameters based on current state
            # (mean over the batch yields one embedding vector)
            self.evolve_mobius_parameters(state_embedding.mean(dim=0))

            # Execute one Markov step in current transformed space
            markov_output = self.markov_chain.forward(
                current_state,
                num_steps=1,
                mobius_transform=self.mobius_transform
            )

            # Update current state distribution
            current_state = markov_output['final_state']

            # Store evolution history
            if return_full_trajectory:
                evolution_history['states'].append(current_state.clone())
                evolution_history['geometries'].append(self.mobius_transform.get_transform_info())
                evolution_history['transition_matrices'].append(markov_output['transition_matrix'])
                evolution_history['transformed_positions'].append(markov_output['transformed_positions'])

        # Generate final prediction using learned decoder
        final_embedding = self.state_encoder(current_state)
        final_prediction = self.state_decoder(final_embedding)

        output = {
            'final_state': current_state,
            'final_prediction': final_prediction,
            'final_embedding': final_embedding,
            'final_geometry': self.mobius_transform.get_transform_info()
        }

        if return_full_trajectory:
            output['evolution_history'] = evolution_history

        return output

    def predict_sequence(self, initial_state, sequence_length):
        """Generate sequence of predictions through iterative evolution.

        Uses the system as a generative model by iteratively applying
        the evolution process and collecting predictions at each step.
        Each step feeds the previous final_state back in as input.

        Args:
            initial_state: Starting state distribution
            sequence_length: Number of prediction steps

        Returns:
            Tensor of predictions [sequence_length, batch_size, num_states]
        """
        predictions = []
        current_state = initial_state

        for _ in range(sequence_length):
            output = self.forward(current_state)
            predictions.append(output['final_prediction'])
            current_state = output['final_state']

        return torch.stack(predictions)

    def get_system_info(self):
        """Get comprehensive system state information.

        Returns diagnostic information about the current system state
        including geometry parameters, state positions, and evolution settings.

        Returns:
            Dictionary with complete system diagnostics
        """
        return {
            'num_states': self.num_states,
            'evolution_steps': self.evolution_steps,
            'current_geometry': self.mobius_transform.get_transform_info(),
            'state_positions': self.markov_chain.state_positions,
            'geometry_evolution_rate': self.geometry_controller.item()
        }
|
| 754 |
+
|
| 755 |
+
###########################################################################################################################################
|
| 756 |
+
###################################################- - - DEMO AND TESTING - - -########################################################
|
| 757 |
+
|
| 758 |
+
def test_mobius_markov():
    """Comprehensive test of Möbius-Markov system functionality.

    Smoke-tests construction, transform invertibility, multi-step evolution,
    sequence prediction, and geometry adaptation, printing diagnostics along
    the way. Uses unseeded torch.randn, so printed values vary between runs.

    Returns:
        True on completion (assert-free demo; failures surface as exceptions).
    """
    print("Testing Möbius Markov - Non-Euclidean Probabilistic Systems")
    print("=" * 75)

    # Create Möbius-Markov system
    num_states = 8
    system = MobiusMarkovSystem(
        num_states=num_states,
        state_embedding_dim=32,
        evolution_steps=5
    )

    print(f"Created Möbius-Markov System:")
    print(f" - Number of states: {num_states}")
    print(f" - Evolution steps: {system.evolution_steps}")
    print(f" - State embedding dimension: 32")
    print(f" - Complex state space with adaptive geometry")

    # Create initial state distribution (one-hot on state 0 for every sample)
    batch_size = 4
    initial_state = torch.zeros(batch_size, num_states)
    initial_state[:, 0] = 1.0  # Start in state 0

    print(f"\nTesting with batch size: {batch_size}")
    print("Initial state: All samples start in state 0")

    # Test Möbius transformation components: round-trip f⁻¹(f(z)) ≈ z
    print("\nTesting Möbius transformation...")
    mobius = system.mobius_transform
    test_complex = torch.complex(torch.randn(5), torch.randn(5))
    transformed = mobius.transform(test_complex)
    inverse_transformed = mobius.inverse_transform(transformed)

    # Check invertibility
    reconstruction_error = torch.mean(torch.abs(test_complex - inverse_transformed))
    print(f" - Transformation invertibility error: {reconstruction_error:.6f}")

    # Test geometry info
    geometry_info = mobius.get_transform_info()
    det_magnitude = torch.abs(geometry_info['determinant'])
    print(f" - Determinant magnitude: {det_magnitude:.4f}")
    print(f" - Is identity: {geometry_info['is_identity']}")

    # Test forward evolution (mutates the system's Möbius parameters)
    print("\nExecuting Möbius-Markov evolution...")
    output = system(initial_state, return_full_trajectory=True)

    print("Evolution results:")
    print(f" - Final state shape: {output['final_state'].shape}")
    print(f" - Final prediction shape: {output['final_prediction'].shape}")

    # Analyze geometric evolution over the first few recorded steps
    print("\nGeometric evolution analysis:")
    history = output['evolution_history']

    for step in range(min(3, len(history['geometries']))):
        geometry = history['geometries'][step]
        det = torch.abs(geometry['determinant'])
        print(f" Step {step+1}: Determinant magnitude = {det:.3f}")

    # Test state transitions and trajectory
    print("\nState transition analysis:")
    final_states = output['final_state']
    for i in range(min(batch_size, 3)):
        most_likely = final_states[i].argmax().item()
        confidence = final_states[i].max().item()
        print(f" Sample {i+1}: Most likely state = {most_likely}, Confidence = {confidence:.3f}")

    # Test complex state positions
    print("\nComplex state space analysis:")
    state_positions = system.markov_chain.state_positions
    print(f" - State positions in complex plane:")
    for i in range(min(4, num_states)):
        pos = state_positions[i]
        real, imag = pos.real.item(), pos.imag.item()
        magnitude = torch.abs(pos).item()
        print(f" State {i}: {real:.3f} + {imag:.3f}i (|z| = {magnitude:.3f})")

    # Test sequence prediction (autoregressive use of the system)
    print("\n Testing sequence prediction...")
    sequence_length = 3
    sequence = system.predict_sequence(initial_state[:1], sequence_length)

    print(f"Generated sequence of length {sequence_length}:")
    for t in range(sequence_length):
        most_likely = sequence[t, 0].argmax().item()
        confidence = sequence[t, 0].max().item()
        print(f" Time {t+1}: State {most_likely} (confidence: {confidence:.3f})")

    # System information and diagnostics
    info = system.get_system_info()
    print(f"\nSystem diagnostics:")
    print(f" - Geometry evolution rate: {info['geometry_evolution_rate']:.4f}")
    print(f" - Current determinant: {torch.abs(info['current_geometry']['determinant']):.3f}")

    # Test adaptive behavior with different inputs
    print("\nTesting adaptive geometry...")

    # Concentrated initial state
    concentrated_state = torch.zeros(1, num_states)
    concentrated_state[0, 0] = 1.0
    conc_output = system(concentrated_state)

    # Uniform initial state
    uniform_state = torch.ones(1, num_states) / num_states
    uniform_output = system(uniform_state)

    conc_det = torch.abs(conc_output['final_geometry']['determinant'])
    uniform_det = torch.abs(uniform_output['final_geometry']['determinant'])

    print(f" - Concentrated input → final determinant: {conc_det:.4f}")
    print(f" - Uniform input → final determinant: {uniform_det:.4f}")
    print(f" - Geometry adaptation difference: {abs(conc_det - uniform_det):.4f}")

    print("\n Möbius-Markov test completed!")
    print("✓ Non-Euclidean state space with dynamic geometry")
    print("✓ Markov transitions in continuously warped space")
    print("✓ Learnable Möbius transformations with invertibility")
    print("✓ State-dependent geometric evolution")
    print("✓ Complex plane representations and distance-based transitions")
    print("✓ Adaptive spatial structure for different input patterns")

    return True
|
| 882 |
+
|
| 883 |
+
def visualization_demo():
    """Demonstrate geometric transformations and state evolution.

    Prints the initial Markov state layout in the complex plane, then runs
    three evolution steps, showing at each step how the current Möbius
    transform warps the state positions. Console-only demo; returns nothing.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" GEOMETRIC TRANSFORMATION DEMO")
    print(rule)

    # Keep the system small so the printed tables stay readable.
    demo_system = MobiusMarkovSystem(num_states=6, evolution_steps=3)

    # Snapshot of where each Markov state sits in the complex plane
    # (detached so the demo does no gradient bookkeeping).
    base_positions = demo_system.markov_chain.state_positions.detach()
    print("Initial state positions (complex plane):")
    for idx, z in enumerate(base_positions):
        re_part = z.real.item()
        im_part = z.imag.item()
        radius = torch.abs(z).item()
        theta_deg = torch.angle(z).item() * 180 / math.pi
        print(f" State {idx}: {re_part:.3f} + {im_part:.3f}i (r={radius:.3f}, θ={theta_deg:.1f}°)")

    print("\nApplying geometric transformations...")

    # Probe distribution fully concentrated on state 0.
    probe = torch.zeros(1, 6)
    probe[0, 0] = 1.0

    for step_idx in range(3):
        # Current Möbius parameters, and the warped copies of the original
        # state positions under the (evolving) transform.
        transform_info = demo_system.mobius_transform.get_transform_info()
        warped = demo_system.mobius_transform.transform(base_positions)

        print(f"\nStep {step_idx+1}:")
        print(f" Transform determinant: {torch.abs(transform_info['determinant']):.3f}")

        # Show how the first few states (at most three) are transformed.
        for idx in range(min(3, len(warped))):
            before = base_positions[idx]
            after = warped[idx]
            print(f" State {idx}: {before.real:.2f}+{before.imag:.2f}i → {after.real:.2f}+{after.imag:.2f}i")

        # Advance the system one step and feed its output distribution back in;
        # this is what drives the geometry to change between iterations.
        step_output = demo_system(probe)
        probe = step_output['final_state']

    print("\n Geometric evolution creates rich, non-Euclidean probabilistic dynamics!")
    print(" State space continuously warps based on system trajectory")
    print(" Distance-based transitions adapt to transformed geometry")
|
| 931 |
+
if __name__ == "__main__":
    # Run the full functional test first, then the geometry walkthrough demo.
    test_mobius_markov()
    visualization_demo()
|
| 935 |
+
###########################################################################################################################################
|
| 936 |
+
###########################################################################################################################################
|