Spaces:

RFTSystems
/

minimal_self_awareness

Sleeping

App Files Files Community

RFTSystems committed on Nov 25, 2025

Commit

4794088

verified ·

1 Parent(s): 65fb737

Create minimal_self_full.py

Browse files

Files changed (1) hide show

minimal_self_full.py +327 -0

minimal_self_full.py ADDED Viewed

	@@ -0,0 +1,327 @@

+import numpy as np
+import random
+from typing import List, Optional
+from collections import Counter
+import pandas as pd
+import matplotlib.pyplot as plt
+# --- Classes ---
class SocialEntity:
    """A second agent that wanders the grid by taking uniformly random moves.

    The most recent move is kept in ``last_action`` so an observer can read
    both where the entity is and what it just did.

    Args:
        start_pos: Initial ``(x, y)`` position; copied and stored as floats.
        actions: Candidate displacement vectors; one is drawn per ``move()``.
        bounds: ``(low, high)`` limits applied to both coordinates.
        seed: Base seed; the entity's private generator is seeded with
            ``seed + 2``.
    """

    def __init__(self, start_pos: np.ndarray, actions: List[np.ndarray], bounds: tuple = (0, 2), seed: int = 44):
        # Use a private generator instead of reseeding the global `random`
        # and `np.random` state: reseeding globals meant that whichever object
        # was constructed last silently replaced this entity's seed, and that
        # every object drew from one shared stream.
        self._rng = random.Random(seed + 2)
        # np.array(...) always copies, so the caller's array is never aliased.
        self.pos = np.array(start_pos, dtype=float)
        self.actions = actions
        self.bounds = bounds
        self.last_action = np.array([0, 0])

    def move(self) -> None:
        """Take one random action and clamp the new position to ``bounds``."""
        chosen_action = self._rng.choice(self.actions)
        self.last_action = chosen_action.copy()
        self.pos = np.clip(self.pos + chosen_action, self.bounds[0], self.bounds[1])
class MovingObstacle:
    """An obstacle that drifts around the grid by taking random moves.

    Args:
        start_pos: Initial ``(x, y)`` position; copied and stored as floats.
        actions: Candidate displacement vectors; one is drawn per ``move()``.
        bounds: ``(low, high)`` limits applied to both coordinates.
        seed: Base seed; the obstacle's private generator is seeded with
            ``seed + 1``.
    """

    def __init__(self, start_pos: np.ndarray, actions: List[np.ndarray], bounds: tuple = (0, 2), seed: int = 42):
        # Use a private generator instead of reseeding the global `random`
        # and `np.random` state, so this obstacle's seed is not overwritten
        # by objects constructed afterwards and its draws do not interleave
        # with anyone else's.
        self._rng = random.Random(seed + 1)
        # np.array(...) always copies, so the caller's array is never aliased.
        self.pos = np.array(start_pos, dtype=float)
        self.actions = actions
        self.bounds = bounds

    def move(self) -> None:
        """Take one random action and clamp the new position to ``bounds``."""
        chosen_action = self._rng.choice(self.actions)
        self.pos = np.clip(self.pos + chosen_action, self.bounds[0], self.bounds[1])
class MinimalSelf:
    """Grid agent combining a self-prediction loop with a tabular Q-learner.

    On every ``step()`` the agent picks a move (epsilon-greedy over its
    Q-table), predicts the sensation that move should produce, moves, compares
    the prediction with the actual sensation, adjusts its "body bit" strength
    from the prediction quality, and applies a one-step Q-learning update.

    Args:
        seed: Seed for the agent's private random generator.
        error_window: Number of most recent prediction errors that are averaged.
        uncertainty_factor: Upper bound of the uniform noise that scales ``C_min``.
        initial_body_bit_strength: Starting value of ``body_bit_strength``.
        body_bit_decay_rate: Amount subtracted from the strength every step.
        body_bit_reinforce_factor: Scale of the per-step reinforcement.
        learning_rate: Q-learning step size.
        discount_factor: Q-learning discount applied to the next state's value.
        epsilon: Probability of taking a uniformly random action.
        reward_type: Label stored on the instance.
            NOTE(review): no method of this class reads it, so every label
            currently produces the same reward.
    """

    def __init__(self, seed: int = 42, error_window: int = 5, uncertainty_factor: float = 0.2,
                 initial_body_bit_strength: float = 1.0, body_bit_decay_rate: float = 0.01,
                 body_bit_reinforce_factor: float = 0.1,
                 learning_rate: float = 0.1, discount_factor: float = 0.9, epsilon: float = 0.2,
                 reward_type: str = "original"):
        # Use a private generator instead of reseeding the global `random` and
        # `np.random` state. Reseeding globals overwrote the seeds of entities
        # constructed earlier and made the agent, obstacle and social entity
        # all consume one shared stream.
        self._rng = random.Random(seed)
        # Embodied state
        self.pos = np.array([1, 1]).astype(float)
        self.body_bit_strength = initial_body_bit_strength
        self.body_bit_decay_rate = body_bit_decay_rate
        self.body_bit_reinforce_factor = body_bit_reinforce_factor
        # Exploration bookkeeping (initialised here; not read by any method below)
        self.visited_positions = set()
        self.previous_body_bit_strength = initial_body_bit_strength
        # Actions
        self.actions = [
            np.array([0, 1]),   # N
            np.array([1, 0]),   # E
            np.array([0, -1]),  # S
            np.array([-1, 0]),  # W
        ]
        self.action_map = {tuple(a.astype(int)): i for i, a in enumerate(self.actions)}
        self.reverse_action_map = dict(enumerate(self.actions))
        self.last_action = np.array([0, 0])
        # Error tracking
        self.errors_history: List[float] = []
        self.error_window = error_window
        self.uncertainty_factor = uncertainty_factor
        # Environment
        self.env_bounds = (0, 2)
        self.obstacle = None
        self.social_entity = None
        self.previous_social_entity_action = np.array([0, 0])
        # Q-learning: one value per (x, y, action); positions index the table
        # directly, which relies on the lower bound being 0.
        side = self.env_bounds[1] + 1
        self.q_table = np.zeros((side, side, len(self.actions)))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.prev_state = None
        self.prev_action_idx = None
        self.reward_type = reward_type

    def set_obstacle(self, obstacle: "MovingObstacle") -> None:
        """Attach an obstacle; its position is appended to every sensation."""
        self.obstacle = obstacle

    def set_social_entity(self, social_entity: "SocialEntity") -> None:
        """Attach a social entity; its position and last action are sensed."""
        self.social_entity = social_entity

    def _sensation_at(self, pos: np.ndarray) -> np.ndarray:
        """Build the sensation vector the agent would have at ``pos``."""
        values = [pos[0], pos[1], self.body_bit_strength]
        if self.obstacle is not None:
            values.extend([self.obstacle.pos[0], self.obstacle.pos[1]])
        if self.social_entity is not None:
            values.extend([self.social_entity.pos[0], self.social_entity.pos[1],
                           self.social_entity.last_action[0], self.social_entity.last_action[1]])
        return np.array(values, dtype=float)

    def sensory_input(self) -> np.ndarray:
        """Clamp ``self.pos`` to the environment and return the current sensation."""
        self.pos = np.clip(self.pos, self.env_bounds[0], self.env_bounds[1])
        return self._sensation_at(self.pos)

    def counterfactual_sensory(self, action: np.ndarray) -> np.ndarray:
        """Return the sensation expected after ``action`` without moving."""
        imagined_pos = np.clip(self.pos + action, self.env_bounds[0], self.env_bounds[1])
        return self._sensation_at(imagined_pos)

    def choose_action(self) -> np.ndarray:
        """Pick an action epsilon-greedily and remember the (state, action) pair."""
        state = tuple(self.pos.astype(int))
        if self._rng.random() < self.epsilon:
            action_idx = self._rng.randrange(len(self.actions))
        else:
            # argmax breaks ties in favour of the lowest action index.
            action_idx = int(np.argmax(self.q_table[state]))
        self.prev_state = state
        self.prev_action_idx = action_idx
        return self.reverse_action_map[action_idx].copy()

    def step(self) -> dict:
        """Advance the agent (and any attached entities) by one time step.

        Returns:
            A dict with keys ``sensation``, ``action``, ``error``, ``position``,
            ``predictive_rate``, ``C_min``, ``body_bit_strength`` and ``reward``.
        """
        action = self.choose_action()
        # The prediction is made before anything moves.
        predicted = self.counterfactual_sensory(action)
        self.pos += action
        if self.social_entity is not None:
            self.social_entity.move()
        if self.obstacle is not None:
            self.obstacle.move()
        # sensory_input() also clamps self.pos back into the environment.
        actual = self.sensory_input()

        # Prediction error over the position components only.
        prediction_error = float(np.linalg.norm(predicted[:2] - actual[:2]))
        self.errors_history.append(prediction_error)
        if len(self.errors_history) > self.error_window:
            self.errors_history.pop(0)
        mean_abs_error = float(np.mean(self.errors_history)) if self.errors_history else 0.0

        # Both positions are clamped to env_bounds, so the largest possible
        # error is the grid diagonal (sqrt(8) for the default (0, 2) bounds).
        span = float(self.env_bounds[1] - self.env_bounds[0])
        max_total_error = float(np.sqrt(2.0 * span * span))
        if max_total_error > 0:
            predictive_rate = 100.0 * (1.0 - (mean_abs_error / max_total_error))
        else:
            predictive_rate = 100.0
        predictive_rate = float(np.clip(predictive_rate, 0.0, 100.0))

        simulated_internal_uncertainty = self._rng.uniform(0.0, self.uncertainty_factor)
        if max_total_error > 0:
            c_min = float((max_total_error - mean_abs_error) * (1.0 - simulated_internal_uncertainty))
        else:
            c_min = 0.0

        self.last_action = action.copy()
        reinforcement = (predictive_rate / 100.0) * self.body_bit_reinforce_factor
        self.body_bit_strength += reinforcement - self.body_bit_decay_rate
        self.body_bit_strength = float(np.clip(self.body_bit_strength, 0.0, 2.0))

        # Q-learning update
        reward = (predictive_rate / 100.0) + (self.body_bit_strength / 2.0)
        if self.prev_state is not None and self.prev_action_idx is not None:
            next_state = tuple(self.pos.astype(int))
            q_index = tuple(self.prev_state) + (self.prev_action_idx,)
            old_q_value = self.q_table[q_index]
            next_max_q = np.max(self.q_table[next_state])
            td_target = reward + self.discount_factor * next_max_q
            self.q_table[q_index] = old_q_value + self.learning_rate * (td_target - old_q_value)

        return {
            "sensation": actual,
            "action": action.copy(),
            "error": prediction_error,
            "position": self.pos.copy(),
            "predictive_rate": predictive_rate,
            "C_min": c_min,
            "body_bit_strength": self.body_bit_strength,
            "reward": reward,
        }
+# --- Helper Functions ---
def compute_phi(history: List[dict], window: int = 20, num_cells: int = 9) -> float:
    """Score recent behaviour as mean body-bit strength times position diversity.

    Only the last ``window`` entries of ``history`` are considered. Position
    diversity is the number of distinct integer ``(x, y)`` cells visited in
    that span divided by the most cells that could have been visited, i.e.
    ``min(len(recent), num_cells)``.

    Args:
        history: Step records; each must hold a ``"sensation"`` sequence whose
            first two items are the position and whose third is the body-bit
            strength.
        window: Number of trailing records to evaluate (default 20).
        num_cells: Number of distinct cells in the environment (default 9,
            matching a 3x3 grid).

    Returns:
        The score clipped to ``[0.0, 2.0]``; ``0.0`` for an empty history.

    Raises:
        ValueError: If ``window`` or ``num_cells`` is smaller than 1.
    """
    if window < 1 or num_cells < 1:
        raise ValueError("window and num_cells must both be at least 1")
    if not history:
        return 0.0
    # A negative-start slice already copes with histories shorter than `window`.
    recent = history[-window:]
    sensations = [np.asarray(record["sensation"]) for record in recent]
    unique_positions = {tuple(s[:2].astype(int)) for s in sensations}
    avg_body_bit_strength = np.mean([s[2] for s in sensations])
    # `recent` is non-empty and num_cells >= 1, so the divisor is never zero.
    position_diversity_score = len(unique_positions) / min(len(recent), num_cells)
    integrated_phi = avg_body_bit_strength * position_diversity_score
    return float(np.clip(integrated_phi, 0.0, 2.0))
def run_simulation(agent_instance: "MinimalSelf", num_steps: int,
                   obstacle_instance: Optional["MovingObstacle"] = None,
                   social_entity_instance: Optional["SocialEntity"] = None) -> List[dict]:
    """Step an agent ``num_steps`` times and collect the per-step records.

    Args:
        agent_instance: Agent to run; must provide ``step()``,
            ``set_obstacle()`` and ``set_social_entity()``.
        num_steps: Number of steps to execute; values <= 0 yield an empty list.
        obstacle_instance: Obstacle to attach before running, if given.
        social_entity_instance: Social entity to attach before running, if given.

    Returns:
        The dicts returned by ``agent_instance.step()``, in order, each
        extended with a ``"t"`` key holding the zero-based step index.
    """
    # Compare against None instead of relying on truthiness, so an entity
    # object that happens to evaluate as falsy is still attached.
    if obstacle_instance is not None:
        agent_instance.set_obstacle(obstacle_instance)
    if social_entity_instance is not None:
        agent_instance.set_social_entity(social_entity_instance)

    history: List[dict] = []
    for t in range(num_steps):
        record = agent_instance.step()
        record["t"] = t
        history.append(record)
    return history
def plot_time_series(df, title, metrics):
    """Draw one stacked subplot per metric against the ``t`` column of ``df``.

    Metrics that are not columns of ``df`` get a placeholder panel carrying a
    "not available" note instead of a line.

    Args:
        df: Table with a ``t`` column plus any of the requested metric columns.
        title: Figure-level title.
        metrics: Column names to plot, top to bottom.

    Returns:
        The created matplotlib figure.
    """
    panel_count = len(metrics)
    # squeeze=False always yields a 2-D array, so one panel needs no special case.
    fig, grid = plt.subplots(panel_count, 1, figsize=(12, 3 * panel_count),
                             sharex=True, squeeze=False)
    panels = grid[:, 0]
    for panel, metric in zip(panels, metrics):
        if metric not in df.columns:
            panel.set_ylabel(metric + ' (N/A)')
            panel.text(0.5, 0.5, f'{metric} not available', ha='center', va='center',
                       transform=panel.transAxes)
            panel.grid(True)
            continue
        panel.plot(df['t'], df[metric], label=metric)
        panel.set_ylabel(metric)
        panel.legend()
        panel.grid(True)
    panels[-1].set_xlabel("Time Step")
    fig.suptitle(title, fontsize=16)
    # Leave room at the top for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.96])
    return fig
+# --- Simulation Execution ---
if __name__ == "__main__":
    NUM_STEPS = 5000

    # Unit moves shared by the obstacle and the social entity.
    entity_actions = [np.array(delta) for delta in ([0, 1], [1, 0], [0, -1], [-1, 0])]

    # Hyper-parameters common to every learning scenario.
    learner_settings = dict(seed=123, epsilon=0.2, learning_rate=0.1,
                            body_bit_reinforce_factor=0.1, body_bit_decay_rate=0.01)

    # (history key, reward_type, attach social entity?, attach obstacle?)
    learning_scenarios = [
        ('q_original_simple', "original", False, False),
        ('q_original_complex', "original", False, True),
        ('explore_grow_simple', "explore_grow", False, False),
        ('explore_grow_complex', "explore_grow", False, True),
        ('q_social_simple', "social", True, False),
        ('q_social_complex', "social", True, True),
    ]

    all_histories = {}

    # Baseline: no exploration, no Q-updates, constant body-bit strength.
    print(f"\nRunning 'No Learning' Baseline for {NUM_STEPS} steps...")
    baseline_agent = MinimalSelf(seed=123, initial_body_bit_strength=1.0,
                                 body_bit_decay_rate=0.0, body_bit_reinforce_factor=0.0,
                                 epsilon=0.0, learning_rate=0.0, reward_type="original")
    all_histories['no_learning'] = run_simulation(baseline_agent, NUM_STEPS)
    print("Baseline completed.")

    # Learning scenarios. Fresh entities are built for every run, and always in
    # the order social entity -> obstacle -> agent.
    for key, reward_type, with_social, with_obstacle in learning_scenarios:
        social = None
        if with_social:
            social = SocialEntity(start_pos=np.array([2, 2]), actions=entity_actions, seed=44)
        obstacle = None
        if with_obstacle:
            obstacle = MovingObstacle(start_pos=np.array([0, 0]), actions=entity_actions, seed=43)
        agent = MinimalSelf(reward_type=reward_type, **learner_settings)
        all_histories[key] = run_simulation(agent, NUM_STEPS,
                                            obstacle_instance=obstacle,
                                            social_entity_instance=social)

    # One DataFrame per run, keyed as 'df_<history key>'.
    all_dataframes = {f'df_{key}': pd.DataFrame(records)
                      for key, records in all_histories.items()}

    # Per-run means of the headline metrics.
    print("\n--- Average Metrics Comparison ---")
    metrics_for_avg = ['predictive_rate', 'C_min', 'body_bit_strength', 'reward']
    for name, df in all_dataframes.items():
        print(f"\n{name}:")
        available = [metric for metric in metrics_for_avg if metric in df.columns]
        print(df[available].mean())

    # Phi computed over the tail of each run.
    print("\n--- Final Phi Values ---")
    for name, records in all_histories.items():
        final_phi = compute_phi(records)
        print(f"{name}: {final_phi:.2f}")

    # Example plot for one run.
    metrics_for_plot = ['predictive_rate', 'C_min', 'body_bit_strength', 'reward']
    plot_time_series(all_dataframes['df_q_original_simple'],
                     "Q-Learning Original Reward Simple Environment", metrics_for_plot)
    plt.show()