ahm3texe commited on
Commit
09a9ee3
·
verified ·
1 Parent(s): 0359bf7

Upload 4 files

Browse files
neurodino/brain.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # neurodino/brain.py
3
+ import numpy as np
4
+
5
class Brain:
    """
    A simple feed-forward neural network (MLP).

    Architecture: Input -> Hidden (tanh) -> Output (softmax).
    Serves as the genome of one agent; weights evolve genetically
    (copy/mutate/crossover) rather than being trained by backprop.
    """

    def __init__(self, input_nodes: int, hidden_nodes: int, output_nodes: int):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Xavier/Glorot initialization (well suited to tanh):
        # limit = sqrt(6 / (fan_in + fan_out))

        # Input -> Hidden
        limit_ih = np.sqrt(6 / (self.input_nodes + self.hidden_nodes))
        self.weights_ih = np.random.uniform(-limit_ih, limit_ih, (self.hidden_nodes, self.input_nodes))

        # Hidden -> Output
        limit_ho = np.sqrt(6 / (self.hidden_nodes + self.output_nodes))
        self.weights_ho = np.random.uniform(-limit_ho, limit_ho, (self.output_nodes, self.hidden_nodes))

        # Biases: symmetric around zero for tanh (-0.5 to 0.5).
        self.bias_h = np.random.uniform(-0.5, 0.5, (self.hidden_nodes, 1))
        self.bias_o = np.random.uniform(-0.5, 0.5, (self.output_nodes, 1))

        # Visualization caches, pre-filled so the first rendered frame
        # does not hit an AttributeError before predict() has run.
        self.last_inputs = np.zeros(input_nodes)
        self.last_hidden = np.zeros(hidden_nodes)
        self.last_outputs = np.zeros(output_nodes)

    def predict(self, input_array) -> np.ndarray:
        """
        Forward propagation.

        Returns a flat 1-D probability distribution over actions
        (softmax output sums to 1).
        """
        # Store for visualization.
        self.last_inputs = np.array(input_array)

        # Convert input to a column vector.
        inputs = np.array(input_array).reshape(-1, 1)

        # Input -> Hidden: tanh(W_ih @ x + b_h)
        hidden = np.tanh(np.dot(self.weights_ih, inputs) + self.bias_h)
        self.last_hidden = hidden.flatten()  # Store for viz

        # Hidden -> Output: softmax(W_ho @ h + b_o)
        output = self.softmax(np.dot(self.weights_ho, hidden) + self.bias_o)
        self.last_outputs = output.flatten()  # Store for viz

        return output.flatten()

    def copy(self) -> 'Brain':
        """Deep copy of all weights/biases (used by the genetic algorithm)."""
        new_brain = Brain(self.input_nodes, self.hidden_nodes, self.output_nodes)
        new_brain.weights_ih = self.weights_ih.copy()
        new_brain.weights_ho = self.weights_ho.copy()
        new_brain.bias_h = self.bias_h.copy()
        new_brain.bias_o = self.bias_o.copy()
        return new_brain

    def mutate(self, rate: float):
        """
        Adaptive mutation with intelligent parameter tweaking.

        Each parameter mutates with probability `rate`. A mutated parameter
        either receives a rare "shock" (fresh uniform value, probability
        `shock_rate`) to help escape local optima, or a Gaussian nudge whose
        sigma scales with `rate` (gentler when the rate is low), preserving
        learned patterns while still allowing exploration.
        """
        # Adaptive sigma: when the mutation rate is low, nudges are gentler.
        #   rate=0.20 -> sigma=0.15 (standard)
        #   rate=0.05 -> sigma=0.075 (gentle)
        #   rate=0.02 -> sigma=0.06 (very gentle)
        sigma = 0.05 + (rate * 0.5)

        # 8% of mutated genes are fully re-randomized ("shock") to allow
        # more exploration while still preserving most good solutions.
        shock_rate = 0.08

        self.weights_ih = self._mutate_array(self.weights_ih, rate, sigma, shock_rate)
        self.weights_ho = self._mutate_array(self.weights_ho, rate, sigma, shock_rate)
        self.bias_h = self._mutate_array(self.bias_h, rate, sigma, shock_rate)
        self.bias_o = self._mutate_array(self.bias_o, rate, sigma, shock_rate)

    @staticmethod
    def _mutate_array(arr, rate, sigma, shock_rate):
        """Vectorized per-element mutation (replaces a slow np.vectorize loop)."""
        mutated = arr.copy()
        mutate_mask = np.random.random(arr.shape) < rate
        shock_mask = mutate_mask & (np.random.random(arr.shape) < shock_rate)
        nudge_mask = mutate_mask & ~shock_mask
        # Shock: replace the gene with a fresh uniform value in [-1, 1).
        mutated[shock_mask] = np.random.uniform(-1, 1, int(shock_mask.sum()))
        # Nudge: small Gaussian perturbation around the current value.
        mutated[nudge_mask] += np.random.normal(0, sigma, int(nudge_mask.sum()))
        return mutated

    # Activation functions

    def softmax(self, x):
        """Numerically stable softmax along axis 0 (column-vector input)."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)
neurodino/genetics.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # neurodino/genetics.py
3
+ import random
4
+ import numpy as np
5
+ from typing import List, Tuple, Any
6
+ from .brain import Brain
7
+
8
class Genetics:
    """
    Manages population evolution: selection, crossover, mutation.
    """

    def __init__(self, population_size: int):
        self.pop_size = population_size
        self.genomes: List[Brain] = []
        self.generation = 1

    def create_random_population(self, input_size: int, hidden_size: int, output_size: int) -> None:
        """Initializes the population with freshly randomized brains."""
        self.genomes = [Brain(input_size, hidden_size, output_size)
                        for _ in range(self.pop_size)]

    @staticmethod
    def crossover(brain_a: Brain, brain_b: Brain) -> Brain:
        """
        UNIFORM CROSSOVER: each gene (weight) is randomly copied from parent A or B.

        Why NOT arithmetic crossover?
        - Averaging weights can destroy learned patterns ("neural lobotomy"):
          Parent A: +0.8 (jump), Parent B: -0.8 (don't jump) -> Child: 0.0 (useless).
        - Over generations the population regresses to the mean ("gray population").

        Uniform crossover preserves exact gene values and only recombines them.
        """
        input_n = brain_a.input_nodes
        hidden_n = brain_a.hidden_nodes
        output_n = brain_a.output_nodes

        offspring = Brain(input_n, hidden_n, output_n)

        # Random masks: True = take the gene from A, False = from B.
        mask_ih = np.random.random(brain_a.weights_ih.shape) < 0.5
        mask_ho = np.random.random(brain_a.weights_ho.shape) < 0.5
        mask_bh = np.random.random(brain_a.bias_h.shape) < 0.5
        mask_bo = np.random.random(brain_a.bias_o.shape) < 0.5

        # Apply masks: where True take from A, else from B.
        offspring.weights_ih = np.where(mask_ih, brain_a.weights_ih, brain_b.weights_ih)
        offspring.weights_ho = np.where(mask_ho, brain_a.weights_ho, brain_b.weights_ho)
        offspring.bias_h = np.where(mask_bh, brain_a.bias_h, brain_b.bias_h)
        offspring.bias_o = np.where(mask_bo, brain_a.bias_o, brain_b.bias_o)

        return offspring

    @staticmethod
    def select_parent(population_data: List[Tuple[Brain, float]]) -> Brain:
        """Tournament selection: sample a few random candidates, return the fittest."""
        tournament_size = 3
        candidates = random.sample(population_data, min(len(population_data), tournament_size))
        candidates.sort(key=lambda x: x[1], reverse=True)  # Fittest first
        return candidates[0][0]

    def evolve_population(self, old_population_data: List[Tuple[Brain, float]],
                          generation: int = 1, best_score: int = 0) -> List[Brain]:
        """
        Creates the next generation with an ADAPTIVE mutation rate.

        Args:
            old_population_data: list of (Brain, fitness_score) pairs.
            generation: current generation number (drives the decay term).
            best_score: best score achieved so far (drives the score factor).
        """
        # Sort the entire population by fitness, best first.
        old_population_data.sort(key=lambda x: x[1], reverse=True)

        new_population = []

        # 1. Elitism: carry the top one or two brains over unchanged
        #    to prevent regression.
        best_brain = old_population_data[0][0]
        new_population.append(best_brain.copy())

        if len(old_population_data) > 1:
            new_population.append(old_population_data[1][0].copy())

        # 2. ADAPTIVE mutation rate:
        #      rate = base * generation_decay * score_factor
        #    clamped to [0.05, 0.25] — the floor keeps some exploration,
        #    the ceiling keeps mutation from destroying good genomes.
        base_rate = 0.20

        # Generation decay: 0.995^gen -> Gen50: 0.78, Gen100: 0.60, Gen200: 0.37
        generation_decay = 0.995 ** generation

        # Score factor: a high score lowers mutation (protect the genius).
        #   score 0    -> factor 1.0
        #   score 2000 -> factor 0.50
        #   score 5000 -> factor ~0.29
        score_factor = 1.0 / (1.0 + best_score / 2000.0)

        adaptive_rate = base_rate * generation_decay * score_factor
        adaptive_rate = max(0.05, min(0.25, adaptive_rate))  # Clamp to [0.05, 0.25]

        # 3. Fill the rest via tournament selection + crossover + mutation.
        while len(new_population) < self.pop_size:
            parent_a = self.select_parent(old_population_data)
            parent_b = self.select_parent(old_population_data)

            child = self.crossover(parent_a, parent_b)
            child.mutate(adaptive_rate)
            new_population.append(child)

        self.genomes = new_population
        # Note: the generation counter is managed by NeuroRunner, not here.
        return new_population
neurodino/neuro_runner.py ADDED
@@ -0,0 +1,630 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # neurodino/neuro_runner.py
3
+ from __future__ import annotations
4
+ import pygame
5
+ import numpy as np
6
+ import math
7
+ import pickle
8
+ import os
9
+ import csv
10
+ import time
11
+ from typing import List, Optional
12
+ from tensorboardX import SummaryWriter # Visualization
13
+
14
+ from pydino.runner import Runner, Config, _get
15
+ from pydino.trex import Status as TrexStatus
16
+ from .neuro_trex import NeuroTrex
17
+ from .genetics import Genetics
18
+ from .brain import Brain
19
+
20
# Game Constants — tuning values for neural-net input normalization.
GAME_HEIGHT = 150  # Game window height for Y normalization
MAX_OBSTACLE_WIDTH = 75  # Maximum realistic obstacle width for better normalization spread
MAX_TTI_FRAMES = 50.0  # Maximum Time-to-Impact frames for normalization
DUCK_THRESHOLD_Y = 75  # Obstacles with yPos < this require ducking (high pterodactyl)
25
+
26
class NeuroRunner(Runner):
    """
    Subclass of Runner that manages a population of NeuroTrex agents.

    Overrides the main game loop to simulate multiple agents simultaneously,
    feeds each agent's brain a 12-value observation vector, handles
    per-generation evolution, persistence (pickle + CSV), and TensorBoard
    logging.
    """

    def __init__(self, screen, dimensions, pop_size=50, target_fps=60):
        # Disable audio cues for training to avoid noise/overhead.
        super().__init__(screen, dimensions, use_audio=False)

        self.pop_size = pop_size
        self.target_fps = target_fps
        self.genetics = Genetics(pop_size)
        self.generation = 1
        self.best_distance = 0
        self.best_global_score = 0  # All-time best; gates saving to disk
        self.brain_file = "best_brain.pkl"
        self.rendering = True  # Default to rendering enabled

        # Initialize (or resume from) the CSV training log.
        self.log_file = "training_log.csv"
        if not os.path.exists(self.log_file):
            with open(self.log_file, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(["Generation", "Max_Score", "Avg_Score", "Global_Best"])
        else:
            # Resume the generation counter from the last CSV row.
            try:
                with open(self.log_file, "r") as f:
                    lines = list(csv.reader(f))
                if len(lines) > 1:  # Header + at least one data row
                    last_row = lines[-1]
                    if last_row:
                        self.generation = int(last_row[0]) + 1
                        print(f"Resuming from Generation {self.generation}")
            except Exception as e:
                print(f"Could not read generation from CSV: {e}")

        # Population is (re)built here; _init_components only prepares scenery.
        self.watching_dino = None
        self._init_population()

    def set_rendering(self, enabled: bool):
        """Toggle rendering to save resources during headless training."""
        self.rendering = enabled

    def _init_components(self) -> None:
        """Override to prevent creating a default single Trex."""
        # super() sets up Horizon, DistanceMeter etc.; self.trex is
        # replaced later by _init_population.
        super()._init_components()
        self.population: List[NeuroTrex] = []

    def _init_population(self):
        """Create NeuroTrex agents for the current genomes, loading a saved brain if present."""
        self.best_brain = None  # Initialize to avoid AttributeError if no file exists

        # Load the best brain from disk if one was saved previously.
        if os.path.exists(self.brain_file):
            try:
                with open(self.brain_file, "rb") as f:
                    data = pickle.load(f)

                if isinstance(data, tuple):
                    self.best_brain, saved_score = data
                    self.best_global_score = saved_score
                    self.best_distance = saved_score  # Sync UI
                    print(f"Loaded best_brain.pkl! High Score: {self.best_global_score}")
                else:
                    # Legacy format: a bare Brain object without a score.
                    print("Loaded legacy brain file (no score). Resetting score.")
                    self.best_brain = data
                    self.best_global_score = 0
            except (EOFError, pickle.UnpicklingError):
                print("Error loading brain file. Starting fresh.")

        # TensorBoard writers (dual write):
        # 1. Full history — static name, one continuous curve across restarts.
        self.writer_full = SummaryWriter(log_dir="runs/dino-full-train")

        # 2. Session log — timestamped name, separate colored segment per run.
        timestamp = int(time.time())
        log_dir_session = f"runs/dino-train-{timestamp}"
        self.writer_session = SummaryWriter(log_dir=log_dir_session)

        print(f"TensorBoard logging to:\n - {log_dir_session}\n - runs/dino-full-train")

        # Seed the genome pool from the saved brain: one elite copy plus
        # mutated variants to diversify around the known-good solution.
        if self.best_brain:
            self.genetics.genomes = []
            self.genetics.genomes.append(self.best_brain.copy())
            for _ in range(self.pop_size - 1):
                child = self.best_brain.copy()
                child.mutate(0.20)  # Apply mutation to diversify
                self.genetics.genomes.append(child)

        # First run (nothing loaded): random population.
        if not self.genetics.genomes:
            # Inputs: 12 — Obs1[TTI, Action, Width], Obs2[TTI, Action, Width],
            #          Speed, Gap, DinoY, DinoVelocity, Airborne, Ducking.
            # Outputs: 3 (Jump, Duck, Run).
            # Hidden: 48 (extra capacity for complex timing).
            self.genetics.create_random_population(input_size=12, hidden_size=48, output_size=3)

        self.population = []
        for i in range(self.genetics.pop_size):
            t = NeuroTrex(self.screen, self.sprite_def["tRex"], self)
            t.brain = self.genetics.genomes[i]
            t.index = i
            self.population.append(t)

        # Point self.trex at one agent for compatibility with Runner methods.
        if self.population:
            self.trex = self.population[0]

    def _get_inputs(self, dino):
        """
        Build the 12-input observation vector ("Immortal Vision").

        OBSTACLE INFORMATION (two nearest obstacles ahead of the dino):
          1. Obs1 TTI  (time-to-impact; 1.0 = imminent danger)
          2. Obs1 Action (1.0 = DUCK needed — high pterodactyl, else 0.0 = JUMP)
          3. Obs1 Width (normalized)
          4-6. Same three values for Obs2.

        GAME STATE:
          7. Game speed (normalized to maxSpeed)
          8. Gap risk (1.0 = back-to-back obstacles)

        DINO STATE (critical for precise timing):
          9. Dino Y position (0.0 = ground, 1.0 = max jump height)
          10. Dino velocity (negative = rising, positive = falling; clipped to [-1, 1])
          11. Is airborne (1.0 = in the air)
          12. Is ducking (1.0 = currently ducking)

        Knowing exactly where the dino is in the jump arc lets the network
        time fast-drops and post-landing ducks precisely.
        """
        speed = self.current_speed / self.config.maxSpeed

        # DINO STATE.
        ground_y = dino.groundYPos            # ~93 in the stock game
        max_jump = dino.config.maxJumpHeight  # ~30

        # Normalized dino height: 0.0 = on the ground, 1.0 = max jump height.
        dino_y_normalized = 0.0
        if dino.jumping:
            height_above_ground = ground_y - dino.yPos
            dino_y_normalized = min(1.0, max(0.0, height_above_ground / max_jump))

        # Jump velocity, normalized to [-1, 1] (tanh-friendly);
        # typical raw range is about -10 to +10.
        dino_velocity = 0.0
        if dino.jumping:
            dino_velocity = max(-1.0, min(1.0, dino.jumpVelocity / 10.0))

        is_airborne = 1.0 if dino.jumping else 0.0
        is_ducking = 1.0 if dino.ducking else 0.0

        # Defaults: no obstacles means everything reads "safe" (0.0).
        obs1_dist = 0.0
        obs1_action = 0.0
        obs1_w = 0.0

        obs2_dist = 0.0
        obs2_action = 0.0
        obs2_w = 0.0

        gap = 0.0

        if self.horizon and self.horizon.obstacles:
            dino_front = dino.xPos

            # Only obstacles still ahead of the dino matter.
            future_obstacles = [o for o in self.horizon.obstacles
                                if o.xPos > dino_front]
            future_obstacles.sort(key=lambda o: o.xPos)

            # --- Obstacle 1 ---
            if len(future_obstacles) > 0:
                o1 = future_obstacles[0]
                dist1 = o1.xPos - dino.xPos

                # TTI in frames; inverted so 1.0 = imminent.
                tti1 = dist1 / max(1.0, self.current_speed)
                obs1_dist = 1.0 - min(1.0, tti1 / MAX_TTI_FRAMES)

                obs1_action = 1.0 if o1.yPos < DUCK_THRESHOLD_Y else 0.0
                obs1_w = min(1.0, o1.width / MAX_OBSTACLE_WIDTH)

            # --- Obstacle 2 ---
            if len(future_obstacles) > 1:
                o2 = future_obstacles[1]
                dist2 = o2.xPos - dino.xPos

                tti2 = dist2 / max(1.0, self.current_speed)
                obs2_dist = 1.0 - min(1.0, tti2 / MAX_TTI_FRAMES)

                obs2_action = 1.0 if o2.yPos < DUCK_THRESHOLD_Y else 0.0
                obs2_w = min(1.0, o2.width / MAX_OBSTACLE_WIDTH)

                # Time gap between obstacle 1's trailing edge and obstacle 2;
                # inverted so 1.0 = back-to-back.
                raw_gap = o2.xPos - (o1.xPos + o1.width)
                time_gap = raw_gap / max(1.0, self.current_speed)
                gap = 1.0 - min(1.0, time_gap / 15.0)

        return np.array([
            obs1_dist,
            obs1_action,
            obs1_w,
            obs2_dist,
            obs2_action,
            obs2_w,
            speed,
            gap,
            dino_y_normalized,  # Where in the jump arc?
            dino_velocity,      # Rising or falling?
            is_airborne,
            is_ducking          # Currently ducking?
        ])

    def update(self) -> None:
        """
        Overridden game loop, adapted from Runner.update for multiple agents.

        Wall-clock delta is deliberately ignored in favor of a fixed time
        step derived from target_fps, so frame-based objects (clouds) and
        time-based objects (dinos) stay in sync at any training speed.
        """
        now = pygame.time.get_ticks()

        # Deterministic fixed time step.
        delta = 1000.0 / self.target_fps

        # Per-frame speed scale for frame-based objects (clouds):
        # at 120 FPS each frame moves half the 60 FPS distance.
        speed_scale = 60.0 / self.target_fps

        self.time_ms = now

        # Defaults so later sections never read an unbound local.
        has_obstacles = False
        alive_count = 0

        # 1. AI updates (think & act).
        if self.playing and not self.crashed:
            active_dinos = [d for d in self.population if d.status != TrexStatus.CRASHED]
            for dino in active_dinos:
                dino.fitness = self.distance_ran
                inputs = self._get_inputs(dino)
                outputs = dino.brain.predict(inputs)
                action = np.argmax(outputs)
                dino.act(action)
                dino.update(delta)  # Apply animation state

                # Apply jump physics while airborne.
                if dino.jumping:
                    dino.updateJump(delta)

        # 2. Physics & logic.
        if self.rendering:
            self.screen.fill((247, 247, 247))

        if self.playing:
            self.running_time += delta

            has_obstacles = self.running_time > self.config.clearTime
            # speed_scale affects per-frame scroll of clouds/ground.
            # NOTE(review): Horizon.update may draw internally depending on
            # the pydino implementation; we cannot suppress that from here.
            self.horizon.update(delta, self.current_speed * speed_scale, has_obstacles, False)

        if self.rendering:
            if self.playing:
                self.distance_meter.update(delta, math.ceil(self.distance_ran))
            else:
                self.distance_meter.update(0, math.ceil(self.distance_ran))
        # When rendering is off there is nothing to do here: distance_ran is
        # tracked on self, and distance_meter.update only draws.

        # 3. Sticky camera: keep watching one dino until it crashes
        # (prevents flickering between leaders).
        if not hasattr(self, "watching_dino"):
            self.watching_dino = None

        if self.watching_dino is None or self.watching_dino.status == TrexStatus.CRASHED:
            candidates = [d for d in self.population if d.status != TrexStatus.CRASHED]
            if candidates:
                # Watch the best performer (highest fitness = longest survival).
                self.watching_dino = max(candidates, key=lambda d: d.fitness)
            else:
                self.watching_dino = None

        # Live dinos were already updated in the AI loop; crashed dinos just
        # slide backwards with the ground.
        for dino in self.population:
            if dino.status == TrexStatus.CRASHED:
                dino.xPos -= self.current_speed * (delta / self.ms_per_frame)

        # Manual smart draw: fetch the correct animation frame and draw ONCE.
        if self.rendering and self.watching_dino and self.watching_dino.status != TrexStatus.CRASHED:
            try:
                # Temporarily enable drawing for the watched dino only.
                self.watching_dino.visible = True

                # Trex.update computes the sprite x internally but does not
                # expose it, so re-derive it from the animation state.
                frames = self.watching_dino.currentAnimFrames
                if frames:
                    idx = self.watching_dino.currentFrameIndex % len(frames)
                    self.watching_dino.draw(frames[idx], 0)

                self.watching_dino.visible = False
            except Exception as e:
                # Log for debugging instead of failing silently.
                print(f"Warning: Drawing error for watching_dino: {e}")

        # 4. Collision detection — check ALL visible obstacles.
        if self.playing and not self.crashed:
            if has_obstacles and self.horizon.obstacles:
                for dino in self.population:
                    if dino.status == TrexStatus.CRASHED:
                        continue

                    for obstacle in self.horizon.obstacles:
                        if self._check_for_collision(obstacle, dino):
                            dino.update(100, TrexStatus.CRASHED)
                            dino.fitness = self.distance_ran
                            break  # This dino is done; skip remaining obstacles

            # If nobody is left alive, the generation is over.
            alive_count = sum(1 for d in self.population if d.status != TrexStatus.CRASHED)
            if alive_count == 0:
                self.crashed = True
                self.start_next_generation()
            else:
                self.distance_ran += self.current_speed * (delta / self.ms_per_frame)
                if self.current_speed < self.config.maxSpeed:
                    self.current_speed += self.config.acceleration

        # 5. Stats overlay.
        if self.rendering:
            self._draw_overlay(alive_count)

        # 6. Brain visualization for the watched dino.
        if self.rendering and self.watching_dino and self.watching_dino.status != TrexStatus.CRASHED:
            self._draw_brain(self.watching_dino.brain)

    def _draw_overlay(self, alive_count):
        """Draw the small training-stats text block (best effort)."""
        try:
            font = pygame.font.Font(None, 24)
            # Show whichever is higher: the saved global best or the current best.
            display_best = max(self.best_global_score, int(self.best_distance))
            stats = [
                f"Gen: {self.generation}",
                f"Alive: {alive_count}/{self.pop_size}",
                f"Best: {display_best}",
                f"Speed: {self.current_speed:.1f}"
            ]
            for i, line in enumerate(stats):
                txt = font.render(line, True, (80, 80, 80))
                self.screen.blit(txt, (10, 10 + i * 20))
        except Exception:
            # Overlay is purely cosmetic; never let it kill the training loop.
            # (Narrowed from a bare `except:`, which also swallowed SystemExit.)
            pass

    def _log_generation(self, writer, current_score, avg_score, best_brain):
        """Write per-generation scalars and weight histograms to one TensorBoard writer."""
        if not writer:
            return
        writer.add_scalar("Score/Max", current_score, self.generation)
        writer.add_scalar("Score/Average", avg_score, self.generation)
        writer.add_scalar("Score/Global_Best", self.best_global_score, self.generation)
        writer.add_scalar("Performance/Game_Speed", self.current_speed, self.generation)

        writer.add_histogram("Weights/Input_Hidden", best_brain.weights_ih, self.generation)
        writer.add_histogram("Weights/Hidden_Output", best_brain.weights_ho, self.generation)
        writer.add_histogram("Biases/Hidden", best_brain.bias_h, self.generation)
        writer.add_histogram("Biases/Output", best_brain.bias_o, self.generation)

    def start_next_generation(self):
        """Collect fitness data, log, evolve the population, and reset the game."""
        # 1. Collect fitness data.
        pop_data = []
        gen_max = 0
        for dino in self.population:
            gen_max = max(gen_max, dino.fitness)
            pop_data.append((dino.brain, dino.fitness))

        # Sync high score to the game UI if supported.
        if hasattr(self.distance_meter, "highScore"):
            self.distance_meter.highScore = self.best_distance

        # Report score matched to the game UI (pixels * 0.025).
        current_score = int(gen_max * 0.025)
        print(f"Gen {self.generation} Done. Max Score: {current_score}")

        # Save the best brain ONLY when the record is broken.
        best_brain = None
        if pop_data:
            pop_data.sort(key=lambda x: x[1], reverse=True)
            best_brain = pop_data[0][0]

            if current_score > self.best_global_score:
                previous_best = self.best_global_score  # Kept for logging
                self.best_global_score = current_score
                self.best_distance = current_score  # Sync for UI
                print(f"🏆 NEW RECORD! (Was: {previous_best} -> Now: {current_score})")

                # Main save (overwrite). Use the configured path so save and
                # load (_init_population) always agree on the filename.
                with open(self.brain_file, "wb") as f:
                    pickle.dump((best_brain, current_score), f)

                # Backup save (history).
                os.makedirs("backups", exist_ok=True)
                backup_filename = f"backups/brain_score_{current_score}.pkl"
                with open(backup_filename, "wb") as f:
                    pickle.dump((best_brain, current_score), f)
                print(f" Saved backup: {backup_filename}")

        # 1.5 Log to CSV.
        avg_score = sum(d.fitness for d in self.population) / len(self.population)
        avg_score = int(avg_score * 0.025)  # Convert to game score units

        with open(self.log_file, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([
                self.generation,
                current_score,
                avg_score,
                self.best_global_score
            ])

        # 1.6 TensorBoard logging (dual write: session segment + full history).
        if best_brain is not None:
            self._log_generation(self.writer_session, current_score, avg_score, best_brain)
            self._log_generation(self.writer_full, current_score, avg_score, best_brain)

        # Re-derive the adaptive mutation rate for logging. Must mirror
        # Genetics.evolve_population exactly — clamped to [0.05, 0.25];
        # the old 0.02 floor here disagreed with the actual evolution.
        base_rate = 0.20
        generation_decay = 0.995 ** self.generation
        score_factor = 1.0 / (1.0 + self.best_global_score / 2000.0)
        current_mutation_rate = max(0.05, min(0.25, base_rate * generation_decay * score_factor))

        if self.writer_session:
            self.writer_session.add_scalar("Evolution/Mutation_Rate", current_mutation_rate, self.generation)
        if self.writer_full:
            self.writer_full.add_scalar("Evolution/Mutation_Rate", current_mutation_rate, self.generation)

        # 2. Evolve with the adaptive mutation rate.
        new_genomes = self.genetics.evolve_population(
            pop_data,
            generation=self.generation,
            best_score=self.best_global_score
        )

        # 3. Reset game state.
        self.generation += 1
        self.crashed = False
        self.playing = True
        self.distance_ran = 0
        self.current_speed = self.config.speed

        self.horizon.reset()
        self.distance_meter.reset()

        # 4. Re-create the population from the new genomes.
        self.population = []
        for i in range(self.genetics.pop_size):
            t = NeuroTrex(self.screen, self.sprite_def["tRex"], self)
            t.brain = new_genomes[i]
            t.index = i
            self.population.append(t)

        self.trex = self.population[0]
        self.watching_dino = self.population[0]  # Start watching the first one
        self.activated = True  # Skip intro

    def _draw_brain(self, brain: Brain):
        """Draws the neural network visualization dashboard below the game area."""
        if not hasattr(brain, "last_inputs") or not hasattr(brain, "last_hidden"):
            return

        start_y = 160
        w = self.screen.get_width()
        h = self.screen.get_height() - start_y

        # Dark background panel for the dashboard.
        surf = pygame.Surface((w, h))
        surf.fill((30, 30, 30))  # Dark grey
        self.screen.blit(surf, (0, start_y))

        # Layout: X coordinates for input, hidden, and output columns.
        layer_x = [50, 300, 550]

        # Node Y positions, evenly spaced per layer.
        input_y = np.linspace(start_y + 40, start_y + h - 40, brain.input_nodes)
        hidden_y = np.linspace(start_y + 20, start_y + h - 20, brain.hidden_nodes)
        output_y = np.linspace(start_y + 60, start_y + h - 60, brain.output_nodes)

        # Labels (must match the _get_inputs ordering).
        input_labels = ["O1 TTI", "O1 Act", "O1 W", "O2 TTI", "O2 Act", "O2 W", "Speed", "Gap", "DinoY", "DinoVel", "Air", "Duck"]
        output_labels = ["Jump", "Duck", "Run"]

        font = pygame.font.Font(None, 20)

        def get_color(val):
            """Grayscale intensity: brighter = higher activation."""
            v = max(0, min(1, val))
            return (int(v*255), int(v*255), int(v*255))

        # 1. Draw weights (green = positive, red = negative, width = magnitude).
        # Input -> Hidden
        for i in range(brain.input_nodes):
            for j in range(brain.hidden_nodes):
                weight = brain.weights_ih[j][i]
                color = (255, 50, 50) if weight < 0 else (50, 255, 50)
                width = max(1, int(abs(weight) * 3))
                if abs(weight) > 0.1:  # Skip near-zero weights to limit draw calls
                    pygame.draw.line(self.screen, color, (layer_x[0], int(input_y[i])), (layer_x[1], int(hidden_y[j])), width)

        # Hidden -> Output
        for j in range(brain.hidden_nodes):
            for k in range(brain.output_nodes):
                weight = brain.weights_ho[k][j]
                color = (255, 50, 50) if weight < 0 else (50, 255, 50)
                width = max(1, int(abs(weight) * 3))
                if abs(weight) > 0.1:
                    pygame.draw.line(self.screen, color, (layer_x[1], int(hidden_y[j])), (layer_x[2], int(output_y[k])), width)

        # 2. Draw nodes.
        # Input nodes with value labels.
        for i, val in enumerate(brain.last_inputs):
            color = get_color(val)
            pos = (layer_x[0], int(input_y[i]))
            pygame.draw.circle(self.screen, color, pos, 10)
            pygame.draw.circle(self.screen, (200, 200, 200), pos, 10, 1)

            lbl = font.render(f"{input_labels[i]}:{val:.2f}", True, (200, 200, 200))
            self.screen.blit(lbl, (pos[0]-40, pos[1]-20))

        # Hidden nodes: tanh outputs lie in [-1, 1], remap to [0, 1] for color.
        for i, val in enumerate(brain.last_hidden):
            normalized = (val + 1.0) / 2.0  # -1 -> 0, 0 -> 0.5, 1 -> 1
            color = get_color(normalized)
            pos = (layer_x[1], int(hidden_y[i]))
            pygame.draw.circle(self.screen, color, pos, 8)
            pygame.draw.circle(self.screen, (200, 200, 200), pos, 8, 1)

        # Output nodes: highlight the chosen action; radius reflects confidence.
        max_idx = np.argmax(brain.last_outputs)
        for i, val in enumerate(brain.last_outputs):
            color = (0, 255, 0) if i == max_idx else (100, 100, 100)
            pos = (layer_x[2], int(output_y[i]))

            radius = 10 + int(val * 10)
            pygame.draw.circle(self.screen, color, pos, radius)
            pygame.draw.circle(self.screen, (255, 255, 255), pos, radius, 2)

            lbl_txt = f"{output_labels[i]} ({val:.1%})"
            lbl = font.render(lbl_txt, True, (255, 255, 255))
            self.screen.blit(lbl, (pos[0]+25, pos[1]-5))
neurodino/neuro_trex.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # neurodino/neuro_trex.py
3
+ from __future__ import annotations
4
+ import sys
5
+ import os
6
+
7
+ # Ensure pydino is importable
8
+ # Assuming this script is run from project root, 'pydino' is a top-level package.
9
+ # If run locally, we might need path hack.
10
+ from pydino.trex import Trex, Status
11
+ from typing import Optional, TYPE_CHECKING
12
+ if TYPE_CHECKING:
13
+ from .brain import Brain
14
+
15
class NeuroTrex(Trex):
    """
    AI-controlled T-Rex.

    Replaces keyboard input with decisions produced by a neural network:
    each frame the runner calls act() with the brain's chosen action index.
    """

    def __init__(self, screen, sprite_def, runner):
        # Must exist before super().__init__, because Trex's constructor
        # triggers an update() -> draw() chain that reads this flag.
        self.visible = False
        super().__init__(screen, sprite_def, runner)
        self.runner = runner
        self.brain: Optional[Brain] = None
        self.fitness: float = 0.0
        self.is_alive: bool = True

        # Put the dino straight into the running animation so the
        # frame state is properly initialized for AI control.
        self.update(0, Status.RUNNING)

    def draw(self, x, y):
        """Render only when explicitly made visible (most agents stay hidden)."""
        if not self.visible:
            return
        super().draw(x, y)

    def handle_event(self, event):
        """Ignore keyboard/mouse events — the brain is in charge."""
        return

    def act(self, action: int):
        """
        Apply the brain's decision.

        Action codes: 0 = JUMP, 1 = DUCK (or fast-drop mid-air),
        anything else = RUN (stand back up if ducking).
        """
        if self.status == Status.CRASHED:
            return

        if action == 0:
            # Jump only from a normal running posture.
            if not (self.jumping or self.ducking):
                self.startJump(self.runner.current_speed)
        elif action == 1:
            if self.jumping:
                # Already airborne: accelerate the descent instead.
                self.setSpeedDrop()
            elif not self.ducking:
                self.setDuck(True)
        elif self.ducking:
            # RUN while crouched: stand back up.
            self.setDuck(False)