"""Custom Learning Rule Benchmark ================================== Demonstrates P19 microcode learning engine with custom learning rules. Compares default STDP, anti-STDP, and a custom reward-modulated rule assembled from microcode text mnemonics. Features demonstrated: P19 microcode ISA, assembler, LearningRule, custom rules. """ import sys, os sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) import neurocore as nc from neurocore.microcode import LearningRule def build_network(): """Create a simple pre->post network for learning experiments.""" net = nc.Network() pre = net.population(1, params={"threshold": 100, "leak": 0, "refrac": 0}, label="pre") post = net.population(1, params={"threshold": 100, "leak": 0, "refrac": 0}, label="post") net.connect(pre, post, topology="all_to_all", weight=500) return net, pre, post def get_final_weight(sim): """Extract the weight from the simulator's adjacency table.""" for targets in sim._adjacency.values(): for entry in targets: return entry[1] return None def run_stdp(rule, rule_name, three_factor=False): """Run a learning trial with the given rule.""" net, pre, post = build_network() net.set_learning_rule(rule) sim = nc.Simulator() sim.deploy(net) sim.set_learning(learn=True, three_factor=three_factor) # Generate pre-before-post spike pattern (normally LTP) for _ in range(5): sim.inject(pre, current=200) sim.run(1) # pre spikes sim.run(1) # post receives input, spikes -> LTP correlation if three_factor: sim.reward(500) sim.run(1) final_w = get_final_weight(sim) print(f" {rule_name}: initial=500, final={final_w}") return final_w def main(): print("=" * 60) print(" Custom Learning Rule Benchmark (P19 Microcode)") print("=" * 60) # 1. Default STDP (weight directly modified) print("\n1. Default STDP (pre-before-post = LTP):") rule_stdp = LearningRule.stdp() w_stdp = run_stdp(rule_stdp, "Default STDP") assert w_stdp > 500, "STDP LTP should increase weight" # 2. Anti-STDP (inverted: pre-before-post = LTD) print("\n2. Anti-STDP (inverted correlation):") rule_anti = LearningRule() rule_anti.assemble_ltd(""" SHR R5, R0, 3 ; delta = trace >> 3 SKIP_Z R5 ; skip if zero ADD R2, R2, R5 ; weight += delta (anti-LTD = potentiate) STORE_W R2 HALT """) rule_anti.assemble_ltp(""" SHR R5, R0, 3 ; delta = trace >> 3 SKIP_Z R5 ; skip if zero SUB R2, R2, R5 ; weight -= delta (anti-LTP = depress) STORE_W R2 HALT """) w_anti = run_stdp(rule_anti, "Anti-STDP") assert w_anti < 500, "Anti-STDP should decrease weight for pre-before-post" # 3. Scaled STDP (2x learning rate via SHL) print("\n3. Scaled STDP (2x learning rate):") rule_fast = LearningRule() rule_fast.assemble_ltd(""" SHR R5, R0, 3 ; delta = trace >> 3 SHL R5, R5, 1 ; delta *= 2 (double rate) SKIP_Z R5 SUB R2, R2, R5 STORE_W R2 HALT """) rule_fast.assemble_ltp(""" SHR R5, R0, 3 ; delta = trace >> 3 SHL R5, R5, 1 ; delta *= 2 SKIP_Z R5 ADD R2, R2, R5 STORE_W R2 HALT """) w_fast = run_stdp(rule_fast, "2x STDP") assert w_fast > w_stdp, f"2x STDP ({w_fast}) should be > default ({w_stdp})" # 4. 3-factor eligibility learning (default program) print("\n4. 3-factor eligibility + reward:") rule_3f = LearningRule.three_factor() w_3f = run_stdp(rule_3f, "3-factor STDP", three_factor=True) print(f" (Reward applied: weight change reflects eligibility * reward)") # 5. Custom capped rule (weight bounded to [400, 600]) print("\n5. Capped STDP (weight bounded [400, 600]):") rule_capped = LearningRule() rule_capped.assemble_ltp(""" SHR R5, R0, 3 ; delta = trace >> 3 SKIP_Z R5 ADD R2, R2, R5 ; weight += delta LOADI R4, 600 ; max weight MIN R2, R2, R4 ; clamp to max STORE_W R2 HALT """) rule_capped.assemble_ltd(""" SHR R5, R0, 3 SKIP_Z R5 SUB R2, R2, R5 ; weight -= delta LOADI R4, 400 ; min weight MAX R2, R2, R4 ; clamp to min STORE_W R2 HALT """) w_capped = run_stdp(rule_capped, "Capped STDP") assert 400 <= w_capped <= 600, f"Capped weight should be in [400,600], got {w_capped}" # Summary print("\n--- Summary ---") print(f"Default STDP: {w_stdp:>6d} (LTP: weight increased)") print(f"Anti-STDP: {w_anti:>6d} (inverted: weight decreased)") print(f"2x STDP: {w_fast:>6d} (double learning rate)") print(f"3-Factor: {w_3f:>6d} (eligibility + reward)") print(f"Capped [400,600]: {w_capped:>4d} (bounded)") print("\nAll custom learning rules verified!") print("Done!") if __name__ == "__main__": main()