Consolidate CPU modules into single core.py

Browse files

Merge state.py, cycle.py, and threshold_cpu.py into cpu/core.py.

Files changed (3) hide show

cpu/{threshold_cpu.py → core.py} +279 -70
cpu/cycle.py +0 -148
cpu/state.py +0 -103

cpu/{threshold_cpu.py → core.py} RENAMED Viewed

@@ -1,29 +1,41 @@
 """
-Threshold-weight runtime for the 8-bit CPU.
-Implements a reference cycle using the frozen circuit weights for core ALU ops.
 """
 from __future__ import annotations
 from pathlib import Path
 from typing import List, Tuple
 import torch
 from safetensors.torch import load_file
-from .state import CPUState, pack_state, unpack_state, REG_BITS, PC_BITS, MEM_BYTES
-def heaviside(x: torch.Tensor) -> torch.Tensor:
-    return (x >= 0).float()
-def int_to_bits_msb(value: int, width: int) -> List[int]:
     return [(value >> (width - 1 - i)) & 1 for i in range(width)]
-def bits_to_int_msb(bits: List[int]) -> int:
     value = 0
     for bit in bits:
         value = (value << 1) | int(bit)
@@ -34,6 +46,217 @@ def bits_msb_to_lsb(bits: List[int]) -> List[int]:
     return list(reversed(bits))
 DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "neural_computer.safetensors"
@@ -78,8 +301,8 @@ class ThresholdALU:
         return ha2_sum, cout
     def add(self, a: int, b: int) -> Tuple[int, int, int]:
-        a_bits = bits_msb_to_lsb(int_to_bits_msb(a, REG_BITS))
-        b_bits = bits_msb_to_lsb(int_to_bits_msb(b, REG_BITS))
         carry = 0.0
         sum_bits: List[int] = []
@@ -89,16 +312,16 @@ class ThresholdALU:
             )
             sum_bits.append(int(sum_bit))
-        result = bits_to_int_msb(list(reversed(sum_bits)))
         carry_out = int(carry)
         overflow = 1 if (((a ^ result) & (b ^ result)) & 0x80) else 0
         return result, carry_out, overflow
     def sub(self, a: int, b: int) -> Tuple[int, int, int]:
-        a_bits = bits_msb_to_lsb(int_to_bits_msb(a, REG_BITS))
-        b_bits = bits_msb_to_lsb(int_to_bits_msb(b, REG_BITS))
-        carry = 1.0  # two's complement carry-in
         sum_bits: List[int] = []
         for bit in range(REG_BITS):
             notb = self._eval_gate(
@@ -128,14 +351,14 @@ class ThresholdALU:
             sum_bits.append(int(xor2))
-        result = bits_to_int_msb(list(reversed(sum_bits)))
         carry_out = int(carry)
         overflow = 1 if (((a ^ b) & (a ^ result)) & 0x80) else 0
         return result, carry_out, overflow
     def bitwise_and(self, a: int, b: int) -> int:
-        a_bits = int_to_bits_msb(a, REG_BITS)
-        b_bits = int_to_bits_msb(b, REG_BITS)
         w = self._get("alu.alu8bit.and.weight")
         bias = self._get("alu.alu8bit.and.bias")
@@ -145,11 +368,11 @@ class ThresholdALU:
             out = heaviside((inp * w[bit * 2:bit * 2 + 2]).sum() + bias[bit]).item()
             out_bits.append(int(out))
-        return bits_to_int_msb(out_bits)
     def bitwise_or(self, a: int, b: int) -> int:
-        a_bits = int_to_bits_msb(a, REG_BITS)
-        b_bits = int_to_bits_msb(b, REG_BITS)
         w = self._get("alu.alu8bit.or.weight")
         bias = self._get("alu.alu8bit.or.bias")
@@ -159,10 +382,10 @@ class ThresholdALU:
             out = heaviside((inp * w[bit * 2:bit * 2 + 2]).sum() + bias[bit]).item()
             out_bits.append(int(out))
-        return bits_to_int_msb(out_bits)
     def bitwise_not(self, a: int) -> int:
-        a_bits = int_to_bits_msb(a, REG_BITS)
         w = self._get("alu.alu8bit.not.weight")
         bias = self._get("alu.alu8bit.not.bias")
@@ -172,11 +395,11 @@ class ThresholdALU:
             out = heaviside((inp * w[bit]).sum() + bias[bit]).item()
             out_bits.append(int(out))
-        return bits_to_int_msb(out_bits)
     def bitwise_xor(self, a: int, b: int) -> int:
-        a_bits = int_to_bits_msb(a, REG_BITS)
-        b_bits = int_to_bits_msb(b, REG_BITS)
         w_or = self._get("alu.alu8bit.xor.layer1.or.weight")
         b_or = self._get("alu.alu8bit.xor.layer1.or.bias")
@@ -194,7 +417,7 @@ class ThresholdALU:
             out = heaviside((hidden * w2[bit * 2:bit * 2 + 2]).sum() + b2[bit]).item()
             out_bits.append(int(out))
-        return bits_to_int_msb(out_bits)
 class ThresholdCPU:
@@ -202,24 +425,8 @@ class ThresholdCPU:
         self.device = device
         self.alu = ThresholdALU(str(model_path), device=device)
-    @staticmethod
-    def decode_ir(ir: int) -> Tuple[int, int, int, int]:
-        opcode = (ir >> 12) & 0xF
-        rd = (ir >> 10) & 0x3
-        rs = (ir >> 8) & 0x3
-        imm8 = ir & 0xFF
-        return opcode, rd, rs, imm8
-    @staticmethod
-    def flags_from_result(result: int, carry: int, overflow: int) -> List[int]:
-        z = 1 if result == 0 else 0
-        n = 1 if (result & 0x80) else 0
-        c = 1 if carry else 0
-        v = 1 if overflow else 0
-        return [z, n, c, v]
     def _addr_decode(self, addr: int) -> torch.Tensor:
-        bits = torch.tensor(int_to_bits_msb(addr, PC_BITS), device=self.device, dtype=torch.float32)
         w = self.alu._get("memory.addr_decode.weight")
         b = self.alu._get("memory.addr_decode.bias")
         return heaviside((w * bits).sum(dim=1) + b)
@@ -227,7 +434,7 @@ class ThresholdCPU:
     def _memory_read(self, mem: List[int], addr: int) -> int:
         sel = self._addr_decode(addr)
         mem_bits = torch.tensor(
-            [int_to_bits_msb(byte, REG_BITS) for byte in mem],
             device=self.device,
             dtype=torch.float32,
         )
@@ -243,13 +450,13 @@ class ThresholdCPU:
             out_bit = heaviside((and_out * or_w[bit]).sum() + or_b[bit]).item()
             out_bits.append(int(out_bit))
-        return bits_to_int_msb(out_bits)
     def _memory_write(self, mem: List[int], addr: int, value: int) -> List[int]:
         sel = self._addr_decode(addr)
-        data_bits = torch.tensor(int_to_bits_msb(value, REG_BITS), device=self.device, dtype=torch.float32)
         mem_bits = torch.tensor(
-            [int_to_bits_msb(byte, REG_BITS) for byte in mem],
             device=self.device,
             dtype=torch.float32,
         )
@@ -283,11 +490,11 @@ class ThresholdCPU:
             out_bit = heaviside((or_inp * or_w[:, bit]).sum(dim=1) + or_b[:, bit])
             new_mem_bits[:, bit] = out_bit
-        return [bits_to_int_msb([int(b) for b in new_mem_bits[i].tolist()]) for i in range(MEM_BYTES)]
     def _conditional_jump_byte(self, prefix: str, pc_byte: int, target_byte: int, flag: int) -> int:
-        pc_bits = int_to_bits_msb(pc_byte, REG_BITS)
-        target_bits = int_to_bits_msb(target_byte, REG_BITS)
         out_bits: List[int] = []
         for bit in range(REG_BITS):
@@ -313,21 +520,21 @@ class ThresholdCPU:
             )
             out_bits.append(int(out_bit))
-        return bits_to_int_msb(out_bits)
     def step(self, state: CPUState) -> CPUState:
-        if state.ctrl[0] == 1:  # HALT
             return state.copy()
         s = state.copy()
-        # Fetch: two bytes, big-endian
         hi = self._memory_read(s.mem, s.pc)
         lo = self._memory_read(s.mem, (s.pc + 1) & 0xFFFF)
         s.ir = ((hi & 0xFF) << 8) | (lo & 0xFF)
         next_pc = (s.pc + 2) & 0xFFFF
-        opcode, rd, rs, imm8 = self.decode_ir(s.ir)
         a = s.regs[rd]
         b = s.regs[rs]
@@ -344,45 +551,45 @@ class ThresholdCPU:
         carry = 0
         overflow = 0
-        if opcode == 0x0:  # ADD
             result, carry, overflow = self.alu.add(a, b)
-        elif opcode == 0x1:  # SUB
             result, carry, overflow = self.alu.sub(a, b)
-        elif opcode == 0x2:  # AND
             result = self.alu.bitwise_and(a, b)
-        elif opcode == 0x3:  # OR
             result = self.alu.bitwise_or(a, b)
-        elif opcode == 0x4:  # XOR
             result = self.alu.bitwise_xor(a, b)
-        elif opcode == 0x5:  # SHL
             carry = 1 if (a & 0x80) else 0
             result = (a << 1) & 0xFF
-        elif opcode == 0x6:  # SHR
             carry = 1 if (a & 0x01) else 0
             result = (a >> 1) & 0xFF
-        elif opcode == 0x7:  # MUL
             full = a * b
             result = full & 0xFF
             carry = 1 if full > 0xFF else 0
-        elif opcode == 0x8:  # DIV
             if b == 0:
                 result = 0
                 carry = 1
                 overflow = 1
             else:
                 result = (a // b) & 0xFF
-        elif opcode == 0x9:  # CMP
             result, carry, overflow = self.alu.sub(a, b)
             write_result = False
-        elif opcode == 0xA:  # LOAD
             result = self._memory_read(s.mem, addr16)
-        elif opcode == 0xB:  # STORE
             s.mem = self._memory_write(s.mem, addr16, b & 0xFF)
             write_result = False
-        elif opcode == 0xC:  # JMP
             s.pc = addr16 & 0xFFFF
             write_result = False
-        elif opcode == 0xD:  # JZ
             hi_pc = self._conditional_jump_byte(
                 "control.jz",
                 (next_pc_ext >> 8) & 0xFF,
@@ -397,7 +604,7 @@ class ThresholdCPU:
             )
             s.pc = ((hi_pc & 0xFF) << 8) | (lo_pc & 0xFF)
             write_result = False
-        elif opcode == 0xE:  # CALL
             ret_addr = next_pc_ext & 0xFFFF
             s.sp = (s.sp - 1) & 0xFFFF
             s.mem = self._memory_write(s.mem, s.sp, (ret_addr >> 8) & 0xFF)
@@ -405,12 +612,12 @@ class ThresholdCPU:
             s.mem = self._memory_write(s.mem, s.sp, ret_addr & 0xFF)
             s.pc = addr16 & 0xFFFF
             write_result = False
-        elif opcode == 0xF:  # HALT
             s.ctrl[0] = 1
             write_result = False
         if opcode <= 0x9 or opcode == 0xA:
-            s.flags = self.flags_from_result(result, carry, overflow)
         if write_result:
             s.regs[rd] = result & 0xFF
@@ -421,6 +628,7 @@ class ThresholdCPU:
         return s
     def run_until_halt(self, state: CPUState, max_cycles: int = 256) -> Tuple[CPUState, int]:
         s = state.copy()
         for i in range(max_cycles):
             if s.ctrl[0] == 1:
@@ -429,6 +637,7 @@ class ThresholdCPU:
         return s, max_cycles
     def forward(self, state_bits: torch.Tensor, max_cycles: int = 256) -> torch.Tensor:
         bits_list = [int(b) for b in state_bits.detach().cpu().flatten().tolist()]
         state = unpack_state(bits_list)
         final, _ = self.run_until_halt(state, max_cycles=max_cycles)

 """
+8-bit Threshold Computer - Combined CPU Module
+State layout, reference cycle, and threshold-weight runtime in one file.
+All multi-bit fields are MSB-first.
 """
 from __future__ import annotations
+from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Tuple
 import torch
 from safetensors.torch import load_file
+# Names of the entries in CPUState.flags and CPUState.ctrl, in list order.
+FLAG_NAMES = ["Z", "N", "C", "V"]
+CTRL_NAMES = ["HALT", "MEM_WE", "MEM_RE", "RESERVED"]
+# Field widths, in bits, used by pack_state / unpack_state.
+PC_BITS = 16
+IR_BITS = 16
+REG_BITS = 8
+REG_COUNT = 4
+FLAG_BITS = 4
+SP_BITS = 16
+CTRL_BITS = 4
+MEM_BYTES = 65536  # one byte for every 16-bit address
+MEM_BITS = MEM_BYTES * 8
+# Exact length of the flat bit vector that unpack_state accepts.
+STATE_BITS = PC_BITS + IR_BITS + (REG_BITS * REG_COUNT) + FLAG_BITS + SP_BITS + CTRL_BITS + MEM_BITS
+def int_to_bits(value: int, width: int) -> List[int]:
+    # Expand `value` into a list of `width` bits, most-significant bit first:
+    # element i is bit (width - 1 - i) of `value`. Bits at or above position
+    # `width` are dropped without error.
     return [(value >> (width - 1 - i)) & 1 for i in range(width)]
+def bits_to_int(bits: List[int]) -> int:
     value = 0
     for bit in bits:
         value = (value << 1) | int(bit)
     return list(reversed(bits))
+@dataclass
+class CPUState:
+    # Snapshot of the whole machine: registers, flags, control bits and memory.
+    pc: int  # program counter; packed as PC_BITS bits
+    ir: int  # instruction register; ref_step stores the fetched 16-bit word here
+    regs: List[int]  # general-purpose registers; unpack_state yields REG_COUNT of them
+    flags: List[int]  # status flags in FLAG_NAMES order (Z, N, C, V)
+    sp: int  # stack pointer; packed as SP_BITS bits
+    ctrl: List[int]  # control bits in CTRL_NAMES order; index 0 is HALT
+    mem: List[int]  # memory contents, one int per byte address
+    def copy(self) -> CPUState:
+        # Return an independent snapshot: every list is rebuilt and every
+        # element coerced to int, so mutating the copy leaves this instance
+        # untouched.
+        return CPUState(
+            pc=int(self.pc),
+            ir=int(self.ir),
+            regs=[int(r) for r in self.regs],
+            flags=[int(f) for f in self.flags],
+            sp=int(self.sp),
+            ctrl=[int(c) for c in self.ctrl],
+            mem=[int(m) for m in self.mem],
+        )
+def pack_state(state: CPUState) -> List[int]:
+    # Flatten `state` into one list of bits in the fixed field order
+    # pc, ir, regs, flags, sp, ctrl, mem. Integer fields are expanded MSB-first
+    # via int_to_bits.
+    bits: List[int] = []
+    bits.extend(int_to_bits(state.pc, PC_BITS))
+    bits.extend(int_to_bits(state.ir, IR_BITS))
+    for reg in state.regs:
+        bits.extend(int_to_bits(reg, REG_BITS))
+    # flags and ctrl are already per-bit lists, so they are appended as-is.
+    # NOTE(review): list lengths are not validated here; the output only has
+    # STATE_BITS entries if regs/flags/ctrl/mem have the expected sizes.
+    bits.extend([int(f) for f in state.flags])
+    bits.extend(int_to_bits(state.sp, SP_BITS))
+    bits.extend([int(c) for c in state.ctrl])
+    for byte in state.mem:
+        bits.extend(int_to_bits(byte, REG_BITS))
+    return bits
+def unpack_state(bits: List[int]) -> CPUState:
+    # Rebuild a CPUState from a flat bit list laid out as
+    # pc, ir, regs, flags, sp, ctrl, mem (the order pack_state writes).
+    # Raises ValueError when `bits` does not hold exactly STATE_BITS entries.
+    if len(bits) != STATE_BITS:
+        raise ValueError(f"Expected {STATE_BITS} bits, got {len(bits)}")
+    # `idx` is a read cursor that advances by each field's width.
+    idx = 0
+    pc = bits_to_int(bits[idx:idx + PC_BITS])
+    idx += PC_BITS
+    ir = bits_to_int(bits[idx:idx + IR_BITS])
+    idx += IR_BITS
+    regs = []
+    for _ in range(REG_COUNT):
+        regs.append(bits_to_int(bits[idx:idx + REG_BITS]))
+        idx += REG_BITS
+    # flags and ctrl stay as lists of individual bits rather than integers.
+    flags = [int(b) for b in bits[idx:idx + FLAG_BITS]]
+    idx += FLAG_BITS
+    sp = bits_to_int(bits[idx:idx + SP_BITS])
+    idx += SP_BITS
+    ctrl = [int(b) for b in bits[idx:idx + CTRL_BITS]]
+    idx += CTRL_BITS
+    # Memory bytes use the same width as a register (REG_BITS).
+    mem = []
+    for _ in range(MEM_BYTES):
+        mem.append(bits_to_int(bits[idx:idx + REG_BITS]))
+        idx += REG_BITS
+    return CPUState(pc=pc, ir=ir, regs=regs, flags=flags, sp=sp, ctrl=ctrl, mem=mem)
+def decode_ir(ir: int) -> Tuple[int, int, int, int]:
+    # Split a 16-bit instruction word into (opcode, rd, rs, imm8):
+    # opcode = bits 15-12, rd = bits 11-10, rs = bits 9-8, imm8 = bits 7-0.
+    opcode = (ir >> 12) & 0xF
+    rd = (ir >> 10) & 0x3
+    rs = (ir >> 8) & 0x3
+    imm8 = ir & 0xFF
+    return opcode, rd, rs, imm8
+def flags_from_result(result: int, carry: int, overflow: int) -> Tuple[int, int, int, int]:
+    # Derive the (Z, N, C, V) flag tuple for an operation's outcome.
+    # Z is set when `result` is exactly 0 and N mirrors bit 7 of `result`;
+    # `carry` and `overflow` are normalised from truthiness to 0/1.
+    z = 1 if result == 0 else 0
+    n = 1 if (result & 0x80) else 0
+    c = 1 if carry else 0
+    v = 1 if overflow else 0
+    return z, n, c, v
+def alu_add(a: int, b: int) -> Tuple[int, int, int]:
+    # Reference 8-bit addition. Returns (result, carry, overflow), where result
+    # is the low 8 bits of a + b and carry is 1 when the sum exceeds 0xFF.
+    # assumes a and b are already in 0..255 - TODO confirm against callers
+    full = a + b
+    result = full & 0xFF
+    carry = 1 if full > 0xFF else 0
+    # Overflow: bit 7 of the result differs from bit 7 of both operands.
+    overflow = 1 if (((a ^ result) & (b ^ result)) & 0x80) else 0
+    return result, carry, overflow
+def alu_sub(a: int, b: int) -> Tuple[int, int, int]:
+    # Reference 8-bit subtraction. Returns (result, carry, overflow), where
+    # result is the low 8 bits of a - b.
+    # NOTE(review): bit 8 kept by the 0x1FF mask is never read; carry comes
+    # from the comparison below instead.
+    full = (a - b) & 0x1FF
+    result = full & 0xFF
+    # Carry is 1 when a >= b, i.e. when the subtraction does not go negative.
+    carry = 1 if a >= b else 0
+    # Overflow: operands differ in bit 7 and the result's bit 7 differs from a's.
+    overflow = 1 if (((a ^ b) & (a ^ result)) & 0x80) else 0
+    return result, carry, overflow
+def ref_step(state: CPUState) -> CPUState:
+    """Reference CPU cycle (pure Python arithmetic)."""
+    # Runs one fetch/decode/execute cycle on a copy of `state` and returns that
+    # copy; the caller's state object is not mutated.
+    if state.ctrl[0] == 1:  # HALT already set: the cycle is a no-op
+        return state.copy()
+    s = state.copy()
+    # Fetch: two bytes, big-endian; the second address wraps at 16 bits.
+    hi = s.mem[s.pc]
+    lo = s.mem[(s.pc + 1) & 0xFFFF]
+    s.ir = ((hi & 0xFF) << 8) | (lo & 0xFF)
+    next_pc = (s.pc + 2) & 0xFFFF
+    # NOTE(review): imm8 is decoded but never used in this function.
+    opcode, rd, rs, imm8 = decode_ir(s.ir)
+    a = s.regs[rd]
+    b = s.regs[rs]
+    addr16 = None
+    next_pc_ext = next_pc
+    # LOAD/STORE/JMP/JZ/CALL read a 16-bit big-endian address from the two
+    # bytes following the instruction word, so they occupy four bytes.
+    if opcode in (0xA, 0xB, 0xC, 0xD, 0xE):
+        addr_hi = s.mem[next_pc]
+        addr_lo = s.mem[(next_pc + 1) & 0xFFFF]
+        addr16 = ((addr_hi & 0xFF) << 8) | (addr_lo & 0xFF)
+        next_pc_ext = (next_pc + 2) & 0xFFFF
+    # Defaults: the result is regs[rd] unchanged and is written back to rd;
+    # individual opcodes override these below.
+    write_result = True
+    result = a
+    carry = 0
+    overflow = 0
+    if opcode == 0x0:  # ADD
+        result, carry, overflow = alu_add(a, b)
+    elif opcode == 0x1:  # SUB
+        result, carry, overflow = alu_sub(a, b)
+    elif opcode == 0x2:  # AND
+        result = a & b
+    elif opcode == 0x3:  # OR
+        result = a | b
+    elif opcode == 0x4:  # XOR
+        result = a ^ b
+    elif opcode == 0x5:  # SHL: bit 7 shifts out into carry
+        carry = 1 if (a & 0x80) else 0
+        result = (a << 1) & 0xFF
+    elif opcode == 0x6:  # SHR: bit 0 shifts out into carry
+        carry = 1 if (a & 0x01) else 0
+        result = (a >> 1) & 0xFF
+    elif opcode == 0x7:  # MUL: low byte kept, carry set when the product exceeds 0xFF
+        full = a * b
+        result = full & 0xFF
+        carry = 1 if full > 0xFF else 0
+    elif opcode == 0x8:  # DIV
+        if b == 0:
+            # Division by zero yields 0 and raises both carry and overflow.
+            result = 0
+            carry = 1
+            overflow = 1
+        else:
+            result = (a // b) & 0xFF
+    elif opcode == 0x9:  # CMP: same as SUB but the result is discarded
+        result, carry, overflow = alu_sub(a, b)
+        write_result = False
+    elif opcode == 0xA:  # LOAD
+        result = s.mem[addr16]
+    elif opcode == 0xB:  # STORE: writes regs[rs] to memory
+        s.mem[addr16] = b & 0xFF
+        write_result = False
+    elif opcode == 0xC:  # JMP
+        s.pc = addr16 & 0xFFFF
+        write_result = False
+    elif opcode == 0xD:  # JZ: taken when flags[0] (Z) is set
+        if s.flags[0] == 1:
+            s.pc = addr16 & 0xFFFF
+        else:
+            s.pc = next_pc_ext
+        write_result = False
+    elif opcode == 0xE:  # CALL
+        # Push the return address high byte first, then low byte, decrementing
+        # sp (with 16-bit wrap) before each write; then jump to the target.
+        ret_addr = next_pc_ext & 0xFFFF
+        s.sp = (s.sp - 1) & 0xFFFF
+        s.mem[s.sp] = (ret_addr >> 8) & 0xFF
+        s.sp = (s.sp - 1) & 0xFFFF
+        s.mem[s.sp] = ret_addr & 0xFF
+        s.pc = addr16 & 0xFFFF
+        write_result = False
+    elif opcode == 0xF:  # HALT
+        s.ctrl[0] = 1
+        write_result = False
+    # Flags are refreshed for opcodes 0x0 through 0xA (ALU ops, CMP and LOAD).
+    # NOTE(review): 0x7 and 0x8 in the tuple are already covered by `opcode <= 0x9`.
+    if opcode <= 0x9 or opcode in (0xA, 0x7, 0x8):
+        s.flags = list(flags_from_result(result, carry, overflow))
+    if write_result:
+        s.regs[rd] = result & 0xFF
+    # JMP/JZ/CALL assigned s.pc above; every other opcode (HALT included)
+    # advances past the instruction and its address operand, if any.
+    if opcode not in (0xC, 0xD, 0xE):
+        s.pc = next_pc_ext
+    return s
+def ref_run_until_halt(state: CPUState, max_cycles: int = 256) -> Tuple[CPUState, int]:
+    """Reference execution loop."""
+    # Steps a copy of `state` with ref_step and returns (final_state, cycles).
+    # `cycles` is the number of steps taken before HALT was observed, or
+    # `max_cycles` when the budget ran out.
+    s = state.copy()
+    for i in range(max_cycles):
+        # HALT is checked before stepping, so a HALT executed on the very last
+        # permitted cycle is reported as max_cycles rather than an earlier count.
+        if s.ctrl[0] == 1:
+            return s, i
+        s = ref_step(s)
+    return s, max_cycles
+def heaviside(x: torch.Tensor) -> torch.Tensor:
+    # Element-wise step activation: 1.0 where x >= 0 (zero included), else 0.0.
     return (x >= 0).float()
 DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "neural_computer.safetensors"
         return ha2_sum, cout
     def add(self, a: int, b: int) -> Tuple[int, int, int]:
+        a_bits = bits_msb_to_lsb(int_to_bits(a, REG_BITS))
+        b_bits = bits_msb_to_lsb(int_to_bits(b, REG_BITS))
         carry = 0.0
         sum_bits: List[int] = []
             )
             sum_bits.append(int(sum_bit))
+        result = bits_to_int(list(reversed(sum_bits)))
         carry_out = int(carry)
         overflow = 1 if (((a ^ result) & (b ^ result)) & 0x80) else 0
         return result, carry_out, overflow
     def sub(self, a: int, b: int) -> Tuple[int, int, int]:
+        a_bits = bits_msb_to_lsb(int_to_bits(a, REG_BITS))
+        b_bits = bits_msb_to_lsb(int_to_bits(b, REG_BITS))
+        carry = 1.0
         sum_bits: List[int] = []
         for bit in range(REG_BITS):
             notb = self._eval_gate(
             sum_bits.append(int(xor2))
+        result = bits_to_int(list(reversed(sum_bits)))
         carry_out = int(carry)
         overflow = 1 if (((a ^ b) & (a ^ result)) & 0x80) else 0
         return result, carry_out, overflow
     def bitwise_and(self, a: int, b: int) -> int:
+        a_bits = int_to_bits(a, REG_BITS)
+        b_bits = int_to_bits(b, REG_BITS)
         w = self._get("alu.alu8bit.and.weight")
         bias = self._get("alu.alu8bit.and.bias")
             out = heaviside((inp * w[bit * 2:bit * 2 + 2]).sum() + bias[bit]).item()
             out_bits.append(int(out))
+        return bits_to_int(out_bits)
     def bitwise_or(self, a: int, b: int) -> int:
+        a_bits = int_to_bits(a, REG_BITS)
+        b_bits = int_to_bits(b, REG_BITS)
         w = self._get("alu.alu8bit.or.weight")
         bias = self._get("alu.alu8bit.or.bias")
             out = heaviside((inp * w[bit * 2:bit * 2 + 2]).sum() + bias[bit]).item()
             out_bits.append(int(out))
+        return bits_to_int(out_bits)
     def bitwise_not(self, a: int) -> int:
+        a_bits = int_to_bits(a, REG_BITS)
         w = self._get("alu.alu8bit.not.weight")
         bias = self._get("alu.alu8bit.not.bias")
             out = heaviside((inp * w[bit]).sum() + bias[bit]).item()
             out_bits.append(int(out))
+        return bits_to_int(out_bits)
     def bitwise_xor(self, a: int, b: int) -> int:
+        a_bits = int_to_bits(a, REG_BITS)
+        b_bits = int_to_bits(b, REG_BITS)
         w_or = self._get("alu.alu8bit.xor.layer1.or.weight")
         b_or = self._get("alu.alu8bit.xor.layer1.or.bias")
             out = heaviside((hidden * w2[bit * 2:bit * 2 + 2]).sum() + b2[bit]).item()
             out_bits.append(int(out))
+        return bits_to_int(out_bits)
 class ThresholdCPU:
         self.device = device
         self.alu = ThresholdALU(str(model_path), device=device)
     def _addr_decode(self, addr: int) -> torch.Tensor:
+        bits = torch.tensor(int_to_bits(addr, PC_BITS), device=self.device, dtype=torch.float32)
         w = self.alu._get("memory.addr_decode.weight")
         b = self.alu._get("memory.addr_decode.bias")
         return heaviside((w * bits).sum(dim=1) + b)
     def _memory_read(self, mem: List[int], addr: int) -> int:
         sel = self._addr_decode(addr)
         mem_bits = torch.tensor(
+            [int_to_bits(byte, REG_BITS) for byte in mem],
             device=self.device,
             dtype=torch.float32,
         )
             out_bit = heaviside((and_out * or_w[bit]).sum() + or_b[bit]).item()
             out_bits.append(int(out_bit))
+        return bits_to_int(out_bits)
     def _memory_write(self, mem: List[int], addr: int, value: int) -> List[int]:
         sel = self._addr_decode(addr)
+        data_bits = torch.tensor(int_to_bits(value, REG_BITS), device=self.device, dtype=torch.float32)
         mem_bits = torch.tensor(
+            [int_to_bits(byte, REG_BITS) for byte in mem],
             device=self.device,
             dtype=torch.float32,
         )
             out_bit = heaviside((or_inp * or_w[:, bit]).sum(dim=1) + or_b[:, bit])
             new_mem_bits[:, bit] = out_bit
+        return [bits_to_int([int(b) for b in new_mem_bits[i].tolist()]) for i in range(MEM_BYTES)]
     def _conditional_jump_byte(self, prefix: str, pc_byte: int, target_byte: int, flag: int) -> int:
+        pc_bits = int_to_bits(pc_byte, REG_BITS)
+        target_bits = int_to_bits(target_byte, REG_BITS)
         out_bits: List[int] = []
         for bit in range(REG_BITS):
             )
             out_bits.append(int(out_bit))
+        return bits_to_int(out_bits)
     def step(self, state: CPUState) -> CPUState:
+        """Single CPU cycle using threshold neurons."""
+        if state.ctrl[0] == 1:
             return state.copy()
         s = state.copy()
         hi = self._memory_read(s.mem, s.pc)
         lo = self._memory_read(s.mem, (s.pc + 1) & 0xFFFF)
         s.ir = ((hi & 0xFF) << 8) | (lo & 0xFF)
         next_pc = (s.pc + 2) & 0xFFFF
+        opcode, rd, rs, imm8 = decode_ir(s.ir)
         a = s.regs[rd]
         b = s.regs[rs]
         carry = 0
         overflow = 0
+        if opcode == 0x0:
             result, carry, overflow = self.alu.add(a, b)
+        elif opcode == 0x1:
             result, carry, overflow = self.alu.sub(a, b)
+        elif opcode == 0x2:
             result = self.alu.bitwise_and(a, b)
+        elif opcode == 0x3:
             result = self.alu.bitwise_or(a, b)
+        elif opcode == 0x4:
             result = self.alu.bitwise_xor(a, b)
+        elif opcode == 0x5:
             carry = 1 if (a & 0x80) else 0
             result = (a << 1) & 0xFF
+        elif opcode == 0x6:
             carry = 1 if (a & 0x01) else 0
             result = (a >> 1) & 0xFF
+        elif opcode == 0x7:
             full = a * b
             result = full & 0xFF
             carry = 1 if full > 0xFF else 0
+        elif opcode == 0x8:
             if b == 0:
                 result = 0
                 carry = 1
                 overflow = 1
             else:
                 result = (a // b) & 0xFF
+        elif opcode == 0x9:
             result, carry, overflow = self.alu.sub(a, b)
             write_result = False
+        elif opcode == 0xA:
             result = self._memory_read(s.mem, addr16)
+        elif opcode == 0xB:
             s.mem = self._memory_write(s.mem, addr16, b & 0xFF)
             write_result = False
+        elif opcode == 0xC:
             s.pc = addr16 & 0xFFFF
             write_result = False
+        elif opcode == 0xD:
             hi_pc = self._conditional_jump_byte(
                 "control.jz",
                 (next_pc_ext >> 8) & 0xFF,
             )
             s.pc = ((hi_pc & 0xFF) << 8) | (lo_pc & 0xFF)
             write_result = False
+        elif opcode == 0xE:
             ret_addr = next_pc_ext & 0xFFFF
             s.sp = (s.sp - 1) & 0xFFFF
             s.mem = self._memory_write(s.mem, s.sp, (ret_addr >> 8) & 0xFF)
             s.mem = self._memory_write(s.mem, s.sp, ret_addr & 0xFF)
             s.pc = addr16 & 0xFFFF
             write_result = False
+        elif opcode == 0xF:
             s.ctrl[0] = 1
             write_result = False
         if opcode <= 0x9 or opcode == 0xA:
+            s.flags = list(flags_from_result(result, carry, overflow))
         if write_result:
             s.regs[rd] = result & 0xFF
         return s
     def run_until_halt(self, state: CPUState, max_cycles: int = 256) -> Tuple[CPUState, int]:
+        """Execute until HALT or max_cycles reached."""
         s = state.copy()
         for i in range(max_cycles):
             if s.ctrl[0] == 1:
         return s, max_cycles
     def forward(self, state_bits: torch.Tensor, max_cycles: int = 256) -> torch.Tensor:
+        """Tensor-in, tensor-out interface for neural integration."""
         bits_list = [int(b) for b in state_bits.detach().cpu().flatten().tolist()]
         state = unpack_state(bits_list)
         final, _ = self.run_until_halt(state, max_cycles=max_cycles)

cpu/cycle.py DELETED Viewed

@@ -1,148 +0,0 @@
-"""
-Reference CPU cycle (software) for the threshold computer.
-Implements fetch/decode/execute over the state layout.
-"""
-from __future__ import annotations
-from typing import Tuple
-from .state import CPUState
-def decode_ir(ir: int) -> Tuple[int, int, int, int]:
-    opcode = (ir >> 12) & 0xF
-    rd = (ir >> 10) & 0x3
-    rs = (ir >> 8) & 0x3
-    imm8 = ir & 0xFF
-    return opcode, rd, rs, imm8
-def _flags_from_result(result: int, carry: int, overflow: int) -> Tuple[int, int, int, int]:
-    z = 1 if result == 0 else 0
-    n = 1 if (result & 0x80) else 0
-    c = 1 if carry else 0
-    v = 1 if overflow else 0
-    return z, n, c, v
-def _alu_add(a: int, b: int) -> Tuple[int, int, int]:
-    full = a + b
-    result = full & 0xFF
-    carry = 1 if full > 0xFF else 0
-    overflow = 1 if (((a ^ result) & (b ^ result)) & 0x80) else 0
-    return result, carry, overflow
-def _alu_sub(a: int, b: int) -> Tuple[int, int, int]:
-    full = (a - b) & 0x1FF
-    result = full & 0xFF
-    carry = 1 if a >= b else 0
-    overflow = 1 if (((a ^ b) & (a ^ result)) & 0x80) else 0
-    return result, carry, overflow
-def step(state: CPUState) -> CPUState:
-    if state.ctrl[0] == 1:  # HALT
-        return state.copy()
-    s = state.copy()
-    # Fetch: two bytes, big-endian
-    hi = s.mem[s.pc]
-    lo = s.mem[(s.pc + 1) & 0xFFFF]
-    s.ir = ((hi & 0xFF) << 8) | (lo & 0xFF)
-    next_pc = (s.pc + 2) & 0xFFFF
-    opcode, rd, rs, imm8 = decode_ir(s.ir)
-    a = s.regs[rd]
-    b = s.regs[rs]
-    addr16 = None
-    next_pc_ext = next_pc
-    if opcode in (0xA, 0xB, 0xC, 0xD, 0xE):
-        addr_hi = s.mem[next_pc]
-        addr_lo = s.mem[(next_pc + 1) & 0xFFFF]
-        addr16 = ((addr_hi & 0xFF) << 8) | (addr_lo & 0xFF)
-        next_pc_ext = (next_pc + 2) & 0xFFFF
-    write_result = True
-    result = a
-    carry = 0
-    overflow = 0
-    if opcode == 0x0:  # ADD
-        result, carry, overflow = _alu_add(a, b)
-    elif opcode == 0x1:  # SUB
-        result, carry, overflow = _alu_sub(a, b)
-    elif opcode == 0x2:  # AND
-        result = a & b
-    elif opcode == 0x3:  # OR
-        result = a | b
-    elif opcode == 0x4:  # XOR
-        result = a ^ b
-    elif opcode == 0x5:  # SHL
-        carry = 1 if (a & 0x80) else 0
-        result = (a << 1) & 0xFF
-    elif opcode == 0x6:  # SHR
-        carry = 1 if (a & 0x01) else 0
-        result = (a >> 1) & 0xFF
-    elif opcode == 0x7:  # MUL
-        full = a * b
-        result = full & 0xFF
-        carry = 1 if full > 0xFF else 0
-    elif opcode == 0x8:  # DIV
-        if b == 0:
-            result = 0
-            carry = 1
-            overflow = 1
-        else:
-            result = (a // b) & 0xFF
-    elif opcode == 0x9:  # CMP
-        result, carry, overflow = _alu_sub(a, b)
-        write_result = False
-    elif opcode == 0xA:  # LOAD
-        result = s.mem[addr16]
-    elif opcode == 0xB:  # STORE
-        s.mem[addr16] = b & 0xFF
-        write_result = False
-    elif opcode == 0xC:  # JMP
-        s.pc = addr16 & 0xFFFF
-        write_result = False
-    elif opcode == 0xD:  # JZ
-        if s.flags[0] == 1:
-            s.pc = addr16 & 0xFFFF
-        else:
-            s.pc = next_pc_ext
-        write_result = False
-    elif opcode == 0xE:  # CALL
-        ret_addr = next_pc_ext & 0xFFFF
-        s.sp = (s.sp - 1) & 0xFFFF
-        s.mem[s.sp] = (ret_addr >> 8) & 0xFF
-        s.sp = (s.sp - 1) & 0xFFFF
-        s.mem[s.sp] = ret_addr & 0xFF
-        s.pc = addr16 & 0xFFFF
-        write_result = False
-    elif opcode == 0xF:  # HALT
-        s.ctrl[0] = 1
-        write_result = False
-    if opcode <= 0x9 or opcode in (0xA, 0x7, 0x8):
-        s.flags = list(_flags_from_result(result, carry, overflow))
-    if write_result:
-        s.regs[rd] = result & 0xFF
-    if opcode not in (0xC, 0xD, 0xE):
-        s.pc = next_pc_ext
-    return s
-def run_until_halt(state: CPUState, max_cycles: int = 256) -> Tuple[CPUState, int]:
-    s = state.copy()
-    for i in range(max_cycles):
-        if s.ctrl[0] == 1:
-            return s, i
-        s = step(s)
-    return s, max_cycles

cpu/state.py DELETED Viewed

@@ -1,103 +0,0 @@
-"""
-State layout helpers for the 8-bit threshold computer.
-All multi-bit fields are MSB-first.
-"""
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import List
-FLAG_NAMES = ["Z", "N", "C", "V"]
-CTRL_NAMES = ["HALT", "MEM_WE", "MEM_RE", "RESERVED"]
-PC_BITS = 16
-IR_BITS = 16
-REG_BITS = 8
-REG_COUNT = 4
-FLAG_BITS = 4
-SP_BITS = 16
-CTRL_BITS = 4
-MEM_BYTES = 65536
-MEM_BITS = MEM_BYTES * 8
-STATE_BITS = PC_BITS + IR_BITS + (REG_BITS * REG_COUNT) + FLAG_BITS + SP_BITS + CTRL_BITS + MEM_BITS
-def int_to_bits(value: int, width: int) -> List[int]:
-    return [(value >> (width - 1 - i)) & 1 for i in range(width)]
-def bits_to_int(bits: List[int]) -> int:
-    value = 0
-    for bit in bits:
-        value = (value << 1) | int(bit)
-    return value
-@dataclass
-class CPUState:
-    pc: int
-    ir: int
-    regs: List[int]
-    flags: List[int]
-    sp: int
-    ctrl: List[int]
-    mem: List[int]
-    def copy(self) -> "CPUState":
-        return CPUState(
-            pc=int(self.pc),
-            ir=int(self.ir),
-            regs=[int(r) for r in self.regs],
-            flags=[int(f) for f in self.flags],
-            sp=int(self.sp),
-            ctrl=[int(c) for c in self.ctrl],
-            mem=[int(m) for m in self.mem],
-        )
-def pack_state(state: CPUState) -> List[int]:
-    bits: List[int] = []
-    bits.extend(int_to_bits(state.pc, PC_BITS))
-    bits.extend(int_to_bits(state.ir, IR_BITS))
-    for reg in state.regs:
-        bits.extend(int_to_bits(reg, REG_BITS))
-    bits.extend([int(f) for f in state.flags])
-    bits.extend(int_to_bits(state.sp, SP_BITS))
-    bits.extend([int(c) for c in state.ctrl])
-    for byte in state.mem:
-        bits.extend(int_to_bits(byte, REG_BITS))
-    return bits
-def unpack_state(bits: List[int]) -> CPUState:
-    if len(bits) != STATE_BITS:
-        raise ValueError(f"Expected {STATE_BITS} bits, got {len(bits)}")
-    idx = 0
-    pc = bits_to_int(bits[idx:idx + PC_BITS])
-    idx += PC_BITS
-    ir = bits_to_int(bits[idx:idx + IR_BITS])
-    idx += IR_BITS
-    regs = []
-    for _ in range(REG_COUNT):
-        regs.append(bits_to_int(bits[idx:idx + REG_BITS]))
-        idx += REG_BITS
-    flags = [int(b) for b in bits[idx:idx + FLAG_BITS]]
-    idx += FLAG_BITS
-    sp = bits_to_int(bits[idx:idx + SP_BITS])
-    idx += SP_BITS
-    ctrl = [int(b) for b in bits[idx:idx + CTRL_BITS]]
-    idx += CTRL_BITS
-    mem = []
-    for _ in range(MEM_BYTES):
-        mem.append(bits_to_int(bits[idx:idx + REG_BITS]))
-        idx += REG_BITS
-    return CPUState(pc=pc, ir=ir, regs=regs, flags=flags, sp=sp, ctrl=ctrl, mem=mem)