CharlesCNorton

Add SHL, SHR, MUL, DIV, and comparator circuits

6087b2e 3 months ago

40.4 kB

	"""
	Build tools for 8-bit Threshold Computer safetensors.

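	Subcommands:
	python build.py memory - Generate 64KB memory circuits
	python build.py alu - Generate ALU extension circuits (SHL, SHR, MUL, DIV, comparators)
	python build.py inputs - Add .inputs metadata tensors
	python build.py all - Run all three (memory first, then alu, then inputs)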


	ROUTING SCHEMA (formerly routing.json)
	======================================

	Routing info is now embedded in safetensors via .inputs tensors and signal registry metadata.


	INPUT SOURCE TYPES
	------------------

	1. External input: "$input_name" - Named input to the circuit
	- Example: "$a", "$b", "$cin"

	2. Gate output: "path.to.gate" - Output of another gate
	- Example: "ha1.sum", "layer1.or"

	3. Bit extraction: "$input[i]" - Single bit from multi-bit input
	- Example: "$a[0]" (LSB), "$a[7]" (MSB for 8-bit)

	4. Constant: "#0" or "#1" - Fixed value
	- Example: "#1" for carry-in in two's complement


	CIRCUIT TYPES
	-------------

	Single-Layer Gates: .weight and .bias only
	"boolean.and": ["$a", "$b"]

	Two-Layer Gates (XOR, XNOR): layer1 + layer2
	"boolean.xor.layer1.or": ["$a", "$b"]
	"boolean.xor.layer1.nand": ["$a", "$b"]
	"boolean.xor.layer2": ["layer1.or", "layer1.nand"]

	Hierarchical Circuits: nested sub-components
	"arithmetic.fulladder": {
	"ha1.sum.layer1.or": ["$a", "$b"],
	"ha1.carry": ["$a", "$b"],
	"ha2.sum.layer1.or": ["ha1.sum", "$cin"],
	"carry_or": ["ha1.carry", "ha2.carry"]
	}

	Bit-Indexed Circuits: multi-bit operations
	"arithmetic.ripplecarry8bit.fa0": ["$a[0]", "$b[0]", "#0"]
	"arithmetic.ripplecarry8bit.fa1": ["$a[1]", "$b[1]", "fa0.cout"]


	PACKED MEMORY CIRCUITS
	----------------------

	64KB memory uses packed tensors (shapes for 16-bit address, 8-bit data):

	memory.addr_decode.weight: [65536, 16]
	memory.addr_decode.bias: [65536]
	memory.read.and.weight: [8, 65536, 2]
	memory.read.and.bias: [8, 65536]
	memory.read.or.weight: [8, 65536]
	memory.read.or.bias: [8]
	memory.write.sel.weight: [65536, 2]
	memory.write.sel.bias: [65536]
	memory.write.nsel.weight: [65536, 1]
	memory.write.nsel.bias: [65536]
	memory.write.and_old.weight: [65536, 8, 2]
	memory.write.and_old.bias: [65536, 8]
	memory.write.and_new.weight: [65536, 8, 2]
	memory.write.and_new.bias: [65536, 8]
	memory.write.or.weight: [65536, 8, 2]
	memory.write.or.bias: [65536, 8]

	Semantics:
	decode: sel[i] = H(sum(addr_bits * weight[i]) + bias[i])
	read: bit[b] = H(sum(H([mem_bit, sel] * and_w) + and_b) * or_w + or_b)
	write: new = H(H([old, nsel] * and_old) + H([data, sel] * and_new) - 1)


	SIGNAL REGISTRY
	---------------

	Signal IDs are stored in safetensors metadata as JSON:

	{"0": "#0", "1": "#1", "2": "$a", "3": "$b", ...}

	Each gate's .inputs tensor contains integer IDs referencing this registry.


	NAMING CONVENTIONS
	------------------

	- External inputs: $name or $name[bit]
	- Constants: #0, #1
	- Internal gates: relative path from circuit root
	"""

	from __future__ import annotations

	import argparse
	import json
	import re
	from pathlib import Path
	from typing import Dict, Iterable, List, Set

	import torch
	from safetensors import safe_open
	from safetensors.torch import save_file


	# Default model file and manifest file, both resolved relative to this script's directory.
	MODEL_PATH = Path(__file__).resolve().parent / "neural_computer.safetensors"
	MANIFEST_PATH = Path(__file__).resolve().parent / "tensors.txt"

	# Address width in bits, and the number of addressable memory cells (2 ** ADDR_BITS).
	ADDR_BITS = 16
	MEM_BYTES = 1 << ADDR_BITS


	def load_tensors(path: Path) -> Dict[str, torch.Tensor]:
	    """Load every tensor stored in the safetensors file at *path*.

	    Returns a dict keyed by tensor name. Each value is a ``.clone()`` of the
	    tensor handed back by the reader.
	    """
	    with safe_open(str(path), framework="pt") as reader:
	        return {key: reader.get_tensor(key).clone() for key in reader.keys()}


	def get_all_gates(tensors: Dict[str, torch.Tensor]) -> Set[str]:
	    """Return the set of gate names found in *tensors*.

	    A gate name is any key ending in ``.weight`` with that suffix removed.
	    """
	    suffix = '.weight'
	    return {key[:-len(suffix)] for key in tensors if key.endswith(suffix)}


	class SignalRegistry:
	    """Two-way mapping between signal names and sequential integer IDs.

	    IDs are handed out in registration order starting at 0. The constants
	    "#0" and "#1" are registered on construction, so they receive IDs 0 and 1.
	    """

	    def __init__(self):
	        self.name_to_id: Dict[str, int] = {}
	        self.id_to_name: Dict[int, str] = {}
	        self.next_id = 0
	        for constant in ("#0", "#1"):
	            self.register(constant)

	    def register(self, name: str) -> int:
	        """Return the ID of *name*, allocating the next free ID on first use."""
	        known = self.name_to_id.get(name)
	        if known is not None:
	            return known
	        assigned = self.next_id
	        self.name_to_id[name] = assigned
	        self.id_to_name[assigned] = name
	        self.next_id = assigned + 1
	        return assigned

	    def get_id(self, name: str) -> int:
	        """Return the ID of *name*, or -1 when it has never been registered."""
	        if name in self.name_to_id:
	            return self.name_to_id[name]
	        return -1

	    def to_metadata(self) -> str:
	        """Serialise the ID -> name table to a JSON string (integer keys become strings)."""
	        return json.dumps(self.id_to_name)


	def add_gate(tensors: Dict[str, torch.Tensor], name: str, weight: Iterable[float], bias: Iterable[float]) -> None:
	    """Store a gate as float32 ``<name>.weight`` and ``<name>.bias`` tensors.

	    Raises:
	        ValueError: if either key is already present in *tensors*.
	    """
	    keys = (f"{name}.weight", f"{name}.bias")
	    if any(key in tensors for key in keys):
	        raise ValueError(f"Gate already exists: {name}")
	    for key, values in zip(keys, (weight, bias)):
	        tensors[key] = torch.tensor(list(values), dtype=torch.float32)


	def drop_prefixes(tensors: Dict[str, torch.Tensor], prefixes: List[str]) -> None:
	    """Remove, in place, every entry whose key starts with any of *prefixes*."""
	    # Collect first so the dict is not mutated while being iterated.
	    doomed = [key for key in tensors if any(key.startswith(p) for p in prefixes)]
	    for key in doomed:
	        del tensors[key]


	def add_decoder(tensors: Dict[str, torch.Tensor]) -> None:
	    """Store the packed address-decoder tensors ``memory.addr_decode.{weight,bias}``.

	    Row ``addr`` of the weight matrix (shape [MEM_BYTES, ADDR_BITS]) holds +1.0
	    where the corresponding address bit, taken MSB first, is 1 and -1.0 where
	    it is 0. The matching bias (shape [MEM_BYTES]) is minus the number of set
	    bits, so for 0/1 inputs the weighted sum plus bias reaches 0 only when the
	    input bits equal the address bits.

	    The tensors are computed in one vectorised step rather than one Python
	    iteration per address; the resulting values are the same.
	    """
	    addresses = torch.arange(MEM_BYTES, dtype=torch.int64).unsqueeze(1)
	    # Shift amounts ADDR_BITS-1 .. 0, so column 0 is the most significant bit.
	    shifts = torch.arange(ADDR_BITS - 1, -1, -1, dtype=torch.int64)
	    bits = (addresses >> shifts) & 1
	    tensors["memory.addr_decode.weight"] = (2 * bits - 1).to(torch.float32)
	    # Negate after the float conversion so address 0 keeps a bias of -0.0.
	    tensors["memory.addr_decode.bias"] = -(bits.sum(dim=1).to(torch.float32))


	def add_memory_read_mux(tensors: Dict[str, torch.Tensor]) -> None:
	    """Store the packed read-mux tensors under ``memory.read.*``.

	    The AND stage has weights of 1.0 with shape [8, MEM_BYTES, 2] and bias -2.0;
	    the OR stage has weights of 1.0 with shape [8, MEM_BYTES] and bias -1.0.
	    """
	    f32 = torch.float32
	    tensors.update({
	        "memory.read.and.weight": torch.ones((8, MEM_BYTES, 2), dtype=f32),
	        "memory.read.and.bias": torch.full((8, MEM_BYTES), -2.0, dtype=f32),
	        "memory.read.or.weight": torch.ones((8, MEM_BYTES), dtype=f32),
	        "memory.read.or.bias": torch.full((8,), -1.0, dtype=f32),
	    })


	def add_memory_write_cells(tensors: Dict[str, torch.Tensor]) -> None:
	    """Store the packed write-cell tensors under ``memory.write.*``.

	    Per cell: ``sel`` (2-input, weights 1, bias -2), ``nsel`` (1-input,
	    weight -1, bias 0), and per data bit ``and_old`` / ``and_new`` (weights 1,
	    bias -2) and ``or`` (weights 1, bias -1).
	    """
	    def ones(*shape: int) -> torch.Tensor:
	        return torch.ones(shape, dtype=torch.float32)

	    def filled(value: float, *shape: int) -> torch.Tensor:
	        return torch.full(shape, value, dtype=torch.float32)

	    tensors.update({
	        "memory.write.sel.weight": ones(MEM_BYTES, 2),
	        "memory.write.sel.bias": filled(-2.0, MEM_BYTES),
	        "memory.write.nsel.weight": filled(-1.0, MEM_BYTES, 1),
	        "memory.write.nsel.bias": torch.zeros((MEM_BYTES,), dtype=torch.float32),
	        "memory.write.and_old.weight": ones(MEM_BYTES, 8, 2),
	        "memory.write.and_old.bias": filled(-2.0, MEM_BYTES, 8),
	        "memory.write.and_new.weight": ones(MEM_BYTES, 8, 2),
	        "memory.write.and_new.bias": filled(-2.0, MEM_BYTES, 8),
	        "memory.write.or.weight": ones(MEM_BYTES, 8, 2),
	        "memory.write.or.bias": filled(-1.0, MEM_BYTES, 8),
	    })


	def add_fetch_load_store_buffers(tensors: Dict[str, torch.Tensor]) -> None:
	    """Add single-input buffer gates (weight 1.0, bias -1.0) for the control paths.

	    Creates 16 ``control.fetch.ir`` bits, 8 ``control.load`` and 8
	    ``control.store`` bits (interleaved per bit), and ADDR_BITS
	    ``control.mem_addr`` bits. ``add_gate`` raises ValueError if any of them
	    already exists.
	    """
	    gate_names = [f"control.fetch.ir.bit{bit}" for bit in range(16)]
	    for bit in range(8):
	        gate_names += [f"control.load.bit{bit}", f"control.store.bit{bit}"]
	    gate_names += [f"control.mem_addr.bit{bit}" for bit in range(ADDR_BITS)]
	    for gate_name in gate_names:
	        add_gate(tensors, gate_name, [1.0], [-1.0])


	def add_shl_shr(tensors: Dict[str, torch.Tensor]) -> None:
	    """Add the SHL (shift left) and SHR (shift right) gate tensors.

	    Two single-input gate shapes are used:
	      identity: w=2, b=-1 -> H(2x - 1) = x for x in {0, 1}
	      zero:     w=0, b=-1 -> H(-1) = 0

	    With MSB-first bit numbering:
	      SHL: out[i] = in[i+1] for i < 7, out[7] = 0
	      SHR: out[0] = 0, out[i] = in[i-1] for i > 0
	    """
	    identity = ([2.0], [-1.0])
	    zero = ([0.0], [-1.0])

	    for bit in range(8):
	        weight, bias = identity if bit < 7 else zero
	        add_gate(tensors, f"alu.alu8bit.shl.bit{bit}", weight, bias)

	    for bit in range(8):
	        weight, bias = identity if bit > 0 else zero
	        add_gate(tensors, f"alu.alu8bit.shr.bit{bit}", weight, bias)


	def add_mul(tensors: Dict[str, torch.Tensor]) -> None:
	    """Add the partial-product gates of the 8-bit multiplier.

	    Only the 64 AND gates P[i][j] = A[i] AND B[j] are created here (weights
	    [1, 1], bias -2), named ``alu.alu8bit.mul.pp.a{i}b{j}``.

	    Per the original notes, the summation of partial products (shift-add via
	    the existing 8-bit adder, keeping the low 8 bits of the 16-bit product)
	    is performed by the multiply method in ThresholdALU, not by this function.
	    """
	    pp_names = [f"alu.alu8bit.mul.pp.a{i}b{j}" for i in range(8) for j in range(8)]
	    for pp_name in pp_names:
	        add_gate(tensors, pp_name, [1.0, 1.0], [-2.0])


	def add_div(tensors: Dict[str, torch.Tensor]) -> None:
	    """Add the gate tensors for the 8-bit divider (8 restoring-division stages).

	    Per stage this creates:
	      - one 16-input comparison gate ``stage{s}.cmp`` with positional weights
	        +128..+1 followed by -128..-1 and bias 0;
	      - for each of 8 bits, a 2:1 mux built from ``not_sel`` (w=-1, b=0),
	        ``and_a`` / ``and_b`` (w=[1, 1], b=-2) and ``or`` (w=[1, 1], b=-1).

	    Per the original notes, the algorithm compares, conditionally subtracts
	    and shifts once per stage, and relies on existing comparator and
	    subtractor components for the rest.
	    """
	    magnitudes = [128.0, 64.0, 32.0, 16.0, 8.0, 4.0, 2.0, 1.0]
	    cmp_weights = magnitudes + [-value for value in magnitudes]

	    # Comparison gates: check if (remainder << 1 | next_bit) >= divisor
	    for stage in range(8):
	        add_gate(tensors, f"alu.alu8bit.div.stage{stage}.cmp", cmp_weights, [0.0])

	    # Conditional mux gates: select (rem - div) or rem based on the comparison
	    for stage in range(8):
	        for bit in range(8):
	            mux = f"alu.alu8bit.div.stage{stage}.mux.bit{bit}"
	            add_gate(tensors, f"{mux}.not_sel", [-1.0], [0.0])      # inverted select
	            add_gate(tensors, f"{mux}.and_a", [1.0, 1.0], [-2.0])   # mux AND, first leg
	            add_gate(tensors, f"{mux}.and_b", [1.0, 1.0], [-2.0])   # mux AND, second leg
	            add_gate(tensors, f"{mux}.or", [1.0, 1.0], [-1.0])      # mux output


	def add_comparators(tensors: Dict[str, torch.Tensor]) -> None:
	    """Add the 8-bit unsigned comparator gates (GT, GE, LT, LE, EQ).

	    Every comparator takes 16 inputs: the 8 bits of A followed by the 8 bits
	    of B, MSB first. Bit i carries positional weight 2^(7-i), so with
	    weights [+128..+1, -128..-1] the weighted sum equals A - B.

	    A threshold gate fires when sum + bias >= 0, therefore:
	      A >  B: weights as above, bias -1 (needs A - B >= 1)
	      A >= B: weights as above, bias  0
	      A <  B: weights negated,  bias -1
	      A <= B: weights negated,  bias  0
	      A == B: two layers, (A >= B) AND (A <= B)
	    """
	    magnitudes = [128.0, 64.0, 32.0, 16.0, 8.0, 4.0, 2.0, 1.0]
	    negated = [-value for value in magnitudes]
	    a_minus_b = magnitudes + negated
	    b_minus_a = negated + magnitudes

	    single_layer = (
	        ("arithmetic.greaterthan8bit", a_minus_b, -1.0),
	        ("arithmetic.greaterorequal8bit", a_minus_b, 0.0),
	        ("arithmetic.lessthan8bit", b_minus_a, -1.0),
	        ("arithmetic.lessorequal8bit", b_minus_a, 0.0),
	        ("arithmetic.equality8bit.layer1.geq", a_minus_b, 0.0),
	        ("arithmetic.equality8bit.layer1.leq", b_minus_a, 0.0),
	    )
	    for gate_name, weights, bias in single_layer:
	        add_gate(tensors, gate_name, weights, [bias])

	    # Second layer of equality: AND of the geq and leq outputs.
	    add_gate(tensors, "arithmetic.equality8bit.layer2", [1.0, 1.0], [-2.0])


	def update_manifest(tensors: Dict[str, torch.Tensor]) -> None:
	    """Write the ``manifest.*`` scalars (memory size, PC width, version 3.0) as 1-element float32 tensors."""
	    entries = (
	        ("manifest.memory_bytes", float(MEM_BYTES)),
	        ("manifest.pc_width", float(ADDR_BITS)),
	        ("manifest.version", 3.0),
	    )
	    for key, value in entries:
	        tensors[key] = torch.tensor([value], dtype=torch.float32)


	def write_manifest(path: Path, tensors: Dict[str, torch.Tensor]) -> None:
	    """Write a plain-text listing of *tensors* to *path* (UTF-8).

	    The file starts with two ``#`` header lines (title and tensor count),
	    followed by one line per tensor in sorted name order giving its shape and
	    all of its values flattened, each formatted with one decimal place.
	    """
	    header = ["# Tensor Manifest", f"# Total: {len(tensors)} tensors"]
	    entries: List[str] = []
	    for name in sorted(tensors):
	        tensor = tensors[name]
	        rendered = ", ".join(format(value, ".1f") for value in tensor.flatten().tolist())
	        entries.append(f"{name}: shape={list(tensor.shape)}, values=[{rendered}]")
	    path.write_text("\n".join(header + entries) + "\n", encoding="utf-8")


	def infer_boolean_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate under ``boolean.``.

	    - ``boolean.not`` reads ``$x``.
	    - The listed two-input gates and any first-layer gate read ``$a`` and ``$b``.
	    - A ``.layer2`` gate whose parent path mentions xor/xnor/biimplies (or
	      ``.layer1.neuron1``) reads ``<parent>.layer1.or`` and ``<parent>.layer1.nand``.
	    - Anything else yields an empty list.
	    """
	    if gate == 'boolean.not':
	        return [reg.register("$x")]

	    two_input_gates = ('boolean.and', 'boolean.or', 'boolean.nand', 'boolean.nor', 'boolean.implies')
	    first_layer_tags = ('.layer1.neuron1', '.layer1.neuron2', '.layer1.or', '.layer1.nand')
	    if gate in two_input_gates or any(tag in gate for tag in first_layer_tags):
	        return [reg.register("$a"), reg.register("$b")]

	    if '.layer2' not in gate:
	        return []
	    parent = gate.rsplit('.layer2', 1)[0]
	    two_layer_tags = ('.layer1.neuron1', 'xor', 'xnor', 'biimplies')
	    if not any(tag in parent for tag in two_layer_tags):
	        return []
	    if '.layer1' in parent:
	        parent = parent.rsplit('.layer1', 1)[0]
	    return [reg.register(f"{parent}.layer1.or"), reg.register(f"{parent}.layer1.nand")]


	def infer_halfadder_inputs(gate: str, prefix: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a half-adder gate rooted at *prefix*.

	    The second layer of the sum XOR reads the two first-layer outputs; every
	    other gate (sum first layer, carry, or anything unrecognised) reads the
	    half adder's ``$a`` and ``$b``.
	    """
	    operands = [reg.register(f"{prefix}.$a"), reg.register(f"{prefix}.$b")]
	    # The first-layer test takes precedence, matching the original check order.
	    if '.sum.layer1' not in gate and '.sum.layer2' in gate:
	        return [
	            reg.register(f"{prefix}.sum.layer1.or"),
	            reg.register(f"{prefix}.sum.layer1.nand"),
	        ]
	    return operands


	def infer_fulladder_inputs(gate: str, prefix: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a full-adder gate rooted at *prefix*.

	    The full adder is two half adders (``ha1`` on $a/$b, ``ha2`` on ha1's sum
	    and $cin) plus ``carry_or`` combining both carries. Unrecognised gate
	    names yield an empty list.
	    """
	    def local(suffix: str) -> int:
	        # Register (or look up) a signal that lives under this adder's prefix.
	        return reg.register(f"{prefix}.{suffix}")

	    a, b, cin = local("$a"), local("$b"), local("$cin")
	    is_plain_gate = '.layer' not in gate

	    if '.ha1.sum.layer1' in gate:
	        return [a, b]
	    if '.ha1.sum.layer2' in gate:
	        return [local("ha1.sum.layer1.or"), local("ha1.sum.layer1.nand")]
	    if is_plain_gate and '.ha1.carry' in gate:
	        return [a, b]
	    if '.ha2.sum.layer1' in gate:
	        return [local("ha1.sum.layer2"), cin]
	    if '.ha2.sum.layer2' in gate:
	        return [local("ha2.sum.layer1.or"), local("ha2.sum.layer1.nand")]
	    if is_plain_gate and '.ha2.carry' in gate:
	        return [local("ha1.sum.layer2"), cin]
	    if '.carry_or' in gate:
	        return [local("ha1.carry"), local("ha2.carry")]
	    return []


	def infer_ripplecarry_inputs(gate: str, prefix: str, bits: int, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate inside a ripple-carry adder.

	    The adder at *prefix* has *bits* full adders named ``fa0`` .. ``fa{bits-1}``.
	    Stage 0 takes the constant "#0" as carry-in; stage k takes
	    ``fa{k-1}.carry_or``. Gates without a ``.fa<N>.`` path component, or with
	    an unrecognised role, yield an empty list.
	    """
	    for i in range(bits):
	        reg.register(f"{prefix}.$a[{i}]")
	        reg.register(f"{prefix}.$b[{i}]")

	    stage_match = re.search(r'\.fa(\d+)\.', gate)
	    if stage_match is None:
	        return []
	    stage = int(stage_match.group(1))

	    def fa(suffix: str) -> int:
	        # Register (or look up) a signal inside this stage's full adder.
	        return reg.register(f"{prefix}.fa{stage}.{suffix}")

	    operands = [reg.get_id(f"{prefix}.$a[{stage}]"), reg.get_id(f"{prefix}.$b[{stage}]")]
	    if stage == 0:
	        carry_in = reg.get_id("#0")
	    else:
	        carry_in = reg.register(f"{prefix}.fa{stage - 1}.carry_or")
	    is_plain_gate = '.layer' not in gate

	    if '.ha1.sum.layer1' in gate:
	        return operands
	    if '.ha1.sum.layer2' in gate:
	        return [fa("ha1.sum.layer1.or"), fa("ha1.sum.layer1.nand")]
	    if is_plain_gate and '.ha1.carry' in gate:
	        return operands
	    if '.ha2.sum.layer1' in gate:
	        return [fa("ha1.sum.layer2"), carry_in]
	    if '.ha2.sum.layer2' in gate:
	        return [fa("ha2.sum.layer1.or"), fa("ha2.sum.layer1.nand")]
	    if is_plain_gate and '.ha2.carry' in gate:
	        return [fa("ha1.sum.layer2"), carry_in]
	    if '.carry_or' in gate:
	        return [fa("ha1.carry"), fa("ha2.carry")]
	    return []


	def infer_adcsbc_inputs(gate: str, prefix: str, is_sub: bool, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate of the ADC/SBC adder at *prefix*.

	    With ``is_sub`` set, each ``notb<N>`` gate reads ``$b[N]`` and stage N's
	    second operand is that ``notb<N>`` output; otherwise the second operand
	    is ``$b[N]`` directly. Stage 0 takes ``$cin`` as carry-in, stage k takes
	    ``fa{k-1}.or_carry``. Unrecognised gates yield an empty list.
	    """
	    for i in range(8):
	        reg.register(f"{prefix}.$a[{i}]")
	        reg.register(f"{prefix}.$b[{i}]")
	    reg.register(f"{prefix}.$cin")

	    if is_sub and '.notb' in gate:
	        inverter = re.search(r'\.notb(\d+)', gate)
	        if inverter is None:
	            return []
	        return [reg.get_id(f"{prefix}.$b[{int(inverter.group(1))}]")]

	    stage_match = re.search(r'\.fa(\d+)\.', gate)
	    if stage_match is None:
	        return []
	    stage = int(stage_match.group(1))

	    def fa(suffix: str) -> int:
	        # Register (or look up) a signal inside this stage's full adder.
	        return reg.register(f"{prefix}.fa{stage}.{suffix}")

	    first = reg.get_id(f"{prefix}.$a[{stage}]")
	    if is_sub:
	        second = reg.register(f"{prefix}.notb{stage}")
	    else:
	        second = reg.get_id(f"{prefix}.$b[{stage}]")
	    if stage == 0:
	        carry_in = reg.get_id(f"{prefix}.$cin")
	    else:
	        carry_in = reg.register(f"{prefix}.fa{stage - 1}.or_carry")

	    if '.xor1.layer1' in gate:
	        return [first, second]
	    if '.xor1.layer2' in gate:
	        return [fa("xor1.layer1.or"), fa("xor1.layer1.nand")]
	    if '.xor2.layer1' in gate:
	        return [fa("xor1.layer2"), carry_in]
	    if '.xor2.layer2' in gate:
	        return [fa("xor2.layer1.or"), fa("xor2.layer1.nand")]
	    if '.and1' in gate:
	        return [first, second]
	    if '.and2' in gate:
	        return [fa("xor1.layer2"), carry_in]
	    if '.or_carry' in gate:
	        return [fa("and1"), fa("and2")]
	    return []


	def infer_sub8bit_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate of ``arithmetic.sub8bit``.

	    ``carry_in`` reads the constant "#1"; ``notb<N>`` reads ``$b[N]``. Stage N
	    of the adder reads ``$a[N]`` and ``notb<N>``, with carry-in "#1" for
	    stage 0 and ``fa{N-1}.or_carry`` otherwise. Unrecognised gates yield an
	    empty list.
	    """
	    prefix = "arithmetic.sub8bit"
	    for i in range(8):
	        reg.register(f"{prefix}.$a[{i}]")
	        reg.register(f"{prefix}.$b[{i}]")

	    if gate == f"{prefix}.carry_in":
	        return [reg.get_id("#1")]

	    if '.notb' in gate:
	        inverter = re.search(r'\.notb(\d+)', gate)
	        if inverter is None:
	            return []
	        return [reg.get_id(f"{prefix}.$b[{int(inverter.group(1))}]")]

	    stage_match = re.search(r'\.fa(\d+)\.', gate)
	    if stage_match is None:
	        return []
	    stage = int(stage_match.group(1))

	    def fa(suffix: str) -> int:
	        # Register (or look up) a signal inside this stage's full adder.
	        return reg.register(f"{prefix}.fa{stage}.{suffix}")

	    minuend_bit = reg.get_id(f"{prefix}.$a[{stage}]")
	    inverted_b = reg.register(f"{prefix}.notb{stage}")
	    if stage == 0:
	        carry_in = reg.get_id("#1")
	    else:
	        carry_in = reg.register(f"{prefix}.fa{stage - 1}.or_carry")

	    if '.xor1.layer1' in gate:
	        return [minuend_bit, inverted_b]
	    if '.xor1.layer2' in gate:
	        return [fa("xor1.layer1.or"), fa("xor1.layer1.nand")]
	    if '.xor2.layer1' in gate:
	        return [fa("xor1.layer2"), carry_in]
	    if '.xor2.layer2' in gate:
	        return [fa("xor2.layer1.or"), fa("xor2.layer1.nand")]
	    if '.and1' in gate:
	        return [minuend_bit, inverted_b]
	    if '.and2' in gate:
	        return [fa("xor1.layer2"), carry_in]
	    if '.or_carry' in gate:
	        return [fa("and1"), fa("and2")]
	    return []


	def infer_threshold_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the IDs of ``$x[0]`` .. ``$x[7]``, registering them if needed (*gate* is not inspected)."""
	    return [reg.register(f"$x[{i}]") for i in range(8)]


	def infer_modular_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate under ``modular.``.

	    - ``layer1.geq*`` / ``layer1.leq*`` gates read ``$x[0..7]``.
	    - ``layer2.eq<N>`` reads ``<parent>.layer1.geq<N>`` and ``<parent>.layer1.leq<N>``.
	    - ``layer3.or`` reads every ``<parent>.layer2.eq<i>`` (i in 0..255) that is
	      already present in the registry, falling back to ``$x[0..7]`` if none is.
	    - Everything else reads ``$x[0..7]``.

	    NOTE(review): nothing in this function registers ``layer2.eq<i>`` signals,
	    so whether ``layer3.or`` ever sees them depends on other code having
	    registered them first - confirm this is intended.
	    """
	    x_bus = [reg.register(f"$x[{i}]") for i in range(8)]

	    if not any(tag in gate for tag in ('.layer1', '.layer2', '.layer3')):
	        return x_bus
	    if 'layer1.geq' in gate or 'layer1.leq' in gate:
	        return x_bus

	    if 'layer2.eq' in gate:
	        eq_match = re.search(r'layer2\.eq(\d+)', gate)
	        if eq_match:
	            idx = eq_match.group(1)
	            parent = gate.rsplit('.layer2', 1)[0]
	            return [
	                reg.register(f"{parent}.layer1.geq{idx}"),
	                reg.register(f"{parent}.layer1.leq{idx}"),
	            ]

	    if 'layer3.or' in gate:
	        parent = gate.rsplit('.layer3', 1)[0]
	        candidates = (f"{parent}.layer2.eq{i}" for i in range(256))
	        known = [reg.get_id(name) for name in candidates if name in reg.name_to_id]
	        return known if known else x_bus

	    return x_bus


	def infer_control_jump_inputs(gate: str, prefix: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate of a conditional-jump mux at *prefix*.

	    Each bit is a 2:1 mux between ``$pc[bit]`` and ``$target[bit]`` selected by
	    a flag whose name is chosen from substrings of *prefix* (``$cond`` when
	    none matches). Gates without a ``.bit<N>.`` path component, or with an
	    unrecognised role, yield an empty list.
	    """
	    for i in range(8):
	        reg.register(f"{prefix}.$pc[{i}]")
	        reg.register(f"{prefix}.$target[{i}]")

	    # (token, tokens that veto the match, flag name); the first matching rule wins.
	    flag_rules = (
	        ("jz", (), "$zero"),
	        ("jc", (), "$carry"),
	        ("jn", ("jnc", "jnz", "jnv"), "$negative"),
	        ("jv", ("jnv",), "$overflow"),
	        ("jp", (), "$positive"),
	        ("jnc", (), "$not_carry"),
	        ("jnz", (), "$not_zero"),
	        ("jnv", (), "$not_overflow"),
	    )
	    flag = "$cond"
	    for token, vetoes, flag_name in flag_rules:
	        if token in prefix and not any(veto in prefix for veto in vetoes):
	            flag = flag_name
	            break
	    select = reg.register(f"{prefix}.{flag}")

	    bit_match = re.search(r'\.bit(\d+)\.', gate)
	    if bit_match is None:
	        return []
	    bit = int(bit_match.group(1))
	    bit_prefix = f"{prefix}.bit{bit}"

	    if '.not_sel' in gate:
	        return [select]
	    if '.and_a' in gate:
	        return [reg.get_id(f"{prefix}.$pc[{bit}]"), reg.register(f"{bit_prefix}.not_sel")]
	    if '.and_b' in gate:
	        return [reg.get_id(f"{prefix}.$target[{bit}]"), select]
	    if '.or' in gate:
	        return [reg.register(f"{bit_prefix}.and_a"), reg.register(f"{bit_prefix}.and_b")]
	    return []


	def infer_buffer_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the single input ID for a buffer gate.

	    A gate named ``<prefix>.bit<N>`` reads ``<prefix>.$data[N]``; any other
	    gate reads the shared ``$data`` signal.
	    """
	    bit_match = re.search(r'\.bit(\d+)$', gate)
	    if bit_match is None:
	        return [reg.register("$data")]
	    owner = gate.rsplit('.bit', 1)[0]
	    return [reg.register(f"{owner}.$data[{int(bit_match.group(1))}]")]


	def infer_memory_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a packed memory gate.

	    The address decoder reads ``$addr[0..15]``; read gates read ``$mem`` and
	    ``$sel``; write gates read ``$mem``, ``$data``, ``$sel`` and ``$we``. Any
	    other gate yields an empty list.
	    """
	    if 'addr_decode' in gate:
	        signal_names = [f"$addr[{i}]" for i in range(16)]
	    elif 'read' in gate:
	        signal_names = ["$mem", "$sel"]
	    elif 'write' in gate:
	        signal_names = ["$mem", "$data", "$sel", "$we"]
	    else:
	        signal_names = []
	    return [reg.register(name) for name in signal_names]


	def infer_alu_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate under ``alu.``.

	    Always registers ``$a[0..7]``, ``$b[0..7]`` (interleaved) and
	    ``$opcode[0..3]`` first, then picks inputs from substrings of *gate*, in
	    this order: alucontrol, aluflags, shl, shr, mul partial products, other
	    mul gates, div stages, bitwise and/or/xor, not, and finally two-layer
	    gates. Anything unmatched reads the whole A bus.

	    NOTE(review): the and/or/xor test runs before the layer1/layer2 test, so a
	    gate whose path contains ``.xor`` (or ``.or``, e.g. ``...layer1.or``) is
	    wired to ``$a[bit]``/``$b[bit]`` and never reaches the two-layer branch -
	    confirm this matches the actual ALU gate names.
	    """
	    a_bus: List[int] = []
	    b_bus: List[int] = []
	    for i in range(8):
	        a_bus.append(reg.register(f"$a[{i}]"))
	        b_bus.append(reg.register(f"$b[{i}]"))
	    opcode_bus = [reg.register(f"$opcode[{i}]") for i in range(4)]

	    def a_at(index: int) -> int:
	        return reg.get_id(f"$a[{index}]")

	    def b_at(index: int) -> int:
	        return reg.get_id(f"$b[{index}]")

	    # First "bit<N>" in the name; several branches below key off it.
	    bit_match = re.search(r'bit(\d+)', gate)

	    if 'alucontrol' in gate:
	        return opcode_bus
	    if 'aluflags' in gate:
	        return [reg.register(name) for name in ("$result", "$carry", "$overflow")]

	    if '.shl.bit' in gate:
	        if bit_match is None:
	            return a_bus
	        bit = int(bit_match.group(1))
	        # MSB-first: output bit i takes input bit i+1; the last bit is constant 0.
	        return [a_at(bit + 1)] if bit < 7 else [reg.get_id("#0")]

	    if '.shr.bit' in gate:
	        if bit_match is None:
	            return a_bus
	        bit = int(bit_match.group(1))
	        # MSB-first: output bit i takes input bit i-1; the first bit is constant 0.
	        return [a_at(bit - 1)] if bit > 0 else [reg.get_id("#0")]

	    if '.mul.pp.a' in gate:
	        pp_match = re.search(r'a(\d+)b(\d+)', gate)
	        if pp_match is None:
	            return a_bus + b_bus
	        return [a_at(int(pp_match.group(1))), b_at(int(pp_match.group(2)))]
	    if '.mul.' in gate:
	        return a_bus + b_bus

	    if '.div.stage' in gate:
	        if '.cmp' not in gate and '.mux.bit' in gate:
	            mux_match = re.search(r'stage(\d+)\.mux\.bit(\d+)', gate)
	            if mux_match:
	                stage, bit = int(mux_match.group(1)), int(mux_match.group(2))
	                stage_prefix = f"alu.alu8bit.div.stage{stage}"
	                mux_prefix = f"{stage_prefix}.mux.bit{bit}"
	                if '.not_sel' in gate:
	                    return [reg.register(f"{stage_prefix}.cmp")]
	                if '.and_a' in gate:
	                    return [reg.register(f"$rem[{bit}]"), reg.register(f"{mux_prefix}.not_sel")]
	                if '.and_b' in gate:
	                    return [reg.register(f"$sub[{bit}]"), reg.register(f"{stage_prefix}.cmp")]
	                if '.or' in gate:
	                    return [reg.register(f"{mux_prefix}.and_a"), reg.register(f"{mux_prefix}.and_b")]
	        return a_bus + b_bus

	    if '.and' in gate or '.or' in gate or '.xor' in gate:
	        if bit_match is None:
	            return a_bus + b_bus
	        bit = int(bit_match.group(1))
	        return [a_at(bit), b_at(bit)]

	    if '.not' in gate:
	        if bit_match is None:
	            return a_bus
	        return [a_at(int(bit_match.group(1)))]

	    if ('layer1' in gate or 'layer2' in gate) and bit_match:
	        bit = int(bit_match.group(1))
	        if 'layer1' in gate:
	            return [a_at(bit), b_at(bit)]
	        parent = gate.rsplit('.layer2', 1)[0]
	        return [reg.register(f"{parent}.layer1.or"), reg.register(f"{parent}.layer1.nand")]

	    return a_bus


	def infer_pattern_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a pattern-recognition gate.

	    ``$x[0..7]`` is always registered. Hamming-distance gates read
	    ``$a[0..7]`` followed by ``$b[0..7]``; every other gate reads ``$x[0..7]``.
	    """
	    x_bus = [reg.register(f"$x[{i}]") for i in range(8)]
	    if 'hammingdistance' not in gate:
	        return x_bus

	    a_bus: List[int] = []
	    b_bus: List[int] = []
	    for i in range(8):
	        a_bus.append(reg.register(f"$a[{i}]"))
	        b_bus.append(reg.register(f"$b[{i}]"))
	    return a_bus + b_bus


	def infer_error_detection_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate under ``error_detection.``.

	    ``$x[0..7]`` is always registered first. Then, by substring of *gate*:

	    - hamming + encode: ``$d[0..3]``
	    - hamming + decode/syndrome: ``$c[0..6]``
	    - crc: ``$data[0..7]``
	    - parity ``stage<S>.xor<K>``: stage 1 reads ``$x[2K]`` and ``$x[2K+1]``;
	      later stages read the ``.layer2`` outputs of xor ``2K`` and ``2K+1``
	      from the previous stage
	    - ``output.not``: ``<parent>.stage3.xor0.layer2``
	    - anything else: ``$x[0..7]``

	    The stage-1 indices are written as ``2*idx`` / ``2*idx+1``; the source had
	    ``2idx``, which is not valid Python (the multiplication signs were lost).
	    """
	    x_bus = [reg.register(f"$x[{i}]") for i in range(8)]

	    if 'hamming' in gate:
	        if 'encode' in gate:
	            return [reg.register(f"$d[{i}]") for i in range(4)]
	        if 'decode' in gate or 'syndrome' in gate:
	            return [reg.register(f"$c[{i}]") for i in range(7)]

	    if 'crc' in gate:
	        return [reg.register(f"$data[{i}]") for i in range(8)]

	    if 'parity' in gate and 'stage' in gate:
	        stage_match = re.search(r'stage(\d+)\.xor(\d+)', gate)
	        if stage_match:
	            stage = int(stage_match.group(1))
	            idx = int(stage_match.group(2))
	            if stage == 1:
	                # First stage pairs up adjacent raw input bits.
	                return [reg.get_id(f"$x[{2 * idx}]"), reg.get_id(f"$x[{2 * idx + 1}]")]
	            parent = gate.rsplit(f'.stage{stage}', 1)[0]
	            prev_stage = stage - 1
	            return [
	                reg.register(f"{parent}.stage{prev_stage}.xor{2 * idx}.layer2"),
	                reg.register(f"{parent}.stage{prev_stage}.xor{2 * idx + 1}.layer2"),
	            ]

	    if 'output.not' in gate:
	        parent = gate.rsplit('.output', 1)[0]
	        return [reg.register(f"{parent}.stage3.xor0.layer2")]

	    return x_bus


	def infer_combinational_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Return the input signal IDs for a gate under ``combinational.``.

	    Handles the 3-to-8 decoder, 8-to-3 encoder, 2/4/8-to-1 multiplexers, the
	    demultiplexer, the 4-register mux (``regmux4to1``), the barrel shifter and
	    the priority encoder. Anything else yields an empty list.
	    """
	    def bus(name: str, width: int) -> List[int]:
	        # Register (or look up) name[0] .. name[width-1] and return their IDs.
	        return [reg.register(f"{name}[{i}]") for i in range(width)]

	    if 'decoder3to8' in gate:
	        return bus("$sel", 3)
	    if 'encoder8to3' in gate:
	        return bus("$x", 8)

	    # 'demultiplexer' contains 'multiplexer', so demux gates are tested against
	    # the 2to1/4to1/8to1 tags first and fall through when none matches.
	    if 'multiplexer' in gate:
	        if '2to1' in gate:
	            return [reg.register("$a"), reg.register("$b"), reg.register("$sel")]
	        if '4to1' in gate:
	            return bus("$x", 4) + bus("$sel", 2)
	        if '8to1' in gate:
	            return bus("$x", 8) + bus("$sel", 3)
	    if 'demultiplexer' in gate:
	        return [reg.register("$x"), reg.register("$sel")]

	    if 'regmux4to1' in gate:
	        for r in range(4):
	            bus(f"$r{r}", 8)
	        bus("$sel", 2)
	        root = "combinational.regmux4to1"
	        if gate == f"{root}.not_s0":
	            return [reg.get_id("$sel[0]")]
	        if gate == f"{root}.not_s1":
	            return [reg.get_id("$sel[1]")]

	        bit_match = re.search(r'bit(\d+)', gate)
	        if bit_match:
	            bit = int(bit_match.group(1))
	            if '.not_s' in gate:
	                which = 0 if 's0' in gate else 1
	                return [reg.get_id(f"$sel[{which}]")]
	            if '.and' in gate:
	                and_match = re.search(r'\.and(\d+)', gate)
	                if and_match:
	                    lane = int(and_match.group(1))
	                    # Lane index bits choose the true or inverted select lines.
	                    low = "$sel[0]" if lane & 1 else f"{root}.not_s0"
	                    high = "$sel[1]" if lane & 2 else f"{root}.not_s1"
	                    return [reg.get_id(f"$r{lane}[{bit}]"), reg.register(low), reg.register(high)]
	            if '.or' in gate:
	                return [reg.register(f"{root}.bit{bit}.and{i}") for i in range(4)]
	        return []

	    if 'barrelshifter' in gate or 'priorityencoder' in gate:
	        return bus("$x", 8)
	    return []


	def _infer_arithmetic_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Route an ``arithmetic.`` gate to the matching adder/comparator inference."""
	    if 'halfadder' in gate:
	        return infer_halfadder_inputs(gate, "arithmetic.halfadder", reg)
	    if 'fulladder' in gate:
	        return infer_fulladder_inputs(gate, "arithmetic.fulladder", reg)
	    for width in (2, 4, 8):
	        family = f"ripplecarry{width}bit"
	        if family in gate:
	            return infer_ripplecarry_inputs(gate, f"arithmetic.{family}", width, reg)
	    if 'adc8bit' in gate:
	        return infer_adcsbc_inputs(gate, "arithmetic.adc8bit", False, reg)
	    if 'sbc8bit' in gate:
	        return infer_adcsbc_inputs(gate, "arithmetic.sbc8bit", True, reg)
	    if 'sub8bit' in gate:
	        return infer_sub8bit_inputs(gate, reg)

	    # Everything below works on the shared 8-bit $a / $b buses.
	    a_bus: List[int] = []
	    b_bus: List[int] = []
	    for i in range(8):
	        a_bus.append(reg.register(f"$a[{i}]"))
	        b_bus.append(reg.register(f"$b[{i}]"))

	    comparators = ('greaterthan8bit', 'lessthan8bit', 'greaterorequal8bit', 'lessorequal8bit')
	    if any(cmp in gate for cmp in comparators):
	        return a_bus + b_bus
	    if 'equality8bit' in gate:
	        if 'layer1' not in gate and 'layer2' in gate:
	            return [
	                reg.register("arithmetic.equality8bit.layer1.geq"),
	                reg.register("arithmetic.equality8bit.layer1.leq"),
	            ]
	        return a_bus + b_bus
	    return a_bus


	def _infer_control_inputs(gate: str, reg: SignalRegistry) -> List[int]:
	    """Route a ``control.`` gate to jump-mux, buffer, or generic ``$ctrl`` wiring."""
	    jump_tokens = ['jz', 'jc', 'jn', 'jv', 'jp', 'jnz', 'jnc', 'jnv', 'conditionaljump']
	    if any(token in gate for token in jump_tokens):
	        if '.bit' in gate:
	            prefix = gate.split('.bit')[0]
	        else:
	            prefix = gate.rsplit('.', 1)[0]
	        return infer_control_jump_inputs(gate, prefix, reg)
	    if any(token in gate for token in ['fetch', 'load', 'store', 'mem_addr']):
	        return infer_buffer_inputs(gate, reg)
	    return [reg.register("$ctrl")]


	def infer_inputs_for_gate(gate: str, reg: SignalRegistry, tensors: Dict[str, torch.Tensor]) -> List[int]:
	    """Return the input signal IDs for *gate*, dispatching on its top-level prefix.

	    ``manifest.`` entries have no inputs. Gates outside every known family
	    get generic ``$input[i]`` signals, one per input implied by the shape of
	    their weight tensor; a gate with no weight tensor yields an empty list.
	    """
	    if gate.startswith('manifest.'):
	        return []

	    families = (
	        ('boolean.', infer_boolean_inputs),
	        ('arithmetic.', _infer_arithmetic_inputs),
	        ('threshold.', infer_threshold_inputs),
	        ('modular.', infer_modular_inputs),
	        ('control.', _infer_control_inputs),
	        ('memory.', infer_memory_inputs),
	        ('alu.', infer_alu_inputs),
	        ('pattern_recognition.', infer_pattern_inputs),
	        ('error_detection.', infer_error_detection_inputs),
	        ('combinational.', infer_combinational_inputs),
	    )
	    for family_prefix, handler in families:
	        if gate.startswith(family_prefix):
	            return handler(gate, reg)

	    weight_key = f"{gate}.weight"
	    if weight_key not in tensors:
	        return []
	    w = tensors[weight_key]
	    n_inputs = w.shape[0] if w.dim() == 1 else w.shape[-1]
	    return [reg.register(f"$input[{i}]") for i in range(n_inputs)]


	def build_inputs(tensors: Dict[str, torch.Tensor]) -> tuple[Dict[str, torch.Tensor], SignalRegistry, dict]:
	    """Add an int64 ``<gate>.inputs`` tensor for every gate that lacks one.

	    Gates are visited in sorted name order with a fresh SignalRegistry.
	    *tensors* is modified in place and also returned.

	    Returns:
	        (tensors, registry, stats) where stats counts gates that were
	        "added", "skipped" (already had .inputs) or "empty" (no inputs inferred).

	    NOTE(review): skipped gates keep whatever IDs their existing .inputs
	    tensor holds, while the registry returned here is rebuilt from scratch -
	    confirm the two stay consistent when this is re-run on a model that
	    already has .inputs tensors.
	    """
	    reg = SignalRegistry()
	    stats = {"added": 0, "skipped": 0, "empty": 0}
	    for gate in sorted(get_all_gates(tensors)):
	        inputs_key = f"{gate}.inputs"
	        if inputs_key in tensors:
	            stats["skipped"] += 1
	            continue
	        signal_ids = infer_inputs_for_gate(gate, reg, tensors)
	        if not signal_ids:
	            stats["empty"] += 1
	            continue
	        tensors[inputs_key] = torch.tensor(signal_ids, dtype=torch.int64)
	        stats["added"] += 1
	    return tensors, reg, stats


	def cmd_memory(args) -> None:
	    """Run the ``memory`` subcommand: rebuild memory and buffer circuits.

	    Loads ``args.model``, drops the existing memory and control-buffer
	    tensors, regenerates them, and updates the manifest scalars. The model is
	    saved only when ``args.apply`` is set; the text manifest is written only
	    when ``args.manifest`` is also set.

	    NOTE(review): the save passes no ``metadata=``, unlike ``cmd_inputs`` -
	    confirm whether previously stored metadata should be carried over.
	    """
	    rule = "=" * 60
	    print(rule)
	    print(" BUILD MEMORY CIRCUITS")
	    print(rule)

	    print(f"\nLoading: {args.model}")
	    tensors = load_tensors(args.model)
	    print(f" Loaded {len(tensors)} tensors")

	    print("\nDropping existing memory/control tensors...")
	    stale_prefixes = [
	        "memory.addr_decode.", "memory.read.", "memory.write.",
	        "control.fetch.ir.", "control.load.", "control.store.", "control.mem_addr.",
	    ]
	    drop_prefixes(tensors, stale_prefixes)
	    print(f" Now {len(tensors)} tensors")

	    print("\nGenerating memory circuits...")
	    for build in (add_decoder, add_memory_read_mux, add_memory_write_cells):
	        build(tensors)
	    print(" Added decoder, read mux, write cells")

	    print("\nGenerating buffer gates...")
	    try:
	        add_fetch_load_store_buffers(tensors)
	        print(" Added fetch/load/store/mem_addr buffers")
	    except ValueError as e:
	        print(f" Buffers already exist: {e}")

	    print("\nUpdating manifest...")
	    update_manifest(tensors)
	    print(f" memory_bytes={MEM_BYTES}, pc_width={ADDR_BITS}")

	    if not args.apply:
	        print("\n[DRY-RUN] Use --apply to save.")
	    else:
	        print(f"\nSaving: {args.model}")
	        save_file(tensors, str(args.model))
	        if args.manifest:
	            write_manifest(MANIFEST_PATH, tensors)
	            print(f" Wrote manifest: {MANIFEST_PATH}")
	        print(" Done.")

	    print(f"\nTotal: {len(tensors)} tensors")
	    print(rule)


	def cmd_inputs(args) -> None:
	    """Run the ``inputs`` subcommand: add ``.inputs`` tensors to every gate.

	    Loads ``args.model``, infers inputs via ``build_inputs`` and prints the
	    resulting counts. With ``args.apply`` set, the model is saved with the
	    signal registry stored as JSON under the ``signal_registry`` metadata key.
	    """
	    rule = "=" * 60
	    print(rule)
	    print(" BUILD .inputs TENSORS")
	    print(rule)

	    print(f"\nLoading: {args.model}")
	    tensors = load_tensors(args.model)
	    print(f" Loaded {len(tensors)} tensors")
	    print(f" Found {len(get_all_gates(tensors))} gates")

	    print("\nBuilding .inputs tensors...")
	    tensors, reg, stats = build_inputs(tensors)

	    print("\nResults:")
	    summary = (
	        (" Added: ", stats['added']),
	        (" Skipped: ", stats['skipped']),
	        (" Empty: ", stats['empty']),
	        (" Signals: ", len(reg.name_to_id)),
	        (" Total: ", len(tensors)),
	    )
	    for label, count in summary:
	        print(f"{label}{count}")

	    if not args.apply:
	        print("\n[DRY-RUN] Use --apply to save.")
	    else:
	        print(f"\nSaving: {args.model}")
	        save_file(tensors, str(args.model), metadata={"signal_registry": reg.to_metadata()})
	        print(" Done.")
	    print(rule)


	def cmd_alu(args) -> None:
	    """Run the ``alu`` subcommand: rebuild the ALU extension circuits.

	    Loads ``args.model``, drops the existing SHL/SHR/MUL/DIV and comparator
	    tensors, then regenerates each group. A ValueError from a builder (a gate
	    already exists) is reported and the remaining groups still run. The model
	    is saved only when ``args.apply`` is set.

	    NOTE(review): the save passes no ``metadata=``, unlike ``cmd_inputs`` -
	    confirm whether previously stored metadata should be carried over.
	    """
	    rule = "=" * 60
	    print(rule)
	    print(" BUILD ALU CIRCUITS")
	    print(rule)

	    print(f"\nLoading: {args.model}")
	    tensors = load_tensors(args.model)
	    print(f" Loaded {len(tensors)} tensors")

	    print("\nDropping existing ALU extension tensors...")
	    stale_prefixes = [
	        "alu.alu8bit.shl.", "alu.alu8bit.shr.",
	        "alu.alu8bit.mul.", "alu.alu8bit.div.",
	        "arithmetic.greaterthan8bit.", "arithmetic.lessthan8bit.",
	        "arithmetic.greaterorequal8bit.", "arithmetic.lessorequal8bit.",
	        "arithmetic.equality8bit.",
	    ]
	    drop_prefixes(tensors, stale_prefixes)
	    print(f" Now {len(tensors)} tensors")

	    # (heading, builder, success message, prefix of the already-exists message)
	    steps = (
	        ("\nGenerating SHL/SHR circuits...", add_shl_shr,
	         " Added SHL (8 gates), SHR (8 gates)", " SHL/SHR already exist: "),
	        ("\nGenerating MUL circuit...", add_mul,
	         " Added MUL (64 partial product AND gates)", " MUL already exists: "),
	        ("\nGenerating DIV circuit...", add_div,
	         " Added DIV (8 stages x comparison + mux)", " DIV already exists: "),
	        ("\nGenerating comparator circuits...", add_comparators,
	         " Added GT, GE, LT, LE (single-layer), EQ (two-layer)", " Comparators already exist: "),
	    )
	    for heading, build, done_message, exists_prefix in steps:
	        print(heading)
	        try:
	            build(tensors)
	            print(done_message)
	        except ValueError as e:
	            print(f"{exists_prefix}{e}")

	    if not args.apply:
	        print("\n[DRY-RUN] Use --apply to save.")
	    else:
	        print(f"\nSaving: {args.model}")
	        save_file(tensors, str(args.model))
	        print(" Done.")

	    print(f"\nTotal: {len(tensors)} tensors")
	    print(rule)


	def cmd_all(args) -> None:
	    """Run the ``memory``, ``alu`` and ``inputs`` subcommands in that order with the same *args*."""
	    stages = (
	        ("Running: memory", cmd_memory),
	        ("\nRunning: alu", cmd_alu),
	        ("\nRunning: inputs", cmd_inputs),
	    )
	    for heading, run_stage in stages:
	        print(heading)
	        run_stage(args)


	def main() -> None:
	    """Parse the command line and run the selected subcommand.

	    Global options: ``--model`` (defaults to MODEL_PATH), ``--apply`` (save
	    instead of dry-run) and ``--manifest``. When no subcommand is given, the
	    help text is printed.
	    """
	    parser = argparse.ArgumentParser(description="Build tools for threshold computer safetensors")
	    parser.add_argument("--model", type=Path, default=MODEL_PATH, help="Model path")
	    parser.add_argument("--apply", action="store_true", help="Apply changes (default: dry-run)")
	    parser.add_argument("--manifest", action="store_true", help="Write tensors.txt manifest (memory only)")

	    subparsers = parser.add_subparsers(dest="command", help="Subcommands")
	    subcommand_help = (
	        ("memory", "Generate 64KB memory circuits"),
	        ("alu", "Generate ALU extension circuits (SHL, SHR, comparators)"),
	        ("inputs", "Add .inputs metadata tensors"),
	        ("all", "Run memory, alu, then inputs"),
	    )
	    for command, help_text in subcommand_help:
	        subparsers.add_parser(command, help=help_text)

	    args = parser.parse_args()
	    handlers = {
	        "memory": cmd_memory,
	        "alu": cmd_alu,
	        "inputs": cmd_inputs,
	        "all": cmd_all,
	    }
	    handler = handlers.get(args.command)
	    if handler is None:
	        parser.print_help()
	    else:
	        handler(args)


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()