// ============================================================================
// Multi-Chip Router
// ============================================================================
//
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
// Company No. 17054540 — UK Patent Application No. 2602902.6
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================

`timescale 1ns/1ps

// ----------------------------------------------------------------------------
// multi_chip_router
//
// Serialises outgoing spike / management packets onto one of NUM_LINKS
// byte-wide links, broadcasts barrier / pre-empt packets on every link, and
// reassembles incoming bytes into packets that are either queued (spikes) or
// reported as single-cycle pulses (barrier, pre-empt, management).
//
// Wire format (MSB first, TX_NUM_BYTES bytes per packet):
//   [start=1][msg type:2][dest chip][src chip][core][neuron][payload:8][pad=0]
// The start bit is what the receiver uses to recognise the first byte of a
// packet (bit 7 of the first byte must be 1).
//
// Parameters:
//   NUM_LINKS     number of byte-wide links
//   CHIP_ID_BITS  width of a chip identifier
//   CORE_ID_BITS  width of a core identifier
//   NEURON_BITS   width of a neuron identifier
//   DATA_WIDTH    width of rx_current (8-bit payload is zero-extended)
//   TX_DEPTH      TX FIFO entries; must be a power of two >= 2 because the
//                 pointers rely on natural binary wrap-around
//   RX_DEPTH      RX FIFO entries; same constraint as TX_DEPTH
// ----------------------------------------------------------------------------
module multi_chip_router #(
    parameter NUM_LINKS    = 1,
    parameter CHIP_ID_BITS = 14,
    parameter CORE_ID_BITS = 7,
    parameter NEURON_BITS  = 10,
    parameter DATA_WIDTH   = 16,
    parameter TX_DEPTH     = 256,
    parameter RX_DEPTH     = 256
)(
    input  wire                         clk,
    input  wire                         rst_n,          // active-low asynchronous reset
    input  wire [CHIP_ID_BITS-1:0]      my_chip_id,     // inserted as source chip in every packet

    // Spike transmit interface (enqueue into TX FIFO)
    input  wire                         tx_push,
    input  wire [CHIP_ID_BITS-1:0]      tx_dest_chip,
    input  wire [CORE_ID_BITS-1:0]      tx_core,
    input  wire [NEURON_BITS-1:0]       tx_neuron,
    input  wire [7:0]                   tx_payload,
    output wire                         tx_full,

    // Spike receive interface (head of RX FIFO)
    output wire [CHIP_ID_BITS-1:0]      rx_src_chip,
    output wire [CORE_ID_BITS-1:0]      rx_core,
    output wire [NEURON_BITS-1:0]       rx_neuron,
    output wire signed [DATA_WIDTH-1:0] rx_current,
    input  wire                         rx_pop,
    output wire                         rx_empty,

    // Barrier broadcast
    input  wire                         barrier_tx_send,
    output reg                          barrier_rx,     // one-cycle pulse on reception

    // Management transmit (shares the TX FIFO with spikes)
    input  wire                         mgmt_tx_push,
    input  wire [CORE_ID_BITS-1:0]      mgmt_tx_core,
    input  wire [NEURON_BITS-1:0]       mgmt_tx_neuron,
    input  wire [7:0]                   mgmt_tx_data,   // only bits [6:0] are transmitted
    input  wire                         mgmt_tx_is_write,
    input  wire [CHIP_ID_BITS-1:0]      mgmt_tx_dest_chip,

    // Management receive (valid is a one-cycle pulse, fields hold their value)
    output reg                          mgmt_rx_valid,
    output reg  [CHIP_ID_BITS-1:0]      mgmt_rx_src_chip,
    output reg  [CORE_ID_BITS-1:0]      mgmt_rx_core,
    output reg  [NEURON_BITS-1:0]       mgmt_rx_neuron,
    output reg  [7:0]                   mgmt_rx_data,   // bit 7 is always 0
    output reg                          mgmt_rx_is_write,

    // Pre-empt broadcast
    input  wire                         preempt_request,
    output reg                          preempt_rx,     // one-cycle pulse on reception

    // Physical byte links (link i uses bits [i*8 +: 8])
    output wire [NUM_LINKS*8-1:0]       link_tx_data,
    output wire [NUM_LINKS-1:0]         link_tx_valid,
    input  wire [NUM_LINKS-1:0]         link_tx_ready,
    input  wire [NUM_LINKS*8-1:0]       link_rx_data,
    input  wire [NUM_LINKS-1:0]         link_rx_valid,
    output wire [NUM_LINKS-1:0]         link_rx_ready
);

    // ------------------------------------------------------------------
    // Message types carried in the 2-bit type field
    // ------------------------------------------------------------------
    localparam MSG_SPIKE   = 2'b00;
    localparam MSG_BARRIER = 2'b01;
    localparam MSG_MGMT    = 2'b10;
    localparam MSG_PREEMPT = 2'b11;

    // ------------------------------------------------------------------
    // Serialised packet geometry
    // ------------------------------------------------------------------
    localparam TX_FLAT_W    = 1 + 2 + 2*CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
    localparam TX_NUM_BYTES = (TX_FLAT_W + 7) / 8;      // round up to whole bytes
    localparam TX_PAD_W     = TX_NUM_BYTES * 8;

    // MSB index of each field inside the TX_PAD_W-bit packet
    // (bit TX_PAD_W-1 is the start bit).
    localparam MSGTYPE_OFFSET = TX_PAD_W - 1 - 1;
    localparam DEST_OFFSET    = MSGTYPE_OFFSET - 2;
    localparam SRC_OFFSET     = DEST_OFFSET - CHIP_ID_BITS;
    localparam CORE_OFFSET    = SRC_OFFSET - CHIP_ID_BITS;
    localparam NRN_OFFSET     = CORE_OFFSET - CORE_ID_BITS;
    localparam PAY_OFFSET     = NRN_OFFSET - NEURON_BITS;

    // TX FIFO entry: {msg type, dest chip, core, neuron, payload}
    localparam PKT_W    = 2 + CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
    // RX FIFO entry: {src chip, core, neuron, current}
    localparam RX_PKT_W = CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + DATA_WIDTH;

    // FIFO address widths follow the depth parameters. Pointers carry one
    // extra MSB so that full and empty can be told apart.
    localparam TX_AW = $clog2(TX_DEPTH);
    localparam RX_AW = $clog2(RX_DEPTH);

    // ------------------------------------------------------------------
    // TX FIFO
    // ------------------------------------------------------------------
    reg  [PKT_W-1:0] tx_fifo [0:TX_DEPTH-1];
    reg  [TX_AW:0]   tx_wr_ptr, tx_rd_ptr;
    wire [TX_AW:0]   tx_count      = tx_wr_ptr - tx_rd_ptr;
    wire             tx_fifo_empty = (tx_wr_ptr == tx_rd_ptr);

    assign tx_full = (tx_count >= TX_DEPTH);

    // TX FIFO writer. A spike push has priority over a management push in the
    // same cycle; the losing management push is not queued.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            tx_wr_ptr <= 0;
        else if (tx_push && !tx_full) begin
            tx_fifo[tx_wr_ptr[TX_AW-1:0]] <= {MSG_SPIKE, tx_dest_chip, tx_core,
                                              tx_neuron, tx_payload};
            tx_wr_ptr <= tx_wr_ptr + 1;
        end else if (mgmt_tx_push && !tx_full) begin
            // Management payload byte is {is_write, data[6:0]}.
            tx_fifo[tx_wr_ptr[TX_AW-1:0]] <= {MSG_MGMT, mgmt_tx_dest_chip, mgmt_tx_core,
                                              mgmt_tx_neuron, mgmt_tx_is_write,
                                              mgmt_tx_data[6:0]};
            tx_wr_ptr <= tx_wr_ptr + 1;
        end
    end

    // Head-of-queue decode
    wire [PKT_W-1:0]        tx_head         = tx_fifo[tx_rd_ptr[TX_AW-1:0]];
    wire [1:0]              tx_head_msgtype = tx_head[PKT_W-1 -: 2];
    wire [CHIP_ID_BITS-1:0] tx_head_chip    = tx_head[PKT_W-3 -: CHIP_ID_BITS];
    // Outgoing link is chosen from the destination chip id.
    wire [CHIP_ID_BITS-1:0] tx_link_sel     = tx_head_chip % NUM_LINKS;

    // ------------------------------------------------------------------
    // TX serialiser state
    // ------------------------------------------------------------------
    reg [TX_PAD_W-1:0]                txs_shift;
    reg [$clog2(TX_NUM_BYTES+1)-1:0]  txs_cnt;
    reg                               txs_active;
    reg [CHIP_ID_BITS-1:0]            txs_link;

    reg [NUM_LINKS*8-1:0] ltx_data;
    reg [NUM_LINKS-1:0]   ltx_valid;
    assign link_tx_data  = ltx_data;
    assign link_tx_valid = ltx_valid;

    // Fully assembled packets, left-aligned, zero padded at the LSB end.
    wire [TX_PAD_W-1:0] tx_flat =
        {1'b1, tx_head_msgtype, tx_head_chip, my_chip_id,
         tx_head[CORE_ID_BITS+NEURON_BITS+7 : 0],
         {(TX_PAD_W - TX_FLAT_W){1'b0}}};

    // Broadcast packets use an all-ones destination and zeroed address/payload.
    wire [TX_PAD_W-1:0] barrier_flat =
        {1'b1, MSG_BARRIER, {CHIP_ID_BITS{1'b1}}, my_chip_id,
         {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
         {(TX_PAD_W - TX_FLAT_W){1'b0}}};

    wire [TX_PAD_W-1:0] preempt_flat =
        {1'b1, MSG_PREEMPT, {CHIP_ID_BITS{1'b1}}, my_chip_id,
         {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
         {(TX_PAD_W - TX_FLAT_W){1'b0}}};

    // Broadcast engine state
    reg                               bcast_active;
    reg [TX_PAD_W-1:0]                bcast_shift;
    reg [$clog2(TX_NUM_BYTES+1)-1:0]  bcast_cnt;
    reg [CHIP_ID_BITS-1:0]            bcast_link;
    reg [TX_PAD_W-1:0]                bcast_flat_save;  // reload value for each further link
    // The next three registers are reset/written below but never read in this
    // module; they are kept so existing hierarchical references stay valid.
    reg [CHIP_ID_BITS-1:0]            bcast_link_max;
    reg [1:0]                         bcast_msg_type;
    reg                               bcast_pending;

    // TX serialiser / broadcast engine.
    // Priority when idle: barrier > pre-empt > queued packet.
    // NOTE(review): barrier_tx_send / preempt_request are only sampled while
    // both engines are idle; a request that is a single-cycle pulse arriving
    // during a transfer is not latched. Confirm callers hold the request.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            txs_active      <= 0;
            txs_cnt         <= 0;
            txs_shift       <= 0;
            txs_link        <= 0;
            tx_rd_ptr       <= 0;
            ltx_data        <= 0;
            ltx_valid       <= 0;
            bcast_active    <= 0;
            bcast_shift     <= 0;
            bcast_cnt       <= 0;
            bcast_link      <= 0;
            bcast_link_max  <= 0;
            bcast_msg_type  <= 0;
            bcast_pending   <= 0;
            bcast_flat_save <= 0;
        end else begin
            // Default: no link is valid unless a branch below drives one.
            ltx_valid <= 0;

            if (bcast_active) begin
                // Present the current top byte on the link being serviced.
                ltx_data[bcast_link*8 +: 8] <= bcast_shift[TX_PAD_W-1 -: 8];
                ltx_valid[bcast_link]       <= 1;
                if (link_tx_ready[bcast_link]) begin
                    bcast_shift <= bcast_shift << 8;
                    if (bcast_cnt == TX_NUM_BYTES - 1) begin
                        if (bcast_link < NUM_LINKS - 1) begin
                            // Packet done on this link: restart on the next.
                            bcast_link  <= bcast_link + 1;
                            bcast_shift <= bcast_flat_save; // overrides the shift above
                            bcast_cnt   <= 0;
                        end else begin
                            bcast_active <= 0;
                        end
                    end else begin
                        bcast_cnt <= bcast_cnt + 1;
                    end
                end
            end else if (!txs_active) begin
                if (barrier_tx_send) begin
                    bcast_active    <= 1;
                    bcast_flat_save <= barrier_flat;
                    bcast_shift     <= barrier_flat;
                    bcast_cnt       <= 0;
                    bcast_link      <= 0;
                    bcast_msg_type  <= MSG_BARRIER;
                end else if (preempt_request) begin
                    bcast_active    <= 1;
                    bcast_flat_save <= preempt_flat;
                    bcast_shift     <= preempt_flat;
                    bcast_cnt       <= 0;
                    bcast_link      <= 0;
                    bcast_msg_type  <= MSG_PREEMPT;
                end else if (!tx_fifo_empty) begin
                    // Pop the head packet and emit its first byte immediately.
                    // NOTE(review): link_tx_ready is not consulted for this
                    // first byte; confirm the link always accepts it.
                    ltx_data[tx_link_sel*8 +: 8] <= tx_flat[TX_PAD_W-1 -: 8];
                    ltx_valid[tx_link_sel]       <= 1;
                    txs_shift  <= tx_flat << 8;
                    txs_link   <= tx_link_sel;
                    txs_cnt    <= 1;
                    txs_active <= 1;
                    tx_rd_ptr  <= tx_rd_ptr + 1;
                end
            end else begin
                // Remaining bytes of a queued packet.
                ltx_data[txs_link*8 +: 8] <= txs_shift[TX_PAD_W-1 -: 8];
                ltx_valid[txs_link]       <= 1;
                if (link_tx_ready[txs_link]) begin
                    txs_shift <= txs_shift << 8;
                    if (txs_cnt == TX_NUM_BYTES - 1)
                        txs_active <= 0;
                    else
                        txs_cnt <= txs_cnt + 1;
                end
            end
        end
    end

    // ------------------------------------------------------------------
    // RX FIFO storage and pointers.
    // Declared ahead of link_rx_ready so rx_count is never referenced before
    // its declaration.
    // ------------------------------------------------------------------
    reg  [RX_PKT_W-1:0] rx_fifo [0:RX_DEPTH-1];
    reg  [RX_AW:0]      rx_wr_ptr, rx_rd_ptr;
    wire [RX_AW:0]      rx_count = rx_wr_ptr - rx_rd_ptr;

    assign rx_empty = (rx_wr_ptr == rx_rd_ptr);

    // ------------------------------------------------------------------
    // Per-link RX deserialisers
    // ------------------------------------------------------------------
    reg [TX_PAD_W-1:0]                rxs_accum [0:NUM_LINKS-1];
    reg [$clog2(TX_NUM_BYTES+1)-1:0]  rxs_cnt   [0:NUM_LINKS-1];
    reg [NUM_LINKS-1:0]               rxs_push;

    // All links are throttled together once fewer than four free RX FIFO
    // slots remain.
    assign link_rx_ready = (rx_count < RX_DEPTH - 4) ? {NUM_LINKS{1'b1}}
                                                     : {NUM_LINKS{1'b0}};

    genvar li;
    generate
        for (li = 0; li < NUM_LINKS; li = li + 1) begin : gen_rx
            // Shift bytes in MSB first. While idle (count 0) a byte is only
            // accepted as a packet start when its bit 7 (the start bit) is
            // set. rxs_push pulses for one cycle after the last byte.
            always @(posedge clk or negedge rst_n) begin
                if (!rst_n) begin
                    rxs_cnt[li]   <= 0;
                    rxs_push[li]  <= 0;
                    rxs_accum[li] <= 0;
                end else begin
                    rxs_push[li] <= 0;
                    if (link_rx_valid[li]) begin
                        rxs_accum[li] <= {rxs_accum[li][TX_PAD_W-9:0],
                                          link_rx_data[li*8 +: 8]};
                        if (rxs_cnt[li] == 0) begin
                            if (link_rx_data[li*8 + 7]) begin
                                // First byte: start from a clean accumulator.
                                rxs_accum[li] <= {{(TX_PAD_W-8){1'b0}},
                                                  link_rx_data[li*8 +: 8]};
                                rxs_cnt[li]   <= 1;
                            end
                        end else begin
                            if (rxs_cnt[li] == TX_NUM_BYTES - 1) begin
                                rxs_push[li] <= 1;
                                rxs_cnt[li]  <= 0;
                            end else begin
                                rxs_cnt[li] <= rxs_cnt[li] + 1;
                            end
                        end
                    end
                end
            end
        end
    endgenerate

    // ------------------------------------------------------------------
    // RX dispatcher: route completed packets by message type.
    //
    // Spikes go to the RX FIFO (silently dropped when it is full); barrier,
    // pre-empt and management packets raise one-cycle pulses. A running
    // write pointer (blocking assignments) lets several links enqueue spikes
    // in the same cycle without overwriting one another. If several links
    // deliver a management packet in the same cycle, the highest-numbered
    // link's fields win.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin : rx_fifo_wr
        integer       k;
        reg [1:0]     rx_msg_type;
        reg [RX_AW:0] rx_wr_next;   // write pointer after the pushes so far this cycle
        reg [RX_AW:0] rx_fill;      // occupancy, truncated to pointer width so wrap works
        if (!rst_n) begin
            rx_wr_ptr        <= 0;
            barrier_rx       <= 0;
            preempt_rx       <= 0;
            mgmt_rx_valid    <= 0;
            mgmt_rx_src_chip <= 0;
            mgmt_rx_core     <= 0;
            mgmt_rx_neuron   <= 0;
            mgmt_rx_data     <= 0;
            mgmt_rx_is_write <= 0;
        end else begin
            barrier_rx    <= 0;
            preempt_rx    <= 0;
            mgmt_rx_valid <= 0;
            rx_wr_next = rx_wr_ptr;
            for (k = 0; k < NUM_LINKS; k = k + 1) begin
                if (rxs_push[k]) begin
                    rx_msg_type = rxs_accum[k][MSGTYPE_OFFSET -: 2];
                    case (rx_msg_type)
                        MSG_SPIKE: begin
                            rx_fill = rx_wr_next - rx_rd_ptr;
                            if (rx_fill < RX_DEPTH) begin
                                // 8-bit payload is zero-extended to DATA_WIDTH.
                                rx_fifo[rx_wr_next[RX_AW-1:0]] <= {
                                    rxs_accum[k][SRC_OFFSET  -: CHIP_ID_BITS],
                                    rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS],
                                    rxs_accum[k][NRN_OFFSET  -: NEURON_BITS],
                                    {{(DATA_WIDTH-8){1'b0}}, rxs_accum[k][PAY_OFFSET -: 8]}
                                };
                                rx_wr_next = rx_wr_next + 1'b1;
                            end
                        end
                        MSG_BARRIER: begin
                            barrier_rx <= 1;
                        end
                        MSG_MGMT: begin
                            mgmt_rx_valid    <= 1;
                            mgmt_rx_src_chip <= rxs_accum[k][SRC_OFFSET  -: CHIP_ID_BITS];
                            mgmt_rx_core     <= rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS];
                            mgmt_rx_neuron   <= rxs_accum[k][NRN_OFFSET  -: NEURON_BITS];
                            // Payload byte is {is_write, data[6:0]}.
                            mgmt_rx_is_write <= rxs_accum[k][PAY_OFFSET];
                            mgmt_rx_data     <= {1'b0, rxs_accum[k][PAY_OFFSET-1 -: 7]};
                        end
                        MSG_PREEMPT: begin
                            preempt_rx <= 1;
                        end
                    endcase
                end
            end
            rx_wr_ptr <= rx_wr_next;
        end
    end

    // RX FIFO read pointer: advance on pop when not empty.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rx_rd_ptr <= 0;
        else if (rx_pop && !rx_empty)
            rx_rd_ptr <= rx_rd_ptr + 1;
    end

    // Head-of-queue unpack: {src chip, core, neuron, current}.
    wire [RX_PKT_W-1:0] rx_top = rx_fifo[rx_rd_ptr[RX_AW-1:0]];
    assign rx_src_chip = rx_top[RX_PKT_W-1 -: CHIP_ID_BITS];
    assign rx_core     = rx_top[NEURON_BITS+DATA_WIDTH +: CORE_ID_BITS];
    assign rx_neuron   = rx_top[DATA_WIDTH +: NEURON_BITS];
    assign rx_current  = rx_top[DATA_WIDTH-1:0];

endmodule