| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | `timescale 1ns/1ps
|
| |
|
| | module multi_chip_router #(
|
| | parameter NUM_LINKS = 1,
|
| | parameter CHIP_ID_BITS = 14,
|
| | parameter CORE_ID_BITS = 7,
|
| | parameter NEURON_BITS = 10,
|
| | parameter DATA_WIDTH = 16,
|
| | parameter TX_DEPTH = 256,
|
| | parameter RX_DEPTH = 256
|
| | )(
|
| | input wire clk,
|
| | input wire rst_n,
|
| |
|
| | input wire [CHIP_ID_BITS-1:0] my_chip_id,
|
| |
|
| | input wire tx_push,
|
| | input wire [CHIP_ID_BITS-1:0] tx_dest_chip,
|
| | input wire [CORE_ID_BITS-1:0] tx_core,
|
| | input wire [NEURON_BITS-1:0] tx_neuron,
|
| | input wire [7:0] tx_payload,
|
| | output wire tx_full,
|
| |
|
| | output wire [CHIP_ID_BITS-1:0] rx_src_chip,
|
| | output wire [CORE_ID_BITS-1:0] rx_core,
|
| | output wire [NEURON_BITS-1:0] rx_neuron,
|
| | output wire signed [DATA_WIDTH-1:0] rx_current,
|
| | input wire rx_pop,
|
| | output wire rx_empty,
|
| |
|
| | input wire barrier_tx_send,
|
| | output reg barrier_rx,
|
| |
|
| | input wire mgmt_tx_push,
|
| | input wire [CORE_ID_BITS-1:0] mgmt_tx_core,
|
| | input wire [NEURON_BITS-1:0] mgmt_tx_neuron,
|
| | input wire [7:0] mgmt_tx_data,
|
| | input wire mgmt_tx_is_write,
|
| | input wire [CHIP_ID_BITS-1:0] mgmt_tx_dest_chip,
|
| | output reg mgmt_rx_valid,
|
| | output reg [CHIP_ID_BITS-1:0] mgmt_rx_src_chip,
|
| | output reg [CORE_ID_BITS-1:0] mgmt_rx_core,
|
| | output reg [NEURON_BITS-1:0] mgmt_rx_neuron,
|
| | output reg [7:0] mgmt_rx_data,
|
| | output reg mgmt_rx_is_write,
|
| |
|
| | input wire preempt_request,
|
| | output reg preempt_rx,
|
| |
|
| | output wire [NUM_LINKS*8-1:0] link_tx_data,
|
| | output wire [NUM_LINKS-1:0] link_tx_valid,
|
| | input wire [NUM_LINKS-1:0] link_tx_ready,
|
| | input wire [NUM_LINKS*8-1:0] link_rx_data,
|
| | input wire [NUM_LINKS-1:0] link_rx_valid,
|
| | output wire [NUM_LINKS-1:0] link_rx_ready
|
| | );
|
| |
|
| | localparam MSG_SPIKE = 2'b00;
|
| | localparam MSG_BARRIER = 2'b01;
|
| | localparam MSG_MGMT = 2'b10;
|
| | localparam MSG_PREEMPT = 2'b11;
|
| |
|
| | localparam TX_FLAT_W = 1 + 2 + 2*CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
|
| | localparam TX_NUM_BYTES = (TX_FLAT_W + 7) / 8;
|
| | localparam TX_PAD_W = TX_NUM_BYTES * 8;
|
| |
|
| | localparam MSGTYPE_OFFSET = TX_PAD_W - 1 - 1;
|
| | localparam DEST_OFFSET = MSGTYPE_OFFSET - 2;
|
| | localparam SRC_OFFSET = DEST_OFFSET - CHIP_ID_BITS;
|
| | localparam CORE_OFFSET = SRC_OFFSET - CHIP_ID_BITS;
|
| | localparam NRN_OFFSET = CORE_OFFSET - CORE_ID_BITS;
|
| | localparam PAY_OFFSET = NRN_OFFSET - NEURON_BITS;
|
| |
|
| | localparam PKT_W = 2 + CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
|
| |
|
| | reg [PKT_W-1:0] tx_fifo [0:TX_DEPTH-1];
|
| | reg [8:0] tx_wr_ptr, tx_rd_ptr;
|
| | wire [8:0] tx_count = tx_wr_ptr - tx_rd_ptr;
|
| | wire tx_fifo_empty = (tx_wr_ptr == tx_rd_ptr);
|
| | assign tx_full = (tx_count >= TX_DEPTH);
|
| |
|
| | always @(posedge clk or negedge rst_n) begin
|
| | if (!rst_n)
|
| | tx_wr_ptr <= 0;
|
| | else if (tx_push && !tx_full) begin
|
| | tx_fifo[tx_wr_ptr[7:0]] <= {MSG_SPIKE, tx_dest_chip, tx_core, tx_neuron, tx_payload};
|
| | tx_wr_ptr <= tx_wr_ptr + 1;
|
| | end else if (mgmt_tx_push && !tx_full) begin
|
| | tx_fifo[tx_wr_ptr[7:0]] <= {MSG_MGMT, mgmt_tx_dest_chip, mgmt_tx_core, mgmt_tx_neuron,
|
| | mgmt_tx_is_write, mgmt_tx_data[6:0]};
|
| | tx_wr_ptr <= tx_wr_ptr + 1;
|
| | end
|
| | end
|
| |
|
| | wire [PKT_W-1:0] tx_head = tx_fifo[tx_rd_ptr[7:0]];
|
| | wire [1:0] tx_head_msgtype = tx_head[PKT_W-1 -: 2];
|
| | wire [CHIP_ID_BITS-1:0] tx_head_chip = tx_head[PKT_W-3 -: CHIP_ID_BITS];
|
| |
|
| | wire [CHIP_ID_BITS-1:0] tx_link_sel = tx_head_chip % NUM_LINKS;
|
| |
|
| | reg [TX_PAD_W-1:0] txs_shift;
|
| | reg [$clog2(TX_NUM_BYTES+1)-1:0] txs_cnt;
|
| | reg txs_active;
|
| | reg [CHIP_ID_BITS-1:0] txs_link;
|
| |
|
| | reg [NUM_LINKS*8-1:0] ltx_data;
|
| | reg [NUM_LINKS-1:0] ltx_valid;
|
| | assign link_tx_data = ltx_data;
|
| | assign link_tx_valid = ltx_valid;
|
| |
|
| | wire [TX_PAD_W-1:0] tx_flat = {1'b1, tx_head_msgtype, tx_head_chip, my_chip_id,
|
| | tx_head[CORE_ID_BITS+NEURON_BITS+7 : 0],
|
| | {(TX_PAD_W - TX_FLAT_W){1'b0}}};
|
| |
|
| | wire [TX_PAD_W-1:0] barrier_flat = {1'b1, MSG_BARRIER, {CHIP_ID_BITS{1'b1}}, my_chip_id,
|
| | {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
|
| | {(TX_PAD_W - TX_FLAT_W){1'b0}}};
|
| | wire [TX_PAD_W-1:0] preempt_flat = {1'b1, MSG_PREEMPT, {CHIP_ID_BITS{1'b1}}, my_chip_id,
|
| | {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
|
| | {(TX_PAD_W - TX_FLAT_W){1'b0}}};
|
| |
|
| | reg bcast_active;
|
| | reg [TX_PAD_W-1:0] bcast_shift;
|
| | reg [$clog2(TX_NUM_BYTES+1)-1:0] bcast_cnt;
|
| | reg [CHIP_ID_BITS-1:0] bcast_link;
|
| | reg [CHIP_ID_BITS-1:0] bcast_link_max;
|
| | reg [1:0] bcast_msg_type;
|
| | reg bcast_pending;
|
| | reg [TX_PAD_W-1:0] bcast_flat_save;
|
| |
|
| | always @(posedge clk or negedge rst_n) begin
|
| | if (!rst_n) begin
|
| | txs_active <= 0;
|
| | txs_cnt <= 0;
|
| | txs_shift <= 0;
|
| | txs_link <= 0;
|
| | tx_rd_ptr <= 0;
|
| | ltx_data <= 0;
|
| | ltx_valid <= 0;
|
| | bcast_active <= 0;
|
| | bcast_shift <= 0;
|
| | bcast_cnt <= 0;
|
| | bcast_link <= 0;
|
| | bcast_link_max <= 0;
|
| | bcast_msg_type <= 0;
|
| | bcast_pending <= 0;
|
| | bcast_flat_save <= 0;
|
| | end else begin
|
| | ltx_valid <= 0;
|
| |
|
| | if (bcast_active) begin
|
| | ltx_data[bcast_link*8 +: 8] <= bcast_shift[TX_PAD_W-1 -: 8];
|
| | ltx_valid[bcast_link] <= 1;
|
| |
|
| | if (link_tx_ready[bcast_link]) begin
|
| | bcast_shift <= bcast_shift << 8;
|
| | if (bcast_cnt == TX_NUM_BYTES - 1) begin
|
| | if (bcast_link < NUM_LINKS - 1) begin
|
| | bcast_link <= bcast_link + 1;
|
| | bcast_shift <= bcast_flat_save;
|
| | bcast_cnt <= 0;
|
| | end else begin
|
| | bcast_active <= 0;
|
| | end
|
| | end else begin
|
| | bcast_cnt <= bcast_cnt + 1;
|
| | end
|
| | end
|
| | end else if (!txs_active) begin
|
| | if (barrier_tx_send) begin
|
| | bcast_active <= 1;
|
| | bcast_flat_save <= barrier_flat;
|
| | bcast_shift <= barrier_flat;
|
| | bcast_cnt <= 0;
|
| | bcast_link <= 0;
|
| | bcast_msg_type <= MSG_BARRIER;
|
| | end else if (preempt_request) begin
|
| | bcast_active <= 1;
|
| | bcast_flat_save <= preempt_flat;
|
| | bcast_shift <= preempt_flat;
|
| | bcast_cnt <= 0;
|
| | bcast_link <= 0;
|
| | bcast_msg_type <= MSG_PREEMPT;
|
| | end else if (!tx_fifo_empty) begin
|
| | ltx_data[tx_link_sel*8 +: 8] <= tx_flat[TX_PAD_W-1 -: 8];
|
| | ltx_valid[tx_link_sel] <= 1;
|
| | txs_shift <= tx_flat << 8;
|
| | txs_link <= tx_link_sel;
|
| | txs_cnt <= 1;
|
| | txs_active <= 1;
|
| | tx_rd_ptr <= tx_rd_ptr + 1;
|
| | end
|
| | end else begin
|
| | ltx_data[txs_link*8 +: 8] <= txs_shift[TX_PAD_W-1 -: 8];
|
| | ltx_valid[txs_link] <= 1;
|
| |
|
| | if (link_tx_ready[txs_link]) begin
|
| | txs_shift <= txs_shift << 8;
|
| | if (txs_cnt == TX_NUM_BYTES - 1)
|
| | txs_active <= 0;
|
| | else
|
| | txs_cnt <= txs_cnt + 1;
|
| | end
|
| | end
|
| | end
|
| | end
|
| |
|
| | localparam RX_PKT_W = CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + DATA_WIDTH;
|
| |
|
| | reg [TX_PAD_W-1:0] rxs_accum [0:NUM_LINKS-1];
|
| | reg [$clog2(TX_NUM_BYTES+1)-1:0] rxs_cnt [0:NUM_LINKS-1];
|
| | reg [NUM_LINKS-1:0] rxs_push;
|
| |
|
| | assign link_rx_ready = (rx_count < RX_DEPTH - 4) ? {NUM_LINKS{1'b1}} : {NUM_LINKS{1'b0}};
|
| |
|
| | genvar li;
|
| | generate
|
| | for (li = 0; li < NUM_LINKS; li = li + 1) begin : gen_rx
|
| | always @(posedge clk or negedge rst_n) begin
|
| | if (!rst_n) begin
|
| | rxs_cnt[li] <= 0;
|
| | rxs_push[li] <= 0;
|
| | rxs_accum[li] <= 0;
|
| | end else begin
|
| | rxs_push[li] <= 0;
|
| |
|
| | if (link_rx_valid[li]) begin
|
| | rxs_accum[li] <= {rxs_accum[li][TX_PAD_W-9:0], link_rx_data[li*8 +: 8]};
|
| |
|
| | if (rxs_cnt[li] == 0) begin
|
| | if (link_rx_data[li*8 + 7]) begin
|
| | rxs_accum[li] <= {{(TX_PAD_W-8){1'b0}}, link_rx_data[li*8 +: 8]};
|
| | rxs_cnt[li] <= 1;
|
| | end
|
| | end else begin
|
| | if (rxs_cnt[li] == TX_NUM_BYTES - 1) begin
|
| | rxs_push[li] <= 1;
|
| | rxs_cnt[li] <= 0;
|
| | end else begin
|
| | rxs_cnt[li] <= rxs_cnt[li] + 1;
|
| | end
|
| | end
|
| | end
|
| | end
|
| | end
|
| | end
|
| | endgenerate
|
| |
|
| |
|
| | reg [RX_PKT_W-1:0] rx_fifo [0:RX_DEPTH-1];
|
| | reg [8:0] rx_wr_ptr, rx_rd_ptr;
|
| | wire [8:0] rx_count = rx_wr_ptr - rx_rd_ptr;
|
| | assign rx_empty = (rx_wr_ptr == rx_rd_ptr);
|
| |
|
| | always @(posedge clk or negedge rst_n) begin : rx_fifo_wr
|
| | integer k;
|
| | reg [1:0] rx_msg_type;
|
| | if (!rst_n) begin
|
| | rx_wr_ptr <= 0;
|
| | barrier_rx <= 0;
|
| | preempt_rx <= 0;
|
| | mgmt_rx_valid <= 0;
|
| | mgmt_rx_src_chip <= 0;
|
| | mgmt_rx_core <= 0;
|
| | mgmt_rx_neuron <= 0;
|
| | mgmt_rx_data <= 0;
|
| | mgmt_rx_is_write <= 0;
|
| | end else begin
|
| | barrier_rx <= 0;
|
| | preempt_rx <= 0;
|
| | mgmt_rx_valid <= 0;
|
| |
|
| | for (k = 0; k < NUM_LINKS; k = k + 1) begin
|
| | if (rxs_push[k]) begin
|
| | rx_msg_type = rxs_accum[k][MSGTYPE_OFFSET -: 2];
|
| |
|
| | case (rx_msg_type)
|
| | MSG_SPIKE: begin
|
| | if (rx_count < RX_DEPTH) begin
|
| | rx_fifo[rx_wr_ptr[7:0]] <= {
|
| | rxs_accum[k][SRC_OFFSET -: CHIP_ID_BITS],
|
| | rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS],
|
| | rxs_accum[k][NRN_OFFSET -: NEURON_BITS],
|
| | {{(DATA_WIDTH-8){1'b0}},
|
| | rxs_accum[k][PAY_OFFSET -: 8]}
|
| | };
|
| | rx_wr_ptr <= rx_wr_ptr + 1;
|
| | end
|
| | end
|
| |
|
| | MSG_BARRIER: begin
|
| | barrier_rx <= 1;
|
| | end
|
| |
|
| | MSG_MGMT: begin
|
| | mgmt_rx_valid <= 1;
|
| | mgmt_rx_src_chip <= rxs_accum[k][SRC_OFFSET -: CHIP_ID_BITS];
|
| | mgmt_rx_core <= rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS];
|
| | mgmt_rx_neuron <= rxs_accum[k][NRN_OFFSET -: NEURON_BITS];
|
| | mgmt_rx_is_write <= rxs_accum[k][PAY_OFFSET];
|
| | mgmt_rx_data <= {1'b0, rxs_accum[k][PAY_OFFSET-1 -: 7]};
|
| | end
|
| |
|
| | MSG_PREEMPT: begin
|
| | preempt_rx <= 1;
|
| | end
|
| | endcase
|
| | end
|
| | end
|
| | end
|
| | end
|
| |
|
| | always @(posedge clk or negedge rst_n) begin
|
| | if (!rst_n)
|
| | rx_rd_ptr <= 0;
|
| | else if (rx_pop && !rx_empty)
|
| | rx_rd_ptr <= rx_rd_ptr + 1;
|
| | end
|
| |
|
| | wire [RX_PKT_W-1:0] rx_top = rx_fifo[rx_rd_ptr[7:0]];
|
| | assign rx_src_chip = rx_top[RX_PKT_W-1 -: CHIP_ID_BITS];
|
| | assign rx_core = rx_top[NEURON_BITS+DATA_WIDTH +: CORE_ID_BITS];
|
| | assign rx_neuron = rx_top[DATA_WIDTH +: NEURON_BITS];
|
| | assign rx_current = rx_top[DATA_WIDTH-1:0];
|
| |
|
| | endmodule
|
| |
|