// catalyst-n1 / rtl / host_interface.v
// mrwabbit's picture
// Initial upload: Catalyst N1 open source neuromorphic processor RTL
// e4cdd5f verified
// ============================================================================
// Host Interface
// ============================================================================
//
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
// Company No. 17054540 — UK Patent Application No. 2602902.6
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================
// ----------------------------------------------------------------------------
// host_interface
//
// Byte-oriented command decoder that sits between a host byte stream
// (rx_data/rx_valid in, tx_data/tx_valid/tx_ready out) and the mesh
// programming / control ports.
//
// Protocol as implemented below:
//   * Every command is one opcode byte followed by a fixed-length payload
//     (see cmd_payload_len). Multi-byte payload fields are sent MSB first.
//   * Programming / configuration commands answer with a single RESP_ACK
//     byte (0xAA).
//   * CMD_RUN answers with RESP_DONE (0xDD) followed by the 32-bit number of
//     spikes counted during the run, MSB first.
//   * CMD_STATUS answers with one byte holding mesh_state followed by the
//     32-bit mesh_timestep_count, MSB first.
//   * CMD_READ_WEIGHT answers with mesh_probe_data[15:0], MSB first.
//   * Opcodes that are not recognised are dropped without any response.
//
// All *_we strobes, mesh_probe_read, mesh_ext_valid, mesh_start and tx_valid
// are single-cycle pulses: they are cleared by default on every clock and only
// set in the state that drives them.
//
// Bytes arriving on rx while the FSM is outside HI_IDLE / HI_RECV are not
// captured by this module.
// ----------------------------------------------------------------------------
module host_interface #(
    parameter NUM_CORES              = 4,
    parameter CORE_ID_BITS           = 2,
    parameter NUM_NEURONS            = 1024,
    parameter NEURON_BITS            = 10,
    parameter DATA_WIDTH             = 16,
    parameter POOL_ADDR_BITS         = 15,
    parameter COUNT_BITS             = 12,
    parameter ROUTE_SLOT_BITS        = 3,
    parameter GLOBAL_ROUTE_SLOT_BITS = 2
)(
    input  wire                                   clk,
    input  wire                                   rst_n,      // asynchronous, active low

    // Host byte stream
    input  wire [7:0]                             rx_data,
    input  wire                                   rx_valid,
    output reg  [7:0]                             tx_data,
    output reg                                    tx_valid,
    input  wire                                   tx_ready,

    // Run control
    output reg                                    mesh_start,

    // Synapse pool programming (CMD_PROG_POOL)
    output reg                                    mesh_prog_pool_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_pool_core,
    output reg  [POOL_ADDR_BITS-1:0]              mesh_prog_pool_addr,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_pool_src,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_pool_target,
    output reg  signed [DATA_WIDTH-1:0]           mesh_prog_pool_weight,
    output reg  [1:0]                             mesh_prog_pool_comp,

    // Index table programming (CMD_PROG_INDEX)
    output reg                                    mesh_prog_index_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_index_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_index_neuron,
    output reg  [POOL_ADDR_BITS-1:0]              mesh_prog_index_base,
    output reg  [COUNT_BITS-1:0]                  mesh_prog_index_count,
    output reg  [1:0]                             mesh_prog_index_format,

    // Route programming (CMD_PROG_ROUTE)
    output reg                                    mesh_prog_route_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_route_src_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_route_src_neuron,
    output reg  [ROUTE_SLOT_BITS-1:0]             mesh_prog_route_slot,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_route_dest_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]           mesh_prog_route_weight,

    // Global route programming (CMD_PROG_GLOBAL_ROUTE)
    output reg                                    mesh_prog_global_route_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_global_route_src_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_global_route_src_neuron,
    output reg  [GLOBAL_ROUTE_SLOT_BITS-1:0]      mesh_prog_global_route_slot,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_global_route_dest_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_global_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]           mesh_prog_global_route_weight,

    // External stimulus (CMD_STIMULUS)
    output reg                                    mesh_ext_valid,
    output reg  [CORE_ID_BITS-1:0]                mesh_ext_core,
    output reg  [NEURON_BITS-1:0]                 mesh_ext_neuron_id,
    output reg  signed [DATA_WIDTH-1:0]           mesh_ext_current,

    // Mode flags (CMD_LEARN_CFG) and reward (CMD_REWARD); these hold their value
    output reg                                    mesh_learn_enable,
    output reg                                    mesh_graded_enable,
    output reg                                    mesh_dendritic_enable,
    output reg                                    mesh_async_enable,
    output reg                                    mesh_threefactor_enable,
    output reg  signed [DATA_WIDTH-1:0]           mesh_reward_value,
    output reg                                    mesh_noise_enable,
    output reg                                    mesh_skip_idle_enable,
    output reg                                    mesh_scale_u_enable,

    // Delay programming (CMD_PROG_DELAY)
    output reg                                    mesh_prog_delay_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_delay_core,
    output reg  [POOL_ADDR_BITS-1:0]              mesh_prog_delay_addr,
    output reg  [5:0]                             mesh_prog_delay_value,

    // Microcode programming (CMD_PROG_LEARN)
    output reg                                    mesh_prog_ucode_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_ucode_core,
    output reg  [7:0]                             mesh_prog_ucode_addr,
    output reg  [31:0]                            mesh_prog_ucode_data,

    // Parameter programming (CMD_PROG_NEURON, CMD_PROG_DEND_TREE, CMD_RESET_PERF)
    output reg                                    mesh_prog_param_we,
    output reg  [CORE_ID_BITS-1:0]                mesh_prog_param_core,
    output reg  [NEURON_BITS-1:0]                 mesh_prog_param_neuron,
    output reg  [4:0]                             mesh_prog_param_id,
    output reg  signed [DATA_WIDTH-1:0]           mesh_prog_param_value,

    // Probe read-back (CMD_READ_WEIGHT)
    output reg                                    mesh_probe_read,
    output reg  [CORE_ID_BITS-1:0]                mesh_probe_core,
    output reg  [NEURON_BITS-1:0]                 mesh_probe_neuron,
    output reg  [4:0]                             mesh_probe_state_id,
    output reg  [POOL_ADDR_BITS-1:0]              mesh_probe_pool_addr,
    input  wire signed [DATA_WIDTH-1:0]           mesh_probe_data,
    input  wire                                   mesh_probe_valid,

    // DVFS stall setting (CMD_DVFS_CFG); holds its value
    output reg  [7:0]                             mesh_dvfs_stall,

    // Mesh status
    input  wire                                   mesh_timestep_done,
    input  wire [5:0]                             mesh_state,
    input  wire [31:0]                            mesh_total_spikes,
    input  wire [31:0]                            mesh_timestep_count
);

    // ------------------------------------------------------------------------
    // Command opcodes (first byte of every host transaction)
    // ------------------------------------------------------------------------
    localparam CMD_PROG_POOL         = 8'h01;
    localparam CMD_PROG_ROUTE        = 8'h02;
    localparam CMD_STIMULUS          = 8'h03;
    localparam CMD_RUN               = 8'h04;
    localparam CMD_STATUS            = 8'h05;
    localparam CMD_LEARN_CFG         = 8'h06;
    localparam CMD_PROG_NEURON       = 8'h07;
    localparam CMD_PROG_INDEX        = 8'h08;
    localparam CMD_REWARD            = 8'h09;
    localparam CMD_PROG_DELAY        = 8'h0A;
    localparam CMD_PROG_FORMAT       = 8'h0B;
    localparam CMD_PROG_LEARN        = 8'h0C;
    localparam CMD_NOISE_SEED        = 8'h0D;
    localparam CMD_READ_WEIGHT       = 8'h0E;
    localparam CMD_PROG_DEND_TREE    = 8'h0F;
    localparam CMD_PROG_GLOBAL_ROUTE = 8'h10;
    localparam CMD_DVFS_CFG          = 8'h1C;
    localparam CMD_RESET_PERF        = 8'h1D;

    // Response bytes
    localparam RESP_ACK  = 8'hAA;
    localparam RESP_DONE = 8'hDD;

    // ------------------------------------------------------------------------
    // FSM state encoding
    // ------------------------------------------------------------------------
    localparam HI_IDLE              = 6'd0;
    localparam HI_RECV              = 6'd1;
    localparam HI_EXEC_POOL         = 6'd2;
    localparam HI_EXEC_ROUTE        = 6'd3;
    localparam HI_EXEC_STIM         = 6'd4;
    localparam HI_SEND_ACK          = 6'd5;
    localparam HI_RUN_START         = 6'd6;
    localparam HI_RUN_WAIT          = 6'd7;
    localparam HI_RUN_LOOP          = 6'd8;
    localparam HI_SEND_RESP         = 6'd9;
    localparam HI_EXEC_STATUS       = 6'd10;
    localparam HI_SEND_WAIT         = 6'd11;
    localparam HI_EXEC_LEARN        = 6'd12;
    localparam HI_EXEC_PARAM        = 6'd13;
    localparam HI_EXEC_INDEX        = 6'd14;
    localparam HI_EXEC_REWARD       = 6'd15;
    localparam HI_EXEC_DELAY        = 6'd16;
    localparam HI_EXEC_FORMAT       = 6'd17;
    localparam HI_EXEC_LEARN_MC     = 6'd18;
    localparam HI_EXEC_SEED         = 6'd19;
    localparam HI_EXEC_READ_WT      = 6'd20;
    localparam HI_EXEC_GLOBAL_ROUTE = 6'd21;
    localparam HI_PROBE_WAIT        = 6'd22;
    localparam HI_PROBE_RESP        = 6'd23;   // declared but never entered by the FSM below
    localparam HI_EXEC_DEND_TREE    = 6'd24;
    localparam HI_EXEC_DVFS         = 6'd25;
    localparam HI_EXEC_RESET_PERF   = 6'd26;

    // ------------------------------------------------------------------------
    // Internal registers
    // ------------------------------------------------------------------------
    reg [5:0]  state;
    reg [7:0]  cmd;                 // opcode of the command being received
    reg [4:0]  byte_cnt;            // index of the next payload byte to store
    reg [4:0]  payload_len;         // expected payload length for cmd
    reg [7:0]  payload [0:15];      // payload bytes (not reset; always written before use)
    reg [15:0] run_remaining;       // timesteps still to run for CMD_RUN
    reg [31:0] run_spike_base;      // mesh_total_spikes sampled when the run began
    reg [7:0]  resp_buf [0:4];      // multi-byte response staging (not reset)
    reg [2:0]  resp_len;            // number of valid bytes in resp_buf
    reg [2:0]  resp_idx;            // next resp_buf entry to transmit

    // ------------------------------------------------------------------------
    // cmd_payload_len
    //   Returns the number of payload bytes that follow the given opcode.
    //   Unrecognised opcodes return 0, which makes HI_IDLE drop them.
    // ------------------------------------------------------------------------
    function [4:0] cmd_payload_len;
        input [7:0] opcode;
        case (opcode)
            CMD_PROG_POOL:         cmd_payload_len = 5'd8;
            CMD_PROG_ROUTE:        cmd_payload_len = 5'd9;
            CMD_STIMULUS:          cmd_payload_len = 5'd5;
            CMD_RUN:               cmd_payload_len = 5'd2;
            CMD_STATUS:            cmd_payload_len = 5'd0;
            CMD_LEARN_CFG:         cmd_payload_len = 5'd1;
            CMD_PROG_NEURON:       cmd_payload_len = 5'd6;
            CMD_PROG_INDEX:        cmd_payload_len = 5'd7;
            CMD_REWARD:            cmd_payload_len = 5'd2;
            CMD_PROG_DELAY:        cmd_payload_len = 5'd4;
            CMD_PROG_FORMAT:       cmd_payload_len = 5'd4;
            CMD_PROG_LEARN:        cmd_payload_len = 5'd6;
            CMD_NOISE_SEED:        cmd_payload_len = 5'd3;
            CMD_READ_WEIGHT:       cmd_payload_len = 5'd4;
            CMD_PROG_DEND_TREE:    cmd_payload_len = 5'd4;
            CMD_PROG_GLOBAL_ROUTE: cmd_payload_len = 5'd9;
            CMD_DVFS_CFG:          cmd_payload_len = 5'd1;
            CMD_RESET_PERF:        cmd_payload_len = 5'd1;
            default:               cmd_payload_len = 5'd0;
        endcase
    endfunction

    // ------------------------------------------------------------------------
    // Command FSM
    // ------------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            state                              <= HI_IDLE;
            cmd                                <= 0;
            byte_cnt                           <= 0;
            payload_len                        <= 0;
            tx_data                            <= 0;
            tx_valid                           <= 0;
            mesh_start                         <= 0;
            mesh_prog_pool_we                  <= 0;
            mesh_prog_pool_core                <= 0;
            mesh_prog_pool_addr                <= 0;
            mesh_prog_pool_src                 <= 0;
            mesh_prog_pool_target              <= 0;
            mesh_prog_pool_weight              <= 0;
            mesh_prog_pool_comp                <= 0;
            mesh_prog_index_we                 <= 0;
            mesh_prog_index_core               <= 0;
            mesh_prog_index_neuron             <= 0;
            mesh_prog_index_base               <= 0;
            mesh_prog_index_count              <= 0;
            mesh_prog_index_format             <= 0;
            mesh_prog_route_we                 <= 0;
            mesh_prog_route_src_core           <= 0;
            mesh_prog_route_src_neuron         <= 0;
            mesh_prog_route_slot               <= 0;
            mesh_prog_route_dest_core          <= 0;
            mesh_prog_route_dest_neuron        <= 0;
            mesh_prog_route_weight             <= 0;
            mesh_prog_global_route_we          <= 0;
            mesh_prog_global_route_src_core    <= 0;
            mesh_prog_global_route_src_neuron  <= 0;
            mesh_prog_global_route_slot        <= 0;
            mesh_prog_global_route_dest_core   <= 0;
            mesh_prog_global_route_dest_neuron <= 0;
            mesh_prog_global_route_weight      <= 0;
            mesh_ext_valid                     <= 0;
            mesh_ext_core                      <= 0;
            mesh_ext_neuron_id                 <= 0;
            mesh_ext_current                   <= 0;
            mesh_learn_enable                  <= 0;
            mesh_graded_enable                 <= 0;
            mesh_dendritic_enable              <= 0;
            mesh_async_enable                  <= 0;
            mesh_threefactor_enable            <= 0;
            mesh_noise_enable                  <= 0;
            mesh_skip_idle_enable              <= 0;
            mesh_scale_u_enable                <= 0;
            mesh_reward_value                  <= 0;
            mesh_prog_delay_we                 <= 0;
            mesh_prog_delay_core               <= 0;
            mesh_prog_delay_addr               <= 0;
            mesh_prog_delay_value              <= 0;
            mesh_prog_ucode_we                 <= 0;
            mesh_prog_ucode_core               <= 0;
            mesh_prog_ucode_addr               <= 0;
            mesh_prog_ucode_data               <= 0;
            mesh_prog_param_we                 <= 0;
            mesh_prog_param_core               <= 0;
            mesh_prog_param_neuron             <= 0;
            mesh_prog_param_id                 <= 0;
            mesh_prog_param_value              <= 0;
            mesh_probe_read                    <= 0;
            mesh_probe_core                    <= 0;
            mesh_probe_neuron                  <= 0;
            mesh_probe_state_id                <= 0;
            mesh_probe_pool_addr               <= 0;
            mesh_dvfs_stall                    <= 0;
            run_remaining                      <= 0;
            run_spike_base                     <= 0;
            resp_len                           <= 0;
            resp_idx                           <= 0;
        end else begin
            // Default-low strobes: each one is high for exactly one cycle when
            // a state below overrides it.
            mesh_prog_pool_we         <= 0;
            mesh_prog_index_we        <= 0;
            mesh_prog_route_we        <= 0;
            mesh_prog_global_route_we <= 0;
            mesh_prog_delay_we        <= 0;
            mesh_prog_ucode_we        <= 0;
            mesh_prog_param_we        <= 0;
            mesh_probe_read           <= 0;
            mesh_ext_valid            <= 0;
            mesh_start                <= 0;
            tx_valid                  <= 0;

            case (state)
                // ------------------------------------------------------------
                // Wait for an opcode byte.
                // ------------------------------------------------------------
                HI_IDLE: begin
                    if (rx_valid) begin
                        cmd         <= rx_data;
                        payload_len <= cmd_payload_len(rx_data);
                        byte_cnt    <= 0;
                        if (cmd_payload_len(rx_data) == 0) begin
                            // Zero-length commands execute immediately; anything
                            // else with length 0 is an unknown opcode and is dropped.
                            case (rx_data)
                                CMD_STATUS: state <= HI_EXEC_STATUS;
                                default:    state <= HI_IDLE;
                            endcase
                        end else begin
                            state <= HI_RECV;
                        end
                    end
                end

                // ------------------------------------------------------------
                // Collect payload_len bytes, then dispatch on the opcode.
                // ------------------------------------------------------------
                HI_RECV: begin
                    if (rx_valid) begin
                        payload[byte_cnt] <= rx_data;
                        if (byte_cnt == payload_len - 1) begin
                            case (cmd)
                                CMD_PROG_POOL:         state <= HI_EXEC_POOL;
                                CMD_PROG_ROUTE:        state <= HI_EXEC_ROUTE;
                                CMD_STIMULUS:          state <= HI_EXEC_STIM;
                                CMD_RUN:               state <= HI_RUN_START;
                                CMD_LEARN_CFG:         state <= HI_EXEC_LEARN;
                                CMD_PROG_NEURON:       state <= HI_EXEC_PARAM;
                                CMD_PROG_INDEX:        state <= HI_EXEC_INDEX;
                                CMD_REWARD:            state <= HI_EXEC_REWARD;
                                CMD_PROG_DELAY:        state <= HI_EXEC_DELAY;
                                CMD_PROG_FORMAT:       state <= HI_EXEC_FORMAT;
                                CMD_PROG_LEARN:        state <= HI_EXEC_LEARN_MC;
                                CMD_NOISE_SEED:        state <= HI_EXEC_SEED;
                                CMD_READ_WEIGHT:       state <= HI_EXEC_READ_WT;
                                CMD_PROG_DEND_TREE:    state <= HI_EXEC_DEND_TREE;
                                CMD_PROG_GLOBAL_ROUTE: state <= HI_EXEC_GLOBAL_ROUTE;
                                CMD_DVFS_CFG:          state <= HI_EXEC_DVFS;
                                CMD_RESET_PERF:        state <= HI_EXEC_RESET_PERF;
                                default:               state <= HI_IDLE;
                            endcase
                        end else begin
                            byte_cnt <= byte_cnt + 1;
                        end
                    end
                end

                // ------------------------------------------------------------
                // CMD_PROG_POOL payload:
                //   [0] core, [1:2] pool address,
                //   [3] {comp[1:0], src[9:8], target[9:8], 2 unused bits},
                //   [4] src[7:0], [5] target[7:0], [6:7] weight
                // The 16-bit address is truncated to POOL_ADDR_BITS on assignment.
                // ------------------------------------------------------------
                HI_EXEC_POOL: begin
                    mesh_prog_pool_we     <= 1;
                    mesh_prog_pool_core   <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_pool_addr   <= {payload[1], payload[2]};
                    mesh_prog_pool_comp   <= payload[3][7:6];
                    mesh_prog_pool_src    <= {payload[3][5:4], payload[4]};
                    mesh_prog_pool_target <= {payload[3][3:2], payload[5]};
                    mesh_prog_pool_weight <= {payload[6], payload[7]};
                    state                 <= HI_SEND_ACK;
                end

                // ------------------------------------------------------------
                // CMD_PROG_INDEX payload:
                //   [0] core, [1:2] neuron, [3:4] base address, [5:6] count
                // payload[5][7:6] is also used as the format field; the count
                // keeps only its low COUNT_BITS bits on assignment.
                // ------------------------------------------------------------
                HI_EXEC_INDEX: begin
                    mesh_prog_index_we     <= 1;
                    mesh_prog_index_core   <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_index_neuron <= {payload[1], payload[2]};
                    mesh_prog_index_base   <= {payload[3], payload[4]};
                    mesh_prog_index_count  <= {payload[5], payload[6]};
                    mesh_prog_index_format <= payload[5][7:6];
                    state                  <= HI_SEND_ACK;
                end

                // CMD_REWARD payload: [0:1] reward value (held until rewritten)
                HI_EXEC_REWARD: begin
                    mesh_reward_value <= {payload[0], payload[1]};
                    state             <= HI_SEND_ACK;
                end

                // ------------------------------------------------------------
                // CMD_PROG_ROUTE payload:
                //   [0] src core, [1:2] src neuron, [3] slot,
                //   [4] dest core, [5:6] dest neuron, [7:8] weight
                // ------------------------------------------------------------
                HI_EXEC_ROUTE: begin
                    mesh_prog_route_we          <= 1;
                    mesh_prog_route_src_core    <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_route_src_neuron  <= {payload[1], payload[2]};
                    mesh_prog_route_slot        <= payload[3][ROUTE_SLOT_BITS-1:0];
                    mesh_prog_route_dest_core   <= payload[4][CORE_ID_BITS-1:0];
                    mesh_prog_route_dest_neuron <= {payload[5], payload[6]};
                    mesh_prog_route_weight      <= {payload[7], payload[8]};
                    state                       <= HI_SEND_ACK;
                end

                // CMD_STIMULUS payload: [0] core, [1:2] neuron, [3:4] current
                HI_EXEC_STIM: begin
                    mesh_ext_valid     <= 1;
                    mesh_ext_core      <= payload[0][CORE_ID_BITS-1:0];
                    mesh_ext_neuron_id <= {payload[1], payload[2]};
                    mesh_ext_current   <= {payload[3], payload[4]};
                    state              <= HI_SEND_ACK;
                end

                // CMD_LEARN_CFG payload: [0] one enable flag per bit (bit 0 = learn
                // ... bit 7 = scale_u), latched until the next CMD_LEARN_CFG.
                HI_EXEC_LEARN: begin
                    mesh_learn_enable       <= payload[0][0];
                    mesh_graded_enable      <= payload[0][1];
                    mesh_dendritic_enable   <= payload[0][2];
                    mesh_async_enable       <= payload[0][3];
                    mesh_threefactor_enable <= payload[0][4];
                    mesh_noise_enable       <= payload[0][5];
                    mesh_skip_idle_enable   <= payload[0][6];
                    mesh_scale_u_enable     <= payload[0][7];
                    state                   <= HI_SEND_ACK;
                end

                // CMD_PROG_NEURON payload: [0] core, [1:2] neuron, [3] param id, [4:5] value
                HI_EXEC_PARAM: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_param_neuron <= {payload[1], payload[2]};
                    mesh_prog_param_id     <= payload[3][4:0];
                    mesh_prog_param_value  <= {payload[4], payload[5]};
                    state                  <= HI_SEND_ACK;
                end

                // Send the single ACK byte once the transmitter reports ready.
                HI_SEND_ACK: begin
                    if (tx_ready) begin
                        tx_data  <= RESP_ACK;
                        tx_valid <= 1;
                        state    <= HI_IDLE;
                    end
                end

                // ------------------------------------------------------------
                // CMD_RUN payload: [0:1] number of timesteps.
                // The spike counter is sampled here so the response can report
                // only the spikes produced by this run.
                // ------------------------------------------------------------
                HI_RUN_START: begin
                    run_remaining  <= {payload[0], payload[1]};
                    run_spike_base <= mesh_total_spikes;
                    if ({payload[0], payload[1]} == 16'd0) begin
                        // Zero timesteps requested: do not start the mesh at all.
                        // HI_RUN_LOOP sees run_remaining == 0 and builds the DONE
                        // response against the baseline latched above.
                        state <= HI_RUN_LOOP;
                    end else begin
                        mesh_start <= 1;
                        state      <= HI_RUN_WAIT;
                    end
                end

                // Wait for the mesh to report the end of the current timestep.
                HI_RUN_WAIT: begin
                    if (mesh_timestep_done) begin
                        state <= HI_RUN_LOOP;
                    end
                end

                // Either start the next timestep or, when the last requested one
                // has completed, stage RESP_DONE + 32-bit spike delta (MSB first).
                HI_RUN_LOOP: begin
                    if (run_remaining <= 1) begin
                        resp_buf[0] <= RESP_DONE;
                        resp_buf[1] <= (mesh_total_spikes - run_spike_base) >> 24;
                        resp_buf[2] <= (mesh_total_spikes - run_spike_base) >> 16;
                        resp_buf[3] <= (mesh_total_spikes - run_spike_base) >> 8;
                        resp_buf[4] <= (mesh_total_spikes - run_spike_base);
                        resp_len    <= 5;
                        resp_idx    <= 0;
                        state       <= HI_SEND_RESP;
                    end else begin
                        run_remaining <= run_remaining - 1;
                        mesh_start    <= 1;
                        state         <= HI_RUN_WAIT;
                    end
                end

                // CMD_STATUS: stage mesh_state (zero-extended to a byte) followed
                // by the 32-bit timestep counter, MSB first.
                HI_EXEC_STATUS: begin
                    resp_buf[0] <= {2'b00, mesh_state};
                    resp_buf[1] <= mesh_timestep_count >> 24;
                    resp_buf[2] <= mesh_timestep_count >> 16;
                    resp_buf[3] <= mesh_timestep_count >> 8;
                    resp_buf[4] <= mesh_timestep_count;
                    resp_len    <= 5;
                    resp_idx    <= 0;
                    state       <= HI_SEND_RESP;
                end

                // Multi-byte response: present resp_buf[resp_idx] when tx is ready...
                HI_SEND_RESP: begin
                    if (tx_ready) begin
                        tx_data  <= resp_buf[resp_idx];
                        tx_valid <= 1;
                        state    <= HI_SEND_WAIT;
                    end
                end

                // ...then spend one cycle (tx_valid is high during it) deciding
                // whether to advance to the next byte or finish.
                HI_SEND_WAIT: begin
                    if (resp_idx == resp_len - 1) begin
                        state <= HI_IDLE;
                    end else begin
                        resp_idx <= resp_idx + 1;
                        state    <= HI_SEND_RESP;
                    end
                end

                // CMD_PROG_DELAY payload: [0] core, [1:2] pool address, [3] delay (6 bits)
                HI_EXEC_DELAY: begin
                    mesh_prog_delay_we    <= 1;
                    mesh_prog_delay_core  <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_delay_addr  <= {payload[1], payload[2]};
                    mesh_prog_delay_value <= payload[3][5:0];
                    state                 <= HI_SEND_ACK;
                end

                // CMD_PROG_FORMAT: payload is received and acknowledged but drives
                // no output of this module.
                HI_EXEC_FORMAT: state <= HI_SEND_ACK;

                // CMD_PROG_LEARN payload: [0] core, [1] microcode address, [2:5] 32-bit word
                HI_EXEC_LEARN_MC: begin
                    mesh_prog_ucode_we   <= 1;
                    mesh_prog_ucode_core <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_ucode_addr <= payload[1][7:0];
                    mesh_prog_ucode_data <= {payload[2], payload[3], payload[4], payload[5]};
                    state                <= HI_SEND_ACK;
                end

                // CMD_NOISE_SEED: payload is received and acknowledged but drives
                // no output of this module.
                HI_EXEC_SEED: state <= HI_SEND_ACK;

                // ------------------------------------------------------------
                // CMD_READ_WEIGHT payload: [0] core, [1:2] neuron / pool address,
                // [3] state id. The same 16-bit field feeds both
                // mesh_probe_neuron and mesh_probe_pool_addr (each truncated to
                // its own width).
                // ------------------------------------------------------------
                HI_EXEC_READ_WT: begin
                    mesh_probe_read      <= 1;
                    mesh_probe_core      <= payload[0][CORE_ID_BITS-1:0];
                    mesh_probe_neuron    <= {payload[1], payload[2]};
                    mesh_probe_state_id  <= payload[3][4:0];
                    mesh_probe_pool_addr <= {payload[1], payload[2]};
                    state                <= HI_PROBE_WAIT;
                end

                // Wait for the probe result and stage it as a 2-byte response.
                // NOTE(review): there is no timeout here; the FSM stays in this
                // state until mesh_probe_valid is seen. The fixed [15:8]/[7:0]
                // slices assume DATA_WIDTH >= 16.
                HI_PROBE_WAIT: begin
                    if (mesh_probe_valid) begin
                        resp_buf[0] <= mesh_probe_data[15:8];
                        resp_buf[1] <= mesh_probe_data[7:0];
                        resp_len    <= 2;
                        resp_idx    <= 0;
                        state       <= HI_SEND_RESP;
                    end
                end

                // ------------------------------------------------------------
                // CMD_PROG_GLOBAL_ROUTE payload (same layout as CMD_PROG_ROUTE):
                //   [0] src core, [1:2] src neuron, [3] slot,
                //   [4] dest core, [5:6] dest neuron, [7:8] weight
                // ------------------------------------------------------------
                HI_EXEC_GLOBAL_ROUTE: begin
                    mesh_prog_global_route_we          <= 1;
                    mesh_prog_global_route_src_core    <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_global_route_src_neuron  <= {payload[1], payload[2]};
                    mesh_prog_global_route_slot        <= payload[3][GLOBAL_ROUTE_SLOT_BITS-1:0];
                    mesh_prog_global_route_dest_core   <= payload[4][CORE_ID_BITS-1:0];
                    mesh_prog_global_route_dest_neuron <= {payload[5], payload[6]};
                    mesh_prog_global_route_weight      <= {payload[7], payload[8]};
                    state                              <= HI_SEND_ACK;
                end

                // CMD_PROG_DEND_TREE payload: [0] core, [1:2] neuron, [3] 6-bit value.
                // Issued through the parameter port with a fixed param id of 15.
                HI_EXEC_DEND_TREE: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_param_neuron <= {payload[1], payload[2]};
                    mesh_prog_param_id     <= 5'd15;
                    mesh_prog_param_value  <= {{(DATA_WIDTH-6){1'b0}}, payload[3][5:0]};
                    state                  <= HI_SEND_ACK;
                end

                // CMD_DVFS_CFG payload: [0] stall value (held until rewritten)
                HI_EXEC_DVFS: begin
                    mesh_dvfs_stall <= payload[0];
                    state           <= HI_SEND_ACK;
                end

                // CMD_RESET_PERF payload: [0] core.
                // Issued through the parameter port as param id 28, neuron 0, value 0.
                HI_EXEC_RESET_PERF: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= payload[0][CORE_ID_BITS-1:0];
                    mesh_prog_param_neuron <= 0;
                    mesh_prog_param_id     <= 5'd28;
                    mesh_prog_param_value  <= 0;
                    state                  <= HI_SEND_ACK;
                end

                // Any unassigned state encoding recovers to idle.
                default: state <= HI_IDLE;
            endcase
        end
    end

endmodule