// File: catalyst-n1/tb/tb_host_interface.v
// Repository page residue: "mrwabbit's picture"
// Commit message: Initial upload: Catalyst N1 open source neuromorphic processor RTL
// Commit: e4cdd5f (verified)
// ============================================================================
// Testbench: Host Interface (byte-level, bypassing UART serial timing)
// ============================================================================
//
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
// Company No. 17054540 — UK Patent Application No. 2602902.6
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================
`timescale 1ns / 1ps
module tb_host_interface;

    // ------------------------------------------------------------------------
    // Parameters (forwarded to the host interface and the mesh instances)
    // ------------------------------------------------------------------------
    parameter NUM_CORES      = 4;
    parameter CORE_ID_BITS   = 2;
    parameter NUM_NEURONS    = 256;
    parameter NEURON_BITS    = 8;
    parameter DATA_WIDTH     = 16;
    parameter MAX_FANOUT     = 32;
    parameter FANOUT_BITS    = 5;
    parameter CONN_ADDR_BITS = 13;
    parameter CLK_PERIOD     = 10;   // in timescale units (1 ns)

    // Number of response bytes the capture buffer can hold.
    localparam RESP_BUF_DEPTH = 16;

    reg clk, rst_n;

    // Host interface byte I/O (simulates UART RX/TX at byte level)
    reg  [7:0] rx_data;
    reg        rx_valid;
    wire [7:0] tx_data;
    wire       tx_valid;
    reg        tx_ready;   // Always ready for fast sim

    // Mesh connections (directly wired between u_hi and u_mesh)
    wire                         mesh_start;
    wire                         mesh_prog_conn_we;
    wire [CORE_ID_BITS-1:0]      mesh_prog_conn_core;
    wire [NEURON_BITS-1:0]       mesh_prog_conn_src;
    wire [FANOUT_BITS-1:0]       mesh_prog_conn_slot;
    wire [NEURON_BITS-1:0]       mesh_prog_conn_target;
    wire signed [DATA_WIDTH-1:0] mesh_prog_conn_weight;
    wire                         mesh_prog_route_we;
    wire [CORE_ID_BITS-1:0]      mesh_prog_route_src_core;
    wire [NEURON_BITS-1:0]       mesh_prog_route_src_neuron;
    wire [CORE_ID_BITS-1:0]      mesh_prog_route_dest_core;
    wire [NEURON_BITS-1:0]       mesh_prog_route_dest_neuron;
    wire signed [DATA_WIDTH-1:0] mesh_prog_route_weight;
    wire                         mesh_ext_valid;
    wire [CORE_ID_BITS-1:0]      mesh_ext_core;
    wire [NEURON_BITS-1:0]       mesh_ext_neuron_id;
    wire signed [DATA_WIDTH-1:0] mesh_ext_current;
    wire                         mesh_timestep_done;
    wire [4:0]                   mesh_state_out;
    wire [31:0]                  mesh_total_spikes;
    wire [31:0]                  mesh_timestep_count;
    wire [NUM_CORES-1:0]             spike_valid_bus;
    wire [NUM_CORES*NEURON_BITS-1:0] spike_id_bus;

    // ------------------------------------------------------------------------
    // Response capture state
    //   resp_buf / resp_cnt : written ONLY by the TX capture process below.
    //   resp_clear          : request flag raised by the wait_* tasks to have
    //                         the capture process reset resp_cnt. Keeping a
    //                         single writer avoids two nonblocking assignments
    //                         to resp_cnt from different processes on the same
    //                         clock edge (order-dependent result).
    //   error_count         : number of unexpected response markers seen.
    // ------------------------------------------------------------------------
    reg [7:0] resp_buf [0:RESP_BUF_DEPTH-1];
    integer   resp_cnt;
    reg       resp_clear;
    integer   error_count;

    // ------------------------------------------------------------------------
    // DUT: host interface
    // ------------------------------------------------------------------------
    host_interface #(
        .NUM_CORES    (NUM_CORES),
        .CORE_ID_BITS (CORE_ID_BITS),
        .NUM_NEURONS  (NUM_NEURONS),
        .NEURON_BITS  (NEURON_BITS),
        .DATA_WIDTH   (DATA_WIDTH),
        .MAX_FANOUT   (MAX_FANOUT),
        .FANOUT_BITS  (FANOUT_BITS)
    ) u_hi (
        .clk                        (clk),
        .rst_n                      (rst_n),
        .rx_data                    (rx_data),
        .rx_valid                   (rx_valid),
        .tx_data                    (tx_data),
        .tx_valid                   (tx_valid),
        .tx_ready                   (tx_ready),
        .mesh_start                 (mesh_start),
        .mesh_prog_conn_we          (mesh_prog_conn_we),
        .mesh_prog_conn_core        (mesh_prog_conn_core),
        .mesh_prog_conn_src         (mesh_prog_conn_src),
        .mesh_prog_conn_slot        (mesh_prog_conn_slot),
        .mesh_prog_conn_target      (mesh_prog_conn_target),
        .mesh_prog_conn_weight      (mesh_prog_conn_weight),
        .mesh_prog_route_we         (mesh_prog_route_we),
        .mesh_prog_route_src_core   (mesh_prog_route_src_core),
        .mesh_prog_route_src_neuron (mesh_prog_route_src_neuron),
        .mesh_prog_route_dest_core  (mesh_prog_route_dest_core),
        .mesh_prog_route_dest_neuron(mesh_prog_route_dest_neuron),
        .mesh_prog_route_weight     (mesh_prog_route_weight),
        .mesh_ext_valid             (mesh_ext_valid),
        .mesh_ext_core              (mesh_ext_core),
        .mesh_ext_neuron_id         (mesh_ext_neuron_id),
        .mesh_ext_current           (mesh_ext_current),
        // The following host-interface ports are left unconnected in this
        // testbench; the matching mesh inputs are tied to constants below.
        .mesh_learn_enable          (),
        .mesh_graded_enable         (),
        .mesh_dendritic_enable      (),
        .mesh_async_enable          (),
        .mesh_prog_conn_comp        (),
        .mesh_prog_param_we         (),
        .mesh_prog_param_core       (),
        .mesh_prog_param_neuron     (),
        .mesh_prog_param_id         (),
        .mesh_prog_param_value      (),
        .mesh_timestep_done         (mesh_timestep_done),
        .mesh_state                 (mesh_state_out),
        .mesh_total_spikes          (mesh_total_spikes),
        .mesh_timestep_count        (mesh_timestep_count)
    );

    // ------------------------------------------------------------------------
    // Mesh driven by the host interface
    // ------------------------------------------------------------------------
    neuromorphic_mesh #(
        .NUM_CORES      (NUM_CORES),
        .CORE_ID_BITS   (CORE_ID_BITS),
        .NUM_NEURONS    (NUM_NEURONS),
        .NEURON_BITS    (NEURON_BITS),
        .DATA_WIDTH     (DATA_WIDTH),
        .MAX_FANOUT     (MAX_FANOUT),
        .FANOUT_BITS    (FANOUT_BITS),
        .CONN_ADDR_BITS (CONN_ADDR_BITS),
        .THRESHOLD      (16'sd1000),
        .LEAK_RATE      (16'sd3),
        .REFRAC_CYCLES  (3)
    ) u_mesh (
        .clk                   (clk),
        .rst_n                 (rst_n),
        .start                 (mesh_start),
        .prog_conn_we          (mesh_prog_conn_we),
        .prog_conn_core        (mesh_prog_conn_core),
        .prog_conn_src         (mesh_prog_conn_src),
        .prog_conn_slot        (mesh_prog_conn_slot),
        .prog_conn_target      (mesh_prog_conn_target),
        .prog_conn_weight      (mesh_prog_conn_weight),
        .prog_route_we         (mesh_prog_route_we),
        .prog_route_src_core   (mesh_prog_route_src_core),
        .prog_route_src_neuron (mesh_prog_route_src_neuron),
        .prog_route_dest_core  (mesh_prog_route_dest_core),
        .prog_route_dest_neuron(mesh_prog_route_dest_neuron),
        .prog_route_weight     (mesh_prog_route_weight),
        // Optional features and parameter programming are tied off.
        .learn_enable          (1'b0),
        .graded_enable         (1'b0),
        .dendritic_enable      (1'b0),
        .async_enable          (1'b0),
        .prog_conn_comp        (2'd0),
        .prog_param_we         (1'b0),
        .prog_param_core       (2'd0),
        .prog_param_neuron     (8'd0),
        .prog_param_id         (3'd0),
        .prog_param_value      (16'sd0),
        .ext_valid             (mesh_ext_valid),
        .ext_core              (mesh_ext_core),
        .ext_neuron_id         (mesh_ext_neuron_id),
        .ext_current           (mesh_ext_current),
        .timestep_done         (mesh_timestep_done),
        .spike_valid_bus       (spike_valid_bus),
        .spike_id_bus          (spike_id_bus),
        .mesh_state_out        (mesh_state_out),
        .total_spikes          (mesh_total_spikes),
        .timestep_count        (mesh_timestep_count)
    );

    // ------------------------------------------------------------------------
    // Clock generation and waveform dump
    // ------------------------------------------------------------------------
    initial clk = 0;
    always #(CLK_PERIOD/2) clk = ~clk;

    initial begin
        $dumpfile("host_interface.vcd");
        $dumpvars(0, tb_host_interface);
    end

    // ------------------------------------------------------------------------
    // Spike monitor: print every core whose spike_valid bit is set this cycle
    // ------------------------------------------------------------------------
    integer i;
    always @(posedge clk) begin
        for (i = 0; i < NUM_CORES; i = i + 1) begin
            if (spike_valid_bus[i]) begin
                $display(" [spike] Core %0d Neuron %0d (ts=%0d)",
                         i, spike_id_bus[i*NEURON_BITS +: NEURON_BITS], mesh_timestep_count);
            end
        end
    end

    // ------------------------------------------------------------------------
    // Capture TX responses (sole writer of resp_buf and resp_cnt)
    //   - While rst_n is low the counter is held at 0.
    //   - A pending resp_clear restarts the buffer; a byte accepted on that
    //     same edge is stored as byte 0 of the next response instead of being
    //     lost or racing with the clear.
    //   - Bytes beyond RESP_BUF_DEPTH are counted but not stored.
    // ------------------------------------------------------------------------
    always @(posedge clk) begin : capture_tx
        integer wr_idx;
        if (!rst_n) begin
            resp_cnt <= 0;
        end else if (tx_valid && tx_ready) begin
            wr_idx = resp_clear ? 0 : resp_cnt;
            if (wr_idx < RESP_BUF_DEPTH)
                resp_buf[wr_idx] <= tx_data;
            else
                $display(" [TX] WARNING: response buffer full (%0d bytes), byte not stored",
                         RESP_BUF_DEPTH);
            resp_cnt <= wr_idx + 1;
            $display(" [TX] byte %0d: 0x%02h", wr_idx, tx_data);
        end else if (resp_clear) begin
            resp_cnt <= 0;
        end
    end

    // ------------------------------------------------------------------------
    // Byte-level stimulus
    // ------------------------------------------------------------------------

    // Present one byte on rx_data with rx_valid high for a single clock cycle.
    task send_byte;
        input [7:0] b;
        begin
            @(posedge clk);
            rx_data  <= b;
            rx_valid <= 1;
            @(posedge clk);
            rx_valid <= 0;
        end
    endtask

    // 0x01 [core][src][slot][target][weight_hi][weight_lo]
    task cmd_prog_conn;
        input [7:0] core;
        input [7:0] src;
        input [7:0] slot;
        input [7:0] target;
        input signed [15:0] weight;
        begin
            send_byte(8'h01);
            send_byte(core);
            send_byte(src);
            send_byte(slot);
            send_byte(target);
            send_byte(weight[15:8]);
            send_byte(weight[7:0]);
        end
    endtask

    // 0x02 [src_core][src_neuron][dest_core][dest_neuron][weight_hi][weight_lo]
    task cmd_prog_route;
        input [7:0] src_core;
        input [7:0] src_neuron;
        input [7:0] dest_core;
        input [7:0] dest_neuron;
        input signed [15:0] weight;
        begin
            send_byte(8'h02);
            send_byte(src_core);
            send_byte(src_neuron);
            send_byte(dest_core);
            send_byte(dest_neuron);
            send_byte(weight[15:8]);
            send_byte(weight[7:0]);
        end
    endtask

    // 0x03 [core][neuron][current_hi][current_lo]
    task cmd_stimulus;
        input [7:0] core;
        input [7:0] neuron;
        input signed [15:0] current;
        begin
            send_byte(8'h03);
            send_byte(core);
            send_byte(neuron);
            send_byte(current[15:8]);
            send_byte(current[7:0]);
        end
    endtask

    // 0x04 [timesteps_hi][timesteps_lo]
    task cmd_run;
        input [15:0] timesteps;
        begin
            send_byte(8'h04);
            send_byte(timesteps[15:8]);
            send_byte(timesteps[7:0]);
        end
    endtask

    // 0x05
    task cmd_status;
        begin
            send_byte(8'h05);
        end
    endtask

    // ------------------------------------------------------------------------
    // Response checking
    // ------------------------------------------------------------------------

    // Ask the capture process to reset resp_cnt. Consumes exactly one clock
    // edge; the counter reads 0 after that edge.
    task clear_responses;
        begin
            resp_clear <= 1'b1;
            @(posedge clk);
            resp_clear <= 1'b0;
        end
    endtask

    // Wait for a single-byte response and check that it is ACK (0xAA).
    // A mismatch is reported and counted in error_count.
    task wait_ack;
        begin
            wait (resp_cnt > 0);
            @(posedge clk);
            if (resp_buf[0] == 8'hAA) begin
                $display(" -> ACK received");
            end else begin
                $display(" -> ERROR: expected ACK (0xAA), got 0x%02h", resp_buf[0]);
                error_count = error_count + 1;
            end
            clear_responses;
        end
    endtask

    // Wait for a 5-byte response and check that byte 0 is DONE (0xDD).
    // Bytes 1..4 are printed as a 32-bit spike count (byte 1 most significant).
    // A mismatch is reported and counted in error_count.
    task wait_done;
        begin
            wait (resp_cnt >= 5);
            @(posedge clk);
            @(posedge clk);
            if (resp_buf[0] == 8'hDD) begin
                $display(" -> DONE received, spikes = %0d",
                         {resp_buf[1], resp_buf[2], resp_buf[3], resp_buf[4]});
            end else begin
                $display(" -> ERROR: expected DONE (0xDD), got 0x%02h", resp_buf[0]);
                error_count = error_count + 1;
            end
            clear_responses;
        end
    endtask

    // Wait for a 5-byte status response and print it: byte 0 as the state,
    // bytes 1..4 as a 32-bit timestep count (byte 1 most significant).
    // The contents are displayed only, not checked.
    task wait_status;
        begin
            wait (resp_cnt >= 5);
            @(posedge clk);
            @(posedge clk);
            $display(" -> STATUS: state=%0d, timestep_count=%0d",
                     resp_buf[0],
                     {resp_buf[1], resp_buf[2], resp_buf[3], resp_buf[4]});
            clear_responses;
        end
    endtask

    // ------------------------------------------------------------------------
    // Main test sequence
    // ------------------------------------------------------------------------
    initial begin
        rst_n       = 0;
        rx_data     = 0;
        rx_valid    = 0;
        tx_ready    = 1;   // TX always ready for fast sim
        resp_clear  = 0;
        error_count = 0;
        // resp_cnt is zeroed by the capture process while rst_n is low.

        $display("");
        $display("================================================================");
        $display(" Host Interface Test - Byte-Level Command Protocol");
        $display("================================================================");

        #(CLK_PERIOD * 5);
        rst_n = 1;
        #(CLK_PERIOD * 5);

        $display("");
        $display("--- TEST 1: Program Connections via Host ---");
        // Core 0: chain N0→N1→N2→N3 with strong weights
        $display(" Sending PROG_CONN: Core 0, N0→N1, w=1200");
        cmd_prog_conn(0, 0, 0, 1, 16'sd1200);
        wait_ack;
        $display(" Sending PROG_CONN: Core 0, N1→N2, w=1200");
        cmd_prog_conn(0, 1, 0, 2, 16'sd1200);
        wait_ack;
        $display(" Sending PROG_CONN: Core 0, N2→N3, w=1200");
        cmd_prog_conn(0, 2, 0, 3, 16'sd1200);
        wait_ack;
        if (error_count == 0)
            $display(" Connections programmed successfully!");
        else
            $display(" Connection programming reported %0d error(s)", error_count);

        $display("");
        $display("--- TEST 2: Stimulus + Run (10 timesteps) ---");
        $display(" Sending STIMULUS: Core 0, N0, current=1200");
        cmd_stimulus(0, 0, 16'sd1200);
        wait_ack;
        $display(" Sending RUN: 10 timesteps");
        cmd_run(16'd10);
        wait_done;

        $display("");
        $display("--- TEST 3: Status Query ---");
        cmd_status;
        wait_status;

        $display("");
        $display("--- TEST 4: Cross-Core Route + Run ---");
        // Route: Core 0 N3 → Core 1 N0
        $display(" Sending PROG_ROUTE: C0:N3 -> C1:N0, w=1200");
        cmd_prog_route(0, 3, 1, 0, 16'sd1200);
        wait_ack;
        // Core 1: chain N0→N1
        $display(" Sending PROG_CONN: Core 1, N0→N1, w=1200");
        cmd_prog_conn(1, 0, 0, 1, 16'sd1200);
        wait_ack;
        // Run with stimulus to drive cross-core propagation
        $display(" Sending STIMULUS: Core 0, N0, current=1200");
        cmd_stimulus(0, 0, 16'sd1200);
        wait_ack;
        $display(" Sending RUN: 20 timesteps");
        cmd_run(16'd20);
        wait_done;

        $display("");
        $display("--- TEST 5: Second RUN Burst (no new stimulus) ---");
        $display(" Sending RUN: 5 timesteps (no stimulus)");
        cmd_run(16'd5);
        wait_done;

        $display("");
        $display("--- Final Status ---");
        cmd_status;
        wait_status;

        $display("");
        $display("================================================================");
        $display(" FINAL REPORT");
        $display("================================================================");
        $display(" Total timesteps: %0d", mesh_timestep_count);
        $display(" Total spikes: %0d", mesh_total_spikes);
        // Only claim success when every checked response marker matched.
        if (error_count == 0)
            $display(" Host protocol: 5 command types verified");
        else
            $display(" Host protocol: FAILED (%0d response error(s))", error_count);
        $display(" Architecture: UART -> Host IF -> Mesh (4x256 = 1024 neurons)");
        $display("================================================================");

        #(CLK_PERIOD * 10);
        $finish;
    end

    // ------------------------------------------------------------------------
    // Watchdog: end the simulation if the main sequence never completes
    // (for example, a wait_* task blocked on a response that never arrives).
    // ------------------------------------------------------------------------
    initial begin
        #(CLK_PERIOD * 3000000);
        $display("TIMEOUT at mesh_state=%0d, ts=%0d", mesh_state_out, mesh_timestep_count);
        $display("RESULT: FAIL (watchdog timeout, %0d response error(s) before timeout)",
                 error_count);
        $finish;
    end

endmodule