File size: 1,028 Bytes
2b4ae64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
`timescale 1ns/1ps
// Reference implementation: 2-stage pipelined INT8 dot product (4 elements)
module dot_product_4 (
    input  wire        clk,
    input  wire        rst,
    input  wire        valid_in,
    input  wire signed [7:0] a0, a1, a2, a3,
    input  wire signed [7:0] b0, b1, b2, b3,
    output reg         valid_out,
    output reg  signed [17:0] result
);
    // ------------------------------------------------------------------
    // Signed 4-element dot product: result = a0*b0 + a1*b1 + a2*b2 + a3*b3
    //
    // Ports
    //   clk       : all registers update on the rising edge
    //   rst       : synchronous, active-high; clears every register below
    //   valid_in  : qualifier sampled together with a*/b*
    //   a*, b*    : signed 8-bit operands
    //   valid_out : valid_in delayed by two clock edges
    //   result    : signed 18-bit sum of the four products, two clock
    //               edges after the operands were sampled
    //
    // The data registers are NOT gated by valid_in: products and sum are
    // recomputed on every non-reset clock edge, and valid_out only marks
    // which results correspond to inputs that had valid_in asserted.
    // ------------------------------------------------------------------

    // Registers on the stage-1 / stage-2 boundary: one 16-bit signed
    // product per element pair, plus the valid bit that travels with them.
    reg signed [15:0] prod_0, prod_1, prod_2, prod_3;
    reg               stage1_vld;

    // Sum of the registered products. The expression is sized by the
    // 18-bit net, so each signed 16-bit operand is sign-extended before
    // the additions take place.
    wire signed [17:0] prod_sum = prod_0 + prod_1 + prod_2 + prod_3;

    // Stage 1: multiply each operand pair and capture the valid qualifier.
    always @(posedge clk) begin
        if (rst) begin
            prod_0     <= 16'sd0;
            prod_1     <= 16'sd0;
            prod_2     <= 16'sd0;
            prod_3     <= 16'sd0;
            stage1_vld <= 1'b0;
        end else begin
            prod_0     <= a0 * b0;
            prod_1     <= a1 * b1;
            prod_2     <= a2 * b2;
            prod_3     <= a3 * b3;
            stage1_vld <= valid_in;
        end
    end

    // Stage 2: register the accumulated sum and forward the valid bit.
    // Non-blocking assignments mean prod_sum / stage1_vld are read with
    // the values they held before this clock edge.
    always @(posedge clk) begin
        if (rst) begin
            result    <= 18'sd0;
            valid_out <= 1'b0;
        end else begin
            result    <= prod_sum;
            valid_out <= stage1_vld;
        end
    end

endmodule