Initial upload: Catalyst N1 open source neuromorphic processor RTL
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +65 -0
- LICENSE +190 -0
- Makefile +35 -0
- NOTICE +8 -0
- README.md +159 -0
- fpga/arty_a7.xdc +33 -0
- fpga/build_vivado.tcl +107 -0
- fpga/extract_power.py +171 -0
- fpga/f2/build_f2.tcl +55 -0
- fpga/f2/cl_id_defines.vh +25 -0
- fpga/f2/cl_neuromorphic.sv +249 -0
- fpga/f2/cl_neuromorphic.v +298 -0
- fpga/f2/cl_neuromorphic_defines.vh +25 -0
- fpga/f2/cl_synth_user.xdc +8 -0
- fpga/f2/cl_timing_user.xdc +14 -0
- fpga/f2/deploy_f2.sh +181 -0
- fpga/f2/run_build.sh +10 -0
- fpga/f2/synth_cl_neuromorphic.tcl +48 -0
- fpga/f2_host.py +580 -0
- fpga/fpga_top.v +174 -0
- fpga/host.py +418 -0
- fpga/kria/build_kria.tcl +73 -0
- fpga/kria/kria_neuromorphic.v +143 -0
- fpga/kria/kria_neuromorphic_8core_backup.v +143 -0
- fpga/kria/run_impl.tcl +68 -0
- rtl/async_fifo.v +96 -0
- rtl/async_noc_mesh.v +701 -0
- rtl/async_router.v +217 -0
- rtl/axi_uart_bridge.v +258 -0
- rtl/chip_link.v +199 -0
- rtl/host_interface.v +550 -0
- rtl/lif_neuron.v +71 -0
- rtl/mmio_bridge.v +447 -0
- rtl/multi_chip_router.v +346 -0
- rtl/neuromorphic_mesh.v +859 -0
- rtl/neuromorphic_top.v +557 -0
- rtl/neuron_core.v +112 -0
- rtl/neuron_core_stdp.v +132 -0
- rtl/rv32i_core.v +751 -0
- rtl/rv32im_cluster.v +171 -0
- rtl/scalable_core.v +382 -0
- rtl/scalable_core_v2.v +0 -0
- rtl/spike_fifo.v +70 -0
- rtl/sram.v +56 -0
- rtl/stdp_synapse.v +102 -0
- rtl/synapse.v +43 -0
- rtl/sync_tree.v +38 -0
- rtl/uart_rx.v +107 -0
- rtl/uart_tx.v +96 -0
- run_regression.sh +17 -0
.gitignore
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Compiled Verilog
|
| 2 |
+
*.vvp
|
| 3 |
+
*.out
|
| 4 |
+
|
| 5 |
+
# Waveform dumps
|
| 6 |
+
*.vcd
|
| 7 |
+
|
| 8 |
+
# Simulation directories and binaries
|
| 9 |
+
sim/
|
| 10 |
+
sim_async
|
| 11 |
+
sim_stress
|
| 12 |
+
|
| 13 |
+
# Synthesis outputs
|
| 14 |
+
synth/
|
| 15 |
+
|
| 16 |
+
# Windows artifacts
|
| 17 |
+
nul
|
| 18 |
+
|
| 19 |
+
# Python
|
| 20 |
+
__pycache__/
|
| 21 |
+
*.pyc
|
| 22 |
+
*.pyo
|
| 23 |
+
*.egg-info/
|
| 24 |
+
.pytest_cache/
|
| 25 |
+
|
| 26 |
+
# Datasets (large, download separately)
|
| 27 |
+
sdk/benchmarks/data/
|
| 28 |
+
sdk/data/
|
| 29 |
+
|
| 30 |
+
# Model checkpoints
|
| 31 |
+
*.pt
|
| 32 |
+
|
| 33 |
+
# Build archives
|
| 34 |
+
upload.zip
|
| 35 |
+
|
| 36 |
+
# Generated images (keep architecture.png)
|
| 37 |
+
spike_visualization.png
|
| 38 |
+
sdk/neurocore_dashboard.png
|
| 39 |
+
sdk/async_dashboard.png
|
| 40 |
+
sdk/p13_dashboard.png
|
| 41 |
+
sdk/raster_demo.png
|
| 42 |
+
sdk/results/
|
| 43 |
+
|
| 44 |
+
# FPGA build artifacts
|
| 45 |
+
fpga/f2/*.tar
|
| 46 |
+
|
| 47 |
+
# Editor/IDE
|
| 48 |
+
.vscode/
|
| 49 |
+
*.swp
|
| 50 |
+
*.swo
|
| 51 |
+
*~
|
| 52 |
+
|
| 53 |
+
# Vivado
|
| 54 |
+
*.jou
|
| 55 |
+
*.log
|
| 56 |
+
*.str
|
| 57 |
+
.Xil/
|
| 58 |
+
|
| 59 |
+
# LaTeX build artifacts
|
| 60 |
+
paper/*.aux
|
| 61 |
+
paper/*.bbl
|
| 62 |
+
paper/*.blg
|
| 63 |
+
paper/*.fdb_latexmk
|
| 64 |
+
paper/*.fls
|
| 65 |
+
paper/*.synctex.gz
|
LICENSE
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but not
|
| 32 |
+
limited to compiled object code, generated documentation, and
|
| 33 |
+
conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to the Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by the Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding any notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 179 |
+
|
| 180 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 181 |
+
you may not use this file except in compliance with the License.
|
| 182 |
+
You may obtain a copy of the License at
|
| 183 |
+
|
| 184 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 185 |
+
|
| 186 |
+
Unless required by applicable law or agreed to in writing, software
|
| 187 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 188 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 189 |
+
See the License for the specific language governing permissions and
|
| 190 |
+
limitations under the License.
|
Makefile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Neuromorphic Chip - Build & Simulation Makefile
#
# Usage:
#   make sim    - Compile and run simulation
#   make waves  - Open waveform viewer
#   make synth  - Synthesize with Yosys (gate-level)
#   make clean  - Clean build artifacts

# Source files
RTL_DIR = rtl
TB_DIR  = tb
SIM_DIR = sim

RTL_SRC = $(RTL_DIR)/lif_neuron.v $(RTL_DIR)/synapse.v $(RTL_DIR)/neuron_core.v
TB_SRC  = $(TB_DIR)/tb_neuron_core.v

# Simulation
SIM_OUT = $(SIM_DIR)/neuron_core_sim
VCD_OUT = $(SIM_DIR)/neuron_core.vcd

# Synthesis: full Yosys output is kept here; only its tail is echoed.
SYNTH_LOG = synth/yosys.log

.PHONY: sim waves synth clean

# Compile the RTL and testbench with Icarus Verilog, then run the simulation
# from inside $(SIM_DIR) (hence the ../ in front of the compiled image path).
sim: $(RTL_SRC) $(TB_SRC)
	@mkdir -p $(SIM_DIR)
	iverilog -o $(SIM_OUT) -I $(RTL_DIR) $(RTL_SRC) $(TB_SRC)
	cd $(SIM_DIR) && vvp ../$(SIM_OUT)

# Open the waveform dump in GTKWave (launched in the background).
# NOTE(review): no rule in this Makefile creates $(VCD_OUT); presumably
# `make sim` must run first and the testbench dumps to this path - confirm.
waves: $(VCD_OUT)
	gtkwave $(VCD_OUT) &

# Synthesize neuron_core with Yosys and print the last 30 lines of its output.
# The output goes to $(SYNTH_LOG) first instead of being piped straight into
# tail: with a plain pipe the recipe's exit status is tail's, so a failed
# synthesis would look like success to make. Here the Yosys status is saved
# and returned after the tail has been shown.
synth:
	@mkdir -p synth
	yosys -p "read_verilog $(RTL_SRC); synth -top neuron_core; stat; write_json synth/neuron_core.json" > $(SYNTH_LOG) 2>&1; \
	status=$$?; tail -30 $(SYNTH_LOG); exit $$status

# Remove simulation dumps, the compiled simulation image and synthesis outputs.
clean:
	rm -rf $(SIM_DIR)/*.vcd $(SIM_DIR)/neuron_core_sim synth/*.json $(SYNTH_LOG)
|
NOTICE
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Catalyst N1 Neuromorphic Processor
|
| 2 |
+
Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 3 |
+
Company No. 17054540
|
| 4 |
+
|
| 5 |
+
This product includes hardware description language (HDL) designs
|
| 6 |
+
originally developed by Henry Arthur Shulayev Barnes.
|
| 7 |
+
|
| 8 |
+
UK Patent Application No. 2602902.6 (filed 13 February 2026)
|
README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- neuromorphic
|
| 5 |
+
- spiking-neural-networks
|
| 6 |
+
- fpga
|
| 7 |
+
- verilog
|
| 8 |
+
- hardware
|
| 9 |
+
- edge-ai
|
| 10 |
+
- loihi
|
| 11 |
+
- rtl
|
| 12 |
+
- noc
|
| 13 |
+
- stdp
|
| 14 |
+
language:
|
| 15 |
+
- en
|
| 16 |
+
library_name: neurocore
|
| 17 |
+
pipeline_tag: other
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
# Catalyst N1
|
| 21 |
+
|
| 22 |
+
Open source 128-core neuromorphic processor with full mesh NoC, STDP learning, and RISC-V management. Verilog RTL, validated on FPGA.
|
| 23 |
+
|
| 24 |
+
## Specifications
|
| 25 |
+
|
| 26 |
+
| Parameter | Value |
|
| 27 |
+
|-----------|-------|
|
| 28 |
+
| Cores | 128 |
|
| 29 |
+
| Neurons per core | 1,024 |
|
| 30 |
+
| Total neurons | 131,072 |
|
| 31 |
+
| Neuron model | Leaky Integrate-and-Fire (16-bit fixed-point) |
|
| 32 |
+
| Synapse pool | 131,072 entries per core |
|
| 33 |
+
| Learning | STDP, 14-opcode programmable learning ISA |
|
| 34 |
+
| Network-on-Chip | Configurable XY mesh with multicast |
|
| 35 |
+
| Host interface | UART (FPGA) / AXI-Lite (F2) / PCIe MMIO |
|
| 36 |
+
| Management | RV32IM RISC-V cluster |
|
| 37 |
+
| Multi-chip | Chip link with routing table |
|
| 38 |
+
| Clock | 100 MHz (simulation default) |
|
| 39 |
+
|
| 40 |
+
## Directory Structure
|
| 41 |
+
|
| 42 |
+
```
|
| 43 |
+
catalyst-n1/
|
| 44 |
+
rtl/ 25 Verilog modules (core, NoC, memory, host, RISC-V)
|
| 45 |
+
tb/ 46 testbenches (unit, integration, regression)
|
| 46 |
+
sdk/ Python SDK with CPU, GPU, and FPGA backends
|
| 47 |
+
fpga/ FPGA build files (Arty A7, AWS F2, Kria K26)
|
| 48 |
+
sim/ Simulation scripts and visualization
|
| 49 |
+
Makefile Compile and run simulation
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
## Simulation
|
| 53 |
+
|
| 54 |
+
Requires [Icarus Verilog](https://github.com/steveicarus/iverilog) (v12+).
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
# Compile and run basic simulation
|
| 58 |
+
make sim
|
| 59 |
+
|
| 60 |
+
# Run full regression (25 testbenches)
|
| 61 |
+
bash run_regression.sh
|
| 62 |
+
|
| 63 |
+
# Run a single testbench
|
| 64 |
+
iverilog -g2012 -DSIMULATION -o out.vvp \
|
| 65 |
+
rtl/sram.v rtl/spike_fifo.v rtl/uart_tx.v rtl/uart_rx.v \
|
| 66 |
+
rtl/scalable_core_v2.v rtl/neuromorphic_mesh.v \
|
| 67 |
+
rtl/host_interface.v rtl/neuromorphic_top.v rtl/sync_tree.v \
|
| 68 |
+
rtl/rv32i_core.v rtl/mmio_bridge.v rtl/rv32im_cluster.v \
|
| 69 |
+
tb/tb_p24_final.v
|
| 70 |
+
vvp out.vvp
|
| 71 |
+
|
| 72 |
+
# View waveforms (requires GTKWave)
|
| 73 |
+
make waves
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
## SDK
|
| 77 |
+
|
| 78 |
+
Python SDK for building, simulating, and deploying spiking neural networks. See [`sdk/README.md`](sdk/README.md) for full documentation.
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
cd sdk
|
| 82 |
+
pip install -e .
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
```python
|
| 86 |
+
import neurocore as nc
|
| 87 |
+
|
| 88 |
+
net = nc.Network()
|
| 89 |
+
inp = net.population(100, params={'threshold': 1000, 'leak': 10}, label='input')
|
| 90 |
+
hid = net.population(50, params={'threshold': 1000, 'leak': 5}, label='hidden')
|
| 91 |
+
out = net.population(10, params={'threshold': 1000, 'leak': 5}, label='output')
|
| 92 |
+
|
| 93 |
+
net.connect(inp, hid, weight=500, probability=0.3)
|
| 94 |
+
net.connect(hid, out, weight=400, probability=0.5)
|
| 95 |
+
|
| 96 |
+
sim = nc.Simulator()
|
| 97 |
+
sim.deploy(net)
|
| 98 |
+
|
| 99 |
+
for t in range(100):
|
| 100 |
+
    sim.inject(inp, neuron_ids=[0, 5, 10], current=1500)
|
| 101 |
+
    sim.step()
|
| 102 |
+
|
| 103 |
+
result = sim.get_result()
|
| 104 |
+
result.raster_plot(show=True)
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Four backends: CPU simulator, GPU simulator (PyTorch CUDA), FPGA via UART (Arty A7), AWS F2 via PCIe. All share the same API.
|
| 108 |
+
|
| 109 |
+
## FPGA
|
| 110 |
+
|
| 111 |
+
### Arty A7
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
# Vivado batch build
|
| 115 |
+
vivado -mode batch -source fpga/build_vivado.tcl
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
Constraints: `fpga/arty_a7.xdc`. Top module: `fpga/fpga_top.v`.
|
| 119 |
+
|
| 120 |
+
### AWS F2
|
| 121 |
+
|
| 122 |
+
```bash
|
| 123 |
+
# Build on F2 build instance
|
| 124 |
+
cd fpga/f2
|
| 125 |
+
bash run_build.sh
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
CL wrapper: `fpga/f2/cl_neuromorphic.sv`. Host driver: `fpga/f2_host.py`.
|
| 129 |
+
|
| 130 |
+
### Kria K26
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
vivado -mode batch -source fpga/kria/build_kria.tcl
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
Wrapper: `fpga/kria/kria_neuromorphic.v`.
|
| 137 |
+
|
| 138 |
+
## Benchmarks
|
| 139 |
+
|
| 140 |
+
SHD (Spiking Heidelberg Digits) spoken digit classification:
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
cd sdk
|
| 144 |
+
python benchmarks/shd_train.py --data-dir benchmarks/data/shd --epochs 200
|
| 145 |
+
python benchmarks/shd_deploy.py --checkpoint benchmarks/shd_model.pt --data-dir benchmarks/data/shd
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
Additional benchmarks in `sdk/benchmarks/`: DVS gesture recognition, XOR classification, temporal patterns, scaling, stress tests.
|
| 149 |
+
|
| 150 |
+
## Links
|
| 151 |
+
|
| 152 |
+
- [GitHub Repository](https://github.com/catalyst-neuromorphic/catalyst-n1)
|
| 153 |
+
- [catalyst-neuromorphic.com](https://catalyst-neuromorphic.com)
|
| 154 |
+
- [Cloud API](https://github.com/catalyst-neuromorphic/catalyst-cloud-python)
|
| 155 |
+
- [Catalyst-Neurocore](https://github.com/catalyst-neuromorphic/catalyst-neurocore)
|
| 156 |
+
|
| 157 |
+
## License
|
| 158 |
+
|
| 159 |
+
Apache 2.0. See [LICENSE](LICENSE).
|
fpga/arty_a7.xdc
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## ----------------------------------------------------------------------------
## Arty A7-100T pin, clock and configuration constraints
## ----------------------------------------------------------------------------
## Every I/O below uses the LVCMOS33 standard. Each port gets its package pin
## and its I/O standard assigned in two separate set_property calls.

## System clock input: 10 ns period (100 MHz), rising at 0 ns, falling at 5 ns
set_property PACKAGE_PIN E3 [get_ports clk]
set_property IOSTANDARD LVCMOS33 [get_ports clk]
create_clock -name sys_clk_pin -period 10.000 -waveform {0.000 5.000} -add [get_ports clk]

## Reset push-button (BTN0, active-high)
set_property PACKAGE_PIN D9 [get_ports btn_rst]
set_property IOSTANDARD LVCMOS33 [get_ports btn_rst]

## UART receive / transmit
set_property PACKAGE_PIN A9 [get_ports uart_rxd]
set_property IOSTANDARD LVCMOS33 [get_ports uart_rxd]
set_property PACKAGE_PIN D10 [get_ports uart_txd]
set_property IOSTANDARD LVCMOS33 [get_ports uart_txd]

## Status LEDs led[0] .. led[3]
set_property PACKAGE_PIN H5 [get_ports {led[0]}]
set_property IOSTANDARD LVCMOS33 [get_ports {led[0]}]
set_property PACKAGE_PIN J5 [get_ports {led[1]}]
set_property IOSTANDARD LVCMOS33 [get_ports {led[1]}]
set_property PACKAGE_PIN T9 [get_ports {led[2]}]
set_property IOSTANDARD LVCMOS33 [get_ports {led[2]}]
set_property PACKAGE_PIN T10 [get_ports {led[3]}]
set_property IOSTANDARD LVCMOS33 [get_ports {led[3]}]

## RGB LEDs (unused; left commented out)
#set_property PACKAGE_PIN F6 [get_ports {led_r[0]}]
#set_property IOSTANDARD LVCMOS33 [get_ports {led_r[0]}]
#set_property PACKAGE_PIN J4 [get_ports {led_g[0]}]
#set_property IOSTANDARD LVCMOS33 [get_ports {led_g[0]}]
#set_property PACKAGE_PIN J2 [get_ports {led_b[0]}]
#set_property IOSTANDARD LVCMOS33 [get_ports {led_b[0]}]

## Device configuration bank settings
set_property CFGBVS VCCO [current_design]
set_property CONFIG_VOLTAGE 3.3 [current_design]

## Bitstream generation options: 4-bit SPI bus width, compressed bitstream
set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design]
|
fpga/build_vivado.tcl
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
# Vivado Non-Project Mode Build Script
# ============================================================================
# Target: Arty A7-100T (xc7a100tcsg324-1)
# Usage:  vivado -mode batch -source fpga/build_vivado.tcl
#
# Flow: read RTL and constraints, synthesize, optimize, place, route, write
# reports, then write the bitstream. All source and output paths below are
# relative, so the script has to be started from the directory that contains
# rtl/ and fpga/ (as in the Usage line above).
# ============================================================================

# ---- Configuration ----
set part      "xc7a100tcsg324-1"
set top       "fpga_top"
set build_dir "fpga/build"
set bit_file  "${build_dir}/neuromorphic.bit"

# ---- Create build directory ----
file mkdir $build_dir

# ---- Read RTL sources ----
read_verilog {
    rtl/sram.v
    rtl/spike_fifo.v
    rtl/uart_tx.v
    rtl/uart_rx.v
    rtl/scalable_core_v2.v
    rtl/neuromorphic_mesh.v
    rtl/async_noc_mesh.v
    rtl/async_router.v
    rtl/sync_tree.v
    rtl/chip_link.v
    rtl/host_interface.v
    rtl/neuromorphic_top.v
    fpga/fpga_top.v
}

# ---- Read constraints ----
read_xdc fpga/arty_a7.xdc

# ---- Synthesis ----
puts "========================================"
puts " SYNTHESIS"
puts "========================================"
synth_design -top $top -part $part \
    -flatten_hierarchy rebuilt \
    -directive Default

# Report utilization after synthesis
report_utilization -file ${build_dir}/synth_utilization.rpt
report_timing_summary -file ${build_dir}/synth_timing.rpt

# ---- Optimization ----
puts "========================================"
puts " OPTIMIZATION"
puts "========================================"
opt_design

# ---- Placement ----
puts "========================================"
puts " PLACEMENT"
puts "========================================"
place_design -directive Explore

# Report utilization after placement
report_utilization -file ${build_dir}/place_utilization.rpt

# ---- Routing ----
puts "========================================"
puts " ROUTING"
puts "========================================"
route_design -directive Explore

# ---- Reports ----
puts "========================================"
puts " REPORTS"
puts "========================================"
report_utilization -file ${build_dir}/route_utilization.rpt
report_timing_summary -file ${build_dir}/route_timing.rpt -max_paths 10
report_power -file ${build_dir}/power.rpt
report_drc -file ${build_dir}/drc.rpt
report_methodology -file ${build_dir}/methodology.rpt

# Check timing: read the slack of the single path returned by
# get_timing_paths and warn when it is negative. This only prints a warning;
# the bitstream is still generated afterwards.
set timing_slack [get_property SLACK [get_timing_paths -max_paths 1]]
puts "Worst slack: ${timing_slack} ns"
if {$timing_slack < 0} {
    puts "WARNING: Timing not met! Worst negative slack: ${timing_slack} ns"
}

# ---- Generate Bitstream ----
puts "========================================"
puts " BITSTREAM"
puts "========================================"
write_bitstream -force $bit_file

# ---- Summary ----
puts ""
puts "========================================"
puts " BUILD COMPLETE"
puts "========================================"
puts " Bitstream: $bit_file"
puts " Reports: ${build_dir}/"
puts ""
puts " To program the FPGA:"
puts " open_hw_manager"
puts " connect_hw_server"
puts " open_hw_target"
# The square brackets are escaped so the hint is printed literally. Unescaped,
# Tcl performs command substitution inside the double-quoted string and would
# run current_hw_device here instead of printing it.
puts " set_property PROGRAM.FILE {${bit_file}} \[current_hw_device\]"
puts " program_hw_devices"
puts "========================================"
|
fpga/extract_power.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Extract power and utilization numbers from Vivado reports.
|
| 2 |
+
|
| 3 |
+
Parses post-implementation reports and outputs structured data
|
| 4 |
+
for the paper's resource and power tables.
|
| 5 |
+
|
| 6 |
+
Usage (on build instance):
|
| 7 |
+
# After opening DCP in Vivado and generating reports:
|
| 8 |
+
python extract_power.py power_report.rpt utilization_report.rpt
|
| 9 |
+
|
| 10 |
+
Usage (manual entry from existing numbers):
|
| 11 |
+
python extract_power.py --manual
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import argparse
|
| 15 |
+
import re
|
| 16 |
+
import sys
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def parse_power_report(path):
|
| 20 |
+
"""Parse Vivado report_power output."""
|
| 21 |
+
data = {}
|
| 22 |
+
with open(path, 'r') as f:
|
| 23 |
+
for line in f:
|
| 24 |
+
# Total On-Chip Power (W) : X.XXX
|
| 25 |
+
m = re.search(r'Total On-Chip Power.*?:\s+([\d.]+)', line)
|
| 26 |
+
if m:
|
| 27 |
+
data['total_power_w'] = float(m.group(1))
|
| 28 |
+
|
| 29 |
+
# Dynamic (W) : X.XXX
|
| 30 |
+
m = re.search(r'Dynamic.*?:\s+([\d.]+)', line)
|
| 31 |
+
if m and 'dynamic_power_w' not in data:
|
| 32 |
+
data['dynamic_power_w'] = float(m.group(1))
|
| 33 |
+
|
| 34 |
+
# Device Static (W) : X.XXX
|
| 35 |
+
m = re.search(r'Device Static.*?:\s+([\d.]+)', line)
|
| 36 |
+
if m:
|
| 37 |
+
data['static_power_w'] = float(m.group(1))
|
| 38 |
+
|
| 39 |
+
# Block RAM : X.XXX
|
| 40 |
+
m = re.search(r'Block RAM\s*:\s+([\d.]+)', line)
|
| 41 |
+
if m:
|
| 42 |
+
data['bram_power_w'] = float(m.group(1))
|
| 43 |
+
|
| 44 |
+
# Clocks : X.XXX
|
| 45 |
+
m = re.search(r'Clocks\s*:\s+([\d.]+)', line)
|
| 46 |
+
if m:
|
| 47 |
+
data['clock_power_w'] = float(m.group(1))
|
| 48 |
+
|
| 49 |
+
# Logic : X.XXX
|
| 50 |
+
m = re.search(r'Logic\s*:\s+([\d.]+)', line)
|
| 51 |
+
if m and 'logic_power_w' not in data:
|
| 52 |
+
data['logic_power_w'] = float(m.group(1))
|
| 53 |
+
|
| 54 |
+
return data
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def parse_utilization_report(path):
|
| 58 |
+
"""Parse Vivado report_utilization output."""
|
| 59 |
+
data = {}
|
| 60 |
+
with open(path, 'r') as f:
|
| 61 |
+
content = f.read()
|
| 62 |
+
|
| 63 |
+
# Look for: | Slice LUTs | XXXXX | XXXXX | XX.XX |
|
| 64 |
+
m = re.search(r'Slice LUTs\*?\s*\|\s*([\d,]+)\s*\|\s*([\d,]+)', content)
|
| 65 |
+
if m:
|
| 66 |
+
data['luts_used'] = int(m.group(1).replace(',', ''))
|
| 67 |
+
data['luts_total'] = int(m.group(2).replace(',', ''))
|
| 68 |
+
|
| 69 |
+
# Slice Registers / FFs
|
| 70 |
+
m = re.search(r'(?:Slice Registers|Register as Flip Flop)\s*\|\s*([\d,]+)\s*\|\s*([\d,]+)', content)
|
| 71 |
+
if m:
|
| 72 |
+
data['ffs_used'] = int(m.group(1).replace(',', ''))
|
| 73 |
+
data['ffs_total'] = int(m.group(2).replace(',', ''))
|
| 74 |
+
|
| 75 |
+
# Block RAM Tile
|
| 76 |
+
m = re.search(r'Block RAM Tile\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)', content)
|
| 77 |
+
if m:
|
| 78 |
+
data['bram_used'] = float(m.group(1))
|
| 79 |
+
data['bram_total'] = float(m.group(2))
|
| 80 |
+
|
| 81 |
+
# DSPs
|
| 82 |
+
m = re.search(r'DSPs?\s*\|\s*([\d]+)\s*\|\s*([\d]+)', content)
|
| 83 |
+
if m:
|
| 84 |
+
data['dsps_used'] = int(m.group(1))
|
| 85 |
+
data['dsps_total'] = int(m.group(2))
|
| 86 |
+
|
| 87 |
+
return data
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def manual_entry():
|
| 91 |
+
"""Known numbers from the F2 build (16 cores, 62.5MHz)."""
|
| 92 |
+
return {
|
| 93 |
+
# From f2_deployment.md and build logs
|
| 94 |
+
'target': 'Xilinx VU47P (xcvu47p, AWS F2)',
|
| 95 |
+
'cores': 16,
|
| 96 |
+
'neurons_per_core': 1024,
|
| 97 |
+
'total_neurons': 16384,
|
| 98 |
+
'clock_mhz': 62.5,
|
| 99 |
+
'bram36k_used': 1999,
|
| 100 |
+
'bram36k_total': 3576,
|
| 101 |
+
'bram_pct': 55.9,
|
| 102 |
+
'wns_ns': 0.003,
|
| 103 |
+
'throughput_ts_per_sec': 8690,
|
| 104 |
+
# ASIC estimate: FPGA dynamic power / 10-20x (FPGA-to-ASIC ratio range used in print_paper_table)
|
| 105 |
+
'asic_estimate_note': 'FPGA power / 10-20x for ASIC estimate',
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def print_paper_table(power, util, manual):
|
| 110 |
+
"""Print formatted table for paper.tex."""
|
| 111 |
+
print("\n" + "=" * 60)
|
| 112 |
+
print("RESOURCE UTILIZATION (for paper Table)")
|
| 113 |
+
print("=" * 60)
|
| 114 |
+
print(f"Target: {manual['target']}")
|
| 115 |
+
print(f"Cores: {manual['cores']}")
|
| 116 |
+
print(f"Neurons: {manual['total_neurons']:,}")
|
| 117 |
+
print(f"Clock: {manual['clock_mhz']} MHz")
|
| 118 |
+
print(f"WNS: +{manual['wns_ns']} ns (timing MET)")
|
| 119 |
+
print(f"BRAM36K: {manual['bram36k_used']} / {manual['bram36k_total']} "
|
| 120 |
+
f"({manual['bram_pct']:.1f}%)")
|
| 121 |
+
|
| 122 |
+
if util:
|
| 123 |
+
if 'luts_used' in util:
|
| 124 |
+
lut_pct = 100 * util['luts_used'] / util['luts_total']
|
| 125 |
+
print(f"LUTs: {util['luts_used']:,} / {util['luts_total']:,} "
|
| 126 |
+
f"({lut_pct:.1f}%)")
|
| 127 |
+
if 'ffs_used' in util:
|
| 128 |
+
ff_pct = 100 * util['ffs_used'] / util['ffs_total']
|
| 129 |
+
print(f"Flip-Flops: {util['ffs_used']:,} / {util['ffs_total']:,} "
|
| 130 |
+
f"({ff_pct:.1f}%)")
|
| 131 |
+
if 'dsps_used' in util:
|
| 132 |
+
print(f"DSPs: {util['dsps_used']} / {util['dsps_total']}")
|
| 133 |
+
|
| 134 |
+
print(f"\nThroughput: {manual['throughput_ts_per_sec']:,} timesteps/sec")
|
| 135 |
+
|
| 136 |
+
if power:
|
| 137 |
+
print(f"\n{'='*60}")
|
| 138 |
+
print("POWER (from Vivado report_power)")
|
| 139 |
+
print(f"{'='*60}")
|
| 140 |
+
for k, v in sorted(power.items()):
|
| 141 |
+
print(f" {k}: {v:.3f} W")
|
| 142 |
+
|
| 143 |
+
if 'dynamic_power_w' in power:
|
| 144 |
+
asic_lo = power['dynamic_power_w'] / 20
|
| 145 |
+
asic_hi = power['dynamic_power_w'] / 10
|
| 146 |
+
print(f"\nASIC estimate: {asic_lo*1000:.0f} - {asic_hi*1000:.0f} mW "
|
| 147 |
+
f"(FPGA dynamic / 10-20x)")
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def main():
|
| 151 |
+
parser = argparse.ArgumentParser(description="Extract Vivado power/utilization")
|
| 152 |
+
parser.add_argument("power_report", nargs='?', help="Vivado power report file")
|
| 153 |
+
parser.add_argument("util_report", nargs='?', help="Vivado utilization report file")
|
| 154 |
+
parser.add_argument("--manual", action="store_true",
|
| 155 |
+
help="Use known F2 build numbers")
|
| 156 |
+
args = parser.parse_args()
|
| 157 |
+
|
| 158 |
+
manual = manual_entry()
|
| 159 |
+
power = {}
|
| 160 |
+
util = {}
|
| 161 |
+
|
| 162 |
+
if args.power_report:
|
| 163 |
+
power = parse_power_report(args.power_report)
|
| 164 |
+
if args.util_report:
|
| 165 |
+
util = parse_utilization_report(args.util_report)
|
| 166 |
+
|
| 167 |
+
print_paper_table(power, util, manual)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
if __name__ == "__main__":
|
| 171 |
+
main()
|
fpga/f2/build_f2.tcl
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# F2 Build Script — Source File List
|
| 3 |
+
# ============================================================================
|
| 4 |
+
#
|
| 5 |
+
# This script is sourced by the AWS HDK build flow.
|
| 6 |
+
# It adds our CL design sources to the Vivado project.
|
| 7 |
+
#
|
| 8 |
+
# Usage (within HDK environment):
|
| 9 |
+
# source $CL_DIR/build/scripts/aws_build_dcp_from_cl.tcl
|
| 10 |
+
#
|
| 11 |
+
# The HDK flow expects CL sources in $CL_DIR/design/
|
| 12 |
+
# Copy all .v and .vh files there before running the build.
|
| 13 |
+
# ============================================================================
|
| 14 |
+
|
| 15 |
+
# ---- CL wrapper + bridge ----
|
| 16 |
+
set cl_design_files [list \
|
| 17 |
+
$CL_DIR/design/cl_neuromorphic_defines.vh \
|
| 18 |
+
$CL_DIR/design/cl_neuromorphic.v \
|
| 19 |
+
$CL_DIR/design/axi_uart_bridge.v \
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
# ---- Neuromorphic RTL ----
|
| 23 |
+
set neuro_rtl_files [list \
|
| 24 |
+
$CL_DIR/design/sram.v \
|
| 25 |
+
$CL_DIR/design/spike_fifo.v \
|
| 26 |
+
$CL_DIR/design/scalable_core_v2.v \
|
| 27 |
+
$CL_DIR/design/neuromorphic_mesh.v \
|
| 28 |
+
$CL_DIR/design/async_noc_mesh.v \
|
| 29 |
+
$CL_DIR/design/async_router.v \
|
| 30 |
+
$CL_DIR/design/sync_tree.v \
|
| 31 |
+
$CL_DIR/design/chip_link.v \
|
| 32 |
+
$CL_DIR/design/host_interface.v \
|
| 33 |
+
$CL_DIR/design/neuromorphic_top.v \
|
| 34 |
+
$CL_DIR/design/rv32i_core.v \
|
| 35 |
+
$CL_DIR/design/rv32im_cluster.v \
|
| 36 |
+
$CL_DIR/design/mmio_bridge.v \
|
| 37 |
+
$CL_DIR/design/multi_chip_router.v \
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
# Note: uart_rx.v and uart_tx.v are NOT needed (BYPASS_UART=1).
|
| 41 |
+
# They would be optimized away anyway, but omitting them prevents
|
| 42 |
+
# Vivado lint warnings about unconnected modules.
|
| 43 |
+
|
| 44 |
+
# ---- Add all sources ----
|
| 45 |
+
foreach f [concat $cl_design_files $neuro_rtl_files] {
|
| 46 |
+
if {[file exists $f]} {
|
| 47 |
+
read_verilog $f
|
| 48 |
+
} else {
|
| 49 |
+
puts "WARNING: File not found: $f"
|
| 50 |
+
}
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
# ---- Include path for defines ----
|
| 54 |
+
set_property verilog_define {} [current_fileset]
|
| 55 |
+
set_property include_dirs [list $CL_DIR/design] [current_fileset]
|
fpga/f2/cl_id_defines.vh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 3 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 4 |
+
//
|
| 5 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
// you may not use this file except in compliance with the License.
|
| 7 |
+
// You may obtain a copy of the License at
|
| 8 |
+
//
|
| 9 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
//
|
| 11 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
// See the License for the specific language governing permissions and
|
| 15 |
+
// limitations under the License.
|
| 16 |
+
// ============================================================================
|
| 17 |
+
|
| 18 |
+
// CL Neuromorphic — PCIe ID defines
|
| 19 |
+
`ifndef CL_NEUROMORPHIC_DEFINES_VH
|
| 20 |
+
`define CL_NEUROMORPHIC_DEFINES_VH
|
| 21 |
+
|
| 22 |
+
`define CL_SH_ID0 32'hF230_1D0F // F230=neuromorphic, 1D0F=Amazon
|
| 23 |
+
`define CL_SH_ID1 32'h0010_1D0F // 0010=16-core
|
| 24 |
+
|
| 25 |
+
`endif
|
fpga/f2/cl_neuromorphic.sv
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// CL Neuromorphic — AWS F2 FPGA Top-Level Custom Logic Wrapper
|
| 3 |
+
// Neuromorphic Chip v2.3 (16 cores x 1024 neurons) via PCIe MMIO
|
| 4 |
+
// MMCME4 generates 62.5 MHz for neuromorphic logic (CDC via async FIFOs)
|
| 5 |
+
// ============================================================================
|
| 6 |
+
//
|
| 7 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 8 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 9 |
+
//
|
| 10 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 11 |
+
// you may not use this file except in compliance with the License.
|
| 12 |
+
// You may obtain a copy of the License at
|
| 13 |
+
//
|
| 14 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 15 |
+
//
|
| 16 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 17 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 18 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 19 |
+
// See the License for the specific language governing permissions and
|
| 20 |
+
// limitations under the License.
|
| 21 |
+
// ============================================================================
|
| 22 |
+
|
| 23 |
+
module cl_neuromorphic
|
| 24 |
+
#(
|
| 25 |
+
parameter EN_DDR = 0,
|
| 26 |
+
parameter EN_HBM = 0
|
| 27 |
+
)
|
| 28 |
+
(
|
| 29 |
+
`include "cl_ports.vh"
|
| 30 |
+
);
|
| 31 |
+
|
| 32 |
+
`include "cl_neuromorphic_defines.vh"
|
| 33 |
+
|
| 34 |
+
//=============================================================================
|
| 35 |
+
// Reset synchronizer (AXI clock domain)
|
| 36 |
+
//=============================================================================
|
| 37 |
+
logic rst_main_n_sync;
|
| 38 |
+
always_ff @(negedge rst_main_n or posedge clk_main_a0)
|
| 39 |
+
if (!rst_main_n) rst_main_n_sync <= 1'b0;
|
| 40 |
+
else rst_main_n_sync <= 1'b1;
|
| 41 |
+
|
| 42 |
+
//=============================================================================
|
| 43 |
+
// MMCME4: Generate 62.5 MHz neuromorphic clock from 250 MHz
|
| 44 |
+
//=============================================================================
|
| 45 |
+
// VCO = 250 MHz * 4.0 = 1000 MHz
|
| 46 |
+
// CLKOUT0 = 1000 MHz / 16.0 = 62.5 MHz
|
| 47 |
+
wire clk_neuro_unbuf;
|
| 48 |
+
wire clk_neuro;
|
| 49 |
+
wire mmcm_fb;
|
| 50 |
+
wire mmcm_locked;
|
| 51 |
+
|
| 52 |
+
MMCME4_BASE #(
|
| 53 |
+
.CLKIN1_PERIOD (4.000), // 250 MHz input
|
| 54 |
+
.CLKFBOUT_MULT_F (4.000), // VCO = 1000 MHz
|
| 55 |
+
.CLKOUT0_DIVIDE_F(16.000), // 62.5 MHz output
|
| 56 |
+
.CLKOUT0_PHASE (0.000),
|
| 57 |
+
.DIVCLK_DIVIDE (1)
|
| 58 |
+
) u_mmcm (
|
| 59 |
+
.CLKIN1 (clk_main_a0),
|
| 60 |
+
.CLKFBOUT (mmcm_fb),
|
| 61 |
+
.CLKFBIN (mmcm_fb),
|
| 62 |
+
.CLKOUT0 (clk_neuro_unbuf),
|
| 63 |
+
.CLKOUT0B (),
|
| 64 |
+
.CLKOUT1 (),
|
| 65 |
+
.CLKOUT1B (),
|
| 66 |
+
.CLKOUT2 (),
|
| 67 |
+
.CLKOUT2B (),
|
| 68 |
+
.CLKOUT3 (),
|
| 69 |
+
.CLKOUT3B (),
|
| 70 |
+
.CLKOUT4 (),
|
| 71 |
+
.CLKOUT5 (),
|
| 72 |
+
.CLKOUT6 (),
|
| 73 |
+
.LOCKED (mmcm_locked),
|
| 74 |
+
.PWRDWN (1'b0),
|
| 75 |
+
.RST (~rst_main_n)
|
| 76 |
+
);
|
| 77 |
+
|
| 78 |
+
BUFG u_bufg_neuro (.I(clk_neuro_unbuf), .O(clk_neuro));
|
| 79 |
+
|
| 80 |
+
//=============================================================================
|
| 81 |
+
// Reset synchronizer (neuro clock domain)
|
| 82 |
+
//=============================================================================
|
| 83 |
+
logic rst_neuro_n_sync;
|
| 84 |
+
logic rst_neuro_n_pipe;
|
| 85 |
+
always_ff @(negedge mmcm_locked or posedge clk_neuro)
|
| 86 |
+
if (!mmcm_locked) begin
|
| 87 |
+
rst_neuro_n_pipe <= 1'b0;
|
| 88 |
+
rst_neuro_n_sync <= 1'b0;
|
| 89 |
+
end else begin
|
| 90 |
+
rst_neuro_n_pipe <= rst_main_n;
|
| 91 |
+
rst_neuro_n_sync <= rst_neuro_n_pipe;
|
| 92 |
+
end
|
| 93 |
+
|
| 94 |
+
//=============================================================================
|
| 95 |
+
// GLOBALS
|
| 96 |
+
//=============================================================================
|
| 97 |
+
assign cl_sh_flr_done = 1'b1;
|
| 98 |
+
assign cl_sh_status0 = {31'b0, mmcm_locked};
|
| 99 |
+
assign cl_sh_status1 = 32'b0;
|
| 100 |
+
assign cl_sh_status2 = 32'b0;
|
| 101 |
+
assign cl_sh_id0 = `CL_SH_ID0;
|
| 102 |
+
assign cl_sh_id1 = `CL_SH_ID1;
|
| 103 |
+
assign cl_sh_status_vled = {15'b0, mmcm_locked};
|
| 104 |
+
|
| 105 |
+
//=============================================================================
|
| 106 |
+
// Unused interfaces — tie off with standard AWS templates
|
| 107 |
+
//=============================================================================
|
| 108 |
+
|
| 109 |
+
// PCIM (CL-initiated DMA master) — unused
|
| 110 |
+
`include "unused_pcim_template.inc"
|
| 111 |
+
|
| 112 |
+
// PCIS (Host DMA slave) — unused
|
| 113 |
+
`include "unused_dma_pcis_template.inc"
|
| 114 |
+
|
| 115 |
+
// SDA (Management AXI-Lite BAR) — unused
|
| 116 |
+
`include "unused_cl_sda_template.inc"
|
| 117 |
+
|
| 118 |
+
// DDR4 — unused but sh_ddr required for pin connections
|
| 119 |
+
`include "unused_ddr_template.inc"
|
| 120 |
+
|
| 121 |
+
// Interrupts — unused
|
| 122 |
+
`include "unused_apppf_irq_template.inc"
|
| 123 |
+
|
| 124 |
+
//=============================================================================
|
| 125 |
+
// JTAG — unused
|
| 126 |
+
//=============================================================================
|
| 127 |
+
assign tdo = 1'b0;
|
| 128 |
+
|
| 129 |
+
//=============================================================================
|
| 130 |
+
// HBM Monitor — unused
|
| 131 |
+
//=============================================================================
|
| 132 |
+
assign hbm_apb_paddr_1 = 22'b0;
|
| 133 |
+
assign hbm_apb_pprot_1 = 3'b0;
|
| 134 |
+
assign hbm_apb_psel_1 = 1'b0;
|
| 135 |
+
assign hbm_apb_penable_1 = 1'b0;
|
| 136 |
+
assign hbm_apb_pwrite_1 = 1'b0;
|
| 137 |
+
assign hbm_apb_pwdata_1 = 32'b0;
|
| 138 |
+
assign hbm_apb_pstrb_1 = 4'b0;
|
| 139 |
+
assign hbm_apb_pready_1 = 1'b0;
|
| 140 |
+
assign hbm_apb_prdata_1 = 32'b0;
|
| 141 |
+
assign hbm_apb_pslverr_1 = 1'b0;
|
| 142 |
+
|
| 143 |
+
assign hbm_apb_paddr_0 = 22'b0;
|
| 144 |
+
assign hbm_apb_pprot_0 = 3'b0;
|
| 145 |
+
assign hbm_apb_psel_0 = 1'b0;
|
| 146 |
+
assign hbm_apb_penable_0 = 1'b0;
|
| 147 |
+
assign hbm_apb_pwrite_0 = 1'b0;
|
| 148 |
+
assign hbm_apb_pwdata_0 = 32'b0;
|
| 149 |
+
assign hbm_apb_pstrb_0 = 4'b0;
|
| 150 |
+
assign hbm_apb_pready_0 = 1'b0;
|
| 151 |
+
assign hbm_apb_prdata_0 = 32'b0;
|
| 152 |
+
assign hbm_apb_pslverr_0 = 1'b0;
|
| 153 |
+
|
| 154 |
+
//=============================================================================
|
| 155 |
+
// PCIe EP/RP — unused
|
| 156 |
+
//=============================================================================
|
| 157 |
+
assign PCIE_EP_TXP = 8'b0;
|
| 158 |
+
assign PCIE_EP_TXN = 8'b0;
|
| 159 |
+
assign PCIE_RP_PERSTN = 1'b0;
|
| 160 |
+
assign PCIE_RP_TXP = 8'b0;
|
| 161 |
+
assign PCIE_RP_TXN = 8'b0;
|
| 162 |
+
|
| 163 |
+
//=============================================================================
|
| 164 |
+
// OCL AXI-Lite -> AXI-UART Bridge -> Neuromorphic Top
|
| 165 |
+
//=============================================================================
|
| 166 |
+
|
| 167 |
+
// Bridge <-> neuromorphic_top byte-stream wires
|
| 168 |
+
wire [7:0] bridge_rx_data;
|
| 169 |
+
wire bridge_rx_valid;
|
| 170 |
+
wire [7:0] bridge_tx_data;
|
| 171 |
+
wire bridge_tx_valid;
|
| 172 |
+
wire bridge_tx_ready;
|
| 173 |
+
|
| 174 |
+
axi_uart_bridge #(
|
| 175 |
+
.VERSION_ID (32'hF2_02_03_10), // F2, v2.3, 16-core
|
| 176 |
+
.NUM_CORES (16)
|
| 177 |
+
) u_bridge (
|
| 178 |
+
.clk (clk_main_a0),
|
| 179 |
+
.rst_n (rst_main_n_sync),
|
| 180 |
+
.clk_neuro (clk_neuro),
|
| 181 |
+
.rst_neuro_n (rst_neuro_n_sync),
|
| 182 |
+
|
| 183 |
+
// AXI-Lite slave (OCL BAR0)
|
| 184 |
+
.s_axi_awaddr (ocl_cl_awaddr),
|
| 185 |
+
.s_axi_awvalid(ocl_cl_awvalid),
|
| 186 |
+
.s_axi_awready(cl_ocl_awready),
|
| 187 |
+
.s_axi_wdata (ocl_cl_wdata),
|
| 188 |
+
.s_axi_wstrb (ocl_cl_wstrb),
|
| 189 |
+
.s_axi_wvalid (ocl_cl_wvalid),
|
| 190 |
+
.s_axi_wready (cl_ocl_wready),
|
| 191 |
+
.s_axi_bresp (cl_ocl_bresp),
|
| 192 |
+
.s_axi_bvalid (cl_ocl_bvalid),
|
| 193 |
+
.s_axi_bready (ocl_cl_bready),
|
| 194 |
+
.s_axi_araddr (ocl_cl_araddr),
|
| 195 |
+
.s_axi_arvalid(ocl_cl_arvalid),
|
| 196 |
+
.s_axi_arready(cl_ocl_arready),
|
| 197 |
+
.s_axi_rdata (cl_ocl_rdata),
|
| 198 |
+
.s_axi_rresp (cl_ocl_rresp),
|
| 199 |
+
.s_axi_rvalid (cl_ocl_rvalid),
|
| 200 |
+
.s_axi_rready (ocl_cl_rready),
|
| 201 |
+
|
| 202 |
+
// Byte-stream to neuromorphic_top (clk_neuro domain)
|
| 203 |
+
.hi_rx_data (bridge_rx_data),
|
| 204 |
+
.hi_rx_valid (bridge_rx_valid),
|
| 205 |
+
.hi_tx_data (bridge_tx_data),
|
| 206 |
+
.hi_tx_valid (bridge_tx_valid),
|
| 207 |
+
.hi_tx_ready (bridge_tx_ready)
|
| 208 |
+
);
|
| 209 |
+
|
| 210 |
+
neuromorphic_top #(
|
| 211 |
+
.CLK_FREQ (62_500_000),
|
| 212 |
+
.BAUD (115200),
|
| 213 |
+
.BYPASS_UART (1),
|
| 214 |
+
.NUM_CORES (16),
|
| 215 |
+
.CORE_ID_BITS (4),
|
| 216 |
+
.NUM_NEURONS (1024),
|
| 217 |
+
.NEURON_BITS (10),
|
| 218 |
+
.POOL_DEPTH (4096),
|
| 219 |
+
.POOL_ADDR_BITS (12),
|
| 220 |
+
.COUNT_BITS (12),
|
| 221 |
+
.CHIP_LINK_EN (0),
|
| 222 |
+
.NOC_MODE (0),
|
| 223 |
+
.MESH_X (4),
|
| 224 |
+
.MESH_Y (4)
|
| 225 |
+
) u_neuromorphic (
|
| 226 |
+
.clk (clk_neuro),
|
| 227 |
+
.rst_n (rst_neuro_n_sync),
|
| 228 |
+
|
| 229 |
+
// UART unused (BYPASS_UART=1)
|
| 230 |
+
.uart_rxd (1'b1),
|
| 231 |
+
.uart_txd (),
|
| 232 |
+
|
| 233 |
+
// Byte-stream from AXI bridge (clk_neuro domain)
|
| 234 |
+
.rx_data_ext (bridge_rx_data),
|
| 235 |
+
.rx_valid_ext (bridge_rx_valid),
|
| 236 |
+
.tx_data_ext (bridge_tx_data),
|
| 237 |
+
.tx_valid_ext (bridge_tx_valid),
|
| 238 |
+
.tx_ready_ext (bridge_tx_ready),
|
| 239 |
+
|
| 240 |
+
// Multi-chip link disabled
|
| 241 |
+
.link_tx_data (),
|
| 242 |
+
.link_tx_valid (),
|
| 243 |
+
.link_tx_ready (1'b0),
|
| 244 |
+
.link_rx_data (8'b0),
|
| 245 |
+
.link_rx_valid (1'b0),
|
| 246 |
+
.link_rx_ready ()
|
| 247 |
+
);
|
| 248 |
+
|
| 249 |
+
endmodule
|
fpga/f2/cl_neuromorphic.v
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// CL Top-Level — AWS F2 Shell ↔ Neuromorphic Chip
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Wraps the 128-core neuromorphic system for the AWS F2 FPGA (VU47P).
|
| 6 |
+
//
|
| 7 |
+
// Active interfaces:
|
| 8 |
+
// - OCL AXI-Lite (BAR0): Host MMIO → axi_uart_bridge → host_interface
|
| 9 |
+
//
|
| 10 |
+
// All other Shell interfaces (PCIM, PCIS/DMA, SDA, DDR, HBM, interrupts)
|
| 11 |
+
// are tied off as unused.
|
| 12 |
+
//
|
| 13 |
+
// ============================================================================
|
| 14 |
+
//
|
| 15 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 16 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 17 |
+
//
|
| 18 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 19 |
+
// you may not use this file except in compliance with the License.
|
| 20 |
+
// You may obtain a copy of the License at
|
| 21 |
+
//
|
| 22 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 23 |
+
//
|
| 24 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 25 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 26 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 27 |
+
// See the License for the specific language governing permissions and
|
| 28 |
+
// limitations under the License.
|
| 29 |
+
// ============================================================================
|
| 30 |
+
|
| 31 |
+
`include "cl_neuromorphic_defines.vh"
|
| 32 |
+
|
| 33 |
+
module cl_neuromorphic (
|
| 34 |
+
input wire clk_main_a0,
|
| 35 |
+
input wire rst_main_n,
|
| 36 |
+
|
| 37 |
+
output wire [31:0] cl_sh_id0,
|
| 38 |
+
output wire [31:0] cl_sh_id1,
|
| 39 |
+
|
| 40 |
+
input wire [31:0] sh_ocl_awaddr,
|
| 41 |
+
input wire sh_ocl_awvalid,
|
| 42 |
+
output wire ocl_sh_awready,
|
| 43 |
+
input wire [31:0] sh_ocl_wdata,
|
| 44 |
+
input wire [3:0] sh_ocl_wstrb,
|
| 45 |
+
input wire sh_ocl_wvalid,
|
| 46 |
+
output wire ocl_sh_wready,
|
| 47 |
+
output wire [1:0] ocl_sh_bresp,
|
| 48 |
+
output wire ocl_sh_bvalid,
|
| 49 |
+
input wire sh_ocl_bready,
|
| 50 |
+
input wire [31:0] sh_ocl_araddr,
|
| 51 |
+
input wire sh_ocl_arvalid,
|
| 52 |
+
output wire ocl_sh_arready,
|
| 53 |
+
output wire [31:0] ocl_sh_rdata,
|
| 54 |
+
output wire [1:0] ocl_sh_rresp,
|
| 55 |
+
output wire ocl_sh_rvalid,
|
| 56 |
+
input wire sh_ocl_rready,
|
| 57 |
+
|
| 58 |
+
input wire [31:0] sh_sda_awaddr,
|
| 59 |
+
input wire sh_sda_awvalid,
|
| 60 |
+
output wire sda_sh_awready,
|
| 61 |
+
input wire [31:0] sh_sda_wdata,
|
| 62 |
+
input wire [3:0] sh_sda_wstrb,
|
| 63 |
+
input wire sh_sda_wvalid,
|
| 64 |
+
output wire sda_sh_wready,
|
| 65 |
+
output wire [1:0] sda_sh_bresp,
|
| 66 |
+
output wire sda_sh_bvalid,
|
| 67 |
+
input wire sh_sda_bready,
|
| 68 |
+
input wire [31:0] sh_sda_araddr,
|
| 69 |
+
input wire sh_sda_arvalid,
|
| 70 |
+
output wire sda_sh_arready,
|
| 71 |
+
output wire [31:0] sda_sh_rdata,
|
| 72 |
+
output wire [1:0] sda_sh_rresp,
|
| 73 |
+
output wire sda_sh_rvalid,
|
| 74 |
+
input wire sh_sda_rready,
|
| 75 |
+
|
| 76 |
+
output wire [63:0] cl_sh_pcim_awaddr,
|
| 77 |
+
output wire [15:0] cl_sh_pcim_awid,
|
| 78 |
+
output wire [7:0] cl_sh_pcim_awlen,
|
| 79 |
+
output wire [2:0] cl_sh_pcim_awsize,
|
| 80 |
+
output wire cl_sh_pcim_awvalid,
|
| 81 |
+
input wire sh_cl_pcim_awready,
|
| 82 |
+
output wire [511:0] cl_sh_pcim_wdata,
|
| 83 |
+
output wire [63:0] cl_sh_pcim_wstrb,
|
| 84 |
+
output wire cl_sh_pcim_wlast,
|
| 85 |
+
output wire cl_sh_pcim_wvalid,
|
| 86 |
+
input wire sh_cl_pcim_wready,
|
| 87 |
+
input wire [1:0] sh_cl_pcim_bresp,
|
| 88 |
+
input wire [15:0] sh_cl_pcim_bid,
|
| 89 |
+
input wire sh_cl_pcim_bvalid,
|
| 90 |
+
output wire cl_sh_pcim_bready,
|
| 91 |
+
output wire [63:0] cl_sh_pcim_araddr,
|
| 92 |
+
output wire [15:0] cl_sh_pcim_arid,
|
| 93 |
+
output wire [7:0] cl_sh_pcim_arlen,
|
| 94 |
+
output wire [2:0] cl_sh_pcim_arsize,
|
| 95 |
+
output wire cl_sh_pcim_arvalid,
|
| 96 |
+
input wire sh_cl_pcim_arready,
|
| 97 |
+
input wire [511:0] sh_cl_pcim_rdata,
|
| 98 |
+
input wire [15:0] sh_cl_pcim_rid,
|
| 99 |
+
input wire [1:0] sh_cl_pcim_rresp,
|
| 100 |
+
input wire sh_cl_pcim_rlast,
|
| 101 |
+
input wire sh_cl_pcim_rvalid,
|
| 102 |
+
output wire cl_sh_pcim_rready,
|
| 103 |
+
|
| 104 |
+
input wire [63:0] sh_cl_dma_pcis_awaddr,
|
| 105 |
+
input wire [15:0] sh_cl_dma_pcis_awid,
|
| 106 |
+
input wire [7:0] sh_cl_dma_pcis_awlen,
|
| 107 |
+
input wire [2:0] sh_cl_dma_pcis_awsize,
|
| 108 |
+
input wire sh_cl_dma_pcis_awvalid,
|
| 109 |
+
output wire cl_sh_dma_pcis_awready,
|
| 110 |
+
input wire [511:0] sh_cl_dma_pcis_wdata,
|
| 111 |
+
input wire [63:0] sh_cl_dma_pcis_wstrb,
|
| 112 |
+
input wire sh_cl_dma_pcis_wlast,
|
| 113 |
+
input wire sh_cl_dma_pcis_wvalid,
|
| 114 |
+
output wire cl_sh_dma_pcis_wready,
|
| 115 |
+
output wire [1:0] cl_sh_dma_pcis_bresp,
|
| 116 |
+
output wire [15:0] cl_sh_dma_pcis_bid,
|
| 117 |
+
output wire cl_sh_dma_pcis_bvalid,
|
| 118 |
+
input wire sh_cl_dma_pcis_bready,
|
| 119 |
+
input wire [63:0] sh_cl_dma_pcis_araddr,
|
| 120 |
+
input wire [15:0] sh_cl_dma_pcis_arid,
|
| 121 |
+
input wire [7:0] sh_cl_dma_pcis_arlen,
|
| 122 |
+
input wire [2:0] sh_cl_dma_pcis_arsize,
|
| 123 |
+
input wire sh_cl_dma_pcis_arvalid,
|
| 124 |
+
output wire cl_sh_dma_pcis_arready,
|
| 125 |
+
output wire [511:0] cl_sh_dma_pcis_rdata,
|
| 126 |
+
output wire [15:0] cl_sh_dma_pcis_rid,
|
| 127 |
+
output wire [1:0] cl_sh_dma_pcis_rresp,
|
| 128 |
+
output wire cl_sh_dma_pcis_rlast,
|
| 129 |
+
output wire cl_sh_dma_pcis_rvalid,
|
| 130 |
+
input wire sh_cl_dma_pcis_rready,
|
| 131 |
+
|
| 132 |
+
input wire sh_cl_ddr_stat_wr,
|
| 133 |
+
input wire sh_cl_ddr_stat_rd,
|
| 134 |
+
input wire [7:0] sh_cl_ddr_stat_addr,
|
| 135 |
+
input wire [31:0] sh_cl_ddr_stat_wdata,
|
| 136 |
+
output wire cl_sh_ddr_stat_ack,
|
| 137 |
+
output wire [31:0] cl_sh_ddr_stat_rdata,
|
| 138 |
+
output wire [7:0] cl_sh_ddr_stat_int,
|
| 139 |
+
|
| 140 |
+
output wire [15:0] cl_sh_apppf_irq_req,
|
| 141 |
+
input wire [15:0] sh_cl_apppf_irq_ack,
|
| 142 |
+
|
| 143 |
+
input wire sh_cl_flr_assert,
|
| 144 |
+
output wire cl_sh_flr_done,
|
| 145 |
+
|
| 146 |
+
output wire [31:0] cl_sh_status0,
|
| 147 |
+
output wire [31:0] cl_sh_status1
|
| 148 |
+
);
|
| 149 |
+
|
| 150 |
+
assign cl_sh_id0 = `CL_SH_ID0;
|
| 151 |
+
assign cl_sh_id1 = `CL_SH_ID1;
|
| 152 |
+
|
| 153 |
+
assign cl_sh_status0 = 32'h0000_0001; // bit 0 = CL alive
|
| 154 |
+
assign cl_sh_status1 = 32'd128; // core count
|
| 155 |
+
|
| 156 |
+
// SDA — not used (management register space)
|
| 157 |
+
assign sda_sh_awready = 1'b0;
|
| 158 |
+
assign sda_sh_wready = 1'b0;
|
| 159 |
+
assign sda_sh_bresp = 2'b00;
|
| 160 |
+
assign sda_sh_bvalid = 1'b0;
|
| 161 |
+
assign sda_sh_arready = 1'b0;
|
| 162 |
+
assign sda_sh_rdata = 32'd0;
|
| 163 |
+
assign sda_sh_rresp = 2'b00;
|
| 164 |
+
assign sda_sh_rvalid = 1'b0;
|
| 165 |
+
|
| 166 |
+
// PCIM — not used (no CL-initiated DMA)
|
| 167 |
+
assign cl_sh_pcim_awaddr = 64'd0;
|
| 168 |
+
assign cl_sh_pcim_awid = 16'd0;
|
| 169 |
+
assign cl_sh_pcim_awlen = 8'd0;
|
| 170 |
+
assign cl_sh_pcim_awsize = 3'd0;
|
| 171 |
+
assign cl_sh_pcim_awvalid = 1'b0;
|
| 172 |
+
assign cl_sh_pcim_wdata = 512'd0;
|
| 173 |
+
assign cl_sh_pcim_wstrb = 64'd0;
|
| 174 |
+
assign cl_sh_pcim_wlast = 1'b0;
|
| 175 |
+
assign cl_sh_pcim_wvalid = 1'b0;
|
| 176 |
+
assign cl_sh_pcim_bready = 1'b1; // Accept any write response
|
| 177 |
+
assign cl_sh_pcim_araddr = 64'd0;
|
| 178 |
+
assign cl_sh_pcim_arid = 16'd0;
|
| 179 |
+
assign cl_sh_pcim_arlen = 8'd0;
|
| 180 |
+
assign cl_sh_pcim_arsize = 3'd0;
|
| 181 |
+
assign cl_sh_pcim_arvalid = 1'b0;
|
| 182 |
+
assign cl_sh_pcim_rready = 1'b1; // Accept any read data
|
| 183 |
+
|
| 184 |
+
// PCIS (DMA) — not used (no host DMA writes to CL)
|
| 185 |
+
assign cl_sh_dma_pcis_awready = 1'b0;
|
| 186 |
+
assign cl_sh_dma_pcis_wready = 1'b0;
|
| 187 |
+
assign cl_sh_dma_pcis_bresp = 2'b00;
|
| 188 |
+
assign cl_sh_dma_pcis_bid = 16'd0;
|
| 189 |
+
assign cl_sh_dma_pcis_bvalid = 1'b0;
|
| 190 |
+
assign cl_sh_dma_pcis_arready = 1'b0;
|
| 191 |
+
assign cl_sh_dma_pcis_rdata = 512'd0;
|
| 192 |
+
assign cl_sh_dma_pcis_rid = 16'd0;
|
| 193 |
+
assign cl_sh_dma_pcis_rresp = 2'b00;
|
| 194 |
+
assign cl_sh_dma_pcis_rlast = 1'b0;
|
| 195 |
+
assign cl_sh_dma_pcis_rvalid = 1'b0;
|
| 196 |
+
|
| 197 |
+
// DDR stat — ack any request, return 0
|
| 198 |
+
assign cl_sh_ddr_stat_ack = sh_cl_ddr_stat_wr | sh_cl_ddr_stat_rd;
|
| 199 |
+
assign cl_sh_ddr_stat_rdata = 32'd0;
|
| 200 |
+
assign cl_sh_ddr_stat_int = 8'd0;
|
| 201 |
+
|
| 202 |
+
// Interrupts — none
|
| 203 |
+
assign cl_sh_apppf_irq_req = 16'd0;
|
| 204 |
+
|
| 205 |
+
// FLR — immediate acknowledge
|
| 206 |
+
assign cl_sh_flr_done = sh_cl_flr_assert;
|
| 207 |
+
|
| 208 |
+
wire [7:0] bridge_rx_data;
|
| 209 |
+
wire bridge_rx_valid;
|
| 210 |
+
wire [7:0] bridge_tx_data;
|
| 211 |
+
wire bridge_tx_valid;
|
| 212 |
+
wire bridge_tx_ready;
|
| 213 |
+
|
| 214 |
+
axi_uart_bridge #(
|
| 215 |
+
.FIFO_DEPTH (32),
|
| 216 |
+
.VERSION_ID (32'hF2_02_03_80), // F2, v2.3, 128-core
|
| 217 |
+
.NUM_CORES (128)
|
| 218 |
+
) u_bridge (
|
| 219 |
+
.clk (clk_main_a0),
|
| 220 |
+
.rst_n (rst_main_n),
|
| 221 |
+
|
| 222 |
+
// AXI-Lite slave ← Shell OCL master
|
| 223 |
+
.s_axi_awaddr (sh_ocl_awaddr),
|
| 224 |
+
.s_axi_awvalid (sh_ocl_awvalid),
|
| 225 |
+
.s_axi_awready (ocl_sh_awready),
|
| 226 |
+
.s_axi_wdata (sh_ocl_wdata),
|
| 227 |
+
.s_axi_wstrb (sh_ocl_wstrb),
|
| 228 |
+
.s_axi_wvalid (sh_ocl_wvalid),
|
| 229 |
+
.s_axi_wready (ocl_sh_wready),
|
| 230 |
+
.s_axi_bresp (ocl_sh_bresp),
|
| 231 |
+
.s_axi_bvalid (ocl_sh_bvalid),
|
| 232 |
+
.s_axi_bready (sh_ocl_bready),
|
| 233 |
+
.s_axi_araddr (sh_ocl_araddr),
|
| 234 |
+
.s_axi_arvalid (sh_ocl_arvalid),
|
| 235 |
+
.s_axi_arready (ocl_sh_arready),
|
| 236 |
+
.s_axi_rdata (ocl_sh_rdata),
|
| 237 |
+
.s_axi_rresp (ocl_sh_rresp),
|
| 238 |
+
.s_axi_rvalid (ocl_sh_rvalid),
|
| 239 |
+
.s_axi_rready (sh_ocl_rready),
|
| 240 |
+
|
| 241 |
+
// Byte-stream to neuromorphic_top
|
| 242 |
+
.hi_rx_data (bridge_rx_data),
|
| 243 |
+
.hi_rx_valid (bridge_rx_valid),
|
| 244 |
+
.hi_tx_data (bridge_tx_data),
|
| 245 |
+
.hi_tx_valid (bridge_tx_valid),
|
| 246 |
+
.hi_tx_ready (bridge_tx_ready)
|
| 247 |
+
);
|
| 248 |
+
|
| 249 |
+
neuromorphic_top #(
|
| 250 |
+
.CLK_FREQ (250_000_000), // F2 clk_main_a0 = 250 MHz
|
| 251 |
+
.BAUD (115200), // Unused (BYPASS_UART=1)
|
| 252 |
+
.BYPASS_UART (1),
|
| 253 |
+
.NUM_CORES (128),
|
| 254 |
+
.CORE_ID_BITS (12),
|
| 255 |
+
.NUM_NEURONS (1024),
|
| 256 |
+
.NEURON_BITS (10),
|
| 257 |
+
.DATA_WIDTH (16),
|
| 258 |
+
.POOL_DEPTH (8192), // 8K/core × 128 cores = 1M total
|
| 259 |
+
.POOL_ADDR_BITS (13),
|
| 260 |
+
.COUNT_BITS (12),
|
| 261 |
+
.REV_FANIN (32),
|
| 262 |
+
.REV_SLOT_BITS (5),
|
| 263 |
+
.THRESHOLD (16'sd1000),
|
| 264 |
+
.LEAK_RATE (16'sd3),
|
| 265 |
+
.REFRAC_CYCLES (3),
|
| 266 |
+
.ROUTE_FANOUT (8),
|
| 267 |
+
.ROUTE_SLOT_BITS (3),
|
| 268 |
+
.GLOBAL_ROUTE_SLOTS (4),
|
| 269 |
+
.GLOBAL_ROUTE_SLOT_BITS (2),
|
| 270 |
+
.CHIP_LINK_EN (0),
|
| 271 |
+
.NOC_MODE (0), // Barrier mesh (deterministic)
|
| 272 |
+
.MESH_X (16), // 16×8 = 128 cores
|
| 273 |
+
.MESH_Y (8)
|
| 274 |
+
) u_neuromorphic (
|
| 275 |
+
.clk (clk_main_a0),
|
| 276 |
+
.rst_n (rst_main_n),
|
| 277 |
+
|
| 278 |
+
// UART — unused (BYPASS_UART=1)
|
| 279 |
+
.uart_rxd (1'b1),
|
| 280 |
+
.uart_txd (),
|
| 281 |
+
|
| 282 |
+
// Byte-stream from AXI bridge
|
| 283 |
+
.rx_data_ext (bridge_rx_data),
|
| 284 |
+
.rx_valid_ext (bridge_rx_valid),
|
| 285 |
+
.tx_data_ext (bridge_tx_data),
|
| 286 |
+
.tx_valid_ext (bridge_tx_valid),
|
| 287 |
+
.tx_ready_ext (bridge_tx_ready),
|
| 288 |
+
|
| 289 |
+
// Chip link — disabled
|
| 290 |
+
.link_tx_data (),
|
| 291 |
+
.link_tx_valid (),
|
| 292 |
+
.link_tx_ready (1'b0),
|
| 293 |
+
.link_rx_data (8'd0),
|
| 294 |
+
.link_rx_valid (1'b0),
|
| 295 |
+
.link_rx_ready ()
|
| 296 |
+
);
|
| 297 |
+
|
| 298 |
+
endmodule
|
fpga/f2/cl_neuromorphic_defines.vh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 3 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 4 |
+
//
|
| 5 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
// you may not use this file except in compliance with the License.
|
| 7 |
+
// You may obtain a copy of the License at
|
| 8 |
+
//
|
| 9 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
//
|
| 11 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
// See the License for the specific language governing permissions and
|
| 15 |
+
// limitations under the License.
|
| 16 |
+
// ============================================================================
|
| 17 |
+
|
| 18 |
+
// CL Neuromorphic — PCIe ID defines
|
| 19 |
+
`ifndef CL_NEUROMORPHIC_DEFINES_VH
|
| 20 |
+
`define CL_NEUROMORPHIC_DEFINES_VH
|
| 21 |
+
|
| 22 |
+
`define CL_SH_ID0 32'hF230_1D0F // F230=neuromorphic, 1D0F=Amazon
|
| 23 |
+
`define CL_SH_ID1 32'h0010_1D0F // 0010=16-core
|
| 24 |
+
|
| 25 |
+
`endif
|
fpga/f2/cl_synth_user.xdc
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# CL Synthesis Constraints — Neuromorphic Chip on AWS F2
|
| 3 |
+
# ============================================================================
|
| 4 |
+
# These are applied during synthesis only (not implementation).
|
| 5 |
+
|
| 6 |
+
# No false paths or multicycle needed — single clock domain design.
|
| 7 |
+
# The Shell provides clk_main_a0 at 250 MHz (4.0 ns period).
|
| 8 |
+
# All neuromorphic logic is synchronous to this single clock.
|
fpga/f2/cl_timing_user.xdc
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===========================================================================
|
| 2 |
+
# CL Neuromorphic — User Timing Constraints
|
| 3 |
+
# ===========================================================================
|
| 4 |
+
|
| 5 |
+
# Generated clock from MMCME4 (62.5 MHz)
|
| 6 |
+
# The MMCM auto-generates clock constraints from its parameters,
|
| 7 |
+
# but we add explicit false paths between clock domains for CDC.
|
| 8 |
+
|
| 9 |
+
# Async FIFO CDC: false paths between AXI clock and neuro clock
|
| 10 |
+
# The Gray-code synchronizers in async_fifo handle the CDC safely.
|
| 11 |
+
set_false_path -from [get_clocks -of_objects [get_pins WRAPPER/CL/u_mmcm/CLKIN1]] \
|
| 12 |
+
-to [get_clocks -of_objects [get_pins WRAPPER/CL/u_mmcm/CLKOUT0]]
|
| 13 |
+
set_false_path -from [get_clocks -of_objects [get_pins WRAPPER/CL/u_mmcm/CLKOUT0]] \
|
| 14 |
+
-to [get_clocks -of_objects [get_pins WRAPPER/CL/u_mmcm/CLKIN1]]
|
fpga/f2/deploy_f2.sh
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# ============================================================================
|
| 3 |
+
# F2 Deploy Script — Build + Deploy Neuromorphic Chip to AWS F2
|
| 4 |
+
# ============================================================================
|
| 5 |
+
#
|
| 6 |
+
# Prerequisites:
|
| 7 |
+
# 1. AWS FPGA HDK cloned and set up:
|
| 8 |
+
# git clone https://github.com/aws/aws-fpga
|
| 9 |
+
# cd aws-fpga && source hdk_setup.sh
|
| 10 |
+
#
|
| 11 |
+
# 2. This repository cloned at $NEURO_DIR:
|
| 12 |
+
# export NEURO_DIR=/path/to/neuromorphic-chip
|
| 13 |
+
#
|
| 14 |
+
# 3. S3 bucket for AFI artifacts:
|
| 15 |
+
# export AFI_BUCKET=my-fpga-bucket
|
| 16 |
+
# export AFI_PREFIX=neuromorphic-v2.3
|
| 17 |
+
#
|
| 18 |
+
# Usage:
|
| 19 |
+
# ./deploy_f2.sh [--build-only | --afi-only | --load-only | --test | --full]  (default: --full)
|
| 20 |
+
# ============================================================================
|
| 21 |
+
|
| 22 |
+
set -euo pipefail
|
| 23 |
+
|
| 24 |
+
# ---- Configuration (every value may be overridden from the environment) ----
# NEURO_DIR defaults to the repository root, two directories above this script.
NEURO_DIR="${NEURO_DIR:-$(cd "$(dirname "$0")/../.." && pwd)}"
AFI_BUCKET="${AFI_BUCKET:-}"
AFI_PREFIX="${AFI_PREFIX:-neuromorphic-v2.3}"
MODE="${1:---full}"

# CL_DIR is only consumed by the build / AFI steps.  Resolve it from HDK_DIR
# when possible, and only insist on it for the modes that actually need it, so
# that --load-only and --test keep working on a machine without the HDK
# (previously `set -u` aborted with "HDK_DIR: unbound variable").
CL_DIR="${CL_DIR:-}"
if [[ -z "$CL_DIR" ]]; then
  if [[ -n "${HDK_DIR:-}" ]]; then
    CL_DIR="$HDK_DIR/cl/developer_designs/cl_neuromorphic"
  else
    case "$MODE" in
      --build-only | --afi-only | --full)
        echo "ERROR: neither CL_DIR nor HDK_DIR is set." \
          "Source hdk_setup.sh or export CL_DIR." >&2
        exit 1
        ;;
    esac
  fi
fi

echo "============================================"
echo " Neuromorphic Chip v2.3 — F2 Deployment"
echo "============================================"
echo " NEURO_DIR: $NEURO_DIR"
echo " CL_DIR: ${CL_DIR:-<not set>}"
echo " Mode: $MODE"
echo ""
|
| 37 |
+
|
| 38 |
+
# ---- Step 1: Copy design files into HDK CL tree ----
|
| 39 |
+
#######################################
# Copy the CL wrapper, RTL, constraints and build script into the HDK CL tree.
# Globals:   NEURO_DIR (read) - root of this repository
#            CL_DIR    (read) - destination CL directory inside the HDK
# Outputs:   progress messages on stdout
# Returns:   non-zero (or aborts under `set -e`) if any copy fails
#######################################
copy_design() {
  echo "--- Copying design files ---"

  local design_dir="$CL_DIR/design"
  local constraints_dir="$CL_DIR/build/constraints"
  local scripts_dir="$CL_DIR/build/scripts"
  # build/scripts is created too: the final cp below writes into it and would
  # fail on a freshly created CL directory otherwise.
  mkdir -p "$design_dir" "$constraints_dir" "$scripts_dir"

  # CL wrapper + bridge
  cp "$NEURO_DIR/fpga/f2/cl_neuromorphic.v" "$design_dir/"
  cp "$NEURO_DIR/fpga/f2/cl_neuromorphic_defines.vh" "$design_dir/"
  cp "$NEURO_DIR/rtl/axi_uart_bridge.v" "$design_dir/"

  # Neuromorphic RTL (excluding UART modules — BYPASS_UART=1)
  # NOTE(review): synth_cl_neuromorphic.tcl also lists async_fifo.v, which is
  # not copied here — confirm which flow is authoritative.
  local rtl_files=(
    sram.v spike_fifo.v scalable_core_v2.v neuromorphic_mesh.v
    async_noc_mesh.v async_router.v sync_tree.v chip_link.v
    host_interface.v neuromorphic_top.v rv32i_core.v
    rv32im_cluster.v mmio_bridge.v multi_chip_router.v
  )
  local f
  for f in "${rtl_files[@]}"; do
    cp "$NEURO_DIR/rtl/$f" "$design_dir/"
  done

  # Constraints
  cp "$NEURO_DIR/fpga/f2/cl_synth_user.xdc" "$constraints_dir/"
  cp "$NEURO_DIR/fpga/f2/cl_timing_user.xdc" "$constraints_dir/"

  # Build source list
  cp "$NEURO_DIR/fpga/f2/build_f2.tcl" "$scripts_dir/cl_build_user.tcl"

  # Count with a glob instead of parsing `ls`; an unmatched glob stays literal,
  # hence the existence check on the first element.
  local copied=("$design_dir/"*.v)
  local n_copied=0
  if [[ -e "${copied[0]}" ]]; then
    n_copied=${#copied[@]}
  fi
  echo " Copied $n_copied Verilog files"
}
|
| 66 |
+
|
| 67 |
+
# ---- Step 2: Build DCP (synthesis + implementation) ----
|
| 68 |
+
#######################################
# Run the HDK DCP build (synthesis + implementation) and report timing status.
# Globals:   CL_DIR (read) - CL directory inside the HDK
# Outputs:   progress and timing verdict on stdout
# Returns:   non-zero (or aborts under `set -e`) if the build script fails
# Note:      changes the working directory to $CL_DIR/build/scripts.
#######################################
build_dcp() {
  echo ""
  echo "--- Building DCP (this takes 4-8 hours) ---"
  cd "$CL_DIR/build/scripts"
  # NOTE(review): run_build.sh in this directory invokes
  # aws_build_dcp_from_cl.py instead — confirm which entry point the HDK ships.
  ./aws_build_dcp_from_cl.sh -clock_recipe_a A1 # A1 = 250 MHz
  echo " DCP build complete"

  # Check for timing failures.  The reports are collected with a real glob
  # (array) rather than an unquoted pattern string, and a missing/unreadable
  # report is reported as such instead of being mistaken for "timing met".
  local reports=("$CL_DIR/build/checkpoints/to_aws/"*.SH_CL_routed.rpt)
  if [[ ! -e "${reports[0]}" ]]; then
    echo " WARNING: No timing report found in checkpoints/to_aws/ - timing NOT verified."
    return 0
  fi

  local rc=0
  grep -q "VIOLATED" -- "${reports[@]}" 2>/dev/null || rc=$?
  case "$rc" in
    0) echo " WARNING: Timing violations detected! Check reports." ;;
    1) echo " Timing met at 250 MHz" ;;
    *) echo " WARNING: Could not read timing report(s) - timing NOT verified." ;;
  esac
}
|
| 83 |
+
|
| 84 |
+
# ---- Step 3: Create AFI ----
|
| 85 |
+
#######################################
# Upload the build tarball to S3 and submit an AFI creation request.
# Globals:   AFI_BUCKET (read, required) - destination S3 bucket
#            AFI_PREFIX (read)           - key prefix inside the bucket
#            CL_DIR     (read)           - CL directory holding checkpoints/to_aws
#            AFI_NAME, AFI_DESCRIPTION, AFI_OUTPUT_JSON (read, optional)
#              - override the AFI name, its description, and where the
#                create-fpga-image response is saved
# Outputs:   progress on stdout; the create-fpga-image response is echoed and
#            also written to the output JSON file
# Returns:   exits 1 when AFI_BUCKET is empty or no tarball exists
#######################################
create_afi() {
  if [[ -z "$AFI_BUCKET" ]]; then
    echo " ERROR: Set AFI_BUCKET environment variable"
    exit 1
  fi

  echo ""
  echo "--- Creating AFI ---"

  # Glob into an array instead of parsing `ls | head -1`; an unmatched glob
  # stays literal, so test that the first entry really exists.
  local tarballs=("$CL_DIR/build/checkpoints/to_aws/"*.tar)
  local tar_file="${tarballs[0]}"
  if [[ ! -e "$tar_file" ]]; then
    echo " ERROR: No .tar file found in checkpoints/to_aws/"
    exit 1
  fi

  aws s3 cp "$tar_file" "s3://$AFI_BUCKET/$AFI_PREFIX/"

  local tar_name="${tar_file##*/}"
  # NOTE(review): the defaults below say "16 core", whereas cl_neuromorphic.v
  # instantiates NUM_CORES=128 — confirm and override via AFI_NAME /
  # AFI_DESCRIPTION if the image is the 128-core build.
  local afi_name="${AFI_NAME:-neuromorphic-v2.3-16core}"
  local afi_desc="${AFI_DESCRIPTION:-Neuromorphic chip v2.3, 16 cores x 1024 neurons, F2 VU47P}"
  local afi_json="${AFI_OUTPUT_JSON:-/tmp/afi_create_output.json}"

  aws ec2 create-fpga-image \
    --name "$afi_name" \
    --description "$afi_desc" \
    --input-storage-location "Bucket=$AFI_BUCKET,Key=$AFI_PREFIX/$tar_name" \
    --logs-storage-location "Bucket=$AFI_BUCKET,Key=$AFI_PREFIX/logs/" \
    | tee "$afi_json"

  echo ""
  echo " AFI creation submitted. Monitor with:"
  echo " aws ec2 describe-fpga-images --fpga-image-ids <afi-id>"
}
|
| 113 |
+
|
| 114 |
+
# ---- Step 4: Load AFI ----
|
| 115 |
+
#######################################
# Load a previously created AFI onto FPGA slot 0 and print the slot status.
# Globals:   AGFI_ID (read, required) - global AFI id passed to
#                                       fpga-load-local-image
# Outputs:   progress and the fpga-describe-local-image report on stdout
# Returns:   exits 1 when AGFI_ID is not set
# Note:      AFI_ID is no longer required: nothing in this step uses it (the
#            loader takes the AGFI id only).  Setting it remains harmless.
#######################################
load_afi() {
  local agfi_id="${AGFI_ID:-}"
  if [[ -z "$agfi_id" ]]; then
    echo " ERROR: Set AGFI_ID environment variable (e.g., agfi-XXXXXXXX)"
    exit 1
  fi

  echo ""
  echo "--- Loading AFI onto slot 0 ---"
  sudo fpga-load-local-image -S 0 -I "$agfi_id"
  # Short pause before querying the slot status.
  sleep 2
  sudo fpga-describe-local-image -S 0 -H
  echo " AFI loaded"
}
|
| 135 |
+
|
| 136 |
+
# ---- Step 5: Run test ----
|
| 137 |
+
#######################################
# Run the host-side connectivity test followed by the spike test.
# Globals:   NEURO_DIR (read) - repository root containing fpga/f2_host.py
# Outputs:   section banners plus whatever f2_host.py prints
# Returns:   status of the last python3 invocation
#######################################
run_test() {
  local host_script="$NEURO_DIR/fpga/f2_host.py"

  printf '\n'
  printf '%s\n' "--- Running connectivity test ---"
  python3 "$host_script" --test-loopback

  printf '\n'
  printf '%s\n' "--- Running spike test ---"
  python3 "$host_script" --test-spike
}
|
| 145 |
+
|
| 146 |
+
# ---- Main ----
|
| 147 |
+
# Dispatch on the requested mode.  --full stops after submitting the AFI,
# and the printed next steps cover waiting for it and then loading and testing.
case "$MODE" in
  --build-only)
    copy_design
    build_dcp
    ;;
  --afi-only)
    create_afi
    ;;
  --load-only)
    load_afi
    ;;
  --test)
    run_test
    ;;
  --full)
    copy_design
    build_dcp
    create_afi
    cat <<'EOF'

============================================
 BUILD COMPLETE
============================================
 Next steps:
 1. Wait for AFI to become available
 2. export AFI_ID=afi-XXXXXXXX
 3. export AGFI_ID=agfi-XXXXXXXX
 4. ./deploy_f2.sh --load-only
 5. ./deploy_f2.sh --test
============================================
EOF
    ;;
  *)
    printf '%s\n' "Usage: $0 [--build-only | --afi-only | --load-only | --test | --full]"
    exit 1
    ;;
esac
|
fpga/f2/run_build.sh
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Build the cl_neuromorphic DCP with the AWS FPGA HDK.
#
# Optional environment overrides (defaults match the original fixed paths):
#   VIVADO_SETTINGS - Vivado settings64.sh to source
#   AWS_FPGA_DIR    - checkout of the aws-fpga repository
set -e

VIVADO_SETTINGS="${VIVADO_SETTINGS:-/opt/Xilinx/2025.2/Vivado/settings64.sh}"
AWS_FPGA_DIR="${AWS_FPGA_DIR:-/home/ubuntu/aws-fpga}"
CL_NAME="cl_neuromorphic"

source "$VIVADO_SETTINGS"
# hdk_setup.sh is sourced from inside the checkout, as before.
cd "$AWS_FPGA_DIR"
source hdk_setup.sh
export CL_DIR="$AWS_FPGA_DIR/hdk/cl/developer_designs/$CL_NAME"
echo "=== Starting build at $(date) ==="
cd "$CL_DIR/build/scripts"
python3 aws_build_dcp_from_cl.py -c "$CL_NAME" --no-encrypt
echo "=== Build finished at $(date) ==="
|
fpga/f2/synth_cl_neuromorphic.tcl
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
source ${HDK_SHELL_DIR}/build/scripts/synth_cl_header.tcl
|
| 2 |
+
|
| 3 |
+
print "Reading neuromorphic design sources"
|
| 4 |
+
|
| 5 |
+
# CL wrapper is SystemVerilog (uses cl_ports.vh with 'logic' types)
|
| 6 |
+
read_verilog -sv [ list \
|
| 7 |
+
${src_post_enc_dir}/cl_neuromorphic.sv \
|
| 8 |
+
]
|
| 9 |
+
|
| 10 |
+
# RTL modules are plain Verilog
|
| 11 |
+
read_verilog [ list \
|
| 12 |
+
${src_post_enc_dir}/cl_neuromorphic_defines.vh \
|
| 13 |
+
${src_post_enc_dir}/async_fifo.v \
|
| 14 |
+
${src_post_enc_dir}/axi_uart_bridge.v \
|
| 15 |
+
${src_post_enc_dir}/sram.v \
|
| 16 |
+
${src_post_enc_dir}/spike_fifo.v \
|
| 17 |
+
${src_post_enc_dir}/scalable_core_v2.v \
|
| 18 |
+
${src_post_enc_dir}/neuromorphic_mesh.v \
|
| 19 |
+
${src_post_enc_dir}/async_noc_mesh.v \
|
| 20 |
+
${src_post_enc_dir}/async_router.v \
|
| 21 |
+
${src_post_enc_dir}/sync_tree.v \
|
| 22 |
+
${src_post_enc_dir}/chip_link.v \
|
| 23 |
+
${src_post_enc_dir}/host_interface.v \
|
| 24 |
+
${src_post_enc_dir}/neuromorphic_top.v \
|
| 25 |
+
${src_post_enc_dir}/rv32i_core.v \
|
| 26 |
+
${src_post_enc_dir}/rv32im_cluster.v \
|
| 27 |
+
${src_post_enc_dir}/mmio_bridge.v \
|
| 28 |
+
${src_post_enc_dir}/multi_chip_router.v \
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
print "Reading user constraints"
|
| 32 |
+
read_xdc [ list \
|
| 33 |
+
${constraints_dir}/cl_synth_user.xdc \
|
| 34 |
+
${constraints_dir}/cl_timing_user.xdc \
|
| 35 |
+
]
|
| 36 |
+
set_property PROCESSING_ORDER LATE [get_files cl_synth_user.xdc]
|
| 37 |
+
set_property PROCESSING_ORDER LATE [get_files cl_timing_user.xdc]
|
| 38 |
+
|
| 39 |
+
print "Starting synthesizing customer design ${CL}"
|
| 40 |
+
update_compile_order -fileset sources_1
|
| 41 |
+
|
| 42 |
+
synth_design -mode out_of_context \
|
| 43 |
+
-top ${CL} \
|
| 44 |
+
-verilog_define XSDB_SLV_DIS \
|
| 45 |
+
-part ${DEVICE_TYPE} \
|
| 46 |
+
-keep_equivalent_registers
|
| 47 |
+
|
| 48 |
+
source ${HDK_SHELL_DIR}/build/scripts/synth_cl_footer.tcl
|
fpga/f2_host.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Neuromorphic Chip F2 Host Controller
|
| 3 |
+
=====================================
|
| 4 |
+
Python driver for the neuromorphic FPGA on AWS F2, communicating via
|
| 5 |
+
PCIe MMIO (AXI-Lite registers) instead of UART.
|
| 6 |
+
|
| 7 |
+
Same byte-level protocol as host.py, different transport layer.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python fpga/f2_host.py --demo # Run demo (fpga_mgmt transport)
|
| 11 |
+
python fpga/f2_host.py --status # Query chip status
|
| 12 |
+
python fpga/f2_host.py --test-loopback # Connectivity test
|
| 13 |
+
python fpga/f2_host.py --test-spike # Spike chain test
|
| 14 |
+
python fpga/f2_host.py --transport mmap # Use mmap transport
|
| 15 |
+
|
| 16 |
+
Register map (BAR0 offsets, via fpga_mgmt BAR0):
|
| 17 |
+
0x000 [W] TX_DATA - write byte to host_interface
|
| 18 |
+
0x004 [R] TX_STATUS - bit[0] = ready (TX FIFO not full)
|
| 19 |
+
0x008 [R] RX_DATA - read response byte (auto-pops)
|
| 20 |
+
0x00C [R] RX_STATUS - bit[0] = not empty
|
| 21 |
+
0x010 [R/W] CONTROL - bit[0] = soft reset (self-clearing)
|
| 22 |
+
0x014 [R] VERSION - firmware version (e.g. 0xF2020310 = 16-core, 0xF2020380 = 128-core)
|
| 23 |
+
0x018 [R/W] SCRATCH - loopback register
|
| 24 |
+
0x01C [R] CORE_COUNT - number of cores
|
| 25 |
+
|
| 26 |
+
FPGA BRAM init workaround:
|
| 27 |
+
On FPGA, all SRAMs init to 0. For compartment system correctness,
|
| 28 |
+
each used neuron must have is_root=1 (param_id=24) and
|
| 29 |
+
parent_ptr=1023 (param_id=22) set explicitly. Use setup_neuron().
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
import struct
|
| 33 |
+
import time
|
| 34 |
+
import argparse
|
| 35 |
+
import sys
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class MmapTransport:
    """MMIO via mmap of /dev/fpga0_ocl BAR0.

    Registers are accessed as little-endian 32-bit words at byte offsets
    into the mapped region.

    Args:
        device: Path of the device node (or file) to map.
        bar_size: Number of bytes to map.

    Raises:
        OSError: If the device cannot be opened.
        ValueError / OSError: If the mapping itself fails.
    """

    def __init__(self, device="/dev/fpga0_ocl", bar_size=0x10000):
        import mmap
        import os
        self._mm = None
        fd = os.open(device, os.O_RDWR | os.O_SYNC)
        try:
            self._mm = mmap.mmap(fd, bar_size, access=mmap.ACCESS_WRITE)
        finally:
            # Always release the descriptor, including when mmap() raises
            # (previously it leaked on failure).  mmap keeps its own reference.
            os.close(fd)

    def write32(self, offset, value):
        """Write the low 32 bits of ``value`` at byte ``offset``."""
        struct.pack_into('<I', self._mm, offset, value & 0xFFFFFFFF)

    def read32(self, offset):
        """Return the 32-bit word at byte ``offset`` as an unsigned int."""
        return struct.unpack_from('<I', self._mm, offset)[0]

    def close(self):
        """Unmap the region; calling it more than once is harmless."""
        if self._mm is not None:
            self._mm.close()
            self._mm = None
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class FpgaMgmtTransport:
    """MMIO via AWS FPGA Management Library (libfpga_mgmt.so).

    Args:
        slot: FPGA slot number passed to ``fpga_pci_attach``.
        bar: BAR index passed to ``fpga_pci_attach`` (pf_id and flags are 0).

    Raises:
        OSError: If libfpga_mgmt.so cannot be loaded.
        RuntimeError: If library initialisation or the BAR attach fails.
    """

    def __init__(self, slot=0, bar=0):
        import ctypes
        self._ctypes = ctypes
        self._attached = False
        self._lib = ctypes.CDLL("libfpga_mgmt.so")
        lib = self._lib

        # Declare the C prototypes explicitly.  Without argtypes ctypes passes
        # every Python int as a C `int`, but the SDK header (fpga_pci.h)
        # declares the register offset as uint64_t and the value as uint32_t.
        lib.fpga_mgmt_init.argtypes = []
        lib.fpga_mgmt_init.restype = ctypes.c_int
        lib.fpga_pci_attach.argtypes = [
            ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_uint32,
            ctypes.POINTER(ctypes.c_int)]
        lib.fpga_pci_attach.restype = ctypes.c_int
        lib.fpga_pci_poke.argtypes = [
            ctypes.c_int, ctypes.c_uint64, ctypes.c_uint32]
        lib.fpga_pci_poke.restype = ctypes.c_int
        lib.fpga_pci_peek.argtypes = [
            ctypes.c_int, ctypes.c_uint64, ctypes.POINTER(ctypes.c_uint32)]
        lib.fpga_pci_peek.restype = ctypes.c_int
        lib.fpga_pci_detach.argtypes = [ctypes.c_int]
        lib.fpga_pci_detach.restype = ctypes.c_int

        # fpga_mgmt_init()
        rc = lib.fpga_mgmt_init()
        if rc != 0:
            raise RuntimeError(f"fpga_mgmt_init failed: {rc}")

        # fpga_pci_attach(slot, pf_id=0, bar, flags=0, &handle)
        self._handle = ctypes.c_int()
        rc = lib.fpga_pci_attach(slot, 0, bar, 0, ctypes.byref(self._handle))
        if rc != 0:
            raise RuntimeError(f"fpga_pci_attach failed: {rc}")
        self._attached = True

        self._poke = lib.fpga_pci_poke
        self._peek = lib.fpga_pci_peek

    def write32(self, offset, value):
        """Write the low 32 bits of ``value`` to the register at ``offset``.

        Raises:
            RuntimeError: If ``fpga_pci_poke`` returns non-zero.
        """
        rc = self._poke(self._handle, offset, value & 0xFFFFFFFF)
        if rc != 0:
            raise RuntimeError(f"fpga_pci_poke(0x{offset:X}, 0x{value:X}) failed: {rc}")

    def read32(self, offset):
        """Return the 32-bit register value at ``offset``.

        Raises:
            RuntimeError: If ``fpga_pci_peek`` returns non-zero.
        """
        val = self._ctypes.c_uint32()
        rc = self._peek(self._handle, offset, self._ctypes.byref(val))
        if rc != 0:
            raise RuntimeError(f"fpga_pci_peek(0x{offset:X}) failed: {rc}")
        return val.value

    def close(self):
        """Detach from the BAR; repeated calls detach only once."""
        if self._attached:
            self._attached = False
            self._lib.fpga_pci_detach(self._handle)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class F2NeuromorphicChip:
|
| 98 |
+
"""Interface to the neuromorphic FPGA via PCIe MMIO."""
|
| 99 |
+
|
| 100 |
+
# Register offsets
|
| 101 |
+
REG_TX_DATA = 0x000
|
| 102 |
+
REG_TX_STATUS = 0x004
|
| 103 |
+
REG_RX_DATA = 0x008
|
| 104 |
+
REG_RX_STATUS = 0x00C
|
| 105 |
+
REG_CONTROL = 0x010
|
| 106 |
+
REG_VERSION = 0x014
|
| 107 |
+
REG_SCRATCH = 0x018
|
| 108 |
+
REG_CORE_COUNT = 0x01C
|
| 109 |
+
|
| 110 |
+
# Command opcodes (same as host.py)
|
| 111 |
+
CMD_PROG_POOL = 0x01
|
| 112 |
+
CMD_PROG_ROUTE = 0x02
|
| 113 |
+
CMD_STIMULUS = 0x03
|
| 114 |
+
CMD_RUN = 0x04
|
| 115 |
+
CMD_STATUS = 0x05
|
| 116 |
+
CMD_LEARN_CFG = 0x06
|
| 117 |
+
CMD_PROG_NEURON = 0x07
|
| 118 |
+
CMD_PROG_INDEX = 0x08
|
| 119 |
+
CMD_REWARD = 0x09
|
| 120 |
+
CMD_PROG_DELAY = 0x0A
|
| 121 |
+
CMD_PROG_LEARN = 0x0C
|
| 122 |
+
CMD_PROG_GLOBAL_ROUTE = 0x10
|
| 123 |
+
|
| 124 |
+
# Parameter IDs
|
| 125 |
+
PARAM_THRESHOLD = 0
|
| 126 |
+
PARAM_LEAK = 1
|
| 127 |
+
PARAM_RESTING = 2
|
| 128 |
+
PARAM_REFRAC = 3
|
| 129 |
+
PARAM_DEND_THRESHOLD = 4
|
| 130 |
+
PARAM_DECAY_V = 16
|
| 131 |
+
PARAM_DECAY_U = 17
|
| 132 |
+
PARAM_BIAS_CFG = 18
|
| 133 |
+
PARAM_PARENT_PTR = 22
|
| 134 |
+
PARAM_JOINOP = 23
|
| 135 |
+
PARAM_IS_ROOT = 24
|
| 136 |
+
|
| 137 |
+
# Response codes
|
| 138 |
+
RESP_ACK = 0xAA
|
| 139 |
+
RESP_DONE = 0xDD
|
| 140 |
+
|
| 141 |
+
def __init__(self, transport='fpga_mgmt', slot=0, timeout=5.0):
|
| 142 |
+
if transport == 'mmap':
|
| 143 |
+
self._t = MmapTransport()
|
| 144 |
+
elif transport == 'fpga_mgmt':
|
| 145 |
+
self._t = FpgaMgmtTransport(slot=slot)
|
| 146 |
+
else:
|
| 147 |
+
raise ValueError(f"Unknown transport: {transport}")
|
| 148 |
+
|
| 149 |
+
self._timeout = timeout
|
| 150 |
+
self._pool_alloc = {}
|
| 151 |
+
|
| 152 |
+
# Verify connectivity
|
| 153 |
+
ver = self._t.read32(self.REG_VERSION)
|
| 154 |
+
cores = self._t.read32(self.REG_CORE_COUNT)
|
| 155 |
+
self._num_cores = cores
|
| 156 |
+
print(f"Connected via {transport}: version=0x{ver:08X}, cores={cores}")
|
| 157 |
+
|
| 158 |
+
def close(self):
|
| 159 |
+
self._t.close()
|
| 160 |
+
|
| 161 |
+
def _send(self, data):
|
| 162 |
+
"""Send bytes to host_interface via TX FIFO."""
|
| 163 |
+
for b in data:
|
| 164 |
+
deadline = time.monotonic() + self._timeout
|
| 165 |
+
while True:
|
| 166 |
+
status = self._t.read32(self.REG_TX_STATUS)
|
| 167 |
+
if status & 1:
|
| 168 |
+
break
|
| 169 |
+
if time.monotonic() > deadline:
|
| 170 |
+
raise TimeoutError("TX FIFO full timeout")
|
| 171 |
+
self._t.write32(self.REG_TX_DATA, b & 0xFF)
|
| 172 |
+
|
| 173 |
+
def _recv(self, n):
|
| 174 |
+
"""Receive n bytes from host_interface via RX FIFO."""
|
| 175 |
+
result = bytearray()
|
| 176 |
+
deadline = time.monotonic() + self._timeout
|
| 177 |
+
while len(result) < n:
|
| 178 |
+
status = self._t.read32(self.REG_RX_STATUS)
|
| 179 |
+
if status & 1: # not empty
|
| 180 |
+
val = self._t.read32(self.REG_RX_DATA)
|
| 181 |
+
result.append(val & 0xFF)
|
| 182 |
+
deadline = time.monotonic() + self._timeout # Reset per byte
|
| 183 |
+
elif time.monotonic() > deadline:
|
| 184 |
+
raise TimeoutError(
|
| 185 |
+
f"RX timeout: got {len(result)}/{n} bytes")
|
| 186 |
+
return bytes(result)
|
| 187 |
+
|
| 188 |
+
def _wait_ack(self):
|
| 189 |
+
"""Wait for ACK (0xAA) response."""
|
| 190 |
+
resp = self._recv(1)
|
| 191 |
+
if resp[0] != self.RESP_ACK:
|
| 192 |
+
raise ValueError(f"Expected ACK (0xAA), got 0x{resp[0]:02X}")
|
| 193 |
+
|
| 194 |
+
def _alloc_pool(self, core, count=1):
|
| 195 |
+
"""Allocate pool entries (bump allocator)."""
|
| 196 |
+
if core not in self._pool_alloc:
|
| 197 |
+
self._pool_alloc[core] = 0
|
| 198 |
+
addr = self._pool_alloc[core]
|
| 199 |
+
self._pool_alloc[core] += count
|
| 200 |
+
return addr
|
| 201 |
+
|
| 202 |
+
def soft_reset(self):
|
| 203 |
+
"""Issue a soft reset (clears FIFOs)."""
|
| 204 |
+
self._t.write32(self.REG_CONTROL, 1)
|
| 205 |
+
time.sleep(0.001)
|
| 206 |
+
|
| 207 |
+
def read_version(self):
|
| 208 |
+
return self._t.read32(self.REG_VERSION)
|
| 209 |
+
|
| 210 |
+
def read_core_count(self):
|
| 211 |
+
return self._t.read32(self.REG_CORE_COUNT)
|
| 212 |
+
|
| 213 |
+
def test_scratch(self, value=0xDEADBEEF):
|
| 214 |
+
"""Write/read SCRATCH register for loopback test."""
|
| 215 |
+
self._t.write32(self.REG_SCRATCH, value)
|
| 216 |
+
readback = self._t.read32(self.REG_SCRATCH)
|
| 217 |
+
return readback == value, readback
|
| 218 |
+
|
| 219 |
+
def prog_pool(self, core, pool_addr, src, target, weight, comp=0):
    """Program one connection-pool entry and wait for the ACK.

    Sends a 9-byte CMD_PROG_POOL packet: opcode, core, pool address
    (2 bytes, big-endian), flags, low byte of ``src``, low byte of
    ``target``, weight (2 bytes, big-endian).

    Flags byte layout: comp[1:0] in bits 7:6, src[9:8] in bits 5:4,
    target[9:8] in bits 3:2, bits 1:0 zero.

    Args:
        core: Core ID (low 8 bits are sent).
        pool_addr: Pool address (low 16 bits are sent).
        src: Source neuron index (10 bits are encoded).
        target: Target neuron index (10 bits are encoded).
        weight: Weight, truncated to 16 bits (two's complement).
        comp: Compartment ID (2 bits).
    """
    addr_hi, addr_lo = divmod(pool_addr & 0xFFFF, 0x100)
    weight_hi, weight_lo = divmod(weight & 0xFFFF, 0x100)
    comp_bits = (comp & 0x3) << 6
    src_bits = ((src >> 8) & 0x3) << 4
    target_bits = ((target >> 8) & 0x3) << 2
    packet = [
        self.CMD_PROG_POOL,
        core & 0xFF,
        addr_hi, addr_lo,
        comp_bits | src_bits | target_bits,
        src & 0xFF,
        target & 0xFF,
        weight_hi, weight_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 232 |
+
|
| 233 |
+
def prog_index(self, core, neuron, base_addr, count, format=0, base_target=0):
    """Program the index entry (pool base + count) of one neuron.

    Sends an 8-byte CMD_PROG_INDEX packet: opcode, core, neuron (2 bytes,
    big-endian), base address (2 bytes, big-endian), then a byte holding
    format[1:0] in bits 7:6 and count[13:8] in bits 5:0, then count[7:0].
    Waits for the ACK afterwards.

    Args:
        core: Core ID (low 8 bits are sent).
        neuron: Neuron index (low 16 bits are sent).
        base_addr: First pool address of the neuron's connections.
        count: Number of connections (14 bits are encoded).
        format: Synapse format code (2 bits).
        base_target: Accepted for signature compatibility; this packet
            layout does not encode it.
            NOTE(review): confirm the F2 RTL really has no base_target field.
    """
    neuron_hi, neuron_lo = divmod(neuron & 0xFFFF, 0x100)
    base_hi, base_lo = divmod(base_addr & 0xFFFF, 0x100)
    format_and_count_hi = ((format & 0x3) << 6) | ((count >> 8) & 0x3F)
    packet = [
        self.CMD_PROG_INDEX,
        core & 0xFF,
        neuron_hi, neuron_lo,
        base_hi, base_lo,
        format_and_count_hi, count & 0xFF,
    ]
    self._send(packet)
    self._wait_ack()
|
| 242 |
+
|
| 243 |
+
def prog_conn(self, core, src, targets_weights, comp=0):
    """Program every outgoing connection of one source neuron.

    Reserves a contiguous pool range with the bump allocator, writes one
    pool entry per ``(target, weight)`` pair, then writes the neuron's
    index entry pointing at that range. An empty list is a no-op.

    Args:
        core: Core ID.
        src: Source neuron index.
        targets_weights: Sequence of ``(target, weight)`` tuples.
        comp: Compartment ID passed through to every pool entry.
    """
    if not targets_weights:
        return
    fanout = len(targets_weights)
    base = self._alloc_pool(core, fanout)
    pool_addr = base
    for target, weight in targets_weights:
        self.prog_pool(core, pool_addr, src, target, weight, comp)
        pool_addr += 1
    self.prog_index(core, src, base, fanout)
|
| 250 |
+
|
| 251 |
+
def prog_route(self, src_core, src_neuron, dest_core, dest_neuron, weight, slot=0):
    """Program one inter-core route slot and wait for the ACK.

    Sends a 10-byte CMD_PROG_ROUTE packet: opcode, source core, source
    neuron (2 bytes, big-endian), slot, destination core, destination
    neuron (2 bytes, big-endian), weight (2 bytes, big-endian).

    Args:
        src_core: Source core ID (low 8 bits are sent).
        src_neuron: Source neuron index (low 16 bits are sent).
        dest_core: Destination core ID (low 8 bits are sent).
        dest_neuron: Destination neuron index (low 16 bits are sent).
        weight: Weight, truncated to 16 bits (two's complement).
        slot: Route slot number (low 8 bits are sent).
    """
    src_hi, src_lo = divmod(src_neuron & 0xFFFF, 0x100)
    dest_hi, dest_lo = divmod(dest_neuron & 0xFFFF, 0x100)
    weight_hi, weight_lo = divmod(weight & 0xFFFF, 0x100)
    packet = [
        self.CMD_PROG_ROUTE,
        src_core & 0xFF,
        src_hi, src_lo,
        slot & 0xFF,
        dest_core & 0xFF,
        dest_hi, dest_lo,
        weight_hi, weight_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 263 |
+
|
| 264 |
+
def stimulus(self, core, neuron, current):
    """Send an external stimulus current for one neuron and wait for the ACK.

    Packet (6 bytes): opcode, core, neuron (2 bytes, big-endian),
    current (2 bytes, big-endian).

    Args:
        core: Core ID (low 8 bits are sent).
        neuron: Neuron index (low 16 bits are sent).
        current: Current value, truncated to 16 bits (two's complement).
    """
    neuron_hi, neuron_lo = divmod(neuron & 0xFFFF, 0x100)
    current_hi, current_lo = divmod(current & 0xFFFF, 0x100)
    packet = [
        self.CMD_STIMULUS,
        core & 0xFF,
        neuron_hi, neuron_lo,
        current_hi, current_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 273 |
+
|
| 274 |
+
def run(self, timesteps):
    """Run the mesh for a number of timesteps and return the spike count.

    Sends CMD_RUN followed by the timestep count (2 bytes, big-endian),
    then reads a 5-byte reply: a DONE marker and a 32-bit big-endian
    spike count.

    Args:
        timesteps: Number of timesteps; only the low 16 bits are sent.

    Returns:
        The spike count reported in the reply.

    Raises:
        ValueError: If the first reply byte is not ``self.RESP_DONE``.
    """
    steps_hi, steps_lo = divmod(timesteps & 0xFFFF, 0x100)
    self._send([self.CMD_RUN, steps_hi, steps_lo])
    reply = self._recv(5)
    marker = reply[0]
    if marker != self.RESP_DONE:
        raise ValueError(f"Expected DONE (0xDD), got 0x{marker:02X}")
    (spike_total,) = struct.unpack('>I', reply[1:5])
    return spike_total
|
| 285 |
+
|
| 286 |
+
def status(self):
    """Query the chip status.

    Sends CMD_STATUS and reads a 5-byte reply.

    Returns:
        ``(state, ts_count)``: the first reply byte, and the remaining
        four bytes decoded as a big-endian unsigned 32-bit integer.
    """
    self._send([self.CMD_STATUS])
    reply = self._recv(5)
    state = reply[0]
    (ts_count,) = struct.unpack('>I', reply[1:5])
    return state, ts_count
|
| 292 |
+
|
| 293 |
+
def reward(self, value):
    """Send a reward value and wait for the ACK.

    Packet (3 bytes): opcode, then the value as 2 bytes, big-endian.

    Args:
        value: Reward, truncated to 16 bits (two's complement).
    """
    value_hi, value_lo = divmod(value & 0xFFFF, 0x100)
    self._send([self.CMD_REWARD, value_hi, value_lo])
    self._wait_ack()
|
| 300 |
+
|
| 301 |
+
def set_learning(self, learn_enable, graded_enable=False, dendritic_enable=False,
                 async_enable=False, threefactor_enable=False, noise_enable=False):
    """Send the learning/mode configuration flags and wait for the ACK.

    The flags byte carries one bit per option, in parameter order:
    bit 0 learn, bit 1 graded, bit 2 dendritic, bit 3 async,
    bit 4 three-factor, bit 5 noise. Each argument is converted with
    ``int()`` and only its lowest bit is used.
    """
    switches = (learn_enable, graded_enable, dendritic_enable,
                async_enable, threefactor_enable, noise_enable)
    flags = 0
    for bit, enabled in enumerate(switches):
        flags |= (int(enabled) & 1) << bit
    self._send([self.CMD_LEARN_CFG, flags])
    self._wait_ack()
|
| 311 |
+
|
| 312 |
+
def prog_neuron(self, core, neuron, param_id, value):
    """Program one parameter of one neuron and wait for the ACK.

    Packet (7 bytes): opcode, core, neuron (2 bytes, big-endian),
    parameter ID, value (2 bytes, big-endian).

    Args:
        core: Core ID (low 8 bits are sent).
        neuron: Neuron index (low 16 bits are sent).
        param_id: Parameter selector (low 8 bits are sent).
        value: Parameter value, truncated to 16 bits.
    """
    neuron_hi, neuron_lo = divmod(neuron & 0xFFFF, 0x100)
    value_hi, value_lo = divmod(value & 0xFFFF, 0x100)
    packet = [
        self.CMD_PROG_NEURON,
        core & 0xFF,
        neuron_hi, neuron_lo,
        param_id & 0xFF,
        value_hi, value_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 322 |
+
|
| 323 |
+
def setup_neuron(self, core, neuron, threshold=1000):
    """Prepare one neuron for standalone operation on the FPGA.

    FPGA BRAMs initialise to 0, which breaks the compartment system:
      - is_root=0 means spikes are never counted externally
      - parent_ptr=0 means every neuron cascades to neuron 0

    Three parameters are therefore programmed, in this order: the firing
    threshold, parent_ptr = 1023 (the no-parent sentinel), and is_root = 1.

    Args:
        core: Core ID.
        neuron: Neuron index.
        threshold: Firing threshold to program.
    """
    program = self.prog_neuron
    program(core, neuron, self.PARAM_THRESHOLD, threshold)
    program(core, neuron, self.PARAM_PARENT_PTR, 1023)  # no-parent sentinel
    program(core, neuron, self.PARAM_IS_ROOT, 1)
|
| 336 |
+
|
| 337 |
+
def setup_neurons(self, neuron_list):
    """Call ``setup_neuron`` for each ``(core, neuron, threshold)`` entry.

    Args:
        neuron_list: Iterable of 3-tuples ``(core, neuron, threshold)``.
    """
    for entry in neuron_list:
        core, neuron, threshold = entry
        self.setup_neuron(core, neuron, threshold)
|
| 341 |
+
|
| 342 |
+
def prog_delay(self, core, pool_addr, delay):
    """Program the delay of one pool entry and wait for the ACK.

    Packet (5 bytes): opcode, core, pool address (2 bytes, big-endian),
    delay (6 bits).

    Args:
        core: Core ID (low 8 bits are sent).
        pool_addr: Pool address of the connection (low 16 bits are sent).
        delay: Delay value; only the low 6 bits (0-63) are sent.
    """
    addr_hi, addr_lo = divmod(pool_addr & 0xFFFF, 0x100)
    packet = [
        self.CMD_PROG_DELAY,
        core & 0xFF,
        addr_hi, addr_lo,
        delay & 0x3F,
    ]
    self._send(packet)
    self._wait_ack()
|
| 350 |
+
|
| 351 |
+
def prog_learn(self, core, addr, instr):
    """Program one learning-microcode instruction and wait for the ACK.

    Packet (7 bytes): opcode, core, instruction address (6 bits), then
    the 32-bit instruction word as 4 bytes, most significant byte first.

    Args:
        core: Core ID (low 8 bits are sent).
        addr: Instruction address; only the low 6 bits (0-63) are sent.
        instr: Instruction word; the low 32 bits are sent.
    """
    instr_bytes = [(instr >> shift) & 0xFF for shift in (24, 16, 8, 0)]
    packet = [self.CMD_PROG_LEARN, core & 0xFF, addr & 0x3F]
    packet.extend(instr_bytes)
    self._send(packet)
    self._wait_ack()
|
| 362 |
+
|
| 363 |
+
def prog_global_route(self, src_core, src_neuron, dest_core, dest_neuron,
                      weight, slot=0):
    """Program one global route slot and wait for the ACK.

    Sends a 10-byte CMD_PROG_GLOBAL_ROUTE packet with the same field
    order as ``prog_route``: opcode, source core, source neuron (2 bytes,
    big-endian), slot, destination core, destination neuron (2 bytes,
    big-endian), weight (2 bytes, big-endian).

    Args:
        src_core: Source core ID (low 8 bits are sent).
        src_neuron: Source neuron index (low 16 bits are sent).
        dest_core: Destination core ID (low 8 bits are sent).
        dest_neuron: Destination neuron index (low 16 bits are sent).
        weight: Weight, truncated to 16 bits (two's complement).
        slot: Route slot number (low 8 bits are sent).
    """
    src_hi, src_lo = divmod(src_neuron & 0xFFFF, 0x100)
    dest_hi, dest_lo = divmod(dest_neuron & 0xFFFF, 0x100)
    weight_hi, weight_lo = divmod(weight & 0xFFFF, 0x100)
    packet = [
        self.CMD_PROG_GLOBAL_ROUTE,
        src_core & 0xFF,
        src_hi, src_lo,
        slot & 0xFF,
        dest_core & 0xFF,
        dest_hi, dest_lo,
        weight_hi, weight_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
def test_loopback(chip):
|
| 379 |
+
"""Basic connectivity test: registers only, no mesh interaction."""
|
| 380 |
+
print("\n" + "=" * 60)
|
| 381 |
+
print(" F2 Loopback Test")
|
| 382 |
+
print("=" * 60)
|
| 383 |
+
passed = 0
|
| 384 |
+
total = 0
|
| 385 |
+
|
| 386 |
+
# VERSION
|
| 387 |
+
total += 1
|
| 388 |
+
ver = chip.read_version()
|
| 389 |
+
if ver == 0xF2020310:
|
| 390 |
+
print(f" [PASS] VERSION = 0x{ver:08X}")
|
| 391 |
+
passed += 1
|
| 392 |
+
else:
|
| 393 |
+
print(f" [FAIL] VERSION = 0x{ver:08X} (expected 0xF2020310)")
|
| 394 |
+
|
| 395 |
+
# CORE_COUNT
|
| 396 |
+
total += 1
|
| 397 |
+
cores = chip.read_core_count()
|
| 398 |
+
if cores == 16:
|
| 399 |
+
print(f" [PASS] CORE_COUNT = {cores}")
|
| 400 |
+
passed += 1
|
| 401 |
+
else:
|
| 402 |
+
print(f" [FAIL] CORE_COUNT = {cores} (expected 16)")
|
| 403 |
+
|
| 404 |
+
# SCRATCH
|
| 405 |
+
total += 1
|
| 406 |
+
ok, val = chip.test_scratch(0xDEADBEEF)
|
| 407 |
+
if ok:
|
| 408 |
+
print(f" [PASS] SCRATCH loopback = 0x{val:08X}")
|
| 409 |
+
passed += 1
|
| 410 |
+
else:
|
| 411 |
+
print(f" [FAIL] SCRATCH = 0x{val:08X} (expected 0xDEADBEEF)")
|
| 412 |
+
|
| 413 |
+
total += 1
|
| 414 |
+
ok, val = chip.test_scratch(0x12345678)
|
| 415 |
+
if ok:
|
| 416 |
+
print(f" [PASS] SCRATCH loopback = 0x{val:08X}")
|
| 417 |
+
passed += 1
|
| 418 |
+
else:
|
| 419 |
+
print(f" [FAIL] SCRATCH = 0x{val:08X} (expected 0x12345678)")
|
| 420 |
+
|
| 421 |
+
# STATUS command
|
| 422 |
+
total += 1
|
| 423 |
+
try:
|
| 424 |
+
state, ts = chip.status()
|
| 425 |
+
print(f" [PASS] STATUS: state={state}, ts_count={ts}")
|
| 426 |
+
passed += 1
|
| 427 |
+
except Exception as e:
|
| 428 |
+
print(f" [FAIL] STATUS: {e}")
|
| 429 |
+
|
| 430 |
+
print(f"\n Result: {passed}/{total} passed")
|
| 431 |
+
print("=" * 60)
|
| 432 |
+
return passed == total
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
def test_spike(chip):
|
| 436 |
+
"""Program a 2-neuron chain, inject spike, verify propagation."""
|
| 437 |
+
print("\n" + "=" * 60)
|
| 438 |
+
print(" F2 Spike Test")
|
| 439 |
+
print("=" * 60)
|
| 440 |
+
|
| 441 |
+
# Soft reset to clear any previous state
|
| 442 |
+
chip.soft_reset()
|
| 443 |
+
chip._pool_alloc = {}
|
| 444 |
+
|
| 445 |
+
state, ts = chip.status()
|
| 446 |
+
print(f" Initial: state={state}, ts={ts}")
|
| 447 |
+
|
| 448 |
+
# Setup neurons (FPGA BRAM init workaround)
|
| 449 |
+
print(" Setting up neurons (is_root=1, parent_ptr=1023)...")
|
| 450 |
+
chip.setup_neuron(0, 0, threshold=1000)
|
| 451 |
+
chip.setup_neuron(0, 1, threshold=1000)
|
| 452 |
+
|
| 453 |
+
# Program: Core 0, N0→N1 (w=1200 > threshold=1000)
|
| 454 |
+
print(" Programming: N0 -> N1 (w=1200)")
|
| 455 |
+
chip.prog_conn(0, 0, [(1, 1200)])
|
| 456 |
+
|
| 457 |
+
# Stimulate N0
|
| 458 |
+
print(" Stimulating: Core 0, N0, current=1200")
|
| 459 |
+
chip.stimulus(core=0, neuron=0, current=1200)
|
| 460 |
+
|
| 461 |
+
# Run 5 timesteps
|
| 462 |
+
print(" Running 5 timesteps...")
|
| 463 |
+
t0 = time.monotonic()
|
| 464 |
+
spikes = chip.run(5)
|
| 465 |
+
dt = time.monotonic() - t0
|
| 466 |
+
print(f" Result: {spikes} spikes in {dt*1000:.1f} ms")
|
| 467 |
+
|
| 468 |
+
if spikes > 0:
|
| 469 |
+
print(" [PASS] Spike propagation detected")
|
| 470 |
+
else:
|
| 471 |
+
print(" [FAIL] No spikes (expected > 0)")
|
| 472 |
+
|
| 473 |
+
print("=" * 60)
|
| 474 |
+
return spikes > 0
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def demo(chip):
    """Run full demo: program cross-core spike chain, run, observe.

    Steps:
      1. Soft-reset the chip and clear the host-side pool allocator.
      2. Configure neurons 0-3 on core 0 and 0-2 on core 1 (threshold 1000).
      3. Program the chain N0 -> N1 -> N2 -> N3 on core 0 (weight 1200).
      4. Program a cross-core route C0:N3 -> C1:N0 (weight 1200).
      5. Program the chain N0 -> N1 -> N2 on core 1.
      6. Stimulate C0:N0 with current 1200, run 20 timesteps and report
         the spike count, elapsed time and throughput.
      7. Run 10 more timesteps without stimulus, then print final status.

    Args:
        chip: Controller object providing soft_reset, status,
            setup_neurons, prog_conn, prog_route, stimulus and run.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" Neuromorphic Chip F2 Demo (16-core, PCIe MMIO)")
    print(rule)

    chip.soft_reset()
    chip._pool_alloc = {}

    state, ts = chip.status()
    print(f"\nInitial status: state={state}, timesteps={ts}")

    # Setup neurons (FPGA BRAM init workaround)
    print("\nSetting up neurons (is_root=1, parent_ptr=1023)...")
    neurons = [(0, i, 1000) for i in range(4)] + [(1, i, 1000) for i in range(3)]
    chip.setup_neurons(neurons)
    print(f" {len(neurons)} neurons configured")

    # Program a spike chain: Core 0, N0 -> N1 -> N2 -> N3
    print("\nProgramming spike chain: Core 0, N0 -> N1 -> N2 -> N3")
    chip.prog_conn(0, 0, [(1, 1200)])
    print(" N0 -> N1 (w=1200) OK")
    chip.prog_conn(0, 1, [(2, 1200)])
    print(" N1 -> N2 (w=1200) OK")
    chip.prog_conn(0, 2, [(3, 1200)])
    print(" N2 -> N3 (w=1200) OK")

    # Cross-core route: Core 0 N3 -> Core 1 N0
    print("\nProgramming cross-core route: C0:N3 -> C1:N0")
    chip.prog_route(src_core=0, src_neuron=3,
                    dest_core=1, dest_neuron=0, weight=1200)
    print(" Route OK")

    # Core 1 chain
    print("Programming Core 1 chain: N0 -> N1 -> N2")
    chip.prog_conn(1, 0, [(1, 1200)])
    chip.prog_conn(1, 1, [(2, 1200)])
    print(" Core 1 chain OK")

    # Stimulate and run
    print("\nApplying stimulus: Core 0, N0, current=1200")
    chip.stimulus(core=0, neuron=0, current=1200)

    print("Running 20 timesteps...")
    t0 = time.monotonic()
    spikes = chip.run(20)
    dt = time.monotonic() - t0
    print(f" Done! {spikes} spikes in {dt*1000:.1f} ms")
    # time.monotonic() has finite resolution, so a fast run can measure as
    # exactly 0.0 s; dividing by it would raise ZeroDivisionError.
    if dt > 0:
        print(f" Throughput: {20/dt:.0f} timesteps/sec")
    else:
        print(" Throughput: n/a (run finished below timer resolution)")

    # Run more without stimulus
    print("\nRunning 10 more timesteps (no stimulus)...")
    spikes2 = chip.run(10)
    print(f" {spikes2} spikes (should be 0 - no input)")

    # Final status
    state, ts = chip.status()
    print(f"\nFinal status: state={state}, timesteps={ts}")

    print("\n" + rule)
    print(" Demo complete! The chip is alive on F2.")
    print(rule)
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
def main():
    """Command-line entry point for the F2 host controller.

    Parses the arguments, opens the chip, and runs at most one action,
    chosen in this priority order: --test-loopback, --test-spike,
    --status, --demo. The two test actions exit the process with status 0
    on success and 1 on failure. The chip is always closed on the way out.
    """
    parser = argparse.ArgumentParser(
        description="Neuromorphic Chip F2 Host Controller (PCIe MMIO)")
    parser.add_argument("--transport", choices=["mmap", "fpga_mgmt"],
                        default="fpga_mgmt", help="MMIO transport (default: fpga_mgmt)")
    parser.add_argument("--slot", type=int, default=0,
                        help="FPGA slot (default: 0)")
    action_flags = (
        ("--demo", "Run full demo"),
        ("--status", "Query chip status"),
        ("--test-loopback", "Run loopback connectivity test"),
        ("--test-spike", "Run spike propagation test"),
    )
    for flag, help_text in action_flags:
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    chip = F2NeuromorphicChip(transport=args.transport, slot=args.slot)

    try:
        # sys.exit() raises SystemExit, so each test branch ends the dispatch.
        if args.test_loopback:
            sys.exit(0 if test_loopback(chip) else 1)
        if args.test_spike:
            sys.exit(0 if test_spike(chip) else 1)
        if args.status:
            state, ts = chip.status()
            print(f"State: {state} ({'idle' if state == 0 else 'busy'})")
            print(f"Timestep count: {ts}")
        elif args.demo:
            demo(chip)
        else:
            print("No command specified. Use --demo, --status, --test-loopback, or --test-spike")
    finally:
        chip.close()
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
|
| 580 |
+
main()
|
fpga/fpga_top.v
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// FPGA Top
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// fpga_top: board-level wrapper around neuromorphic_top.
//
//   - Synchronises and debounces the active-high reset button.
//   - Holds the design in reset for 2^POR_BITS clock cycles after
//     configuration (power-on reset).
//   - Instantiates a 4-core (2x2 mesh), 256-neuron-per-core neuromorphic_top
//     with the chip link disabled and the external byte interface tied off.
//   - Drives four status LEDs:
//       led[0]  heartbeat (bit 25 of a free-running counter)
//       led[1]  UART RX activity (stretched pulse)
//       led[2]  UART TX activity (stretched pulse)
//       led[3]  any UART activity (stretched further)
//
// Parameters:
//   CLK_FREQ  clock frequency in Hz, forwarded to neuromorphic_top
//   BAUD      UART baud rate, forwarded to neuromorphic_top
//   POR_BITS  width of both the power-on-reset and the debounce counters
// ----------------------------------------------------------------------------
module fpga_top #(
    parameter CLK_FREQ = 100_000_000,
    parameter BAUD     = 115200,
    parameter POR_BITS = 20
)(
    input  wire       clk,
    input  wire       btn_rst,   // Active-high
    input  wire       uart_rxd,
    output wire       uart_txd,
    output reg  [3:0] led
);

    reg [POR_BITS-1:0] debounce_cnt;
    reg                btn_sync1, btn_sync2;
    reg                btn_stable;
    wire               rst_n;

    // Two-flop synchroniser for the asynchronous push button.
    always @(posedge clk) begin
        btn_sync1 <= btn_rst;
        btn_sync2 <= btn_sync1;
    end

    // Debounce: the synchronised button must differ from the accepted value
    // for 2^POR_BITS consecutive cycles before the accepted value changes.
    always @(posedge clk) begin
        if (btn_sync2 != btn_stable) begin
            debounce_cnt <= debounce_cnt + 1;
            if (debounce_cnt == {POR_BITS{1'b1}}) begin
                btn_stable   <= btn_sync2;
                debounce_cnt <= 0;
            end
        end else begin
            debounce_cnt <= 0;
        end
    end

    // Power-on reset: count up once after configuration, then latch por_done.
    reg [POR_BITS-1:0] por_cnt;
    reg                por_done;

    always @(posedge clk) begin
        if (!por_done) begin
            por_cnt <= por_cnt + 1;
            if (por_cnt == {POR_BITS{1'b1}})
                por_done <= 1;
        end
    end

    // Configuration-time register values. These registers have no reset
    // input because they generate the reset themselves.
    initial begin
        por_cnt      = 0;
        por_done     = 0;
        btn_stable   = 0;
        debounce_cnt = 0;
        // Give the button synchroniser a defined start value so simulation
        // does not begin with X on the debounce comparison.
        btn_sync1    = 0;
        btn_sync2    = 0;
    end

    // Active-low reset: released once POR has finished and the debounced
    // button is not pressed.
    assign rst_n = por_done & ~btn_stable;

    neuromorphic_top #(
        .CLK_FREQ               (CLK_FREQ),
        .BAUD                   (BAUD),
        .NUM_CORES              (4),
        .CORE_ID_BITS           (2),
        .NUM_NEURONS            (256),
        .NEURON_BITS            (8),
        .DATA_WIDTH             (16),
        .POOL_DEPTH             (8192),
        .POOL_ADDR_BITS         (13),
        .COUNT_BITS             (6),
        .REV_FANIN              (16),
        .REV_SLOT_BITS          (4),
        .THRESHOLD              (16'sd1000),
        .LEAK_RATE              (16'sd3),
        .REFRAC_CYCLES          (3),
        .ROUTE_FANOUT           (8),
        .ROUTE_SLOT_BITS        (3),
        .GLOBAL_ROUTE_SLOTS     (4),
        .GLOBAL_ROUTE_SLOT_BITS (2),
        .CHIP_LINK_EN           (0),
        .NOC_MODE               (0),
        .MESH_X                 (2),
        .MESH_Y                 (2)
    ) u_neuromorphic (
        .clk           (clk),
        .rst_n         (rst_n),
        .uart_rxd      (uart_rxd),
        .uart_txd      (uart_txd),
        // Chip-to-chip link unused on this board: outputs open, inputs idle.
        .link_tx_data  (),
        .link_tx_valid (),
        .link_tx_ready (1'b0),
        .link_rx_data  (8'd0),
        .link_rx_valid (1'b0),
        .link_rx_ready (),
        // External byte interface unused: outputs open, inputs idle.
        .rx_data_ext   (8'd0),
        .rx_valid_ext  (1'b0),
        .tx_data_ext   (),
        .tx_valid_ext  (),
        .tx_ready_ext  (1'b0)
    );

    // Heartbeat: free-running counter, MSB drives led[0].
    reg [25:0] heartbeat_cnt;
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            heartbeat_cnt <= 0;
        else
            heartbeat_cnt <= heartbeat_cnt + 1;
    end

    // RX activity: a falling edge on the RX line reloads a down-counter so
    // the LED stays lit long enough to be seen.
    //
    // uart_rxd is asynchronous to clk. It is passed through a two-flop
    // synchroniser first so that the edge detector and every bit of the
    // counter see the same sampled value; sampling the raw pin directly
    // would let different flops disagree on an edge.
    reg [22:0] rx_blink_cnt;
    wire       rx_activity;
    reg        rxd_meta, rxd_sync;
    reg        rxd_prev;
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rxd_meta     <= 1;   // UART line idles high
            rxd_sync     <= 1;
            rxd_prev     <= 1;
            rx_blink_cnt <= 0;
        end else begin
            rxd_meta <= uart_rxd;
            rxd_sync <= rxd_meta;
            rxd_prev <= rxd_sync;
            if (rxd_prev && !rxd_sync)
                rx_blink_cnt <= {23{1'b1}};
            else if (rx_blink_cnt != 0)
                rx_blink_cnt <= rx_blink_cnt - 1;
        end
    end
    assign rx_activity = (rx_blink_cnt != 0);

    // TX activity: same pulse stretcher on the falling edge of uart_txd.
    reg        txd_prev;
    reg [22:0] tx_blink_cnt;
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            txd_prev     <= 1;
            tx_blink_cnt <= 0;
        end else begin
            txd_prev <= uart_txd;
            if (txd_prev && !uart_txd)
                tx_blink_cnt <= {23{1'b1}};
            else if (tx_blink_cnt != 0)
                tx_blink_cnt <= tx_blink_cnt - 1;
        end
    end

    // Combined activity: reloaded while either RX or TX is active, then
    // counts down to zero.
    reg [22:0] activity_cnt;
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            activity_cnt <= 0;
        else if (rx_activity || tx_blink_cnt != 0)
            activity_cnt <= {23{1'b1}};
        else if (activity_cnt != 0)
            activity_cnt <= activity_cnt - 1;
    end

    // LED mapping (combinational).
    always @(*) begin
        led[0] = heartbeat_cnt[25];
        led[1] = rx_activity;
        led[2] = (tx_blink_cnt != 0);
        led[3] = (activity_cnt != 0);
    end

endmodule
|
fpga/host.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Neuromorphic Chip Host Controller
|
| 3 |
+
==================================
|
| 4 |
+
Python script to communicate with the neuromorphic FPGA over UART.
|
| 5 |
+
|
| 6 |
+
v1.0 Loihi parity: CSR pool, multicast routing, noise, dual traces,
|
| 7 |
+
axon delays, synapse formats, microcode learning, hierarchical routing.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python fpga/host.py --port COM3 # Windows
|
| 11 |
+
python fpga/host.py --port /dev/ttyUSB1 # Linux
|
| 12 |
+
|
| 13 |
+
Commands:
|
| 14 |
+
python fpga/host.py --port COM3 --demo # Run demo (program chain, stimulate, run)
|
| 15 |
+
python fpga/host.py --port COM3 --status # Query chip status
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import serial
|
| 19 |
+
import struct
|
| 20 |
+
import time
|
| 21 |
+
import argparse
|
| 22 |
+
import sys
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class NeuromorphicChip:
|
| 26 |
+
"""Interface to the neuromorphic FPGA over UART."""
|
| 27 |
+
|
| 28 |
+
# Command opcodes (Phase 13a protocol)
|
| 29 |
+
CMD_PROG_POOL = 0x01
|
| 30 |
+
CMD_PROG_ROUTE = 0x02
|
| 31 |
+
CMD_STIMULUS = 0x03
|
| 32 |
+
CMD_RUN = 0x04
|
| 33 |
+
CMD_STATUS = 0x05
|
| 34 |
+
CMD_LEARN_CFG = 0x06
|
| 35 |
+
CMD_PROG_NEURON = 0x07
|
| 36 |
+
CMD_PROG_INDEX = 0x08
|
| 37 |
+
CMD_REWARD = 0x09
|
| 38 |
+
CMD_PROG_DELAY = 0x0A
|
| 39 |
+
CMD_PROG_LEARN = 0x0C
|
| 40 |
+
CMD_PROG_GLOBAL_ROUTE = 0x10
|
| 41 |
+
|
| 42 |
+
# Parameter IDs for CMD_PROG_NEURON
|
| 43 |
+
PARAM_THRESHOLD = 0
|
| 44 |
+
PARAM_LEAK = 1
|
| 45 |
+
PARAM_RESTING = 2
|
| 46 |
+
PARAM_REFRAC = 3
|
| 47 |
+
PARAM_DEND_THRESHOLD = 4
|
| 48 |
+
|
| 49 |
+
# Response codes
|
| 50 |
+
RESP_ACK = 0xAA
|
| 51 |
+
RESP_DONE = 0xDD
|
| 52 |
+
|
| 53 |
+
def __init__(self, port, baud=115200, timeout=10):
    """Open the serial link to the FPGA.

    Opens the port, waits 100 ms, discards any bytes already buffered on
    the input side, and starts with an empty per-core pool allocator.

    Args:
        port: Serial port name (for example ``COM3`` or ``/dev/ttyUSB1``).
        baud: Baud rate.
        timeout: Read timeout in seconds passed to ``serial.Serial``.
    """
    link = serial.Serial(port, baud, timeout=timeout)
    self.ser = link
    time.sleep(0.1)
    link.reset_input_buffer()
    # Per-core bump allocator state: core -> next free pool address.
    self._pool_alloc = dict()
    print(f"Connected to {port} @ {baud} baud")
|
| 59 |
+
|
| 60 |
+
def close(self):
    """Close the underlying serial port."""
    port = self.ser
    port.close()
|
| 62 |
+
|
| 63 |
+
def _send(self, data):
|
| 64 |
+
"""Send raw bytes."""
|
| 65 |
+
self.ser.write(bytes(data))
|
| 66 |
+
|
| 67 |
+
def _recv(self, n):
|
| 68 |
+
"""Receive exactly n bytes."""
|
| 69 |
+
data = self.ser.read(n)
|
| 70 |
+
if len(data) != n:
|
| 71 |
+
raise TimeoutError(f"Expected {n} bytes, got {len(data)}")
|
| 72 |
+
return data
|
| 73 |
+
|
| 74 |
+
def _wait_ack(self):
|
| 75 |
+
"""Wait for ACK (0xAA) response."""
|
| 76 |
+
resp = self._recv(1)
|
| 77 |
+
if resp[0] != self.RESP_ACK:
|
| 78 |
+
raise ValueError(f"Expected ACK (0xAA), got 0x{resp[0]:02X}")
|
| 79 |
+
|
| 80 |
+
def _alloc_pool(self, core, count=1):
|
| 81 |
+
"""Allocate pool entries for a core (bump allocator)."""
|
| 82 |
+
if core not in self._pool_alloc:
|
| 83 |
+
self._pool_alloc[core] = 0
|
| 84 |
+
addr = self._pool_alloc[core]
|
| 85 |
+
self._pool_alloc[core] += count
|
| 86 |
+
return addr
|
| 87 |
+
|
| 88 |
+
def prog_pool(self, core, pool_addr, src, target, weight, comp=0):
    """Program a connection pool entry and wait for the ACK.

    Packet (9 bytes): opcode, core, pool address (2 bytes, big-endian),
    flags, low byte of ``src``, low byte of ``target``, weight (2 bytes,
    big-endian).

    Args:
        core: Core ID
        pool_addr: Pool address (0 to POOL_DEPTH-1)
        src: Source neuron (for reverse table, 0-1023)
        target: Target neuron (0-1023)
        weight: Signed 16-bit weight
        comp: Compartment ID (0=soma, 1-3=dendrites)
    """
    addr_hi, addr_lo = divmod(pool_addr & 0xFFFF, 0x100)
    weight_hi, weight_lo = divmod(weight & 0xFFFF, 0x100)
    # Flags byte: {comp[1:0], src[9:8], target[9:8], 2'b00}
    comp_bits = (comp & 0x3) << 6
    src_bits = ((src >> 8) & 0x3) << 4
    target_bits = ((target >> 8) & 0x3) << 2
    packet = [
        self.CMD_PROG_POOL,
        core & 0xFF,
        addr_hi, addr_lo,
        comp_bits | src_bits | target_bits,
        src & 0xFF,
        target & 0xFF,
        weight_hi, weight_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 112 |
+
|
| 113 |
+
def prog_index(self, core, neuron, base_addr, count, format=0, base_target=0):
    """Program a CSR index entry (base_addr + count for a neuron).

    Packet (10 bytes): opcode, core, neuron (2 bytes, big-endian), base
    address (2 bytes, big-endian), count (2 bytes, big-endian), a byte
    holding format[1:0] in bits 7:6 and base_target[9:8] in bits 1:0,
    then base_target[7:0]. Waits for the ACK afterwards.

    Args:
        core: Core ID
        neuron: Neuron ID (0-1023)
        base_addr: Pool base address
        count: Number of connections
        format: Synapse format (0=sparse, 1=dense, 2=pop)
        base_target: Base target neuron for dense/pop formats
    """
    neuron_hi, neuron_lo = divmod(neuron & 0xFFFF, 0x100)
    base_hi, base_lo = divmod(base_addr & 0xFFFF, 0x100)
    count_hi, count_lo = divmod(count & 0xFFFF, 0x100)
    format_and_target_hi = ((format & 0x3) << 6) | ((base_target >> 8) & 0x3)
    packet = [
        self.CMD_PROG_INDEX,
        core & 0xFF,
        neuron_hi, neuron_lo,
        base_hi, base_lo,
        count_hi, count_lo,
        format_and_target_hi,
        base_target & 0xFF,
    ]
    self._send(packet)
    self._wait_ack()
|
| 134 |
+
|
| 135 |
+
def prog_conn(self, core, src, targets_weights, comp=0):
    """High-level: program connections for a source neuron using pool allocator.

    Reserves a contiguous pool range, writes one pool entry per
    ``(target, weight)`` pair, then writes the neuron's index entry
    pointing at that range. An empty list is a no-op.

    Args:
        core: Core ID
        src: Source neuron
        targets_weights: List of (target, weight) tuples
        comp: Compartment ID (default 0=soma)
    """
    if not targets_weights:
        return
    fanout = len(targets_weights)
    base = self._alloc_pool(core, fanout)
    pool_addr = base
    for target, weight in targets_weights:
        self.prog_pool(core, pool_addr, src, target, weight, comp)
        pool_addr += 1
    self.prog_index(core, src, base, fanout)
|
| 150 |
+
|
| 151 |
+
def prog_route(self, src_core, src_neuron, dest_core, dest_neuron, weight, slot=0):
    """Program an inter-core route (multicast slot) and wait for the ACK.

    Packet (10 bytes): opcode, source core, source neuron (2 bytes,
    big-endian), slot, destination core, destination neuron (2 bytes,
    big-endian), weight (2 bytes, big-endian).

    Args:
        src_core: Source core ID
        src_neuron: Source neuron (0-1023)
        dest_core: Destination core ID
        dest_neuron: Destination neuron (0-1023)
        weight: Signed 16-bit weight
        slot: Route slot (0-7) for multicast fanout
    """
    src_hi, src_lo = divmod(src_neuron & 0xFFFF, 0x100)
    dest_hi, dest_lo = divmod(dest_neuron & 0xFFFF, 0x100)
    weight_hi, weight_lo = divmod(weight & 0xFFFF, 0x100)
    packet = [
        self.CMD_PROG_ROUTE,
        src_core & 0xFF,
        src_hi, src_lo,
        slot & 0xFF,
        dest_core & 0xFF,
        dest_hi, dest_lo,
        weight_hi, weight_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 173 |
+
|
| 174 |
+
def stimulus(self, core, neuron, current):
    """Set external stimulus current for next RUN and wait for the ACK.

    Packet (6 bytes): opcode, core, neuron (2 bytes, big-endian),
    current (2 bytes, big-endian).

    Args:
        core: Target core ID
        neuron: Target neuron (0-1023)
        current: Signed 16-bit current value
    """
    neuron_hi, neuron_lo = divmod(neuron & 0xFFFF, 0x100)
    current_hi, current_lo = divmod(current & 0xFFFF, 0x100)
    packet = [
        self.CMD_STIMULUS,
        core & 0xFF,
        neuron_hi, neuron_lo,
        current_hi, current_lo,
    ]
    self._send(packet)
    self._wait_ack()
|
| 190 |
+
|
| 191 |
+
def run(self, timesteps):
    """Run the mesh for N timesteps.

    Sends CMD_RUN with the timestep count (2 bytes, big-endian), then
    reads a 5-byte reply: a DONE marker followed by a 32-bit big-endian
    spike count.

    Args:
        timesteps: Number of timesteps (1-65535); only the low 16 bits
            are sent.

    Returns:
        Number of spikes that occurred during the run.

    Raises:
        ValueError: If the first reply byte is not ``self.RESP_DONE``.
    """
    steps_hi, steps_lo = divmod(timesteps & 0xFFFF, 0x100)
    self._send([self.CMD_RUN, steps_hi, steps_lo])
    reply = self._recv(5)
    marker = reply[0]
    if marker != self.RESP_DONE:
        raise ValueError(f"Expected DONE (0xDD), got 0x{marker:02X}")
    (spike_total,) = struct.unpack('>I', reply[1:5])
    return spike_total
|
| 210 |
+
|
| 211 |
+
def reward(self, value):
    """Set reward value for 3-factor learning and wait for the ACK.

    Packet (3 bytes): opcode, then the value as 2 bytes, big-endian.

    Args:
        value: Signed 16-bit reward (0 = no reward)
    """
    value_hi, value_lo = divmod(value & 0xFFFF, 0x100)
    self._send([self.CMD_REWARD, value_hi, value_lo])
    self._wait_ack()
|
| 223 |
+
|
| 224 |
+
def set_learning(self, learn_enable, graded_enable=False, dendritic_enable=False,
                 async_enable=False, threefactor_enable=False, noise_enable=False):
    """Configure learning mode flags and wait for the ACK.

    The flags byte carries one bit per option, in parameter order:
    bit 0 learn, bit 1 graded, bit 2 dendritic, bit 3 async,
    bit 4 three-factor, bit 5 noise. Each argument is converted with
    ``int()`` and only its lowest bit is used.
    """
    switches = (learn_enable, graded_enable, dendritic_enable,
                async_enable, threefactor_enable, noise_enable)
    flags = 0
    for bit, enabled in enumerate(switches):
        flags |= (int(enabled) & 1) << bit
    self._send([self.CMD_LEARN_CFG, flags])
    self._wait_ack()
|
| 235 |
+
|
| 236 |
+
def prog_delay(self, core, pool_addr, delay):
    """Program an axon delay for a pool entry (P17).

    The packet is CMD_PROG_DELAY, the low byte of core, the low 16 bits
    of pool_addr (high byte first) and the low 6 bits of delay. The call
    then waits for the acknowledge.

    Args:
        core: Core ID
        pool_addr: Pool address of the connection
        delay: Delay in timesteps (0-63)
    """
    packet = [
        self.CMD_PROG_DELAY,
        core & 0xFF,
        (pool_addr >> 8) & 0xFF,
        pool_addr & 0xFF,
        delay & 0x3F,
    ]
    self._send(packet)
    self._wait_ack()
|
| 251 |
+
|
| 252 |
+
def prog_learn(self, core, addr, instr):
    """Program a microcode learning instruction (P19).

    The packet is CMD_PROG_LEARN, the low byte of core, the low 6 bits
    of addr and the instruction as four bytes, most significant first.
    The call then waits for the acknowledge.

    Args:
        core: Core ID
        addr: Instruction address (0-63)
        instr: 32-bit instruction word
    """
    instr_bytes = [(instr >> shift) & 0xFF for shift in (24, 16, 8, 0)]
    self._send([self.CMD_PROG_LEARN, core & 0xFF, addr & 0x3F, *instr_bytes])
    self._wait_ack()
|
| 270 |
+
|
| 271 |
+
def prog_global_route(self, src_core, src_neuron, dest_core, dest_neuron,
                      weight, slot=0):
    """Program an inter-cluster global route (P20).

    Packet layout after CMD_PROG_GLOBAL_ROUTE: source core (1 byte),
    source neuron (2 bytes), slot (1 byte), destination core (1 byte),
    destination neuron (2 bytes), weight (2 bytes). Two-byte fields are
    sent high byte first. The call then waits for the acknowledge.

    Args:
        src_core: Source core ID
        src_neuron: Source neuron (0-1023)
        dest_core: Destination core ID
        dest_neuron: Destination neuron (0-1023)
        weight: Signed 16-bit weight
        slot: Route slot (0-3)
    """
    def split16(word):
        # High byte, then low byte, of the low 16 bits of word.
        return [(word >> 8) & 0xFF, word & 0xFF]

    packet = [self.CMD_PROG_GLOBAL_ROUTE, src_core & 0xFF]
    packet += split16(src_neuron)
    packet += [slot & 0xFF, dest_core & 0xFF]
    packet += split16(dest_neuron)
    packet += split16(weight & 0xFFFF)
    self._send(packet)
    self._wait_ack()
|
| 294 |
+
|
| 295 |
+
def async_mode(self, enable=True):
    """Enable or disable async event-driven mode.

    Implemented as a set_learning() call with the learn, graded and
    dendritic switches passed as False and the remaining switches left
    at their defaults, so only the async flag can be set by this call.
    """
    self.set_learning(learn_enable=False, graded_enable=False,
                      dendritic_enable=False, async_enable=enable)
|
| 298 |
+
|
| 299 |
+
def prog_neuron(self, core, neuron, param_id, value):
    """Program a per-neuron parameter.

    Packet layout after CMD_PROG_NEURON: core (1 byte), neuron (2 bytes,
    high byte first), param_id (1 byte), value (low 16 bits, high byte
    first). The call then waits for the acknowledge.

    Args:
        core: Core ID
        neuron: Neuron ID (0-1023)
        param_id: Parameter (PARAM_THRESHOLD=0, PARAM_LEAK=1, etc.)
        value: Signed 16-bit value
    """
    encoded = value & 0xFFFF
    header = [self.CMD_PROG_NEURON, core & 0xFF]
    neuron_bytes = [(neuron >> 8) & 0xFF, neuron & 0xFF]
    value_bytes = [(encoded >> 8) & 0xFF, encoded & 0xFF]
    self._send(header + neuron_bytes + [param_id & 0xFF] + value_bytes)
    self._wait_ack()
|
| 317 |
+
|
| 318 |
+
def status(self):
    """Query chip status.

    Sends CMD_STATUS and reads a 5-byte reply: the first byte is the
    state, the remaining four are a big-endian unsigned 32-bit timestep
    count.

    Returns:
        Tuple of (state, timestep_count)
    """
    self._send([self.CMD_STATUS])
    reply = self._recv(5)
    state = reply[0]
    (timestep_count,) = struct.unpack('>I', reply[1:5])
    return state, timestep_count
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def demo(chip):
    """Run a demonstration: program a spike chain and observe propagation.

    Steps, in order: query status, program a three-hop chain on core 0,
    add a cross-core route from core 0 neuron 3 to core 1 neuron 0,
    program a two-hop chain on core 1, stimulate core 0 neuron 0, run 20
    timesteps (timed), run 10 more, and query status again. Progress is
    printed to stdout.

    Args:
        chip: Object providing status(), prog_conn(), prog_route(),
            stimulus() and run().
    """
    banner = "=" * 60
    weight = 1200

    print("\n" + banner)
    print(" Neuromorphic Chip Demo (Phase 13b: CSR + Multicast)")
    print(banner)

    state, ts = chip.status()
    print(f"\nInitial status: state={state}, timesteps={ts}")

    # Core 0 chain: N0 -> N1 -> N2 -> N3, one connection per hop.
    print("\nProgramming spike chain: Core 0, N0 -> N1 -> N2 -> N3")
    for pre in range(3):
        post = pre + 1
        chip.prog_conn(0, pre, [(post, weight)])
        print(f" N{pre} -> N{post} (w={weight}) OK")

    # Cross-core route: core 0 neuron 3 -> core 1 neuron 0.
    print("\nProgramming cross-core route: C0:N3 -> C1:N0")
    chip.prog_route(src_core=0, src_neuron=3,
                    dest_core=1, dest_neuron=0, weight=weight)
    print(" Route OK")

    # Core 1 chain: N0 -> N1 -> N2.
    print("Programming Core 1 chain: N0 -> N1 -> N2")
    for pre in range(2):
        chip.prog_conn(1, pre, [(pre + 1, weight)])
    print(" Core 1 chain OK")

    # Stimulate the head of the chain, then time the first run.
    print("\nApplying stimulus: Core 0, N0, current=1200")
    chip.stimulus(core=0, neuron=0, current=1200)

    print("Running 20 timesteps...")
    t_start = time.time()
    spikes = chip.run(20)
    elapsed = time.time() - t_start
    print(f" Done! {spikes} spikes in {elapsed:.3f}s")

    # Second run with no new stimulus applied.
    print("\nRunning 10 more timesteps (no stimulus)...")
    spikes2 = chip.run(10)
    print(f" {spikes2} spikes (should be 0 - no input)")

    state, ts = chip.status()
    print(f"\nFinal status: state={state}, timesteps={ts}")

    print("\n" + banner)
    print(" Demo complete! The chip is alive.")
    print(banner)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def main():
    """Command-line entry point.

    Parses --port (required), --baud, --demo and --status, opens a
    NeuromorphicChip on the given port, and then either prints the chip
    status, runs demo(), or prints usage hints when neither flag is
    given. --status takes precedence over --demo. The chip is always
    closed on the way out.
    """
    parser = argparse.ArgumentParser(description="Neuromorphic Chip Host Controller")
    parser.add_argument("--port", required=True, help="Serial port (e.g., COM3 or /dev/ttyUSB1)")
    parser.add_argument("--baud", type=int, default=115200, help="Baud rate (default: 115200)")
    parser.add_argument("--demo", action="store_true", help="Run demo program")
    parser.add_argument("--status", action="store_true", help="Query chip status")
    args = parser.parse_args()

    usage_hint = (
        "No command specified. Use --demo or --status",
        "Or import NeuromorphicChip in Python for programmatic access:",
        "",
        " from host import NeuromorphicChip",
        " chip = NeuromorphicChip('COM3')",
        " chip.prog_conn(0, 0, [(1, 1200), (2, 800)]) # N0 -> N1(w=1200), N2(w=800)",
        " chip.prog_index(0, 0, 0, 2) # Or use prog_conn() which handles this",
        " chip.stimulus(core=0, neuron=0, current=1200)",
        " spikes = chip.run(100)",
    )

    chip = NeuromorphicChip(args.port, args.baud)

    try:
        if args.status:
            state, ts = chip.status()
            label = 'idle' if state == 0 else 'busy'
            print(f"State: {state} ({label})")
            print(f"Timestep count: {ts}")
        elif args.demo:
            demo(chip)
        else:
            for line in usage_hint:
                print(line)
    finally:
        # Release the port whether or not the command raised.
        chip.close()


if __name__ == "__main__":
    main()
|
fpga/kria/build_kria.tcl
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# Vivado Build Script — Kria KV260 Target — Catalyst N1 (Loihi 1 Parity)
|
| 3 |
+
# ============================================================================
|
| 4 |
+
# Usage: vivado -mode batch -source fpga/kria/build_kria.tcl -tclargs synth_only
|
| 5 |
+
# ============================================================================
|
| 6 |
+
|
| 7 |
+
# Resolve every path relative to this script so the build does not depend
# on the directory Vivado was started from.
set script_dir [file dirname [file normalize [info script]]]
set project_dir "${script_dir}/build"
set part "xczu5ev-sfvc784-2-i"
set rtl_dir "[file normalize ${script_dir}/../../rtl]"
set kria_dir $script_dir

# Build mode is the first -tclargs argument; "full" when none is given.
# Only "synth_only" launches a run (see the branch below).
set mode "full"
if {[llength $argv] > 0} {
    set mode [lindex $argv 0]
}

puts "============================================"
puts " Catalyst N1 — Kria KV260 Build"
puts " Mode: $mode"
puts " Part: $part"
puts "============================================"

# (Re)create the project; -force overwrites an existing one in build/.
file mkdir $project_dir
create_project catalyst_kria_n1 $project_dir -part $part -force

set rtl_files [list \
    ${rtl_dir}/sram.v \
    ${rtl_dir}/spike_fifo.v \
    ${rtl_dir}/async_fifo.v \
    ${rtl_dir}/uart_tx.v \
    ${rtl_dir}/uart_rx.v \
    ${rtl_dir}/scalable_core_v2.v \
    ${rtl_dir}/neuromorphic_mesh.v \
    ${rtl_dir}/async_noc_mesh.v \
    ${rtl_dir}/async_router.v \
    ${rtl_dir}/sync_tree.v \
    ${rtl_dir}/chip_link.v \
    ${rtl_dir}/host_interface.v \
    ${rtl_dir}/axi_uart_bridge.v \
    ${rtl_dir}/neuromorphic_top.v \
    ${kria_dir}/kria_neuromorphic.v \
]
add_files -norecurse $rtl_files
update_compile_order -fileset sources_1

if {$mode eq "synth_only"} {
    puts "============================================"
    puts " SYNTHESIS-ONLY MODE"
    puts "============================================"

    set_property top kria_neuromorphic [current_fileset]
    update_compile_order -fileset sources_1

    launch_runs synth_1 -jobs 4
    wait_on_run synth_1

    # wait_on_run also returns when the run failed; stop here with a clear
    # message instead of letting open_run fail on a missing result.
    if {[get_property PROGRESS [get_runs synth_1]] ne "100%"} {
        error "synth_1 did not complete (status: [get_property STATUS [get_runs synth_1]])"
    }
    open_run synth_1

    report_utilization -file ${project_dir}/synth_utilization.rpt
    report_utilization -hierarchical -file ${project_dir}/synth_utilization_hier.rpt
    report_timing_summary -file ${project_dir}/synth_timing.rpt

    puts ""
    puts "============================================"
    puts " N1 SYNTHESIS COMPLETE"
    puts "============================================"
    # -return_string hands the report back as a Tcl value instead of
    # printing it, so it has to be passed to puts to reach the console.
    puts [report_utilization -return_string]

    close_project
    exit
}

# Any other mode only creates the project and adds the sources.
puts "Mode '$mode': project created in $project_dir, no runs launched (use -tclargs synth_only to synthesize)."
close_project
|
fpga/kria/kria_neuromorphic.v
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Kria KV260 Neuromorphic PL Wrapper — Catalyst N1 (Loihi 1 Parity)
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Catalyst N1 v2.3 — Zynq UltraScale+ ZU5EV target (2 cores x 256 neurons)
|
| 6 |
+
// 2-core variant for Kria K26 resource characterization.
|
| 7 |
+
//
|
| 8 |
+
// VERSION_ID: 0xA0_23_02_01
|
| 9 |
+
// A0 = Kria platform, 23 = N1 v2.3, 02 = 2-core, 01 = N1 generation
|
| 10 |
+
// ============================================================================
|
| 11 |
+
// ============================================================================
|
| 12 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 13 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 14 |
+
//
|
| 15 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 16 |
+
// you may not use this file except in compliance with the License.
|
| 17 |
+
// You may obtain a copy of the License at
|
| 18 |
+
//
|
| 19 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 20 |
+
//
|
| 21 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 22 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 23 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 24 |
+
// See the License for the specific language governing permissions and
|
| 25 |
+
// limitations under the License.
|
| 26 |
+
// ============================================================================
|
| 27 |
+
|
| 28 |
+
module kria_neuromorphic #(
    parameter NUM_CORES      = 2,
    parameter CORE_ID_BITS   = 1,
    parameter NUM_NEURONS    = 256,
    parameter NEURON_BITS    = 8,
    parameter POOL_DEPTH     = 4096,
    parameter POOL_ADDR_BITS = 12,
    parameter COUNT_BITS     = 8,
    parameter VERSION_ID     = 32'hA0_23_02_01
)(
    // Clock and active-low reset shared by everything in this wrapper
    input  wire        s_axi_aclk,
    input  wire        s_axi_aresetn,
    // Write address channel
    input  wire [31:0] s_axi_awaddr,
    input  wire        s_axi_awvalid,
    output wire        s_axi_awready,
    // Write data channel
    input  wire [31:0] s_axi_wdata,
    input  wire [3:0]  s_axi_wstrb,
    input  wire        s_axi_wvalid,
    output wire        s_axi_wready,
    // Write response channel
    output wire [1:0]  s_axi_bresp,
    output wire        s_axi_bvalid,
    input  wire        s_axi_bready,
    // Read address channel
    input  wire [31:0] s_axi_araddr,
    input  wire        s_axi_arvalid,
    output wire        s_axi_arready,
    // Read data channel
    output wire [31:0] s_axi_rdata,
    output wire [1:0]  s_axi_rresp,
    output wire        s_axi_rvalid,
    input  wire        s_axi_rready
);

    // One clock/reset pair drives both the bridge (including its
    // clk_neuro/rst_neuro_n inputs) and the neuromorphic top level.
    wire clk;
    wire rst_n;
    assign clk   = s_axi_aclk;
    assign rst_n = s_axi_aresetn;

    // Byte-wide links between the bridge hi_* ports and the *_ext ports
    // of neuromorphic_top.
    wire [7:0] host_rx_byte;
    wire       host_rx_strobe;
    wire [7:0] host_tx_byte;
    wire       host_tx_strobe;
    wire       host_tx_accept;

    // AXI slave port <-> byte stream bridge
    axi_uart_bridge #(
        .NUM_CORES     (NUM_CORES),
        .VERSION_ID    (VERSION_ID)
    ) u_bridge (
        .clk           (clk),
        .rst_n         (rst_n),
        .clk_neuro     (clk),
        .rst_neuro_n   (rst_n),

        .s_axi_awaddr  (s_axi_awaddr),
        .s_axi_awvalid (s_axi_awvalid),
        .s_axi_awready (s_axi_awready),

        .s_axi_wdata   (s_axi_wdata),
        .s_axi_wstrb   (s_axi_wstrb),
        .s_axi_wvalid  (s_axi_wvalid),
        .s_axi_wready  (s_axi_wready),

        .s_axi_bresp   (s_axi_bresp),
        .s_axi_bvalid  (s_axi_bvalid),
        .s_axi_bready  (s_axi_bready),

        .s_axi_araddr  (s_axi_araddr),
        .s_axi_arvalid (s_axi_arvalid),
        .s_axi_arready (s_axi_arready),

        .s_axi_rdata   (s_axi_rdata),
        .s_axi_rresp   (s_axi_rresp),
        .s_axi_rvalid  (s_axi_rvalid),
        .s_axi_rready  (s_axi_rready),

        .hi_rx_data    (host_rx_byte),
        .hi_rx_valid   (host_rx_strobe),
        .hi_tx_data    (host_tx_byte),
        .hi_tx_valid   (host_tx_strobe),
        .hi_tx_ready   (host_tx_accept)
    );

    // Neuromorphic core complex. BYPASS_UART is 1 and the serial pins are
    // tied off (rxd held at 1, txd unconnected); traffic uses the *_ext
    // byte ports. NOTE(review): presumably BYPASS_UART selects those ports
    // inside neuromorphic_top -- confirm there.
    neuromorphic_top #(
        .CLK_FREQ               (100_000_000),
        .BAUD                   (115200),
        .BYPASS_UART            (1),

        .NUM_CORES              (NUM_CORES),
        .CORE_ID_BITS           (CORE_ID_BITS),
        .NUM_NEURONS            (NUM_NEURONS),
        .NEURON_BITS            (NEURON_BITS),
        .DATA_WIDTH             (16),

        .POOL_DEPTH             (POOL_DEPTH),
        .POOL_ADDR_BITS         (POOL_ADDR_BITS),
        .COUNT_BITS             (COUNT_BITS),
        .REV_FANIN              (16),
        .REV_SLOT_BITS          (4),

        .THRESHOLD              (16'sd1000),
        .LEAK_RATE              (16'sd3),
        .REFRAC_CYCLES          (3),

        .ROUTE_FANOUT           (8),
        .ROUTE_SLOT_BITS        (3),
        .GLOBAL_ROUTE_SLOTS     (4),
        .GLOBAL_ROUTE_SLOT_BITS (2),

        .CHIP_LINK_EN           (0),
        .NOC_MODE               (0),
        // Mesh dimensions are fixed here and do not follow NUM_CORES.
        .MESH_X                 (2),
        .MESH_Y                 (1)
    ) u_neuromorphic (
        .clk           (clk),
        .rst_n         (rst_n),

        .uart_rxd      (1'b1),
        .uart_txd      (),

        .rx_data_ext   (host_rx_byte),
        .rx_valid_ext  (host_rx_strobe),
        .tx_data_ext   (host_tx_byte),
        .tx_valid_ext  (host_tx_strobe),
        .tx_ready_ext  (host_tx_accept),

        // Chip-to-chip link unused (CHIP_LINK_EN = 0): inputs tied to
        // constant 0, outputs left open.
        .link_tx_data  (),
        .link_tx_valid (),
        .link_tx_ready (1'b0),
        .link_rx_data  (8'b0),
        .link_rx_valid (1'b0),
        .link_rx_ready ()
    );

endmodule
|
fpga/kria/kria_neuromorphic_8core_backup.v
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Kria KV260 Neuromorphic PL Wrapper — Catalyst N1 (Loihi 1 Parity)
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Catalyst N1 v2.3 — Zynq UltraScale+ ZU5EV target (8 cores x 256 neurons)
|
| 6 |
+
// Same architecture as N2 wrapper but with N1 RTL (simpler, less resource usage).
|
| 7 |
+
//
|
| 8 |
+
// VERSION_ID: 0xA0_23_08_01
|
| 9 |
+
// A0 = Kria platform, 23 = N1 v2.3, 08 = 8-core, 01 = N1 generation
|
| 10 |
+
// ============================================================================
|
| 11 |
+
// ============================================================================
|
| 12 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 13 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 14 |
+
//
|
| 15 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 16 |
+
// you may not use this file except in compliance with the License.
|
| 17 |
+
// You may obtain a copy of the License at
|
| 18 |
+
//
|
| 19 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 20 |
+
//
|
| 21 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 22 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 23 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 24 |
+
// See the License for the specific language governing permissions and
|
| 25 |
+
// limitations under the License.
|
| 26 |
+
// ============================================================================
|
| 27 |
+
|
| 28 |
+
module kria_neuromorphic #(
    parameter NUM_CORES      = 8,
    parameter CORE_ID_BITS   = 3,
    parameter NUM_NEURONS    = 256,
    parameter NEURON_BITS    = 8,
    parameter POOL_DEPTH     = 4096,
    parameter POOL_ADDR_BITS = 12,
    parameter COUNT_BITS     = 8,
    parameter VERSION_ID     = 32'hA0_23_08_01
)(
    // Clock and active-low reset shared by everything in this wrapper
    input  wire        s_axi_aclk,
    input  wire        s_axi_aresetn,
    // Write address channel
    input  wire [31:0] s_axi_awaddr,
    input  wire        s_axi_awvalid,
    output wire        s_axi_awready,
    // Write data channel
    input  wire [31:0] s_axi_wdata,
    input  wire [3:0]  s_axi_wstrb,
    input  wire        s_axi_wvalid,
    output wire        s_axi_wready,
    // Write response channel
    output wire [1:0]  s_axi_bresp,
    output wire        s_axi_bvalid,
    input  wire        s_axi_bready,
    // Read address channel
    input  wire [31:0] s_axi_araddr,
    input  wire        s_axi_arvalid,
    output wire        s_axi_arready,
    // Read data channel
    output wire [31:0] s_axi_rdata,
    output wire [1:0]  s_axi_rresp,
    output wire        s_axi_rvalid,
    input  wire        s_axi_rready
);

    // One clock/reset pair drives both the bridge (including its
    // clk_neuro/rst_neuro_n inputs) and the neuromorphic top level.
    wire clk;
    wire rst_n;
    assign clk   = s_axi_aclk;
    assign rst_n = s_axi_aresetn;

    // Byte-wide links between the bridge hi_* ports and the *_ext ports
    // of neuromorphic_top.
    wire [7:0] host_rx_byte;
    wire       host_rx_strobe;
    wire [7:0] host_tx_byte;
    wire       host_tx_strobe;
    wire       host_tx_accept;

    // AXI slave port <-> byte stream bridge
    axi_uart_bridge #(
        .NUM_CORES     (NUM_CORES),
        .VERSION_ID    (VERSION_ID)
    ) u_bridge (
        .clk           (clk),
        .rst_n         (rst_n),
        .clk_neuro     (clk),
        .rst_neuro_n   (rst_n),

        .s_axi_awaddr  (s_axi_awaddr),
        .s_axi_awvalid (s_axi_awvalid),
        .s_axi_awready (s_axi_awready),

        .s_axi_wdata   (s_axi_wdata),
        .s_axi_wstrb   (s_axi_wstrb),
        .s_axi_wvalid  (s_axi_wvalid),
        .s_axi_wready  (s_axi_wready),

        .s_axi_bresp   (s_axi_bresp),
        .s_axi_bvalid  (s_axi_bvalid),
        .s_axi_bready  (s_axi_bready),

        .s_axi_araddr  (s_axi_araddr),
        .s_axi_arvalid (s_axi_arvalid),
        .s_axi_arready (s_axi_arready),

        .s_axi_rdata   (s_axi_rdata),
        .s_axi_rresp   (s_axi_rresp),
        .s_axi_rvalid  (s_axi_rvalid),
        .s_axi_rready  (s_axi_rready),

        .hi_rx_data    (host_rx_byte),
        .hi_rx_valid   (host_rx_strobe),
        .hi_tx_data    (host_tx_byte),
        .hi_tx_valid   (host_tx_strobe),
        .hi_tx_ready   (host_tx_accept)
    );

    // Neuromorphic core complex. BYPASS_UART is 1 and the serial pins are
    // tied off (rxd held at 1, txd unconnected); traffic uses the *_ext
    // byte ports. NOTE(review): presumably BYPASS_UART selects those ports
    // inside neuromorphic_top -- confirm there.
    neuromorphic_top #(
        .CLK_FREQ               (100_000_000),
        .BAUD                   (115200),
        .BYPASS_UART            (1),

        .NUM_CORES              (NUM_CORES),
        .CORE_ID_BITS           (CORE_ID_BITS),
        .NUM_NEURONS            (NUM_NEURONS),
        .NEURON_BITS            (NEURON_BITS),
        .DATA_WIDTH             (16),

        .POOL_DEPTH             (POOL_DEPTH),
        .POOL_ADDR_BITS         (POOL_ADDR_BITS),
        .COUNT_BITS             (COUNT_BITS),
        .REV_FANIN              (16),
        .REV_SLOT_BITS          (4),

        .THRESHOLD              (16'sd1000),
        .LEAK_RATE              (16'sd3),
        .REFRAC_CYCLES          (3),

        .ROUTE_FANOUT           (8),
        .ROUTE_SLOT_BITS        (3),
        .GLOBAL_ROUTE_SLOTS     (4),
        .GLOBAL_ROUTE_SLOT_BITS (2),

        .CHIP_LINK_EN           (0),
        .NOC_MODE               (0),
        // Mesh dimensions are fixed here and do not follow NUM_CORES.
        .MESH_X                 (2),
        .MESH_Y                 (4)
    ) u_neuromorphic (
        .clk           (clk),
        .rst_n         (rst_n),

        .uart_rxd      (1'b1),
        .uart_txd      (),

        .rx_data_ext   (host_rx_byte),
        .rx_valid_ext  (host_rx_strobe),
        .tx_data_ext   (host_tx_byte),
        .tx_valid_ext  (host_tx_strobe),
        .tx_ready_ext  (host_tx_accept),

        // Chip-to-chip link unused (CHIP_LINK_EN = 0): inputs tied to
        // constant 0, outputs left open.
        .link_tx_data  (),
        .link_tx_valid (),
        .link_tx_ready (1'b0),
        .link_rx_data  (8'b0),
        .link_rx_valid (1'b0),
        .link_rx_ready ()
    );

endmodule
|
fpga/kria/run_impl.tcl
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# Vivado Implementation Script — Kria K26 — Catalyst N1 (Loihi 1 Parity)
|
| 3 |
+
# ============================================================================
|
| 4 |
+
# Opens existing synthesis checkpoint and runs Place & Route + reports
|
| 5 |
+
# Usage: vivado -mode batch -source fpga/kria/run_impl.tcl
|
| 6 |
+
# ============================================================================
|
| 7 |
+
|
| 8 |
+
# Resolve every path relative to this script so the flow does not depend
# on the directory Vivado was started from.
set script_dir [file dirname [file normalize [info script]]]
set project_dir "${script_dir}/build"
set synth_dcp "${project_dir}/catalyst_kria_n1.runs/synth_1/kria_neuromorphic.dcp"
set out_dir "${project_dir}/impl_results"

file mkdir $out_dir

puts "============================================"
puts " Catalyst N1 — Kria K26 Implementation"
puts " Loading: $synth_dcp"
puts "============================================"

# Fail early with an actionable message when synthesis has not been run;
# the path above is where the synth_1 run of project catalyst_kria_n1
# is expected to leave its checkpoint.
if {![file exists $synth_dcp]} {
    error "Synthesis checkpoint not found: $synth_dcp (run synthesis first)"
}

# Open synthesis checkpoint
open_checkpoint $synth_dcp

# Add clock constraint — Kria K26 PS provides 100 MHz PL clock
create_clock -period 10.000 -name sys_clk [get_ports s_axi_aclk]

# Set IO delay constraints (generic, for timing closure).
# The clock port itself is excluded from the input-delay set.
set_input_delay -clock sys_clk -max 2.0 [get_ports -filter {DIRECTION == IN && NAME != "s_axi_aclk"}]
set_output_delay -clock sys_clk -max 2.0 [get_ports -filter {DIRECTION == OUT}]

# Run implementation
puts "Running opt_design..."
opt_design

puts "Running place_design..."
place_design

puts "Running phys_opt_design..."
phys_opt_design

puts "Running route_design..."
route_design

# Save implemented checkpoint
write_checkpoint -force ${out_dir}/kria_n1_impl.dcp

# Generate reports
puts "Generating reports..."
report_timing_summary -file ${out_dir}/timing_summary.rpt
report_timing -max_paths 20 -file ${out_dir}/timing_paths.rpt
report_utilization -file ${out_dir}/utilization.rpt
report_utilization -hierarchical -file ${out_dir}/utilization_hier.rpt
report_power -file ${out_dir}/power.rpt
report_clock_utilization -file ${out_dir}/clock_utilization.rpt
report_design_analysis -file ${out_dir}/design_analysis.rpt

puts ""
puts "============================================"
puts " N1 IMPLEMENTATION COMPLETE"
puts "============================================"
puts "Reports in: $out_dir"

# Print summary to console. -return_string hands each report back as a
# Tcl value instead of printing it, so the value must be passed to puts.
puts [report_timing_summary -return_string]
puts [report_utilization -return_string]
puts [report_power -return_string]

close_design
exit
|
rtl/async_fifo.v
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Async FIFO
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
// Dual-clock FIFO with gray-coded read/write pointers.
//
// Each pointer is ADDR_BITS+1 bits wide: the low ADDR_BITS bits address
// the memory and the extra MSB tells a full FIFO from an empty one. The
// gray-coded pointers cross into the opposite clock domain through
// two-flop synchronizers. rd_data is an unregistered read of the entry at
// the current read pointer.
//
// Parameters:
//   DATA_WIDTH - width of one entry in bits
//   ADDR_BITS  - log2 of the depth; the wr_full expression slices
//                [ADDR_BITS-2:0], so ADDR_BITS must be at least 2
module async_fifo #(
    parameter DATA_WIDTH = 8,
    parameter ADDR_BITS  = 4
)(
    // Write side
    input  wire                  wr_clk,
    input  wire                  wr_rst_n,
    input  wire [DATA_WIDTH-1:0] wr_data,
    input  wire                  wr_en,
    output wire                  wr_full,

    // Read side
    input  wire                  rd_clk,
    input  wire                  rd_rst_n,
    input  wire                  rd_en,
    output wire [DATA_WIDTH-1:0] rd_data,
    output wire                  rd_empty
);

    localparam DEPTH = 1 << ADDR_BITS;

    reg [DATA_WIDTH-1:0] mem [0:DEPTH-1];

    // Write pointer (binary + gray) and its next-state values
    reg  [ADDR_BITS:0] wr_bin, wr_gray;
    wire [ADDR_BITS:0] wr_bin_next  = wr_bin + 1;
    wire [ADDR_BITS:0] wr_gray_next = wr_bin_next ^ (wr_bin_next >> 1);

    // Read pointer (binary + gray) and its next-state values
    reg  [ADDR_BITS:0] rd_bin, rd_gray;
    wire [ADDR_BITS:0] rd_bin_next  = rd_bin + 1;
    wire [ADDR_BITS:0] rd_gray_next = rd_bin_next ^ (rd_bin_next >> 1);

    // Two-flop synchronizers for the gray pointers. ASYNC_REG marks them
    // as synchronizer registers for Vivado (kept together, not optimized);
    // tools that do not know the attribute ignore it.
    (* ASYNC_REG = "TRUE" *) reg [ADDR_BITS:0] wr_gray_rd_s1, wr_gray_rd_s2;
    (* ASYNC_REG = "TRUE" *) reg [ADDR_BITS:0] rd_gray_wr_s1, rd_gray_wr_s2;

    // A write is accepted only when requested and the FIFO is not full.
    wire wr_accept = wr_en && !wr_full;

    // Memory write. Kept out of the async-reset block so the array uses a
    // plain reset-free clocked write; the wr_rst_n term keeps writes
    // suppressed while reset is asserted.
    always @(posedge wr_clk)
        if (wr_rst_n && wr_accept)
            mem[wr_bin[ADDR_BITS-1:0]] <= wr_data;

    // Write pointer update
    always @(posedge wr_clk or negedge wr_rst_n)
        if (!wr_rst_n) begin
            wr_bin  <= 0;
            wr_gray <= 0;
        end else if (wr_accept) begin
            wr_bin  <= wr_bin_next;
            wr_gray <= wr_gray_next;
        end

    // Read pointer update
    always @(posedge rd_clk or negedge rd_rst_n)
        if (!rd_rst_n) begin
            rd_bin  <= 0;
            rd_gray <= 0;
        end else if (rd_en && !rd_empty) begin
            rd_bin  <= rd_bin_next;
            rd_gray <= rd_gray_next;
        end

    // Write gray pointer -> read clock domain
    always @(posedge rd_clk or negedge rd_rst_n)
        if (!rd_rst_n) begin
            wr_gray_rd_s1 <= 0;
            wr_gray_rd_s2 <= 0;
        end else begin
            wr_gray_rd_s1 <= wr_gray;
            wr_gray_rd_s2 <= wr_gray_rd_s1;
        end

    // Read gray pointer -> write clock domain
    always @(posedge wr_clk or negedge wr_rst_n)
        if (!wr_rst_n) begin
            rd_gray_wr_s1 <= 0;
            rd_gray_wr_s2 <= 0;
        end else begin
            rd_gray_wr_s1 <= rd_gray;
            rd_gray_wr_s2 <= rd_gray_wr_s1;
        end

    // Full: write gray pointer equals the synchronized read gray pointer
    // with its top two bits inverted.
    assign wr_full = (wr_gray == {~rd_gray_wr_s2[ADDR_BITS:ADDR_BITS-1],
                                   rd_gray_wr_s2[ADDR_BITS-2:0]});

    // Empty: read gray pointer equals the synchronized write gray pointer.
    assign rd_empty = (rd_gray == wr_gray_rd_s2);

    assign rd_data = mem[rd_bin[ADDR_BITS-1:0]];

endmodule
|
rtl/async_noc_mesh.v
ADDED
|
@@ -0,0 +1,701 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Async NoC Mesh
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
module async_noc_mesh #(
|
| 24 |
+
parameter NUM_CORES = 4,
|
| 25 |
+
parameter CORE_ID_BITS = 2,
|
| 26 |
+
parameter NUM_NEURONS = 1024,
|
| 27 |
+
parameter NEURON_BITS = 10,
|
| 28 |
+
parameter DATA_WIDTH = 16,
|
| 29 |
+
parameter POOL_DEPTH = 32768,
|
| 30 |
+
parameter POOL_ADDR_BITS = 15,
|
| 31 |
+
parameter COUNT_BITS = 12,
|
| 32 |
+
parameter REV_FANIN = 32,
|
| 33 |
+
parameter REV_SLOT_BITS = 5,
|
| 34 |
+
parameter THRESHOLD = 16'sd1000,
|
| 35 |
+
parameter LEAK_RATE = 16'sd3,
|
| 36 |
+
parameter REFRAC_CYCLES = 3,
|
| 37 |
+
parameter GRADE_SHIFT = 7,
|
| 38 |
+
parameter ROUTE_FANOUT = 8,
|
| 39 |
+
parameter ROUTE_SLOT_BITS = 3,
|
| 40 |
+
parameter ROUTE_ADDR_W = CORE_ID_BITS + NEURON_BITS + ROUTE_SLOT_BITS,
|
| 41 |
+
parameter ROUTE_DATA_W = 1 + CORE_ID_BITS + NEURON_BITS + DATA_WIDTH,
|
| 42 |
+
parameter CLUSTER_SIZE = 4,
|
| 43 |
+
parameter GLOBAL_ROUTE_SLOTS = 4,
|
| 44 |
+
parameter GLOBAL_ROUTE_SLOT_BITS = 2,
|
| 45 |
+
parameter GLOBAL_ROUTE_ADDR_W = CORE_ID_BITS + NEURON_BITS + GLOBAL_ROUTE_SLOT_BITS,
|
| 46 |
+
parameter CHIP_LINK_EN = 0,
|
| 47 |
+
parameter DUAL_NOC = 0,
|
| 48 |
+
parameter MESH_X = 2,
|
| 49 |
+
parameter MESH_Y = 2
|
| 50 |
+
)(
|
| 51 |
+
input wire clk,
|
| 52 |
+
input wire rst_n,
|
| 53 |
+
input wire start,
|
| 54 |
+
input wire prog_pool_we,
|
| 55 |
+
input wire [CORE_ID_BITS-1:0] prog_pool_core,
|
| 56 |
+
input wire [POOL_ADDR_BITS-1:0] prog_pool_addr,
|
| 57 |
+
input wire [NEURON_BITS-1:0] prog_pool_src,
|
| 58 |
+
input wire [NEURON_BITS-1:0] prog_pool_target,
|
| 59 |
+
input wire signed [DATA_WIDTH-1:0] prog_pool_weight,
|
| 60 |
+
input wire [1:0] prog_pool_comp,
|
| 61 |
+
input wire prog_index_we,
|
| 62 |
+
input wire [CORE_ID_BITS-1:0] prog_index_core,
|
| 63 |
+
input wire [NEURON_BITS-1:0] prog_index_neuron,
|
| 64 |
+
input wire [POOL_ADDR_BITS-1:0] prog_index_base,
|
| 65 |
+
input wire [COUNT_BITS-1:0] prog_index_count,
|
| 66 |
+
input wire [1:0] prog_index_format,
|
| 67 |
+
input wire prog_route_we,
|
| 68 |
+
input wire [CORE_ID_BITS-1:0] prog_route_src_core,
|
| 69 |
+
input wire [NEURON_BITS-1:0] prog_route_src_neuron,
|
| 70 |
+
input wire [ROUTE_SLOT_BITS-1:0] prog_route_slot,
|
| 71 |
+
input wire [CORE_ID_BITS-1:0] prog_route_dest_core,
|
| 72 |
+
input wire [NEURON_BITS-1:0] prog_route_dest_neuron,
|
| 73 |
+
input wire signed [DATA_WIDTH-1:0] prog_route_weight,
|
| 74 |
+
input wire prog_global_route_we,
|
| 75 |
+
input wire [CORE_ID_BITS-1:0] prog_global_route_src_core,
|
| 76 |
+
input wire [NEURON_BITS-1:0] prog_global_route_src_neuron,
|
| 77 |
+
input wire [GLOBAL_ROUTE_SLOT_BITS-1:0] prog_global_route_slot,
|
| 78 |
+
input wire [CORE_ID_BITS-1:0] prog_global_route_dest_core,
|
| 79 |
+
input wire [NEURON_BITS-1:0] prog_global_route_dest_neuron,
|
| 80 |
+
input wire signed [DATA_WIDTH-1:0] prog_global_route_weight,
|
| 81 |
+
input wire learn_enable,
|
| 82 |
+
input wire graded_enable,
|
| 83 |
+
input wire dendritic_enable,
|
| 84 |
+
input wire async_enable,
|
| 85 |
+
input wire threefactor_enable,
|
| 86 |
+
input wire noise_enable,
|
| 87 |
+
input wire skip_idle_enable,
|
| 88 |
+
input wire scale_u_enable,
|
| 89 |
+
input wire signed [DATA_WIDTH-1:0] reward_value,
|
| 90 |
+
input wire prog_delay_we,
|
| 91 |
+
input wire [CORE_ID_BITS-1:0] prog_delay_core,
|
| 92 |
+
input wire [POOL_ADDR_BITS-1:0] prog_delay_addr,
|
| 93 |
+
input wire [5:0] prog_delay_value,
|
| 94 |
+
input wire prog_ucode_we,
|
| 95 |
+
input wire [CORE_ID_BITS-1:0] prog_ucode_core,
|
| 96 |
+
input wire [7:0] prog_ucode_addr,
|
| 97 |
+
input wire [31:0] prog_ucode_data,
|
| 98 |
+
input wire prog_param_we,
|
| 99 |
+
input wire [CORE_ID_BITS-1:0] prog_param_core,
|
| 100 |
+
input wire [NEURON_BITS-1:0] prog_param_neuron,
|
| 101 |
+
input wire [4:0] prog_param_id,
|
| 102 |
+
input wire signed [DATA_WIDTH-1:0] prog_param_value,
|
| 103 |
+
input wire ext_valid,
|
| 104 |
+
input wire [CORE_ID_BITS-1:0] ext_core,
|
| 105 |
+
input wire [NEURON_BITS-1:0] ext_neuron_id,
|
| 106 |
+
input wire signed [DATA_WIDTH-1:0] ext_current,
|
| 107 |
+
input wire probe_read,
|
| 108 |
+
input wire [CORE_ID_BITS-1:0] probe_core,
|
| 109 |
+
input wire [NEURON_BITS-1:0] probe_neuron,
|
| 110 |
+
input wire [4:0] probe_state_id,
|
| 111 |
+
input wire [POOL_ADDR_BITS-1:0] probe_pool_addr,
|
| 112 |
+
output wire signed [DATA_WIDTH-1:0] probe_data,
|
| 113 |
+
output wire probe_valid,
|
| 114 |
+
output reg timestep_done,
|
| 115 |
+
output wire [NUM_CORES-1:0] spike_valid_bus,
|
| 116 |
+
output wire [NUM_CORES*NEURON_BITS-1:0] spike_id_bus,
|
| 117 |
+
output wire [5:0] mesh_state_out,
|
| 118 |
+
output reg [31:0] total_spikes,
|
| 119 |
+
output reg [31:0] timestep_count,
|
| 120 |
+
output wire [NUM_CORES-1:0] core_idle_bus,
|
| 121 |
+
output wire link_tx_push,
|
| 122 |
+
output wire [CORE_ID_BITS-1:0] link_tx_core,
|
| 123 |
+
output wire [NEURON_BITS-1:0] link_tx_neuron,
|
| 124 |
+
output wire [7:0] link_tx_payload,
|
| 125 |
+
input wire link_tx_full,
|
| 126 |
+
input wire [CORE_ID_BITS-1:0] link_rx_core,
|
| 127 |
+
input wire [NEURON_BITS-1:0] link_rx_neuron,
|
| 128 |
+
input wire signed [DATA_WIDTH-1:0] link_rx_current,
|
| 129 |
+
output wire link_rx_pop,
|
| 130 |
+
input wire link_rx_empty
|
| 131 |
+
);
|
| 132 |
+
|
| 133 |
+
// Chip-link outputs are driven to constant zero here: nothing is ever
// pushed to the TX side and nothing is ever popped from the RX side.
assign link_tx_push    = 1'b0;
assign link_tx_core    = {CORE_ID_BITS{1'b0}};
assign link_tx_neuron  = {NEURON_BITS{1'b0}};
assign link_tx_payload = 8'd0;
assign link_rx_pop     = 1'b0;
|
| 138 |
+
|
| 139 |
+
// NoC packet geometry. The low DATA_WIDTH bits carry the current and the
// next NEURON_BITS bits carry the destination neuron (see the drain_pkt
// slices in gen_core). The top 2*COORD_BITS bits presumably hold the
// destination X/Y coordinates -- TODO confirm against the packet assembly
// in the routing FSM.
localparam COORD_BITS = 4,
           PACKET_W   = 2*COORD_BITS + NEURON_BITS + DATA_WIDTH;
|
| 141 |
+
|
| 142 |
+
// Column (X) coordinate of core `cid` in a row-major mesh that is MESH_X
// cores wide. The result is truncated to COORD_BITS bits.
function [COORD_BITS-1:0] core_to_x(input [CORE_ID_BITS-1:0] cid);
    begin
        core_to_x = cid % MESH_X;
    end
endfunction
|
| 146 |
+
|
| 147 |
+
// Row (Y) coordinate of core `cid` in a row-major mesh that is MESH_X
// cores wide. The result is truncated to COORD_BITS bits.
function [COORD_BITS-1:0] core_to_y(input [CORE_ID_BITS-1:0] cid);
    begin
        core_to_y = cid / MESH_X;
    end
endfunction
|
| 151 |
+
|
| 152 |
+
// Mesh controller FSM state encoding (4-bit).
localparam SM_IDLE       = 4'd0,   // programming / external stimulus accepted
           SM_PKT_DRAIN  = 4'd1,   // deliver in-flight NoC packets to the cores
           SM_START      = 4'd2,   // pulse start to every core
           SM_RUN_WAIT   = 4'd3,   // wait until all cores report done
           SM_ROUTE_POP  = 4'd4,   // pop next captured spike
           SM_ROUTE_ADDR = 4'd5,   // present route-table address
           SM_ROUTE_WAIT = 4'd6,   // route-table read wait state
           SM_ROUTE_READ = 4'd7,   // consume route-table entry
           SM_GRT_ADDR   = 4'd8,   // present global-route-table address
           SM_GRT_WAIT   = 4'd9,   // global-route-table read wait state
           SM_GRT_READ   = 4'd10,  // consume global-route-table entry
           SM_DONE       = 4'd11;

reg [3:0] mesh_state;

// The status port is 6 bits wide; the upper two bits are always zero.
assign mesh_state_out = {2'b00, mesh_state};
|
| 167 |
+
|
| 168 |
+
// ---------------------------------------------------------------------------
// Route table: one entry per {source core, source neuron, slot}.
// Entry layout (MSB first): {valid, dest core, dest neuron, weight}.
// While the mesh is in SM_IDLE the table port is owned by the host
// programming interface; in every other state it is owned by the FSM, which
// drives rt_we / rt_addr and always writes zeros.
// ---------------------------------------------------------------------------
reg                     rt_we;
reg  [ROUTE_ADDR_W-1:0] rt_addr;
wire [ROUTE_DATA_W-1:0] rt_rdata;

wire                    rt_we_mux;
wire [ROUTE_ADDR_W-1:0] rt_addr_mux;
wire [ROUTE_DATA_W-1:0] rt_wdata_mux;

assign rt_we_mux    = (mesh_state == SM_IDLE) ? prog_route_we : rt_we;
assign rt_addr_mux  = (mesh_state == SM_IDLE)
                    ? {prog_route_src_core, prog_route_src_neuron, prog_route_slot}
                    : rt_addr;
assign rt_wdata_mux = (mesh_state == SM_IDLE)
                    ? {1'b1, prog_route_dest_core, prog_route_dest_neuron, prog_route_weight}
                    : {ROUTE_DATA_W{1'b0}};

// Only port A is used; port B is parked at address 0 with its data unconnected.
sram #(
    .DATA_WIDTH(ROUTE_DATA_W),
    .ADDR_WIDTH(ROUTE_ADDR_W)
) route_table (
    .clk    (clk),
    .we_a   (rt_we_mux),
    .addr_a (rt_addr_mux),
    .wdata_a(rt_wdata_mux),
    .rdata_a(rt_rdata),
    .addr_b ({ROUTE_ADDR_W{1'b0}}),
    .rdata_b()
);

// Field extraction from the entry currently on the read port.
wire                         rt_valid     = rt_rdata[ROUTE_DATA_W-1];
wire [CORE_ID_BITS-1:0]      rt_dest_core = rt_rdata[NEURON_BITS+DATA_WIDTH +: CORE_ID_BITS];
wire [NEURON_BITS-1:0]       rt_dest_nrn  = rt_rdata[DATA_WIDTH +: NEURON_BITS];
wire signed [DATA_WIDTH-1:0] rt_weight    = rt_rdata[DATA_WIDTH-1:0];
|
| 188 |
+
|
| 189 |
+
// ---------------------------------------------------------------------------
// Global route table: one entry per {source core, source neuron, global slot}.
// Uses the same entry layout as the route table:
// {valid, dest core, dest neuron, weight}.
// Host programming owns the port in SM_IDLE; the FSM owns it otherwise and
// always writes zeros.
// ---------------------------------------------------------------------------
reg                            grt_we;
reg  [GLOBAL_ROUTE_ADDR_W-1:0] grt_addr;
wire [ROUTE_DATA_W-1:0]        grt_rdata;

wire                           grt_we_mux;
wire [GLOBAL_ROUTE_ADDR_W-1:0] grt_addr_mux;
wire [ROUTE_DATA_W-1:0]        grt_wdata_mux;

assign grt_we_mux    = (mesh_state == SM_IDLE) ? prog_global_route_we : grt_we;
assign grt_addr_mux  = (mesh_state == SM_IDLE)
                     ? {prog_global_route_src_core, prog_global_route_src_neuron, prog_global_route_slot}
                     : grt_addr;
assign grt_wdata_mux = (mesh_state == SM_IDLE)
                     ? {1'b1, prog_global_route_dest_core, prog_global_route_dest_neuron, prog_global_route_weight}
                     : {ROUTE_DATA_W{1'b0}};

// Only port A is used; port B is parked at address 0 with its data unconnected.
sram #(
    .DATA_WIDTH(ROUTE_DATA_W),
    .ADDR_WIDTH(GLOBAL_ROUTE_ADDR_W)
) global_route_table (
    .clk    (clk),
    .we_a   (grt_we_mux),
    .addr_a (grt_addr_mux),
    .wdata_a(grt_wdata_mux),
    .rdata_a(grt_rdata),
    .addr_b ({GLOBAL_ROUTE_ADDR_W{1'b0}}),
    .rdata_b()
);

// Field extraction from the entry currently on the read port.
wire                         grt_valid     = grt_rdata[ROUTE_DATA_W-1];
wire [CORE_ID_BITS-1:0]      grt_dest_core = grt_rdata[NEURON_BITS+DATA_WIDTH +: CORE_ID_BITS];
wire [NEURON_BITS-1:0]       grt_dest_nrn  = grt_rdata[DATA_WIDTH +: NEURON_BITS];
wire signed [DATA_WIDTH-1:0] grt_weight    = grt_rdata[DATA_WIDTH-1:0];
|
| 209 |
+
|
| 210 |
+
// Per-core status, one bit (or one packed field) per core, index = core id.
wire [NUM_CORES-1:0]             core_done;           // timestep_done from each core
wire [NUM_CORES-1:0]             core_spike_valid;    // spike strobe from each core
wire [NUM_CORES*NEURON_BITS-1:0] core_spike_id;       // spiking neuron id, packed
wire [NUM_CORES*8-1:0]           core_spike_payload;  // 8-bit spike payload, packed
reg  [NUM_CORES-1:0]             core_start_r;        // start strobe to each core
|
| 215 |
+
|
| 216 |
+
// Sticky per-core "done" flags. Cleared while the FSM sits in SM_START,
// then each bit is set (and held) once the matching core asserts core_done.
reg [NUM_CORES-1:0] core_done_latch;

always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        core_done_latch <= 0;
    end else begin
        if (mesh_state == SM_START)
            core_done_latch <= 0;
        else
            core_done_latch <= core_done | core_done_latch;
    end
end
|
| 225 |
+
|
| 226 |
+
// Expose the raw per-core spike strobes and ids on the module outputs.
assign spike_id_bus    = core_spike_id;
assign spike_valid_bus = core_spike_valid;
|
| 228 |
+
|
| 229 |
+
// Barrier over all cores: sync_all_done is produced by the sync tree from
// the sticky done flags. The tree's start-distribution path is not used
// (root_start tied low, leaf_start left open).
wire sync_all_done;

sync_tree #(
    .NUM_LEAVES(NUM_CORES)
) u_sync (
    .clk       (clk),
    .rst_n     (rst_n),
    .leaf_done (core_done_latch),
    .all_done  (sync_all_done),
    .root_start(1'b0),
    .leaf_start()
);
|
| 236 |
+
|
| 237 |
+
// Spike-capture FIFO interface. Each captured entry is
// {neuron id (NEURON_BITS), payload (8)}.
localparam CAP_WIDTH = 8 + NEURON_BITS;

reg  [NUM_CORES-1:0]           cap_pop;    // pop strobe per core FIFO
reg  [NUM_CORES-1:0]           cap_clear;  // clear strobe per core FIFO
wire [NUM_CORES-1:0]           cap_empty;
wire [NUM_CORES*CAP_WIDTH-1:0] cap_data;   // head entry of each FIFO, packed

// State probe: every core produces probe data/valid; the outputs select the
// slice belonging to the core addressed by probe_core.
wire [NUM_CORES-1:0]            core_probe_valid;
wire [NUM_CORES*DATA_WIDTH-1:0] core_probe_data;

assign probe_valid = core_probe_valid[probe_core];
assign probe_data  = core_probe_data[probe_core*DATA_WIDTH +: DATA_WIDTH];
|
| 247 |
+
|
| 248 |
+
// Number of set bits in a NUM_CORES-wide vector, returned as a 32-bit count.
function [31:0] popcount(input [NUM_CORES-1:0] bits);
    integer idx;
    begin
        popcount = 0;
        // Summation order does not matter; walk from the top bit down.
        for (idx = NUM_CORES - 1; idx >= 0; idx = idx - 1)
            popcount = popcount + bits[idx];
    end
endfunction
|
| 257 |
+
|
| 258 |
+
// ---------------------------------------------------------------------------
// Router interconnect, network A. One bit / one PACKET_W slice per router,
// indexed by core id.
// ---------------------------------------------------------------------------
wire [NUM_CORES-1:0]          rtr_idle;
wire [NUM_CORES-1:0]          rtr_local_out_valid;
wire [NUM_CORES*PACKET_W-1:0] rtr_local_out_data;
wire [NUM_CORES-1:0]          rtr_local_in_ready;

reg  [NUM_CORES-1:0]          rtr_local_in_valid;
reg  [NUM_CORES*PACKET_W-1:0] rtr_local_in_data;

// Local output ports are only drained during SM_PKT_DRAIN.
wire [NUM_CORES-1:0] rtr_local_out_ready = {NUM_CORES{mesh_state == SM_PKT_DRAIN}};

// Neighbour links: *_out_v / *_out_d leave a router, *_in_r is the ready it
// returns on the matching input port.
wire [NUM_CORES-1:0]          rtr_n_out_v, rtr_s_out_v, rtr_e_out_v, rtr_w_out_v;
wire [NUM_CORES*PACKET_W-1:0] rtr_n_out_d, rtr_s_out_d, rtr_e_out_d, rtr_w_out_d;
wire [NUM_CORES-1:0]          rtr_n_in_r,  rtr_s_in_r,  rtr_e_in_r,  rtr_w_in_r;

// ---------------------------------------------------------------------------
// Router interconnect, network B (instantiated only when DUAL_NOC is set;
// tied off otherwise).
// ---------------------------------------------------------------------------
wire [NUM_CORES-1:0]          rtr_b_idle;
wire [NUM_CORES-1:0]          rtr_b_local_out_valid;
wire [NUM_CORES*PACKET_W-1:0] rtr_b_local_out_data;
wire [NUM_CORES-1:0]          rtr_b_local_in_ready;

reg  [NUM_CORES-1:0]          rtr_b_local_in_valid;
reg  [NUM_CORES*PACKET_W-1:0] rtr_b_local_in_data;

// Network A has priority at each core: B is drained only for cores whose
// network-A local output is not valid in this cycle.
wire [NUM_CORES-1:0] rtr_b_local_out_ready =
    {NUM_CORES{mesh_state == SM_PKT_DRAIN}} & ~rtr_local_out_valid;

wire [NUM_CORES-1:0]          rtr_b_n_out_v, rtr_b_s_out_v, rtr_b_e_out_v, rtr_b_w_out_v;
wire [NUM_CORES*PACKET_W-1:0] rtr_b_n_out_d, rtr_b_s_out_d, rtr_b_e_out_d, rtr_b_w_out_d;
wire [NUM_CORES-1:0]          rtr_b_n_in_r,  rtr_b_s_in_r,  rtr_b_e_in_r,  rtr_b_w_in_r;
|
| 287 |
+
|
| 288 |
+
// ---------------------------------------------------------------------------
// Per-core slice: one neuron core, one spike-capture FIFO and one network-A
// router for every core id. Cores are laid out row-major on a
// MESH_X x MESH_Y grid.
// ---------------------------------------------------------------------------
genvar gi;
generate
    for (gi = 0; gi < NUM_CORES; gi = gi + 1) begin : gen_core

        // ---- Mesh position of this core and ids of its neighbours ---------
        localparam RX    = gi % MESH_X;
        localparam RY    = gi / MESH_X;
        localparam HAS_N = (RY < MESH_Y - 1) ? 1 : 0;
        localparam HAS_S = (RY > 0)          ? 1 : 0;
        localparam HAS_E = (RX < MESH_X - 1) ? 1 : 0;
        localparam HAS_W = (RX > 0)          ? 1 : 0;
        // Neighbour ids fall back to 0 when the neighbour does not exist; the
        // HAS_* flags keep those indices from ever being selected.
        localparam N_ID  = HAS_N ? ((RY+1)*MESH_X + RX) : 0;
        localparam S_ID  = HAS_S ? ((RY-1)*MESH_X + RX) : 0;
        localparam E_ID  = HAS_E ? (RY*MESH_X + (RX+1)) : 0;
        localparam W_ID  = HAS_W ? (RY*MESH_X + (RX-1)) : 0;

        // ---- Current injection into this core -----------------------------
        // Source is the host (SM_IDLE, addressed via ext_core) or a packet
        // arriving on either router's local output (SM_PKT_DRAIN).
        wire this_ext_valid =
            (mesh_state == SM_IDLE && ext_valid && ext_core == gi[CORE_ID_BITS-1:0]) ||
            (mesh_state == SM_PKT_DRAIN && (rtr_local_out_valid[gi] || rtr_b_local_out_valid[gi]));

        // Network A wins when both networks present a packet.
        wire [PACKET_W-1:0] drain_pkt =
            rtr_local_out_valid[gi] ? rtr_local_out_data[gi*PACKET_W +: PACKET_W]
                                    : rtr_b_local_out_data[gi*PACKET_W +: PACKET_W];

        wire [NEURON_BITS-1:0] this_ext_nid =
            (mesh_state == SM_PKT_DRAIN) ? drain_pkt[DATA_WIDTH +: NEURON_BITS] : ext_neuron_id;
        wire signed [DATA_WIDTH-1:0] this_ext_cur =
            (mesh_state == SM_PKT_DRAIN) ? drain_pkt[DATA_WIDTH-1:0] : ext_current;

        // ---- Programming write enables ------------------------------------
        // A write reaches this core only when it is addressed to it and the
        // mesh is idle.
        wire this_pool_we  = prog_pool_we  && (prog_pool_core  == gi[CORE_ID_BITS-1:0]) &&
                             (mesh_state == SM_IDLE);
        wire this_index_we = prog_index_we && (prog_index_core == gi[CORE_ID_BITS-1:0]) &&
                             (mesh_state == SM_IDLE);
        wire this_param_we = prog_param_we && (prog_param_core == gi[CORE_ID_BITS-1:0]) &&
                             (mesh_state == SM_IDLE);
        wire this_delay_we = prog_delay_we && (prog_delay_core == gi[CORE_ID_BITS-1:0]) &&
                             (mesh_state == SM_IDLE);
        wire this_ucode_we = prog_ucode_we && (prog_ucode_core == gi[CORE_ID_BITS-1:0]) &&
                             (mesh_state == SM_IDLE);

        // ---- Neuron core ---------------------------------------------------
        // The core's own state/total_spikes/timestep_count outputs are left
        // unconnected. async_enable is not passed to the core.
        scalable_core_v2 #(
            .NUM_NEURONS   (NUM_NEURONS),
            .NEURON_BITS   (NEURON_BITS),
            .DATA_WIDTH    (DATA_WIDTH),
            .POOL_DEPTH    (POOL_DEPTH),
            .POOL_ADDR_BITS(POOL_ADDR_BITS),
            .COUNT_BITS    (COUNT_BITS),
            .REV_FANIN     (REV_FANIN),
            .REV_SLOT_BITS (REV_SLOT_BITS),
            .THRESHOLD     (THRESHOLD),
            .LEAK_RATE     (LEAK_RATE),
            .REFRAC_CYCLES (REFRAC_CYCLES),
            .GRADE_SHIFT   (GRADE_SHIFT)
        ) core (
            .clk               (clk),
            .rst_n             (rst_n),
            .start             (core_start_r[gi]),
            // feature enables
            .learn_enable      (learn_enable),
            .graded_enable     (graded_enable),
            .dendritic_enable  (dendritic_enable),
            .threefactor_enable(threefactor_enable),
            .noise_enable      (noise_enable),
            .skip_idle_enable  (skip_idle_enable),
            .scale_u_enable    (scale_u_enable),
            .reward_value      (reward_value),
            // injected current
            .ext_valid         (this_ext_valid),
            .ext_neuron_id     (this_ext_nid),
            .ext_current       (this_ext_cur),
            // synapse pool programming
            .pool_we           (this_pool_we),
            .pool_addr_in      (prog_pool_addr),
            .pool_src_in       (prog_pool_src),
            .pool_target_in    (prog_pool_target),
            .pool_weight_in    (prog_pool_weight),
            .pool_comp_in      (prog_pool_comp),
            // index table programming
            .index_we          (this_index_we),
            .index_neuron_in   (prog_index_neuron),
            .index_base_in     (prog_index_base),
            .index_count_in    (prog_index_count),
            .index_format_in   (prog_index_format),
            // delay programming
            .delay_we          (this_delay_we),
            .delay_addr_in     (prog_delay_addr),
            .delay_value_in    (prog_delay_value),
            // microcode programming
            .ucode_prog_we     (this_ucode_we),
            .ucode_prog_addr   (prog_ucode_addr),
            .ucode_prog_data   (prog_ucode_data),
            // per-neuron parameter programming
            .prog_param_we     (this_param_we),
            .prog_param_neuron (prog_param_neuron),
            .prog_param_id     (prog_param_id),
            .prog_param_value  (prog_param_value),
            // state probe
            .probe_read        (probe_read && (probe_core == gi[CORE_ID_BITS-1:0])),
            .probe_neuron      (probe_neuron),
            .probe_state_id    (probe_state_id),
            .probe_pool_addr   (probe_pool_addr),
            .probe_data        (core_probe_data[gi*DATA_WIDTH +: DATA_WIDTH]),
            .probe_valid       (core_probe_valid[gi]),
            // status and spikes
            .timestep_done     (core_done[gi]),
            .spike_out_valid   (core_spike_valid[gi]),
            .spike_out_id      (core_spike_id[gi*NEURON_BITS +: NEURON_BITS]),
            .spike_out_payload (core_spike_payload[gi*8 +: 8]),
            .state_out         (),
            .total_spikes      (),
            .timestep_count    (),
            .core_idle         (core_idle_bus[gi])
        );

        // ---- Spike capture FIFO -------------------------------------------
        // Records {neuron id, payload} for spikes emitted while the mesh is in
        // SM_RUN_WAIT. The full flag and count are not observed.
        spike_fifo #(
            .ID_WIDTH(CAP_WIDTH),
            .DEPTH   (64),
            .PTR_BITS(6)
        ) capture_fifo (
            .clk      (clk),
            .rst_n    (rst_n),
            .clear    (cap_clear[gi]),
            .push     (core_spike_valid[gi] && (mesh_state == SM_RUN_WAIT)),
            .push_data({core_spike_id[gi*NEURON_BITS +: NEURON_BITS],
                        core_spike_payload[gi*8 +: 8]}),
            .pop      (cap_pop[gi]),
            .pop_data (cap_data[gi*CAP_WIDTH +: CAP_WIDTH]),
            .empty    (cap_empty[gi]),
            .full     (),
            .count    ()
        );

        // ---- Neighbour hookup ---------------------------------------------
        // Each input comes from the facing output of the neighbour (e.g. our
        // north input is the northern neighbour's south output). On a mesh
        // edge the input is held invalid and the output is always ready.
        wire                n_in_v  = HAS_N ? rtr_s_out_v[N_ID] : 1'b0;
        wire [PACKET_W-1:0] n_in_d  = HAS_N ? rtr_s_out_d[N_ID*PACKET_W +: PACKET_W] : {PACKET_W{1'b0}};
        wire                n_out_r = HAS_N ? rtr_s_in_r[N_ID] : 1'b1;

        wire                s_in_v  = HAS_S ? rtr_n_out_v[S_ID] : 1'b0;
        wire [PACKET_W-1:0] s_in_d  = HAS_S ? rtr_n_out_d[S_ID*PACKET_W +: PACKET_W] : {PACKET_W{1'b0}};
        wire                s_out_r = HAS_S ? rtr_n_in_r[S_ID] : 1'b1;

        wire                e_in_v  = HAS_E ? rtr_w_out_v[E_ID] : 1'b0;
        wire [PACKET_W-1:0] e_in_d  = HAS_E ? rtr_w_out_d[E_ID*PACKET_W +: PACKET_W] : {PACKET_W{1'b0}};
        wire                e_out_r = HAS_E ? rtr_w_in_r[E_ID] : 1'b1;

        wire                w_in_v  = HAS_W ? rtr_e_out_v[W_ID] : 1'b0;
        wire [PACKET_W-1:0] w_in_d  = HAS_W ? rtr_e_out_d[W_ID*PACKET_W +: PACKET_W] : {PACKET_W{1'b0}};
        wire                w_out_r = HAS_W ? rtr_e_in_r[W_ID] : 1'b1;

        // ---- Network-A router ---------------------------------------------
        async_router #(
            .PACKET_W     (PACKET_W),
            .COORD_BITS   (COORD_BITS),
            .FIFO_DEPTH   (16),
            .FIFO_PTR_BITS(4)
        ) router (
            .clk            (clk),
            .rst_n          (rst_n),
            .my_x           (core_to_x(gi[CORE_ID_BITS-1:0])),
            .my_y           (core_to_y(gi[CORE_ID_BITS-1:0])),
            // local port
            .local_in_valid (rtr_local_in_valid[gi]),
            .local_in_ready (rtr_local_in_ready[gi]),
            .local_in_data  (rtr_local_in_data[gi*PACKET_W +: PACKET_W]),
            .local_out_valid(rtr_local_out_valid[gi]),
            .local_out_ready(rtr_local_out_ready[gi]),
            .local_out_data (rtr_local_out_data[gi*PACKET_W +: PACKET_W]),
            // north port
            .north_in_valid (n_in_v),
            .north_in_ready (rtr_n_in_r[gi]),
            .north_in_data  (n_in_d),
            .north_out_valid(rtr_n_out_v[gi]),
            .north_out_ready(n_out_r),
            .north_out_data (rtr_n_out_d[gi*PACKET_W +: PACKET_W]),
            // south port
            .south_in_valid (s_in_v),
            .south_in_ready (rtr_s_in_r[gi]),
            .south_in_data  (s_in_d),
            .south_out_valid(rtr_s_out_v[gi]),
            .south_out_ready(s_out_r),
            .south_out_data (rtr_s_out_d[gi*PACKET_W +: PACKET_W]),
            // east port
            .east_in_valid  (e_in_v),
            .east_in_ready  (rtr_e_in_r[gi]),
            .east_in_data   (e_in_d),
            .east_out_valid (rtr_e_out_v[gi]),
            .east_out_ready (e_out_r),
            .east_out_data  (rtr_e_out_d[gi*PACKET_W +: PACKET_W]),
            // west port
            .west_in_valid  (w_in_v),
            .west_in_ready  (rtr_w_in_r[gi]),
            .west_in_data   (w_in_d),
            .west_out_valid (rtr_w_out_v[gi]),
            .west_out_ready (w_out_r),
            .west_out_data  (rtr_w_out_d[gi*PACKET_W +: PACKET_W]),
            .idle           (rtr_idle[gi])
        );
    end
endgenerate
|
| 437 |
+
|
| 438 |
+
// ---------------------------------------------------------------------------
// Optional second router network (B). With DUAL_NOC set, one extra router per
// core is instantiated and wired exactly like network A, using the rtr_b_*
// nets. Otherwise the network-B status nets are tied off so the rest of the
// design sees an always-idle, always-ready, never-valid network.
// ---------------------------------------------------------------------------
generate
    if (DUAL_NOC) begin : gen_net_b
        genvar bi;
        for (bi = 0; bi < NUM_CORES; bi = bi + 1) begin : gen_rtr_b
            // Mesh position (row-major) and neighbour ids; ids fall back to 0
            // where the neighbour does not exist and are then never selected.
            localparam BRX     = bi % MESH_X;
            localparam BRY     = bi / MESH_X;
            localparam B_HAS_N = (BRY < MESH_Y - 1) ? 1 : 0;
            localparam B_HAS_S = (BRY > 0)          ? 1 : 0;
            localparam B_HAS_E = (BRX < MESH_X - 1) ? 1 : 0;
            localparam B_HAS_W = (BRX > 0)          ? 1 : 0;
            localparam BN_ID   = B_HAS_N ? ((BRY+1)*MESH_X + BRX) : 0;
            localparam BS_ID   = B_HAS_S ? ((BRY-1)*MESH_X + BRX) : 0;
            localparam BE_ID   = B_HAS_E ? (BRY*MESH_X + (BRX+1)) : 0;
            localparam BW_ID   = B_HAS_W ? (BRY*MESH_X + (BRX-1)) : 0;

            // Each input is fed by the facing output of the neighbour; mesh
            // edges see an invalid input and an always-ready output.
            wire                bn_in_v  = B_HAS_N ? rtr_b_s_out_v[BN_ID] : 1'b0;
            wire [PACKET_W-1:0] bn_in_d  = B_HAS_N ? rtr_b_s_out_d[BN_ID*PACKET_W +: PACKET_W]
                                                   : {PACKET_W{1'b0}};
            wire                bn_out_r = B_HAS_N ? rtr_b_s_in_r[BN_ID] : 1'b1;

            wire                bs_in_v  = B_HAS_S ? rtr_b_n_out_v[BS_ID] : 1'b0;
            wire [PACKET_W-1:0] bs_in_d  = B_HAS_S ? rtr_b_n_out_d[BS_ID*PACKET_W +: PACKET_W]
                                                   : {PACKET_W{1'b0}};
            wire                bs_out_r = B_HAS_S ? rtr_b_n_in_r[BS_ID] : 1'b1;

            wire                be_in_v  = B_HAS_E ? rtr_b_w_out_v[BE_ID] : 1'b0;
            wire [PACKET_W-1:0] be_in_d  = B_HAS_E ? rtr_b_w_out_d[BE_ID*PACKET_W +: PACKET_W]
                                                   : {PACKET_W{1'b0}};
            wire                be_out_r = B_HAS_E ? rtr_b_w_in_r[BE_ID] : 1'b1;

            wire                bw_in_v  = B_HAS_W ? rtr_b_e_out_v[BW_ID] : 1'b0;
            wire [PACKET_W-1:0] bw_in_d  = B_HAS_W ? rtr_b_e_out_d[BW_ID*PACKET_W +: PACKET_W]
                                                   : {PACKET_W{1'b0}};
            wire                bw_out_r = B_HAS_W ? rtr_b_e_in_r[BW_ID] : 1'b1;

            async_router #(
                .PACKET_W     (PACKET_W),
                .COORD_BITS   (COORD_BITS),
                .FIFO_DEPTH   (16),
                .FIFO_PTR_BITS(4)
            ) router_b (
                .clk            (clk),
                .rst_n          (rst_n),
                .my_x           (core_to_x(bi[CORE_ID_BITS-1:0])),
                .my_y           (core_to_y(bi[CORE_ID_BITS-1:0])),
                // local port
                .local_in_valid (rtr_b_local_in_valid[bi]),
                .local_in_ready (rtr_b_local_in_ready[bi]),
                .local_in_data  (rtr_b_local_in_data[bi*PACKET_W +: PACKET_W]),
                .local_out_valid(rtr_b_local_out_valid[bi]),
                .local_out_ready(rtr_b_local_out_ready[bi]),
                .local_out_data (rtr_b_local_out_data[bi*PACKET_W +: PACKET_W]),
                // north port
                .north_in_valid (bn_in_v),
                .north_in_ready (rtr_b_n_in_r[bi]),
                .north_in_data  (bn_in_d),
                .north_out_valid(rtr_b_n_out_v[bi]),
                .north_out_ready(bn_out_r),
                .north_out_data (rtr_b_n_out_d[bi*PACKET_W +: PACKET_W]),
                // south port
                .south_in_valid (bs_in_v),
                .south_in_ready (rtr_b_s_in_r[bi]),
                .south_in_data  (bs_in_d),
                .south_out_valid(rtr_b_s_out_v[bi]),
                .south_out_ready(bs_out_r),
                .south_out_data (rtr_b_s_out_d[bi*PACKET_W +: PACKET_W]),
                // east port
                .east_in_valid  (be_in_v),
                .east_in_ready  (rtr_b_e_in_r[bi]),
                .east_in_data   (be_in_d),
                .east_out_valid (rtr_b_e_out_v[bi]),
                .east_out_ready (be_out_r),
                .east_out_data  (rtr_b_e_out_d[bi*PACKET_W +: PACKET_W]),
                // west port
                .west_in_valid  (bw_in_v),
                .west_in_ready  (rtr_b_w_in_r[bi]),
                .west_in_data   (bw_in_d),
                .west_out_valid (rtr_b_w_out_v[bi]),
                .west_out_ready (bw_out_r),
                .west_out_data  (rtr_b_w_out_d[bi*PACKET_W +: PACKET_W]),
                .idle           (rtr_b_idle[bi])
            );
        end
    end else begin : gen_no_net_b
        // No second network: report idle, never valid, always ready.
        assign rtr_b_idle            = {NUM_CORES{1'b1}};
        assign rtr_b_local_out_valid = {NUM_CORES{1'b0}};
        assign rtr_b_local_out_data  = {NUM_CORES*PACKET_W{1'b0}};
        assign rtr_b_local_in_ready  = {NUM_CORES{1'b1}};
    end
endgenerate
|
| 518 |
+
|
| 519 |
+
// ---------------------------------------------------------------------------
// Routing working registers (written by the mesh FSM).
// ---------------------------------------------------------------------------
reg [CORE_ID_BITS-1:0]           route_core_idx;  // core whose capture FIFO is being served
reg [NEURON_BITS-1:0]            route_neuron;    // source neuron of the popped spike
reg [7:0]                        route_payload;   // payload of the popped spike
reg [ROUTE_SLOT_BITS-1:0]        route_slot;      // current route-table slot
reg [GLOBAL_ROUTE_SLOT_BITS-1:0] global_slot;     // current global-route-table slot
reg [3:0]                        drain_wait;      // consecutive quiet cycles seen in SM_PKT_DRAIN

// ---------------------------------------------------------------------------
// Graded-spike scaling: current = (weight * payload) >>> GRADE_SHIFT.
// The weight is sign-extended to 32 bits; the 8-bit payload is zero-extended,
// so it is treated as a non-negative value. The shift is arithmetic because
// the product is signed.
// NOTE(review): the shifted product is truncated to DATA_WIDTH bits with no
// saturation -- confirm that wrap-around on large weight*payload is intended.
// ---------------------------------------------------------------------------
wire signed [31:0] route_payload_ext;
assign route_payload_ext = {24'd0, route_payload};

// Local route table entry
wire signed [31:0]           route_weight_ext;
wire signed [31:0]           route_graded_product;
wire signed [DATA_WIDTH-1:0] route_graded_current;
assign route_weight_ext     = rt_weight;
assign route_graded_product = route_weight_ext * route_payload_ext;
assign route_graded_current = route_graded_product >>> GRADE_SHIFT;

// Global route table entry
wire signed [31:0]           grt_weight_ext;
wire signed [31:0]           grt_graded_product;
wire signed [DATA_WIDTH-1:0] grt_graded_current;
assign grt_weight_ext     = grt_weight;
assign grt_graded_product = grt_weight_ext * route_payload_ext;
assign grt_graded_current = grt_graded_product >>> GRADE_SHIFT;

// Effective weight delivered to the destination: payload-scaled when graded
// spikes are enabled, otherwise the raw table weight.
wire signed [DATA_WIDTH-1:0] rt_eff_weight;
wire signed [DATA_WIDTH-1:0] grt_eff_weight;
assign rt_eff_weight  = graded_enable ? route_graded_current : rt_weight;
assign grt_eff_weight = graded_enable ? grt_graded_current   : grt_weight;
|
| 537 |
+
|
| 538 |
+
always @(posedge clk or negedge rst_n) begin
|
| 539 |
+
if (!rst_n) begin
|
| 540 |
+
mesh_state <= SM_IDLE;
|
| 541 |
+
timestep_done <= 0;
|
| 542 |
+
total_spikes <= 0;
|
| 543 |
+
timestep_count <= 0;
|
| 544 |
+
core_start_r <= 0;
|
| 545 |
+
route_core_idx <= 0;
|
| 546 |
+
route_neuron <= 0;
|
| 547 |
+
route_payload <= 0;
|
| 548 |
+
route_slot <= 0;
|
| 549 |
+
global_slot <= 0;
|
| 550 |
+
drain_wait <= 0;
|
| 551 |
+
rt_we <= 0;
|
| 552 |
+
rt_addr <= 0;
|
| 553 |
+
grt_we <= 0;
|
| 554 |
+
grt_addr <= 0;
|
| 555 |
+
cap_pop <= 0;
|
| 556 |
+
cap_clear <= 0;
|
| 557 |
+
rtr_local_in_valid <= 0;
|
| 558 |
+
rtr_local_in_data <= 0;
|
| 559 |
+
rtr_b_local_in_valid <= 0;
|
| 560 |
+
rtr_b_local_in_data <= 0;
|
| 561 |
+
end else begin
|
| 562 |
+
timestep_done <= 0;
|
| 563 |
+
core_start_r <= 0;
|
| 564 |
+
rt_we <= 0;
|
| 565 |
+
grt_we <= 0;
|
| 566 |
+
cap_pop <= 0;
|
| 567 |
+
cap_clear <= 0;
|
| 568 |
+
rtr_local_in_valid <= 0;
|
| 569 |
+
rtr_b_local_in_valid <= 0;
|
| 570 |
+
|
| 571 |
+
total_spikes <= total_spikes + popcount(core_spike_valid);
|
| 572 |
+
|
| 573 |
+
case (mesh_state)
|
| 574 |
+
SM_IDLE: begin
|
| 575 |
+
if (start) begin
|
| 576 |
+
drain_wait <= 0;
|
| 577 |
+
mesh_state <= SM_PKT_DRAIN;
|
| 578 |
+
end
|
| 579 |
+
end
|
| 580 |
+
|
| 581 |
+
SM_PKT_DRAIN: begin
|
| 582 |
+
if ((&rtr_idle) && (&rtr_b_idle) && !(|rtr_local_out_valid) && !(|rtr_b_local_out_valid)) begin
|
| 583 |
+
drain_wait <= drain_wait + 1;
|
| 584 |
+
if (drain_wait >= 4'd3)
|
| 585 |
+
mesh_state <= SM_START;
|
| 586 |
+
end else begin
|
| 587 |
+
drain_wait <= 0;
|
| 588 |
+
end
|
| 589 |
+
end
|
| 590 |
+
|
| 591 |
+
SM_START: begin
|
| 592 |
+
core_start_r <= {NUM_CORES{1'b1}};
|
| 593 |
+
mesh_state <= SM_RUN_WAIT;
|
| 594 |
+
end
|
| 595 |
+
|
| 596 |
+
SM_RUN_WAIT: begin
|
| 597 |
+
if (sync_all_done) begin
|
| 598 |
+
route_core_idx <= 0;
|
| 599 |
+
mesh_state <= SM_ROUTE_POP;
|
| 600 |
+
end
|
| 601 |
+
end
|
| 602 |
+
|
| 603 |
+
SM_ROUTE_POP: begin
|
| 604 |
+
if (cap_empty[route_core_idx]) begin
|
| 605 |
+
if (route_core_idx == NUM_CORES - 1)
|
| 606 |
+
mesh_state <= SM_DONE;
|
| 607 |
+
else
|
| 608 |
+
route_core_idx <= route_core_idx + 1;
|
| 609 |
+
end else begin
|
| 610 |
+
cap_pop[route_core_idx] <= 1;
|
| 611 |
+
route_neuron <= cap_data[route_core_idx * CAP_WIDTH + 8 +: NEURON_BITS];
|
| 612 |
+
route_payload <= cap_data[route_core_idx * CAP_WIDTH +: 8];
|
| 613 |
+
route_slot <= 0;
|
| 614 |
+
mesh_state <= SM_ROUTE_ADDR;
|
| 615 |
+
end
|
| 616 |
+
end
|
| 617 |
+
|
| 618 |
+
SM_ROUTE_ADDR: begin
|
| 619 |
+
rt_addr <= {route_core_idx, route_neuron, route_slot};
|
| 620 |
+
mesh_state <= SM_ROUTE_WAIT;
|
| 621 |
+
end
|
| 622 |
+
|
| 623 |
+
SM_ROUTE_WAIT: begin
|
| 624 |
+
mesh_state <= SM_ROUTE_READ;
|
| 625 |
+
end
|
| 626 |
+
|
| 627 |
+
SM_ROUTE_READ: begin
|
| 628 |
+
if (rt_valid) begin
|
| 629 |
+
if (route_core_idx[0] == 1'b0 || !DUAL_NOC) begin
|
| 630 |
+
if (rtr_local_in_ready[route_core_idx]) begin
|
| 631 |
+
rtr_local_in_valid[route_core_idx] <= 1;
|
| 632 |
+
rtr_local_in_data[route_core_idx*PACKET_W +: PACKET_W] <=
|
| 633 |
+
{core_to_x(rt_dest_core), core_to_y(rt_dest_core),
|
| 634 |
+
rt_dest_nrn, rt_eff_weight};
|
| 635 |
+
end
|
| 636 |
+
end else begin
|
| 637 |
+
if (rtr_b_local_in_ready[route_core_idx]) begin
|
| 638 |
+
rtr_b_local_in_valid[route_core_idx] <= 1;
|
| 639 |
+
rtr_b_local_in_data[route_core_idx*PACKET_W +: PACKET_W] <=
|
| 640 |
+
{core_to_x(rt_dest_core), core_to_y(rt_dest_core),
|
| 641 |
+
rt_dest_nrn, rt_eff_weight};
|
| 642 |
+
end
|
| 643 |
+
end
|
| 644 |
+
end
|
| 645 |
+
if (route_slot < ROUTE_FANOUT - 1) begin
|
| 646 |
+
route_slot <= route_slot + 1;
|
| 647 |
+
mesh_state <= SM_ROUTE_ADDR;
|
| 648 |
+
end else begin
|
| 649 |
+
global_slot <= 0;
|
| 650 |
+
mesh_state <= SM_GRT_ADDR;
|
| 651 |
+
end
|
| 652 |
+
end
|
| 653 |
+
|
| 654 |
+
SM_GRT_ADDR: begin
|
| 655 |
+
grt_addr <= {route_core_idx, route_neuron, global_slot};
|
| 656 |
+
mesh_state <= SM_GRT_WAIT;
|
| 657 |
+
end
|
| 658 |
+
|
| 659 |
+
SM_GRT_WAIT: begin
|
| 660 |
+
mesh_state <= SM_GRT_READ;
|
| 661 |
+
end
|
| 662 |
+
|
| 663 |
+
SM_GRT_READ: begin
|
| 664 |
+
if (grt_valid) begin
|
| 665 |
+
if (route_core_idx[0] == 1'b0 || !DUAL_NOC) begin
|
| 666 |
+
if (rtr_local_in_ready[route_core_idx]) begin
|
| 667 |
+
rtr_local_in_valid[route_core_idx] <= 1;
|
| 668 |
+
rtr_local_in_data[route_core_idx*PACKET_W +: PACKET_W] <=
|
| 669 |
+
{core_to_x(grt_dest_core), core_to_y(grt_dest_core),
|
| 670 |
+
grt_dest_nrn, grt_eff_weight};
|
| 671 |
+
end
|
| 672 |
+
end else begin
|
| 673 |
+
if (rtr_b_local_in_ready[route_core_idx]) begin
|
| 674 |
+
rtr_b_local_in_valid[route_core_idx] <= 1;
|
| 675 |
+
rtr_b_local_in_data[route_core_idx*PACKET_W +: PACKET_W] <=
|
| 676 |
+
{core_to_x(grt_dest_core), core_to_y(grt_dest_core),
|
| 677 |
+
grt_dest_nrn, grt_eff_weight};
|
| 678 |
+
end
|
| 679 |
+
end
|
| 680 |
+
end
|
| 681 |
+
if (global_slot < GLOBAL_ROUTE_SLOTS - 1) begin
|
| 682 |
+
global_slot <= global_slot + 1;
|
| 683 |
+
mesh_state <= SM_GRT_ADDR;
|
| 684 |
+
end else begin
|
| 685 |
+
mesh_state <= SM_ROUTE_POP;
|
| 686 |
+
end
|
| 687 |
+
end
|
| 688 |
+
|
| 689 |
+
SM_DONE: begin
|
| 690 |
+
cap_clear <= {NUM_CORES{1'b1}};
|
| 691 |
+
timestep_done <= 1;
|
| 692 |
+
timestep_count <= timestep_count + 1;
|
| 693 |
+
mesh_state <= SM_IDLE;
|
| 694 |
+
end
|
| 695 |
+
|
| 696 |
+
default: mesh_state <= SM_IDLE;
|
| 697 |
+
endcase
|
| 698 |
+
end
|
| 699 |
+
end
|
| 700 |
+
|
| 701 |
+
endmodule
|
rtl/async_router.v
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Async Router
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
// -----------------------------------------------------------------------------
// async_router
//
// Five-port (local / north / south / east / west) packet router.
//
//   * Every input port feeds its own `spike_fifo` instance.
//   * The destination coordinates live in the top 2*COORD_BITS bits of a
//     packet: X in [PACKET_W-1 -: COORD_BITS], Y directly below it.
//   * Routing is dimension ordered: X is corrected first (east/west), then Y
//     (north/south); a packet whose coordinates match my_x/my_y leaves on the
//     local port.
//   * Each output port is a single registered slot (valid + data).  An input
//     FIFO head is granted only when its target slot is empty and not already
//     claimed by a higher-priority input in the same cycle.
//   * Priority rotates: `arb_ptr` names the input examined first and advances
//     by one on every clock, whether or not anything was granted.
//
// Note: all logic in this module is clocked by the single `clk` input.
//
// Parameters
//   PACKET_W       packet width in bits
//   COORD_BITS     width of each destination coordinate field
//   FIFO_DEPTH     depth passed to each input spike_fifo
//   FIFO_PTR_BITS  pointer width passed to each input spike_fifo
//
// `idle` is high when all five input FIFOs are empty and none of the four
// mesh-facing output slots (N/S/E/W) holds a packet.  The local output slot is
// deliberately not part of this expression.
// -----------------------------------------------------------------------------
module async_router #(
    parameter PACKET_W      = 34,
    parameter COORD_BITS    = 4,
    parameter FIFO_DEPTH    = 16,
    parameter FIFO_PTR_BITS = 4
)(
    input  wire                  clk,
    input  wire                  rst_n,
    input  wire [COORD_BITS-1:0] my_x,
    input  wire [COORD_BITS-1:0] my_y,

    input  wire                  local_in_valid,
    output wire                  local_in_ready,
    input  wire [PACKET_W-1:0]   local_in_data,
    output wire                  local_out_valid,
    input  wire                  local_out_ready,
    output wire [PACKET_W-1:0]   local_out_data,

    input  wire                  north_in_valid,
    output wire                  north_in_ready,
    input  wire [PACKET_W-1:0]   north_in_data,
    output wire                  north_out_valid,
    input  wire                  north_out_ready,
    output wire [PACKET_W-1:0]   north_out_data,

    input  wire                  south_in_valid,
    output wire                  south_in_ready,
    input  wire [PACKET_W-1:0]   south_in_data,
    output wire                  south_out_valid,
    input  wire                  south_out_ready,
    output wire [PACKET_W-1:0]   south_out_data,

    input  wire                  east_in_valid,
    output wire                  east_in_ready,
    input  wire [PACKET_W-1:0]   east_in_data,
    output wire                  east_out_valid,
    input  wire                  east_out_ready,
    output wire [PACKET_W-1:0]   east_out_data,

    input  wire                  west_in_valid,
    output wire                  west_in_ready,
    input  wire [PACKET_W-1:0]   west_in_data,
    output wire                  west_out_valid,
    input  wire                  west_out_ready,
    output wire [PACKET_W-1:0]   west_out_data,

    output wire                  idle
);

    // Port indices.  The packed 5-bit vectors below are ordered so that bit N
    // belongs to port N, i.e. {west, east, south, north, local}.
    localparam P_LOCAL = 0, P_NORTH = 1, P_SOUTH = 2, P_EAST = 3, P_WEST = 4;

    // Destination-coordinate fields inside a packet.
    localparam DX_MSB = PACKET_W - 1;
    localparam DX_LSB = PACKET_W - COORD_BITS;
    localparam DY_MSB = DX_LSB - 1;
    localparam DY_LSB = DX_LSB - COORD_BITS;

    // ------------------------------------------------------------------
    // Input side: one FIFO per port
    // ------------------------------------------------------------------
    wire [4:0]          fifo_empty, fifo_full;
    wire [PACKET_W-1:0] fifo_head [0:4];
    wire [4:0]          fifo_push;
    wire [4:0]          fifo_pop;

    wire [4:0] in_valid = {west_in_valid, east_in_valid, south_in_valid,
                           north_in_valid, local_in_valid};

    // A word is pushed whenever the sender asserts valid and there is room;
    // ready is simply "not full".
    assign fifo_push = in_valid & ~fifo_full;

    assign {west_in_ready, east_in_ready, south_in_ready,
            north_in_ready, local_in_ready} = ~fifo_full;

    wire [PACKET_W-1:0] in_data [0:4];
    assign in_data[P_WEST]  = west_in_data;
    assign in_data[P_EAST]  = east_in_data;
    assign in_data[P_SOUTH] = south_in_data;
    assign in_data[P_NORTH] = north_in_data;
    assign in_data[P_LOCAL] = local_in_data;

    genvar gi;
    generate
        for (gi = 0; gi < 5; gi = gi + 1) begin : gen_fifo
            spike_fifo #(
                .ID_WIDTH (PACKET_W),
                .DEPTH    (FIFO_DEPTH),
                .PTR_BITS (FIFO_PTR_BITS)
            ) input_fifo (
                .clk       (clk),
                .rst_n     (rst_n),
                .push      (fifo_push[gi]),
                .pop       (fifo_pop[gi]),
                .clear     (1'b0),
                .push_data (in_data[gi]),
                .pop_data  (fifo_head[gi]),
                .empty     (fifo_empty[gi]),
                .full      (fifo_full[gi])
            );
        end
    endgenerate

    // ------------------------------------------------------------------
    // Route computation (X first, then Y)
    // ------------------------------------------------------------------
    // dx/dy: destination coordinates taken from the packet.
    // cx/cy: coordinates of this router.
    // Returns the index of the output port the packet must leave on.
    function [2:0] xy_route;
        input [COORD_BITS-1:0] dx, dy, cx, cy;
        begin
            xy_route = P_LOCAL;
            if (cx < dx)
                xy_route = P_EAST;
            else if (cx > dx)
                xy_route = P_WEST;
            else if (cy < dy)
                xy_route = P_NORTH;
            else if (cy > dy)
                xy_route = P_SOUTH;
        end
    endfunction

    wire [2:0] head_route [0:4];
    generate
        for (gi = 0; gi < 5; gi = gi + 1) begin : gen_route
            assign head_route[gi] = xy_route(
                fifo_head[gi][DX_MSB:DX_LSB],
                fifo_head[gi][DY_MSB:DY_LSB],
                my_x, my_y
            );
        end
    endgenerate

    // ------------------------------------------------------------------
    // Output side: one registered slot per port
    // ------------------------------------------------------------------
    reg [4:0]          out_valid_r;
    reg [PACKET_W-1:0] out_data_r [0:4];

    wire [4:0] out_ready = {west_out_ready, east_out_ready, south_out_ready,
                            north_out_ready, local_out_ready};

    assign {west_out_valid, east_out_valid, south_out_valid,
            north_out_valid, local_out_valid} = out_valid_r;

    assign west_out_data  = out_data_r[P_WEST];
    assign east_out_data  = out_data_r[P_EAST];
    assign south_out_data = out_data_r[P_SOUTH];
    assign north_out_data = out_data_r[P_NORTH];
    assign local_out_data = out_data_r[P_LOCAL];

    // ------------------------------------------------------------------
    // Arbitration
    // ------------------------------------------------------------------
    reg [2:0] arb_ptr;          // input port that gets first pick this cycle

    reg [4:0] comb_grant;       // per-input: head word moves to an output slot
    reg [4:0] comb_out_claim;   // per-output: slot already taken this cycle

    always @(*) begin : grant_logic
        integer   step, src;
        reg [2:0] dst;
        comb_grant     = 5'b0;
        comb_out_claim = 5'b0;
        for (step = 0; step < 5; step = step + 1) begin
            // Walk the inputs starting at arb_ptr, wrapping after port 4.
            src = arb_ptr + step;
            if (src >= 5)
                src = src - 5;
            dst = head_route[src];
            if (!fifo_empty[src] && !comb_grant[src] &&
                !out_valid_r[dst] && !comb_out_claim[dst]) begin
                comb_grant[src]     = 1'b1;
                comb_out_claim[dst] = 1'b1;
            end
        end
    end

    always @(posedge clk or negedge rst_n) begin : seq_logic
        integer k;
        if (!rst_n) begin
            arb_ptr     <= 3'd0;
            out_valid_r <= 5'b0;
            for (k = 0; k < 5; k = k + 1) begin
                out_data_r[k] <= {PACKET_W{1'b0}};
            end
        end else begin
            // Release every slot whose packet was accepted downstream.
            for (k = 0; k < 5; k = k + 1) begin
                if (out_valid_r[k] && out_ready[k]) begin
                    out_valid_r[k] <= 1'b0;
                end
            end

            // Load the slots that were granted this cycle.  Grants only ever
            // target slots that are currently empty, so this never collides
            // with the release loop above.
            for (k = 0; k < 5; k = k + 1) begin
                if (comb_grant[k]) begin
                    out_data_r[head_route[k]]  <= fifo_head[k];
                    out_valid_r[head_route[k]] <= 1'b1;
                end
            end

            // Rotate the priority pointer through 0..4.
            if (arb_ptr == 3'd4)
                arb_ptr <= 3'd0;
            else
                arb_ptr <= arb_ptr + 3'd1;
        end
    end

    // A granted input pops its FIFO in the same cycle.
    assign fifo_pop = comb_grant;

    assign idle = (&fifo_empty) && !(|out_valid_r[P_WEST:P_NORTH]);

endmodule
|
rtl/axi_uart_bridge.v
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// AXI-UART Bridge
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// -----------------------------------------------------------------------------
// axi_uart_bridge
//
// AXI4-Lite slave that exchanges single bytes with a byte-stream "host
// interface" running in a second clock domain.  Two `async_fifo` instances
// (8-bit wide, ADDR_BITS = 5) carry the bytes across the clock boundary.
//
// Clock domains
//   clk / rst_n             AXI side (register file, FIFO write of u_tx_fifo,
//                           FIFO read of u_rx_fifo)
//   clk_neuro / rst_neuro_n byte-stream side (hi_rx_* / hi_tx_* ports)
//
// Naming: "TX"/"RX" are from the AXI master's point of view.
//   u_tx_fifo : AXI write to TX_DATA  -> hi_rx_data / hi_rx_valid
//   u_rx_fifo : hi_tx_data / hi_tx_valid -> AXI read of RX_DATA
//
// Register map (word index = address bits [4:2])
//   0x00 TX_DATA     W   low byte is queued into u_tx_fifo (dropped silently
//                        if the FIFO is full; the write still returns OKAY)
//   0x04 TX_STATUS   R   bit0 = 1 when u_tx_fifo is not full
//   0x08 RX_DATA     R   head byte of u_rx_fifo and pops it; 0 when empty
//   0x0C RX_STATUS   R   bit0 = 1 when u_rx_fifo is not empty
//   0x10 CONTROL     R   always 0
//   0x14 VERSION     R   VERSION_ID parameter
//   0x18 SCRATCH     R/W 32-bit scratch register
//   0x1C CORE_COUNT  R   NUM_CORES parameter
//
// s_axi_wstrb is not examined; every write is treated as a full-word write.
// All responses are OKAY (2'b00).
//
// Fix relative to the previous revision: BVALID / RVALID are now deasserted
// in the same clock edge on which the response handshake completes.
// Previously they stayed high for one extra cycle (the first cycle back in
// S_IDLE), so a master that keeps BREADY / RREADY permanently asserted would
// observe a second, spurious response.
// -----------------------------------------------------------------------------
module axi_uart_bridge #(
    parameter VERSION_ID = 32'hF2_02_03_10,
    parameter NUM_CORES  = 16
)(
    input  wire        clk,
    input  wire        rst_n,
    input  wire        clk_neuro,
    input  wire        rst_neuro_n,

    input  wire [31:0] s_axi_awaddr,
    input  wire        s_axi_awvalid,
    output reg         s_axi_awready,
    input  wire [31:0] s_axi_wdata,
    input  wire [3:0]  s_axi_wstrb,
    input  wire        s_axi_wvalid,
    output reg         s_axi_wready,
    output reg  [1:0]  s_axi_bresp,
    output reg         s_axi_bvalid,
    input  wire        s_axi_bready,
    input  wire [31:0] s_axi_araddr,
    input  wire        s_axi_arvalid,
    output reg         s_axi_arready,
    output reg  [31:0] s_axi_rdata,
    output reg  [1:0]  s_axi_rresp,
    output reg         s_axi_rvalid,
    input  wire        s_axi_rready,

    output reg  [7:0]  hi_rx_data,
    output reg         hi_rx_valid,
    input  wire [7:0]  hi_tx_data,
    input  wire        hi_tx_valid,
    output wire        hi_tx_ready
);

    // Register word indices (address bits [4:2]).
    localparam REG_TX_DATA    = 3'd0;
    localparam REG_TX_STATUS  = 3'd1;
    localparam REG_RX_DATA    = 3'd2;
    localparam REG_RX_STATUS  = 3'd3;
    localparam REG_CONTROL    = 3'd4;
    localparam REG_VERSION    = 3'd5;
    localparam REG_SCRATCH    = 3'd6;
    localparam REG_CORE_COUNT = 3'd7;

    // ------------------------------------------------------------------
    // AXI -> byte stream FIFO (written in clk, read in clk_neuro)
    // ------------------------------------------------------------------
    wire       tx_wr_full;
    wire       tx_rd_empty;
    wire [7:0] tx_rd_data;
    reg        tx_rd_en;
    reg        tx_wr_en;
    reg  [7:0] tx_wr_data;

    async_fifo #(.DATA_WIDTH(8), .ADDR_BITS(5)) u_tx_fifo (
        .wr_clk   (clk),
        .wr_rst_n (rst_n),
        .wr_data  (tx_wr_data),
        .wr_en    (tx_wr_en),
        .wr_full  (tx_wr_full),
        .rd_clk   (clk_neuro),
        .rd_rst_n (rst_neuro_n),
        .rd_en    (tx_rd_en),
        .rd_data  (tx_rd_data),
        .rd_empty (tx_rd_empty)
    );

    // ------------------------------------------------------------------
    // Byte stream -> AXI FIFO (written in clk_neuro, read in clk)
    // ------------------------------------------------------------------
    wire       rx_wr_full;
    wire       rx_rd_empty;
    wire [7:0] rx_rd_data;
    reg        rx_rd_en;
    reg        rx_wr_en;
    reg  [7:0] rx_wr_data;

    async_fifo #(.DATA_WIDTH(8), .ADDR_BITS(5)) u_rx_fifo (
        .wr_clk   (clk_neuro),
        .wr_rst_n (rst_neuro_n),
        .wr_data  (rx_wr_data),
        .wr_en    (rx_wr_en),
        .wr_full  (rx_wr_full),
        .rd_clk   (clk),
        .rd_rst_n (rst_n),
        .rd_en    (rx_rd_en),
        .rd_data  (rx_rd_data),
        .rd_empty (rx_rd_empty)
    );

    // ------------------------------------------------------------------
    // clk_neuro: drain u_tx_fifo onto hi_rx_data / hi_rx_valid
    // ------------------------------------------------------------------
    // hi_rx_valid is a one-cycle pulse.  The `!hi_rx_valid` term prevents a
    // second capture in the cycle where the registered pop (tx_rd_en) has
    // not yet taken effect on the FIFO, so at most one byte is delivered
    // every other clk_neuro cycle.
    always @(posedge clk_neuro or negedge rst_neuro_n) begin
        if (!rst_neuro_n) begin
            hi_rx_data  <= 8'd0;
            hi_rx_valid <= 1'b0;
            tx_rd_en    <= 1'b0;
        end else begin
            hi_rx_valid <= 1'b0;
            tx_rd_en    <= 1'b0;
            if (!tx_rd_empty && !hi_rx_valid) begin
                hi_rx_data  <= tx_rd_data;
                hi_rx_valid <= 1'b1;
                tx_rd_en    <= 1'b1;
            end
        end
    end

    // ------------------------------------------------------------------
    // clk_neuro: capture hi_tx_data into u_rx_fifo
    // ------------------------------------------------------------------
    reg [1:0] rx_holdoff;      // counts down after a capture; ready is low meanwhile
    reg       tx_ready_prev;   // internal_tx_ready delayed by one cycle

    wire internal_tx_ready = ~rx_wr_full & (rx_holdoff == 0);
    wire tx_ready_rising   = internal_tx_ready & ~tx_ready_prev;
    // A byte is taken only when ready has been high for at least one full
    // cycle already (the first cycle after ready rises is skipped).
    // NOTE(review): presumably this avoids re-capturing a byte whose valid is
    // still asserted while the producer reacts to ready - confirm against the
    // hi_tx_* producer.
    wire do_rx_capture     = hi_tx_valid & internal_tx_ready & ~tx_ready_rising;

    assign hi_tx_ready = internal_tx_ready;

    always @(posedge clk_neuro or negedge rst_neuro_n) begin
        if (!rst_neuro_n) begin
            rx_holdoff    <= 2'd0;
            tx_ready_prev <= 1'b1;
            rx_wr_en      <= 1'b0;
            rx_wr_data    <= 8'd0;
        end else begin
            tx_ready_prev <= internal_tx_ready;
            rx_wr_en      <= 1'b0;

            if (rx_holdoff != 0)
                rx_holdoff <= rx_holdoff - 1;

            if (do_rx_capture) begin
                rx_wr_data <= hi_tx_data;
                rx_wr_en   <= 1'b1;
                rx_holdoff <= 2'd2;   // overrides the decrement above
            end
        end
    end

    // ------------------------------------------------------------------
    // clk: AXI4-Lite register interface
    // ------------------------------------------------------------------
    reg [31:0] scratch_reg;

    localparam S_IDLE       = 2'd0;
    localparam S_WRITE_RESP = 2'd1;
    localparam S_READ_RESP  = 2'd2;

    reg [1:0]  axi_state;
    reg [2:0]  wr_reg_addr;
    reg [31:0] wr_data_reg;
    reg [2:0]  rd_reg_addr;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            axi_state     <= S_IDLE;
            s_axi_awready <= 1'b0;
            s_axi_wready  <= 1'b0;
            s_axi_bvalid  <= 1'b0;
            s_axi_bresp   <= 2'b00;
            s_axi_arready <= 1'b0;
            s_axi_rvalid  <= 1'b0;
            s_axi_rdata   <= 32'd0;
            s_axi_rresp   <= 2'b00;
            scratch_reg   <= 32'd0;
            wr_reg_addr   <= 3'd0;
            wr_data_reg   <= 32'd0;
            rd_reg_addr   <= 3'd0;
            tx_wr_en      <= 1'b0;
            tx_wr_data    <= 8'd0;
            rx_rd_en      <= 1'b0;
        end else begin
            // FIFO strobes are single-cycle pulses.
            tx_wr_en <= 1'b0;
            rx_rd_en <= 1'b0;

            case (axi_state)
                S_IDLE: begin
                    s_axi_bvalid <= 1'b0;
                    s_axi_rvalid <= 1'b0;

                    // A write is accepted only once address and data are
                    // both presented; writes take priority over reads.
                    if (s_axi_awvalid && s_axi_wvalid) begin
                        s_axi_awready <= 1'b1;
                        s_axi_wready  <= 1'b1;
                        wr_reg_addr   <= s_axi_awaddr[4:2];
                        wr_data_reg   <= s_axi_wdata;
                        axi_state     <= S_WRITE_RESP;
                    end else if (s_axi_arvalid) begin
                        s_axi_arready <= 1'b1;
                        rd_reg_addr   <= s_axi_araddr[4:2];
                        axi_state     <= S_READ_RESP;
                    end
                end

                S_WRITE_RESP: begin
                    // AWREADY/WREADY were high for exactly this one cycle.
                    s_axi_awready <= 1'b0;
                    s_axi_wready  <= 1'b0;

                    if (!s_axi_bvalid) begin
                        // First cycle in this state: perform the register
                        // write and raise the response.
                        case (wr_reg_addr)
                            REG_TX_DATA: begin
                                if (!tx_wr_full) begin
                                    tx_wr_data <= wr_data_reg[7:0];
                                    tx_wr_en   <= 1'b1;
                                end
                            end
                            REG_SCRATCH: scratch_reg <= wr_data_reg;
                            default: ;
                        endcase
                        s_axi_bvalid <= 1'b1;
                        s_axi_bresp  <= 2'b00;
                    end else if (s_axi_bready) begin
                        // Response accepted: drop BVALID on this very edge so
                        // it is never visible for an extra cycle in S_IDLE.
                        s_axi_bvalid <= 1'b0;
                        axi_state    <= S_IDLE;
                    end
                end

                S_READ_RESP: begin
                    // ARREADY was high for exactly this one cycle.
                    s_axi_arready <= 1'b0;

                    if (!s_axi_rvalid) begin
                        // First cycle in this state: sample the register and
                        // raise the response.
                        case (rd_reg_addr)
                            REG_TX_DATA:    s_axi_rdata <= 32'd0;
                            REG_TX_STATUS:  s_axi_rdata <= {31'd0, ~tx_wr_full};
                            REG_RX_DATA: begin
                                if (!rx_rd_empty) begin
                                    s_axi_rdata <= {24'd0, rx_rd_data};
                                    rx_rd_en    <= 1'b1;   // reading pops the byte
                                end else begin
                                    s_axi_rdata <= 32'd0;
                                end
                            end
                            REG_RX_STATUS:  s_axi_rdata <= {31'd0, ~rx_rd_empty};
                            REG_CONTROL:    s_axi_rdata <= 32'd0;
                            REG_VERSION:    s_axi_rdata <= VERSION_ID;
                            REG_SCRATCH:    s_axi_rdata <= scratch_reg;
                            REG_CORE_COUNT: s_axi_rdata <= NUM_CORES;
                        endcase
                        s_axi_rvalid <= 1'b1;
                        s_axi_rresp  <= 2'b00;
                    end else if (s_axi_rready) begin
                        // Data accepted: drop RVALID on this very edge so it
                        // is never visible for an extra cycle in S_IDLE.
                        s_axi_rvalid <= 1'b0;
                        axi_state    <= S_IDLE;
                    end
                end

                default: axi_state <= S_IDLE;
            endcase
        end
    end

endmodule
|
rtl/chip_link.v
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Chip Link
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// -----------------------------------------------------------------------------
// chip_link
//
// Byte-serial link endpoint.  Outgoing spikes are queued in a TX FIFO and
// sent as four bytes; incoming bytes are reassembled into a spike and queued
// in an RX FIFO.
//
// Wire format (one byte per transfer, as produced/consumed below):
//   byte 0 : 8'h80 | core            (bit 7 set marks the start of a packet)
//   byte 1 : neuron[9:2]
//   byte 2 : {neuron[1:0], payload[7:2]}
//   byte 3 : {payload[1:0], 6'b0}
//
// The byte slices are written with literal bit positions, so the format above
// only holds for the default NEURON_BITS = 10 and for CORE_ID_BITS <= 7.
//
// FIFO sizing: both FIFOs use fixed 9-bit pointers and an 8-bit memory index,
// so TX_DEPTH / RX_DEPTH are expected to be 256.
//
// TX side
//   tx_push with {tx_core, tx_neuron, tx_payload} enqueues a spike unless
//   tx_full is high.  link_tx_valid is a registered pulse: a byte is issued in
//   the cycle after link_tx_ready is sampled high.
//
// RX side
//   Bytes are consumed whenever link_rx_valid is high.  link_rx_ready drops
//   once fewer than four free RX FIFO entries remain.  A reassembled spike is
//   discarded if the RX FIFO is completely full.  rx_current is the 8-bit
//   payload zero-extended to DATA_WIDTH.
//
// Fix relative to the previous revision: the RX FIFO storage, pointers and
// `rx_count` are now declared before `link_rx_ready` reads `rx_count`
// (previously the net was referenced ahead of its declaration, which strict
// Verilog tools reject).
// -----------------------------------------------------------------------------
module chip_link #(
    parameter CORE_ID_BITS = 7,
    parameter NEURON_BITS  = 10,
    parameter DATA_WIDTH   = 16,
    parameter TX_DEPTH     = 256,
    parameter RX_DEPTH     = 256
)(
    input  wire                         clk,
    input  wire                         rst_n,

    input  wire                         tx_push,
    input  wire [CORE_ID_BITS-1:0]      tx_core,
    input  wire [NEURON_BITS-1:0]       tx_neuron,
    input  wire [7:0]                   tx_payload,
    output wire                         tx_full,

    output wire [CORE_ID_BITS-1:0]      rx_core,
    output wire [NEURON_BITS-1:0]       rx_neuron,
    output wire signed [DATA_WIDTH-1:0] rx_current,
    input  wire                         rx_pop,
    output wire                         rx_empty,

    output reg  [7:0]                   link_tx_data,
    output reg                          link_tx_valid,
    input  wire                         link_tx_ready,

    input  wire [7:0]                   link_rx_data,
    input  wire                         link_rx_valid,
    output wire                         link_rx_ready
);

    // ------------------------------------------------------------------
    // TX FIFO
    // ------------------------------------------------------------------
    localparam TX_PKT_W = CORE_ID_BITS + NEURON_BITS + 8;

    reg  [TX_PKT_W-1:0] tx_fifo [0:TX_DEPTH-1];
    reg  [8:0]          tx_wr_ptr, tx_rd_ptr;
    wire [8:0]          tx_count   = tx_wr_ptr - tx_rd_ptr;
    wire                tx_empty_i = (tx_wr_ptr == tx_rd_ptr);
    assign tx_full = (tx_count >= TX_DEPTH);

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            tx_wr_ptr <= 0;
        else if (tx_push && !tx_full) begin
            tx_fifo[tx_wr_ptr[7:0]] <= {tx_core, tx_neuron, tx_payload};
            tx_wr_ptr <= tx_wr_ptr + 1;
        end
    end

    // ------------------------------------------------------------------
    // TX serialiser: one packet -> four bytes
    // ------------------------------------------------------------------
    localparam TX_IDLE = 2'd0, TX_BYTE1 = 2'd1, TX_BYTE2 = 2'd2, TX_BYTE3 = 2'd3;
    reg [1:0]          tx_state;
    reg [TX_PKT_W-1:0] tx_pkt;      // packet currently being serialised

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            tx_state      <= TX_IDLE;
            tx_rd_ptr     <= 0;
            link_tx_valid <= 0;
            link_tx_data  <= 0;
        end else begin
            link_tx_valid <= 0;

            case (tx_state)
                TX_IDLE: begin
                    if (!tx_empty_i && link_tx_ready) begin
                        // Pop the head entry and emit the header byte.
                        tx_pkt        <= tx_fifo[tx_rd_ptr[7:0]];
                        tx_rd_ptr     <= tx_rd_ptr + 1;
                        link_tx_data  <= 8'h80 | tx_fifo[tx_rd_ptr[7:0]][TX_PKT_W-1 -: CORE_ID_BITS];
                        link_tx_valid <= 1;
                        tx_state      <= TX_BYTE1;
                    end
                end

                TX_BYTE1: begin
                    if (link_tx_ready) begin
                        // Upper eight neuron bits (for NEURON_BITS = 10).
                        link_tx_data  <= tx_pkt[NEURON_BITS+7:10];
                        link_tx_valid <= 1;
                        tx_state      <= TX_BYTE2;
                    end
                end

                TX_BYTE2: begin
                    if (link_tx_ready) begin
                        // Low two neuron bits, then payload[7:2].
                        link_tx_data  <= {tx_pkt[9:8], tx_pkt[7:2]};
                        link_tx_valid <= 1;
                        tx_state      <= TX_BYTE3;
                    end
                end

                TX_BYTE3: begin
                    if (link_tx_ready) begin
                        // payload[1:0] in the top two bits, padded with zeros.
                        link_tx_data  <= {tx_pkt[1:0], 6'd0};
                        link_tx_valid <= 1;
                        tx_state      <= TX_IDLE;
                    end
                end
            endcase
        end
    end

    // ------------------------------------------------------------------
    // RX FIFO storage and occupancy (declared ahead of first use)
    // ------------------------------------------------------------------
    localparam RX_PKT_W = CORE_ID_BITS + NEURON_BITS + DATA_WIDTH;

    reg  [RX_PKT_W-1:0] rx_fifo [0:RX_DEPTH-1];
    reg  [8:0]          rx_wr_ptr, rx_rd_ptr;
    wire [8:0]          rx_count = rx_wr_ptr - rx_rd_ptr;
    assign rx_empty = (rx_wr_ptr == rx_rd_ptr);

    // Stop advertising ready while fewer than four entries are free.
    assign link_rx_ready = (rx_count < RX_DEPTH - 4);

    // ------------------------------------------------------------------
    // RX deserialiser: four bytes -> one packet
    // ------------------------------------------------------------------
    localparam RX_IDLE = 2'd0, RX_BYTE1 = 2'd1, RX_BYTE2 = 2'd2, RX_BYTE3 = 2'd3;
    reg [1:0]              rx_state;
    reg [CORE_ID_BITS-1:0] rx_pkt_core;
    reg [NEURON_BITS-1:0]  rx_pkt_neuron;
    reg [7:0]              rx_pkt_payload;
    reg                    rx_push;     // one-cycle strobe: packet fields are complete

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rx_state <= RX_IDLE;
            rx_push  <= 0;
        end else begin
            rx_push <= 0;

            case (rx_state)
                RX_IDLE: begin
                    // Only a byte with bit 7 set starts a packet; anything
                    // else is ignored while idle.
                    if (link_rx_valid && link_rx_data[7]) begin
                        rx_pkt_core <= link_rx_data[CORE_ID_BITS-1:0];
                        rx_state    <= RX_BYTE1;
                    end
                end

                RX_BYTE1: begin
                    if (link_rx_valid) begin
                        rx_pkt_neuron[NEURON_BITS-1:2] <= link_rx_data;
                        rx_state <= RX_BYTE2;
                    end
                end

                RX_BYTE2: begin
                    if (link_rx_valid) begin
                        rx_pkt_neuron[1:0]  <= link_rx_data[7:6];
                        rx_pkt_payload[7:2] <= link_rx_data[5:0];
                        rx_state <= RX_BYTE3;
                    end
                end

                RX_BYTE3: begin
                    if (link_rx_valid) begin
                        rx_pkt_payload[1:0] <= link_rx_data[7:6];
                        rx_push  <= 1;
                        rx_state <= RX_IDLE;
                    end
                end
            endcase
        end
    end

    // ------------------------------------------------------------------
    // RX FIFO write / read pointers
    // ------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rx_wr_ptr <= 0;
        else if (rx_push && rx_count < RX_DEPTH) begin
            // Payload is zero-extended to DATA_WIDTH before storage.
            rx_fifo[rx_wr_ptr[7:0]] <= {rx_pkt_core, rx_pkt_neuron,
                                        {{(DATA_WIDTH-8){1'b0}}, rx_pkt_payload}};
            rx_wr_ptr <= rx_wr_ptr + 1;
        end
    end

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rx_rd_ptr <= 0;
        else if (rx_pop && !rx_empty)
            rx_rd_ptr <= rx_rd_ptr + 1;
    end

    // Head-of-FIFO fields are presented combinationally.
    wire [RX_PKT_W-1:0] rx_top = rx_fifo[rx_rd_ptr[7:0]];
    assign rx_core    = rx_top[RX_PKT_W-1 -: CORE_ID_BITS];
    assign rx_neuron  = rx_top[DATA_WIDTH +: NEURON_BITS];
    assign rx_current = rx_top[DATA_WIDTH-1:0];

endmodule
|
rtl/host_interface.v
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Host Interface
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// host_interface
//
// Byte-stream command decoder that sits between a byte-wide RX/TX channel and
// the mesh programming / control ports.
//
// Framing, as implemented below:
//   * The first byte received while idle is the opcode.
//   * cmd_payload_len() gives the number of payload bytes that follow.
//   * Once the payload is complete, one "execute" state drives the matching
//     mesh_* outputs for a single cycle, then a reply is sent:
//       - RESP_ACK (0xAA) for programming / configuration commands,
//       - RESP_DONE (0xDD) + 32-bit spike delta (MSB first) for CMD_RUN,
//       - mesh_state + 32-bit timestep count (MSB first) for CMD_STATUS,
//       - 2 bytes of mesh_probe_data (MSB first) for CMD_READ_WEIGHT.
//   * Unknown opcodes have payload length 0 and are dropped without a reply.
//
// All *_we / *_valid / mesh_start / mesh_probe_read / tx_valid outputs are
// single-cycle strobes: they are cleared by default every clock and only set
// by the state that owns them.
//
// Bytes presented on rx_valid while the FSM is outside HI_IDLE / HI_RECV are
// not sampled.
// ----------------------------------------------------------------------------
module host_interface #(
    parameter NUM_CORES              = 4,
    parameter CORE_ID_BITS           = 2,
    parameter NUM_NEURONS            = 1024,
    parameter NEURON_BITS            = 10,
    parameter DATA_WIDTH             = 16,
    parameter POOL_ADDR_BITS         = 15,
    parameter COUNT_BITS             = 12,
    parameter ROUTE_SLOT_BITS        = 3,
    parameter GLOBAL_ROUTE_SLOT_BITS = 2
)(
    input  wire                              clk,
    input  wire                              rst_n,

    // Byte channel to/from the host
    input  wire [7:0]                        rx_data,
    input  wire                              rx_valid,
    output reg  [7:0]                        tx_data,
    output reg                               tx_valid,
    input  wire                              tx_ready,

    // Timestep kick
    output reg                               mesh_start,

    // Connection-pool programming
    output reg                               mesh_prog_pool_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_pool_core,
    output reg  [POOL_ADDR_BITS-1:0]         mesh_prog_pool_addr,
    output reg  [NEURON_BITS-1:0]            mesh_prog_pool_src,
    output reg  [NEURON_BITS-1:0]            mesh_prog_pool_target,
    output reg  signed [DATA_WIDTH-1:0]      mesh_prog_pool_weight,
    output reg  [1:0]                        mesh_prog_pool_comp,

    // Index-table programming
    output reg                               mesh_prog_index_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_index_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_index_neuron,
    output reg  [POOL_ADDR_BITS-1:0]         mesh_prog_index_base,
    output reg  [COUNT_BITS-1:0]             mesh_prog_index_count,
    output reg  [1:0]                        mesh_prog_index_format,

    // Route programming
    output reg                               mesh_prog_route_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_route_src_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_route_src_neuron,
    output reg  [ROUTE_SLOT_BITS-1:0]        mesh_prog_route_slot,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_route_dest_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]      mesh_prog_route_weight,

    // Global-route programming
    output reg                               mesh_prog_global_route_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_global_route_src_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_global_route_src_neuron,
    output reg  [GLOBAL_ROUTE_SLOT_BITS-1:0] mesh_prog_global_route_slot,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_global_route_dest_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_global_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]      mesh_prog_global_route_weight,

    // External stimulus injection
    output reg                               mesh_ext_valid,
    output reg  [CORE_ID_BITS-1:0]           mesh_ext_core,
    output reg  [NEURON_BITS-1:0]            mesh_ext_neuron_id,
    output reg  signed [DATA_WIDTH-1:0]      mesh_ext_current,

    // Feature flags (written together by CMD_LEARN_CFG) and reward value
    output reg                               mesh_learn_enable,
    output reg                               mesh_graded_enable,
    output reg                               mesh_dendritic_enable,
    output reg                               mesh_async_enable,
    output reg                               mesh_threefactor_enable,
    output reg  signed [DATA_WIDTH-1:0]      mesh_reward_value,
    output reg                               mesh_noise_enable,
    output reg                               mesh_skip_idle_enable,
    output reg                               mesh_scale_u_enable,

    // Delay programming
    output reg                               mesh_prog_delay_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_delay_core,
    output reg  [POOL_ADDR_BITS-1:0]         mesh_prog_delay_addr,
    output reg  [5:0]                        mesh_prog_delay_value,

    // Microcode programming
    output reg                               mesh_prog_ucode_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_ucode_core,
    output reg  [7:0]                        mesh_prog_ucode_addr,
    output reg  [31:0]                       mesh_prog_ucode_data,

    // Per-neuron parameter programming
    output reg                               mesh_prog_param_we,
    output reg  [CORE_ID_BITS-1:0]           mesh_prog_param_core,
    output reg  [NEURON_BITS-1:0]            mesh_prog_param_neuron,
    output reg  [4:0]                        mesh_prog_param_id,
    output reg  signed [DATA_WIDTH-1:0]      mesh_prog_param_value,

    // Read-back probe
    output reg                               mesh_probe_read,
    output reg  [CORE_ID_BITS-1:0]           mesh_probe_core,
    output reg  [NEURON_BITS-1:0]            mesh_probe_neuron,
    output reg  [4:0]                        mesh_probe_state_id,
    output reg  [POOL_ADDR_BITS-1:0]         mesh_probe_pool_addr,
    input  wire signed [DATA_WIDTH-1:0]      mesh_probe_data,
    input  wire                              mesh_probe_valid,

    output reg  [7:0]                        mesh_dvfs_stall,

    // Mesh status
    input  wire                              mesh_timestep_done,
    input  wire [5:0]                        mesh_state,
    input  wire [31:0]                       mesh_total_spikes,
    input  wire [31:0]                       mesh_timestep_count
);

    // ---- Opcodes ------------------------------------------------------------
    localparam CMD_PROG_POOL         = 8'h01;
    localparam CMD_PROG_ROUTE        = 8'h02;
    localparam CMD_STIMULUS          = 8'h03;
    localparam CMD_RUN               = 8'h04;
    localparam CMD_STATUS            = 8'h05;
    localparam CMD_LEARN_CFG         = 8'h06;
    localparam CMD_PROG_NEURON       = 8'h07;
    localparam CMD_PROG_INDEX        = 8'h08;
    localparam CMD_REWARD            = 8'h09;
    localparam CMD_PROG_DELAY        = 8'h0A;
    localparam CMD_PROG_FORMAT       = 8'h0B;
    localparam CMD_PROG_LEARN        = 8'h0C;
    localparam CMD_NOISE_SEED        = 8'h0D;
    localparam CMD_READ_WEIGHT       = 8'h0E;
    localparam CMD_PROG_DEND_TREE    = 8'h0F;
    localparam CMD_PROG_GLOBAL_ROUTE = 8'h10;
    localparam CMD_DVFS_CFG          = 8'h1C;
    localparam CMD_RESET_PERF        = 8'h1D;

    // ---- Reply markers ------------------------------------------------------
    localparam RESP_ACK  = 8'hAA;
    localparam RESP_DONE = 8'hDD;

    // ---- FSM encoding -------------------------------------------------------
    localparam HI_IDLE              = 6'd0;
    localparam HI_RECV              = 6'd1;
    localparam HI_EXEC_POOL         = 6'd2;
    localparam HI_EXEC_ROUTE        = 6'd3;
    localparam HI_EXEC_STIM         = 6'd4;
    localparam HI_SEND_ACK          = 6'd5;
    localparam HI_RUN_START         = 6'd6;
    localparam HI_RUN_WAIT          = 6'd7;
    localparam HI_RUN_LOOP          = 6'd8;
    localparam HI_SEND_RESP         = 6'd9;
    localparam HI_EXEC_STATUS       = 6'd10;
    localparam HI_SEND_WAIT         = 6'd11;
    localparam HI_EXEC_LEARN        = 6'd12;
    localparam HI_EXEC_PARAM        = 6'd13;
    localparam HI_EXEC_INDEX        = 6'd14;
    localparam HI_EXEC_REWARD       = 6'd15;
    localparam HI_EXEC_DELAY        = 6'd16;
    localparam HI_EXEC_FORMAT       = 6'd17;
    localparam HI_EXEC_LEARN_MC     = 6'd18;
    localparam HI_EXEC_SEED         = 6'd19;
    localparam HI_EXEC_READ_WT      = 6'd20;
    localparam HI_EXEC_GLOBAL_ROUTE = 6'd21;
    localparam HI_PROBE_WAIT        = 6'd22;
    localparam HI_PROBE_RESP        = 6'd23;
    localparam HI_EXEC_DEND_TREE    = 6'd24;
    localparam HI_EXEC_DVFS         = 6'd25;
    localparam HI_EXEC_RESET_PERF   = 6'd26;

    // ---- State --------------------------------------------------------------
    reg [5:0]  state;
    reg [7:0]  cmd;                 // opcode of the command being received
    reg [4:0]  byte_cnt;            // index of the next payload byte
    reg [4:0]  payload_len;         // payload bytes expected for `cmd`
    reg [7:0]  payload [0:15];      // raw payload bytes (not cleared on reset)

    reg [15:0] run_remaining;       // timestep counter for CMD_RUN
    reg [31:0] run_spike_base;      // mesh_total_spikes sampled at run start

    reg [7:0]  resp_buf [0:4];      // multi-byte reply staging
    reg [2:0]  resp_len;
    reg [2:0]  resp_idx;

    // ---- Payload field views ------------------------------------------------
    // Byte 0 carries the core id in most commands; the 16-bit pairs are
    // big-endian (lower-numbered byte is the MSB). Narrower destinations keep
    // the low-order bits of the pair.
    wire [CORE_ID_BITS-1:0] arg_core = payload[0][CORE_ID_BITS-1:0];
    wire [15:0]             arg_w01  = {payload[0], payload[1]};
    wire [15:0]             arg_w12  = {payload[1], payload[2]};
    wire [15:0]             arg_w34  = {payload[3], payload[4]};
    wire [15:0]             arg_w45  = {payload[4], payload[5]};
    wire [15:0]             arg_w56  = {payload[5], payload[6]};
    wire [15:0]             arg_w67  = {payload[6], payload[7]};
    wire [15:0]             arg_w78  = {payload[7], payload[8]};

    // Spikes accumulated since HI_RUN_START sampled the baseline.
    wire [31:0] run_spike_delta = mesh_total_spikes - run_spike_base;

    // Number of payload bytes that follow each opcode (0 for unknown opcodes).
    function [4:0] cmd_payload_len;
        input [7:0] opcode;
        case (opcode)
            CMD_STATUS:                                   cmd_payload_len = 5'd0;
            CMD_LEARN_CFG, CMD_DVFS_CFG, CMD_RESET_PERF:  cmd_payload_len = 5'd1;
            CMD_RUN, CMD_REWARD:                          cmd_payload_len = 5'd2;
            CMD_NOISE_SEED:                               cmd_payload_len = 5'd3;
            CMD_PROG_DELAY, CMD_PROG_FORMAT,
            CMD_READ_WEIGHT, CMD_PROG_DEND_TREE:          cmd_payload_len = 5'd4;
            CMD_STIMULUS:                                 cmd_payload_len = 5'd5;
            CMD_PROG_NEURON, CMD_PROG_LEARN:              cmd_payload_len = 5'd6;
            CMD_PROG_INDEX:                               cmd_payload_len = 5'd7;
            CMD_PROG_POOL:                                cmd_payload_len = 5'd8;
            CMD_PROG_ROUTE, CMD_PROG_GLOBAL_ROUTE:        cmd_payload_len = 5'd9;
            default:                                      cmd_payload_len = 5'd0;
        endcase
    endfunction

    // Execute state entered once an opcode's payload is complete. Unknown
    // opcodes map back to HI_IDLE, i.e. they are ignored.
    function [5:0] exec_state_for;
        input [7:0] opcode;
        case (opcode)
            CMD_PROG_POOL:         exec_state_for = HI_EXEC_POOL;
            CMD_PROG_ROUTE:        exec_state_for = HI_EXEC_ROUTE;
            CMD_STIMULUS:          exec_state_for = HI_EXEC_STIM;
            CMD_RUN:               exec_state_for = HI_RUN_START;
            CMD_STATUS:            exec_state_for = HI_EXEC_STATUS;
            CMD_LEARN_CFG:         exec_state_for = HI_EXEC_LEARN;
            CMD_PROG_NEURON:       exec_state_for = HI_EXEC_PARAM;
            CMD_PROG_INDEX:        exec_state_for = HI_EXEC_INDEX;
            CMD_REWARD:            exec_state_for = HI_EXEC_REWARD;
            CMD_PROG_DELAY:        exec_state_for = HI_EXEC_DELAY;
            CMD_PROG_FORMAT:       exec_state_for = HI_EXEC_FORMAT;
            CMD_PROG_LEARN:        exec_state_for = HI_EXEC_LEARN_MC;
            CMD_NOISE_SEED:        exec_state_for = HI_EXEC_SEED;
            CMD_READ_WEIGHT:       exec_state_for = HI_EXEC_READ_WT;
            CMD_PROG_DEND_TREE:    exec_state_for = HI_EXEC_DEND_TREE;
            CMD_PROG_GLOBAL_ROUTE: exec_state_for = HI_EXEC_GLOBAL_ROUTE;
            CMD_DVFS_CFG:          exec_state_for = HI_EXEC_DVFS;
            CMD_RESET_PERF:        exec_state_for = HI_EXEC_RESET_PERF;
            default:               exec_state_for = HI_IDLE;
        endcase
    endfunction

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // FSM bookkeeping
            state          <= HI_IDLE;
            cmd            <= 0;
            byte_cnt       <= 0;
            payload_len    <= 0;
            run_remaining  <= 0;
            run_spike_base <= 0;
            resp_len       <= 0;
            resp_idx       <= 0;
            tx_data        <= 0;

            // Single-cycle strobes
            {mesh_prog_pool_we, mesh_prog_index_we, mesh_prog_route_we,
             mesh_prog_global_route_we, mesh_prog_delay_we, mesh_prog_ucode_we,
             mesh_prog_param_we, mesh_probe_read, mesh_ext_valid,
             mesh_start, tx_valid} <= 11'b0;

            // Sticky configuration
            {mesh_scale_u_enable, mesh_skip_idle_enable, mesh_noise_enable,
             mesh_threefactor_enable, mesh_async_enable, mesh_dendritic_enable,
             mesh_graded_enable, mesh_learn_enable} <= 8'b0;
            mesh_reward_value <= 0;
            mesh_dvfs_stall   <= 0;

            // Programming data buses
            mesh_prog_pool_core    <= 0;
            mesh_prog_pool_addr    <= 0;
            mesh_prog_pool_src     <= 0;
            mesh_prog_pool_target  <= 0;
            mesh_prog_pool_weight  <= 0;
            mesh_prog_pool_comp    <= 0;

            mesh_prog_index_core   <= 0;
            mesh_prog_index_neuron <= 0;
            mesh_prog_index_base   <= 0;
            mesh_prog_index_count  <= 0;
            mesh_prog_index_format <= 0;

            mesh_prog_route_src_core    <= 0;
            mesh_prog_route_src_neuron  <= 0;
            mesh_prog_route_slot        <= 0;
            mesh_prog_route_dest_core   <= 0;
            mesh_prog_route_dest_neuron <= 0;
            mesh_prog_route_weight      <= 0;

            mesh_prog_global_route_src_core    <= 0;
            mesh_prog_global_route_src_neuron  <= 0;
            mesh_prog_global_route_slot        <= 0;
            mesh_prog_global_route_dest_core   <= 0;
            mesh_prog_global_route_dest_neuron <= 0;
            mesh_prog_global_route_weight      <= 0;

            mesh_ext_core      <= 0;
            mesh_ext_neuron_id <= 0;
            mesh_ext_current   <= 0;

            mesh_prog_delay_core  <= 0;
            mesh_prog_delay_addr  <= 0;
            mesh_prog_delay_value <= 0;

            mesh_prog_ucode_core <= 0;
            mesh_prog_ucode_addr <= 0;
            mesh_prog_ucode_data <= 0;

            mesh_prog_param_core   <= 0;
            mesh_prog_param_neuron <= 0;
            mesh_prog_param_id     <= 0;
            mesh_prog_param_value  <= 0;

            mesh_probe_core      <= 0;
            mesh_probe_neuron    <= 0;
            mesh_probe_state_id  <= 0;
            mesh_probe_pool_addr <= 0;
        end else begin
            // Strobes default low; the owning state overrides below (the last
            // non-blocking assignment in the block wins).
            {mesh_prog_pool_we, mesh_prog_index_we, mesh_prog_route_we,
             mesh_prog_global_route_we, mesh_prog_delay_we, mesh_prog_ucode_we,
             mesh_prog_param_we, mesh_probe_read, mesh_ext_valid,
             mesh_start, tx_valid} <= 11'b0;

            case (state)

                // ---------------- Command reception ----------------
                HI_IDLE: if (rx_valid) begin
                    cmd         <= rx_data;
                    payload_len <= cmd_payload_len(rx_data);
                    byte_cnt    <= 0;
                    // Zero-payload opcodes dispatch immediately (only
                    // CMD_STATUS does anything); the rest collect bytes first.
                    if (cmd_payload_len(rx_data) != 0)
                        state <= HI_RECV;
                    else
                        state <= exec_state_for(rx_data);
                end

                HI_RECV: if (rx_valid) begin
                    payload[byte_cnt] <= rx_data;
                    if (byte_cnt == payload_len - 1)
                        state <= exec_state_for(cmd);
                    else
                        byte_cnt <= byte_cnt + 1;
                end

                // ---------------- Programming commands -------------
                HI_EXEC_POOL: begin
                    mesh_prog_pool_we     <= 1;
                    mesh_prog_pool_core   <= arg_core;
                    mesh_prog_pool_addr   <= arg_w12;
                    // Byte 3 packs: [7:6] comp, [5:4] src MSBs, [3:2] target MSBs
                    mesh_prog_pool_comp   <= payload[3][7:6];
                    mesh_prog_pool_src    <= {payload[3][5:4], payload[4]};
                    mesh_prog_pool_target <= {payload[3][3:2], payload[5]};
                    mesh_prog_pool_weight <= arg_w67;
                    state                 <= HI_SEND_ACK;
                end

                HI_EXEC_INDEX: begin
                    mesh_prog_index_we     <= 1;
                    mesh_prog_index_core   <= arg_core;
                    mesh_prog_index_neuron <= arg_w12;
                    mesh_prog_index_base   <= arg_w34;
                    mesh_prog_index_count  <= arg_w56;
                    // Format shares byte 5 with the count's upper byte.
                    mesh_prog_index_format <= payload[5][7:6];
                    state                  <= HI_SEND_ACK;
                end

                HI_EXEC_ROUTE: begin
                    mesh_prog_route_we          <= 1;
                    mesh_prog_route_src_core    <= arg_core;
                    mesh_prog_route_src_neuron  <= arg_w12;
                    mesh_prog_route_slot        <= payload[3][ROUTE_SLOT_BITS-1:0];
                    mesh_prog_route_dest_core   <= payload[4][CORE_ID_BITS-1:0];
                    mesh_prog_route_dest_neuron <= arg_w56;
                    mesh_prog_route_weight      <= arg_w78;
                    state                       <= HI_SEND_ACK;
                end

                HI_EXEC_GLOBAL_ROUTE: begin
                    mesh_prog_global_route_we          <= 1;
                    mesh_prog_global_route_src_core    <= arg_core;
                    mesh_prog_global_route_src_neuron  <= arg_w12;
                    mesh_prog_global_route_slot        <= payload[3][GLOBAL_ROUTE_SLOT_BITS-1:0];
                    mesh_prog_global_route_dest_core   <= payload[4][CORE_ID_BITS-1:0];
                    mesh_prog_global_route_dest_neuron <= arg_w56;
                    mesh_prog_global_route_weight      <= arg_w78;
                    state                              <= HI_SEND_ACK;
                end

                HI_EXEC_DELAY: begin
                    mesh_prog_delay_we    <= 1;
                    mesh_prog_delay_core  <= arg_core;
                    mesh_prog_delay_addr  <= arg_w12;
                    mesh_prog_delay_value <= payload[3][5:0];
                    state                 <= HI_SEND_ACK;
                end

                HI_EXEC_LEARN_MC: begin
                    mesh_prog_ucode_we   <= 1;
                    mesh_prog_ucode_core <= arg_core;
                    mesh_prog_ucode_addr <= payload[1][7:0];
                    mesh_prog_ucode_data <= {payload[2], payload[3], payload[4], payload[5]};
                    state                <= HI_SEND_ACK;
                end

                HI_EXEC_PARAM: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= arg_core;
                    mesh_prog_param_neuron <= arg_w12;
                    mesh_prog_param_id     <= payload[3][4:0];
                    mesh_prog_param_value  <= arg_w45;
                    state                  <= HI_SEND_ACK;
                end

                // Routed through the parameter port with a fixed id of 15.
                HI_EXEC_DEND_TREE: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= arg_core;
                    mesh_prog_param_neuron <= arg_w12;
                    mesh_prog_param_id     <= 5'd15;
                    mesh_prog_param_value  <= {{(DATA_WIDTH-6){1'b0}}, payload[3][5:0]};
                    state                  <= HI_SEND_ACK;
                end

                // Routed through the parameter port with a fixed id of 28.
                HI_EXEC_RESET_PERF: begin
                    mesh_prog_param_we     <= 1;
                    mesh_prog_param_core   <= arg_core;
                    mesh_prog_param_neuron <= 0;
                    mesh_prog_param_id     <= 5'd28;
                    mesh_prog_param_value  <= 0;
                    state                  <= HI_SEND_ACK;
                end

                // ---------------- Configuration commands -----------
                HI_EXEC_STIM: begin
                    mesh_ext_valid     <= 1;
                    mesh_ext_core      <= arg_core;
                    mesh_ext_neuron_id <= arg_w12;
                    mesh_ext_current   <= arg_w34;
                    state              <= HI_SEND_ACK;
                end

                // payload[0] bit i drives the i-th flag, bit 0 = learn_enable.
                HI_EXEC_LEARN: begin
                    {mesh_scale_u_enable, mesh_skip_idle_enable, mesh_noise_enable,
                     mesh_threefactor_enable, mesh_async_enable, mesh_dendritic_enable,
                     mesh_graded_enable, mesh_learn_enable} <= payload[0];
                    state <= HI_SEND_ACK;
                end

                HI_EXEC_REWARD: begin
                    mesh_reward_value <= arg_w01;
                    state             <= HI_SEND_ACK;
                end

                HI_EXEC_DVFS: begin
                    mesh_dvfs_stall <= payload[0];
                    state           <= HI_SEND_ACK;
                end

                // Accepted and acknowledged, but drive no outputs here.
                HI_EXEC_FORMAT,
                HI_EXEC_SEED: state <= HI_SEND_ACK;

                // ---------------- Run loop --------------------------
                HI_RUN_START: begin
                    run_remaining  <= arg_w01;
                    run_spike_base <= mesh_total_spikes;
                    mesh_start     <= 1;
                    state          <= HI_RUN_WAIT;
                end

                HI_RUN_WAIT: if (mesh_timestep_done) state <= HI_RUN_LOOP;

                HI_RUN_LOOP: begin
                    if (run_remaining > 1) begin
                        // More timesteps requested: kick the mesh again.
                        run_remaining <= run_remaining - 1;
                        mesh_start    <= 1;
                        state         <= HI_RUN_WAIT;
                    end else begin
                        // Finished (a request of 0 also lands here after one
                        // timestep): report the spike delta, MSB first.
                        resp_buf[0] <= RESP_DONE;
                        resp_buf[1] <= run_spike_delta[31:24];
                        resp_buf[2] <= run_spike_delta[23:16];
                        resp_buf[3] <= run_spike_delta[15:8];
                        resp_buf[4] <= run_spike_delta[7:0];
                        resp_len    <= 5;
                        resp_idx    <= 0;
                        state       <= HI_SEND_RESP;
                    end
                end

                // ---------------- Read-back -------------------------
                HI_EXEC_STATUS: begin
                    resp_buf[0] <= {2'b00, mesh_state};
                    resp_buf[1] <= mesh_timestep_count[31:24];
                    resp_buf[2] <= mesh_timestep_count[23:16];
                    resp_buf[3] <= mesh_timestep_count[15:8];
                    resp_buf[4] <= mesh_timestep_count[7:0];
                    resp_len    <= 5;
                    resp_idx    <= 0;
                    state       <= HI_SEND_RESP;
                end

                // Neuron id and pool address are both taken from bytes 1..2.
                HI_EXEC_READ_WT: begin
                    mesh_probe_read      <= 1;
                    mesh_probe_core      <= arg_core;
                    mesh_probe_neuron    <= arg_w12;
                    mesh_probe_state_id  <= payload[3][4:0];
                    mesh_probe_pool_addr <= arg_w12;
                    state                <= HI_PROBE_WAIT;
                end

                // Waits for mesh_probe_valid with no timeout.
                HI_PROBE_WAIT: if (mesh_probe_valid) begin
                    resp_buf[0] <= mesh_probe_data[15:8];
                    resp_buf[1] <= mesh_probe_data[7:0];
                    resp_len    <= 2;
                    resp_idx    <= 0;
                    state       <= HI_SEND_RESP;
                end

                // ---------------- Reply transmission ----------------
                HI_SEND_ACK: if (tx_ready) begin
                    tx_data  <= RESP_ACK;
                    tx_valid <= 1;
                    state    <= HI_IDLE;
                end

                HI_SEND_RESP: if (tx_ready) begin
                    tx_data  <= resp_buf[resp_idx];
                    tx_valid <= 1;
                    state    <= HI_SEND_WAIT;
                end

                // One idle cycle between reply bytes, then advance or finish.
                HI_SEND_WAIT: begin
                    if (resp_idx != resp_len - 1) begin
                        resp_idx <= resp_idx + 1;
                        state    <= HI_SEND_RESP;
                    end else begin
                        state <= HI_IDLE;
                    end
                end

                default: state <= HI_IDLE;
            endcase
        end
    end

endmodule
|
rtl/lif_neuron.v
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Leaky Integrate-and-Fire (LIF) Neuron
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// lif_neuron
//
// Single leaky integrate-and-fire neuron, updated once per enabled clock.
//
// Parameters
//   DATA_WIDTH    : width of the potential and of the synaptic input.
//   THRESHOLD     : firing threshold; the neuron spikes when
//                   (potential + input) is strictly greater than this.
//   LEAK_RATE     : amount subtracted from the potential on every update that
//                   neither spikes nor clamps.
//   RESTING_POT   : potential after reset, after a spike, and the lower clamp.
//   REFRAC_CYCLES : enabled cycles after a spike during which input is
//                   ignored (only the low 4 bits are used).
//
// Ports
//   enable         : the state only advances on clocks where this is high.
//   synaptic_input : signed input current; negative values are inhibitory.
//   spike          : high for one enabled cycle when the neuron fires.
//   membrane_pot   : registered copy of the internal potential, so it lags
//                    the internal value by one enabled cycle.
//
// Note on arithmetic: the potential is stored unsigned but the input is
// signed. Mixing the two directly makes Verilog evaluate the whole expression
// as unsigned, so a negative input larger than the stored potential would
// wrap to a huge positive number and fire a spurious spike. The sum and all
// comparisons are therefore done in a widened signed domain.
// ----------------------------------------------------------------------------
module lif_neuron #(
    parameter DATA_WIDTH    = 16,
    parameter THRESHOLD     = 16'd1000,
    parameter LEAK_RATE     = 16'd2,
    parameter RESTING_POT   = 16'd0,
    parameter REFRAC_CYCLES = 4
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         enable,
    input  wire signed [DATA_WIDTH-1:0] synaptic_input,
    output reg                          spike,
    output reg  [DATA_WIDTH-1:0]        membrane_pot
);

    // Two guard bits: one so the unsigned potential stays non-negative when
    // reinterpreted as signed, one for carry out of the addition.
    localparam SUM_W = DATA_WIDTH + 2;

    // Parameters forced to DATA_WIDTH-bit unsigned values so that unsized
    // overrides (e.g. THRESHOLD = 1000) behave the same as sized ones.
    localparam [DATA_WIDTH-1:0] THRESHOLD_U = THRESHOLD;
    localparam [DATA_WIDTH-1:0] LEAK_U      = LEAK_RATE;
    localparam [DATA_WIDTH-1:0] REST_U      = RESTING_POT;
    localparam [3:0]            REFRAC_INIT = REFRAC_CYCLES;

    reg [DATA_WIDTH-1:0] potential;
    reg [3:0]            refrac_counter;

    wire in_refractory = (refrac_counter > 0);

    // Zero-extend the unsigned quantities, sign-extend the input, then work
    // entirely with signed operands.
    wire signed [SUM_W-1:0] pot_ext   = $signed({2'b00, potential});
    wire signed [SUM_W-1:0] in_ext    = $signed({{2{synaptic_input[DATA_WIDTH-1]}},
                                                 synaptic_input});
    wire signed [SUM_W-1:0] thr_ext   = $signed({2'b00, THRESHOLD_U});
    wire signed [SUM_W-1:0] leak_ext  = $signed({2'b00, LEAK_U});
    wire signed [SUM_W-1:0] rest_ext  = $signed({2'b00, REST_U});

    wire signed [SUM_W-1:0] sum_ext    = pot_ext + in_ext;     // integrated value
    wire signed [SUM_W-1:0] floor_ext  = rest_ext + leak_ext;  // below this, clamp
    wire signed [SUM_W-1:0] leaked_ext = sum_ext - leak_ext;   // after leak

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            potential      <= RESTING_POT;
            spike          <= 1'b0;
            refrac_counter <= 4'd0;
            membrane_pot   <= RESTING_POT;

        end else if (enable) begin
            spike <= 1'b0;

            if (in_refractory) begin
                // Input is ignored while refractory; hold at rest.
                refrac_counter <= refrac_counter - 1;
                potential      <= RESTING_POT;

            end else begin
                if (sum_ext > thr_ext) begin
                    // Fire, reset and start the refractory period.
                    spike          <= 1'b1;
                    potential      <= RESTING_POT;
                    refrac_counter <= REFRAC_INIT;
                end else if (sum_ext < floor_ext) begin
                    // Leak (or inhibition) would take the potential below
                    // rest: clamp instead of wrapping.
                    potential <= RESTING_POT;
                end else begin
                    // Here floor <= sum <= threshold, so the leaked value is
                    // within [RESTING_POT, THRESHOLD] and fits DATA_WIDTH.
                    potential <= leaked_ext[DATA_WIDTH-1:0];
                end
            end

            // Registered view of the pre-update potential.
            membrane_pot <= potential;
        end
    end

endmodule
|
rtl/mmio_bridge.v
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// MMIO Bridge
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
// ----------------------------------------------------------------------------
// mmio_bridge
//
// Single-cycle memory-mapped register bridge.  A bus access is recognised when
// mmio_valid is high and mmio_ready is low; the access is executed on that
// clock edge and mmio_ready is raised for exactly one cycle afterwards.
//
// Writes either update a local "staging" register (selected core / neuron /
// pool address, route destination, weight, index base, microcode address) or
// emit a one-cycle programming strobe (prog_*_we, ext_valid, uart_tx_valid,
// mesh_start, perf_reset_we, debug_*) whose payload is assembled from the
// staging registers plus mmio_wdata.  Most programming strobes are only
// generated while mgmt_phase is high.
//
// Reads return status / staging / performance-counter values; unmapped read
// addresses return zero.  Unmapped write addresses are ignored.
// ----------------------------------------------------------------------------
module mmio_bridge #(
    parameter CORE_ID_BITS           = 7,
    parameter NEURON_BITS            = 10,
    parameter DATA_WIDTH             = 16,
    parameter POOL_ADDR_BITS         = 15,
    parameter ROUTE_SLOT_BITS        = 3,
    parameter GLOBAL_ROUTE_SLOT_BITS = 2,
    parameter COUNT_BITS             = 12
)(
    input  wire                              clk,
    input  wire                              rst_n,

    // High while programming writes are allowed to take effect.
    input  wire                              mgmt_phase,

    // Register bus
    input  wire                              mmio_valid,
    input  wire                              mmio_we,
    input  wire [15:0]                       mmio_addr,
    input  wire [31:0]                       mmio_wdata,
    output reg  [31:0]                       mmio_rdata,
    output reg                               mmio_ready,

    // Mesh control / external stimulus
    output reg                               mesh_start,
    output reg                               ext_valid,
    output reg  [CORE_ID_BITS-1:0]           ext_core,
    output reg  [NEURON_BITS-1:0]            ext_neuron_id,
    output reg  signed [DATA_WIDTH-1:0]      ext_current,

    // Neuron parameter programming
    output reg                               prog_param_we,
    output reg  [CORE_ID_BITS-1:0]           prog_param_core,
    output reg  [NEURON_BITS-1:0]            prog_param_neuron,
    output reg  [4:0]                        prog_param_id,
    output reg  signed [DATA_WIDTH-1:0]      prog_param_value,

    // State probe
    output reg                               probe_read,
    output reg  [CORE_ID_BITS-1:0]           probe_core,
    output reg  [NEURON_BITS-1:0]            probe_neuron,
    output reg  [3:0]                        probe_state_id,
    input  wire signed [DATA_WIDTH-1:0]      probe_data,
    input  wire                              probe_valid,

    // UART
    output reg  [7:0]                        uart_tx_data,
    output reg                               uart_tx_valid,
    input  wire                              uart_tx_ready,
    input  wire [7:0]                        uart_rx_data,
    input  wire                              uart_rx_valid,

    // Status inputs
    input  wire                              rv_halted,
    input  wire                              rv_running,
    input  wire [31:0]                       timestep_count,

    // Feature enables
    output reg                               learn_enable,
    output reg                               graded_enable,
    output reg                               dendritic_enable,
    output reg                               async_enable,
    output reg                               threefactor_enable,
    output reg                               noise_enable,
    output reg                               skip_idle_enable,

    output reg  signed [DATA_WIDTH-1:0]      reward_value,

    // Local route programming
    output reg                               prog_route_we,
    output reg  [CORE_ID_BITS-1:0]           prog_route_src_core,
    output reg  [NEURON_BITS-1:0]            prog_route_src_neuron,
    output reg  [ROUTE_SLOT_BITS-1:0]        prog_route_slot,
    output reg  [CORE_ID_BITS-1:0]           prog_route_dest_core,
    output reg  [NEURON_BITS-1:0]            prog_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]      prog_route_weight,

    // Delay programming
    output reg                               prog_delay_we,
    output reg  [CORE_ID_BITS-1:0]           prog_delay_core,
    output reg  [POOL_ADDR_BITS-1:0]         prog_delay_addr,
    output reg  [5:0]                        prog_delay_value,

    // Microcode programming
    output reg                               prog_ucode_we,
    output reg  [CORE_ID_BITS-1:0]           prog_ucode_core,
    output reg  [7:0]                        prog_ucode_addr,
    output reg  [31:0]                       prog_ucode_data,

    output reg  [7:0]                        dvfs_stall,

    // Index table programming
    output reg                               prog_index_we,
    output reg  [CORE_ID_BITS-1:0]           prog_index_core,
    output reg  [NEURON_BITS-1:0]            prog_index_neuron,
    output reg  [POOL_ADDR_BITS-1:0]         prog_index_base,
    output reg  [COUNT_BITS-1:0]             prog_index_count,

    // Noise seed programming
    output reg                               prog_noise_seed_we,
    output reg  [CORE_ID_BITS-1:0]           prog_noise_seed_core,
    output reg  [31:0]                       prog_noise_seed_value,

    // Dendritic parent programming
    output reg                               prog_dend_parent_we,
    output reg  [CORE_ID_BITS-1:0]           prog_dend_parent_core,
    output reg  [NEURON_BITS-1:0]            prog_dend_parent_neuron,
    output reg  [7:0]                        prog_dend_parent_data,

    // Global route programming
    output reg                               prog_global_route_we,
    output reg  [CORE_ID_BITS-1:0]           prog_global_route_src_core,
    output reg  [NEURON_BITS-1:0]            prog_global_route_src_neuron,
    output reg  [GLOBAL_ROUTE_SLOT_BITS-1:0] prog_global_route_slot,
    output reg  [CORE_ID_BITS-1:0]           prog_global_route_dest_core,
    output reg  [NEURON_BITS-1:0]            prog_global_route_dest_neuron,
    output reg  signed [DATA_WIDTH-1:0]      prog_global_route_weight,

    // Performance counters
    input  wire [31:0]                       perf_spike_count,
    input  wire [31:0]                       perf_synop_count,
    input  wire [31:0]                       perf_active_cycles,
    input  wire [31:0]                       perf_power_estimate,

    output reg                               perf_reset_we,
    output reg  [CORE_ID_BITS-1:0]           perf_reset_core,

    // Debug
    output reg  [31:0]                       debug_bp_addr_0,
    output reg  [31:0]                       debug_bp_addr_1,
    output reg  [31:0]                       debug_bp_addr_2,
    output reg  [31:0]                       debug_bp_addr_3,
    output reg  [3:0]                        debug_bp_enable,
    output reg                               debug_resume,
    output reg                               debug_halt_req,
    output reg                               debug_single_step
);

    // ------------------------------------------------------------------
    // Register map (byte addresses, matched against all 16 address bits)
    // ------------------------------------------------------------------
    localparam [15:0] A_CTRL_STATUS    = 16'h0000; // W: bit0 mesh start   R: {running, halted}
    localparam [15:0] A_SEL_CORE       = 16'h0004; // RW
    localparam [15:0] A_SEL_NEURON     = 16'h0008; // RW
    localparam [15:0] A_PARAM_PROBE    = 16'h000C; // W: parameter write   R: state probe
    localparam [15:0] A_SEL_POOL_ADDR  = 16'h0010; // W
    localparam [15:0] A_EXT_STIM       = 16'h0018; // W
    localparam [15:0] A_UART_TX        = 16'h0020; // W
    localparam [15:0] A_UART_RX        = 16'h0024; // R
    localparam [15:0] A_UART_STATUS    = 16'h0028; // R: {rx_valid, tx_ready}
    localparam [15:0] A_TIMESTEP       = 16'h002C; // R
    localparam [15:0] A_FEATURES       = 16'h0030; // W
    localparam [15:0] A_REWARD         = 16'h0034; // W
    localparam [15:0] A_ROUTE_DCORE    = 16'h0038; // W
    localparam [15:0] A_ROUTE_DNEURON  = 16'h003C; // W
    localparam [15:0] A_ROUTE_WEIGHT   = 16'h0040; // W
    localparam [15:0] A_ROUTE_COMMIT   = 16'h0044; // W
    localparam [15:0] A_DELAY_COMMIT   = 16'h0048; // W
    localparam [15:0] A_UCODE_ADDR     = 16'h004C; // W
    localparam [15:0] A_UCODE_DATA     = 16'h0050; // W
    localparam [15:0] A_DVFS_STALL     = 16'h0054; // W
    localparam [15:0] A_PERF_RESET     = 16'h0058; // W
    localparam [15:0] A_INDEX_BASE     = 16'h005C; // W
    localparam [15:0] A_INDEX_COMMIT   = 16'h0060; // W
    localparam [15:0] A_NOISE_SEED     = 16'h0064; // W
    localparam [15:0] A_DEND_PARENT    = 16'h0068; // W
    localparam [15:0] A_GROUTE_COMMIT  = 16'h006C; // W
    localparam [15:0] A_PERF_SPIKES    = 16'h0070; // R
    localparam [15:0] A_PERF_SYNOPS    = 16'h0074; // R
    localparam [15:0] A_PERF_ACTIVE    = 16'h0078; // R
    localparam [15:0] A_PERF_POWER     = 16'h007C; // R
    localparam [15:0] A_DEBUG_CTRL     = 16'h0090; // W: {single_step, halt_req, resume}
    localparam [15:0] A_DEBUG_BP0      = 16'h0094; // W
    localparam [15:0] A_DEBUG_BP1      = 16'h0098; // W
    localparam [15:0] A_DEBUG_BP2      = 16'h009C; // W
    localparam [15:0] A_DEBUG_BP3      = 16'h00A0; // W
    localparam [15:0] A_DEBUG_BP_EN    = 16'h00A4; // W

    // ------------------------------------------------------------------
    // Staging registers: written by the host first, consumed by a later
    // "commit" write that fires the matching programming strobe.
    // ------------------------------------------------------------------
    reg [CORE_ID_BITS-1:0]      sel_core;
    reg [NEURON_BITS-1:0]       sel_neuron;
    reg [POOL_ADDR_BITS-1:0]    sel_pool_addr;

    reg [CORE_ID_BITS-1:0]      route_dest_core;
    reg [NEURON_BITS-1:0]       route_dest_neuron;
    reg signed [DATA_WIDTH-1:0] route_weight;

    reg [POOL_ADDR_BITS-1:0]    index_base;

    reg [7:0]                   ucode_addr;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // Bus
            mmio_rdata                    <= 32'd0;
            mmio_ready                    <= 1'b0;

            // Mesh control / stimulus
            mesh_start                    <= 1'b0;
            ext_valid                     <= 1'b0;
            ext_core                      <= 0;
            ext_neuron_id                 <= 0;
            ext_current                   <= 0;

            // Parameter programming
            prog_param_we                 <= 1'b0;
            prog_param_core               <= 0;
            prog_param_neuron             <= 0;
            prog_param_id                 <= 0;
            prog_param_value              <= 0;

            // Probe
            probe_read                    <= 1'b0;
            probe_core                    <= 0;
            probe_neuron                  <= 0;
            probe_state_id                <= 0;

            // UART
            uart_tx_data                  <= 8'd0;
            uart_tx_valid                 <= 1'b0;

            // Selection staging
            sel_core                      <= 0;
            sel_neuron                    <= 0;
            sel_pool_addr                 <= 0;

            // Feature enables
            {skip_idle_enable, noise_enable, threefactor_enable, async_enable,
             dendritic_enable, graded_enable, learn_enable} <= 7'd0;
            reward_value                  <= 0;

            // Local routes
            prog_route_we                 <= 1'b0;
            prog_route_src_core           <= 0;
            prog_route_src_neuron         <= 0;
            prog_route_slot               <= 0;
            prog_route_dest_core          <= 0;
            prog_route_dest_neuron        <= 0;
            prog_route_weight             <= 0;
            route_dest_core               <= 0;
            route_dest_neuron             <= 0;
            route_weight                  <= 0;

            // Delays
            prog_delay_we                 <= 1'b0;
            prog_delay_core               <= 0;
            prog_delay_addr               <= 0;
            prog_delay_value              <= 0;

            // Microcode
            prog_ucode_we                 <= 1'b0;
            prog_ucode_core               <= 0;
            prog_ucode_addr               <= 0;
            prog_ucode_data               <= 0;
            ucode_addr                    <= 0;

            dvfs_stall                    <= 8'd0;

            // Index table
            prog_index_we                 <= 1'b0;
            prog_index_core               <= 0;
            prog_index_neuron             <= 0;
            prog_index_base               <= 0;
            prog_index_count              <= 0;
            index_base                    <= 0;

            // Noise seed
            prog_noise_seed_we            <= 1'b0;
            prog_noise_seed_core          <= 0;
            prog_noise_seed_value         <= 0;

            // Dendritic parent
            prog_dend_parent_we           <= 1'b0;
            prog_dend_parent_core         <= 0;
            prog_dend_parent_neuron       <= 0;
            prog_dend_parent_data         <= 0;

            // Global routes
            prog_global_route_we          <= 1'b0;
            prog_global_route_src_core    <= 0;
            prog_global_route_src_neuron  <= 0;
            prog_global_route_slot        <= 0;
            prog_global_route_dest_core   <= 0;
            prog_global_route_dest_neuron <= 0;
            prog_global_route_weight      <= 0;

            // Perf
            perf_reset_we                 <= 1'b0;
            perf_reset_core               <= 0;

            // Debug
            debug_bp_addr_0               <= 32'd0;
            debug_bp_addr_1               <= 32'd0;
            debug_bp_addr_2               <= 32'd0;
            debug_bp_addr_3               <= 32'd0;
            debug_bp_enable               <= 4'd0;
            {debug_single_step, debug_halt_req, debug_resume} <= 3'd0;
        end else begin
            // ----------------------------------------------------------
            // Defaults: every strobe is a single-cycle pulse, so clear
            // them all here and let the decode below re-assert as needed.
            // ----------------------------------------------------------
            mmio_ready           <= 1'b0;
            mesh_start           <= 1'b0;
            ext_valid            <= 1'b0;
            prog_param_we        <= 1'b0;
            probe_read           <= 1'b0;
            uart_tx_valid        <= 1'b0;
            prog_route_we        <= 1'b0;
            prog_delay_we        <= 1'b0;
            prog_ucode_we        <= 1'b0;
            prog_index_we        <= 1'b0;
            prog_noise_seed_we   <= 1'b0;
            prog_dend_parent_we  <= 1'b0;
            prog_global_route_we <= 1'b0;
            perf_reset_we        <= 1'b0;
            {debug_single_step, debug_halt_req, debug_resume} <= 3'd0;

            // Accept a request only when we are not already acknowledging
            // one, so a held mmio_valid yields one access per two cycles.
            if (mmio_valid && !mmio_ready) begin
                mmio_ready <= 1'b1;

                if (mmio_we) begin
                    // ------------------------ WRITE ------------------------
                    case (mmio_addr)
                        A_CTRL_STATUS: begin
                            if (mmio_wdata[0])
                                mesh_start <= 1'b1;
                        end

                        A_SEL_CORE:
                            sel_core <= mmio_wdata[CORE_ID_BITS-1:0];

                        A_SEL_NEURON:
                            sel_neuron <= mmio_wdata[NEURON_BITS-1:0];

                        A_PARAM_PROBE: begin
                            // Payload registers always update; only the
                            // strobe itself is qualified by mgmt_phase.
                            prog_param_we     <= mgmt_phase;
                            prog_param_core   <= sel_core;
                            prog_param_neuron <= sel_neuron;
                            prog_param_id     <= mmio_wdata[20:16];
                            prog_param_value  <= mmio_wdata[DATA_WIDTH-1:0];
                        end

                        A_SEL_POOL_ADDR:
                            sel_pool_addr <= mmio_wdata[POOL_ADDR_BITS-1:0];

                        A_EXT_STIM: begin
                            // wdata = {current, neuron_id}; core comes from
                            // the selection register.  Not gated by mgmt_phase.
                            ext_valid     <= 1'b1;
                            ext_core      <= sel_core;
                            ext_neuron_id <= mmio_wdata[NEURON_BITS-1:0];
                            ext_current   <= mmio_wdata[DATA_WIDTH+NEURON_BITS-1:NEURON_BITS];
                        end

                        A_UART_TX: begin
                            // NOTE(review): uart_tx_ready is not checked here;
                            // presumably software polls the status register
                            // first - confirm against the firmware.
                            uart_tx_data  <= mmio_wdata[7:0];
                            uart_tx_valid <= 1'b1;
                        end

                        A_FEATURES: begin
                            if (mgmt_phase) begin
                                {skip_idle_enable, noise_enable, threefactor_enable,
                                 async_enable, dendritic_enable, graded_enable,
                                 learn_enable} <= mmio_wdata[6:0];
                            end
                        end

                        A_REWARD: begin
                            if (mgmt_phase)
                                reward_value <= mmio_wdata[DATA_WIDTH-1:0];
                        end

                        A_ROUTE_DCORE:
                            route_dest_core <= mmio_wdata[CORE_ID_BITS-1:0];

                        A_ROUTE_DNEURON:
                            route_dest_neuron <= mmio_wdata[NEURON_BITS-1:0];

                        A_ROUTE_WEIGHT:
                            route_weight <= mmio_wdata[DATA_WIDTH-1:0];

                        A_ROUTE_COMMIT: begin
                            if (mgmt_phase) begin
                                prog_route_we          <= 1'b1;
                                prog_route_src_core    <= sel_core;
                                prog_route_src_neuron  <= sel_neuron;
                                prog_route_slot        <= mmio_wdata[ROUTE_SLOT_BITS-1:0];
                                prog_route_dest_core   <= route_dest_core;
                                prog_route_dest_neuron <= route_dest_neuron;
                                prog_route_weight      <= route_weight;
                            end
                        end

                        A_DELAY_COMMIT: begin
                            if (mgmt_phase) begin
                                prog_delay_we    <= 1'b1;
                                prog_delay_core  <= sel_core;
                                prog_delay_addr  <= sel_pool_addr;
                                prog_delay_value <= mmio_wdata[5:0];
                            end
                        end

                        A_UCODE_ADDR:
                            ucode_addr <= mmio_wdata[7:0];

                        A_UCODE_DATA: begin
                            if (mgmt_phase) begin
                                prog_ucode_we   <= 1'b1;
                                prog_ucode_core <= sel_core;
                                prog_ucode_addr <= ucode_addr;
                                prog_ucode_data <= mmio_wdata;
                            end
                        end

                        A_DVFS_STALL: begin
                            if (mgmt_phase)
                                dvfs_stall <= mmio_wdata[7:0];
                        end

                        A_PERF_RESET: begin
                            if (mgmt_phase) begin
                                perf_reset_we   <= 1'b1;
                                perf_reset_core <= sel_core;
                            end
                        end

                        A_INDEX_BASE:
                            index_base <= mmio_wdata[POOL_ADDR_BITS-1:0];

                        A_INDEX_COMMIT: begin
                            if (mgmt_phase) begin
                                prog_index_we     <= 1'b1;
                                prog_index_core   <= sel_core;
                                prog_index_neuron <= sel_neuron;
                                prog_index_base   <= index_base;
                                prog_index_count  <= mmio_wdata[COUNT_BITS-1:0];
                            end
                        end

                        A_NOISE_SEED: begin
                            if (mgmt_phase) begin
                                prog_noise_seed_we    <= 1'b1;
                                prog_noise_seed_core  <= sel_core;
                                prog_noise_seed_value <= mmio_wdata;
                            end
                        end

                        A_DEND_PARENT: begin
                            if (mgmt_phase) begin
                                prog_dend_parent_we     <= 1'b1;
                                prog_dend_parent_core   <= sel_core;
                                prog_dend_parent_neuron <= sel_neuron;
                                prog_dend_parent_data   <= mmio_wdata[7:0];
                            end
                        end

                        A_GROUTE_COMMIT: begin
                            // Shares the destination / weight staging
                            // registers with the local route commit.
                            if (mgmt_phase) begin
                                prog_global_route_we          <= 1'b1;
                                prog_global_route_src_core    <= sel_core;
                                prog_global_route_src_neuron  <= sel_neuron;
                                prog_global_route_slot        <= mmio_wdata[GLOBAL_ROUTE_SLOT_BITS-1:0];
                                prog_global_route_dest_core   <= route_dest_core;
                                prog_global_route_dest_neuron <= route_dest_neuron;
                                prog_global_route_weight      <= route_weight;
                            end
                        end

                        A_DEBUG_CTRL: begin
                            // One-cycle debug command pulses.
                            {debug_single_step, debug_halt_req, debug_resume} <= mmio_wdata[2:0];
                        end

                        A_DEBUG_BP0:   debug_bp_addr_0 <= mmio_wdata;
                        A_DEBUG_BP1:   debug_bp_addr_1 <= mmio_wdata;
                        A_DEBUG_BP2:   debug_bp_addr_2 <= mmio_wdata;
                        A_DEBUG_BP3:   debug_bp_addr_3 <= mmio_wdata;
                        A_DEBUG_BP_EN: debug_bp_enable <= mmio_wdata[3:0];

                        default: ; // unmapped write: ignored
                    endcase
                end else begin
                    // ------------------------- READ ------------------------
                    case (mmio_addr)
                        A_CTRL_STATUS:
                            mmio_rdata <= {30'd0, rv_running, rv_halted};

                        A_SEL_CORE:
                            mmio_rdata <= {{(32-CORE_ID_BITS){1'b0}}, sel_core};

                        A_SEL_NEURON:
                            mmio_rdata <= {{(32-NEURON_BITS){1'b0}}, sel_neuron};

                        A_PARAM_PROBE: begin
                            // The state id is taken from mmio_wdata even though
                            // this is a read access.  The returned value is
                            // whatever probe_data holds on this edge, sign
                            // extended to 32 bits.
                            // NOTE(review): probe_valid is not consulted, so
                            // this looks like it returns the result of the
                            // previous probe request - confirm with the mesh.
                            probe_read     <= 1'b1;
                            probe_core     <= sel_core;
                            probe_neuron   <= sel_neuron;
                            probe_state_id <= mmio_wdata[3:0];
                            mmio_rdata     <= {{(32-DATA_WIDTH){probe_data[DATA_WIDTH-1]}}, probe_data};
                        end

                        A_UART_RX:
                            mmio_rdata <= {24'd0, uart_rx_data};

                        A_UART_STATUS:
                            mmio_rdata <= {30'd0, uart_rx_valid, uart_tx_ready};

                        A_TIMESTEP:
                            mmio_rdata <= timestep_count;

                        A_PERF_SPIKES: mmio_rdata <= perf_spike_count;
                        A_PERF_SYNOPS: mmio_rdata <= perf_synop_count;
                        A_PERF_ACTIVE: mmio_rdata <= perf_active_cycles;
                        A_PERF_POWER:  mmio_rdata <= perf_power_estimate;

                        default:
                            mmio_rdata <= 32'd0; // unmapped read
                    endcase
                end
            end
        end
    end

endmodule
|
rtl/multi_chip_router.v
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Multi-Chip Router
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
// ----------------------------------------------------------------------------
// multi_chip_router
//
// Byte-serial inter-chip packet router.
//
//   TX side : spike and management requests are queued in a FIFO, then
//             serialised MSB-first as TX_NUM_BYTES bytes onto the link chosen
//             by (destination chip % NUM_LINKS).  Barrier and pre-empt
//             messages bypass the FIFO and are sent on every link in turn.
//   RX side : each link has its own deserialiser.  A frame starts on a byte
//             whose bit 7 is set and is TX_NUM_BYTES long.  Completed frames
//             are demultiplexed by message type: spikes go to the RX FIFO,
//             barrier / pre-empt raise a one-cycle pulse, management frames
//             are presented on the mgmt_rx_* outputs for one cycle.
//
// Wire format (MSB first, zero padded to a whole number of bytes):
//   {1'b1, msg_type[1:0], dest_chip, src_chip, core, neuron, payload[7:0]}
//
// Parameters:
//   TX_DEPTH / RX_DEPTH must be powers of two (>= 2); the FIFO pointers carry
//   one extra wrap bit and rely on modular arithmetic.
//
// Changes relative to the previous revision:
//   * FIFO pointer / index widths are derived from TX_DEPTH / RX_DEPTH
//     instead of being fixed at 9 / 8 bits (identical for the default 256).
//   * RX FIFO signals are declared before their first use (link_rx_ready).
//   * When several links complete a spike frame in the same cycle every
//     spike is now queued; previously all but the highest-numbered link's
//     spike were overwritten because they shared one non-blocking pointer.
// ----------------------------------------------------------------------------
module multi_chip_router #(
    parameter NUM_LINKS    = 1,
    parameter CHIP_ID_BITS = 14,
    parameter CORE_ID_BITS = 7,
    parameter NEURON_BITS  = 10,
    parameter DATA_WIDTH   = 16,
    parameter TX_DEPTH     = 256,
    parameter RX_DEPTH     = 256
)(
    input  wire                         clk,
    input  wire                         rst_n,

    input  wire [CHIP_ID_BITS-1:0]      my_chip_id,

    // Outgoing spikes
    input  wire                         tx_push,
    input  wire [CHIP_ID_BITS-1:0]      tx_dest_chip,
    input  wire [CORE_ID_BITS-1:0]      tx_core,
    input  wire [NEURON_BITS-1:0]       tx_neuron,
    input  wire [7:0]                   tx_payload,
    output wire                         tx_full,

    // Incoming spikes (head of the RX FIFO)
    output wire [CHIP_ID_BITS-1:0]      rx_src_chip,
    output wire [CORE_ID_BITS-1:0]      rx_core,
    output wire [NEURON_BITS-1:0]       rx_neuron,
    output wire signed [DATA_WIDTH-1:0] rx_current,
    input  wire                         rx_pop,
    output wire                         rx_empty,

    // Barrier broadcast
    input  wire                         barrier_tx_send,
    output reg                          barrier_rx,

    // Management channel
    input  wire                         mgmt_tx_push,
    input  wire [CORE_ID_BITS-1:0]      mgmt_tx_core,
    input  wire [NEURON_BITS-1:0]       mgmt_tx_neuron,
    input  wire [7:0]                   mgmt_tx_data,
    input  wire                         mgmt_tx_is_write,
    input  wire [CHIP_ID_BITS-1:0]      mgmt_tx_dest_chip,
    output reg                          mgmt_rx_valid,
    output reg  [CHIP_ID_BITS-1:0]      mgmt_rx_src_chip,
    output reg  [CORE_ID_BITS-1:0]      mgmt_rx_core,
    output reg  [NEURON_BITS-1:0]       mgmt_rx_neuron,
    output reg  [7:0]                   mgmt_rx_data,
    output reg                          mgmt_rx_is_write,

    // Pre-empt broadcast
    input  wire                         preempt_request,
    output reg                          preempt_rx,

    // Physical links (one byte lane per link)
    output wire [NUM_LINKS*8-1:0]       link_tx_data,
    output wire [NUM_LINKS-1:0]         link_tx_valid,
    input  wire [NUM_LINKS-1:0]         link_tx_ready,
    input  wire [NUM_LINKS*8-1:0]       link_rx_data,
    input  wire [NUM_LINKS-1:0]         link_rx_valid,
    output wire [NUM_LINKS-1:0]         link_rx_ready
);

    // ------------------------------------------------------------------
    // Message types
    // ------------------------------------------------------------------
    localparam MSG_SPIKE   = 2'b00;
    localparam MSG_BARRIER = 2'b01;
    localparam MSG_MGMT    = 2'b10;
    localparam MSG_PREEMPT = 2'b11;

    // ------------------------------------------------------------------
    // Wire-frame geometry
    // ------------------------------------------------------------------
    localparam TX_FLAT_W    = 1 + 2 + 2*CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
    localparam TX_NUM_BYTES = (TX_FLAT_W + 7) / 8;
    localparam TX_PAD_W     = TX_NUM_BYTES * 8;

    // MSB index of each field inside a fully received frame.
    localparam MSGTYPE_OFFSET = TX_PAD_W - 1 - 1;           // skip the start bit
    localparam DEST_OFFSET    = MSGTYPE_OFFSET - 2;
    localparam SRC_OFFSET     = DEST_OFFSET - CHIP_ID_BITS;
    localparam CORE_OFFSET    = SRC_OFFSET - CHIP_ID_BITS;
    localparam NRN_OFFSET     = CORE_OFFSET - CORE_ID_BITS;
    localparam PAY_OFFSET     = NRN_OFFSET - NEURON_BITS;

    // FIFO entry widths
    localparam PKT_W    = 2 + CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + 8;
    localparam RX_PKT_W = CHIP_ID_BITS + CORE_ID_BITS + NEURON_BITS + DATA_WIDTH;

    // FIFO address widths (pointers are one bit wider to tell full from empty)
    localparam TX_PTR_W = $clog2(TX_DEPTH);
    localparam RX_PTR_W = $clog2(RX_DEPTH);

    localparam BYTE_CNT_W = $clog2(TX_NUM_BYTES+1);

    // ------------------------------------------------------------------
    // TX FIFO
    // ------------------------------------------------------------------
    reg  [PKT_W-1:0]  tx_fifo [0:TX_DEPTH-1];
    reg  [TX_PTR_W:0] tx_wr_ptr, tx_rd_ptr;
    wire [TX_PTR_W:0] tx_count      = tx_wr_ptr - tx_rd_ptr;
    wire              tx_fifo_empty = (tx_wr_ptr == tx_rd_ptr);
    assign tx_full = (tx_count >= TX_DEPTH);

    // ------------------------------------------------------------------
    // RX FIFO (declared here so that link_rx_ready below can use rx_count)
    // ------------------------------------------------------------------
    reg  [RX_PKT_W-1:0] rx_fifo [0:RX_DEPTH-1];
    reg  [RX_PTR_W:0]   rx_wr_ptr, rx_rd_ptr;
    wire [RX_PTR_W:0]   rx_count = rx_wr_ptr - rx_rd_ptr;
    assign rx_empty = (rx_wr_ptr == rx_rd_ptr);

    // ------------------------------------------------------------------
    // TX FIFO write port.  A spike push wins over a management push made in
    // the same cycle; the losing management request is not queued.
    // Management data is carried as {is_write, data[6:0]}, i.e. bit 7 of
    // mgmt_tx_data is not transmitted.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            tx_wr_ptr <= 0;
        else if (tx_push && !tx_full) begin
            tx_fifo[tx_wr_ptr[TX_PTR_W-1:0]] <= {MSG_SPIKE, tx_dest_chip, tx_core, tx_neuron, tx_payload};
            tx_wr_ptr <= tx_wr_ptr + 1;
        end else if (mgmt_tx_push && !tx_full) begin
            tx_fifo[tx_wr_ptr[TX_PTR_W-1:0]] <= {MSG_MGMT, mgmt_tx_dest_chip, mgmt_tx_core, mgmt_tx_neuron,
                                                 mgmt_tx_is_write, mgmt_tx_data[6:0]};
            tx_wr_ptr <= tx_wr_ptr + 1;
        end
    end

    // Head-of-queue decode
    wire [PKT_W-1:0]        tx_head         = tx_fifo[tx_rd_ptr[TX_PTR_W-1:0]];
    wire [1:0]              tx_head_msgtype = tx_head[PKT_W-1 -: 2];
    wire [CHIP_ID_BITS-1:0] tx_head_chip    = tx_head[PKT_W-3 -: CHIP_ID_BITS];

    // Static link selection by destination chip id.
    wire [CHIP_ID_BITS-1:0] tx_link_sel = tx_head_chip % NUM_LINKS;

    // ------------------------------------------------------------------
    // Unicast serialiser state
    // ------------------------------------------------------------------
    reg [TX_PAD_W-1:0]     txs_shift;
    reg [BYTE_CNT_W-1:0]   txs_cnt;
    reg                    txs_active;
    reg [CHIP_ID_BITS-1:0] txs_link;

    reg [NUM_LINKS*8-1:0] ltx_data;
    reg [NUM_LINKS-1:0]   ltx_valid;
    assign link_tx_data  = ltx_data;
    assign link_tx_valid = ltx_valid;

    // Frame images, left-aligned in TX_PAD_W bits.
    wire [TX_PAD_W-1:0] tx_flat = {1'b1, tx_head_msgtype, tx_head_chip, my_chip_id,
                                   tx_head[CORE_ID_BITS+NEURON_BITS+7 : 0],
                                   {(TX_PAD_W - TX_FLAT_W){1'b0}}};

    // Broadcast frames use the all-ones chip id as destination.
    wire [TX_PAD_W-1:0] barrier_flat = {1'b1, MSG_BARRIER, {CHIP_ID_BITS{1'b1}}, my_chip_id,
                                        {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
                                        {(TX_PAD_W - TX_FLAT_W){1'b0}}};
    wire [TX_PAD_W-1:0] preempt_flat = {1'b1, MSG_PREEMPT, {CHIP_ID_BITS{1'b1}}, my_chip_id,
                                        {(CORE_ID_BITS+NEURON_BITS+8){1'b0}},
                                        {(TX_PAD_W - TX_FLAT_W){1'b0}}};

    // ------------------------------------------------------------------
    // Broadcast serialiser state
    // bcast_link_max and bcast_pending are only ever reset; they are kept so
    // existing waveform / hierarchical references keep resolving.
    // ------------------------------------------------------------------
    reg                    bcast_active;
    reg [TX_PAD_W-1:0]     bcast_shift;
    reg [BYTE_CNT_W-1:0]   bcast_cnt;
    reg [CHIP_ID_BITS-1:0] bcast_link;
    reg [CHIP_ID_BITS-1:0] bcast_link_max;
    reg [1:0]              bcast_msg_type;
    reg                    bcast_pending;
    reg [TX_PAD_W-1:0]     bcast_flat_save;

    // ------------------------------------------------------------------
    // TX serialiser.  Priority when idle: barrier, pre-empt, queued packet.
    //
    // NOTE(review): barrier_tx_send / preempt_request are only sampled while
    // both serialisers are idle, so a one-cycle request that arrives during a
    // transfer is not sent.  Presumably callers hold the request or the link
    // is idle at barrier time - confirm against the mesh controller.
    //
    // NOTE(review): link_tx_ready is sampled one stage ahead of the
    // registered ltx_data / ltx_valid outputs, and the first byte of a
    // unicast frame is issued without looking at link_tx_ready.  This is only
    // loss-free if the link's ready behaves like a "can accept next cycle"
    // flag - confirm against chip_link.v before relying on back-pressure.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            txs_active      <= 0;
            txs_cnt         <= 0;
            txs_shift       <= 0;
            txs_link        <= 0;
            tx_rd_ptr       <= 0;
            ltx_data        <= 0;
            ltx_valid       <= 0;
            bcast_active    <= 0;
            bcast_shift     <= 0;
            bcast_cnt       <= 0;
            bcast_link      <= 0;
            bcast_link_max  <= 0;
            bcast_msg_type  <= 0;
            bcast_pending   <= 0;
            bcast_flat_save <= 0;
        end else begin
            ltx_valid <= 0;

            if (bcast_active) begin
                // Broadcast in progress: stream the frame on the current link.
                ltx_data[bcast_link*8 +: 8] <= bcast_shift[TX_PAD_W-1 -: 8];
                ltx_valid[bcast_link]       <= 1;

                if (link_tx_ready[bcast_link]) begin
                    bcast_shift <= bcast_shift << 8;
                    if (bcast_cnt == TX_NUM_BYTES - 1) begin
                        if (bcast_link < NUM_LINKS - 1) begin
                            // Restart the same frame on the next link.
                            bcast_link  <= bcast_link + 1;
                            bcast_shift <= bcast_flat_save;
                            bcast_cnt   <= 0;
                        end else begin
                            bcast_active <= 0;
                        end
                    end else begin
                        bcast_cnt <= bcast_cnt + 1;
                    end
                end
            end else if (!txs_active) begin
                // Idle: pick the next thing to send.
                if (barrier_tx_send) begin
                    bcast_active    <= 1;
                    bcast_flat_save <= barrier_flat;
                    bcast_shift     <= barrier_flat;
                    bcast_cnt       <= 0;
                    bcast_link      <= 0;
                    bcast_msg_type  <= MSG_BARRIER;
                end else if (preempt_request) begin
                    bcast_active    <= 1;
                    bcast_flat_save <= preempt_flat;
                    bcast_shift     <= preempt_flat;
                    bcast_cnt       <= 0;
                    bcast_link      <= 0;
                    bcast_msg_type  <= MSG_PREEMPT;
                end else if (!tx_fifo_empty) begin
                    // Emit byte 0 immediately and pop the FIFO; the remaining
                    // bytes are streamed from txs_shift.
                    ltx_data[tx_link_sel*8 +: 8] <= tx_flat[TX_PAD_W-1 -: 8];
                    ltx_valid[tx_link_sel]       <= 1;
                    txs_shift  <= tx_flat << 8;
                    txs_link   <= tx_link_sel;
                    txs_cnt    <= 1;
                    txs_active <= 1;
                    tx_rd_ptr  <= tx_rd_ptr + 1;
                end
            end else begin
                // Unicast in progress.
                ltx_data[txs_link*8 +: 8] <= txs_shift[TX_PAD_W-1 -: 8];
                ltx_valid[txs_link]       <= 1;

                if (link_tx_ready[txs_link]) begin
                    txs_shift <= txs_shift << 8;
                    if (txs_cnt == TX_NUM_BYTES - 1)
                        txs_active <= 0;
                    else
                        txs_cnt <= txs_cnt + 1;
                end
            end
        end
    end

    // ------------------------------------------------------------------
    // RX deserialisers (one per link)
    // ------------------------------------------------------------------
    reg [TX_PAD_W-1:0]   rxs_accum [0:NUM_LINKS-1];
    reg [BYTE_CNT_W-1:0] rxs_cnt   [0:NUM_LINKS-1];
    reg [NUM_LINKS-1:0]  rxs_push;

    // Stop accepting link traffic a few entries before the RX FIFO fills so
    // that frames already in flight still fit.
    assign link_rx_ready = (rx_count < RX_DEPTH - 4) ? {NUM_LINKS{1'b1}} : {NUM_LINKS{1'b0}};

    genvar li;
    generate
        for (li = 0; li < NUM_LINKS; li = li + 1) begin : gen_rx
            always @(posedge clk or negedge rst_n) begin
                if (!rst_n) begin
                    rxs_cnt[li]   <= 0;
                    rxs_push[li]  <= 0;
                    rxs_accum[li] <= 0;
                end else begin
                    rxs_push[li] <= 0;

                    if (link_rx_valid[li]) begin
                        // Default: shift the new byte in at the bottom.
                        rxs_accum[li] <= {rxs_accum[li][TX_PAD_W-9:0], link_rx_data[li*8 +: 8]};

                        if (rxs_cnt[li] == 0) begin
                            // Hunting for a start byte (bit 7 set); anything
                            // else is discarded.
                            if (link_rx_data[li*8 + 7]) begin
                                rxs_accum[li] <= {{(TX_PAD_W-8){1'b0}}, link_rx_data[li*8 +: 8]};
                                rxs_cnt[li]   <= 1;
                            end
                        end else begin
                            if (rxs_cnt[li] == TX_NUM_BYTES - 1) begin
                                // Last byte: frame is complete next cycle.
                                rxs_push[li] <= 1;
                                rxs_cnt[li]  <= 0;
                            end else begin
                                rxs_cnt[li] <= rxs_cnt[li] + 1;
                            end
                        end
                    end
                end
            end
        end
    endgenerate

    // ------------------------------------------------------------------
    // RX demultiplexer / RX FIFO write port.
    //
    // wr_ptr_run is a blocking running copy of the write pointer so that
    // several links completing a spike frame in the same cycle each get their
    // own FIFO slot.  fifo_used is recomputed per iteration in pointer width
    // so the full check stays modular.  A spike that arrives while the FIFO
    // is full is dropped.  If several links deliver a management frame in the
    // same cycle the highest-numbered link wins.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge rst_n) begin : rx_fifo_wr
        integer          k;
        reg [1:0]        rx_msg_type;
        reg [RX_PTR_W:0] wr_ptr_run;
        reg [RX_PTR_W:0] fifo_used;
        if (!rst_n) begin
            rx_wr_ptr        <= 0;
            barrier_rx       <= 0;
            preempt_rx       <= 0;
            mgmt_rx_valid    <= 0;
            mgmt_rx_src_chip <= 0;
            mgmt_rx_core     <= 0;
            mgmt_rx_neuron   <= 0;
            mgmt_rx_data     <= 0;
            mgmt_rx_is_write <= 0;
        end else begin
            barrier_rx    <= 0;
            preempt_rx    <= 0;
            mgmt_rx_valid <= 0;

            wr_ptr_run = rx_wr_ptr;

            for (k = 0; k < NUM_LINKS; k = k + 1) begin
                if (rxs_push[k]) begin
                    rx_msg_type = rxs_accum[k][MSGTYPE_OFFSET -: 2];

                    case (rx_msg_type)
                        MSG_SPIKE: begin
                            fifo_used = wr_ptr_run - rx_rd_ptr;
                            if (fifo_used < RX_DEPTH) begin
                                // Payload byte is zero-extended to DATA_WIDTH.
                                rx_fifo[wr_ptr_run[RX_PTR_W-1:0]] <= {
                                    rxs_accum[k][SRC_OFFSET  -: CHIP_ID_BITS],
                                    rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS],
                                    rxs_accum[k][NRN_OFFSET  -: NEURON_BITS],
                                    {{(DATA_WIDTH-8){1'b0}},
                                     rxs_accum[k][PAY_OFFSET -: 8]}
                                };
                                wr_ptr_run = wr_ptr_run + 1;
                            end
                        end

                        MSG_BARRIER: begin
                            barrier_rx <= 1;
                        end

                        MSG_MGMT: begin
                            mgmt_rx_valid    <= 1;
                            mgmt_rx_src_chip <= rxs_accum[k][SRC_OFFSET  -: CHIP_ID_BITS];
                            mgmt_rx_core     <= rxs_accum[k][CORE_OFFSET -: CORE_ID_BITS];
                            mgmt_rx_neuron   <= rxs_accum[k][NRN_OFFSET  -: NEURON_BITS];
                            mgmt_rx_is_write <= rxs_accum[k][PAY_OFFSET];
                            mgmt_rx_data     <= {1'b0, rxs_accum[k][PAY_OFFSET-1 -: 7]};
                        end

                        MSG_PREEMPT: begin
                            preempt_rx <= 1;
                        end
                    endcase
                end
            end

            rx_wr_ptr <= wr_ptr_run;
        end
    end

    // RX FIFO read pointer
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rx_rd_ptr <= 0;
        else if (rx_pop && !rx_empty)
            rx_rd_ptr <= rx_rd_ptr + 1;
    end

    // RX FIFO head (valid while !rx_empty)
    wire [RX_PKT_W-1:0] rx_top = rx_fifo[rx_rd_ptr[RX_PTR_W-1:0]];
    assign rx_src_chip = rx_top[RX_PKT_W-1 -: CHIP_ID_BITS];
    assign rx_core     = rx_top[NEURON_BITS+DATA_WIDTH +: CORE_ID_BITS];
    assign rx_neuron   = rx_top[DATA_WIDTH +: NEURON_BITS];
    assign rx_current  = rx_top[DATA_WIDTH-1:0];

endmodule
|
rtl/neuromorphic_mesh.v
ADDED
|
@@ -0,0 +1,859 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Neuromorphic Mesh
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
module neuromorphic_mesh #(
|
| 22 |
+
parameter NUM_CORES = 4,
|
| 23 |
+
parameter CORE_ID_BITS = 2,
|
| 24 |
+
parameter NUM_NEURONS = 1024,
|
| 25 |
+
parameter NEURON_BITS = 10,
|
| 26 |
+
parameter DATA_WIDTH = 16,
|
| 27 |
+
parameter POOL_DEPTH = 32768,
|
| 28 |
+
parameter POOL_ADDR_BITS = 15,
|
| 29 |
+
parameter COUNT_BITS = 12,
|
| 30 |
+
parameter REV_FANIN = 32,
|
| 31 |
+
parameter REV_SLOT_BITS = 5,
|
| 32 |
+
parameter THRESHOLD = 16'sd1000,
|
| 33 |
+
parameter LEAK_RATE = 16'sd3,
|
| 34 |
+
parameter REFRAC_CYCLES = 3,
|
| 35 |
+
parameter GRADE_SHIFT = 7,
|
| 36 |
+
|
| 37 |
+
parameter ROUTE_FANOUT = 8,
|
| 38 |
+
parameter ROUTE_SLOT_BITS = 3,
|
| 39 |
+
|
| 40 |
+
parameter ROUTE_ADDR_W = CORE_ID_BITS + NEURON_BITS + ROUTE_SLOT_BITS,
|
| 41 |
+
parameter ROUTE_DATA_W = 1 + CORE_ID_BITS + NEURON_BITS + DATA_WIDTH,
|
| 42 |
+
|
| 43 |
+
parameter CLUSTER_SIZE = 4,
|
| 44 |
+
parameter GLOBAL_ROUTE_SLOTS = 4,
|
| 45 |
+
parameter GLOBAL_ROUTE_SLOT_BITS = 2,
|
| 46 |
+
parameter GLOBAL_ROUTE_ADDR_W = CORE_ID_BITS + NEURON_BITS + GLOBAL_ROUTE_SLOT_BITS,
|
| 47 |
+
|
| 48 |
+
parameter CHIP_LINK_EN = 0
|
| 49 |
+
)(
|
| 50 |
+
input wire clk,
|
| 51 |
+
input wire rst_n,
|
| 52 |
+
input wire start,
|
| 53 |
+
|
| 54 |
+
input wire prog_pool_we,
|
| 55 |
+
input wire [CORE_ID_BITS-1:0] prog_pool_core,
|
| 56 |
+
input wire [POOL_ADDR_BITS-1:0] prog_pool_addr,
|
| 57 |
+
input wire [NEURON_BITS-1:0] prog_pool_src,
|
| 58 |
+
input wire [NEURON_BITS-1:0] prog_pool_target,
|
| 59 |
+
input wire signed [DATA_WIDTH-1:0] prog_pool_weight,
|
| 60 |
+
input wire [1:0] prog_pool_comp,
|
| 61 |
+
|
| 62 |
+
input wire prog_index_we,
|
| 63 |
+
input wire [CORE_ID_BITS-1:0] prog_index_core,
|
| 64 |
+
input wire [NEURON_BITS-1:0] prog_index_neuron,
|
| 65 |
+
input wire [POOL_ADDR_BITS-1:0] prog_index_base,
|
| 66 |
+
input wire [COUNT_BITS-1:0] prog_index_count,
|
| 67 |
+
input wire [1:0] prog_index_format,
|
| 68 |
+
|
| 69 |
+
input wire prog_route_we,
|
| 70 |
+
input wire [CORE_ID_BITS-1:0] prog_route_src_core,
|
| 71 |
+
input wire [NEURON_BITS-1:0] prog_route_src_neuron,
|
| 72 |
+
input wire [ROUTE_SLOT_BITS-1:0] prog_route_slot,
|
| 73 |
+
input wire [CORE_ID_BITS-1:0] prog_route_dest_core,
|
| 74 |
+
input wire [NEURON_BITS-1:0] prog_route_dest_neuron,
|
| 75 |
+
input wire signed [DATA_WIDTH-1:0] prog_route_weight,
|
| 76 |
+
|
| 77 |
+
input wire prog_global_route_we,
|
| 78 |
+
input wire [CORE_ID_BITS-1:0] prog_global_route_src_core,
|
| 79 |
+
input wire [NEURON_BITS-1:0] prog_global_route_src_neuron,
|
| 80 |
+
input wire [GLOBAL_ROUTE_SLOT_BITS-1:0] prog_global_route_slot,
|
| 81 |
+
input wire [CORE_ID_BITS-1:0] prog_global_route_dest_core,
|
| 82 |
+
input wire [NEURON_BITS-1:0] prog_global_route_dest_neuron,
|
| 83 |
+
input wire signed [DATA_WIDTH-1:0] prog_global_route_weight,
|
| 84 |
+
|
| 85 |
+
input wire learn_enable,
|
| 86 |
+
|
| 87 |
+
input wire graded_enable,
|
| 88 |
+
|
| 89 |
+
input wire dendritic_enable,
|
| 90 |
+
|
| 91 |
+
input wire async_enable,
|
| 92 |
+
|
| 93 |
+
input wire threefactor_enable,
|
| 94 |
+
input wire signed [DATA_WIDTH-1:0] reward_value,
|
| 95 |
+
|
| 96 |
+
input wire noise_enable,
|
| 97 |
+
|
| 98 |
+
input wire skip_idle_enable,
|
| 99 |
+
|
| 100 |
+
input wire scale_u_enable,
|
| 101 |
+
|
| 102 |
+
input wire prog_delay_we,
|
| 103 |
+
input wire [CORE_ID_BITS-1:0] prog_delay_core,
|
| 104 |
+
input wire [POOL_ADDR_BITS-1:0] prog_delay_addr,
|
| 105 |
+
input wire [5:0] prog_delay_value,
|
| 106 |
+
|
| 107 |
+
input wire prog_ucode_we,
|
| 108 |
+
input wire [CORE_ID_BITS-1:0] prog_ucode_core,
|
| 109 |
+
input wire [7:0] prog_ucode_addr,
|
| 110 |
+
input wire [31:0] prog_ucode_data,
|
| 111 |
+
|
| 112 |
+
input wire prog_param_we,
|
| 113 |
+
input wire [CORE_ID_BITS-1:0] prog_param_core,
|
| 114 |
+
input wire [NEURON_BITS-1:0] prog_param_neuron,
|
| 115 |
+
input wire [4:0] prog_param_id,
|
| 116 |
+
input wire signed [DATA_WIDTH-1:0] prog_param_value,
|
| 117 |
+
|
| 118 |
+
input wire ext_valid,
|
| 119 |
+
input wire [CORE_ID_BITS-1:0] ext_core,
|
| 120 |
+
input wire [NEURON_BITS-1:0] ext_neuron_id,
|
| 121 |
+
input wire signed [DATA_WIDTH-1:0] ext_current,
|
| 122 |
+
|
| 123 |
+
input wire probe_read,
|
| 124 |
+
input wire [CORE_ID_BITS-1:0] probe_core,
|
| 125 |
+
input wire [NEURON_BITS-1:0] probe_neuron,
|
| 126 |
+
input wire [4:0] probe_state_id,
|
| 127 |
+
input wire [POOL_ADDR_BITS-1:0] probe_pool_addr,
|
| 128 |
+
output reg signed [DATA_WIDTH-1:0] probe_data,
|
| 129 |
+
output reg probe_valid,
|
| 130 |
+
|
| 131 |
+
output reg timestep_done,
|
| 132 |
+
output wire [NUM_CORES-1:0] spike_valid_bus,
|
| 133 |
+
output wire [NUM_CORES*NEURON_BITS-1:0] spike_id_bus,
|
| 134 |
+
output wire [5:0] mesh_state_out,
|
| 135 |
+
output reg [31:0] total_spikes,
|
| 136 |
+
output reg [31:0] timestep_count,
|
| 137 |
+
|
| 138 |
+
output wire [NUM_CORES-1:0] core_idle_bus,
|
| 139 |
+
|
| 140 |
+
input wire [7:0] dvfs_stall,
|
| 141 |
+
|
| 142 |
+
output wire [NUM_CORES-1:0] core_clock_en,
|
| 143 |
+
output reg [31:0] energy_counter,
|
| 144 |
+
output wire power_idle_hint,
|
| 145 |
+
|
| 146 |
+
output reg link_tx_push,
|
| 147 |
+
output reg [CORE_ID_BITS-1:0] link_tx_core,
|
| 148 |
+
output reg [NEURON_BITS-1:0] link_tx_neuron,
|
| 149 |
+
output reg [7:0] link_tx_payload,
|
| 150 |
+
input wire link_tx_full,
|
| 151 |
+
input wire [CORE_ID_BITS-1:0] link_rx_core,
|
| 152 |
+
input wire [NEURON_BITS-1:0] link_rx_neuron,
|
| 153 |
+
input wire signed [DATA_WIDTH-1:0] link_rx_current,
|
| 154 |
+
output reg link_rx_pop,
|
| 155 |
+
input wire link_rx_empty
|
| 156 |
+
);
|
| 157 |
+
|
| 158 |
+
// ---------------------------------------------------------------------------
// Mesh controller state encoding (6 bits). The current state is exported
// unchanged on mesh_state_out.
// ---------------------------------------------------------------------------

// States without the ASYNC prefix.
localparam SM_IDLE              = 6'd0;
localparam SM_INJECT            = 6'd1;
localparam SM_START             = 6'd2;
localparam SM_RUN_WAIT          = 6'd3;
localparam SM_ROUTE_POP         = 6'd4;
localparam SM_ROUTE_ADDR        = 6'd5;
localparam SM_ROUTE_WAIT        = 6'd6;
localparam SM_ROUTE_READ        = 6'd7;
localparam SM_DONE              = 6'd8;

// ASYNC-prefixed counterparts.
localparam SM_ASYNC_ACTIVE      = 6'd9;
localparam SM_ASYNC_INJECT      = 6'd10;
localparam SM_ASYNC_ROUTE_POP   = 6'd11;
localparam SM_ASYNC_ROUTE_ADDR  = 6'd12;
localparam SM_ASYNC_ROUTE_WAIT  = 6'd13;
localparam SM_ASYNC_ROUTE_READ  = 6'd14;
localparam SM_ASYNC_DONE        = 6'd15;

// Global route table lookup states.
localparam SM_GLOBAL_ROUTE_ADDR = 6'd16;
localparam SM_GLOBAL_ROUTE_WAIT = 6'd17;
localparam SM_GLOBAL_ROUTE_READ = 6'd18;

// Chip-link receive states.
localparam SM_LINK_RX_DRAIN     = 6'd19;
localparam SM_LINK_RX_WAIT      = 6'd20;

// DVFS stall state.
localparam SM_DVFS_WAIT         = 6'd21;

reg [5:0] mesh_state;
assign mesh_state_out = mesh_state;

// NOTE(review): presumably counts cycles spent in SM_DVFS_WAIT against
// dvfs_stall; the logic that drives it is not in this part of the file.
reg [7:0] dvfs_wait_cnt;
|
| 188 |
+
|
| 189 |
+
// ---------------------------------------------------------------------------
// Local route table
//   address : {source core, source neuron, slot}
//   data    : {valid, destination core, destination neuron, weight}
// ---------------------------------------------------------------------------
reg                     rt_we;
reg  [ROUTE_ADDR_W-1:0] rt_addr;
reg  [ROUTE_DATA_W-1:0] rt_wdata;
wire [ROUTE_DATA_W-1:0] rt_rdata;

// While the mesh is in SM_IDLE the programming port drives the table (and
// every programmed entry is written with its valid bit set); in any other
// state the controller's own rt_* registers drive it.
wire rt_we_mux =
    (mesh_state == SM_IDLE) ? prog_route_we : rt_we;

wire [ROUTE_ADDR_W-1:0] rt_addr_mux =
    (mesh_state == SM_IDLE)
        ? {prog_route_src_core, prog_route_src_neuron, prog_route_slot}
        : rt_addr;

wire [ROUTE_DATA_W-1:0] rt_wdata_mux =
    (mesh_state == SM_IDLE)
        ? {1'b1, prog_route_dest_core, prog_route_dest_neuron, prog_route_weight}
        : rt_wdata;

sram #(
    .DATA_WIDTH (ROUTE_DATA_W),
    .ADDR_WIDTH (ROUTE_ADDR_W)
) route_table (
    .clk     (clk),
    .we_a    (rt_we_mux),
    .addr_a  (rt_addr_mux),
    .wdata_a (rt_wdata_mux),
    .rdata_a (rt_rdata),
    .addr_b  ({ROUTE_ADDR_W{1'b0}}),   // second port tied off / unused
    .rdata_b ()
);

// Field positions inside a route entry (weight occupies the low DATA_WIDTH
// bits, the valid flag is the MSB).
localparam RT_DEST_NRN_LO  = DATA_WIDTH;
localparam RT_DEST_NRN_HI  = DATA_WIDTH + NEURON_BITS - 1;
localparam RT_DEST_CORE_LO = NEURON_BITS + DATA_WIDTH;
localparam RT_DEST_CORE_HI = NEURON_BITS + DATA_WIDTH + CORE_ID_BITS - 1;

wire                         rt_valid     = rt_rdata[ROUTE_DATA_W-1];
wire [CORE_ID_BITS-1:0]      rt_dest_core = rt_rdata[RT_DEST_CORE_HI:RT_DEST_CORE_LO];
wire [NEURON_BITS-1:0]       rt_dest_nrn  = rt_rdata[RT_DEST_NRN_HI:RT_DEST_NRN_LO];
wire signed [DATA_WIDTH-1:0] rt_weight    = rt_rdata[DATA_WIDTH-1:0];
|
| 215 |
+
|
| 216 |
+
// ---------------------------------------------------------------------------
// Global route table
//   address : {source core, source neuron, global slot}
//   data    : same entry layout as the local route table
//             {valid, destination core, destination neuron, weight}
// ---------------------------------------------------------------------------
reg                            grt_we;
reg  [GLOBAL_ROUTE_ADDR_W-1:0] grt_addr;
wire [ROUTE_DATA_W-1:0]        grt_rdata;

// In SM_IDLE the programming port owns the table. Outside SM_IDLE the
// controller supplies write-enable and address, and the write data is
// forced to all zeros.
wire grt_we_mux =
    (mesh_state == SM_IDLE) ? prog_global_route_we : grt_we;

wire [GLOBAL_ROUTE_ADDR_W-1:0] grt_addr_mux =
    (mesh_state == SM_IDLE)
        ? {prog_global_route_src_core, prog_global_route_src_neuron, prog_global_route_slot}
        : grt_addr;

wire [ROUTE_DATA_W-1:0] grt_wdata_mux =
    (mesh_state == SM_IDLE)
        ? {1'b1, prog_global_route_dest_core, prog_global_route_dest_neuron, prog_global_route_weight}
        : {ROUTE_DATA_W{1'b0}};

sram #(
    .DATA_WIDTH (ROUTE_DATA_W),
    .ADDR_WIDTH (GLOBAL_ROUTE_ADDR_W)
) global_route_table (
    .clk     (clk),
    .we_a    (grt_we_mux),
    .addr_a  (grt_addr_mux),
    .wdata_a (grt_wdata_mux),
    .rdata_a (grt_rdata),
    .addr_b  ({GLOBAL_ROUTE_ADDR_W{1'b0}}),   // second port tied off / unused
    .rdata_b ()
);

// Field positions inside a global route entry.
localparam GRT_DEST_NRN_LO  = DATA_WIDTH;
localparam GRT_DEST_NRN_HI  = DATA_WIDTH + NEURON_BITS - 1;
localparam GRT_DEST_CORE_LO = NEURON_BITS + DATA_WIDTH;
localparam GRT_DEST_CORE_HI = NEURON_BITS + DATA_WIDTH + CORE_ID_BITS - 1;

wire                         grt_valid     = grt_rdata[ROUTE_DATA_W-1];
wire [CORE_ID_BITS-1:0]      grt_dest_core = grt_rdata[GRT_DEST_CORE_HI:GRT_DEST_CORE_LO];
wire [NEURON_BITS-1:0]       grt_dest_nrn  = grt_rdata[GRT_DEST_NRN_HI:GRT_DEST_NRN_LO];
wire signed [DATA_WIDTH-1:0] grt_weight    = grt_rdata[DATA_WIDTH-1:0];

// Graded delivery: sign-extend the weight to 32 bits, multiply by the
// extended spike payload and arithmetic-shift right by GRADE_SHIFT. The
// result is truncated to DATA_WIDTH bits by the assignment.
// NOTE(review): route_payload_ext is declared elsewhere in this module.
wire signed [31:0]           grt_weight_ext     = grt_weight;
wire signed [31:0]           grt_graded_product = grt_weight_ext * route_payload_ext;
wire signed [DATA_WIDTH-1:0] grt_graded_current = grt_graded_product >>> GRADE_SHIFT;
|
| 245 |
+
|
| 246 |
+
// ---------------------------------------------------------------------------
// Shared injection FIFO (512 entries).
// Entry layout: {destination core, destination neuron, weight}.
// ---------------------------------------------------------------------------
localparam INJECT_WIDTH = CORE_ID_BITS + NEURON_BITS + DATA_WIDTH;

reg                     inj_push, inj_pop, inj_clear;
reg  [INJECT_WIDTH-1:0] inj_push_data;
wire [INJECT_WIDTH-1:0] inj_pop_data;
wire                    inj_empty, inj_full;

spike_fifo #(
    .ID_WIDTH (INJECT_WIDTH),
    .DEPTH    (512),
    .PTR_BITS (9)
) inject_fifo (
    .clk       (clk),
    .rst_n     (rst_n),
    .clear     (inj_clear),
    .push      (inj_push),
    .push_data (inj_push_data),
    .pop       (inj_pop),
    .pop_data  (inj_pop_data),
    .empty     (inj_empty),
    .full      (inj_full),
    .count     ()
);

// Field positions inside an injection entry.
localparam INJ_DEST_NRN_LO  = DATA_WIDTH;
localparam INJ_DEST_NRN_HI  = DATA_WIDTH + NEURON_BITS - 1;
localparam INJ_DEST_CORE_LO = INJECT_WIDTH - CORE_ID_BITS;
localparam INJ_DEST_CORE_HI = INJECT_WIDTH - 1;

// Decoded view of the entry currently at the FIFO output.
wire [CORE_ID_BITS-1:0]      inj_dest_core = inj_pop_data[INJ_DEST_CORE_HI:INJ_DEST_CORE_LO];
wire [NEURON_BITS-1:0]       inj_dest_nrn  = inj_pop_data[INJ_DEST_NRN_HI:INJ_DEST_NRN_LO];
wire signed [DATA_WIDTH-1:0] inj_weight    = inj_pop_data[DATA_WIDTH-1:0];
|
| 267 |
+
|
| 268 |
+
// Per-core status and spike outputs, one bit / one slice per core.
wire [NUM_CORES-1:0]             core_done;
wire [NUM_CORES-1:0]             core_spike_valid;
wire [NUM_CORES*NEURON_BITS-1:0] core_spike_id;
wire [NUM_CORES*8-1:0]           core_spike_payload;

// Per-core start strobes driven by the mesh controller.
reg [NUM_CORES-1:0] core_start_r;

// Sticky record of which cores have raised core_done. It is wiped by reset
// and whenever the controller is in SM_START; in every other cycle newly
// asserted core_done bits are OR-ed in.
reg [NUM_CORES-1:0] core_done_latch;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        core_done_latch <= {NUM_CORES{1'b0}};
    end else if (mesh_state == SM_START) begin
        core_done_latch <= {NUM_CORES{1'b0}};
    end else begin
        core_done_latch <= core_done_latch | core_done;
    end
end
|
| 284 |
+
|
| 285 |
+
// One bit per core: set by that core's start strobe, cleared by its done
// flag. The done mask is applied last, so done wins if both arrive in the
// same cycle.
reg [NUM_CORES-1:0] core_running;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        core_running <= {NUM_CORES{1'b0}};
    end else begin
        core_running <= (core_running | core_start_r) & ~core_done;
    end
end
|
| 292 |
+
|
| 293 |
+
// One bit per core: remembers that the core emitted at least one spike
// while it was marked running. A start strobe drops the remembered bit; a
// spike observed while running sets it (the set term is OR-ed in after the
// clear, so it takes precedence in the same cycle).
reg [NUM_CORES-1:0] core_produced_spike;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        core_produced_spike <= {NUM_CORES{1'b0}};
    end else begin
        core_produced_spike <= (core_produced_spike & ~core_start_r)
                             | (core_spike_valid & core_running);
    end
end
|
| 301 |
+
|
| 302 |
+
// One bit per core: the core finished a pass in which it spiked (either
// recorded in core_produced_spike or spiking in the very cycle it reports
// done). The whole vector is cleared in SM_ASYNC_DONE; otherwise a start
// strobe clears the individual bit, and that clear is applied last.
reg [NUM_CORES-1:0] core_needs_restart;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        core_needs_restart <= {NUM_CORES{1'b0}};
    end else if (mesh_state == SM_ASYNC_DONE) begin
        core_needs_restart <= {NUM_CORES{1'b0}};
    end else begin
        core_needs_restart <= (core_needs_restart
                               | (core_done & (core_produced_spike | core_spike_valid)))
                              & ~core_start_r;
    end
end
|
| 313 |
+
|
| 314 |
+
// Raw per-core spike outputs are forwarded directly to the module ports.
assign spike_valid_bus = core_spike_valid;
assign spike_id_bus    = core_spike_id;

// ---------------------------------------------------------------------------
// Per-core inject FIFOs ("pcif"). Entry layout: {neuron id, current}.
// Push/pop/clear are one bit per core; the push data word is shared by all
// cores.
// ---------------------------------------------------------------------------
localparam PCF_WIDTH = NEURON_BITS + DATA_WIDTH;

reg  [NUM_CORES-1:0]           pcif_push;
reg  [NUM_CORES-1:0]           pcif_pop;
reg  [NUM_CORES-1:0]           pcif_clear;
reg  [PCF_WIDTH-1:0]           pcif_push_data;
wire [NUM_CORES-1:0]           pcif_empty;
wire [NUM_CORES-1:0]           pcif_full;
wire [NUM_CORES*PCF_WIDTH-1:0] pcif_data;

// Index of the core whose FIFO output is currently being looked at.
reg [CORE_ID_BITS-1:0] inject_core_idx;

// Select that core's slice of the packed FIFO output bus: shift the bus
// right by whole entries and let the assignment keep the low PCF_WIDTH bits.
reg [PCF_WIDTH-1:0] active_pcif_entry;
always @(*) begin
    active_pcif_entry = pcif_data >> (inject_core_idx * PCF_WIDTH);
end

// Field positions inside a per-core inject entry.
localparam PCIF_NID_LO = DATA_WIDTH;
localparam PCIF_NID_HI = DATA_WIDTH + NEURON_BITS - 1;

wire [NEURON_BITS-1:0]       pcif_nid = active_pcif_entry[PCIF_NID_HI:PCIF_NID_LO];
wire signed [DATA_WIDTH-1:0] pcif_cur = active_pcif_entry[DATA_WIDTH-1:0];
|
| 337 |
+
|
| 338 |
+
// Neuron id / current presented to every core's external-input port. The
// source depends on the controller state:
//   SM_INJECT        -> head of the shared inject FIFO
//   SM_ASYNC_INJECT  -> head of the selected per-core inject FIFO
//   anything else    -> the module's ext_* inputs
// Which core actually accepts the value is decided by its own ext_valid.
wire [NEURON_BITS-1:0] mesh_ext_nid =
    (mesh_state == SM_INJECT)       ? inj_dest_nrn :
    (mesh_state == SM_ASYNC_INJECT) ? pcif_nid     :
                                      ext_neuron_id;

wire signed [DATA_WIDTH-1:0] mesh_ext_cur =
    (mesh_state == SM_INJECT)       ? inj_weight :
    (mesh_state == SM_ASYNC_INJECT) ? pcif_cur   :
                                      ext_current;
|
| 347 |
+
|
| 348 |
+
// Spike capture FIFOs, one per core. Entry layout: {neuron id, 8-bit payload}.
localparam CAP_WIDTH = NEURON_BITS + 8;

reg  [NUM_CORES-1:0]           cap_pop;
reg  [NUM_CORES-1:0]           cap_clear;
wire [NUM_CORES-1:0]           cap_empty;
wire [NUM_CORES*CAP_WIDTH-1:0] cap_data;

// Probe read-back collected from every core.
wire [NUM_CORES-1:0]            core_probe_valid;
wire [NUM_CORES*DATA_WIDTH-1:0] core_probe_data;

// Registered probe output: every cycle, capture the data slice and valid
// flag belonging to the core addressed by probe_core. The slice is selected
// by shifting the packed bus right by whole words; the assignment keeps the
// low DATA_WIDTH bits.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        probe_valid <= 1'b0;
        probe_data  <= {DATA_WIDTH{1'b0}};
    end else begin
        probe_valid <= core_probe_valid[probe_core];
        probe_data  <= core_probe_data >> (probe_core * DATA_WIDTH);
    end
end
|
| 367 |
+
|
| 368 |
+
// ---------------------------------------------------------------------------
// Per-core generate block: for each core index this instantiates the neuron
// core itself, a 64-entry spike capture FIFO and an 8-entry per-core inject
// FIFO.
// ---------------------------------------------------------------------------
genvar gi;
generate
    for (gi = 0; gi < NUM_CORES; gi = gi + 1) begin : gen_core

        // This core's id, sized to match the *_core select inputs.
        localparam [CORE_ID_BITS-1:0] GI_CORE_ID = gi;

        // External-input strobe for this core. Three sources, by state:
        //   SM_IDLE          host ext_* input addressed to this core
        //                    (only when async_enable is low)
        //   SM_INJECT        shared inject FIFO head targets this core
        //   SM_ASYNC_INJECT  this core is selected and its own inject FIFO
        //                    has an entry
        wire this_ext_valid =
            (mesh_state == SM_IDLE && ext_valid && ext_core == GI_CORE_ID && !async_enable) ||
            (mesh_state == SM_INJECT && !inj_empty && inj_dest_core == GI_CORE_ID) ||
            (mesh_state == SM_ASYNC_INJECT && inject_core_idx == GI_CORE_ID && !pcif_empty[gi]);

        // Programming write enables: honoured only in SM_IDLE and only for
        // the core the request addresses.
        wire this_pool_we  = (mesh_state == SM_IDLE) && prog_pool_we &&
                             (prog_pool_core == GI_CORE_ID);

        wire this_index_we = (mesh_state == SM_IDLE) && prog_index_we &&
                             (prog_index_core == GI_CORE_ID);

        wire this_param_we = (mesh_state == SM_IDLE) && prog_param_we &&
                             (prog_param_core == GI_CORE_ID);

        wire this_delay_we = (mesh_state == SM_IDLE) && prog_delay_we &&
                             (prog_delay_core == GI_CORE_ID);

        wire this_ucode_we = (mesh_state == SM_IDLE) && prog_ucode_we &&
                             (prog_ucode_core == GI_CORE_ID);

        scalable_core_v2 #(
            .NUM_NEURONS    (NUM_NEURONS),
            .NEURON_BITS    (NEURON_BITS),
            .DATA_WIDTH     (DATA_WIDTH),
            .POOL_DEPTH     (POOL_DEPTH),
            .POOL_ADDR_BITS (POOL_ADDR_BITS),
            .COUNT_BITS     (COUNT_BITS),
            .REV_FANIN      (REV_FANIN),
            .REV_SLOT_BITS  (REV_SLOT_BITS),
            .THRESHOLD      (THRESHOLD),
            .LEAK_RATE      (LEAK_RATE),
            .REFRAC_CYCLES  (REFRAC_CYCLES),
            .TRACE_MAX      (8'd100),
            .TRACE_DECAY    (8'd3),
            .LEARN_SHIFT    (3),
            .GRADE_SHIFT    (GRADE_SHIFT)
        ) core (
            .clk                (clk),
            .rst_n              (rst_n),
            .start              (core_start_r[gi]),

            // Feature enables and reward are broadcast to every core.
            .learn_enable       (learn_enable),
            .graded_enable      (graded_enable),
            .dendritic_enable   (dendritic_enable),
            .threefactor_enable (threefactor_enable),
            .noise_enable       (noise_enable),
            .skip_idle_enable   (skip_idle_enable),
            .scale_u_enable     (scale_u_enable),
            .reward_value       (reward_value),

            // External current input (id/current are shared, valid is per core).
            .ext_valid          (this_ext_valid),
            .ext_neuron_id      (mesh_ext_nid),
            .ext_current        (mesh_ext_cur),

            // Programming ports: payloads are shared, write enables are per core.
            .pool_we            (this_pool_we),
            .pool_addr_in       (prog_pool_addr),
            .pool_src_in        (prog_pool_src),
            .pool_target_in     (prog_pool_target),
            .pool_weight_in     (prog_pool_weight),
            .pool_comp_in       (prog_pool_comp),
            .index_we           (this_index_we),
            .index_neuron_in    (prog_index_neuron),
            .index_base_in      (prog_index_base),
            .index_count_in     (prog_index_count),
            .index_format_in    (prog_index_format),
            .delay_we           (this_delay_we),
            .delay_addr_in      (prog_delay_addr),
            .delay_value_in     (prog_delay_value),
            .ucode_prog_we      (this_ucode_we),
            .ucode_prog_addr    (prog_ucode_addr),
            .ucode_prog_data    (prog_ucode_data),
            .prog_param_we      (this_param_we),
            .prog_param_neuron  (prog_param_neuron),
            .prog_param_id      (prog_param_id),
            .prog_param_value   (prog_param_value),

            // Probe: the read strobe reaches only the addressed core.
            .probe_read         (probe_read && (probe_core == GI_CORE_ID)),
            .probe_neuron       (probe_neuron),
            .probe_state_id     (probe_state_id),
            .probe_pool_addr    (probe_pool_addr),
            .probe_data         (core_probe_data[gi*DATA_WIDTH +: DATA_WIDTH]),
            .probe_valid        (core_probe_valid[gi]),

            // Status and spike outputs, packed into the per-core buses.
            .timestep_done      (core_done[gi]),
            .spike_out_valid    (core_spike_valid[gi]),
            .spike_out_id       (core_spike_id[gi*NEURON_BITS +: NEURON_BITS]),
            .spike_out_payload  (core_spike_payload[gi*8 +: 8]),
            .state_out          (),
            .total_spikes       (),
            .timestep_count     (),
            .core_idle          (core_idle_bus[gi])
        );

        // Captures {neuron id, payload} for each spike this core emits while
        // the mesh is in SM_RUN_WAIT or the core is marked running. The
        // FIFO's full flag is left unconnected.
        spike_fifo #(
            .ID_WIDTH (CAP_WIDTH),
            .DEPTH    (64),
            .PTR_BITS (6)
        ) capture_fifo (
            .clk       (clk),
            .rst_n     (rst_n),
            .clear     (cap_clear[gi]),
            .push      (core_spike_valid[gi] && (mesh_state == SM_RUN_WAIT || core_running[gi])),
            .push_data ({core_spike_id[gi*NEURON_BITS +: NEURON_BITS],
                         core_spike_payload[gi*8 +: 8]}),
            .pop       (cap_pop[gi]),
            .pop_data  (cap_data[gi*CAP_WIDTH +: CAP_WIDTH]),
            .empty     (cap_empty[gi]),
            .full      (),
            .count     ()
        );

        // Per-core inject FIFO; push data is the word shared by all cores.
        spike_fifo #(
            .ID_WIDTH (PCF_WIDTH),
            .DEPTH    (8),
            .PTR_BITS (3)
        ) pcif (
            .clk       (clk),
            .rst_n     (rst_n),
            .clear     (pcif_clear[gi]),
            .push      (pcif_push[gi]),
            .push_data (pcif_push_data),
            .pop       (pcif_pop[gi]),
            .pop_data  (pcif_data[gi*PCF_WIDTH +: PCF_WIDTH]),
            .empty     (pcif_empty[gi]),
            .full      (pcif_full[gi]),
            .count     ()
        );
    end
endgenerate
|
| 488 |
+
|
| 489 |
+
// The mesh counts as active in every state except SM_IDLE and SM_DVFS_WAIT.
wire mesh_active = (mesh_state != SM_IDLE && mesh_state != SM_DVFS_WAIT);

// Clock-enable hint per core: all cores enabled while the mesh is active,
// otherwise only the cores that do not report idle.
assign core_clock_en = mesh_active ? {NUM_CORES{1'b1}} : ~core_idle_bus;

// Asserted only when the controller sits in SM_IDLE and every core is idle.
assign power_idle_hint = (mesh_state == SM_IDLE) && (&core_idle_bus);
|
| 492 |
+
|
| 493 |
+
// ---------------------------------------------------------------------------
// Energy estimate
//   energy_counter accumulates
//     * e_cycle_coeff for every cycle the mesh is active, and
//     * total_spikes * e_spike_coeff for every cycle spent in SM_DONE.
//   The coefficients are loaded on reset (10 per spike, 1 per synaptic op,
//   1 per cycle); e_synop_coeff is not used by the accumulation below.
// ---------------------------------------------------------------------------
reg [7:0] e_spike_coeff;
reg [7:0] e_synop_coeff;
reg [7:0] e_cycle_coeff;

// Declared ahead of the wire that reads it: the register used to be
// referenced one line before its declaration, which strict tools reject.
// The register is driven to constant zero every clock, so the derived
// count below is always zero once the first clock edge has passed.
reg [NUM_CORES-1:0] core_spike_valid_sync;
always @(posedge clk) core_spike_valid_sync <= {NUM_CORES{1'b0}};

wire [31:0] total_spike_count_this_ts = popcount(core_spike_valid_sync);

always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        energy_counter <= 32'd0;
        e_spike_coeff  <= 8'd10;
        e_synop_coeff  <= 8'd1;
        e_cycle_coeff  <= 8'd1;
    end else begin
        // SM_DONE is itself an "active" state. With two separate
        // non-blocking assignments the later one silently replaced the
        // earlier, so the per-cycle term was lost in SM_DONE. Both
        // contributions are now summed in a single assignment; when neither
        // condition holds the counter is unchanged (adds zero).
        // NOTE(review): the spike term uses the total_spikes output, not a
        // per-timestep count -- confirm that this is the intended quantity.
        energy_counter <= energy_counter
            + (mesh_active ? {24'd0, e_cycle_coeff} : 32'd0)
            + ((mesh_state == SM_DONE) ? (total_spikes * {24'd0, e_spike_coeff}) : 32'd0);
    end
end
|
| 513 |
+
|
| 514 |
+
// popcount: returns the number of set bits in a NUM_CORES-wide vector as a
// 32-bit value. Each bit is zero-extended and added, so an X/Z input bit
// propagates to the result exactly as a plain addition would.
function [31:0] popcount;
    input [NUM_CORES-1:0] bits;
    integer k;
    begin
        popcount = 32'd0;
        // Walk from the top bit down; the sum is order-independent.
        for (k = NUM_CORES - 1; k >= 0; k = k - 1) begin
            popcount = popcount + {31'd0, bits[k]};
        end
    end
endfunction
|
| 523 |
+
|
| 524 |
+
// Priority encoder (lowest index wins): finds the first core that is not
// running and whose per-core injection FIFO (pcif) is not empty.
//   first_inject_found : 1 when at least one such core exists.
//   first_inject_core  : index of the lowest-numbered match, 0 when none.
reg first_inject_found;
reg [CORE_ID_BITS-1:0] first_inject_core;
integer pe_i;
always @(*) begin
    first_inject_found = 1'b0;
    first_inject_core  = {CORE_ID_BITS{1'b0}};
    // Scan from the highest index downwards and overwrite on every match, so
    // the value left at the end belongs to the lowest matching index.
    for (pe_i = NUM_CORES - 1; pe_i >= 0; pe_i = pe_i - 1) begin
        if (!core_running[pe_i] && !pcif_empty[pe_i]) begin
            first_inject_found = 1'b1;
            first_inject_core  = pe_i[CORE_ID_BITS-1:0];
        end
    end
end
|
| 537 |
+
|
| 538 |
+
// Priority encoder (lowest index wins): finds the first core whose spike
// capture FIFO still holds entries.
//   first_route_found : 1 when any capture FIFO is non-empty.
//   first_route_core  : index of the lowest-numbered match, 0 when none.
reg first_route_found;
reg [CORE_ID_BITS-1:0] first_route_core;
integer pe_j;
always @(*) begin
    first_route_found = 1'b0;
    first_route_core  = {CORE_ID_BITS{1'b0}};
    // Descending scan with overwrite leaves the lowest matching index.
    for (pe_j = NUM_CORES - 1; pe_j >= 0; pe_j = pe_j - 1) begin
        if (!cap_empty[pe_j]) begin
            first_route_found = 1'b1;
            first_route_core  = pe_j[CORE_ID_BITS-1:0];
        end
    end
end
|
| 551 |
+
|
| 552 |
+
// Priority encoder (lowest index wins): finds the first core that is flagged
// in core_needs_restart and is not currently running.
//   first_restart_found : 1 when at least one such core exists.
//   first_restart_core  : index of the lowest-numbered match, 0 when none.
reg first_restart_found;
reg [CORE_ID_BITS-1:0] first_restart_core;
integer pe_k;
always @(*) begin
    first_restart_found = 1'b0;
    first_restart_core  = {CORE_ID_BITS{1'b0}};
    // Descending scan with overwrite leaves the lowest matching index.
    for (pe_k = NUM_CORES - 1; pe_k >= 0; pe_k = pe_k - 1) begin
        if (core_needs_restart[pe_k] && !core_running[pe_k]) begin
            first_restart_found = 1'b1;
            first_restart_core  = pe_k[CORE_ID_BITS-1:0];
        end
    end
end
|
| 565 |
+
|
| 566 |
+
// quiescent: nothing left to do in asynchronous mode - every per-core
// injection FIFO and every capture FIFO is empty, and no core is running,
// being started this cycle, or waiting for a restart.
wire quiescent;
assign quiescent = (&pcif_empty) && (&cap_empty) &&
                   (core_running == 0) &&
                   (core_start_r == 0) &&
                   (core_needs_restart == 0);
|
| 568 |
+
|
| 569 |
+
// ---------------------------------------------------------------------------
// Routing context registers (written by the mesh state machine)
// ---------------------------------------------------------------------------
reg [CORE_ID_BITS-1:0]           route_core_idx;  // source core being routed
reg [NEURON_BITS-1:0]            route_neuron;    // source neuron of the popped spike
reg [7:0]                        route_payload;   // 8-bit payload of the popped spike
reg [ROUTE_SLOT_BITS-1:0]        route_slot;      // local route-table slot counter
reg [GLOBAL_ROUTE_SLOT_BITS-1:0] global_slot;     // global route-table slot counter

// ---------------------------------------------------------------------------
// Graded-spike current: (route weight * payload) arithmetically shifted right
// by GRADE_SHIFT, then truncated to DATA_WIDTH. The payload is zero-extended
// (treated as non-negative); the weight is assigned into a signed 32-bit wire.
// ---------------------------------------------------------------------------
wire signed [31:0]           route_weight_ext;
wire signed [31:0]           route_payload_ext;
wire signed [31:0]           route_graded_product;
wire signed [DATA_WIDTH-1:0] route_graded_current;

assign route_weight_ext     = rt_weight;
assign route_payload_ext    = {24'd0, route_payload};
assign route_graded_product = route_weight_ext * route_payload_ext;
assign route_graded_current = route_graded_product >>> GRADE_SHIFT;
|
| 579 |
+
|
| 580 |
+
// ---------------------------------------------------------------------------
// Mesh control state machine
//
// Two operating modes are selected when `start` is seen in SM_IDLE:
//
//   Barrier (synchronous) timestep, async_enable == 0:
//     [SM_LINK_RX_DRAIN/WAIT ->] SM_INJECT -> SM_START -> SM_RUN_WAIT ->
//     SM_ROUTE_POP -> (SM_ROUTE_ADDR/WAIT/READ per local slot) ->
//     (SM_GLOBAL_ROUTE_ADDR/WAIT/READ per global slot) -> SM_ROUTE_POP ...
//     -> SM_DONE [-> SM_DVFS_WAIT] -> SM_IDLE
//
//   Asynchronous, async_enable == 1:
//     SM_ASYNC_ACTIVE arbitrates between injecting into an idle core,
//     routing a captured spike and restarting a core, until `quiescent`,
//     then SM_ASYNC_DONE -> SM_IDLE.
//
// All *_we / *_push / *_pop / *_clear outputs, core_start_r and
// timestep_done are single-cycle pulses: they are cleared at the top of
// every clock and set again only by the state that needs them.
// ---------------------------------------------------------------------------
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        mesh_state      <= SM_IDLE;
        timestep_done   <= 0;
        total_spikes    <= 0;
        timestep_count  <= 0;
        core_start_r    <= 0;
        route_core_idx  <= 0;
        route_neuron    <= 0;
        route_payload   <= 0;
        route_slot      <= 0;
        global_slot     <= 0;
        rt_we           <= 0;
        rt_addr         <= 0;
        rt_wdata        <= 0;
        grt_we          <= 0;
        grt_addr        <= 0;
        inj_push        <= 0;
        // inj_push_data is written in the clocked branch below but was
        // missing from the reset list (its sibling pcif_push_data was
        // present). Resetting it gives a defined power-up value and stops
        // rst_n from acting as an implicit enable on this register.
        inj_push_data   <= 0;
        inj_pop         <= 0;
        inj_clear       <= 0;
        cap_pop         <= 0;
        cap_clear       <= 0;
        pcif_push       <= 0;
        pcif_pop        <= 0;
        pcif_clear      <= 0;
        pcif_push_data  <= 0;
        inject_core_idx <= 0;
        link_tx_push    <= 0;
        link_tx_core    <= 0;
        link_tx_neuron  <= 0;
        link_tx_payload <= 0;
        link_rx_pop     <= 0;
        dvfs_wait_cnt   <= 0;
    end else begin
        // Default: de-assert every pulse-type control each cycle.
        timestep_done <= 0;
        core_start_r  <= 0;
        rt_we         <= 0;
        grt_we        <= 0;
        inj_push      <= 0;
        inj_pop       <= 0;
        inj_clear     <= 0;
        cap_pop       <= 0;
        cap_clear     <= 0;
        pcif_push     <= 0;
        pcif_pop      <= 0;
        pcif_clear    <= 0;
        link_tx_push  <= 0;
        link_rx_pop   <= 0;

        // Running spike total: counts the cores asserting core_spike_valid
        // on every clock, in every state. Only reset clears it.
        total_spikes <= total_spikes + popcount(core_spike_valid);

        case (mesh_state)
            // ---------------------------------------------------------------
            // Idle: accept external stimulus (async mode only) and wait for
            // `start`.
            // ---------------------------------------------------------------
            SM_IDLE: begin
                if (async_enable && ext_valid) begin
                    pcif_push[ext_core] <= 1;
                    pcif_push_data      <= {ext_neuron_id, ext_current};
                end
                if (start) begin
                    if (async_enable)
                        mesh_state <= SM_ASYNC_ACTIVE;
                    else if (CHIP_LINK_EN)
                        mesh_state <= SM_LINK_RX_DRAIN;
                    else
                        mesh_state <= SM_INJECT;
                end
            end

            // ---------------------------------------------------------------
            // Barrier mode: drain the injection FIFO, then start all cores.
            // ---------------------------------------------------------------
            SM_INJECT: begin
                if (inj_empty) begin
                    mesh_state <= SM_START;
                end else begin
                    inj_pop <= 1;
                end
            end

            SM_START: begin
                core_start_r <= {NUM_CORES{1'b1}};
                mesh_state   <= SM_RUN_WAIT;
            end

            // Wait until every core has latched "done".
            SM_RUN_WAIT: begin
                if (core_done_latch == {NUM_CORES{1'b1}}) begin
                    route_core_idx <= 0;
                    mesh_state     <= SM_ROUTE_POP;
                end
            end

            // ---------------------------------------------------------------
            // Barrier mode routing: walk the cores in index order and pop one
            // captured spike at a time. Each capture entry is laid out as
            // {neuron id, 8-bit payload} inside the core's CAP_WIDTH slice.
            // ---------------------------------------------------------------
            SM_ROUTE_POP: begin
                if (cap_empty[route_core_idx]) begin
                    if (route_core_idx == NUM_CORES - 1) begin
                        mesh_state <= SM_DONE;
                    end else begin
                        route_core_idx <= route_core_idx + 1;
                    end
                end else begin
                    cap_pop[route_core_idx] <= 1;
                    route_neuron  <= (cap_data >> (route_core_idx * CAP_WIDTH + 8));
                    route_payload <= (cap_data >> (route_core_idx * CAP_WIDTH));
                    route_slot    <= 0;
                    mesh_state    <= SM_ROUTE_ADDR;
                end
            end

            // Present the local route-table address for the current slot.
            SM_ROUTE_ADDR: begin
                rt_addr    <= {route_core_idx, route_neuron, route_slot};
                mesh_state <= SM_ROUTE_WAIT;
            end

            // One cycle between presenting the address and sampling the entry
            // (presumably the route-table read latency - TODO confirm).
            SM_ROUTE_WAIT: begin
                mesh_state <= SM_ROUTE_READ;
            end

            // Forward a valid local route entry to the injection FIFO, then
            // advance to the next slot or move on to the global routes.
            // NOTE(review): inj_full is not checked here, unlike in
            // SM_LINK_RX_DRAIN - confirm the FIFO cannot overflow.
            SM_ROUTE_READ: begin
                if (rt_valid) begin
                    inj_push <= 1;
                    if (graded_enable)
                        inj_push_data <= {rt_dest_core, rt_dest_nrn, route_graded_current};
                    else
                        inj_push_data <= {rt_dest_core, rt_dest_nrn, rt_weight};
                end

                if (route_slot < ROUTE_FANOUT - 1) begin
                    route_slot <= route_slot + 1;
                    mesh_state <= SM_ROUTE_ADDR;
                end else begin
                    global_slot <= 0;
                    mesh_state  <= SM_GLOBAL_ROUTE_ADDR;
                end
            end

            // ---------------------------------------------------------------
            // Global route table walk for the same source spike.
            // ---------------------------------------------------------------
            SM_GLOBAL_ROUTE_ADDR: begin
                grt_addr   <= {route_core_idx, route_neuron, global_slot};
                mesh_state <= SM_GLOBAL_ROUTE_WAIT;
            end

            SM_GLOBAL_ROUTE_WAIT: begin
                mesh_state <= SM_GLOBAL_ROUTE_READ;
            end

            SM_GLOBAL_ROUTE_READ: begin
                if (grt_valid) begin
                    if (CHIP_LINK_EN && grt_weight[DATA_WIDTH-1]) begin
                        // With the chip link enabled, a set top bit of the
                        // weight sends the spike to the link TX path instead
                        // of the local injection FIFO.
                        // NOTE(review): when link_tx_full is high the entry
                        // is skipped without retry - confirm this is intended.
                        if (!link_tx_full) begin
                            link_tx_push    <= 1;
                            link_tx_core    <= grt_dest_core;
                            link_tx_neuron  <= grt_dest_nrn;
                            link_tx_payload <= route_payload;
                        end
                    end else begin
                        inj_push <= 1;
                        if (graded_enable)
                            inj_push_data <= {grt_dest_core, grt_dest_nrn, grt_graded_current};
                        else
                            inj_push_data <= {grt_dest_core, grt_dest_nrn, grt_weight};
                    end
                end

                if (global_slot < GLOBAL_ROUTE_SLOTS - 1) begin
                    global_slot <= global_slot + 1;
                    mesh_state  <= SM_GLOBAL_ROUTE_ADDR;
                end else begin
                    mesh_state <= SM_ROUTE_POP;
                end
            end

            // ---------------------------------------------------------------
            // Chip-link receive: move every pending RX entry into the
            // injection FIFO before the timestep starts. The WAIT state gives
            // the pop one cycle to take effect before link_rx_empty is
            // sampled again.
            // ---------------------------------------------------------------
            SM_LINK_RX_DRAIN: begin
                if (link_rx_empty) begin
                    mesh_state <= SM_INJECT;
                end else if (!inj_full) begin
                    link_rx_pop   <= 1;
                    inj_push      <= 1;
                    inj_push_data <= {link_rx_core, link_rx_neuron, link_rx_current};
                    mesh_state    <= SM_LINK_RX_WAIT;
                end
            end

            SM_LINK_RX_WAIT: begin
                mesh_state <= SM_LINK_RX_DRAIN;
            end

            // ---------------------------------------------------------------
            // End of a barrier timestep: clear the capture FIFOs, bump the
            // timestep counter and optionally stall for dvfs_stall cycles.
            // ---------------------------------------------------------------
            SM_DONE: begin
                cap_clear      <= {NUM_CORES{1'b1}};
                timestep_count <= timestep_count + 1;
                if (dvfs_stall > 0) begin
                    dvfs_wait_cnt <= dvfs_stall;
                    mesh_state    <= SM_DVFS_WAIT;
                end else begin
                    timestep_done <= 1;
                    mesh_state    <= SM_IDLE;
                end
            end

            SM_DVFS_WAIT: begin
                if (dvfs_wait_cnt <= 1) begin
                    timestep_done <= 1;
                    mesh_state    <= SM_IDLE;
                end else begin
                    dvfs_wait_cnt <= dvfs_wait_cnt - 1;
                end
            end

            // ---------------------------------------------------------------
            // Asynchronous mode arbiter. Priority: finish when quiescent,
            // else inject into an idle core, else route a captured spike,
            // else restart a core that asked for it.
            // ---------------------------------------------------------------
            SM_ASYNC_ACTIVE: begin
                if (quiescent) begin
                    mesh_state <= SM_ASYNC_DONE;
                end else if (first_inject_found) begin
                    inject_core_idx <= first_inject_core;
                    mesh_state      <= SM_ASYNC_INJECT;
                end else if (first_route_found) begin
                    route_core_idx <= first_route_core;
                    mesh_state     <= SM_ASYNC_ROUTE_POP;
                end else if (first_restart_found) begin
                    // One-hot start pulse for the selected core.
                    core_start_r <= ({{(NUM_CORES-1){1'b0}}, 1'b1} << first_restart_core);
                end
            end

            // Drain the selected core's injection FIFO, then start that core.
            SM_ASYNC_INJECT: begin
                if (pcif_empty[inject_core_idx]) begin
                    core_start_r <= ({{(NUM_CORES-1){1'b0}}, 1'b1} << inject_core_idx);
                    mesh_state   <= SM_ASYNC_ACTIVE;
                end else begin
                    pcif_pop[inject_core_idx] <= 1;
                end
            end

            // Pop captured spikes of the selected core until its FIFO is empty.
            SM_ASYNC_ROUTE_POP: begin
                if (cap_empty[route_core_idx]) begin
                    mesh_state <= SM_ASYNC_ACTIVE;
                end else begin
                    cap_pop[route_core_idx] <= 1;
                    route_neuron  <= (cap_data >> (route_core_idx * CAP_WIDTH + 8));
                    route_payload <= (cap_data >> (route_core_idx * CAP_WIDTH));
                    route_slot    <= 0;
                    mesh_state    <= SM_ASYNC_ROUTE_ADDR;
                end
            end

            SM_ASYNC_ROUTE_ADDR: begin
                rt_addr    <= {route_core_idx, route_neuron, route_slot};
                mesh_state <= SM_ASYNC_ROUTE_WAIT;
            end

            SM_ASYNC_ROUTE_WAIT: begin
                mesh_state <= SM_ASYNC_ROUTE_READ;
            end

            // Deliver a valid local route entry to the destination core's
            // injection FIFO. Only the local route table is walked in
            // asynchronous mode.
            // NOTE(review): an entry whose destination FIFO is full is
            // skipped without retry - confirm this is intended.
            SM_ASYNC_ROUTE_READ: begin
                if (rt_valid && !pcif_full[rt_dest_core]) begin
                    pcif_push[rt_dest_core] <= 1;
                    if (graded_enable)
                        pcif_push_data <= {rt_dest_nrn, route_graded_current};
                    else
                        pcif_push_data <= {rt_dest_nrn, rt_weight};
                end

                if (route_slot < ROUTE_FANOUT - 1) begin
                    route_slot <= route_slot + 1;
                    mesh_state <= SM_ASYNC_ROUTE_ADDR;
                end else begin
                    mesh_state <= SM_ASYNC_ROUTE_POP;
                end
            end

            // End of an asynchronous run: flush both FIFO sets and report done.
            SM_ASYNC_DONE: begin
                pcif_clear     <= {NUM_CORES{1'b1}};
                cap_clear      <= {NUM_CORES{1'b1}};
                timestep_done  <= 1;
                timestep_count <= timestep_count + 1;
                mesh_state     <= SM_IDLE;
            end

            // Any unexpected encoding falls back to idle.
            default: mesh_state <= SM_IDLE;
        endcase
    end
end
|
| 858 |
+
|
| 859 |
+
endmodule
|
rtl/neuromorphic_top.v
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Neuromorphic Top
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
module neuromorphic_top #(
|
| 22 |
+
parameter CLK_FREQ = 100_000_000,
|
| 23 |
+
parameter BAUD = 115200,
|
| 24 |
+
parameter NUM_CORES = 128,
|
| 25 |
+
parameter CORE_ID_BITS = 12,
|
| 26 |
+
parameter NUM_NEURONS = 1024,
|
| 27 |
+
parameter NEURON_BITS = 10,
|
| 28 |
+
parameter DATA_WIDTH = 16,
|
| 29 |
+
parameter POOL_DEPTH = 131072,
|
| 30 |
+
parameter POOL_ADDR_BITS = 17,
|
| 31 |
+
parameter COUNT_BITS = 12,
|
| 32 |
+
parameter REV_FANIN = 32,
|
| 33 |
+
parameter REV_SLOT_BITS = 5,
|
| 34 |
+
parameter THRESHOLD = 16'sd1000,
|
| 35 |
+
parameter LEAK_RATE = 16'sd3,
|
| 36 |
+
parameter REFRAC_CYCLES = 3,
|
| 37 |
+
parameter ROUTE_FANOUT = 8,
|
| 38 |
+
parameter ROUTE_SLOT_BITS = 3,
|
| 39 |
+
parameter GLOBAL_ROUTE_SLOTS = 4,
|
| 40 |
+
parameter GLOBAL_ROUTE_SLOT_BITS = 2,
|
| 41 |
+
|
| 42 |
+
parameter CHIP_LINK_EN = 0,
|
| 43 |
+
parameter NOC_MODE = 0,
|
| 44 |
+
parameter MESH_X = 2,
|
| 45 |
+
parameter MESH_Y = 2,
|
| 46 |
+
|
| 47 |
+
parameter BYPASS_UART = 0
|
| 48 |
+
)(
|
| 49 |
+
input wire clk,
|
| 50 |
+
input wire rst_n,
|
| 51 |
+
input wire uart_rxd,
|
| 52 |
+
output wire uart_txd,
|
| 53 |
+
|
| 54 |
+
output wire [7:0] link_tx_data,
|
| 55 |
+
output wire link_tx_valid,
|
| 56 |
+
input wire link_tx_ready,
|
| 57 |
+
input wire [7:0] link_rx_data,
|
| 58 |
+
input wire link_rx_valid,
|
| 59 |
+
output wire link_rx_ready,
|
| 60 |
+
|
| 61 |
+
input wire [7:0] rx_data_ext,
|
| 62 |
+
input wire rx_valid_ext,
|
| 63 |
+
output wire [7:0] tx_data_ext,
|
| 64 |
+
output wire tx_valid_ext,
|
| 65 |
+
input wire tx_ready_ext
|
| 66 |
+
);
|
| 67 |
+
|
| 68 |
+
// ---------------------------------------------------------------------------
// Host byte stream
//
// rx_* / tx_* carry bytes between the host interface and either the on-chip
// UART pair (BYPASS_UART == 0) or the external *_ext byte ports
// (BYPASS_UART != 0). tx_data / tx_valid are mirrored on the *_ext outputs in
// both configurations.
// ---------------------------------------------------------------------------
wire [7:0] rx_data;
wire       rx_valid;
wire [7:0] tx_data;
wire       tx_valid;
wire       tx_ready;

generate
    if (BYPASS_UART != 0) begin : gen_bypass
        // External byte interface drives the host interface directly; the
        // serial output is parked at the constant 1'b1.
        assign rx_data  = rx_data_ext;
        assign rx_valid = rx_valid_ext;
        assign tx_ready = tx_ready_ext;
        assign uart_txd = 1'b1;
    end else begin : gen_uart
        uart_rx #(
            .CLK_FREQ (CLK_FREQ),
            .BAUD     (BAUD)
        ) u_uart_rx (
            .clk   (clk),
            .rst_n (rst_n),
            .rx    (uart_rxd),
            .data  (rx_data),
            .valid (rx_valid)
        );

        uart_tx #(
            .CLK_FREQ (CLK_FREQ),
            .BAUD     (BAUD)
        ) u_uart_tx (
            .clk   (clk),
            .rst_n (rst_n),
            .data  (tx_data),
            .valid (tx_valid),
            .tx    (uart_txd),
            .ready (tx_ready)
        );
    end
endgenerate

// Transmit bytes are always visible on the external port as well.
assign tx_data_ext  = tx_data;
assign tx_valid_ext = tx_valid;
|
| 108 |
+
|
| 109 |
+
// ---------------------------------------------------------------------------
// Host interface <-> mesh interconnect
// Signals prefixed hi_ are connected to u_host_if and forwarded to the mesh
// instance; mesh_* signals carry status and probe data the other way.
// ---------------------------------------------------------------------------

// Timestep start request
wire hi_mesh_start;

// Connection-pool programming
wire                         hi_prog_pool_we;
wire [CORE_ID_BITS-1:0]      hi_prog_pool_core;
wire [POOL_ADDR_BITS-1:0]    hi_prog_pool_addr;
wire [NEURON_BITS-1:0]       hi_prog_pool_src, hi_prog_pool_target;
wire signed [DATA_WIDTH-1:0] hi_prog_pool_weight;
wire [1:0]                   hi_prog_pool_comp;

// Index-table programming
wire                         hi_prog_index_we;
wire [CORE_ID_BITS-1:0]      hi_prog_index_core;
wire [NEURON_BITS-1:0]       hi_prog_index_neuron;
wire [POOL_ADDR_BITS-1:0]    hi_prog_index_base;
wire [COUNT_BITS-1:0]        hi_prog_index_count;
wire [1:0]                   hi_prog_index_format;

// Local route-table programming
wire                         hi_prog_route_we;
wire [CORE_ID_BITS-1:0]      hi_prog_route_src_core, hi_prog_route_dest_core;
wire [NEURON_BITS-1:0]       hi_prog_route_src_neuron, hi_prog_route_dest_neuron;
wire [ROUTE_SLOT_BITS-1:0]   hi_prog_route_slot;
wire signed [DATA_WIDTH-1:0] hi_prog_route_weight;

// Global route-table programming
wire                              hi_prog_global_route_we;
wire [CORE_ID_BITS-1:0]           hi_prog_global_route_src_core;
wire [CORE_ID_BITS-1:0]           hi_prog_global_route_dest_core;
wire [NEURON_BITS-1:0]            hi_prog_global_route_src_neuron;
wire [NEURON_BITS-1:0]            hi_prog_global_route_dest_neuron;
wire [GLOBAL_ROUTE_SLOT_BITS-1:0] hi_prog_global_route_slot;
wire signed [DATA_WIDTH-1:0]      hi_prog_global_route_weight;

// External stimulus
wire                         hi_ext_valid;
wire [CORE_ID_BITS-1:0]      hi_ext_core;
wire [NEURON_BITS-1:0]       hi_ext_neuron_id;
wire signed [DATA_WIDTH-1:0] hi_ext_current;

// Feature enables and reward value
wire hi_learn_enable, hi_graded_enable, hi_dendritic_enable, hi_async_enable;
wire hi_threefactor_enable, hi_noise_enable, hi_skip_idle_enable, hi_scale_u_enable;
wire signed [DATA_WIDTH-1:0] hi_reward_value;

// Delay programming
wire                         hi_prog_delay_we;
wire [CORE_ID_BITS-1:0]      hi_prog_delay_core;
wire [POOL_ADDR_BITS-1:0]    hi_prog_delay_addr;
wire [5:0]                   hi_prog_delay_value;

// Microcode programming
wire                         hi_prog_ucode_we;
wire [CORE_ID_BITS-1:0]      hi_prog_ucode_core;
wire [7:0]                   hi_prog_ucode_addr;
wire [31:0]                  hi_prog_ucode_data;

// Per-neuron parameter programming
wire                         hi_prog_param_we;
wire [CORE_ID_BITS-1:0]      hi_prog_param_core;
wire [NEURON_BITS-1:0]       hi_prog_param_neuron;
wire [4:0]                   hi_prog_param_id;
wire signed [DATA_WIDTH-1:0] hi_prog_param_value;

// State probe: request from the host, response from the mesh
wire                         hi_probe_read;
wire [CORE_ID_BITS-1:0]      hi_probe_core;
wire [NEURON_BITS-1:0]       hi_probe_neuron;
wire [4:0]                   hi_probe_state_id;
wire [POOL_ADDR_BITS-1:0]    hi_probe_pool_addr;
wire signed [DATA_WIDTH-1:0] mesh_probe_data;
wire                         mesh_probe_valid;

// DVFS stall value
wire [7:0] hi_dvfs_stall;

// Mesh status reported to the host interface
wire        mesh_timestep_done;
wire [5:0]  mesh_state;
wire [31:0] mesh_total_spikes, mesh_timestep_count;
|
| 187 |
+
|
| 188 |
+
// ---------------------------------------------------------------------------
// Host interface instance: sits between the rx/tx byte stream and the mesh
// programming, stimulus, probe and status signals declared in this module.
// ---------------------------------------------------------------------------
host_interface #(
    .NUM_CORES              (NUM_CORES),
    .CORE_ID_BITS           (CORE_ID_BITS),
    .NUM_NEURONS            (NUM_NEURONS),
    .NEURON_BITS            (NEURON_BITS),
    .DATA_WIDTH             (DATA_WIDTH),
    .POOL_ADDR_BITS         (POOL_ADDR_BITS),
    .COUNT_BITS             (COUNT_BITS),
    .ROUTE_SLOT_BITS        (ROUTE_SLOT_BITS),
    .GLOBAL_ROUTE_SLOT_BITS (GLOBAL_ROUTE_SLOT_BITS)
) u_host_if (
    // Clock, reset and byte stream
    .clk      (clk),
    .rst_n    (rst_n),
    .rx_data  (rx_data),
    .rx_valid (rx_valid),
    .tx_data  (tx_data),
    .tx_valid (tx_valid),
    .tx_ready (tx_ready),

    // Timestep start
    .mesh_start (hi_mesh_start),

    // Connection-pool programming
    .mesh_prog_pool_we     (hi_prog_pool_we),
    .mesh_prog_pool_core   (hi_prog_pool_core),
    .mesh_prog_pool_addr   (hi_prog_pool_addr),
    .mesh_prog_pool_src    (hi_prog_pool_src),
    .mesh_prog_pool_target (hi_prog_pool_target),
    .mesh_prog_pool_weight (hi_prog_pool_weight),
    .mesh_prog_pool_comp   (hi_prog_pool_comp),

    // Index-table programming
    .mesh_prog_index_we     (hi_prog_index_we),
    .mesh_prog_index_core   (hi_prog_index_core),
    .mesh_prog_index_neuron (hi_prog_index_neuron),
    .mesh_prog_index_base   (hi_prog_index_base),
    .mesh_prog_index_count  (hi_prog_index_count),
    .mesh_prog_index_format (hi_prog_index_format),

    // Local route-table programming
    .mesh_prog_route_we          (hi_prog_route_we),
    .mesh_prog_route_src_core    (hi_prog_route_src_core),
    .mesh_prog_route_src_neuron  (hi_prog_route_src_neuron),
    .mesh_prog_route_slot        (hi_prog_route_slot),
    .mesh_prog_route_dest_core   (hi_prog_route_dest_core),
    .mesh_prog_route_dest_neuron (hi_prog_route_dest_neuron),
    .mesh_prog_route_weight      (hi_prog_route_weight),

    // Global route-table programming
    .mesh_prog_global_route_we          (hi_prog_global_route_we),
    .mesh_prog_global_route_src_core    (hi_prog_global_route_src_core),
    .mesh_prog_global_route_src_neuron  (hi_prog_global_route_src_neuron),
    .mesh_prog_global_route_slot        (hi_prog_global_route_slot),
    .mesh_prog_global_route_dest_core   (hi_prog_global_route_dest_core),
    .mesh_prog_global_route_dest_neuron (hi_prog_global_route_dest_neuron),
    .mesh_prog_global_route_weight      (hi_prog_global_route_weight),

    // External stimulus
    .mesh_ext_valid     (hi_ext_valid),
    .mesh_ext_core      (hi_ext_core),
    .mesh_ext_neuron_id (hi_ext_neuron_id),
    .mesh_ext_current   (hi_ext_current),

    // Feature enables and reward value
    .mesh_learn_enable       (hi_learn_enable),
    .mesh_graded_enable      (hi_graded_enable),
    .mesh_dendritic_enable   (hi_dendritic_enable),
    .mesh_async_enable       (hi_async_enable),
    .mesh_threefactor_enable (hi_threefactor_enable),
    .mesh_noise_enable       (hi_noise_enable),
    .mesh_skip_idle_enable   (hi_skip_idle_enable),
    .mesh_scale_u_enable     (hi_scale_u_enable),
    .mesh_reward_value       (hi_reward_value),

    // Delay programming
    .mesh_prog_delay_we    (hi_prog_delay_we),
    .mesh_prog_delay_core  (hi_prog_delay_core),
    .mesh_prog_delay_addr  (hi_prog_delay_addr),
    .mesh_prog_delay_value (hi_prog_delay_value),

    // Microcode programming
    .mesh_prog_ucode_we   (hi_prog_ucode_we),
    .mesh_prog_ucode_core (hi_prog_ucode_core),
    .mesh_prog_ucode_addr (hi_prog_ucode_addr),
    .mesh_prog_ucode_data (hi_prog_ucode_data),

    // Per-neuron parameter programming
    .mesh_prog_param_we     (hi_prog_param_we),
    .mesh_prog_param_core   (hi_prog_param_core),
    .mesh_prog_param_neuron (hi_prog_param_neuron),
    .mesh_prog_param_id     (hi_prog_param_id),
    .mesh_prog_param_value  (hi_prog_param_value),

    // State probe
    .mesh_probe_read      (hi_probe_read),
    .mesh_probe_core      (hi_probe_core),
    .mesh_probe_neuron    (hi_probe_neuron),
    .mesh_probe_state_id  (hi_probe_state_id),
    .mesh_probe_pool_addr (hi_probe_pool_addr),
    .mesh_probe_data      (mesh_probe_data),
    .mesh_probe_valid     (mesh_probe_valid),

    // DVFS stall value
    .mesh_dvfs_stall (hi_dvfs_stall),

    // Mesh status
    .mesh_timestep_done  (mesh_timestep_done),
    .mesh_state          (mesh_state),
    .mesh_total_spikes   (mesh_total_spikes),
    .mesh_timestep_count (mesh_timestep_count)
);
|
| 286 |
+
|
| 287 |
+
// ---------------------------------------------------------------------------
// Chip-link side of the mesh: the TX group is connected to the mesh link_tx_*
// ports and the RX group to its link_rx_* ports.
// ---------------------------------------------------------------------------
wire                         mesh_link_tx_push;
wire [CORE_ID_BITS-1:0]      mesh_link_tx_core;
wire [NEURON_BITS-1:0]       mesh_link_tx_neuron;
wire [7:0]                   mesh_link_tx_payload;
wire                         mesh_link_tx_full;

wire [CORE_ID_BITS-1:0]      mesh_link_rx_core;
wire [NEURON_BITS-1:0]       mesh_link_rx_neuron;
wire signed [DATA_WIDTH-1:0] mesh_link_rx_current;
wire                         mesh_link_rx_pop;
wire                         mesh_link_rx_empty;

// Per-core spike outputs of the mesh: one valid bit and one NEURON_BITS-wide
// neuron id per core.
wire [NUM_CORES-1:0]             spike_valid_bus;
wire [NUM_CORES*NEURON_BITS-1:0] spike_id_bus;
|
| 300 |
+
|
| 301 |
+
generate
|
| 302 |
+
if (NOC_MODE == 1) begin : gen_async_noc
|
| 303 |
+
async_noc_mesh #(
|
| 304 |
+
.NUM_CORES (NUM_CORES),
|
| 305 |
+
.CORE_ID_BITS (CORE_ID_BITS),
|
| 306 |
+
.NUM_NEURONS (NUM_NEURONS),
|
| 307 |
+
.NEURON_BITS (NEURON_BITS),
|
| 308 |
+
.DATA_WIDTH (DATA_WIDTH),
|
| 309 |
+
.POOL_DEPTH (POOL_DEPTH),
|
| 310 |
+
.POOL_ADDR_BITS (POOL_ADDR_BITS),
|
| 311 |
+
.COUNT_BITS (COUNT_BITS),
|
| 312 |
+
.REV_FANIN (REV_FANIN),
|
| 313 |
+
.REV_SLOT_BITS (REV_SLOT_BITS),
|
| 314 |
+
.THRESHOLD (THRESHOLD),
|
| 315 |
+
.LEAK_RATE (LEAK_RATE),
|
| 316 |
+
.REFRAC_CYCLES (REFRAC_CYCLES),
|
| 317 |
+
.ROUTE_FANOUT (ROUTE_FANOUT),
|
| 318 |
+
.ROUTE_SLOT_BITS(ROUTE_SLOT_BITS),
|
| 319 |
+
.GLOBAL_ROUTE_SLOTS (GLOBAL_ROUTE_SLOTS),
|
| 320 |
+
.GLOBAL_ROUTE_SLOT_BITS(GLOBAL_ROUTE_SLOT_BITS),
|
| 321 |
+
.MESH_X (MESH_X),
|
| 322 |
+
.MESH_Y (MESH_Y)
|
| 323 |
+
) u_mesh (
|
| 324 |
+
.clk (clk),
|
| 325 |
+
.rst_n (rst_n),
|
| 326 |
+
.start (hi_mesh_start),
|
| 327 |
+
.learn_enable (hi_learn_enable),
|
| 328 |
+
.graded_enable (hi_graded_enable),
|
| 329 |
+
.dendritic_enable (hi_dendritic_enable),
|
| 330 |
+
.async_enable (hi_async_enable),
|
| 331 |
+
.threefactor_enable(hi_threefactor_enable),
|
| 332 |
+
.noise_enable (hi_noise_enable),
|
| 333 |
+
.skip_idle_enable (hi_skip_idle_enable),
|
| 334 |
+
.scale_u_enable (hi_scale_u_enable),
|
| 335 |
+
.reward_value (hi_reward_value),
|
| 336 |
+
.prog_pool_we (hi_prog_pool_we),
|
| 337 |
+
.prog_pool_core (hi_prog_pool_core),
|
| 338 |
+
.prog_pool_addr (hi_prog_pool_addr),
|
| 339 |
+
.prog_pool_src (hi_prog_pool_src),
|
| 340 |
+
.prog_pool_target (hi_prog_pool_target),
|
| 341 |
+
.prog_pool_weight (hi_prog_pool_weight),
|
| 342 |
+
.prog_pool_comp (hi_prog_pool_comp),
|
| 343 |
+
.prog_index_we (hi_prog_index_we),
|
| 344 |
+
.prog_index_core (hi_prog_index_core),
|
| 345 |
+
.prog_index_neuron (hi_prog_index_neuron),
|
| 346 |
+
.prog_index_base (hi_prog_index_base),
|
| 347 |
+
.prog_index_count (hi_prog_index_count),
|
| 348 |
+
.prog_index_format (hi_prog_index_format),
|
| 349 |
+
.prog_route_we (hi_prog_route_we),
|
| 350 |
+
.prog_route_src_core (hi_prog_route_src_core),
|
| 351 |
+
.prog_route_src_neuron (hi_prog_route_src_neuron),
|
| 352 |
+
.prog_route_slot (hi_prog_route_slot),
|
| 353 |
+
.prog_route_dest_core (hi_prog_route_dest_core),
|
| 354 |
+
.prog_route_dest_neuron(hi_prog_route_dest_neuron),
|
| 355 |
+
.prog_route_weight (hi_prog_route_weight),
|
| 356 |
+
.prog_global_route_we (hi_prog_global_route_we),
|
| 357 |
+
.prog_global_route_src_core (hi_prog_global_route_src_core),
|
| 358 |
+
.prog_global_route_src_neuron (hi_prog_global_route_src_neuron),
|
| 359 |
+
.prog_global_route_slot (hi_prog_global_route_slot),
|
| 360 |
+
.prog_global_route_dest_core (hi_prog_global_route_dest_core),
|
| 361 |
+
.prog_global_route_dest_neuron (hi_prog_global_route_dest_neuron),
|
| 362 |
+
.prog_global_route_weight (hi_prog_global_route_weight),
|
| 363 |
+
.prog_delay_we (hi_prog_delay_we),
|
| 364 |
+
.prog_delay_core (hi_prog_delay_core),
|
| 365 |
+
.prog_delay_addr (hi_prog_delay_addr),
|
| 366 |
+
.prog_delay_value (hi_prog_delay_value),
|
| 367 |
+
.prog_ucode_we (hi_prog_ucode_we),
|
| 368 |
+
.prog_ucode_core (hi_prog_ucode_core),
|
| 369 |
+
.prog_ucode_addr (hi_prog_ucode_addr),
|
| 370 |
+
.prog_ucode_data (hi_prog_ucode_data),
|
| 371 |
+
.prog_param_we (hi_prog_param_we),
|
| 372 |
+
.prog_param_core (hi_prog_param_core),
|
| 373 |
+
.prog_param_neuron (hi_prog_param_neuron),
|
| 374 |
+
.prog_param_id (hi_prog_param_id),
|
| 375 |
+
.prog_param_value (hi_prog_param_value),
|
| 376 |
+
.probe_read (hi_probe_read),
|
| 377 |
+
.probe_core (hi_probe_core),
|
| 378 |
+
.probe_neuron (hi_probe_neuron),
|
| 379 |
+
.probe_state_id (hi_probe_state_id),
|
| 380 |
+
.probe_pool_addr (hi_probe_pool_addr),
|
| 381 |
+
.probe_data (mesh_probe_data),
|
| 382 |
+
.probe_valid (mesh_probe_valid),
|
| 383 |
+
.ext_valid (hi_ext_valid),
|
| 384 |
+
.ext_core (hi_ext_core),
|
| 385 |
+
.ext_neuron_id (hi_ext_neuron_id),
|
| 386 |
+
.ext_current (hi_ext_current),
|
| 387 |
+
.timestep_done (mesh_timestep_done),
|
| 388 |
+
.spike_valid_bus (spike_valid_bus),
|
| 389 |
+
.spike_id_bus (spike_id_bus),
|
| 390 |
+
.mesh_state_out (mesh_state),
|
| 391 |
+
.total_spikes (mesh_total_spikes),
|
| 392 |
+
.timestep_count (mesh_timestep_count),
|
| 393 |
+
.core_idle_bus (),
|
| 394 |
+
.core_clock_en (),
|
| 395 |
+
.energy_counter (),
|
| 396 |
+
.power_idle_hint (),
|
| 397 |
+
.link_tx_push (mesh_link_tx_push),
|
| 398 |
+
.link_tx_core (mesh_link_tx_core),
|
| 399 |
+
.link_tx_neuron (mesh_link_tx_neuron),
|
| 400 |
+
.link_tx_payload (mesh_link_tx_payload),
|
| 401 |
+
.link_tx_full (mesh_link_tx_full),
|
| 402 |
+
.link_rx_core (mesh_link_rx_core),
|
| 403 |
+
.link_rx_neuron (mesh_link_rx_neuron),
|
| 404 |
+
.link_rx_current (mesh_link_rx_current),
|
| 405 |
+
.link_rx_pop (mesh_link_rx_pop),
|
| 406 |
+
.link_rx_empty (mesh_link_rx_empty)
|
| 407 |
+
);
|
| 408 |
+
end else begin : gen_barrier_mesh
|
| 409 |
+
neuromorphic_mesh #(
|
| 410 |
+
.NUM_CORES (NUM_CORES),
|
| 411 |
+
.CORE_ID_BITS (CORE_ID_BITS),
|
| 412 |
+
.NUM_NEURONS (NUM_NEURONS),
|
| 413 |
+
.NEURON_BITS (NEURON_BITS),
|
| 414 |
+
.DATA_WIDTH (DATA_WIDTH),
|
| 415 |
+
.POOL_DEPTH (POOL_DEPTH),
|
| 416 |
+
.POOL_ADDR_BITS (POOL_ADDR_BITS),
|
| 417 |
+
.COUNT_BITS (COUNT_BITS),
|
| 418 |
+
.REV_FANIN (REV_FANIN),
|
| 419 |
+
.REV_SLOT_BITS (REV_SLOT_BITS),
|
| 420 |
+
.THRESHOLD (THRESHOLD),
|
| 421 |
+
.LEAK_RATE (LEAK_RATE),
|
| 422 |
+
.REFRAC_CYCLES (REFRAC_CYCLES),
|
| 423 |
+
.ROUTE_FANOUT (ROUTE_FANOUT),
|
| 424 |
+
.ROUTE_SLOT_BITS(ROUTE_SLOT_BITS),
|
| 425 |
+
.GLOBAL_ROUTE_SLOTS (GLOBAL_ROUTE_SLOTS),
|
| 426 |
+
.GLOBAL_ROUTE_SLOT_BITS(GLOBAL_ROUTE_SLOT_BITS),
|
| 427 |
+
.CHIP_LINK_EN (CHIP_LINK_EN)
|
| 428 |
+
) u_mesh (
|
| 429 |
+
.clk (clk),
|
| 430 |
+
.rst_n (rst_n),
|
| 431 |
+
.start (hi_mesh_start),
|
| 432 |
+
.dvfs_stall (hi_dvfs_stall),
|
| 433 |
+
.learn_enable (hi_learn_enable),
|
| 434 |
+
.graded_enable (hi_graded_enable),
|
| 435 |
+
.dendritic_enable (hi_dendritic_enable),
|
| 436 |
+
.async_enable (hi_async_enable),
|
| 437 |
+
.threefactor_enable(hi_threefactor_enable),
|
| 438 |
+
.noise_enable (hi_noise_enable),
|
| 439 |
+
.skip_idle_enable (hi_skip_idle_enable),
|
| 440 |
+
.scale_u_enable (hi_scale_u_enable),
|
| 441 |
+
.reward_value (hi_reward_value),
|
| 442 |
+
.prog_pool_we (hi_prog_pool_we),
|
| 443 |
+
.prog_pool_core (hi_prog_pool_core),
|
| 444 |
+
.prog_pool_addr (hi_prog_pool_addr),
|
| 445 |
+
.prog_pool_src (hi_prog_pool_src),
|
| 446 |
+
.prog_pool_target (hi_prog_pool_target),
|
| 447 |
+
.prog_pool_weight (hi_prog_pool_weight),
|
| 448 |
+
.prog_pool_comp (hi_prog_pool_comp),
|
| 449 |
+
.prog_index_we (hi_prog_index_we),
|
| 450 |
+
.prog_index_core (hi_prog_index_core),
|
| 451 |
+
.prog_index_neuron (hi_prog_index_neuron),
|
| 452 |
+
.prog_index_base (hi_prog_index_base),
|
| 453 |
+
.prog_index_count (hi_prog_index_count),
|
| 454 |
+
.prog_index_format (hi_prog_index_format),
|
| 455 |
+
.prog_route_we (hi_prog_route_we),
|
| 456 |
+
.prog_route_src_core (hi_prog_route_src_core),
|
| 457 |
+
.prog_route_src_neuron (hi_prog_route_src_neuron),
|
| 458 |
+
.prog_route_slot (hi_prog_route_slot),
|
| 459 |
+
.prog_route_dest_core (hi_prog_route_dest_core),
|
| 460 |
+
.prog_route_dest_neuron(hi_prog_route_dest_neuron),
|
| 461 |
+
.prog_route_weight (hi_prog_route_weight),
|
| 462 |
+
.prog_global_route_we (hi_prog_global_route_we),
|
| 463 |
+
.prog_global_route_src_core (hi_prog_global_route_src_core),
|
| 464 |
+
.prog_global_route_src_neuron (hi_prog_global_route_src_neuron),
|
| 465 |
+
.prog_global_route_slot (hi_prog_global_route_slot),
|
| 466 |
+
.prog_global_route_dest_core (hi_prog_global_route_dest_core),
|
| 467 |
+
.prog_global_route_dest_neuron (hi_prog_global_route_dest_neuron),
|
| 468 |
+
.prog_global_route_weight (hi_prog_global_route_weight),
|
| 469 |
+
.prog_delay_we (hi_prog_delay_we),
|
| 470 |
+
.prog_delay_core (hi_prog_delay_core),
|
| 471 |
+
.prog_delay_addr (hi_prog_delay_addr),
|
| 472 |
+
.prog_delay_value (hi_prog_delay_value),
|
| 473 |
+
.prog_ucode_we (hi_prog_ucode_we),
|
| 474 |
+
.prog_ucode_core (hi_prog_ucode_core),
|
| 475 |
+
.prog_ucode_addr (hi_prog_ucode_addr),
|
| 476 |
+
.prog_ucode_data (hi_prog_ucode_data),
|
| 477 |
+
.prog_param_we (hi_prog_param_we),
|
| 478 |
+
.prog_param_core (hi_prog_param_core),
|
| 479 |
+
.prog_param_neuron (hi_prog_param_neuron),
|
| 480 |
+
.prog_param_id (hi_prog_param_id),
|
| 481 |
+
.prog_param_value (hi_prog_param_value),
|
| 482 |
+
.probe_read (hi_probe_read),
|
| 483 |
+
.probe_core (hi_probe_core),
|
| 484 |
+
.probe_neuron (hi_probe_neuron),
|
| 485 |
+
.probe_state_id (hi_probe_state_id),
|
| 486 |
+
.probe_pool_addr (hi_probe_pool_addr),
|
| 487 |
+
.probe_data (mesh_probe_data),
|
| 488 |
+
.probe_valid (mesh_probe_valid),
|
| 489 |
+
.ext_valid (hi_ext_valid),
|
| 490 |
+
.ext_core (hi_ext_core),
|
| 491 |
+
.ext_neuron_id (hi_ext_neuron_id),
|
| 492 |
+
.ext_current (hi_ext_current),
|
| 493 |
+
.timestep_done (mesh_timestep_done),
|
| 494 |
+
.spike_valid_bus (spike_valid_bus),
|
| 495 |
+
.spike_id_bus (spike_id_bus),
|
| 496 |
+
.mesh_state_out (mesh_state),
|
| 497 |
+
.total_spikes (mesh_total_spikes),
|
| 498 |
+
.timestep_count (mesh_timestep_count),
|
| 499 |
+
.core_idle_bus (),
|
| 500 |
+
.core_clock_en (),
|
| 501 |
+
.energy_counter (),
|
| 502 |
+
.power_idle_hint (),
|
| 503 |
+
.link_tx_push (mesh_link_tx_push),
|
| 504 |
+
.link_tx_core (mesh_link_tx_core),
|
| 505 |
+
.link_tx_neuron (mesh_link_tx_neuron),
|
| 506 |
+
.link_tx_payload (mesh_link_tx_payload),
|
| 507 |
+
.link_tx_full (mesh_link_tx_full),
|
| 508 |
+
.link_rx_core (mesh_link_rx_core),
|
| 509 |
+
.link_rx_neuron (mesh_link_rx_neuron),
|
| 510 |
+
.link_rx_current (mesh_link_rx_current),
|
| 511 |
+
.link_rx_pop (mesh_link_rx_pop),
|
| 512 |
+
.link_rx_empty (mesh_link_rx_empty)
|
| 513 |
+
);
|
| 514 |
+
end
|
| 515 |
+
endgenerate
|
| 516 |
+
|
| 517 |
+
generate
|
| 518 |
+
if (CHIP_LINK_EN) begin : gen_chip_link
|
| 519 |
+
chip_link #(
|
| 520 |
+
.CORE_ID_BITS (CORE_ID_BITS),
|
| 521 |
+
.NEURON_BITS (NEURON_BITS),
|
| 522 |
+
.DATA_WIDTH (DATA_WIDTH),
|
| 523 |
+
.TX_DEPTH (256),
|
| 524 |
+
.RX_DEPTH (256)
|
| 525 |
+
) u_chip_link (
|
| 526 |
+
.clk (clk),
|
| 527 |
+
.rst_n (rst_n),
|
| 528 |
+
.tx_push (mesh_link_tx_push),
|
| 529 |
+
.tx_core (mesh_link_tx_core),
|
| 530 |
+
.tx_neuron (mesh_link_tx_neuron),
|
| 531 |
+
.tx_payload (mesh_link_tx_payload),
|
| 532 |
+
.tx_full (mesh_link_tx_full),
|
| 533 |
+
.rx_core (mesh_link_rx_core),
|
| 534 |
+
.rx_neuron (mesh_link_rx_neuron),
|
| 535 |
+
.rx_current (mesh_link_rx_current),
|
| 536 |
+
.rx_pop (mesh_link_rx_pop),
|
| 537 |
+
.rx_empty (mesh_link_rx_empty),
|
| 538 |
+
.link_tx_data (link_tx_data),
|
| 539 |
+
.link_tx_valid (link_tx_valid),
|
| 540 |
+
.link_tx_ready (link_tx_ready),
|
| 541 |
+
.link_rx_data (link_rx_data),
|
| 542 |
+
.link_rx_valid (link_rx_valid),
|
| 543 |
+
.link_rx_ready (link_rx_ready)
|
| 544 |
+
);
|
| 545 |
+
end else begin : gen_no_chip_link
|
| 546 |
+
assign mesh_link_tx_full = 1'b0;
|
| 547 |
+
assign mesh_link_rx_core = {CORE_ID_BITS{1'b0}};
|
| 548 |
+
assign mesh_link_rx_neuron = {NEURON_BITS{1'b0}};
|
| 549 |
+
assign mesh_link_rx_current = {DATA_WIDTH{1'b0}};
|
| 550 |
+
assign mesh_link_rx_empty = 1'b1;
|
| 551 |
+
assign link_tx_data = 8'd0;
|
| 552 |
+
assign link_tx_valid = 1'b0;
|
| 553 |
+
assign link_rx_ready = 1'b0;
|
| 554 |
+
end
|
| 555 |
+
endgenerate
|
| 556 |
+
|
| 557 |
+
endmodule
|
rtl/neuron_core.v
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Neuron Core
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// neuron_core
//
// A small, fully connected core: one `synapse` instance per (src, dst) pair
// (self-connections included) and one `lif_neuron` instance per destination.
// Each neuron's input is its external input plus the `post_current` of the
// four synapses that target it.
//
// Parameters
//   NUM_NEURONS : generate-loop bound and width of `spikes`.
//                 NOTE: the port list and the internal arrays are hard-wired
//                 for exactly 4 neurons, so any other value will not
//                 elaborate correctly.
//   DATA_WIDTH  : width of currents, weights and membrane outputs.
//   THRESHOLD   : forwarded unchanged to every lif_neuron.
//   LEAK_RATE   : forwarded unchanged to every lif_neuron.
//
// Ports
//   clk, rst_n    : forwarded to every synapse and neuron.
//   enable        : forwarded to every neuron.
//   ext_input_N   : external input added to neuron N's synaptic sum.
//   weight_SD     : weight of the synapse from neuron S to neuron D.
//   spikes        : spikes[N] is driven by neuron N's `spike` port and is
//                   also fed back as `pre_spike` of every synapse leaving N.
//   membrane_N    : neuron N's `membrane_pot` port.
//
// NOTE(review): the five-term sum for total_input is truncated to DATA_WIDTH
// bits, so it can wrap for large operands - confirm this is intended.
// ----------------------------------------------------------------------------
module neuron_core #(
    parameter NUM_NEURONS = 4,
    parameter DATA_WIDTH  = 16,
    parameter THRESHOLD   = 16'd1000,
    parameter LEAK_RATE   = 16'd2
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         enable,

    input  wire signed [DATA_WIDTH-1:0] ext_input_0,
    input  wire signed [DATA_WIDTH-1:0] ext_input_1,
    input  wire signed [DATA_WIDTH-1:0] ext_input_2,
    input  wire signed [DATA_WIDTH-1:0] ext_input_3,

    input  wire signed [DATA_WIDTH-1:0] weight_00, weight_01, weight_02, weight_03,
    input  wire signed [DATA_WIDTH-1:0] weight_10, weight_11, weight_12, weight_13,
    input  wire signed [DATA_WIDTH-1:0] weight_20, weight_21, weight_22, weight_23,
    input  wire signed [DATA_WIDTH-1:0] weight_30, weight_31, weight_32, weight_33,

    output wire [NUM_NEURONS-1:0]       spikes,

    output wire [DATA_WIDTH-1:0]        membrane_0,
    output wire [DATA_WIDTH-1:0]        membrane_1,
    output wire [DATA_WIDTH-1:0]        membrane_2,
    output wire [DATA_WIDTH-1:0]        membrane_3
);

    // Indexed as [source neuron][destination neuron].
    wire signed [DATA_WIDTH-1:0] syn_current [0:3][0:3];
    wire signed [DATA_WIDTH-1:0] weights     [0:3][0:3];
    wire signed [DATA_WIDTH-1:0] total_input [0:3];
    wire signed [DATA_WIDTH-1:0] ext_inputs  [0:3];
    // Membrane value of each neuron, taken from the instance's output port
    // (replaces the former hierarchical references into the instances).
    wire        [DATA_WIDTH-1:0] membrane    [0:3];

    // Repack the flat weight ports into the 2-D array used by the generate loop.
    assign weights[0][0] = weight_00;  assign weights[0][1] = weight_01;
    assign weights[0][2] = weight_02;  assign weights[0][3] = weight_03;
    assign weights[1][0] = weight_10;  assign weights[1][1] = weight_11;
    assign weights[1][2] = weight_12;  assign weights[1][3] = weight_13;
    assign weights[2][0] = weight_20;  assign weights[2][1] = weight_21;
    assign weights[2][2] = weight_22;  assign weights[2][3] = weight_23;
    assign weights[3][0] = weight_30;  assign weights[3][1] = weight_31;
    assign weights[3][2] = weight_32;  assign weights[3][3] = weight_33;

    assign ext_inputs[0] = ext_input_0;
    assign ext_inputs[1] = ext_input_1;
    assign ext_inputs[2] = ext_input_2;
    assign ext_inputs[3] = ext_input_3;

    // One synapse per ordered (src, dst) pair.
    genvar src, dst;
    generate
        for (src = 0; src < NUM_NEURONS; src = src + 1) begin : syn_src
            for (dst = 0; dst < NUM_NEURONS; dst = dst + 1) begin : syn_dst
                synapse #(
                    .DATA_WIDTH(DATA_WIDTH)
                ) syn_inst (
                    .clk         (clk),
                    .rst_n       (rst_n),
                    .pre_spike   (spikes[src]),
                    .weight      (weights[src][dst]),
                    .post_current(syn_current[src][dst])
                );
            end
        end
    endgenerate

    // Per-destination input: external drive plus every incoming synapse.
    assign total_input[0] = ext_inputs[0] + syn_current[0][0] + syn_current[1][0] + syn_current[2][0] + syn_current[3][0];
    assign total_input[1] = ext_inputs[1] + syn_current[0][1] + syn_current[1][1] + syn_current[2][1] + syn_current[3][1];
    assign total_input[2] = ext_inputs[2] + syn_current[0][2] + syn_current[1][2] + syn_current[2][2] + syn_current[3][2];
    assign total_input[3] = ext_inputs[3] + syn_current[0][3] + syn_current[1][3] + syn_current[2][3] + syn_current[3][3];

    // Block and instance names (`neurons`, `neuron_inst`) are kept so that any
    // external hierarchical path into these instances still resolves.
    generate
        for (dst = 0; dst < NUM_NEURONS; dst = dst + 1) begin : neurons
            lif_neuron #(
                .DATA_WIDTH (DATA_WIDTH),
                .THRESHOLD  (THRESHOLD),
                .LEAK_RATE  (LEAK_RATE)
            ) neuron_inst (
                .clk            (clk),
                .rst_n          (rst_n),
                .enable         (enable),
                .synaptic_input (total_input[dst]),
                .spike          (spikes[dst]),
                .membrane_pot   (membrane[dst])
            );
        end
    endgenerate

    assign membrane_0 = membrane[0];
    assign membrane_1 = membrane[1];
    assign membrane_2 = membrane[2];
    assign membrane_3 = membrane[3];

endmodule
|
rtl/neuron_core_stdp.v
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Neuron Core with STDP Learning
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// neuron_core_stdp
//
// Four-neuron core whose synapses are `stdp_synapse` instances. A synapse is
// instantiated for every ordered pair (src, dst) with src != dst; the
// diagonal (self-connection) entries are tied to zero.
//
// Parameters
//   NUM_NEURONS : generate-loop bound and width of `spikes`.
//                 NOTE: the port list and internal arrays are hard-wired for
//                 exactly 4 neurons, so any other value will not elaborate
//                 correctly.
//   DATA_WIDTH  : width of currents, weights and membrane outputs.
//   THRESHOLD, LEAK_RATE               : forwarded to every lif_neuron.
//   WEIGHT_INIT, WEIGHT_MAX, LEARN_RATE: forwarded to every stdp_synapse.
//
// Ports
//   clk, rst_n    : forwarded to every synapse and neuron.
//   enable        : forwarded to every neuron.
//   learn_enable  : forwarded to every stdp_synapse.
//   ext_input_N   : external input added to neuron N's synaptic sum.
//   spikes        : spikes[N] is driven by neuron N and fed back to the
//                   synapses as pre_spike (source side) / post_spike
//                   (destination side).
//   membrane_N    : neuron N's `membrane_pot` port.
//   w_out_SD      : the `weight` port of the synapse from neuron S to D
//                   (presumably the current learned weight - confirm against
//                   stdp_synapse).
//
// NOTE(review): the five-term sum for total_input is truncated to DATA_WIDTH
// bits, so it can wrap for large operands - confirm this is intended.
// ----------------------------------------------------------------------------
module neuron_core_stdp #(
    parameter NUM_NEURONS = 4,
    parameter DATA_WIDTH  = 16,
    parameter THRESHOLD   = 16'd1000,
    parameter LEAK_RATE   = 16'd2,
    parameter WEIGHT_INIT = 16'd100,
    parameter WEIGHT_MAX  = 16'd800,
    parameter LEARN_RATE  = 8'd3
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         enable,
    input  wire                         learn_enable,

    input  wire signed [DATA_WIDTH-1:0] ext_input_0,
    input  wire signed [DATA_WIDTH-1:0] ext_input_1,
    input  wire signed [DATA_WIDTH-1:0] ext_input_2,
    input  wire signed [DATA_WIDTH-1:0] ext_input_3,

    output wire [NUM_NEURONS-1:0]       spikes,

    output wire [DATA_WIDTH-1:0]        membrane_0,
    output wire [DATA_WIDTH-1:0]        membrane_1,
    output wire [DATA_WIDTH-1:0]        membrane_2,
    output wire [DATA_WIDTH-1:0]        membrane_3,

    output wire signed [DATA_WIDTH-1:0] w_out_01, w_out_02, w_out_03,
    output wire signed [DATA_WIDTH-1:0] w_out_10, w_out_12, w_out_13,
    output wire signed [DATA_WIDTH-1:0] w_out_20, w_out_21, w_out_23,
    output wire signed [DATA_WIDTH-1:0] w_out_30, w_out_31, w_out_32
);

    // Indexed as [source neuron][destination neuron].
    wire signed [DATA_WIDTH-1:0] syn_current [0:3][0:3];
    wire signed [DATA_WIDTH-1:0] syn_weight  [0:3][0:3];
    wire signed [DATA_WIDTH-1:0] total_input [0:3];
    wire signed [DATA_WIDTH-1:0] ext_inputs  [0:3];
    // Membrane value of each neuron, taken from the instance's output port
    // (replaces the former hierarchical references into the instances).
    wire        [DATA_WIDTH-1:0] membrane    [0:3];

    assign ext_inputs[0] = ext_input_0;
    assign ext_inputs[1] = ext_input_1;
    assign ext_inputs[2] = ext_input_2;
    assign ext_inputs[3] = ext_input_3;

    genvar src, dst;
    generate
        for (src = 0; src < NUM_NEURONS; src = src + 1) begin : syn_src
            for (dst = 0; dst < NUM_NEURONS; dst = dst + 1) begin : syn_dst
                if (src != dst) begin : real_syn
                    stdp_synapse #(
                        .DATA_WIDTH  (DATA_WIDTH),
                        .WEIGHT_INIT (WEIGHT_INIT),
                        .WEIGHT_MAX  (WEIGHT_MAX),
                        .LEARN_RATE  (LEARN_RATE)
                    ) syn_inst (
                        .clk           (clk),
                        .rst_n         (rst_n),
                        .learn_enable  (learn_enable),
                        .pre_spike     (spikes[src]),
                        .post_spike    (spikes[dst]),
                        .weight        (syn_weight[src][dst]),
                        .post_current  (syn_current[src][dst]),
                        .pre_trace_out (),   // trace outputs are not used here
                        .post_trace_out()
                    );
                end else begin : no_self
                    // No self-connection: contribute nothing to the sum.
                    assign syn_current[src][dst] = 0;
                    assign syn_weight[src][dst]  = 0;
                end
            end
        end
    endgenerate

    // Per-destination input: external drive plus every incoming synapse
    // (the diagonal terms are the constant zeros assigned above).
    assign total_input[0] = ext_inputs[0] + syn_current[0][0] + syn_current[1][0] + syn_current[2][0] + syn_current[3][0];
    assign total_input[1] = ext_inputs[1] + syn_current[0][1] + syn_current[1][1] + syn_current[2][1] + syn_current[3][1];
    assign total_input[2] = ext_inputs[2] + syn_current[0][2] + syn_current[1][2] + syn_current[2][2] + syn_current[3][2];
    assign total_input[3] = ext_inputs[3] + syn_current[0][3] + syn_current[1][3] + syn_current[2][3] + syn_current[3][3];

    // Block and instance names (`neurons`, `neuron_inst`) are kept so that any
    // external hierarchical path into these instances still resolves.
    generate
        for (dst = 0; dst < NUM_NEURONS; dst = dst + 1) begin : neurons
            lif_neuron #(
                .DATA_WIDTH (DATA_WIDTH),
                .THRESHOLD  (THRESHOLD),
                .LEAK_RATE  (LEAK_RATE)
            ) neuron_inst (
                .clk            (clk),
                .rst_n          (rst_n),
                .enable         (enable),
                .synaptic_input (total_input[dst]),
                .spike          (spikes[dst]),
                .membrane_pot   (membrane[dst])
            );
        end
    endgenerate

    assign membrane_0 = membrane[0];
    assign membrane_1 = membrane[1];
    assign membrane_2 = membrane[2];
    assign membrane_3 = membrane[3];

    // Expose the off-diagonal weights.
    assign w_out_01 = syn_weight[0][1];
    assign w_out_02 = syn_weight[0][2];
    assign w_out_03 = syn_weight[0][3];
    assign w_out_10 = syn_weight[1][0];
    assign w_out_12 = syn_weight[1][2];
    assign w_out_13 = syn_weight[1][3];
    assign w_out_20 = syn_weight[2][0];
    assign w_out_21 = syn_weight[2][1];
    assign w_out_23 = syn_weight[2][3];
    assign w_out_30 = syn_weight[3][0];
    assign w_out_31 = syn_weight[3][1];
    assign w_out_32 = syn_weight[3][2];

endmodule
|
rtl/rv32i_core.v
ADDED
|
@@ -0,0 +1,751 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// RV32I Core
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
module rv32i_core #(
|
| 24 |
+
parameter IMEM_DEPTH = 65536,
|
| 25 |
+
parameter IMEM_ADDR_BITS = 16,
|
| 26 |
+
parameter DMEM_DEPTH = 65536,
|
| 27 |
+
parameter DMEM_ADDR_BITS = 16
|
| 28 |
+
)(
|
| 29 |
+
input wire clk,
|
| 30 |
+
input wire rst_n,
|
| 31 |
+
input wire enable,
|
| 32 |
+
input wire imem_we,
|
| 33 |
+
input wire [IMEM_ADDR_BITS-1:0] imem_waddr,
|
| 34 |
+
input wire [31:0] imem_wdata,
|
| 35 |
+
output reg mmio_valid,
|
| 36 |
+
output reg mmio_we,
|
| 37 |
+
output reg [15:0] mmio_addr,
|
| 38 |
+
output reg [31:0] mmio_wdata,
|
| 39 |
+
input wire [31:0] mmio_rdata,
|
| 40 |
+
input wire mmio_ready,
|
| 41 |
+
output wire halted,
|
| 42 |
+
output wire [31:0] pc_out,
|
| 43 |
+
input wire [31:0] debug_bp_addr_0,
|
| 44 |
+
input wire [31:0] debug_bp_addr_1,
|
| 45 |
+
input wire [31:0] debug_bp_addr_2,
|
| 46 |
+
input wire [31:0] debug_bp_addr_3,
|
| 47 |
+
input wire [3:0] debug_bp_enable,
|
| 48 |
+
input wire debug_resume,
|
| 49 |
+
input wire debug_halt_req,
|
| 50 |
+
input wire debug_single_step
|
| 51 |
+
);
|
| 52 |
+
|
| 53 |
+
reg [31:0] regfile [0:31];
|
| 54 |
+
|
| 55 |
+
reg [31:0] fregfile [0:31];
|
| 56 |
+
|
| 57 |
+
reg [31:0] imem [0:IMEM_DEPTH-1];
|
| 58 |
+
|
| 59 |
+
always @(posedge clk) begin
|
| 60 |
+
if (imem_we)
|
| 61 |
+
imem[imem_waddr] <= imem_wdata;
|
| 62 |
+
end
|
| 63 |
+
|
| 64 |
+
reg [31:0] dmem [0:DMEM_DEPTH-1];
|
| 65 |
+
|
| 66 |
+
reg [31:0] pc;
|
| 67 |
+
reg [31:0] instr;
|
| 68 |
+
reg fetch_valid;
|
| 69 |
+
reg halt_r;
|
| 70 |
+
|
| 71 |
+
assign pc_out = pc;
|
| 72 |
+
assign halted = halt_r;
|
| 73 |
+
|
| 74 |
+
wire [IMEM_ADDR_BITS-1:0] pc_word = pc[IMEM_ADDR_BITS+1:2];
|
| 75 |
+
wire [31:0] fetched_instr = imem[pc_word];
|
| 76 |
+
|
| 77 |
+
wire [6:0] opcode = instr[6:0];
|
| 78 |
+
wire [4:0] rd = instr[11:7];
|
| 79 |
+
wire [2:0] funct3 = instr[14:12];
|
| 80 |
+
wire [4:0] rs1 = instr[19:15];
|
| 81 |
+
wire [4:0] rs2 = instr[24:20];
|
| 82 |
+
wire [6:0] funct7 = instr[31:25];
|
| 83 |
+
|
| 84 |
+
wire [31:0] imm_i = {{20{instr[31]}}, instr[31:20]};
|
| 85 |
+
wire [31:0] imm_s = {{20{instr[31]}}, instr[31:25], instr[11:7]};
|
| 86 |
+
wire [31:0] imm_b = {{19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
| 87 |
+
wire [31:0] imm_u = {instr[31:12], 12'b0};
|
| 88 |
+
wire [31:0] imm_j = {{11{instr[31]}}, instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
| 89 |
+
|
| 90 |
+
wire [31:0] rs1_val = (rs1 == 5'd0) ? 32'd0 : regfile[rs1];
|
| 91 |
+
wire [31:0] rs2_val = (rs2 == 5'd0) ? 32'd0 : regfile[rs2];
|
| 92 |
+
|
| 93 |
+
localparam OP_LUI = 7'b0110111;
|
| 94 |
+
localparam OP_AUIPC = 7'b0010111;
|
| 95 |
+
localparam OP_JAL = 7'b1101111;
|
| 96 |
+
localparam OP_JALR = 7'b1100111;
|
| 97 |
+
localparam OP_BRANCH = 7'b1100011;
|
| 98 |
+
localparam OP_LOAD = 7'b0000011;
|
| 99 |
+
localparam OP_STORE = 7'b0100011;
|
| 100 |
+
localparam OP_IMM = 7'b0010011;
|
| 101 |
+
localparam OP_REG = 7'b0110011;
|
| 102 |
+
localparam OP_FENCE = 7'b0001111;
|
| 103 |
+
localparam OP_SYSTEM = 7'b1110011;
|
| 104 |
+
|
| 105 |
+
localparam OP_FLW = 7'b0000111;
|
| 106 |
+
localparam OP_FSW = 7'b0100111;
|
| 107 |
+
localparam OP_FP = 7'b1010011;
|
| 108 |
+
|
| 109 |
+
// Convert an IEEE-754 binary32 bit pattern to a Verilog `real` (binary64).
//   f       : raw single-precision bits.
//   returns : the same numeric value as a double. Every binary32 value,
//             including subnormals, is exactly representable in binary64.
// Uses `real` and $bitstoreal, which synthesis tools generally do not accept.
function real f32_to_real;
    input [31:0] f;
    reg [63:0] d;
    reg [23:0] mant;   // subnormal significand while it is being normalised
    reg [10:0] dexp;   // biased binary64 exponent for the subnormal path
    integer    i;
    begin
        if (f[30:0] == 31'd0) begin
            // +/- zero
            d = {f[31], 63'd0};
        end else if (f[30:23] == 8'hFF) begin
            // Infinity or NaN: all-ones exponent, payload carried over.
            d = {f[31], 11'h7FF, f[22:0], 29'd0};
        end else if (f[30:23] == 8'd0) begin
            // Subnormal: value is 0.mant * 2^-126 with no implicit leading 1.
            // Shift left until the leading 1 reaches bit 23, lowering the
            // exponent by one per shift. 897 is the binary64 bias for 2^-126.
            mant = {1'b0, f[22:0]};
            dexp = 11'd897;
            for (i = 0; i < 23; i = i + 1) begin
                if (!mant[23]) begin
                    mant = mant << 1;
                    dexp = dexp - 11'd1;
                end
            end
            d = {f[31], dexp, mant[22:0], 29'd0};
        end else begin
            // Normal number: re-bias the exponent (1023 - 127 = 896) and
            // left-align the 23-bit fraction in the 52-bit field.
            d[63]    = f[31];
            d[62:52] = {3'd0, f[30:23]} + 11'd896;
            d[51:0]  = {f[22:0], 29'd0};
        end
        f32_to_real = $bitstoreal(d);
    end
endfunction
|
| 125 |
+
|
| 126 |
+
// Convert a Verilog `real` (binary64) to an IEEE-754 binary32 bit pattern.
//   r       : value to convert.
//   returns : 32-bit single-precision encoding, chosen as follows:
//     * +/-0                              -> +/-0
//     * NaN                               -> canonical quiet NaN 0x7FC00000
//     * infinity, or magnitude >= 2^128   -> +/-infinity
//     * magnitude below the smallest normal binary32 -> +/-0 (flushed)
//     * otherwise                         -> re-biased exponent with the
//       fraction truncated to 23 bits (no rounding)
// Uses `real` and $realtobits, which synthesis tools generally do not accept.
function [31:0] real_to_f32;
    input real r;
    reg [63:0] d;
    reg [10:0] dexp;
    reg [7:0]  fexp;
    begin
        d    = $realtobits(r);
        dexp = d[62:52];
        if (d[62:0] == 63'd0) begin
            real_to_f32 = {d[63], 31'd0};
        end else if (dexp == 11'h7FF && d[51:0] != 52'd0) begin
            // NaN must stay NaN; without this check it would be caught by
            // the overflow test below and come out as infinity.
            real_to_f32 = 32'h7FC00000;
        end else if (dexp >= 11'd1151) begin
            // 1151 - 896 = 255: exponent too large for binary32.
            real_to_f32 = {d[63], 8'hFF, 23'd0};
        end else if (dexp <= 11'd896) begin
            // Result would be subnormal or smaller in binary32.
            real_to_f32 = {d[63], 31'd0};
        end else begin
            fexp        = dexp - 11'd896;   // 1..254 here, fits in 8 bits
            real_to_f32 = {d[63], fexp, d[51:29]};
        end
    end
endfunction
|
| 148 |
+
|
| 149 |
+
// Square root of a `real` by Newton-Raphson iteration. Simulation-only
// helper.
//
//   x        operand
//   returns  sqrt(x) for finite x > 0; 0.0 for x <= 0; x itself when the
//            exponent field is all ones (+Inf or NaN)
//
// The starting guess is 2^floor(E/2), where E is the unbiased exponent of
// x. That puts the guess within a factor of two of the root, so the 25
// iterations converge to full precision. Starting from `guess = x`
// only halves the error per step while far from the root, which is not
// enough for large or very small single-precision magnitudes.
//
// NOTE(review): x <= 0 (including negative operands) still returns 0.0 as
// before; confirm whether a NaN result is wanted for negative inputs.
// NOTE(review): a subnormal *double* operand would start further from the
// root than a factor of two. The only caller visible here passes a widened
// single-precision value, where that cannot occur; confirm for other uses.
function real fp_sqrt;
    input real x;
    real       guess;
    reg [63:0] xbits;
    reg [11:0] seed_exp;
    integer    i;
    begin
        xbits = $realtobits(x);
        if (x <= 0.0) begin
            fp_sqrt = 0.0;
        end else if (xbits[62:52] == 11'h7FF) begin
            // Iterating on Inf would evaluate Inf/Inf; pass it through.
            fp_sqrt = x;
        end else begin
            // (e + 1023) / 2 is the biased form of floor(E / 2).
            seed_exp = ({1'b0, xbits[62:52]} + 12'd1023) >> 1;
            guess    = $bitstoreal({1'b0, seed_exp[10:0], 52'd0});
            for (i = 0; i < 25; i = i + 1)
                guess = (guess + x / guess) / 2.0;
            fp_sqrt = guess;
        end
    end
endfunction
|
| 164 |
+
|
| 165 |
+
// High when the instruction is a register-register op with
// funct7 == 0000001, i.e. it selects the multiply/divide result.
wire is_muldiv;
assign is_muldiv = (opcode == OP_REG) && (funct7 == 7'b0000001);

// 64-bit products of the two source registers.
//   mul_ss : signed   x signed
//   mul_uu : unsigned x unsigned
//   mul_su : signed   x unsigned (rs2 is zero-extended by one bit before
//            being treated as signed)
wire signed [63:0] mul_ss;
wire        [63:0] mul_uu;
wire signed [63:0] mul_su;

assign mul_ss = $signed(rs1_val) * $signed(rs2_val);
assign mul_uu = rs1_val * rs2_val;
assign mul_su = $signed(rs1_val) * $signed({1'b0, rs2_val});
|
| 170 |
+
|
| 171 |
+
// Combinational divide / remainder results.
//
// Special cases visible below:
//   * divisor == 0            : quotient is all ones, remainder is the
//                               dividend
//   * 0x80000000 / 0xFFFFFFFF : signed quotient is 0x80000000, signed
//                               remainder is 0
//
// Every arm of the signed conditional expressions must itself be signed.
// One unsigned arm makes the whole ?: expression unsigned, and that type
// propagates down into the $signed(...) / $signed(...) operands, so the
// division would be evaluated as unsigned. The overflow constant in div_s
// is therefore written as a signed literal.
wire signed [31:0] div_s = (rs2_val == 0) ? -32'sd1 :
                           (rs1_val == 32'h80000000 && rs2_val == 32'hFFFFFFFF) ? 32'sh80000000 :
                           $signed(rs1_val) / $signed(rs2_val);
wire [31:0] div_u = (rs2_val == 0) ? 32'hFFFFFFFF : rs1_val / rs2_val;
wire signed [31:0] rem_s = (rs2_val == 0) ? $signed(rs1_val) :
                           (rs1_val == 32'h80000000 && rs2_val == 32'hFFFFFFFF) ? 32'sd0 :
                           $signed(rs1_val) % $signed(rs2_val);
wire [31:0] rem_u = (rs2_val == 0) ? rs1_val : rs1_val % rs2_val;
|
| 179 |
+
|
| 180 |
+
// Selects one multiply/divide result by funct3:
//   0 -> low  word of signed x signed product
//   1 -> high word of signed x signed product
//   2 -> high word of signed x unsigned product
//   3 -> high word of unsigned x unsigned product
//   4 -> signed quotient      5 -> unsigned quotient
//   6 -> signed remainder     7 -> unsigned remainder
reg [31:0] muldiv_result;
always @(*) begin
    case (funct3)
        3'd0: muldiv_result = mul_ss[31:0];
        3'd1: muldiv_result = mul_ss[63:32];
        3'd2: muldiv_result = mul_su[63:32];
        3'd3: muldiv_result = mul_uu[63:32];
        3'd4: muldiv_result = div_s;
        3'd5: muldiv_result = div_u;
        3'd6: muldiv_result = rem_s;
        3'd7: muldiv_result = rem_u;
    endcase
end
|
| 193 |
+
|
| 194 |
+
// Control/status register storage: six 32-bit registers plus the 64-bit
// cycle counter and timer-compare value.
reg [31:0] csr_mtvec, csr_mepc, csr_mcause;
reg [31:0] csr_mstatus, csr_mie, csr_mip;
reg [63:0] csr_mcycle, csr_mtimecmp;

// 12-bit CSR addresses recognised by the read mux and the write logic.
localparam [11:0]
    CSR_MSTATUS   = 12'h300,
    CSR_MIE       = 12'h304,
    CSR_MTVEC     = 12'h305,
    CSR_MEPC      = 12'h341,
    CSR_MCAUSE    = 12'h342,
    CSR_MIP       = 12'h344,
    CSR_MCYCLE    = 12'hB00,
    CSR_MCYCLEH   = 12'hB80,
    CSR_MTIMECMP  = 12'h7C0,
    CSR_MTIMECMPH = 12'h7C1;

// CSR address comes from instruction bits [31:20]; the 5-bit immediate
// used by the immediate CSR forms is taken from the rs1 field.
wire [11:0] csr_addr;
wire [4:0]  csr_zimm;
assign csr_addr = instr[31:20];
assign csr_zimm = rs1;
|
| 216 |
+
|
| 217 |
+
// CSR read mux: returns the register selected by csr_addr, or zero for
// any address that is not listed.
reg [31:0] csr_rdata;
always @(*) begin
    csr_rdata = 32'd0;
    case (csr_addr)
        CSR_MSTATUS:   csr_rdata = csr_mstatus;
        CSR_MIE:       csr_rdata = csr_mie;
        CSR_MTVEC:     csr_rdata = csr_mtvec;
        CSR_MEPC:      csr_rdata = csr_mepc;
        CSR_MCAUSE:    csr_rdata = csr_mcause;
        CSR_MIP:       csr_rdata = csr_mip;
        // 64-bit registers are exposed as low/high word pairs.
        CSR_MCYCLE:    csr_rdata = csr_mcycle[31:0];
        CSR_MCYCLEH:   csr_rdata = csr_mcycle[63:32];
        CSR_MTIMECMP:  csr_rdata = csr_mtimecmp[31:0];
        CSR_MTIMECMPH: csr_rdata = csr_mtimecmp[63:32];
    endcase
end
|
| 233 |
+
|
| 234 |
+
// Timer condition: the cycle counter has reached or passed the compare
// value.
wire timer_pending;
assign timer_pending = !(csr_mcycle < csr_mtimecmp);

// The timer interrupt is taken only when mstatus bit 3 and mie bit 7 are
// both set.
wire timer_irq;
assign timer_irq = csr_mie[7] && csr_mstatus[3] && timer_pending;
|
| 237 |
+
|
| 238 |
+
// Integer ALU.
//
// Second operand: rs2 for register-register ops, the I-type immediate
// otherwise. The shift amount is the low five bits of that operand.
wire [31:0] alu_b = (opcode == OP_REG) ? rs2_val : imm_i;
wire [4:0]  shamt = alu_b[4:0];

// Result selected by funct3:
//   000 add (subtract when OP_REG and funct7[5])   001 shift left
//   010 signed less-than                           011 unsigned less-than
//   100 xor                                        101 shift right
//   110 or                                         111 and
reg [31:0] alu_result;
always @(*) begin
    case (funct3)
        3'b000: alu_result = (opcode == OP_REG && funct7[5]) ?
                             (rs1_val - rs2_val) : (rs1_val + alu_b);
        3'b001: alu_result = rs1_val << shamt;
        3'b010: alu_result = ($signed(rs1_val) < $signed(alu_b)) ? 32'd1 : 32'd0;
        3'b011: alu_result = (rs1_val < alu_b) ? 32'd1 : 32'd0;
        3'b100: alu_result = rs1_val ^ alu_b;
        3'b101: begin
            // The arithmetic and logical shifts must not share one ?:
            // expression. The unsigned logical-shift arm would make the
            // whole expression unsigned, and >>> on an unsigned operand
            // shifts in zeros instead of the sign bit.
            if (funct7[5])
                alu_result = $signed(rs1_val) >>> shamt;
            else
                alu_result = rs1_val >> shamt;
        end
        3'b110: alu_result = rs1_val | alu_b;
        3'b111: alu_result = rs1_val & alu_b;
        default: alu_result = 32'd0;
    endcase
end
|
| 257 |
+
|
| 258 |
+
// Branch condition evaluation, selected by funct3. Encodings that are not
// listed leave the branch not taken.
reg branch_taken;
always @(*) begin
    branch_taken = 1'b0;
    case (funct3)
        // equality tests
        3'b000: branch_taken = (rs1_val == rs2_val);
        3'b001: branch_taken = (rs1_val != rs2_val);
        // signed comparisons
        3'b100: branch_taken = ($signed(rs1_val) <  $signed(rs2_val));
        3'b101: branch_taken = ($signed(rs1_val) >= $signed(rs2_val));
        // unsigned comparisons
        3'b110: branch_taken = (rs1_val <  rs2_val);
        3'b111: branch_taken = (rs1_val >= rs2_val);
    endcase
end
|
| 270 |
+
|
| 271 |
+
// Effective address for loads and stores.
//
// Both store opcodes (integer OP_STORE and floating-point OP_FSW) use the
// S-type immediate; everything else uses the I-type immediate. OP_FSW was
// previously given imm_i, which overlaps the rs2 field in a store, so FSW
// addresses were computed from the wrong bits.
wire [31:0] mem_addr = rs1_val +
                       (((opcode == OP_STORE) || (opcode == OP_FSW)) ? imm_s : imm_i);

// Addresses whose upper half-word is 0xFFFF go to the MMIO port instead of
// data memory.
wire is_mmio = (mem_addr[31:16] == 16'hFFFF);

// Word index into dmem: the two byte-offset bits are dropped.
wire [DMEM_ADDR_BITS-1:0] dmem_word_addr = mem_addr[DMEM_ADDR_BITS+1:2];
|
| 274 |
+
|
| 275 |
+
// Sequencer state encodings.
localparam [3:0]
    S_FETCH      = 4'h0,
    S_EXEC       = 4'h1,
    S_MEM_RD     = 4'h2,
    S_MEM_WR     = 4'h3,
    S_HALT       = 4'h4,
    S_TRAP       = 4'h5,
    S_DEBUG_HALT = 4'h6;

// Current sequencer state.
reg [3:0] state;

// Set when a single-step has been requested from the debug-halt state.
reg debug_single_step_pending;

// High when the pc equals any breakpoint address whose enable bit is set.
wire bp_match;
assign bp_match = ((pc == debug_bp_addr_0) && debug_bp_enable[0]) ||
                  ((pc == debug_bp_addr_1) && debug_bp_enable[1]) ||
                  ((pc == debug_bp_addr_2) && debug_bp_enable[2]) ||
                  ((pc == debug_bp_addr_3) && debug_bp_enable[3]);

// Scratch reals used by the floating-point operations.
real fp_op_a;
real fp_op_b;
real fp_op_r;

// Remembers that the MMIO read in flight targets the floating-point
// register file.
reg mem_rd_is_float;
|
| 294 |
+
|
| 295 |
+
// ---------------------------------------------------------------------------
// Core sequencer: fetch / execute / MMIO-wait / halt state machine.
//
// Reset (rst_n low) clears the pc, instruction register, MMIO outputs, CSRs,
// both register files, and sets mtimecmp to all ones.
// While enable is low the core is held at pc 0 in S_FETCH with the cycle
// counter cleared.
// Otherwise mcycle increments every clock and mip[7] mirrors timer_pending.
//
// Debug behaviour:
//   * S_FETCH halts into S_DEBUG_HALT on debug_halt_req, a breakpoint match,
//     or a completed single step, always clearing the single-step flag so a
//     stale flag cannot cut a later resume short.
//   * Leaving S_DEBUG_HALT (resume or single step) fetches the instruction
//     at the current pc directly and goes to S_EXEC. Going back through
//     S_FETCH would re-match a breakpoint at the same pc, or see the
//     single-step flag, and halt again before anything had executed.
//     One instruction therefore always runs before the halt conditions
//     (and the timer interrupt) are evaluated again.
//
// The floating-point paths use `real` arithmetic and are simulation-only.
// ---------------------------------------------------------------------------
integer ri;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        pc          <= 32'd0;
        instr       <= 32'd0;
        fetch_valid <= 1'b0;
        halt_r      <= 1'b0;
        state       <= S_FETCH;
        mmio_valid  <= 1'b0;
        mmio_we     <= 1'b0;
        mmio_addr   <= 16'd0;
        mmio_wdata  <= 32'd0;

        csr_mtvec    <= 32'd0;
        csr_mepc     <= 32'd0;
        csr_mcause   <= 32'd0;
        csr_mstatus  <= 32'd0;
        csr_mie      <= 32'd0;
        csr_mip      <= 32'd0;
        csr_mcycle   <= 64'd0;
        csr_mtimecmp <= 64'hFFFFFFFF_FFFFFFFF;
        mem_rd_is_float           <= 1'b0;
        debug_single_step_pending <= 1'b0;
        for (ri = 0; ri < 32; ri = ri + 1) begin
            regfile[ri]  <= 32'd0;
            fregfile[ri] <= 32'd0;
        end
    end else if (!enable) begin
        state       <= S_FETCH;
        pc          <= 32'd0;
        halt_r      <= 1'b0;
        mmio_valid  <= 1'b0;
        mem_rd_is_float           <= 1'b0;
        csr_mcycle                <= 64'd0;
        debug_single_step_pending <= 1'b0;
    end else begin

        csr_mcycle <= csr_mcycle + 64'd1;

        csr_mip[7] <= timer_pending;

        case (state)
            S_FETCH: begin
                if (debug_halt_req || bp_match || debug_single_step_pending) begin
                    // Any debug halt consumes an outstanding single step.
                    debug_single_step_pending <= 1'b0;
                    halt_r <= 1'b1;
                    state  <= S_DEBUG_HALT;
                end else if (timer_irq) begin
                    // Take the timer interrupt: save pc, record the cause,
                    // move mstatus[3] into mstatus[7] and clear it, then
                    // vector to mtvec with the low two bits masked off.
                    csr_mepc       <= pc;
                    csr_mcause     <= 32'h80000007;
                    csr_mstatus[3] <= 1'b0;
                    csr_mstatus[7] <= csr_mstatus[3];
                    pc             <= csr_mtvec & ~32'd3;
                    state          <= S_FETCH;
                end else begin
                    instr       <= fetched_instr;
                    fetch_valid <= 1'b1;
                    state       <= S_EXEC;
                end
            end

            S_EXEC: begin
                mmio_valid <= 1'b0;

                case (opcode)
                    OP_LUI: begin
                        if (rd != 0) regfile[rd] <= imm_u;
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    OP_AUIPC: begin
                        if (rd != 0) regfile[rd] <= pc + imm_u;
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    OP_JAL: begin
                        if (rd != 0) regfile[rd] <= pc + 4;
                        pc    <= pc + imm_j;
                        state <= S_FETCH;
                    end

                    OP_JALR: begin
                        if (rd != 0) regfile[rd] <= pc + 4;
                        pc    <= (rs1_val + imm_i) & ~32'd1;
                        state <= S_FETCH;
                    end

                    OP_BRANCH: begin
                        pc    <= branch_taken ? (pc + imm_b) : (pc + 4);
                        state <= S_FETCH;
                    end

                    OP_LOAD: begin
                        if (is_mmio) begin
                            // MMIO read: issue the request and wait in
                            // S_MEM_RD for mmio_ready.
                            mmio_valid      <= 1'b1;
                            mmio_we         <= 1'b0;
                            mmio_addr       <= mem_addr[15:0];
                            mem_rd_is_float <= 1'b0;
                            state           <= S_MEM_RD;
                        end else begin
                            if (rd != 0) begin
                                case (funct3)
                                    3'b000: begin   // byte, sign-extended
                                        case (mem_addr[1:0])
                                            2'd0: regfile[rd] <= {{24{dmem[dmem_word_addr][7]}},  dmem[dmem_word_addr][7:0]};
                                            2'd1: regfile[rd] <= {{24{dmem[dmem_word_addr][15]}}, dmem[dmem_word_addr][15:8]};
                                            2'd2: regfile[rd] <= {{24{dmem[dmem_word_addr][23]}}, dmem[dmem_word_addr][23:16]};
                                            2'd3: regfile[rd] <= {{24{dmem[dmem_word_addr][31]}}, dmem[dmem_word_addr][31:24]};
                                        endcase
                                    end
                                    3'b001: begin   // half-word, sign-extended
                                        if (mem_addr[1])
                                            regfile[rd] <= {{16{dmem[dmem_word_addr][31]}}, dmem[dmem_word_addr][31:16]};
                                        else
                                            regfile[rd] <= {{16{dmem[dmem_word_addr][15]}}, dmem[dmem_word_addr][15:0]};
                                    end
                                    3'b010: regfile[rd] <= dmem[dmem_word_addr];
                                    3'b100: begin   // byte, zero-extended
                                        case (mem_addr[1:0])
                                            2'd0: regfile[rd] <= {24'd0, dmem[dmem_word_addr][7:0]};
                                            2'd1: regfile[rd] <= {24'd0, dmem[dmem_word_addr][15:8]};
                                            2'd2: regfile[rd] <= {24'd0, dmem[dmem_word_addr][23:16]};
                                            2'd3: regfile[rd] <= {24'd0, dmem[dmem_word_addr][31:24]};
                                        endcase
                                    end
                                    3'b101: begin   // half-word, zero-extended
                                        if (mem_addr[1])
                                            regfile[rd] <= {16'd0, dmem[dmem_word_addr][31:16]};
                                        else
                                            regfile[rd] <= {16'd0, dmem[dmem_word_addr][15:0]};
                                    end
                                    default: ;
                                endcase
                            end
                            pc    <= pc + 4;
                            state <= S_FETCH;
                        end
                    end

                    OP_STORE: begin
                        if (is_mmio) begin
                            // MMIO write: issue the request and wait in
                            // S_MEM_WR for mmio_ready.
                            mmio_valid <= 1'b1;
                            mmio_we    <= 1'b1;
                            mmio_addr  <= mem_addr[15:0];
                            mmio_wdata <= rs2_val;
                            state      <= S_MEM_WR;
                        end else begin
                            case (funct3)
                                3'b000: begin   // byte lane write
                                    case (mem_addr[1:0])
                                        2'd0: dmem[dmem_word_addr][7:0]   <= rs2_val[7:0];
                                        2'd1: dmem[dmem_word_addr][15:8]  <= rs2_val[7:0];
                                        2'd2: dmem[dmem_word_addr][23:16] <= rs2_val[7:0];
                                        2'd3: dmem[dmem_word_addr][31:24] <= rs2_val[7:0];
                                    endcase
                                end
                                3'b001: begin   // half-word lane write
                                    if (mem_addr[1])
                                        dmem[dmem_word_addr][31:16] <= rs2_val[15:0];
                                    else
                                        dmem[dmem_word_addr][15:0]  <= rs2_val[15:0];
                                end
                                3'b010: dmem[dmem_word_addr] <= rs2_val;
                                default: ;
                            endcase
                            pc    <= pc + 4;
                            state <= S_FETCH;
                        end
                    end

                    OP_IMM: begin
                        if (rd != 0) regfile[rd] <= alu_result;
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    OP_REG: begin
                        if (is_muldiv) begin
                            if (rd != 0) regfile[rd] <= muldiv_result;
                        end else begin
                            if (rd != 0) regfile[rd] <= alu_result;
                        end
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    OP_FENCE: begin
                        // No memory-ordering action: just advance.
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    OP_SYSTEM: begin
                        if (funct3 == 3'b000) begin
                            if (instr[31:20] == 12'h302) begin
                                // Return from trap: restore pc from mepc,
                                // copy mstatus[7] back into mstatus[3] and
                                // set mstatus[7].
                                pc             <= csr_mepc;
                                csr_mstatus[3] <= csr_mstatus[7];
                                csr_mstatus[7] <= 1'b1;
                                state          <= S_FETCH;
                            end else begin
                                // Every other funct3 == 0 system
                                // instruction halts the core.
                                halt_r <= 1'b1;
                                state  <= S_HALT;
                            end
                        end else begin
                            // CSR access: rd receives the old CSR value.
                            if (rd != 0) regfile[rd] <= csr_rdata;

                            case (funct3)
                                3'b001: begin   // write from rs1
                                    case (csr_addr)
                                        CSR_MSTATUS:   csr_mstatus         <= rs1_val;
                                        CSR_MIE:       csr_mie             <= rs1_val;
                                        CSR_MTVEC:     csr_mtvec           <= rs1_val;
                                        CSR_MEPC:      csr_mepc            <= rs1_val;
                                        CSR_MCAUSE:    csr_mcause          <= rs1_val;
                                        CSR_MTIMECMP:  csr_mtimecmp[31:0]  <= rs1_val;
                                        CSR_MTIMECMPH: csr_mtimecmp[63:32] <= rs1_val;
                                        default: ;
                                    endcase
                                end
                                3'b010: begin   // set bits from rs1
                                    if (rs1 != 0) begin
                                        case (csr_addr)
                                            CSR_MSTATUS: csr_mstatus <= csr_mstatus | rs1_val;
                                            CSR_MIE:     csr_mie     <= csr_mie | rs1_val;
                                            CSR_MTVEC:   csr_mtvec   <= csr_mtvec | rs1_val;
                                            default: ;
                                        endcase
                                    end
                                end
                                3'b011: begin   // clear bits from rs1
                                    if (rs1 != 0) begin
                                        case (csr_addr)
                                            CSR_MSTATUS: csr_mstatus <= csr_mstatus & ~rs1_val;
                                            CSR_MIE:     csr_mie     <= csr_mie & ~rs1_val;
                                            default: ;
                                        endcase
                                    end
                                end
                                3'b101: begin   // write from 5-bit immediate
                                    case (csr_addr)
                                        CSR_MSTATUS: csr_mstatus <= {27'd0, csr_zimm};
                                        CSR_MIE:     csr_mie     <= {27'd0, csr_zimm};
                                        CSR_MTVEC:   csr_mtvec   <= {27'd0, csr_zimm};
                                        default: ;
                                    endcase
                                end
                                3'b110: begin   // set bits from immediate
                                    if (csr_zimm != 0) begin
                                        case (csr_addr)
                                            CSR_MSTATUS: csr_mstatus <= csr_mstatus | {27'd0, csr_zimm};
                                            CSR_MIE:     csr_mie     <= csr_mie | {27'd0, csr_zimm};
                                            default: ;
                                        endcase
                                    end
                                end
                                3'b111: begin   // clear bits from immediate
                                    if (csr_zimm != 0) begin
                                        case (csr_addr)
                                            CSR_MSTATUS: csr_mstatus <= csr_mstatus & ~{27'd0, csr_zimm};
                                            CSR_MIE:     csr_mie     <= csr_mie & ~{27'd0, csr_zimm};
                                            default: ;
                                        endcase
                                    end
                                end
                                default: ;
                            endcase

                            pc    <= pc + 4;
                            state <= S_FETCH;
                        end
                    end

                    OP_FLW: begin
                        if (is_mmio) begin
                            mmio_valid      <= 1'b1;
                            mmio_we         <= 1'b0;
                            mmio_addr       <= mem_addr[15:0];
                            mem_rd_is_float <= 1'b1;
                            state           <= S_MEM_RD;
                        end else begin
                            fregfile[rd] <= dmem[dmem_word_addr];
                            pc           <= pc + 4;
                            state        <= S_FETCH;
                        end
                    end

                    OP_FSW: begin
                        if (is_mmio) begin
                            mmio_valid <= 1'b1;
                            mmio_we    <= 1'b1;
                            mmio_addr  <= mem_addr[15:0];
                            mmio_wdata <= fregfile[rs2];
                            state      <= S_MEM_WR;
                        end else begin
                            dmem[dmem_word_addr] <= fregfile[rs2];
                            pc    <= pc + 4;
                            state <= S_FETCH;
                        end
                    end

                    OP_FP: begin
                        case (funct7)
                            7'b0000000: begin   // add
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                fregfile[rd] <= real_to_f32(fp_op_a + fp_op_b);
                            end
                            7'b0000100: begin   // subtract
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                fregfile[rd] <= real_to_f32(fp_op_a - fp_op_b);
                            end
                            7'b0001000: begin   // multiply
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                fregfile[rd] <= real_to_f32(fp_op_a * fp_op_b);
                            end
                            7'b0001100: begin   // divide
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                if (fp_op_b != 0.0)
                                    fregfile[rd] <= real_to_f32(fp_op_a / fp_op_b);
                                else if ((fregfile[rs1][30:0] == 31'd0) ||
                                         ((fregfile[rs1][30:23] == 8'hFF) &&
                                          (fregfile[rs1][22:0] != 23'd0)))
                                    // 0/0 and NaN/0 produce the canonical NaN.
                                    fregfile[rd] <= 32'h7FC00000;
                                else
                                    // Non-zero / 0 is an infinity whose sign is
                                    // the XOR of the operand signs, not NaN.
                                    fregfile[rd] <= {fregfile[rs1][31] ^ fregfile[rs2][31],
                                                     8'hFF, 23'd0};
                            end
                            7'b0101100: begin   // square root
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_r = fp_sqrt(fp_op_a);
                                fregfile[rd] <= real_to_f32(fp_op_r);
                            end
                            7'b0010100: begin   // min / max
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                case (funct3)
                                    3'b000: fregfile[rd] <= (fp_op_a <= fp_op_b) ?
                                                            fregfile[rs1] : fregfile[rs2];
                                    3'b001: fregfile[rd] <= (fp_op_a >= fp_op_b) ?
                                                            fregfile[rs1] : fregfile[rs2];
                                    default: ;
                                endcase
                            end
                            7'b0010000: begin   // sign injection
                                case (funct3)
                                    3'b000: fregfile[rd] <= {fregfile[rs2][31],
                                                             fregfile[rs1][30:0]};
                                    3'b001: fregfile[rd] <= {~fregfile[rs2][31],
                                                             fregfile[rs1][30:0]};
                                    3'b010: fregfile[rd] <= {fregfile[rs1][31] ^
                                                             fregfile[rs2][31],
                                                             fregfile[rs1][30:0]};
                                    default: ;
                                endcase
                            end
                            7'b1100000: begin   // float -> integer
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                if (rd != 0) regfile[rd] <= $rtoi(fp_op_a);
                            end
                            7'b1101000: begin   // integer -> float
                                fregfile[rd] <= real_to_f32($itor($signed(rs1_val)));
                            end
                            7'b1010000: begin   // compare into integer rd
                                fp_op_a = f32_to_real(fregfile[rs1]);
                                fp_op_b = f32_to_real(fregfile[rs2]);
                                if (rd != 0) begin
                                    case (funct3)
                                        3'b010: regfile[rd] <= (fp_op_a == fp_op_b) ?
                                                               32'd1 : 32'd0;
                                        3'b001: regfile[rd] <= (fp_op_a < fp_op_b) ?
                                                               32'd1 : 32'd0;
                                        3'b000: regfile[rd] <= (fp_op_a <= fp_op_b) ?
                                                               32'd1 : 32'd0;
                                        default: ;
                                    endcase
                                end
                            end
                            7'b1110000: begin   // raw bits: float reg -> integer reg
                                if (rd != 0) regfile[rd] <= fregfile[rs1];
                            end
                            7'b1111000: begin   // raw bits: integer reg -> float reg
                                fregfile[rd] <= rs1_val;
                            end
                            default: ;
                        endcase
                        pc    <= pc + 4;
                        state <= S_FETCH;
                    end

                    default: begin
                        // Unrecognised opcode: stop the core.
                        halt_r <= 1'b1;
                        state  <= S_HALT;
                    end
                endcase
            end

            S_MEM_RD: begin
                // Hold the request until the MMIO side reports ready, then
                // write the data to the integer or FP register file.
                if (mmio_ready) begin
                    mmio_valid <= 1'b0;
                    if (mem_rd_is_float) begin
                        fregfile[rd]    <= mmio_rdata;
                        mem_rd_is_float <= 1'b0;
                    end else begin
                        if (rd != 0) regfile[rd] <= mmio_rdata;
                    end
                    pc    <= pc + 4;
                    state <= S_FETCH;
                end
            end

            S_MEM_WR: begin
                if (mmio_ready) begin
                    mmio_valid <= 1'b0;
                    pc    <= pc + 4;
                    state <= S_FETCH;
                end
            end

            S_HALT: begin
                // Terminal state: left only through reset or !enable.
            end

            S_DEBUG_HALT: begin
                if (debug_resume || debug_single_step) begin
                    halt_r <= 1'b0;
                    // Resume wins when both are asserted; a single step arms
                    // the flag so the next S_FETCH halts again.
                    debug_single_step_pending <= !debug_resume;
                    // Fetch here so exactly one instruction executes before
                    // the halt conditions in S_FETCH are looked at again.
                    instr       <= fetched_instr;
                    fetch_valid <= 1'b1;
                    state       <= S_EXEC;
                end
            end

            default: state <= S_HALT;
        endcase
    end
end
|
| 750 |
+
|
| 751 |
+
endmodule
|
rtl/rv32im_cluster.v
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// RV32IM Cluster
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
// Three rv32i_core instances sharing one MMIO port and a four-word mailbox.
//
// Parameters are passed straight through to every core.
//
// Ports:
//   enable[2:0]                 per-core enable
//   imem_we_N/waddr_N/wdata_N   per-core instruction-memory write port
//   mmio_*                      shared MMIO request/response port
//   halted[2:0], pc_out_N       per-core status
//
// MMIO arbitration:
//   When the bus is idle the lowest-numbered requesting core wins. Once a
//   request has been presented and not yet acknowledged, the bus stays
//   owned by that core until combined_ready is seen (or the owner drops
//   its request). Without this lock a higher-priority core raising
//   mmio_valid mid-transaction would swap the address/data mux under a
//   request still in flight and receive the ready meant for the other core.
//
// Mailbox:
//   Requests with 0x0080 <= address <= 0x008C are served by four local
//   32-bit registers (index = address[3:2]) with ready in the same cycle,
//   and are not forwarded to the external MMIO port.
//
// The debug inputs of all three cores are tied off.
module rv32im_cluster #(
    parameter IMEM_DEPTH     = 65536,
    parameter IMEM_ADDR_BITS = 16,
    parameter DMEM_DEPTH     = 65536,
    parameter DMEM_ADDR_BITS = 16
)(
    input  wire                      clk,
    input  wire                      rst_n,

    input  wire [2:0]                enable,

    input  wire                      imem_we_0,
    input  wire [IMEM_ADDR_BITS-1:0] imem_waddr_0,
    input  wire [31:0]               imem_wdata_0,

    input  wire                      imem_we_1,
    input  wire [IMEM_ADDR_BITS-1:0] imem_waddr_1,
    input  wire [31:0]               imem_wdata_1,

    input  wire                      imem_we_2,
    input  wire [IMEM_ADDR_BITS-1:0] imem_waddr_2,
    input  wire [31:0]               imem_wdata_2,

    output wire                      mmio_valid,
    output wire                      mmio_we,
    output wire [15:0]               mmio_addr,
    output wire [31:0]               mmio_wdata,
    input  wire [31:0]               mmio_rdata,
    input  wire                      mmio_ready,

    output wire [2:0]                halted,
    output wire [31:0]               pc_out_0,
    output wire [31:0]               pc_out_1,
    output wire [31:0]               pc_out_2
);

    // Per-core MMIO request signals.
    wire        c0_mmio_valid, c0_mmio_we;
    wire [15:0] c0_mmio_addr;
    wire [31:0] c0_mmio_wdata;

    wire        c1_mmio_valid, c1_mmio_we;
    wire [15:0] c1_mmio_addr;
    wire [31:0] c1_mmio_wdata;

    wire        c2_mmio_valid, c2_mmio_we;
    wire [15:0] c2_mmio_addr;
    wire [31:0] c2_mmio_wdata;

    // Response nets shared by all cores. Declared ahead of the instances
    // that use them so no tool has to create an implicit net for them.
    wire [31:0] combined_rdata;
    wire        combined_ready;

    // ------------------------------------------------------------------
    // Arbitration with transaction lock
    // ------------------------------------------------------------------
    reg         mmio_lock;    // a request was outstanding last cycle
    reg  [1:0]  mmio_owner;   // core that owned the bus last cycle

    // Fixed priority used when no transaction is in flight.
    wire [1:0] prio_sel = c0_mmio_valid ? 2'd0 :
                          c1_mmio_valid ? 2'd1 :
                                          2'd2;

    wire owner_still_valid = (mmio_owner == 2'd0) ? c0_mmio_valid :
                             (mmio_owner == 2'd1) ? c1_mmio_valid :
                                                    c2_mmio_valid;

    // Keep the previous owner while its request is still pending.
    wire [1:0] arb_sel = (mmio_lock && owner_still_valid) ? mmio_owner : prio_sel;

    wire c0_grant = c0_mmio_valid && (arb_sel == 2'd0);
    wire c1_grant = c1_mmio_valid && (arb_sel == 2'd1);
    wire c2_grant = c2_mmio_valid && (arb_sel == 2'd2);

    wire        arb_valid = c0_mmio_valid | c1_mmio_valid | c2_mmio_valid;
    wire [15:0] arb_addr  = (arb_sel == 2'd0) ? c0_mmio_addr :
                            (arb_sel == 2'd1) ? c1_mmio_addr :
                                                c2_mmio_addr;
    wire        arb_we    = (arb_sel == 2'd0) ? c0_mmio_we :
                            (arb_sel == 2'd1) ? c1_mmio_we :
                                                c2_mmio_we;
    wire [31:0] arb_wdata = (arb_sel == 2'd0) ? c0_mmio_wdata :
                            (arb_sel == 2'd1) ? c1_mmio_wdata :
                                                c2_mmio_wdata;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            mmio_lock  <= 1'b0;
            mmio_owner <= 2'd0;
        end else begin
            // Locked for as long as a request is presented but unacknowledged.
            mmio_lock <= arb_valid && !combined_ready;
            if (arb_valid)
                mmio_owner <= arb_sel;
        end
    end

    // ------------------------------------------------------------------
    // Cores
    // ------------------------------------------------------------------
    rv32i_core #(
        .IMEM_DEPTH(IMEM_DEPTH), .IMEM_ADDR_BITS(IMEM_ADDR_BITS),
        .DMEM_DEPTH(DMEM_DEPTH), .DMEM_ADDR_BITS(DMEM_ADDR_BITS)
    ) core0 (
        .clk(clk), .rst_n(rst_n), .enable(enable[0]),
        .imem_we(imem_we_0), .imem_waddr(imem_waddr_0), .imem_wdata(imem_wdata_0),
        .mmio_valid(c0_mmio_valid), .mmio_we(c0_mmio_we),
        .mmio_addr(c0_mmio_addr), .mmio_wdata(c0_mmio_wdata),
        .mmio_rdata(combined_rdata),
        .mmio_ready(c0_grant ? combined_ready : 1'b0),
        .halted(halted[0]), .pc_out(pc_out_0),
        .debug_bp_addr_0(32'd0), .debug_bp_addr_1(32'd0),
        .debug_bp_addr_2(32'd0), .debug_bp_addr_3(32'd0),
        .debug_bp_enable(4'd0),
        .debug_resume(1'b0), .debug_halt_req(1'b0), .debug_single_step(1'b0)
    );

    rv32i_core #(
        .IMEM_DEPTH(IMEM_DEPTH), .IMEM_ADDR_BITS(IMEM_ADDR_BITS),
        .DMEM_DEPTH(DMEM_DEPTH), .DMEM_ADDR_BITS(DMEM_ADDR_BITS)
    ) core1 (
        .clk(clk), .rst_n(rst_n), .enable(enable[1]),
        .imem_we(imem_we_1), .imem_waddr(imem_waddr_1), .imem_wdata(imem_wdata_1),
        .mmio_valid(c1_mmio_valid), .mmio_we(c1_mmio_we),
        .mmio_addr(c1_mmio_addr), .mmio_wdata(c1_mmio_wdata),
        .mmio_rdata(combined_rdata),
        .mmio_ready(c1_grant ? combined_ready : 1'b0),
        .halted(halted[1]), .pc_out(pc_out_1),
        .debug_bp_addr_0(32'd0), .debug_bp_addr_1(32'd0),
        .debug_bp_addr_2(32'd0), .debug_bp_addr_3(32'd0),
        .debug_bp_enable(4'd0),
        .debug_resume(1'b0), .debug_halt_req(1'b0), .debug_single_step(1'b0)
    );

    rv32i_core #(
        .IMEM_DEPTH(IMEM_DEPTH), .IMEM_ADDR_BITS(IMEM_ADDR_BITS),
        .DMEM_DEPTH(DMEM_DEPTH), .DMEM_ADDR_BITS(DMEM_ADDR_BITS)
    ) core2 (
        .clk(clk), .rst_n(rst_n), .enable(enable[2]),
        .imem_we(imem_we_2), .imem_waddr(imem_waddr_2), .imem_wdata(imem_wdata_2),
        .mmio_valid(c2_mmio_valid), .mmio_we(c2_mmio_we),
        .mmio_addr(c2_mmio_addr), .mmio_wdata(c2_mmio_wdata),
        .mmio_rdata(combined_rdata),
        .mmio_ready(c2_grant ? combined_ready : 1'b0),
        .halted(halted[2]), .pc_out(pc_out_2),
        .debug_bp_addr_0(32'd0), .debug_bp_addr_1(32'd0),
        .debug_bp_addr_2(32'd0), .debug_bp_addr_3(32'd0),
        .debug_bp_enable(4'd0),
        .debug_resume(1'b0), .debug_halt_req(1'b0), .debug_single_step(1'b0)
    );

    // ------------------------------------------------------------------
    // Mailbox registers
    // ------------------------------------------------------------------
    reg [31:0] mailbox [0:3];

    integer mbi;

    wire       is_mailbox  = arb_valid && (arb_addr >= 16'h0080) && (arb_addr <= 16'h008C);
    wire [1:0] mailbox_idx = arb_addr[3:2];

    reg [31:0] mailbox_rdata;
    always @(*) begin
        mailbox_rdata = mailbox[mailbox_idx];
    end

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            for (mbi = 0; mbi < 4; mbi = mbi + 1)
                mailbox[mbi] <= 32'd0;
        end else if (is_mailbox && arb_we) begin
            mailbox[mailbox_idx] <= arb_wdata;
        end
    end

    // Mailbox accesses complete in the cycle they are presented.
    wire mailbox_ready = is_mailbox;

    // ------------------------------------------------------------------
    // External MMIO port and response routing
    // ------------------------------------------------------------------
    assign mmio_valid = arb_valid && !is_mailbox;
    assign mmio_we    = arb_we;
    assign mmio_addr  = arb_addr;
    assign mmio_wdata = arb_wdata;

    assign combined_rdata = is_mailbox ? mailbox_rdata : mmio_rdata;
    assign combined_ready = is_mailbox ? mailbox_ready : mmio_ready;

endmodule
|
rtl/scalable_core.v
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Scalable Neuron Core
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// scalable_core
//
// Time-multiplexed spiking-neuron core. One FSM walks all NUM_NEURONS neurons
// per timestep, keeping per-neuron state in small synchronous SRAMs:
//   neuron_mem  - membrane potential
//   refrac_mem  - refractory counter (4 bits)
//   acc_mem     - input-current accumulator
//   trace_mem   - 8-bit spike trace used by the learning rule
//   weight_mem  - synaptic weights, addressed as {source, destination}
//
// A timestep (triggered by `start` while idle) runs these phases:
//   1. DELIVER : for every neuron that spiked in the previous timestep, add
//                weight[{src,dst}] to acc[dst] for every dst != src.
//   2. UPDATE  : per neuron, apply leak/threshold/refractory logic, update the
//                trace, clear the accumulator and report spikes.
//   3. LEARN   : (only if learn_enable) for every neuron that spiked in this
//                timestep, potentiate weight[{pre,post}] by
//                trace[pre] >> LEARN_SHIFT, clamped to THRESHOLD.
//   4. DONE    : swap spike buffers, pulse timestep_done.
//
// The `sram` instances have a registered read and the addresses driven here
// are registers too, so data for an address assigned in state N is valid in
// state N+2. Every phase therefore has an explicit READ wait state.
//
// While the FSM is idle the weight memory port is handed to the external
// weight_we/weight_addr/weight_data interface, and ext_valid overwrites
// acc[ext_neuron_id] with ext_current.
//
// NOTE(review): weight addresses are built as {id, id}, which presumably
// requires WEIGHT_BITS == 2*NEURON_BITS (true for the defaults) - confirm.
// NOTE(review): an inject_spike_valid that coincides with S_DONE is overridden
// by the buffer clear in that state - confirm this is acceptable upstream.
// ----------------------------------------------------------------------------
module scalable_core #(
    parameter NUM_NEURONS   = 64,
    parameter DATA_WIDTH    = 16,
    parameter NEURON_BITS   = 6,
    parameter WEIGHT_BITS   = 12,
    parameter THRESHOLD     = 16'sd1000,
    parameter LEAK_RATE     = 16'sd3,
    parameter RESTING_POT   = 16'sd0,
    parameter REFRAC_CYCLES = 4,
    parameter TRACE_MAX     = 8'd100,
    parameter TRACE_DECAY   = 8'd3,
    parameter LEARN_SHIFT   = 3
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         start,
    input  wire                         learn_enable,

    input  wire                         ext_valid,
    input  wire [NEURON_BITS-1:0]       ext_neuron_id,
    input  wire signed [DATA_WIDTH-1:0] ext_current,

    input  wire                         inject_spike_valid,
    input  wire [NEURON_BITS-1:0]       inject_spike_id,

    input  wire                         weight_we,
    input  wire [WEIGHT_BITS-1:0]       weight_addr,
    input  wire signed [DATA_WIDTH-1:0] weight_data,

    output reg                          timestep_done,
    output reg                          spike_out_valid,
    output reg  [NEURON_BITS-1:0]       spike_out_id,

    output wire [3:0]                   state_out,
    output reg  [15:0]                  total_spikes,
    output reg  [15:0]                  timestep_count
);

    localparam S_IDLE         = 4'd0;
    localparam S_DELIVER_INIT = 4'd1;
    localparam S_DELIVER_READ = 4'd2;
    localparam S_DELIVER_ACC  = 4'd3;
    localparam S_DELIVER_NEXT = 4'd4;
    localparam S_UPDATE_INIT  = 4'd5;
    localparam S_UPDATE_READ  = 4'd6;
    localparam S_UPDATE_CALC  = 4'd7;
    localparam S_UPDATE_WRITE = 4'd8;
    localparam S_LEARN        = 4'd9;
    localparam S_LEARN_WRITE  = 4'd10;
    localparam S_DONE         = 4'd11;
    // Wait state between issuing the learn-phase addresses and using the
    // SRAM read data (new encoding; existing encodings are unchanged).
    localparam S_LEARN_READ   = 4'd12;

    reg [3:0] state;
    assign state_out = state;

    // ---- membrane potential memory -----------------------------------------
    reg                          mem_we;
    reg  [NEURON_BITS-1:0]       mem_addr;
    reg  signed [DATA_WIDTH-1:0] mem_wdata;
    wire signed [DATA_WIDTH-1:0] mem_rdata;

    sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) neuron_mem (
        .clk(clk),
        .we_a(mem_we), .addr_a(mem_addr), .wdata_a(mem_wdata), .rdata_a(mem_rdata),
        .addr_b({NEURON_BITS{1'b0}}), .rdata_b()
    );

    // ---- refractory counter memory -----------------------------------------
    reg                    ref_we;
    reg  [NEURON_BITS-1:0] ref_addr;
    reg  [3:0]             ref_wdata;
    wire [3:0]             ref_rdata_raw;

    sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) refrac_mem (
        .clk(clk),
        .we_a(ref_we), .addr_a(ref_addr), .wdata_a(ref_wdata), .rdata_a(ref_rdata_raw),
        .addr_b({NEURON_BITS{1'b0}}), .rdata_b()
    );

    // ---- weight memory (external port while idle, FSM otherwise) -----------
    reg                          wt_we_core;
    reg  [WEIGHT_BITS-1:0]       wt_addr_core;
    reg  signed [DATA_WIDTH-1:0] wt_wdata_core;
    wire signed [DATA_WIDTH-1:0] wt_rdata;

    wire                         wt_we_mux    = (state == S_IDLE) ? weight_we   : wt_we_core;
    wire [WEIGHT_BITS-1:0]       wt_addr_mux  = (state == S_IDLE) ? weight_addr : wt_addr_core;
    wire signed [DATA_WIDTH-1:0] wt_wdata_mux = (state == S_IDLE) ? weight_data : wt_wdata_core;

    sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(WEIGHT_BITS)) weight_mem (
        .clk(clk),
        .we_a(wt_we_mux), .addr_a(wt_addr_mux), .wdata_a(wt_wdata_mux), .rdata_a(wt_rdata),
        .addr_b({WEIGHT_BITS{1'b0}}), .rdata_b()
    );

    // ---- input-current accumulator memory ----------------------------------
    reg                          acc_we;
    reg  [NEURON_BITS-1:0]       acc_addr;
    reg  signed [DATA_WIDTH-1:0] acc_wdata;
    wire signed [DATA_WIDTH-1:0] acc_rdata;

    sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) acc_mem (
        .clk(clk),
        .we_a(acc_we), .addr_a(acc_addr), .wdata_a(acc_wdata), .rdata_a(acc_rdata),
        .addr_b({NEURON_BITS{1'b0}}), .rdata_b()
    );

    // ---- spike trace memory ------------------------------------------------
    reg                    trace_we;
    reg  [NEURON_BITS-1:0] trace_addr;
    reg  [7:0]             trace_wdata;
    wire [7:0]             trace_rdata;

    sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) trace_mem (
        .clk(clk),
        .we_a(trace_we), .addr_a(trace_addr), .wdata_a(trace_wdata), .rdata_a(trace_rdata),
        .addr_b({NEURON_BITS{1'b0}}), .rdata_b()
    );

    // Spikes of the previous timestep (delivered now) and of the current one.
    reg [NUM_NEURONS-1:0] spike_buf_prev;
    reg [NUM_NEURONS-1:0] spike_buf_curr;

    // Loop counters; deliver_* carry one extra bit so they can reach
    // NUM_NEURONS as a loop-exit value.
    reg [NEURON_BITS-1:0]       proc_neuron;
    reg [NEURON_BITS:0]         deliver_src;
    reg [NEURON_BITS:0]         deliver_dst;
    reg signed [DATA_WIDTH-1:0] proc_potential;
    reg [3:0]                   proc_refrac;
    reg signed [DATA_WIDTH-1:0] proc_input;
    reg                         proc_spiked;

    // Trace value after one decay step, floored at zero.
    wire [7:0] trace_decayed =
        (trace_rdata > TRACE_DECAY) ? (trace_rdata - TRACE_DECAY) : 8'd0;

    // Learning increment and candidate weight, computed one bit wider than
    // DATA_WIDTH and fully signed. The trace is unsigned, so it is
    // zero-extended explicitly; otherwise the unsigned operand would turn the
    // clamp comparison unsigned and negative weights would compare as huge
    // positive values.
    wire signed [DATA_WIDTH:0] learn_delta =
        {{(DATA_WIDTH-7){1'b0}}, (trace_rdata >> LEARN_SHIFT)};
    wire signed [DATA_WIDTH:0] learn_sum = wt_rdata + learn_delta;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            state           <= S_IDLE;
            spike_buf_prev  <= 0;
            spike_buf_curr  <= 0;
            timestep_done   <= 0;
            spike_out_valid <= 0;
            total_spikes    <= 0;
            timestep_count  <= 0;
            mem_we <= 0; ref_we <= 0; acc_we <= 0;
            wt_we_core <= 0; trace_we <= 0;
            proc_neuron     <= 0;
            deliver_src     <= 0;
            deliver_dst     <= 0;
        end else begin
            // Defaults: write enables and pulse outputs last one cycle.
            mem_we          <= 0;
            ref_we          <= 0;
            acc_we          <= 0;
            wt_we_core      <= 0;
            trace_we        <= 0;
            timestep_done   <= 0;
            spike_out_valid <= 0;

            // Externally injected spikes are recorded as current-timestep spikes.
            if (inject_spike_valid) begin
                spike_buf_curr[inject_spike_id] <= 1'b1;
            end

            // External current overwrites the accumulator entry (idle only).
            if (ext_valid && state == S_IDLE) begin
                acc_we    <= 1;
                acc_addr  <= ext_neuron_id;
                acc_wdata <= ext_current;
            end

            case (state)
                S_IDLE: begin
                    if (start) begin
                        state       <= S_DELIVER_INIT;
                        deliver_src <= 0;
                        deliver_dst <= 0;
                    end
                end

                // ---------------- DELIVER phase ----------------
                S_DELIVER_INIT: begin
                    if (deliver_src < NUM_NEURONS) begin
                        if (spike_buf_prev[deliver_src[NEURON_BITS-1:0]]) begin
                            deliver_dst  <= 0;
                            wt_addr_core <= {deliver_src[NEURON_BITS-1:0], {NEURON_BITS{1'b0}}};
                            acc_addr     <= 0;
                            state        <= S_DELIVER_READ;
                        end else begin
                            deliver_src <= deliver_src + 1;
                        end
                    end else begin
                        state       <= S_UPDATE_INIT;
                        proc_neuron <= 0;
                    end
                end

                // Addresses are on the SRAM ports; wait for the read data.
                S_DELIVER_READ: begin
                    wt_addr_core <= {deliver_src[NEURON_BITS-1:0], deliver_dst[NEURON_BITS-1:0]};
                    acc_addr     <= deliver_dst[NEURON_BITS-1:0];
                    state        <= S_DELIVER_ACC;
                end

                S_DELIVER_ACC: begin
                    // No self-connection: a neuron never feeds itself.
                    if (deliver_src[NEURON_BITS-1:0] != deliver_dst[NEURON_BITS-1:0]) begin
                        acc_we    <= 1;
                        acc_addr  <= deliver_dst[NEURON_BITS-1:0];
                        acc_wdata <= acc_rdata + wt_rdata;
                    end
                    state <= S_DELIVER_NEXT;
                end

                S_DELIVER_NEXT: begin
                    if (deliver_dst < NUM_NEURONS - 1) begin
                        deliver_dst  <= deliver_dst + 1;
                        wt_addr_core <= {deliver_src[NEURON_BITS-1:0], deliver_dst[NEURON_BITS-1:0] + {{(NEURON_BITS-1){1'b0}}, 1'b1}};
                        acc_addr     <= deliver_dst[NEURON_BITS-1:0] + 1;
                        state        <= S_DELIVER_READ;
                    end else begin
                        deliver_src <= deliver_src + 1;
                        state       <= S_DELIVER_INIT;
                    end
                end

                // ---------------- UPDATE phase ----------------
                S_UPDATE_INIT: begin
                    mem_addr   <= proc_neuron;
                    ref_addr   <= proc_neuron;
                    acc_addr   <= proc_neuron;
                    trace_addr <= proc_neuron;
                    state      <= S_UPDATE_READ;
                end

                S_UPDATE_READ: begin
                    mem_addr   <= proc_neuron;
                    ref_addr   <= proc_neuron;
                    acc_addr   <= proc_neuron;
                    trace_addr <= proc_neuron;
                    state      <= S_UPDATE_CALC;
                end

                S_UPDATE_CALC: begin
                    proc_potential <= mem_rdata;
                    proc_refrac    <= ref_rdata_raw;
                    proc_input     <= acc_rdata;
                    proc_spiked    <= 0;

                    if (ref_rdata_raw > 0) begin
                        // Refractory: hold at rest, count down, let the trace decay.
                        proc_potential <= RESTING_POT;
                        proc_refrac    <= ref_rdata_raw - 1;
                        trace_wdata    <= trace_decayed;
                    end else begin
                        if (mem_rdata + acc_rdata - LEAK_RATE >= THRESHOLD) begin
                            // Spike: reset, enter refractory period, saturate trace.
                            proc_potential <= RESTING_POT;
                            proc_refrac    <= REFRAC_CYCLES[3:0];
                            proc_spiked    <= 1;
                            trace_wdata    <= TRACE_MAX;
                        end else if (mem_rdata + acc_rdata > LEAK_RATE) begin
                            proc_potential <= mem_rdata + acc_rdata - LEAK_RATE;
                            trace_wdata    <= trace_decayed;
                        end else begin
                            proc_potential <= RESTING_POT;
                            trace_wdata    <= trace_decayed;
                        end
                    end

                    state <= S_UPDATE_WRITE;
                end

                S_UPDATE_WRITE: begin
                    mem_we    <= 1;
                    mem_addr  <= proc_neuron;
                    mem_wdata <= proc_potential;

                    ref_we    <= 1;
                    ref_addr  <= proc_neuron;
                    ref_wdata <= proc_refrac;

                    // The accumulator is consumed: clear it for the next timestep.
                    acc_we    <= 1;
                    acc_addr  <= proc_neuron;
                    acc_wdata <= 0;

                    trace_we   <= 1;
                    trace_addr <= proc_neuron;

                    if (proc_spiked) begin
                        spike_buf_curr[proc_neuron] <= 1'b1;
                        spike_out_valid <= 1;
                        spike_out_id    <= proc_neuron;
                        total_spikes    <= total_spikes + 1;
                    end

                    if (proc_neuron < NUM_NEURONS - 1) begin
                        proc_neuron <= proc_neuron + 1;
                        state       <= S_UPDATE_INIT;
                    end else begin
                        if (learn_enable)
                            state <= S_LEARN;
                        else
                            state <= S_DONE;
                        deliver_src <= 0;
                        deliver_dst <= 0;
                    end
                end

                // ---------------- LEARN phase ----------------
                // Here deliver_src iterates over post-synaptic neurons that
                // spiked this timestep and deliver_dst over pre-synaptic ones.
                S_LEARN: begin
                    if (deliver_src < NUM_NEURONS) begin
                        if (spike_buf_curr[deliver_src[NEURON_BITS-1:0]]) begin
                            if (deliver_dst < NUM_NEURONS) begin
                                if (deliver_dst[NEURON_BITS-1:0] != deliver_src[NEURON_BITS-1:0]) begin
                                    wt_addr_core <= {deliver_dst[NEURON_BITS-1:0], deliver_src[NEURON_BITS-1:0]};
                                    trace_addr   <= deliver_dst[NEURON_BITS-1:0];
                                    state        <= S_LEARN_READ;
                                end else begin
                                    deliver_dst <= deliver_dst + 1;
                                end
                            end else begin
                                deliver_src <= deliver_src + 1;
                                deliver_dst <= 0;
                            end
                        end else begin
                            deliver_src <= deliver_src + 1;
                            deliver_dst <= 0;
                        end
                    end else begin
                        state <= S_DONE;
                    end
                end

                // The address registers now drive the SRAMs; the registered
                // read data for them becomes valid in S_LEARN_WRITE. Without
                // this state the weight/trace values used there would belong
                // to the previously addressed entry.
                S_LEARN_READ: begin
                    state <= S_LEARN_WRITE;
                end

                S_LEARN_WRITE: begin
                    if (trace_rdata > 0) begin
                        wt_we_core   <= 1;
                        wt_addr_core <= {deliver_dst[NEURON_BITS-1:0], deliver_src[NEURON_BITS-1:0]};
                        // Signed clamp: potentiation saturates at THRESHOLD.
                        if (learn_sum > $signed(THRESHOLD))
                            wt_wdata_core <= THRESHOLD;
                        else
                            wt_wdata_core <= learn_sum[DATA_WIDTH-1:0];
                    end

                    deliver_dst <= deliver_dst + 1;
                    state       <= S_LEARN;
                end

                // ---------------- DONE ----------------
                S_DONE: begin
                    spike_buf_prev <= spike_buf_curr;
                    spike_buf_curr <= 0;

                    timestep_done  <= 1;
                    timestep_count <= timestep_count + 1;
                    proc_neuron    <= 0;
                    deliver_src    <= 0;

                    state <= S_IDLE;
                end

                default: state <= S_IDLE;
            endcase
        end
    end

endmodule
|
rtl/scalable_core_v2.v
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
rtl/spike_fifo.v
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Spike FIFO
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// spike_fifo
//
// Synchronous FIFO for spike identifiers.
//   * Pointers are PTR_BITS+1 wide; the low PTR_BITS bits index the storage
//     and the difference of the two pointers is the occupancy (`count`).
//   * `pop_data` is a combinational view of the oldest entry.
//   * A push while full and a pop while empty are ignored.
//   * `clear` synchronously empties the FIFO; rst_n is an async active-low reset.
// NOTE(review): `full` compares the occupancy with DEPTH, which presumably
// expects DEPTH == 2**PTR_BITS (true for the defaults) - confirm for overrides.
// ----------------------------------------------------------------------------
module spike_fifo #(
    parameter ID_WIDTH = 8,
    parameter DEPTH    = 64,
    parameter PTR_BITS = 6
)(
    input  wire                clk,
    input  wire                rst_n,
    input  wire                clear,

    input  wire                push,
    input  wire [ID_WIDTH-1:0] push_data,

    input  wire                pop,
    output wire [ID_WIDTH-1:0] pop_data,

    output wire                empty,
    output wire                full,
    output wire [PTR_BITS:0]   count
);

    reg [ID_WIDTH-1:0] mem [0:DEPTH-1];
    reg [PTR_BITS:0]   wr_ptr;
    reg [PTR_BITS:0]   rd_ptr;

    // Status flags derived from the two pointers.
    assign count    = wr_ptr - rd_ptr;
    assign full     = (count == DEPTH);
    assign empty    = (rd_ptr == wr_ptr);
    assign pop_data = mem[rd_ptr[PTR_BITS-1:0]];

    // Requests that are actually accepted this cycle.
    wire accept_push = push && !full;
    wire accept_pop  = pop && !empty;

    // Write side: storage and write pointer.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            wr_ptr <= 0;
        end else if (clear) begin
            wr_ptr <= 0;
        end else if (accept_push) begin
            mem[wr_ptr[PTR_BITS-1:0]] <= push_data;
            wr_ptr <= wr_ptr + 1;
        end
    end

    // Read side: read pointer only (data is read combinationally).
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rd_ptr <= 0;
        end else if (clear) begin
            rd_ptr <= 0;
        end else if (accept_pop) begin
            rd_ptr <= rd_ptr + 1;
        end
    end

endmodule
|
rtl/sram.v
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// SRAM
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// sram
//
// Behavioural memory with one read/write port (A) and one read-only port (B).
// Both read outputs are registered: data for an address presented in cycle N
// appears on rdata_* in cycle N+1. Port A is read-first: a write returns the
// previous contents of the addressed word on rdata_a. Every word starts out
// as INIT_VALUE (via an initial block).
// ----------------------------------------------------------------------------
module sram #(
    parameter DATA_WIDTH = 16,
    parameter ADDR_WIDTH = 6,
    parameter DEPTH      = (1 << ADDR_WIDTH),
    parameter [DATA_WIDTH-1:0] INIT_VALUE = {DATA_WIDTH{1'b0}}
)(
    input  wire                  clk,

    input  wire                  we_a,
    input  wire [ADDR_WIDTH-1:0] addr_a,
    input  wire [DATA_WIDTH-1:0] wdata_a,
    output reg  [DATA_WIDTH-1:0] rdata_a,

    input  wire [ADDR_WIDTH-1:0] addr_b,
    output reg  [DATA_WIDTH-1:0] rdata_b
);

    reg [DATA_WIDTH-1:0] mem [0:DEPTH-1];

    // Power-up contents.
    integer word;
    initial begin
        word = 0;
        while (word < DEPTH) begin
            mem[word] = INIT_VALUE;
            word = word + 1;
        end
    end

    // Port A: registered read of the old value, optional write.
    always @(posedge clk) begin
        rdata_a <= mem[addr_a];
        if (we_a) begin
            mem[addr_a] <= wdata_a;
        end
    end

    // Port B: registered read only.
    always @(posedge clk) begin
        rdata_b <= mem[addr_b];
    end

endmodule
|
rtl/stdp_synapse.v
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// STDP Synapse
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// stdp_synapse
//
// Single synapse with trace-based spike-timing-dependent plasticity.
//   * pre_trace / post_trace jump to TRACE_MAX on the respective spike and
//     otherwise decay by TRACE_DECAY per clock, floored at zero.
//   * With learn_enable set:
//       - post_spike while pre_trace > 0  -> weight += pre_trace  >> LEARN_RATE
//         (clamped at WEIGHT_MAX)
//       - pre_spike  while post_trace > 0 -> weight -= post_trace >> LEARN_RATE
//         (clamped at WEIGHT_MIN)
//     If both conditions hold in the same cycle the depression assignment is
//     the last one and therefore wins.
//   * post_current outputs the (pre-update) weight for one cycle after a
//     pre_spike and zero otherwise.
//
// The bounds are applied with signed arithmetic one bit wider than
// DATA_WIDTH. WEIGHT_MAX's default is an unsigned sized literal; comparing a
// signed sum against it directly makes the comparison unsigned, so any
// negative weight would look larger than WEIGHT_MAX and be forced to
// WEIGHT_MAX on potentiation. `$signed()` on the bounds avoids that.
// ----------------------------------------------------------------------------
module stdp_synapse #(
    parameter DATA_WIDTH  = 16,
    parameter TRACE_WIDTH = 8,
    parameter TRACE_MAX   = 8'd127,
    parameter TRACE_DECAY = 8'd4,
    parameter LEARN_RATE  = 8'd4,
    parameter WEIGHT_MAX  = 16'd800,
    parameter WEIGHT_MIN  = -16'sd800,
    parameter WEIGHT_INIT = 16'd0
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         learn_enable,
    input  wire                         pre_spike,
    input  wire                         post_spike,
    output reg  signed [DATA_WIDTH-1:0] weight,
    output reg  signed [DATA_WIDTH-1:0] post_current,
    output wire [TRACE_WIDTH-1:0]       pre_trace_out,
    output wire [TRACE_WIDTH-1:0]       post_trace_out
);

    reg [TRACE_WIDTH-1:0] pre_trace;
    reg [TRACE_WIDTH-1:0] post_trace;

    assign pre_trace_out  = pre_trace;
    assign post_trace_out = post_trace;

    // Weight increments: zero-extended trace, scaled down by LEARN_RATE bits.
    wire signed [DATA_WIDTH-1:0] ltp_delta;
    wire signed [DATA_WIDTH-1:0] ltd_delta;

    assign ltp_delta = {{(DATA_WIDTH-TRACE_WIDTH){1'b0}}, pre_trace}  >> LEARN_RATE;
    assign ltd_delta = {{(DATA_WIDTH-TRACE_WIDTH){1'b0}}, post_trace} >> LEARN_RATE;

    // Candidate weights with one guard bit; all operands are signed, so they
    // are sign-extended to the wider result.
    wire signed [DATA_WIDTH:0] ltp_sum  = weight + ltp_delta;
    wire signed [DATA_WIDTH:0] ltd_diff = weight - ltd_delta;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            pre_trace    <= 0;
            post_trace   <= 0;
            weight       <= WEIGHT_INIT;
            post_current <= 0;

        end else begin
            // Pre-synaptic trace.
            if (pre_spike) begin
                pre_trace <= TRACE_MAX;
            end else if (pre_trace > TRACE_DECAY) begin
                pre_trace <= pre_trace - TRACE_DECAY;
            end else begin
                pre_trace <= 0;
            end

            // Post-synaptic trace.
            if (post_spike) begin
                post_trace <= TRACE_MAX;
            end else if (post_trace > TRACE_DECAY) begin
                post_trace <= post_trace - TRACE_DECAY;
            end else begin
                post_trace <= 0;
            end

            if (learn_enable) begin
                // Potentiation: post fires after recent pre activity.
                if (post_spike && pre_trace > 0) begin
                    if (ltp_sum > $signed(WEIGHT_MAX))
                        weight <= WEIGHT_MAX;
                    else
                        weight <= ltp_sum[DATA_WIDTH-1:0];
                end

                // Depression: pre fires after recent post activity.
                if (pre_spike && post_trace > 0) begin
                    if (ltd_diff < $signed(WEIGHT_MIN))
                        weight <= WEIGHT_MIN;
                    else
                        weight <= ltd_diff[DATA_WIDTH-1:0];
                end
            end

            // Forward the current weight as synaptic current on a pre spike.
            if (pre_spike) begin
                post_current <= weight;
            end else begin
                post_current <= 0;
            end
        end
    end

endmodule
|
rtl/synapse.v
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Synapse
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// synapse
//
// Fixed-weight synapse: one clock after pre_spike is sampled high,
// post_current carries `weight`; in every other cycle (and in reset) it is 0.
// ----------------------------------------------------------------------------
module synapse #(
    parameter DATA_WIDTH = 16
)(
    input  wire                         clk,
    input  wire                         rst_n,
    input  wire                         pre_spike,
    input  wire signed [DATA_WIDTH-1:0] weight,
    output reg  signed [DATA_WIDTH-1:0] post_current
);

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            post_current <= 0;
        else
            // Gate the weight onto the output only for a definite spike.
            case (pre_spike)
                1'b1:    post_current <= weight;
                default: post_current <= 0;
            endcase
    end

endmodule
|
rtl/sync_tree.v
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// Sync Tree
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
`timescale 1ns/1ps
|
| 22 |
+
|
| 23 |
+
// ----------------------------------------------------------------------------
// sync_tree
//
// Purely combinational barrier helper:
//   all_done   - high when every bit of leaf_done is high
//   leaf_start - root_start replicated to every leaf
// clk and rst_n are part of the port list but are not used by this logic.
// ----------------------------------------------------------------------------
module sync_tree #(
    parameter NUM_LEAVES = 4
)(
    input  wire                  clk,
    input  wire                  rst_n,
    input  wire [NUM_LEAVES-1:0] leaf_done,
    output wire                  all_done,
    input  wire                  root_start,
    output wire [NUM_LEAVES-1:0] leaf_start
);

    // "All done" == "no leaf is still busy".
    assign all_done = ~(|(~leaf_done));

    // Fan the root start signal out to each leaf.
    genvar leaf;
    generate
        for (leaf = 0; leaf < NUM_LEAVES; leaf = leaf + 1) begin : g_start_fanout
            assign leaf_start[leaf] = root_start;
        end
    endgenerate

endmodule
|
rtl/uart_rx.v
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// UART Receiver
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// ----------------------------------------------------------------------------
// uart_rx
//
// 8-N-1 UART receiver.
//   * rx is passed through a two-flop synchroniser before use.
//   * A low level in idle starts a frame; the start bit is re-checked after
//     half a bit time and the frame is abandoned if the line has gone high.
//   * Each data bit is sampled one bit time after the previous sample point
//     (i.e. mid-bit), LSB first.
//   * The stop bit is sampled mid-bit as well. `data` is updated and `valid`
//     pulses for one clock only when the stop bit is high. A frame whose stop
//     bit is low (framing error, or a line held low) is discarded so it cannot
//     be mistaken for a received byte.
// clk_cnt is 16 bits wide, so CLK_FREQ / BAUD must not exceed 65536.
// ----------------------------------------------------------------------------
module uart_rx #(
    parameter CLK_FREQ = 100_000_000,
    parameter BAUD     = 115200
)(
    input  wire       clk,
    input  wire       rst_n,
    input  wire       rx,
    output reg  [7:0] data,
    output reg        valid
);

    localparam CLKS_PER_BIT = CLK_FREQ / BAUD;
    localparam HALF_BIT     = CLKS_PER_BIT / 2;

    localparam S_IDLE  = 2'd0;
    localparam S_START = 2'd1;
    localparam S_DATA  = 2'd2;
    localparam S_STOP  = 2'd3;

    reg [1:0]  state;
    reg [15:0] clk_cnt;
    reg [2:0]  bit_idx;
    reg [7:0]  shift;
    reg        rx_s1, rx_s2;

    // Two-stage synchroniser; resets to the idle (high) line level.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rx_s1 <= 1;
            rx_s2 <= 1;
        end else begin
            rx_s1 <= rx;
            rx_s2 <= rx_s1;
        end
    end

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            state   <= S_IDLE;
            valid   <= 0;
            clk_cnt <= 0;
            bit_idx <= 0;
            shift   <= 0;
            data    <= 0;
        end else begin
            valid <= 0;   // single-cycle pulse
            case (state)
                S_IDLE: begin
                    if (!rx_s2) begin
                        clk_cnt <= 0;
                        state   <= S_START;
                    end
                end
                S_START: begin
                    if (clk_cnt == HALF_BIT - 1) begin
                        // Still low in the middle of the start bit -> real frame.
                        if (!rx_s2) begin
                            clk_cnt <= 0;
                            bit_idx <= 0;
                            state   <= S_DATA;
                        end else
                            state <= S_IDLE;
                    end else
                        clk_cnt <= clk_cnt + 1;
                end
                S_DATA: begin
                    if (clk_cnt == CLKS_PER_BIT - 1) begin
                        clk_cnt <= 0;
                        shift   <= {rx_s2, shift[7:1]};   // LSB arrives first
                        if (bit_idx == 7)
                            state <= S_STOP;
                        else
                            bit_idx <= bit_idx + 1;
                    end else
                        clk_cnt <= clk_cnt + 1;
                end
                S_STOP: begin
                    if (clk_cnt == CLKS_PER_BIT - 1) begin
                        // Publish the byte only for a valid (high) stop bit.
                        if (rx_s2) begin
                            data  <= shift;
                            valid <= 1;
                        end
                        state <= S_IDLE;
                    end else
                        clk_cnt <= clk_cnt + 1;
                end
            endcase
        end
    end

endmodule
|
rtl/uart_tx.v
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ============================================================================
|
| 2 |
+
// UART Transmitter
|
| 3 |
+
// ============================================================================
|
| 4 |
+
//
|
| 5 |
+
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
|
| 6 |
+
// Company No. 17054540 — UK Patent Application No. 2602902.6
|
| 7 |
+
//
|
| 8 |
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
// you may not use this file except in compliance with the License.
|
| 10 |
+
// You may obtain a copy of the License at
|
| 11 |
+
//
|
| 12 |
+
// http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
//
|
| 14 |
+
// Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
// See the License for the specific language governing permissions and
|
| 18 |
+
// limitations under the License.
|
| 19 |
+
// ============================================================================
|
| 20 |
+
|
| 21 |
+
// UART transmitter, 8 data bits, no parity, 1 stop bit.
//
// Frame on `tx`: one start bit (0), eight data bits sent LSB first, one stop
// bit (1). The line idles high.
//
// Parameters:
//   CLK_FREQ - frequency of `clk` in Hz.
//   BAUD     - bit rate; each bit lasts CLK_FREQ / BAUD clock cycles
//              (integer division).
//
// Ports:
//   clk   - clock; all state updates on the rising edge.
//   rst_n - asynchronous reset, active low.
//   data  - byte to send; captured when `valid` is seen in the idle state.
//   valid - request to send `data`; only sampled while `ready` is high.
//   tx    - serial output (registered, so it lags the state by one clock;
//           every bit still lasts CLKS_PER_BIT clocks).
//   ready - high while the transmitter is idle and can accept a byte.
module uart_tx #(
    parameter CLK_FREQ = 100_000_000,
    parameter BAUD     = 115200
)(
    input  wire       clk,
    input  wire       rst_n,
    input  wire [7:0] data,
    input  wire       valid,
    output reg        tx,
    output wire       ready
);

    // Number of clock cycles per serial bit.
    localparam CLKS_PER_BIT = CLK_FREQ / BAUD;

    // Width of the bit-period counter. It must be able to hold
    // CLKS_PER_BIT - 1; a fixed 16-bit counter would never reach the terminal
    // count when CLK_FREQ / BAUD exceeds 65536 and the frame would stall in
    // the start bit. $clog2(1) is 0, so clamp to at least one bit.
    localparam CNT_W = (CLKS_PER_BIT > 1) ? $clog2(CLKS_PER_BIT) : 1;

    // Transmit state encoding.
    localparam S_IDLE  = 2'd0;
    localparam S_START = 2'd1;
    localparam S_DATA  = 2'd2;
    localparam S_STOP  = 2'd3;

    reg [1:0]       state;
    reg [CNT_W-1:0] clk_cnt;   // clocks elapsed in the current bit
    reg [2:0]       bit_idx;   // index of the data bit being sent (0..7)
    reg [7:0]       shift;     // remaining data bits, next bit in shift[0]

    assign ready = (state == S_IDLE);

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            state   <= S_IDLE;
            tx      <= 1;
            clk_cnt <= 0;
            bit_idx <= 0;
            shift   <= 0;
        end else begin
            case (state)
                // Line idles high; latch the byte and start a frame on valid.
                S_IDLE: begin
                    tx <= 1;
                    if (valid) begin
                        shift   <= data;
                        state   <= S_START;
                        clk_cnt <= 0;
                    end
                end

                // Start bit: drive low for one bit period.
                S_START: begin
                    tx <= 0;
                    if (clk_cnt == CLKS_PER_BIT - 1) begin
                        clk_cnt <= 0;
                        bit_idx <= 0;
                        state   <= S_DATA;
                    end else
                        clk_cnt <= clk_cnt + 1;
                end

                // Data bits: drive shift[0], then shift right once per bit
                // period so the next bit moves into position.
                S_DATA: begin
                    tx <= shift[0];
                    if (clk_cnt == CLKS_PER_BIT - 1) begin
                        clk_cnt <= 0;
                        shift   <= {1'b0, shift[7:1]};
                        if (bit_idx == 7)
                            state <= S_STOP;
                        else
                            bit_idx <= bit_idx + 1;
                    end else
                        clk_cnt <= clk_cnt + 1;
                end

                // Stop bit: drive high for one bit period, then go idle.
                // clk_cnt is not cleared here; S_IDLE clears it when the next
                // byte is accepted.
                S_STOP: begin
                    tx <= 1;
                    if (clk_cnt == CLKS_PER_BIT - 1)
                        state <= S_IDLE;
                    else
                        clk_cnt <= clk_cnt + 1;
                end
            endcase
        end
    end

endmodule
|
run_regression.sh
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Regression runner: compiles each testbench together with the shared RTL
# sources using Icarus Verilog, runs it under vvp, and prints the summary lines
# (PASSED / FAILED / RESULTS / passed) that the simulation emits.
#
# Usage: ./run_regression.sh
# Env:   NEUROMORPHIC_ROOT  repository root containing rtl/ and tb/
#                           (default: the directory this script lives in)
#        SIM_TIMEOUT        per-testbench vvp time limit in seconds
#                           (default: 120)
# Exit:  0 if every testbench compiled and its simulation exited cleanly
#        within the time limit, 1 otherwise. Individual check results are
#        reported by the testbenches themselves in the printed summary lines.

set -u

# Locate the repository instead of relying on one developer's absolute path.
repo_root=${NEUROMORPHIC_ROOT:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)}
if [[ -z "$repo_root" ]] || ! cd -- "$repo_root"; then
  printf 'run_regression: cannot enter repository root "%s"\n' "$repo_root" >&2
  exit 1
fi

sim_timeout=${SIM_TIMEOUT:-120}

# RTL sources compiled into every testbench.
rtl_sources=(
  rtl/sram.v
  rtl/spike_fifo.v
  rtl/uart_tx.v
  rtl/uart_rx.v
  rtl/chip_link.v
  rtl/scalable_core_v2.v
  rtl/neuromorphic_mesh.v
  rtl/host_interface.v
  rtl/neuromorphic_top.v
  rtl/sync_tree.v
  rtl/async_router.v
  rtl/async_noc_mesh.v
  rtl/rv32i_core.v
  rtl/mmio_bridge.v
  rtl/multi_chip_router.v
  rtl/rv32im_cluster.v
)

# Testbenches, run in this order.
testbenches=(
  tb/tb_p13a.v
  tb/tb_p15_traces.v
  tb/tb_p17_delays.v
  tb/tb_p19_microcode.v
  tb/tb_p20_hierarchical.v
  tb/tb_p21a_dendrites.v
  tb/tb_p21b_observe.v
  tb/tb_p21c_power.v
  tb/tb_p21d_learning.v
  tb/tb_p21e_chiplink.v
  tb/tb_p22a_cuba.v
  tb/tb_p22c_learning.v
  tb/tb_p22b_compartments.v
  tb/tb_p22d_axontypes.v
  tb/tb_p22e_noc.v
  tb/tb_p22f_riscv.v
  tb/tb_p22g_multichip.v
  tb/tb_p22h_power.v
  tb/tb_p23a_neuron_arith.v
  tb/tb_p23b_comp_synapse.v
  tb/tb_p23c_scale.v
  tb/tb_p23d_riscv.v
  tb/tb_p24_final.v
  tb/tb_p25_final.v
  tb/tb_stress.v
)

failures=0

for tb in "${testbenches[@]}"; do
  echo "=== $tb ==="
  # The top-level module is named after the file (tb/tb_p13a.v -> tb_p13a).
  tb_mod=$(basename "$tb" .v)

  if iverilog -g2012 -DSIMULATION -s "$tb_mod" -o test_reg.vvp \
      "${rtl_sources[@]}" "$tb" 2>&1; then
    timeout "$sim_timeout" vvp test_reg.vvp 2>&1 \
      | grep -E "PASSED|FAILED|RESULTS|passed"
    # Status of `timeout vvp`, not of grep; must be read right after the pipe.
    sim_status=${PIPESTATUS[0]}
    if (( sim_status == 124 )); then
      # timeout(1) exits 124 when it had to kill the command; without this
      # message a hung simulation would print nothing at all.
      echo "TIMEOUT"
      failures=$((failures + 1))
    elif (( sim_status != 0 )); then
      echo "SIMULATION ERROR (exit $sim_status)"
      failures=$((failures + 1))
    fi
  else
    echo "COMPILE ERROR"
    failures=$((failures + 1))
  fi
  echo ""
done

if (( failures > 0 )); then
  exit 1
fi
exit 0
|