|
|
--- |
|
|
language: |
|
|
- en |
|
|
base_model: Qwen/Qwen3-8B |
|
|
tags: |
|
|
- chat |
|
|
library_name: transformers |
|
|
license: apache-2.0 |
|
|
--- |
|
|
|
|
|
# CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis |
|
|
|
|
|
<div align="center"> |
|
|
<a href="https://huggingface.co/wyt2000/CodeV-SVA-14B"><img src="https://img.shields.io/static/v1?label=Model&message=HuggingFace&color=yellow"></a>   |
|
|
<a href="https://github.com/wyt2000/CodeV-SVA"><img src="https://img.shields.io/static/v1?label=Code&message=Github&color=blue"></a>   |
|
|
</div> |
|
|
|
|
|
## Introduction |
|
|
|
|
|
We introduce CodeV-SVA, a family of large language models designed to translate natural-language verification properties into SystemVerilog Assertions (SVAs). |
|
|
|
|
|
Open-Source Plan: |
|
|
|
|
|
- Model ✓ |
|
|
- Evaluation code ✓ |
|
|
- Paper |
|
|
- Dataset |
|
|
- Data synthesis and training code |
|
|
|
|
|
## Evaluation Results |
|
|
|
|
|
| Model | | NL2SVA-Human | | | | NL2SVA-Machine | | |
|
|
| :-------------------- | :---------: | :----------: | :---------: | ---- | :---------: | :------------: | :---------: | |
|
|
| | Func.@1 | Func.@16 | Func.@32 | | Func.@1 | Func.@16 | Func.@32 | |
|
|
| | | | | | | | | |
|
|
| DeepSeek-R1-671B | <u>74.6</u> | **90.3** | <u>90.4</u> | | 81.0 | 93.3 | 94.3 | |
|
|
| GPT-5 | 71.8 | <u>90.2</u> | **92.7** | | 81.8 | 93.2 | 94.3 | |
|
|
| DeepSeek-V3.1-671B | 63.1 | 81.4 | 84.9 | | <u>83.8</u> | 92.9 | 93.6 | |
|
|
| GPT-4o | 64.1 | 75.2 | 78.1 | | 68.5 | 81.3 | 83.7 | |
|
|
| | | | | | | | | |
|
|
| RTLCoder-DS-v1.1-6.7B | 25.9 | 58.8 | 65.8 | | 21.7 | 54.8 | 60.8 | |
|
|
| CodeV-R1-Qwen-7B | 25.2 | 55.8 | 61.6 | | 37.4 | 76.6 | 83.0 | |
|
|
| | | | | | | | | |
|
|
| Qwen3-8B | 32.3 | 71.6 | 74.0 | | 46.1 | 88.0 | 90.5 | |
|
|
| Qwen3-14B | 61.6 | 86.1 | 87.7 | | 75.3 | 92.7 | 94.3 | |
|
|
| | | | | | | | | |
|
|
| CodeV-SVA-no-think-8B | 65.8 | 84.4 | 86.3 | | 78.7 | 90.9 | 91.9 |
|
|
| CodeV-SVA-8B | 72.0 | 88.8 | <u>90.4</u> | | 83.5 | **96.3** | **97.2** |
|
|
| CodeV-SVA-14B | **75.8** | 89.4 | <u>90.4</u> | | **84.0** | <u>94.9</u> | <u>95.8</u> |
|
|
|
|
|
See our GitHub [repo](https://github.com/wyt2000/CodeV-SVA/?tab=readme-ov-file#evaluation) for reproduction. |
|
|
|
|
|
## Models |
|
|
|
|
|
| Model | Download | |
|
|
| -------- | -------- | |
|
|
| CodeV-SVA-8B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-8B) | |
|
|
| CodeV-SVA-no-think-8B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-no-think-8B) | |
|
|
| CodeV-SVA-14B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-14B) | |
|
|
|
|
|
## Usage |
|
|
|
|
|
````python |
|
|
from vllm import LLM, SamplingParams |
|
|
from transformers import AutoTokenizer |
|
|
|
|
|
# Local checkpoint directory; point this at the downloaded CodeV-SVA model weights.
MODEL_DIR = "path/to/CodeV-SVA/"
|
|
|
|
|
def get_nl2sva_prompt(testbench: str, problem: str, reset_signal=None) -> str:
    """Assemble the NL2SVA user prompt for a single translation request.

    Args:
        testbench: SystemVerilog testbench source the assertion must refer to.
        problem: Natural-language description of the property to check.
        reset_signal: Optional signal name to use as the ``disable iff``
            condition; when ``None`` the example assertion omits it.

    Returns:
        The fully formatted prompt string, ending with ``Answer:``.
    """
    # Testbench context followed by the question itself.
    header = (
        "Here is the testbench to perform your translation:\n"
        + testbench
        + "\nQuestion: Create a SVA assertion that checks: "
        + problem
        + "\n"
    )

    # Choose the few-shot example (and an extra instruction) based on whether
    # a disable-condition signal was requested.
    if reset_signal is None:
        reset_note = ""
        example = '''asrt: assert property (@(posedge clk)
    (a && b) != 1'b1
);'''
    else:
        reset_note = f"You should use `{reset_signal}` as the disable condition signal. "
        example = f'''asrt: assert property (@(posedge clk) disable iff ({reset_signal})
    (a && b) != 1'b1
);'''

    # Output-format instructions shared by both variants.
    instructions = f"""Do not add code to output an error message string.
Enclose your SVA code with ```systemverilog and ```. Only output the code snippet and do NOT output anything else.
For example,
```systemverilog
{example}
```
Answer:"""

    return header + reset_note + instructions
|
|
|
|
|
if __name__ == "__main__":
    # Task framing: translate a natural-language property into a concrete SVA.
    system_prompt = "You are an AI assistant tasked with formal verification of register transfer level (RTL) designs.\nYour job is to translate a description of an assertion to concrete SystemVerilog Assertion (SVA) implementation.\n"

    # Example RTL context: a 1-read/1-write FIFO testbench the assertion refers to.
    testbench = "module fifo_1r1w_tb (\n clk,\n reset_,\n wr_vld,\n wr_data,\n wr_ready,\n rd_vld,\n rd_data,\n rd_ready\n );\n \n parameter FIFO_DEPTH = 4;\n parameter DATA_WIDTH = 1;\n\nlocalparam FIFO_DEPTH_log2 = $clog2(FIFO_DEPTH); \nlocalparam DATA_WIDTH_log2 = $clog2(DATA_WIDTH); \n\n input clk;\n input reset_;\n input wr_vld;\n input [DATA_WIDTH-1:0] wr_data;\n input wr_ready;\n input rd_vld;\n input [DATA_WIDTH-1:0] rd_data;\n input rd_ready;\n\nwire wr_push;\nwire rd_pop;\n\nwire tb_reset;\nassign tb_reset = (reset_ == 1'b0);\n\nwire fifo_full;\nassign wr_push = wr_vld && wr_ready;\nassign rd_pop = rd_vld && rd_ready; \n\nreg [DATA_WIDTH-1:0] fifo_array [FIFO_DEPTH-1:0]; //fifo array - shift register\nreg [FIFO_DEPTH_log2-1:0] fifo_rd_ptr; //fifo array - rd_ptr\nwire actual_fifo_pop; // actual pop == pop\nreg fifo_empty; // fifo empty\nwire [DATA_WIDTH-1:0] fifo_out_data; // dout\n\n// ---- shift register code start ----\nalways @(posedge clk) begin\n if (!reset_) fifo_array[0] <= 'd0;\n else if (wr_push) begin\n fifo_array[0] <= wr_data;\n end else fifo_array[0] <= fifo_array[0];\nend\nfor (genvar i = 1; i < FIFO_DEPTH; i++ ) begin : loop_id\n always @(posedge clk) begin\n if (!reset_) fifo_array[i] <= 'd0;\n else if (wr_push) begin\n fifo_array[i] <= fifo_array[i-1];\n end else fifo_array[i] <= fifo_array[i];\n end\nend\n\n// ---- read pointer/fifo empty code start ----\nalways @(posedge clk) begin\n if (!reset_) begin\n fifo_rd_ptr <= 'd0;\n end else if (wr_push && fifo_empty) begin\n fifo_rd_ptr <= 'd0;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0)) begin\n fifo_rd_ptr <= 'd0;\n end else begin\n fifo_rd_ptr <= fifo_rd_ptr + wr_push - rd_pop;\n end\n if (!reset_) begin\n fifo_empty <= 'd1;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0) && !wr_push) begin\n fifo_empty <= 'd1;\n end else if ((fifo_rd_ptr != 'd0) || wr_push && !rd_pop) begin\n fifo_empty <= 'd0;\n end\nend\n// ---- fifo full and dout code start ----\nassign fifo_full = (fifo_rd_ptr == (FIFO_DEPTH - 1)) && !fifo_empty;\nassign fifo_out_data = fifo_array[fifo_rd_ptr];\n\nendmodule"

    # Natural-language property to translate, and the disable-condition signal.
    problem = "When there is a write push to the FIFO, data is eventually popped. Use the signals 'rd_pop' and 'wr_push'."
    reset_signal = "tb_reset"
    user_prompt = get_nl2sva_prompt(testbench, problem, reset_signal)

    dialog = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Render the chat into the model's prompt format without tokenizing,
    # leaving the assistant turn open for generation.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    prompt = tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=True)
    print(f"### Prompt:\n{prompt}")

    model = LLM(
        MODEL_DIR,
        tensor_parallel_size=4,  # adjust to the number of GPUs available
    )

    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
        max_tokens=32768,  # generous budget for long reasoning outputs
        n=1,  # samples per prompt; increase for pass@k-style evaluation
    )

    # generate() returns one RequestOutput per input prompt; print each
    # sampled completion for the single prompt we submitted.
    responses = model.generate(prompt, sampling_params)
    for output in responses[0].outputs:
        print(f"### Response:\n{output.text}")
|
|
```` |
|
|
|
|
|
## Citation |
|
|
|
|
|
```bibtex
|
|
@misc{CodeV-SVA, |
|
|
title={CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis}, |
|
|
author={Yutong Wu and Chenrui Cao and Pengwei Jin and Di Huang and Rui Zhang and Xishan Zhang and Zidong Du and Qi Guo and Xing Hu}, |
|
|
year={2025}, |
|
|
howpublished={\url{https://huggingface.co/wyt2000/CodeV-SVA-14B}}, |
|
|
} |
|
|
``` |