---
language:
- en
base_model: Qwen/Qwen3-8B
tags:
- chat
library_name: transformers
license: apache-2.0
---
# CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis
<div align="center">
<a href="https://huggingface.co/wyt2000/CodeV-SVA-14B"><img src="https://img.shields.io/static/v1?label=Model&message=HuggingFace&color=yellow"></a>  
<a href="https://github.com/wyt2000/CodeV-SVA"><img src="https://img.shields.io/static/v1?label=Code&message=Github&color=blue"></a>  
</div>
## Introduction
We introduce CodeV-SVA, a family of large language models designed to translate natural-language verification properties into SystemVerilog Assertions (SVAs).
Open-Source Plan:
- Model ✓
- Evaluation code ✓
- Paper
- Dataset
- Data synthesis and training code
## Evaluation Results
| Model | | NL2SVA-Human | | | | NL2SVA-Machine | |
| :-------------------- | :---------: | :----------: | :---------: | ---- | :---------: | :------------: | :---------: |
| | Func.@1 | Func.@16 | Func.@32 | | Func.@1 | Func.@16 | Func.@32 |
| | | | | | | | |
| DeepSeek-R1-671B | <u>74.6</u> | **90.3** | <u>90.4</u> | | 81.0 | 93.3 | 94.3 |
| GPT-5 | 71.8 | <u>90.2</u> | **92.7** | | 81.8 | 93.2 | 94.3 |
| DeepSeek-V3.1-671B | 63.1 | 81.4 | 84.9 | | <u>83.8</u> | 92.9 | 93.6 |
| GPT-4o | 64.1 | 75.2 | 78.1 | | 68.5 | 81.3 | 83.7 |
| | | | | | | | |
| RTLCoder-DS-v1.1-6.7B | 25.9 | 58.8 | 65.8 | | 21.7 | 54.8 | 60.8 |
| CodeV-R1-Qwen-7B | 25.2 | 55.8 | 61.6 | | 37.4 | 76.6 | 83.0 |
| | | | | | | | |
| Qwen3-8B | 32.3 | 71.6 | 74.0 | | 46.1 | 88.0 | 90.5 |
| Qwen3-14B | 61.6 | 86.1 | 87.7 | | 75.3 | 92.7 | 94.3 |
| | | | | | | | |
| SVACoder-no-think-8B | 65.8 | 84.4 | 86.3 | | 78.7 | 90.9 | 91.9 |
| SVACoder-8B | 72.0 | 88.8 | <u>90.4</u> | | 83.5 | **96.3** | **97.2** |
| SVACoder-14B | **75.8** | 89.4 | <u>90.4</u> | | **84.0** | <u>94.9</u> | <u>95.8</u> |
See our GitHub [repo](https://github.com/wyt2000/CodeV-SVA/?tab=readme-ov-file#evaluation) for reproduction.
## Models
| Model | Download |
| -------- | -------- |
| CodeV-SVA-8B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-8B) |
| CodeV-SVA-no-think-8B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-no-think-8B) |
| CodeV-SVA-14B | [🤗HuggingFace](https://huggingface.co/wyt2000/CodeV-SVA-14B) |
## Usage
````python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer
MODEL_DIR = "path/to/CodeV-SVA/"
def get_nl2sva_prompt(testbench: str, problem: str, reset_signal=None) -> str:
prompt = ""
prompt += "Here is the testbench to perform your translation:\n"
prompt += testbench
prompt += "\nQuestion: Create a SVA assertion that checks: "
prompt += problem + "\n"
if reset_signal is not None:
prompt += f"You should use `{reset_signal}` as the disable condition signal. "
example = f'''asrt: assert property (@(posedge clk) disable iff ({reset_signal})
(a && b) != 1'b1
);'''
else:
example = '''asrt: assert property (@(posedge clk)
(a && b) != 1'b1
);'''
prompt += f"""Do not add code to output an error message string.
Enclose your SVA code with ```systemverilog and ```. Only output the code snippet and do NOT output anything else.
For example,
```systemverilog
{example}
```
Answer:"""
return prompt
if __name__ == "__main__":
    # Fixed system prompt: translate an assertion description into SVA.
    system_prompt = "You are an AI assistant tasked with formal verification of register transfer level (RTL) designs.\nYour job is to translate a description of an assertion to concrete SystemVerilog Assertion (SVA) implementation.\n"
    # Example 1r1w FIFO testbench the generated assertion must bind against.
    testbench = "module fifo_1r1w_tb (\n clk,\n reset_,\n wr_vld,\n wr_data,\n wr_ready,\n rd_vld,\n rd_data,\n rd_ready\n );\n \n parameter FIFO_DEPTH = 4;\n parameter DATA_WIDTH = 1;\n\nlocalparam FIFO_DEPTH_log2 = $clog2(FIFO_DEPTH); \nlocalparam DATA_WIDTH_log2 = $clog2(DATA_WIDTH); \n\n input clk;\n input reset_;\n input wr_vld;\n input [DATA_WIDTH-1:0] wr_data;\n input wr_ready;\n input rd_vld;\n input [DATA_WIDTH-1:0] rd_data;\n input rd_ready;\n\nwire wr_push;\nwire rd_pop;\n\nwire tb_reset;\nassign tb_reset = (reset_ == 1'b0);\n\nwire fifo_full;\nassign wr_push = wr_vld && wr_ready;\nassign rd_pop = rd_vld && rd_ready; \n\nreg [DATA_WIDTH-1:0] fifo_array [FIFO_DEPTH-1:0]; //fifo array - shift register\nreg [FIFO_DEPTH_log2-1:0] fifo_rd_ptr; //fifo array - rd_ptr\nwire actual_fifo_pop; // actual pop == pop\nreg fifo_empty; // fifo empty\nwire [DATA_WIDTH-1:0] fifo_out_data; // dout\n\n// ---- shift register code start ----\nalways @(posedge clk) begin\n if (!reset_) fifo_array[0] <= 'd0;\n else if (wr_push) begin\n fifo_array[0] <= wr_data;\n end else fifo_array[0] <= fifo_array[0];\nend\nfor (genvar i = 1; i < FIFO_DEPTH; i++ ) begin : loop_id\n always @(posedge clk) begin\n if (!reset_) fifo_array[i] <= 'd0;\n else if (wr_push) begin\n fifo_array[i] <= fifo_array[i-1];\n end else fifo_array[i] <= fifo_array[i];\n end\nend\n\n// ---- read pointer/fifo empty code start ----\nalways @(posedge clk) begin\n if (!reset_) begin\n fifo_rd_ptr <= 'd0;\n end else if (wr_push && fifo_empty) begin\n fifo_rd_ptr <= 'd0;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0)) begin\n fifo_rd_ptr <= 'd0;\n end else begin\n fifo_rd_ptr <= fifo_rd_ptr + wr_push - rd_pop;\n end\n if (!reset_) begin\n fifo_empty <= 'd1;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0) && !wr_push) begin\n fifo_empty <= 'd1;\n end else if ((fifo_rd_ptr != 'd0) || wr_push && !rd_pop) begin\n fifo_empty <= 'd0;\n end\nend\n// ---- fifo full and dout code start ----\nassign fifo_full = (fifo_rd_ptr == (FIFO_DEPTH - 1)) && !fifo_empty;\nassign fifo_out_data = fifo_array[fifo_rd_ptr];\n\nendmodule"
    problem = "When there is a write push to the FIFO, data is eventually popped. Use the signals 'rd_pop' and 'wr_push'."
    reset_signal = "tb_reset"
    user_prompt = get_nl2sva_prompt(testbench, problem, reset_signal)
    dialog = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # Render the chat template to a raw string so vLLM receives the final prompt.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    prompt = tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=True)
    print(f"### Prompt:\n{prompt}")
    model = LLM(
        MODEL_DIR,
        tensor_parallel_size=4,
    )
    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
        max_tokens=32768,
        n=1,  # raise n for pass@k-style sampling
    )
    responses = model.generate(prompt, sampling_params)
    # One prompt was submitted, so all sampled completions live in responses[0].
    for output in responses[0].outputs:
        print(f"### Response:\n{output.text}")
````
## Citation
```latex
@misc{CodeV-SVA,
title={CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis},
author={Yutong Wu and Chenrui Cao and Pengwei Jin and Di Huang and Rui Zhang and Xishan Zhang and Zidong Du and Qi Guo and Xing Hu},
year={2025},
howpublished={\url{https://huggingface.co/wyt2000/CodeV-SVA-14B}},
}
```