# Hugging Face Space: Bernadetta14/code-vulnerability-detector
# Status: Running
# File size: 4,173 Bytes

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hub repository id of the checkpoint used for both tokenizer and weights.
MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"

# Tokenizer and weights are loaded once, at import time, in float32 on CPU.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32, device_map="cpu")
model.eval()  # evaluation mode: the model is only used for generation here
print("Model loaded!")

# System message placed at the top of every prompt built by analyze_code.
SYSTEM_PROMPT = (
    "You are a code security expert. "
    "Analyze the given code for security vulnerabilities "
    "and provide a structured security report."
)

def analyze_code(code, lang, max_code_chars=800, max_new_tokens=300):
    """Generate a security report for a code snippet with the loaded model.

    The snippet is wrapped in a chat prompt delimited by ``<|im_start|>`` /
    ``<|im_end|>`` markers (system, user, then an assistant turn seeded with
    ``"Vulnerability :"``). The model continues that turn with sampling
    disabled, and only the newly generated tokens are decoded.

    Args:
        code: Source code to analyze. ``None``, empty or whitespace-only
            input short-circuits with a hint message; the model is not run.
        lang: Language name inserted into the instruction text and used as
            the code-fence tag. Falls back to ``"python"`` when empty/``None``.
        max_code_chars: Number of leading characters of ``code`` placed in
            the prompt; anything beyond that is dropped without notice.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The report text, prefixed with ``"Vulnerability :"``, or the hint
        message ``"Please enter some code to analyze."`` for empty input.
    """
    # Guard against None as well as blank text: an emptied editor widget may
    # deliver None, and None.strip() would raise AttributeError.
    if code is None or not code.strip():
        return "Please enter some code to analyze."

    # A cleared language selector would otherwise be rendered as the literal
    # text "None" inside the prompt.
    lang = lang or "python"

    # The assistant turn is seeded with this prefix; it is re-attached to the
    # decoded continuation below so the report reads as one piece.
    report_prefix = "Vulnerability :"
    snippet = code[:max_code_chars]

    prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}<|im_end|>
<|im_start|>user
Analyze this {lang} code for security vulnerabilities:

```{lang}
{snippet}
```

Provide a structured security report:<|im_end|>
<|im_start|>assistant
{report_prefix}"""

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            # The EOS id is reused as the padding id for generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; slice off the prompt tokens.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return report_prefix + result

# Example vulnerable snippets. Each one is kept in its own constant and then
# collected in EXAMPLES under the label used to select it.
_SQL_INJECTION_PYTHON = """\
def login(username, password):
    query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
    result = db.execute(query)
    return result
"""

_XSS_JAVASCRIPT = """\
function displayMessage(userInput) {
    document.getElementById('output').innerHTML = userInput;
}
"""

_COMMAND_INJECTION_PHP = """\
<?php
$filename = $_GET['file'];
$output = shell_exec('cat ' . $filename);
echo $output;
?>
"""

EXAMPLES = {
    "Python - SQL Injection": _SQL_INJECTION_PYTHON,
    "JavaScript - XSS": _XSS_JAVASCRIPT,
    "PHP - Command Injection": _COMMAND_INJECTION_PHP,
}

# Build the web UI: language selector, code editor and example picker in the
# left column, the generated report in the right column.
with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Code Vulnerability Detector
    
    Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
    Model akan menganalisis kode dan memberikan laporan terstruktur berisi
    jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.
    
    **Model:** Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
    """)

    # Values offered by the language selector; also used to validate the
    # language derived from an example label.
    language_choices = ["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"]

    with gr.Row():
        with gr.Column():
            lang_input = gr.Dropdown(
                choices=language_choices,
                value="python",
                label="Programming Language"
            )
            code_input = gr.Code(
                label="Code to Analyze",
                language="python",
                lines=15,
                value=EXAMPLES["Python - SQL Injection"]
            )
            with gr.Row():
                example_btn = gr.Dropdown(
                    choices=list(EXAMPLES.keys()),
                    label="Load Example",
                    value="Python - SQL Injection"
                )
                analyze_btn = gr.Button("Analyze Code", variant="primary")

        with gr.Column():
            output = gr.Textbox(
                label="Security Report",
                lines=15,
                interactive=False
            )

    gr.Markdown("""
    ---
    ### Model Performance
    | Metric | Value |
    |--------|-------|
    | Accuracy | 62% |
    | Training Samples | 4,187 |
    | Supported Languages | 11 |
    | Model Size | 0.5B params |
    
    **Dataset:** CyberNative/Code_Vulnerability_Security_DPO  
    **Method:** LoRA fine-tuning via MLX-LM (Apple Silicon)
    """)

    def load_example(example_name, current_lang=None):
        """Return the example's code and the language selection that fits it.

        Example labels have the form "<Language> - <Issue>". The language
        part is lower-cased and used as the new selector value when it is one
        of ``language_choices``; otherwise ``current_lang`` is passed through
        so the selector keeps its value. Unknown labels yield empty code.
        """
        snippet = EXAMPLES.get(example_name, "")
        label_lang = (example_name or "").split(" - ", 1)[0].lower()
        if example_name in EXAMPLES and label_lang in language_choices:
            return snippet, label_lang
        return snippet, current_lang

    # Loading an example also switches the language selector; otherwise the
    # JavaScript/PHP examples would be analyzed while labelled as "python".
    example_btn.change(
        fn=load_example,
        inputs=[example_btn, lang_input],
        outputs=[code_input, lang_input]
    )

    # Selector values whose highlighter id is spelled differently.
    highlight_aliases = {"c++": "cpp"}
    # NOTE(review): gr.Code appears to validate `language` against
    # gr.Code.languages and raise ValueError for anything else (which would
    # include "java") — confirm for the installed Gradio version. When that
    # attribute is absent, the original allow-list is used unchanged.
    supported_highlighters = getattr(gr.Code, "languages", None) or ["python", "javascript", "java"]

    def highlight_for(lang):
        """Return a gr.Code update selecting the highlighter for ``lang``.

        Falls back to "python" highlighting when ``lang`` is empty or has no
        supported highlighter.
        """
        if not lang:
            return gr.Code(language="python")
        candidate = highlight_aliases.get(lang, lang)
        return gr.Code(language=candidate if candidate in supported_highlighters else "python")

    # Update code language highlight when lang changes
    lang_input.change(fn=highlight_for, inputs=lang_input, outputs=code_input)

    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, lang_input],
        outputs=output
    )

demo.launch()