File size: 4,173 Bytes
e932edf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b933169
e932edf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b933169
e932edf
 
 
 
 
 
 
 
 
 
 
 
 
b933169
e932edf
 
b933169
e932edf
 
 
 
 
 
 
b933169
e932edf
 
b933169
e932edf
 
 
b933169
e932edf
 
 
 
 
 
b933169
e932edf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Checkpoint identifier handed to `from_pretrained`, and the system message
# placed in the system turn of every prompt built by `analyze_code`.
MODEL_ID = "Bernadetta14/qwen2.5-coder-0.5b-vulnerability-detector"
SYSTEM_PROMPT = (
    "You are a code security expert. "
    "Analyze the given code for security vulnerabilities "
    "and provide a structured security report."
)

# Load the tokenizer and the model once, when the module is executed, so that
# every request reuses the same objects. Weights are kept in float32 on CPU.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
_load_options = {"torch_dtype": torch.float32, "device_map": "cpu"}
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_load_options)
model.eval()  # evaluation mode: this app only runs generation
print("Model loaded!")

def analyze_code(code, lang):
    """Generate a security report for a code snippet with the loaded model.

    Args:
        code: Source code to analyze. ``None``, empty and whitespace-only
            input are all treated as "nothing to analyze". Only the first
            800 characters are placed in the prompt.
        lang: Language name; interpolated into the instruction and used as
            the label of the fenced code block in the prompt.

    Returns:
        A request to enter code when ``code`` is empty; otherwise the text
        generated by the model, prefixed with ``"Vulnerability :"``.
    """
    # NOTE: gr.Code is documented to pass `str | None`, so guard against None
    # before calling .strip() instead of crashing with AttributeError.
    if code is None or not code.strip():
        return "Please enter some code to analyze."

    # Cap the snippet length so the prompt stays small.
    max_code_chars = 800
    snippet = code[:max_code_chars]

    # The assistant turn is pre-filled with "Vulnerability :" so the model
    # continues the report from that heading.
    prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}<|im_end|>
<|im_start|>user
Analyze this {lang} code for security vulnerabilities:

```{lang}
{snippet}
```

Provide a structured security report:<|im_end|>
<|im_start|>assistant
Vulnerability :"""

    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=False,  # greedy decoding: same input, same report
            pad_token_id=tokenizer.eos_token_id,  # use the EOS id for padding
        )

    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Re-attach the heading that was pre-filled in the prompt.
    return "Vulnerability :" + result

# Examples of vulnerable code, offered through the "Load Example" dropdown.
# Each snippet is kept in its own constant; the dictionary maps the label
# shown in the dropdown to the snippet text.
_SQL_INJECTION_PYTHON = """\
def login(username, password):
    query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'"
    result = db.execute(query)
    return result
"""

_XSS_JAVASCRIPT = """\
function displayMessage(userInput) {
    document.getElementById('output').innerHTML = userInput;
}
"""

_COMMAND_INJECTION_PHP = """\
<?php
$filename = $_GET['file'];
$output = shell_exec('cat ' . $filename);
echo $output;
?>
"""

EXAMPLES = {
    "Python - SQL Injection": _SQL_INJECTION_PYTHON,
    "JavaScript - XSS": _XSS_JAVASCRIPT,
    "PHP - Command Injection": _COMMAND_INJECTION_PHP,
}

# Language names offered in the "Programming Language" dropdown. Also used to
# validate the language inferred from an example's label.
LANGUAGE_CHOICES = ["python", "javascript", "java", "c++", "c#", "php", "ruby", "swift", "go", "kotlin"]

# Dropdown language -> highlighter name passed to gr.Code(language=...).
# NOTE(review): gr.Code accepts only a fixed set of language names; "java" is
# presumably not one of them (confirm against the installed Gradio version),
# so it is not forwarded. "c++" is mapped to the "cpp" highlighter.
HIGHLIGHT_MODES = {"python": "python", "javascript": "javascript", "c++": "cpp"}
DEFAULT_HIGHLIGHT = "python"


def load_example(example_name):
    """Return the snippet stored under ``example_name``, or '' if unknown."""
    return EXAMPLES.get(example_name, "")


def load_example_with_language(example_name, current_lang):
    """Return ``(snippet, language)`` for the selected example.

    The language is taken from the part of the label before " - " (for
    example "PHP - Command Injection" -> "php") so that the prompt built by
    the analyzer names the language of the loaded snippet. When the label is
    unknown or its prefix is not one of the dropdown choices, the currently
    selected language is returned unchanged.
    """
    snippet = load_example(example_name)
    if example_name not in EXAMPLES:
        return snippet, current_lang
    inferred = example_name.partition(" - ")[0].lower()
    if inferred not in LANGUAGE_CHOICES:
        return snippet, current_lang
    return snippet, inferred


def highlight_update(lang):
    """Return a gr.Code update selecting the highlighter for ``lang``.

    Languages without an entry in HIGHLIGHT_MODES fall back to the Python
    highlighter.
    """
    return gr.Code(language=HIGHLIGHT_MODES.get(lang, DEFAULT_HIGHLIGHT))


with gr.Blocks(title="Code Vulnerability Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Code Vulnerability Detector
    
    Deteksi celah keamanan dalam kode secara otomatis menggunakan AI.
    Model akan menganalisis kode dan memberikan laporan terstruktur berisi
    jenis vulnerability, tingkat bahaya, penjelasan, dan saran perbaikan.
    
    **Model:** Qwen2.5-Coder-0.5B (fine-tuned on CyberNative Security Dataset)
    """)

    with gr.Row():
        # Left column: inputs (language, code editor, example picker, button).
        with gr.Column():
            lang_input = gr.Dropdown(
                choices=LANGUAGE_CHOICES,
                value="python",
                label="Programming Language"
            )
            code_input = gr.Code(
                label="Code to Analyze",
                language="python",
                lines=15,
                value=EXAMPLES["Python - SQL Injection"]
            )
            with gr.Row():
                example_btn = gr.Dropdown(
                    choices=list(EXAMPLES.keys()),
                    label="Load Example",
                    value="Python - SQL Injection"
                )
                analyze_btn = gr.Button("Analyze Code", variant="primary")

        # Right column: read-only report produced by the model.
        with gr.Column():
            output = gr.Textbox(
                label="Security Report",
                lines=15,
                interactive=False
            )

    gr.Markdown("""
    ---
    ### Model Performance
    | Metric | Value |
    |--------|-------|
    | Accuracy | 62% |
    | Training Samples | 4,187 |
    | Supported Languages | 11 |
    | Model Size | 0.5B params |
    
    **Dataset:** CyberNative/Code_Vulnerability_Security_DPO  
    **Method:** LoRA fine-tuning via MLX-LM (Apple Silicon)
    """)

    # Load example: fill the editor and switch the language dropdown to the
    # example's language, so the prompt does not mislabel the snippet.
    example_btn.change(
        fn=load_example_with_language,
        inputs=[example_btn, lang_input],
        outputs=[code_input, lang_input]
    )

    # Update code language highlight when lang changes
    lang_input.change(fn=highlight_update, inputs=lang_input, outputs=code_input)

    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input, lang_input],
        outputs=output
    )

demo.launch()