File size: 5,658 Bytes
8f63a20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f10bb94
8f63a20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
Nimo's Coder Agent v3 - Security Enhanced

A fine-tuned LLM for code generation and security vulnerability detection.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Configuration - V3 Security Enhanced
MODEL_ID = "CaptainNimo/nimos-coder-agent-v3"  # fine-tuned PEFT adapter repo on the Hub
BASE_MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct"  # frozen base model the adapter wraps

# Global variables
# Populated once by load_model() at startup; read by generate_code().
model = None
tokenizer = None


def load_model():
    """Initialize the module-level model and tokenizer from the Hub.

    Downloads the base Qwen model (4-bit NF4 quantized when a CUDA GPU is
    available, float32 on CPU), layers the fine-tuned PEFT adapter on top,
    and switches the result to eval mode. Both objects are stored in the
    module-level globals and also returned for convenience.

    Returns:
        Tuple of (model, tokenizer).
    """
    global model, tokenizer

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    # Qwen ships without a pad token; reuse EOS so batching/generation works.
    tokenizer.pad_token = tokenizer.eos_token

    print("Loading base model...")
    if torch.cuda.is_available():
        # 4-bit quantization keeps the model inside a small GPU's memory budget.
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        foundation = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            quantization_config=quant_cfg,
            device_map="auto",
            trust_remote_code=True,
        )
    else:
        # CPU fallback: full precision, no quantization.
        foundation = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True,
        )

    print("Loading fine-tuned adapter...")
    model = PeftModel.from_pretrained(foundation, MODEL_ID)
    model.eval()

    print("Model loaded successfully!")
    return model, tokenizer


def generate_code(instruction: str, context: str = "", max_tokens: int = 256, temperature: float = 0.7):
    """Generate a model response for *instruction*, optionally with code context.

    Args:
        instruction: The user's request (e.g. "Review this code for security
            vulnerabilities").
        context: Optional pasted code/context; included as the "### Input:"
            section of the Alpaca-style prompt when non-empty.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more varied output).

    Returns:
        The decoded model output with the prompt stripped, or a status
        message when the model is still loading or the instruction is blank.
    """
    global model, tokenizer

    # Guard BOTH globals: load_model() assigns the tokenizer before the
    # model, so a tokenizer-only state is possible mid-startup.
    if model is None or tokenizer is None:
        return "Model is loading, please wait..."

    # Avoid spending a full generation pass on an empty prompt.
    if not instruction.strip():
        return "Please enter an instruction."

    # Build an Alpaca-style prompt matching the fine-tuning data format.
    if context.strip():
        prompt = f"""### Instruction:
{instruction}

### Input:
{context}

### Response:
"""
    else:
        prompt = f"""### Instruction:
{instruction}

### Response:
"""

    # Generate
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text echoes the prompt; keep only the generated tail.
    if "### Response:" in response:
        response = response.split("### Response:")[-1].strip()

    return response


# Example prompts - including security examples.
# Each entry is [instruction, context] matching the two example inputs.
EXAMPLES = [
    # Security review examples (NEW in v3!)
    ["Review this code for security vulnerabilities. Is it safe?", "import os\nuser_input = input('Enter filename: ')\nos.system(f'cat {user_input}')"],
    ["Is this code secure?", 'query = f"SELECT * FROM users WHERE id = {user_id}"'],
    # NOTE(review): this snippet previously read "rm (unknown)" — a scraping
    # artifact where the original placeholder was lost; restored to a
    # coherent command-injection example.
    ["Fix the security vulnerabilities in this code", "import os\nfilename = input('File to delete: ')\nos.system(f'rm {filename}')"],
    # General coding
    ["Write a Python function to check if a number is prime", ""],
    ["Create a JavaScript function to debounce API calls", ""],
    ["Write a SQL query to find the top 5 customers by sales", ""],
    # Code improvement
    ["Add error handling to this function", "def divide(a, b):\n    return a / b"],
]

# Load model at startup
# Blocking load at import time so the UI only comes up once the model is ready.
print("Initializing Nimo's Coder Agent v3 - Security Enhanced...")
load_model()

# Create interface
with gr.Blocks(title="Nimo's Coder Agent v3", theme=gr.themes.Soft()) as demo:
    # Header: project description, training lineage, and external links.
    gr.Markdown(
        """
        # Nimo's Coder Agent v3 - Security Enhanced

        A fine-tuned LLM for **code generation** and **security vulnerability detection**.

        **What's new in v3:**
        - Detects command injection, SQL injection vulnerabilities
        - Trained on 25k+ examples including security datasets
        - 81% token accuracy

        **Model**: Qwen2.5-Coder-0.5B + QLoRA | **Training**: CodeAlpaca + Security DPO + CrossVul

        [GitHub](https://github.com/nihalmorshed/nimos-coder-v3-security) |
        [Model](https://huggingface.co/CaptainNimo/nimos-coder-agent-v3) |
        [v2 (Previous)](https://huggingface.co/CaptainNimo/nimos-coder-agent-v2)

        ---
        **Try the security review!** Paste vulnerable code and ask "Is this code safe?"
        """
    )

    with gr.Row():
        # Left column: user inputs and generation controls.
        with gr.Column():
            instruction = gr.Textbox(
                label="What do you need?",
                placeholder="e.g., Review this code for security vulnerabilities...",
                lines=2
            )
            context = gr.Textbox(
                label="Code to Review/Context (optional)",
                placeholder="Paste code here for security review, debugging, or refactoring...",
                lines=6
            )
            with gr.Row():
                # Slider values map directly onto generate_code's
                # max_tokens and temperature parameters.
                max_tokens = gr.Slider(64, 512, value=256, step=32, label="Max Length")
                temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Creativity")

            btn = gr.Button("Generate / Review", variant="primary")

        # Right column: model output.
        with gr.Column():
            output = gr.Textbox(label="Response", lines=18)

    # Clickable example prompts that pre-fill the two input boxes.
    gr.Examples(examples=EXAMPLES, inputs=[instruction, context])

    # Wire the button to the generation function.
    btn.click(generate_code, inputs=[instruction, context, max_tokens, temperature], outputs=output)

    # Footer: disclaimer and attribution.
    gr.Markdown(
        """
        ---
        **Note:** While v3 is better at detecting vulnerabilities than v2, always have security-critical code reviewed by experts.

        *Fine-tuned by Nimo using QLoRA on free Google Colab T4 GPU (2.8 hours)*
        """
    )

demo.launch()