---
# OpenEnv Environment Specification
# This file describes the Code Security Review environment for the
# Meta PyTorch OpenEnv Hackathon.

# Metadata: the environment's identity.
name: code-security-review
version: "1.0.0"
description: >
  An RL environment for training AI agents to perform code security review.
  Agents analyze code snippets from production pull requests and identify
  bugs, vulnerabilities, and security issues.
author: Inmodel Labs

# Tasks: the core challenges in the environment.
# Each task has a unique ID, name, description, difficulty level,
# step limit, and reward range.
tasks:
  - id: python-off-by-one
    name: "Python Off-by-One Error"
    description: "Identify an off-by-one index error in a Python finance batch processor"
    difficulty: easy
    max_steps: 2
    reward_range: [0.0, 1.0]

  - id: js-idor-auth
    name: "JavaScript IDOR Authorization Bypass"
    description: "Identify a horizontal privilege escalation (IDOR) in a Node.js REST profile endpoint"
    difficulty: medium
    max_steps: 2
    reward_range: [0.0, 1.0]

  - id: python-pickle-deserialization
    name: "Python Pickle Deserialization"
    description: "Identify an insecure deserialization vulnerability using pickle in a background worker"
    difficulty: hard
    max_steps: 2
    reward_range: [0.0, 1.0]

# Action space: the format of the agent's response.
# Each field is scored by the grader to provide partial progress signals.
action_space:
  type: object
  description: >
    Two-phase action space. Phase 1: submit {"request_file": true} to unlock
    the code snippet (+0.20 reward). Phase 2: submit a full review JSON.
  properties:
    request_file:
      type: boolean
      description: "Phase 1: Request the hidden file contents"
    bug_identified:
      type: boolean
      description: "Boolean: true if a bug exists"
    bug_location:
      type: string
      description: "String: Pinpoint the bug's location in code"
    # NOTE(review): the values listed below do not obviously include one for
    # the js-idor-auth task — confirm which bug_type the grader expects there.
    bug_type:
      type: string
      description: "String: off-by-one | logic-error | insecure-deserialization | none"
    bug_description:
      type: string
      description: "String: Detailed analysis of the vulnerability"
    severity:
      type: string
      enum: [none, low, medium, high, critical]
      description: "String: none | low | medium | high | critical"
    suggested_fix:
      type: string
      description: "String: How to fix the identified bug"

# Observation space: what the agent sees at each step.
# It uses a structured context to help the agent understand the code's purpose.
observation_space:
  type: object
  properties:
    task_id:
      type: string
      description: "Unique task identifier"
    language:
      type: string
      description: "Source code language"
    difficulty:
      type: string
      enum: [easy, medium, hard]
      description: "Task complexity (easy/medium/hard)"
    code_snippet:
      type: string
      description: "The source code to be reviewed"
    context:
      type: string
      description: "Real-world context (e.g., API description)"
    pr_title:
      type: string
      description: "Pull Request title for additional intent context"
    file_path:
      type: string
      description: "Relative path to the file in the repository"

# Reward structure for evaluating agent performance.
# The step-2 components listed below add up to 1.00; together with the
# step-1 bonus the raw total is 1.20, which is why the episode total is
# clamped to [min, max].
reward:
  min: 0.0
  max: 1.0
  description: >
    Step 1 — File request: +0.20 (flat, always granted).
    Step 2 — Bug review: partial rewards for bug identification (0.20),
    correct bug type (0.20), precise location (0.10),
    description quality (0.25, keyword density), fix quality (0.15),
    correct severity (0.10).
    Episode total is clamped to [0.0, 1.0].
    Grader penalizes keyword stuffing.

# HTTP endpoints exposed by the environment, written as "METHOD path".
endpoints:
  health: GET /
  reset: POST /reset
  step: POST /step
  state: GET /state
  tasks: GET /tasks