# Spaces: Sleeping  (appears to be page-header residue from the capture, not manifest data)
# Environment manifest for the code_review_env RL environment.
# Reconstructed as block-style YAML (2-space indent); scalar values are kept
# as in the original except for repaired mis-encoded punctuation.
spec_version: 1
name: code_review_env
version: "2.1.0"
# NOTE(review): the arrows below were mis-encoded in the original text and
# have been restored as "→" — confirm against the upstream file.
description: >
  A code review and security audit RL environment for training AI agents.
  The agent identifies bugs, security vulnerabilities, and performance issues
  across 7 tasks of increasing difficulty (easy → medium → medium-hard → hard).
  Features: PBRS reward shaping, graduated near-miss rewards, flood protection,
  CAMRL curriculum with task replay, VL return normalization, GRPO batch endpoint,
  diversity/exploration bonuses, and cross-language tasks (Python + JavaScript).

# Deployment settings: FastAPI app object and the port it is served on.
type: space
runtime: fastapi
app: server.app:app
entry_point: server
port: 7860

# Task catalogue: one mapping per task (7 entries, matching the description).
# Each entry gives its id, difficulty tier, source language, num_issues and
# max_steps.
tasks:
  - id: bug-detection
    difficulty: easy
    language: python
    num_issues: 3
    max_steps: 15
  - id: security-audit
    difficulty: medium
    language: python
    num_issues: 7
    max_steps: 20
  - id: async-review
    difficulty: medium-hard
    language: python
    num_issues: 6
    max_steps: 20
  - id: data-pipeline
    difficulty: hard
    language: python
    num_issues: 7
    max_steps: 25
  - id: comprehensive-review
    difficulty: hard
    language: python
    num_issues: 9
    max_steps: 30
  - id: api-security
    difficulty: hard
    language: python
    num_issues: 8
    max_steps: 25
  - id: js-security
    difficulty: hard
    language: javascript
    num_issues: 8
    max_steps: 25

# Reward components, recorded as descriptive strings. They stay quoted because
# several contain ": ", which would otherwise start a nested mapping.
reward_design:
  terminal: "0.70 * F1 + 0.30 * severity_accuracy"
  shaping: "PBRS (Ng et al. 1999): phi(s) = (tp/total_gt) * 0.5"
  near_miss: "exponential decay: 0.10 * exp(-0.6 * (line_diff - 2)), requires compatible type"
  diversity_bonus: "+0.02 for first TP in a new issue category"
  exploration_bonus: "+0.01 for first TP in a new file (multi-file tasks)"
  flood_protection: "escalating FP penalty after 3rd false positive"
  normalization: "VL Norm (2025): normalized_return = cumulative / steps_used"

# Training-related endpoints and curriculum notes (descriptive strings).
# NOTE(review): the separator after each endpoint path was mis-encoded in the
# original and has been restored as an em dash — confirm against upstream.
training:
  grpo_endpoint: "/grpo_batch — group-relative advantages A_i = (r_i - mean) / std"
  curriculum: "CAMRL with 20% task replay to prevent forgetting"
  rollout: "/trl_rollout — TRL GRPOTrainer compatible batch rollout"