---
# Manifest for the "agentorg-codereview" evaluation environment.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML and let the inline comment under `environment`
# swallow everything after it. The block structure below is reconstructed
# from key order; confirm the nesting against the consumer's schema.

# --- Package metadata -------------------------------------------------------
version: "1.0.0"
name: "agentorg-codereview"
owners: ["Arsh Verma", "Divyansh Rawat"]
description: >
  AI Senior Code Reviewer evaluation environment for CodeLens.
  Benchmarks agents on 30 synthetic pull requests across
  Bug Detection, Security Audit, and Architectural Review tasks.
entry_point: "app:app"
dashboard: "/dashboard"
api_docs: "/docs"
license: "MIT"
tags: ["code-review", "agentic-eval", "security-audit", "bug-detection"]
# NOTE(review): the value ends with a trailing space; an e-mail address may
# have been stripped from it -- confirm the intended contact string.
contact: "Arsh Verma "

# --- Task catalogue ---------------------------------------------------------
# Three tasks with 10 scenarios each (30 in total, matching the description).
# Each `difficulty_distribution` sums to that task's `scenarios` count.
tasks:
  - id: "bug_detection"
    description: "Identify logical errors and edge cases in Python code"
    max_steps: 10
    scenarios: 10
    difficulty_distribution:
      easy: 2
      medium: 6
      hard: 2

  - id: "security_audit"
    description: "Detect OWASP Top 10 vulnerabilities in Python code"
    max_steps: 15
    scenarios: 10
    difficulty_distribution:
      easy: 1
      medium: 7
      hard: 2

  - id: "architectural_review"
    description: "Evaluate design patterns, coupling, and system constraints"
    max_steps: 20
    scenarios: 10
    difficulty_distribution:
      easy: 0
      medium: 7
      hard: 3

# --- Environment settings ---------------------------------------------------
environment:
  noise_budget: 5
  line_tolerance_bug: 3
  line_tolerance_arch: 5
  # agent body must contain ANY listed keyword
  keyword_match: "any"
  # NOTE(review): assumed to be an `environment` setting (it follows
  # `keyword_match` in the flattened source) -- confirm.
  case_sensitive: false

# --- Grading ----------------------------------------------------------------
grading:
  type: "deterministic"

  bug_detection:
    # The two top-level weights sum to 1.0.
    coverage_weight: 0.4
    avg_issue_score_weight: 0.6
    issue_score:
      # The two issue_score weights sum to 1.0.
      keyword_weight: 0.5
      severity_weight: 0.5
    # NOTE(review): placed beside `issue_score` rather than inside it because
    # the issue_score weights already sum to 1.0 -- confirm with the grader.
    false_positive_penalty: 0.1

  security_audit:
    formula: "avg_issue_score"
    issue_score:
      # The two issue_score weights sum to 1.0.
      severity_weight: 0.7
      keyword_weight: 0.3
    # NOTE(review): `severity_scale` and `severity_penalty_per_level` are
    # assumed to be direct children of `security_audit` -- confirm.
    severity_scale:
      critical: 4
      high: 3
      medium: 2
      low: 1
      info: 0
    severity_penalty_per_level: 0.3

  architectural_review:
    # The three weights sum to 1.0.
    issue_detection_weight: 0.6
    verdict_weight: 0.2
    quality_weight: 0.2
    quality_min_body_length: 20
    quality_max_body_length: 200