Spaces:

SimranShaikh
/

code-review-env

Sleeping

File size: 6,995 Bytes

220b4a7

"""
Task definitions: Easy → Medium → Hard
Each task has: code snippet, context, ground truth bugs, and grading rubric.
"""

# Registry of code-review tasks, keyed by task id (each key is the same string
# as the entry's own "task_id" field). Entries are listed easy -> medium -> hard.
# Every entry carries metadata (task_id, task_name, difficulty, language,
# max_steps), a "context" prompt, the buggy "code_snippet", and a
# "ground_truth" dict describing the expected findings.
# NOTE(review): how "keywords" / "fix_keywords" are matched is decided by the
# grader, which is not in this file -- presumably substring matching; confirm.
TASKS = {
    # --- Easy: a single missing colon -------------------------------------
    "easy_syntax": {
        "task_id": "easy_syntax",
        "task_name": "Python Syntax Error Detection",
        "difficulty": "easy",
        "language": "python",
        "max_steps": 5,
        "context": (
            "This Python function is supposed to calculate a discounted price. "
            "It should raise a ValueError if discount exceeds 100%, otherwise "
            "return the price after applying the discount. Find and fix any errors."
        ),
        # The backslash right after the opening quotes suppresses the leading
        # newline, so snippet line 1 is the "def" line.
        "code_snippet": """\
def calculate_discount(price, discount_percent):
    if discount_percent > 100
        raise ValueError("Discount cannot exceed 100%")
    discount = price * (discount_percent / 100)
    return price - discount

result = calculate_discount(200, 15)
print(result)
""",
        "ground_truth": {
            "issue_type": "syntax_error",
            # 1-based line number within code_snippet: line 2 is the "if"
            # statement that is missing its colon.
            "bug_line": 2,
            # NOTE(review): ":" is a single character; if the grader does
            # substring matching it will hit almost any answer -- confirm.
            "keywords": ["colon", "syntax", "if statement", "missing :", ":"],
            "fix_keywords": ["if discount_percent > 100:"],
            "description": "Missing colon at end of if statement on line 2",
        },
    },

    # --- Medium: negative-index off-by-one ---------------------------------
    "medium_logic": {
        "task_id": "medium_logic",
        "task_name": "Logic Bug: Off-by-One in Palindrome Check",
        "difficulty": "medium",
        "language": "python",
        "max_steps": 8,
        "context": (
            "This function checks whether a given string is a palindrome "
            "(reads the same forwards and backwards, ignoring spaces and case). "
            "It passes some basic tests but fails on others. Find the logic bug and fix it."
        ),
        "code_snippet": """\
def is_palindrome(s: str) -> bool:
    s = s.lower().replace(" ", "")
    for i in range(len(s) // 2):
        if s[i] != s[-i]:   # Compare from both ends
            return False
    return True

# Expected: True for "racecar", "A man a plan a canal Panama"
# Expected: False for "hello", "world"
print(is_palindrome("racecar"))           # Should be True
print(is_palindrome("hello"))             # Should be False
print(is_palindrome("A man a plan a canal Panama"))  # Should be True
""",
        "ground_truth": {
            "issue_type": "logic_bug",
            # 1-based line number within code_snippet: line 4 is the
            # "if s[i] != s[-i]" comparison.
            "bug_line": 4,
            "keywords": [
                "off-by-one", "index", "-i", "-(i+1)", "s[-i]",
                "s[0]", "zero", "first character", "always equal"
            ],
            "fix_keywords": ["s[-(i+1)]", "s[-i-1]", "-(i+1)"],
            "description": (
                "s[-i] when i=0 evaluates to s[0] (the first character), "
                "so it always equals s[i] at i=0. Should be s[-(i+1)]."
            ),
            # Input strings paired with the result a correct is_palindrome()
            # should return. The third input is already lowercased with the
            # spaces removed.
            "test_cases": [
                {"input": "racecar", "expected": True},
                {"input": "hello", "expected": False},
                {"input": "amanaplanacanalpanama", "expected": True},
                {"input": "abba", "expected": True},
                {"input": "abc", "expected": False},
            ],
        },
    },

    # --- Hard: several security issues in one module ------------------------
    "hard_security": {
        "task_id": "hard_security",
        "task_name": "Security Vulnerability: SQL Injection & Path Traversal",
        "difficulty": "hard",
        "language": "python",
        "max_steps": 10,
        "context": (
            "This is a user authentication module for a web application. "
            "It handles login and serves user-uploaded profile documents. "
            "Perform a thorough security review — identify ALL vulnerabilities "
            "and provide a fixed, secure version of the code."
        ),
        # The inner docstrings use escaped triple quotes (\"\"\") so they do
        # not terminate this string literal.
        "code_snippet": """\
import sqlite3
import os


def authenticate_user(username: str, password: str) -> bool:
    \"\"\"Authenticate user against the database.\"\"\"
    conn = sqlite3.connect("users.db")
    cursor = conn.cursor()
    # Build query with user input directly
    query = (
        "SELECT * FROM users WHERE username='"
        + username
        + "' AND password='"
        + password
        + "'"
    )
    cursor.execute(query)
    result = cursor.fetchone()
    conn.close()
    return result is not None


def get_user_document(username: str, filename: str) -> str:
    \"\"\"Return contents of a user's uploaded document.\"\"\"
    base_dir = "/app/user_docs"
    filepath = os.path.join(base_dir, username, filename)
    with open(filepath, "r") as f:
        return f.read()


def hash_password(password: str) -> str:
    \"\"\"Hash password before storage.\"\"\"
    import hashlib
    return hashlib.md5(password.encode()).hexdigest()
""",
        # Unlike the two tasks above, this ground truth has no single
        # issue_type/bug_line; it is a list of independent vulnerabilities,
        # each with its own keywords, fix_keywords and severity.
        "ground_truth": {
            "vulnerabilities": [
                # authenticate_user(): the query is built by concatenating
                # username and password into the SQL text.
                {
                    "issue_type": "security_vulnerability",
                    "name": "SQL Injection",
                    # NOTE(review): "?" is a single character; if the grader
                    # does substring matching, any question mark in the
                    # answer would count -- confirm.
                    "keywords": [
                        "sql injection", "injection", "parameterized",
                        "prepared statement", "user input", "string concatenation",
                        "sanitize", "placeholder", "?"
                    ],
                    "fix_keywords": [
                        "?", "parameterized", "cursor.execute(query, (username",
                        "execute(query, "
                    ],
                    "severity": "critical",
                },
                # get_user_document(): username and filename are joined onto
                # base_dir and opened without any containment check.
                {
                    "issue_type": "security_vulnerability",
                    "name": "Path Traversal",
                    "keywords": [
                        "path traversal", "directory traversal", "../",
                        "os.path.abspath", "startswith", "realpath",
                        "sanitize", "filename", "escape"
                    ],
                    "fix_keywords": [
                        "abspath", "realpath", "startswith", "normpath"
                    ],
                    "severity": "high",
                },
                # hash_password(): passwords are hashed with hashlib.md5.
                {
                    "issue_type": "security_vulnerability",
                    "name": "Weak Password Hashing (MD5)",
                    "keywords": [
                        "md5", "weak", "hash", "bcrypt", "argon2",
                        "sha256", "pbkdf2", "salt", "password hashing"
                    ],
                    # NOTE(review): "hashlib.sha256" is accepted as a fix, but
                    # a bare SHA-256 digest is a fast general-purpose hash
                    # rather than a password-hashing scheme -- confirm this
                    # is intended.
                    "fix_keywords": [
                        "bcrypt", "argon2", "pbkdf2", "hashlib.sha256",
                        "passlib", "werkzeug"
                    ],
                    "severity": "high",
                },
            ],
        },
    },
}


def get_task(task_id: str) -> dict:
    """Look up a single task definition in ``TASKS``.

    Args:
        task_id: Key of the task to fetch.

    Returns:
        The task's definition dict, exactly as stored in ``TASKS``
        (not a copy).

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASKS``; the message
            lists the available task ids.
    """
    # Happy path first; anything that falls through is an unknown id.
    if task_id in TASKS:
        return TASKS[task_id]
    raise ValueError(f"Unknown task: {task_id}. Available: {list(TASKS.keys())}")


def list_tasks() -> list:
    """Return a lightweight summary of every task in ``TASKS``.

    Returns:
        One dict per task, in ``TASKS`` iteration order, holding only the
        ``task_id``, ``task_name``, ``difficulty``, ``language`` and
        ``max_steps`` fields (the snippet, context and ground truth are
        left out).
    """
    # Field order here fixes the key order of each summary dict.
    summary_fields = ("task_id", "task_name", "difficulty", "language", "max_steps")
    summaries = []
    for task in TASKS.values():
        summaries.append({field: task[field] for field in summary_fields})
    return summaries