Spaces:

Revanth-ml
/

agentops-gym

Sleeping

File size: 10,835 Bytes

e2eb9d7

"""
AgentOps Gym — Simulated tool implementations.

All tools operate on an in-memory filesystem snapshot. No real subprocess,
no real filesystem, fully deterministic and reproducible. The fake linter/
test runner uses static analysis of the snapshot strings.
"""

import re
import json
from typing import Dict, Optional, Tuple

# ---------------------------------------------------------------------------
# In-memory project snapshots (one per task)
# ---------------------------------------------------------------------------

# Maps a task id to that task's project: {relative file path -> full file text}.
# File texts are plain strings; the "\" right after each opening triple quote
# suppresses the leading newline so line 1 of the file is the first text line.
# NOTE(review): presumably one of these per-task dicts is what callers pass to
# run_tool() as `snapshot` — confirm against the environment code.
PROJECT_SNAPSHOTS: Dict[str, Dict[str, str]] = {
    # task_1: main.py returns `response.json` without calling it (the line is
    # tagged "BUG" inside the snapshot text itself, on line 6 of the file).
    "task_1": {
        "main.py": """\
import requests

def fetch_user(user_id):
    url = f"https://api.example.com/users/{user_id}"
    response = requests.get(url)
    return response.json          # BUG: missing () — should be response.json()

def main():
    user = fetch_user(123)
    print(user['name'])

if __name__ == "__main__":
    main()
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
    },
    # task_2: main.py passes timeout=30 to requests.get and config.json also
    # carries "timeout": 30. The intended exercise is not stated in this file.
    "task_2": {
        "main.py": """\
import requests
import json

def fetch_data(endpoint):
    url = f"https://api.example.com/{endpoint}"
    response = requests.get(url, timeout=30)
    return response.json()

def main():
    data = fetch_data("data")
    print(data)
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
    },
    # task_3: main() calls fetch_user once per id for 100 ids. This is the only
    # snapshot that ships a test file (tests/test_main.py).
    "task_3": {
        "main.py": """\
import requests

def fetch_user(user_id):
    url = f"https://api.example.com/users/{user_id}"
    response = requests.get(url)
    return response.json()

def main():
    for uid in range(100):
        user = fetch_user(uid)
        print(user['name'])

if __name__ == "__main__":
    main()
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
        "tests/test_main.py": """\
from main import fetch_user

def test_fetch_user():
    result = fetch_user(1)
    assert result is not None
""",
    },
    # task_4: main.py hard-codes API_KEY as the literal "SECRET_TOKEN_XYZ"; the
    # simulated linter flags that exact literal in main.py. This snapshot has
    # no utils.py or config.json.
    "task_4": {
        "main.py": """\
import requests

API_KEY = "SECRET_TOKEN_XYZ"

def fetch_data():
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get("https://api.example.com/data", headers=headers)
    return response.json()

if __name__ == "__main__":
    print(fetch_data())
""",
        "README.md": "# Project Alpha\nSecure the API key.\n",
    },
}

# ---------------------------------------------------------------------------
# Simulated web search index
# ---------------------------------------------------------------------------

# Canned "search results" keyed by a trigger phrase.
# _web_search() lowercases the query and returns the document of the FIRST key
# (in the insertion order below) that occurs as a substring of the query, so:
#   * keys must be lowercase to ever match, and
#   * ordering matters when a query contains more than one key.
WEB_SEARCH_DOCS: Dict[str, str] = {
    "lru_cache": """\
functools.lru_cache — Python docs
  @functools.lru_cache(maxsize=128)
  def my_function(arg): ...
  Caches results of function calls. Use maxsize=None for unlimited cache.
""",
    "response.json": """\
requests.Response.json() — requests docs
  response.json() returns the JSON-encoded content of the response.
  Note: json is a method, must be called with parentheses: response.json()
""",
    "timeout": """\
requests timeout — requests docs
  Set timeout in seconds: requests.get(url, timeout=10)
  Recommended: keep timeout low (5-15s) for production APIs.
""",
    # Multi-word key: only matches when the query contains this exact phrase.
    "python caching": """\
Python caching patterns:
  1. functools.lru_cache — in-memory memoization decorator
  2. dict-based cache    — manual dict for full control
  3. joblib.Memory       — disk-backed cache
  For simple in-memory caching, lru_cache is idiomatic Python.
""",
    "getenv": """\
os.getenv(key, default=None) — Python docs
  Return the value of the environment variable key if it exists, or default if it doesn't.
  Example:
    import os
    api_key = os.getenv('API_KEY')
""",
    ".env": """\
.env files — Best Practices
  Store secrets and configuration in a .env file:
    API_KEY=your_secret_here
  Never commit .env files to version control.
""",
}

# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------

# Tool name -> one-line human-readable description.
# The keys are the exact strings run_tool() dispatches on, and the key list is
# echoed back in run_tool()'s "Unknown tool" error message.
AVAILABLE_TOOLS: Dict[str, str] = {
    "FileRead": "Read contents of a specific file",
    "FileWrite": "Write/edit a specific file with new content",
    "Grep": "Search for a pattern across all files",
    "Bash": "Run a shell command (simulated: lint, test runner)",
    "WebSearch": "Search for documentation (simulated)",
    "TodoWrite": "Write a plan/todo list before acting",
}


def run_tool(
    tool: str,
    parameters: Dict,
    snapshot: Dict[str, str],
    discovered_files: list,
) -> Tuple[str, Dict[str, str], list]:
    """
    Execute a simulated tool and return (result_string, updated_snapshot, updated_discovered).

    The caller's ``snapshot`` and ``discovered_files`` are never mutated: the
    tool works on shallow copies and those copies are what is returned.

    Args:
        tool: Tool name; one of the keys of AVAILABLE_TOOLS.
        parameters: Tool-specific arguments (e.g. ``{"filename": ...}``).
            ``None`` is treated as an empty dict.
        snapshot: Current in-memory project, mapping file path -> file text.
        discovered_files: Paths revealed so far. ``None`` is treated as empty.

    Returns:
        A 3-tuple ``(result_string, updated_snapshot, updated_discovered)``.
        Unknown tools yield an ``ERROR: ...`` result with state unchanged.
    """
    # Copy first so every branch below can mutate freely.
    snapshot = dict(snapshot)
    discovered = list(discovered_files or [])
    # A missing payload should produce the tool's own "parameter required"
    # error rather than an AttributeError on None.get(...).
    parameters = parameters or {}

    if tool == "FileRead":
        return _file_read(parameters, snapshot, discovered)
    elif tool == "FileWrite":
        return _file_write(parameters, snapshot, discovered)
    elif tool == "Grep":
        return _grep(parameters, snapshot, discovered)
    elif tool == "Bash":
        outcome = _bash(parameters, snapshot)
        # _bash is never given the discovered list, so whatever it reports in
        # that slot must not replace ours: running a shell command does not
        # un-discover files. Be defensive about the result shape as well, so
        # run_tool always hands back a proper 3-tuple.
        if isinstance(outcome, tuple):
            result, snapshot = outcome[0], outcome[1]
        else:
            result = outcome
        return result, snapshot, discovered
    elif tool == "WebSearch":
        return _web_search(parameters), snapshot, discovered
    elif tool == "TodoWrite":
        return _todo_write(parameters), snapshot, discovered
    else:
        return f"ERROR: Unknown tool '{tool}'. Available: {list(AVAILABLE_TOOLS.keys())}", snapshot, discovered


def _file_read(params, snapshot, discovered):
    fname = params.get("filename", "")
    if not fname:
        return "ERROR: 'filename' parameter required for FileRead.", snapshot, discovered
    if fname not in snapshot:
        return f"ERROR: File '{fname}' not found in project.", snapshot, discovered
    # Reveal file in discovered list
    if fname not in discovered:
        discovered.append(fname)
    content = snapshot[fname]
    lines = content.splitlines()
    numbered = "\n".join(f"{i+1:3}: {line}" for i, line in enumerate(lines))
    return f"=== {fname} ===\n{numbered}", snapshot, discovered


def _file_write(params, snapshot, discovered):
    fname = params.get("filename", "")
    content = params.get("content", "")
    if not fname:
        return "ERROR: 'filename' parameter required for FileWrite.", snapshot, discovered
    snapshot[fname] = content
    if fname not in discovered:
        discovered.append(fname)
    return f"Write successful: {fname} ({len(content)} bytes written)", snapshot, discovered


def _grep(params, snapshot, discovered):
    pattern = params.get("pattern", "")
    if not pattern:
        return "ERROR: 'pattern' parameter required for Grep.", snapshot, discovered
    results = []
    for fname, content in snapshot.items():
        for i, line in enumerate(content.splitlines(), 1):
            if re.search(pattern, line, re.IGNORECASE):
                results.append(f"{fname}:{i} → {line.strip()}")
                # Discovering a file via grep reveals it
                if fname not in discovered:
                    discovered.append(fname)
    if not results:
        return f"No matches for pattern '{pattern}'.", snapshot, discovered
    return "\n".join(results), snapshot, discovered


def _bash(params, snapshot):
    cmd = params.get("command", "")
    if not cmd:
        return "ERROR: 'command' parameter required for Bash.", snapshot, []

    cmd_lower = cmd.lower()

    # Simulated linter
    if "lint" in cmd_lower or "flake8" in cmd_lower or "pylint" in cmd_lower:
        fname = None
        for f in snapshot:
            if f.endswith(".py") and f in cmd:
                fname = f
                break
        if fname and fname in snapshot:
            return _lint_file(fname, snapshot[fname]), snapshot, []
        # Lint all py files
        out = []
        for f, content in snapshot.items():
            if f.endswith(".py"):
                out.append(_lint_file(f, content))
        return "\n".join(out) if out else "No Python files found.", snapshot, []

    # Simulated test runner
    if "pytest" in cmd_lower or "test" in cmd_lower:
        test_files = [f for f in snapshot if "test" in f]
        if not test_files:
            return "No test files found.", snapshot, []
        # Check if main.py has obvious bugs
        main_content = snapshot.get("main.py", "")
        if "response.json\n" in main_content or "response.json " in main_content:
            return '{"status": "error", "file": "main.py", "line": 6, "message": "AttributeError: method object is not subscriptable — did you forget response.json()?"}'
        return '{"status": "pass", "passed": 1, "failed": 0}', snapshot, []

    # Simulated validate (for config check)
    if "validate" in cmd_lower or "json" in cmd_lower:
        for fname, content in snapshot.items():
            if fname.endswith(".json") and fname in cmd:
                try:
                    json.loads(content)
                    return f"✓ {fname} is valid JSON", snapshot, []
                except json.JSONDecodeError as e:
                    return f"✗ {fname} invalid JSON: {e}", snapshot, []
        return "Validation complete.", snapshot, []

    return f"$ {cmd}\n(simulated) Command executed. No output.", snapshot, []


def _lint_file(fname: str, content: str) -> str:
    errors = []
    for i, line in enumerate(content.splitlines(), 1):
        # Check for common bug: response.json without ()
        if re.search(r'response\.json\b(?!\()', line):
            errors.append(f'  {fname}:{i}: E001 response.json called without parentheses — should be response.json()')
        # Check for bare except
        if re.match(r'\s*except\s*:', line):
            errors.append(f'  {fname}:{i}: W001 Bare except clause detected')
        # Check for hardcoded secrets (task_4)
        if "SECRET_TOKEN_XYZ" in line and fname == "main.py":
            errors.append(f'  {fname}:{i}: E002 Hardcoded secret detected — use environment variables')
    if errors:
        return f'{fname}: {len(errors)} issue(s) found\n' + '\n'.join(errors)
    return f'{fname}: OK'


def _web_search(params) -> str:
    """Look up a canned document for ``params["query"]``.

    The query is lowercased and the document of the first WEB_SEARCH_DOCS key
    (in dict order) that appears as a substring of it is returned. When no key
    matches, a "No results found" message quoting the original query is
    returned instead.
    """
    raw_query = params.get("query", "")
    needle = raw_query.lower()
    hit = next(
        (doc for key, doc in WEB_SEARCH_DOCS.items() if key in needle),
        None,
    )
    if hit is not None:
        return hit
    return f"No results found for '{raw_query}'. Try more specific terms."


def _todo_write(params) -> str:
    plan = params.get("plan", params.get("content", ""))
    if not plan:
        return "ERROR: 'plan' parameter required for TodoWrite."
    return f"✓ Plan recorded:\n{plan}"