File size: 10,835 Bytes
e2eb9d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
"""
AgentOps Gym — Simulated tool implementations.

All tools operate on an in-memory filesystem snapshot. No real subprocess,
no real filesystem, fully deterministic and reproducible. The fake linter/
test runner uses static analysis of the snapshot strings.
"""

import re
import json
from typing import Dict, Optional, Tuple

# ---------------------------------------------------------------------------
# In-memory project snapshots (one per task)
# ---------------------------------------------------------------------------

# Initial project state for each task, keyed by task id. Each inner dict maps a
# filename (relative path, "/"-separated) to the complete text of that file.
# The file bodies are runtime data read by the simulated tools in this module
# (for example the fake linter and test runner scan "main.py" as plain text).
# NOTE(review): presumably the environment hands one of these inner dicts to
# run_tool() as `snapshot` -- confirm against the caller.
PROJECT_SNAPSHOTS: Dict[str, Dict[str, str]] = {
    # task_1: main.py returns the `response.json` attribute without calling it
    # (the line is marked with a BUG comment inside the file text itself).
    "task_1": {
        "main.py": """\
import requests

def fetch_user(user_id):
    url = f"https://api.example.com/users/{user_id}"
    response = requests.get(url)
    return response.json          # BUG: missing () β€” should be response.json()

def main():
    user = fetch_user(123)
    print(user['name'])

if __name__ == "__main__":
    main()
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
    },
    # task_2: main.py passes timeout=30 to requests.get; config.json also
    # carries "timeout": 30.
    "task_2": {
        "main.py": """\
import requests
import json

def fetch_data(endpoint):
    url = f"https://api.example.com/{endpoint}"
    response = requests.get(url, timeout=30)
    return response.json()

def main():
    data = fetch_data("data")
    print(data)
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
    },
    # task_3: main() calls fetch_user once per id in range(100). This is the
    # only snapshot that ships a test file (tests/test_main.py).
    "task_3": {
        "main.py": """\
import requests

def fetch_user(user_id):
    url = f"https://api.example.com/users/{user_id}"
    response = requests.get(url)
    return response.json()

def main():
    for uid in range(100):
        user = fetch_user(uid)
        print(user['name'])

if __name__ == "__main__":
    main()
""",
        "utils.py": "def helper(): pass\n",
        "config.json": '{"api_url": "https://api.example.com", "timeout": 30}\n',
        "README.md": "# Example Project\n",
        "tests/test_main.py": """\
from main import fetch_user

def test_fetch_user():
    result = fetch_user(1)
    assert result is not None
""",
    },
    # task_4: main.py hardcodes API_KEY as a string literal; this snapshot has
    # only main.py and README.md.
    "task_4": {
        "main.py": """\
import requests

API_KEY = "SECRET_TOKEN_XYZ"

def fetch_data():
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get("https://api.example.com/data", headers=headers)
    return response.json()

if __name__ == "__main__":
    print(fetch_data())
""",
        "README.md": "# Project Alpha\nSecure the API key.\n",
    },
}

# ---------------------------------------------------------------------------
# Simulated web search index
# ---------------------------------------------------------------------------

# Canned documentation snippets returned by the simulated WebSearch tool.
# Keys are search terms, values are the text returned verbatim.
# _web_search() lowercases the query and returns the value of the first key
# (in insertion order) that occurs as a substring of it, so keys must be
# lowercase and the order of entries decides which one wins when several match.
WEB_SEARCH_DOCS: Dict[str, str] = {
    "lru_cache": """\
functools.lru_cache β€” Python docs
  @functools.lru_cache(maxsize=128)
  def my_function(arg): ...
  Caches results of function calls. Use maxsize=None for unlimited cache.
""",
    "response.json": """\
requests.Response.json() β€” requests docs
  response.json() returns the JSON-encoded content of the response.
  Note: json is a method, must be called with parentheses: response.json()
""",
    "timeout": """\
requests timeout β€” requests docs
  Set timeout in seconds: requests.get(url, timeout=10)
  Recommended: keep timeout low (5-15s) for production APIs.
""",
    "python caching": """\
Python caching patterns:
  1. functools.lru_cache β€” in-memory memoization decorator
  2. dict-based cache    β€” manual dict for full control
  3. joblib.Memory       β€” disk-backed cache
  For simple in-memory caching, lru_cache is idiomatic Python.
""",
    "getenv": """\
os.getenv(key, default=None) β€” Python docs
  Return the value of the environment variable key if it exists, or default if it doesn't.
  Example:
    import os
    api_key = os.getenv('API_KEY')
""",
    ".env": """\
.env files β€” Best Practices
  Store secrets and configuration in a .env file:
    API_KEY=your_secret_here
  Never commit .env files to version control.
""",
}

# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------

# Tool name -> one-line human-readable description.
# The keys are the tool names that run_tool() dispatches on; run_tool() also
# lists these keys in its "Unknown tool" error message.
AVAILABLE_TOOLS = {
    "FileRead":  "Read contents of a specific file",
    "FileWrite": "Write/edit a specific file with new content",
    "Grep":      "Search for a pattern across all files",
    "Bash":      "Run a shell command (simulated: lint, test runner)",
    "WebSearch": "Search for documentation (simulated)",
    "TodoWrite": "Write a plan/todo list before acting",
}


def run_tool(
    tool: str,
    parameters: Dict,
    snapshot: Dict[str, str],
    discovered_files: list,
) -> Tuple[str, Dict[str, str], list]:
    """
    Execute a simulated tool and return (result_string, updated_snapshot, updated_discovered).
    All mutations to the snapshot are returned as a new dict.

    The caller's ``snapshot`` and ``discovered_files`` are shallow-copied before
    any handler runs, so the objects passed in are never modified; every change
    is visible only through the returned values.

    Args:
        tool: Name of the tool to run ("FileRead", "FileWrite", "Grep",
            "Bash", "WebSearch" or "TodoWrite").
        parameters: Tool-specific arguments, e.g. "filename", "content",
            "pattern", "command", "query", "plan". ``None`` is treated as an
            empty dict.
        snapshot: Mapping of filename to file content.
        discovered_files: Filenames revealed so far.

    Returns:
        A 3-tuple ``(result_string, updated_snapshot, updated_discovered)``.
        An unknown tool name yields an "ERROR: Unknown tool" result with the
        copied snapshot and discovered list unchanged.
    """
    parameters = parameters or {}
    snapshot = dict(snapshot)
    discovered = list(discovered_files)

    if tool == "FileRead":
        return _file_read(parameters, snapshot, discovered)
    elif tool == "FileWrite":
        return _file_write(parameters, snapshot, discovered)
    elif tool == "Grep":
        return _grep(parameters, snapshot, discovered)
    elif tool == "Bash":
        bash_out = _bash(parameters, snapshot)
        if isinstance(bash_out, str):
            # Normalize a message-only result into the usual triple.
            return bash_out, snapshot, discovered
        # _bash never sees the discovered list and reports an empty one;
        # forwarding that would wipe the files discovered so far, so the
        # list held here is returned instead.
        return bash_out[0], bash_out[1], discovered
    elif tool == "WebSearch":
        return _web_search(parameters), snapshot, discovered
    elif tool == "TodoWrite":
        return _todo_write(parameters), snapshot, discovered
    else:
        return f"ERROR: Unknown tool '{tool}'. Available: {list(AVAILABLE_TOOLS.keys())}", snapshot, discovered


def _file_read(params, snapshot, discovered):
    fname = params.get("filename", "")
    if not fname:
        return "ERROR: 'filename' parameter required for FileRead.", snapshot, discovered
    if fname not in snapshot:
        return f"ERROR: File '{fname}' not found in project.", snapshot, discovered
    # Reveal file in discovered list
    if fname not in discovered:
        discovered.append(fname)
    content = snapshot[fname]
    lines = content.splitlines()
    numbered = "\n".join(f"{i+1:3}: {line}" for i, line in enumerate(lines))
    return f"=== {fname} ===\n{numbered}", snapshot, discovered


def _file_write(params, snapshot, discovered):
    fname = params.get("filename", "")
    content = params.get("content", "")
    if not fname:
        return "ERROR: 'filename' parameter required for FileWrite.", snapshot, discovered
    snapshot[fname] = content
    if fname not in discovered:
        discovered.append(fname)
    return f"Write successful: {fname} ({len(content)} bytes written)", snapshot, discovered


def _grep(params, snapshot, discovered):
    pattern = params.get("pattern", "")
    if not pattern:
        return "ERROR: 'pattern' parameter required for Grep.", snapshot, discovered
    results = []
    for fname, content in snapshot.items():
        for i, line in enumerate(content.splitlines(), 1):
            if re.search(pattern, line, re.IGNORECASE):
                results.append(f"{fname}:{i} β†’ {line.strip()}")
                # Discovering a file via grep reveals it
                if fname not in discovered:
                    discovered.append(fname)
    if not results:
        return f"No matches for pattern '{pattern}'.", snapshot, discovered
    return "\n".join(results), snapshot, discovered


def _bash(params, snapshot):
    cmd = params.get("command", "")
    if not cmd:
        return "ERROR: 'command' parameter required for Bash.", snapshot, []

    cmd_lower = cmd.lower()

    # Simulated linter
    if "lint" in cmd_lower or "flake8" in cmd_lower or "pylint" in cmd_lower:
        fname = None
        for f in snapshot:
            if f.endswith(".py") and f in cmd:
                fname = f
                break
        if fname and fname in snapshot:
            return _lint_file(fname, snapshot[fname]), snapshot, []
        # Lint all py files
        out = []
        for f, content in snapshot.items():
            if f.endswith(".py"):
                out.append(_lint_file(f, content))
        return "\n".join(out) if out else "No Python files found.", snapshot, []

    # Simulated test runner
    if "pytest" in cmd_lower or "test" in cmd_lower:
        test_files = [f for f in snapshot if "test" in f]
        if not test_files:
            return "No test files found.", snapshot, []
        # Check if main.py has obvious bugs
        main_content = snapshot.get("main.py", "")
        if "response.json\n" in main_content or "response.json " in main_content:
            return '{"status": "error", "file": "main.py", "line": 6, "message": "AttributeError: method object is not subscriptable β€” did you forget response.json()?"}'
        return '{"status": "pass", "passed": 1, "failed": 0}', snapshot, []

    # Simulated validate (for config check)
    if "validate" in cmd_lower or "json" in cmd_lower:
        for fname, content in snapshot.items():
            if fname.endswith(".json") and fname in cmd:
                try:
                    json.loads(content)
                    return f"βœ“ {fname} is valid JSON", snapshot, []
                except json.JSONDecodeError as e:
                    return f"βœ— {fname} invalid JSON: {e}", snapshot, []
        return "Validation complete.", snapshot, []

    return f"$ {cmd}\n(simulated) Command executed. No output.", snapshot, []


def _lint_file(fname: str, content: str) -> str:
    errors = []
    for i, line in enumerate(content.splitlines(), 1):
        # Check for common bug: response.json without ()
        if re.search(r'response\.json\b(?!\()', line):
            errors.append(f'  {fname}:{i}: E001 response.json called without parentheses β€” should be response.json()')
        # Check for bare except
        if re.match(r'\s*except\s*:', line):
            errors.append(f'  {fname}:{i}: W001 Bare except clause detected')
        # Check for hardcoded secrets (task_4)
        if "SECRET_TOKEN_XYZ" in line and fname == "main.py":
            errors.append(f'  {fname}:{i}: E002 Hardcoded secret detected β€” use environment variables')
    if errors:
        return f'{fname}: {len(errors)} issue(s) found\n' + '\n'.join(errors)
    return f'{fname}: OK'


def _web_search(params) -> str:
    """
    Look up a canned documentation snippet for ``params["query"]``.

    The query is lowercased and the entries of WEB_SEARCH_DOCS are scanned in
    order; the snippet of the first key that occurs as a substring of the
    query is returned. When no key matches, a "No results found" message
    quoting the original (non-lowercased) query is returned.
    """
    raw_query = params.get("query", "")
    needle = raw_query.lower()
    hit = next(
        (doc for key, doc in WEB_SEARCH_DOCS.items() if key in needle),
        None,
    )
    if hit is not None:
        return hit
    return f"No results found for '{raw_query}'. Try more specific terms."


def _todo_write(params) -> str:
    plan = params.get("plan", params.get("content", ""))
    if not plan:
        return "ERROR: 'plan' parameter required for TodoWrite."
    return f"βœ“ Plan recorded:\n{plan}"