AIMLxDIV committed on
Commit
da16640
·
1 Parent(s): 0d8eda5

Add scripts/baseline.py

Browse files
Files changed (1) hide show
  1. scripts/baseline.py +74 -0
scripts/baseline.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from codereview_env.models import TaskId, ActionType, Category, Severity, Verdict
3
+
4
+ API_URL = "http://localhost:8000"
5
+
6
def run_baseline(task_id: TaskId, seed: int = 42, *, timeout: float = 30.0) -> None:
    """Run a naive keyword-matching agent through one review episode.

    The agent resets the environment over HTTP, then repeatedly inspects the
    ``diff`` field of the current observation.  For each suspicious keyword
    found it posts one ``FLAG_ISSUE`` action; once nothing new is left to
    flag it posts a terminal verdict.  Finally it fetches the episode result
    and prints the final score.

    Args:
        task_id: Task to run; sent as ``task_id`` in the ``/reset`` payload.
        seed: Seed sent in the ``/reset`` payload.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot hang the script forever.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-2xx response (via ``raise_for_status``).
        KeyError: If a response body lacks one of the fields read here.
    """
    # NOTE(review): the enum members are passed straight into the JSON
    # payloads; this presumably relies on them being str-based enums --
    # confirm against codereview_env.models.

    # 1. Reset
    reset_resp = requests.post(
        f"{API_URL}/reset",
        json={"task_id": task_id, "seed": seed},
        timeout=timeout,
    )
    reset_resp.raise_for_status()
    data = reset_resp.json()
    episode_id = data["episode_id"]
    obs = data["result"]["observation"]

    print(f"Started episode {episode_id} for task {task_id}")

    # 2. Simple keyword-based logic
    # Look for common bug/security keywords in the diff
    keywords = {
        "SQL": (Category.SECURITY, Severity.CRITICAL, "Potential SQL injection detected."),
        "password": (Category.SECURITY, Severity.HIGH, "Hardcoded credential detected."),
        "range(len": (Category.BUG, Severity.MEDIUM, "Off-by-one error suspected."),
        "Exception": (Category.BUG, Severity.LOW, "Broad exception catch detected."),
    }

    # (diff, keyword) pairs that were already reported.  Without this the
    # first matching keyword would be re-flagged on every step for as long as
    # the observation keeps returning the same diff, and the terminal action
    # below would never be sent.
    already_flagged = set()

    is_architectural = task_id == TaskId.ARCHITECTURAL_REVIEW

    done = False
    while not done:
        diff = obs["diff"]
        action = None

        for kw, (cat, sev, desc) in keywords.items():
            if kw not in diff or (diff, kw) in already_flagged:
                continue
            already_flagged.add((diff, kw))

            # Very naive location: 1-based index of the first diff line that
            # contains the keyword (this is a position in the diff text, not
            # necessarily in the changed file).
            line_no = next(
                (
                    idx
                    for idx, line in enumerate(diff.split("\n"), start=1)
                    if kw in line
                ),
                1,
            )

            files_changed = obs["files_changed"]
            action = {
                "action_type": ActionType.FLAG_ISSUE,
                "body": desc,
                "filename": files_changed[0]["filename"] if files_changed else "unknown",
                "line_number": line_no,
                "severity": sev,
                "category": cat,
            }
            break

        if not action:
            # Terminal action: nothing (new) left to flag.
            if is_architectural:
                action = {
                    "action_type": ActionType.REQUEST_CHANGES,
                    "verdict": Verdict.REQUEST_CHANGES,
                    "body": "Architectural issues found.",
                }
            else:
                action = {
                    "action_type": ActionType.APPROVE,
                    "verdict": Verdict.LGTM,
                    "body": "LGTM",
                }

        step_resp = requests.post(
            f"{API_URL}/step/{episode_id}",
            json=action,
            timeout=timeout,
        )
        step_resp.raise_for_status()
        step_data = step_resp.json()
        obs = step_data["observation"]
        done = step_data["done"]

    # 3. Get final result
    result_resp = requests.get(f"{API_URL}/result/{episode_id}", timeout=timeout)
    result_resp.raise_for_status()
    print(f"Final Score: {result_resp.json()['final_score']}")
68
+
69
if __name__ == "__main__":
    # Note: Requires app.py to be running
    try:
        run_baseline(TaskId.BUG_DETECTION, seed=0)
    except requests.exceptions.RequestException as e:
        # Only transport/HTTP failures are plausibly "API not running";
        # anything else (e.g. a KeyError from an unexpected response shape)
        # is a real bug and should surface with its traceback instead.
        print(f"Baseline failed (is the API running?): {e}")
        # Exit non-zero so shells and CI can tell the run failed.
        raise SystemExit(1) from e