ArshVerma committed on
Commit
8103ef8
·
1 Parent(s): 6761a52

fix: resolve critical startup bugs, port mismatch, and leaderboard crash

Browse files

- Fix hardcoded port 8000 → 7860 in scripts/baseline.py
- Add proper argparse CLI with --url, --task, --seed flags
- Fix leaderboard rank calculation crashing after list slicing
- Fix WebSocket disconnect to catch WebSocketDisconnect and use discard()
- Fix incoherent grading weights in openenv.yaml
- Add .env.example with documented environment variables
- Add missing package init file for codereview_env/graders and convert the codereview_env package comment to a docstring

.DS_Store ADDED
Binary file (8.2 kB). View file
 
.env.example ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AgentOrg CodeReview — Environment Variables
2
+ # Copy this file to .env and fill in your values.
3
+
4
+ # API Configuration
5
+ APP_HOST=0.0.0.0
6
+ APP_PORT=7860
7
+ APP_ENV=development # development | production
8
+
9
+ # Security
10
+ API_KEY=changeme # Required in production; sent as X-API-Key header
11
+ API_KEY_ENABLED=false # Set to true in production
12
+
13
+ # Leaderboard
14
+ LEADERBOARD_MAX_ENTRIES=10 # Top-N entries to keep per task
15
+
16
+ # Logging
17
+ LOG_LEVEL=INFO # DEBUG | INFO | WARNING | ERROR
app.py CHANGED
@@ -87,11 +87,13 @@ def get_leaderboard():
87
  @app.post("/submit")
88
  def submit_to_leaderboard(submission: SubmitScore):
89
  entries = leaderboard.get(submission.task_id, [])
90
- entries.append(submission.model_dump())
91
- # Sort and keep top 5
92
  entries.sort(key=lambda x: x["score"], reverse=True)
 
93
  leaderboard[submission.task_id] = entries[:5]
94
- return {"status": "submitted", "rank": entries.index(submission.model_dump()) + 1 if submission.model_dump() in entries else None}
 
95
 
96
  @app.websocket("/ws/events")
97
  async def websocket_endpoint(websocket: WebSocket):
@@ -100,10 +102,10 @@ async def websocket_endpoint(websocket: WebSocket):
100
  try:
101
  while True:
102
  await websocket.receive_text()
103
- except Exception:
104
  pass
105
  finally:
106
- clients.remove(websocket)
107
 
108
  if __name__ == "__main__":
109
  import uvicorn
 
87
  @app.post("/submit")
88
  def submit_to_leaderboard(submission: SubmitScore):
89
  entries = leaderboard.get(submission.task_id, [])
90
+ new_entry = submission.model_dump()
91
+ entries.append(new_entry)
92
  entries.sort(key=lambda x: x["score"], reverse=True)
93
+ rank = entries.index(new_entry) + 1 # capture rank before slicing
94
  leaderboard[submission.task_id] = entries[:5]
95
+ in_top5 = rank <= 5
96
+ return {"status": "submitted", "rank": rank if in_top5 else None}
97
 
98
  @app.websocket("/ws/events")
99
  async def websocket_endpoint(websocket: WebSocket):
 
102
  try:
103
  while True:
104
  await websocket.receive_text()
105
+ except WebSocketDisconnect:
106
  pass
107
  finally:
108
+ clients.discard(websocket)
109
 
110
  if __name__ == "__main__":
111
  import uvicorn
codereview_env/__init__.py CHANGED
@@ -1 +1 @@
1
- # AgentOrg CodeReview Environment Package
 
1
+ """AgentOrg CodeReview Environment package."""
codereview_env/graders/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Grader modules for each task type."""
files/.DS_Store ADDED
Binary file (6.15 kB). View file
 
openenv.yaml CHANGED
@@ -16,12 +16,9 @@ tasks:
16
 
17
  grading:
18
  type: "deterministic"
19
- metrics:
20
- - name: "coverage"
21
- weight: 0.4
22
- - name: "precision"
23
- weight: 0.6
24
- - name: "severity_accuracy"
25
- weight: 0.7
26
- - name: "keyword_accuracy"
27
- weight: 0.3
 
16
 
17
  grading:
18
  type: "deterministic"
19
+ issue_matching:
20
+ coverage_weight: 0.4
21
+ precision_weight: 0.6
22
+ quality_scoring:
23
+ severity_weight: 0.7
24
+ keyword_weight: 0.3
 
 
 
scripts/baseline.py CHANGED
@@ -1,7 +1,7 @@
1
  import requests
2
  from codereview_env.models import TaskId, ActionType, Category, Severity, Verdict
3
 
4
- API_URL = "http://localhost:8000"
5
 
6
  def run_baseline(task_id: TaskId, seed: int = 42):
7
  # 1. Reset
@@ -67,8 +67,23 @@ def run_baseline(task_id: TaskId, seed: int = 42):
67
  print(f"Final Score: {result_resp.json()['final_score']}")
68
 
69
  if __name__ == "__main__":
70
- # Note: Requires app.py to be running
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
- run_baseline(TaskId.BUG_DETECTION, seed=0)
73
  except Exception as e:
74
  print(f"Baseline failed (is the API running?): {e}")
 
1
  import requests
2
  from codereview_env.models import TaskId, ActionType, Category, Severity, Verdict
3
 
4
+ API_URL = "http://localhost:7860"
5
 
6
  def run_baseline(task_id: TaskId, seed: int = 42):
7
  # 1. Reset
 
67
  print(f"Final Score: {result_resp.json()['final_score']}")
68
 
69
  if __name__ == "__main__":
70
+ import argparse
71
+
72
+ parser = argparse.ArgumentParser(description="Run the baseline agent against the CodeReview API.")
73
+ parser.add_argument("--url", default="http://localhost:7860", help="Base URL of the running API (default: http://localhost:7860)")
74
+ parser.add_argument("--task", default="bug_detection", help="Task ID to run (default: bug_detection)")
75
+ parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)")
76
+ args = parser.parse_args()
77
+
78
+ # Override module-level API_URL with CLI argument
79
+ API_URL = args.url
80
+
81
+ # Map string task id to TaskId enum
82
+ task_map = {t.value: t for t in TaskId}
83
+ if args.task not in task_map:
84
+ parser.error(f"Unknown task '{args.task}'. Choose from: {list(task_map.keys())}")
85
+
86
  try:
87
+ run_baseline(task_map[args.task], seed=args.seed)
88
  except Exception as e:
89
  print(f"Baseline failed (is the API running?): {e}")