Spaces:

ArshVerma
/

CodeLens

Sleeping

ArshVerma committed on Apr 1

Commit

8103ef8

1 Parent(s): 6761a52

fix: resolve critical startup bugs, port mismatch, and leaderboard crash

- Fix hardcoded port 8000 → 7860 in scripts/baseline.py
- Add proper argparse CLI with --url, --task, --seed flags
- Fix leaderboard rank calculation crashing after list slicing
- Fix WebSocket disconnect to catch WebSocketDisconnect and use discard()
- Fix incoherent grading weights in openenv.yaml
- Add .env.example with documented environment variables
- Add missing package init files for codereview_env and graders

Files changed (8) hide show

.DS_Store +0 -0
.env.example +17 -0
app.py +7 -5
codereview_env/__init__.py +1 -1
codereview_env/graders/__init__.py +1 -0
files/.DS_Store +0 -0
openenv.yaml +6 -9
scripts/baseline.py +18 -3

.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

.env.example ADDED Viewed

	@@ -0,0 +1,17 @@

+# AgentOrg CodeReview — Environment Variables
+# Copy this file to .env and fill in your values.
+# API Configuration
+APP_HOST=0.0.0.0
+APP_PORT=7860
+APP_ENV=development          # development | production
+# Security
+API_KEY=changeme             # Required in production; sent as X-API-Key header
+API_KEY_ENABLED=false        # Set to true in production
+# Leaderboard
+LEADERBOARD_MAX_ENTRIES=10   # Top-N entries to keep per task
+# Logging
+LOG_LEVEL=INFO               # DEBUG | INFO | WARNING | ERROR

app.py CHANGED Viewed

@@ -87,11 +87,13 @@ def get_leaderboard():
 @app.post("/submit")
 def submit_to_leaderboard(submission: SubmitScore):
     entries = leaderboard.get(submission.task_id, [])
-    entries.append(submission.model_dump())
-    # Sort and keep top 5
     entries.sort(key=lambda x: x["score"], reverse=True)
     leaderboard[submission.task_id] = entries[:5]
-    return {"status": "submitted", "rank": entries.index(submission.model_dump()) + 1 if submission.model_dump() in entries else None}
 @app.websocket("/ws/events")
 async def websocket_endpoint(websocket: WebSocket):
@@ -100,10 +102,10 @@ async def websocket_endpoint(websocket: WebSocket):
     try:
         while True:
             await websocket.receive_text()
-    except Exception:
         pass
     finally:
-        clients.remove(websocket)
 if __name__ == "__main__":
     import uvicorn

 @app.post("/submit")
 def submit_to_leaderboard(submission: SubmitScore):
     entries = leaderboard.get(submission.task_id, [])
+    new_entry = submission.model_dump()
+    entries.append(new_entry)
     entries.sort(key=lambda x: x["score"], reverse=True)
+    rank = entries.index(new_entry) + 1   # capture rank before slicing
     leaderboard[submission.task_id] = entries[:5]
+    in_top5 = rank <= 5
+    return {"status": "submitted", "rank": rank if in_top5 else None}
 @app.websocket("/ws/events")
 async def websocket_endpoint(websocket: WebSocket):
     try:
         while True:
             await websocket.receive_text()
+    except WebSocketDisconnect:
         pass
     finally:
+        clients.discard(websocket)
 if __name__ == "__main__":
     import uvicorn

codereview_env/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- # AgentOrg CodeReview Environment Package


1	+ """AgentOrg CodeReview Environment package."""

codereview_env/graders/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Grader modules for each task type."""

files/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

openenv.yaml CHANGED Viewed

@@ -16,12 +16,9 @@ tasks:
 grading:
   type: "deterministic"
-  metrics:
-    - name: "coverage"
-      weight: 0.4
-    - name: "precision"
-      weight: 0.6
-    - name: "severity_accuracy"
-      weight: 0.7
-    - name: "keyword_accuracy"
-      weight: 0.3

 grading:
   type: "deterministic"
+  issue_matching:
+    coverage_weight: 0.4
+    precision_weight: 0.6
+  quality_scoring:
+    severity_weight: 0.7
+    keyword_weight: 0.3

scripts/baseline.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import requests
 from codereview_env.models import TaskId, ActionType, Category, Severity, Verdict
-API_URL = "http://localhost:8000"
 def run_baseline(task_id: TaskId, seed: int = 42):
     # 1. Reset
@@ -67,8 +67,23 @@ def run_baseline(task_id: TaskId, seed: int = 42):
     print(f"Final Score: {result_resp.json()['final_score']}")
 if __name__ == "__main__":
-    # Note: Requires app.py to be running
     try:
-        run_baseline(TaskId.BUG_DETECTION, seed=0)
     except Exception as e:
         print(f"Baseline failed (is the API running?): {e}")

 import requests
 from codereview_env.models import TaskId, ActionType, Category, Severity, Verdict
+API_URL = "http://localhost:7860"
 def run_baseline(task_id: TaskId, seed: int = 42):
     # 1. Reset
     print(f"Final Score: {result_resp.json()['final_score']}")
 if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Run the baseline agent against the CodeReview API.")
+    parser.add_argument("--url", default="http://localhost:7860", help="Base URL of the running API (default: http://localhost:7860)")
+    parser.add_argument("--task", default="bug_detection", help="Task ID to run (default: bug_detection)")
+    parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)")
+    args = parser.parse_args()
+    # Override module-level API_URL with CLI argument
+    API_URL = args.url
+    # Map string task id to TaskId enum
+    task_map = {t.value: t for t in TaskId}
+    if args.task not in task_map:
+        parser.error(f"Unknown task '{args.task}'. Choose from: {list(task_map.keys())}")
     try:
+        run_baseline(task_map[args.task], seed=args.seed)
     except Exception as e:
         print(f"Baseline failed (is the API running?): {e}")