sameerkatte Claude Opus 4.6 (1M context) committed on
Commit
8d618ab
·
1 Parent(s): ee2d45e

Pass openenv validate: add multi-mode deployment + runtime endpoints

Browse files

Local validator (openenv validate .):
- Add pyproject.toml with openenv-core>=0.2.0 dependency
- Add server/app.py with main() entry point
- Add uv.lock for reproducible builds
- Update openenv.yaml to spec_version: 1 format
- Result: [OK] Ready for multi-mode deployment (docker, openenv_serve, uv_run, python_module)

Runtime validator (openenv validate --url):
- Fix /health to return {"status": "healthy"} (was "ok")
- Add /metadata endpoint (name, description, version, tasks)
- Add /schema endpoint (action, observation, state JSON schemas)
- Add /mcp endpoint (minimal JSON-RPC 2.0 shim with tools/list)
- Result: 6/6 criteria pass

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (6) hide show
  1. app/main.py +110 -5
  2. openenv.yaml +5 -19
  3. pyproject.toml +60 -0
  4. server/__init__.py +0 -0
  5. server/app.py +29 -0
  6. uv.lock +0 -0
app/main.py CHANGED
@@ -1,15 +1,23 @@
1
- from fastapi import FastAPI, HTTPException
2
- from typing import List, Optional
3
 
4
  from app.models import (
5
- ResetRequest, ResetResponse, StepRequest, StepResponse, TaskInfo
 
6
  )
7
  from app.environment import SecureReviewEnvironment
8
 
 
 
 
 
 
 
 
9
  app = FastAPI(
10
  title="SecureReview",
11
  version="1.0.0",
12
- description="AI Security Code Review Environment for OpenEnv",
13
  )
14
 
15
  env = SecureReviewEnvironment()
@@ -19,7 +27,104 @@ DEFAULT_TASK_ID = "dependency_review"
19
 
20
  @app.get("/health")
21
  async def health():
22
- return {"status": "ok"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  @app.get("/tasks", response_model=List[TaskInfo])
 
1
+ from fastapi import FastAPI, HTTPException, Request
2
+ from typing import List, Optional, Any, Dict
3
 
4
  from app.models import (
5
+ ResetRequest, ResetResponse, StepRequest, StepResponse, TaskInfo,
6
+ Action, Observation
7
  )
8
  from app.environment import SecureReviewEnvironment
9
 
10
+ ENV_NAME = "securereview"
11
+ ENV_DESCRIPTION = (
12
+ "AI Security Code Review Environment — evaluates an agent's ability "
13
+ "to identify security vulnerabilities across dependency supply chains, "
14
+ "infrastructure-as-code, and database migrations"
15
+ )
16
+
17
  app = FastAPI(
18
  title="SecureReview",
19
  version="1.0.0",
20
+ description=ENV_DESCRIPTION,
21
  )
22
 
23
  env = SecureReviewEnvironment()
 
27
 
28
  @app.get("/health")
29
  async def health():
30
+ """OpenEnv health endpoint — must return ``status: healthy``."""
31
+ return {"status": "healthy"}
32
+
33
+
34
@app.get("/metadata")
async def metadata():
    """Return the OpenEnv metadata document for this environment.

    The payload carries the environment name, description, version and
    author, plus one serialized entry (``model_dump()``) for every task
    returned by ``env.get_tasks()``.
    """
    task_entries = [task.model_dump() for task in env.get_tasks()]
    return {
        "name": ENV_NAME,
        "description": ENV_DESCRIPTION,
        # Read the version from the FastAPI app instead of repeating the
        # literal, so it cannot drift from ``FastAPI(version=...)``.
        "version": app.version,
        "author": "Team CookHouse",
        "tasks": task_entries,
    }
44
+
45
+
46
@app.get("/schema")
async def schema():
    """Serve the OpenEnv schema document.

    ``action`` and ``observation`` are the JSON schemas generated from the
    ``Action`` and ``Observation`` models. ``state`` is a hand-written JSON
    schema that lists the state fields and their JSON types.
    """
    state_properties = {
        "task_id": {"type": "string"},
        "scenario_id": {"type": "string"},
        "current_step": {"type": "integer"},
        "max_steps": {"type": "integer"},
        "done": {"type": "boolean"},
        "findings_count": {"type": "integer"},
        "revealed_files": {"type": "array", "items": {"type": "string"}},
        # A number or null is accepted for the final score.
        "final_score": {"type": ["number", "null"]},
    }
    state_schema = {"type": "object", "properties": state_properties}

    return {
        "action": Action.model_json_schema(),
        "observation": Observation.model_json_schema(),
        "state": state_schema,
    }
66
+
67
+
68
@app.post("/mcp")
async def mcp(request: Request):
    """Minimal JSON-RPC 2.0 MCP endpoint for OpenEnv validator compatibility.

    Exposes the environment's available tasks as MCP tools. This is a
    lightweight shim — agents should prefer the typed ``/reset`` and ``/step``
    endpoints for interaction.

    Handled methods:
      * ``initialize`` — protocol version, capabilities and server info.
      * ``tools/list`` — one tool per task from ``env.get_tasks()``.
      * ``tools/call`` — a text message pointing at ``/reset`` and ``/step``.
      * anything else (including a missing method) — a small acknowledgement
        result, so permissive probes still get a JSON-RPC 2.0 reply.

    Returns:
        A JSON-RPC 2.0 response object. A body that is valid JSON but not a
        JSON object yields an ``error`` response with code ``-32600`` instead
        of a ``result``.
    """
    try:
        payload: Any = await request.json()
    except ValueError:
        # An empty or malformed body (JSON / Unicode decode errors are both
        # ValueError subclasses) is treated as an empty request.
        payload = {}

    if not isinstance(payload, dict):
        # Arrays, strings, numbers etc. have no ``.get``; answer with a
        # JSON-RPC "Invalid Request" error rather than failing with HTTP 500.
        return {
            "jsonrpc": "2.0",
            "id": None,
            "error": {
                "code": -32600,
                "message": "Invalid Request: expected a JSON object",
            },
        }

    # A missing ``id`` defaults to 1 so the reply is always well-formed.
    req_id = payload.get("id", 1)
    method = payload.get("method", "")

    if method == "initialize":
        result: Dict[str, Any] = {
            "protocolVersion": "2024-11-05",
            "capabilities": {"tools": {"listChanged": False}},
            # Same version string the FastAPI app was created with.
            "serverInfo": {"name": ENV_NAME, "version": app.version},
        }
    elif method == "tools/list":
        result = {
            "tools": [
                {
                    "name": task.id,
                    "description": task.description,
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "scenario_id": {"type": "string"},
                        },
                    },
                }
                for task in env.get_tasks()
            ]
        }
    elif method == "tools/call":
        result = {
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Use the HTTP /reset and /step endpoints to interact "
                        "with SecureReview. MCP tool-calling mode is not the "
                        "primary interface for this environment."
                    ),
                }
            ],
            "isError": False,
        }
    else:
        # Unknown or absent method: keep the permissive acknowledgement.
        result = {"ok": True, "env": ENV_NAME}

    return {
        "jsonrpc": "2.0",
        "id": req_id,
        "result": result,
    }
128
 
129
 
130
  @app.get("/tasks", response_model=List[TaskInfo])
openenv.yaml CHANGED
@@ -1,12 +1,13 @@
 
1
  name: securereview
 
 
 
 
2
  version: "1.0.0"
3
  description: "AI Security Code Review Environment — evaluates an agent's ability to identify security vulnerabilities across dependency supply chains, infrastructure-as-code, and database migrations"
4
  author: "Team CookHouse"
5
 
6
- environment:
7
- type: http
8
- url: "http://localhost:7860"
9
-
10
  tasks:
11
  - id: dependency_review
12
  name: "Dependency & Supply Chain Review"
@@ -25,18 +26,3 @@ tasks:
25
  description: "Review SQL migration scripts for backward-incompatibility, safety risks, and production impact"
26
  difficulty: hard
27
  max_steps: 35
28
-
29
- action_space:
30
- type: structured
31
- actions:
32
- - report_finding
33
- - request_context
34
- - request_file_list
35
- - mark_complete
36
-
37
- observation_space:
38
- type: structured
39
- fields:
40
- - context (ReviewContext)
41
- - findings_so_far (List[Finding])
42
- - feedback (Optional[str])
 
1
+ spec_version: 1
2
  name: securereview
3
+ type: space
4
+ runtime: fastapi
5
+ app: app.main:app
6
+ port: 7860
7
  version: "1.0.0"
8
  description: "AI Security Code Review Environment — evaluates an agent's ability to identify security vulnerabilities across dependency supply chains, infrastructure-as-code, and database migrations"
9
  author: "Team CookHouse"
10
 
 
 
 
 
11
  tasks:
12
  - id: dependency_review
13
  name: "Dependency & Supply Chain Review"
 
26
  description: "Review SQL migration scripts for backward-incompatibility, safety risks, and production impact"
27
  difficulty: hard
28
  max_steps: 35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[build-system]
# The PEP 621 [project] table and [tool.setuptools] settings in this file are
# only read by setuptools >= 61; older releases ignore them.
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "openenv-securereview"
7
+ version = "1.0.0"
8
+ description = "SecureReview OpenEnv environment — AI security code review across dependency supply chains, infrastructure-as-code, and database migrations"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Team CookHouse" },
14
+ ]
15
+ keywords = ["openenv", "security", "code-review", "agent", "evaluation", "rl"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Topic :: Security",
24
+ "Topic :: Software Development :: Quality Assurance",
25
+ ]
26
+ dependencies = [
27
+ "openenv-core>=0.2.0",
28
+ "fastapi>=0.115.0",
29
+ "uvicorn[standard]>=0.30.0",
30
+ "pydantic>=2.9.0",
31
+ "openai>=1.0.0",
32
+ "requests>=2.28.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = [
37
+ "pytest>=8.0.0",
38
+ "pytest-cov>=4.0.0",
39
+ "httpx>=0.27.0",
40
+ ]
41
+
42
+ [project.urls]
43
+ Homepage = "https://huggingface.co/spaces/sam25kat/securereview"
44
+ Repository = "https://github.com/sam25kat/Secure_Reveiw"
45
+ Documentation = "https://huggingface.co/spaces/sam25kat/securereview"
46
+
47
+ [project.scripts]
48
+ server = "server.app:main"
49
+
50
+ [tool.setuptools]
51
+ include-package-data = true
52
+ packages = [
53
+ "app",
54
+ "app.graders",
55
+ "app.tasks",
56
+ "server",
57
+ ]
58
+
59
+ [tool.setuptools.package-data]
60
+ "app.tasks" = ["scenarios/**/*"]
server/__init__.py ADDED
File without changes
server/app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenEnv server entry point for SecureReview.
2
+
3
+ This module re-exports the FastAPI app defined in ``app.main`` so the
4
+ environment is discoverable at the canonical ``server.app:app`` location
5
+ expected by ``openenv validate`` / ``openenv serve``. The ``main()``
6
+ function provides a direct-run entry point used by the ``[project.scripts]``
7
+ declaration in ``pyproject.toml``.
8
+ """
9
+
10
+ from app.main import app
11
+
12
+ __all__ = ["app", "main"]
13
+
14
+
15
def main() -> None:
    """Start uvicorn serving the SecureReview FastAPI app.

    Binds to ``0.0.0.0``. The port is read from the ``PORT`` environment
    variable and falls back to 7860 when it is unset.

    Entry point for ``uv run --project . server`` and
    ``python -m server.app``.
    """
    import os

    import uvicorn

    listen_port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
26
+
27
+
28
+ if __name__ == "__main__":
29
+ main()
uv.lock ADDED
The diff for this file is too large to render. See raw diff