Siteshcodes committed on
Commit
96939ad
·
1 Parent(s): cee7d71

Fix: add /tasks + /grade endpoints, fix inference.py logging, set PYTHONPATH

Browse files
Files changed (7) hide show
  1. .gitignore +8 -0
  2. Dockerfile +2 -0
  3. app.py +97 -70
  4. inference.py +60 -46
  5. pyproject.toml +1 -0
  6. requirements.txt +1 -0
  7. server/app.py +98 -70
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .pytest_cache/
Dockerfile CHANGED
@@ -12,6 +12,8 @@ COPY app.py .
12
  COPY inference.py .
13
  COPY openenv.yaml .
14
 
 
 
15
  EXPOSE 7860
16
 
17
  CMD ["python", "app.py"]
 
12
  COPY inference.py .
13
  COPY openenv.yaml .
14
 
15
+ ENV PYTHONPATH=/app
16
+
17
  EXPOSE 7860
18
 
19
  CMD ["python", "app.py"]
app.py CHANGED
@@ -1,97 +1,124 @@
1
- # the main environment file
 
2
 
3
  from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from environment.api_triage_env import APITriageEnv
6
 
7
- # creating an app and environment
8
- app = FastAPI()
9
  env = APITriageEnv()
10
 
11
- # defining a request model for /step endpoint
12
- # for fastapi so that it can understand that we expecting a JSON with an action field that is a text dtype
13
  class ActionRequest(BaseModel):
14
- action: str
15
 
16
 
17
- @app.post("/reset")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
19
  def reset():
20
- """
21
- Starting a new API debugging episode
22
- """
23
-
24
- print("INFO : reset endpoint is called , new debugging session started ")
25
- state = env.reset()
26
- return {
27
- "step" : state.step,
28
- "max_steps": state.max_steps,
29
- "incident_summary": state.incident_summary,
30
- "logs": state.logs,
31
- "response_code":state.response_code,
32
- "fix_applied": state.fix_applied,
33
- "is_resolved" : state.is_resolved
34
- }
35
 
36
 
37
  @app.get("/state")
38
-
39
  def state():
40
- """
41
- HELPs to return the current observation of the episode.
42
- """
43
- print("INFO : current state of the Episode as follows ")
44
- current = env.state()
45
- return {
46
- "step" : current.step,
47
- "max_steps": current.max_steps,
48
- "incident_summary": current.incident_summary,
49
- "logs": current.logs,
50
- "response_code": current.response_code,
51
- "fix_applied": current.fix_applied,
52
- "is_resolved" : current.is_resolved
53
- }
54
 
55
 
56
  @app.post("/step")
57
-
58
  def step(request: ActionRequest):
59
- """
60
- the agent sends an action and our environment will preocess it
61
- and update the state , returns what happened.
62
- """
63
-
64
- """
65
- action = what the agent wants to do (text)
66
- observation = what the agent sees after doing it (object with 7 fields)
67
- """
68
-
69
- action = request.action
70
- print(f"INFO : Action received: {action}")
71
-
72
-
73
- # calling env.step() from api_triage_env.py file to process the action
74
- observation , reward , done , info = env.step(action)
75
-
76
- # here returning the result
77
- return {
78
- "observation": {
79
- "step" : observation.step,
80
- "max_steps": observation.max_steps,
81
- "incident_summary": observation.incident_summary,
82
- "logs": observation.logs,
83
- "response_code": observation.response_code,
84
- "fix_applied": observation.fix_applied,
85
- "is_resolved" : observation.is_resolved
86
- },
87
- "reward": reward,
88
- "done": done,
89
- "info": info,
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def main():
93
  import uvicorn
94
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
95
 
 
96
  if __name__ == "__main__":
97
  main()
 
1
+ import importlib
2
+ import yaml
3
 
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel
6
  from environment.api_triage_env import APITriageEnv
7
 
8
+ app = FastAPI(title="API Triage Agent", version="1.0.0")
 
9
  env = APITriageEnv()
10
 
 
 
11
  class ActionRequest(BaseModel):
12
+ action: str
13
 
14
 
15
+ # load task definitions from openenv.yaml
16
+ def _load_tasks():
17
+ with open("openenv.yaml", "r") as f:
18
+ cfg = yaml.safe_load(f)
19
+ return cfg.get("tasks", [])
20
+
21
+
22
+ @app.get("/")
23
+ def root():
24
+ return {"status": "ok", "environment": "api-triage-agent"}
25
+
26
+
27
+ @app.get("/health")
28
+ def health():
29
+ return {"status": "healthy"}
30
+
31
 
32
+ @app.post("/reset")
33
  def reset():
34
+ state = env.reset()
35
+ return {
36
+ "observation": {
37
+ "step": state.step,
38
+ "max_steps": state.max_steps,
39
+ "incident_summary": state.incident_summary,
40
+ "logs": state.logs,
41
+ "response_code": state.response_code,
42
+ "fix_applied": state.fix_applied,
43
+ "is_resolved": state.is_resolved,
44
+ },
45
+ "reward": None,
46
+ "done": False,
47
+ }
 
48
 
49
 
50
  @app.get("/state")
 
51
  def state():
52
+ current = env.state()
53
+ return {
54
+ "step": current.step,
55
+ "max_steps": current.max_steps,
56
+ "incident_summary": current.incident_summary,
57
+ "logs": current.logs,
58
+ "response_code": current.response_code,
59
+ "fix_applied": current.fix_applied,
60
+ "is_resolved": current.is_resolved,
61
+ }
 
 
 
 
62
 
63
 
64
  @app.post("/step")
 
65
  def step(request: ActionRequest):
66
+ action = request.action
67
+ observation, reward, done, info = env.step(action)
68
+ return {
69
+ "observation": {
70
+ "step": observation.step,
71
+ "max_steps": observation.max_steps,
72
+ "incident_summary": observation.incident_summary,
73
+ "logs": observation.logs,
74
+ "response_code": observation.response_code,
75
+ "fix_applied": observation.fix_applied,
76
+ "is_resolved": observation.is_resolved,
77
+ },
78
+ "reward": reward,
79
+ "done": done,
80
+ "info": info,
81
+ }
82
+
83
+
84
+ @app.get("/tasks")
85
+ def list_tasks():
86
+ """Return all tasks defined in openenv.yaml with their graders."""
87
+ tasks = _load_tasks()
88
+ return {
89
+ "tasks": [
90
+ {
91
+ "id": t["id"],
92
+ "name": t["name"],
93
+ "description": t["description"],
94
+ "difficulty": t["difficulty"],
95
+ "grader": t["grader"],
96
+ }
97
+ for t in tasks
98
+ ]
99
+ }
100
+
101
+
102
+ @app.post("/grade/{task_id}")
103
+ def grade_task(task_id: str):
104
+ """Run the grader for a specific task and return the score."""
105
+ tasks = _load_tasks()
106
+ task = next((t for t in tasks if t["id"] == task_id), None)
107
+ if task is None:
108
+ return {"error": f"Task '{task_id}' not found", "score": 0.0}
109
+
110
+ grader_ref = task["grader"]
111
+ module_path, func_name = grader_ref.rsplit(":", 1)
112
+ mod = importlib.import_module(module_path)
113
+ grade_fn = getattr(mod, func_name)
114
+ score = grade_fn()
115
+ return {"task_id": task_id, "score": score}
116
+
117
 
118
  def main():
119
  import uvicorn
120
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
121
 
122
+
123
  if __name__ == "__main__":
124
  main()
inference.py CHANGED
@@ -23,6 +23,7 @@ MAX_STEPS = 10
23
  TEMPERATURE = 0.7
24
  MAX_TOKENS = 50
25
  SUCCESS_SCORE_THRESHOLD = 0.5
 
26
 
27
  # ============================================
28
  # System Prompt
@@ -107,60 +108,73 @@ def get_model_action(client: OpenAI, step: int, observation, last_reward: float,
107
  return "inspect_logs"
108
 
109
  # ============================================
110
- # Main Async Function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # ============================================
112
  async def main() -> None:
113
  if not API_KEY:
114
  print("[ERROR] HF_TOKEN environment variable not set", flush=True)
115
  return
116
-
117
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
118
- env = APITriageEnv(max_steps=MAX_STEPS)
119
-
120
- # All 6 task IDs matching openenv.yaml — each evaluated explicitly
121
  task_ids = ["auth_error", "missing_fields", "rate_limit", "timeout", "wrong_endpoint", "server_error"]
122
-
123
- log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
124
-
125
  for tid in task_ids:
126
- history: List[str] = []
127
- rewards: List[float] = []
128
- steps_taken = 0
129
- success = False
130
-
131
- try:
132
- # Reset env and FORCE the specific incident type (no randomness)
133
- observation = env.reset()
134
- env.incident = get_incident_by_type(tid)
135
- observation = env.state() # refresh observation with forced incident
136
-
137
- last_reward = 0.0
138
-
139
- for step in range(1, MAX_STEPS + 1):
140
- action = get_model_action(client, step, observation, last_reward, history)
141
- observation, reward, done, info = env.step(action)
142
-
143
- rewards.append(reward)
144
- steps_taken = step
145
- last_reward = reward
146
-
147
- log_step(step=step, action=action, reward=reward, done=done, error=None)
148
- history.append(f"Step {step}: {action} -> reward {reward:.2f}")
149
-
150
- if done:
151
- success = info.get("resolution") == "success"
152
- break
153
-
154
- # Score strictly between 0 and 1
155
- task_score = 0.95 if success else 0.05
156
- log_end(success=success, steps=steps_taken, score=task_score, rewards=rewards)
157
-
158
- except Exception as e:
159
- print(f"[DEBUG] Error in task {tid}: {e}", flush=True)
160
- log_end(success=False, steps=0, score=0.05, rewards=[0.0])
161
 
162
- # ============================================
163
- # Run
164
- # ============================================
165
  if __name__ == "__main__":
166
  asyncio.run(main())
 
23
  TEMPERATURE = 0.7
24
  MAX_TOKENS = 50
25
  SUCCESS_SCORE_THRESHOLD = 0.5
26
+ MAX_TOTAL_REWARD = 20.5 # best case: inspect_logs(0.5) + fix(5.0) + resolve(15.0)
27
 
28
  # ============================================
29
  # System Prompt
 
108
  return "inspect_logs"
109
 
110
  # ============================================
111
+ # Run a single task episode
112
+ # ============================================
113
+ def run_task(client: OpenAI, task_id: str) -> None:
114
+ """Run one task: [START] -> steps -> [END]."""
115
+ env = APITriageEnv(max_steps=MAX_STEPS)
116
+
117
+ history: List[str] = []
118
+ rewards: List[float] = []
119
+ steps_taken = 0
120
+ score = 0.0
121
+ success = False
122
+
123
+ log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
124
+
125
+ try:
126
+ # Reset env and force the specific incident
127
+ env.reset()
128
+ env.incident = get_incident_by_type(task_id)
129
+ env.fix_applied = False
130
+ env.done = False
131
+ env.step_counter = 0
132
+ env.total_reward = 0.0
133
+ observation = env.state()
134
+
135
+ last_reward = 0.0
136
+
137
+ for step in range(1, MAX_STEPS + 1):
138
+ action = get_model_action(client, step, observation, last_reward, history)
139
+ observation, reward, done, info = env.step(action)
140
+
141
+ rewards.append(reward)
142
+ steps_taken = step
143
+ last_reward = reward
144
+
145
+ log_step(step=step, action=action, reward=reward, done=done, error=None)
146
+ history.append(f"Step {step}: {action} -> reward {reward:.2f}")
147
+
148
+ if done:
149
+ success = info.get("resolution") == "success"
150
+ break
151
+
152
+ # Compute score from actual rewards, clamped to [0, 1]
153
+ score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
154
+ score = min(max(score, 0.0), 1.0)
155
+ success = score >= SUCCESS_SCORE_THRESHOLD
156
+
157
+ except Exception as e:
158
+ print(f"[DEBUG] Error in task {task_id}: {e}", flush=True)
159
+
160
+ finally:
161
+ log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
162
+
163
+ # ============================================
164
+ # Main
165
  # ============================================
166
  async def main() -> None:
167
  if not API_KEY:
168
  print("[ERROR] HF_TOKEN environment variable not set", flush=True)
169
  return
170
+
171
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
172
+
173
+ # All 6 task IDs from openenv.yaml
 
174
  task_ids = ["auth_error", "missing_fields", "rate_limit", "timeout", "wrong_endpoint", "server_error"]
175
+
 
 
176
  for tid in task_ids:
177
+ run_task(client, tid)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
 
 
 
179
  if __name__ == "__main__":
180
  asyncio.run(main())
pyproject.toml CHANGED
@@ -16,6 +16,7 @@ dependencies = [
16
  "fastapi>=0.100.0",
17
  "uvicorn>=0.23.0",
18
  "openenv-core>=0.2.0",
 
19
  ]
20
 
21
  [project.scripts]
 
16
  "fastapi>=0.100.0",
17
  "uvicorn>=0.23.0",
18
  "openenv-core>=0.2.0",
19
+ "pyyaml>=6.0.0",
20
  ]
21
 
22
  [project.scripts]
requirements.txt CHANGED
@@ -6,3 +6,4 @@ numpy>=1.24.0
6
  pytest>=7.0.0
7
  fastapi>=0.100.0
8
  uvicorn>=0.23.0
 
 
6
  pytest>=7.0.0
7
  fastapi>=0.100.0
8
  uvicorn>=0.23.0
9
+ pyyaml>=6.0.0
server/app.py CHANGED
@@ -1,96 +1,124 @@
1
- # the main environment file
 
2
 
3
  from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from environment.api_triage_env import APITriageEnv
6
 
7
- # creating an app and environment
8
- app = FastAPI()
9
  env = APITriageEnv()
10
 
11
- # defining a request model for /step endpoint
12
- # for fastapi so that it can understand that we expecting a JSON with an action field that is a text dtype
13
  class ActionRequest(BaseModel):
14
- action: str
15
 
16
 
17
- @app.post("/reset")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
19
  def reset():
20
- """
21
- Starting a new API debugging episode
22
- """
23
-
24
- print("INFO : reset endpoint is called , new debugging session started ")
25
- state = env.reset()
26
- return {
27
- "step" : state.step,
28
- "max_steps": state.max_steps,
29
- "incident_summary": state.incident_summary,
30
- "logs": state.logs,
31
- "response_code":state.response_code,
32
- "fix_applied": state.fix_applied,
33
- "is_resolved" : state.is_resolved
34
- }
35
 
36
 
37
  @app.get("/state")
38
-
39
  def state():
40
- """
41
- HELPs to return the current observation of the episode.
42
- """
43
- print("INFO : current state of the Episode as follows ")
44
- current = env.state()
45
- return {
46
- "step" : current.step,
47
- "max_steps": current.max_steps,
48
- "incident_summary": current.incident_summary,
49
- "logs": current.logs,
50
- "response_code": current.response_code,
51
- "fix_applied": current.fix_applied,
52
- "is_resolved" : current.is_resolved
53
- }
54
 
55
 
56
  @app.post("/step")
57
-
58
  def step(request: ActionRequest):
59
- """
60
- the agent sends an action and our environment will preocess it
61
- and update the state , returns what happened.
62
- """
63
-
64
- """
65
- action = what the agent wants to do (text)
66
- observation = what the agent sees after doing it (object with 7 fields)
67
- """
68
-
69
- action = request.action
70
- print(f"INFO : Action received: {action}")
71
-
72
-
73
- # calling env.step() from api_triage_env.py file to process the action
74
- observation , reward , done , info = env.step(action)
75
-
76
- # here returning the result
77
- return {
78
- "observation": {
79
- "step" : observation.step,
80
- "max_steps": observation.max_steps,
81
- "incident_summary": observation.incident_summary,
82
- "logs": observation.logs,
83
- "response_code": observation.response_code,
84
- "fix_applied": observation.fix_applied,
85
- "is_resolved" : observation.is_resolved
86
- },
87
- "reward": reward,
88
- "done": done,
89
- "info": info,
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def main():
92
  import uvicorn
93
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
94
 
 
95
  if __name__ == "__main__":
96
  main()
 
1
+ import importlib
2
+ import yaml
3
 
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel
6
  from environment.api_triage_env import APITriageEnv
7
 
8
+ app = FastAPI(title="API Triage Agent", version="1.0.0")
 
9
  env = APITriageEnv()
10
 
 
 
11
  class ActionRequest(BaseModel):
12
+ action: str
13
 
14
 
15
+ # load task definitions from openenv.yaml
16
+ def _load_tasks():
17
+ with open("openenv.yaml", "r") as f:
18
+ cfg = yaml.safe_load(f)
19
+ return cfg.get("tasks", [])
20
+
21
+
22
+ @app.get("/")
23
+ def root():
24
+ return {"status": "ok", "environment": "api-triage-agent"}
25
+
26
+
27
+ @app.get("/health")
28
+ def health():
29
+ return {"status": "healthy"}
30
+
31
 
32
+ @app.post("/reset")
33
  def reset():
34
+ state = env.reset()
35
+ return {
36
+ "observation": {
37
+ "step": state.step,
38
+ "max_steps": state.max_steps,
39
+ "incident_summary": state.incident_summary,
40
+ "logs": state.logs,
41
+ "response_code": state.response_code,
42
+ "fix_applied": state.fix_applied,
43
+ "is_resolved": state.is_resolved,
44
+ },
45
+ "reward": None,
46
+ "done": False,
47
+ }
 
48
 
49
 
50
  @app.get("/state")
 
51
  def state():
52
+ current = env.state()
53
+ return {
54
+ "step": current.step,
55
+ "max_steps": current.max_steps,
56
+ "incident_summary": current.incident_summary,
57
+ "logs": current.logs,
58
+ "response_code": current.response_code,
59
+ "fix_applied": current.fix_applied,
60
+ "is_resolved": current.is_resolved,
61
+ }
 
 
 
 
62
 
63
 
64
  @app.post("/step")
 
65
  def step(request: ActionRequest):
66
+ action = request.action
67
+ observation, reward, done, info = env.step(action)
68
+ return {
69
+ "observation": {
70
+ "step": observation.step,
71
+ "max_steps": observation.max_steps,
72
+ "incident_summary": observation.incident_summary,
73
+ "logs": observation.logs,
74
+ "response_code": observation.response_code,
75
+ "fix_applied": observation.fix_applied,
76
+ "is_resolved": observation.is_resolved,
77
+ },
78
+ "reward": reward,
79
+ "done": done,
80
+ "info": info,
81
+ }
82
+
83
+
84
+ @app.get("/tasks")
85
+ def list_tasks():
86
+ """Return all tasks defined in openenv.yaml with their graders."""
87
+ tasks = _load_tasks()
88
+ return {
89
+ "tasks": [
90
+ {
91
+ "id": t["id"],
92
+ "name": t["name"],
93
+ "description": t["description"],
94
+ "difficulty": t["difficulty"],
95
+ "grader": t["grader"],
96
+ }
97
+ for t in tasks
98
+ ]
99
+ }
100
+
101
+
102
+ @app.post("/grade/{task_id}")
103
+ def grade_task(task_id: str):
104
+ """Run the grader for a specific task and return the score."""
105
+ tasks = _load_tasks()
106
+ task = next((t for t in tasks if t["id"] == task_id), None)
107
+ if task is None:
108
+ return {"error": f"Task '{task_id}' not found", "score": 0.0}
109
+
110
+ grader_ref = task["grader"]
111
+ module_path, func_name = grader_ref.rsplit(":", 1)
112
+ mod = importlib.import_module(module_path)
113
+ grade_fn = getattr(mod, func_name)
114
+ score = grade_fn()
115
+ return {"task_id": task_id, "score": score}
116
+
117
+
118
  def main():
119
  import uvicorn
120
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
121
 
122
+
123
  if __name__ == "__main__":
124
  main()