Spaces:

Kavya988
/

API_DEBUG_SOLVER

Sleeping

App Files Community

Siteshcodes committed on Apr 12

Commit

96939ad

1 Parent(s): cee7d71

Fix: add /tasks + /grade endpoints, fix inference.py logging, set PYTHONPATH

Browse files

Files changed (7) hide show

.gitignore +8 -0
Dockerfile +2 -0
app.py +97 -70
inference.py +60 -46
pyproject.toml +1 -0
requirements.txt +1 -0
server/app.py +98 -70

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+__pycache__/
+*.pyc
+*.pyo
+.env
+*.egg-info/
+dist/
+build/
+.pytest_cache/

Dockerfile CHANGED Viewed

@@ -12,6 +12,8 @@ COPY app.py .
 COPY inference.py .
 COPY openenv.yaml .
 EXPOSE 7860
 CMD ["python", "app.py"]

 COPY inference.py .
 COPY openenv.yaml .
+ENV PYTHONPATH=/app
 EXPOSE 7860
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,97 +1,124 @@
-# the main environment file
 from fastapi import FastAPI
 from pydantic import BaseModel
 from environment.api_triage_env import APITriageEnv
-# creating an app and environment
-app = FastAPI()
 env = APITriageEnv()
-# defining a request model for /step endpoint
-# for fastapi so that it can understand that we expecting a JSON with an action field that is a text dtype
 class ActionRequest(BaseModel):
-  action: str
-@app.post("/reset")
 def reset():
-  """
-  Starting a new API debugging episode
-  """
-  print("INFO : reset endpoint is called , new debugging session started ")
-  state = env.reset()
-  return {
-    "step" : state.step,
-    "max_steps": state.max_steps,
-    "incident_summary": state.incident_summary,
-    "logs": state.logs,
-    "response_code":state.response_code,
-    "fix_applied": state.fix_applied,
-    "is_resolved" : state.is_resolved
-  }
 @app.get("/state")
 def state():
-  """
-  HELPs to return the current observation of the episode.
-  """
-  print("INFO : current state of the Episode as follows ")
-  current = env.state()
-  return {
-    "step" : current.step,
-    "max_steps": current.max_steps,
-    "incident_summary": current.incident_summary,
-    "logs": current.logs,
-    "response_code": current.response_code,
-    "fix_applied": current.fix_applied,
-    "is_resolved" : current.is_resolved
-  }
 @app.post("/step")
 def step(request: ActionRequest):
-  """
-  the agent sends an action and our environment will preocess it
-  and update the state , returns what happened.
-  """
-  """
-  action = what the agent wants to do (text)
-  observation = what the agent sees after doing it (object with 7 fields)
-  """
-  action = request.action
-  print(f"INFO : Action received: {action}")
-  # calling env.step() from api_triage_env.py file to process the action
-  observation , reward , done , info = env.step(action)
-  # here returning the result
-  return {
-    "observation": {
-    "step" : observation.step,
-    "max_steps": observation.max_steps,
-    "incident_summary": observation.incident_summary,
-    "logs": observation.logs,
-    "response_code": observation.response_code,
-    "fix_applied": observation.fix_applied,
-    "is_resolved" : observation.is_resolved
-    },
-    "reward": reward,
-    "done": done,
-    "info": info,
-  }
 def main():
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 if __name__ == "__main__":
     main()

+import importlib
+import yaml
 from fastapi import FastAPI
 from pydantic import BaseModel
 from environment.api_triage_env import APITriageEnv
+app = FastAPI(title="API Triage Agent", version="1.0.0")
 env = APITriageEnv()
 class ActionRequest(BaseModel):
+    action: str
+# load task definitions from openenv.yaml
+def _load_tasks():
+    with open("openenv.yaml", "r") as f:
+        cfg = yaml.safe_load(f)
+    return cfg.get("tasks", [])
+@app.get("/")
+def root():
+    return {"status": "ok", "environment": "api-triage-agent"}
+@app.get("/health")
+def health():
+    return {"status": "healthy"}
+@app.post("/reset")
 def reset():
+    state = env.reset()
+    return {
+        "observation": {
+            "step": state.step,
+            "max_steps": state.max_steps,
+            "incident_summary": state.incident_summary,
+            "logs": state.logs,
+            "response_code": state.response_code,
+            "fix_applied": state.fix_applied,
+            "is_resolved": state.is_resolved,
+        },
+        "reward": None,
+        "done": False,
+    }
 @app.get("/state")
 def state():
+    current = env.state()
+    return {
+        "step": current.step,
+        "max_steps": current.max_steps,
+        "incident_summary": current.incident_summary,
+        "logs": current.logs,
+        "response_code": current.response_code,
+        "fix_applied": current.fix_applied,
+        "is_resolved": current.is_resolved,
+    }
 @app.post("/step")
 def step(request: ActionRequest):
+    action = request.action
+    observation, reward, done, info = env.step(action)
+    return {
+        "observation": {
+            "step": observation.step,
+            "max_steps": observation.max_steps,
+            "incident_summary": observation.incident_summary,
+            "logs": observation.logs,
+            "response_code": observation.response_code,
+            "fix_applied": observation.fix_applied,
+            "is_resolved": observation.is_resolved,
+        },
+        "reward": reward,
+        "done": done,
+        "info": info,
+    }
+@app.get("/tasks")
+def list_tasks():
+    """Return all tasks defined in openenv.yaml with their graders."""
+    tasks = _load_tasks()
+    return {
+        "tasks": [
+            {
+                "id": t["id"],
+                "name": t["name"],
+                "description": t["description"],
+                "difficulty": t["difficulty"],
+                "grader": t["grader"],
+            }
+            for t in tasks
+        ]
+    }
+@app.post("/grade/{task_id}")
+def grade_task(task_id: str):
+    """Run the grader for a specific task and return the score."""
+    tasks = _load_tasks()
+    task = next((t for t in tasks if t["id"] == task_id), None)
+    if task is None:
+        return {"error": f"Task '{task_id}' not found", "score": 0.0}
+    grader_ref = task["grader"]
+    module_path, func_name = grader_ref.rsplit(":", 1)
+    mod = importlib.import_module(module_path)
+    grade_fn = getattr(mod, func_name)
+    score = grade_fn()
+    return {"task_id": task_id, "score": score}
 def main():
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 if __name__ == "__main__":
     main()

inference.py CHANGED Viewed

@@ -23,6 +23,7 @@ MAX_STEPS = 10
 TEMPERATURE = 0.7
 MAX_TOKENS = 50
 SUCCESS_SCORE_THRESHOLD = 0.5
 # ============================================
 # System Prompt
@@ -107,60 +108,73 @@ def get_model_action(client: OpenAI, step: int, observation, last_reward: float,
         return "inspect_logs"
 # ============================================
-# Main Async Function
 # ============================================
 async def main() -> None:
     if not API_KEY:
         print("[ERROR] HF_TOKEN environment variable not set", flush=True)
         return
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
-    env = APITriageEnv(max_steps=MAX_STEPS)
-    # All 6 task IDs matching openenv.yaml — each evaluated explicitly
     task_ids = ["auth_error", "missing_fields", "rate_limit", "timeout", "wrong_endpoint", "server_error"]
-    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
     for tid in task_ids:
-        history: List[str] = []
-        rewards: List[float] = []
-        steps_taken = 0
-        success = False
-        try:
-            # Reset env and FORCE the specific incident type (no randomness)
-            observation = env.reset()
-            env.incident = get_incident_by_type(tid)
-            observation = env.state()  # refresh observation with forced incident
-            last_reward = 0.0
-            for step in range(1, MAX_STEPS + 1):
-                action = get_model_action(client, step, observation, last_reward, history)
-                observation, reward, done, info = env.step(action)
-                rewards.append(reward)
-                steps_taken = step
-                last_reward = reward
-                log_step(step=step, action=action, reward=reward, done=done, error=None)
-                history.append(f"Step {step}: {action} -> reward {reward:.2f}")
-                if done:
-                    success = info.get("resolution") == "success"
-                    break
-            # Score strictly between 0 and 1
-            task_score = 0.95 if success else 0.05
-            log_end(success=success, steps=steps_taken, score=task_score, rewards=rewards)
-        except Exception as e:
-            print(f"[DEBUG] Error in task {tid}: {e}", flush=True)
-            log_end(success=False, steps=0, score=0.05, rewards=[0.0])
-# ============================================
-# Run
-# ============================================
 if __name__ == "__main__":
     asyncio.run(main())

 TEMPERATURE = 0.7
 MAX_TOKENS = 50
 SUCCESS_SCORE_THRESHOLD = 0.5
+MAX_TOTAL_REWARD = 20.5  # best case: inspect_logs(0.5) + fix(5.0) + resolve(15.0)
 # ============================================
 # System Prompt
         return "inspect_logs"
 # ============================================
+# Run a single task episode
+# ============================================
+def run_task(client: OpenAI, task_id: str) -> None:
+    """Run one task: [START] -> steps -> [END]."""
+    env = APITriageEnv(max_steps=MAX_STEPS)
+    history: List[str] = []
+    rewards: List[float] = []
+    steps_taken = 0
+    score = 0.0
+    success = False
+    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
+    try:
+        # Reset env and force the specific incident
+        env.reset()
+        env.incident = get_incident_by_type(task_id)
+        env.fix_applied = False
+        env.done = False
+        env.step_counter = 0
+        env.total_reward = 0.0
+        observation = env.state()
+        last_reward = 0.0
+        for step in range(1, MAX_STEPS + 1):
+            action = get_model_action(client, step, observation, last_reward, history)
+            observation, reward, done, info = env.step(action)
+            rewards.append(reward)
+            steps_taken = step
+            last_reward = reward
+            log_step(step=step, action=action, reward=reward, done=done, error=None)
+            history.append(f"Step {step}: {action} -> reward {reward:.2f}")
+            if done:
+                success = info.get("resolution") == "success"
+                break
+        # Compute score from actual rewards, clamped to [0, 1]
+        score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
+        score = min(max(score, 0.0), 1.0)
+        success = score >= SUCCESS_SCORE_THRESHOLD
+    except Exception as e:
+        print(f"[DEBUG] Error in task {task_id}: {e}", flush=True)
+    finally:
+        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
+# ============================================
+# Main
 # ============================================
 async def main() -> None:
     if not API_KEY:
         print("[ERROR] HF_TOKEN environment variable not set", flush=True)
         return
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
+    # All 6 task IDs from openenv.yaml
     task_ids = ["auth_error", "missing_fields", "rate_limit", "timeout", "wrong_endpoint", "server_error"]
     for tid in task_ids:
+        run_task(client, tid)
 if __name__ == "__main__":
     asyncio.run(main())

pyproject.toml CHANGED Viewed

@@ -16,6 +16,7 @@ dependencies = [
     "fastapi>=0.100.0",
     "uvicorn>=0.23.0",
     "openenv-core>=0.2.0",
 ]
 [project.scripts]

     "fastapi>=0.100.0",
     "uvicorn>=0.23.0",
     "openenv-core>=0.2.0",
+    "pyyaml>=6.0.0",
 ]
 [project.scripts]

requirements.txt CHANGED Viewed

@@ -6,3 +6,4 @@ numpy>=1.24.0
 pytest>=7.0.0
 fastapi>=0.100.0
 uvicorn>=0.23.0

 pytest>=7.0.0
 fastapi>=0.100.0
 uvicorn>=0.23.0
+pyyaml>=6.0.0

server/app.py CHANGED Viewed

@@ -1,96 +1,124 @@
-# the main environment file
 from fastapi import FastAPI
 from pydantic import BaseModel
 from environment.api_triage_env import APITriageEnv
-# creating an app and environment
-app = FastAPI()
 env = APITriageEnv()
-# defining a request model for /step endpoint
-# for fastapi so that it can understand that we expecting a JSON with an action field that is a text dtype
 class ActionRequest(BaseModel):
-  action: str
-@app.post("/reset")
 def reset():
-  """
-  Starting a new API debugging episode
-  """
-  print("INFO : reset endpoint is called , new debugging session started ")
-  state = env.reset()
-  return {
-    "step" : state.step,
-    "max_steps": state.max_steps,
-    "incident_summary": state.incident_summary,
-    "logs": state.logs,
-    "response_code":state.response_code,
-    "fix_applied": state.fix_applied,
-    "is_resolved" : state.is_resolved
-  }
 @app.get("/state")
 def state():
-  """
-  HELPs to return the current observation of the episode.
-  """
-  print("INFO : current state of the Episode as follows ")
-  current = env.state()
-  return {
-    "step" : current.step,
-    "max_steps": current.max_steps,
-    "incident_summary": current.incident_summary,
-    "logs": current.logs,
-    "response_code": current.response_code,
-    "fix_applied": current.fix_applied,
-    "is_resolved" : current.is_resolved
-  }
 @app.post("/step")
 def step(request: ActionRequest):
-  """
-  the agent sends an action and our environment will preocess it
-  and update the state , returns what happened.
-  """
-  """
-  action = what the agent wants to do (text)
-  observation = what the agent sees after doing it (object with 7 fields)
-  """
-  action = request.action
-  print(f"INFO : Action received: {action}")
-  # calling env.step() from api_triage_env.py file to process the action
-  observation , reward , done , info = env.step(action)
-  # here returning the result
-  return {
-    "observation": {
-    "step" : observation.step,
-    "max_steps": observation.max_steps,
-    "incident_summary": observation.incident_summary,
-    "logs": observation.logs,
-    "response_code": observation.response_code,
-    "fix_applied": observation.fix_applied,
-    "is_resolved" : observation.is_resolved
-    },
-    "reward": reward,
-    "done": done,
-    "info": info,
-  }
 def main():
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 if __name__ == "__main__":
     main()

+import importlib
+import yaml
 from fastapi import FastAPI
 from pydantic import BaseModel
 from environment.api_triage_env import APITriageEnv
+app = FastAPI(title="API Triage Agent", version="1.0.0")
 env = APITriageEnv()
 class ActionRequest(BaseModel):
+    action: str
+# load task definitions from openenv.yaml
+def _load_tasks():
+    with open("openenv.yaml", "r") as f:
+        cfg = yaml.safe_load(f)
+    return cfg.get("tasks", [])
+@app.get("/")
+def root():
+    return {"status": "ok", "environment": "api-triage-agent"}
+@app.get("/health")
+def health():
+    return {"status": "healthy"}
+@app.post("/reset")
 def reset():
+    state = env.reset()
+    return {
+        "observation": {
+            "step": state.step,
+            "max_steps": state.max_steps,
+            "incident_summary": state.incident_summary,
+            "logs": state.logs,
+            "response_code": state.response_code,
+            "fix_applied": state.fix_applied,
+            "is_resolved": state.is_resolved,
+        },
+        "reward": None,
+        "done": False,
+    }
 @app.get("/state")
 def state():
+    current = env.state()
+    return {
+        "step": current.step,
+        "max_steps": current.max_steps,
+        "incident_summary": current.incident_summary,
+        "logs": current.logs,
+        "response_code": current.response_code,
+        "fix_applied": current.fix_applied,
+        "is_resolved": current.is_resolved,
+    }
 @app.post("/step")
 def step(request: ActionRequest):
+    action = request.action
+    observation, reward, done, info = env.step(action)
+    return {
+        "observation": {
+            "step": observation.step,
+            "max_steps": observation.max_steps,
+            "incident_summary": observation.incident_summary,
+            "logs": observation.logs,
+            "response_code": observation.response_code,
+            "fix_applied": observation.fix_applied,
+            "is_resolved": observation.is_resolved,
+        },
+        "reward": reward,
+        "done": done,
+        "info": info,
+    }
+@app.get("/tasks")
+def list_tasks():
+    """Return all tasks defined in openenv.yaml with their graders."""
+    tasks = _load_tasks()
+    return {
+        "tasks": [
+            {
+                "id": t["id"],
+                "name": t["name"],
+                "description": t["description"],
+                "difficulty": t["difficulty"],
+                "grader": t["grader"],
+            }
+            for t in tasks
+        ]
+    }
+@app.post("/grade/{task_id}")
+def grade_task(task_id: str):
+    """Run the grader for a specific task and return the score."""
+    tasks = _load_tasks()
+    task = next((t for t in tasks if t["id"] == task_id), None)
+    if task is None:
+        return {"error": f"Task '{task_id}' not found", "score": 0.0}
+    grader_ref = task["grader"]
+    module_path, func_name = grader_ref.rsplit(":", 1)
+    mod = importlib.import_module(module_path)
+    grade_fn = getattr(mod, func_name)
+    score = grade_fn()
+    return {"task_id": task_id, "score": score}
 def main():
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860)
 if __name__ == "__main__":
     main()