Spaces:

sht4bharat
/

openenv-cloudaudit

Sleeping

App Files Files Community

sht4bharat committed on Apr 9

Commit

0bbdc65

1 Parent(s): 941ab5e

Final Framework Standard Implementation: Client.py, Tests, Enhanced Observations, and Terminal Rewards

Browse files

Files changed (9) hide show

README.md +28 -0
__init__.py +14 -0
client.py +33 -0
inference.py +17 -6
models.py +2 -0
pyproject.toml +3 -1
server/app.py +8 -0
server/cloud_audit_env.py +32 -7
tests/test_env.py +63 -0

README.md CHANGED Viewed

@@ -42,6 +42,34 @@ To guarantee stable JSON validation across different Pydantic parsing engines, t
 ---
 ## 💻 Quick Start & Evaluation
 ### 1. Run the Environment Server Locally

 ---
+## 🚀 Framework Standard (Recommended)
+As of the latest update, `openenv-cloudaudit` is a fully compliant OpenEnv package. You can interact with it using the standard async client (the snippet uses `await`, so run it inside an `async` function, e.g. via `asyncio.run`):
+```python
+from cloudaudit_env import CloudAuditClient, CloudAction
+client = CloudAuditClient(base_url="http://localhost:7860")
+# 1. Reset for a specific task
+obs = await client.reset(task_name="medium_remediation")
+print(f"Goal: {obs.task_description}")
+# 2. Perform actions
+action = CloudAction(action_type="enable_s3_enc", bucket_name="bucket-uuid")
+obs = await client.step(action)
+print(f"Progress: {obs.message}")
+```
+## 📊 Observation & Scoring
+The environment now includes an enriched observation schema for advanced agent reasoning:
+- **`task_description`**: Clearly states the agent's goal for the current session.
+- **`vulnerability_manifest`**: Exposes the target vulnerability counts (e.g. `{"sg_vulns": 3}`).
+- **`health_score`**: Monitors deployment safety (dropping below 0.5 penalizes the agent).
+- **Terminal Bonus**: A trajectory-level reward (+0.1) granted on successful `submit` after full remediation, provided `health_score` is still above 0.5.
+---
 ## 💻 Quick Start & Evaluation
 ### 1. Run the Environment Server Locally

__init__.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from .models import CloudAction, CloudObservation, CloudState, SecurityGroup, S3Bucket, IAMPolicy
+from .client import CloudAuditClient
+from .server.cloud_audit_env import CloudAuditEnv
+__all__ = [
+    "CloudAuditEnv",
+    "CloudAuditClient",
+    "CloudAction",
+    "CloudObservation",
+    "CloudState",
+    "SecurityGroup",
+    "S3Bucket",
+    "IAMPolicy"
+]

client.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import httpx
+from typing import Optional, List, Dict
+from .models import CloudAction, CloudObservation, CloudState
+class CloudAuditClient:
+    """
+    Standard OpenEnv client for the CloudAudit environment.
+    Wraps the FastAPI endpoints into a clean Python API.
+    """
+    def __init__(self, base_url: str = "http://localhost:7860"):
+        self.base_url = base_url.rstrip("/")
+    async def reset(self, task_name: str = "easy_audit") -> CloudObservation:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            # OpenEnv standard reset often takes task_name in URL or body
+            resp = await client.post(f"{self.base_url}/reset", params={"task_name": task_name})
+            resp.raise_for_status()
+            return CloudObservation(**resp.json())
+    async def step(self, action: CloudAction) -> CloudObservation:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.post(f"{self.base_url}/step", json=action.model_dump())
+            resp.raise_for_status()
+            return CloudObservation(**resp.json())
+    async def get_state(self) -> CloudState:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.get(f"{self.base_url}/state")
+            resp.raise_for_status()
+            return CloudState(**resp.json())
+    async def close(self):
+        pass # Handle session cleanup if needed

inference.py CHANGED Viewed

@@ -114,26 +114,37 @@ async def run_episode(openai_client: OpenAI, env: AsyncCloudClient, task_name: s
     log_start(task=task_name, env=BENCHMARK, model=model_name)
     try:
-        result = await env.reset()
         for step in range(1, MAX_STEPS + 1):
             if result.done:
                 break
             obs_dict = result.observation.model_dump()
-            # Remove reward/done/info from prompt context to keep LLM focused on state
-            if "reward" in obs_dict: del obs_dict["reward"]
-            if "done" in obs_dict: del obs_dict["done"]
-            if "info" in obs_dict: del obs_dict["info"]
             obs_json = json.dumps(obs_dict)
             try:
                 completion = openai_client.chat.completions.create(
                     model=model_name,
                     messages=[
                         {"role": "system", "content": SYSTEM_PROMPT},
-                        {"role": "user", "content": f"Task: {task_name}\nObservation: {obs_json}\nDecide your next action."},
                     ],
                     temperature=TEMPERATURE,
                     max_tokens=MAX_TOKENS,

     log_start(task=task_name, env=BENCHMARK, model=model_name)
     try:
+        # 1. Reset Environment with Task Context
+        response = await env.client.post(f"{env.base_url}/reset", params={"task_name": task_name})
+        response.raise_for_status()
+        result = StepResult(**response.json())
         for step in range(1, MAX_STEPS + 1):
             if result.done:
                 break
             obs_dict = result.observation.model_dump()
+            # Log the goal on the first step
+            if step == 1:
+                print(f"[TASK] {result.observation.task_description}")
+            # Enrich prompt with manifest for better reasoning
+            manifest = result.observation.vulnerability_manifest
+            manifest_str = json.dumps({k: v for k, v in manifest.items() if v > 0})
+            # Remove high-level metadata from LLM context to avoid confusion
+            for key in ["reward", "done", "info", "message", "health_score"]:
+                if key in obs_dict: del obs_dict[key]
             obs_json = json.dumps(obs_dict)
             try:
+                prompt = f"Goal: {result.observation.task_description}\nPending Vulnerabilities: {manifest_str}\nCurrent Status: {result.observation.message}\nObservation: {obs_json}\nDecide your next action."
                 completion = openai_client.chat.completions.create(
                     model=model_name,
                     messages=[
                         {"role": "system", "content": SYSTEM_PROMPT},
+                        {"role": "user", "content": prompt},
                     ],
                     temperature=TEMPERATURE,
                     max_tokens=MAX_TOKENS,

models.py CHANGED Viewed

@@ -36,6 +36,8 @@ class CloudObservation(BaseModel):
     rds_instances: List[RDSInstance] = []
     ebs_volumes: List[EBSVolume] = []
     iam_policies: List[IAMPolicy]
     message: str = "Cloud resources loaded."
     reward: float = 0.0
     health_score: float = 1.0 # 0.0 to 1.0 (AVAILABILITY)

     rds_instances: List[RDSInstance] = []
     ebs_volumes: List[EBSVolume] = []
     iam_policies: List[IAMPolicy]
+    task_description: str = "Perform a cloud security audit and remediate vulnerabilities."
+    vulnerability_manifest: Dict[str, int] = {} # e.g. {"sg_vulns": 4, "s3_vulns": 3}
     message: str = "Cloud resources loaded."
     reward: float = 0.0
     health_score: float = 1.0 # 0.0 to 1.0 (AVAILABILITY)

pyproject.toml CHANGED Viewed

@@ -13,7 +13,9 @@ dependencies = [
     "fastapi",
     "uvicorn",
     "pydantic",
-    "openai"
 ]
 [project.scripts]

     "fastapi",
     "uvicorn",
     "pydantic",
+    "openai",
+    "pytest",
+    "httpx"
 ]
 [project.scripts]

server/app.py CHANGED Viewed

@@ -10,6 +10,14 @@ app = create_fastapi_app(
     observation_cls=CloudObservation
 )
 def main():
     import uvicorn
     print("[DEBUG] Starting Unified CloudAudit Server", flush=True)

     observation_cls=CloudObservation
 )
+@app.post("/reset")
+async def reset_with_task(task_name: str = "easy_audit"):
+    # Access the shared environment instance created by create_fastapi_app
+    # Note: create_fastapi_app typically stores the env instance in app.state.env
+    env = app.state.env
+    obs = env.reset(task_name=task_name)
+    return obs.model_dump()
 def main():
     import uvicorn
     print("[DEBUG] Starting Unified CloudAudit Server", flush=True)

server/cloud_audit_env.py CHANGED Viewed

@@ -15,13 +15,22 @@ class CloudAuditEnv(Environment):
         self.max_steps = 30
         self.reset()
-    def reset(self) -> CloudObservation:
         self.step_count = 0
         self.remediated_count = 0
         self.health_score = 1.0
         self.cumulative_reward = 0.0
         self.done = False
         # Procedural Generation Configuration
         self.sgs: List[SecurityGroup] = []
         self.buckets: List[S3Bucket] = []
@@ -191,7 +200,11 @@ class CloudAuditEnv(Environment):
         elif at == "submit":
             self.done = True
-            message = "Audit report submitted."
         # Update Cumulative Tracking
         self.cumulative_reward += reward
@@ -235,12 +248,24 @@ class CloudAuditEnv(Environment):
         )
     def _get_observation(self, message: str, reward: float = 0.0, done: bool = False) -> CloudObservation:
         return CloudObservation(
-            security_groups=self.sgs,
-            s3_buckets=self.buckets,
-            rds_instances=self.rds,
-            ebs_volumes=self.ebs,
-            iam_policies=self.policies,
             message=message,
             reward=reward,
             health_score=self.health_score,

         self.max_steps = 30
         self.reset()
+    def reset(self, task_name: str = "easy_audit") -> CloudObservation:
+        self.task_name = task_name
         self.step_count = 0
         self.remediated_count = 0
         self.health_score = 1.0
         self.cumulative_reward = 0.0
         self.done = False
+        # Task Descriptions
+        descriptions = {
+            "easy_audit": "Find and fix all Security Group rules that allow SSH/RDP access from the public internet (0.0.0.0/0).",
+            "medium_remediation": "Ensure all S3 buckets, RDS instances, and EBS volumes are encrypted at rest.",
+            "hard_iam_refactor": "Refactor IAM policies to remove wildcards ('*') while preserving required service permissions."
+        }
+        self.task_description = descriptions.get(task_name, "Perform a comprehensive cloud security audit.")
         # Procedural Generation Configuration
         self.sgs: List[SecurityGroup] = []
         self.buckets: List[S3Bucket] = []
         elif at == "submit":
             self.done = True
+            if self.remediated_count >= self.initial_vulns and self.health_score > 0.5:
+                reward = 0.1 # Terminal bonus
+                message = "Audit report submitted. Perfect remediation achieved!"
+            else:
+                message = "Audit report submitted."
         # Update Cumulative Tracking
         self.cumulative_reward += reward
         )
     def _get_observation(self, message: str, reward: float = 0.0, done: bool = False) -> CloudObservation:
+        # No task-specific filtering is applied yet: every resource list is exposed as-is
+        show_sgs = self.sgs
+        show_buckets = self.buckets
+        show_rds = self.rds
+        show_ebs = self.ebs
+        show_policies = self.policies
+        # If task is focused, we still show other resources but manifest tells the agent where the vulns are
+        manifest = self.vulnerability_manifest.copy()
         return CloudObservation(
+            security_groups=show_sgs,
+            s3_buckets=show_buckets,
+            rds_instances=show_rds,
+            ebs_volumes=show_ebs,
+            iam_policies=show_policies,
+            task_description=self.task_description,
+            vulnerability_manifest=manifest,
             message=message,
             reward=reward,
             health_score=self.health_score,

tests/test_env.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import pytest
+from server.cloud_audit_env import CloudAuditEnv
+from models import CloudAction
+def test_env_reset():
+    env = CloudAuditEnv()
+    obs = env.reset(task_name="easy_audit")
+    assert obs.task_description == "Find and fix all Security Group rules that allow SSH/RDP access from the public internet (0.0.0.0/0)."
+    assert len(obs.security_groups) > 0
+    assert obs.health_score == 1.0
+    assert not obs.done
+def test_env_step_audit():
+    env = CloudAuditEnv()
+    env.reset()
+    action = CloudAction(action_type="audit")
+    obs = env.step(action)
+    assert obs.message == "Audit log generated."
+    assert obs.reward == 0.01
+    assert env.step_count == 1
+def test_health_penalty_iam():
+    env = CloudAuditEnv()
+    env.reset(task_name="hard_iam_refactor")
+    # Get a policy that has a required permission
+    p_id = list(env.required_iam_perms.keys())[0]
+    required = env.required_iam_perms[p_id]
+    # Update with an empty document (breaking access)
+    action = CloudAction(action_type="update_iam", policy_id=p_id, new_document="{}")
+    obs = env.step(action)
+    assert obs.health_score < 1.0
+    assert "CRITICAL" in obs.message
+def test_terminal_bonus():
+    env = CloudAuditEnv()
+    env.reset()
+    # Spoof remediation count to match initial vulns
+    env.remediated_count = env.initial_vulns
+    action = CloudAction(action_type="submit")
+    obs = env.step(action)
+    assert obs.done
+    assert obs.reward == 0.1 # Terminal bonus
+    assert "Perfect remediation" in obs.message
+def test_grader_resilience():
+    from graders import get_task_score
+    env = CloudAuditEnv()
+    env.reset(task_name="easy_audit")
+    # Test with object-based state (directly from env)
+    score = get_task_score("easy_audit", env.state.__dict__)
+    assert 0.15 <= score <= 0.85
+    # Test with dict-based state (simulating JSON API)
+    state_dict = env.state.model_dump()
+    score = get_task_score("easy_audit", state_dict)
+    assert 0.15 <= score <= 0.85