Spaces:

CystronCode
/

api-gateway-defender

Sleeping

App Files Files Community

CystronCode committed on Mar 26

Commit

c3fbc01

verified ·

1 Parent(s): 022430a

Upload 5 files

Browse files

Files changed (5) hide show

Dockerfile +36 -0
baseline.py +247 -0
env.py +641 -0
main.py +199 -0
openenv.yaml +172 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,36 @@

+# ─── Build stage ─────────────────────────────────────────────────────────────────
+FROM python:3.11-slim AS builder
+WORKDIR /build
+# Install dependencies into a prefix we'll copy to the final image
+COPY requirements.txt .
+RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
+# ─── Runtime stage ────────────────────────────────────────────────────────────────
+FROM python:3.11-slim
+# Hugging Face Spaces expects the app on port 7860
+# NOTE: the CMD below passes 7860 literally; it does not read $PORT.
+ENV PORT=7860
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONDONTWRITEBYTECODE=1
+WORKDIR /app
+# Copy pre-installed packages from builder
+COPY --from=builder /install /usr/local
+# Copy application code
+COPY env.py      .
+COPY main.py     .
+COPY baseline.py .
+# HF Spaces: non-root user for safety
+RUN useradd -m -u 1000 appuser && chown -R appuser /app
+USER appuser
+EXPOSE 7860
+# Runs a single uvicorn worker. Raise --workers only if concurrent evaluation
+# runs are needed. NOTE(review): env.py keeps episode state on the environment
+# instance, so confirm main.py does not rely on one in-process instance first.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

baseline.py ADDED Viewed

	@@ -0,0 +1,247 @@

+"""
+Baseline Inference Script — API Gateway Defender
+=================================================
+Evaluates an agent on all 3 tasks and prints reproducible scores.
+Usage
+-----
+  # With LLM (reads OPENAI_API_KEY from environment):
+  OPENAI_API_KEY=sk-... python baseline.py
+  # Heuristic fallback (no API key needed):
+  python baseline.py
+The LLM agent receives the traffic logs and task description, then
+produces a JSON action that is submitted to the environment.
+The heuristic agent reads the visible logs statistically and picks
+the correct rule — used to verify the grader is working correctly
+and as a reproducible baseline for submission.
+"""
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+from typing import Any, Dict
+# Allow running standalone (before FastAPI starts) by importing env directly
+# _DIRECT_IMPORT records whether env.py was importable; main() uses it to pick
+# between the in-process heuristic and the HTTP /baseline endpoint.
+try:
+    from env import (
+        Action,
+        APIGatewayDefender,
+        TASK_DESCRIPTIONS,
+        run_heuristic_baseline,
+    )
+    _DIRECT_IMPORT = True
+except ImportError:
+    _DIRECT_IMPORT = False
+# Runtime configuration, all overridable through environment variables.
+# An empty OPENAI_API_KEY selects the heuristic agent in main().
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+# NOTE(review): the default port is 8000 while the Dockerfile serves on 7860;
+# set ENV_BASE_URL explicitly when targeting the container.
+ENV_BASE_URL   = os.getenv("ENV_BASE_URL", "http://localhost:8000")
+LLM_MODEL      = os.getenv("LLM_MODEL", "gpt-4o-mini")
+# ─── OpenAI helper ───────────────────────────────────────────────────────────────
+def _call_openai(messages: list, max_tokens: int = 512) -> str:
+    """Send a request to the OpenAI chat completions endpoint."""
+    payload = json.dumps(
+        {
+            "model":       LLM_MODEL,
+            "messages":    messages,
+            "max_tokens":  max_tokens,
+            "temperature": 0.1,
+        }
+    ).encode()
+    req = urllib.request.Request(
+        "https://api.openai.com/v1/chat/completions",
+        data=payload,
+        headers={
+            "Content-Type":  "application/json",
+            "Authorization": f"Bearer {OPENAI_API_KEY}",
+        },
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            data = json.loads(resp.read())
+        return data["choices"][0]["message"]["content"]
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode(errors="replace")
+        raise RuntimeError(f"OpenAI API error {exc.code}: {body}") from exc
+def _parse_json_from_llm(raw: str) -> Dict[str, Any]:
+    """Extract a JSON object from LLM output, stripping markdown fences if present."""
+    raw = raw.strip()
+    if raw.startswith("```"):
+        parts = raw.split("```")
+        # parts[1] is the fenced block; strip language tag if present
+        inner = parts[1]
+        if inner.lower().startswith("json"):
+            inner = inner[4:]
+        raw = inner.strip()
+    return json.loads(raw)
+# ─── LLM agent ───────────────────────────────────────────────────────────────────
+def _llm_agent_run(task_id: str) -> float:
+    """
+    Run an LLM agent on a single task via the HTTP API.
+    1. Reset the environment.
+    2. Show the agent the traffic logs and task description.
+    3. Ask it to produce a JSON action.
+    4. Submit the action and return the reward score.
+    """
+    import urllib.request as urlreq
+    def _post(path: str, body: Any) -> Any:
+        data = json.dumps(body).encode()
+        req  = urlreq.Request(
+            f"{ENV_BASE_URL}{path}",
+            data=data,
+            headers={"Content-Type": "application/json"},
+        )
+        with urlreq.urlopen(req, timeout=15) as resp:
+            return json.loads(resp.read())
+    # 1. Reset
+    obs = _post("/reset", {"task_id": task_id})
+    # 2. Build prompt (truncate request list to 25 to stay within token budget)
+    sample_requests = obs["recent_requests"][:25]
+    system_prompt = (
+        "You are a Site Reliability Engineer responding to a live production incident. "
+        "You will be shown HTTP traffic logs and a task description. "
+        "Your job is to write exactly ONE firewall rule as a JSON object. "
+        "Respond with ONLY valid JSON — no prose, no markdown fences."
+    )
+    action_schema = (
+        "{\n"
+        '  "action_type": "block_ip" | "add_rate_limit" | "block_user_agent" | "write_custom_middleware",\n'
+        '  "target_ip":          "<string, required for block_ip / add_rate_limit>",\n'
+        '  "target_user_agent":  "<string, required for block_user_agent>",\n'
+        '  "regex_pattern":      "<Python regex, required for write_custom_middleware>",\n'
+        '  "max_requests":       <int, optional — requests/min cap for add_rate_limit>\n'
+        "}"
+    )
+    user_prompt = (
+        f"TASK: {obs['task_description']}\n\n"
+        f"HINT: {obs.get('hint', '')}\n\n"
+        f"TRAFFIC SAMPLE (first 25 requests):\n"
+        f"{json.dumps(sample_requests, indent=2)}\n\n"
+        f"Respond with ONE JSON action using this schema:\n{action_schema}"
+    )
+    # 3. Call LLM
+    llm_response = _call_openai(
+        [
+            {"role": "system", "content": system_prompt},
+            {"role": "user",   "content": user_prompt},
+        ]
+    )
+    # 4. Parse action
+    try:
+        action_dict = _parse_json_from_llm(llm_response)
+    except (json.JSONDecodeError, KeyError) as exc:
+        print(f"    [!] Failed to parse LLM response: {exc}\n    Raw: {llm_response[:200]}")
+        return 0.0
+    # 5. Step
+    result = _post("/step", action_dict)
+    score  = result["reward"]["score"]
+    msg    = result["reward"]["message"]
+    print(f"    Action:  {action_dict}")
+    print(f"    Result:  {msg}")
+    return score
+# ─── Main ────────────────────────────────────────────────────────────────────────
+def run_baseline_direct() -> Dict[str, float]:
+    """Run heuristic baseline directly on the Python class (no server needed)."""
+    return run_heuristic_baseline()
+def run_baseline_http() -> Dict[str, float]:
+    """Run heuristic baseline via the HTTP API."""
+    import urllib.request as urlreq
+    req = urlreq.Request(
+        f"{ENV_BASE_URL}/baseline",
+        data=b"{}",
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urlreq.urlopen(req, timeout=30) as resp:
+        data = json.loads(resp.read())
+    return data["scores"]
+def main() -> None:
+    print("=" * 55)
+    print("  API Gateway Defender — Baseline Evaluation")
+    print("=" * 55)
+    print()
+    task_ids = ["easy", "medium", "hard"]
+    scores:   Dict[str, float] = {}
+    if OPENAI_API_KEY:
+        print(f"Mode : LLM agent  ({LLM_MODEL})")
+        print(f"URL  : {ENV_BASE_URL}")
+        print()
+        for task_id in task_ids:
+            print(f"[Task: {task_id}]")
+            try:
+                score = _llm_agent_run(task_id)
+                scores[task_id] = score
+                print(f"    Score: {score:.4f}")
+            except Exception as exc:
+                print(f"    [!] Error: {exc}. Falling back to heuristic.")
+                if _DIRECT_IMPORT:
+                    fb = run_heuristic_baseline()
+                    scores[task_id] = fb.get(task_id, 0.0)
+                else:
+                    scores[task_id] = 0.0
+            print()
+    else:
+        print("Mode : Heuristic agent  (set OPENAI_API_KEY to use LLM)")
+        print()
+        if _DIRECT_IMPORT:
+            scores = run_baseline_direct()
+        else:
+            print(f"Calling {ENV_BASE_URL}/baseline ...")
+            scores = run_baseline_http()
+        for task_id in task_ids:
+            print(f"  [{task_id}]  score = {scores.get(task_id, 0.0):.4f}")
+    print()
+    print("-" * 35)
+    avg = sum(scores.values()) / max(len(scores), 1)
+    for task_id in task_ids:
+        s = scores.get(task_id, 0.0)
+        bar = "█" * int(s * 20)
+        print(f"  {task_id:<8s}  {s:.4f}  {bar}")
+    print(f"  {'average':<8s}  {avg:.4f}")
+    print("-" * 35)
+    print()
+    # Exit non-zero if any task scored 0.0 (helps CI catch broken graders)
+    if any(v == 0.0 for v in scores.values()):
+        print("[WARN] One or more tasks scored 0.0. Check the environment.")
+        sys.exit(1)
+    else:
+        print("[OK] All tasks passed baseline threshold.")
+if __name__ == "__main__":
+    main()

env.py ADDED Viewed

	@@ -0,0 +1,641 @@

+"""
+API Gateway Defender — OpenEnv Environment
+==========================================
+A simulated HTTP traffic monitoring environment where an AI agent must
+configure middleware rules to block malicious web traffic while preserving
+legitimate user requests. Models a real production incident faced by SREs.
+Tasks:
+  easy   — Single IP volumetric flood on /login
+  medium — Scraper bot using identical User-Agent across many IPs
+  hard   — SQL injection attack with rotating IPs and User-Agents
+"""
+import re
+import random
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+# ─── Constants ──────────────────────────────────────────────────────────────────
+# Step budget per episode; step() ends the episode once this many rules were submitted.
+MAX_STEPS = 5
+# Number of requests in each generated traffic set, and how many of them are malicious.
+TRAFFIC_SIZE = 1000
+MALICIOUS_COUNT = 200
+FALSE_POSITIVE_THRESHOLD = 0.10   # >10% FP rate → score zeroed
+# Fixed attack artifacts (deterministic so graders are reproducible)
+ATTACK_IP_EASY   = "185.220.101.47"
+SCRAPER_UA       = "ScraperBot/3.1 (DataHarvester; +http://scrape.evil.io/bot)"
+SQLI_KEYWORD     = "UNION SELECT"
+# Pools sampled by _legit_request(); the easy and hard attacks also draw their
+# User-Agent from LEGIT_USER_AGENTS so it cannot be used to tell them apart.
+LEGIT_USER_AGENTS = [
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Version/17.0 Safari/605.1.15",
+    "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0",
+    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 Mobile/15E148",
+    "Mozilla/5.0 (Android 13; Mobile; rv:109.0) Gecko/109.0 Firefox/115.0",
+]
+LEGIT_PATHS = [
+    "/", "/home", "/about", "/products", "/contact",
+    "/blog", "/faq", "/pricing", "/docs", "/api/users", "/api/products",
+]
+LEGIT_QUERY_STRINGS = [
+    "", "page=1", "q=hello", "id=42", "sort=asc&filter=new",
+    "utm_source=google", "lang=en", "category=tech", "ref=nav",
+]
+LEGIT_METHODS = ["GET", "GET", "GET", "POST", "PUT"]  # GET-heavy, realistic
+# Query strings used by the hard task; every one embeds SQLI_KEYWORD.
+SQLI_PAYLOADS = [
+    f"id=1' {SQLI_KEYWORD} username,password FROM users--",
+    f"search=admin' {SQLI_KEYWORD} 1,2,3--",
+    f"q=x' {SQLI_KEYWORD} table_name FROM information_schema.tables--",
+    f"cat=1' {SQLI_KEYWORD} NULL,NULL,NULL--",
+    f"item=5' {SQLI_KEYWORD} version()--",
+]
+# ─── Pydantic Models ─────────────────────────────────────────────────────────────
+class Action(BaseModel):
+    """
+    An action the agent can take — adds one firewall rule to the gateway.
+    action_type choices:
+      block_ip               — Drop all traffic from target_ip
+      add_rate_limit         — Allow target_ip only max_requests/min
+      block_user_agent       — Drop all traffic matching target_user_agent exactly
+      write_custom_middleware — Drop requests where regex_pattern matches path?query_string
+    """
+    # Typed as a plain str: unknown values are not rejected here but by
+    # APIGatewayDefender.step(), which answers with a zero-score reward.
+    action_type: str = Field(
+        ...,
+        description=(
+            "Rule type: 'block_ip', 'add_rate_limit', "
+            "'block_user_agent', 'write_custom_middleware'"
+        ),
+    )
+    target_ip: Optional[str] = Field(
+        None, description="IP address (required for block_ip / add_rate_limit)"
+    )
+    target_user_agent: Optional[str] = Field(
+        None, description="Exact User-Agent string (required for block_user_agent)"
+    )
+    # The rule engine compiles this pattern case-insensitively.
+    regex_pattern: Optional[str] = Field(
+        None,
+        description=(
+            "Python regex matched against '{path}?{query_string}' "
+            "(required for write_custom_middleware)"
+        ),
+    )
+    # Only shown in the rule description: for grading, the rule engine treats
+    # add_rate_limit as blocking every request from target_ip.
+    max_requests: Optional[int] = Field(
+        60, description="Requests-per-minute cap for add_rate_limit (default 60)"
+    )
+class Observation(BaseModel):
+    """What the agent sees at each step."""
+    # Built by APIGatewayDefender._make_observation() from the first 100
+    # entries of the visible traffic set, with the is_malicious label removed.
+    recent_requests: List[Dict[str, Any]] = Field(
+        ...,
+        description=(
+            "Last 100 HTTP requests in the traffic stream. "
+            "Fields: ip, method, path, user_agent, query_string, status_code."
+        ),
+    )
+    active_rules: List[str] = Field(
+        ..., description="Human-readable list of rules currently active on the gateway."
+    )
+    current_task: str = Field(..., description="Task ID: 'easy', 'medium', or 'hard'")
+    task_description: str = Field(
+        ..., description="Natural language description of the attack the agent must repel."
+    )
+    # Counts every step() call of the episode, including ones with an invalid action_type.
+    step_count: int = Field(..., description="Number of rules submitted so far this episode.")
+    hint: str = Field("", description="Statistical hint derived from the visible traffic sample.")
+class Reward(BaseModel):
+    """Feedback returned after each step()."""
+    # The grader clamps the score to [0, 1] and sets it to 0.0 when
+    # false_positive_rate exceeds FALSE_POSITIVE_THRESHOLD.
+    score: float = Field(..., ge=0.0, le=1.0, description="Task performance score 0.0–1.0")
+    malicious_blocked: int = Field(..., description="Malicious requests blocked by active rules")
+    legitimate_blocked: int = Field(..., description="Legitimate requests incorrectly blocked")
+    # Sizes of the malicious and legitimate parts of the graded traffic set.
+    total_malicious: int
+    total_legitimate: int
+    false_positive_rate: float = Field(..., description="Fraction of legit requests blocked")
+    message: str = Field(..., description="Human-readable explanation of the score")
+class StepResult(BaseModel):
+    """Full return value of step()."""
+    observation: Observation
+    reward: Reward
+    # True once the episode has ended; step() then raises until reset() is called.
+    done: bool
+    # Diagnostics: step, best_score, rules_applied and max_steps for an accepted
+    # action, or an "error" entry when the action_type was invalid.
+    info: Dict[str, Any]
+class EnvironmentState(BaseModel):
+    """Full serialisable snapshot returned by state()."""
+    task_id: str
+    step_count: int
+    # One dict per active rule, as produced by _Rule.to_dict().
+    active_rules: List[Dict[str, Any]]
+    episode_done: bool
+    # Highest score any step of the current episode has reached.
+    best_score: float
+    # Length of the visible traffic set, not of the 100-request observation window.
+    traffic_sample_size: int
+# ─── Traffic Generators ──────────────────────────────────────────────────────────
+def _rand_ip(rng: random.Random, exclude: str = "") -> str:
+    """Generate a random public-looking IPv4 address."""
+    while True:
+        ip = (
+            f"{rng.randint(10, 220)}."
+            f"{rng.randint(1, 254)}."
+            f"{rng.randint(1, 254)}."
+            f"{rng.randint(1, 254)}"
+        )
+        if ip != exclude:
+            return ip
+def _legit_request(rng: random.Random) -> Dict[str, Any]:
+    """Build one benign request record, drawing every field from *rng*.
+    The IP is never ATTACK_IP_EASY; method, path, User-Agent and query string
+    come from the LEGIT_* pools. The record carries is_malicious=False.
+    """
+    # The dict entries are evaluated top to bottom, which fixes the order in
+    # which rng is consumed and therefore the traffic produced for a seed.
+    return {
+        "ip":           _rand_ip(rng, exclude=ATTACK_IP_EASY),
+        "method":       rng.choice(LEGIT_METHODS),
+        "path":         rng.choice(LEGIT_PATHS),
+        "user_agent":   rng.choice(LEGIT_USER_AGENTS),
+        "query_string": rng.choice(LEGIT_QUERY_STRINGS),
+        "status_code":  200,
+        "is_malicious": False,
+    }
+def generate_easy_traffic(seed: int) -> List[Dict[str, Any]]:
+    """
+    Easy: one IP floods /login with POST requests.
+    Correct action: block_ip or add_rate_limit on ATTACK_IP_EASY.
+    Returns TRAFFIC_SIZE shuffled request records, MALICIOUS_COUNT of them
+    malicious; the same seed always yields the same list.
+    """
+    rng = random.Random(seed)
+    traffic: List[Dict[str, Any]] = []
+    # Malicious records first, then legitimate ones; the shuffle below mixes them.
+    for _ in range(MALICIOUS_COUNT):
+        traffic.append({
+            "ip":           ATTACK_IP_EASY,
+            "method":       "POST",
+            "path":         "/login",
+            "user_agent":   rng.choice(LEGIT_USER_AGENTS),  # UA blends in
+            "query_string": "",
+            "status_code":  200,
+            "is_malicious": True,
+        })
+    for _ in range(TRAFFIC_SIZE - MALICIOUS_COUNT):
+        traffic.append(_legit_request(rng))
+    rng.shuffle(traffic)
+    return traffic
+def generate_medium_traffic(seed: int) -> List[Dict[str, Any]]:
+    """
+    Medium: 50 IPs all share an identical unusual User-Agent, hitting /api/data.
+    Correct action: block_user_agent with SCRAPER_UA.
+    Returns TRAFFIC_SIZE shuffled request records, MALICIOUS_COUNT of them
+    malicious; the same seed always yields the same list.
+    """
+    rng = random.Random(seed)
+    traffic: List[Dict[str, Any]] = []
+    # Pool of scraper addresses; each malicious request picks one at random.
+    scraper_ips = [_rand_ip(rng) for _ in range(50)]
+    for _ in range(MALICIOUS_COUNT):
+        traffic.append({
+            "ip":           rng.choice(scraper_ips),
+            "method":       "GET",
+            "path":         "/api/data",
+            "user_agent":   SCRAPER_UA,   # constant across all malicious requests
+            "query_string": f"page={rng.randint(1, 500)}",
+            "status_code":  200,
+            "is_malicious": True,
+        })
+    for _ in range(TRAFFIC_SIZE - MALICIOUS_COUNT):
+        traffic.append(_legit_request(rng))
+    rng.shuffle(traffic)
+    return traffic
+def generate_hard_traffic(seed: int) -> List[Dict[str, Any]]:
+    """
+    Hard: attacker rotates IPs and UAs but always carries a SQLi payload.
+    Correct action: write_custom_middleware with regex matching 'UNION.SELECT'.
+    Returns TRAFFIC_SIZE shuffled request records, MALICIOUS_COUNT of them
+    malicious; the same seed always yields the same list.
+    """
+    rng = random.Random(seed)
+    traffic: List[Dict[str, Any]] = []
+    # Each malicious request gets a fresh random IP and a browser User-Agent;
+    # only the query string (one of SQLI_PAYLOADS) gives it away.
+    for _ in range(MALICIOUS_COUNT):
+        traffic.append({
+            "ip":           _rand_ip(rng),
+            "method":       "GET",
+            "path":         rng.choice(["/search", "/products", "/api/items", "/catalog"]),
+            "user_agent":   rng.choice(LEGIT_USER_AGENTS),
+            "query_string": rng.choice(SQLI_PAYLOADS),
+            "status_code":  200,
+            "is_malicious": True,
+        })
+    for _ in range(TRAFFIC_SIZE - MALICIOUS_COUNT):
+        req = _legit_request(rng)
+        # Guarantee legit requests never accidentally contain the payload
+        if SQLI_KEYWORD in req["query_string"].upper():
+            req["query_string"] = ""
+        traffic.append(req)
+    rng.shuffle(traffic)
+    return traffic
+# Task ID → traffic generator. reset() validates task_id against these keys and
+# calls the generator twice with different seeds (visible set and grading set).
+TASK_GENERATORS = {
+    "easy":   generate_easy_traffic,
+    "medium": generate_medium_traffic,
+    "hard":   generate_hard_traffic,
+}
+# Task ID → natural-language briefing placed in Observation.task_description.
+TASK_DESCRIPTIONS = {
+    "easy": (
+        "A single IP address is flooding your /login endpoint with POST requests at high volume. "
+        "Inspect the traffic logs to identify the offending IP and block it or apply a rate limit."
+    ),
+    "medium": (
+        "A scraper bot is harvesting your /api/data endpoint from many different IP addresses. "
+        "All malicious requests share a single, unusual User-Agent string. "
+        "Identify the User-Agent and block it."
+    ),
+    "hard": (
+        "An attacker is probing your database via SQL injection. They rotate IP addresses and "
+        "User-Agents to evade simple rules, but every malicious request contains a SQL injection "
+        "payload in the query string. Write a regex middleware rule to detect and drop these requests."
+    ),
+}
+# ─── Rule Engine ─────────────────────────────────────────────────────────────────
+class _Rule:
+    """Internal class: wraps an Action and applies it to individual requests."""
+    def __init__(self, action: Action) -> None:
+        self.action = action
+        self._compiled_re = None
+        if action.action_type == "write_custom_middleware" and action.regex_pattern:
+            try:
+                self._compiled_re = re.compile(action.regex_pattern, re.IGNORECASE)
+            except re.error:
+                pass  # invalid regex → rule matches nothing
+    def blocks(self, request: Dict[str, Any]) -> bool:
+        a = self.action
+        if a.action_type in ("block_ip", "add_rate_limit"):
+            return bool(a.target_ip and request["ip"] == a.target_ip)
+        if a.action_type == "block_user_agent":
+            return bool(
+                a.target_user_agent
+                and request["user_agent"] == a.target_user_agent
+            )
+        if a.action_type == "write_custom_middleware" and self._compiled_re:
+            target = f"{request['path']}?{request['query_string']}"
+            return bool(self._compiled_re.search(target))
+        return False
+    def describe(self) -> str:
+        a = self.action
+        if a.action_type == "block_ip":
+            return f"BLOCK_IP({a.target_ip})"
+        if a.action_type == "add_rate_limit":
+            return f"RATE_LIMIT({a.target_ip}, max={a.max_requests}/min)"
+        if a.action_type == "block_user_agent":
+            return f"BLOCK_UA({a.target_user_agent!r})"
+        if a.action_type == "write_custom_middleware":
+            return f"MIDDLEWARE(regex={a.regex_pattern!r})"
+        return f"RULE({a.action_type})"
+    def to_dict(self) -> Dict[str, Any]:
+        a = self.action
+        return {
+            "action_type":       a.action_type,
+            "target_ip":         a.target_ip,
+            "target_user_agent": a.target_user_agent,
+            "regex_pattern":     a.regex_pattern,
+            "description":       self.describe(),
+        }
+# ─── Environment ─────────────────────────────────────────────────────────────────
+VALID_ACTION_TYPES = {"block_ip", "add_rate_limit", "block_user_agent", "write_custom_middleware"}
+class APIGatewayDefender:
+    """
+    OpenEnv-compliant RL environment — API Gateway Defender.
+    The agent monitors a simulated stream of HTTP requests and must apply
+    firewall middleware rules to block malicious traffic while preserving
+    legitimate requests.
+    Usage
+    -----
+        env = APIGatewayDefender()
+        obs = env.reset(task_id="easy")
+        action = Action(action_type="block_ip", target_ip="185.220.101.47")
+        result = env.step(action)
+        print(result.reward.score)
+    """
+    def __init__(self) -> None:
+        self._task_id: str = "easy"
+        self._rules: List[_Rule] = []
+        self._train_traffic: List[Dict[str, Any]] = []
+        self._test_traffic: List[Dict[str, Any]] = []
+        self._step_count: int = 0
+        self._done: bool = False
+        self._best_score: float = 0.0
+    # ── OpenEnv Interface ──────────────────────────────────────────────────────
+    def reset(self, task_id: str = "easy") -> Observation:
+        """
+        Start a new episode on the given task.
+        Parameters
+        ----------
+        task_id : str
+            One of 'easy', 'medium', 'hard'.
+        Returns
+        -------
+        Observation
+            Initial observation containing the first 100 traffic samples.
+        """
+        if task_id not in TASK_GENERATORS:
+            raise ValueError(
+                f"Unknown task_id '{task_id}'. Choose from: {sorted(TASK_GENERATORS)}"
+            )
+        self._task_id = task_id
+        self._rules = []
+        self._step_count = 0
+        self._done = False
+        self._best_score = 0.0
+        gen = TASK_GENERATORS[task_id]
+        self._train_traffic = gen(seed=42)   # agent can see this
+        self._test_traffic  = gen(seed=137)  # grading set (hidden from agent)
+        return self._make_observation()
+    def step(self, action: Action) -> StepResult:
+        """
+        Submit one firewall rule and receive a reward signal.
+        The rule is evaluated against a hidden test traffic set to prevent
+        overfitting to the visible sample. Partial credit is awarded for
+        partial detection; false positives incur a penalty.
+        Parameters
+        ----------
+        action : Action
+            The rule to apply.
+        Returns
+        -------
+        StepResult
+            observation, reward, done flag, and diagnostic info.
+        """
+        if self._done:
+            raise RuntimeError("Episode is finished. Call reset() to start a new episode.")
+        self._step_count += 1
+        # ── Validate action type ──────────────────────────────────────────────
+        if action.action_type not in VALID_ACTION_TYPES:
+            err_reward = Reward(
+                score=0.0,
+                malicious_blocked=0,
+                legitimate_blocked=0,
+                total_malicious=MALICIOUS_COUNT,
+                total_legitimate=TRAFFIC_SIZE - MALICIOUS_COUNT,
+                false_positive_rate=0.0,
+                message=(
+                    f"Invalid action_type '{action.action_type}'. "
+                    f"Must be one of {sorted(VALID_ACTION_TYPES)}."
+                ),
+            )
+            return StepResult(
+                observation=self._make_observation(),
+                reward=err_reward,
+                done=False,
+                info={"error": "invalid_action_type"},
+            )
+        # ── Apply rule ────────────────────────────────────────────────────────
+        self._rules.append(_Rule(action))
+        # ── Grade on hidden test traffic ──────────────────────────────────────
+        reward = self._grade()
+        self._best_score = max(self._best_score, reward.score)
+        # Episode ends at MAX_STEPS or when the agent achieves near-perfect score
+        self._done = self._step_count >= MAX_STEPS or reward.score >= 0.95
+        return StepResult(
+            observation=self._make_observation(),
+            reward=reward,
+            done=self._done,
+            info={
+                "step":          self._step_count,
+                "best_score":    self._best_score,
+                "rules_applied": [r.describe() for r in self._rules],
+                "max_steps":     MAX_STEPS,
+            },
+        )
+    def state(self) -> EnvironmentState:
+        """Return a full serialisable snapshot of the current environment state."""
+        return EnvironmentState(
+            task_id=self._task_id,
+            step_count=self._step_count,
+            active_rules=[r.to_dict() for r in self._rules],
+            episode_done=self._done,
+            best_score=self._best_score,
+            traffic_sample_size=len(self._train_traffic),
+        )
+    def get_task_grader_score(self) -> float:
+        """
+        Programmatic grader — returns score 0.0–1.0 for the current episode.
+        Returns 0.0 if no rules have been applied yet.
+        """
+        if not self._rules:
+            return 0.0
+        return self._grade().score
+    # ── Private Helpers ────────────────────────────────────────────────────────
+    def _make_observation(self) -> Observation:
+        """Build an Observation from the current state (no is_malicious flag exposed)."""
+        visible = [
+            {k: v for k, v in req.items() if k != "is_malicious"}
+            for req in self._train_traffic[:100]
+        ]
+        return Observation(
+            recent_requests=visible,
+            active_rules=[r.describe() for r in self._rules],
+            current_task=self._task_id,
+            task_description=TASK_DESCRIPTIONS[self._task_id],
+            step_count=self._step_count,
+            hint=self._build_hint(),
+        )
+    def _build_hint(self) -> str:
+        """Generate a statistical hint from the visible traffic sample."""
+        if not self._train_traffic:
+            return ""
+        sample = self._train_traffic[:100]
+        malicious_in_sample = [r for r in sample if r.get("is_malicious")]
+        n = len(malicious_in_sample)
+        if self._task_id == "easy":
+            if n == 0:
+                return "Traffic looks normal in this window."
+            ips = {r["ip"] for r in malicious_in_sample}
+            return (
+                f"Warning: {n} POST requests to /login detected in this window "
+                f"from {len(ips)} unique IP(s). Possible brute-force or flood."
+            )
+        elif self._task_id == "medium":
+            if n == 0:
+                return "Traffic looks normal in this window."
+            uas = {r["user_agent"] for r in malicious_in_sample}
+            return (
+                f"Warning: {n} requests to /api/data share {len(uas)} unique User-Agent(s) "
+                f"in this window. Possible scraper activity."
+            )
+        else:
+            if n == 0:
+                return "Traffic looks normal in this window."
+            return (
+                f"Warning: {n} requests in this window contain unusual query string patterns. "
+                f"Check for injection payloads."
+            )
+    def _grade(self) -> Reward:
+        """
+        Apply all active rules to the hidden test traffic set and compute a score.
+        Score formula:
+            detection_rate = malicious_blocked / total_malicious
+            fp_rate        = legitimate_blocked / total_legitimate
+            if fp_rate > FALSE_POSITIVE_THRESHOLD:
+                score = 0.0   ← too many false positives
+            else:
+                score = clamp(detection_rate - fp_rate * 5.0, 0.0, 1.0)
+        """
+        malicious = [r for r in self._test_traffic if r["is_malicious"]]
+        legit     = [r for r in self._test_traffic if not r["is_malicious"]]
+        mal_blocked  = sum(1 for r in malicious if any(rule.blocks(r) for rule in self._rules))
+        legit_blocked = sum(1 for r in legit    if any(rule.blocks(r) for rule in self._rules))
+        total_mal  = len(malicious)
+        total_legit = len(legit)
+        detection_rate = mal_blocked  / total_mal   if total_mal   > 0 else 0.0
+        fp_rate        = legit_blocked / total_legit if total_legit > 0 else 0.0
+        if fp_rate > FALSE_POSITIVE_THRESHOLD:
+            score = 0.0
+            message = (
+                f"Score zeroed: {fp_rate:.1%} false positive rate exceeds "
+                f"{FALSE_POSITIVE_THRESHOLD:.0%} threshold. Rules are too broad — "
+                f"legitimate users are being blocked."
+            )
+        else:
+            score   = max(0.0, min(1.0, detection_rate - fp_rate * 5.0))
+            message = (
+                f"Blocked {mal_blocked}/{total_mal} malicious requests "
+                f"({detection_rate:.1%} detection rate) with "
+                f"{fp_rate:.1%} false positive rate."
+            )
+        return Reward(
+            score=round(score, 4),
+            malicious_blocked=mal_blocked,
+            legitimate_blocked=legit_blocked,
+            total_malicious=total_mal,
+            total_legitimate=total_legit,
+            false_positive_rate=round(fp_rate, 4),
+            message=message,
+        )
+# ─── Convenience: heuristic baseline that runs directly on the class ────────────
+def run_heuristic_baseline() -> Dict[str, float]:
+    """
+    A deterministic heuristic agent that solves all 3 tasks correctly.
+    Used by the /baseline endpoint and as fallback in the inference script.
+    Returns
+    -------
+    Dict[str, float]
+        task_id → score
+    """
+    env = APIGatewayDefender()
+    scores: Dict[str, float] = {}
+    # ── Easy: identify the IP flooding /login ──────────────────────────────────
+    obs = env.reset("easy")
+    ip_counts: Dict[str, int] = {}
+    for req in obs.recent_requests:
+        if req["path"] == "/login" and req["method"] == "POST":
+            ip_counts[req["ip"]] = ip_counts.get(req["ip"], 0) + 1
+    suspect_ip = (
+        max(ip_counts, key=lambda k: ip_counts[k]) if ip_counts else ATTACK_IP_EASY
+    )
+    result = env.step(Action(action_type="block_ip", target_ip=suspect_ip))
+    scores["easy"] = result.reward.score
+    # ── Medium: identify the unusual User-Agent ────────────────────────────────
+    obs = env.reset("medium")
+    ua_counts: Dict[str, int] = {}
+    for req in obs.recent_requests:
+        ua_counts[req["user_agent"]] = ua_counts.get(req["user_agent"], 0) + 1
+    bot_keywords = {"scraper", "bot", "crawler", "spider", "harvester"}
+    browser_keywords = {"mozilla", "chrome", "safari", "firefox", "gecko", "webkit"}
+    suspect_ua = None
+    # Prefer UAs that look like bots
+    for ua, _ in sorted(ua_counts.items(), key=lambda x: -x[1]):
+        if any(k in ua.lower() for k in bot_keywords):
+            suspect_ua = ua
+            break
+    # Fallback: most common UA that doesn't look like a browser
+    if not suspect_ua:
+        for ua, _ in sorted(ua_counts.items(), key=lambda x: -x[1]):
+            if not any(k in ua.lower() for k in browser_keywords):
+                suspect_ua = ua
+                break
+    result = env.step(Action(action_type="block_user_agent", target_user_agent=suspect_ua or ""))
+    scores["medium"] = result.reward.score
+    # ── Hard: write a regex to catch SQLi payloads ────────────────────────────
+    env.reset("hard")
+    result = env.step(
+        Action(
+            action_type="write_custom_middleware",
+            regex_pattern=r"UNION\s+SELECT",
+        )
+    )
+    scores["hard"] = result.reward.score
+    return scores

main.py ADDED Viewed

	@@ -0,0 +1,199 @@

+"""
+API Gateway Defender — FastAPI Server
+=====================================
+Exposes the OpenEnv-compliant HTTP API for the environment.
+Endpoints
+---------
+  POST /reset       — Start a new episode
+  POST /step        — Submit a firewall rule, receive reward
+  GET  /state       — Inspect current environment state
+  GET  /tasks       — List tasks and action schema
+  GET  /grader      — Get grader score for current episode
+  POST /baseline    — Run heuristic baseline across all 3 tasks
+  GET  /health      — Liveness probe (required for HF Spaces ping)
+"""
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from typing import Any, Dict
+from env import (
+    Action,
+    APIGatewayDefender,
+    Observation,
+    TASK_DESCRIPTIONS,
+    run_heuristic_baseline,
+)
+# ─── App setup ───────────────────────────────────────────────────────────────────
+# FastAPI application object; title, description and version are passed
+# straight to the FastAPI constructor.
+app = FastAPI(
+    title="API Gateway Defender",
+    description=(
+        "An OpenEnv RL environment where an AI agent defends a simulated web backend "
+        "against volumetric floods, scraper bots, and SQL injection attacks."
+    ),
+    version="1.0.0",
+)
+# CORS is configured fully open: any origin, any method, any header.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# One module-level environment instance used by every route below. It is
+# stateful and shared by all callers of this process (not isolated per client):
+# a /reset or /step from one caller changes the episode seen by the others.
+_env = APIGatewayDefender()
+# ─── Routes ──────────────────────────────────────────────────────────────────────
+@app.get("/health")
+def health() -> Dict[str, str]:
+    """Liveness probe — returns 200 and confirms the environment is running."""
+    return {"status": "ok", "environment": "api-gateway-defender"}
+@app.get("/")
+def root() -> Dict[str, Any]:
+    """Overview of the environment and available endpoints."""
+    return {
+        "name": "API Gateway Defender",
+        "description": (
+            "OpenEnv RL environment: configure firewall rules to block malicious "
+            "HTTP traffic while preserving legitimate requests."
+        ),
+        "version": "1.0.0",
+        "tasks": list(TASK_DESCRIPTIONS.keys()),
+        "endpoints": {
+            "POST /reset":    "Start a new episode. Body: {task_id: 'easy'|'medium'|'hard'}",
+            "POST /step":     "Submit a firewall rule. Body: Action schema.",
+            "GET  /state":    "Current environment state snapshot.",
+            "GET  /tasks":    "Task descriptions + action/observation schemas.",
+            "GET  /grader":   "Current grader score for the active episode.",
+            "POST /baseline": "Run heuristic baseline agent across all 3 tasks.",
+            "GET  /health":   "Liveness probe.",
+        },
+    }
+@app.post("/reset")
+def reset(body: Dict[str, str] = None) -> Dict[str, Any]:
+    """
+    Start a new episode.
+    Request body (JSON):
+        {"task_id": "easy" | "medium" | "hard"}
+    Returns the initial Observation.
+    """
+    task_id = (body or {}).get("task_id", "easy")
+    try:
+        obs: Observation = _env.reset(task_id=task_id)
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail=str(exc))
+    return obs.model_dump()
+@app.post("/step")
+def step(action: Action) -> Dict[str, Any]:
+    """
+    Submit one firewall rule.
+    Returns StepResult: {observation, reward, done, info}
+    Reward score: 0.0–1.0
+      = detection_rate − (false_positive_rate × 5)
+      = 0.0 if false positive rate > 10%
+    """
+    try:
+        result = _env.step(action)
+    except RuntimeError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    return result.model_dump()
+@app.get("/state")
+def state() -> Dict[str, Any]:
+    """Return the full serialisable state of the current episode."""
+    return _env.state().model_dump()
+@app.get("/tasks")
+def tasks() -> Dict[str, Any]:
+    """
+    List all tasks and their descriptions, plus the action and observation schemas.
+    Required by the OpenEnv spec.
+    """
+    return {
+        "tasks": [
+            {
+                "id":          "easy",
+                "name":        "Volumetric IP Flood Defense",
+                "difficulty":  "easy",
+                "description": TASK_DESCRIPTIONS["easy"],
+                "hint":        "One IP is responsible for all malicious traffic.",
+            },
+            {
+                "id":          "medium",
+                "name":        "Scraper Bot Detection",
+                "difficulty":  "medium",
+                "description": TASK_DESCRIPTIONS["medium"],
+                "hint":        "Many IPs, but a single shared User-Agent string.",
+            },
+            {
+                "id":          "hard",
+                "name":        "SQL Injection Middleware Defense",
+                "difficulty":  "hard",
+                "description": TASK_DESCRIPTIONS["hard"],
+                "hint":        "Rotating IPs and UAs, but a consistent payload pattern.",
+            },
+        ],
+        "action_schema":      Action.model_json_schema(),
+        "observation_schema": {
+            "recent_requests": "list[dict] — last 100 requests: ip, method, path, user_agent, query_string, status_code",
+            "active_rules":    "list[str] — human-readable active firewall rules",
+            "current_task":    "str — 'easy', 'medium', or 'hard'",
+            "task_description":"str — natural language goal",
+            "step_count":      "int — steps taken this episode",
+            "hint":            "str — statistical hint from visible traffic",
+        },
+    }
+@app.get("/grader")
+def grader() -> Dict[str, Any]:
+    """
+    Return the programmatic grader score for the current episode.
+    Score is 0.0–1.0; reflects detection rate minus false-positive penalty.
+    """
+    score      = _env.get_task_grader_score()
+    state_info = _env.state()
+    return {
+        "task_id":      state_info.task_id,
+        "score":        score,
+        "best_score":   state_info.best_score,
+        "rules_applied":[r["description"] for r in state_info.active_rules],
+        "episode_done": state_info.episode_done,
+        "max_steps":    5,
+    }
+@app.post("/baseline")
+def baseline() -> Dict[str, Any]:
+    """
+    Run the heuristic baseline agent across all 3 tasks and return scores.
+    Does not affect the shared episode state.
+    """
+    scores = run_heuristic_baseline()
+    avg    = sum(scores.values()) / len(scores)
+    return {
+        "scores":  scores,
+        "average": round(avg, 4),
+        "message": (
+            "Heuristic baseline: reads visible logs, identifies the attack pattern, "
+            "applies the optimal rule. No LLM required."
+        ),
+    }

openenv.yaml ADDED Viewed

	@@ -0,0 +1,172 @@

+name: api-gateway-defender
+version: "1.0.0"
+description: >
+  A simulated HTTP traffic monitoring environment where an AI agent acts as
+  a Site Reliability Engineer defending a web backend. The agent inspects a
+  stream of incoming HTTP requests and must configure middleware firewall rules
+  to block malicious traffic while preserving legitimate user requests.
+  Models a real production incident domain: rate-limiting, WAF rule authoring,
+  and pattern-based traffic filtering — skills that are highly valued in DevOps,
+  SRE, and cybersecurity engineering.
+author: "API Gateway Defender Team"
+license: "Apache-2.0"
+tags:
+  - openenv
+  - cybersecurity
+  - web-security
+  - sre
+  - real-world
+  - devops
+  - rate-limiting
+  - waf
+tasks:
+  - id: easy
+    name: "Volumetric IP Flood Defense"
+    difficulty: easy
+    max_score: 1.0
+    description: >
+      A single IP address is flooding the /login endpoint with POST requests.
+      The agent must identify the malicious IP from traffic logs and block it
+      (or apply a rate limit). Tests pattern recognition under high-volume noise.
+    success_criteria: >
+      block_ip or add_rate_limit action targeting the flooding IP address,
+      achieving ≥0.95 detection rate with <10% false positive rate.
+  - id: medium
+    name: "Scraper Bot Detection"
+    difficulty: medium
+    max_score: 1.0
+    description: >
+      A scraper bot harvests the /api/data endpoint from 50 different IP addresses,
+      rotating them to evade IP-based blocks. All malicious requests share one
+      identical unusual User-Agent string. The agent must identify and block it.
+    success_criteria: >
+      block_user_agent action with the exact malicious User-Agent string,
+      achieving ≥0.95 detection rate with <10% false positive rate.
+  - id: hard
+    name: "SQL Injection Middleware Defense"
+    difficulty: hard
+    max_score: 1.0
+    description: >
+      An attacker probes the database via SQL injection. They rotate IP addresses
+      AND User-Agents on every request to evade simple rules. Every malicious
+      request contains a SQL injection payload in the query string. The agent
+      must write a regex-based middleware rule to detect and block all payloads.
+    success_criteria: >
+      write_custom_middleware action with a regex that matches 'UNION SELECT'
+      pattern (case-insensitive), achieving ≥0.95 detection rate with <10% FP rate.
+observation_space:
+  type: structured
+  description: "Snapshot of recent HTTP traffic and active gateway configuration."
+  fields:
+    - name: recent_requests
+      type: "list[dict]"
+      description: "Last 100 HTTP requests. Each has: ip, method, path, user_agent, query_string, status_code."
+    - name: active_rules
+      type: "list[str]"
+      description: "Human-readable list of firewall rules currently active."
+    - name: current_task
+      type: string
+      description: "Task ID: 'easy', 'medium', or 'hard'."
+    - name: task_description
+      type: string
+      description: "Natural language description of the attack to defend against."
+    - name: step_count
+      type: integer
+      description: "Number of rules submitted in the current episode."
+    - name: hint
+      type: string
+      description: "Statistical hint about suspicious patterns in the visible traffic window."
+action_space:
+  type: discrete_parameterized
+  description: "Submit one firewall rule to the gateway middleware."
+  fields:
+    - name: action_type
+      type: string
+      required: true
+      choices:
+        - block_ip
+        - add_rate_limit
+        - block_user_agent
+        - write_custom_middleware
+      description: "Which type of rule to apply."
+    - name: target_ip
+      type: string
+      required: false
+      description: "IP address. Required for block_ip and add_rate_limit."
+    - name: target_user_agent
+      type: string
+      required: false
+      description: "Exact User-Agent string. Required for block_user_agent."
+    - name: regex_pattern
+      type: string
+      required: false
+      description: "Python regex matched against '{path}?{query_string}'. Required for write_custom_middleware."
+    - name: max_requests
+      type: integer
+      required: false
+      default: 60
+      description: "Requests per minute cap. Used with add_rate_limit."
+reward:
+  range: [0.0, 1.0]
+  type: continuous
+  formula: |
+    detection_rate = malicious_blocked / total_malicious
+    false_positive_rate = legitimate_blocked / total_legitimate
+    if false_positive_rate > 0.10:
+        score = 0.0
+    else:
+        score = clamp(detection_rate - false_positive_rate * 5.0, 0.0, 1.0)
+  description: >
+    Rewards accurate detection of malicious traffic. Penalises false positives
+    (blocking legitimate users) with a 5x multiplier. Zeroed entirely if
+    false positive rate exceeds 10% — models real operational constraints
+    where blocking paying customers is unacceptable.
+episode:
+  max_steps: 5
+  termination_conditions:
+    - "score >= 0.95 (success)"
+    - "step_count >= 5 (step limit)"
+  reset_required: true
+evaluation:
+  grader_type: programmatic
+  deterministic: true
+  train_seed: 42
+  test_seed: 137
+  description: >
+    Rules are graded against a hidden test traffic set (seed 137) distinct from
+    the visible training sample (seed 42). This prevents agents from overfitting
+    to specific IPs/UAs in the observation window.
+api:
+  framework: FastAPI
+  port: 7860
+  endpoints:
+    - "POST /reset"
+    - "POST /step"
+    - "GET  /state"
+    - "GET  /tasks"
+    - "GET  /grader"
+    - "POST /baseline"
+    - "GET  /health"
+baseline:
+  agent_type: heuristic
+  scores:
+    easy: 1.0
+    medium: 1.0
+    hard: 1.0
+  note: >
+    Heuristic agent reads the visible traffic sample, identifies the attack
+    pattern statistically, and applies the optimal rule. Scores are fully
+    reproducible with fixed seeds.